def bulk_insert(self, curs, data, table=None):
    """Copy data to table.  If table not provided, use temp table.

    @param curs: database cursor to run COPY on
    @param data: list of row dicts; no-op when empty
    @param table: destination table name; None means load into the
                  temp table (self.temp), which is created first and
                  analyzed after the copy
    """
    if not data:
        return
    _table = table or self.temp
    # lazy %-args: formatting is skipped when DEBUG is off
    self.log.debug("bulk: COPY %d rows into %s", len(data), _table)
    if not table:
        self.create_temp(curs)
    skytools.magic_insert(curs, _table, data, self.fields)
    if not table:
        # keep planner stats fresh for the freshly loaded temp table
        self.analyze(curs)
def bulk_insert(self, curs, data, table=None):
    """COPY rows into *table*; when no table is given, load the temp table.

    Loading into the temp table also creates it first and runs ANALYZE
    afterwards.  Empty *data* is a no-op.
    """
    if not data:
        return
    use_temp = not table
    dest = table or self.temp
    self.log.debug("bulk: COPY %d rows into %s" % (len(data), dest))
    if use_temp:
        self.create_temp(curs)
    skytools.magic_insert(curs, dest, data, self.fields)
    if use_temp:
        self.analyze(curs)
def dispatch(self, dst_db, ev_list):
    """Generic dispatcher.

    Decodes each event, picks the destination table (optionally a
    date-partitioned one), applies the field map, tags the event done,
    then ensures the tables exist and bulk-loads the grouped rows.
    """
    # group decoded rows by destination table
    tables = {}
    for ev in ev_list:
        row = skytools.db_urldecode(ev.data)
        # guess dest table
        if self.part_field:
            if self.part_field == "_EVTIME":
                # partition by event creation time
                partval = str(ev.creation_date)
            else:
                partval = str(row[self.part_field])
            partval = partval.split(' ')[0]     # keep only the date part
            date = partval.split('-')
            if self.part_method == 'monthly':
                date = date[:2]                 # YYYY_MM suffix instead of YYYY_MM_DD
            suffix = '_'.join(date)
            tbl = "%s_%s" % (self.dest_table, suffix)
        else:
            tbl = self.dest_table
        # map fields
        if self.field_map is None:
            dstrow = row
        else:
            dstrow = {v: row[k] for k, v in self.field_map.items()}
        # add row into table (single lookup instead of `not tbl in tables`)
        tables.setdefault(tbl, []).append(dstrow)
        ev.tag_done()
    # create tables if needed
    self.check_tables(dst_db, tables)
    # insert into data tables
    curs = dst_db.cursor()
    for tbl, tbl_rows in tables.items():
        skytools.magic_insert(curs, tbl, tbl_rows)
def bulk_insert(self, curs, data, table=None):
    """Copy data to table. If table not provided, use temp table.

    When re-using existing temp table, it is always truncated first and
    analyzed after copy.

    @param curs: database cursor
    @param data: list of row dicts; no-op when empty
    @param table: explicit destination, or None for the temp table
    """
    if not data:
        return
    _use_temp = table is None
    # if table not specified use temp
    if _use_temp:
        table = self.temp
        # truncate when re-using existing table
        if not self.create_temp(curs):
            self.truncate(curs)
    # lazy %-args: formatting is skipped when DEBUG is off
    self.log.debug("bulk: COPY %d rows into %s", len(data), table)
    skytools.magic_insert(curs, table, data, self.fields, quoted_table=True)
    if _use_temp:
        self.analyze(curs)
def dispatch(self, dst_db, ev_list):
    """Generic dispatcher.

    Decodes each event, picks the destination table (optionally a
    date-partitioned one), applies the field map, then ensures the
    tables exist and bulk-loads the grouped rows.
    """
    # group decoded rows by destination table
    tables = {}
    for ev in ev_list:
        row = skytools.db_urldecode(ev.data)
        # guess dest table
        if self.part_field:
            if self.part_field == "_EVTIME":
                # partition by event creation time
                partval = str(ev.creation_date)
            else:
                partval = str(row[self.part_field])
            partval = partval.split(' ')[0]     # keep only the date part
            date = partval.split('-')
            if self.part_method == 'monthly':
                date = date[:2]                 # YYYY_MM suffix instead of YYYY_MM_DD
            suffix = '_'.join(date)
            tbl = "%s_%s" % (self.dest_table, suffix)
        else:
            tbl = self.dest_table
        # map fields
        if self.field_map is None:
            dstrow = row
        else:
            dstrow = {v: row[k] for k, v in self.field_map.items()}
        # add row into table (single lookup instead of `not tbl in tables`)
        tables.setdefault(tbl, []).append(dstrow)
    # create tables if needed
    self.check_tables(dst_db, tables)
    # insert into data tables
    curs = dst_db.cursor()
    for tbl, tbl_rows in tables.items():
        skytools.magic_insert(curs, tbl, tbl_rows)
def bulk_insert(self, curs, data, table=None):
    """COPY rows into *table*, defaulting to the temp table.

    When no table is given the temp table is used: it is created on
    demand, truncated when an existing one is re-used, and analyzed
    after the copy if self.run_analyze is set.  Empty *data* is a no-op.
    """
    if not data:
        return
    using_temp = table is None
    if using_temp:
        table = self.temp
        if not self.create_temp(curs):
            # existing temp table re-used - wipe leftover rows first
            self.truncate(curs)
    self.log.debug("bulk: COPY %d rows into %s", len(data), table)
    skytools.magic_insert(curs, table, data, self.fields, quoted_table=True)
    if using_temp and self.run_analyze:
        self.analyze(curs)
def apply_rows(self, curs):
    """COPY the collected rows into the target table."""
    skytools.magic_insert(curs, self.table_name, self.rows, self.get_fields())
def process_one_table(self, dst_db, tbl, cache):
    """Apply cached delete/update/insert batches for one table.

    Stages each batch in a temp table via COPY, then runs bulk
    DELETE/UPDATE/INSERT against the real table.  Skips tables that do
    not exist on the destination.  Row-count mismatches are logged as
    warnings, not raised.
    """
    del_list = cache.final_del_list
    ins_list = cache.final_ins_list
    upd_list = cache.final_upd_list
    col_list = cache.col_list
    real_update_count = len(upd_list)
    self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)" % (tbl, len(ins_list), len(upd_list), len(del_list)))
    if tbl in self.remap_tables:
        old = tbl
        tbl = self.remap_tables[tbl]
        self.log.debug("Redirect %s to %s" % (old, tbl))
    # hack to unbroke stuff: merged load treats inserts as updates
    if self.load_method == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # check if interesting table
    curs = dst_db.cursor()
    if not skytools.exists_table(curs, tbl):
        self.log.warning("Ignoring events for table: %s" % tbl)
        return
    # fetch distribution fields (must join on them in addition to pkey)
    dist_fields = find_dist_fields(curs, tbl)
    extra_fields = []
    for fld in dist_fields:
        if fld not in cache.pkey_list:
            extra_fields.append(fld)
    self.log.debug("PKey fields: %s Extra fields: %s" % (",".join(cache.pkey_list), ",".join(extra_fields)))
    # create temp table
    temp = self.create_temp_table(curs, tbl)
    # where expr must have pkey and dist fields
    klist = []
    for pk in cache.pkey_list + extra_fields:
        exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk), quote_fqident(temp), quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (
        quote_fqident(tbl), quote_fqident(temp), whe_expr)
    # create update sql (skip key fields - they must not be SET)
    slist = []
    key_fields = cache.pkey_list + extra_fields
    for col in cache.col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)
    # insert sql
    colstr = ",".join([quote_ident(c) for c in cache.col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (
        quote_fqident(tbl), colstr, colstr, quote_fqident(temp))
    # process deleted rows
    if len(del_list) > 0:
        self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
        # delete old rows from temp table
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
        skytools.magic_insert(curs, temp, del_list, col_list)
        # delete rows
        self.log.debug(del_sql)
        curs.execute(del_sql)
        self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
        if len(del_list) != curs.rowcount:
            # "deleted", not "updated": this branch reports the DELETE
            self.log.warning("Delete mismatch: expected=%s deleted=%d" % (len(del_list), curs.rowcount))
    # process updated rows
    if len(upd_list) > 0:
        self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
        # delete old rows from temp table
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
        skytools.magic_insert(curs, temp, upd_list, col_list)
        if self.load_method == METH_CORRECT:
            # update main table
            self.log.debug(upd_sql)
            curs.execute(upd_sql)
            self.log.debug(curs.statusmessage)
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s updated=%d" % (len(upd_list), curs.rowcount))
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug(curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s deleted=%d" % (real_update_count, curs.rowcount))
            # insert into main table
            if AVOID_BIZGRES_BUG:
                # copy again, into main table
                self.log.debug("COPY %d rows into %s" % (len(upd_list), tbl))
                skytools.magic_insert(curs, tbl, upd_list, col_list)
            else:
                # better way, but does not work due bizgres bug
                self.log.debug(ins_sql)
                curs.execute(ins_sql)
                self.log.debug(curs.statusmessage)
    # process new rows
    if len(ins_list) > 0:
        self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
        skytools.magic_insert(curs, tbl, ins_list, col_list)
    # delete remaining rows from the staging table
    if USE_LONGLIVED_TEMP_TABLES:
        q = "truncate %s" % quote_fqident(temp)
    else:
        # fscking problems with long-lived temp tables
        q = "drop table %s" % quote_fqident(temp)
    self.log.debug(q)
    curs.execute(q)
def bulk_flush(self, curs):
    """Flush prepared insert/update/delete batches into dest table.

    Rows are staged in a temp table via COPY, then applied with bulk
    DELETE/UPDATE/INSERT statements.  METH_MERGED folds inserts into
    the update path (delete + re-insert); METH_CORRECT does a real
    UPDATE.  Count mismatches are logged as warnings, not raised.
    """
    ins_list, upd_list, del_list = self.prepare_data()
    # reorder cols, put pks first
    col_list = self.pkey_list[:]
    for k in self.col_list:
        if k not in self.pkey_list:
            col_list.append(k)
    # remember update count before the merged-method hack below
    real_update_count = len(upd_list)
    self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)", self.table_name, len(ins_list), len(upd_list), len(del_list))
    # hack to unbroke stuff
    if self.method == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # fetch distribution fields (cached on first call)
    if self.dist_fields is None:
        self.dist_fields = self.find_dist_fields(curs)
    key_fields = self.pkey_list[:]
    for fld in self.dist_fields:
        if fld not in key_fields:
            key_fields.append(fld)
    self.log.debug("PKey fields: %s Dist fields: %s", ",".join(self.pkey_list), ",".join(self.dist_fields))
    # create temp table; returns (raw name, quoted name)
    temp, qtemp = self.create_temp_table(curs)
    tbl = self.dest_table
    qtbl = self.fq_dest_table
    # where expr must have pkey and dist fields
    klist = []
    for pk in key_fields:
        exp = "%s.%s = %s.%s" % (qtbl, quote_ident(pk), qtemp, quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (qtbl, qtemp, whe_expr)
    # create update sql (key fields are never SET)
    slist = []
    for col in col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), qtemp, quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        qtbl, ", ".join(slist), qtemp, whe_expr)
    # avoid updates on pk-only table
    if not slist:
        upd_list = []
    # insert sql
    colstr = ",".join([quote_ident(c) for c in col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (qtbl, colstr, colstr, qtemp)
    # track whether temp table needs cleanup at the end
    temp_used = False
    # process deleted rows
    if len(del_list) > 0:
        self.log.debug("bulk: Deleting %d rows from %s", len(del_list), tbl)
        # delete old rows
        q = "truncate %s" % qtemp
        self.log.debug('bulk: %s', q)
        curs.execute(q)
        # copy rows
        self.log.debug("bulk: COPY %d rows into %s", len(del_list), temp)
        skytools.magic_insert(curs, qtemp, del_list, col_list, quoted_table=1)
        # delete rows
        self.log.debug('bulk: %s', del_sql)
        curs.execute(del_sql)
        self.log.debug("bulk: %s - %d", curs.statusmessage, curs.rowcount)
        if len(del_list) != curs.rowcount:
            self.log.warning("Delete mismatch: expected=%s deleted=%d", len(del_list), curs.rowcount)
        temp_used = True
    # process updated rows
    if len(upd_list) > 0:
        self.log.debug("bulk: Updating %d rows in %s", len(upd_list), tbl)
        # delete old rows
        q = "truncate %s" % qtemp
        self.log.debug('bulk: %s', q)
        curs.execute(q)
        # copy rows
        self.log.debug("bulk: COPY %d rows into %s", len(upd_list), temp)
        skytools.magic_insert(curs, qtemp, upd_list, col_list, quoted_table=1)
        temp_used = True
        if self.method == METH_CORRECT:
            # update main table
            self.log.debug('bulk: %s', upd_sql)
            curs.execute(upd_sql)
            self.log.debug("bulk: %s - %d", curs.statusmessage, curs.rowcount)
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s updated=%d", len(upd_list), curs.rowcount)
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug('bulk: %s', del_sql)
            curs.execute(del_sql)
            self.log.debug('bulk: %s', curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("bulk: Update mismatch: expected=%s deleted=%d", real_update_count, curs.rowcount)
            # insert into main table
            if AVOID_BIZGRES_BUG:
                # copy again, into main table
                self.log.debug("bulk: COPY %d rows into %s", len(upd_list), tbl)
                skytools.magic_insert(curs, qtbl, upd_list, col_list, quoted_table=1)
            else:
                # better way, but does not work due bizgres bug
                self.log.debug('bulk: %s', ins_sql)
                curs.execute(ins_sql)
                self.log.debug('bulk: %s', curs.statusmessage)
    # process new rows
    if len(ins_list) > 0:
        self.log.debug("bulk: Inserting %d rows into %s", len(ins_list), tbl)
        self.log.debug("bulk: COPY %d rows into %s", len(ins_list), tbl)
        skytools.magic_insert(curs, qtbl, ins_list, col_list, quoted_table=1)
    # delete remaining rows from the staging table
    if temp_used:
        if USE_LONGLIVED_TEMP_TABLES or USE_REAL_TABLE:
            q = "truncate %s" % qtemp
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % qtemp
        self.log.debug('bulk: %s', q)
        curs.execute(q)
    self.reset()
def bulk_flush(self, curs):
    """Flush prepared insert/update/delete batches into the table.

    Rows are staged in a temp table via COPY, then applied with bulk
    DELETE/UPDATE/INSERT statements.  METH_MERGED folds inserts into
    the update path (delete + re-insert); METH_CORRECT does a real
    UPDATE.  Count mismatches are logged as warnings, not raised.
    """
    ins_list, upd_list, del_list = self.prepare_data()
    # reorder cols, put pks first
    col_list = self.pkey_list[:]
    for k in self.col_list:
        if k not in self.pkey_list:
            col_list.append(k)
    # remember update count before the merged-method hack below
    real_update_count = len(upd_list)
    self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)" % (
        self.table_name, len(ins_list), len(upd_list), len(del_list)))
    # hack to unbroke stuff
    if self.method == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # fetch distribution fields (cached on first call)
    if self.dist_fields is None:
        self.dist_fields = self.find_dist_fields(curs)
    key_fields = self.pkey_list[:]
    for fld in self.dist_fields:
        if fld not in key_fields:
            key_fields.append(fld)
    self.log.debug("PKey fields: %s Dist fields: %s" % (
        ",".join(self.pkey_list), ",".join(self.dist_fields)))
    # create temp table; returns (raw name, quoted name)
    temp, qtemp = self.create_temp_table(curs)
    tbl = self.table_name
    qtbl = quote_fqident(self.table_name)
    # where expr must have pkey and dist fields
    klist = []
    for pk in key_fields:
        exp = "%s.%s = %s.%s" % (qtbl, quote_ident(pk), qtemp, quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (qtbl, qtemp, whe_expr)
    # create update sql (key fields are never SET)
    slist = []
    for col in col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), qtemp, quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        qtbl, ", ".join(slist), qtemp, whe_expr)
    # avoid updates on pk-only table
    if not slist:
        upd_list = []
    # insert sql
    colstr = ",".join([quote_ident(c) for c in col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (
        qtbl, colstr, colstr, qtemp)
    # track whether temp table needs cleanup at the end
    temp_used = False
    # process deleted rows
    if len(del_list) > 0:
        self.log.debug("bulk: Deleting %d rows from %s" % (len(del_list), tbl))
        # delete old rows
        q = "truncate %s" % qtemp
        self.log.debug('bulk: %s' % q)
        curs.execute(q)
        # copy rows
        self.log.debug("bulk: COPY %d rows into %s" % (len(del_list), temp))
        skytools.magic_insert(curs, qtemp, del_list, col_list, quoted_table=1)
        # delete rows
        self.log.debug('bulk: ' + del_sql)
        curs.execute(del_sql)
        self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount))
        if len(del_list) != curs.rowcount:
            self.log.warning("Delete mismatch: expected=%s deleted=%d" % (len(del_list), curs.rowcount))
        temp_used = True
    # process updated rows
    if len(upd_list) > 0:
        self.log.debug("bulk: Updating %d rows in %s" % (len(upd_list), tbl))
        # delete old rows
        q = "truncate %s" % qtemp
        self.log.debug('bulk: ' + q)
        curs.execute(q)
        # copy rows
        self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), temp))
        skytools.magic_insert(curs, qtemp, upd_list, col_list, quoted_table=1)
        temp_used = True
        if self.method == METH_CORRECT:
            # update main table
            self.log.debug('bulk: ' + upd_sql)
            curs.execute(upd_sql)
            self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount))
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s updated=%d" % (len(upd_list), curs.rowcount))
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug('bulk: ' + del_sql)
            curs.execute(del_sql)
            self.log.debug('bulk: ' + curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("bulk: Update mismatch: expected=%s deleted=%d" % (real_update_count, curs.rowcount))
            # insert into main table
            if AVOID_BIZGRES_BUG:
                # copy again, into main table
                self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), tbl))
                skytools.magic_insert(curs, qtbl, upd_list, col_list, quoted_table=1)
            else:
                # better way, but does not work due bizgres bug
                self.log.debug('bulk: ' + ins_sql)
                curs.execute(ins_sql)
                self.log.debug('bulk: ' + curs.statusmessage)
    # process new rows
    if len(ins_list) > 0:
        self.log.debug("bulk: Inserting %d rows into %s" % (len(ins_list), tbl))
        self.log.debug("bulk: COPY %d rows into %s" % (len(ins_list), tbl))
        skytools.magic_insert(curs, qtbl, ins_list, col_list, quoted_table=1)
    # delete remaining rows from the staging table
    if temp_used:
        if USE_LONGLIVED_TEMP_TABLES or USE_REAL_TABLE:
            q = "truncate %s" % qtemp
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % qtemp
        self.log.debug('bulk: ' + q)
        curs.execute(q)
    self.reset()
def flush(self, curs):
    """Flush prepared insert/update/delete batches into the table.

    Rows are staged in a temp table via COPY, then applied with bulk
    DELETE/UPDATE/INSERT statements.  METH_MERGED folds inserts into
    the update path (delete + re-insert); METH_CORRECT does a real
    UPDATE.  Count mismatches are logged as warnings, not raised.
    """
    ins_list, upd_list, del_list = self.prepare_data()
    # reorder cols, put pks first
    col_list = self.pkey_list[:]
    for k in self.col_list:
        if k not in self.pkey_list:
            col_list.append(k)
    # remember update count before the merged-method hack below
    real_update_count = len(upd_list)
    #self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)" % (
    #    tbl, len(ins_list), len(upd_list), len(del_list)))
    # hack to unbroke stuff
    if LOAD_METHOD == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # fetch distribution fields (cached on first call)
    if self.dist_fields is None:
        self.dist_fields = self.find_dist_fields(curs)
    key_fields = self.pkey_list[:]
    for fld in self.dist_fields:
        if fld not in key_fields:
            key_fields.append(fld)
    #self.log.debug("PKey fields: %s Extra fields: %s" % (
    #    ",".join(cache.pkey_list), ",".join(extra_fields)))
    # create temp table
    temp = self.create_temp_table(curs)
    tbl = self.table_name
    # where expr must have pkey and dist fields
    klist = []
    for pk in key_fields:
        exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk), quote_fqident(temp), quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (
        quote_fqident(tbl), quote_fqident(temp), whe_expr)
    # create update sql (key fields are never SET)
    slist = []
    for col in col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)
    # insert sql
    colstr = ",".join([quote_ident(c) for c in col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (
        quote_fqident(tbl), colstr, colstr, quote_fqident(temp))
    # track whether temp table needs cleanup at the end
    temp_used = False
    # process deleted rows
    if len(del_list) > 0:
        #self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
        # delete old rows
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
        skytools.magic_insert(curs, temp, del_list, col_list)
        # delete rows
        self.log.debug(del_sql)
        curs.execute(del_sql)
        self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
        if len(del_list) != curs.rowcount:
            self.log.warning("Delete mismatch: expected=%s deleted=%d" % (len(del_list), curs.rowcount))
        temp_used = True
    # process updated rows
    if len(upd_list) > 0:
        #self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
        # delete old rows
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
        skytools.magic_insert(curs, temp, upd_list, col_list)
        temp_used = True
        if LOAD_METHOD == METH_CORRECT:
            # update main table
            self.log.debug(upd_sql)
            curs.execute(upd_sql)
            self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s updated=%d" % (len(upd_list), curs.rowcount))
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug(curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s deleted=%d" % (real_update_count, curs.rowcount))
            # insert into main table
            if AVOID_BIZGRES_BUG:
                # copy again, into main table
                self.log.debug("COPY %d rows into %s" % (len(upd_list), tbl))
                skytools.magic_insert(curs, tbl, upd_list, col_list)
            else:
                # better way, but does not work due bizgres bug
                self.log.debug(ins_sql)
                curs.execute(ins_sql)
                self.log.debug(curs.statusmessage)
    # process new rows
    if len(ins_list) > 0:
        self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
        skytools.magic_insert(curs, tbl, ins_list, col_list)
    # delete remaining rows from the staging table
    if temp_used:
        if USE_LONGLIVED_TEMP_TABLES:
            q = "truncate %s" % quote_fqident(temp)
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
    self.reset()
def process_one_table(self, dst_db, tbl, cache):
    """Apply cached delete/update/insert batches for one table.

    Stages each batch in a temp table via COPY, then runs bulk
    DELETE/UPDATE/INSERT against the real table.  Skips tables that do
    not exist on the destination.  Row-count mismatches are logged as
    warnings, not raised.
    """
    del_list = cache.final_del_list
    ins_list = cache.final_ins_list
    upd_list = cache.final_upd_list
    col_list = cache.col_list
    real_update_count = len(upd_list)
    self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)" % (
        tbl, len(ins_list), len(upd_list), len(del_list)))
    if tbl in self.remap_tables:
        old = tbl
        tbl = self.remap_tables[tbl]
        self.log.debug("Redirect %s to %s" % (old, tbl))
    # hack to unbroke stuff: merged load treats inserts as updates
    if self.load_method == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # check if interesting table
    curs = dst_db.cursor()
    if not skytools.exists_table(curs, tbl):
        self.log.warning("Ignoring events for table: %s" % tbl)
        return
    # fetch distribution fields (must join on them in addition to pkey)
    dist_fields = find_dist_fields(curs, tbl)
    extra_fields = []
    for fld in dist_fields:
        if fld not in cache.pkey_list:
            extra_fields.append(fld)
    self.log.debug("PKey fields: %s Extra fields: %s" % (
        ",".join(cache.pkey_list), ",".join(extra_fields)))
    # create temp table
    temp = self.create_temp_table(curs, tbl)
    # where expr must have pkey and dist fields
    klist = []
    for pk in cache.pkey_list + extra_fields:
        exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk), quote_fqident(temp), quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (
        quote_fqident(tbl), quote_fqident(temp), whe_expr)
    # create update sql (skip key fields - they must not be SET)
    slist = []
    key_fields = cache.pkey_list + extra_fields
    for col in cache.col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)
    # insert sql
    colstr = ",".join([quote_ident(c) for c in cache.col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (
        quote_fqident(tbl), colstr, colstr, quote_fqident(temp))
    # process deleted rows
    if len(del_list) > 0:
        self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
        # delete old rows from temp table
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
        skytools.magic_insert(curs, temp, del_list, col_list)
        # delete rows
        self.log.debug(del_sql)
        curs.execute(del_sql)
        self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
        if len(del_list) != curs.rowcount:
            # "deleted", not "updated": this branch reports the DELETE
            self.log.warning("Delete mismatch: expected=%s deleted=%d" % (len(del_list), curs.rowcount))
    # process updated rows
    if len(upd_list) > 0:
        self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
        # delete old rows from temp table
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
        skytools.magic_insert(curs, temp, upd_list, col_list)
        if self.load_method == METH_CORRECT:
            # update main table
            self.log.debug(upd_sql)
            curs.execute(upd_sql)
            self.log.debug(curs.statusmessage)
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s updated=%d" % (len(upd_list), curs.rowcount))
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug(curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("Update mismatch: expected=%s deleted=%d" % (real_update_count, curs.rowcount))
            # insert into main table
            if 0:
                # does not work due bizgres bug
                self.log.debug(ins_sql)
                curs.execute(ins_sql)
                self.log.debug(curs.statusmessage)
            else:
                # copy again, into main table
                self.log.debug("COPY %d rows into %s" % (len(upd_list), tbl))
                skytools.magic_insert(curs, tbl, upd_list, col_list)
    # process new rows
    if len(ins_list) > 0:
        self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
        skytools.magic_insert(curs, tbl, ins_list, col_list)
    # delete remaining rows from the staging table
    if USE_LONGLIVED_TEMP_TABLES:
        q = "truncate %s" % quote_fqident(temp)
    else:
        # fscking problems with long-lived temp tables
        q = "drop table %s" % quote_fqident(temp)
    self.log.debug(q)
    curs.execute(q)
def bulk_insert_events(curs, rows, fields, queue_name):
    """COPY events directly into the queue's current event table.

    @param curs: database cursor
    @param rows: list of event row dicts
    @param fields: logical field names, translated via _fldmap to
                   actual event-table column names
    @param queue_name: pgq queue whose current event table is targeted
    """
    q = "select pgq.current_event_table(%s)"
    curs.execute(q, [queue_name])
    tbl = curs.fetchone()[0]
    # materialize as a list: Python 3 map() returns a one-shot iterator
    db_fields = [_fldmap.get(f) for f in fields]
    skytools.magic_insert(curs, tbl, rows, db_fields)
def flush(self, curs):
    """Flush prepared insert/update/delete batches into the table.

    Rows are staged in a temp table via COPY, then applied with bulk
    DELETE/UPDATE/INSERT statements.  METH_MERGED folds inserts into
    the update path (delete + re-insert); METH_CORRECT does a real
    UPDATE.  Count mismatches are logged as warnings, not raised.
    """
    ins_list, upd_list, del_list = self.prepare_data()
    # reorder cols, put pks first
    col_list = self.pkey_list[:]
    for k in self.col_list:
        if k not in self.pkey_list:
            col_list.append(k)
    # remember update count before the merged-method hack below
    real_update_count = len(upd_list)
    #self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)",
    #    tbl, len(ins_list), len(upd_list), len(del_list))
    # hack to unbroke stuff
    if LOAD_METHOD == METH_MERGED:
        upd_list += ins_list
        ins_list = []
    # fetch distribution fields (cached on first call)
    if self.dist_fields is None:
        self.dist_fields = self.find_dist_fields(curs)
    key_fields = self.pkey_list[:]
    for fld in self.dist_fields:
        if fld not in key_fields:
            key_fields.append(fld)
    #self.log.debug("PKey fields: %s Extra fields: %s",
    #    ",".join(cache.pkey_list), ",".join(extra_fields))
    # create temp table
    temp = self.create_temp_table(curs)
    tbl = self.table_name
    # where expr must have pkey and dist fields
    klist = []
    for pk in key_fields:
        exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk), quote_fqident(temp), quote_ident(pk))
        klist.append(exp)
    whe_expr = " and ".join(klist)
    # create del sql
    del_sql = "delete from only %s using %s where %s" % (
        quote_fqident(tbl), quote_fqident(temp), whe_expr)
    # create update sql (key fields are never SET)
    slist = []
    for col in col_list:
        if col not in key_fields:
            exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
            slist.append(exp)
    upd_sql = "update only %s set %s from %s where %s" % (
        quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)
    # insert sql
    colstr = ",".join([quote_ident(c) for c in col_list])
    ins_sql = "insert into %s (%s) select %s from %s" % (
        quote_fqident(tbl), colstr, colstr, quote_fqident(temp))
    # track whether temp table needs cleanup at the end
    temp_used = False
    # process deleted rows
    if len(del_list) > 0:
        #self.log.info("Deleting %d rows from %s", len(del_list), tbl)
        # delete old rows
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s", len(del_list), temp)
        skytools.magic_insert(curs, temp, del_list, col_list)
        # delete rows
        self.log.debug(del_sql)
        curs.execute(del_sql)
        self.log.debug("%s - %d", curs.statusmessage, curs.rowcount)
        if len(del_list) != curs.rowcount:
            self.log.warning("Delete mismatch: expected=%d deleted=%d", len(del_list), curs.rowcount)
        temp_used = True
    # process updated rows
    if len(upd_list) > 0:
        #self.log.info("Updating %d rows in %s", len(upd_list), tbl)
        # delete old rows
        q = "truncate %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
        # copy rows
        self.log.debug("COPY %d rows into %s", len(upd_list), temp)
        skytools.magic_insert(curs, temp, upd_list, col_list)
        temp_used = True
        if LOAD_METHOD == METH_CORRECT:
            # update main table
            self.log.debug(upd_sql)
            curs.execute(upd_sql)
            self.log.debug("%s - %d", curs.statusmessage, curs.rowcount)
            # check count
            if len(upd_list) != curs.rowcount:
                self.log.warning("Update mismatch: expected=%d updated=%d", len(upd_list), curs.rowcount)
        else:
            # merged method: delete from main table, then re-insert
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug(curs.statusmessage)
            # check count against pre-merge update count
            if real_update_count != curs.rowcount:
                self.log.warning("Update mismatch: expected=%d deleted=%d", real_update_count, curs.rowcount)
            # insert into main table
            if AVOID_BIZGRES_BUG:
                # copy again, into main table
                self.log.debug("COPY %d rows into %s", len(upd_list), tbl)
                skytools.magic_insert(curs, tbl, upd_list, col_list)
            else:
                # better way, but does not work due bizgres bug
                self.log.debug(ins_sql)
                curs.execute(ins_sql)
                self.log.debug(curs.statusmessage)
    # process new rows
    if len(ins_list) > 0:
        self.log.info("Inserting %d rows into %s", len(ins_list), tbl)
        skytools.magic_insert(curs, tbl, ins_list, col_list)
    # delete remaining rows from the staging table
    if temp_used:
        if USE_LONGLIVED_TEMP_TABLES:
            q = "truncate %s" % quote_fqident(temp)
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
    self.reset()