def process_sync(self, t1, t2, src_db, dst_db):
    """Compare row count and checksum of one table on both databases.

    The checksum query is chosen from the server versions of the two
    connections, can be replaced through the ``compare_sql`` config
    option, and is restricted by the copy condition returned by the
    plugin of ``t2``.

    Returns 1 when the formatted results differ, 0 when they are equal.
    """
    src_tbl = t1.dest_table
    dst_tbl = t2.dest_table

    src_curs = src_db.cursor()
    dst_curs = dst_db.cursor()

    # the same condition is applied to both sides
    condition = t2.plugin.get_copy_condition(src_curs, dst_curs)

    self.log.info('Counting %s' % dst_tbl)

    # get common cols
    cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl)

    # get sane query
    src_ver = src_db.server_version
    dst_ver = dst_db.server_version
    if src_ver < 80300 or dst_ver < 80300:
        # 8.2- does not have record to text and text to bit casts,
        # so we need to use a bit of evil hackery
        default_sql = "select count(1) as cnt, sum(bit_in(textout('x'||substr(md5(textin(record_out(_COLS_))),1,16)), 0, 64)::bigint) as chksum from only _TABLE_"
    elif (src_ver < 80400 or dst_ver < 80400) and src_ver != dst_ver:
        # hashtext changed in 8.4 so we need to use md5 in case
        # there is 8.3 vs 8.4+ comparison
        default_sql = "select count(1) as cnt, sum(('x'||substr(md5(_COLS_::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_"
    else:
        # this way is much faster than the above
        default_sql = "select count(1) as cnt, sum(hashtext(_COLS_::text)::bigint) as chksum from only _TABLE_"

    template = self.cf.get('compare_sql', default_sql).replace("_COLS_", cols)

    def _query_for(table):
        # fill in the table name and append the optional row filter
        sql = template.replace('_TABLE_', skytools.quote_fqident(table))
        if condition:
            sql = sql + " WHERE " + condition
        return sql

    src_q = _query_for(src_tbl)
    dst_q = _query_for(dst_tbl)

    fmt = self.cf.get('compare_fmt', "%(cnt)d rows, checksum=%(chksum)s")

    self.log.debug("srcdb: " + src_q)
    src_curs.execute(src_q)
    src_str = fmt % src_curs.fetchone()
    self.log.info("srcdb: %s" % src_str)
    src_db.commit()

    self.log.debug("dstdb: " + dst_q)
    dst_curs.execute(dst_q)
    dst_str = fmt % dst_curs.fetchone()
    self.log.info("dstdb: %s" % dst_str)
    dst_db.commit()

    if src_str == dst_str:
        return 0
    self.log.warning("%s: Results do not match!" % dst_tbl)
    return 1
def check_tables(self, dcon, tables):
    """Create every table from `tables` that does not exist yet.

    Used by other procedures to ensure that a table is there before they
    start inserting.

    The commits should not be dangerous, as we haven't done anything with
    cdr's yet, so they should still be in one TX.  Although it would be
    nicer to have a lock for table creation.

    dcon   -- destination connection; committed after each created table
    tables -- dict mapping table name to an info dict with the keys
              'table', 'parent' and 'key_list'
    """
    dcur = dcon.cursor()
    for tbl, inf in tables.items():
        if skytools.exists_table(dcur, tbl):
            continue

        # fill the placeholders of the partition template
        sql = self.part_template
        sql = sql.replace('_DEST_TABLE', skytools.quote_fqident(inf['table']))
        sql = sql.replace('_PARENT', skytools.quote_fqident(inf['parent']))
        sql = sql.replace('_PKEY', inf['key_list'])
        # be similar to table_dispatcher
        schema_table = inf['table'].replace(".", "__")
        sql = sql.replace('_SCHEMA_TABLE', skytools.quote_ident(schema_table))

        dcur.execute(sql)
        dcon.commit()
        self.log.info('%s: Created table %s' % (self.job_name, tbl))
def create_temp_table(self, curs):
    """Make sure the loading table exists and return its names.

    Depending on USE_REAL_TABLE the helper is either a regular table or a
    temp table; with USE_LONGLIVED_TEMP_TABLES an already existing temp
    table is reused.

    Returns a tuple (table name, quoted table name).
    """
    if USE_REAL_TABLE:
        tempname = self.table_name + "_loadertmpx"
        quoted = quote_fqident(tempname)
        if skytools.exists_table(curs, tempname):
            self.log.debug("bulk: Using existing real table %s" % tempname)
        else:
            # create non-temp table
            q = "create table %s (like %s)" % (
                quoted, quote_fqident(self.table_name))
            self.log.debug("bulk: Creating real table: %s" % q)
            curs.execute(q)
        return tempname, quoted

    # temp table for loading
    tempname = self.table_name.replace('.', '_') + "_loadertmp"
    quoted = quote_ident(tempname)
    if USE_LONGLIVED_TEMP_TABLES and skytools.exists_temp_table(curs, tempname):
        self.log.debug("bulk: Using existing temp table %s" % tempname)
        return tempname, quoted

    # bizgres crashes on delete rows
    # removed arg = "on commit delete rows"
    arg = "on commit preserve rows"
    q = "create temp table %s (like %s) %s" % (
        quoted, quote_fqident(self.table_name), arg)
    self.log.debug("bulk: Creating temp table: %s" % q)
    curs.execute(q)
    return tempname, quoted
def process_sync(self, src_tbl, dst_tbl, src_db, dst_db):
    """Run the comparison query on both databases and log the results.

    The query and the result format can be overridden with the
    ``compare_sql`` and ``compare_fmt`` config options.  A warning is
    logged when the formatted results differ; nothing is returned.
    """
    src_curs = src_db.cursor()
    dst_curs = dst_db.cursor()

    self.log.info('Counting %s' % dst_tbl)

    template = self.cf.get(
        'compare_sql',
        "select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t")
    src_q = template.replace('_TABLE_', skytools.quote_fqident(src_tbl))
    dst_q = template.replace('_TABLE_', skytools.quote_fqident(dst_tbl))

    fmt = self.cf.get('compare_fmt', "%(cnt)d rows, checksum=%(chksum)s")

    self.log.debug("srcdb: " + src_q)
    src_curs.execute(src_q)
    src_str = fmt % src_curs.fetchone()
    self.log.info("srcdb: %s" % src_str)
    src_db.commit()

    self.log.debug("dstdb: " + dst_q)
    dst_curs.execute(dst_q)
    dst_str = fmt % dst_curs.fetchone()
    self.log.info("dstdb: %s" % dst_str)
    dst_db.commit()

    if src_str != dst_str:
        self.log.warning("%s: Results do not match!" % dst_tbl)
def create_temp_table(self, curs):
    """Make sure the loading table for ``self.dest_table`` exists.

    Depending on USE_REAL_TABLE the helper is either a regular table or a
    temp table; with USE_LONGLIVED_TEMP_TABLES an already existing temp
    table is reused.

    Returns a tuple (table name, quoted table name).
    """
    if USE_REAL_TABLE:
        tempname = self.dest_table + "_loadertmpx"
        quoted = quote_fqident(tempname)
        if skytools.exists_table(curs, tempname):
            self.log.debug("bulk: Using existing real table %s", tempname)
        else:
            # create non-temp table
            q = "create table %s (like %s)" % (quoted, quote_fqident(self.dest_table))
            self.log.debug("bulk: Creating real table: %s", q)
            curs.execute(q)
        return tempname, quoted

    # temp table for loading
    tempname = self.dest_table.replace('.', '_') + "_loadertmp"
    quoted = quote_ident(tempname)
    if USE_LONGLIVED_TEMP_TABLES and skytools.exists_temp_table(curs, tempname):
        self.log.debug("bulk: Using existing temp table %s", tempname)
        return tempname, quoted

    # bizgres crashes on delete rows
    # removed arg = "on commit delete rows"
    arg = "on commit preserve rows"
    q = "create temp table %s (like %s) %s" % (
        quoted, quote_fqident(self.dest_table), arg)
    self.log.debug("bulk: Creating temp table: %s", q)
    curs.execute(q)
    return tempname, quoted
def __init__(self, table_name, args, dest_table):
    """Store table names and arguments, then validate the arguments.

    `dest_table` falls back to `table_name` when it is empty.  Quoted
    forms of both names are kept in `fq_table_name` / `fq_dest_table`.
    """
    self.table_name = table_name
    if dest_table:
        self.dest_table = dest_table
    else:
        self.dest_table = table_name

    quote = skytools.quote_fqident
    self.fq_table_name = quote(self.table_name)
    self.fq_dest_table = quote(self.dest_table)

    self.args = args
    self._check_args(args)
def check_tables(self, dcon, tables):
    """Create the tables from `tables` that are missing in the database.

    Used by other procedures to ensure that a table is there before they
    start inserting.

    The commits should not be dangerous, as we haven't done anything with
    cdr's yet, so they should still be in one TX.  Although it would be
    nicer to have a lock for table creation.
    """
    dcur = dcon.cursor()
    for tbl, inf in tables.items():
        if skytools.exists_table(dcur, tbl):
            continue

        # placeholder -> replacement, applied in this order;
        # the last one is there to be similar to table_dispatcher
        substitutions = [
            ('_DEST_TABLE', skytools.quote_fqident(inf['table'])),
            ('_PARENT', skytools.quote_fqident(inf['parent'])),
            ('_PKEY', inf['key_list']),
            ('_SCHEMA_TABLE',
             skytools.quote_ident(inf['table'].replace(".", "__"))),
        ]
        sql = self.part_template
        for placeholder, value in substitutions:
            sql = sql.replace(placeholder, value)

        dcur.execute(sql)
        dcon.commit()
        self.log.info('%s: Created table %s' % (self.job_name, tbl))
def __init__(self, table_name, args, dest_table):
    """Store table names and arguments, validate them and load the config.

    `dest_table` falls back to `table_name` when it is empty.  Quoted
    forms of both names are kept in `fq_table_name` / `fq_dest_table`;
    the result of `get_config()` is stored in `conf`.
    """
    self.table_name = table_name
    if dest_table:
        self.dest_table = dest_table
    else:
        self.dest_table = table_name

    quote = skytools.quote_fqident
    self.fq_table_name = quote(self.table_name)
    self.fq_dest_table = quote(self.dest_table)

    self.args = args
    self._check_args(args)
    self.conf = self.get_config()
def process_sync(self, t1, t2, src_db, dst_db):
    """Compare row count and checksum of one table on both databases.

    An md5 based checksum is used when the server versions differ and one
    of them is older than 8.4, otherwise hashtext.  The query can be
    replaced through the ``compare_sql`` config option and is restricted
    by the copy condition returned by the plugin of ``t2``.

    Returns 1 when the formatted results differ, 0 when they are equal.
    """
    src_tbl = t1.dest_table
    dst_tbl = t2.dest_table

    src_curs = src_db.cursor()
    dst_curs = dst_db.cursor()

    # the same condition is applied to both sides
    condition = t2.plugin.get_copy_condition(src_curs, dst_curs)

    self.log.info('Counting %s' % dst_tbl)

    # get common cols
    cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl)

    # get sane query
    src_ver = src_db.server_version
    dst_ver = dst_db.server_version
    if (src_ver < 80400 or dst_ver < 80400) and src_ver != dst_ver:
        default_sql = "select count(1) as cnt, sum(('x'||substr(md5(_COLS_::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_"
    else:
        default_sql = "select count(1) as cnt, sum(hashtext(_COLS_::text)::bigint) as chksum from only _TABLE_"

    template = self.cf.get('compare_sql', default_sql).replace("_COLS_", cols)

    def _query_for(table):
        # fill in the table name and append the optional row filter
        sql = template.replace('_TABLE_', skytools.quote_fqident(table))
        if condition:
            sql = sql + " WHERE " + condition
        return sql

    src_q = _query_for(src_tbl)
    dst_q = _query_for(dst_tbl)

    fmt = self.cf.get('compare_fmt', "%(cnt)d rows, checksum=%(chksum)s")

    self.log.debug("srcdb: " + src_q)
    src_curs.execute(src_q)
    src_str = fmt % src_curs.fetchone()
    self.log.info("srcdb: %s" % src_str)
    src_db.commit()

    self.log.debug("dstdb: " + dst_q)
    dst_curs.execute(dst_q)
    dst_str = fmt % dst_curs.fetchone()
    self.log.info("dstdb: %s" % dst_str)
    dst_db.commit()

    if src_str == dst_str:
        return 0
    self.log.warning("%s: Results do not match!" % dst_tbl)
    return 1
def __init__(self, table, pkeys, log, conf):
    """Initialise the base loader and derive the temp table naming."""
    BaseBulkCollectingLoader.__init__(self, table, pkeys, log, conf)

    # all fields
    self.fields = None
    # key fields used in where part, possible to add non pk fields
    # (like dist keys in gp)
    self.keys = self.pkeys[:]

    # temp table name: dots of the target name become underscores
    self.temp = "%s_loadertmp" % self.table.replace('.', '_')

    # quoted table names
    self.qtable = quote_fqident(self.table)
    self.qtemp = quote_fqident(self.temp)
def get_create_sql(self, curs, new_seq_name=None):
    """Return the SQL that creates this sequence.

    For an owned sequence only the ALTER SEQUENCE ... OWNED BY statement
    is produced.  Otherwise a CREATE SEQUENCE statement is returned,
    using `new_seq_name` instead of the own name when it is given.
    """
    if not self.owner:
        seq_name = new_seq_name or self.name
        return "CREATE SEQUENCE %s %s;" % (quote_fqident(seq_name), self.defn)

    # we are in table def, forget full def
    return "ALTER SEQUENCE %s\n OWNED BY %s;" % (quote_fqident(self.name), self.owner)
def check_part(self, curs, dst, pkey_list):
    """Create partition `dst` from the split template if it is missing.

    Raises UsageError when the partition does not exist and no
    `split_part_template` is configured.
    """
    if skytools.exists_table(curs, dst):
        return

    template = self.split_part_template
    if not template:
        raise UsageError('Partition %s does not exist and split_part_template not specified' % dst)

    quoted_dst = quote_fqident(dst)
    values = dict(
        dest=quoted_dst,
        part=quoted_dst,
        parent=quote_fqident(self.table_name),
        pkey=",".join(pkey_list),  # quoting?
    )
    curs.execute(template % values)
def get_create_sql(self, curs, new_seq_name=None):
    """Return the SQL that creates this sequence.

    For an owned sequence only the ALTER SEQUENCE ... OWNED BY statement
    is produced.  Otherwise a CREATE SEQUENCE statement is returned,
    using `new_seq_name` instead of the own name when it is given.
    """
    if self.owner:
        # we are in table def, forget full def
        quoted = quote_fqident(self.name)
        return "ALTER SEQUENCE %s\n OWNED BY %s;" % (quoted, self.owner)

    if new_seq_name:
        quoted = quote_fqident(new_seq_name)
    else:
        quoted = quote_fqident(self.name)
    return 'CREATE SEQUENCE %s %s;' % (quoted, self.defn)
def retval(self, service_name = None, params = None, **kvargs):
    """Return the collected resultsets in the output format.

    Usually called as the last statement of a dbservice.  Pending
    messages and SQL statements are first added as the "_status" and
    "_sql" resultsets.  In test mode non-empty resultsets are rendered as
    text tables, otherwise rows are packed with make_record_array().
    When `service_name` is given, that service is called with `params`
    (or the keyword arguments) and its rows are appended to the result.
    """
    params = params or kvargs
    self.raise_if_errors()

    if len(self.messages):
        self.return_next(self.messages, "_status")
    if self.sqls is not None and len(self.sqls):
        self.return_next(self.sqls, "_sql")

    results = []
    for entry in self._retval:
        res_name = entry[0]
        rows = entry[1]
        res_count = str(len(rows))
        if not (self._is_test and len(rows) > 0):
            results.append([res_name, res_count, make_record_array(rows)])
            continue
        # test mode: a header line followed by numbered table lines
        results.append([res_name, res_count, res_name])
        rendered = render_table(rows, rows[0].keys())
        for lineno, trow in enumerate(rendered, 1):
            results.append([res_name, lineno, trow])

    if service_name:
        sql = "select * from %s( {i_context}, {i_params} );" % skytools.quote_fqident(service_name)
        par = dbdict(i_context=self._context, i_params=make_record(params))
        for row in self.run_query(sql, par):
            results.append((row.res_code, row.res_text, row.res_rows))
    return results
def lock_table_root(self, lock_db, setup_db, dst_db, src_tbl, dst_tbl):
    """Lock the source table and wait until the consumer has caught up.

    The table is locked in SHARE MODE on `lock_db`, two ticks are forced
    through `setup_db`, and the worker's last tick on `dst_db` is polled
    until it passes the first forced tick.  Without the force option the
    process exits with status 1 once the lock has been held longer than
    `self.lock_timeout`.
    """
    setup_curs = setup_db.cursor()
    lock_curs = lock_db.cursor()

    # lock table in separate connection
    self.log.info('Locking %s' % src_tbl)
    lock_db.commit()
    self.set_lock_timeout(lock_curs)
    locked_at = time.time()
    lock_curs.execute("LOCK TABLE %s IN SHARE MODE" % skytools.quote_fqident(src_tbl))

    # now wait until consumer has updated target table until locking
    self.log.info('Syncing %s' % dst_tbl)

    # consumer must get further than this tick
    tick_id = self.force_tick(setup_curs)
    # try to force second tick also
    self.force_tick(setup_curs)

    # now wait
    while True:
        time.sleep(0.5)

        info = self.exec_cmd(dst_db, "select * from pgq_node.get_node_info(%s)",
                             [self.queue_name])
        if info[0]['worker_last_tick'] > tick_id:
            break

        # limit lock time
        if time.time() <= locked_at + self.lock_timeout or self.options.force:
            continue
        self.log.error('Consumer lagging too much, exiting')
        lock_db.rollback()
        sys.exit(1)
def __init__(self, rowhandler, table_name, table_mode, cf, log):
    """Set up the dispatcher for one table according to `table_mode`.

    'direct' disables splitting, 'split' reads the split_* options from
    `cf`, 'ignore' configures nothing further.  Any other table mode, or
    an unknown split mode, raises UsageError.
    """
    self.part_map = {}
    self.rowhandler = rowhandler
    self.table_name = table_name
    self.quoted_name = quote_fqident(table_name)
    self.log = log

    if table_mode == 'ignore':
        return
    if table_mode == 'direct':
        self.split = False
        return
    if table_mode != 'split':
        raise UsageError('Bad value for table_mode: '+table_mode)

    self.split = True
    # split_mode may carry a field name: "<mode>:<field>"
    smode = cf.get('split_mode', 'by-batch-time')
    sfield = None
    if smode.find(':') > 0:
        smode, sfield = smode.split(':', 1)
    self.split_field = sfield
    self.split_part = cf.get('split_part', '%(table_name)s_%(year)s_%(month)s_%(day)s')
    self.split_part_template = cf.get('split_part_template', '')

    formatters = {
        'by-batch-time': self.split_date_from_batch,
        'by-event-time': self.split_date_from_event,
        'by-date-field': self.split_date_from_field,
    }
    if smode not in formatters:
        raise UsageError('Bad value for split_mode: '+smode)
    self.split_format = formatters[smode]

    self.log.debug("%s: split_mode=%s, split_field=%s, split_part=%s",
                   self.table_name, smode, self.split_field, self.split_part)
def gen_copy_tbl(self, tbl, src_curs, dst_curs):
    """Build the "table (columns)" expression for COPY from common fields.

    Primary key columns come first, followed by the remaining source
    columns that also exist on the destination.  The key list and the
    resulting field list are stored in `pkey_list` / `common_fields`.
    Exits the process with status 1 when the primary keys differ.
    """
    self.pkey_list = get_pkey_list(src_curs, tbl)
    dst_pkey = get_pkey_list(dst_curs, tbl)
    if dst_pkey != self.pkey_list:
        self.log.error('pkeys do not match')
        sys.exit(1)

    src_cols = get_column_list(src_curs, tbl)
    dst_cols = get_column_list(dst_curs, tbl)

    shared = [col for col in src_cols
              if col not in self.pkey_list and col in dst_cols]
    field_list = list(self.pkey_list) + shared
    self.common_fields = field_list

    quoted_cols = ",".join([skytools.quote_ident(col) for col in field_list])
    tbl_expr = "%s (%s)" % (skytools.quote_fqident(tbl), quoted_cols)

    self.log.debug("using copy expr: %s" % tbl_expr)
    return tbl_expr
def get_create_sql(self, curs, new_table_name=None):
    """Return the SQL that creates this rule.

    With `new_table_name` the table name found after "TO" in the rule
    definition is replaced by the quoted new name; an Exception is raised
    when no table name can be found.  If the rule is not in the default
    'O' state, an ALTER TABLE statement setting its state is appended.
    """
    if new_table_name:
        idrx = r'''([a-z0-9._]+|"([^"]+|"")+")+'''
        # fixme: broken / quoting
        rx = r"\bTO[ ]" + idrx
        found = re.compile(rx, re.X).search(self.defn)
        if not found:
            raise Exception('Cannot find table name in rule')
        sql = self.defn.replace(found.group(1), quote_fqident(new_table_name))
        table = new_table_name
    else:
        sql = self.defn
        table = self.table_name

    if self.enabled == 'O':
        return sql

    # O - rule fires in origin and local modes
    # D - rule is disabled
    # R - rule fires in replica mode
    # A - rule fires always
    actions = {'R': 'ENABLE REPLICA', 'A': 'ENABLE ALWAYS', 'D': 'DISABLE'}
    action = actions[self.enabled]
    return sql + ('\nALTER TABLE %s %s RULE %s;' % (table, action, self.name))
def get_create_sql(self, curs, new_table_name=None):
    """Return the SQL that creates this index.

    With `new_table_name` the "INDEX name ON table" part of the
    definition is rewritten to a new index name on the new table.  For a
    clustered index an ALTER TABLE ... CLUSTER ON statement is appended.
    """
    if not new_table_name:
        sql = self.defn
        iname = self.local_name
        tname = self.table_name
    else:
        # fixme: seems broken
        iname = find_new_name(curs, self.name)
        tname = new_table_name
        replacement = "INDEX %s ON %s " % (quote_ident(iname), quote_fqident(tname))
        pattern = r"\bINDEX[ ][a-z0-9._]+[ ]ON[ ][a-z0-9._]+[ ]"
        sql = rx_replace(pattern, self.defn, replacement)

    if not self.is_clustered:
        return sql
    return sql + " ALTER TABLE ONLY %s\n CLUSTER ON %s;" % (quote_fqident(tname), quote_ident(iname))
def check_tables(self, dcon, tables):
    """Create the tables named in `tables` that do not exist yet.

    Used by other procedures to ensure that a table is there before they
    start inserting.

    The commits should not be dangerous, as we haven't done anything with
    cdr's yet, so they should still be in one TX.  Although it would be
    nicer to have a lock for table creation.

    Raises Exception when a table is missing and no part_template is set.
    """
    dcur = dcon.cursor()
    for tbl in tables.keys():
        if skytools.exists_table(dcur, tbl):
            continue
        if not self.part_template:
            raise Exception(
                'Dest table does not exists and no way to create it.')

        quoted_tbl = skytools.quote_fqident(tbl)
        sql = self.part_template.replace(DEST_TABLE, quoted_tbl)

        # we do this to make sure that constraints for
        # tables who contain a schema will still work
        flat_name = skytools.quote_ident(tbl.replace(".", "__"))
        sql = sql.replace(SCHEMA_TABLE, flat_name)

        dcur.execute(sql)
        dcon.commit()
        self.log.info('%s: Created table %s' % (self.job_name, tbl))
def mk_insert_sql(row, tbl, pkey_list=None, field_map=None):
    """Generate INSERT statement from dict data.

    With `field_map` only the mapped source fields are inserted, under
    their destination names; otherwise every field of `row` is used.
    `pkey_list` is accepted but not used.

    >>> from collections import OrderedDict
    >>> row = OrderedDict([('id',1), ('data', None)])
    >>> mk_insert_sql(row, 'tbl')
    "insert into public.tbl (id, data) values ('1', null);"
    >>> mk_insert_sql(row, 'tbl', ['x'], OrderedDict([('id', 'id_'), ('data', 'data_')]))
    "insert into public.tbl (id_, data_) values ('1', null);"
    """
    if field_map:
        # (destination column, source value) pairs
        pairs = [(dst, row[src]) for src, dst in field_map.items()]
    else:
        pairs = list(row.items())

    col_str = ", ".join([skytools.quote_ident(col) for col, _ in pairs])
    val_str = ", ".join([skytools.quote_literal(val) for _, val in pairs])
    return "insert into %s (%s) values (%s);" % (
        skytools.quote_fqident(tbl), col_str, val_str)
def process_sync(self, tbl, src_db, dst_db):
    """Run the same comparison query on both databases and log the results.

    The query and the result format can be overridden with the
    ``compare_sql`` and ``compare_fmt`` config options.  A warning is
    logged when the formatted results differ; nothing is returned.
    """
    src_curs = src_db.cursor()
    dst_curs = dst_db.cursor()

    self.log.info('Counting %s' % tbl)

    template = self.cf.get(
        'compare_sql',
        "select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t")
    query = template.replace('_TABLE_', skytools.quote_fqident(tbl))

    fmt = self.cf.get('compare_fmt', "%(cnt)d rows, checksum=%(chksum)s")

    self.log.debug("srcdb: " + query)
    src_curs.execute(query)
    src_str = fmt % src_curs.fetchone()
    self.log.info("srcdb: %s" % src_str)

    self.log.debug("dstdb: " + query)
    dst_curs.execute(query)
    dst_str = fmt % dst_curs.fetchone()
    self.log.info("dstdb: %s" % dst_str)

    if src_str != dst_str:
        self.log.warning("%s: Results do not match!" % tbl)
def mk_update_sql(row, tbl, pkey_list, field_map=None):
    r"""Generate UPDATE statement from dict data.

    The primary key fields form the WHERE clause, all other fields the
    SET list.  With `field_map` (source name -> destination name) only
    mapped fields are written, under their destination names.

    Raises Exception when `pkey_list` is empty.

    >>> mk_update_sql({'id': 0, 'id2': '2', 'data': 'str\\'}, 'Table', ['id', 'id2'])
    'update only public."Table" set data = E\'str\\\\\' where id = \'0\' and id2 = \'2\';'
    """
    if len(pkey_list) < 1:
        raise Exception("update needs pkeys")

    set_list = []
    whe_list = []
    pkmap = {}
    for k in pkey_list:
        pkmap[k] = 1
        # an empty mapped name falls back to the source name
        new_k = field_map and field_map[k] or k
        col = skytools.quote_ident(new_k)
        val = skytools.quote_literal(row[k])
        whe_list.append("%s = %s" % (col, val))

    # .items() instead of .iteritems(): works on Python 2 and Python 3
    if field_map:
        for src, dst in field_map.items():
            if src not in pkmap:
                col = skytools.quote_ident(dst)
                val = skytools.quote_literal(row[src])
                set_list.append("%s = %s" % (col, val))
    else:
        for col, val in row.items():
            if col not in pkmap:
                col = skytools.quote_ident(col)
                val = skytools.quote_literal(val)
                set_list.append("%s = %s" % (col, val))

    return "update only %s set %s where %s;" % (
        skytools.quote_fqident(tbl), ", ".join(set_list), " and ".join(whe_list))
def get_create_sql(self, curs, new_table_name=None):
    """Return the SQL that creates this rule.

    With `new_table_name` the table name found after "TO" in the rule
    definition is replaced by the quoted new name; an Exception is raised
    when no table name can be found.  If the rule is not in the default
    'O' state, an ALTER TABLE statement setting its state is appended.
    """
    if new_table_name:
        idrx = r'''([a-z0-9._]+|"([^"]+|"")+")+'''
        # fixme: broken / quoting
        rx = r"\bTO[ ]" + idrx
        match = re.compile(rx, re.X).search(self.defn)
        if match is None:
            raise Exception('Cannot find table name in rule')
        old_tbl = match.group(1)
        sql = self.defn.replace(old_tbl, quote_fqident(new_table_name))
        table = new_table_name
    else:
        sql = self.defn
        table = self.table_name

    if self.enabled == 'O':
        return sql

    # O - rule fires in origin and local modes
    # D - rule is disabled
    # R - rule fires in replica mode
    # A - rule fires always
    state_sql = {
        'R': 'ENABLE REPLICA',
        'A': 'ENABLE ALWAYS',
        'D': 'DISABLE',
    }
    action = state_sql[self.enabled]
    sql += ('\nALTER TABLE %s %s RULE %s;' % (table, action, self.name))
    return sql
def mk_update_sql(row, tbl, pkey_list, field_map = None):
    r"""Generate UPDATE statement from dict data.

    The primary key fields form the WHERE clause, all other fields the
    SET list.  With `field_map` (source name -> destination name) only
    mapped fields are written, under their destination names.

    Raises Exception when `pkey_list` is empty.

    >>> mk_update_sql({'id': 0, 'id2': '2', 'data': 'str\\'}, 'Table', ['id', 'id2'])
    'update only public."Table" set data = E\'str\\\\\' where id = \'0\' and id2 = \'2\';'
    """
    if len(pkey_list) < 1:
        raise Exception("update needs pkeys")

    set_list = []
    whe_list = []
    pkmap = {}
    for k in pkey_list:
        pkmap[k] = 1
        # an empty mapped name falls back to the source name
        new_k = field_map and field_map[k] or k
        col = skytools.quote_ident(new_k)
        val = skytools.quote_literal(row[k])
        whe_list.append("%s = %s" % (col, val))

    # .items() instead of .iteritems(): works on Python 2 and Python 3
    if field_map:
        for src, dst in field_map.items():
            if src not in pkmap:
                col = skytools.quote_ident(dst)
                val = skytools.quote_literal(row[src])
                set_list.append("%s = %s" % (col, val))
    else:
        for col, val in row.items():
            if col not in pkmap:
                col = skytools.quote_ident(col)
                val = skytools.quote_literal(val)
                set_list.append("%s = %s" % (col, val))

    return "update only %s set %s where %s;" % (
        skytools.quote_fqident(tbl), ", ".join(set_list), " and ".join(whe_list))
def get_drop_sql(self, curs):
    """Return SQL that revokes all privileges listed in `self.acl_list`.

    One REVOKE statement is produced per ACL entry; the statements are
    joined with newlines.  An empty ACL list yields an empty string.
    `curs` is not used.
    """
    sql_list = []
    for user, _acl, _who in self.acl_list:
        # PostgreSQL grammar: REVOKE <privileges> ON <object> FROM <role>
        sql = "REVOKE ALL ON %s FROM %s;" % (quote_fqident(self.name), quote_ident(user))
        sql_list.append(sql)
    return "\n".join(sql_list)
def __init__(self, table, pkeys, log, conf):
    """Initialise the base loader and derive the temp table naming.

    With USE_REAL_TABLE the helper table keeps the full table name and is
    quoted as a qualified name; otherwise dots are replaced and the name
    is quoted as a plain identifier.
    """
    super(BaseBulkTempLoader, self).__init__(table, pkeys, log, conf)

    # temp table name
    if USE_REAL_TABLE:
        temp_name = self.table + "_loadertmpx"
        quote_temp = quote_fqident
    else:
        temp_name = self.table.replace('.', '_') + "_loadertmp"
        quote_temp = quote_ident
    self.temp = temp_name
    self.qtemp = quote_temp(self.temp)

    # quoted table name
    self.qtable = quote_fqident(self.table)
    # all fields
    self.fields = None
    # key fields used in where part, possible to add non pk fields
    # (like dist keys in gp)
    self.keys = self.pkeys[:]
def got_missed_delete(self, tbl, dst_row):
    """Count a missed delete and show the DELETE statement that fixes it.

    The WHERE clause is built from the primary key fields of `dst_row`.
    """
    self.cnt_delete += 1

    conditions = []
    for key in self.pkey_list:
        quoted_key = skytools.quote_ident(key)
        value = unescape(dst_row[key])
        self.addcmp(conditions, quoted_key, value)

    where = " and ".join(conditions)
    fix_sql = "delete from only %s where %s;" % (skytools.quote_fqident(tbl), where)
    self.show_fix(tbl, fix_sql, 'delete')
def build_statement(table, cols):
    """Return the quoted table name, followed by "(fields)" if cols are given."""
    qtable = skytools.quote_fqident(table)
    if not cols:
        return qtable
    return "%s (%s)" % (qtable, build_qfields(cols))
def __init__(self, rowhandler, table_name, table_mode, cf, log):
    """Set up the dispatcher for one table according to `table_mode`.

    'direct' disables splitting, 'split' reads the split_* options from
    `cf`, 'ignore' configures nothing further.  Any other table mode, or
    an unknown split mode, raises UsageError.
    """
    self.part_map = {}
    self.rowhandler = rowhandler
    self.table_name = table_name
    self.quoted_name = quote_fqident(table_name)
    self.log = log

    if table_mode == 'ignore':
        return
    if table_mode == 'direct':
        self.split = False
        return
    if table_mode != 'split':
        raise UsageError('Bad value for table_mode: ' + table_mode)

    self.split = True
    # split_mode may carry a field name: "<mode>:<field>"
    smode = cf.get('split_mode', 'by-batch-time')
    sfield = None
    if smode.find(':') > 0:
        smode, sfield = smode.split(':', 1)
    self.split_field = sfield
    self.split_part = cf.get(
        'split_part', '%(table_name)s_%(year)s_%(month)s_%(day)s')
    self.split_part_template = cf.get('split_part_template', '')

    formatters = {
        'by-batch-time': self.split_date_from_batch,
        'by-event-time': self.split_date_from_event,
        'by-date-field': self.split_date_from_field,
    }
    if smode not in formatters:
        raise UsageError('Bad value for split_mode: ' + smode)
    self.split_format = formatters[smode]

    summary = (self.table_name, smode, self.split_field, self.split_part)
    self.log.debug(
        "%s: split_mode=%s, split_field=%s, split_part=%s" % summary)
def handle_truncate_event(self, ev, dst_curs):
    """Apply one truncate event to its destination table.

    Events for unknown or uninteresting tables are counted as ignored.
    The table's plugin is looked up (and cached in `used_plugins`); when
    its config sets 'ignore_truncate' the event is only logged.
    """
    table_key = ev.extra1
    t = self.get_table_by_name(table_key)
    interesting = t and t.interesting(
        ev, self.cur_tick, self.copy_thread, self.copy_table_name)
    if not interesting:
        self.stat_increase('ignored_events')
        return

    fqname = skytools.quote_fqident(t.dest_table)

    if table_key in self.used_plugins:
        plugin = self.used_plugins[table_key]
    else:
        plugin = t.get_plugin()
        self.used_plugins[table_key] = plugin

    if plugin.conf.get('ignore_truncate'):
        self.log.info("ignoring truncate for %s", fqname)
        return

    #
    # Always use CASCADE, because without it the
    # operation cannot work with FKeys, on both
    # slave and master.
    #
    truncate_sql = "TRUNCATE %s CASCADE;" % fqname

    self.flush_sql(dst_curs)
    dst_curs.execute(truncate_sql)
def magic_insert(curs, tablename, data, fields = None, use_insert = 0, quoted_table = False):
    r"""Copy/insert a list of dict/list data to database.

    If curs is None, then the copy or insert statements are returned
    as string.  For list of dict the field list is optional, as its
    possible to guess them from dict keys.

    curs         -- database cursor, or None to get the SQL as a string
    tablename    -- target table; quoted here unless `quoted_table` is set
    data         -- list of dicts or list of lists; empty data returns None
    fields       -- column names; required for list rows
    use_insert   -- true: generate INSERT statements, false: use COPY
    quoted_table -- true when `tablename` is already quoted

    Raises Exception when list rows are given without `fields`.

    Example:

    >>> magic_insert(None, 'tbl', [[1, '1'], [2, '2']], ['col1', 'col2'])
    'COPY public.tbl (col1,col2) FROM STDIN;\n1\t1\n2\t2\n\\.\n'
    """
    if len(data) == 0:
        return

    # one flag for every COPY/INSERT decision below, so that falsy values
    # other than 0 (None, '') behave consistently
    copy_mode = not use_insert

    # decide how to process
    if hasattr(data[0], 'keys'):
        if fields is None:
            # materialise, so the field order stays fixed for all rows
            fields = list(data[0].keys())
        row_func = _gen_dict_copy if copy_mode else _gen_dict_insert
    else:
        if fields is None:
            raise Exception("Non-dict data needs field list")
        row_func = _gen_list_copy if copy_mode else _gen_list_insert

    qfields = [skytools.quote_ident(f) for f in fields]
    if quoted_table:
        qtablename = tablename
    else:
        qtablename = skytools.quote_fqident(tablename)

    # init processing
    buf = StringIO()
    if curs is None and copy_mode:
        fmt = "COPY %s (%s) FROM STDIN;\n"
        buf.write(fmt % (qtablename, ",".join(qfields)))

    # process data
    for row in data:
        buf.write(row_func(qtablename, row, fields, qfields))
        buf.write("\n")

    # if user needs only string, return it
    if curs is None:
        if copy_mode:
            buf.write("\\.\n")
        return buf.getvalue()

    # do the actual copy/inserts
    if copy_mode:
        buf.seek(0)
        hdr = "%s (%s)" % (qtablename, ",".join(qfields))
        curs.copy_from(buf, hdr)
    else:
        curs.execute(buf.getvalue())
def process_sync(self, tbl, src_db, dst_db):
    """Run the same count query on both databases and log the results.

    The query can be overridden with the ``compare_sql`` config option.
    A warning is logged when the results differ; nothing is returned.
    """
    src_curs = src_db.cursor()
    dst_curs = dst_db.cursor()

    self.log.info('Counting %s' % tbl)

    template = self.cf.get('compare_sql', "select count(1) from only _TABLE_")
    query = template.replace('_TABLE_', skytools.quote_fqident(tbl))

    self.log.debug("srcdb: " + query)
    src_curs.execute(query)
    src_str = ", ".join([str(col) for col in src_curs.fetchone()])
    self.log.info("srcdb: res = %s" % src_str)

    self.log.debug("dstdb: " + query)
    dst_curs.execute(query)
    dst_str = ", ".join([str(col) for col in dst_curs.fetchone()])
    self.log.info("dstdb: res = %s" % dst_str)

    if src_str != dst_str:
        self.log.warning("%s: Results do not match!" % tbl)
def retval(self, service_name=None, params=None, **kvargs):
    """Return the collected resultsets in the output format.

    Usually called as the last statement of a dbservice.  Pending
    messages and SQL statements are first added as the "_status" and
    "_sql" resultsets.  In test mode non-empty resultsets are rendered as
    text tables, otherwise rows are packed with make_record_array().
    When `service_name` is given, that service is called with `params`
    (or the keyword arguments) and its rows are appended to the result.
    """
    params = params or kvargs
    self.raise_if_errors()

    if len(self.messages):
        self.return_next(self.messages, "_status")
    if self.sqls is not None and len(self.sqls):
        self.return_next(self.sqls, "_sql")

    results = []
    for entry in self._retval:
        res_name = entry[0]
        rows = entry[1]
        res_count = str(len(rows))
        as_table = self._is_test and len(rows) > 0
        if as_table:
            # header line followed by numbered table lines
            results.append([res_name, res_count, res_name])
            lineno = 0
            for trow in render_table(rows, rows[0].keys()):
                lineno += 1
                results.append([res_name, lineno, trow])
        else:
            results.append([res_name, res_count, make_record_array(rows)])

    if service_name:
        quoted_service = skytools.quote_fqident(service_name)
        sql = "select * from %s( {i_context}, {i_params} );" % quoted_service
        par = dbdict(i_context=self._context, i_params=make_record(params))
        for row in self.run_query(sql, par):
            results.append((row.res_code, row.res_text, row.res_rows))
    return results
def process_request(self, cmsg):
    """Execute the database function requested by a message and reply.

    The payload names the function (`function`), its arguments
    (`params`: dict, list/tuple, or a JSON string of one of those) and
    the kind of result wanted (`return`: 'all', 'one', 'json', or
    None/''/'no' for no reply).  Calls to functions that are not in
    `self.func_list` (unless it is exactly ['*']) are logged and dropped.
    The reply is routed back along the request's route via `self.master`.
    """
    msg = cmsg.get_payload(self.xtx)
    if not msg:
        return

    curs = self.db.cursor()
    func = msg.function
    args = msg.get('params', [])
    # string params are decoded as JSON
    if isinstance(args, StringType):
        args = cc.json.loads(args)
    assert isinstance(args, (DictType, ListType, TupleType))

    # access check: a single '*' entry allows every function
    if len(self.func_list) == 1 and self.func_list[0] == '*':
        pass
    elif func in self.func_list:
        pass
    else:
        self.log.error('Function call not allowed: %r', func)
        return None

    # first pass leaves a single %s where the argument placeholders go
    q = "select %s (%%s)" % (skytools.quote_fqident(func), )
    if isinstance(args, DictType):
        # argument names are put into the SQL text, so only plain names are accepted
        if not all([re.match("^[a-zA-Z0-9_]+$", k) for k in args.keys()]):
            self.log.error("Invalid DB function argument name in %r", args.keys())
            return
        # named notation: "name := %(name)s"
        q %= (", ".join(["%s := %%(%s)s" % (k, k) for k in args.keys()]), )
    else:
        # positional notation: one %s per argument
        q %= (", ".join(["%s" for a in args]), )

    if self.log.isEnabledFor(skytools.skylog.TRACE):
        self.log.trace('Executing: %s', curs.mogrify(q, args))
    else:
        self.log.debug('Executing: %s', q)
    curs.execute(q, args)

    rt = msg.get('return')
    if rt in (None, '', 'no'):
        return
    elif rt == 'all':
        rs = curs.fetchall()
    elif rt == 'one':
        rs = curs.fetchone()
    elif rt == 'json':
        # first column of the first row is parsed as the whole reply
        rs = curs.fetchone()
        if rs:
            jsr = rs[0]
        else:
            jsr = '{}'
        rep = parse_json(jsr)
    # NOTE(review): any other 'return' value reaches this point with `rs`
    # unbound, so the ReplyMessage call below would fail -- confirm whether
    # such values can arrive.
    # NOTE(review): the nesting of the three attribute assignments under
    # this `if` was reconstructed from collapsed source -- confirm.
    if rt != 'json':
        rep = ReplyMessage(req="reply.%s" % msg.req, data=rs)
        if curs.rowcount >= 0:
            rep.rowcount = curs.rowcount
        if curs.statusmessage:
            rep.statusmessage = curs.statusmessage
        if msg.get('ident'):
            rep.ident = msg.get('ident')

    # send the reply back along the route of the request
    rcm = self.xtx.create_cmsg(rep)
    rcm.take_route(cmsg)
    rcm.send_to(self.master)
def mk_delete_sql(row, tbl, pkey_list, field_map=None):
    """Generate DELETE statement from dict data.

    The WHERE clause compares every primary key field; with `field_map`
    the mapped column names are used.

    >>> mk_delete_sql({'a': 1, 'b':2, 'c':3}, 'tablename', ['a','b'])
    "delete from only public.tablename where a = '1' and b = '2';"
    >>> mk_delete_sql({'a': 1, 'b':2, 'c':3}, 'tablename', ['a','b'], {'a': 'aa', 'b':'bb'})
    "delete from only public.tablename where aa = '1' and bb = '2';"
    >>> mk_delete_sql({'a': 1, 'b':2, 'c':3}, 'tablename', [])
    Traceback (most recent call last):
        ...
    Exception: delete needs pkeys
    """
    if len(pkey_list) < 1:
        raise Exception("delete needs pkeys")

    conditions = []
    for key in pkey_list:
        if field_map:
            # an empty mapped name falls back to the source name
            column = field_map[key] or key
        else:
            column = key
        quoted_col = skytools.quote_ident(column)
        quoted_val = skytools.quote_literal(row[key])
        conditions.append("%s = %s" % (quoted_col, quoted_val))

    where = " and ".join(conditions)
    return "delete from only %s where %s;" % (skytools.quote_fqident(tbl), where)
def mk_insert_sql(row, tbl, pkey_list=None, field_map=None):
    """Generate INSERT statement from dict data.

    With `field_map` only the mapped source fields are inserted, under
    their destination names; otherwise every field of `row` is used.
    `pkey_list` is accepted but not used.

    >>> from collections import OrderedDict
    >>> row = OrderedDict([('id',1), ('data', None)])
    >>> mk_insert_sql(row, 'tbl')
    "insert into public.tbl (id, data) values ('1', null);"
    >>> mk_insert_sql(row, 'tbl', ['x'], OrderedDict([('id', 'id_'), ('data', 'data_')]))
    "insert into public.tbl (id_, data_) values ('1', null);"
    """
    columns = []
    values = []

    def _add(name, value):
        # quote one column/value pair and remember it
        columns.append(skytools.quote_ident(name))
        values.append(skytools.quote_literal(value))

    if field_map:
        for src, dst in field_map.items():
            _add(dst, row[src])
    else:
        for name, value in row.items():
            _add(name, value)

    return "insert into %s (%s) values (%s);" % (
        skytools.quote_fqident(tbl), ", ".join(columns), ", ".join(values))
def mk_delete_sql(self, tbl, key_list, data):
    """Return a DELETE statement matching `data` on the fields of `key_list`."""
    conditions = [
        "%s = %s" % (skytools.quote_ident(key), skytools.quote_literal(data[key]))
        for key in key_list
    ]
    where = " and ".join(conditions)
    return "delete from %s where %s;" % (skytools.quote_fqident(tbl), where)
def check_tables(self, dcon, tables):
    """Create the tables named in `tables` that do not exist yet.

    Used by other procedures to ensure that a table is there before they
    start inserting.

    The commits should not be dangerous, as we haven't done anything with
    cdr's yet, so they should still be in one TX.  Although it would be
    nicer to have a lock for table creation.

    Raises Exception when a table is missing and no part_template is set.
    """
    dcur = dcon.cursor()
    for tbl in tables.keys():
        if skytools.exists_table(dcur, tbl):
            continue
        if not self.part_template:
            raise Exception('Dest table does not exists and no way to create it.')

        # we replace the schema-less name as well to make sure that
        # constraints for tables who contain a schema will still work
        schema_table = tbl.replace(".", "__")
        sql = self.part_template.replace(
            DEST_TABLE, skytools.quote_fqident(tbl)
        ).replace(
            SCHEMA_TABLE, skytools.quote_ident(schema_table)
        )

        dcur.execute(sql)
        dcon.commit()
        self.log.info('%s: Created table %s' % (self.job_name, tbl))
def process_request(self, cmsg):
    """Run the database function named in a request and send back a reply.

    The payload of cmsg supplies the function name (`function`), its
    arguments (`params`: a JSON string, a dict of named arguments, or a
    list/tuple of positional ones) and the wanted result form (`return`:
    None/""/"no", "all", "one" or "json").

    Returns None without replying when the payload is empty, the function
    is not in self.func_list, an argument name is invalid, or no result
    was requested.

    NOTE(review): the nesting of the reply-building statements at the end
    of this method was reconstructed from a flattened source line; confirm
    it against the canonical file.
    """
    msg = cmsg.get_payload(self.xtx)
    if not msg:
        return
    curs = self.get_cursor()
    func = msg.function
    args = msg.get("params", [])
    # Params may arrive as a JSON-encoded string; decode before use.
    if isinstance(args, (StringType, unicode)):
        args = cc.json.loads(args)
    assert isinstance(args, (DictType, ListType, TupleType))
    # Access check: a func_list of exactly ["*"] allows any function,
    # otherwise the function must be listed explicitly.
    if len(self.func_list) == 1 and self.func_list[0] == "*":
        pass
    elif func in self.func_list:
        pass
    else:
        self.log.error("Function call not allowed: %r", func)
        return None
    # The doubled %% leaves one "%s" in q for the argument list filled in below.
    q = "select %s (%%s)" % (skytools.quote_fqident(func),)
    if isinstance(args, DictType):
        # Argument names are pasted into the SQL text itself, so they are
        # restricted to letters, digits and underscore.
        if not all([re.match("^[a-zA-Z0-9_]+$", k) for k in args.keys()]):
            self.log.error("Invalid DB function argument name in %r", args.keys())
            return
        # Named notation: "name := %(name)s" for every key.
        q %= (", ".join(["%s := %%(%s)s" % (k, k) for k in args.keys()]),)
    else:
        # Positional notation: one "%s" placeholder per argument.
        q %= (", ".join(["%s" for a in args]),)
    if self.log.isEnabledFor(skytools.skylog.TRACE):
        self.log.trace("Executing: %s", curs.mogrify(q, args))
    else:
        self.log.debug("Executing: %s", q)
    curs.execute(q, args)
    rt = msg.get("return")
    if rt in (None, "", "no"):
        # Caller does not want a reply.
        return
    elif rt == "all":
        rs = curs.fetchall()
    elif rt == "one":
        rs = curs.fetchone()
    elif rt == "json":
        # First column of the first row is parsed as the reply itself;
        # an empty result is treated as "{}".
        rs = curs.fetchone()
        if rs:
            jsr = rs[0]
        else:
            jsr = "{}"
        rep = parse_json(jsr)
    # NOTE(review): any other value of rt leaves rs unassigned, so the
    # ReplyMessage construction below would fail on the unbound name.
    if rt != "json":
        rep = ReplyMessage(req="reply.%s" % msg.req, data=rs)
        if curs.rowcount >= 0:
            rep.rowcount = curs.rowcount
        if curs.statusmessage:
            rep.statusmessage = curs.statusmessage
    # Copy the request's ident, when present, onto the reply.
    if msg.get("ident"):
        rep.ident = msg.get("ident")
    rcm = self.xtx.create_cmsg(rep)
    # presumably reuses the request's routing for the reply -- TODO confirm
    rcm.take_route(cmsg)
    rcm.send_to(self.master)
def get_create_sql(self, curs, new_table_name=None):
    """Return the SQL that creates this index.

    With new_table_name the stored definition is rewritten to build the
    index on that table under a name obtained from find_new_name();
    otherwise the stored definition is used unchanged.  For a clustered
    index an ALTER TABLE ... CLUSTER ON statement is appended.
    """
    if not new_table_name:
        create_sql = self.defn
        index_name = self.local_name
        table = self.table_name
    else:
        # fixme: seems broken
        index_name = find_new_name(curs, self.name)
        table = new_table_name
        replacement = "INDEX %s ON %s " % (quote_ident(index_name), quote_fqident(table))
        pattern = r"\bINDEX[ ][a-z0-9._]+[ ]ON[ ][a-z0-9._]+[ ]"
        create_sql = rx_replace(pattern, self.defn, replacement)

    if self.is_clustered:
        cluster_sql = ' ALTER TABLE ONLY %s\n CLUSTER ON %s;' % (
            quote_fqident(table), quote_ident(index_name))
        create_sql += cluster_sql
    return create_sql
def get_create_sql(self, curs, new_table_name=None):
    """Return the ALTER TABLE statement that adds this constraint.

    With new_table_name the constraint is added to that table; for
    contype 'p' or 'u' (presumably primary-key/unique -- confirm) a name
    is obtained from find_new_name() instead of reusing self.name.
    For a clustered constraint a CLUSTER ON statement is appended.
    """
    constraint = self.name
    if new_table_name:
        table = new_table_name
        if self.contype in ('p', 'u'):
            constraint = find_new_name(curs, self.name)
    else:
        table = self.table_name

    qtbl = quote_fqident(table)
    qname = quote_ident(constraint)

    # no ONLY here as table with childs (only case that matters)
    # cannot have contraints that childs do not have
    statements = ["ALTER TABLE %s ADD CONSTRAINT %s\n %s;" % (qtbl, qname, self.defn)]
    if self.is_clustered:
        statements.append(' ALTER TABLE ONLY %s\n CLUSTER ON %s;' % (qtbl, qname))
    return "".join(statements)
def get_create_sql(self, curs, new_name=None):
    """Return the ALTER TABLE statement that sets this column's default.

    The statement targets new_name when given, otherwise self.table_name.
    curs is not used here.
    """
    target = new_name if new_name else self.table_name
    template = "ALTER TABLE ONLY %s ALTER COLUMN %s\n SET DEFAULT %s;"
    return template % (quote_fqident(target), quote_ident(self.name), self.expr)
def get_create_sql(self, curs, new_table_name=None):
    """Build the SQL that (re)creates this constraint.

    When new_table_name is given the constraint goes onto that table, and
    for contype 'p' or 'u' its name comes from find_new_name() rather
    than self.name.  Otherwise the constraint's own table and name are
    used.  A CLUSTER ON statement follows when the constraint is clustered.
    """
    if not new_table_name:
        qtbl = quote_fqident(self.table_name)
        qname = quote_ident(self.name)
    else:
        if self.contype in ('p', 'u'):
            fresh_name = find_new_name(curs, self.name)
        else:
            fresh_name = self.name
        qtbl = quote_fqident(new_table_name)
        qname = quote_ident(fresh_name)

    # no ONLY here as table with childs (only case that matters)
    # cannot have contraints that childs do not have
    result = "ALTER TABLE %s ADD CONSTRAINT %s\n %s;" % (qtbl, qname, self.defn)
    if not self.is_clustered:
        return result
    return result + ' ALTER TABLE ONLY %s\n CLUSTER ON %s;' % (qtbl, qname)
def full_copy(
    tablename,
    src_curs,
    dst_curs,
    column_list=[],
    condition=None,
    dst_tablename=None,
    dst_column_list=None,
    write_hook=None,
    flush_hook=None,
):
    """COPY table from one db to another.

    tablename / column_list describe the source; dst_tablename and
    dst_column_list default to the source values.  When condition is
    given, only rows matching that SQL WHERE expression are copied, which
    requires a source cursor that provides copy_expert().  write_hook and
    flush_hook are handed to the CopyPipe that feeds the destination.

    Returns a (total_bytes, total_rows) tuple taken from the CopyPipe.

    Raises Exception when the two column lists differ in length, or when
    condition is given but the source cursor lacks copy_expert().
    """
    # default dst table and dst columns to source ones
    dst_tablename = dst_tablename or tablename
    dst_column_list = dst_column_list or column_list[:]
    if len(dst_column_list) != len(column_list):
        raise Exception("src and dst column lists must match in length")

    def build_qfields(cols):
        # Quoted, comma-separated column list; "*" when no columns are named.
        if cols:
            return ",".join([skytools.quote_ident(f) for f in cols])
        else:
            return "*"

    def build_statement(table, cols):
        # "table (col, ...)" or just "table" when no columns are named.
        qtable = skytools.quote_fqident(table)
        if cols:
            qfields = build_qfields(cols)
            return "%s (%s)" % (qtable, qfields)
        else:
            return qtable

    dst = build_statement(dst_tablename, dst_column_list)
    if condition:
        # Use the source column list here: the name `cols` only exists
        # inside the helpers above, so referencing it raised NameError.
        src = "(SELECT %s FROM %s WHERE %s)" % (
            build_qfields(column_list),
            skytools.quote_fqident(tablename),
            condition,
        )
    else:
        src = build_statement(tablename, column_list)

    if hasattr(src_curs, "copy_expert"):
        sql_to = "COPY %s TO stdout" % src
        sql_from = "COPY %s FROM stdin" % dst
        buf = CopyPipe(dst_curs, sql_from=sql_from)
        buf.write_hook = write_hook
        buf.flush_hook = flush_hook
        src_curs.copy_expert(sql_to, buf)
    else:
        if condition:
            # regular psycopg copy_to generates invalid sql for subselect copy
            raise Exception("copy_expert() is needed for conditional copy")
        buf = CopyPipe(dst_curs, dst)
        buf.write_hook = write_hook
        buf.flush_hook = flush_hook
        src_curs.copy_to(buf, src)
    buf.flush()
    return (buf.total_bytes, buf.total_rows)
def full_copy(tablename, src_curs, dst_curs, column_list=[], condition=None,
              dst_tablename=None, dst_column_list=None, write_hook=None,
              flush_hook=None):
    """Stream a table's rows from one database into another via COPY.

    The destination table and columns fall back to the source ones when
    not given.  A condition (SQL WHERE expression) restricts the copied
    rows and needs a source cursor with copy_expert().  write_hook and
    flush_hook are set on the CopyPipe that feeds the destination.

    Returns (total_bytes, total_rows) as reported by the CopyPipe.
    """
    # Destination defaults mirror the source.
    if not dst_tablename:
        dst_tablename = tablename
    if not dst_column_list:
        dst_column_list = column_list[:]
    if len(dst_column_list) != len(column_list):
        raise Exception('src and dst column lists must match in length')

    def quoted_columns(names):
        # "*" stands in for an empty column list.
        if not names:
            return "*"
        return ",".join([skytools.quote_ident(name) for name in names])

    def copy_target(table, names):
        # Quoted table name, followed by "(columns)" when any are named.
        target = skytools.quote_fqident(table)
        if names:
            target = "%s (%s)" % (target, quoted_columns(names))
        return target

    dst = copy_target(dst_tablename, dst_column_list)
    if condition:
        src = "(SELECT %s FROM %s WHERE %s)" % (
            quoted_columns(column_list), skytools.quote_fqident(tablename), condition)
    else:
        src = copy_target(tablename, column_list)

    if hasattr(src_curs, 'copy_expert'):
        pipe = CopyPipe(dst_curs, sql_from="COPY %s FROM stdin" % dst)
        pipe.write_hook = write_hook
        pipe.flush_hook = flush_hook
        src_curs.copy_expert("COPY %s TO stdout" % src, pipe)
    else:
        if condition:
            # regular psycopg copy_to generates invalid sql for subselect copy
            raise Exception('copy_expert() is needed for conditional copy')
        pipe = CopyPipe(dst_curs, dst)
        pipe.write_hook = write_hook
        pipe.flush_hook = flush_hook
        src_curs.copy_to(pipe, src)

    pipe.flush()
    return (pipe.total_bytes, pipe.total_rows)
def handle_truncate_event(self, ev, dst_curs):
    """Apply one truncate event to the destination.

    The table is named by ev.extra1.  Events for unknown tables, or for
    tables whose interesting() check rejects the event, are only counted
    under 'ignored_events'.
    """
    table = self.get_table_by_name(ev.extra1)
    if table and table.interesting(ev, self.cur_tick, self.copy_thread):
        truncate_sql = "TRUNCATE %s;" % skytools.quote_fqident(ev.extra1)
        self.apply_sql(truncate_sql, dst_curs)
        return
    self.stat_increase('ignored_events')
def got_missed_delete(self, tbl, dst_row):
    """Report the DELETE needed for a row that should have been removed.

    Bumps cnt_delete, builds a `delete from only` statement keyed on
    every column in self.pkey_list (values taken from dst_row and passed
    through unescape()), and hands it to show_fix().
    """
    self.cnt_delete += 1

    conditions = []
    for key in self.pkey_list:
        # addcmp() receives the list and presumably adds one comparison to it.
        self.addcmp(conditions, skytools.quote_ident(key), unescape(dst_row[key]))

    target = skytools.quote_fqident(tbl)
    fix_sql = "delete from only %s where %s;" % (target, " and ".join(conditions))
    self.show_fix(tbl, fix_sql, 'delete')
def create_temp_table(self, curs, tbl):
    """Create (or reuse) the temp table used for loading into tbl.

    The temp table is named after tbl with "." replaced by "_" and a
    "_loadertmp" suffix, and is created with the same columns as tbl
    (`like`).  When USE_LONGLIVED_TEMP_TABLES is set and the temp table
    already exists, it is reused instead of being created again.

    Returns the unquoted temp table name.
    """
    tempname = tbl.replace('.', '_') + "_loadertmp"

    # check if exists
    if USE_LONGLIVED_TEMP_TABLES:
        if exists_temp_table(curs, tempname):
            self.log.debug("Using existing temp table %s" % tempname)
            return tempname

    # "on commit delete rows" is deliberately not used:
    # bizgres crashes on delete rows
    arg = "on commit preserve rows"

    # create temp table for loading
    q = "create temp table %s (like %s) %s" % (quote_fqident(tempname), quote_fqident(tbl), arg)
    self.log.debug("Creating temp table: %s" % q)
    curs.execute(q)
    return tempname
def mk_delete_sql(self, tbl, key_list, data):
    """Generate a DELETE command that selects a row by its key values.

    One `column = value` comparison is produced per entry of key_list,
    reading the value from data, and all comparisons are and-ed together.
    """
    comparisons = [
        "%s = %s" % (skytools.quote_ident(key), skytools.quote_literal(data[key]))
        for key in key_list
    ]
    where_clause = " and ".join(comparisons)
    return "delete from %s where %s;" % (skytools.quote_fqident(tbl), where_clause)
def __init__(self, ctx, table, create_log=True):
    """Set up statement building for one table.

    The table name is used to construct insert, update and delete
    statements and should be given as schema.tablename.  The table must
    have a primary key field named id_<table>; a matching
    `"id_<table>" = {id_<table>}` where-template is prepared here.
    """
    self._ctx = ctx
    self._logging = create_log
    self._table = skytools.quote_fqident(table)

    # Second name part is the bare table name (without schema).
    id_column = "id_" + skytools.fq_name_parts(table)[1]
    self._id = id_column
    self._where = "%s = {%s}" % (skytools.quote_ident(id_column), id_column)
def __init__(self, table_name, log):
    """Remember the table and logger, start with empty state, then reset().

    fq_table_name holds the quoted, fully-qualified form of table_name.
    Fix application is off by default (apply_fixes False, no cursor).
    """
    self.log = log
    self.table_name = table_name
    self.fq_table_name = skytools.quote_fqident(table_name)

    # Column info starts out empty.
    self.pkey_list = []
    self.common_fields = []

    # Fixes are only shown, not applied, until configured otherwise.
    self.apply_fixes = False
    self.apply_cursor = None

    self.reset()
def mk_insert_sql(self, tbl, key_list, data):
    """Generate an INSERT command covering every item of data.

    Each key of data becomes a quoted column and each value a quoted
    literal, in data's own iteration order.  key_list is not used.
    """
    pairs = list(data.items())
    columns = ",".join([skytools.quote_ident(name) for name, _ in pairs])
    values = ",".join([skytools.quote_literal(value) for _, value in pairs])
    return "insert into %s (%s) values (%s);" % (
        skytools.quote_fqident(tbl), columns, values)