def make_sql(self, tbl, ev):
    """Return SQL statement(s) for that event.

    The event type is either "OP:pkey1,pkey2" or a bare op letter, in
    which case the pkey list is taken from ev.extra2.

    Raises:
        Exception: if mode=keep_latest but the table has no pkey, on
            delete in keep_all mode, or on an unknown op letter.
    """
    # parse urlencoded row data
    data = skytools.db_urldecode(ev.data)

    # parse tbl info: "OP:keys" or bare op with keys in extra2
    if ev.type.find(':') > 0:
        op, keys = ev.type.split(':')
    else:
        op = ev.type
        keys = ev.extra2
    ev.key_list = keys

    # BUG FIX: str.split(',') always returns at least one element
    # (e.g. ''.split(',') == ['']), so the old "len(key_list) == 0"
    # check could never fire and an empty/None key string would later
    # produce broken SQL (or crash on None.split).  Test the raw key
    # string before splitting instead.
    if self.keep_latest and not keys:
        raise Exception('No pkey on table %s' % tbl)
    key_list = keys.split(',')

    # generate sql
    if op in ('I', 'U'):
        if self.keep_latest:
            # keep_latest: replace old version -> delete + insert
            sql = "%s %s" % (self.mk_delete_sql(tbl, key_list, data),
                             self.mk_insert_sql(tbl, key_list, data))
        else:
            sql = self.mk_insert_sql(tbl, key_list, data)
    elif op == "D":
        if not self.keep_latest:
            raise Exception('Delete op not supported if mode=keep_all')
        sql = self.mk_delete_sql(tbl, key_list, data)
    else:
        raise Exception('Unknown row op: %s' % op)
    return sql
def process_event(self, db, ev):
    """Apply one queue event by executing the configured dst_query.

    Only row-change events (I:/U:/D:) are processed; others are skipped.
    The urlencoded event data plus pgq.* metadata fields are passed as
    query parameters.

    NOTE(review): runs on a separate autocommit 'dst_db' connection,
    not the `db` argument -- presumably intentional, confirm callers.
    """
    curs = self.get_database('dst_db', autocommit=1).cursor()
    # skip non-row-change events
    if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
        return
    if ev.ev_data is None:
        payload = {}
    else:
        payload = skytools.db_urldecode(ev.ev_data)
    # expose event metadata under pgq.* keys for the query to use
    payload['pgq.tick_id'] = self.batch_info['cur_tick_id']
    payload['pgq.ev_id'] = ev.ev_id
    payload['pgq.ev_time'] = ev.ev_time
    payload['pgq.ev_type'] = ev.ev_type
    payload['pgq.ev_data'] = ev.ev_data
    payload['pgq.ev_extra1'] = ev.ev_extra1
    payload['pgq.ev_extra2'] = ev.ev_extra2
    payload['pgq.ev_extra3'] = ev.ev_extra3
    payload['pgq.ev_extra4'] = ev.ev_extra4
    self.log.debug(self.dst_query, payload)
    curs.execute(self.dst_query, payload)
    # SELECT queries return rows worth logging; others only a status line
    if curs.statusmessage[:6] == 'SELECT':
        res = curs.fetchall()
        self.log.debug(res)
    else:
        self.log.debug(curs.statusmessage)
def process_event(self, ev, sql_queue_func, arg):
    """Process a event.

    Event should be added to sql_queue or executed directly.
    Validates the event type, filters by configured operations,
    resolves the destination (possibly partitioned) table and hands
    the row to the row handler.
    """
    if self.conf.table_mode == 'ignore':
        return
    # get urlencoded row data
    data = skytools.db_urldecode(ev.data)
    # event type must look like "O:pkeys"
    if len(ev.ev_type) < 2 or ev.ev_type[1] != ':':
        raise Exception('Unsupported event type: %s/extra1=%s/data=%s' % (ev.ev_type, ev.ev_extra1, ev.ev_data))
    op, pkeys = ev.type.split(':', 1)
    if op not in 'IUD':
        raise Exception('Unknown event type: %s' % ev.ev_type)
    # process only operations specified in config
    if not op in self.conf.event_types:
        return
    self.log.debug('dispatch.process_event: %s/%s' % (ev.ev_type, ev.ev_data))
    # pkey list is cached from the first event seen
    if self.pkeys is None:
        self.pkeys = self.filter_pkeys(pkeys.split(','))
    data = self.filter_data(data)
    # prepare split table when needed
    if self.conf.table_mode == 'part':
        dst, part_time = self.split_format(ev, data)
        if dst not in self.row_handler.table_map:
            self.check_part(dst, part_time)
    else:
        dst = self.table_name
    # lazily register destination table with its loader
    if dst not in self.row_handler.table_map:
        self.row_handler.add_table(dst, LOADERS[self.conf.load_mode], self.pkeys, self.conf)
    self.row_handler.process(dst, op, data)
def process_local_event(self, db, batch_id, ev):
    """Apply one local queue event via dst_query, retrying on connection loss.

    Only row-change events (I:/U:/D:) are processed.  The urlencoded
    event data plus pgq.* metadata fields are passed as query
    parameters; psycopg2.OperationalError triggers a retry on 'dst_db'.
    """
    # skip non-row-change events
    if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
        return
    if ev.ev_data is None:
        payload = {}
    else:
        payload = skytools.db_urldecode(ev.ev_data)
    # expose event metadata under pgq.* keys for the query to use
    payload['pgq.tick_id'] = self.batch_info['cur_tick_id']
    payload['pgq.ev_id'] = ev.ev_id
    payload['pgq.ev_time'] = ev.ev_time
    payload['pgq.ev_type'] = ev.ev_type
    payload['pgq.ev_data'] = ev.ev_data
    payload['pgq.ev_extra1'] = ev.ev_extra1
    payload['pgq.ev_extra2'] = ev.ev_extra2
    payload['pgq.ev_extra3'] = ev.ev_extra3
    payload['pgq.ev_extra4'] = ev.ev_extra4
    self.log.debug(self.dst_query, payload)
    # retry only on connection-level failures, not query errors
    retries, curs = self.execute_with_retry(
        'dst_db', self.dst_query, payload,
        exceptions=(psycopg2.OperationalError, ))
    # SELECT queries return rows worth logging; others only a status line
    if curs.statusmessage[:6] == 'SELECT':
        res = curs.fetchall()
        self.log.debug(res)
    else:
        self.log.debug(curs.statusmessage)
def ts_conflict_handler(gd, args):
    """Conflict handling based on timestamp column.

    args[0] holds an urlencoded config (must contain 'timefield',
    may contain 'altpk' and fkey_* settings); args[1:7] carry the
    event fields.  Delegates the actual work to applyrow(), letting
    the newer-timestamp row win.
    """
    conf = skytools.db_urldecode(args[0])
    timefield = conf['timefield']

    # unpack event fields in one go
    ev_type, ev_data, ev_extra1, ev_extra2, ev_extra3, ev_extra4 = args[1:7]

    # optional alternate pkey columns
    altpk = conf['altpk'].split(',') if 'altpk' in conf else None

    def ts_canapply(rnew, rold):
        # newer timestamp wins
        return canapply_tstamp_helper(rnew, rold, timefield)

    return applyrow(ev_extra1, ev_type, ev_data,
                    backup_row=ev_extra2,
                    alt_pkey_cols=altpk,
                    fkey_ref_table=conf.get('fkey_ref_table'),
                    fkey_ref_cols=conf.get('fkey_ref_cols'),
                    fkey_cols=conf.get('fkey_cols'),
                    fn_canapply=ts_canapply)
def process_batch(self, res, mcur, bres):
    """ Process events in autocommit mode reading results back and trying to make some sense out of them

    For each row of the read-query result, runs sql_modify with the
    merged batch+row parameters and accumulates stats.  On any failure
    runs sql_crash (if configured) with the last parameter set, then
    re-raises.  Returns (count, last_item).
    """
    try:
        count = 0
        item = bres.copy()
        for i in res:  # for each row in read query result
            item.update(i)
            mcur.execute(self.sql_modify, item)
            self.log.debug(mcur.query)
            if mcur.statusmessage.startswith('SELECT'):  # if select was used we can expect some result
                mres = mcur.fetchall()
                for r in mres:
                    if 'stats' in r:  # if specially handled column 'stats' is present
                        # 'stats' holds urlencoded counter increments
                        for k, v in skytools.db_urldecode(r['stats'] or '').items():
                            self.stat_increase(k, int(v))
                    self.log.debug(r)
            else:
                self.stat_increase('processed', mcur.rowcount)
                self.log.debug(mcur.statusmessage)
            # 'cnt' column lets the query report how many logical items it covered
            if 'cnt' in item:
                count += item['cnt']
                self.stat_increase("count", item['cnt'])
            else:
                count += 1
                self.stat_increase("count")
            # allow clean stop on SIGINT between rows
            if self.last_sigint:
                break
        return count, item
    except:
        # process has crashed, run sql_crash and re-raise the exception
        if self.sql_crash:
            dbc = self.get_database("dbcrash", autocommit=1)
            ccur = dbc.cursor()
            ccur.execute(self.sql_crash, item)
        raise
def process_event(self, event, hbase):
    """Mirror one row-change event into HBase as a batched mutation.

    The table mapping (looked up by ev_extra1) defines the HBase table,
    row-key prefix, key column and column mapping.  Events for unmapped
    tables are skipped.
    """
    if event.ev_extra1 in self.table_mappings:
        table_mapping = self.table_mappings[event.ev_extra1]
    else:
        self.log.info("table name not found in config, skipping event")
        return
    #hbase.validate_table_name(table_mapping.hbase_table_name)
    #hbase.validate_column_descriptors(table_mapping.hbase_table_name, table_mapping.hbase_column_descriptors)
    event_data = skytools.db_urldecode(event.data)
    event_type = event.type.split(':')[0]
    batch = BatchMutation()
    # HBase row key = configured prefix + primary key value
    batch.row = table_mapping.hbase_row_prefix + str(event_data[table_mapping.psql_key_column])
    batch.mutations = []
    for psql_column, hbase_column in zip(table_mapping.psql_columns, table_mapping.hbase_column_descriptors):
        if event_type == INSERT or event_type == UPDATE:
            m = Mutation()
            m.column = hbase_column
            # NOTE(review): str() turns SQL NULL into the string 'None'
            # -- confirm that is acceptable for this mapping
            m.value = str(event_data[psql_column])
        elif event_type == DELETE:
            # delete this column entry
            m = Mutation()
            m.isDelete = True
            m.column = hbase_column
        else:
            raise Exception("Invalid event type: %s, event data was: %s" % (event_type, str(event_data)))
        batch.mutations.append(m)
    hbase.client.mutateRow(table_mapping.hbase_table_name, batch.row, batch.mutations)
    event.tag_done()
def loaded_state(self, row):
    """Update object with info from db."""
    self.log.debug("loaded_state: %s: %s / %s",
                   self.name, row['merge_state'], row['custom_snapshot'])
    self.change_snapshot(row['custom_snapshot'], 0)
    self.state = self.parse_state(row['merge_state'])
    self.changed = 0
    if row['table_attrs']:
        self.table_attrs = skytools.db_urldecode(row['table_attrs'])
    else:
        self.table_attrs = {}
    self.copy_role = row['copy_role']
    self.dropped_ddl = row['dropped_ddl']
    if row['merge_state'] == "?":
        # '?' marks an interrupted copy -- state needs re-saving
        self.changed = 1
    self.copy_pos = int(row.get('copy_pos', '0'))
    self.max_parallel_copy = int(self.table_attrs.get('max_parallel_copy', self.max_parallel_copy))
    if 'dest_table' in row and row['dest_table']:
        self.dest_table = row['dest_table']
    else:
        self.dest_table = self.name
    hstr = self.table_attrs.get('handlers', '')  # compat
    hstr = self.table_attrs.get('handler', hstr)
    self.plugin = build_handler(self.name, hstr, self.dest_table)
def process_local_event(self, db, batch_id, ev):
    """Apply one local queue event by executing dst_query.

    Only row-change events (I:/U:/D:) are processed.  The urlencoded
    event data plus pgq.* metadata fields are passed as query
    parameters on a separate autocommit 'dst_db' connection.
    """
    curs = self.get_database('dst_db', autocommit = 1).cursor()
    # skip non-row-change events
    if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
        return
    if ev.ev_data is None:
        payload = {}
    else:
        payload = skytools.db_urldecode(ev.ev_data)
    # expose event metadata under pgq.* keys for the query to use
    payload['pgq.tick_id'] = self.batch_info['cur_tick_id']
    payload['pgq.ev_id'] = ev.ev_id
    payload['pgq.ev_time'] = ev.ev_time
    payload['pgq.ev_type'] = ev.ev_type
    payload['pgq.ev_data'] = ev.ev_data
    payload['pgq.ev_extra1'] = ev.ev_extra1
    payload['pgq.ev_extra2'] = ev.ev_extra2
    payload['pgq.ev_extra3'] = ev.ev_extra3
    payload['pgq.ev_extra4'] = ev.ev_extra4
    self.log.debug(self.dst_query, payload)
    curs.execute(self.dst_query, payload)
    # SELECT queries return rows worth logging; others only a status line
    if curs.statusmessage[:6] == 'SELECT':
        res = curs.fetchall()
        self.log.debug(res)
    else:
        self.log.debug(curs.statusmessage)
def process_event(self, ev, sql_queue_func, arg):
    """Process a event.

    Event should be added to sql_queue or executed directly.
    Validates the event type, filters by configured operations,
    resolves the destination (possibly partitioned) table and hands
    the row to the row handler.
    """
    if self.conf.table_mode == 'ignore':
        return
    # get urlencoded row data
    data = skytools.db_urldecode(ev.data)
    # event type must look like "O:pkeys"
    if len(ev.ev_type) < 2 or ev.ev_type[1] != ':':
        raise Exception('Unsupported event type: %s/extra1=%s/data=%s' % (
            ev.ev_type, ev.ev_extra1, ev.ev_data))
    op, pkeys = ev.type.split(':', 1)
    if op not in 'IUD':
        raise Exception('Unknown event type: %s' % ev.ev_type)
    # process only operations specified in config
    if not op in self.conf.event_types:
        return
    self.log.debug('dispatch.process_event: %s/%s' % (
        ev.ev_type, ev.ev_data))
    # pkey list is cached from the first event seen
    if self.pkeys is None:
        self.pkeys = self.filter_pkeys(pkeys.split(','))
    data = self.filter_data(data)
    # prepare split table when needed
    if self.conf.table_mode == 'part':
        dst, part_time = self.split_format(ev, data)
        if dst not in self.row_handler.table_map:
            self.check_part(dst, part_time)
    else:
        dst = self.table_name
    # lazily register destination table with its loader
    if dst not in self.row_handler.table_map:
        self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
                                   self.pkeys, self.conf)
    self.row_handler.process(dst, op, data)
def process_local_event(self, db, batch_id, ev):
    """Apply one local queue event via dst_query with retry.

    Only row-change events (I:/U:/D:) are processed.  The urlencoded
    event data plus pgq.* metadata fields are passed as query
    parameters; psycopg2.OperationalError triggers a retry on 'dst_db'.
    """
    # skip non-row-change events
    if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
        return
    if ev.ev_data is None:
        payload = {}
    else:
        payload = skytools.db_urldecode(ev.ev_data)
    # expose event metadata under pgq.* keys for the query to use
    payload['pgq.tick_id'] = self.batch_info['cur_tick_id']
    payload['pgq.ev_id'] = ev.ev_id
    payload['pgq.ev_time'] = ev.ev_time
    payload['pgq.ev_type'] = ev.ev_type
    payload['pgq.ev_data'] = ev.ev_data
    payload['pgq.ev_extra1'] = ev.ev_extra1
    payload['pgq.ev_extra2'] = ev.ev_extra2
    payload['pgq.ev_extra3'] = ev.ev_extra3
    payload['pgq.ev_extra4'] = ev.ev_extra4
    self.log.debug(self.dst_query, payload)
    # retry only on connection-level failures, not query errors
    retries, curs = self.execute_with_retry('dst_db', self.dst_query, payload,
                                            exceptions = (psycopg2.OperationalError,))
    # SELECT queries return rows worth logging; others only a status line
    if curs.statusmessage[:6] == 'SELECT':
        res = curs.fetchall()
        self.log.debug(res)
    else:
        self.log.debug(curs.statusmessage)
def register_copy_consumer(self):
    """Register consumer on the node the initial COPY should run from.

    If the table's attrs specify a 'copy_node', its queue location is
    looked up and used; otherwise the provider of the old consumer is
    used as source.

    Raises:
        Exception: if the configured copy node has no known location.
    """
    dst_db = self.get_database('db')
    dst_curs = dst_db.cursor()

    # fetch table attrs
    q = "select * from londiste.get_table_list(%s) where table_name = %s"
    dst_curs.execute(q, [ self.queue_name, self.copy_table_name ])
    rows = dst_curs.fetchall()
    attrs = {}
    if len(rows) > 0:
        v_attrs = rows[0]['table_attrs']
        if v_attrs:
            attrs = skytools.db_urldecode(v_attrs)

    # do we have node here?
    if 'copy_node' in attrs:
        # take node from attrs
        source_node = attrs['copy_node']
        q = "select * from pgq_node.get_queue_locations(%s) where node_name = %s"
        dst_curs.execute(q, [ self.queue_name, source_node ])
        rows = dst_curs.fetchall()
        if not rows:
            # BUG FIX: previously fell through with source_location
            # unbound, failing later with a confusing NameError
            raise Exception("Location of copy node '%s' not found for queue '%s'"
                            % (source_node, self.queue_name))
        source_location = rows[0]['node_location']
    else:
        # fetch parent consumer state
        q = "select * from pgq_node.get_consumer_state(%s, %s)"
        rows = self.exec_cmd(dst_db, q, [ self.queue_name, self.old_consumer_name ])
        state = rows[0]
        source_node = state['provider_node']
        source_location = state['provider_location']

    self.log.info("Using '%s' as source node", source_node)
    self.register_consumer(source_location)
def ts_conflict_handler(gd, args):
    """Conflict handling based on timestamp column.

    args[0] holds an urlencoded config (must contain 'timefield',
    may contain 'altpk' and fkey_* settings); args[1:7] carry the
    event fields.  Delegates to applyrow(), letting the row with the
    newer timestamp win.
    """
    conf = skytools.db_urldecode(args[0])
    timefield = conf['timefield']
    ev_type = args[1]
    ev_data = args[2]
    ev_extra1 = args[3]
    ev_extra2 = args[4]
    ev_extra3 = args[5]
    ev_extra4 = args[6]
    # optional alternate pkey columns
    altpk = None
    if 'altpk' in conf:
        altpk = conf['altpk'].split(',')
    def ts_canapply(rnew, rold):
        # newer timestamp wins
        return canapply_tstamp_helper(rnew, rold, timefield)
    return applyrow(ev_extra1, ev_type, ev_data,
                    backup_row = ev_extra2,
                    alt_pkey_cols = altpk,
                    fkey_ref_table = conf.get('fkey_ref_table'),
                    fkey_ref_cols = conf.get('fkey_ref_cols'),
                    fkey_cols = conf.get('fkey_cols'),
                    fn_canapply = ts_canapply)
def process_event(self, ev, sql_queue_func, arg):
    """Bucket one row-change event by its primary key value.

    Events are collected into self.pkey_ev_map so a later flush can
    collapse multiple changes to the same row.

    Fixes:
    - A pkey-less event type (e.g. "I:") used to produce
      pkey_list == [''] (since ''.split(',') == ['']), which then
      crashed with KeyError('') -- an empty pkey string now yields an
      empty list and inserts fall into the fake-pkey branch.
    - col_list is stored as a plain list so it stays stable on
      Python 3, where dict.keys() is a live view.
    """
    if len(ev.ev_type) < 2 or ev.ev_type[1] != ":":
        raise Exception("Unsupported event type: %s/extra1=%s/data=%s" % (ev.ev_type, ev.ev_extra1, ev.ev_data))
    op = ev.ev_type[0]
    if op not in "IUD":
        raise Exception("Unknown event type: " + ev.ev_type)

    data = skytools.db_urldecode(ev.ev_data)

    # pkey column names are cached from the first event seen
    if self.pkey_list is None:
        pkstr = ev.ev_type[2:]
        self.pkey_list = pkstr.split(",") if pkstr else []

    # get pkey value
    if len(self.pkey_list) > 0:
        pk_data = tuple(data[k] for k in self.pkey_list)
    elif op == "I":
        # fake pkey, just to get them spread out
        pk_data = self.fake_seq
        self.fake_seq += 1
    else:
        raise Exception("non-pk tables not supported: %s" % self.table_name)

    # get full column list, detect added columns
    cols = list(data.keys())
    if not self.col_list or self.col_list != cols:
        self.col_list = cols

    # keep all versions of row data
    ev = BulkEvent(op, data, pk_data)
    if ev.pk_data in self.pkey_ev_map:
        self.pkey_ev_map[ev.pk_data].append(ev)
    else:
        self.pkey_ev_map[ev.pk_data] = [ev]
def process_event(self, ev, sql_queue_func, arg):
    """Filter event by hash in extra3, apply only local slots."""
    if ev.extra3:
        # extra3 carries urlencoded metadata including the row hash
        meta = skytools.db_urldecode(ev.extra3)
        target_slot = int(meta['hash']) & self.bubbles_max_slot
        # drop events whose slot is not handled locally
        if target_slot not in self.bubbles_local_slots:
            return
    BaseHandler.process_event(self, ev, sql_queue_func, arg)
def __init__(self, row):
    """Build table info from a londiste.get_table_list() row."""
    self.table_name = row['table_name']
    self.dest_table = row['dest_table'] or row['table_name']
    self.merge_state = row['merge_state']
    # table_attrs is an urlencoded string, possibly NULL
    raw_attrs = row['table_attrs'] or ''
    self.table_attrs = skytools.db_urldecode(raw_attrs)
    handler_str = self.table_attrs.get('handler', '')
    self.plugin = build_handler(self.table_name, handler_str, row['dest_table'])
def __init__(self, row):
    """Build table info from a londiste.get_table_list() row."""
    self.table_name = row["table_name"]
    # fall back to source table name when no separate dest table is set
    self.dest_table = row["dest_table"] or row["table_name"]
    self.merge_state = row["merge_state"]
    # table_attrs is an urlencoded string, possibly NULL
    attrs = row["table_attrs"] or ""
    self.table_attrs = skytools.db_urldecode(attrs)
    hstr = self.table_attrs.get("handler", "")
    self.plugin = build_handler(self.table_name, hstr, row["dest_table"])
def handler_allows_copy(table_attrs):
    """Decide if table is copyable based on attrs."""
    # no attrs at all -> default handler -> copyable
    if not table_attrs:
        return True
    parsed = skytools.db_urldecode(table_attrs)
    handler_str = parsed.get('handler', '')
    # build the handler just to query its copy capability
    plugin = londiste.handler.build_handler('unused.string', handler_str, None)
    return plugin.needs_table()
def cmd_tables(self):
    """Show attached tables."""
    q = """select table_name, merge_state, table_attrs from londiste.get_table_list(%s) where local"""
    db = self.get_database('db')

    # decode urlencoded attrs for display; NULL shows as empty string
    def _fmt_attrs(f):
        return '' if f is None else skytools.db_urldecode(f)

    self.display_table(db, "Tables on node", q, [self.set_name],
                       fieldfmt={'table_attrs': _fmt_attrs})
def process_event(self, ev, sql_queue_func, arg):
    """Filter event by hash in extra3, apply only if for local shard."""
    # events without hash metadata (or unconfigured hash_key) pass through
    if ev.extra3 and self.hash_key is not None:
        meta = skytools.db_urldecode(ev.extra3)
        self.log.debug('shard.process_event: hash=%i, hash_mask=%i, shard_nr=%i',
                       int(meta['hash']), self.hash_mask, self.shard_nr)
        # masked hash selects the owning shard
        if (int(meta['hash']) & self.hash_mask) != self.shard_nr:
            self.log.debug('shard.process_event: not my event')
            return
    self._process_event(ev, sql_queue_func, arg)
def process_event(self, ev, sql_queue_func, arg):
    """Filter event by hash in extra3, apply only local part."""
    if ev.extra3:
        meta = skytools.db_urldecode(ev.extra3)
        # use lazy %-args (consistent with the sibling handler) so the
        # message is only formatted when debug logging is enabled
        self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
                       int(meta['hash']), self.max_part, self.local_part)
        # masked hash selects the owning partition
        if (int(meta['hash']) & self.max_part) != self.local_part:
            self.log.debug('part.process_event: not my event')
            return
    self.log.debug('part.process_event: my event, processing')
    TableHandler.process_event(self, ev, sql_queue_func, arg)
def process_event(self, ev, sql_queue_func, arg):
    """Filter event by hash in extra3, apply only local part."""
    # events without hash metadata pass straight through
    if ev.extra3:
        meta = skytools.db_urldecode(ev.extra3)
        self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
                       int(meta['hash']), self.max_part, self.local_part)
        # masked hash selects the owning partition
        if (int(meta['hash']) & self.max_part) != self.local_part:
            self.log.debug('part.process_event: not my event')
            return
    self.log.debug('part.process_event: my event, processing')
    TableHandler.process_event(self, ev, sql_queue_func, arg)
def add_event(self, ev): """Store new event.""" # op & data ev.op = ev.ev_type[0] ev.data = skytools.db_urldecode(ev.ev_data) # get pkey column names if self.pkey_str is None: if len(ev.ev_type) > 2: self.pkey_str = ev.ev_type.split(':')[1] else: self.pkey_str = ev.ev_extra2 if self.pkey_str: self.pkey_list = self.pkey_str.split(',') # get pkey value if self.pkey_str: pk_data = [] for k in self.pkey_list: pk_data.append(ev.data[k]) ev.pk_data = tuple(pk_data) elif ev.op == 'I': # fake pkey, just to get them spread out ev.pk_data = ev.id else: raise Exception('non-pk tables not supported: %s' % self.name) # get full column list, detect added columns if not self.col_list: self.col_list = ev.data.keys() elif self.col_list != ev.data.keys(): # ^ supposedly python guarantees same order in keys() # find new columns for c in ev.data.keys(): if c not in self.col_list: for oldev in self.ev_list: oldev.data[c] = None self.col_list = ev.data.keys() # add to list self.ev_list.append(ev) # keep all versions of row data if ev.pk_data in self.pkey_map: self.pkey_map[ev.pk_data].append(ev) else: self.pkey_map[ev.pk_data] = [ev]
def __init__(self, queue_name, nst):
    """Derive per-node-type processing flags from a node state row.

    Sets watermark/event-processing flags depending on whether the
    node is root, branch or leaf (optionally combined with another
    queue via combined_type).
    """
    self.node_type = nst['node_type']
    self.node_name = nst['node_name']
    self.local_watermark = nst['local_watermark']
    self.global_watermark = nst['global_watermark']
    self.node_attrs = {}
    # node_attrs is an optional urlencoded string
    attrs = nst.get('node_attrs', '')
    if attrs:
        self.node_attrs = skytools.db_urldecode(attrs)
    ntype = nst['node_type']
    ctype = nst['combined_type']
    if ntype == 'root':
        # root generates the global watermark, does not publish a local one
        self.global_wm_event = 1
        self.local_wm_publish = 0
    elif ntype == 'branch':
        # branch replays and re-publishes full event stream
        self.target_queue = queue_name
        self.process_batch = 1
        self.process_events = 1
        self.copy_events = 1
        self.process_tick_event = 1
        self.keep_event_ids = 1
        self.create_tick = 1
        if 'sync_watermark' in self.node_attrs:
            # watermark is synced with an explicit list of nodes
            slist = self.node_attrs['sync_watermark']
            self.sync_watermark = 1
            self.wm_sync_nodes = slist.split(',')
        else:
            self.process_global_wm = 1
    elif ntype == 'leaf' and not ctype:
        # plain leaf just consumes
        self.process_batch = 1
        self.process_events = 1
    elif ntype == 'leaf' and ctype:
        # leaf merging into another queue
        self.target_queue = nst['combined_queue']
        if ctype == 'root':
            self.process_batch = 1
            self.process_events = 1
            self.copy_events = 1
            self.filtered_copy = 1
            self.send_tick_event = 1
        elif ctype == 'branch':
            self.process_batch = 1
            self.wait_behind = 1
        else:
            raise Exception('invalid state 1')
    else:
        raise Exception('invalid state 2')
    # combined_type is only meaningful for leaf nodes
    if ctype and ntype != 'leaf':
        raise Exception('invalid state 3')
def __init__(self, context, global_dict = None):
    """ This object must be initiated in the beginning of each db service

    `context` is an urlencoded string that must contain 'username'.
    Captures the current transaction id as a version marker.
    """
    DBService.__init__(self, context, global_dict)
    rec = skytools.db_urldecode(context)
    if "username" not in rec:
        plpy.error("Username must be provided in db service context parameter")
    self.username = rec['username']  # used for logging purposes
    res = plpy.execute("select txid_current() as txid;")
    row = res[0]
    self.version = row["txid"]
    self.rows_found = 0  # flag set by run query to indicate number of rows got
def _parse_handler(hstr):
    """Parse result of create_handler_string().

    Returns (name, args) where args is a dict parsed from the
    urlencoded argument list inside parentheses, if present.
    """
    paren = hstr.find('(')
    # no '(' (or '(' at position 0): whole string is the name, no args
    if paren <= 0:
        return (hstr, {})
    if hstr[-1] != ')':
        raise Exception('invalid handler format: %s' % hstr)
    arg_map = {}
    arg_str = hstr[paren + 1 : -1]
    if arg_str:
        # commas separate args; convert to '&' for urldecode
        arg_map = skytools.db_urldecode(arg_str.replace(',', '&'))
    return (hstr[:paren], arg_map)
def __init__(self, context, global_dict = None):
    """ This object must be initiated in the beginning of each db service

    `context` is an urlencoded string of service options; recognized
    flags are 'is_test' and 'show_sql'.
    """
    rec = skytools.db_urldecode(context)
    self._context = context             # used to run dbservice in retval
    self.global_dict = global_dict      # used for caching query plans
    self._retval = []                   # used to collect return resultsets
    self._is_test = 'is_test' in rec    # used to convert output into human readable form
    self.sqls = None                    # if sqls stays None then no recording of sqls is done
    if "show_sql" in rec:               # api must add executed sql to resultset
        self.sqls = []                  # sql's executed by dbservice, used for debugging
    self.can_save = True                # used to keep value most severe error found so far
    self.messages = []                  # used to hold list of messages to be returned to the user
def loaded_state(self, row):
    """Update object with info from db.

    Uses lazy %-args in log.debug (consistent with the sibling
    loaded_state implementation) so formatting only happens when
    debug logging is enabled.
    """
    self.log.debug("loaded_state: %s: %s / %s",
                   self.name, row['merge_state'], row['custom_snapshot'])
    self.change_snapshot(row['custom_snapshot'], 0)
    self.state = self.parse_state(row['merge_state'])
    self.changed = 0
    if row['table_attrs']:
        self.table_attrs = skytools.db_urldecode(row['table_attrs'])
    else:
        self.table_attrs = {}
    self.copy_role = row['copy_role']
    self.dropped_ddl = row['dropped_ddl']
    if row['merge_state'] == "?":
        # '?' marks an interrupted copy -- state needs re-saving
        self.changed = 1
def add(self, curs, ev, batch_info):
    """Route one row-change event to its (possibly split) destination table."""
    row = skytools.db_urldecode(ev.data)
    op, pkeys = ev.type.split(':', 1)
    pk_cols = pkeys.split(',')

    # resolve destination table, creating the partition when needed
    if self.split:
        dst = self.split_format(ev, row, batch_info)
        if dst not in self.part_map:
            self.check_part(curs, dst, pk_cols)
    else:
        dst = self.table_name

    # lazily create a row handler per destination table
    if dst not in self.part_map:
        self.part_map[dst] = self.rowhandler(dst, self.table_name, self.log)

    self.part_map[dst].add_row(op, row, pk_cols)
def get_record(arg):
    """ Parse data for one urlencoded record.

        Useful for turning incoming serialized data into structure usable for manipulation.
    """
    if not arg:
        return dbdict()

    # a pg array/record wrapper is allowed, but only with a single element
    if arg[0] in ('{', '['):
        rows = skytools.parse_pgarray(arg)
        if len(rows) != 1:
            raise ValueError('get_record() expects exactly 1 row, got %d' % len(rows))
        arg = rows[0]

    # parse record
    return dbdict(skytools.db_urldecode(arg))
def dispatch(self, dst_db, ev_list): """Generic dispatcher.""" # load data tables = {} for ev in ev_list: row = skytools.db_urldecode(ev.data) # guess dest table if self.part_field: if self.part_field == "_EVTIME": partval = str(ev.creation_date) else: partval = str(row[self.part_field]) partval = partval.split(' ')[0] date = partval.split('-') if self.part_method == 'monthly': date = date[:2] suffix = '_'.join(date) tbl = "%s_%s" % (self.dest_table, suffix) else: tbl = self.dest_table # map fields if self.field_map is None: dstrow = row else: dstrow = {} for k, v in self.field_map.items(): dstrow[v] = row[k] # add row into table if not tbl in tables: tables[tbl] = [dstrow] else: tables[tbl].append(dstrow) ev.tag_done() # create tables if needed self.check_tables(dst_db, tables) # insert into data tables curs = dst_db.cursor() for tbl, tbl_rows in tables.items(): skytools.magic_insert(curs, tbl, tbl_rows)
def parse_row_data(self, ev):
    """Extract row data from event, with optional encoding fixes.

    Returns either string (sql event) or dict (urlenc event).

    Raises:
        Exception: on an SQL event when allow_sql_event is not set.
    """
    if len(ev.type) == 1:
        # single-letter type -> raw SQL event
        if not self.allow_sql_event:
            # fixed typo in error message ('suppoted' -> 'supported')
            raise Exception('SQL events not supported by this handler')
        if self.enc:
            return self.enc.validate_string(ev.data, self.table_name)
        return ev.data
    else:
        # "O:pkeys" type -> urlencoded row
        row = skytools.db_urldecode(ev.data)
        if self.enc:
            return self.enc.validate_dict(row, self.table_name)
        return row
def cmd_resync(self, *args):
    """Reload data from provider node.

    Verifies the tables exist on the provider (unless a copy node is
    being searched for), optionally stamps a 'copy_node' attr on each
    table, then resets their merge state to trigger re-copy.
    """
    db = self.get_database('db')
    args = self.expand_arg_list(db, 'r', True, args)

    if not self.options.find_copy_node:
        self.load_local_info()
        src_db = self.get_provider_db()
        src_curs = src_db.cursor()
        src_tbls = self.fetch_set_tables(src_curs)
        src_db.commit()

        # verify all requested tables exist on provider
        problems = 0
        for tbl in args:
            tbl = skytools.fq_name(tbl)
            if tbl not in src_tbls or not src_tbls[tbl]['local']:
                self.log.error(
                    "Table %s does not exist on provider, need to switch to different provider",
                    tbl)
                problems += 1
        if problems > 0:
            self.log.error("Problems, cancelling operation")
            sys.exit(1)

    # record the requested copy node in each table's attrs
    if self.options.find_copy_node or self.options.copy_node:
        q = "select table_name, table_attrs from londiste.get_table_list(%s) where local"
        cur = db.cursor()
        cur.execute(q, [self.set_name])
        for row in cur.fetchall():
            if row['table_name'] not in args:
                continue
            attrs = skytools.db_urldecode(row['table_attrs'] or '')
            if self.options.find_copy_node:
                # '?' asks the copy process to locate a node itself
                attrs['copy_node'] = '?'
            elif self.options.copy_node:
                attrs['copy_node'] = self.options.copy_node
            s_attrs = skytools.db_urlencode(attrs)
            q = "select * from londiste.local_set_table_attrs(%s, %s, %s)"
            self.exec_cmd(db, q, [self.set_name, row['table_name'], s_attrs])

    # null merge_state triggers re-copy
    q = "select * from londiste.local_set_table_state(%s, %s, null, null)"
    self.exec_cmd_many(db, q, [self.set_name], args)
def loaded_state(self, row):
    """Update object with info from db."""
    self.log.debug("loaded_state: %s: %s / %s" % (self.name, row["merge_state"], row["custom_snapshot"]))
    self.change_snapshot(row["custom_snapshot"], 0)
    self.state = self.parse_state(row["merge_state"])
    self.changed = 0
    if row["table_attrs"]:
        self.table_attrs = skytools.db_urldecode(row["table_attrs"])
    else:
        self.table_attrs = {}
    self.copy_role = row["copy_role"]
    self.dropped_ddl = row["dropped_ddl"]
    if row["merge_state"] == "?":
        # '?' marks an interrupted copy -- state needs re-saving
        self.changed = 1
    # NOTE(review): reads 'handlers' (plural) while sibling code reads
    # 'handler' with a plural fallback -- confirm which key is stored
    hstr = self.table_attrs.get("handlers", "")
    self.plugin = parse_handler(self.name, hstr, self.log)
def cmd_change_handler(self, tbl):
    """Change handler (table_attrs) of the replicated table.

    Looks up current attrs, compares old vs new handler string and,
    when they differ, stores the updated attrs via
    londiste.local_change_handler.  Exits if the table is unknown or
    the handler is already as requested.
    """
    self.load_local_info()
    tbl = skytools.fq_name(tbl)
    db = self.get_database('db')
    curs = db.cursor()
    q = "select table_attrs, dest_table "\
        " from londiste.get_table_list(%s) "\
        " where table_name = %s and local"
    curs.execute(q, [self.set_name, tbl])
    if curs.rowcount == 0:
        self.log.error("Table %s not found on this node", tbl)
        sys.exit(1)
    attrs, dest_table = curs.fetchone()
    attrs = skytools.db_urldecode(attrs or '')
    old_handler = attrs.get('handler')
    tgargs = self.build_tgargs()
    if self.options.handler:
        new_handler = self.build_handler(tbl, tgargs, dest_table)
    else:
        new_handler = None
    # no-op if nothing would change
    if old_handler == new_handler:
        self.log.info(
            "Handler is already set to desired value, nothing done")
        sys.exit(0)
    if new_handler:
        attrs['handler'] = new_handler
    elif 'handler' in attrs:
        # empty new handler means: remove the attr entirely
        del attrs['handler']
    args = [self.set_name, tbl, tgargs, None]
    if attrs:
        args[3] = skytools.db_urlencode(attrs)
    q = "select * from londiste.local_change_handler(%s, %s, %s, %s)"
    self.exec_cmd(curs, q, args)
    db.commit()
def dispatch(self, dst_db, ev_list): """Generic dispatcher.""" # load data tables = {} for ev in ev_list: row = skytools.db_urldecode(ev.data) # guess dest table if self.part_field: if self.part_field == "_EVTIME": partval = str(ev.creation_date) else: partval = str(row[self.part_field]) partval = partval.split(' ')[0] date = partval.split('-') if self.part_method == 'monthly': date = date[:2] suffix = '_'.join(date) tbl = "%s_%s" % (self.dest_table, suffix) else: tbl = self.dest_table # map fields if self.field_map is None: dstrow = row else: dstrow = {} for k, v in self.field_map.items(): dstrow[v] = row[k] # add row into table if not tbl in tables: tables[tbl] = [dstrow] else: tables[tbl].append(dstrow) # create tables if needed self.check_tables(dst_db, tables) # insert into data tables curs = dst_db.cursor() for tbl, tbl_rows in tables.items(): skytools.magic_insert(curs, tbl, tbl_rows)
def cmd_change_handler(self, tbl):
    """Change handler (table_attrs) of the replicated table.

    Looks up current attrs, compares old vs new handler string and,
    when they differ, stores the updated attrs via
    londiste.local_change_handler.  Exits if the table is unknown or
    the handler is already as requested.
    """
    self.load_local_info()
    tbl = skytools.fq_name(tbl)
    db = self.get_database('db')
    curs = db.cursor()
    q = "select table_attrs, dest_table "\
        " from londiste.get_table_list(%s) "\
        " where table_name = %s and local"
    curs.execute(q, [self.set_name, tbl])
    if curs.rowcount == 0:
        self.log.error("Table %s not found on this node", tbl)
        sys.exit(1)
    attrs, dest_table = curs.fetchone()
    attrs = skytools.db_urldecode(attrs or '')
    old_handler = attrs.get('handler')
    tgargs = self.build_tgargs()
    if self.options.handler:
        new_handler = self.build_handler(tbl, tgargs, dest_table)
    else:
        new_handler = None
    # no-op if nothing would change
    if old_handler == new_handler:
        self.log.info("Handler is already set to desired value, nothing done")
        sys.exit(0)
    if new_handler:
        attrs['handler'] = new_handler
    elif 'handler' in attrs:
        # empty new handler means: remove the attr entirely
        del attrs['handler']
    args = [self.set_name, tbl, tgargs, None]
    if attrs:
        args[3] = skytools.db_urlencode(attrs)
    q = "select * from londiste.local_change_handler(%s, %s, %s, %s)"
    self.exec_cmd(curs, q, args)
    db.commit()
def loaded_state(self, row):
    """Update object with info from db."""
    self.log.debug("loaded_state: %s: %s / %s" % (
        self.name, row['merge_state'], row['custom_snapshot']))
    self.change_snapshot(row['custom_snapshot'], 0)
    self.state = self.parse_state(row['merge_state'])
    self.changed = 0
    if row['table_attrs']:
        self.table_attrs = skytools.db_urldecode(row['table_attrs'])
    else:
        self.table_attrs = {}
    self.copy_role = row['copy_role']
    self.dropped_ddl = row['dropped_ddl']
    if row['merge_state'] == "?":
        # '?' marks an interrupted copy -- state needs re-saving
        self.changed = 1
    # NOTE(review): handler string is read from the row itself here,
    # while sibling code reads it from table_attrs -- confirm which
    # location the schema actually populates
    hstr = row.get('handler', '')
    self.plugin = parse_handler(self.name, hstr)
def cmd_resync(self, *args):
    """Reload data from provider node.

    Verifies the tables exist on the provider (unless a copy node is
    being searched for), optionally stamps a 'copy_node' attr on each
    table, then resets their merge state to trigger re-copy.

    Cleanup vs old version: the decoded attrs dict is no longer
    shadowed by its urlencoded form, and call spacing follows the
    sibling implementation.
    """
    db = self.get_database('db')
    args = self.expand_arg_list(db, 'r', True, args)

    if not self.options.find_copy_node:
        self.load_local_info()
        src_db = self.get_provider_db()
        src_curs = src_db.cursor()
        src_tbls = self.fetch_set_tables(src_curs)
        src_db.commit()

        # verify all requested tables exist on provider
        problems = 0
        for tbl in args:
            tbl = skytools.fq_name(tbl)
            if tbl not in src_tbls or not src_tbls[tbl]['local']:
                self.log.error("Table %s does not exist on provider, need to switch to different provider", tbl)
                problems += 1
        if problems > 0:
            self.log.error("Problems, cancelling operation")
            sys.exit(1)

    # record the requested copy node in each table's attrs
    if self.options.find_copy_node or self.options.copy_node:
        q = "select table_name, table_attrs from londiste.get_table_list(%s) where local"
        cur = db.cursor()
        cur.execute(q, [self.set_name])
        for row in cur.fetchall():
            if row['table_name'] not in args:
                continue
            attrs = skytools.db_urldecode(row['table_attrs'] or '')
            if self.options.find_copy_node:
                # '?' asks the copy process to locate a node itself
                attrs['copy_node'] = '?'
            elif self.options.copy_node:
                attrs['copy_node'] = self.options.copy_node
            s_attrs = skytools.db_urlencode(attrs)
            q = "select * from londiste.local_set_table_attrs (%s, %s, %s)"
            self.exec_cmd(db, q, [self.set_name, row['table_name'], s_attrs])

    # null merge_state triggers re-copy
    q = "select * from londiste.local_set_table_state(%s, %s, null, null)"
    self.exec_cmd_many(db, q, [self.set_name], args)
def process_event(self, ev, sql_queue_func, arg):
    """Convert one event into SQL and push it to the SQL queue.

    Single-letter event types carry raw SQL; "O:pkeys" types carry an
    urlencoded row that is turned into INSERT/UPDATE/DELETE.

    Raises:
        Exception: on an unknown row operation letter (previously this
            fell through and crashed with NameError on `sql`).
    """
    if len(ev.type) == 1:
        # sql event
        fqname = skytools.quote_fqident(ev.extra1)
        fmt = self.sql_command[ev.type]
        sql = fmt % (fqname, ev.data)
    else:
        # urlenc event
        pklist = ev.type[2:].split(',')
        row = skytools.db_urldecode(ev.data)
        op = ev.type[0]
        tbl = ev.extra1
        if op == 'I':
            sql = skytools.mk_insert_sql(row, tbl, pklist)
        elif op == 'U':
            sql = skytools.mk_update_sql(row, tbl, pklist)
        elif op == 'D':
            sql = skytools.mk_delete_sql(row, tbl, pklist)
        else:
            # BUG FIX: unknown op used to leave `sql` unbound
            raise Exception('Unknown event type: %s' % ev.type)
    sql_queue_func(sql, arg)
def process_remote_batch(self, db, batch_id, event_list, dst_db):
    """Run dst_query on dst_db once per event in the batch.

    The urlencoded event data plus pgq.* metadata fields are passed as
    query parameters.
    """
    curs = dst_db.cursor()
    for ev in event_list:
        payload = skytools.db_urldecode(ev.data)
        if payload is None:
            payload = {}
        # expose event metadata under pgq.* keys for the query to use
        payload['pgq.ev_type'] = ev.type
        payload['pgq.ev_data'] = ev.data
        payload['pgq.ev_id'] = ev.id
        payload['pgq.ev_time'] = ev.time
        payload['pgq.ev_extra1'] = ev.extra1
        payload['pgq.ev_extra2'] = ev.extra2
        payload['pgq.ev_extra3'] = ev.extra3
        payload['pgq.ev_extra4'] = ev.extra4
        # BUG FIX: pass query and args lazily instead of the old
        # "self.dst_query % payload", which could itself crash on '%'
        # literals or missing keys before the query was even executed
        self.log.debug(self.dst_query, payload)
        curs.execute(self.dst_query, payload)
        try:
            res = curs.fetchone()
            self.log.debug(res)
        except Exception:
            # non-SELECT statements have no rows to fetch; best-effort only
            pass
def __init__(self, queue_name, row, main_worker=True, node_name=None):
    """Initialize node info from a node-state row.

    If `row` is empty/None the node is marked 'dead' and only the
    given node_name is recorded.
    """
    self.queue_name = queue_name
    self.member_map = {}
    self.main_worker = main_worker
    self.parent = None
    self.consumer_map = {}
    self.queue_info = {}
    self._info_lines = []
    self.cascaded_consumer_map = {}
    self._row = row
    if not row:
        # node unreachable / no state row available
        self.name = node_name
        self.type = 'dead'
        return
    self.name = row['node_name']
    self.type = row['node_type']
    self.global_watermark = row['global_watermark']
    self.local_watermark = row['local_watermark']
    self.completed_tick = row['worker_last_tick']
    self.provider_node = row['provider_node']
    self.provider_location = row['provider_location']
    self.consumer_name = row['worker_name']
    self.worker_name = row['worker_name']
    self.paused = row['worker_paused']
    self.uptodate = row['worker_uptodate']
    self.combined_queue = row['combined_queue']
    self.combined_type = row['combined_type']
    self.last_tick = row['worker_last_tick']
    # node_attrs is an optional urlencoded string
    self.node_attrs = {}
    if 'node_attrs' in row:
        a = row['node_attrs']
        if a:
            self.node_attrs = skytools.db_urldecode(a)
def process_batch(self, res, mcur, bres):
    """ Process events in autocommit mode reading results back and trying to make some sense out of them

    For each row of the read-query result, runs sql_modify with the
    merged batch+row parameters and accumulates stats.  On any failure
    runs sql_crash (if configured) with the last parameter set, then
    re-raises.  Returns (count, last_item).
    """
    try:
        count = 0
        item = bres.copy()
        for i in res:  # for each row in read query result
            item.update(i)
            mcur.execute(self.sql_modify, item)
            self.log.debug(mcur.query)
            if mcur.statusmessage.startswith('SELECT'):  # if select was used we can expect some result
                mres = mcur.fetchall()
                for r in mres:
                    if 'stats' in r:  # if specially handled column 'stats' is present
                        # 'stats' holds urlencoded counter increments
                        for k, v in skytools.db_urldecode(r['stats'] or '').items():
                            self.stat_increase(k, int(v))
                    self.log.debug(r)
            else:
                self.stat_increase('processed', mcur.rowcount)
                self.log.debug(mcur.statusmessage)
            # 'cnt' column lets the query report how many logical items it covered
            if 'cnt' in item:
                count += item['cnt']
                self.stat_increase("count", item['cnt'])
            else:
                count += 1
                self.stat_increase("count")
            # allow clean stop on SIGINT between rows
            if self.last_sigint:
                break
        return count, item
    except:
        # process has crashed, run sql_crash and re-raise the exception
        if self.sql_crash:
            dbc = self.get_database("dbcrash", autocommit=1)
            ccur = dbc.cursor()
            ccur.execute(self.sql_crash, item)
        raise