Example #1
0
    def make_sql(self, tbl, ev):
        """Return SQL statement(s) for that event.

        The row operation and the pkey column names come from ``ev.type``
        when it looks like ``op:key1,key2``; otherwise the operation is
        ``ev.type`` itself and the keys are read from ``ev.extra2``.
        The raw key string is also stored on the event as ``ev.key_list``.

        Raises Exception when keep_latest is set but no pkey columns are
        given, when a delete arrives without keep_latest, or when the
        operation is not one of I/U/D.
        """

        # parse data
        data = skytools.db_urldecode(ev.data)

        # parse tbl info
        if ev.type.find(':') > 0:
            # split once only: everything after the first ':' is the key list
            op, keys = ev.type.split(':', 1)
        else:
            op = ev.type
            keys = ev.extra2
        ev.key_list = keys

        # ''.split(',') returns [''], never []; drop empty names so the
        # missing-pkey check below can actually trigger
        key_list = [k for k in (keys or '').split(',') if k]
        if self.keep_latest and not key_list:
            raise Exception('No pkey on table %s' % tbl)

        # generate sql
        if op in ('I', 'U'):
            if self.keep_latest:
                # replace old version of the row: delete, then insert
                sql = "%s %s" % (self.mk_delete_sql(tbl, key_list, data),
                                 self.mk_insert_sql(tbl, key_list, data))
            else:
                sql = self.mk_insert_sql(tbl, key_list, data)
        elif op == "D":
            if not self.keep_latest:
                raise Exception('Delete op not supported if mode=keep_all')

            sql = self.mk_delete_sql(tbl, key_list, data)
        else:
            raise Exception('Unknown row op: %s' % op)
        return sql
Example #2
0
    def make_sql(self, tbl, ev):
        """Return SQL statement(s) for that event.

        The row operation and the pkey column names come from ``ev.type``
        when it looks like ``op:key1,key2``; otherwise the operation is
        ``ev.type`` itself and the keys are read from ``ev.extra2``.
        The raw key string is also stored on the event as ``ev.key_list``.

        Raises Exception when keep_latest is set but no pkey columns are
        given, when a delete arrives without keep_latest, or when the
        operation is not one of I/U/D.
        """

        # parse data
        data = skytools.db_urldecode(ev.data)

        # parse tbl info
        if ev.type.find(':') > 0:
            # split once only: everything after the first ':' is the key list
            op, keys = ev.type.split(':', 1)
        else:
            op = ev.type
            keys = ev.extra2
        ev.key_list = keys

        # ''.split(',') returns [''], never []; drop empty names so the
        # missing-pkey check below can actually trigger
        key_list = [k for k in (keys or '').split(',') if k]
        if self.keep_latest and not key_list:
            raise Exception('No pkey on table %s' % tbl)

        # generate sql
        if op in ('I', 'U'):
            if self.keep_latest:
                # replace old version of the row: delete, then insert
                sql = "%s %s" % (self.mk_delete_sql(tbl, key_list, data),
                                 self.mk_insert_sql(tbl, key_list, data))
            else:
                sql = self.mk_insert_sql(tbl, key_list, data)
        elif op == "D":
            if not self.keep_latest:
                raise Exception('Delete op not supported if mode=keep_all')

            sql = self.mk_delete_sql(tbl, key_list, data)
        else:
            raise Exception('Unknown row op: %s' % op)
        return sql
Example #3
0
    def process_event(self, db, ev):
        """Execute dst_query on 'dst_db' with the fields of one row event.

        Events whose type does not start with 'I:', 'U:' or 'D:' are
        ignored.  The query parameters are the decoded event data plus
        the 'pgq.*' event metadata fields.
        """
        curs = self.get_database('dst_db', autocommit=1).cursor()

        if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
            return

        # decoded row columns; an event without data contributes nothing
        payload = {} if ev.ev_data is None else skytools.db_urldecode(ev.ev_data)

        # event metadata is passed along under 'pgq.' prefixed keys
        payload.update({
            'pgq.tick_id': self.batch_info['cur_tick_id'],
            'pgq.ev_id': ev.ev_id,
            'pgq.ev_time': ev.ev_time,
            'pgq.ev_type': ev.ev_type,
            'pgq.ev_data': ev.ev_data,
            'pgq.ev_extra1': ev.ev_extra1,
            'pgq.ev_extra2': ev.ev_extra2,
            'pgq.ev_extra3': ev.ev_extra3,
            'pgq.ev_extra4': ev.ev_extra4,
        })

        self.log.debug(self.dst_query, payload)
        curs.execute(self.dst_query, payload)

        # only a SELECT has rows to read back
        if curs.statusmessage[:6] != 'SELECT':
            self.log.debug(curs.statusmessage)
            return
        self.log.debug(curs.fetchall())
Example #4
0
    def process_event(self, ev, sql_queue_func, arg):
        """Route one row event to the row handler of its destination table.

        Nothing happens when table_mode is 'ignore' or when the operation
        is not listed in conf.event_types.  Raises Exception when the event
        type is not of the '<I|U|D>:<pkeys>' form.
        """
        if self.conf.table_mode == 'ignore':
            return

        # row data is decoded before the event type is validated
        row = skytools.db_urldecode(ev.data)

        well_formed = len(ev.ev_type) >= 2 and ev.ev_type[1] == ':'
        if not well_formed:
            raise Exception('Unsupported event type: %s/extra1=%s/data=%s' %
                            (ev.ev_type, ev.ev_extra1, ev.ev_data))
        op, pkeys = ev.type.split(':', 1)
        if op not in 'IUD':
            raise Exception('Unknown event type: %s' % ev.ev_type)

        # process only operations specified
        if op not in self.conf.event_types:
            return

        self.log.debug('dispatch.process_event: %s/%s' %
                       (ev.ev_type, ev.ev_data))

        # pkey columns are taken from the first processed event only
        if self.pkeys is None:
            self.pkeys = self.filter_pkeys(pkeys.split(','))
        row = self.filter_data(row)

        # pick destination table, preparing the split table when needed
        if self.conf.table_mode != 'part':
            dst = self.table_name
        else:
            dst, part_time = self.split_format(ev, row)
            if dst not in self.row_handler.table_map:
                self.check_part(dst, part_time)

        if dst not in self.row_handler.table_map:
            self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
                                       self.pkeys, self.conf)
        self.row_handler.process(dst, op, row)
Example #5
0
    def process_local_event(self, db, batch_id, ev):
        """Execute dst_query on 'dst_db' for one row event, via execute_with_retry.

        Events whose type does not start with 'I:', 'U:' or 'D:' are
        ignored.  The query parameters are the decoded event data plus
        the 'pgq.*' event metadata fields.
        """
        if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
            return

        # decoded row columns; an event without data contributes nothing
        payload = {} if ev.ev_data is None else skytools.db_urldecode(ev.ev_data)

        # event metadata is passed along under 'pgq.' prefixed keys
        payload.update({
            'pgq.tick_id': self.batch_info['cur_tick_id'],
            'pgq.ev_id': ev.ev_id,
            'pgq.ev_time': ev.ev_time,
            'pgq.ev_type': ev.ev_type,
            'pgq.ev_data': ev.ev_data,
            'pgq.ev_extra1': ev.ev_extra1,
            'pgq.ev_extra2': ev.ev_extra2,
            'pgq.ev_extra3': ev.ev_extra3,
            'pgq.ev_extra4': ev.ev_extra4,
        })

        self.log.debug(self.dst_query, payload)

        # the retry count is returned too, but only the cursor is used here
        _retries, curs = self.execute_with_retry(
            'dst_db', self.dst_query, payload,
            exceptions=(psycopg2.OperationalError,))

        # only a SELECT has rows to read back
        if curs.statusmessage[:6] != 'SELECT':
            self.log.debug(curs.statusmessage)
            return
        self.log.debug(curs.fetchall())
Example #6
0
def ts_conflict_handler(gd, args):
    """Conflict handling based on timestamp column.

    args[0] is an urlencoded config string that must contain 'timefield'
    and may contain 'altpk' (comma-separated column names) and the
    'fkey_*' settings.  args[1]..args[6] are the event type, data and
    extra1..extra4 fields.  Returns whatever applyrow() returns.
    """
    conf = skytools.db_urldecode(args[0])
    timefield = conf['timefield']

    ev_type, ev_data = args[1], args[2]
    ev_extra1, ev_extra2 = args[3], args[4]
    # extra3 / extra4 are read from args but not used below
    ev_extra3, ev_extra4 = args[5], args[6]

    altpk = conf['altpk'].split(',') if 'altpk' in conf else None

    def ts_canapply(rnew, rold):
        # decision is delegated to the timestamp comparison helper
        return canapply_tstamp_helper(rnew, rold, timefield)

    options = dict(
        backup_row=ev_extra2,
        alt_pkey_cols=altpk,
        fkey_ref_table=conf.get('fkey_ref_table'),
        fkey_ref_cols=conf.get('fkey_ref_cols'),
        fkey_cols=conf.get('fkey_cols'),
        fn_canapply=ts_canapply,
    )
    return applyrow(ev_extra1, ev_type, ev_data, **options)
Example #7
0
 def process_batch(self, res, mcur, bres):
     """Process events in autocommit mode, reading results back and
     turning them into statistics.

     For every row of `res` the row is merged into a copy of `bres` and
     sql_modify is executed on `mcur` with the merged dict as parameters.
     Returns a (count, item) tuple: `count` is the sum of the 'cnt' values
     (or 1 per row when there is no 'cnt'), `item` is the merged dict after
     the last processed row.

     If anything fails while processing, sql_crash (when set) is executed
     on the 'dbcrash' database with the current item and the original
     exception is re-raised.
     """
     count = 0
     # built before the try block so the crash handler can always use `item`;
     # a failure here is re-raised as is instead of being masked by a NameError
     item = bres.copy()
     try:
         for row in res:   # for each row in read query result
             item.update(row)
             mcur.execute(self.sql_modify, item)
             self.log.debug(mcur.query)
             if mcur.statusmessage.startswith('SELECT'): # if select was used we can expect some result
                 for r in mcur.fetchall():
                     if 'stats' in r: # if specially handled column 'stats' is present
                         for k, v in skytools.db_urldecode(r['stats'] or '').items():
                             self.stat_increase(k, int(v))
                     self.log.debug(r)
             else:
                 self.stat_increase('processed', mcur.rowcount)
                 self.log.debug(mcur.statusmessage)
             if 'cnt' in item:
                 count += item['cnt']
                 self.stat_increase("count", item['cnt'])
             else:
                 count += 1
                 self.stat_increase("count")
             # stop early when an interrupt was requested
             if self.last_sigint:
                 break
         return count, item
     except BaseException: # process has crashed, run sql_crash and re-raise the exception
         if self.sql_crash:
             dbc = self.get_database("dbcrash", autocommit=1)
             ccur = dbc.cursor()
             ccur.execute(self.sql_crash, item)
         raise
Example #8
0
 def process_event(self, event, hbase):
   """Apply one row event to HBase as a single mutateRow() call.

   The table mapping is looked up by event.ev_extra1; events without a
   mapping are logged and skipped.  The event is tagged done after the
   mutations were sent.  Raises Exception for unknown event types.
   """
   if event.ev_extra1 not in self.table_mappings:
     self.log.info("table name not found in config, skipping event")
     return
   mapping = self.table_mappings[event.ev_extra1]

   # note: table name / column descriptor validation against hbase is disabled
   row_data = skytools.db_urldecode(event.data)
   op = event.type.split(':')[0]

   batch = BatchMutation()
   batch.row = mapping.hbase_row_prefix + str(row_data[mapping.psql_key_column])
   batch.mutations = []

   # one mutation per mapped (psql column, hbase column) pair
   column_pairs = zip(mapping.psql_columns, mapping.hbase_column_descriptors)
   for psql_column, hbase_column in column_pairs:
     if op in (INSERT, UPDATE):
       m = Mutation()
       m.column = hbase_column
       m.value = str(row_data[psql_column])
     elif op == DELETE:
       # delete this column entry
       m = Mutation()
       m.isDelete = True
       m.column = hbase_column
     else:
       raise Exception("Invalid event type: %s, event data was: %s" % (op, str(row_data)))
     batch.mutations.append(m)

   hbase.client.mutateRow(mapping.hbase_table_name, batch.row, batch.mutations)
   event.tag_done()
Example #9
0
    def loaded_state(self, row):
        """Update object with info from db.

        Reads merge state, snapshot, table attributes, copy settings and
        destination table from `row`, then builds the handler plugin.
        """
        self.log.debug("loaded_state: %s: %s / %s",
                       self.name, row['merge_state'], row['custom_snapshot'])
        self.change_snapshot(row['custom_snapshot'], 0)
        self.state = self.parse_state(row['merge_state'])
        self.changed = 0

        # urlencoded attrs -> dict, empty dict when nothing is stored
        raw_attrs = row['table_attrs']
        self.table_attrs = skytools.db_urldecode(raw_attrs) if raw_attrs else {}

        self.copy_role = row['copy_role']
        self.dropped_ddl = row['dropped_ddl']

        # "?" merge state marks the object as changed
        if row['merge_state'] == "?":
            self.changed = 1

        self.copy_pos = int(row.get('copy_pos','0'))
        parallel = self.table_attrs.get('max_parallel_copy',
                                        self.max_parallel_copy)
        self.max_parallel_copy = int(parallel)

        # fall back to own name when row has no usable dest_table
        if 'dest_table' in row and row['dest_table']:
            self.dest_table = row['dest_table']
        else:
            self.dest_table = self.name

        # 'handler' wins over the older 'handlers' key (compat)
        legacy_hstr = self.table_attrs.get('handlers', '')
        hstr = self.table_attrs.get('handler', legacy_hstr)
        self.plugin = build_handler(self.name, hstr, self.dest_table)
    def process_local_event(self, db, batch_id, ev):
        """Execute dst_query on 'dst_db' with the fields of one row event.

        Events whose type does not start with 'I:', 'U:' or 'D:' are
        ignored.  The query parameters are the decoded event data plus
        the 'pgq.*' event metadata fields.
        """
        curs = self.get_database('dst_db', autocommit = 1).cursor()

        if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
            return

        # decoded row columns; an event without data contributes nothing
        payload = {} if ev.ev_data is None else skytools.db_urldecode(ev.ev_data)

        # event metadata is passed along under 'pgq.' prefixed keys
        payload.update({
            'pgq.tick_id': self.batch_info['cur_tick_id'],
            'pgq.ev_id': ev.ev_id,
            'pgq.ev_time': ev.ev_time,
            'pgq.ev_type': ev.ev_type,
            'pgq.ev_data': ev.ev_data,
            'pgq.ev_extra1': ev.ev_extra1,
            'pgq.ev_extra2': ev.ev_extra2,
            'pgq.ev_extra3': ev.ev_extra3,
            'pgq.ev_extra4': ev.ev_extra4,
        })

        self.log.debug(self.dst_query, payload)
        curs.execute(self.dst_query, payload)

        # only a SELECT has rows to read back
        if curs.statusmessage[:6] != 'SELECT':
            self.log.debug(curs.statusmessage)
            return
        self.log.debug(curs.fetchall())
Example #11
0
    def process_event(self, ev, sql_queue_func, arg):
        """Route one row event to the row handler of its destination table.

        Nothing happens when table_mode is 'ignore' or when the operation
        is not listed in conf.event_types.  Raises Exception when the event
        type is not of the '<I|U|D>:<pkeys>' form.
        """
        if self.conf.table_mode == 'ignore':
            return

        # row data is decoded before the event type is validated
        row = skytools.db_urldecode(ev.data)

        well_formed = len(ev.ev_type) >= 2 and ev.ev_type[1] == ':'
        if not well_formed:
            raise Exception('Unsupported event type: %s/extra1=%s/data=%s' % (
                            ev.ev_type, ev.ev_extra1, ev.ev_data))
        op, pkeys = ev.type.split(':', 1)
        if op not in 'IUD':
            raise Exception('Unknown event type: %s' % ev.ev_type)

        # process only operations specified
        if op not in self.conf.event_types:
            return

        self.log.debug('dispatch.process_event: %s/%s' % (
            ev.ev_type, ev.ev_data))

        # pkey columns are taken from the first processed event only
        if self.pkeys is None:
            self.pkeys = self.filter_pkeys(pkeys.split(','))
        row = self.filter_data(row)

        # pick destination table, preparing the split table when needed
        if self.conf.table_mode != 'part':
            dst = self.table_name
        else:
            dst, part_time = self.split_format(ev, row)
            if dst not in self.row_handler.table_map:
                self.check_part(dst, part_time)

        if dst not in self.row_handler.table_map:
            self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
                                    self.pkeys, self.conf)
        self.row_handler.process(dst, op, row)
    def process_local_event(self, db, batch_id, ev):
        """Execute dst_query on 'dst_db' for one row event, via execute_with_retry.

        Events whose type does not start with 'I:', 'U:' or 'D:' are
        ignored.  The query parameters are the decoded event data plus
        the 'pgq.*' event metadata fields.
        """
        if ev.ev_type[:2] not in ('I:', 'U:', 'D:'):
            return

        # decoded row columns; an event without data contributes nothing
        payload = {} if ev.ev_data is None else skytools.db_urldecode(ev.ev_data)

        # event metadata is passed along under 'pgq.' prefixed keys
        payload.update({
            'pgq.tick_id': self.batch_info['cur_tick_id'],
            'pgq.ev_id': ev.ev_id,
            'pgq.ev_time': ev.ev_time,
            'pgq.ev_type': ev.ev_type,
            'pgq.ev_data': ev.ev_data,
            'pgq.ev_extra1': ev.ev_extra1,
            'pgq.ev_extra2': ev.ev_extra2,
            'pgq.ev_extra3': ev.ev_extra3,
            'pgq.ev_extra4': ev.ev_extra4,
        })

        self.log.debug(self.dst_query, payload)

        # the retry count is returned too, but only the cursor is used here
        _retries, curs = self.execute_with_retry(
            'dst_db', self.dst_query, payload,
            exceptions = (psycopg2.OperationalError,))

        # only a SELECT has rows to read back
        if curs.statusmessage[:6] != 'SELECT':
            self.log.debug(curs.statusmessage)
            return
        self.log.debug(curs.fetchall())
Example #13
0
    def register_copy_consumer(self):
        """Register this copy consumer on the node the table is copied from.

        The source node is the 'copy_node' table attribute when it is set;
        otherwise the provider of the old (parent) consumer is used.

        Raises Exception when 'copy_node' names a node that has no entry
        in the queue locations.
        """
        dst_db = self.get_database('db')
        dst_curs = dst_db.cursor()

        # fetch table attrs
        q = "select * from londiste.get_table_list(%s) where table_name = %s"
        dst_curs.execute(q, [ self.queue_name, self.copy_table_name ])
        rows = dst_curs.fetchall()
        attrs = {}
        if len(rows) > 0:
            v_attrs = rows[0]['table_attrs']
            if v_attrs:
                attrs = skytools.db_urldecode(v_attrs)

        # do we have node here?
        if 'copy_node' in attrs:
            # take node from attrs
            source_node = attrs['copy_node']
            q = "select * from pgq_node.get_queue_locations(%s) where node_name = %s"
            dst_curs.execute(q, [ self.queue_name, source_node ])
            rows = dst_curs.fetchall()
            if not rows:
                # fail with a clear message instead of an unbound
                # source_location further below
                raise Exception("copy_node '%s' not found in queue locations" % source_node)
            source_location = rows[0]['node_location']
        else:
            # fetch parent consumer state
            q = "select * from pgq_node.get_consumer_state(%s, %s)"
            rows = self.exec_cmd(dst_db, q, [ self.queue_name, self.old_consumer_name ])
            state = rows[0]
            source_node = state['provider_node']
            source_location = state['provider_location']

        self.log.info("Using '%s' as source node", source_node)
        self.register_consumer(source_location)
Example #14
0
def ts_conflict_handler(gd, args):
    """Conflict handling based on timestamp column.

    args[0] is an urlencoded config string that must contain 'timefield'
    and may contain 'altpk' (comma-separated column names) and the
    'fkey_*' settings.  args[1]..args[6] are the event type, data and
    extra1..extra4 fields.  Returns whatever applyrow() returns.
    """
    conf = skytools.db_urldecode(args[0])
    timefield = conf['timefield']

    ev_type, ev_data = args[1], args[2]
    ev_extra1, ev_extra2 = args[3], args[4]
    # extra3 / extra4 are read from args but not used below
    ev_extra3, ev_extra4 = args[5], args[6]

    altpk = conf['altpk'].split(',') if 'altpk' in conf else None

    def ts_canapply(rnew, rold):
        # decision is delegated to the timestamp comparison helper
        return canapply_tstamp_helper(rnew, rold, timefield)

    options = dict(
        backup_row = ev_extra2,
        alt_pkey_cols = altpk,
        fkey_ref_table = conf.get('fkey_ref_table'),
        fkey_ref_cols = conf.get('fkey_ref_cols'),
        fkey_cols = conf.get('fkey_cols'),
        fn_canapply = ts_canapply,
    )
    return applyrow(ev_extra1, ev_type, ev_data, **options)
Example #15
0
    def process_event(self, ev, sql_queue_func, arg):
        """Collect one row event, grouped by primary key value.

        The event type must look like '<I|U|D>:<pkey columns>'.  The pkey
        column list is taken from the first event seen.  For tables
        without pkey only inserts are accepted; they get a running fake
        key.  Every version of a row is kept in pkey_ev_map.

        Raises Exception for malformed or unknown event types and for
        update/delete events on tables without pkey.
        """
        if len(ev.ev_type) < 2 or ev.ev_type[1] != ":":
            raise Exception("Unsupported event type: %s/extra1=%s/data=%s" % (ev.ev_type, ev.ev_extra1, ev.ev_data))
        op = ev.ev_type[0]
        if op not in "IUD":
            raise Exception("Unknown event type: " + ev.ev_type)
        data = skytools.db_urldecode(ev.ev_data)

        # get pkey value
        if self.pkey_list is None:
            # "".split(",") returns [''], so drop empty names; otherwise
            # the no-pkey branches below could never be reached
            self.pkey_list = [k for k in ev.ev_type[2:].split(",") if k]
        if self.pkey_list:
            pk_data = tuple(data[k] for k in self.pkey_list)
        elif op == "I":
            # fake pkey, just to get them spread out
            pk_data = self.fake_seq
            self.fake_seq += 1
        else:
            raise Exception("non-pk tables not supported: %s" % self.table_name)

        # get full column list, follow added columns
        if not self.col_list or self.col_list != data.keys():
            self.col_list = data.keys()

        # keep all versions of row data
        bulk_ev = BulkEvent(op, data, pk_data)
        self.pkey_ev_map.setdefault(bulk_ev.pk_data, []).append(bulk_ev)
Example #16
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only local slots.

     Events without extra3 are always passed on to BaseHandler.
     """
     is_local = True
     if ev.extra3:
         meta = skytools.db_urldecode(ev.extra3)
         # slot number = hash masked by bubbles_max_slot
         slot = int(meta['hash']) & self.bubbles_max_slot
         is_local = slot in self.bubbles_local_slots
     if is_local:
         BaseHandler.process_event(self, ev, sql_queue_func, arg)
Example #17
0
 def __init__(self, row):
     """Fill table info from one row and build its handler plugin.

     dest_table falls back to table_name when the row has no value for
     it; table_attrs is the decoded 'table_attrs' string.
     """
     name = row['table_name']
     dest = row['dest_table']
     self.table_name = name
     self.dest_table = dest or name
     self.merge_state = row['merge_state']
     self.table_attrs = skytools.db_urldecode(row['table_attrs'] or '')
     # handler is built with the raw dest_table value from the row
     handler_str = self.table_attrs.get('handler', '')
     self.plugin = build_handler(name, handler_str, dest)
Example #18
0
 def __init__(self, row):
     """Fill table info from one row and build its handler plugin.

     dest_table falls back to table_name when the row has no value for
     it; table_attrs is the decoded "table_attrs" string.
     """
     name = row["table_name"]
     dest = row["dest_table"]
     self.table_name = name
     self.dest_table = dest or name
     self.merge_state = row["merge_state"]
     self.table_attrs = skytools.db_urldecode(row["table_attrs"] or "")
     # handler is built with the raw dest_table value from the row
     handler_str = self.table_attrs.get("handler", "")
     self.plugin = build_handler(name, handler_str, dest)
Example #19
0
File: util.py Project: pgq/londiste
def handler_allows_copy(table_attrs):
    """Decide if table is copyable based on attrs.

    Empty attrs mean the table is copyable; otherwise the answer is the
    needs_table() result of the handler named in the attrs.
    """
    if table_attrs:
        decoded = skytools.db_urldecode(table_attrs)
        # table name is a placeholder, only the handler type matters here
        handler = londiste.handler.build_handler(
            'unused.string', decoded.get('handler', ''), None)
        return handler.needs_table()
    return True
Example #20
0
 def cmd_tables(self):
     """Show attached tables."""
     def show_attrs(val):
         # NULL attrs are shown as empty string, others are shown decoded
         return '' if val is None else skytools.db_urldecode(val)

     q = """select table_name, merge_state, table_attrs
     from londiste.get_table_list(%s) where local"""
     db = self.get_database('db')
     formatters = {'table_attrs': show_attrs}
     self.display_table(db, "Tables on node", q, [self.set_name],
                        fieldfmt = formatters)
Example #21
0
def handler_allows_copy(table_attrs):
    """Decide if table is copyable based on attrs.

    Empty attrs mean the table is copyable; otherwise the answer is the
    needs_table() result of the handler named in the attrs.
    """
    if table_attrs:
        decoded = skytools.db_urldecode(table_attrs)
        # table name is a placeholder, only the handler type matters here
        handler = londiste.handler.build_handler(
            'unused.string', decoded.get('handler', ''), None)
        return handler.needs_table()
    return True
Example #22
0
 def __init__(self, row):
     """Fill table info from one row and build its handler plugin.

     dest_table falls back to table_name when the row has no value for
     it; table_attrs is the decoded 'table_attrs' string.
     """
     name = row['table_name']
     dest = row['dest_table']
     self.table_name = name
     self.dest_table = dest or name
     self.merge_state = row['merge_state']
     self.table_attrs = skytools.db_urldecode(row['table_attrs'] or '')
     # handler is built with the raw dest_table value from the row
     handler_str = self.table_attrs.get('handler', '')
     self.plugin = build_handler(name, handler_str, dest)
Example #23
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only local slots.

     Events without extra3 are always passed on to BaseHandler.
     """
     is_local = True
     if ev.extra3:
         meta = skytools.db_urldecode(ev.extra3)
         # slot number = hash masked by bubbles_max_slot
         slot = int(meta['hash']) & self.bubbles_max_slot
         is_local = slot in self.bubbles_local_slots
     if is_local:
         BaseHandler.process_event(self, ev, sql_queue_func, arg)
Example #24
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only if for local shard.

     The filter is skipped when the event has no extra3 or when no
     hash_key is configured; such events are always processed.
     """
     needs_check = ev.extra3 and self.hash_key is not None
     if needs_check:
         meta = skytools.db_urldecode(ev.extra3)
         ev_hash = int(meta['hash'])
         self.log.debug('shard.process_event: hash=%i, hash_mask=%i, shard_nr=%i',
                        ev_hash, self.hash_mask, self.shard_nr)
         # masked hash gives the shard the event belongs to
         if (ev_hash & self.hash_mask) != self.shard_nr:
             self.log.debug('shard.process_event: not my event')
             return
     self._process_event(ev, sql_queue_func, arg)
Example #25
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only if for local shard.

     The filter is skipped when the event has no extra3 or when no
     hash_key is configured; such events are always processed.
     """
     needs_check = ev.extra3 and self.hash_key is not None
     if needs_check:
         meta = skytools.db_urldecode(ev.extra3)
         ev_hash = int(meta['hash'])
         self.log.debug('shard.process_event: hash=%i, hash_mask=%i, shard_nr=%i',
                        ev_hash, self.hash_mask, self.shard_nr)
         # masked hash gives the shard the event belongs to
         if (ev_hash & self.hash_mask) != self.shard_nr:
             self.log.debug('shard.process_event: not my event')
             return
     self._process_event(ev, sql_queue_func, arg)
Example #26
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only local part.

     Events without extra3 are always passed on to TableHandler.  With
     extra3, the event is processed only when its hash masked by
     max_part equals local_part.
     """
     if ev.extra3:
         meta = skytools.db_urldecode(ev.extra3)
         # values go in as logging args, so the message is formatted
         # only when debug logging is actually enabled
         self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
                        int(meta['hash']), self.max_part, self.local_part)
         if (int(meta['hash']) & self.max_part) != self.local_part:
             self.log.debug('part.process_event: not my event')
             return
     self.log.debug('part.process_event: my event, processing')
     TableHandler.process_event(self, ev, sql_queue_func, arg)
Example #27
0
 def process_event(self, ev, sql_queue_func, arg):
     """Filter event by hash in extra3, apply only local part.

     Events without extra3 are always passed on to TableHandler.  With
     extra3, the event is processed only when its hash masked by
     max_part equals local_part.
     """
     if ev.extra3:
         meta = skytools.db_urldecode(ev.extra3)
         ev_hash = int(meta['hash'])
         self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
                        ev_hash, self.max_part, self.local_part)
         target_part = ev_hash & self.max_part
         if target_part != self.local_part:
             self.log.debug('part.process_event: not my event')
             return
     self.log.debug('part.process_event: my event, processing')
     TableHandler.process_event(self, ev, sql_queue_func, arg)
Example #28
0
    def add_event(self, ev):
        """Store new event.

        Sets ev.op, ev.data and ev.pk_data on the event, keeps the column
        list up to date and files the event under its pkey value.  Raises
        Exception for non-insert events on tables without pkey.
        """
        # op & data
        ev.op = ev.ev_type[0]
        ev.data = skytools.db_urldecode(ev.ev_data)

        # pkey column names are decided once: from the event type when it
        # carries them, otherwise from extra2
        if self.pkey_str is None:
            if len(ev.ev_type) > 2:
                self.pkey_str = ev.ev_type.split(':')[1]
            else:
                self.pkey_str = ev.ev_extra2

            if self.pkey_str:
                self.pkey_list = self.pkey_str.split(',')

        # pkey value
        if self.pkey_str:
            ev.pk_data = tuple([ev.data[k] for k in self.pkey_list])
        elif ev.op == 'I':
            # fake pkey, just to get them spread out
            ev.pk_data = ev.id
        else:
            raise Exception('non-pk tables not supported: %s' % self.name)

        # full column list, with detection of added columns
        cols = ev.data.keys()
        if not self.col_list:
            self.col_list = cols
        elif self.col_list != cols:
            # new columns are back-filled with None in the stored events
            for c in cols:
                if c not in self.col_list:
                    for oldev in self.ev_list:
                        oldev.data[c] = None
            self.col_list = cols

        # add to list
        self.ev_list.append(ev)

        # keep all versions of row data
        self.pkey_map.setdefault(ev.pk_data, []).append(ev)
Example #29
0
    def __init__(self, queue_name, nst):
        """Derive processing flags from the node state row `nst`.

        Copies the basic node fields, decodes 'node_attrs' when present
        and then switches on flags depending on node_type and
        combined_type.  Raises Exception for type combinations that are
        not handled.
        """
        self.node_type = nst['node_type']
        self.node_name = nst['node_name']
        self.local_watermark = nst['local_watermark']
        self.global_watermark = nst['global_watermark']

        raw_attrs = nst.get('node_attrs', '')
        self.node_attrs = skytools.db_urldecode(raw_attrs) if raw_attrs else {}

        ntype = nst['node_type']
        ctype = nst['combined_type']

        if ntype == 'root':
            self.global_wm_event = 1
            self.local_wm_publish = 0
        elif ntype == 'branch':
            self.target_queue = queue_name
            self.process_batch = 1
            self.process_events = 1
            self.copy_events = 1
            self.process_tick_event = 1
            self.keep_event_ids = 1
            self.create_tick = 1
            if 'sync_watermark' not in self.node_attrs:
                self.process_global_wm = 1
            else:
                # watermark is synced with an explicit list of nodes
                slist = self.node_attrs['sync_watermark']
                self.sync_watermark = 1
                self.wm_sync_nodes = slist.split(',')
        elif ntype == 'leaf':
            if not ctype:
                # plain leaf
                self.process_batch = 1
                self.process_events = 1
            else:
                # leaf that feeds a combined queue
                self.target_queue = nst['combined_queue']
                if ctype == 'root':
                    self.process_batch = 1
                    self.process_events = 1
                    self.copy_events = 1
                    self.filtered_copy = 1
                    self.send_tick_event = 1
                elif ctype == 'branch':
                    self.process_batch = 1
                    self.wait_behind = 1
                else:
                    raise Exception('invalid state 1')
        else:
            raise Exception('invalid state 2')

        # combined type is accepted on leaf nodes only
        if ctype and ntype != 'leaf':
            raise Exception('invalid state 3')
Example #30
0
    def __init__(self, context, global_dict = None):
        """ This object must be initiated in the beginning of each db service

        `context` is an urlencoded string and must contain "username";
        plpy.error() is called when it does not.
        """
        DBService.__init__(self, context, global_dict)

        rec = skytools.db_urldecode(context)
        if "username" not in rec:
            plpy.error("Username must be provided in db service context parameter")
        # used for logging purposes
        self.username = rec['username']

        # current transaction id serves as the version
        txid_rows = plpy.execute("select txid_current() as txid;")
        self.version = txid_rows[0]["txid"]

        # flag set by run query to indicate number of rows got
        self.rows_found = 0
Example #31
0
    def __init__(self, queue_name, nst):
        """Derive processing flags from the node state row `nst`.

        Copies the basic node fields, decodes 'node_attrs' when present
        and then switches on flags depending on node_type and
        combined_type.  Raises Exception for type combinations that are
        not handled.
        """
        self.node_type = nst['node_type']
        self.node_name = nst['node_name']
        self.local_watermark = nst['local_watermark']
        self.global_watermark = nst['global_watermark']

        raw_attrs = nst.get('node_attrs', '')
        self.node_attrs = skytools.db_urldecode(raw_attrs) if raw_attrs else {}

        ntype = nst['node_type']
        ctype = nst['combined_type']

        if ntype == 'root':
            self.global_wm_event = 1
            self.local_wm_publish = 0
        elif ntype == 'branch':
            self.target_queue = queue_name
            self.process_batch = 1
            self.process_events = 1
            self.copy_events = 1
            self.process_tick_event = 1
            self.keep_event_ids = 1
            self.create_tick = 1
            if 'sync_watermark' not in self.node_attrs:
                self.process_global_wm = 1
            else:
                # watermark is synced with an explicit list of nodes
                slist = self.node_attrs['sync_watermark']
                self.sync_watermark = 1
                self.wm_sync_nodes = slist.split(',')
        elif ntype == 'leaf':
            if not ctype:
                # plain leaf
                self.process_batch = 1
                self.process_events = 1
            else:
                # leaf that feeds a combined queue
                self.target_queue = nst['combined_queue']
                if ctype == 'root':
                    self.process_batch = 1
                    self.process_events = 1
                    self.copy_events = 1
                    self.filtered_copy = 1
                    self.send_tick_event = 1
                elif ctype == 'branch':
                    self.process_batch = 1
                    self.wait_behind = 1
                else:
                    raise Exception('invalid state 1')
        else:
            raise Exception('invalid state 2')

        # combined type is accepted on leaf nodes only
        if ctype and ntype != 'leaf':
            raise Exception('invalid state 3')
Example #32
0
def _parse_handler(hstr):
    """Parse result of create_handler_string()."""
    args = {}
    name = hstr
    pos = hstr.find('(')
    if pos > 0:
        name = hstr[ : pos]
        if hstr[-1] != ')':
            raise Exception('invalid handler format: %s' % hstr)
        astr = hstr[pos + 1 : -1]
        if astr:
            astr = astr.replace(',', '&')
            args = skytools.db_urldecode(astr)
    return (name, args)
Example #33
0
    def add_event(self, ev):
        """Store new event.

        Sets ev.op, ev.data and ev.pk_data on the event, keeps the column
        list up to date and files the event under its pkey value.  Raises
        Exception for non-insert events on tables without pkey.
        """
        # op & data
        ev.op = ev.ev_type[0]
        ev.data = skytools.db_urldecode(ev.ev_data)

        # pkey column names are decided once: from the event type when it
        # carries them, otherwise from extra2
        if self.pkey_str is None:
            if len(ev.ev_type) > 2:
                self.pkey_str = ev.ev_type.split(':')[1]
            else:
                self.pkey_str = ev.ev_extra2

            if self.pkey_str:
                self.pkey_list = self.pkey_str.split(',')

        # pkey value
        if self.pkey_str:
            ev.pk_data = tuple([ev.data[k] for k in self.pkey_list])
        elif ev.op == 'I':
            # fake pkey, just to get them spread out
            ev.pk_data = ev.id
        else:
            raise Exception('non-pk tables not supported: %s' % self.name)

        # full column list, with detection of added columns
        cols = ev.data.keys()
        if not self.col_list:
            self.col_list = cols
        elif self.col_list != cols:
            # new columns are back-filled with None in the stored events
            for c in cols:
                if c not in self.col_list:
                    for oldev in self.ev_list:
                        oldev.data[c] = None
            self.col_list = cols

        # add to list
        self.ev_list.append(ev)

        # keep all versions of row data
        self.pkey_map.setdefault(ev.pk_data, []).append(ev)
Example #34
0
 def __init__(self, context, global_dict = None):
     """ This object must be initiated in the beginning of each db service

     `context` is an urlencoded string; its 'is_test' and 'show_sql'
     keys switch on test output and sql recording.
     """
     rec = skytools.db_urldecode(context)

     # used to run dbservice in retval
     self._context = context
     # used for caching query plans
     self.global_dict = global_dict
     # used to collect return resultsets
     self._retval = []
     # used to convert output into human readable form
     self._is_test = 'is_test' in rec

     # list of sqls executed by dbservice (for debugging) when "show_sql"
     # is requested; None means no recording of sqls is done
     self.sqls = [] if "show_sql" in rec else None

     # used to keep value of most severe error found so far
     self.can_save = True
     # used to hold list of messages to be returned to the user
     self.messages = []
Example #35
0
    def loaded_state(self, row):
        """Update object with info from db.

        Reads snapshot, merge state, table attributes, copy role and
        dropped ddl from `row`.  The object is marked as changed when the
        merge state is "?".
        """

        # values are passed as logging args, so the message is formatted
        # only when debug logging is actually enabled
        self.log.debug("loaded_state: %s: %s / %s",
                       self.name, row['merge_state'], row['custom_snapshot'])
        self.change_snapshot(row['custom_snapshot'], 0)
        self.state = self.parse_state(row['merge_state'])
        self.changed = 0
        # urlencoded attrs -> dict, empty dict when nothing is stored
        if row['table_attrs']:
            self.table_attrs = skytools.db_urldecode(row['table_attrs'])
        else:
            self.table_attrs = {}
        self.copy_role = row['copy_role']
        self.dropped_ddl = row['dropped_ddl']
        if row['merge_state'] == "?":
            self.changed = 1
    def add(self, curs, ev, batch_info):
        """Hand one row event over to the row handler of its destination table.

        With splitting enabled the destination comes from split_format()
        and is checked with check_part() when not seen before; otherwise
        it is table_name.
        """
        row = skytools.db_urldecode(ev.data)
        op, pkeys = ev.type.split(':', 1)
        pkey_list = pkeys.split(',')

        if not self.split:
            dst = self.table_name
        else:
            dst = self.split_format(ev, row, batch_info)
            if dst not in self.part_map:
                self.check_part(curs, dst, pkey_list)

        # row handlers are created on first use of a destination
        if dst not in self.part_map:
            self.part_map[dst] = self.rowhandler(dst, self.table_name, self.log)

        self.part_map[dst].add_row(op, row, pkey_list)
Example #37
0
def get_record(arg):
    """Parse one urlencoded record into a dbdict.

    An empty / None argument yields an empty dbdict.  An argument that
    starts with '{' or '[' is parsed as an array and must contain exactly
    one element, which is then decoded as the record.

    Raises ValueError when the array does not hold exactly one row.
    """
    if not arg:
        return dbdict()

    encoded = arg
    first_char = arg[0]
    if first_char == '{' or first_char == '[':
        # array form: unwrap the single contained record
        rows = skytools.parse_pgarray(arg)
        if len(rows) != 1:
            raise ValueError('get_record() expects exactly 1 row, got %d' % len(rows))
        encoded = rows[0]

    return dbdict(skytools.db_urldecode(encoded))
Example #38
0
    def dispatch(self, dst_db, ev_list):
        """Generic dispatcher.

        Decodes every event, works out its destination table, optionally
        remaps its fields, tags the event as done, and finally inserts the
        collected rows table by table through a cursor on *dst_db*.
        """
        rows_by_table = {}
        for ev in ev_list:
            src_row = skytools.db_urldecode(ev.data)

            # pick destination table
            if not self.part_field:
                target = self.dest_table
            else:
                if self.part_field == "_EVTIME":
                    raw_value = str(ev.creation_date)
                else:
                    raw_value = str(src_row[self.part_field])
                # keep the text before the first space, split it on '-'
                date_parts = raw_value.split(' ')[0].split('-')
                if self.part_method == 'monthly':
                    date_parts = date_parts[:2]
                target = "%s_%s" % (self.dest_table, '_'.join(date_parts))

            # rename fields according to field_map, if one is configured
            if self.field_map is None:
                out_row = src_row
            else:
                out_row = dict((dst_col, src_row[src_col])
                               for src_col, dst_col in self.field_map.items())

            # group rows per destination table
            rows_by_table.setdefault(target, []).append(out_row)

            ev.tag_done()

        # create tables if needed
        self.check_tables(dst_db, rows_by_table)

        # bulk insert, one call per destination table
        curs = dst_db.cursor()
        for target, rows in rows_by_table.items():
            skytools.magic_insert(curs, target, rows)
Example #39
0
    def parse_row_data(self, ev):
        """Return the row payload of an event, validated when an encoder is set.

        An event whose type is a single character is treated as an SQL
        event and yields a string; any other event is urldecoded and
        yields a dict.  Raises Exception for an SQL event when
        allow_sql_event is false.
        """
        if len(ev.type) != 1:
            # urlenc event
            decoded = skytools.db_urldecode(ev.data)
            if not self.enc:
                return decoded
            return self.enc.validate_dict(decoded, self.table_name)

        # sql event
        if not self.allow_sql_event:
            raise Exception('SQL events not suppoted by this handler')
        if not self.enc:
            return ev.data
        return self.enc.validate_string(ev.data, self.table_name)
Example #40
0
    def parse_row_data(self, ev):
        """Extract row data from an event, applying encoding fixes if configured.

        Returns a string for an SQL event (single-character event type)
        and a dict for a urlencoded event (any other event type).  An SQL
        event raises Exception unless allow_sql_event is set.
        """
        is_sql_event = len(ev.type) == 1

        if is_sql_event and not self.allow_sql_event:
            raise Exception('SQL events not suppoted by this handler')

        if is_sql_event:
            payload = ev.data
            validate = self.enc.validate_string if self.enc else None
        else:
            payload = skytools.db_urldecode(ev.data)
            validate = self.enc.validate_dict if self.enc else None

        # Without an encoder the payload is passed through untouched.
        if validate is None:
            return payload
        return validate(payload, self.table_name)
Example #41
0
    def cmd_resync(self, *args):
        """Reload data from provider node.

        Unless --find-copy-node is in effect, every requested table must
        be present and local on the provider, otherwise the process exits
        with status 1.  When a copy node is requested or to be searched
        for, the 'copy_node' table attribute is rewritten first.  Finally
        the local table state is reset for all requested tables.
        """
        db = self.get_database('db')
        tables = self.expand_arg_list(db, 'r', True, args)

        if not self.options.find_copy_node:
            self.load_local_info()
            provider_db = self.get_provider_db()
            provider_curs = provider_db.cursor()
            provider_tables = self.fetch_set_tables(provider_curs)
            provider_db.commit()

            # report every missing table before giving up
            failed = False
            for name in tables:
                fq = skytools.fq_name(name)
                if not (fq in provider_tables and provider_tables[fq]['local']):
                    self.log.error(
                        "Table %s does not exist on provider, need to switch to different provider",
                        fq)
                    failed = True
            if failed:
                self.log.error("Problems, cancelling operation")
                sys.exit(1)

        if self.options.find_copy_node or self.options.copy_node:
            list_sql = "select table_name, table_attrs from londiste.get_table_list(%s) where local"
            set_attrs_sql = "select * from londiste.local_set_table_attrs(%s, %s, %s)"
            cur = db.cursor()
            cur.execute(list_sql, [self.set_name])
            for row in cur.fetchall():
                table_name = row['table_name']
                if table_name in tables:
                    attrs = skytools.db_urldecode(row['table_attrs'] or '')

                    # '?' asks for the copy node to be looked up
                    if self.options.find_copy_node:
                        attrs['copy_node'] = '?'
                    elif self.options.copy_node:
                        attrs['copy_node'] = self.options.copy_node

                    encoded = skytools.db_urlencode(attrs)
                    self.exec_cmd(db, set_attrs_sql,
                                  [self.set_name, table_name, encoded])

        reset_sql = "select * from londiste.local_set_table_state(%s, %s, null, null)"
        self.exec_cmd_many(db, reset_sql, [self.set_name], tables)
Example #42
0
    def loaded_state(self, row):
        """Load table state from a db row and build the handler plugin.

        Besides copying the state fields from *row*, the "handlers" entry
        of the decoded table attributes (empty string when absent) is
        passed to parse_handler() and the result is stored in self.plugin.
        """
        state_code = row["merge_state"]
        snapshot = row["custom_snapshot"]
        self.log.debug("loaded_state: %s: %s / %s" % (self.name, state_code, snapshot))

        self.change_snapshot(snapshot, 0)
        self.state = self.parse_state(state_code)
        self.changed = 0

        # no attrs stored -> empty dict
        raw_attrs = row["table_attrs"]
        if not raw_attrs:
            self.table_attrs = {}
        else:
            self.table_attrs = skytools.db_urldecode(raw_attrs)

        self.copy_role = row["copy_role"]
        self.dropped_ddl = row["dropped_ddl"]

        # "?" is the only merge state that flags the object as changed
        if state_code == "?":
            self.changed = 1

        handler_spec = self.table_attrs.get("handlers", "")
        self.plugin = parse_handler(self.name, handler_spec, self.log)
Example #43
0
    def cmd_change_handler(self, tbl):
        """Change handler (table_attrs) of the replicated table.

        Exits with status 1 when the table is not registered locally and
        with status 0 when the stored handler already equals the requested
        one.  Otherwise the 'handler' attribute is set or removed and the
        change is sent through londiste.local_change_handler().
        """
        self.load_local_info()

        fq_tbl = skytools.fq_name(tbl)

        db = self.get_database('db')
        curs = db.cursor()
        lookup_sql = ("select table_attrs, dest_table "
                      " from londiste.get_table_list(%s) "
                      " where table_name = %s and local")
        curs.execute(lookup_sql, [self.set_name, fq_tbl])
        if curs.rowcount == 0:
            self.log.error("Table %s not found on this node", fq_tbl)
            sys.exit(1)

        raw_attrs, dest_table = curs.fetchone()
        attrs = skytools.db_urldecode(raw_attrs or '')
        old_handler = attrs.get('handler')

        tgargs = self.build_tgargs()
        new_handler = None
        if self.options.handler:
            new_handler = self.build_handler(fq_tbl, tgargs, dest_table)

        if old_handler == new_handler:
            self.log.info(
                "Handler is already set to desired value, nothing done")
            sys.exit(0)

        # store the new handler, or drop the attribute when none is wanted
        if new_handler:
            attrs['handler'] = new_handler
        else:
            attrs.pop('handler', None)

        # an empty attribute dict is sent as NULL
        encoded_attrs = skytools.db_urlencode(attrs) if attrs else None

        change_sql = "select * from londiste.local_change_handler(%s, %s, %s, %s)"
        self.exec_cmd(curs, change_sql,
                      [self.set_name, fq_tbl, tgargs, encoded_attrs])
        db.commit()
    def dispatch(self, dst_db, ev_list):
        """Generic dispatcher.

        Groups the decoded (and optionally field-mapped) event rows by
        destination table, lets check_tables() see the grouping, then
        inserts each group through a cursor on *dst_db*.
        """
        grouped = {}
        for ev in ev_list:
            decoded = skytools.db_urldecode(ev.data)

            # destination table name, with a date suffix when partitioning
            dest = self.dest_table
            if self.part_field:
                if self.part_field == "_EVTIME":
                    part_value = str(ev.creation_date)
                else:
                    part_value = str(decoded[self.part_field])
                day = part_value.split(' ')[0]
                pieces = day.split('-')
                if self.part_method == 'monthly':
                    pieces = pieces[:2]
                dest = "%s_%s" % (self.dest_table, '_'.join(pieces))

            # field mapping: source column -> destination column
            if self.field_map is None:
                mapped = decoded
            else:
                mapped = {}
                for src_name, dst_name in self.field_map.items():
                    mapped[dst_name] = decoded[src_name]

            # collect the row under its destination table
            if dest in grouped:
                grouped[dest].append(mapped)
            else:
                grouped[dest] = [mapped]

        # create tables if needed
        self.check_tables(dst_db, grouped)

        # insert into data tables
        curs = dst_db.cursor()
        for dest, dest_rows in grouped.items():
            skytools.magic_insert(curs, dest, dest_rows)
Example #45
0
    def cmd_change_handler(self, tbl):
        """Change handler (table_attrs) of the replicated table.

        The table must be registered locally (exit status 1 otherwise).
        When the stored 'handler' attribute already equals the requested
        one nothing is changed and the process exits with status 0.
        """
        self.load_local_info()

        table = skytools.fq_name(tbl)
        db = self.get_database('db')
        curs = db.cursor()

        find_sql = "select table_attrs, dest_table "\
                   " from londiste.get_table_list(%s) "\
                   " where table_name = %s and local"
        curs.execute(find_sql, [self.set_name, table])
        if curs.rowcount == 0:
            self.log.error("Table %s not found on this node", table)
            sys.exit(1)

        stored_attrs, dest_table = curs.fetchone()
        attrs = skytools.db_urldecode(stored_attrs or '')
        current = attrs.get('handler')

        tgargs = self.build_tgargs()
        if not self.options.handler:
            wanted = None
        else:
            wanted = self.build_handler(table, tgargs, dest_table)

        if current == wanted:
            self.log.info("Handler is already set to desired value, nothing done")
            sys.exit(0)

        # update or remove the 'handler' attribute
        if wanted:
            attrs['handler'] = wanted
        elif 'handler' in attrs:
            del attrs['handler']

        # last argument stays None when no attributes remain
        call_args = [self.set_name, table, tgargs, None]
        if attrs:
            call_args[-1] = skytools.db_urlencode(attrs)

        self.exec_cmd(
            curs,
            "select * from londiste.local_change_handler(%s, %s, %s, %s)",
            call_args)
        db.commit()
Example #46
0
    def loaded_state(self, row):
        """Update the object from a db row and set up its handler plugin.

        The handler string is taken from the row's 'handler' entry (empty
        string when absent) and handed to parse_handler(); the result is
        stored in self.plugin.
        """
        merge_state = row['merge_state']
        custom_snapshot = row['custom_snapshot']

        self.log.debug(
            "loaded_state: %s: %s / %s" % (self.name, merge_state, custom_snapshot))
        self.change_snapshot(custom_snapshot, 0)
        self.state = self.parse_state(merge_state)
        self.changed = 0

        # decode stored attrs; falsy value means "no attrs"
        attrs_text = row['table_attrs']
        self.table_attrs = skytools.db_urldecode(attrs_text) if attrs_text else {}

        self.copy_role = row['copy_role']
        self.dropped_ddl = row['dropped_ddl']

        if merge_state == "?":
            self.changed = 1

        self.plugin = parse_handler(self.name, row.get('handler', ''))
Example #47
0
    def cmd_resync(self, *args):
        """Reload data from provider node.

        Validates the requested tables against the provider (skipped when
        find_copy_node is set), optionally rewrites the 'copy_node' table
        attribute, then resets the local state of every requested table.
        Exits with status 1 if any table is missing on the provider.
        """
        db = self.get_database('db')
        wanted = self.expand_arg_list(db, 'r', True, args)

        if not self.options.find_copy_node:
            self.load_local_info()
            src_db = self.get_provider_db()
            src_curs = src_db.cursor()
            src_tbls = self.fetch_set_tables(src_curs)
            src_db.commit()

            # log each unusable table, then abort once at the end
            bad_count = 0
            for name in wanted:
                fq_name = skytools.fq_name(name)
                usable = fq_name in src_tbls and src_tbls[fq_name]['local']
                if not usable:
                    self.log.error("Table %s does not exist on provider, need to switch to different provider", fq_name)
                    bad_count += 1
            if bad_count > 0:
                self.log.error("Problems, cancelling operation")
                sys.exit(1)

        if self.options.find_copy_node or self.options.copy_node:
            cur = db.cursor()
            cur.execute(
                "select table_name, table_attrs from londiste.get_table_list(%s) where local",
                [self.set_name])
            for row in cur.fetchall():
                if row['table_name'] not in wanted:
                    continue
                attr_map = skytools.db_urldecode(row['table_attrs'] or '')

                # '?' requests a lookup; otherwise use the given node
                if self.options.find_copy_node:
                    attr_map['copy_node'] = '?'
                elif self.options.copy_node:
                    attr_map['copy_node'] = self.options.copy_node

                self.exec_cmd(
                    db,
                    "select * from londiste.local_set_table_attrs (%s, %s, %s)",
                    [self.set_name, row['table_name'], skytools.db_urlencode(attr_map)])

        self.exec_cmd_many(
            db,
            "select * from londiste.local_set_table_state(%s, %s, null, null)",
            [self.set_name],
            wanted)
Example #48
0
    def process_event(self, ev, sql_queue_func, arg):
        """Turn one event into an SQL statement and pass it to *sql_queue_func*.

        An event whose type is a single character is an SQL event: the
        statement is built from the self.sql_command template for that
        type, the quoted name from ev.extra1 and ev.data.

        Any other event is a urlencoded event: the first character of the
        type is the operation ('I', 'U' or 'D'), and everything from the
        third character on is a comma-separated primary key list (the
        second character is skipped; presumably a ':' separator).

        Args:
            ev: event with type, data and extra1 attributes.
            sql_queue_func: callable invoked as sql_queue_func(sql, arg).
            arg: opaque value forwarded to sql_queue_func.

        Raises:
            Exception: for a urlencoded event whose operation is not one
                of 'I', 'U', 'D'.  Previously this surfaced as an
                UnboundLocalError after the row had been decoded.
        """
        if len(ev.type) == 1:
            # sql event
            fqname = skytools.quote_fqident(ev.extra1)
            fmt = self.sql_command[ev.type]
            sql = fmt % (fqname, ev.data)
        else:
            # urlenc event
            op = ev.type[0]
            # Reject unknown operations before doing any decoding work.
            if op not in ('I', 'U', 'D'):
                raise Exception('Unknown row op: %s' % op)

            pklist = ev.type[2:].split(',')
            row = skytools.db_urldecode(ev.data)
            tbl = ev.extra1
            if op == 'I':
                sql = skytools.mk_insert_sql(row, tbl, pklist)
            elif op == 'U':
                sql = skytools.mk_update_sql(row, tbl, pklist)
            else:
                sql = skytools.mk_delete_sql(row, tbl, pklist)

        sql_queue_func(sql, arg)
    def process_remote_batch(self, db, batch_id, event_list, dst_db):
        """Execute self.dst_query on *dst_db* once for every event in the batch.

        Each event's urldecoded data is used as the query parameter dict,
        extended with 'pgq.ev_*' entries carrying the event's own fields.
        The first result row, if the query produced one, is logged at
        debug level.

        Args:
            db: not used by this method.
            batch_id: not used by this method.
            event_list: iterable of events to process.
            dst_db: connection the query cursor is created on.
        """
        curs = dst_db.cursor()
        for ev in event_list:
            payload = skytools.db_urldecode(ev.data)
            if payload is None:
                payload = {}
            payload['pgq.ev_type'] = ev.type
            payload['pgq.ev_data'] = ev.data
            payload['pgq.ev_id'] = ev.id
            payload['pgq.ev_time'] = ev.time
            payload['pgq.ev_extra1'] = ev.extra1
            payload['pgq.ev_extra2'] = ev.extra2
            payload['pgq.ev_extra3'] = ev.extra3
            payload['pgq.ev_extra4'] = ev.extra4

            self.log.debug(self.dst_query % payload)
            curs.execute(self.dst_query, payload)

            # Reading the result is best-effort: a query without a result
            # set makes fetchone() fail, which must not abort the batch.
            # Only ordinary errors are ignored, so KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                res = curs.fetchone()
            except Exception as exc:
                self.log.debug("no result fetched: %s" % exc)
            else:
                self.log.debug(res)
Example #50
0
    def __init__(self, queue_name, row, main_worker=True, node_name=None):
        """Build node info from a node-info row.

        A falsy *row* produces a placeholder node named *node_name* with
        type 'dead' and no further attributes.  Otherwise the node fields
        are copied from *row*, and node_attrs is urldecoded from the
        row's 'node_attrs' entry when that entry is present and non-empty.
        """
        self.queue_name = queue_name
        self.member_map = {}
        self.main_worker = main_worker

        self.parent = None
        self.consumer_map = {}
        self.queue_info = {}
        self._info_lines = []
        self.cascaded_consumer_map = {}

        self._row = row

        if not row:
            # no data for this node
            self.name = node_name
            self.type = 'dead'
            return

        # (attribute, row column) pairs, assigned in this order
        column_map = (
            ('name', 'node_name'),
            ('type', 'node_type'),
            ('global_watermark', 'global_watermark'),
            ('local_watermark', 'local_watermark'),
            ('completed_tick', 'worker_last_tick'),
            ('provider_node', 'provider_node'),
            ('provider_location', 'provider_location'),
            ('consumer_name', 'worker_name'),
            ('worker_name', 'worker_name'),
            ('paused', 'worker_paused'),
            ('uptodate', 'worker_uptodate'),
            ('combined_queue', 'combined_queue'),
            ('combined_type', 'combined_type'),
            ('last_tick', 'worker_last_tick'),
        )
        for attr_name, column in column_map:
            setattr(self, attr_name, row[column])

        self.node_attrs = {}
        if 'node_attrs' in row:
            encoded_attrs = row['node_attrs']
            if encoded_attrs:
                self.node_attrs = skytools.db_urldecode(encoded_attrs)
 def process_batch(self, res, mcur, bres):
     """Run the modify query for every row of *res* and gather statistics.

     Starting from a copy of *bres*, each row of *res* is merged into the
     parameter dict and self.sql_modify is executed with it on *mcur*.
     Returns a (count, item) tuple: the accumulated count and the
     parameter dict as it stood after the last processed row.

     On any exception self.sql_crash (when set) is executed on the
     "dbcrash" database and the original exception is re-raised.
     """
     try:
         total = 0
         item = bres.copy()
         for src_row in res:
             item.update(src_row)
             mcur.execute(self.sql_modify, item)
             self.log.debug(mcur.query)

             if not mcur.statusmessage.startswith('SELECT'):
                 # no result set: count affected rows instead
                 self.stat_increase('processed', mcur.rowcount)
                 self.log.debug(mcur.statusmessage)
             else:
                 # a select was used, so read back what it returned
                 for out_row in mcur.fetchall():
                     if 'stats' in out_row:
                         # 'stats' column: urlencoded counters to add up
                         counters = skytools.db_urldecode(out_row['stats'] or '')
                         for stat_name, stat_value in counters.items():
                             self.stat_increase(stat_name, int(stat_value))
                     self.log.debug(out_row)

             # a 'cnt' entry overrides the default weight of one per row
             if 'cnt' not in item:
                 total += 1
                 self.stat_increase("count")
             else:
                 total += item['cnt']
                 self.stat_increase("count", item['cnt'])

             if self.last_sigint:
                 break
         return total, item
     except:  # process has crashed, run sql_crash and re-raise the exception
         if self.sql_crash:
             crash_db = self.get_database("dbcrash", autocommit=1)
             crash_curs = crash_db.cursor()
             crash_curs.execute(self.sql_crash, item)
         raise