コード例 #1
0
ファイル: dispatch.py プロジェクト: ssinger/skytools
 def bulk_insert(self, curs, data, table = None):
     """Copy data to table. If table not provided, use temp table
     """
     if not data:
         return
     _table = table or self.temp
     self.log.debug("bulk: COPY %d rows into %s" % (len(data), _table))
     if not table:
         self.create_temp(curs)
     skytools.magic_insert(curs, _table, data, self.fields)
     if not table:
         self.analyze(curs)
コード例 #2
0
ファイル: dispatch.py プロジェクト: ssinger/skytools
 def bulk_insert(self, curs, data, table=None):
     """Copy data to table. If table not provided, use temp table
     """
     if not data:
         return
     _table = table or self.temp
     self.log.debug("bulk: COPY %d rows into %s" % (len(data), _table))
     if not table:
         self.create_temp(curs)
     skytools.magic_insert(curs, _table, data, self.fields)
     if not table:
         self.analyze(curs)
コード例 #3
0
    def dispatch(self, dst_db, ev_list):
        """Generic dispatcher."""

        # load data
        tables = {}
        for ev in ev_list:
            row = skytools.db_urldecode(ev.data)

            # guess dest table
            if self.part_field:
                if self.part_field == "_EVTIME":
                    partval = str(ev.creation_date)
                else:
                    partval = str(row[self.part_field])
                partval = partval.split(' ')[0]
                date = partval.split('-')
                if self.part_method == 'monthly':
                    date = date[:2]
                suffix = '_'.join(date)
                tbl = "%s_%s" % (self.dest_table, suffix)
            else:
                tbl = self.dest_table

            # map fields
            if self.field_map is None:
                dstrow = row
            else:
                dstrow = {}
                for k, v in self.field_map.items():
                    dstrow[v] = row[k]

            # add row into table
            if not tbl in tables:
                tables[tbl] = [dstrow]
            else:
                tables[tbl].append(dstrow)

            ev.tag_done()

        # create tables if needed
        self.check_tables(dst_db, tables)

        # insert into data tables
        curs = dst_db.cursor()
        for tbl, tbl_rows in tables.items():
            skytools.magic_insert(curs, tbl, tbl_rows)
コード例 #4
0
ファイル: dispatch.py プロジェクト: kevpie/skytools
 def bulk_insert(self, curs, data, table=None):
     """Copy data to table. If table not provided, use temp table.
     When re-using existing temp table, it is always truncated first and
     analyzed after copy.
     """
     if not data:
         return
     _use_temp = table is None
     # if table not specified use temp
     if _use_temp:
         table = self.temp
         # truncate when re-using existing table
         if not self.create_temp(curs):
             self.truncate(curs)
     self.log.debug("bulk: COPY %d rows into %s" % (len(data), table))
     skytools.magic_insert(curs, table, data, self.fields, quoted_table=True)
     if _use_temp:
         self.analyze(curs)
コード例 #5
0
    def dispatch(self, dst_db, ev_list):
        """Generic dispatcher."""

        # load data
        tables = {}
        for ev in ev_list:
            row = skytools.db_urldecode(ev.data)

            # guess dest table
            if self.part_field:
                if self.part_field == "_EVTIME":
                    partval = str(ev.creation_date)
                else:
                    partval = str(row[self.part_field])
                partval = partval.split(' ')[0]
                date = partval.split('-')
                if self.part_method == 'monthly':
                    date = date[:2]
                suffix = '_'.join(date)
                tbl = "%s_%s" % (self.dest_table, suffix)
            else:
                tbl = self.dest_table

            # map fields
            if self.field_map is None:
                dstrow = row
            else:
                dstrow = {}
                for k, v in self.field_map.items():
                    dstrow[v] = row[k]

            # add row into table
            if not tbl in tables:
                tables[tbl] = [dstrow]
            else:
                tables[tbl].append(dstrow)

        # create tables if needed
        self.check_tables(dst_db, tables)

        # insert into data tables
        curs = dst_db.cursor()
        for tbl, tbl_rows in tables.items():
            skytools.magic_insert(curs, tbl, tbl_rows)
コード例 #6
0
 def bulk_insert(self, curs, data, table=None):
     """Copy data to table. If table not provided, use temp table.
     When re-using existing temp table, it is always truncated first and
     analyzed after copy.
     """
     if not data:
         return
     _use_temp = table is None
     # if table not specified use temp
     if _use_temp:
         table = self.temp
         # truncate when re-using existing table
         if not self.create_temp(curs):
             self.truncate(curs)
     self.log.debug("bulk: COPY %d rows into %s", len(data), table)
     skytools.magic_insert(curs, table, data, self.fields,
                           quoted_table=True)
     if _use_temp and self.run_analyze:
         self.analyze(curs)
コード例 #7
0
 def apply_rows(self, curs):
     fields = self.get_fields()
     skytools.magic_insert(curs, self.table_name, self.rows, fields)
コード例 #8
0
    def process_one_table(self, dst_db, tbl, cache):

        del_list = cache.final_del_list
        ins_list = cache.final_ins_list
        upd_list = cache.final_upd_list
        col_list = cache.col_list
        real_update_count = len(upd_list)

        self.log.debug("process_one_table: %s  (I/U/D = %d/%d/%d)" %
                       (tbl, len(ins_list), len(upd_list), len(del_list)))

        if tbl in self.remap_tables:
            old = tbl
            tbl = self.remap_tables[tbl]
            self.log.debug("Redirect %s to %s" % (old, tbl))

        # hack to unbroke stuff
        if self.load_method == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # check if interesting table
        curs = dst_db.cursor()
        if not skytools.exists_table(curs, tbl):
            self.log.warning("Ignoring events for table: %s" % tbl)
            return

        # fetch distribution fields
        dist_fields = find_dist_fields(curs, tbl)
        extra_fields = []
        for fld in dist_fields:
            if fld not in cache.pkey_list:
                extra_fields.append(fld)
        self.log.debug("PKey fields: %s  Extra fields: %s" %
                       (",".join(cache.pkey_list), ",".join(extra_fields)))

        # create temp table
        temp = self.create_temp_table(curs, tbl)

        # where expr must have pkey and dist fields
        klist = []
        for pk in cache.pkey_list + extra_fields:
            exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk),
                                     quote_fqident(temp), quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (
            quote_fqident(tbl), quote_fqident(temp), whe_expr)

        # create update sql
        slist = []
        key_fields = cache.pkey_list + extra_fields
        for col in cache.col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp),
                                      quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (quote_fqident(
            tbl), ", ".join(slist), quote_fqident(temp), whe_expr)

        # insert sql
        colstr = ",".join([quote_ident(c) for c in cache.col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (
            quote_fqident(tbl), colstr, colstr, quote_fqident(temp))

        # process deleted rows
        if len(del_list) > 0:
            self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
            skytools.magic_insert(curs, temp, del_list, col_list)
            # delete rows
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
            self.log.debug(curs.statusmessage)
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%s updated=%d" %
                                 (len(del_list), curs.rowcount))

        # process updated rows
        if len(upd_list) > 0:
            self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
            skytools.magic_insert(curs, temp, upd_list, col_list)
            if self.load_method == METH_CORRECT:
                # update main table
                self.log.debug(upd_sql)
                curs.execute(upd_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning(
                        "Update mismatch: expected=%s updated=%d" %
                        (len(upd_list), curs.rowcount))
            else:
                # delete from main table
                self.log.debug(del_sql)
                curs.execute(del_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning(
                        "Update mismatch: expected=%s deleted=%d" %
                        (real_update_count, curs.rowcount))
                # insert into main table
                if AVOID_BIZGRES_BUG:
                    # copy again, into main table
                    self.log.debug("COPY %d rows into %s" %
                                   (len(upd_list), tbl))
                    skytools.magic_insert(curs, tbl, upd_list, col_list)
                else:
                    # better way, but does not work due bizgres bug
                    self.log.debug(ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug(curs.statusmessage)

        # process new rows
        if len(ins_list) > 0:
            self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
            skytools.magic_insert(curs, tbl, ins_list, col_list)

        # delete remaining rows
        if USE_LONGLIVED_TEMP_TABLES:
            q = "truncate %s" % quote_fqident(temp)
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
コード例 #9
0
    def bulk_flush(self, curs):
        ins_list, upd_list, del_list = self.prepare_data()

        # reorder cols, put pks first
        col_list = self.pkey_list[:]
        for k in self.col_list:
            if k not in self.pkey_list:
                col_list.append(k)

        real_update_count = len(upd_list)

        self.log.debug("bulk_flush: %s  (I/U/D = %d/%d/%d)", self.table_name,
                       len(ins_list), len(upd_list), len(del_list))

        # hack to unbroke stuff
        if self.method == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # fetch distribution fields
        if self.dist_fields is None:
            self.dist_fields = self.find_dist_fields(curs)

        key_fields = self.pkey_list[:]
        for fld in self.dist_fields:
            if fld not in key_fields:
                key_fields.append(fld)
        self.log.debug("PKey fields: %s  Dist fields: %s",
                       ",".join(self.pkey_list), ",".join(self.dist_fields))

        # create temp table
        temp, qtemp = self.create_temp_table(curs)
        tbl = self.dest_table
        qtbl = self.fq_dest_table

        # where expr must have pkey and dist fields
        klist = []
        for pk in key_fields:
            exp = "%s.%s = %s.%s" % (qtbl, quote_ident(pk), qtemp,
                                     quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (qtbl, qtemp,
                                                             whe_expr)

        # create update sql
        slist = []
        for col in col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), qtemp,
                                      quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (
            qtbl, ", ".join(slist), qtemp, whe_expr)

        # avoid updates on pk-only table
        if not slist:
            upd_list = []

        # insert sql
        colstr = ",".join([quote_ident(c) for c in col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (qtbl, colstr,
                                                             colstr, qtemp)

        temp_used = False

        # process deleted rows
        if len(del_list) > 0:
            self.log.debug("bulk: Deleting %d rows from %s", len(del_list),
                           tbl)
            # delete old rows
            q = "truncate %s" % qtemp
            self.log.debug('bulk: %s', q)
            curs.execute(q)
            # copy rows
            self.log.debug("bulk: COPY %d rows into %s", len(del_list), temp)
            skytools.magic_insert(curs,
                                  qtemp,
                                  del_list,
                                  col_list,
                                  quoted_table=1)
            # delete rows
            self.log.debug('bulk: %s', del_sql)
            curs.execute(del_sql)
            self.log.debug("bulk: %s - %d", curs.statusmessage, curs.rowcount)
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%s deleted=%d",
                                 len(del_list), curs.rowcount)
            temp_used = True

        # process updated rows
        if len(upd_list) > 0:
            self.log.debug("bulk: Updating %d rows in %s", len(upd_list), tbl)
            # delete old rows
            q = "truncate %s" % qtemp
            self.log.debug('bulk: %s', q)
            curs.execute(q)
            # copy rows
            self.log.debug("bulk: COPY %d rows into %s", len(upd_list), temp)
            skytools.magic_insert(curs,
                                  qtemp,
                                  upd_list,
                                  col_list,
                                  quoted_table=1)
            temp_used = True
            if self.method == METH_CORRECT:
                # update main table
                self.log.debug('bulk: %s', upd_sql)
                curs.execute(upd_sql)
                self.log.debug("bulk: %s - %d", curs.statusmessage,
                               curs.rowcount)
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%s updated=%d",
                                     len(upd_list), curs.rowcount)
            else:
                # delete from main table
                self.log.debug('bulk: %s', del_sql)
                curs.execute(del_sql)
                self.log.debug('bulk: %s', curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning(
                        "bulk: Update mismatch: expected=%s deleted=%d",
                        real_update_count, curs.rowcount)
                # insert into main table
                if AVOID_BIZGRES_BUG:
                    # copy again, into main table
                    self.log.debug("bulk: COPY %d rows into %s", len(upd_list),
                                   tbl)
                    skytools.magic_insert(curs,
                                          qtbl,
                                          upd_list,
                                          col_list,
                                          quoted_table=1)
                else:
                    # better way, but does not work due bizgres bug
                    self.log.debug('bulk: %s', ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug('bulk: %s', curs.statusmessage)

        # process new rows
        if len(ins_list) > 0:
            self.log.debug("bulk: Inserting %d rows into %s", len(ins_list),
                           tbl)
            self.log.debug("bulk: COPY %d rows into %s", len(ins_list), tbl)
            skytools.magic_insert(curs,
                                  qtbl,
                                  ins_list,
                                  col_list,
                                  quoted_table=1)

        # delete remaining rows
        if temp_used:
            if USE_LONGLIVED_TEMP_TABLES or USE_REAL_TABLE:
                q = "truncate %s" % qtemp
            else:
                # fscking problems with long-lived temp tables
                q = "drop table %s" % qtemp
            self.log.debug('bulk: %s', q)
            curs.execute(q)

        self.reset()
コード例 #10
0
ファイル: bulk.py プロジェクト: digideskio/skytools
    def bulk_flush(self, curs):
        ins_list, upd_list, del_list = self.prepare_data()

        # reorder cols, put pks first
        col_list = self.pkey_list[:]
        for k in self.col_list:
            if k not in self.pkey_list:
                col_list.append(k)

        real_update_count = len(upd_list)

        self.log.debug("bulk_flush: %s  (I/U/D = %d/%d/%d)" % (
                       self.table_name, len(ins_list), len(upd_list), len(del_list)))

        # hack to unbroke stuff
        if self.method == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # fetch distribution fields
        if self.dist_fields is None:
            self.dist_fields = self.find_dist_fields(curs)

        key_fields = self.pkey_list[:]
        for fld in self.dist_fields:
            if fld not in key_fields:
                key_fields.append(fld)
        self.log.debug("PKey fields: %s  Dist fields: %s" % (
                       ",".join(self.pkey_list), ",".join(self.dist_fields)))

        # create temp table
        temp, qtemp = self.create_temp_table(curs)
        tbl = self.table_name
        qtbl = quote_fqident(self.table_name)

        # where expr must have pkey and dist fields
        klist = []
        for pk in key_fields:
            exp = "%s.%s = %s.%s" % (qtbl, quote_ident(pk),
                                     qtemp, quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (qtbl, qtemp, whe_expr)

        # create update sql
        slist = []
        for col in col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), qtemp, quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (
                   qtbl, ", ".join(slist), qtemp, whe_expr)

        # avoid updates on pk-only table
        if not slist:
            upd_list = []

        # insert sql
        colstr = ",".join([quote_ident(c) for c in col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (
                  qtbl, colstr, colstr, qtemp)

        temp_used = False

        # process deleted rows
        if len(del_list) > 0:
            self.log.debug("bulk: Deleting %d rows from %s" % (len(del_list), tbl))
            # delete old rows
            q = "truncate %s" % qtemp
            self.log.debug('bulk: %s' % q)
            curs.execute(q)
            # copy rows
            self.log.debug("bulk: COPY %d rows into %s" % (len(del_list), temp))
            skytools.magic_insert(curs, qtemp, del_list, col_list, quoted_table=1)
            # delete rows
            self.log.debug('bulk: ' + del_sql)
            curs.execute(del_sql)
            self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount))
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%s deleted=%d"
                        % (len(del_list), curs.rowcount))
            temp_used = True

        # process updated rows
        if len(upd_list) > 0:
            self.log.debug("bulk: Updating %d rows in %s" % (len(upd_list), tbl))
            # delete old rows
            q = "truncate %s" % qtemp
            self.log.debug('bulk: ' + q)
            curs.execute(q)
            # copy rows
            self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), temp))
            skytools.magic_insert(curs, qtemp, upd_list, col_list, quoted_table=1)
            temp_used = True
            if self.method == METH_CORRECT:
                # update main table
                self.log.debug('bulk: ' + upd_sql)
                curs.execute(upd_sql)
                self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount))
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%s updated=%d"
                            % (len(upd_list), curs.rowcount))
            else:
                # delete from main table
                self.log.debug('bulk: ' + del_sql)
                curs.execute(del_sql)
                self.log.debug('bulk: ' + curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning("bulk: Update mismatch: expected=%s deleted=%d"
                            % (real_update_count, curs.rowcount))
                # insert into main table
                if AVOID_BIZGRES_BUG:
                    # copy again, into main table
                    self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), tbl))
                    skytools.magic_insert(curs, qtbl, upd_list, col_list, quoted_table=1)
                else:
                    # better way, but does not work due bizgres bug
                    self.log.debug('bulk: ' + ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug('bulk: ' + curs.statusmessage)

        # process new rows
        if len(ins_list) > 0:
            self.log.debug("bulk: Inserting %d rows into %s" % (len(ins_list), tbl))
            self.log.debug("bulk: COPY %d rows into %s" % (len(ins_list), tbl))
            skytools.magic_insert(curs, qtbl, ins_list, col_list, quoted_table=1)

        # delete remaining rows
        if temp_used:
            if USE_LONGLIVED_TEMP_TABLES or USE_REAL_TABLE:
                q = "truncate %s" % qtemp
            else:
                # fscking problems with long-lived temp tables
                q = "drop table %s" % qtemp
            self.log.debug('bulk: ' + q)
            curs.execute(q)

        self.reset()
コード例 #11
0
    def flush(self, curs):
        ins_list, upd_list, del_list = self.prepare_data()

        # reorder cols
        col_list = self.pkey_list[:]
        for k in self.col_list:
            if k not in self.pkey_list:
                col_list.append(k)

        real_update_count = len(upd_list)

        #self.log.debug("process_one_table: %s  (I/U/D = %d/%d/%d)" % (
        #               tbl, len(ins_list), len(upd_list), len(del_list)))

        # hack to unbroke stuff
        if LOAD_METHOD == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # fetch distribution fields
        if self.dist_fields is None:
            self.dist_fields = self.find_dist_fields(curs)

        key_fields = self.pkey_list[:]
        for fld in self.dist_fields:
            if fld not in key_fields:
                key_fields.append(fld)
        #self.log.debug("PKey fields: %s  Extra fields: %s" % (
        #               ",".join(cache.pkey_list), ",".join(extra_fields)))

        # create temp table
        temp = self.create_temp_table(curs)
        tbl = self.table_name

        # where expr must have pkey and dist fields
        klist = []
        for pk in key_fields:
            exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk),
                                     quote_fqident(temp), quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (
            quote_fqident(tbl), quote_fqident(temp), whe_expr)

        # create update sql
        slist = []
        for col in col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp),
                                      quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (quote_fqident(
            tbl), ", ".join(slist), quote_fqident(temp), whe_expr)

        # insert sql
        colstr = ",".join([quote_ident(c) for c in col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (
            quote_fqident(tbl), colstr, colstr, quote_fqident(temp))

        temp_used = False

        # process deleted rows
        if len(del_list) > 0:
            #self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
            skytools.magic_insert(curs, temp, del_list, col_list)
            # delete rows
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%s deleted=%d" %
                                 (len(del_list), curs.rowcount))
            temp_used = True

        # process updated rows
        if len(upd_list) > 0:
            #self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
            skytools.magic_insert(curs, temp, upd_list, col_list)
            temp_used = True
            if LOAD_METHOD == METH_CORRECT:
                # update main table
                self.log.debug(upd_sql)
                curs.execute(upd_sql)
                self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning(
                        "Update mismatch: expected=%s updated=%d" %
                        (len(upd_list), curs.rowcount))
            else:
                # delete from main table
                self.log.debug(del_sql)
                curs.execute(del_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning(
                        "Update mismatch: expected=%s deleted=%d" %
                        (real_update_count, curs.rowcount))
                # insert into main table
                if AVOID_BIZGRES_BUG:
                    # copy again, into main table
                    self.log.debug("COPY %d rows into %s" %
                                   (len(upd_list), tbl))
                    skytools.magic_insert(curs, tbl, upd_list, col_list)
                else:
                    # better way, but does not work due bizgres bug
                    self.log.debug(ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug(curs.statusmessage)

        # process new rows
        if len(ins_list) > 0:
            self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
            skytools.magic_insert(curs, tbl, ins_list, col_list)

        # delete remaining rows
        if temp_used:
            if USE_LONGLIVED_TEMP_TABLES:
                q = "truncate %s" % quote_fqident(temp)
            else:
                # fscking problems with long-lived temp tables
                q = "drop table %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)

        self.reset()
コード例 #12
0
ファイル: bulk_loader.py プロジェクト: carriercomm/xztech
    def process_one_table(self, dst_db, tbl, cache):

        del_list = cache.final_del_list
        ins_list = cache.final_ins_list
        upd_list = cache.final_upd_list
        col_list = cache.col_list
        real_update_count = len(upd_list)

        self.log.debug("process_one_table: %s  (I/U/D = %d/%d/%d)" % (
                       tbl, len(ins_list), len(upd_list), len(del_list)))

        if tbl in self.remap_tables:
            old = tbl
            tbl = self.remap_tables[tbl]
            self.log.debug("Redirect %s to %s" % (old, tbl))

        # hack to unbroke stuff
        if self.load_method == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # check if interesting table
        curs = dst_db.cursor()
        if not skytools.exists_table(curs, tbl):
            self.log.warning("Ignoring events for table: %s" % tbl)
            return

        # fetch distribution fields
        dist_fields = find_dist_fields(curs, tbl)
        extra_fields = []
        for fld in dist_fields:
            if fld not in cache.pkey_list:
                extra_fields.append(fld)
        self.log.debug("PKey fields: %s  Extra fields: %s" % (
                       ",".join(cache.pkey_list), ",".join(extra_fields)))

        # create temp table
        temp = self.create_temp_table(curs, tbl)
        
        # where expr must have pkey and dist fields
        klist = []
        for pk in cache.pkey_list + extra_fields:
            exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk),
                                     quote_fqident(temp), quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (
                  quote_fqident(tbl), quote_fqident(temp), whe_expr)

        # create update sql
        slist = []
        key_fields = cache.pkey_list + extra_fields
        for col in cache.col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (
                    quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)

        # insert sql
        colstr = ",".join([quote_ident(c) for c in cache.col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (
                  quote_fqident(tbl), colstr, colstr, quote_fqident(temp))

        # process deleted rows
        if len(del_list) > 0:
            self.log.info("Deleting %d rows from %s" % (len(del_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(del_list), temp))
            skytools.magic_insert(curs, temp, del_list, col_list)
            # delete rows
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount))
            self.log.debug(curs.statusmessage)
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%s updated=%d"
                        % (len(del_list), curs.rowcount))

        # process updated rows
        if len(upd_list) > 0:
            self.log.info("Updating %d rows in %s" % (len(upd_list), tbl))
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s" % (len(upd_list), temp))
            skytools.magic_insert(curs, temp, upd_list, col_list)
            if self.load_method == METH_CORRECT:
                # update main table
                self.log.debug(upd_sql)
                curs.execute(upd_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%s updated=%d"
                            % (len(upd_list), curs.rowcount))
            else:
                # delete from main table
                self.log.debug(del_sql)
                curs.execute(del_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%s deleted=%d"
                            % (real_update_count, curs.rowcount))
                # insert into main table
                if 0:
                    # does not work due bizgres bug
                    self.log.debug(ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug(curs.statusmessage)
                else:
                    # copy again, into main table
                    self.log.debug("COPY %d rows into %s" % (len(upd_list), tbl))
                    skytools.magic_insert(curs, tbl, upd_list, col_list)

        # process new rows
        if len(ins_list) > 0:
            self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl))
            skytools.magic_insert(curs, tbl, ins_list, col_list)

        # delete remaining rows
        if USE_LONGLIVED_TEMP_TABLES:
            q = "truncate %s" % quote_fqident(temp)
        else:
            # fscking problems with long-lived temp tables
            q = "drop table %s" % quote_fqident(temp)
        self.log.debug(q)
        curs.execute(q)
コード例 #13
0
ファイル: producer.py プロジェクト: gridl/python-pgq
def bulk_insert_events(curs, rows, fields, queue_name):
    q = "select pgq.current_event_table(%s)"
    curs.execute(q, [queue_name])
    tbl = curs.fetchone()[0]
    db_fields = map(_fldmap.get, fields)
    skytools.magic_insert(curs, tbl, rows, db_fields)
コード例 #14
0
ファイル: queue_loader.py プロジェクト: David-Gould/skytools
    def flush(self, curs):
        ins_list, upd_list, del_list = self.prepare_data()

        # reorder cols
        col_list = self.pkey_list[:]
        for k in self.col_list:
            if k not in self.pkey_list:
                col_list.append(k)

        real_update_count = len(upd_list)

        #self.log.debug("process_one_table: %s  (I/U/D = %d/%d/%d)",
        #               tbl, len(ins_list), len(upd_list), len(del_list))

        # hack to unbroke stuff
        if LOAD_METHOD == METH_MERGED:
            upd_list += ins_list
            ins_list = []

        # fetch distribution fields
        if self.dist_fields is None:
            self.dist_fields = self.find_dist_fields(curs)

        key_fields = self.pkey_list[:]
        for fld in self.dist_fields:
            if fld not in key_fields:
                key_fields.append(fld)
        #self.log.debug("PKey fields: %s  Extra fields: %s",
        #               ",".join(cache.pkey_list), ",".join(extra_fields))

        # create temp table
        temp = self.create_temp_table(curs)
        tbl = self.table_name

        # where expr must have pkey and dist fields
        klist = []
        for pk in key_fields:
            exp = "%s.%s = %s.%s" % (quote_fqident(tbl), quote_ident(pk),
                                     quote_fqident(temp), quote_ident(pk))
            klist.append(exp)
        whe_expr = " and ".join(klist)

        # create del sql
        del_sql = "delete from only %s using %s where %s" % (
                  quote_fqident(tbl), quote_fqident(temp), whe_expr)

        # create update sql
        slist = []
        for col in col_list:
            if col not in key_fields:
                exp = "%s = %s.%s" % (quote_ident(col), quote_fqident(temp), quote_ident(col))
                slist.append(exp)
        upd_sql = "update only %s set %s from %s where %s" % (
                    quote_fqident(tbl), ", ".join(slist), quote_fqident(temp), whe_expr)

        # insert sql
        colstr = ",".join([quote_ident(c) for c in col_list])
        ins_sql = "insert into %s (%s) select %s from %s" % (
                  quote_fqident(tbl), colstr, colstr, quote_fqident(temp))

        temp_used = False

        # process deleted rows
        if len(del_list) > 0:
            #self.log.info("Deleting %d rows from %s", len(del_list), tbl)
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s", len(del_list), temp)
            skytools.magic_insert(curs, temp, del_list, col_list)
            # delete rows
            self.log.debug(del_sql)
            curs.execute(del_sql)
            self.log.debug("%s - %d", curs.statusmessage, curs.rowcount)
            if len(del_list) != curs.rowcount:
                self.log.warning("Delete mismatch: expected=%d deleted=%d",
                                 len(del_list), curs.rowcount)
            temp_used = True

        # process updated rows
        if len(upd_list) > 0:
            #self.log.info("Updating %d rows in %s", len(upd_list), tbl)
            # delete old rows
            q = "truncate %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)
            # copy rows
            self.log.debug("COPY %d rows into %s", len(upd_list), temp)
            skytools.magic_insert(curs, temp, upd_list, col_list)
            temp_used = True
            if LOAD_METHOD == METH_CORRECT:
                # update main table
                self.log.debug(upd_sql)
                curs.execute(upd_sql)
                self.log.debug("%s - %d", curs.statusmessage, curs.rowcount)
                # check count
                if len(upd_list) != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%d updated=%d",
                                     len(upd_list), curs.rowcount)
            else:
                # delete from main table
                self.log.debug(del_sql)
                curs.execute(del_sql)
                self.log.debug(curs.statusmessage)
                # check count
                if real_update_count != curs.rowcount:
                    self.log.warning("Update mismatch: expected=%d deleted=%d",
                                     real_update_count, curs.rowcount)
                # insert into main table
                if AVOID_BIZGRES_BUG:
                    # copy again, into main table
                    self.log.debug("COPY %d rows into %s", len(upd_list), tbl)
                    skytools.magic_insert(curs, tbl, upd_list, col_list)
                else:
                    # better way, but does not work due bizgres bug
                    self.log.debug(ins_sql)
                    curs.execute(ins_sql)
                    self.log.debug(curs.statusmessage)

        # process new rows
        if len(ins_list) > 0:
            self.log.info("Inserting %d rows into %s", len(ins_list), tbl)
            skytools.magic_insert(curs, tbl, ins_list, col_list)

        # delete remaining rows
        if temp_used:
            if USE_LONGLIVED_TEMP_TABLES:
                q = "truncate %s" % quote_fqident(temp)
            else:
                # fscking problems with long-lived temp tables
                q = "drop table %s" % quote_fqident(temp)
            self.log.debug(q)
            curs.execute(q)

        self.reset()
コード例 #15
0
ファイル: producer.py プロジェクト: David-Gould/skytools
def bulk_insert_events(curs, rows, fields, queue_name):
    q = "select pgq.current_event_table(%s)"
    curs.execute(q, [queue_name])
    tbl = curs.fetchone()[0]
    db_fields = map(_fldmap.get, fields)
    skytools.magic_insert(curs, tbl, rows, db_fields)