Example #1
0
 def exec_dml(self, dml, trans, commit=False):
     """Execute a DML statement on an open transaction.

     Args:
         dml: SQL DML statement to execute.
         trans: transaction holder exposing ``cur`` (cursor) and
             ``conn`` (connection) attributes.
         commit: when True, commit after executing and report the
             statement via ``pfmt``.

     Returns:
         The affected row count reported by the cursor.

     Raises:
         Re-raises any execution error after rolling the
         transaction back.
     """
     #log.debug(dml)
     try:
         out = trans.cur.execute(dml).rowcount
         if commit:
             trans.conn.commit()
             # Fixed typo in the status banner: 'Commited' -> 'Committed'.
             pfmt([[dml]], ['DML'], 'Committed'.upper())
         return out
     except:
         # Intentionally bare: roll back on ANY failure, then re-raise
         # so the caller sees the original error.
         trans.conn.rollback()
         pfmt([[dml]], ['DML'], 'Rolled back'.upper())
         raise
Example #2
0
    def MoveSnapFolder(self, okdir):
        """Archive a processed snapshot folder and its '.ok' marker.

        Moves ``okdir`` and its sibling ``<okdir>.ok`` marker to the
        backup destination reported by ``self.get_dest_folder``, then
        writes a timestamped status file next to the destination.

        Args:
            okdir: path of the processed snapshot directory.

        Raises:
            AssertionError: when the source folder/marker is missing
                or the destination folder/marker already exists.
        """
        dest_folder = self.get_dest_folder(okdir)
        snap_ok_file = okdir + '.ok'
        dest_ok_file = dest_folder + '.ok'
        status_file = dest_folder + '.' + self.status_dir
        cdt = datetime.now()
        cd = cdt.strftime('%Y-%m-%d %H:%M:%S')

        pfmt([[okdir, dest_folder], [snap_ok_file, dest_ok_file]],
             ['From', 'To'], 'Backup')
        # Pre-conditions: source must exist, destination must not.
        assert os.path.isdir(okdir)
        assert os.path.isfile(snap_ok_file)
        assert not os.path.isdir(dest_folder)
        assert not os.path.isfile(dest_ok_file)

        # Removed the vestigial 'if 1:' wrapper; behavior unchanged.
        shutil.move(okdir, dest_folder)
        shutil.move(snap_ok_file, dest_ok_file)

        # Record when this snapshot was archived.  The context manager
        # closes the file; the original's explicit fw.close() inside
        # 'with' was redundant and has been dropped.
        with open(status_file, 'w') as fw:
            fw.write(cd)
Example #3
0
def run():
    """Load gfin '*.ok' snapshot dump folders into the target database.

    For each configured dump source / target pair this:
      1. globs the dump directory for '*.ok' marker files,
      2. (re)creates a local temporary master-PK table,
      3. for each snapshot folder: builds a PK file, loads the master
         PK file first, then loads every other '*.out' file after
         deleting existing rows keyed on the master PK column,
      4. commits (via the inner loop's for/else) and prints per-table
         load/delete statistics.

    NOTE(review): depends on module-level names defined elsewhere in
    this file (cli, app_init, log, pfmt, create_reader, create_writer,
    InOut, itertools, os, Email, email_args) -- confirm before reuse.
    """
    skip = 2  # header lines skipped in every dump file
    serviceName = 'gfin'
    #deleted = {}

    #loaded  = {}
    #not_loaded  = {}
    #masterTbl = 'gtxMasterPKData'
    #do_not_delete = ['TxFinancingRateHist', masterTbl]
    # Default; overwritten per-target from tcfg['doNotLoadTables'] below.
    do_not_load = ['TxFinancingRate',
                   'TxFinancingRateHist']  #'TxFinancingRate',
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        if 1:
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            ok_files = InOut(file_names=[])
            # Each '*.ok' marker denotes a completed dump folder.
            DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')

            if 1:
                for _trg_class, val in cli.cfg['target'][_source].items():

                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

                    _dbname = tcfg["targetDb"]
                    toDB = create_writer(aname=_trg_class, app_init=app_init)
                    masterTabTag = tcfg['masterTableTag']
                    masterTbl = tcfg['targetTables'][masterTabTag][
                        'table_name']
                    masterTblCol = tcfg['targetTables'][masterTabTag][
                        'column_name']
                    do_not_delete = tcfg['doNotDeleteTables'] + [masterTbl]
                    do_not_load = tcfg['doNotLoadTables']
                    to_conn = InOut()
                    toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                    # NOTE(review): schema name hard-coded here -- confirm.
                    to_conn.cur.execute('set search_path to CIGRpt')
                    if ok_files.file_names:  # Master first
                        # Rebuild the temp master-PK table from scratch;
                        # tolerate "does not exist" on the initial drop.
                        try:

                            stmt = 'drop table %s' % masterTbl
                            to_conn.cur.execute(stmt)
                        except Exception as ex:
                            #raise
                            if not 'Table "%s" does not exist' % masterTbl in str(
                                    ex):
                                raise

                        stmt = 'create local temporary table %s ( %s bigint not null, MartModifiedDate timestamp)\n ON COMMIT PRESERVE ROWS' % (
                            masterTbl, masterTblCol)
                        pfmt([[stmt]], ['Create master temp PK'])
                        to_conn.cur.execute(stmt)
                        #e()
                    stats = {}
                    deleted = {}
                    processed = []
                    not_processed = []
                    for okfn in ok_files.file_names:
                        okFile = create_reader(aname='File',
                                               app_init=app_init,
                                               file_name=okfn,
                                               scfg=dir_scfg)
                        okdir, _ = os.path.splitext(okfn)
                        okbn = os.path.basename(okdir)
                        #e()
                        assert os.path.isdir(okdir)
                        # Skip folders whose backup destination already
                        # exists (previously processed).
                        snap_df = cli.get_dest_folder(okdir)
                        if os.path.isdir(snap_df):
                            log.warning('[%s]Destination folder exists: [%s]' %
                                        (okdir, snap_df))
                            not_processed.append(okfn)
                            continue
                        OkReader = create_reader(aname="Dir",
                                                 app_init=app_init)
                        out_files = InOut(file_names=[])

                        DirReader.glob_dir(path=okdir,
                                           out=out_files,
                                           ext='*.out')

                        # Every loaded row is stamped with the snapshot's
                        # modified date taken from the .ok file.
                        apx = dict(MartModifiedDate=okFile.get_value(
                            coords=(0, 0), skip=skip))
                        ftlist = []

                        # File naming convention: <svc>.<Table>.out
                        for out_fn in out_files.file_names:
                            print(out_fn)
                            ftlist.append(
                                os.path.basename(out_fn).split('.')[1])

                        pfmt([[x] for x in ftlist], ['Files->Tables'])
                        #e()
                        if 1:
                            # Cross-check file-derived table names vs config.
                            ctables = cli.tcfg['targetTables'].keys()

                            extra_file_tables = list(
                                set(ftlist) - set(ctables))

                            pfmt([[x] for x in extra_file_tables],
                                 ['Tables not in config.'])

                            extra_config_tables = list(
                                set(ctables) - set(ftlist))

                            pfmt([[x] for x in extra_config_tables],
                                 ['Tables in config but not in file names.'])

                            assert not extra_file_tables, 'Tables %s are not listed in config["targetTables"].' % extra_file_tables

                        if 0:
                            g = raw_input("Continue?")
                        if 1:  #//create PK file

                            # NOTE(review): master PK is extracted from the
                            # hard-coded 'gfin.Instrument.out' file -- confirm.
                            fromFile = create_reader(
                                aname='File',
                                app_init=app_init,
                                file_name=os.path.join(okdir,
                                                       'gfin.Instrument.out'),
                                scfg=dir_scfg)
                            toFile = create_reader(aname='File',
                                                   app_init=app_init,
                                                   file_name=os.path.join(
                                                       okdir, '%s.PK.out' %
                                                       serviceName),
                                                   scfg=dir_scfg,
                                                   parse=False)
                            rowcnt = cli.createPrimaryKeyFile(
                                ffObj=fromFile,
                                pkfn=os.path.join(okdir,
                                                  '%s.PK.out' % serviceName))

                        assert masterTabTag in ftlist, '"%s" file is missing' % masterTabTag

                        if 1:
                            stmt = 'TRUNCATE TABLE %s' % (masterTbl)
                            toDB.exec_dml(stmt, trans=to_conn, commit=False)
                            # -1 marks "not deleted by PK" in the report.
                            deleted[masterTbl] = -1
                            #e()
                        #e()
                        loaded = {}
                        not_loaded = {}
                        if 1:
                            # Load the master PK file before anything else.
                            pkfn = [
                                x for x in out_files.file_names
                                if os.path.basename(x).split('.')[1] in
                                [masterTabTag]
                            ][0]

                            schema = tcfg['targetSchema']
                            outFile = create_reader(aname="File",
                                                    app_init=app_init,
                                                    file_name=pkfn,
                                                    scfg=dir_scfg)
                            fmt_cols = tcfg['targetTables'][masterTabTag].get(
                                'formatColumns', [])
                            outFile.set_alt_cols()
                            toDB.load_gfin_file(trans=to_conn,
                                                file_obj=outFile,
                                                schema=schema,
                                                table_name=masterTbl,
                                                qname='insertStmt',
                                                fmt_cols=fmt_cols,
                                                cfg=(dir_scfg, tcfg),
                                                skip=skip,
                                                apx=apx,
                                                stats=stats)
                            # NOTE(review): keys on out_fn (last value from
                            # the ftlist loop), not pkfn -- verify intent.
                            loaded[out_fn] = masterTbl
                            #e()
                        if 1:
                            # Sanity check: loaded PK count matches the file.
                            stmt = 'SELECT count(*) FROM %s t' % masterTbl
                            pkcnt = toDB.exec_query(stmt).fetchall()[0][0]

                            assert pkcnt == (rowcnt - skip)

                        # Load every non-master file.  for/else: the else
                        # branch commits and reports after an unbroken pass.
                        for out_fn in [
                                x for x in out_files.file_names
                                if not os.path.basename(x).split('.')[1] in
                            [masterTabTag]
                        ]:
                            outFile = create_reader(aname="File",
                                                    app_init=app_init,
                                                    file_name=out_fn,
                                                    scfg=dir_scfg)
                            outCols = [
                                col[0] for col in outFile.get_header_cols()
                            ]
                            tbl = os.path.basename(out_fn).split('.')[1]
                            assert tbl

                            if tbl not in [masterTabTag] + do_not_load:

                                # Delete rows matching the new snapshot's
                                # PKs before re-loading them.
                                if tbl not in do_not_delete:
                                    stmt = 'DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)' % (
                                        tbl, masterTblCol, masterTblCol,
                                        masterTbl)
                                    deleted[tbl] = toDB.exec_dml(stmt,
                                                                 trans=to_conn,
                                                                 commit=False)
                                    pfmt([[deleted[tbl]]],
                                         ['Deleted from %s' % tbl])
                                else:
                                    deleted[tbl] = -1

                                tblCols = toDB.get_columns(tbl).values()
                                pfmt([[x] for x in list(
                                    set(tblCols) - set(outCols) -
                                    set(['MartModifiedDate']))],
                                     ['Columns in Source, but not Target'])

                                # File columns absent from the target table
                                # are fatal: roll back and abort.
                                missing_cols = list(
                                    set(outCols) - set(tblCols))
                                pfmt([(tbl, x) for x in missing_cols],
                                     ['Table', 'Missing columns'])

                                if missing_cols:
                                    to_conn.conn.rollback()

                                    schema = tcfg["targetSchema"]
                                    toDB.desc_table(schema, tbl)

                                    raise Exception(
                                        'File column %s missing in table "%s".'
                                        % (missing_cols, tbl))

                                if 1:
                                    schema = tcfg['targetSchema']
                                    fmt_cols = tcfg['targetTables'][tbl].get(
                                        'formatColumns', [])
                                    outFile.set_alt_cols()
                                    toDB.load_gfin_file(trans=to_conn,
                                                        file_obj=outFile,
                                                        schema=schema,
                                                        table_name=tbl,
                                                        qname='insertStmt',
                                                        fmt_cols=fmt_cols,
                                                        cfg=(dir_scfg, tcfg),
                                                        skip=skip,
                                                        apx=apx,
                                                        stats=stats)
                                    loaded[out_fn] = tbl
                            else:
                                not_loaded[out_fn] = tbl

                        else:
                            # for/else: reached only when the loop above was
                            # not broken -- commit and print the report.
                            toDB.commit_transaction(trans=to_conn)
                            #pfmt([[k]+[deleted [k]]+list(v)[1:]  for k,v in stats.items() if deleted [k]>=0], ['Table','Deleted', 'Accepted', 'Rejected','Line count','Skip', 'Diff'],'Load completed (deleted)'.upper())
                            #pfmt([(k,v) for k, v in loaded.items()], ['Loaded Files','Loaded Tables'])
                            #pfmt([(k,v) for k, v in not_loaded.items()], ['Not loaded Files','Not loaded Tables'])
                            pfmt(
                                [[k] + [deleted[k]] + list(v.values())[1:]
                                 for k, v in stats.items() if deleted[k] >= 0],
                                [
                                    'Table', 'Deleted', 'Accepted', 'Rejected',
                                    'Line count', 'Skip', 'Diff'
                                ], 'Load completed/deleted'.upper())
                            pfmt([(k, v) for k, v in loaded.items()],
                                 ['Loaded Files', 'Loaded Tables'])
                            pfmt([(k, v) for k, v in not_loaded.items()],
                                 ['Not loaded Files', 'Not loaded Tables'])

                            assert os.path.isdir(okdir)
                            if 0:
                                cli.MoveSnapFolder(okdir)
                            processed.append(okfn)
                        #break;

                # Final summary for this target.
                if not ok_files.file_names:
                    counter = itertools.count(1)
                    pfmt([['No OK files at working dir: [ %s ]' % cli.pa[0]]],
                         ['No files'])
                if processed:

                    counter = itertools.count(1)
                    pfmt([[next(counter), x] for x in processed],
                         ['##', 'Processed'])
                if not_processed:

                    counter = itertools.count(1)
                    pfmt([[next(counter), x] for x in not_processed],
                         ['##', 'Not processed (backup exists)'])

    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
        cli.done()
Example #4
0
    def load_md5(self,
                 trans,
                 file_obj,
                 table_name,
                 qname,
                 fmt_cols,
                 cfg,
                 skip=0,
                 apx=None,
                 stats=None):
        """Bulk-load *file_obj* into *table_name* via COPY ... FROM LOCAL.

        Builds a COPY statement (appears to target Vertica, given
        GET_NUM_ACCEPTED_ROWS / FORMAT 'hex' / NO COMMIT -- confirm)
        from the file's column list, executes it on the open
        transaction, and appends an OrderedDict of per-file load
        statistics to *stats*.

        Args:
            trans: transaction holder with ``cur`` and ``conn``.
            file_obj: parsed file reader exposing ``file_name``,
                ``cols`` and ``line_count``.
            table_name: target table for the COPY.
            qname: unused in this body -- NOTE(review): presumably kept
                for interface parity with sibling load_* methods.
            fmt_cols: column names to load with FORMAT 'hex'.
            cfg: (source_cfg, target_cfg) tuple; only the source side's
                'columnDelimiter' is read here.
            skip: header line count for COPY to SKIP.
            apx: optional mapping folded into the column list as
                "<column> AS <value>" entries (constant per-row stamp).
            stats: list receiving the per-file OrderedDict.

        Raises:
            Re-raises any COPY failure after rolling back.
            AssertionError: when accepted rows != line count - skip.
        """
        scfg, tcfg = cfg
        file_name = file_obj.file_name

        assert os.path.isfile(file_name)
        if 1:
            # NOTE(review): colsep is validated but the COPY below
            # hard-codes DELIMITER '|' -- confirm they always agree.
            colsep = scfg['columnDelimiter']
            assert colsep

            lcnt = file_obj.line_count(file_name)

            if 1:
                cols = ','.join([col[0] for col in file_obj.cols])

                trans.conn.autocommit = False

                # Hex-format selected columns; plain names otherwise.
                copyfmt = ',\n'.join([
                    "%s FORMAT 'hex'" %
                    col[0] if col[0] in fmt_cols else "%s" % col[0]
                    for col in file_obj.cols
                ])
                assert os.path.isfile(file_obj.file_name)
                #print (table_name, apx)
                #e()
                # Leading '' yields the ',\n' separator before the first
                # "col AS value" entry appended to the column list.
                apxq = ',\n'.join(
                    [''] + ["%s AS %s" % (k, v)
                            for k, v in apx.items()]) if apx else ''
                stmt = """
COPY %s (%s %s) 
FROM LOCAL '%s' 
DELIMITER '|' ESCAPE AS '^' NULL '' 
SKIP %d ABORT ON ERROR NO COMMIT """ % (table_name, copyfmt, apxq,
                                        file_obj.file_name, skip)
                try:
                    psql(stmt, 'Load')
                    trans.cur.execute(stmt)

                except:
                    # Bare except: roll back, show the failing statement,
                    # then surface the original error.
                    trans.conn.rollback()
                    pfmt([[stmt]])

                    raise

                accepted, rejected = trans.cur.execute(
                    'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
                ).fetchall()[0]
                pfmt([[lcnt - skip, accepted, rejected]],
                     ['Line count', 'Accepted', 'Rejected'], 'Load stats')
                assert lcnt - skip == accepted

                # Per-file stats record appended to the caller's list.
                out = OrderedDict()
                out['table_name'] = table_name
                out['accepted'] = accepted
                out['rejected'] = rejected
                out['linecount'] = lcnt
                out['skip'] = skip
                out['diff'] = lcnt - skip - accepted
                stats.append(out)
Example #5
0
    def __load_file_2(self,
                      trans,
                      file_obj,
                      table_name,
                      qname,
                      cfg,
                      create_table=False):
        """Experimental loader: parse *file_obj*, then COPY a fixed CSV.

        Reads the delimited file into memory (``data``) and runs a
        COPY ... FROM LOCAL against the hard-coded path
        '/home/s_dev_rdm/ab_gtx/iris.csv'.

        NOTE(review): the parsed ``data`` list is never inserted, the
        COPY path is hard-coded, and the statement at the bottom reads
        "FROM LOCAL'..." with no space after LOCAL -- this looks like
        scaffold/debug code; confirm before relying on it.
        """
        scfg, tcfg = cfg
        file_name = file_obj.file_name
        #pp(file_obj.cols)
        # Columns loaded/inspected as hex (binary GUIDs, presumably).
        fmt_cols = ['TxMasterGUID', 'SwapEventGUID']
        if 1:
            assert os.path.isfile(file_name)
            with open(file_name, 'r') as fh:
                colsep = scfg['columnDelimiter']
                assert colsep
                if create_table:
                    self.create_table(fh, cfg, table_name)
                else:
                    # Skip the two header lines.
                    fh.readline()
                    fh.readline()
                data = []
                intdata = [[]]
                intcols = []
                # Empty fields become None; trailing field is dropped
                # (line ends with the delimiter, presumably -- confirm).
                for line in [x.strip() for x in fh]:

                    data.append([
                        x if x else None
                        for i, x in enumerate(line.split(colsep)[:-1])
                    ])

                if 1:
                    cols = ','.join([col[0] for col in file_obj.cols])
                    assert len(file_obj.cols) == len(data[0])

                    if 0:
                        tmpTbl = 'tmp_%s' % table_name
                        stmt = 'CREATE LOCAL TEMPORARY TABLE %s AS SELECT * FROM %s WHERE 1=2' % (
                            tmpTbl, table_name)
                        #print(stmt)
                        trans.cur.execute(stmt)
                        #e()
                    assert len(intdata[0]) == len(intcols)

                    trans.conn.autocommit = False
                    if 0:
                        # Dead branch; also references tmpTbl which is only
                        # bound in the other 'if 0:' above.
                        stmt = "COPY %s FROM LOCAL '/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'" % tmpTbl

                    copyfmt = ',\n'.join([
                        "%s FORMAT 'hex'" %
                        col[0] if col[0] in fmt_cols else "%s" % col[0]
                        for col in file_obj.cols
                    ])
                    stmt = "COPY %s (%s) FROM LOCAL'/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'" % (
                        table_name, copyfmt)
                    trans.cur.execute(stmt)
                    pfmt(
                        trans.cur.execute(
                            'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
                        ).fetchall(), ['Accepted', 'Rejected'], 'Load stats')

                    if fmt_cols:

                        # Show a hex sample of the binary columns just loaded.
                        show = []
                        for row in trans.cur.execute(
                                'select TO_HEX(%s) from %s LIMIT 5' %
                            ('), TO_HEX('.join(fmt_cols),
                             table_name)).fetchall():
                            show.append(row)
                            #print binascii.hexlify(row[0])
                        pfmt(show, fmt_cols, 'Sample')
Example #6
0
def run():
    """Extract source-DB rows to dump files, then bulk-load them into IQ.

    Three passes over the configuration:
      1. validate that config column mappings match each target table,
      2. extract each source query into a delimited dump file,
      3. load every dump file into its target table, cross-validating
         file / config / table column sets first.

    NOTE(review): references several names not defined in this function
    (to_conn, from_conn, dump_file, data_files, file_stats, stats,
    loaded, not_loaded, deleted, insert_stats, file_size_rows,
    email_args, Email, e) -- presumably module globals; confirm.
    """
    skip = 1  # header lines skipped when loading dump files
    total_ins = 0
    term_line = True
    #//validate cols
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'],
                            tbl=tcfg['targetTable'],
                            col_ord=False)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            tcols = toDB.get_cols()
            t_vs_c = set(tcols) - set(cfg_cols)
            c_vs_t = set(cfg_cols) - set(tcols)
            # NOTE(review): each branch prints the OPPOSITE diff before
            # raising on its own -- possibly deliberate (show both
            # sides); verify the pairing of dict vs message.
            if t_vs_c:
                pfmtd([dict(c_vs_t=c_vs_t)],
                      'Config has columns missing in target table.')

                raise Exception(
                    'Target table has columns missing in config: %s' % t_vs_c)

            if c_vs_t:
                pfmtd([dict(t_vs_c=t_vs_c)],
                      'Target table has columns missing in config.')
                raise Exception(
                    'Config has columns missing in target table: %s' % c_vs_t)
            toDB.commit_transaction(trans=to_conn)
    #// transfer
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)

        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(aname=_src_class, app_init=app_init)

        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
        if 1:  #//Extract to Dir

            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
                fromDB.set_loader(FileWriter)
                cli.dcfg = cli.get_dcfg(_dmp_class)
                for _trg_class, val in cli.cfg['target'][_source].items(
                ) or []:

                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                    file_ins_cnt = 0
                    FileWriter.open_file(out=dump_file)

                    # Stream the source query in chunks; header is written
                    # lazily with the first chunk.
                    for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                     source=cli.scfg,
                                                     qname='sourceStmt',
                                                     out=InOut(),
                                                     skip_header=0,
                                                     terminate_line=term_line):

                        if not file_ins_cnt:
                            FileWriter.create_header(
                                file=dump_file,
                                header=fromDB.get_header(),
                                cfg=cli.dcfg,
                                terminate_line=term_line)
                        FileWriter.append_data(file=dump_file,
                                               data=iq_data,
                                               cfg=cli.dcfg)
                        file_ins_cnt += len(iq_data.data)
                    if not file_ins_cnt:  #in case there's no data
                        FileWriter.create_header(file=dump_file,
                                                 header=fromDB.get_header(),
                                                 cfg=cli.dcfg,
                                                 terminate_line=term_line)
                    FileWriter.close_file(file=dump_file)
                    total_ins += file_ins_cnt
        fromDB.desc_cur(cur=from_conn.cur, colord=False)

        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)
    #// Load to IQ
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        if 1:  #//Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)

            DirReader.glob_dir(path=path, out=data_files, ext='*.*')

        if 1:  #//Load to DB

            for _trg_class, val in cli.cfg['target'][_source].items() or []:

                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

                _dbname = tcfg["targetDb"]
                toDB = create_writer(aname=_trg_class, app_init=app_init)

                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)

                table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
                toDB.desc_table(schema=tcfg['targetSchema'],
                                tbl=tcfg['targetTable'],
                                col_ord=None)

                #// validate cols
                cfg_cols = [
                    x[u'columnName'] for x in cli.scfg[u'columnMappings']
                ]

                acols = cli.get_alt_cols(scfg)
                tcols = toDB.get_cols()
                fcols_alt = []
                # Pairwise validation of file vs config vs table columns;
                # on any mismatch every other diff is printed for context
                # before raising.
                for data_file in data_files.file_names:
                    dataFile = create_reader(aname='File',
                                             app_init=app_init,
                                             file_name=data_file,
                                             scfg=dir_scfg)
                    dataFile.describe()
                    file_stats[data_file] = dataFile.line_count(
                    ) - cli.header_size(dir_scfg)
                    fcols_alt = [
                        acols.get(x.decode(), x.decode())
                        for x in dataFile.get_header(data_file, dir_scfg)
                    ]
                    f_vs_c = set(fcols_alt) - set(cfg_cols)
                    c_vs_f = set(cfg_cols) - set(fcols_alt)
                    f_vs_t = set(fcols_alt) - set(tcols)
                    t_vs_f = set(tcols) - set(fcols_alt)
                    if f_vs_c:
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Target table has columns missing in config: %s' %
                            f_vs_c)

                    if c_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Config has columns missing in target table: %s' %
                            c_vs_f)

                    if f_vs_t:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Dump file has columns missing in target table: %s'
                            % f_vs_t)
                    if t_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        raise Exception(
                            'Target table has columns missing in dump file: %s'
                            % t_vs_f)

                if 1:
                    # Load pass.  for/else: the else branch commits and
                    # reports after an unbroken pass over all files.
                    for data_fn in [x for x in data_files.file_names]:
                        dataFile = create_reader(aname="File",
                                                 app_init=app_init,
                                                 file_name=data_fn,
                                                 scfg=dir_scfg)
                        dataFile.describe()

                        fileCols = [
                            col.decode() for col in dataFile.get_header_cols()
                        ]
                        tbl = tcfg[
                            "targetTable"]  #tcfg. os.path.basename(data_fn).split('.')[-2]
                        assert tbl
                        if 1:

                            # Delete branch disabled by 'if 0 and ...'.
                            if 0 and tbl not in do_not_delete:
                                stmt = 'DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)' % (
                                    tbl, masterTblCol, masterTblCol, masterTbl)
                                deleted[tbl] = toDB.exec_dml(stmt,
                                                             trans=to_conn,
                                                             commit=False)
                                pfmt([[deleted[tbl]]],
                                     ['Deleted from %s' % tbl])
                            else:
                                deleted[tbl] = -1
                            if 0:
                                acols = cli.get_alt_cols(scfg)
                                dataFile.cols_alt = [
                                    acols.get(x.decode(), x.decode())
                                    for x in dataFile.cols
                                ]
                            else:
                                dataFile.set_alt_cols()

                            missing_cols = list(
                                set(dataFile.cols_alt) - set(tcols))
                            pfmt([(tbl, x) for x in missing_cols],
                                 ['Table', 'Missing columns'])
                            schema = tcfg["targetSchema"]
                            if missing_cols:
                                pfmt([[x] for x in missing_cols],
                                     ['Columns in Source, but not Target'])
                                to_conn.conn.rollback()
                                toDB.desc_table(schema, tbl)
                                raise Exception(
                                    'File column %s missing in table "%s".' %
                                    (missing_cols, tbl))

                            if 1:
                                apx = {}
                                fmt_cols = []

                                toDB.load_file(trans=to_conn,
                                               file_obj=dataFile,
                                               schema=schema,
                                               table_name=tbl,
                                               qname='insertStmt',
                                               fmt_cols=fmt_cols,
                                               cfg=(dir_scfg, tcfg),
                                               skip=skip,
                                               apx=apx,
                                               stats=stats)
                                loaded[data_fn] = tbl
                        # NOTE(review): unreachable -- paired with 'if 1:'.
                        else:
                            not_loaded[data_fn] = tbl

                    else:
                        if 1:
                            toDB.commit_transaction(trans=to_conn)
                            pfmt(
                                [[k] + [deleted[k]] + list(v)[1:]
                                 for k, v in stats.items() if deleted[k] >= 0],
                                [
                                    'Table', 'Deleted', 'Accepted', 'Rejected',
                                    'Line count', 'Skip', 'Diff'
                                ], 'Load completed (deleted)'.upper())
                            pfmt([(k, v) for k, v in loaded.items()],
                                 ['Loaded Files', 'Loaded Tables'])
                            pfmt([(k, v) for k, v in not_loaded.items()],
                                 ['Not loaded Files', 'Not loaded Tables'])

                # NOTE(review): e() looks like a debug breakpoint/exit
                # helper left in -- everything below it may never run.
                e()
                if 0:
                    #toDB.truncate_table		( table = table )
                    toDB.bulk_load(trans=to_conn,
                                   file_names=data_files,
                                   qname='insertStmt',
                                   cfg=(dir_scfg, tcfg),
                                   out=insert_stats)

                # Reconcile inserted row counts against file line counts.
                for k in file_stats.keys():
                    assert insert_stats[k] == file_stats[
                        k], 'Insert vs file count diff: %s<>%s for file \n%s' % (
                            insert_stats[k], file_stats[k], k)
                toDB.commit_transaction(trans=to_conn)

    if 0:
        Email.send_email(**email_args)
Example #7
0
def run():
    """Stage snapshot dump files into temp PK tables, then merge changed rows.

    For each source in cli.cfg['dump']:
      1. Glob the configured dump dir for '*.ok' marker files.
      2. Per target config: open a DB transaction, and for every '*.out'
         data file under each ok-dir build a sibling '<file>.pk' copy with
         MartModifiedDate/AsOfFrom/AsOfTo/MD5 columns appended, then bulk
         load it into a temp table named 'tmp_PK_<table>'.
      3. Insert rows from each temp table into the real target table where
         the MD5 differs or the join key is absent (join-key width varies
         by table name — see the three branches below).
      4. Commit and print summary stats via pfmt/pfmtd.

    Relies on module-level project helpers (cli, create_reader,
    create_writer, InOut, pfmt, pfmtd, psql, pp, log, app_init) defined
    elsewhere in this file/package.
    """
    # Two header rows (column names + column types) precede data in the
    # dump files — see the .pk writer below, which copies both through.
    skip = 2

    do_not_load = []
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')

        loaded = {}

        for _trg_class, val in cli.cfg['target'][_source].items():

            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            do_not_delete = tcfg['doNotDeleteTables']
            # NOTE(review): do_not_load is reassigned here per target and
            # never consulted afterwards in this function.
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            toSchema = tcfg['targetSchema']
            stmt = 'set search_path to %s' % toSchema
            psql(stmt)
            to_conn.cur.execute(stmt)

            # ---- Phase 1: stage every *.out file into a tmp_PK_* table.
            pkstats = {}
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, okname = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)

                out_files = InOut(file_names=[])

                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')

                #e()
                if 1:  # Cross-check file-derived table names against config.
                    ftlist = []

                    for out_fn in out_files.file_names:
                        print(out_fn)
                        # Table name is the second dot-separated token of
                        # the file name (e.g. '<prefix>.<Table>.out').
                        ftlist.append(os.path.basename(out_fn).split('.')[1])

                    pfmt([[x] for x in ftlist], ['Files->Tables'])

                    ctables = cli.tcfg['targetTables'].keys()
                    extra_file_tables = list(set(ftlist) - set(ctables))
                    pfmt([[x] for x in extra_file_tables],
                         ['Tables not in config.'])
                    extra_config_tables = list(set(ctables) - set(ftlist))
                    pfmt([[x] for x in extra_config_tables],
                         ['Tables in config but not in file names.'])
                    # Only unknown files are fatal; tables configured but
                    # absent from the dump are merely reported above.
                    assert not extra_file_tables, 'Tables %s are not listed in config["targetTables"].' % extra_file_tables

                for outfn in out_files.file_names:  # Master first

                    outFile = create_reader(aname='File',
                                            app_init=app_init,
                                            file_name=outfn,
                                            scfg=dir_scfg)

                    outbn = os.path.basename(outfn)
                    tbl = outbn.split('.')[1]
                    outTbl = 'tmp_PK_%s' % tbl
                    outCols = outFile.get_header_cols()
                    # Audit columns appended to every staged row.
                    apxCols = [('MartModifiedDate', 'timestamp'),
                               ('AsOfFrom', 'timestamp'),
                               ('AsOfTo', 'timestamp'), ('MD5', 'char(22)')]
                    outTblCols = toDB.get_create_col_list(outCols, apx=apxCols)

                    toCols = toDB.get_col_types(toSchema, tbl)
                    pp(toCols)

                    toDB.desc_tmp_table(outTbl, outCols + apxCols)

                    # NOTE(review): appends into the list object held in
                    # tcfg['doNotDeleteTables'] — mutates config in place.
                    do_not_delete.append(outTbl)

                    try:

                        stmt = 'drop table %s' % outTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        #raise
                        # A missing table is expected on first run; match
                        # the DB's error text and re-raise anything else.
                        if not 'Table "%s" does not exist' % outTbl in str(ex):
                            raise
                    psql(outfn)
                    # Temp table uses the TARGET table's column types
                    # (toCols), not the dump header's — presumably so the
                    # staged data matches target typing; TODO confirm.
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s ( %s )\nON COMMIT PRESERVE ROWS' % (
                        outTbl, ', \n'.join(
                            ['%s %s' % tuple(col) for col in toCols]))
                    pfmt([[stmt]], ['Create master temp PK' + outTbl])
                    toDB.exec_ddl(stmt)
                    if 1:  #//Load data into PK table

                        fmt_cols = {}
                        # Snapshot timestamp read from the .ok file body.
                        mmDt = okFile.get_value(coords=(0, 0), skip=skip)

                        # NOTE(review): md5val is computed here but never
                        # used — looks like leftover scaffolding.
                        md5val = (base64.b64encode(
                            hashlib.md5(b'test').digest()))

                        # Values for the appended audit columns; order must
                        # match apxCols, hence the OrderedDict.
                        apx = OrderedDict()
                        apx['MartModifiedDate'] = mmDt
                        apx['AsOfFrom'] = mmDt
                        apx['AsOfTo'] = "12/31/9999"
                        apx['MD5'] = ''  #//defined on row level

                        pk_outfn = '%s.pk' % outfn
                        colsep = dir_scfg['columnDelimiter']

                        # Rewrite the dump file with audit columns appended
                        # to the header, type row, and every data row.
                        with open(pk_outfn, 'wb') as pkfh:
                            with open(outfn, 'rb') as outfh:
                                line = outfh.readline().strip()
                                # NOTE(review): no colsep is inserted
                                # between the stripped line and the first
                                # appended column — assumes the line keeps
                                # a trailing delimiter; TODO confirm.
                                pkfh.write(line +
                                           colsep.join(apx.keys()).encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                apxTypes = colsep.join(
                                    [col[1] for col in apxCols])
                                pkfh.write(line + apxTypes.encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                # NOTE(review): a blank line mid-file would
                                # end this loop early.
                                while line:
                                    # Row hash over the line with '|'
                                    # removed; base64, '=' padding dropped
                                    # to fit char(22).
                                    md5 = (base64.b64encode(
                                        hashlib.md5(line.replace(
                                            b'|', b'')).digest()))
                                    apx['MD5'] = md5.decode('ascii',
                                                            'ignore').strip(
                                                                '=')  #// REDO

                                    pkfh.write(
                                        line +
                                        colsep.join(apx.values()).encode() +
                                        os.linesep.encode())
                                    line = outfh.readline().strip()
                        outPkFile = create_reader(aname='File',
                                                  app_init=app_init,
                                                  file_name=pk_outfn,
                                                  scfg=dir_scfg)
                        outPkFile.set_alt_cols()

                        schema = tcfg['targetSchema']
                        toDB.load_grds_file(trans=to_conn,
                                            file_obj=outPkFile,
                                            schema=schema,
                                            table_name=outTbl,
                                            qname='insertStmt',
                                            fmt_cols=fmt_cols,
                                            cfg=(dir_scfg, tcfg),
                                            skip=skip,
                                            stats=pkstats)
                        loaded[outbn] = outTbl
                        #outPkFile.delete()

                #pfmtd([pkstats])
                #e()
            # ---- Phase 2: merge staged rows into the real target tables.
            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                #e()
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                # Skip snapshots whose destination (archive) folder already
                # exists — treated as already processed.
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' %
                                (okdir, snap_df))
                    not_processed.append(okfn)
                    continue

                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(
                    MartModifiedDate=okFile.get_value(coords=(0,
                                                              0), skip=skip))

                #e()
                if 0:
                    # NOTE(review): raw_input is Python 2 — this dead
                    # branch would NameError under Python 3 if enabled.
                    g = raw_input("Continue?")

                not_loaded = {}

                # NOTE(review): ftlist here carries over from the LAST
                # ok-file iterated in Phase 1 above (and is undefined if
                # there were none) — verify this is intentional.
                for table_name in ftlist:
                    tmpTbl = 'tmp_PK_%s' % table_name
                    toCols = toDB.get_tab_cols(tmpTbl)
                    #pp(toCols)
                    toDB.desc_table(None, tmpTbl)
                    toDB.desc_table(toSchema, table_name)
                    #e()
                    # Branch on table name to pick the join-key width:
                    # 3 key columns + ValidFrom/AsOfTo, 2 key columns
                    # + AsOfTo, or a single key column + AsOfTo.
                    if table_name in ['TxnLookupMap']:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} ) 
select distinct {2} 
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.{6} = tmid.{6}
AND ta.ValidFrom = tmid.ValidFrom and ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode(),
                        toCols[2][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    elif table_name in [
                            'G3Lookup', 'GCLookup', 'GISLookup', 'GPSLookup',
                            'GPXLookup', 'GPosLookup', 'GTxLookup',
                            'FundToBusinessUnitMap', 'TxEditReason'
                    ]:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL 
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    else:
                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL ;
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])

                if 1:
                    # Commit once per ok-file, after all tables merged.
                    toDB.commit_transaction(trans=to_conn)

                    pfmt([[k] + list(v.values())[1:]
                          for k, v in pkstats.items()], [
                              'Table', 'Accepted', 'Rejected', 'Line count',
                              'Skip', 'Diff'
                          ], 'Load completed'.upper())
                    pfmt([(k, v) for k, v in loaded.items()],
                         ['Loaded Files', 'Loaded Tables'])
                    pfmt([(k, v) for k, v in not_loaded.items()],
                         ['Not loaded Files', 'Not loaded Tables'])
                    assert os.path.isdir(okdir)
                    if 0:
                        # Archiving of the snapshot folder is disabled.
                        cli.MoveSnapFolder(okdir)
                    processed.append(dict(ProcessedFile=okfn))
                #break;

            if not ok_files.file_names:
                pfmtd([
                    dict(NoFiles='No OK files at working dir: [ %s ]' %
                         cli.pa[0])
                ])

            pfmtd(processed)
            pfmtd(not_processed)

    if 0:
        # Email notification is disabled.
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
        cli.done()