Example #1
0
def main(args):
    """Mine schema metadata from the source database, profile its columns and
    tables, and optionally persist the mapped results to a target database.

    args is expected to provide: src (source connection string), cpu (worker
    count), explore (skip column value profiling), dry_run, target (optional
    target connection string).
    """
    sts = datetime.datetime.now()

    miner = MetaModel(args.src)

    columnmapper = ColumnMapper()
    tablemapper = TableMapper()
    pkmapper = PrimaryKeyMapper()
    fkmapper = ForeignKeyMapper()

    columns = miner.columns()
    tables = miner.tables()
    fks = fkmapper.multiple(miner.foreignKeys())
    pks = pkmapper.multiple(miner.primaryKeys())

    print(f'## cols: {len(columns)}')
    print(f'## tables: {len(tables)}')
    print(f'## fks: {len(fks)}')
    print(f'## pks: {len(pks)}')

    print('')
    print('## processing columns...')
    if not args.explore:
        # full profiling run: fan the columns out over args.cpu processes
        cp = MPColumnProcessor(connection_string=args.src,
                columns=columns,
                columnprocessor=NumpyColumnProcessor,
                mapper=columnmapper)
        pcolumns = cp.execute(processes=int(args.cpu), verbose=True)
    else:
        # explore mode: map the columns without profiling their values
        pcolumns = columnmapper.multiple([(column, None) for column in columns])

    print('')
    print('## processing tables...')
    tp = MPTableProcessor(connection_string=args.src,
            tables=tables,
            mapper=tablemapper)
    ptables = tp.execute(processes=int(args.cpu), verbose=True)

    # persist only when a target is given and this is not a dry run
    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()

        writeToDb(session, ptables)
        writeToDb(session, pcolumns)
        writeToDb(session, pks)
        writeToDb(session, fks)

    print('')
    print('## time elapsed: ' + str(datetime.datetime.now() - sts))
Example #2
0
def main(args):
	"""Populate Elastic Search with one document per column of the source DB,
	processing columns in parallel on int(args.cpu) worker processes.

	BUG FIX: the original reported and divided the progress by len(tasks) - 1,
	which is off by one and raises ZeroDivisionError when there is exactly one
	column; it also never closed the worker pool.
	"""
	sts = datetime.datetime.now()

	# get columns from the meta database and populate the worker tasks:
	# (column, es_host, es_port, connection_string, index, doc_type)
	miner = MetaModel(args.src)
	tasks = [ (c, args.ip, args.port, args.src, args.idx, args.type) for c in miner.columns() ]
	total = len(tasks)

	print('## going to populate Elastic Search with {0} columns'.format(total))
	print('\n'*1)

	# execute on multiple processes; the pool is terminated on exit
	with Pool(processes=int(args.cpu)) as pool:
		for done, _ in enumerate(pool.imap_unordered(executeOne, tasks), 1):
			sys.stdout.write("\033[1A")

			totalprogress = "\r\033[K## progress {0}/{1}: {2:.2f}% \n".format(done, total, round(done/total*100,2))
			sys.stdout.write(totalprogress)
			sys.stdout.flush()

	print('')
	print('## DONE.' )
	print('## time elapsed: {0}'.format(str(datetime.datetime.now() - sts)))
Example #3
0
def main(args):
    """Explore primary keys on every table of the source database and, unless
    this is a dry run, write the discovered keys to the target database."""
    miner = MetaModel(args.src)

    tables = miner.tables()
    # NOTE(review): the original also called miner.columns() and discarded the
    # result; removed as an unused local (assumed to be a pure getter).

    pkexplorer = PkExplorer(connection_string=args.src)
    pks = pkexplorer.doMultipleTables(tables)

    # persist only when a target is given and this is not a dry run
    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()

        writeToDb(session, pks)
Example #4
0
def main(args):
	"""Group primary-key columns per (schema, table) and split them into
	single-column and multi-column keys; FK discovery below is unfinished.

	BUG FIX: the Discovery call referenced the undefined name `mkps` (typo
	for `mpks`), which would raise NameError once the exit() guard is lifted.
	Also removed the unused local `colseparator`.
	"""
	miner = MetaModel(args.src)

	tables = miner.tables()
	columns = miner.columns()

	# group primary-key columns by (schema name, table name)
	dic = {}
	for pk in (c for c in columns if c.primary_key):
		dic.setdefault((pk.info['schemaname'], pk.table.name), []).append(pk)

	# split primary keys into singlecolumn and multicolumn keys
	spks = [ cols for cols in dic.values() if len(cols) == 1 ]
	mpks = [ cols for cols in dic.values() if len(cols) > 1 ]

	print("LOT'S OF WORK IN PROGRESS...")
	exit()

	# unreachable below the exit() above; kept as the in-progress next step
	affaires = Discovery(tables=tables, columns=columns, pksingle=spks, pkmulti=mpks)

	fks = pruneDuplicateFks([ fk[0] for fk in affaires.discoverfks(0.9) ])
Example #5
0
 def test(self):
     """Smoke test: build a MetaModel over an in-memory SQLite URL and dump its columns."""
     model = MetaModel('sqlite://')
     columns = model.columns()
     print(columns)
Example #6
0
            values = [d[0] for d in result.fetchall()]

            cp = self.columnprocessor(column.type, values)

            return self.mapper.single(column, cp.doOperations())
        except Exception as ex:
            print(ex)
            print('')
        finally:
            conn.close()


if __name__ == "__main__":
    # CLI: only the source database connection string is required
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-s", "--src", help="connection_string for the subject-database", metavar="string")
    cli_args = arg_parser.parse_args()

    model = MetaModel(cli_args.src)

    # profile every column on a single worker process and time the run
    started = datetime.datetime.now()
    profiler = MPColumnProcessor(
        connection_string=cli_args.src,
        columns=model.columns(),
        columnprocessor=NumpyColumnProcessor)
    processed = profiler.execute(processes=1, verbose=True)
    # elapsed time previously fed a desktop notification (dead code removed)
    duration = datetime.datetime.now() - started

    print('number of processed columns: ' + str(len(processed)))