Example #1
def main(args):
	esIp = args.ip
	esPort = args.port
	connection_string = args.src
	idx = args.idx
	typ = args.type

	sts = datetime.datetime.now()

	pool = Pool(processes=int(args.cpu))

	# get columns from the meta database and populate the worker tasks
	miner = MetaModel(args.src)
	tasks = [ (c, esIp, esPort, connection_string, idx, typ) for c in miner.columns() ]

	print('## going to populate Elastic Search with {0} columns'.format(len(tasks)))
	print('')

	# execute on multiple worker processes
	for i, _ in enumerate(pool.imap_unordered(executeOne, tasks), 1):
		sys.stdout.write("\033[1A")  # move the cursor one line up

		totalprogress = "\r\033[K## progress {0}/{1}: {2:.2f}%\n".format(i, len(tasks), i / len(tasks) * 100)
		sys.stdout.write(totalprogress)
		sys.stdout.flush()

	print('')
	print('## DONE.')
	print('## time elapsed: {0}'.format(str(datetime.datetime.now() - sts)))
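
The worker function `executeOne` is not shown in this example. A minimal sketch of what it might look like, assuming the official `elasticsearch` Python client (pre-8.x) and a hypothetical document layout; the real fields depend on the `MetaModel` column objects:

from elasticsearch import Elasticsearch

def executeOne(task):
    # unpack the task tuple built in main()
    column, esIp, esPort, connection_string, idx, typ = task
    es = Elasticsearch(['http://{0}:{1}'.format(esIp, esPort)])
    # hypothetical document layout; adjust to the actual column metadata
    doc = {'table': column.table.name, 'column': column.name}
    es.index(index=idx, doc_type=typ, body=doc)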
Example #2
def main(args):
    sts = datetime.datetime.now()

    miner = MetaModel(args.src)

    columnmapper = ColumnMapper()
    tablemapper = TableMapper()
    pkmapper = PrimaryKeyMapper()
    fkmapper = ForeignKeyMapper()

    columns = miner.columns()
    tables = miner.tables()
    fks = fkmapper.multiple(miner.foreignKeys())
    pks = pkmapper.multiple(miner.primaryKeys())

    print('## cols: ' + str(len(columns)))
    print('## tables: ' + str(len(tables)))
    print('## fks: ' + str(len(fks)))
    print('## pks: ' + str(len(pks)))

    print('')
    print('## processing columns...')
    pcolumns = []
    if not args.explore:
        cp = MPColumnProcessor(connection_string=args.src,
                columns=columns,
                columnprocessor=NumpyColumnProcessor,
                mapper=columnmapper)
        pcolumns = cp.execute(processes=int(args.cpu), verbose=True)
    else:
        pcolumns = columnmapper.multiple([(column, None) for column in columns])

    # cets = datetime.datetime.now()
    # Notifier.notify(title='cobr.io ds-toolkit',
    #     subtitle='MPColumnProcessor done!',
    #     message='processed: ' + str(len(pcolumns)) + ' columns in ' + str(math.floor((cets - sts).total_seconds())) + ' seconds')

    print('')
    print('## processing tables...')
    tp = MPTableProcessor(connection_string=args.src,
            tables=tables,
            mapper=tablemapper)
    ptables = tp.execute(processes=int(args.cpu), verbose=True)

    # Notifier.notify(title='cobr.io ds-toolkit',
    #     subtitle='MPTableProcessor done!',
    #     message='processed: ' + str(len(ptables)) + ' tables in ' + str(math.floor((datetime.datetime.now() - cets).total_seconds())) + ' seconds')

    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()

        writeToDb(session, ptables)
        writeToDb(session, pcolumns)
        writeToDb(session, pks)
        writeToDb(session, fks)

    print('')
    print('## time elapsed: ' + str(datetime.datetime.now() - sts))
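
`writeToDb` is called four times above but defined elsewhere. A minimal sketch, under the assumption that it simply persists a batch of SQLAlchemy-mapped objects through the given session:

def writeToDb(session, objects):
    # bulk-add the mapped objects and commit in one transaction
    session.add_all(objects)
    session.commit()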
Example #3
 def prepareTest(self, file):
     pwd = os.getcwd()
     #root = pwd + "/Tests/"
     root = pwd
     metaModel = MetaModel()
     fileTxt = file.replace(".sql", ".txt")
     metaModel.prepareMetaModel(root + "/" + file, root + "/log.txt")
     if not os.path.exists(root + '/temp'):
         os.makedirs(root + '/temp')
     smellDetector = SmellDetector(metaModel, root + "/temp/", file)
     if (os.path.isfile(root + "/temp/" + fileTxt)):
         os.remove(root + "/temp/" + fileTxt)
     smellDetector.detectAllDbSmells()
     return FileUtils.readFileContents(root + "/temp/" + fileTxt)
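
A hypothetical test built on this helper might compare the detector output against a stored expectation; the fixture names and the assertion below are assumptions, not part of the original:

 def testSmellDetection(self):
     # hypothetical usage: run prepareTest on a sample script and diff the result
     actual = self.prepareTest("sample.sql")
     expected = FileUtils.readFileContents(os.getcwd() + "/expected/sample.txt")
     self.assertEqual(expected, actual)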
Example #4
def main(args):
    miner = MetaModel(args.src)

    tables = miner.tables()
    columns = miner.columns()

    pkexplorer = PkExplorer(connection_string=args.src)
    pks = pkexplorer.doMultipleTables(tables)

    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()

        writeToDb(session, pks)
Example #5
def main(args):
	colseparator = '|'
	miner = MetaModel(args.src)

	tables = miner.tables()
	columns = miner.columns()

	dic = {}
	for pk in [ c for c in columns if c.primary_key ]:
		if (pk.info['schemaname'], pk.table.name) not in dic:
			dic[(pk.info['schemaname'], pk.table.name)] = []
		dic[(pk.info['schemaname'], pk.table.name)].append(pk)

	# split primary keys into singlecolumn and multicolumn keys
	spks = [ dic[key] for key in dic.keys() if len(dic[key]) == 1 ]
	mpks = [ dic[key] for key in dic.keys() if len(dic[key]) > 1 ]

	print("LOTS OF WORK IN PROGRESS...")
	exit()

	affaires = Discovery(tables=tables, columns=columns, pksingle=spks, pkmulti=mpks)

	fks = pruneDuplicateFks([ fk[0] for fk in affaires.discoverfks(0.9) ])
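
`pruneDuplicateFks` is referenced but not shown. A minimal sketch, assuming each discovered foreign-key candidate is hashable, that drops exact duplicates while preserving discovery order:

def pruneDuplicateFks(fks):
    # keep only the first occurrence of each candidate
    seen = set()
    unique = []
    for fk in fks:
        if fk not in seen:
            seen.add(fk)
            unique.append(fk)
    return unique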
Example #6
 def test(self):
     mm = MetaModel('sqlite://')
     print(mm.columns())
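
Against a bare in-memory database this prints an empty list. A slightly more useful variant, assuming `MetaModel` reflects whatever schema the connection string points at, seeds a file-backed SQLite database first:

 def test_with_schema(self):
     import sqlite3
     # hypothetical: create a tiny schema so columns() has something to reflect
     db = '/tmp/metamodel_test.db'
     conn = sqlite3.connect(db)
     conn.execute('CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY, name TEXT)')
     conn.commit()
     conn.close()
     mm = MetaModel('sqlite:///' + db)
     print(mm.columns())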
Example #7
            values = [d[0] for d in result.fetchall()]

            cp = self.columnprocessor(column.type, values)

            return self.mapper.single(column, cp.doOperations())
        except Exception as ex:
            print(ex)
            print('')
        finally:
            conn.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--src", help="connection_string for the subject-database", metavar="string")
    args = parser.parse_args()

    mm = MetaModel(args.src)

    sts = datetime.datetime.now()
    processor = MPColumnProcessor(connection_string=args.src,
        columns=mm.columns(),
        columnprocessor=NumpyColumnProcessor)
    result = processor.execute(processes=1, verbose=True)
    duration = datetime.datetime.now() - sts

    print('number of processed columns: ' + str(len(result)))

    # Calling the notification function
    # Notifier.notify(title='cobr.io ds-toolkit', subtitle='MPColumnProcessor done!', message='processed: ' + str(len(result)) + ' columns in ' + str(math.floor(duration.total_seconds())) + ' seconds')
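
`NumpyColumnProcessor` is passed in as a class and instantiated per column (see `cp = self.columnprocessor(column.type, values)` above). A rough sketch of the shape such a processor might have, assuming numeric columns and simple NumPy statistics; the actual operations and result keys are unknown:

import numpy as np

class NumpyColumnProcessor:
    def __init__(self, column_type, values):
        # drop NULLs before computing statistics
        self.values = [v for v in values if v is not None]

    def doOperations(self):
        # hypothetical statistics, assuming the values are numeric
        if not self.values:
            return {}
        arr = np.asarray(self.values, dtype=float)
        return {'min': float(arr.min()), 'max': float(arr.max()),
                'mean': float(arr.mean()), 'std': float(arr.std())}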
Example #8
from DBWrapper.I3Queries import I3Queries
from TrainObjs import TrainObjs
from MetaModel import MetaModel
import os

if __name__ == '__main__':
    Qs = I3Queries()
    trs = Qs.getTrainFiles()
    objs = TrainObjs()
    for tr in trs:
        objs.pushTrainObj(tr)

    objs.cleanAll()
    if not os.path.exists("test"):
        os.mkdir("test")

    with open("test/langs", "w") as fout:
        objs.saveFilesByLang("test", fout)

    mt = MetaModel("es", "test")
    mt.setSaveLocation("test")
    mt.train()
Example #9
from DBWrapper.I3Queries import I3Queries
from TrainObjs import TrainObjs
from MetaModel import MetaModel
import os

if __name__ == '__main__':
    mt = MetaModel("es", "test")
    mt.setSaveLocation("test")
    mt.load()
    print("ignaci viv madr ignaci citr negr nad")
    print(mt.predict("ignaci viv madr ignaci citr negr nad"))
Example #10
        model.classifier = nn.Linear(model.classifier.in_features, 1)

    model.load_state_dict(checkpoint)
    _ = model.eval()
    _ = disable_grad(model)
    model = model.to(device)
    stack_models.append(model)

    del checkpoint, model

for meta_raw in meta_models:

    checkpoint = torch.load(WEIGTHS_PATH + meta_raw[0] + WEIGTHS_EXT,
                            map_location=device)

    model = MetaModel(models=raw_data_stack[meta_raw[1]],
                      extended=meta_raw[2]).to(device)

    model.load_state_dict(checkpoint)
    _ = model.eval()
    _ = disable_grad(model)
    model.to(device)
    models.append(model)
    weigths.append(meta_raw[3])

    del model, checkpoint

# weight each meta-model by (1 - score), normalized so the weights sum to 1
total = sum(1 - score for score in weigths)
weigths = [(1 - score) / total for score in weigths]

print(
    predict_on_video(face_extractor, normalize_transform, stack_models, models,
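
`disable_grad` is applied to every model before inference but is not shown. A minimal sketch of what it likely does, freezing all parameters so no gradients are tracked:

def disable_grad(model):
    # freeze every parameter; inference-only models need no gradient state
    for p in model.parameters():
        p.requires_grad = False
    return model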
Example #11
            conn = engine.connect()

            # row count via the legacy Table.count() construct (SELECT COUNT(*))
            num_rows = conn.execute(table.count()).fetchone()[0]
            num_columns = len(table.columns)
            num_explicit_outlinks = len(table.foreign_keys)

            return self.mapper.single(table, {'num_rows': num_rows, 'num_columns': num_columns, 'num_explicit_outlinks': num_explicit_outlinks})
            # return (table.name, {'num_rows': num_rows, 'num_columns': num_columns, 'num_explicit_outlinks': num_explicit_outlinks})
            # cp = self.columnprocessor(values)
            # return (column, cp.doOperations())
        except Exception as ex:
            print(ex)
        finally:
            conn.close()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--src", help="connection_string for the subject-database", metavar="string")
    args = parser.parse_args()

    mm = MetaModel(args.src)

    sts = datetime.datetime.now()
    processor = MPTableProcessor(connection_string=args.src, tables=mm.tables())
    result = processor.execute(processes=32, verbose=True)
    duration = datetime.datetime.now() - sts

    print('number of processed tables: ' + str(len(result)))

    # Calling the notification function
    # Notifier.notify(title='cobr.io ds-toolkit', subtitle='MPTableProcessor done!', message='processed: ' + str(len(result)) + ' tables in ' + str(math.floor(duration.total_seconds())) + ' seconds')
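
Note that `table.count()` relies on the legacy `FromClause.count()` construct, which newer SQLAlchemy releases (1.4+) have removed. An equivalent row count on current versions can be expressed with the Core `select` construct; this is a sketch, not part of the original code:

from sqlalchemy import select, func

# SELECT COUNT(*) FROM table, on SQLAlchemy 1.4+/2.0
num_rows = conn.execute(select(func.count()).select_from(table)).scalar()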