import datetime
import sys
from multiprocessing import Pool


def main(args):
    esIp = args.ip
    esPort = args.port
    connection_string = args.src
    idx = args.idx
    typ = args.type
    sts = datetime.datetime.now()
    pool = Pool(processes=int(args.cpu))

    # get the columns from the meta database and build one worker task per column
    miner = MetaModel(args.src)
    tasks = [(c, esIp, esPort, connection_string, idx, typ) for c in miner.columns()]

    print('## going to populate Elastic Search with {0} columns'.format(len(tasks)))
    print('\n')

    # fan the tasks out over the process pool
    for i, _ in enumerate(pool.imap_unordered(executeOne, tasks)):
        sys.stdout.write("\033[1A")
        totalprogress = "\r\033[K## progress {0}/{1}: {2:.2f}%\n".format(
            i + 1, len(tasks), (i + 1) / len(tasks) * 100)
        sys.stdout.write(totalprogress)
        sys.stdout.flush()

    print('')
    print('## DONE.')
    print('## time elapsed: {0}'.format(datetime.datetime.now() - sts))
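# `executeOne` is referenced above but not defined in this snippet. A minimal
# sketch of such a worker, assuming each task unpacks to
# (column, esIp, esPort, connection_string, idx, typ) and that documents are
# posted to Elasticsearch's REST API; the payload fields are illustrative only.
import requests

def executeOne(task):
    column, esIp, esPort, connection_string, idx, typ = task
    doc = {
        'table': column.table.name,  # assumes SQLAlchemy-style Column objects
        'column': column.name,
        'type': str(column.type),
    }
    # POST to http://<host>:<port>/<index>/<type>/ and let ES assign the id
    url = 'http://{0}:{1}/{2}/{3}/'.format(esIp, esPort, idx, typ)
    return requests.post(url, json=doc).status_code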
import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


def main(args):
    sts = datetime.datetime.now()
    miner = MetaModel(args.src)
    columnmapper = ColumnMapper()
    tablemapper = TableMapper()
    pkmapper = PrimaryKeyMapper()
    fkmapper = ForeignKeyMapper()

    columns = miner.columns()
    tables = miner.tables()
    fks = fkmapper.multiple(miner.foreignKeys())
    pks = pkmapper.multiple(miner.primaryKeys())

    print('## cols: ' + str(len(columns)))
    print('## tables: ' + str(len(tables)))
    print('## fks: ' + str(len(fks)))
    print('## pks: ' + str(len(pks)))
    print('')

    print('## processing columns...')
    pcolumns = []
    if not args.explore:
        cp = MPColumnProcessor(connection_string=args.src,
                               columns=columns,
                               columnprocessor=NumpyColumnProcessor,
                               mapper=columnmapper)
        pcolumns = cp.execute(processes=int(args.cpu), verbose=True)
    else:
        pcolumns = columnmapper.multiple([(column, None) for column in columns])

    # cets = datetime.datetime.now()
    # Notifier.notify(title='cobr.io ds-toolkit',
    #                 subtitle='MPColumnProcessor done!',
    #                 message='processed: ' + str(len(pcolumns)) + ' columns in '
    #                         + str(math.floor((cets - sts).total_seconds())) + ' seconds')

    print('')
    print('## processing tables...')
    tp = MPTableProcessor(connection_string=args.src, tables=tables, mapper=tablemapper)
    ptables = tp.execute(processes=int(args.cpu), verbose=True)

    # Notifier.notify(title='cobr.io ds-toolkit',
    #                 subtitle='MPTableProcessor done!',
    #                 message='processed: ' + str(len(ptables)) + ' tables in '
    #                         + str(math.floor((datetime.datetime.now() - cets).total_seconds())) + ' seconds')

    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()
        writeToDb(session, ptables)
        writeToDb(session, pcolumns)
        writeToDb(session, pks)
        writeToDb(session, fks)

    print('')
    print('## time elapsed: ' + str(datetime.datetime.now() - sts))
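# `writeToDb` is called above but not defined in this snippet. A minimal
# sketch, assuming the mappers return SQLAlchemy ORM instances that only need
# to be staged and committed on the target session:
def writeToDb(session, objects):
    session.add_all(objects)  # stage every mapped instance
    session.commit()          # flush and commit in one transaction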
def prepareTest(self, file):
    pwd = os.getcwd()
    # root = pwd + "/Tests/"
    root = pwd
    metaModel = MetaModel()
    fileTxt = file.replace(".sql", ".txt")
    metaModel.prepareMetaModel(root + "/" + file, root + "/log.txt")
    if not os.path.exists(root + '/temp'):
        os.makedirs(root + '/temp')
    smellDetector = SmellDetector(metaModel, root + "/temp/", file)
    if os.path.isfile(root + "/temp/" + fileTxt):
        os.remove(root + "/temp/" + fileTxt)
    smellDetector.detectAllDbSmells()
    return FileUtils.readFileContents(root + "/temp/" + fileTxt)
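# `FileUtils.readFileContents` is not shown in this snippet. A minimal sketch
# of the helper as the call site uses it (read a whole text file into a string):
class FileUtils:
    @staticmethod
    def readFileContents(path):
        with open(path, 'r') as f:
            return f.read()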
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


def main(args):
    miner = MetaModel(args.src)
    tables = miner.tables()
    columns = miner.columns()

    pkexplorer = PkExplorer(connection_string=args.src)
    pks = pkexplorer.doMultipleTables(tables)

    if not args.dry_run and args.target:
        engine = create_engine(args.target)
        Session = sessionmaker(bind=engine)
        session = Session()
        writeToDb(session, pks)
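# `PkExplorer` is not defined in this snippet. A rough sketch of the shape the
# call site implies, assuming it collects declared primary-key constraints via
# SQLAlchemy's inspector (the real class may also probe for undeclared keys):
from sqlalchemy import inspect

class PkExplorer:
    def __init__(self, connection_string):
        self.inspector = inspect(create_engine(connection_string))

    def doMultipleTables(self, tables):
        pks = []
        for table in tables:
            constraint = self.inspector.get_pk_constraint(table.name)
            pks.append((table.name, constraint.get('constrained_columns', [])))
        return pks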
def main(args):
    colseparator = '|'
    miner = MetaModel(args.src)
    tables = miner.tables()
    columns = miner.columns()

    # group primary-key columns by (schema, table)
    dic = {}
    for pk in [c for c in columns if c.primary_key]:
        if (pk.info['schemaname'], pk.table.name) not in dic:
            dic[(pk.info['schemaname'], pk.table.name)] = []
        dic[(pk.info['schemaname'], pk.table.name)].append(pk)

    # split primary keys into single-column and multi-column keys
    spks = [dic[key] for key in dic.keys() if len(dic[key]) == 1]
    mpks = [dic[key] for key in dic.keys() if len(dic[key]) > 1]

    print("LOTS OF WORK IN PROGRESS...")
    exit()

    # unreachable until the work-in-progress guard above is removed
    affaires = Discovery(tables=tables, columns=columns, pksingle=spks, pkmulti=mpks)
    fks = pruneDuplicateFks([fk[0] for fk in affaires.discoverfks(0.9)])
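# `pruneDuplicateFks` is referenced above but not defined here. A minimal
# sketch, assuming a discovered foreign key can be identified by its source
# and target columns (the attribute names are hypothetical):
def pruneDuplicateFks(fks):
    seen = {}
    for fk in fks:
        key = (str(fk.source), str(fk.target))
        seen.setdefault(key, fk)  # keep the first candidate per column pair
    return list(seen.values())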
def test(self):
    mm = MetaModel('sqlite://')
    print(mm.columns())
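# Against the bare in-memory URL 'sqlite://' there is no schema to reflect, so
# the print above is expected to show an empty list (assuming MetaModel
# reflects the live database). A sketch of a variant that seeds a throwaway
# table first so columns() has something to return (path is hypothetical):
import sqlite3

def test_with_schema(self):
    path = '/tmp/mm_smoke_test.db'
    conn = sqlite3.connect(path)
    conn.execute('CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY, name TEXT)')
    conn.commit()
    conn.close()
    mm = MetaModel('sqlite:///' + path)
    print(mm.columns())  # should now list t.id and t.name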
            values = [d[0] for d in result.fetchall()]
            cp = self.columnprocessor(column.type, values)
            return self.mapper.single(column, cp.doOperations())
        except Exception as ex:
            print(ex)
            print('')
        finally:
            conn.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--src",
                        help="connection_string for the subject-database",
                        metavar="string")
    args = parser.parse_args()

    mm = MetaModel(args.src)
    sts = datetime.datetime.now()
    processor = MPColumnProcessor(connection_string=args.src,
                                  columns=mm.columns(),
                                  columnprocessor=NumpyColumnProcessor)
    result = processor.execute(processes=1, verbose=True)
    duration = datetime.datetime.now() - sts

    print('number of processed columns: ' + str(len(result)))
    # Calling the notification function
    # Notifier.notify(title='cobr.io ds-toolkit', subtitle='MPColumnProcessor done!',
    #                 message='processed: ' + str(len(result)) + ' columns in '
    #                         + str(math.floor(duration.total_seconds())) + ' seconds')
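# `NumpyColumnProcessor` is only referenced in this snippet. A minimal sketch
# of the interface the call sites imply: constructed with the column type and
# its values, with doOperations() returning a dict of statistics. The exact
# statistics computed here are assumptions:
import numpy as np

class NumpyColumnProcessor:
    def __init__(self, columntype, values):
        self.values = [v for v in values if v is not None]

    def doOperations(self):
        try:
            arr = np.asarray(self.values, dtype=float)
            return {'min': arr.min(), 'max': arr.max(), 'mean': arr.mean(),
                    'distinct': len(set(self.values))}
        except (ValueError, TypeError):
            # non-numeric column: fall back to a distinct-value count only
            return {'distinct': len(set(self.values))}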
from DBWrapper.I3Queries import I3Queries
from TrainObjs import TrainObjs
from MetaModel import MetaModel
import os

if __name__ == '__main__':
    Qs = I3Queries()
    trs = Qs.getTrainFiles()
    objs = TrainObjs()
    for tr in trs:
        objs.pushTrainObj(tr)
    objs.cleanAll()

    if not os.path.exists("test"):
        os.mkdir("test")
    with open("test/langs", "w") as fout:
        objs.saveFilesByLang("test", fout)

    mt = MetaModel("es", "test")
    mt.setSaveLocation("test")
    mt.train()
from DBWrapper.I3Queries import I3Queries
from TrainObjs import TrainObjs
from MetaModel import MetaModel
import os

if __name__ == '__main__':
    mt = MetaModel("es", "test")
    mt.setSaveLocation("test")
    mt.load()
    print("ignaci viv madr ignaci citr negr nad")
    print(mt.predict("ignaci viv madr ignaci citr negr nad"))
    model.classifier = nn.Linear(model.classifier.in_features, 1)
    model.load_state_dict(checkpoint)
    _ = model.eval()
    _ = disable_grad(model)
    model = model.to(device)
    stack_models.append(model)
    del checkpoint, model

for meta_raw in meta_models:
    checkpoint = torch.load(WEIGTHS_PATH + meta_raw[0] + WEIGTHS_EXT,
                            map_location=device)
    model = MetaModel(models=raw_data_stack[meta_raw[1]],
                      extended=meta_raw[2]).to(device)
    model.load_state_dict(checkpoint)
    _ = model.eval()
    _ = disable_grad(model)
    model.to(device)
    models.append(model)
    weigths.append(meta_raw[3])
    del model, checkpoint

# turn each meta-model's held-out score into a normalized ensemble weight
# (lower score -> larger weight)
total = sum([1 - score for score in weigths])
weigths = [(1 - score) / total for score in weigths]

print(
    predict_on_video(face_extractor, normalize_transform, stack_models, models,
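# `disable_grad` is used above but not defined in this snippet. A minimal
# sketch, assuming it simply freezes every parameter so the loaded models can
# run inference without autograd overhead:
def disable_grad(model):
    for param in model.parameters():
        param.requires_grad = False
    return model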
            conn = engine.connect()
            num_rows = conn.execute(table.count()).fetchone()[0]
            num_columns = len(table.columns)
            num_explicit_outlinks = len(table.foreign_keys)
            return self.mapper.single(table, {'num_rows': num_rows,
                                              'num_columns': num_columns,
                                              'num_explicit_outlinks': num_explicit_outlinks})
            # return (table.name, {'num_rows': num_rows, 'num_columns': num_columns, 'num_explicit_outlinks': num_explicit_outlinks})
            # cp = self.columnprocessor(values)
            # return (column, cp.doOperations())
        except Exception as ex:
            print(ex)
        finally:
            conn.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--src",
                        help="connection_string for the subject-database",
                        metavar="string")
    args = parser.parse_args()

    mm = MetaModel(args.src)
    sts = datetime.datetime.now()
    processor = MPTableProcessor(connection_string=args.src, tables=mm.tables())
    result = processor.execute(processes=32, verbose=True)
    duration = datetime.datetime.now() - sts

    print('number of processed tables: ' + str(len(result)))
    # Calling the notification function
    # Notifier.notify(title='cobr.io ds-toolkit', subtitle='MPTableProcessor done!',
    #                 message='processed: ' + str(len(result)) + ' tables in '
    #                         + str(math.floor(duration.total_seconds())) + ' seconds')
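# Note: `table.count()` above relies on a SQLAlchemy shorthand that has been
# deprecated and removed in recent releases. On current SQLAlchemy the same
# row count would be expressed explicitly, e.g.:
from sqlalchemy import func, select

num_rows = conn.execute(select(func.count()).select_from(table)).scalar()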