def run(conf: DictConfig, current_dir) -> None:
    """Load a trained WSD checkpoint and export the inner model's weights.

    Args:
        conf: hydra config
        current_dir: working directory (unused here -- kept for caller
            compatibility; NOTE(review): confirm it can be dropped)
    """
    set_seed(conf.training.random_seed)
    hparams = OmegaConf.to_container(conf)
    trainer = pl.Trainer(**conf.trainer)

    dm = load_obj(conf.training.data_module_name)(hparams=hparams, conf=conf)
    dm.setup()
    model = load_obj(conf.training.lightning_module_name)(
        hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx, embedder=dm.embedder)

    # NOTE(review): hard-coded local checkpoint path -- consider moving into conf.
    best_path = ('C:/Users/Ангелина/Python_codes/wsd_train_folder/outputs/'
                 '2021-02-09_19-27-50_elmo/saved_models/'
                 'epoch=22_valid_score_mean=0.9617.ckpt')
    model = model.load_from_checkpoint(
        best_path,
        hparams=hparams,
        conf=conf,
        tag_to_idx=dm.tag_to_idx,
        embedder=dm.embedder,
        strict=False)

    # checkpoint file name without the folder and the '.ckpt' extension
    save_name = best_path.split('/')[-1][:-5]
    model_name = ('C:/Users/Ангелина/Python_codes/wsd_train_folder/outputs/'
                  f'2021-02-09_19-27-50_elmo/saved_models/{save_name}.pth')
    print(model_name)
    torch.save(model.wsd_model.state_dict(), model_name)
def configure_optimizers(self):
    """Build the optimizer and LR scheduler from the hydra config.

    Returns:
        ([optimizer], [scheduler_dict]) in the format PyTorch-Lightning expects.
    """
    # NOTE(review): the optimizer is read from conf.training.optimizer, the
    # scheduler below from conf.train_setup.scheduler, yet this guard checks
    # conf.training.scheduler.name -- confirm both config sections exist and agree.
    optimizer = load_obj(self.conf.training.optimizer.name)(
        self.wsd_model.parameters(), **self.conf.training.optimizer.params)
    if 'transformers.get_linear_schedule_with_warmup' not in self.conf.training.scheduler.name:
        scheduler = load_obj(self.conf.train_setup.scheduler.name)(
            optimizer, **self.conf.train_setup.scheduler.params)
        scheduler_dict = {
            'scheduler': scheduler,
            'interval': self.conf.train_setup.scheduler.step,
            'monitor': self.conf.train_setup.scheduler.monitor,
            'name': 'scheduler',
        }
    else:
        # total steps = steps/epoch * (min_epochs + 7); the "+ 7" headroom is a
        # magic constant -- presumably to cover training past min_epochs. TODO confirm.
        num_train_steps = self.num_steps * (self.conf.trainer.min_epochs + 7)
        # warm up for 10% of the total training steps
        num_warm = round(num_train_steps * 0.1)
        scheduler = load_obj(self.conf.train_setup.scheduler.name)(
            optimizer, num_training_steps=num_train_steps, num_warmup_steps=num_warm)
        scheduler_dict = {'scheduler': scheduler, 'name': 'scheduler'}
    return [optimizer], [scheduler_dict]
def test_main(line):
    """Run a single query against the loaded index and print matching docs.

    NOTE: Python 2 code (print statements, implicit str/unicode handling).
    """
    # NOTE(review): r_index is loaded but never used here -- likely a leftover.
    r_index = load_obj("optimized_index")
    searcher = Searcher()
    docs = load_obj("documents")
    print("readed")
    qe = QueryExecutor(searcher)
    start_time = time.time()
    doc_result = qe.query(line)
    # superseded manual AND-query handling, kept for reference:
    """
    if '&' in line:
        request = line.split(' & ')
        for i in range(len(request)):
            request[i] = unicode(request[i], 'utf-8').lower()
        doc_result = searcher.and_word_list(request)
    else:
        doc_result = searcher.find_word(unicode(line, 'utf-8').lower())
    """
    print line
    print len(doc_result)
    for doc_id in doc_result:
        print docs[doc_id]
    print("--- %s seconds ---" % (time.time() - start_time))
def run(conf: DictConfig) -> None:
    """Train, checkpoint and test a pytorch-lightning model.

    Args:
        conf: hydra config
    """
    set_seed(conf.training.random_seed)
    hparams = OmegaConf.to_container(conf)
    # checkpoints are written under the current (hydra run) directory
    conf.callbacks.model_checkpoint.params.filepath = (
        os.getcwd() + conf.callbacks.model_checkpoint.params.filepath)
    checkpoint_callback: ModelCheckpoint = ModelCheckpoint(**conf.callbacks.model_checkpoint.params)
    early_stop_callback = EarlyStopping(**conf.callbacks.early_stopping.params)

    loggers = []
    if conf.logging.enable_logging:
        for logger in conf.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    trainer = pl.Trainer(logger=loggers,
                         checkpoint_callback=checkpoint_callback,
                         callbacks=[early_stop_callback],
                         **conf.trainer)

    dm = load_obj(conf.training.data_module_name)(hparams=hparams, conf=conf)
    dm.setup()
    num_steps_in_epoch = len(dm.train_dataloader())
    model = load_obj(conf.training.lightning_module_name)(
        hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx,
        embedder=dm.embedder, num_steps=num_steps_in_epoch)
    trainer.fit(model, dm)

    if conf.general.save_pytorch_model:
        # Fix: ensure the target folder exists for BOTH branches (it was
        # created only when saving the 'last' model before).
        os.makedirs('saved_models', exist_ok=True)
        if conf.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            print('Best model score ', trainer.checkpoint_callback.best_model_score)
            # extract file name without folder and '.ckpt' extension
            save_name = best_path.split('/')[-1][:-5]
            model = model.load_from_checkpoint(
                best_path, hparams=hparams, conf=conf,
                tag_to_idx=dm.tag_to_idx, embedder=dm.embedder, strict=False)
            model_name = f'saved_models/{save_name}.pth'
        else:
            model_name = 'saved_models/last.pth'
        print(model_name)
        torch.save(model.model.state_dict(), model_name)

    trainer.test(model=model, datamodule=dm)
def exportAllSes3():
    """Export JADER size-3 side-effect subsets in chunks of nSize SEs."""
    seList = utils.load_obj("%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)[2]
    # seList = ['product dose omission']
    nSize = 50

    import os
    import glob
    # Fix: the old shell "rm" used an invalid "\ " string escape to quote
    # spaces; remove stale files with glob + os.remove instead.
    outDir = "%s/FSUBTEST/3" % params.JADER_OUT
    for stale in glob.glob(os.path.join(outDir, "*")):
        try:
            os.remove(stale)
        except OSError:
            pass

    pathInfo1 = "%s/FSUBTEST/3/FileMap.txt" % params.JADER_OUT
    pathIn1 = "%s/SUB/F3" % (params.JADER_OUT)
    dirOut1 = "%s/FSUBTEST/3" % params.JADER_OUT
    # truncate the file map before re-exporting
    open(pathInfo1, "w").close()

    nSeg = max(int(len(seList) / nSize), 1)
    for i in range(nSeg):
        start = i * nSize
        end = min((i + 1) * nSize, len(seList))
        exportBySE(seList[start:end], pathIn1, dirOut1, pathInfo1)
def recieveTaskAndReturnAnswer(self):
    """Pick up the task file left by the parent, execute it and reply."""
    inbox = os.path.join(self.root, self.subfolders['sent_by_parent'])
    task_file = f'child{self.id}.pkl'
    task_params = load_obj(inbox, task_file)
    os.remove(os.path.join(inbox, task_file))

    if "resend" in task_params:
        # payload was incomplete -- ask the parent to send the work again
        print("asking for work to be resent")
        self.placeChildFlag(os.path.join(self.root,
                                         self.subfolders['send_to_parent'],
                                         f'resend{self.id}.txt'))
        time.sleep(10)
        self.placeChildFlag(self.available)
        return

    try:
        answer = self.parseRecievedTask(task_params)
    except ConnectionResetError:
        # die gracefully here: drop the alive flag and leave a death note
        print(f"{self.id} died")
        os.remove(self.alive)
        self.placeChildFlag(os.path.join(self.root,
                                         self.subfolders['send_to_parent'],
                                         f'dead{self.id}.txt'))
        return

    self.returnAnswer(answer)
    del answer
    self.placeChildFlag(self.available)
    print('waiting')
def stats2(nSize=0):
    """Sort the drug-combination counter, dump it and plot the cut-off curve.

    Args:
        nSize: size suffix of the counter dump to load.
    """
    print("Loading...")
    drugComb = utils.load_obj("%s/FDrugCombCount_%s" % (params.FADER_OUT, nSize))
    print("Sorting..")
    kvs = utils.sort_dict(drugComb)
    print("Saving...")
    cc = 0
    # Fix: use a context manager so the file is closed even on error.
    with open("%s/FDrugCombSort_%s" % (params.FADER_OUT, nSize), "w") as fout:
        for k, v in kvs:
            cc += v
            # keys are tuples of drug names -> join with ','
            fout.write("%s$%s\n" % (",".join(k), v))
    print("Total: %s cases" % cc)
    from plotLib import plotCul2
    plotCul2(kvs[::-1], 200, 1, "SelectedCombDrugCutOff",
             xLabel="ThreshHold: Freq >=", yLabel="Number of Combs")
def validation_step(self, batch, *args, **kwargs):
    """Compute loss and metric score for one validation batch."""
    sentences, lengths, tags = batch
    embeddings = self.embedder(sentences)
    tag_preds, loss, tag_preds_list = self.model(embeddings, lengths, tags)

    if self.conf.training.metric.functional:
        # functional (sklearn-style) metric: flatten tags and drop PAD positions
        tags = [t for t in tags.flatten().tolist() if t != self.tag_to_idx['PAD']]
        score = load_obj(self.conf.training.metric.metric_class)(
            tags, tag_preds_list, **self.conf.training.metric.params)
        metric_score = torch.tensor(score)
    else:
        # class-based metric works on tensors directly
        tags = tags.flatten()
        tags = tags[tags != self.tag_to_idx['PAD']]
        metric_score = self.metric(tag_preds, tags)

    return {
        'valid_loss': loss,
        'log': {'valid_loss': loss.item()},
        'step_metric': metric_score,
        'predicted_list': tag_preds_list,
        'predicted_seq': tag_preds,
        'true_seq': tags,
    }
def exportAllSesG2():
    """Export CAD size-2 side-effect subsets in chunks of nSize SEs."""
    seList = utils.load_obj("%s/SeTopList.txt" % params.CAD_OUT)
    print(seList[:20])
    # seList = ['product dose omission']
    nSize = 50

    import os
    import glob
    # Fix: the old shell "rm" used an invalid "\ " string escape to quote
    # spaces; remove stale files with glob + os.remove instead.
    outDir = "%s/FSUBTEST/2" % params.CAD_OUT
    for stale in glob.glob(os.path.join(outDir, "*")):
        try:
            os.remove(stale)
        except OSError:
            pass

    pathInfo1 = "%s/FSUBTEST/2/NGFileMap.txt" % params.CAD_OUT
    pathIn1 = "%s/CADER.txt" % (params.CAD_OUT)
    dirOut1 = "%s/FSUBTEST/2" % params.CAD_OUT
    # truncate the file map before re-exporting
    open(pathInfo1, "w").close()

    nSeg = max(int(len(seList) / nSize), 1)
    for i in range(nSeg):
        start = i * nSize
        end = min((i + 1) * nSize, len(seList))
        exportBySE(seList[start:end], pathIn1, dirOut1, pathInfo1)
def training_step(self, batch, *args, **kwargs):
    """Compute loss and metric score for one training batch."""
    sentences, lengths, tags = batch
    embeddings = self.embedder(sentences)
    tag_preds, loss, tag_preds_list = self.model(embeddings, lengths, tags)

    if self.conf.training.metric.functional:
        # functional (sklearn-style) metric: flatten tags and drop PAD positions
        tags = tags.flatten().tolist()
        tags_list = [t for t in tags if t != self.tag_to_idx['PAD']]
        score = load_obj(self.conf.training.metric.metric_class)(
            tags_list, tag_preds_list, **self.conf.training.metric.params)
        metric_score = torch.tensor(score)
    else:
        # class-based metric works on tensors directly
        tags = tags.flatten()
        tags = tags[tags != self.tag_to_idx['PAD']]
        metric_score = self.metric(tag_preds, tags)

    log = {'train_metric': metric_score.item(), 'loss': loss.item()}
    # metric shown in the progress bar
    return {'loss': loss,
            'log': log,
            'progress_bar': {'train_metric': metric_score.item()}}
def exportValidSEs(nSize=9210):
    """Write valid SEs with their counts, filtering an exception list.

    Args:
        nSize: size suffix of the SE-count dump to load.
    """
    def loadException(path="%s/InValidSEs.txt" % params.FADER_OUT):
        # '#token' lines are substring filters, other lines exact SE names
        invalidSes = set()
        invalidTokens = []
        for line in open(path).readlines():
            line = line.strip()
            if not line:
                # Fix: an empty line used to raise IndexError on line[0]
                continue
            if line[0] == '#':
                invalidTokens.append(line[1:])
            else:
                invalidSes.add(line)
        return invalidSes, invalidTokens

    invalidSes, invalidTokens = loadException()
    d = utils.load_obj("%s/FSECount_%s_0" % (params.FADER_OUT, nSize))
    kvs = utils.sort_dict(d)
    with open("%s/ValidSes.txt" % params.FADER_OUT, "w") as fout:
        for k, v in kvs:
            if k in invalidSes:
                continue
            # idiomatic substring test (was k.__contains__(token))
            if any(token in k for token in invalidTokens):
                continue
            fout.write("%s\t%s\n" % (k, v))
def test_epoch_end(self, outputs: List[Any]) -> Dict: mean_loss = np.stack([x['test_loss'] for x in outputs]).mean() # Computing values for a metric if self.conf.training.metric.functional: true_vals = [x['true_seq'] for x in outputs] y_true = [list for sublist in true_vals for list in sublist] pred_vals = [x['predicted_list'] for x in outputs] y_pred = [list for sublist in pred_vals for list in sublist] test_score = load_obj(self.conf.training.metric.metric_class)( y_true, y_pred, **self.conf.training.metric.params) test_score = torch.tensor(test_score) else: y_true = torch.cat([x['true_seq'] for x in outputs]) y_pred = torch.cat([x['predicted_seq'] for x in outputs]) test_score = self.metric(y_pred.reshape(-1, 1), y_true.reshape(-1, 1)) # PytorchLightning doesn't like not one-element tensors in the output y_true = np.array(y_true).astype(int) y_pred = np.array(y_pred).astype(int) return { 'mean_test_loss': mean_loss, 'test_score': test_score, 'predicted': y_true, 'true': y_pred }
def validation_epoch_end(self, outputs: List[Any]) -> Dict:
    """Aggregate per-batch validation outputs into epoch-level metrics."""
    mean_loss = np.stack([x['valid_loss'] for x in outputs]).mean()
    mean_metric = np.stack([x['step_metric'] for x in outputs]).mean()

    # Computing values for a metric
    if self.conf.training.metric.functional:
        # flatten the per-batch sequences into single lists
        y_true = [tag for x in outputs for tag in x['true_seq']]
        y_pred = [tag for x in outputs for tag in x['predicted_list']]
        valid_score = load_obj(self.conf.training.metric.metric_class)(
            y_true, y_pred, **self.conf.training.metric.params)
        valid_score = torch.tensor(valid_score)
    else:
        y_true = torch.cat([x['true_seq'] for x in outputs])
        y_pred = torch.cat([x['predicted_seq'] for x in outputs])
        valid_score = self.metric(y_pred.reshape(-1, 1), y_true.reshape(-1, 1))

    tensorboard_logs = {
        'valid_score': valid_score,
        'valid_score_mean': mean_metric,
        'valid_mean_loss': mean_loss,
    }
    return {
        'validation_loss': mean_loss,
        'log': tensorboard_logs,
        'progress_bar': tensorboard_logs,
    }
def exportSeCount(nSize=9210):
    """Dump the SE counter sorted by count to a tab-separated text file.

    Args:
        nSize: size suffix of the SE-count dump to load.
    """
    d = utils.load_obj("%s/FSECount_%s_0" % (params.FADER_OUT, nSize))
    kvs = utils.sort_dict(d)
    # Fix: context manager guarantees the file is closed on error.
    with open("%s/FSECountSorted_%s_0" % (params.FADER_OUT, nSize), "w") as fout:
        for k, v in kvs:
            fout.write("%s\t%s\n" % (k, v))
def plotSeCount():
    """Plot the cumulative SE-frequency curve for the JADER data set."""
    counts = utils.load_obj("%s/JADERSeCountFX" % params.JADER_OUT)
    sorted_counts = utils.sort_dict(counts)
    from dataProcessing.plotLib import plotCul2, plotCul, plotHistD
    # plot over the reversed (ascending) counts with a threshold step of 50
    plotCul(sorted_counts[::-1], 50, 1, "JADERSEFreq",
            xLabel="Thresholds of SE Frequency", yLabel="Num. SEs")
def runTTest():
    """Run the t-test pipeline with producer/consumer worker processes.

    Producers split the raw exposure list into params.N_DATA_WORKER chunks and
    push per-chunk work onto a bounded queue; a single daemon consumer drains
    the queue and writes results to the 'ttStatsRe' file.
    """
    producers = []
    consumers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)    # bumped by the consumer; used as a done-signal below
    counter2 = Value('i', 0)
    dList = utils.load_obj("%s/DataDump.o" % OUT_DIR)
    dDrugPair2Id, drugPairList = loadDictName2Id("%s/%sPairs.txt" % (OUT_DIR, PREF), nMax=-1, min=1)
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    dInd2Id, _ = loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))
    dId2Se = utils.reverse_dict(dSe2Id)
    inputList = loadRawExpose()
    nInputList = len(inputList)
    # evenly partition the input across the data workers
    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    # assert 'g-csf' in allDrugNames
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = (i + 1) * nDPerWorker
        endInd = min(endInd, nInputList)
        if i == params.N_DATA_WORKER - 1:
            # last worker picks up the remainder
            endInd = nInputList
        data = inputList[startInd:endInd], drugPairList, dDrug2Id, dId2Se, dList
        producers.append(Process(target=producer, args=(queue, data)))
    fout = open("%s/%s" % (OUT_DIR, "ttStatsRe"), "w")
    p = Process(target=consumer, args=(queue, counter, counter2, fout, []))
    p.daemon = True
    consumers.append(p)
    print("Start Producers...")
    for p in producers:
        p.start()
    print("Start Consumers...")
    for p in consumers:
        p.start()
    for p in producers:
        p.join()
    print("Finish Producers")
    queue.put(None)  # sentinel: tells the consumer no more work is coming
    # busy-wait until the consumer bumps `counter` (its completion signal)
    # NOTE(review): fout is shared with a daemon process and flushed/closed
    # here on the assumption the consumer has finished writing -- confirm.
    while True:
        if counter.value == 0:
            time.sleep(0.01)
            continue
        else:
            break
    fout.flush()
    fout.close()
def test_bilstm_words(emb_matrix, data_path, labels_path, max_seq):
    """Train a word-level BiLSTM on pickled data and report test metrics."""
    print("test_bilstm_words")
    lr = 0.0001
    model = lstm_simple_binary(emb_matrix, lr, nlabels=2, nunits=50, max_seq=max_seq)
    batch_size = 95  # 55
    epochs = 25

    data = load_obj(data_path)
    labels = load_obj(labels_path)

    # 15% held out for test, then 10% of the remainder for validation
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.15)
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.10)
    for tag, arr in (("X_TRAIN", x_train), ("Y_TRAIN", y_train),
                     ("X_TEST", x_test), ("Y_TEST", y_test),
                     ("X_VAL", x_val), ("Y_VAL", y_val)):
        print(tag + ":", arr.shape)

    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                        validation_data=(x_val, y_val), shuffle=True)
    print(model_testing(model, x_test, y_test))
    return model
def reset(self, shape_infopack):
    """Reset the environment with a new target shape.

    Args:
        shape_infopack: tuple of (name, low-res voxel file, high-res voxel
            file, primitive mesh file, edge-loop info file, reference type
            ('rgb' or other), reference image file).

    Returns:
        (valid, reference image, ctrl_info, step_vec), or
        (False, None, None, None) when the loop info / primitive is unusable.
    """
    # the information data of the new shape
    self.name, vox_l_fn, vox_h_fn, prim_mesh_fn, loop_info_fn, ref_type, ref_fn = shape_infopack
    # reset all
    self.step_count = 0
    self.step_vec = np.zeros((self.max_step), dtype=np.int)
    # load reference image
    img = Image.open(ref_fn)
    if ref_type == 'rgb':
        # composite (presumably RGBA) input onto a white canvas first
        # NOTE(review): paste uses img itself as mask -- assumes an alpha channel; confirm.
        image = Image.new('RGB', size=(600, 600), color=(255, 255, 255))
        image.paste(img, (0, 0), mask=img)
        img = image
    # process and reset reference image: grayscale, resized, normalized to [0, 1]
    img = img.convert('L')
    img = img.resize((self.ref_size, self.ref_size), Image.ANTIALIAS)
    self.raw_img = copy.copy(img)
    img = np.array(img)
    img = np.expand_dims(img, axis=0)
    self.ref = img / 255.0
    # load and reset primitive mesh
    self.prim_v, self.prim_f = utils.load_obj(prim_mesh_fn)
    self.init_prim_v = copy.copy(self.prim_v)
    # load and reset edgeloop info
    valid, self.ctrl_v, self.loop, self.loop_map, self.box_loop = self.load_loop(loop_info_fn)
    if valid == False or self.prim_v.shape[0] == 0:
        return False, None, None, None
    # load groundtruth_data (high-res voxel grid)
    shape = utils.load_voxel_data(vox_h_fn).astype(np.int)
    # reset groundtruth
    self.target = shape
    self.target_points = np.argwhere(self.target == 1)
    # alignment and normalization: scale by the target bounding-box diagonal,
    # then translate so the first corner maps to the origin
    c1, c2 = utils.get_corner(self.target_points)
    self.transform_scale = np.linalg.norm(c1 - c2)
    self.transform_move = c1 / self.transform_scale
    self.ctrl_v = self.ctrl_v / self.transform_scale - self.transform_move
    self.prim_v = self.prim_v / self.transform_scale - self.transform_move
    # reset initial IOU
    self.last_IOU = self.compute_IOU(self.prim_v)
    # per-loop feature block: xyz of both control vertices + loop-map id
    ctrl_info = np.zeros((self.loop_num, 2, p.LOOP_FEAT_DIM))
    ctrl_info[:, :, 0:3] = self.ctrl_v
    ctrl_info[:, 0, 3] = self.loop_map[:, 0]
    ctrl_info[:, 1, 3] = self.loop_map[:, 0]
    return valid, self.ref, ctrl_info, self.step_vec
def test_cnn_words_chars(data_pathw, emb_matrixw, data_pathc, emb_matrixc, labels_path, nlabels):
    """Train a combined word+char CNN and report test metrics.

    Args:
        data_pathw / data_pathc: pickled word-level / char-level inputs.
        emb_matrixw / emb_matrixc: embedding matrices for each level.
        labels_path: pickled labels shared by both input views.
        nlabels: number of output classes.
    """
    print("test_cnn_words_chars")
    lr = 0.001
    nfilters = 64
    model = cnn_binary_with_emb_layer_char_word(emb_matrixw, emb_matrixc, lr, nlabels,
                                                nfilters, MAX_SEQUENCE_LENGTH,
                                                MAX_SEQUENCE_CHAR_LENGTH)
    batch_size = 95  # 55
    epochs = 20

    dataw = load_obj(data_pathw)
    datac = load_obj(data_pathc)
    labels = load_obj(labels_path)
    print(labels.shape[0])

    # Fix: the two splits used independent random shuffles, so word rows,
    # char rows and labels were misaligned; a shared random_state keeps the
    # same samples in each partition for both views.
    print("WORD LEVEL")
    x_trainw, x_testw, y_trainw, y_testw = train_test_split(
        dataw, labels, test_size=0.15, random_state=42)
    print("CHAR LEVEL")
    x_trainc, x_testc, y_trainc, y_testc = train_test_split(
        datac, labels, test_size=0.15, random_state=42)

    # slightly up-weight the positive class
    class_weight = {0: 1., 1: 1.3}
    history = model.fit([x_trainw, x_trainc], y_trainw, batch_size=batch_size,
                        epochs=epochs, validation_split=0.10, shuffle=True,
                        class_weight=class_weight)
    print(model_testing_2inputs(model, x_testw, x_testc, y_testw))
    return model
def setup(self, stage: Optional[str] = None) -> None:
    """Load train/test/validation CSVs and build the datasets and collator."""
    folder = self.conf.data.main_folder

    def _read(name: str) -> pd.DataFrame:
        # all splits share the same pipe-separated CSV layout
        return pd.read_csv(f'{folder}{name}', sep='|')

    self.train_df = _read(self.conf.data.train_data_name)
    self.test_df = _read(self.conf.data.test_data_name)
    self.valid_df = _read(self.conf.data.valid_data_name)

    self.collator = load_obj(self.conf.training.collator.name)(
        model_name=self.conf.model.model_name)

    dataset_cls = load_obj(self.conf.training.torch_dataset_class.name)
    self.torch_dataset = dataset_cls
    self.train_dataset = dataset_cls(self.train_df)
    self.test_dataset = dataset_cls(self.test_df)
    self.valid_dataset = dataset_cls(self.valid_df)
def plot3X():
    """3D plot: drug-combination length vs combination count vs report count."""
    import numpy as np
    import matplotlib.pyplot as plt

    dLength = utils.load_obj("%s/FDrugCombLength" % params.FADER_OUT)
    kvs = utils.sort_dict(dLength)
    # histogram of combination lengths
    dCount = dict()
    for _, v in kvs:
        utils.add_dict_counter(dCount, v)
    maxLength = max(dCount.keys())
    x = [i for i in range(1, maxLength + 1)]
    y = np.zeros(maxLength)
    for k, v in dCount.items():
        y[k - 1] = v

    # accumulate the number of reports per combination length
    dLength2NReports = dict()
    kv = []
    vs = []
    with open("%s/FDrug2AllSeList.txt" % params.FADER_OUT) as fin:
        for line in fin:
            line = line.strip().split("$")
            parts = line[0].split(":")
            c = int(parts[1])
            drugCombLenght = len(parts[0].split(","))
            utils.add_dict_counter(dLength2NReports, drugCombLenght, c)
            vs.append(c)
            kv.append([parts[0], c])

    z = np.zeros(maxLength)
    for k, v in dLength2NReports.items():
        z[k - 1] = v

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.plot(x, y, z, marker='>')
    ax.set_xlabel('DrugComb Length')
    ax.set_ylabel('DrugComb Count')
    ax.set_zlabel('NReport')
    # Fix: tight_layout was referenced without being called (plt.tight_layout)
    plt.tight_layout()
    plt.savefig("%s/3DDrugCombLengthReport.png" % params.FIG_DIR)
def test_cnn(data_path, labels_path, emb_matrix, nlabels, max_seq):
    """Train a word-level CNN on pickled data and report test metrics."""
    print(emb_matrix.shape, nlabels, max_seq)
    # Fix: removed the dead lr=0.0001 / nfilters=50 pair that was immediately
    # overwritten; these are the effective values.
    lr = 0.0001
    nfilters = 100
    model = cnn_binary_with_emb_layer_(emb_matrix, lr, nlabels, nfilters, max_seq)
    batch_size = 95  # 55
    epochs = 100

    data = load_obj(data_path)
    labels = load_obj(labels_path)
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.15)

    # slightly up-weight the positive class
    class_weight = {0: 1., 1: 1.3}
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                        validation_split=0.10, shuffle=True, class_weight=class_weight)
    print(model_testing(model, x_test, y_test))
    return model
def __init__(self, hparams: Dict[str, float], conf: DictConfig, tag_to_idx: Dict,
             embedder: Embedder, num_steps: int = 0):
    """Lightning module wrapping a sequence-tagging model.

    Args:
        hparams: hyper-parameter dict (the hydra config as a container).
        conf: hydra config.
        tag_to_idx: tag-name -> index mapping.
        embedder: pre-built embedding provider.
        num_steps: optimizer steps per epoch (used by schedulers).
    """
    super().__init__()
    self.conf = conf
    self.hparams = hparams
    self.tag_to_idx = tag_to_idx
    self.embedder = embedder
    self.num_steps = num_steps

    model_cls = load_obj(self.conf.model.model_class)
    self.model = model_cls(embeddings_dim=self.conf.data.embedding_shape,
                           tag_to_idx=self.tag_to_idx,
                           **self.conf.model.params)

    # class-based metrics are instantiated once here; functional metrics are
    # resolved per step instead
    if self.conf.training.metric.functional is False:
        metric_cls = load_obj(self.conf.training.metric.metric_class)
        self.metric = metric_cls(**self.conf.training.metric.params)
def __init__(self, hparams: Dict[str, float], conf: DictConfig, num_steps: int = 0):
    """Lightning module wrapping a pretrained text classifier.

    Args:
        hparams: hyper-parameter dict (the hydra config as a container).
        conf: hydra config.
        num_steps: optimizer steps per epoch (used by schedulers).
    """
    super().__init__()
    self.conf = conf
    self.hparams = hparams
    self.num_steps = num_steps

    model_cls = load_obj(self.conf.model.model_class)
    self.model = model_cls(pretrained_model_name=self.conf.model.model_name,
                           num_classes=self.conf.data.num_classes)

    # binary task -> BCE on logits; multi-class -> cross-entropy
    if self.conf.data.num_classes == 2:
        self.criterion = torch.nn.BCEWithLogitsLoss()
    else:
        self.criterion = torch.nn.CrossEntropyLoss()

    # class-based metrics are instantiated once here; functional metrics are
    # resolved per step instead
    if self.conf.training.metric.functional is False:
        metric_cls = load_obj(self.conf.training.metric.metric_class)
        self.metric = metric_cls(**self.conf.training.metric.params)

    self.softmax = torch.nn.Softmax(dim=1)
def fillMissingSMILEs():
    """Add Morgan fingerprints for drugs listed in MissingSMILEsF.txt.

    Reads 'name||smiles' lines, computes a Morgan bit vector for each SMILES
    and stores it in the DrugMorganDes dump keyed by lower-cased drug name.
    """
    with open("%s/DrugBank/MissingSMILEsF.txt" % params.DATA_DIR) as fin:
        lines = fin.readlines()
    d = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    for line in lines:
        parts = line.strip().split("||")
        try:
            v = genMorganBitVecFromSmiles(parts[1])
        except Exception:
            # Fix: on a failed SMILES the old bare except fell through and
            # stored a stale (or undefined) `v`; log and skip instead.
            print(parts[1])
            continue
        d[parts[0].lower()] = v
    utils.save_obj(d, "%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
def __init__(self, tile_world, shape, args_file='./args.yml',
             path='/envs/games/zelda_v0/', generation=0, locations=None,
             prefix='..', **kwargs):
    """
    :param tile_world: 2d numpy array of map
    :param shape: (length, height) of the level
    :param args_file: yaml file with game arguments
    :param path: gym_gvgai.dir
    :param generation: int
    :param locations: optional precomputed sprite-location dict; parsed from
        tile_world when empty or None
    """
    super().__init__()
    self.args_file = args_file
    self.args = load_from_yaml(args_file)
    self.floor = self.args.floor[0]
    self.game = self.args.game

    self._length = shape[0]
    self._height = shape[1]

    self.BOUNDARY = load_obj(path, f'{self.game}_boundary.pkl')

    self._tile_world = tile_world
    self.mechanics = self.args.mechanics

    # make folder in levels folder
    self.base_path = path
    self._path = os.path.join(self.base_path, f'{self.game}_poet_levels')
    if not os.path.exists(self._path):
        os.mkdir(self._path)

    self.generation = generation
    # Fix: `locations={}` was a mutable default argument; None preserves the
    # behavior (a falsy value triggers parsing) without shared state.
    self.locations = locations if locations else self._parse_tile_world(tile_world)

    self.id = EvolutionaryGenerator.id
    EvolutionaryGenerator.id += 1

    self.string = str(self)
    self.diff = 1
def stats1(nSize=0):
    """Sort the drug-name counter and dump multi-character names with counts.

    Args:
        nSize: size suffix of the counter dump to load.
    """
    print("Loading...")
    drugComb = utils.load_obj("%s/FDrugNameCount_%s" % (params.FADER_OUT, nSize))
    print("Sorting..")
    kvs = utils.sort_dict(drugComb)
    print("Saving...")
    # Fix: context manager guarantees the file is closed on error.
    with open("%s/FDrugNamesSort_%s" % (params.FADER_OUT, nSize), "w") as fout:
        for k, v in kvs:
            # skip single-character names (likely noise)
            if len(k) <= 1:
                continue
            fout.write("%s$%s\n" % (k, v))
def get_word_struct(self, w):
    """Return the posting structure for word `w`, or None if absent.

    NOTE: Python 2 code (dict.has_key). The reverse index is sharded into
    COUNT_OF_FILES pickle files by murmur hash; shards are loaded lazily.
    """
    w = w.encode('utf-8')
    __w_hash = mmh3.hash(w) % COUNT_OF_FILES
    # lazy-load the shard this word hashes to
    # NOTE(review): __w_hash is already reduced mod COUNT_OF_FILES, so the
    # second "% COUNT_OF_FILES" here is redundant (harmless).
    if not self.r_index.has_key(__w_hash%COUNT_OF_FILES):
        self.r_index[__w_hash] = load_obj("optimized_index_"+str(__w_hash))
    # the shard records which posting-list encoding it was written with;
    # pick the matching decoder once, on first use
    if self.encoder is None:
        if self.r_index[__w_hash]['encoding'] == 'varbyte':
            self.encoder = VarByteEncoder()
        else:
            self.encoder = Simple9()
    if self.r_index[__w_hash].has_key(w):
        return self.r_index[__w_hash][w]
    else:
        return None
def main():
    """Read queries from stdin, run them and print the matching documents.

    NOTE: Python 2 code (print statements, unicode()).
    """
    searcher = Searcher()
    docs = None   # loaded lazily, on the first query only
    qe = QueryExecutor(searcher)
    for line in fileinput.input():
        # TODO: convert the whole line to utf-8 and lower-case in one place
        line = line.replace("\n", "")
        request = line.split('.....')[0]
        request = unicode(request, 'utf-8').lower()
        doc_result = qe.query(request)
        print line
        print len(doc_result)
        if docs is None:
            docs = load_obj("documents")
        for doc_id in doc_result:
            print_error(doc_id)
            print docs[doc_id]
def exportPolySEs():
    """Collect drug combinations whose drugs all have Morgan descriptors and
    write each combination with its side effects to PolySes.txt."""
    drugDesMap = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    seDict = dict()
    dComb2Se = dict()
    with open("%s/FTest/FileMap.txt" % params.FADER_OUT) as fin:
        hashFiles = fin.readlines()
    # only the first N_FILE shard files are processed
    hashFiles = hashFiles[:min(N_FILE, len(hashFiles))]
    for hashId in hashFiles:
        parts = hashId.strip().split("\t")
        hashId = parts[0]
        # register every SE name seen in the file map
        for se in parts[1].split("__"):
            utils.get_update_dict_index(seDict, se)
        path = "%s/FTest/%s" % (params.FADER_OUT, hashId)
        print("Reading... ", path)
        with open(path) as fshard:
            for polySe in fshard:
                polySe = polySe.strip().split("_")
                drugComb = polySe[0]
                seParts = polySe[1].split("\t")
                se = seParts[0]
                # (dead `if seParts[1] == 'inf': pass` branch removed)
                drugs = drugComb.split(",")
                # keep only combinations where every drug has a descriptor
                if all(drug in drugDesMap for drug in drugs):
                    utils.get_insert_key_dict(dComb2Se, drugComb, []).append(se)
    with open("%s/PolySes.txt" % params.FADER_OUT, "w") as fout:
        for drugComb, ses in dComb2Se.items():
            fout.write("%s\t%s\n" % (drugComb, ",".join(ses)))
def main2(line): start_time = time.time() #r_index = load_obj("optimized_index") print("--- %s seconds --- INDEX READING" % (time.time() - start_time)) searcher = Searcher() docs = load_obj("documents") print("--- %s seconds --- DOCS READING" % (time.time() - start_time)) start_time = time.time() qe = QueryExecutor(searcher) # TODO всю строку сразу в uft-8 и lower line = line.replace("\n", "") request = line.split('.....')[0] request = unicode(request, 'utf-8').lower() doc_result = qe.query(request) print line print len(doc_result) for doc_id in doc_result: print docs[doc_id] print("--- %s seconds ---" % (time.time() - start_time))
def plotDrugCombLength():
    """Scatter-plot the distribution of drug-combination lengths (JADER)."""
    lengths = utils.load_obj("%s/DrugCombLength" % params.JADER_OUT)
    sorted_lengths = utils.sort_dict(lengths)

    # histogram: combination length -> number of combinations
    length_counts = dict()
    for _, length in sorted_lengths:
        utils.add_dict_counter(length_counts, length)

    import numpy as np
    max_len = max(length_counts.keys())
    xs = [i for i in range(1, max_len + 1)]
    ys = np.zeros(max_len)
    for length, count in length_counts.items():
        ys[length - 1] = count

    import matplotlib.pyplot as plt
    plt.scatter(xs, ys)
    plt.xlabel("DrugComb length")
    plt.ylabel("Num DrugComb")
    plt.tight_layout()
    plt.savefig("%s/%s.png" % (params.FIG_DIR, "JADERDrugLength"))
def get_model(cfg):
    """Instantiate the model class named in `cfg` with its configured params."""
    model_cls = load_obj(cfg.model.class_name)
    return model_cls(**cfg.model.params)
test_data_path = os.path.join( irosva_path, char_data + "data_" + k + "_test.pkl") id = DataTask(data_path, labels_path, 0.15, None, test_data_path, None) #id = DataTask(data_path, labels_path, 0, 0) else: id = DataTask(data_path, labels_path, test_perc=0, val_per=0.10) irosva_data[k] = id #HAHA DATA print("HAHA") data_path = os.path.join(haha_path, char_data + "data_train.pkl") labels_path = os.path.join(haha_path, "labels_train.pkl") test_data_path = os.path.join(haha_path, char_data + "data_test.pkl") scores_path = os.path.join(haha_path, "scores_train.pkl") scores = load_obj(scores_path) haha_data = {} haha_score_data = {} if MODE_TEST: id = DataTask(data_path, labels_path, 0.15, None, test_data_path, None) haha_data["es"] = id id = DataTask(data_path, scores_path, 0.15, None, test_data_path, None) haha_score_data["es"] = id else: haha_data["es"] = DataTask(data_path, labels_path, test_perc=0, val_per=0.1) haha_score_data["es"] = DataTask(data_path,