Example #1
def run(conf: DictConfig, current_dir) -> None:
    """
    Run the pytorch-lightning model.

    Args:
        conf: hydra config
        current_dir: original working directory
    """
    set_seed(conf.training.random_seed)
    hparams = OmegaConf.to_container(conf)

    trainer = pl.Trainer(**conf.trainer)

    dm = load_obj(conf.training.data_module_name)(hparams=hparams, conf=conf)
    dm.setup()

    model = load_obj(conf.training.lightning_module_name)(hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx,
                                                          embedder=dm.embedder)

    # best_path = 'C:/Users/Ангелина/Python_codes/wsd_train_folder/outputs/2021-02-02_16-54-30/saved_models/epoch=22_valid_score_mean=0.9609.ckpt'
    best_path = 'C:/Users/Ангелина/Python_codes/wsd_train_folder/outputs/2021-02-09_19-27-50_elmo/saved_models/epoch=22_valid_score_mean=0.9617.ckpt'
    model = model.load_from_checkpoint(
        best_path, hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx, embedder=dm.embedder, strict=False
    )
    save_name = best_path.split('/')[-1][:-5]  # drop the '.ckpt' extension
    model_name = f'C:/Users/Ангелина/Python_codes/wsd_train_folder/outputs/2021-02-09_19-27-50_elmo/saved_models/{save_name}.pth'
    print(model_name)
    torch.save(model.wsd_model.state_dict(), model_name)
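The Hydra/PyTorch Lightning examples in this listing resolve classes and functions from dotted-path strings via load_obj, whose body is never shown. A minimal importlib-based sketch that would satisfy calls like load_obj(conf.training.data_module_name) — only the name comes from the snippets, the implementation is an assumption:

import importlib
from typing import Any


def load_obj(obj_path: str) -> Any:
    """Resolve a dotted path such as 'torch.optim.Adam' to the object it names."""
    module_path, obj_name = obj_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_path), obj_name)

For instance, load_obj('torch.optim.Adam')(model.parameters(), lr=1e-3) would instantiate an optimizer from a config string.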
Example #2
    def configure_optimizers(self):

        optimizer = load_obj(self.conf.training.optimizer.name)(
            self.wsd_model.parameters(), **self.conf.training.optimizer.params)

        if 'transformers.get_linear_schedule_with_warmup' not in self.conf.training.scheduler.name:
            scheduler = load_obj(self.conf.training.scheduler.name)(
                optimizer, **self.conf.training.scheduler.params)
            scheduler_dict = {
                'scheduler': scheduler,
                'interval': self.conf.training.scheduler.step,
                'monitor': self.conf.training.scheduler.monitor,
                'name': 'scheduler',
            }
        else:

            # total training steps: steps per epoch times the planned epoch budget
            num_train_steps = self.num_steps * (self.conf.trainer.min_epochs + 7)
            # warm up over the first 10% of steps
            num_warm = round(num_train_steps * 0.1)
            scheduler = load_obj(self.conf.training.scheduler.name)(
                optimizer,
                num_training_steps=num_train_steps,
                num_warmup_steps=num_warm)
            scheduler_dict = {'scheduler': scheduler, 'name': 'scheduler'}

        return [optimizer], [scheduler_dict]
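configure_optimizers above reads optimizer and scheduler settings from nested config keys. A hypothetical OmegaConf fragment that would satisfy those attribute accesses — every key and value here is an illustrative assumption, not taken from the source:

from omegaconf import OmegaConf

conf = OmegaConf.create({
    'training': {
        'optimizer': {'name': 'torch.optim.AdamW', 'params': {'lr': 3e-4}},
        'scheduler': {
            'name': 'torch.optim.lr_scheduler.ReduceLROnPlateau',
            'params': {'mode': 'min', 'patience': 2},
            'step': 'epoch',
            'monitor': 'valid_mean_loss',
        },
    },
    'trainer': {'min_epochs': 10},
})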
Example #3
def test_main(line):
    r_index = load_obj("optimized_index")
    searcher = Searcher()
    docs = load_obj("documents")

    print("readed")
    qe = QueryExecutor(searcher)
    start_time = time.time()
    doc_result = qe.query(line)

    """
    if '&' in line:

        request = line.split(' & ')
        for i in range(len(request)):
            request[i] = unicode(request[i], 'utf-8').lower()

        doc_result = searcher.and_word_list(request)

    else:
        doc_result = searcher.find_word(unicode(line, 'utf-8').lower())
    """

    print(line)
    print(len(doc_result))
    for doc_id in doc_result:
        print(docs[doc_id])

    print("--- %s seconds ---" % (time.time() - start_time))
Example #4
def run(conf: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        new_dir:
        conf: hydra config

    """
    set_seed(conf.training.random_seed)

    hparams = OmegaConf.to_container(conf)

    # log_save_path = conf.general.all_logs_storage_path

    conf.callbacks.model_checkpoint.params.filepath = os.getcwd() + conf.callbacks.model_checkpoint.params.filepath

    checkpoint_callback: ModelCheckpoint = ModelCheckpoint(**conf.callbacks.model_checkpoint.params)
    early_stop_callback = EarlyStopping(**conf.callbacks.early_stopping.params)

    loggers = []
    if conf.logging.enable_logging:
        for logger in conf.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    trainer = pl.Trainer(logger=loggers, checkpoint_callback=checkpoint_callback, callbacks=[early_stop_callback],
                         **conf.trainer)

    dm = load_obj(conf.training.data_module_name)(hparams=hparams, conf=conf)
    dm.setup()
    num_steps_in_epoch = len(dm.train_dataloader())

    model = load_obj(conf.training.lightning_module_name)(hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx,
                                                          embedder=dm.embedder, num_steps=num_steps_in_epoch)

    trainer.fit(model, dm)

    if conf.general.save_pytorch_model:
        if conf.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            print('Best model score ', trainer.checkpoint_callback.best_model_score)
            # extract file name without folder and extension
            save_name = best_path.split('/')[-1][:-5]
            model = model.load_from_checkpoint(
                best_path, hparams=hparams, conf=conf, tag_to_idx=dm.tag_to_idx, embedder=dm.embedder, strict=False
            )
            model_name = f'saved_models/{save_name}.pth'
            print(model_name)
            torch.save(model.model.state_dict(), model_name)
        else:
            os.makedirs('saved_models', exist_ok=True)
            model_name = 'saved_models/last.pth'
            print(model_name)
            torch.save(model.model.state_dict(), model_name)

    trainer.test(model=model, datamodule=dm)
Example #5
def exportAllSes3():
    seList = utils.load_obj("%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)[2]
    # seList = ['product dose omission']
    nSize = 50
    import os
    p = "%s/FSUBTEST/3/*" % params.JADER_OUT
    p = p.replace(" ", "\\ ")  # escape spaces for the shell command below

    cmd = "rm %s" % p
    try:
        os.system(cmd)
    except OSError:
        pass
    pathInfo1 = "%s/FSUBTEST/3/FileMap.txt" % params.JADER_OUT
    pathIn1 = "%s/SUB/F3" % (params.JADER_OUT)
    dirOut1 = "%s/FSUBTEST/3" % params.JADER_OUT
    fFileNameMap = open(pathInfo1, "w")
    fFileNameMap.close()

    nSeg = max((len(seList) + nSize - 1) // nSize, 1)  # ceil division so the final partial chunk is not dropped

    for i in range(nSeg):
        start = i * nSize
        end = min((i + 1) * nSize, len(seList))
        exportBySE(seList[start:end], pathIn1, dirOut1, pathInfo1)
Example #6
    def recieveTaskAndReturnAnswer(self):

        path = os.path.join(self.root, self.subfolders['sent_by_parent'])

        task_params = load_obj(path, f'child{self.id}.pkl')
        os.remove(os.path.join(path, f'child{self.id}') + '.pkl')

        if "resend" in task_params:
            print("asking for work to be resent")
            self.placeChildFlag(
                os.path.join(self.root, self.subfolders['send_to_parent'],
                             f'resend{self.id}.txt'))
            time.sleep(10)
            self.placeChildFlag(self.available)
            return

        try:
            answer = self.parseRecievedTask(task_params)
        except ConnectionResetError as e:
            # die gracefully here.
            print(f"{self.id} died")
            os.remove(self.alive)
            self.placeChildFlag(
                os.path.join(self.root, self.subfolders['send_to_parent'],
                             f'dead{self.id}.txt'))
            return

        self.returnAnswer(answer)
        del answer
        self.placeChildFlag(self.available)
        print('waiting')
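Unlike the dotted-path importer sketched earlier, this worker snippet calls load_obj(path, filename) with a directory and a pickle file name. A plausible pickle-based variant, assumed rather than shown in the source:

import os
import pickle
from typing import Any


def load_obj(path: str, filename: str) -> Any:
    """Unpickle and return the object stored at path/filename."""
    with open(os.path.join(path, filename), 'rb') as f:
        return pickle.load(f)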
Example #7
def stats2(nSize=0):
    print("Loading...")
    drugComb = utils.load_obj("%s/FDrugCombCount_%s" %
                              (params.FADER_OUT, nSize))

    print("Sorting..")
    kvs = utils.sort_dict(drugComb)

    fout = open("%s/FDrugCombSort_%s" % (params.FADER_OUT, nSize), "w")

    print("Saving...")
    cc = 0
    for kv in kvs:
        k, v = kv
        # print(k, v)
        cc += v
        fout.write("%s$%s\n" % (",".join(k), v))
    fout.close()
    print("Total: %s cases" % cc)
    from plotLib import plotCul2

    plotCul2(kvs[::-1],
             200,
             1,
             "SelectedCombDrugCutOff",
             xLabel="ThreshHold: Freq >=",
             yLabel="Number of Combs")
Example #8
    def validation_step(self, batch, *args, **kwargs):

        sentences, lengths, tags = batch
        embeddings = self.embedder(sentences)

        tag_preds, loss, tag_preds_list = self.model(embeddings, lengths, tags)

        if self.conf.training.metric.functional:
            tags = tags.flatten().tolist()
            tags = [i for i in tags if i != self.tag_to_idx['PAD']]

            metric_score = load_obj(self.conf.training.metric.metric_class)(
                tags, tag_preds_list, **self.conf.training.metric.params)
            metric_score = torch.tensor(metric_score)
        else:
            tags = tags.flatten()
            tags = tags[tags != self.tag_to_idx['PAD']]
            metric_score = self.metric(tag_preds, tags)

        log = {'valid_loss': loss.item()}

        return {
            'valid_loss': loss,
            'log': log,
            'step_metric': metric_score,
            'predicted_list': tag_preds_list,
            'predicted_seq': tag_preds,
            'true_seq': tags
        }
Example #9
def exportAllSesG2():
    seList = utils.load_obj("%s/SeTopList.txt" % params.CAD_OUT)
    print(seList[:20])
    # seList = ['product dose omission']
    nSize = 50
    import os
    p = "%s/FSUBTEST/2/*" % params.CAD_OUT
    p = p.replace(" ", "\\ ")  # escape spaces for the shell command below

    cmd = "rm %s" % p
    try:
        os.system(cmd)
    except OSError:
        pass
    pathInfo1 = "%s/FSUBTEST/2/NGFileMap.txt" % params.CAD_OUT
    pathIn1 = "%s/CADER.txt" % (params.CAD_OUT)
    dirOut1 = "%s/FSUBTEST/2" % params.CAD_OUT
    fFileNameMap = open(pathInfo1, "w")
    fFileNameMap.close()

    nSeg = max((len(seList) + nSize - 1) // nSize, 1)  # ceil division so the final partial chunk is not dropped

    for i in range(nSeg):
        start = i * nSize
        end = min((i + 1) * nSize, len(seList))
        exportBySE(seList[start:end], pathIn1, dirOut1, pathInfo1)
Example #10
    def training_step(self, batch, *args, **kwargs):

        sentences, lengths, tags = batch

        embeddings = self.embedder(sentences)

        tag_preds, loss, tag_preds_list = self.model(embeddings, lengths, tags)

        # if the metric we are using is a function
        if self.conf.training.metric.functional:
            # Creating flatten tags list for computing score with sklearn
            tags = tags.flatten().tolist()
            tags_list = [i for i in tags if i != self.tag_to_idx['PAD']]

            metric_score = load_obj(self.conf.training.metric.metric_class)(
                tags_list, tag_preds_list, **self.conf.training.metric.params)
            metric_score = torch.tensor(metric_score)
        else:
            tags = tags.flatten()
            tags = tags[tags != self.tag_to_idx['PAD']]
            metric_score = self.metric(tag_preds, tags)

        log = {'train_metric': metric_score.item(), 'loss': loss.item()}
        # metric to be logged to a progress bar
        prog_log = {'train_metric': metric_score.item()}

        return {'loss': loss, 'log': log, 'progress_bar': prog_log}
Example #11
def exportValidSEs(nSize=9210):
    def loadException(path="%s/InValidSEs.txt" % params.FADER_OUT):
        lines = open(path).readlines()
        invalidSes = set()
        invalidTokens = list()
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                invalidTokens.append(line[1:])
            else:
                invalidSes.add(line)
        return invalidSes, invalidTokens

    invalidSes, invalidTokens = loadException()

    fout = open("%s/ValidSes.txt" % params.FADER_OUT, "w")
    d = utils.load_obj("%s/FSECount_%s_0" % (params.FADER_OUT, nSize))
    kvs = utils.sort_dict(d)

    for kv in kvs:
        k, v = kv
        if k in invalidSes:
            continue
        isInvalid = False

        for token in invalidTokens:
            if token in k:
                isInvalid = True
                break
        if isInvalid:
            continue
        fout.write("%s\t%s\n" % (k, v))
    fout.close()
Example #12
    def test_epoch_end(self, outputs: List[Any]) -> Dict:

        mean_loss = np.stack([x['test_loss'] for x in outputs]).mean()

        # Computing values for a metric
        if self.conf.training.metric.functional:

            true_vals = [x['true_seq'] for x in outputs]
            y_true = [item for sublist in true_vals for item in sublist]

            pred_vals = [x['predicted_list'] for x in outputs]
            y_pred = [item for sublist in pred_vals for item in sublist]

            test_score = load_obj(self.conf.training.metric.metric_class)(
                y_true, y_pred, **self.conf.training.metric.params)
            test_score = torch.tensor(test_score)

        else:
            y_true = torch.cat([x['true_seq'] for x in outputs])
            y_pred = torch.cat([x['predicted_seq'] for x in outputs])

            test_score = self.metric(y_pred.reshape(-1, 1),
                                     y_true.reshape(-1, 1))

        # PyTorch Lightning does not accept multi-element tensors in the output dict
        y_true = np.array(y_true).astype(int)
        y_pred = np.array(y_pred).astype(int)

        return {
            'mean_test_loss': mean_loss,
            'test_score': test_score,
            'predicted': y_pred,
            'true': y_true
        }
Example #13
    def validation_epoch_end(self, outputs: List[Any]) -> Dict:

        mean_loss = np.stack([x['valid_loss'] for x in outputs]).mean()
        mean_metric = np.stack([x['step_metric'] for x in outputs]).mean()
        # Computing values for a metric

        if self.conf.training.metric.functional:

            true_vals = [x['true_seq'] for x in outputs]
            y_true = [item for sublist in true_vals for item in sublist]

            pred_vals = [x['predicted_list'] for x in outputs]
            y_pred = [item for sublist in pred_vals for item in sublist]

            valid_score = load_obj(self.conf.training.metric.metric_class)(
                y_true, y_pred, **self.conf.training.metric.params)
            valid_score = torch.tensor(valid_score)
        else:
            y_true = torch.cat([x['true_seq'] for x in outputs])
            y_pred = torch.cat([x['predicted_seq'] for x in outputs])

            valid_score = self.metric(y_pred.reshape(-1, 1),
                                      y_true.reshape(-1, 1))

        tensorboard_logs = {
            'valid_score': valid_score,
            'valid_score_mean': mean_metric,
            'valid_mean_loss': mean_loss
        }

        return {
            'validation_loss': mean_loss,
            'log': tensorboard_logs,
            'progress_bar': tensorboard_logs
        }
Example #14
def exportSeCount(nSize=9210):
    d = utils.load_obj("%s/FSECount_%s_0" % (params.FADER_OUT, nSize))
    kvs = utils.sort_dict(d)
    fout = open("%s/FSECountSorted_%s_0" % (params.FADER_OUT, nSize), "w")
    for kv in kvs:
        k, v = kv
        fout.write("%s\t%s\n" % (k, v))
    fout.close()
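The utils.load_obj / utils.sort_dict pair used throughout the FADER/JADER snippets is also not defined in this listing. A sketch consistent with how the results are consumed ((k, v) iteration, kvs[::-1] for ascending plots); both bodies are assumptions:

import pickle
from typing import Any, Dict, List, Tuple


def load_obj(path: str) -> Any:
    """Unpickle the object stored at path."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def sort_dict(d: Dict) -> List[Tuple]:
    """Return (key, value) pairs sorted by value in descending order."""
    return sorted(d.items(), key=lambda kv: kv[1], reverse=True)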
Example #15
def plotSeCount():
    seCount = utils.load_obj("%s/JADERSeCountFX" % params.JADER_OUT)
    kvs = utils.sort_dict(seCount)

    from dataProcessing.plotLib import plotCul

    plotCul(kvs[::-1], 50, 1, "JADERSEFreq", xLabel="Thresholds of SE Frequency", yLabel="Num. SEs")
Example #16
def runTTest():
    producers = []
    consumers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    dList = utils.load_obj("%s/DataDump.o" % OUT_DIR)
    dDrugPair2Id, drugPairList = loadDictName2Id("%s/%sPairs.txt" %
                                                 (OUT_DIR, PREF),
                                                 nMax=-1,
                                                 min=1)
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    dInd2Id, _ = loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))
    dId2Se = utils.reverse_dict(dSe2Id)

    inputList = loadRawExpose()
    nInputList = len(inputList)

    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    # assert 'g-csf' in allDrugNames
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = (i + 1) * nDPerWorker
        endInd = min(endInd, nInputList)
        if i == params.N_DATA_WORKER - 1:
            endInd = nInputList
        data = inputList[
            startInd:endInd], drugPairList, dDrug2Id, dId2Se, dList
        producers.append(Process(target=producer, args=(queue, data)))

    fout = open("%s/%s" % (OUT_DIR, "ttStatsRe"), "w")
    p = Process(target=consumer, args=(queue, counter, counter2, fout, []))
    p.daemon = True
    consumers.append(p)

    print("Start Producers...")
    for p in producers:
        p.start()
    print("Start Consumers...")
    for p in consumers:
        p.start()

    for p in producers:
        p.join()
    print("Finish Producers")

    queue.put(None)

    while True:
        if counter.value == 0:
            time.sleep(0.01)
            continue
        else:
            break
    fout.flush()
    fout.close()
Example #17
def test_bilstm_words(emb_matrix, data_path, labels_path, max_seq):
    print("test_bilstm_words")
    lr = 0.0001
    model = lstm_simple_binary(emb_matrix,
                               lr,
                               nlabels=2,
                               nunits=50,
                               max_seq=max_seq)

    #model = lstm_simple_binary_attent(embedding_matrix=emb_matrix,lr=lr, nlabels=labels.shape[1], nunits=50, max_seq=max_seq)

    batch_size = 95  # 55
    epochs = 25

    data = load_obj(data_path)
    labels = load_obj(labels_path)

    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.10)

    print("X_TRAIN:", x_train.shape)
    print("Y_TRAIN:", y_train.shape)
    print("X_TEST:", x_test.shape)
    print("Y_TEST:", y_test.shape)
    print("X_VAL:", x_val.shape)
    print("Y_VAL:", y_val.shape)

    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(x_val, y_val),
                        shuffle=True)

    #plot_model_history(history)

    print(model_testing(model, x_test, y_test))

    return model
Example #18
    def reset(self, shape_infopack):
        
        #the information data of the new shape
        self.name, vox_l_fn, vox_h_fn, prim_mesh_fn, loop_info_fn, ref_type, ref_fn = shape_infopack
        
        #reset all
        self.step_count = 0
        self.step_vec = np.zeros((self.max_step), dtype=int)  # np.int was removed in recent NumPy
        
        #load reference image
        img = Image.open(ref_fn)
        if ref_type == 'rgb':
            image = Image.new('RGB', size=(600, 600), color=(255,255,255))
            image.paste(img, (0, 0), mask=img)
            img=image
            
        #process and reset reference image
        img = img.convert('L')
        img = img.resize((self.ref_size, self.ref_size), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
        self.raw_img=copy.copy(img)
        img = np.array(img)
        img = np.expand_dims(img, axis=0)
        self.ref=img/255.0
        
        #load and reset primitive mesh
        self.prim_v, self.prim_f = utils.load_obj(prim_mesh_fn)
        self.init_prim_v=copy.copy(self.prim_v)

        #load and reset edgeloop info
        valid, self.ctrl_v, self.loop, self.loop_map, self.box_loop = self.load_loop(loop_info_fn)
        if not valid or self.prim_v.shape[0] == 0:
            return False, None, None, None

        #load groundtruth_data
        shape = utils.load_voxel_data(vox_h_fn).astype(int)
        
        #reset groundtruth
        self.target = shape
        self.target_points=np.argwhere(self.target==1)
        
        #alignment and normalization     
        c1,c2 = utils.get_corner(self.target_points)
        self.transform_scale = np.linalg.norm(c1-c2)        
        self.transform_move = c1/self.transform_scale
        self.ctrl_v=self.ctrl_v/self.transform_scale-self.transform_move
        self.prim_v=self.prim_v/self.transform_scale-self.transform_move
        
        #reset initial IOU
        self.last_IOU = self.compute_IOU(self.prim_v)
        
        ctrl_info=np.zeros((self.loop_num, 2, p.LOOP_FEAT_DIM))
        ctrl_info[:,:,0:3] = self.ctrl_v
        ctrl_info[:,0,3] = self.loop_map[:,0]
        ctrl_info[:,1,3] = self.loop_map[:,0]
        
        return valid, self.ref, ctrl_info, self.step_vec
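In the snippet above, utils.load_obj(prim_mesh_fn) returns a (vertices, faces) pair, so here load_obj is a Wavefront .obj mesh loader rather than a pickle or import helper. A minimal sketch of such a loader, assumed from how prim_v and prim_f are used:

import numpy as np


def load_obj(path):
    """Parse vertex ('v') and triangular face ('f') records from a Wavefront .obj file."""
    vertices, faces = [], []
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            if parts[0] == 'v':
                vertices.append([float(x) for x in parts[1:4]])
            elif parts[0] == 'f':
                # face tokens may be 'v', 'v/vt' or 'v/vt/vn'; .obj indices are 1-based
                faces.append([int(tok.split('/')[0]) - 1 for tok in parts[1:4]])
    return np.array(vertices, dtype=float), np.array(faces, dtype=int)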
Example #19
def test_cnn_words_chars(data_pathw, emb_matrixw, data_pathc, emb_matrixc,
                         labels_path, nlabels):
    print("test_cnn_words_chars")
    lr = 0.001
    nfilters = 64

    model = cnn_binary_with_emb_layer_char_word(emb_matrixw, emb_matrixc, lr,
                                                nlabels, nfilters,
                                                MAX_SEQUENCE_LENGTH,
                                                MAX_SEQUENCE_CHAR_LENGTH)

    batch_size = 95  # 55
    epochs = 20

    dataw = load_obj(data_pathw)
    datac = load_obj(data_pathc)
    labels = load_obj(labels_path)

    print(labels.shape[0])
    print("WORD LEVEL")

    x_trainw, x_testw, y_trainw, y_testw = train_test_split(dataw,
                                                            labels,
                                                            test_size=0.15)

    print("CHAR LEVEL")
    x_trainc, x_testc, y_trainc, y_testc = train_test_split(datac,
                                                            labels,
                                                            test_size=0.15)
    class_weight = {0: 1., 1: 1.3}
    history = model.fit([x_trainw, x_trainc],
                        y_trainw,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=0.10,
                        shuffle=True,
                        class_weight=class_weight)

    #plot_model_history(history)

    print(model_testing_2inputs(model, x_testw, x_testc, y_testw))

    return model
Example #20
    def setup(self, stage: Optional[str] = None) -> None:
        # Load train-test-validation datasets
        self.train_df = pd.read_csv(
            f'{self.conf.data.main_folder}{self.conf.data.train_data_name}',
            sep='|')
        self.test_df = pd.read_csv(
            f'{self.conf.data.main_folder}{self.conf.data.test_data_name}',
            sep='|')
        self.valid_df = pd.read_csv(
            f'{self.conf.data.main_folder}{self.conf.data.valid_data_name}',
            sep='|')

        self.collator = load_obj(self.conf.training.collator.name)(
            model_name=self.conf.model.model_name)

        self.torch_dataset = load_obj(
            self.conf.training.torch_dataset_class.name)
        self.train_dataset = self.torch_dataset(self.train_df)
        self.test_dataset = self.torch_dataset(self.test_df)
        self.valid_dataset = self.torch_dataset(self.valid_df)
Example #21
def plot3X():
    dLength = utils.load_obj("%s/FDrugCombLength" % params.FADER_OUT)

    kvs = utils.sort_dict(dLength)
    dCount = dict()
    for kv in kvs:
        _, v = kv
        utils.add_dict_counter(dCount, v)

    maxLength = max(dCount.keys())
    x = [i for i in range(1, maxLength + 1)]
    import numpy as np

    y = np.zeros(maxLength)
    for k, v in dCount.items():
        y[k - 1] = v

    fin = open("%s/FDrug2AllSeList.txt" % params.FADER_OUT)
    dLength2NReports = dict()
    kv = []
    vs = []
    while True:
        line = fin.readline()
        if line == "":
            break
        line = line.strip().split("$")
        parts = line[0].split(":")
        c = int(parts[1])
        drugCombLength = len(parts[0].split(","))
        utils.add_dict_counter(dLength2NReports, drugCombLength, c)
        vs.append(c)
        kv.append([parts[0], c])

    # import matplotlib.pyplot as plt
    # import numpy as np
    # maxX = max(dLength2NReports.keys())
    x = [i for i in range(1, maxLength + 1)]
    z = np.zeros(maxLength)
    for k, v in dLength2NReports.items():
        z[k - 1] = v

    import matplotlib.pyplot as plt

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    ax.plot(x, y, z, marker='>')

    ax.set_xlabel('DrugComb Length')
    ax.set_ylabel('DrugComb Count')
    ax.set_zlabel('NReport')
    plt.tight_layout()
    plt.savefig("%s/3DDrugCombLengthReport.png" % params.FIG_DIR)
Example #22
def test_cnn(data_path, labels_path, emb_matrix, nlabels, max_seq):
    print(emb_matrix.shape, nlabels, max_seq)
    lr = 0.0001
    nfilters = 100

    model = cnn_binary_with_emb_layer_(emb_matrix, lr, nlabels, nfilters,
                                       max_seq)

    batch_size = 95  # 55
    epochs = 100

    data = load_obj(data_path)
    labels = load_obj(labels_path)

    #x_train, y_train, x_test, y_test, x_val, y_val = create_all_data(data_path, labels_path, test_perc=0.15,
    #                                                                       val_per=0.1, val_bias=0)

    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)
    #x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.10)

    class_weight = {0: 1., 1: 1.3}

    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=0.10,
                        shuffle=True,
                        class_weight=class_weight)

    #plot_model_history(history)

    print(model_testing(model, x_test, y_test))

    return model
Example #23
    def __init__(self,
                 hparams: Dict[str, float],
                 conf: DictConfig,
                 tag_to_idx: Dict,
                 embedder: Embedder,
                 num_steps: int = 0):
        super().__init__()

        self.conf = conf
        self.hparams = hparams
        self.tag_to_idx = tag_to_idx
        self.embedder = embedder
        self.num_steps = num_steps

        self.model = load_obj(self.conf.model.model_class)(
            embeddings_dim=self.conf.data.embedding_shape,
            tag_to_idx=self.tag_to_idx,
            **self.conf.model.params)
        # if the metric we are using is a class
        if self.conf.training.metric.functional is False:
            self.metric = load_obj(self.conf.training.metric.metric_class)(
                **self.conf.training.metric.params)
Example #24
    def __init__(self,
                 hparams: Dict[str, float],
                 conf: DictConfig,
                 num_steps: int = 0):
        super().__init__()

        self.conf = conf
        self.hparams = hparams
        self.num_steps = num_steps
        self.model = load_obj(self.conf.model.model_class)(
            pretrained_model_name=self.conf.model.model_name,
            num_classes=self.conf.data.num_classes)

        if self.conf.data.num_classes == 2:
            self.criterion = torch.nn.BCEWithLogitsLoss()
        else:
            self.criterion = torch.nn.CrossEntropyLoss()

        # if the metric we are using is a class
        if self.conf.training.metric.functional is False:
            self.metric = load_obj(self.conf.training.metric.metric_class)(
                **self.conf.training.metric.params)
        self.softmax = torch.nn.Softmax(dim=1)
Example #25
def fillMissingSMILEs():
    fin = open("%s/DrugBank/MissingSMILEsF.txt" % params.DATA_DIR)
    lines = fin.readlines()
    d = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    for line in lines:
        line = line.strip()
        parts = line.split("||")
        try:
            v = genMorganBitVecFromSmiles(parts[1])
        except Exception:
            print(parts[1])
            continue  # skip drugs whose SMILES cannot be parsed; v would be stale or undefined
        d[parts[0].lower()] = v

    utils.save_obj(d, "%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
Example #26
    def __init__(self,
                 tile_world,
                 shape,
                 args_file='./args.yml',
                 path='/envs/games/zelda_v0/',
                 generation=0,
                 locations=None,  # a mutable {} default would be shared across instances
                 prefix='..',
                 **kwargs):
        """

        :param tile_world: 2d numpy array of map
        :param path: gym_gvgai.dir
        :param mechanics: list of sprites you would like to be able to mutate into
        :param generation: int
        """
        super().__init__()

        self.args_file = args_file

        self.args = load_from_yaml(args_file)
        self.floor = self.args.floor[0]

        self.game = self.args.game
        self._length = shape[0]
        self._height = shape[1]

        self.BOUNDARY = load_obj(path, f'{self.game}_boundary.pkl')

        self._tile_world = tile_world

        self.mechanics = self.args.mechanics
        # make folder in levels folder
        self.base_path = path
        self._path = os.path.join(self.base_path, f'{self.game}_poet_levels')
        if not os.path.exists(self._path):
            os.mkdir(self._path)

        self.generation = generation
        self.locations = locations if bool(
            locations) else self._parse_tile_world(tile_world)

        self.id = EvolutionaryGenerator.id
        EvolutionaryGenerator.id += 1

        self.string = str(self)

        self.diff = 1
Example #27
def stats1(nSize=0):
    print("Loading...")
    drugComb = utils.load_obj("%s/FDrugNameCount_%s" % (params.FADER_OUT, nSize))
    print("Sorting..")
    kvs = utils.sort_dict(drugComb)

    fout = open("%s/FDrugNamesSort_%s" % (params.FADER_OUT, nSize), "w")
    print("Saving...")
    for kv in kvs:
        k, v = kv
        if len(k) <= 1:
            continue

        fout.write("%s$%s\n" % (k, v))

    fout.close()
Example #28
    def get_word_struct(self, w):
        w = w.encode('utf-8')
        __w_hash = mmh3.hash(w) % COUNT_OF_FILES

        if __w_hash not in self.r_index:  # already reduced mod COUNT_OF_FILES above
            self.r_index[__w_hash] = load_obj("optimized_index_" + str(__w_hash))

        if self.encoder is None:
            if self.r_index[__w_hash]['encoding'] == 'varbyte':
                self.encoder = VarByteEncoder()
            else:
                self.encoder = Simple9()

        if w in self.r_index[__w_hash]:
            return self.r_index[__w_hash][w]
        else:
            return None
Example #29
def main():

    searcher = Searcher()
    docs = None
    qe = QueryExecutor(searcher)

    for line in fileinput.input():
        # TODO: decode the whole line to utf-8 and lowercase it in one place
        line = line.replace("\n", "")
        request = line.split('.....')[0]
        request = unicode(request, 'utf-8').lower()

        doc_result = qe.query(request)
        print(line)
        print(len(doc_result))
        if docs is None:
            docs = load_obj("documents")
        for doc_id in doc_result:
            print_error(doc_id)
            print(docs[doc_id])
Example #30
def exportPolySEs():
    drugDesMap = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    seDict = dict()
    dComb2Se = dict()
    fin = open("%s/FTest/FileMap.txt" % params.FADER_OUT)
    hashFiles = fin.readlines()
    ln = min(N_FILE, len(hashFiles))
    hashFiles = hashFiles[:ln]
    for hashId in hashFiles:
        parts = hashId.strip().split("\t")
        hashId = parts[0]
        ses = parts[1].split("__")
        for se in ses:
            utils.get_update_dict_index(seDict, se)
        path = "%s/FTest/%s" % (params.FADER_OUT, hashId)
        print("Reading... ", path)
        polySes = open(path).readlines()
        for polySe in polySes:
            polySe = polySe.strip().split("_")
            drugComb = polySe[0]
            seParts = polySe[1].split("\t")
            se = seParts[0]
            if seParts[1] == 'inf':
                pass  # no-op: combinations with an 'inf' score are kept as-is
            drugs = drugComb.split(",")
            isValidComb = True
            # print(drugs)
            for drug in drugs:
                if drug not in drugDesMap:
                    isValidComb = False
                    break

            if isValidComb:
                # print(drugComb)
                sel = utils.get_insert_key_dict(dComb2Se, drugComb, [])
                sel.append(se)

    fout = open("%s/PolySes.txt" % params.FADER_OUT, "w")
    for drugComb, ses in dComb2Se.items():
        fout.write("%s\t%s\n" % (drugComb, ",".join(ses)))
    fout.close()
Example #31
def main2(line):
    start_time = time.time()
    #r_index = load_obj("optimized_index")
    print("--- %s seconds --- INDEX READING" % (time.time() - start_time))
    searcher = Searcher()
    docs = load_obj("documents")
    print("--- %s seconds --- DOCS READING" % (time.time() - start_time))
    start_time = time.time()
    qe = QueryExecutor(searcher)


    # TODO: decode the whole line to utf-8 and lowercase it in one place
    line = line.replace("\n", "")
    request = line.split('.....')[0]
    request = unicode(request, 'utf-8').lower()
    doc_result = qe.query(request)
    print(line)
    print(len(doc_result))
    for doc_id in doc_result:
        print(docs[doc_id])

    print("--- %s seconds ---" % (time.time() - start_time))
Example #32
def plotDrugCombLength():
    dLength = utils.load_obj("%s/DrugCombLength" % params.JADER_OUT)

    kvs = utils.sort_dict(dLength)
    dCount = dict()
    for kv in kvs:
        _, v = kv
        utils.add_dict_counter(dCount, v)

    maxLength = max(dCount.keys())
    x = [i for i in range(1, maxLength+1)]
    import numpy as np

    y = np.zeros(maxLength)
    for k, v in dCount.items():
        y[k-1] = v

    import matplotlib.pyplot as plt
    plt.scatter(x, y)
    plt.xlabel("DrugComb length")
    plt.ylabel("Num DrugComb")
    plt.tight_layout()
    plt.savefig("%s/%s.png" % (params.FIG_DIR, "JADERDrugLength"))
Example #33
def get_model(cfg):
    model = load_obj(cfg.model.class_name)
    model = model(**cfg.model.params)

    return model
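A hypothetical call to get_model, with the cfg contents invented purely for illustration:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    'model': {
        'class_name': 'torchvision.models.resnet18',
        'params': {'num_classes': 10},
    },
})
model = get_model(cfg)  # same as torchvision.models.resnet18(num_classes=10)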
Example #34
            test_data_path = os.path.join(
                irosva_path, char_data + "data_" + k + "_test.pkl")
            id = DataTask(data_path, labels_path, 0.15, None, test_data_path,
                          None)
            #id = DataTask(data_path, labels_path, 0, 0)
        else:
            id = DataTask(data_path, labels_path, test_perc=0, val_per=0.10)
        irosva_data[k] = id

    #HAHA DATA
    print("HAHA")
    data_path = os.path.join(haha_path, char_data + "data_train.pkl")
    labels_path = os.path.join(haha_path, "labels_train.pkl")
    test_data_path = os.path.join(haha_path, char_data + "data_test.pkl")
    scores_path = os.path.join(haha_path, "scores_train.pkl")
    scores = load_obj(scores_path)

    haha_data = {}
    haha_score_data = {}

    if MODE_TEST:
        id = DataTask(data_path, labels_path, 0.15, None, test_data_path, None)
        haha_data["es"] = id
        id = DataTask(data_path, scores_path, 0.15, None, test_data_path, None)
        haha_score_data["es"] = id
    else:
        haha_data["es"] = DataTask(data_path,
                                   labels_path,
                                   test_perc=0,
                                   val_per=0.1)
        haha_score_data["es"] = DataTask(data_path,