Code Example #1
File: drugDes.py Project: anhnda/FDAPolyADR
def exportMorginFingerprint():
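    # Read DrugBank drug entries, compute a Morgan fingerprint bit vector for each
    # small-molecule drug with a usable SMILES string, log drugs whose SMILES fail
    # to parse, and save the resulting name -> fingerprint dictionary.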
    fin = open("%s/DrugBank/DrugBankNames.txt" % params.DATA_DIR)
    fsmileMissng = open("%s/DrugBank/MissingSMILEs.txt" % params.DATA_DIR, "w")
    dName2Morgan = dict()
    while True:
        line = fin.readline()
        if line == "":
            break
        parts = line.strip().split("||")
        drugName = parts[0].lower()
        # print(drugName)
        t = parts[1]
        smile = parts[-2]
        if t == "small molecule" and len(smile) > 2:
            try:
                v = genMorganBitVecFromSmiles(smile)
                # print(smile)
                dName2Morgan[drugName] = v
            except:
                fsmileMissng.write("%s\n" % drugName)
                continue
    fin.close()
    fsmileMissng.close()
    print(len(dName2Morgan))
    utils.save_obj(dName2Morgan, "%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
Code Example #2
def make_transfer_graph():
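    # Scan numbered run directories for files matching 'poet{j}_network', count the
    # transfers seen at each POET iteration j, and record (directory, network id
    # parsed from the filename) pairs; results go to a CSV and a saved object.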

    c = {}
    toFromTracker = {}
    for d in os.listdir('.'):
        if os.path.isdir(d):
            try:
                _net = int(d)
            except ValueError as e:
                continue
            for i in range(10, 5010, 10):
                j = i - 1
                if j not in c:
                    c[j] = 0
                    toFromTracker[j] = []
                for f in os.listdir(f'./{_net}'):
                    if f'poet{j}_network' in f:
                        c[j] += 1
                        toFromTracker[j].append(
                            (_net, int(f.split("network_")[1].split("_")[0])))

    df = pd.DataFrame.from_dict(c, orient='index')
    df.to_csv("transfers_per_attempt.csv")
    save_obj(toFromTracker, '.', 'exactTransfers')

    return
Code Example #3
def exportSubG2():
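    # Group JADER drug-combination records by the number of drugs, writing each
    # record to SUB/G{n}; combinations of 3-20 drugs are also expanded into all
    # drug pairs appended to SUB/G2. Side-effect counts are tallied per combination
    # size and the sorted common side effects are saved.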
    fin = open("%s/JADER.txt" % params.JADER_OUT)
    foutDict = dict()
    dlen2SeCount = dict()
    nA = 0
    print("Reading...")

    while True:
        line = fin.readline()
        if line == "":
            break
        nA += 1
        print("\r%s" % nA, end="")
        parts = line.strip().split("$")
        drugCmb = parts[0]
        ses = parts[1]
        drugs = drugCmb.split(",")
        nD = len(drugs)
        drugs = sorted(drugs)
        sortNames = ",".join(drugs)

        fO = utils.get_dict(foutDict, nD, -1)
        if fO == -1:
            fO = open("%s/SUB/G%s" % (params.JADER_OUT, nD), "w")
            foutDict[nD] = fO
        fO.write("%s$%s\n" % (sortNames, ses))
        if len(drugs) > 2 and len(drugs) <= 20:
            for i in range(len(drugs)):
                for j in range(i + 1, len(drugs)):
                    d1 = drugs[i]
                    d2 = drugs[j]
                    pair = "%s,%s" % (d1, d2)
                    try:
                        f2 = foutDict[2]
                    except:
                        f2 = open("%s/SUB/G%s" % (params.JADER_OUT, 2), "w")
                        foutDict[2] = f2
                    f2.write("%s$%s\n" % (pair, ses))
        len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
        sess = ses.split(",")
        for se in sess:
            utils.add_dict_counter(len2SeCount, se)

    for k, v in foutDict.items():
        v.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        kvs = utils.sort_dict(v)
        ks = []
        for kv in kvs:
            kk, _ = kv
            ks.append(kk)
        d2[k] = ks
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)
Code Example #4
File: drugDes.py Project: anhnda/FDAPolyADR
def fillMissingSMILEs():
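    # Re-compute Morgan fingerprints for drugs whose SMILES were previously missing
    # (read from MissingSMILEsF.txt) and merge them into the saved DrugMorganDes
    # dictionary.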
    fin = open("%s/DrugBank/MissingSMILEsF.txt" % params.DATA_DIR)
    lines = fin.readlines()
    d = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    for line in lines:
        line = line.strip()
        parts = line.split("||")
        try:
            v = genMorganBitVecFromSmiles(parts[1])
        except Exception:
            print(parts[1])
            continue
        d[parts[0].lower()] = v

    utils.save_obj(d, "%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
Code Example #5
def getAllDrugSet():
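    # Walk every directory under params.FADER_DIR, accumulate drug-name and
    # drug-combination counts via getDrugSet (using the valid drug map), and save
    # both counters.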
    dirs = glob.glob("%s/*" % params.FADER_DIR)
    drugNameSet = dict()
    drugCombSet = dict()
    dMap = loadValidDrugMap()
    nSize = len(dMap)
    print("DMAP SIZE: ", nSize)
    for dir in dirs:
        path = getDrugFile(dir)
        assert os.path.isfile(path)
        getDrugSet(path, drugNameSet, drugCombSet, dMap)

    print("Saving...")
    utils.save_obj(drugNameSet, "%s/FDrugNameCount_%s" % (params.FADER_OUT, nSize))
    utils.save_obj(drugCombSet, "%s/FDrugCombCount_%s" % (params.FADER_OUT, nSize))
Code Example #6
def dieAndKillChildren(parent, pairs, stats):
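    # Persist the collected stats, then remove each alive-signal file and place a
    # matching '.done' flag so every child worker shuts down.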

    from utils.utils import save_obj
    save_obj(stats,
             os.path.join(f"{args.result_prefix}", f"results_{unique_run_id}"),
             f"pinsky_stats")

    # [pair.env.close() for pair in pairs]
    path = os.path.join(parent.root,
                        parent.subfolders['alive_signals'])

    alive = os.listdir(path)

    for a in alive:
        os.remove(os.path.join(path, a))
        # create #.txt.done files. 
        parent.placeChildFlag(os.path.join(path, a) + '.done')
Code Example #7
File: subStats3.py Project: anhnda/FDAPolyADR
def statsCommonSes():
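    # Count how often each side effect appears in CADER.txt and save the list of
    # side effects occurring more than 20 times.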
    fin = open("%s/CADER.txt" % (params.CAD_OUT))
    dSeCout = dict()
    while True:
        line = fin.readline()
        if line == "":
            break
        parts = line.strip().split("$")
        ses = parts[-1].split(",")
        for se in ses:
            utils.add_dict_counter(dSeCout, se)
    kvs = utils.sort_dict(dSeCout)
    ks = []
    for kv in kvs:
        k, v = kv
        if v <= 20:
            continue
        ks.append(k)
    utils.save_obj(ks, "%s/SeTopList.txt" % params.CAD_OUT)
Code Example #8
File: subStats.py Project: anhnda/FDAPolyADR
def exportSub():
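    # Group drug-combination records by the number of drugs into SUB/{n} files and
    # record, for each combination size, the side effects sorted by frequency.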
    fin = open("%s/FDrug2SeList_19814.txt" % params.FADER_OUT)
    foutDict = dict()
    dlen2SeCount = dict()
    nA = 0
    print("Reading...")

    while True:
        line = fin.readline()
        if line == "":
            break
        nA += 1
        print("\r%s" % nA, end="")
        parts = line.strip().split("$")
        drugCmb = parts[0]
        ses = parts[1]
        drugs = drugCmb.split(",")
        nD = len(drugs)
        sortNames = ",".join(sorted(drugs))

        fO = utils.get_dict(foutDict, nD, -1)
        if fO == -1:
            fO = open("%s/SUB/%s" % (params.FADER_OUT, nD), "w")
            foutDict[nD] = fO
        fO.write("%s$%s\n" % (sortNames, ses))
        len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
        sess = ses.split(",")
        for se in sess:
            utils.add_dict_counter(len2SeCount, se)

    for k, v in foutDict.items():
        v.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        kvs = utils.sort_dict(v)
        ks = []
        for kv in kvs:
            kk, _ = kv
            ks.append(kk)
        d2[k] = ks
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.FADER_OUT)
Code Example #9
def exportDrugCom2Side():
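    # Aggregate side-effect counts per drug combination from JADER.txt, write a
    # summary sorted by combination frequency, and save the overall side-effect
    # counts and the length of each combination.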
    fin = open("%s/JADER.txt" % params.JADER_OUT)
    fout = open("%s/JADER2AllSeList.txt" % params.JADER_OUT, "w")
    dDrugComb2Se = dict()
    dDrugCombCount = dict()
    dDrugCom2Lenght = dict()
    drugCont = dict()
    seCount = dict()
    cc = 0
    while True:
        line = fin.readline()
        if line == "":
            break
        cc += 1
        line = line.strip()
        parts = line.split("$")
        drugCom = parts[0]
        dDrugCom2Lenght[drugCom] = len(drugCom.split(","))

        ses = parts[1].split(",")
        utils.add_dict_counter(dDrugCombCount, drugCom, 1)
        for drug in drugCom.split(","):
            utils.add_dict_counter(drugCont, drug, 1)
        sesComb = utils.get_insert_key_dict(dDrugComb2Se, drugCom, dict())
        for se in ses:
            utils.add_dict_counter(sesComb, se, 1)
            utils.add_dict_counter(seCount, se)

    kvs = utils.sort_dict(dDrugCombCount)
    for kv in kvs:
        k, v = kv
        seCountKv = utils.sort_dict(dDrugComb2Se[k])
        sString = []
        for seCountx in seCountKv:
            se, count = seCountx
            sString.append("%s:%s" % (se, count))

        fout.write("%s:%s$%s$%s\n" % (k, v, len(sString), ",".join(sString)))
    fout.close()
    utils.save_obj(seCount, "%s/JADERSeCountFX" % params.JADER_OUT)
    utils.save_obj(dDrugCom2Lenght, "%s/DrugCombLength" % params.JADER_OUT)
    print(len(drugCont), len(seCount))
Code Example #10
def cluster_experiments(learner, criterion, name):
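    # Run the experiment with instances from 0-4 of the clusters initially labeled,
    # reseeding the RNGs each time so the initial labelings match across rounds,
    # and save every result object.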
    for clusters in range(0, 5):
        # clusters/4 clusters initially labeled
        print(
            '-' * 8,
            'instances from {}/4 clusters initially labeled'.format(clusters),
            '-' * 8)

        torch.manual_seed(manual_seed)
        # makes sure initially_labeled are equal in each round
        random.seed(manual_seed)
        initially_labeled = [
            initial_indices(clusters) for i in range(kwargs['rounds'])
        ]

        out = experiment(learner,
                         criterion=criterion,
                         initially_labeled=initially_labeled,
                         **kwargs)

        utils.save_obj(out, result_folder + name + '{}'.format(clusters))
Code Example #11
def createLearningDeltas(exp):
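    # For each numbered level directory, compute the gap between the POET iteration
    # at which the level first appears (earliest scores file) and its first win;
    # levels that never win get an infinite delta. Deltas are saved per level.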
    deltas = {}
    for d in os.listdir('.'):
        try:
            lvl = int(d)
        except ValueError as e:
            continue
        files = os.listdir(f'./{lvl}')
        opt = [f for f in files if 'scores' in f]
        wins = [f for f in files if 'win' in f]
        sortedWins = sorted(
            wins, key=lambda x: int(x.split('poet')[1].split('.')[0]))
        sortedOpt = sorted(opt,
                           key=lambda x: int(x.split("poet")[1].split("_")[0]))
        birth = int(sortedOpt[0].split('poet')[1].split('_')[0])
        try:
            firstVictory = int(sortedWins[0].split('poet')[1].split('.')[0])
        except IndexError as e:
            firstVictory = np.inf
        deltas[lvl] = firstVictory - birth

    save_obj(deltas, '.', f'{exp}.learningDelta')
Code Example #12
def getAllDrugSEMap():
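    # Build the drug -> side-effect mapping file across all case directories,
    # restricted to the valid drug map and valid side effects, and save the
    # side-effect counts.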
    dirs = glob.glob("%s/*" % params.FADER_DIR)
    validSes = loadValidSEs()
    nSE = len(validSes)
    fout = open("%s/FDrug2SeList_%s.txt" % (params.FADER_OUT, nSE), "w")

    dMap = loadValidDrugMap()
    assert len(dMap) > 0
    nSize = len(dMap)
    print("DMAP SIZE: ", nSize)
    seCount = dict()
    for dir in dirs:
        pathDrug = getDrugFile(dir)
        assert os.path.isfile(pathDrug)
        pathSE = getSEFile(dir)
        caseSEMap = getSideEffectSet(pathSE, seCount, validSes)
        getDrugSEMappingFile(pathDrug, fout, dMap, caseSEMap)

    print("Saving...")
    utils.save_obj(seCount, "%s/FSECount_%s_%s" % (params.FADER_OUT, nSize, nSE))

    fout.close()
Code Example #13
File: psmxExporter.py Project: anhnda/FDAPolyADR
def exportOData():
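    # Convert each JADERInd.txt record into lists of drug, indication, and
    # side-effect ids (skipping combinations with more than 20 drugs) and dump the
    # resulting list of [drugIds, indIds, seIds] triples.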
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    dInd2Id, _ = loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))
    fin = open("%s/JADERInd.txt" % OUT_DIR)

    dList = []
    while True:
        line = fin.readline()
        if line == "":
            break
        parts = line.strip().split("$")
        drugs = parts[0].split(",")
        inds = parts[2].split(",")
        ses = parts[-1].split(",")
        drugIds = []
        indIds = []
        seIds = []
        if len(drugs) > 20:
            continue
        for drug in drugs:
            drugId = utils.get_dict(dDrug2Id, drug, -1)
            # print(drug, drugId)
            if drugId != -1:
                drugIds.append(drugId)
        for ind in inds:
            indId = utils.get_dict(dInd2Id, ind, -1)
            if indId != -1:
                indIds.append(indId)
        for se in ses:
            seId = utils.get_dict(dSe2Id, se, -1)
            if seId != -1:
                seIds.append(seId)
        # print(drugIds, indIds, seIds)
        dList.append([drugIds, indIds, seIds])

    utils.save_obj(dList, "%s/DataDump.o" % OUT_DIR)
Code Example #14
    def createChildTask(self, run_id, work_dict, worker_id, task_id,
                        poet_loop_counter, **kwargs):
        """

        :param work_dict: dict of nns, envs, nn_ids, env_ids
        :param worker_id:  child id (int)
        :param task_id: ADP TASK TYPE
        :param poet_loop_counter: poet number loop
        :return:
        """

        work = {
            'run_id': run_id,
            'nns': work_dict['nn'],
            'lvls': work_dict['env'],
            'task_id': task_id,
            'chromosome_ids': work_dict['nn_id'],
            'env_ids': work_dict['env_id'],
            'diff': work_dict['diff'],
            'poet': poet_loop_counter,
            'kwargs': kwargs
        }

        save_obj(work, os.path.join(self.root,
                                    self.subfolders['send_to_child']),
                 f'child{worker_id}')

        available = os.path.join(self.root,
                                 self.subfolders['available_signals'],
                                 f'{worker_id}.txt')

        if not os.path.exists(available):
            time.sleep(10)

        if os.path.exists(available):
            os.remove(available)
Code Example #15
    output_path = "/home/upf/corpora/IberLEF2019/multitask"

    irosva_path = "/home/upf/corpora/IberLEF2019/IroSva/preprocessed_data"
    mexa3t_path = "/home/upf/corpora/IberLEF2019/MEX-A3T/preprocessed_data"
    haha_path = "/home/upf/corpora/IberLEF2019/HAHA/preprocessed_data"
    tass_path = "/home/upf/corpora/IberLEF2019/TASS/preprocessed_data"

    tasks = ["irosva", "haha", "mexa3t"]

    tag = "_".join(sorted(tasks))

    word_index_files = []
    word_index_files.append(os.path.join(irosva_path, "word_index_all.txt"))
    word_index_files.append(os.path.join(haha_path, "word_index_train.txt"))
    word_index_files.append(os.path.join(mexa3t_path, "word_index_all.txt"))

    word_index = get_word_index_from_files(word_index_files)

    word_index_all_path = os.path.join(output_path,
                                       "word_index_" + tag + ".txt")
    write_word_index(word_index, word_index_all_path)

    word_index_all_path = os.path.join(output_path,
                                       "word_index_" + tag + ".pkl")
    save_obj(word_index, word_index_all_path)

    word_index_all_path = os.path.join(output_path,
                                       "word_index_" + tag + ".pkl")
    word_index = load_obj(word_index_all_path)
Code Example #16
    def returnAnswer(self, answer):
        path = os.path.join(self.root, self.subfolders['send_to_parent'])
        save_obj(answer, path, f'answer{self.id}')
Code Example #17
    #emb_matrix_filename = "emb_matrix_fb_"
    #emb_matrix_path = os.path.join(output_path, emb_matrix_filename + ".pkl")
    #emb_matrix = load_obj(emb_matrix_path)



    word_index_path = os.path.join(output_path, "word_index_all.pkl")
    word_index = load_obj(word_index_path)
    dim = 100
    w2v_path = "/home/abravo/corpora/IberLEF2019/regional_emb/es-MX-100d.vec"

    #dim = 300
    #w2v_path = "/home/abravo/corpora/IberLEF2019/regional_emb/model_swm_300-6-10-low_es.w2v"
    emb_matrix = get_embedding_matrix(word_index, dim, w2v_path)
    emb_matrix_path = os.path.join(output_path, "emb_matrix" + ".pkl")
    save_obj(emb_matrix, emb_matrix_path)



    MODE_BOTH = True


    if MODE_BOTH:

        data_path = os.path.join(output_path, "data_train.pkl")
        data = load_obj(data_path)

        labels_path = os.path.join(output_path, "labels_train.pkl")
        labels = load_obj(labels_path)

        scores_path = os.path.join(output_path, "scores_train.pkl")
Code Example #18
    join_emb.eval()

    dataset = TextDataset(args.data_path, args.dictionary)
    print("Dataset size: ", len(dataset))

    dataset_loader = DataLoader(dataset, batch_size=args.batch_size, num_workers=3, pin_memory=True, collate_fn=collate_fn_cap_padded)

    caps_enc = list()

    print("### Starting sentence embedding ###")
    end = time.time()
    for i, (caps, length) in enumerate(dataset_loader, 0):

        input_caps = caps.to(device)

        with torch.no_grad():
            _, output_emb = join_emb(None, input_caps, length)

        caps_enc.append(output_emb.cpu().data.numpy())

        if i % 100 == 99:
            print(str((i + 1) * args.batch_size) + "/" + str(len(dataset)) + " captions encoded - Time per batch: " + str((time.time() - end)) + "s")

        end = time.time()

    print("Processing done -> saving")
    caps_stack = np.vstack(caps_enc)

    save_obj(caps_stack, args.output_path)
    print("The data has been save to ", args.output_path)
Code Example #19
File: indexing.py Project: pashna/SearchIndexer

COUNT_OF_FILES = 40

def get_reader():
    reader = DocumentStreamReader(sys.stdin)
    return reader

def create_indexes(encoding):
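    # Stream documents from stdin and index every document that carries a text
    # field, returning the populated Indexer.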

    reader = get_reader()
    indexer = Indexer(COUNT_OF_FILES, encoding)

    for doc in reader:
        doc_url = doc.url
        doc_text = doc.text if doc.HasField('text') else 0

        if doc_text != 0:
            indexer.add_document_indexes(text=doc_text, url=doc_url)

    return indexer


if __name__ == "__main__":
    encoding = sys.argv[1]
    indexer = create_indexes(encoding)
    for key, r_index in indexer.full_index.items():
        if key != "encoding":
            save_obj(r_index, "indexer_"+str(key))

    save_obj(indexer.documents, "documents")
Code Example #20
    nlp = None

    tweet_col = 1
    label_col = 2

    if SAVE_WORD_INDEX:
        if not nlp:
            nlp = get_spacy_nlp('es_core_news_md', True)
        all_files = [preproc_train_path, preproc_test_path]

        word_index = get_word_index(nlp, all_files, True, 1)
        word_index_all_path = os.path.join(output_path, "word_index_all.txt")
        write_word_index(word_index, word_index_all_path)

        word_index_all_path = os.path.join(output_path, "word_index_all.pkl")
        save_obj(word_index, word_index_all_path)

    word_index_path = os.path.join(output_path, "word_index_all.pkl")
    word_index = load_obj(word_index_path)

    if SAVE_CHAR_INDEX:
        if not nlp:
            nlp = get_spacy_nlp('es_core_news_md', True)

        all_files = [preproc_train_path]

        char_index = get_word_index(nlp, all_files, True, tweet_col, True)
        char_index_all_path = os.path.join(output_path, "char_index_train.txt")
        write_word_index(char_index, char_index_all_path)
        char_index_all_path = os.path.join(output_path, "char_index_train.pkl")
        save_obj(char_index, char_index_all_path)
Code Example #21
                                batch_size=args.batch_size,
                                num_workers=6,
                                pin_memory=True)

    imgs_enc = list()

    print("### Starting image embedding ###")
    end = time.time()
    for i, imgs in enumerate(dataset_loader, 0):

        input_imgs = imgs.to(device)
        print(input_imgs)
        with torch.no_grad():
            output_emb, _ = join_emb(input_imgs, None, None)

        imgs_enc.append(output_emb.cpu().data.numpy())

        if i % 100 == 99:
            print(
                str((i + 1) * args.batch_size) + "/" + str(len(dataset)) +
                " images encoded - Time per batch: " +
                str((time.time() - end)) + "s")

        end = time.time()

    print("Processing done -> saving")
    imgs_stack = np.vstack(imgs_enc)

    save_obj((imgs_stack, dataset.get_image_list()), args.output_path)
    print("The data has been save to ", args.output_path)
Code Example #22
    tweet_col = 0
    label_col = 1

    if SAVE_WORD_INDEX:

        if not nlp:
            nlp = get_spacy_nlp('es_core_news_md', True)

        all_files = [preproc_train_path, preproc_test_path]

        word_index = get_word_index(nlp, all_files, True, tweet_col, False)
        word_index_all_path = os.path.join(output_path, "word_index_all.txt")
        write_word_index(word_index, word_index_all_path)

        word_index_all_path = os.path.join(output_path, "word_index_all.pkl")
        save_obj(word_index, word_index_all_path)
        print("WORD INDEX PROCESSED!")

    word_index_all_path = os.path.join(output_path, "word_index_all.pkl")
    word_index = load_obj(word_index_all_path)

    if SAVE_CHAR_INDEX:
        if not nlp:
            nlp = get_spacy_nlp('es_core_news_md', True)

        all_files = [preproc_train_path, preproc_test_path]

        char_index = get_word_index(nlp, all_files, True, tweet_col, True)
        char_index_all_path = os.path.join(output_path, "char_index_all.txt")
        write_word_index(char_index, char_index_all_path)
Code Example #23
def exportPolySes():
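    # Index all drugs and side effects in PolySes.txt (skipping combinations larger
    # than params.MAX_N_DRUG), write the filtered combinations, then shuffle and
    # split them into K_FOLD train/validation/test folds, saving each fold.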
    dDrug = dict()
    dSe = dict()
    dDrugComb2Ses = dict()
    fin = open("%s/PolySes.txt" % params.FADER_OUT)
    while True:
        line = fin.readline()
        if line == "":
            break
        line = line.strip()
        parts = line.split("\t")
        drugCom = parts[0]
        ses = parts[1]
        drugs = drugCom.split(",")
        if len(drugs) > params.MAX_N_DRUG:
            continue

        for drug in drugs:
            utils.get_update_dict_index(dDrug, drug)

        ses = ses.split(",")
        for se in ses:
            utils.get_update_dict_index(dSe, se)

        dDrugComb2Ses[drugCom] = ses

    nDrug = len(dDrug)
    nSe = len(dSe)
    nComb = len(dDrugComb2Ses)

    print("Drugs, Ses, Comb: ", nDrug, nSe, nComb)

    fout = open("%s/PolySe_%s" % (params.FADER_OUT, params.MAX_N_DRUG), "w")
    kvs = []
    for drugCom, ses in dDrugComb2Ses.items():
        fout.write("%s\t%s\n" % (drugCom, ",".join(ses)))
        kvs.append([drugCom.split(","), ses])

    fout.close()

    random.shuffle(kvs)

    SEG_SIZE = int(nComb / params.K_FOLD)

    for iFold in range(params.K_FOLD):
        print("Generating fold...", iFold)
        tests = []
        validates = []
        trains = []
        startTest = iFold * SEG_SIZE
        endTest = (iFold + 1) * SEG_SIZE
        if endTest > nComb:
            endTest = nComb

        startValid = endTest
        if iFold == params.K_FOLD - 1:
            startValid = 0

        endValid = startValid + SEG_SIZE

        if endValid > nComb:
            endValid = nComb

        for j, kv in enumerate(kvs):
            if startTest <= j < endTest:
                seg = tests
            elif startValid <= j < endValid:
                seg = validates
            else:
                seg = trains
            seg.append(kv)

        utils.save_obj((dDrug, dSe, trains, tests, validates),
                       "%s/_%s" % (params.FADER_KFOLD, iFold))