コード例 #1
0
# Ensure the feature cache directory exists. exist_ok=True replaces the
# separate exists() check, which could race with another process creating
# the same directory between the check and the makedirs() call.
os.makedirs(tmp_feature_dir, exist_ok=True)

# Load one map object per fingerprint type from fp_save_folder; the
# resulting list is in the same order as fp_types.
fp_save_folder = '/raid/shenwanxiang/FP_maps'
mps = [
    loadmap(os.path.join(fp_save_folder, '%s.mp' % fp_type))
    for fp_type in fp_types
]

classification_res = []
## classification
for data in datasets:
    task_name, task_type = data.task_name, data.task_type
    _, induces = load_data(task_name)
    smiles = data.x
    # Missing label entries are filled with MASK before taking the raw values.
    Y = pd.DataFrame(data.y).fillna(MASK).values

    for mp, fp_type in zip(mps, fp_types):
        print(fp_type)

        # Transformed features are cached on disk, one file per
        # (task name, fingerprint type) pair.
        X2_name = os.path.join(
            tmp_feature_dir, "X2_%s_%s.data" % (task_name, fp_type)
        )
        if os.path.exists(X2_name):
            # Cache hit: reuse the previously dumped features.
            X2 = load(X2_name)
        else:
            # Cache miss: compute the features and persist them for next time.
            X2 = mp.batch_transform(smiles, scale=False, n_jobs=16)
            dump(X2, X2_name)
コード例 #2
0
ファイル: chembench_utils.py プロジェクト: shunsunsun/MolBERT
def get_data(dataset):
    """Load a dataset via ``load_data`` and normalise its column names.

    The ``smiles`` column is renamed to ``SMILES`` and every space in a
    column name is replaced with an underscore.

    Args:
        dataset: Identifier passed straight through to ``load_data``.

    Returns:
        A ``(df, indices)`` tuple: the frame with normalised column names and
        the second value returned by ``load_data``, unchanged (presumably the
        split indices -- confirm against ``load_data``).

    Note:
        Any existence check or download is left to ``load_data``; this
        function itself does no file handling.
    """
    frame, split_indices = load_data(dataset)
    renamed = frame.rename(columns={'smiles': 'SMILES'})
    renamed.columns = [name.replace(' ', '_') for name in renamed.columns]
    return renamed, split_indices