# Make sure the feature-cache directory exists. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another process
# creating the same directory between the check and the makedirs() call.
os.makedirs(tmp_feature_dir, exist_ok=True)

# Load one saved map ("<fp_type>.mp") per entry of fp_types, keeping the
# same order so that mps and fp_types can be zipped together below.
mps = []
# NOTE(review): hard-coded absolute path; confirm it exists on the target host.
fp_save_folder = '/raid/shenwanxiang/FP_maps'
for fp_type in fp_types:
    mp = loadmap(os.path.join(fp_save_folder, '%s.mp' % fp_type))
    mps.append(mp)

# Not appended to in the visible part of this script.
classification_res = []

## classification
for data in datasets:
    task_name = data.task_name
    task_type = data.task_type

    # Only the second element of load_data()'s result is kept here.
    _, induces = load_data(task_name)

    smiles = data.x
    # Replace missing labels with the MASK sentinel before taking the array.
    Y = pd.DataFrame(data.y).fillna(MASK).values

    for mp, fp_type in zip(mps, fp_types):
        print(fp_type)

        # Cache file for this (task, fp_type) pair inside tmp_feature_dir.
        X2_name = "X2_%s_%s.data" % (task_name, fp_type)
        X2_name = os.path.join(tmp_feature_dir, X2_name)

        if not os.path.exists(X2_name):
            # Cache miss: compute the features and persist them for next time.
            X2 = mp.batch_transform(smiles, scale=False, n_jobs=16)
            dump(X2, X2_name)
        else:
            # Cache hit: reuse the previously dumped features.
            X2 = load(X2_name)
def get_data(dataset):
    """
    Load a dataset through ``load_data`` and normalise its column names.

    The ``smiles`` column (if present) is renamed to ``SMILES`` and every
    space in a column name is replaced by an underscore.

    Returns a ``(frame, indices)`` tuple: the renamed frame and the second
    value returned by ``load_data``, passed through unchanged.
    """
    loaded = load_data(dataset)
    frame, indices = loaded

    # rename() returns a new frame, so the column assignment below does not
    # touch the object handed back by load_data().
    frame = frame.rename(columns={'smiles': 'SMILES'})

    tidy_names = []
    for name in frame.columns:
        tidy_names.append(name.replace(' ', '_'))
    frame.columns = tidy_names

    return frame, indices