# Replace every training active of group `k` with [generated ID, fp dict].
# NOTE(review): `k`, `target`, `actives`, `fp_build` and `firstchembl` are
# bound outside this chunk; `k` presumably comes from an enclosing loop over
# the groups in `actives` -- confirm the nesting/indentation when merging.
for i, m in enumerate(actives[k]):
    fp_dict = scor.getFP(fp_build, m[1])
    # generated ID: <target>_<k>_A_<1-based position in the group>
    actives[k][i] = [str(target) + '_' + str(k) + '_A_' + str(i + 1), fp_dict]

# read in test actives and calculate fps
div_actives = []
# the context manager closes the gzip handle as soon as the file is consumed
with gzip.open(inpath_cmp + 'ChEMBL/cmp_list_ChEMBL_' + str(target)
               + '_actives.dat.gz', 'r') as cmp_file:
    for line in cmp_file:
        # lines starting with '#' are comments/header
        if line.startswith('#'):
            continue
        # structure of line: [external ID, internal ID, SMILES]
        fields = line.split()
        if not fields:
            # whitespace-only line: nothing to parse
            continue
        fp_dict = scor.getFP(fp_build, fields[2])
        # store: [internal ID, dict with fps]
        div_actives.append([fields[1], fp_dict])
num_test_actives = conf.num_div_act - 1
# convert fps to numpy arrays
np_fps_div_act = ml_func.getNumpy(div_actives)

# read in decoys and calculate fps
# (only while `firstchembl` is true; the flag is cleared afterwards so the
# decoy file is not parsed again)
if firstchembl:
    decoys = []
    with gzip.open(inpath_cmp + 'ChEMBL/cmp_list_ChEMBL_zinc_decoys.dat.gz',
                   'r') as cmp_file:
        for line in cmp_file:
            # lines starting with '#' are comments/header
            if line.startswith('#'):
                continue
            # structure of line: [external ID, internal ID, SMILES]
            fields = line.split()
            if not fields:
                # whitespace-only line: nothing to parse
                continue
            fp_dict = scor.getFP(fp_build, fields[2])
            # store: [internal ID, dict with fps]
            decoys.append([fields[1], fp_dict])
    # convert fps to numpy arrays
    np_fps_dcy = ml_func.getNumpy(decoys)
    firstchembl = False
num_decoys = len(decoys)
# read in actives and calculate fps
actives = []
# the context manager closes the gzip handle as soon as the file is consumed
with gzip.open(inpath_cmp + dataset + '/cmp_list_' + dataset + '_'
               + str(target) + '_actives.dat.gz', 'r') as cmp_file:
    for line in cmp_file:
        # lines starting with '#' are comments/header
        if line.startswith('#'):
            continue
        # structure of line: [external ID, internal ID, SMILES]
        fields = line.split()
        if not fields:
            # whitespace-only line: nothing to parse
            continue
        fp_dict = scor.getFP(fp_build, fields[2])
        # store: [internal ID, dict with fps]
        actives.append([fields[1], fp_dict])
num_actives = len(actives)
# the remaining actives after taking out `num_query_mols` query molecules
num_test_actives = num_actives - num_query_mols
# convert fps to numpy arrays
np_fps_act = ml_func.getNumpy(actives)

# read in decoys and calculate fps
if dataset == 'ChEMBL':
    # the ChEMBL decoy file name does not depend on the target, so it is
    # only parsed while `firstchembl` is true
    if firstchembl:
        decoys = []
        with gzip.open(inpath_cmp + dataset + '/cmp_list_' + dataset
                       + '_zinc_decoys.dat.gz', 'r') as cmp_file:
            for line in cmp_file:
                # lines starting with '#' are comments/header
                if line.startswith('#'):
                    continue
                # structure of line: [external ID, internal ID, SMILES]
                fields = line.split()
                if not fields:
                    # whitespace-only line: nothing to parse
                    continue
                fp_dict = scor.getFP(fp_build, fields[2])
                # store: [internal ID, dict with fps]
                decoys.append([fields[1], fp_dict])
        # convert fps to numpy arrays