def get_stats(self):
    """Return the squared Pearson correlation (R^2) between the model
    values and the experimental data for selections A and B.

    For the scalar modes (0, 3, 5) a pair of floats is returned. For the
    energy/entropy modes (1, 4, 6, 7) each returned element is a two-item
    list holding the energy (column 0) and entropy (column 1) R^2.
    """

    def _rsq(model, exp):
        ### stats.linregress returns (slope, intercept, r, p, stderr);
        ### element 2 is the correlation coefficient.
        return stats.linregress(model, exp)[2] ** 2

    if self.mode in [0, 3, 5]:
        rsq_A = _rsq(self.fitter._f[self.selection_A], self.exA)
        rsq_B = _rsq(self.fitter._f[self.selection_B], self.exB)
        return rsq_A, rsq_B
    elif self.mode in [1, 4, 6, 7]:
        ### Column 0 is energy, column 1 is entropy.
        rsq_A = [
            _rsq(self.fitter._f[self.selection_A, 0], self.exA[:, 0]),
            _rsq(self.fitter._f[self.selection_A, 1], self.exA[:, 1]),
        ]
        rsq_B = [
            _rsq(self.fitter._f[self.selection_B, 0], self.exB[:, 0]),
            _rsq(self.fitter._f[self.selection_B, 1], self.exB[:, 1]),
        ]
        return rsq_A, rsq_B
    else:
        mode_error(self.mode)
def init_output(self):
    """Open the three result files and write their headers.

    Creates ``self.modelparms`` ("<prefix>parms.out"),
    ``self.modelprediction`` ("<prefix>prediction.out") and
    ``self.modeldiff`` ("<prefix>diff.out") as open file handles.  The
    column layout of the parameter file depends on ``self.parms``,
    ``self.mode`` and ``self.pairs``.  Headers are flushed to disk before
    returning; the files stay open for later flush()/finish() calls.

    Raises KeyError if a case name is in neither selection A nor B.

    BUGFIX vs. original: the "First row for each step is ..." header
    lines of the diff file were written to the prediction file; the
    misspelled "differecnes" and doubled "free energy energy" strings
    are corrected as part of the same copy-paste fix.
    """
    ### Open files for writing
    self.modelparms = open("%sparms.out" % self.prefix, "w")
    self.modelprediction = open("%sprediction.out" % self.prefix, "w")
    self.modeldiff = open("%sdiff.out" % self.prefix, "w")

    now = datetime.datetime.now()

    ### ------------------------------ ###
    ### Header of the modelparms file  ###
    ### ------------------------------ ###
    self.modelparms.write("### File containing parameters of the functionals.\n")
    self.modelparms.write("### Written by gips.\n")
    self.modelparms.write("### Time: %s\n" % now.strftime("%Y-%m-%d %H:%M"))
    self.modelparms.write("### Hostname: %s\n" % socket.gethostname())
    self.modelparms.write("### The entropy contribution to free energy is calculated at 300 K\n")
    if isinstance(self.optparms, dict):
        for key, value in self.optparms.items():
            self.modelparms.write("### %s: %s\n" % (key, value))
    self.modelparms.write("###\n")
    self.modelparms.write("### Step ")
    ### Affinity column(s): parms==6 separates energy/entropy affinity
    if self.parms == 6:
        self.modelparms.write("E_aff[kcal/mol] ")
    elif self.parms == 5:
        self.modelparms.write("Aff[kcal/mol] ")
    ### Energy cutoff columns; mode 7 has separate cutoffs for
    ### receptor, complex and ligand unless fitting pairs.
    if self.mode == 7:
        if not self.pairs:
            self.modelparms.write("e_co(Rec)[kcal/mol] ")
            self.modelparms.write("e_co(Cplx)[kcal/mol] ")
            self.modelparms.write("e_co(Lig)[kcal/mol] ")
        else:
            self.modelparms.write("e_co[kcal/mol] ")
    if self.parms == 6:
        self.modelparms.write("S_aff[kcal/mol] ")
    ### Entropy cutoff columns (same layout rule as the energy cutoffs)
    if self.mode == 7:
        if not self.pairs:
            self.modelparms.write("s_co(Rec)[kcal/mol] ")
            self.modelparms.write("s_co(Cplx)[kcal/mol] ")
            self.modelparms.write("s_co(Lig)[kcal/mol] ")
        else:
            self.modelparms.write("s_co[kcal/mol] ")
    ### Density cutoff columns for the cutoff modes
    if self.mode in [5, 6, 7]:
        if not self.pairs:
            self.modelparms.write("g_co(Rec)[1/Ang^3] ")
            self.modelparms.write("g_co(Cplx)[1/Ang^3] ")
            self.modelparms.write("g_co(Lig)[1/Ang^3] ")
        else:
            self.modelparms.write("g_co[1/Ang^3] ")
    ### Constant term: one C for the scalar modes, separate
    ### energy/entropy constants for the decomposed modes.
    if self.mode in [0, 3, 5]:
        self.modelparms.write("C[kcal/mol] ")
    elif self.mode in [1, 4, 6, 7]:
        self.modelparms.write("C_E[kcal/mol] ")
        self.modelparms.write("C_S[kcal/mol] ")
    else:
        mode_error(self.mode)
    self.modelparms.write("SSE[kcal^2/mol^2](A) ")
    self.modelparms.write("SSE[kcal^2/mol^2](B) ")
    self.modelparms.write("R2(A) ")
    self.modelparms.write("R2(B) ")
    self.modelparms.write("rmsd[kcal/mol](A) ")
    self.modelparms.write("rmsd[kcal/mol](B) ")
    self.modelparms.write("\n")

    ### ----------------------------------- ###
    ### Header of the modelprediction file  ###
    ### ----------------------------------- ###
    self.modelprediction.write("### File containing predicted values of the model.\n")
    self.modelprediction.write("### Written by gips.\n")
    self.modelprediction.write("### Time: %s\n" % now.strftime("%Y-%m-%d %H:%M"))
    self.modelprediction.write("### Hostname: %s\n" % socket.gethostname())
    self.modelprediction.write("### All units in [kcal/mol]\n")
    if isinstance(self.optparms, dict):
        for key, value in self.optparms.items():
            self.modelprediction.write("### %s: %s\n" % (key, value))
    self.modelprediction.write("###\n")
    self.modelprediction.write("### Model values.\n")
    self.modelprediction.write("### All values in [kcal/mol].\n")
    if self.mode in [1, 4, 6, 7]:
        if self.fitter.decomp_E:
            self.modelprediction.write("### First row for each step is free energy, second row is energy.\n")
        elif self.fitter.decomp_S:
            self.modelprediction.write("### First row for each step is free energy, second row is entropy.\n")
        else:
            self.modelprediction.write("### First row for each step is energy, second row is entropy.\n")
    self.modelprediction.write("#Title ")
    for name in self.fitter.name:
        self.modelprediction.write("%s " % name)
    self.modelprediction.write("\n")
    self.modelprediction.write("#Selection ")
    for i, name in enumerate(self.fitter.name):
        if i in self.selection_A:
            self.modelprediction.write("A ")
        elif i in self.selection_B:
            self.modelprediction.write("B ")
        else:
            raise KeyError(
                "name=%s not found in selection A or selection B."
                % name)
    self.modelprediction.write("\n")
    ### Experimental reference row(s), marked with pseudo step id -1
    if self.mode in [0, 3, 5]:
        self.modelprediction.write("-1 ")
        for value in self.fitter._exp_data:
            self.modelprediction.write("%6.3f " % value)
    elif self.mode in [1, 4, 6, 7]:
        self.modelprediction.write("-1 ")
        for value in self.fitter._exp_data[:, 0]:
            self.modelprediction.write("%6.3f " % value)
        self.modelprediction.write("\n")
        self.modelprediction.write("-1 ")
        for value in self.fitter._exp_data[:, 1]:
            self.modelprediction.write("%6.3f " % value)
    else:
        mode_error(self.mode)
    self.modelprediction.write("\n")

    ### ----------------------------- ###
    ### Header of the modeldiff file  ###
    ### ----------------------------- ###
    self.modeldiff.write("### File containing differences between predicted and experimental values.\n")
    self.modeldiff.write("### Written by gips.\n")
    self.modeldiff.write("### Time: %s\n" % now.strftime("%Y-%m-%d %H:%M"))
    self.modeldiff.write("### Hostname: %s\n" % socket.gethostname())
    self.modeldiff.write("### All units in [kcal/mol]\n")
    if isinstance(self.optparms, dict):
        for key, value in self.optparms.items():
            self.modeldiff.write("### %s: %s\n" % (key, value))
    self.modeldiff.write("###\n")
    self.modeldiff.write("### Model-Exp difference.\n")
    self.modeldiff.write("### All values in [kcal/mol].\n")
    if self.mode in [1, 4, 6, 7]:
        ### BUGFIX: these three header lines used to be written to
        ### self.modelprediction although they describe the diff file.
        if self.fitter.decomp_E:
            self.modeldiff.write("### First row for each step is free energy, second row is energy.\n")
        elif self.fitter.decomp_S:
            self.modeldiff.write("### First row for each step is free energy, second row is entropy.\n")
        else:
            self.modeldiff.write("### First row for each step is energy, second row is entropy.\n")
    ### NOTE(review): the diff file uses "Title " / "# Selection " while
    ### the prediction file uses "#Title " / "#Selection " -- kept as-is
    ### in case downstream parsers rely on the exact strings.
    self.modeldiff.write("Title ")
    for name in self.fitter.name:
        self.modeldiff.write("%s " % name)
    self.modeldiff.write("\n")
    self.modeldiff.write("# Selection ")
    for i, name in enumerate(self.fitter.name):
        if i in self.selection_A:
            self.modeldiff.write("A ")
        elif i in self.selection_B:
            self.modeldiff.write("B ")
        else:
            raise KeyError(
                "name=%s not found in selection A or selection B."
                % name)
    self.modeldiff.write("\n")

    ### Flush file contents to disk
    self.modelparms.flush()
    self.modelprediction.flush()
    self.modeldiff.flush()
def finish(self):
    """Rank all accumulated solutions, write them to the output files and
    close the files.

    For the brute-force optimizer only trailing newlines are written and
    the files are closed (results were already streamed by flush()).
    Otherwise the per-step history is ranked -- multi-objective
    non-dominated sorting via pygmo when the fitter decomposes
    energy/entropy, a plain argsort otherwise -- and every solution is
    re-evaluated through the fitter and written out in ranked order.
    Returns 1 in the brute-force branch, None otherwise.
    """
    if self.optimizer == "brute":
        self.modelparms.write("\n")
        self.modelprediction.write("\n")
        self.modeldiff.write("\n")
        self.modelparms.close()
        self.modelprediction.close()
        self.modeldiff.close()
        return 1

    ### Convert the per-step history lists to arrays for fancy indexing.
    self.x = np.array(self.x)
    self.f_A = np.array(self.f_A)
    self.rmsd_A = np.array(self.rmsd_A)
    self.f_B = np.array(self.f_B)
    self.rmsd_B = np.array(self.rmsd_B)
    self.R2_A = np.array(self.R2_A)
    self.R2_B = np.array(self.R2_B)

    ### Write out best result for selection A
    ### -------------------------------------
    if self.fitter.decomp:
        ### ndf (list of 1D NumPy int array): the non-dominated fronts
        ### dl (list of 1D NumPy int array): the domination list
        ### dc (1D NumPy int array) : the domination count
        ### ndr (1D NumPy int array) : the non-domination ranks
        ndf, dl, dc, ndr = pygmo.fast_non_dominated_sorting(self.f_A)
        ### Save a Pareto-front plot for selection A.
        ax_A = pygmo.plot_non_dominated_fronts(self.f_A)
        ax_A.figure.savefig("%spareto.selectionA.png" % self.prefix,
                            dpi=1000)
        ax_A.figure.clear("all")
        ordered_ndf = list()
        for front in ndf:
            ### NOTE(review): sort_population_mo returns positions within
            ### `front`, yet below they are used as global indices into
            ### self.step/self.x -- verify this is intended.
            ordered_ndf.append(pygmo.sort_population_mo(self.f_A[front]))
    else:
        ### NOTE(review): assumes self.f_A rows are 1-element arrays so
        ### that each entry of the argsort result is itself iterable in
        ### the nested loop below -- confirm against how f_A is filled.
        ordered_ndf = np.argsort(self.f_A, axis=0)

    self.modelparms.write("### Best result (A)\n")
    self.modelprediction.write("### Best result (A)\n")
    self.modeldiff.write("### Best result (A)\n")
    for front_count, front in enumerate(ordered_ndf):
        for solution_i in front:
            ### Gather the stored data of this solution.
            step = self.step[solution_i]
            x = self.x[solution_i]
            f_A = self.f_A[solution_i]
            f_B = self.f_B[solution_i]
            rmsd_A = self.rmsd_A[solution_i]
            rmsd_B = self.rmsd_B[solution_i]
            R2_A = self.R2_A[solution_i]
            R2_B = self.R2_B[solution_i]

            ### Rows are tagged "step/front".
            self.modelparms.write("%d/%d " % (step, front_count))
            self.modelprediction.write("%d/%d " % (step, front_count))
            self.modeldiff.write("%d/%d " % (step, front_count))

            ### Re-evaluate the model at this parameter vector so that
            ### self.fitter._f holds this solution's predictions.
            self.fitter.gist_functional(x)
            self.fitter._f_process(x)

            if self.mode in [0, 3, 5]:
                ### Scalar modes: single row of parameters and scores.
                for i in self.parmidx:
                    self.modelparms.write("%6.3f " % x[i])
                self.modelparms.write("%6.3f " % f_A[0])
                self.modelparms.write("%6.3f " % f_B[0])
                self.modelparms.write("%6.3f " % R2_A)
                self.modelparms.write("%6.3f " % R2_B)
                self.modelparms.write("%6.3f " % rmsd_A)
                self.modelparms.write("%6.3f " % rmsd_B)
            elif self.mode in [1, 4, 6, 7]:
                ### Energy Output
                for i in self.parmidx:
                    self.modelparms.write("%6.3f " % x[i])
                self.modelparms.write("%6.3f " % f_A[0])
                self.modelparms.write("%6.3f " % f_B[0])
                self.modelparms.write("%6.3f " % R2_A[0])
                self.modelparms.write("%6.3f " % R2_B[0])
                self.modelparms.write("%6.3f " % rmsd_A[0])
                self.modelparms.write("%6.3f " % rmsd_B[0])
                self.modelparms.write("\n")
                ### Entropy Output (second row, same step/front tag)
                self.modelparms.write("%d/%d " % (step, front_count))
                for i in self.parmidx:
                    self.modelparms.write("%6.3f " % x[i])
                self.modelparms.write("%6.3f " % f_A[1])
                self.modelparms.write("%6.3f " % f_B[1])
                self.modelparms.write("%6.3f " % R2_A[1])
                self.modelparms.write("%6.3f " % R2_B[1])
                self.modelparms.write("%6.3f " % rmsd_A[1])
                self.modelparms.write("%6.3f " % rmsd_B[1])
            else:
                mode_error(self.mode)

            if self.mode in [0, 3, 5]:
                ### One row of predictions and model-exp differences.
                for i in range(self.N_len):
                    self.modelprediction.write("%6.3f " % self.fitter._f[i])
                    diff = self.fitter._exp_data[i] - self.fitter._f[i]
                    self.modeldiff.write("%6.3f " % diff)
            elif self.mode in [1, 4, 6, 7]:
                ### First row: column 0 (energy), second row: column 1
                ### (entropy), both tagged with the same step/front.
                for i in range(self.N_len):
                    self.modelprediction.write("%6.3f " % self.fitter._f[i, 0])
                    diff = self.fitter._exp_data[i, 0] - self.fitter._f[i, 0]
                    self.modeldiff.write("%6.3f " % diff)
                self.modelprediction.write("\n")
                self.modelprediction.write("%d/%d " % (step, front_count))
                self.modeldiff.write("\n")
                self.modeldiff.write("%d/%d " % (step, front_count))
                for i in range(self.N_len):
                    self.modelprediction.write("%6.3f " % self.fitter._f[i, 1])
                    diff = self.fitter._exp_data[i, 1] - self.fitter._f[i, 1]
                    self.modeldiff.write("%6.3f " % diff)
            else:
                mode_error(self.mode)

            self.modelparms.write("\n")
            self.modelprediction.write("\n")
            self.modeldiff.write("\n")

    self.modelparms.write("\n")
    self.modelprediction.write("\n")
    self.modeldiff.write("\n")
    self.modelparms.close()
    self.modelprediction.close()
    self.modeldiff.close()
def flush(self):
    """Write the most recent optimization step (the last entry of each
    history list) to the three output files and flush them to disk.

    For the scalar modes (0, 3, 5) one row per file is written; for the
    energy/entropy modes (1, 4, 6, 7) two rows are written (energy first,
    then entropy), each prefixed with the current step number.
    """
    ### Every row starts with the current step number.
    self.modelparms.write("%d " % self.step[-1])
    self.modelprediction.write("%d " % self.step[-1])
    self.modeldiff.write("%d " % self.step[-1])

    if self.mode in [0, 3, 5]:
        ### Scalar modes: parameters followed by SSE, R2 and rmsd.
        for i in self.parmidx:
            self.modelparms.write("%6.3f " % self.x[-1][i])
        self.modelparms.write("%6.3f " % self.f_A[-1][0])
        self.modelparms.write("%6.3f " % self.f_B[-1][0])
        self.modelparms.write("%6.3f " % self.R2_A[-1])
        self.modelparms.write("%6.3f " % self.R2_B[-1])
        self.modelparms.write("%6.3f " % self.rmsd_A[-1])
        self.modelparms.write("%6.3f " % self.rmsd_B[-1])
    elif self.mode in [1, 4, 6, 7]:
        ### Energy Output (column/index 0)
        for i in self.parmidx:
            self.modelparms.write("%6.3f " % self.x[-1][i])
        self.modelparms.write("%6.3f " % self.f_A[-1][0])
        self.modelparms.write("%6.3f " % self.f_B[-1][0])
        self.modelparms.write("%6.3f " % self.R2_A[-1][0])
        self.modelparms.write("%6.3f " % self.R2_B[-1][0])
        self.modelparms.write("%6.3f " % self.rmsd_A[-1][0])
        self.modelparms.write("%6.3f " % self.rmsd_B[-1][0])
        self.modelparms.write("\n")
        ### Entropy Output (column/index 1, second row, same step)
        self.modelparms.write("%d " % self.step[-1])
        for i in self.parmidx:
            self.modelparms.write("%6.3f " % self.x[-1][i])
        self.modelparms.write("%6.3f " % self.f_A[-1][1])
        self.modelparms.write("%6.3f " % self.f_B[-1][1])
        ### Note: This line is different from the one in energy output
        self.modelparms.write("%6.3f " % self.R2_A[-1][1])
        self.modelparms.write("%6.3f " % self.R2_B[-1][1])
        self.modelparms.write("%6.3f " % self.rmsd_A[-1][1])
        self.modelparms.write("%6.3f " % self.rmsd_B[-1][1])
    else:
        mode_error(self.mode)

    if self.mode in [0, 3, 5]:
        ### One row of predictions and model-exp differences.
        for i in range(self.N_len):
            self.modelprediction.write("%6.3f " % self.fitter._f[i])
            diff = self.fitter._exp_data[i] - self.fitter._f[i]
            self.modeldiff.write("%6.3f " % diff)
    elif self.mode in [1, 4, 6, 7]:
        ### First row: energy (column 0); second row: entropy (column 1).
        for i in range(self.N_len):
            self.modelprediction.write("%6.3f " % self.fitter._f[i, 0])
            diff = self.fitter._exp_data[i, 0] - self.fitter._f[i, 0]
            self.modeldiff.write("%6.3f " % diff)
        self.modelprediction.write("\n")
        self.modelprediction.write("%d " % self.step[-1])
        self.modeldiff.write("\n")
        self.modeldiff.write("%d " % self.step[-1])
        for i in range(self.N_len):
            self.modelprediction.write("%6.3f " % self.fitter._f[i, 1])
            diff = self.fitter._exp_data[i, 1] - self.fitter._f[i, 1]
            self.modeldiff.write("%6.3f " % diff)
    else:
        mode_error(self.mode)

    self.modelparms.write("\n")
    self.modelprediction.write("\n")
    self.modeldiff.write("\n")

    ### Push the buffered rows to disk without closing the files.
    self.modelparms.flush()
    self.modelprediction.flush()
    self.modeldiff.flush()
def __init__(self,
             fitter,
             mode=0,
             optimizer="basinhopping",
             optparms=None,
             selection_A=None,
             selection_B=None,
             prefix=None,
             verbose=False):
    """Set up the bookkeeping for a fitting run and open the output files.

    Parameters
    ----------
    fitter      : fit-mode object providing pairs, parms, N_pairs/N_case
                  and _exp_data
    mode        : functional mode (0, 1, 3, 4, 5, 6 or 7)
    optimizer   : name of the optimizer (e.g. "basinhopping", "brute")
    optparms    : optional dict of optimizer settings, echoed into the
                  output file headers
    selection_A : indices of the training (A) cases
    selection_B : indices of the test (B) cases
    prefix      : filename prefix for the output files; None is treated
                  as ""
    verbose     : verbosity flag

    Raises TypeError if prefix is neither None nor a str.
    """
    ### BUGFIX: validate and normalize prefix BEFORE storing it. The
    ### original code normalized only the local variable after
    ### self.prefix had already been assigned, so a None prefix
    ### produced files named "Noneparms.out" etc.
    if prefix is None:
        prefix = ""
    elif not isinstance(prefix, str):
        raise TypeError("prefix must be of type str, but is of type %s"
                        % type(prefix))

    self.fitter = fitter
    self.mode = mode
    self.optimizer = optimizer
    self.optparms = optparms
    self.selection_A = selection_A
    self.selection_B = selection_B
    self.prefix = prefix
    self.verbose = verbose

    self.len_A = len(self.selection_A)
    self.len_B = len(self.selection_B)

    self.pairs = fitter.pairs
    self.parms = fitter.parms

    ### Number of data points: pairwise differences or individual cases.
    if self.pairs:
        self.N_len = self.fitter.N_pairs
    else:
        self.N_len = self.fitter.N_case

    ### parmidx enumerates the fitted parameters: the scoring parameters
    ### plus mode-dependent extras (constants and, for modes 5-7, the
    ### cutoffs -- fewer when fitting pairs).
    if self.mode in [0, 3]:
        self.parmidx = range(self.parms)
    elif self.mode in [1, 4]:
        self.parmidx = range(self.parms + 1)
    elif self.mode == 5:
        self.parmidx = range(self.parms + (1 if self.pairs else 2))
    elif self.mode == 6:
        self.parmidx = range(self.parms + (2 if self.pairs else 3))
    elif self.mode == 7:
        self.parmidx = range(self.parms + (4 if self.pairs else 7))
    else:
        mode_error(self.mode)

    ### Experimental reference data split by selection.
    self.exA = self.fitter._exp_data[self.selection_A]
    self.exB = self.fitter._exp_data[self.selection_B]

    ### Per-step optimization history, filled by the caller and written
    ### out by flush()/finish().
    self.x = list()
    self.f_A = list()
    self.f_B = list()
    self.rmsd_A = list()
    self.rmsd_B = list()
    self.step = list()
    self.R2_A = list()
    self.R2_B = list()

    self.__counter = 0

    self.init_output()
def decomposition(gdatarec_lib,
                  gdata_lib,
                  mode,
                  parms=6,
                  pairs=True,
                  parmsfile=None,
                  frag_file=None,
                  map_file=None,
                  radiusadd=[0., 3.],
                  softness=1.,
                  softcut=2.,
                  pairfile=None,
                  exclude=None,
                  paircut=0.0,
                  prefix=None,
                  scaling=2.0,
                  verbose=False):
    """Fragment-based decomposition ("mapout") of the GIST grids.

    Loads the fitting data for the given mode, selects the best parameter
    set from a previously written parms file, decomposes each molecule
    into fragments (either automatically via RDKit or from external
    frag/map files), weights the GIST grids per fragment and matches
    fragments between receptor-pose, complex and ligand libraries.

    NOTE(review): radiusadd=[0., 3.] is a mutable default argument;
    safe only as long as it is never mutated inside this function.
    """
    if verbose:
        print "Start mapout procedure with"
        print "mode      = %d" % mode
        print "softness  = %6.3f" % softness
        print "softcut   = %6.3f" % softcut
        print "parmsfile = %s" % parmsfile
    if verbose:
        print "Organizing and preparing data ..."

    ### Map the functional mode to its fit-mode class.
    mode_dict = dict()
    mode_dict = {
        0: mode0,
        1: mode1,
        3: mode3,
        4: mode4,
        5: mode5,
        6: mode6,
        7: mode7
    }
    if mode in mode_dict.keys():
        fitmode = mode_dict[mode]
    else:
        mode_error(mode)

    ### Modes 0 and 1 use receptor data only; all other modes also carry
    ### complex and ligand data.
    has_cplxlig = True
    if mode in [0, 1]:
        has_cplxlig = False

    fitter = fitmode(gdatarec_lib,
                     gdata_lib,
                     parms=parms,
                     pairs=False,
                     radiusadd=radiusadd,
                     softness=softness,
                     softcut=softcut,
                     scaling=scaling,
                     verbose=verbose)

    parmdict = read_parmsfile(parmsfile)

    ### Find position of the SSE columns in the parms file header.
    A_SSE = -1
    B_SSE = -1
    for i, entry in enumerate(parmdict["header"]):
        if entry.startswith("SSE"):
            if entry.endswith("(A)"):
                A_SSE = i
            elif entry.endswith("(B)"):
                B_SSE = i

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Find the best Candidate Solutions ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###

    ### Collect all the solution candidates.
    N_entries = len(parmdict.keys()) - 1
    A_list = list()
    B_list = list()
    x_list = list()
    A_list_tmp = list()
    B_list_tmp = list()
    x_list_tmp = list()
    for key, value in parmdict.items():
        if key == "header":
            continue
        A_list_tmp.append(value[A_SSE])
        B_list_tmp.append(value[B_SSE])
        x_list_tmp.append(value[:fitter._parms])
    if fitter.decomp:
        ### Decomposed runs store two rows (energy, entropy) per step;
        ### pair them up. Integer division is intended here (Python 2).
        N_entries = N_entries / 2
        for i in range(N_entries):
            A_list.append([
                copy.copy(A_list_tmp[2 * i]),
                copy.copy(A_list_tmp[2 * i + 1])
            ])
            B_list.append([
                copy.copy(B_list_tmp[2 * i]),
                copy.copy(B_list_tmp[2 * i + 1])
            ])
            x_list.append(copy.copy(x_list_tmp[2 * i]))
    else:
        A_list = copy.copy(A_list_tmp)
        B_list = copy.copy(B_list_tmp)
        x_list = copy.copy(x_list_tmp)
    A_list = np.array(A_list)
    B_list = np.array(B_list)

    ### Find the best candidate solution: non-dominated sorting for the
    ### two-objective (energy/entropy) case, plain argsort otherwise.
    if fitter.decomp:
        ndf, dl, dc, ndr = pygmo.fast_non_dominated_sorting(A_list)
        ordered_ndf = list()
        for front in ndf:
            ordered_ndf.append(pygmo.sort_population_mo(A_list[front]))
    else:
        ordered_ndf = np.argsort(A_list, axis=0)
    if fitter.decomp:
        best_x_A = np.array(x_list[ordered_ndf[0][0]])
    else:
        best_x_A = np.array(x_list[ordered_ndf[0]])

    ### ~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Prepare Exclusion List ###
    ### ~~~~~~~~~~~~~~~~~~~~~~ ###
    if exclude != None \
    and exclude != "":
        exclude_list = list()
        with open(exclude, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                for s in l:
                    exclude_list.append(s)
    else:
        exclude_list = list()

    ### ~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Prepare Pairwise Fitting ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~ ###
    if pairs:
        if pairfile != None \
        and pairfile != "":
            if type(pairfile) != str:
                raise TypeError(
                    "The path to pairfile must be of type str, but is of type %s"
                    % type(pairfile))
            pairlist = read_pairsfile(pairfile, paircut)
            ### Translate name pairs into case-index pairs, honoring the
            ### exclusion list and the order given in the pair file.
            pairlist_idx = list()
            for pair in pairlist:
                for i in range(fitter.N_case):
                    case1 = fitter.select[i]
                    name1 = fitter.name[case1]
                    if name1 in exclude_list:
                        continue
                    for j in range(fitter.N_case):
                        if j <= i:
                            continue
                        case2 = fitter.select[j]
                        name2 = fitter.name[case2]
                        if name2 in exclude_list:
                            continue
                        if name1 == pair[0] \
                        and name2 == pair[1]:
                            pairlist_idx.append([case1, case2])
                        elif name1 == pair[1] \
                        and name2 == pair[0]:
                            pairlist_idx.append([case2, case1])
        else:
            ### No pair file: build all unique (i, j) combinations.
            pairlist = None
            pairlist_idx = list()
            for i in range(fitter.N_case):
                name1 = fitter.name[i]
                if name1 in exclude_list:
                    continue
                for j in range(fitter.N_case):
                    if j <= i:
                        continue
                    name2 = fitter.name[j]
                    if name2 in exclude_list:
                        continue
                    pairlist_idx.append([i, j])
    else:
        pairlist = None
        pairlist_idx = None

    ### ~~~~~~~~~~~~~~~~~ ###
    ### Build the Library ###
    ### ~~~~~~~~~~~~~~~~~ ###
    has_extlib = False
    ### Check for external mapping files: frag_file lists fragments as
    ### SMILES, map_file maps each molecule to fragments and atom indices.
    if frag_file != None \
    and frag_file != "":
        has_extlib = True
        ext_frag = list()
        ext_frag_name = list()
        with open(frag_file, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                ext_frag.append(Chem.MolFromSmiles(l[1]))
                ext_frag_name.append(l[0])
    else:
        ext_frag = None
        ext_frag_name = None
    if map_file != None \
    and map_file != "":
        ext_map_frag = list()
        ext_map_inds = list()
        ext_map_name = list()
        with open(map_file, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                ext_map_name.append(l[0])
                ext_map_frag.append(list())
                ext_map_inds.append(list())
                ### Column 1: comma-separated fragment ids ("-1" means the
                ### molecule has no fragments); columns 2+: atom-index
                ### lists, one per fragment.
                ids_list = l[1].split(",")
                if len(ids_list) == 1:
                    if ids_list[0] == "-1":
                        continue
                for i in ids_list:
                    ext_map_frag[-1].append(int(i))
                for s in l[2:]:
                    ext_map_inds[-1].append(list())
                    for i in s.split(","):
                        ext_map_inds[-1][-1].append(int(i))
    else:
        ext_map_frag = None
        ext_map_inds = None
        ext_map_name = None
    if ext_frag == None \
    and ext_map_frag != None:
        raise IOError("Must provide both, frag_file and map_file.")
    if ext_frag != None \
    and ext_map_frag == None:
        raise IOError("Must provide both, frag_file and map_file.")

    if has_extlib:
        mol2extmol = list()
        #frag2extfrag = list()
        if has_cplxlig:
            mol2extmol_cplx = list()
            #frag2extfrag_cplx = list()
            mol2extmol_lig = list()
            #frag2extfrag_lig = list()

    if verbose:
        ### NOTE(review): this bare string is a no-op; presumably
        ### 'print "Starting fragment decomposition..."' was intended.
        "Starting fragment decomposition..."

    ### Random tag for the temporary mol2 files written below.
    RAND = np.random.randint(9999)
    frag_lib = frag_library()
    if has_cplxlig:
        frag_lib_cplx = frag_library()
        frag_lib_lig = frag_library()
    progs = aux_progs(verbose)

    for case in range(fitter.N_case):
        valid_poses = np.where(fitter.ind_case == case)[0]
        name = fitter.name[case]
        for pose in valid_poses:
            ### Write the pose, convert atom types to sybyl with
            ### antechamber, and read it back as an RDKit molecule.
            pmd_instance = fitter.pdat[pose]
            pmd_instance.save("p%d.mol2" % RAND)
            args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                RAND, RAND)
            progs.call(progs.ante_exe, args)
            mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND,
                                       removeHs=False)
            if verbose:
                AllChem.Compute2DCoords(mol)
            if has_extlib:
                index = ext_map_name.index(name)
                frag_list = list()
                for frag_id in ext_map_frag[index]:
                    frag_list.append(ext_frag[frag_id])
                ### If we have an external library with mappings
                ### we must do the refinement manually!
                mol2extmol.append(index)
            else:
                frag_list = get_frag_list(mol)
            frag_lib.add_frag_list(frag_list, mol)
            os.remove("p%d.mol2" % RAND)
            os.remove("p%d_sybyl.mol2" % RAND)
        if has_cplxlig:
            ### Same procedure for the complex and ligand poses.
            valid_poses_cplx = np.where(fitter.ind_case_cplx == case)[0]
            valid_poses_lig = np.where(fitter.ind_case_lig == case)[0]
            for pose in valid_poses_cplx:
                pmd_instance = fitter.pdat_cplx[pose]
                pmd_instance.save("p%d.mol2" % RAND)
                args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                    RAND, RAND)
                progs.call(progs.ante_exe, args)
                mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND,
                                           removeHs=False)
                if verbose:
                    AllChem.Compute2DCoords(mol)
                if has_extlib:
                    index = ext_map_name.index(name)
                    frag_list = list()
                    for frag_id in ext_map_frag[index]:
                        frag_list.append(ext_frag[frag_id])
                    ### If we have an external library with mappings
                    ### we must do the refinement manually!
                    mol2extmol_cplx.append(index)
                else:
                    frag_list = get_frag_list(mol)
                frag_lib_cplx.add_frag_list(frag_list, mol)
                os.remove("p%d.mol2" % RAND)
                os.remove("p%d_sybyl.mol2" % RAND)
            for pose in valid_poses_lig:
                pmd_instance = fitter.pdat_lig[pose]
                pmd_instance.save("p%d.mol2" % RAND)
                args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                    RAND, RAND)
                progs.call(progs.ante_exe, args)
                mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND,
                                           removeHs=False)
                if verbose:
                    AllChem.Compute2DCoords(mol)
                if has_extlib:
                    index = ext_map_name.index(name)
                    frag_list = list()
                    for frag_id in ext_map_frag[index]:
                        frag_list.append(ext_frag[frag_id])
                    ### If we have an external library with mappings
                    ### we must do the refinement manually!
                    mol2extmol_lig.append(index)
                else:
                    frag_list = get_frag_list(mol)
                frag_lib_lig.add_frag_list(frag_list, mol)
                os.remove("p%d.mol2" % RAND)
                os.remove("p%d_sybyl.mol2" % RAND)

    if has_extlib:
        ### Manual refinement: build frag2mol_mapping from the external
        ### atom-index lists instead of frag_library.refine().
        for frag_id in range(frag_lib.N_frag):
            frag_lib.frag2mol_mapping.append(list())
            for mol_id in frag_lib.frag2mol[frag_id]:
                frag_id_rank = frag_lib.mol2frag[mol_id].index(frag_id)
                ext_mol_id = mol2extmol[mol_id]
                if len(ext_map_inds[ext_mol_id]) == 0:
                    ### If we are here, then the molecule has no fragments.
                    ### The molecule is then treated, as if itself would
                    ### be the fragment
                    mol = frag_lib.mol_list[mol_id]
                    matches = range(mol.GetNumAtoms())
                else:
                    matches = ext_map_inds[ext_mol_id][frag_id_rank]
                frag_lib.frag2mol_mapping[-1].append(matches)
        if has_cplxlig:
            for frag_id in range(frag_lib_cplx.N_frag):
                frag_lib_cplx.frag2mol_mapping.append(list())
                for mol_id in frag_lib_cplx.frag2mol[frag_id]:
                    frag_id_rank = frag_lib_cplx.mol2frag[mol_id].index(
                        frag_id)
                    ext_mol_id = mol2extmol_cplx[mol_id]
                    if len(ext_map_inds[ext_mol_id]) == 0:
                        ### If we are here, then the molecule has no fragments.
                        ### The molecule is then treated, as if itself would
                        ### be the fragment
                        mol = frag_lib_cplx.mol_list[mol_id]
                        matches = range(mol.GetNumAtoms())
                    else:
                        matches = ext_map_inds[ext_mol_id][frag_id_rank]
                    frag_lib_cplx.frag2mol_mapping[-1].append(matches)
            for frag_id in range(frag_lib_lig.N_frag):
                frag_lib_lig.frag2mol_mapping.append(list())
                for mol_id in frag_lib_lig.frag2mol[frag_id]:
                    frag_id_rank = frag_lib_lig.mol2frag[mol_id].index(frag_id)
                    ext_mol_id = mol2extmol_lig[mol_id]
                    if len(ext_map_inds[ext_mol_id]) == 0:
                        ### If we are here, then the molecule has no fragments.
                        ### The molecule is then treated, as if itself would
                        ### be the fragment
                        mol = frag_lib_lig.mol_list[mol_id]
                        matches = range(mol.GetNumAtoms())
                    else:
                        matches = ext_map_inds[ext_mol_id][frag_id_rank]
                    frag_lib_lig.frag2mol_mapping[-1].append(matches)
    else:
        frag_lib.refine()
        if has_cplxlig:
            frag_lib_cplx.refine()
            frag_lib_lig.refine()

    if verbose:
        print "Poses Fragments..."
        for case in range(fitter.N_case):
            name = fitter.name[case]
            valid_poses = np.where(fitter.ind_case == case)[0]
            print name,
            for pose in valid_poses:
                print frag_lib.mol2frag[pose],
            print ""
        frag_lib.draw("pos_")
        if has_cplxlig:
            print "Cplx Fragments..."
            for case in range(fitter.N_case):
                name = fitter.name[case]
                valid_poses = np.where(fitter.ind_case_cplx == case)[0]
                print name,
                for pose in valid_poses:
                    print frag_lib_cplx.mol2frag[pose],
                print ""
            frag_lib_cplx.draw("cplx_")
            print "Lig Fragments..."
            for case in range(fitter.N_case):
                name = fitter.name[case]
                valid_poses = np.where(fitter.ind_case_lig == case)[0]
                print name,
                for pose in valid_poses:
                    print frag_lib_lig.mol2frag[pose],
                print ""
            frag_lib_lig.draw("lig_")

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Calculate the Fragment weightings ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if verbose:
        print "Calculate fragment weightings..."
    ### Constructor for weight_fitting:
    ### def __init__(self, fitter, x, frag_library, prefix=None, verbose=False):
    ### NOTE(review): the comment above does not match the six arguments
    ### passed below -- the signature comment is probably outdated.
    weight = weight_fitting(fitter, best_x_A, pairs, frag_lib, "pos",
                            verbose)
    weight.process_rec = True
    weight.process_cplx = False
    weight.process_lig = False
    if has_cplxlig:
        weight_cplx = weight_fitting(fitter, best_x_A, pairs, frag_lib_cplx,
                                     "cplx", verbose)
        weight_cplx.process_rec = False
        weight_cplx.process_cplx = True
        weight_cplx.process_lig = False
        weight_lig = weight_fitting(fitter, best_x_A, pairs, frag_lib_lig,
                                    "lig", verbose)
        weight_lig.process_rec = False
        weight_lig.process_cplx = False
        weight_lig.process_lig = True

    ### Make the fragment-based decomposition of the GIST grids
    for case in range(fitter.N_case):
        weight.set_case(case)
        ### Use the internal write routine as a callback for the process routine
        weight.process(weight.simple_weighting)
        if has_cplxlig:
            weight_cplx.set_case(case)
            weight_lig.set_case(case)
            weight_cplx.process(weight_cplx.simple_weighting)
            weight_lig.process(weight_lig.simple_weighting)

    ### Combine the individual poses and get the final
    ### contributions of the fragments
    calc_data = np.zeros((2, fitter.N_case, frag_lib.N_frag), dtype=DOUBLE)
    frag_assign = np.zeros((fitter.N_case, frag_lib.N_frag), dtype=int)
    frag_assign[:] = -1
    if has_cplxlig:
        calc_data_cplx = np.zeros((2, fitter.N_case, frag_lib_cplx.N_frag),
                                  dtype=DOUBLE)
        frag_assign_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag),
                                    dtype=int)
        frag_assign_cplx[:] = -1
        calc_data_lig = np.zeros((2, fitter.N_case, frag_lib_lig.N_frag),
                                 dtype=DOUBLE)
        frag_assign_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag),
                                   dtype=int)
        frag_assign_lig[:] = -1
    for case in range(fitter.N_case):
        weight.set_case(case)
        _data, _assign = weight.combine()
        calc_data[0, case, :] = np.copy(_data[0])
        calc_data[1, case, :] = np.copy(_data[1])
        frag_assign[case, :] = np.copy(_assign)
        if has_cplxlig:
            weight_cplx.set_case(case)
            _data, _assign = weight_cplx.combine()
            calc_data_cplx[0, case, :] = np.copy(_data[0])
            calc_data_cplx[1, case, :] = np.copy(_data[1])
            frag_assign_cplx[case, :] = np.copy(_assign)
            weight_lig.set_case(case)
            _data, _assign = weight_lig.combine()
            calc_data_lig[0, case, :] = np.copy(_data[0])
            calc_data_lig[1, case, :] = np.copy(_data[1])
            frag_assign_lig[case, :] = np.copy(_assign)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Evaluate the Fragment Properties ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if has_cplxlig:
        ### Match each receptor-pose fragment with its counterpart in the
        ### complex and ligand libraries by graph identity.
        case2frag_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag),
                                  dtype=int)
        case2frag_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag),
                                 dtype=int)
        case2frag_cplx[:] = -1
        case2frag_lig[:] = -1
        for case in range(fitter.N_case):
            valids = np.where(frag_assign[case] > -1)[0]
            valids_cplx = np.where(frag_assign_cplx[case] > -1)[0]
            valids_lig = np.where(frag_assign_lig[case] > -1)[0]
            for frag_id in frag_assign[case, valids]:
                frag_lib.qp.makeDummiesQueries = False
                frag_lib_cplx.qp.makeDummiesQueries = False
                frag_lib_lig.qp.makeDummiesQueries = False
                frag = Chem.AdjustQueryProperties(frag_lib.frag_list[frag_id],\
                                                  frag_lib.qp)
                for frag_id_cplx in frag_assign_cplx[case, valids_cplx]:
                    frag_cplx = Chem.AdjustQueryProperties(frag_lib_cplx.frag_list[frag_id_cplx],\
                                                           frag_lib_cplx.qp)
                    if are_mol_same(frag, frag_cplx, useChirality=True):
                        case2frag_cplx[case, frag_id_cplx] = frag_id
                        break
                for frag_id_lig in frag_assign_lig[case, valids_lig]:
                    frag_lig = Chem.AdjustQueryProperties(frag_lib_lig.frag_list[frag_id_lig],\
                                                          frag_lib_lig.qp)
                    if are_mol_same(frag, frag_lig, useChirality=True):
                        case2frag_lig[case, frag_id_lig] = frag_id
                        break
def gistmodel(gdatarec_lib, gdata_lib, mode, parms=6, pairs=False,
              decomp_E=False, decomp_S=False, optimizer='evolution',
              niter=500, nmin=1000, popsize=50, stepsize=0.05,
              verbose=False, kforce=100., gradient=False, boundary=False,
              radiusadd=None, boundsfile=None, softness=1., softcut=2.,
              pairfile=None, exclude=None, paircut=0.0, shuffle=False,
              ksplit=5, ksplitfile=None, prefix=None, scaling=2.0,
              parmsfile=None):
    """Fit a GIST-based scoring functional with k-fold cross validation.

    Builds the fitter for the requested functional ``mode`` (one of
    0, 1, 3, 4, 5, 6, 7), splits the data set into ``ksplit`` groups
    (generated randomly or read from ``ksplitfile``), and for each split
    either optimizes the functional parameters or — when ``parmsfile``
    is given — re-evaluates previously fitted parameter sets.  Results
    are written through ``print_fun`` objects using the prefix
    "k<i>.<prefix>".

    Raises
    ------
    TypeError
        If ``boundsfile`` or ``pairfile`` is given but is not a str path.
    ValueError
        If ``ksplit`` does not occur as a group label in ``ksplitfile``.
    """
    ### Avoid a shared mutable default argument; [0., 3.] is the
    ### historical default radius shell pair.
    if radiusadd is None:
        radiusadd = [0., 3.]

    if verbose:
        print("Start optimization with")
        print("mode = %d" % mode)
        print("optimizer = %s" % optimizer)
        print("niter = %d" % niter)
        print("nmin = %d" % nmin)
        if optimizer == "evolution":
            print("popsize = %d" % popsize)
        print("kforce = %6.3f" % kforce)
        print("gradient = %s" % gradient)
        print("boundary = %s" % boundary)
        print("pairs = %s" % pairs)
        print("decomp_E = %s" % decomp_E)
        print("decomp_S = %s" % decomp_S)
        if pairs:
            print("pairfile = %s" % pairfile)
            print("paircut = %6.3f" % paircut)
        print("softness = %6.3f" % softness)
        print("softcut = %6.3f" % softcut)
        print("shuffle = %s" % shuffle)
        print("ksplit = %d" % ksplit)
        if ksplitfile is not None and ksplitfile != "":
            print("Splitfile %s" % ksplitfile)
        if exclude is not None and exclude != "":
            print("Exclude file %s" % exclude)

    ### Record the run configuration; this dict is echoed into the
    ### output headers by print_fun.
    optparms = OrderedDict()
    optparms["Niter "] = niter
    optparms["optimizer "] = optimizer
    if optimizer == "evolution":
        optparms["Population size "] = popsize
    elif optimizer == "brute":
        optparms["Stepsize "] = stepsize
    else:
        optparms["Nmin "] = nmin
    optparms["k_rstr "] = kforce
    optparms["Radius add "] = radiusadd
    optparms["Softness "] = softness
    optparms["Softcut "] = softcut
    optparms["Functional "] = mode
    optparms["Analytic Gradient "] = gradient
    optparms["Analytic Boundaries "] = boundary
    optparms["Pairs "] = pairs
    optparms["Scoring "] = parms
    if boundsfile is not None and boundsfile != "":
        optparms["Boundsfile "] = boundsfile
    else:
        optparms["Boundsfile "] = "None"
    if pairfile is not None and pairfile != "":
        optparms["Pairfile "] = pairfile
    else:
        optparms["Pairfile "] = "None"
    optparms["paircut "] = paircut
    optparms["shuffle "] = shuffle
    optparms["ksplit "] = ksplit
    if ksplitfile is not None and ksplitfile != "":
        optparms["Splitfile "] = ksplitfile
    if exclude is not None and exclude != "":
        optparms["Exclude file "] = exclude

    if verbose:
        print("Organizing and preparing data ...")

    ### Dispatch table from functional mode number to fitter class.
    mode_dict = {
        0: mode0,
        1: mode1,
        3: mode3,
        4: mode4,
        5: mode5,
        6: mode6,
        7: mode7
    }
    if mode in mode_dict:
        fitmode = mode_dict[mode]
    else:
        mode_error(mode)

    if boundsfile is not None and boundsfile != "":
        if type(boundsfile) != str:
            raise TypeError(
                "The path to boundsfile must be of type str, but is of type %s"
                % type(boundsfile))
        boundsdict = read_boundsfile(boundsfile)
    else:
        boundsdict = None

    if ksplitfile is not None and ksplitfile != "":
        ### We need to know some information about the data in advance.
        ### Therefore, we preload just the metadata (this should be fast).
        if verbose:
            print("Preloading the gdat lib ...")
        fit_lib = gdat_fit_lib(gdatarec_dict=gdatarec_lib,
                               gdata_dict=gdata_lib,
                               ref_energy=-11.108,
                               mode=mode,
                               radiusadd=radiusadd,
                               softness=softness,
                               softcut=softcut,
                               exclude=None,
                               scaling=scaling,
                               verbose=verbose)
        fit_lib.load_metadata()
        k_groups = list()
        include_list = list()
        exclude_list = list()
        if pairs:
            pairlist = list()
        else:
            pairlist = None
        with open(ksplitfile, "r") as fopen:
            ### BUGFIX: the original nested "for line in fopen" inside an
            ### identical outer loop, which consumed (and silently skipped)
            ### the first line of the split file.
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                if pairs:
                    ### Pair rows: name1 name2 group
                    pairlist.append([l[0], l[1]])
                    k_groups.append(int(l[2]))
                    include_list.append(l[0])
                    include_list.append(l[1])
                else:
                    ### Single rows: name group
                    include_list.append(l[0])
                    k_groups.append(int(l[1]))
        ### Everything not listed in the split file is excluded from the fit.
        for name in fit_lib.name:
            if name not in include_list:
                exclude_list.append(name)
        if ksplit not in k_groups:
            ### BUGFIX: the original message lacked the "% ksplit" argument
            ### and would have printed a literal "%d".
            raise ValueError(
                "ksplit %d value is not found in ksplit file." % ksplit)
        k_groups = np.array(k_groups)
        del fit_lib
    else:
        if pairs:
            if pairfile is not None and pairfile != "":
                if type(pairfile) != str:
                    raise TypeError(
                        "The path to pairfile must be of type str, but is of type %s"
                        % type(pairfile))
                pairlist = read_pairsfile(pairfile, paircut)
            else:
                pairlist = None
        else:
            pairlist = None
        if exclude is not None and exclude != "":
            exclude_list = list()
            with open(exclude, "r") as fopen:
                for line in fopen:
                    l = line.rstrip().lstrip().split()
                    if len(l) == 0:
                        continue
                    if l[0].startswith("#"):
                        continue
                    for s in l:
                        exclude_list.append(s)
        else:
            exclude_list = None

    fitter = fitmode(gdatarec_lib,
                     gdata_lib,
                     parms=parms,
                     pairs=pairs,
                     radiusadd=radiusadd,
                     softness=softness,
                     softcut=softcut,
                     boundsdict=boundsdict,
                     pairlist=pairlist,
                     exclude=exclude_list,
                     decomp_E=decomp_E,
                     decomp_S=decomp_S,
                     verbose=verbose)
    fitter.anal_boundary = boundary

    if shuffle:
        ### Randomly permute experimental data and names together,
        ### decoupling them from the computed data (y-scrambling control).
        rand = np.arange(fitter._exp_data.shape[0])
        np.random.shuffle(rand)
        _exp = copy.copy(fitter._exp_data)
        _name = copy.copy(fitter.name)
        for i, r in enumerate(rand):
            fitter._exp_data[i] = _exp[r]
            fitter.name[i] = _name[r]

    if ksplitfile is None or ksplitfile == "":
        ### No external split file: generate the k groups and iterate
        ### over all of them.
        if pairs:
            k_groups = generate_ksplits(ksplit, fitter.N_pairs)
        else:
            k_groups = generate_ksplits(ksplit, fitter.N_case)
        ksplitlist = range(ksplit)
    else:
        ksplitlist = [ksplit]
        ### Correct for different list ordering: remap k_groups from
        ### split-file order to fitter order.
        ### BUGFIX: write into a copy; the original assigned
        ### k_groups[i] = k_groups[k] in place, so entries could be
        ### overwritten before they were read.
        _remapped = np.copy(k_groups)
        if not pairs:
            for k, name in enumerate(include_list):
                i = fitter.name.index(name)
                _remapped[i] = k_groups[k]
        else:
            for k, name in enumerate(pairlist):
                i = fitter.name.index("%s-%s" % (name[0], name[1]))
                _remapped[i] = k_groups[k]
        k_groups = _remapped

    if parmsfile is not None:
        parmdict = read_parmsfile(parmsfile)

    for i in ksplitlist:
        ### Group i is held out as the test set; the rest is trained on.
        test_group = np.where(k_groups == i)[0]
        train_group = np.where(k_groups != i)[0]
        fitter.set_selection(train_group)
        if not shuffle:
            fitter.set_functional()
            fitter.set_bounds()
            fitter.set_step()
            fitter.set_x0()
        if parmsfile is not None:
            ### Re-evaluate each stored parameter set instead of optimizing.
            _print_fun = print_fun(fitter=fitter,
                                   mode=mode,
                                   optimizer="brute",
                                   optparms=optparms,
                                   selection_A=train_group,
                                   selection_B=test_group,
                                   prefix="k%d." % i + prefix,
                                   verbose=verbose)
            for key, value in parmdict.items():
                if key == "header":
                    continue
                x = np.array(value[:fitter._parms])
                _print_fun(x)
                _print_fun.flush()
        else:
            if verbose:
                print("Start optimization for ksplit=%d ..." % i)
            _print_fun = print_fun(fitter=fitter,
                                   mode=mode,
                                   optimizer=optimizer,
                                   optparms=optparms,
                                   selection_A=train_group,
                                   selection_B=test_group,
                                   prefix="k%d." % i + prefix,
                                   verbose=verbose)
            fitter.optimize(niter=niter,
                            nmin=nmin,
                            kforce=kforce,
                            gradient=gradient,
                            print_fun=_print_fun,
                            popsize=popsize,
                            stepsize=stepsize,
                            optimizer=optimizer)
        if verbose:
            print("Generating output ...")
        _print_fun.finish()
def mapout(gdatarec_lib, gdata_lib, mode, parms=6, pairs=False,
           parmsfile=None, radiusadd=[0., 3.], softness=1., softcut=2.,
           exclude=None, prefix=None, scaling=2.0, verbose=False):
    """Write GIST grid maps for the best parameter set found in a parms file.

    Builds a fitter for functional ``mode``, locates the parameter set with
    the best SSE(A) score in ``parmsfile``, dumps the raw receptor (and, for
    modes other than 0/1/2, complex and ligand) dx grids, and finally writes
    the pre-processed xyz maps through ``mapout_maps``.

    NOTE(review): ``radiusadd=[0., 3.]`` is a mutable default argument;
    harmless only as long as no callee mutates it — confirm.
    Raises IOError when ``parmsfile`` is not given.
    """
    if verbose:
        print "Start mapout procedure with"
        print "mode = %d" % mode
        print "softness = %6.3f" % softness
        print "softcut = %6.3f" % softcut
    if verbose:
        print "Organizing and preparing data ..."
    ### Dispatch table from functional mode number to fitter class.
    ### NOTE(review): the first assignment is immediately overwritten
    ### and is redundant.
    mode_dict = dict()
    mode_dict = {
        0: mode0,
        1: mode1,
        3: mode3,
        4: mode4,
        5: mode5,
        6: mode6,
        7: mode7
    }
    if mode in mode_dict.keys():
        fitmode = mode_dict[mode]
    else:
        mode_error(mode)
    fitter = fitmode(gdatarec_lib,
                     gdata_lib,
                     parms=parms,
                     pairs=False,
                     radiusadd=radiusadd,
                     softness=softness,
                     softcut=softcut,
                     scaling=scaling,
                     verbose=verbose)
    ### Find position of the SSE columns in the parms file header.
    ### SSE(A) scores the training selection, SSE(B) the test selection.
    if parmsfile == None:
        raise IOError("Must provide parmsfile.")
    parmdict = read_parmsfile(parmsfile)
    A_SSE = -1
    B_SSE = -1
    for i, entry in enumerate(parmdict["header"]):
        if entry.startswith("SSE"):
            if entry.endswith("(A)"):
                A_SSE = i
            elif entry.endswith("(B)"):
                B_SSE = i
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Find the best Candidate Solutions ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Collect all the solutions ("header" is not a solution entry).
    N_entries = len(parmdict.keys()) - 1
    A_list = list()
    B_list = list()
    x_list = list()
    A_list_tmp = list()
    B_list_tmp = list()
    x_list_tmp = list()
    for key, value in parmdict.items():
        if key == "header":
            continue
        A_list_tmp.append(value[A_SSE])
        B_list_tmp.append(value[B_SSE])
        x_list_tmp.append(value[:fitter._parms])
    if fitter.decomp:
        ### Decomposed fits store energy and entropy scores as two
        ### consecutive rows per solution; pair them up here.
        ### NOTE(review): relies on Py2 integer division for N_entries.
        N_entries = N_entries / 2
        for i in range(N_entries):
            A_list.append([
                copy.copy(A_list_tmp[2 * i]),
                copy.copy(A_list_tmp[2 * i + 1])
            ])
            B_list.append([
                copy.copy(B_list_tmp[2 * i]),
                copy.copy(B_list_tmp[2 * i + 1])
            ])
            x_list.append(copy.copy(x_list_tmp[2 * i]))
    else:
        A_list = copy.copy(A_list_tmp)
        B_list = copy.copy(B_list_tmp)
        x_list = copy.copy(x_list_tmp)
    A_list = np.array(A_list)
    B_list = np.array(B_list)
    ### Find the best solution.  For decomposed (two-objective) fits use
    ### pygmo's non-dominated sorting; otherwise a plain argsort on SSE(A).
    if fitter.decomp:
        ndf, dl, dc, ndr = pygmo.fast_non_dominated_sorting(A_list)
        ordered_ndf = list()
        for front in ndf:
            ordered_ndf.append(pygmo.sort_population_mo(A_list[front]))
        ### NOTE(review): sort_population_mo returns indices local to each
        ### front — the first entry of the first front is taken as best;
        ### verify this mapping against pygmo's documentation.
    else:
        ordered_ndf = np.argsort(A_list, axis=0)
    if fitter.decomp:
        best_x_A = np.array(x_list[ordered_ndf[0][0]])
    else:
        best_x_A = np.array(x_list[ordered_ndf[0]])
    ### ~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Prepare Exclusion List ###
    ### ~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Whitespace-separated names; lines starting with "#" are comments.
    if exclude != None \
    and exclude != "":
        exclude_list = list()
        with open(exclude, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                for s in l:
                    exclude_list.append(s)
    else:
        exclude_list = list()
    ### ~~~~~~~~~~~~~~~~~~ ###
    ### Write out the maps ###
    ### ~~~~~~~~~~~~~~~~~~ ###
    ### Write out un-processed dx grids.  Modes 0/1/2 use receptor grids
    ### only; all other modes also write complex and ligand grids.
    if mode in [0, 1, 2]:
        counter = 0
        for rec_keys in fitter.gdatarec_dict.keys():
            recdict = fitter.gdatarec_dict[rec_keys]
            title = recdict["title"]
            if title == None:
                name = "%d" % counter
            else:
                name = title
            for i in range(len(recdict["receptor"])):
                if recdict["receptor"][i]["gdat"] == None:
                    continue
                write_maps(recdict["receptor"][i]["gdat"],
                           prefix="rec_%s_%d" % (name, i),
                           pymol=True)
            counter += 1
    else:
        ### NOTE(review): this receptor loop duplicates the branch above
        ### verbatim; a shared helper would remove the duplication.
        counter = 0
        for rec_keys in fitter.gdatarec_dict.keys():
            recdict = fitter.gdatarec_dict[rec_keys]
            title = recdict["title"]
            if title == None:
                name = "%d" % counter
            else:
                name = title
            for i in range(len(recdict["receptor"])):
                if recdict["receptor"][i]["gdat"] == None:
                    continue
                write_maps(recdict["receptor"][i]["gdat"],
                           prefix="rec_%s_%d" % (name, i),
                           pymol=True)
            counter += 1
        counter = 0
        for cplx_keys in fitter.gdata_dict.keys():
            cplxdict = fitter.gdata_dict[cplx_keys]
            if cplxdict["title"] in fitter.exclude:
                continue
            title = cplxdict["title"]
            if title == None:
                name = "%d" % counter
            else:
                name = title
            _N_dict = len(cplxdict["complex"])
            for i in range(_N_dict):
                if cplxdict["complex"][i]["gdat"] == None:
                    continue
                write_maps(cplxdict["complex"][i]["gdat"],
                           prefix="cplx_%s_%d" % (name, i),
                           pymol=True)
            _N_dict = len(cplxdict["ligand"])
            for i in range(_N_dict):
                if cplxdict["ligand"][i]["gdat"] == None:
                    continue
                write_maps(cplxdict["ligand"][i]["gdat"],
                           prefix="lig_%s_%d" % (name, i),
                           pymol=True)
            counter += 1
    ### Write out pre-processed xyz grids using the best parameter set.
    m = mapout_maps(fitter, best_x_A, pairs, prefix)
    if mode in [0, 1]:
        m.process_rec = True
        m.process_cplx = False
        m.process_lig = False
    else:
        m.process_rec = True
        m.process_cplx = True
        m.process_lig = True
    for case in range(fitter.N_case):
        if fitter.name[case] in exclude_list:
            continue
        m.set_case(case)
        ### Internal write routine as a callback to the process routine.
        m.process(m.write)