Python validate_conf 예제들, PLg09.g09prepare.validate_conf Python 예제들

예제 #1

0

파일 보기

파일: PrepareAndJobControl.py 프로젝트: penglian518/CADD_QM

    def Submit_jobs(self, conf):
        """Run the "submit all" script on the remote cluster over ssh.

        The stdout of the remote script is read as alternating lines that are
        paired two by two (line 0 with line 1, line 2 with line 3, ...).  A
        trailing unpaired line is dropped.

        :param conf: calculation configuration; it is passed through
            ``g09prepare.validate_conf`` before use.
        :return: list of 2-tuples, in the format of
            ``[(step1_1_10_a.-1, 34030.or-condo-pbs01), (), (), ...]``.
        :raises subprocess.CalledProcessError: if the ssh command exits with a
            non-zero status.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars (only submit_all_file is used below)
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        Remote_folder = '%s/%s/' % (conf['Remote_calculation_folder_name'], conf['Group_name'])

        Cmd = 'ssh %s "%s/%s"' % \
              (conf['Remote_cluster_name'], Remote_folder, os.path.basename(submit_all_file))

        # run the command and capture the output
        Output = subprocess.check_output(Cmd.split())

        # format the output: split once, then pair even-indexed lines with
        # the odd-indexed line that follows them
        lines = Output.strip().split('\n')
        Output_list = list(zip(lines[0::2], lines[1::2]))

        # show the results
        logging.info('\nRun the submit script %s:' % os.path.basename(submit_all_file))
        logging.info(Cmd + '\n')
        logging.info(Output_list)

        return Output_list

예제 #2

0

파일 보기

파일: CalAndPlot.py 프로젝트: penglian518/CADD_QM

    def get_xyz_for_a_compound(self, conf, cpd, logfile_step=2, overwrite=True):
        """Produce the xyz structure file of a compound in the local output folder.

        If a ``bolzmann/<cpd>/xyz`` folder with conformations exists, the first
        conformation (after sorting by the integer that follows ``_l_Xe_r_`` in
        the file name) is copied over the compound's xyz file.  Otherwise the
        structure is read from a Gaussian log file of step ``logfile_step``
        and written as xyz.

        :param conf: calculation configuration (validated here).
        :param cpd: compound (folder) name.
        :param logfile_step: step number whose ``step<N>_*.log`` files are used.
        :param overwrite: passed to ``mol.write`` when writing the xyz file.
        :return: None.  Problems finding log files are logged, not raised.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # input/output
        folder = '%s/%s' % (folder_path, cpd)
        output_dir = '../%s/%s/xyz' % (conf['Local_output_folder_name'], conf['Group_name'])
        fout = '%s/%s.xyz' % (output_dir, cpd)

        try:
            os.makedirs(output_dir)
        except OSError:
            # best effort: typically the folder already exists
            pass

        #### for bolzmann sampling structures
        bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
        pre_dir = '../%s/%s/' % (conf['Local_output_folder_name'], conf['Group_name'])
        try:
            bolz_conformations = sorted(os.listdir('%s/xyz' % bolz_dir),
                                        key=lambda x: int(x.split('_l_Xe_r_')[1].split('_')[0]))
        except (OSError, IndexError, ValueError):
            # no sampling folder, or a file name without the expected pattern
            bolz_conformations = []

        # check Bolzmann first!
        # if there is, use the first one in the sorted list
        if bolz_conformations:
            bolz_mol = bolz_conformations[0][:-4]

            # update the xyz
            pre_xyz = '%s/xyz/%s.xyz' % (pre_dir, cpd)
            cur_xyz = '%s/xyz/%s.xyz' % (bolz_dir, bolz_mol)
            shutil.copy(cur_xyz, pre_xyz)

            logging.info('Using bolzimann structure.\nCopying %s to %s' % (cur_xyz, pre_xyz))
            return

        # read log files
        prefix = 'step%s_' % str(logfile_step)
        try:
            logfiles = [i for i in os.listdir(folder) if i.endswith('.log') and i.startswith(prefix)]
        except OSError:
            logfiles = []

        # a missing folder and a folder without matching logs are both reported
        # here; indexing an empty list below would raise IndexError otherwise
        if not logfiles:
            logging.error('Cannot find log files for %s for Step %s' % (folder, str(logfile_step)))
            return

        # NOTE(review): os.listdir order is arbitrary, so "last" log file is
        # not guaranteed to be the highest substep - confirm intended choice.
        mol = next(pybel.readfile('g09', '%s/%s' % (folder, logfiles[-1])))

        # write xyz
        mol.write('xyz', fout, overwrite=overwrite)

        return

예제 #3

0

파일 보기

파일: PrepareAndJobControl.py 프로젝트: penglian518/CADD_QM

    def Check_current_jobs(self, conf, user='p6n'):
        """Return the IDs of the jobs that ``qstat`` still lists as not completed.

        Runs ``/opt/torque/bin/qstat -u <user>`` on the remote cluster over
        ssh.  The first five output lines and the last element of the
        newline split are skipped, the remaining lines are split on
        whitespace, and every row whose 10th column is not ``'C'`` contributes
        its first column.

        :param conf: calculation configuration (validated here).
        :param user: cluster user name handed to ``qstat -u``; defaults to the
            previously hard-coded ``'p6n'``.
        :return: list of job IDs (first column of the queue table); empty when
            there is no output or no job rows.
        :raises subprocess.CalledProcessError: if the ssh command fails.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        Cmd = 'ssh %s /opt/torque/bin/qstat -u %s' % (conf['Remote_cluster_name'], user)

        # run the command and capture the output
        Output = subprocess.check_output(Cmd.split())

        Output_list = []
        if len(Output) > 0:
            # put the queue info into a dataframe
            df = pd.DataFrame([i.split() for i in Output.split('\n')[5:-1]])

            if df.shape[1] > 9:
                # filter the completed jobs, and get the current job IDs
                Output_list = list(df[df.iloc[:, 9] != 'C'].iloc[:, 0].values)
            elif not df.empty:
                # rows exist but the state column is missing: do not guess
                logging.warning('Unexpected qstat output, cannot read the job states:\n%s' % Output)

        return Output_list

예제 #4

0

파일 보기

파일: CalAndPlot.py 프로젝트: penglian518/CADD_QM

    def plot_result_for_a_compound(self, conf, cpd, overwrite=True):
        """Plot the results of a compound and move the figures to the output folder.

        If a ``bolzmann/<cpd>`` folder exists, only ``self.get_energies`` is
        called and nothing is plotted.  Otherwise ``g09pyGauss.py all`` is run
        on the compound folder and every ``.png`` found there is moved to
        ``../<output>/<group>/fig``.

        :param conf: calculation configuration (validated here).
        :param cpd: compound (folder) name.
        :param overwrite: forwarded to the plot script; when False, figures
            that already exist in the destination are left untouched.
        :return: None.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # find the command: the script sits next to the constants module
        g09pyGauss = '%s/g09pyGauss.py' % os.path.dirname(inspect.getfile(constants))

        # plot
        logging.info('%s\nPlot for %s' % ('-' * 50, cpd))
        folder = '%s/%s' % (folder_path, cpd)

        # if cpd is sampled, use the bolzmann sampling version.
        bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
        if os.path.exists(bolz_dir):
            self.get_energies(conf, cpd, folder_path)
            return

        plot_cmds = '%s all %s %s' % (g09pyGauss, str(overwrite), folder)
        subprocess.Popen(plot_cmds, shell=True).wait()

        # move ALL figures to ../<output>/<group>/fig
        output_dir = '../%s/%s/fig' % (conf['Local_output_folder_name'], conf['Group_name'])
        try:
            os.makedirs(output_dir)
        except OSError:
            # best effort: typically the folder already exists
            pass

        # move
        for f in [i for i in os.listdir(folder) if i.endswith('.png')]:
            dst_file = '%s/%s' % (output_dir, f)
            if os.path.exists(dst_file):
                if not overwrite:
                    # keep the existing figure, but still handle the others
                    # (returning here would strand every remaining figure)
                    continue
                os.remove(dst_file)

            shutil.move('%s/%s' % (folder, f), dst_file)

        return

예제 #5

0

파일 보기

파일: PrepareAndJobControl.py 프로젝트: penglian518/CADD_QM

    def Delete_remote_mols(self, conf, mols):
        """Delete the folders of the given molecules on the remote cluster.

        Builds ``ssh <cluster> rm -fr <remote>/<group>//<mol> ...`` and runs it.

        :param conf: calculation configuration (validated here).
        :param mols: iterable of molecule (folder) names.  Names that are
            empty, whitespace-only, ``'.'`` or ``'..'`` are skipped, because
            they would resolve to the group folder (or its parent) and
            ``rm -fr`` would wipe all molecules.
        :return: None.
        :raises subprocess.CalledProcessError: if the ssh command fails.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        Remote_folder = '%s/%s/' % (conf['Remote_calculation_folder_name'], conf['Group_name'])

        safe_mols = []
        for m in mols:
            if not m or m.strip() in ('', '.', '..'):
                logging.warning('Skip unsafe molecule name %r when deleting remote folders.' % (m,))
                continue
            safe_mols.append(m)

        if not safe_mols:
            # nothing to delete; do not issue a bare "rm -fr"
            logging.info('\nNo remote molecules to delete.')
            return

        mols_tobe_deleted = ['%s/%s' % (Remote_folder, m) for m in safe_mols]

        Cmd = 'ssh %s rm -fr %s' % (conf['Remote_cluster_name'], ' '.join(mols_tobe_deleted))

        # run the command and capture the output
        Output = subprocess.check_output(Cmd.split())

        # show the results
        if len(Output) > 0:
            logging.info(Output)

        logging.info('\nDeleted remote molecules %s:' % ' '.join(mols_tobe_deleted))
        return

예제 #6

0

파일 보기

파일: InitNewCalculation.py 프로젝트: penglian518/CADD_QM

def Cal_for_picked_mols(conf, picked_mols=(), supercycle=5):
    """Run step 1 (with retries for failed molecules) and then step 2.

    Step 1 is cruised once for ``picked_mols``.  Molecules reported as failed
    by ``Check_and_recal`` get their xyz perturbed and are cruised again, up
    to ``supercycle`` times.  Afterwards ``conf`` is updated with
    ``step2_qm_conf`` and step 2 is cruised once for ``picked_mols``.

    :param conf: calculation configuration (validated here).
    :param picked_mols: molecule names to calculate; nothing is done when empty.
    :param supercycle: maximum number of perturb-and-recalculate rounds.
    :return: None.
    """
    # Configure logging before the first logging call: module-level
    # logging.info() installs a default handler itself, after which
    # basicConfig() would silently do nothing.
    logging.basicConfig(level=logging.INFO)
    logging.info('Starts function "Cal_for_picked_mols"...')

    PrepareJC = PrepareAndJobControl()
    conf = g09prepare.validate_conf(conf)

    if len(picked_mols) == 0:
        logging.error('ERROR: No molecules are specified.')
        return

    def cruise_step1(mols):
        # One cruise of step 1; failures are logged and the workflow goes on.
        try:
            PrepareJC.Cruise_for_one_step(conf,
                                          max_cycles=5,
                                          initNew=True,
                                          deleteDir=True,
                                          gen_inp_for_planB=True,
                                          plan_B=step1_plan_B,
                                          picked_mols_list=mols,
                                          sleep_time_min=5)
        except Exception as e:
            logging.error('\n\nERROR: Something wrong in Cruise for step%s_%s' %
                          (str(conf['Step']), str(conf['Substep'])))
            logging.error('\n\nERROR Message:\n%s\n\n' % e)

    def collect_failed(mols):
        # Ask Check_and_recal which of the given molecules failed; an error
        # while checking is logged and treated as "none failed".
        try:
            return PrepareJC.Check_and_recal(conf,
                                             gen_inp_for_planB=False,
                                             picked_mols_list=mols,
                                             returnMols=True)
        except Exception:
            logging.error('ERROR: Failed to get "failed_mols" for step%s_%s' %
                          (str(conf['Step']), str(conf['Substep'])))
            return []

    ## cruise for step 1
    cruise_step1(picked_mols)

    # make sure Step 1 is finished
    failed_mols = collect_failed(picked_mols)

    ## check cycle for step 1
    for counter in range(1, supercycle + 1):
        if len(failed_mols) == 0:
            logging.warning(
                'No failed mols were found, continue to next step.')
            break

        logging.info('\nThis is the %d cycle for step%s:' %
                     (counter, str(conf['Step'])))
        logging.info('There are %d failed molecules found:\n%s\n' %
                     (len(failed_mols), ' '.join(failed_mols)))

        # perturb the xyz and then recalculate
        for mol in failed_mols:
            PrepareJC.perturb_xyz(conf, mol, offset_factor=0.1)

        cruise_step1(failed_mols)
        failed_mols = collect_failed(failed_mols)

    #### For step 2 ####
    conf.update(step2_qm_conf)

    ## cruise for step 2 (errors here are not caught and propagate to the caller)
    PrepareJC.Cruise_for_one_step(conf,
                                  max_cycles=1,
                                  initNew=True,
                                  deleteDir=False,
                                  gen_inp_for_planB=False,
                                  plan_B=step1_plan_B,
                                  picked_mols_list=picked_mols,
                                  sleep_time_min=5)

    return

예제 #7

0

파일 보기

파일: CalAndPlot.py 프로젝트: penglian518/CADD_QM

    def get_charge_for_a_compound(self, conf, cpd, logfile_step=2, overwrite=True):
        """Extract atomic charges of a compound from a log file and save them as csv.

        Mulliken, APT and natural-population data are read from one
        ``step<logfile_step>_*.log`` file via ``g09checkResults`` and written
        side by side to ``../<output>/<group>/charge/<cpd>.csv``.  If a
        ``bolzmann/<cpd>/xyz`` folder with conformations exists, the log file
        is taken from the folder of the first listed conformation instead of
        the compound's own folder.

        :param conf: calculation configuration (validated here).
        :param cpd: compound (folder) name.
        :param logfile_step: step number whose log files are used.
        :param overwrite: not used by this method.
        :return: None.  Problems finding log files are logged, not raised.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # input/output
        folder = '%s/%s' % (folder_path, cpd)
        output_dir = '../%s/%s/charge' % (conf['Local_output_folder_name'], conf['Group_name'])
        fout = '%s/%s.csv' % (output_dir, cpd)

        #### for bolzmann sampling structures
        bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
        try:
            bolz_conformations = os.listdir('%s/xyz' % bolz_dir)
        except OSError:
            bolz_conformations = []

        # check Bolzmann first!
        if bolz_conformations:
            # NOTE(review): os.listdir order is arbitrary, so the first entry is
            # not necessarily the lowest-energy conformation - confirm whether
            # this should be sorted like in get_xyz_for_a_compound.
            bolz_mol = bolz_conformations[0][:-4]
            folder = '%s/%s' % (folder_path, bolz_mol)

        # read log files (from the sampled or the normal structure folder)
        prefix = 'step%s_' % str(logfile_step)
        try:
            logfiles = [i for i in os.listdir(folder) if i.endswith('.log') and i.startswith(prefix)]
        except OSError:
            logfiles = []

        # a missing folder and a folder without matching logs are both reported
        # here; indexing an empty list below would raise IndexError otherwise
        if not logfiles:
            logging.error('Cannot find log files for %s for Step %s' % (folder, str(logfile_step)))
            return

        try:
            os.makedirs(output_dir)
        except OSError:
            # best effort: typically the folder already exists
            pass

        # close the log file as soon as its lines are in memory
        with open('%s/%s' % (folder, logfiles[-1])) as flog:
            fcon = flog.readlines()

        mul_charge = g09checkResults.getMullikenCharge(fcon)
        apt_charge = g09checkResults.getAPTCharge(fcon)
        natural_charge = g09checkResults.getNaturalPop(fcon)

        df_mul = pd.DataFrame(mul_charge, columns=['No_', 'Atom', 'Mulliken'])
        df_apt = pd.DataFrame(apt_charge, columns=['No_', 'Atom', 'APT'])
        df_natural = pd.DataFrame(natural_charge, columns=['Atom', 'No_', 'Natural', 'Core', 'Valence', 'Rydberg', 'Total'])

        # columns 2..6 of df_natural ('Natural' .. 'Total') are selected by
        # position; plain [] with integers would look them up as labels
        df = pd.concat([df_mul, df_apt['APT'], df_natural.iloc[:, 2:7]], axis=1)

        # save the result
        df.to_csv(fout)

        return

예제 #8

0

파일 보기

파일: CalAndPlot.py 프로젝트: penglian518/CADD_QM

    def cal_logK_for_a_reaction(self, conf, reaction, logKorB='logK', silence=True, calpKa=False, save=True):
        """Calculate logK (or pKa) of one reaction from the energies of its species.

        Energies of all reactants and products are fetched with
        ``self.get_energies``.  Per the note below, each energy record is
        expected to hold 10 items with the free energy at index 8.  When every
        record has that length the constant is computed with
        ``pKa_formula_reaction`` / ``logK_formula_reaction``; otherwise the
        records are logged, the constant is reported as ``0`` and ``deltaG``
        as NaN.

        :param conf: calculation configuration (validated here).
        :param reaction: object providing ``reac`` and ``prod`` (species name
            -> count, as used below), ``param`` (experimental value or None)
            and ``string()``.
        :param logKorB: label stored under ``'Constant'`` when ``calpKa`` is False.
        :param silence: when False, the formatted result is logged.
        :param calpKa: calculate pKa instead of logK.
        :param save: append the formatted result to
            ``../<output>/<Reaction_dataset>``.
        :return: ``pandas.Series`` with the keys Constant, Calculated,
            Experimental, Difference, Reaction, Reactants, ReactantsBSafe,
            Products, ProductsBSafe and deltaG.
        """
        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # get reactants and products (as lists, so they can be reused freely)
        reactants = list(reaction.reac.keys())
        products = list(reaction.prod.keys())

        # bash-safe names: parentheses replaced by _l_ / _r_
        reactants_bsafe = {i: i.replace('(', '_l_').replace(')', '_r_') for i in reactants}
        products_bsafe = {i: i.replace('(', '_l_').replace(')', '_r_') for i in products}

        # get energies for both reactants and products;
        # proton/OH corrections are switched off for the Gsolv_bench.txt dataset
        correct = conf['Reaction_dataset'] not in ['Gsolv_bench.txt']
        En_reactants = {i: self.get_energies(conf, reactants_bsafe[i], folder_path, correctProton=correct,
                                             correctOH=correct, correctCl=False, correctHg=False)
                        for i in reactants}
        En_products = {i: self.get_energies(conf, products_bsafe[i], folder_path, correctProton=correct,
                                            correctOH=correct, correctCl=False, correctHg=False)
                       for i in products}

        def free_energy(en):
            # item 8 of an energy record, or None when the record is too short
            return en[8] if len(en) > 8 else None

        # please refer g09checkResults.finalE for sequence of energies
        # For DFT it is (finalE, cor_z, cor_u, cor_h, cor_g, z, u, h, g, len(steps))
        # For MP2 it is (finalE, cor_z, cor_u, cor_h, cor_g, z, u, h, g, mp2E_tot, mp2E_cor, len(steps))
        energies_ok = all(len(En_reactants[i]) == 10 for i in reactants) \
            and all(len(En_products[i]) == 10 for i in products)

        if energies_ok:
            # gen the deltaG energy list for both reactants and products,
            # repeating each species according to its count in the reaction
            dG_reactants = []
            for r in reactants:
                dG_reactants += [En_reactants[r][8]] * reaction.reac[r]

            dG_products = []
            for p in products:
                dG_products += [En_products[p][8]] * reaction.prod[p]

            # calculate the logK/pKa
            if calpKa:
                calculated = self.pKa_formula_reaction(dG_reactants, dG_products, reaction, waterCorrection=True)
            else:
                calculated = self.logK_formula_reaction(dG_reactants, dG_products, reaction, waterCorrection=True)

            deltaG = (sum(dG_products) - sum(dG_reactants)) * constants.h2kcal
        else:
            logging.error('ERROR: Check the energy items for reactants and products')
            # show details for reactants
            for r in reactants:
                logging.info('Reactant: %s, %s, %s' % (r, reactants_bsafe[r], json.dumps(En_reactants[r])))
            # show details for products
            for p in products:
                logging.info('Products: %s, %s, %s' % (p, products_bsafe[p], json.dumps(En_products[p])))

            # error values
            calculated = 0
            deltaG = float('nan')

        # grab and format the results
        const_name = 'pKa' if calpKa else 'logK'
        output_series = pd.Series(dtype=object)
        output_series['Constant'] = 'pKa' if calpKa else logKorB
        output_series['Calculated'] = calculated

        if reaction.param is None:
            # no experimental value: report it verbatim instead of a number
            result_str = '%s = %.3f, Exp. = %s, Diff = %s' % (
                const_name, calculated, str(reaction.param), str(reaction.param))
            output_series['Experimental'] = str(reaction.param)
            output_series['Difference'] = str(reaction.param)
        else:
            result_str = '%s = %.3f, Exp. = %.3f, Diff. = %.3f' % (
                const_name, calculated, float(reaction.param), float(calculated - reaction.param))
            output_series['Experimental'] = float(reaction.param)
            output_series['Difference'] = float(calculated - reaction.param)

        output_str = '#%s\nReaction: %s; %s\n' % ('-' * 50, reaction.string(), result_str)
        output_series['Reaction'] = reaction.string()

        # show details for reactants
        reactant_str = ''
        reactants_dict = {}
        reactants_bsafe_dict = {}
        for r in reactants:
            g = free_energy(En_reactants[r])
            reactant_str += 'Reactant: %s, %s\n' % (r, str(g))
            reactants_dict[r] = g
            reactants_bsafe_dict[reactants_bsafe[r]] = g
        output_series['Reactants'] = json.dumps(reactants_dict)
        output_series['ReactantsBSafe'] = json.dumps(reactants_bsafe_dict)

        # show details for products
        product_str = ''
        products_dict = {}
        products_bsafe_dict = {}
        for p in products:
            g = free_energy(En_products[p])
            product_str += 'Products: %s, %s\n' % (p, str(g))
            products_dict[p] = g
            products_bsafe_dict[products_bsafe[p]] = g
        output_series['Products'] = json.dumps(products_dict)
        output_series['ProductsBSafe'] = json.dumps(products_bsafe_dict)

        output_str = '%s\n%s%s' % (output_str, reactant_str, product_str)

        # add the energy difference between products and reactant
        output_series['deltaG'] = deltaG

        # show the results
        if not silence:
            logging.info(output_str)

        # save the results
        if save:
            output_dir = '../%s' % conf['Local_output_folder_name']
            output_file = '%s/%s' % (output_dir, conf['Reaction_dataset'])
            try:
                os.mkdir(output_dir)
            except OSError:
                # best effort: typically the folder already exists
                pass

            # save txt
            with open(output_file, 'a') as fout:
                fout.write(output_str)

        # return the results
        return output_series

예제 #9

0

파일 보기

파일: CalAndPlot.py 프로젝트: penglian518/CADD_QM

    def Bolzmann_weighting(self, conf, eps=0.03, minSamples=1):
        """Cluster the conformations of a molecule and compute its Bolzmann weighted energy.

        For every conformation returned by ``self.read_conformations`` the
        energy (item 8 of ``self.get_energies``) and the frame number (5th
        ``_``-separated field of the name) are collected.  The xyz files are
        merged into one pdb, RMS values and clusters are produced through
        ``CSearchRand``, unique structures are picked, the weighted energy is
        computed with ``self.calBolzmannWeightedEN`` and the unique pdb is
        split back into xyz files under ``bolzmann/<mol>/xyz/``.

        :param conf: calculation configuration (validated here).
        :param eps: forwarded to ``CSearchRand.plotCluster``.
        :param minSamples: forwarded to ``CSearchRand.plotCluster``.
        :return: the value returned by ``self.calBolzmannWeightedEN``.
        """
        cs = CSearchRand()

        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # compounds from conf['XYZ_foldername']
        compounds_bashsafe = self.read_conformations(conf)

        # determine mol name
        mol = xyz_folder.split('/')[-2]

        # input/output
        xyz_dir = '../%s/%s/xyz' % (conf['Local_output_folder_name'], conf['Group_name'])
        bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], mol)

        try:
            os.makedirs(bolz_dir)
        except OSError:
            # best effort: typically the folder already exists
            pass

        # variables
        pdb_en = '%s/%s.opted.en' % (bolz_dir, mol)
        pdb_combine = '%s/%s.opted.pdb' % (bolz_dir, mol)
        pdb_rms = '%s/%s.opted.rms' % (bolz_dir, mol)

        clusterFig = '%s/%s.cluster.png' % (bolz_dir, mol)
        clusterCsv = '%s/%s.cluster.csv' % (bolz_dir, mol)

        uniqueCsv = '%s/%s.unique.csv' % (bolz_dir, mol)
        uniquePDB = '%s/%s.unique.pdb' % (bolz_dir, mol)

        bolzCsv = '%s/%s.Bolzmann.csv' % (bolz_dir, mol)

        # get free energy for all conformations
        compounds_bashsafe_updated = []
        Ens = []
        Frames = []

        for c in compounds_bashsafe:
            try:
                i_En = self.get_energies(conf, c, folder_path)[8]
            except Exception:
                # energy not available: keep the conformation with 0.0
                i_En = 0.0

            i_Frames = int(c.split('_')[4])

            # single pre-formatted argument prints the same under Python 2 and 3
            print('%s %s %s' % (c, i_En, i_Frames))

            compounds_bashsafe_updated.append(c)
            Ens.append(i_En)
            Frames.append(i_Frames)

        # combine Ens with compound names; materialise the zip so it can be
        # used twice and converted by numpy
        cpd_Ens = list(zip(compounds_bashsafe_updated, Ens, Frames))
        cpd_Ens_unsorted = np.array(cpd_Ens)
        cpd_Ens = np.array(sorted(cpd_Ens, key=lambda x: x[1]))

        # output en (energy and frame columns, sorted by energy)
        df_tmp = pd.DataFrame(cpd_Ens[:, [1, 2]])
        df_tmp.to_csv(pdb_en, header=False, index=False)

        # convert xyz files to one pdb file
        # structures will be picked from xxx.opted.pdb by its sequence.
        # so, don't sort the write sequence of the structures here.
        with open(pdb_combine, 'w') as fout_pdb:
            for cpd in cpd_Ens_unsorted[:, 0]:
                xyz_file = '%s/%s.xyz' % (xyz_dir, cpd)
                m = next(pybel.readfile('xyz', xyz_file))
                fout_pdb.write(m.write('pdb'))

        # gene rms
        cs.calcRMS(pdb_combine, pdb_rms, debug=False)

        # plot cluster
        cs.plotCluster(pdb_en, pdb_rms, clusterFig, clusterCsv, eps=eps, minSamples=minSamples, debug=False)

        # pick structures
        print(clusterCsv)
        print(pdb_combine)
        print(uniqueCsv)
        print(uniquePDB)
        cs.uniqueClusters(clusterCsv, pdb_combine, uniqueCsv, uniquePDB, debug=False)

        # cal Bolzmann weighted energy
        En_weighed = self.calBolzmannWeightedEN(uniqueCsv, bolzCsv)
        logging.info('The Bolzmann weighted energy for %s is %s' % (mol, str(En_weighed)))

        # split pdb
        cs.splitePDBtoXYZ(uniquePDB, '%s/xyz/' % bolz_dir, cs.outPrefix, mol, debug=False)

        return En_weighed

예제 #10

0

파일 보기

파일: PrepareAndJobControl.py 프로젝트: penglian518/CADD_QM

    def Check_and_recal(self, conf, gen_inp_for_planB=True, plan_B=None, picked_mols_list=('all',), returnMols=False):
        '''
        The purpose is to keep the calculations in that step finished; if not,
        create a new substep with the planB conf.

        Better turn off gen_inp_for_planB, if there are no planB configurations.

        The logic is listed below:
        Starting from substep 1, check the log files one by one, until the
        first one which finished normally is found.
        If none of them finished normally, use plan_B to update conf, then
        generate the input and submit scripts for the next substep.

        :param conf: calculation configuration (validated here; it is updated
            with plan_B and a new 'Substep' when plan B input is generated)
        :param gen_inp_for_planB: generate plan B input and rewrite the
            "submit all" script
        :param plan_B: dict of conf overrides for plan B; None means no overrides
        :param picked_mols_list: ('all',), ('reaction',) (also 'reactions',
            'Reaction', 'Reactions') or a list of mol names without '.xyz'.
            A bare string is treated as a one-element list.
        :param returnMols: return the list of failed mols instead of a bool
        :return: list of failed mols if returnMols, else True when all mols
            converged. Returns 1 if a structure file of a reaction species is
            missing.
        '''
        logging.info('Starts function "Check_and_recal" ...')

        if plan_B is None:
            plan_B = {}

        # A bare string (e.g. the former default ('all'), which is just 'all')
        # must not be iterated character by character.
        if isinstance(picked_mols_list, (str, type(u''))):
            picked_mols_list = (picked_mols_list,)

        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        # NOTE(review): CheckPseudo is computed but not handed to gen_com_pbs
        # below (InitNewCal does pass it) - confirm whether that is intended.
        CheckPseudo = bool(conf['Pseudo'])

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # pick molecules to be calculated in this list, without suffix '.xyz'
        if len(picked_mols_list) == 1 and picked_mols_list[0] == 'all':
            CalReaction = False
            xyz_file_names = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]
        elif len(picked_mols_list) == 1 and picked_mols_list[0] in ['reaction', 'reactions', 'Reaction', 'Reactions']:
            CalReaction = True

            # read reactions
            reactions, reactants, products = self.read_reactions(conf)
            all_strucutres = set(i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz'))

            # check if structure file exist
            xyz_file_names = []
            for cpd in list(set(reactants + products)):
                if cpd in all_strucutres:
                    xyz_file_names.append(cpd)
                else:
                    logging.error('Cannot find the strucutre file for %s' % cpd)
                    return 1
        else:
            CalReaction = True
            xyz_file_names = picked_mols_list

        # get the current log files
        for mol in xyz_file_names:
            self.Rsync_log(conf, mol)

        # check g09 log file one by one
        failed_mols = []
        mol_step_list = []
        for mol in xyz_file_names:
            ## check G09 output
            # path for each mol
            mol_path = '%s/%s' % (folder_path, mol)

            if not os.path.exists(mol_path):
                logging.warning('\nWARN: %s is not exist!' % mol_path)
                continue

            # all log files for this mol, for conf['Step']
            log_files = [i for i in os.listdir(mol_path) if i[-4:] == '.log' and i.startswith('step%s_' % conf['Step'])]
            if len(log_files) == 0:
                logging.warning('WARN: no log files were found for %s' % mol)
                continue

            #### core logic below
            # check the substep logs 1..N in order; stop at the first that passed
            step_num_to_use = False
            for i_log in range(1, len(log_files) + 1):
                log_file = '%s/step%s_%s_%s.log' % (mol_path, conf['Step'], i_log, mol)
                return_from_checkfail = g09checkResults.checkfail(log_file, silence=True)

                if return_from_checkfail == 'pass':
                    step_num_to_use = False
                    break

                # this substep failed: the next free substep number is i_log + 1
                step_num_to_use = i_log + 1

            # step_num_to_use not False means none of the log files of the mol
            # finished normally.
            if step_num_to_use:
                # give possible reason for fail (from the last checked log)
                logging.info('\n' + ' '.join(return_from_checkfail))

                # save the failed mol names
                failed_mols.append(mol)

                # generate input for plan B
                if gen_inp_for_planB:
                    # prepare for plan B
                    conf.update(plan_B)
                    conf['Substep'] = step_num_to_use

                    # generate the input and pbs scripts for plan B
                    self.gen_com_pbs(conf, mol, xyz_folder, folder_path, conf_path, CalReaction)
                    mol_step_list.append((mol, '%s_%s' % (conf['Step'], conf['Substep'])))

        if gen_inp_for_planB:
            # if there are some mols assigned to plan B then prepare the submit all script for them
            if len(failed_mols) > 0:
                submit_all_str = self.gen_submit_all_script_v2(mol_step_list)
            else:
                # all finished, nothing to submit
                submit_all_str = '#!/bin/bash\necho "Nothing to submit.\n"'

            # write the submit all script (the with block closes the file)
            with open(submit_all_file, 'w') as fout_submit:
                fout_submit.write(submit_all_str + '\n')

            # make it executable
            os.system('chmod +x %s' % submit_all_file)

        # prepare for return code
        if len(failed_mols) > 0:
            All_mols_are_converged = False
            logging.warning('The following mols are not converged:\n%s' % ', '.join(failed_mols))
        else:
            All_mols_are_converged = True

        if returnMols:
            return failed_mols
        else:
            return All_mols_are_converged

예제 #11

0

파일 보기

파일: PrepareAndJobControl.py 프로젝트: penglian518/CADD_QM

    def InitNewCal(self, conf, picked_mols_list=('all',), deleteDir=False):
        '''
        Prepare input and PBS script for mols in the picked_mols_list.

        It is used to start new calculations, either for step 1 or the
        following steps. The "submit all" script is rewritten and made
        executable.

        :param conf: calculation configuration (validated here; its 'SCRF'
            entry is set per molecule: False for names ending in '_l_g_r_',
            True otherwise)
        :param picked_mols_list: ('all',), ('reaction',) (also 'reactions',
            'Reaction', 'Reactions') or mol names; only the mol name is
            required, no .xyz is required. e.g. ['mol1', 'mol3'].
            A bare string is treated as a one-element list.
        :param deleteDir: forwarded to gen_com_pbs; when True the remote
            folders of the mols are deleted as well
        :return: None, or 1 if a structure file of a reaction species is missing
        '''
        logging.info('Starts function "InitNewCal"...')

        # A bare string (e.g. the former default ('all'), which is just 'all')
        # must not be iterated character by character.
        if isinstance(picked_mols_list, (str, type(u''))):
            picked_mols_list = (picked_mols_list,)

        # validate the conf first
        conf = g09prepare.validate_conf(conf)

        CheckPseudo = bool(conf['Pseudo'])

        # prepare vars
        folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

        # pick molecules to be calculated in this list, without suffix '.xyz'
        if len(picked_mols_list) == 1 and picked_mols_list[0] == 'all':
            CalReaction = False
            xyz_file_names = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]

        elif len(picked_mols_list) == 1 and picked_mols_list[0] in ['reaction', 'reactions', 'Reaction', 'Reactions']:
            CalReaction = True

            # read reactions
            reactions, reactants, products = self.read_reactions(conf)
            all_strucutres = set(i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz'))

            # check if structure file exist
            xyz_file_names = []
            for cpd in list(set(reactants + products)):
                if cpd in all_strucutres:
                    xyz_file_names.append(cpd)
                else:
                    logging.error('Cannot find the strucutre file for %s' % cpd)
                    return 1
        else:
            CalReaction = True
            xyz_file_names = picked_mols_list

        # generate g09 input files
        mol_step_list = []
        for xyz in xyz_file_names:
            # names ending in '_l_g_r_' are calculated with SCRF switched off
            conf.update({'SCRF': not xyz.endswith('_l_g_r_')})
            self.gen_com_pbs(conf, xyz, xyz_folder, folder_path, conf_path, CalReaction, CheckPseudo, deleteDir)
            mol_step_list.append((xyz, '%s_%s' % (conf['Step'], conf['Substep'])))

        # prepare the submit scripts
        submit_all_str = self.gen_submit_all_script_v2(mol_step_list)

        # write the submitall script (the with block closes the file)
        with open(submit_all_file, 'w') as fout_submit:
            fout_submit.write(submit_all_str + '\n')

        # make it executable
        os.system('chmod +x %s' % submit_all_file)

        # delete remote mols
        if deleteDir:
            self.Delete_remote_mols(conf, xyz_file_names)

        return