Exemple #1
0
def dataer(Dataqueue):
    """Partition the training set into per-cluster subsets and queue them.

    The training-level ``MSet`` (plus the ``HF_resp`` set when
    ``GPARAMS.Esoinn_setting.Ifresp`` is set) is loaded, filtered through
    ``Check_MSet(..., level=1)`` and saved back.  Every molecule is then
    appended to the subsets selected for it: the clusters returned by
    ``Model.find_closest_cluster`` when the ESOINN model has at least
    ``Modelnumperpoint`` classes, otherwise each of the first
    ``Modelnumperpoint`` subsets.  Each subset is afterwards mixed with a
    random sample drawn from the other subsets that hold more than two
    molecules, saved, and finally put on ``Dataqueue`` as
    ``(subset, cluster_index, GPARAMS.Train_setting.Maxsteps)``.

    Args:
        Dataqueue: queue-like object; only its ``put`` method is used.
    """
    from TensorMol import MSet

    esoinn = GPARAMS.Esoinn_setting
    train = GPARAMS.Train_setting
    level = GPARAMS.Compute_setting.Traininglevel

    Trainingset = MSet(level)
    Trainingset.Load()
    Trainingset.mols = Check_MSet(Trainingset.mols, level=1)
    Trainingset.Save()
    if esoinn.Ifresp:
        respset = MSet('HF_resp')
        respset.Load()
        respset.mols = Check_MSet(respset.mols, level=1)
        respset.Save()
    print("Trainingset.mols :", len(Trainingset.mols))
    ClusNum = max(esoinn.Model.class_id, train.Modelnumperpoint)
    print("++++++++++++++++++Dataer++++++++++++++++++++++")
    print("ClusNum:", ClusNum)
    SubTrainList = [MSet(level + '_Cluster%d' % i) for i in range(ClusNum)]

    print('start make cluster for training set')
    for mol in Trainingset.mols:
        # Min-max scale the descriptor.  The cached ``EGCM`` attribute is
        # preferred; if it is missing or unusable the descriptor is
        # recomputed (same fallback as before, but without swallowing
        # KeyboardInterrupt/SystemExit).
        try:
            EGCM = (mol.EGCM - esoinn.scalemin) / \
                   (esoinn.scalemax - esoinn.scalemin)
        except Exception:
            EGCM = (mol.Cal_EGCM() - esoinn.scalemin) / \
                   (esoinn.scalemax - esoinn.scalemin)
        # Components with a zero scale range come out as inf/nan.
        EGCM[~np.isfinite(EGCM)] = 0
        if esoinn.Model.class_id >= train.Modelnumperpoint:
            cluster_ids = esoinn.Model.find_closest_cluster(
                min(train.Modelnumperpoint, esoinn.Model.class_id), EGCM)
        else:
            cluster_ids = range(train.Modelnumperpoint)
        for j in cluster_ids:
            SubTrainList[j].mols.append(mol)

    for i, subset in enumerate(SubTrainList):
        print("Cluster %d has %d mols" % (i, len(subset.mols)))

    for i, subset in enumerate(SubTrainList):
        # Pool of molecules from every *other* subset with more than two
        # members.  Subsets processed earlier in this loop have already been
        # extended, exactly as in the sequential original.
        othermollist = []
        for j, other in enumerate(SubTrainList):
            if j != i and len(other.mols) > 2:
                othermollist += other.mols
        print("Other mol list for Cluster %d" % i, len(othermollist))
        if len(othermollist) > 0:
            samplenum = min(
                math.ceil((len(Trainingset.mols) - len(subset.mols)) *
                          esoinn.Mixrate),
                len(othermollist))
            print(len(othermollist), samplenum)
            subset.mols += random.sample(othermollist, samplenum)
        subset.Save()

    for i, subset in enumerate(SubTrainList):
        Dataqueue.put((subset, i, train.Maxsteps))
        print('%dth cluster is put in queue, mol num: %d!' %
              (i, len(subset.mols)))
Exemple #2
0
def consumer(Queue):
    """Gather molecules delivered through ``Queue`` into the stage's new set.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives.  The
    first element of every batch entry is renamed and appended to the
    ``Stage_<Trainstage>_Newadded`` set, which is saved whenever the batch
    counter reaches a multiple of 2000.  After the sentinel the set is
    filtered through ``Check_MSet`` and thinned:

    * if the ESOINN model has nodes and more classes than
      ``Modelnumperpoint``, molecules are labelled by ``Model.predict`` and,
      when more than 1000 are present, at most 600 'Edge', 200 'Noise' and
      20 'Normal' molecules are kept;
    * otherwise at most 1000 randomly chosen molecules are kept.

    The resulting set is saved.

    Args:
        Queue: queue-like object; only its ``get`` method is used.
    """
    # Unused in this function; kept because the original imported it here.
    # NOTE(review): confirm whether the import is needed for side effects.
    from ..Base import Molnew  # noqa: F401
    from TensorMol import MSet

    print("Consumer start")
    Newaddedset = MSet('Stage_%d_Newadded' % GPARAMS.Train_setting.Trainstage)
    num = 0
    while True:
        ERROR_mols = Queue.get()
        # Identity test for the sentinel: ``== None`` dispatches to the
        # payload's ``__eq__`` and is not a reliable sentinel check.
        if ERROR_mols is None:
            break
        for entry in ERROR_mols:
            entry[0].name = "Stage_%d_Mol_%d" % (
                GPARAMS.Train_setting.Trainstage, num)
            Newaddedset.mols.append(entry[0])
        # NOTE(review): the counter advances once per batch, so every
        # molecule of a batch receives the same name -- confirm whether a
        # per-molecule counter was intended.
        num += 1
        if num % 2000 == 0:
            Newaddedset.Save()

    Newaddedset.mols = Check_MSet(Newaddedset.mols)
    model = GPARAMS.Esoinn_setting.Model
    if len(model.nodes) != 0 and \
            model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset = []
        for mol in Newaddedset.mols:
            try:
                Dataset.append(mol.EGCM)
            except AttributeError:
                # Descriptor not cached on this molecule yet.
                Dataset.append(mol.Cal_EGCM())
        _, _, _, _, signalmask = model.predict(Dataset)
        buckets = {'Noise': [], 'Edge': [], 'Normal': []}
        for mol, label in zip(Newaddedset.mols, signalmask):
            if label in buckets:
                buckets[label].append(mol)
        if len(Newaddedset.mols) > 1000:
            edgemollist = random.sample(buckets['Edge'],
                                        min(600, len(buckets['Edge'])))
            noisemollist = random.sample(buckets['Noise'],
                                         min(200, len(buckets['Noise'])))
            normalmollist = random.sample(buckets['Normal'],
                                          min(20, len(buckets['Normal'])))
            Newaddedset.mols = edgemollist + noisemollist + normalmollist
    elif len(Newaddedset.mols) > 1000:
        Newaddedset.mols = random.sample(Newaddedset.mols, 1000)
    Newaddedset.Save()
    return
def main():
    """Build and return a ``TFMolManage`` for "chemspider12_solvation".

    A one-molecule ``MSet`` (C, H, N, O) is created only to instantiate the
    data pipeline; the network hyper-parameters are written into the global
    ``PARAMS`` before the digester, the data set and the manager are
    constructed.
    """
    molset = MSet()
    seed_mol = Mol()
    seed_mol.FromXYZString("""4

    C 1. 0. 0.
    H 0. 1. 0.
    N 0. 0. 1.
    O 1. 1. 0.""")
    molset.mols.append(seed_mol)
    treated_atoms = np.array([1, 6, 7, 8], dtype=np.uint8)

    # PARAMS["networks_directory"] is intentionally left at its default.
    network_settings = (
        ("tf_prec", "tf.float64"),
        ("NeuronType", "sigmoid_with_param"),
        ("sigmoid_alpha", 100.0),
        ("HiddenLayers", [2000, 2000, 2000]),
        ("EECutoff", 15.0),
        # when elu is used EECutoffOn should always equal to 0
        ("EECutoffOn", 0),
        ("Elu_Width", 4.6),
        ("EECutoffOff", 15.0),
        ("AddEcc", True),
        ("KeepProb", [1.0, 1.0, 1.0, 0.7]),
    )
    for key, value in network_settings:
        PARAMS[key] = value

    # Digester that applies the descriptor to the molecules.
    digester = MolDigester(treated_atoms,
                           name_="ANI1_Sym_Direct",
                           OType_="EnergyAndDipole")
    tset = TensorMolData_BP_Direct_EE_WithEle_Release(molset,
                                                      digester,
                                                      order_=1,
                                                      num_indis_=1,
                                                      type_="mol")
    # Set only after the data set has been built, as in the original order.
    PARAMS["DSFAlpha"] = 0.18
    network_type = ('fc_sqdiff_BP_Direct_EE_ChargeEncode'
                    '_Update_vdw_DSF_elu_Normalize_Dropout')
    return TFMolManage("chemspider12_solvation", tset, False, network_type,
                       False, False)
Exemple #4
0
from matplotlib import pyplot as plt
# Command-line interface: a single -i/--input option whose value is kept in
# ``jsonfile`` and passed to UpdateGPARAMS by the script body below.
parser = arg.ArgumentParser(description=(
    'Grep qm area from an Amber MDcrd trajory to make training dataset!'))
parser.add_argument('-i', '--input')

args = parser.parse_args()
jsonfile = args.input
# '3' silences TensorFlow's native (C++) log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
if __name__ == "__main__":
    UpdateGPARAMS(jsonfile)
    LoadModel()
    if not os.path.exists("./results"):
        os.system("mkdir ./results")
    for i in range(len(GPARAMS.Dataset_setting.Inputdatasetlist)):
        TMPSet = MSet(GPARAMS.Dataset_setting.Inputdatasetlist[i])
        TMPSet.Load()
        f1 = open(
            './results/' + GPARAMS.Dataset_setting.Inputdatasetlist[i] +
            '.result', 'w')
        f2 = open(
            './results/' + GPARAMS.Dataset_setting.Inputdatasetlist[i] +
            '_e.csv', 'w')
        f3 = open(
            './results/' + GPARAMS.Dataset_setting.Inputdatasetlist[i] +
            '_f.csv', 'w')
        f4 = open(
            './results/' + GPARAMS.Dataset_setting.Inputdatasetlist[i] +
            '_d.csv', 'w')
        f5 = open(
            './results/' + GPARAMS.Dataset_setting.Inputdatasetlist[i] +
Exemple #5
0
def parallel_caljob(MSetname, manager, ctrlfile):
    """Run the reference calculation for every molecule of a saved set.

    The set ``MSetname`` is loaded and processed in one of two ways:

    * ``GPARAMS.Train_setting.Ifwithhelp`` set: the molecules are split into
      parts of ``framenumperjob`` molecules, each part is saved and uploaded
      over SFTP to the helper node together with a PBS script and
      ``ctrlfile``; the function then polls every 300 s until every part
      directory contains a ``finished`` marker, downloads the part files and
      merges them.
    * otherwise: ``calculator`` is mapped over the molecules with a pool of
      ``Consumerprocessnum`` workers obtained from ``manager``; only results
      whose first element is ``True`` are kept.

    In both cases ``Cal_EGCM`` is called on every resulting molecule and the
    set is saved back under ``MSetname``.

    Args:
        MSetname: name of the ``MSet`` to load, process and overwrite.
        manager: object providing ``Pool(n)`` (local mode only).
        ctrlfile: control file name, uploaded with the job and passed to
            ``Qmcal.py -i`` on the helper node.
    """
    para_path = './'
    if GPARAMS.Compute_setting.Traininglevel == "DFTB+":
        # os.environ only accepts strings; str() is a no-op if the setting
        # already is one.
        os.environ["OMP_NUM_THREADS"] = str(
            GPARAMS.Compute_setting.Ncoresperthreads)
        para_path = GPARAMS.Software_setting.Dftbparapath
    input_path = './' + GPARAMS.Compute_setting.Traininglevel + '/Consumer/'
    os.makedirs(input_path, exist_ok=True)
    TMPSet = MSet(MSetname)
    TMPSet.Load()
    mols = TMPSet.mols
    print('Nmols in Newaddedset:', len(mols))

    if GPARAMS.Train_setting.Ifwithhelp == True:
        chunk = GPARAMS.Train_setting.framenumperjob
        nstage = math.ceil(len(mols) / chunk)
        print(nstage)
        partnames = [MSetname + '_part%d' % i for i in range(nstage)]
        remotedirs = [
            GPARAMS.Train_setting.helpcpupath + '/' + MSetname + '/part%d' % i
            for i in range(nstage)
        ]
        subMSetresult = [False] * nstage
        # Write every part to ./datasets once (the original saved twice).
        for i, partname in enumerate(partnames):
            part = MSet(partname)
            part.mols = mols[i * chunk:(i + 1) * chunk]
            part.Save()

        trans = pko.Transport((GPARAMS.Train_setting.helpcpunodeip,
                               GPARAMS.Train_setting.helpcpuport))
        trans.connect(username=GPARAMS.Train_setting.helpcpuaccount,
                      password=GPARAMS.Train_setting.helpcpupasswd)
        ssh = pko.SSHClient()
        ssh._transport = trans
        sftp = pko.SFTPClient.from_transport(trans)
        workpath = os.getcwd()
        print(workpath)
        finishmols = []
        try:
            for i in range(nstage):
                remotepath = remotedirs[i]
                srcpath = workpath + '/datasets/%s.pdb' % partnames[i]
                print(" Put pdb file:")
                print(remotepath, srcpath)
                stdin, stdout, stderr = ssh.exec_command(
                    'mkdir -p %s/datasets' % remotepath)
                # Reading stdout also waits for mkdir to finish before the
                # upload.  (The original printed the bound method ``decode``
                # instead of calling it.)
                print(stdout.read().decode())
                sftp.put(srcpath,
                         remotepath + '/datasets/%s.pdb' % partnames[i])
                if GPARAMS.Train_setting.queuetype == 'PBS':
                    with open('pbs.run', 'w') as pbsrun:
                        pbsrun.write(
                            pbsstr %
                            (GPARAMS.Compute_setting.Ncoresperthreads,
                             GPARAMS.Compute_setting.Traininglevel +
                             "_%d" % i))
                        pbsrun.write(GPARAMS.Train_setting.helpcpuenv)
                        pbsrun.write(
                            "python -u Qmcal.py -i %s -d %s> %s.qmout\n" %
                            (ctrlfile, partnames[i], partnames[i]))
                        pbsrun.write("rm *.chk\n")
                        pbsrun.write("touch finished\n")
                    sftp.put(localpath=workpath + '/pbs.run',
                             remotepath=remotepath + '/pbs.run')
                    sftp.put(localpath=workpath + '/' + ctrlfile,
                             remotepath=remotepath + '/' + ctrlfile)
                    # NOTE(review): submission is disabled in the original;
                    # the jobs must be started by other means or the wait
                    # loop below never terminates.
                    #ssh.exec_command('cd %s && qsub pbs.run')

            # Wait until every part directory holds the 'finished' marker
            # written by the job script.  The original listed 'fininshed'
            # (typo), which never exists, so the loop could not end.
            t = 0
            while False in subMSetresult:
                time.sleep(300)
                t += 300
                for i in range(nstage):
                    stdin, stdout, stderr = ssh.exec_command(
                        "cd %s && ls finished" % (remotedirs[i]))
                    if 'finished' in stdout.read().decode():
                        subMSetresult[i] = True
                print(t, subMSetresult)

            for i in range(nstage):
                srcpath = workpath + '/datasets/%s.pdb' % partnames[i]
                if os.path.exists(srcpath):
                    os.remove(srcpath)
                # NOTE(review): the part was uploaded to <remote>/datasets/
                # but is fetched from <remote>/ -- presumably Qmcal.py writes
                # its result there; confirm.
                sftp.get(localpath=srcpath,
                         remotepath=remotedirs[i] + '/' + MSetname +
                         '_part%d.pdb' % i)
                part = MSet(partnames[i])
                part.Load()
                finishmols += part.mols
        finally:
            # Release the connection even if an upload/poll/download fails.
            sftp.close()
            trans.close()

        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols = finishmols
        TMPSet.Save()
    else:
        # One argument tuple per molecule, in the order calculator expects.
        inputlist = [(mol, input_path, para_path,
                      GPARAMS.Compute_setting.Gaussiankeywords,
                      GPARAMS.Compute_setting.Ncoresperthreads,
                      GPARAMS.Compute_setting.Atomizationlevel)
                     for mol in mols]
        paracal_pool = manager.Pool(GPARAMS.Compute_setting.Consumerprocessnum)
        results = paracal_pool.map(calculator, inputlist)
        paracal_pool.close()
        paracal_pool.join()
        mollist = []
        for result in results:
            if result[0] == True:
                result[1].Cal_EGCM()
                mollist.append(result[1])
        TMPSet.mols = mollist
        TMPSet.Save()
    return
Exemple #6
0
def _train_chargenet_locally(GPUQueue, evostruc):
    """Train the charge network in-process on a GPU taken from ``GPUQueue``."""
    GPUID = GPUQueue.get()
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUID)
        if GPARAMS.Esoinn_setting.Ifresp == True:
            Chargeset = MSet("HF_resp")
        else:
            Chargeset = MSet(GPARAMS.Compute_setting.Traininglevel)
        Chargeset.Load()
        GPARAMS.Neuralnetwork_setting.Switchrate = 0.9
        # Replicate a small set until it holds at least 20 batches.  The
        # original divided by len(TMPset.mols), an undefined name.
        minmols = GPARAMS.Neuralnetwork_setting.Batchsize * 20
        if len(Chargeset.mols) < minmols:
            num = math.ceil(minmols / len(Chargeset.mols))
            Chargeset.mols = Chargeset.mols * num
        TreatedAtoms = Chargeset.AtomTypes()
        d = MolDigester(TreatedAtoms,
                        name_="ANI1_Sym_Direct",
                        OType_="EnergyAndDipole")
        tset = TData_BP_Direct_EE_WithCharge(Chargeset,
                                             d,
                                             order_=1,
                                             num_indis_=1,
                                             type_="mol",
                                             WithGrad_=True,
                                             MaxNAtoms=100)
        SUBNet = BP_HDNN_charge(tset, None, Structure=evostruc)
        SUBNet.train(SUBNet.max_steps, continue_training=False)
    finally:
        # Always hand the GPU back, even when training fails.
        GPUQueue.put(GPUID)


def _train_chargenet_remotely(MSetname, jsonfile, evostruc):
    """Submit charge-network training to the helper GPU node and fetch it."""
    if GPARAMS.Esoinn_setting.Ifresp == True:
        label = 'Resp'
    elif GPARAMS.Esoinn_setting.Ifadch == True:
        label = 'Adch'
    else:
        # The original hit a NameError on ``remotepath`` in this case.
        raise ValueError(
            "remote charge training needs Esoinn_setting.Ifresp or Ifadch")
    remotepath = GPARAMS.Train_setting.helpgpupath + '/Stage%d/%s' % (
        GPARAMS.Train_setting.Trainstage, label.lower())
    queuetype = GPARAMS.Train_setting.gpuqueuetype

    # Retry forever, every 5 s, until the helper node accepts the login.
    address = (GPARAMS.Train_setting.helpgpunodeip,
               GPARAMS.Train_setting.helpgpuport)
    connect_num = 0
    while True:
        try:
            trans = pko.Transport(address)
            trans.connect(username=GPARAMS.Train_setting.helpgpuaccount,
                          password=GPARAMS.Train_setting.helpgpupasswd)
            break
        except Exception:
            connect_num += 1
            print(f"{connect_num} th reconnect to {address} for {MSetname}")
            time.sleep(5)

    ssh = pko.SSHClient()
    ssh._transport = trans
    sftp = pko.SFTPClient.from_transport(trans)
    workpath = os.getcwd()
    print(workpath)
    try:
        srcpath = workpath + '/datasets/%s.pdb' % (MSetname)
        print(remotepath, srcpath)
        # NOTE(review): plain ``rm`` cannot remove a directory; kept as in
        # the original -- confirm whether ``rm -r`` was intended.
        stdin, stdout, stderr = ssh.exec_command('rm %s' % (remotepath))
        stdin, stdout, stderr = ssh.exec_command('mkdir -p %s' %
                                                 (remotepath + '/datasets'))
        print(stdout.read().decode())
        sftp.put(srcpath, remotepath + '/datasets/%s.pdb' % (MSetname))

        # Write exactly one job script and upload that same file.  The
        # original could write the PBS/Adch script to gpu_adch.run while
        # uploading an empty gpu_resp.run, and leaked the first handle.
        with open('gpu_resp.run', 'w') as shellrun:
            header = None
            if queuetype == 'LSF':
                header = lsfgpustr % (GPARAMS.Train_setting.gpuqueuename,
                                      label)
            elif queuetype == "PBS":
                header = pbsgpustr % (4, GPARAMS.Train_setting.gpuqueuename,
                                      label)
            if header is not None:
                shellrun.write(header)
                print(header, MSetname)
                shellrun.write(GPARAMS.Train_setting.helpgpuenv)
            strucstr = "_".join([str(i) for i in evostruc])
            shellrun.write(
                'python -u $ESOIHOME/bin/TrainNN.py -i %s -d %s -s %s -t bpcharge \n'
                % (jsonfile, MSetname, strucstr))
            shellrun.write('touch finished\n')

        sftp.put(localpath=workpath + '/gpu_resp.run',
                 remotepath=remotepath + '/gpu.run')
        sftp.put(localpath=workpath + '/%s' % jsonfile,
                 remotepath=remotepath + '/%s' % jsonfile)
        if queuetype == 'LSF':
            stdin, stdout, stderr = ssh.exec_command("cd %s && bsub <gpu.run" %
                                                     remotepath)
        elif queuetype == "PBS":
            stdin, stdout, stderr = ssh.exec_command("cd %s && qsub <gpu.run" %
                                                     remotepath)

        # Poll the remote directory until the 'finished' marker shows up.
        # NOTE(review): there is no delay between polls, as in the original.
        flag = True
        while flag:
            stdin, stdout, stderr = ssh.exec_command("cd %s&& ls" % remotepath)
            flag = 'finished' not in stdout.read().decode()
            if queuetype == "PBS":
                stdin, stdout, stderr = ssh.exec_command(
                    "cd %s && grep 'CUDA_ERROR_OUT_OF_MEMORY' Cluster*.o*" %
                    remotepath)
                if 'CUDA_ERROR_OUT_OF_MEMORY' in stdout.read().decode():
                    # Out of GPU memory: archive the logs and resubmit.  This
                    # branch is PBS-only, so resubmit with qsub (the original
                    # called bsub); ``mkdir -p`` keeps a second retry from
                    # aborting the chain.
                    stdin, stdout, stderr = ssh.exec_command(
                        "cd %s && mkdir -p old && mv Cluster*.o* finished old && qsub < gpu.run"
                        % remotepath)
                    # Wait for the command so the next poll does not see the
                    # old log again and resubmit twice.
                    stdout.read()
                    flag = True

        stdin, stdout, stderr = ssh.exec_command(
            "cd %s && mv %s/%s/*.record networks/chargenet.record" %
            (remotepath, remotepath, GPARAMS.Compute_setting.Traininglevel))
        print(stdout.read().decode())
        stdin, stdout, stderr = ssh.exec_command(
            "cd %s/networks && tar zcvf chargenet.tar.gz * && mv chargenet.tar.gz .."
            % remotepath)
        print(stdout.read().decode())
        sftp.get(localpath=workpath + '/networks/chargenet.tar.gz',
                 remotepath=remotepath + '/chargenet.tar.gz')
    finally:
        # Release the connection whether or not the remote run succeeded.
        sftp.close()
        trans.close()

    os.system(
        'cd ./networks && tar zxvf chargenet.tar.gz && mv *.record ../%s/Stage%d/ && rm chargenet.tar.gz'
        % (GPARAMS.Compute_setting.Traininglevel,
           GPARAMS.Train_setting.Trainstage))
    os.system('rm gpu_*.run')


def chargenet_train(MSetname, GPUQueue, jsonfile):
    """Train the charge network with a randomly perturbed layer structure.

    The base structure is the element-wise mean (rounded up) of the two
    structures returned by ``get_best_struc(2)`` when
    ``Neuralnetwork_setting.NNstrucselect`` is non-empty, otherwise
    ``Neuralnetwork_setting.Initstruc``.  Each of its three layer sizes is
    shifted by a random multiple (-5..5) of 10 % of its value.  Training
    then runs locally on a GPU from ``GPUQueue`` when
    ``Train_setting.Ifgpuwithhelp`` is ``False``, otherwise on the helper
    GPU node through SSH/SFTP.

    Args:
        MSetname: data set name; used by the remote mode for the uploaded
            ``datasets/<MSetname>.pdb`` file and the ``-d`` argument.
        GPUQueue: queue of GPU ids (local mode only).
        jsonfile: control file uploaded to and used on the helper node.

    Raises:
        ValueError: remote mode requested with neither ``Ifresp`` nor
            ``Ifadch`` set.
    """
    print("RESP coming")
    if len(GPARAMS.Neuralnetwork_setting.NNstrucselect) != 0:
        candidate_struc = get_best_struc(2)
        print("Candidate_NNSTRUC:", candidate_struc)
        basestruc = [math.ceil(i) for i in np.mean(candidate_struc, axis=0)]
    else:
        basestruc = GPARAMS.Neuralnetwork_setting.Initstruc
    deltastruc = [math.ceil(i * 0.10) for i in basestruc]
    print("Delta struc:", deltastruc)
    changevector = [random.randint(-5, 5) for _ in range(3)]
    evostruc = [
        basestruc[i] + deltastruc[i] * changevector[i] for i in range(3)
    ]
    print("evo struc:", evostruc)
    if GPARAMS.Train_setting.Ifgpuwithhelp == False:
        _train_chargenet_locally(GPUQueue, evostruc)
    else:
        _train_chargenet_remotely(MSetname, jsonfile, evostruc)
    return
Exemple #7
0
def esoinner(MSetname=''):
    from ..Comparm import GPARAMS 
    if_continue=True
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0: 
        cluster_center_before=GPARAMS.Esoinn_setting.Model.cal_cluster_center()
    else:
        cluster_center_before=None 
    if MSetname:
        TotalMSet=MSet(MSetname)
    else:
        TotalMSet=MSet(GPARAMS.Compute_setting.Traininglevel)
    TotalMSet.Load()
    for i in TotalMSet.mols:
        try:
            i.EGCM
        except:
            i.Cal_EGCM()
    TotalMSet.Save()
    Dataset=np.array([i.EGCM for i in TotalMSet.mols])
    try: 
    #if True:
        if not (GPARAMS.Esoinn_setting.scalemax and GPARAMS.Esoinn_setting.scalemin):
            print("++++++++++++++++++++++++++++++++++++++++++++++++")
            print("initialize the Scalefactor!!!")
            print("++++++++++++++++++++++++++++++++++++++++++++++++")
            GPARAMS.Esoinn_setting.scalemax=np.max(Dataset,0)
            GPARAMS.Esoinn_setting.scalemin=np.min(Dataset,0)
            with open("Sfactor.in",'wb') as f:
                pickle.dump((GPARAMS.Esoinn_setting.scalemax,GPARAMS.Esoinn_setting.scalemin),f)
    except:
        pass

    Dataset=(Dataset-GPARAMS.Esoinn_setting.scalemin)/(GPARAMS.Esoinn_setting.scalemax-GPARAMS.Esoinn_setting.scalemin)
    Dataset[~np.isfinite(Dataset)]=0
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0:
        Noiseset,a,b,c,d=GPARAMS.Esoinn_setting.Model.predict(Dataset)
    else:
        Noiseset=Dataset 

    GPARAMS.Esoinn_setting.Model.fit(Noiseset,iteration_times=GPARAMS.Train_setting.Esoistep,if_reset=False)
    GPARAMS.Esoinn_setting.Model.Save()
    Noiseset,Noiseindex,nodelabel,cluster_label,signalmask=GPARAMS.Esoinn_setting.Model.predict(Dataset)
    signal_cluster=[[] for i in range(GPARAMS.Esoinn_setting.Model.class_id)]
    
    for i in range(len(Dataset)):
        signal_cluster[cluster_label[i][0]].append(Dataset[i])
    signal_num_list=[len(i) for i in signal_cluster]
    judgenum=math.ceil(sum(signal_num_list)*0.2)
    print ("signal_num_list:",signal_num_list,"judgenum",judgenum)

    removecluster=[i for i in range(len(signal_num_list)) if not(signal_num_list[i] > judgenum)]
    print ("removeclusteid:",removecluster)

    GPARAMS.Esoinn_setting.Model.cut_cluster(removecluster)
    GPARAMS.Esoinn_setting.Model.Save()
    print (GPARAMS.Esoinn_setting.Model.Name,GPARAMS.Esoinn_setting.Model.class_id)  
    print("Class id after Cut action:",GPARAMS.Esoinn_setting.Model.class_id)

    cluster_center_after=GPARAMS.Esoinn_setting.Model.cal_cluster_center()
    if cluster_center_before!=None:# and GPARAMS.Esoinn_setting.NNdict["NN"]!=None:
        print ("Update HDNN")
        updaterule=np.zeros(GPARAMS.Esoinn_setting.Model.class_id)
        for i in range(len(cluster_center_after)):
            vec1=cluster_center_after[i]
            dis=np.sum((np.array(cluster_center_before)-np.array([vec1]*len(cluster_center_before)))**2,1) 
            index=np.argmin(dis)
            print (i,index,"+++++++++++++++++++++++++++")
            updaterule[i]=index 
        """
Exemple #8
0
def evaler(MSetname):
    """Evaluate the current networks on a saved set and write error reports.

    Every molecule of ``MSetname`` is assigned its three closest ESOINN
    clusters, ``Cal_NN_EFQ`` predicts energy, force, dipole and charge, and
    reference/prediction pairs are written below
    ``./results/Stage<Trainstage>/<MSetname>/`` to ``<level>.result`` and
    ``<level>_e.csv``, ``_f.csv``, ``_d.csv`` and (when ``Ifresp`` or
    ``Ifadch`` is set) ``_q.csv``.  RMSE values are then computed from the
    written CSV files and stored in ``rmse.result``.

    Args:
        MSetname: name of the ``MSet`` to evaluate.

    Returns:
        ``[force_rmse, energy_rmse]`` plus ``charge_rmse`` as a third entry
        when charges are evaluated.
    """
    print('************************************************')
    print(GPARAMS.Train_setting.sigma)
    print('************************************************')
    os.environ["CUDA_VISIBLE_DEVICES"] = str(
        GPARAMS.Compute_setting.Gpulist[0])
    path = "./results/Stage%d/%s/" % (GPARAMS.Train_setting.Trainstage,
                                      MSetname)
    os.makedirs(path, exist_ok=True)
    prefix = path + GPARAMS.Compute_setting.Traininglevel
    with_charge = (GPARAMS.Esoinn_setting.Ifresp == True
                   or GPARAMS.Esoinn_setting.Ifadch == True)
    TMPSet = MSet(MSetname)
    TMPSet.Load()

    # The context manager closes all report files before they are re-read
    # below (the original only flushed them and never closed them).
    with open(prefix + '.result', 'w') as f1, \
            open(prefix + '_e.csv', 'w') as f2, \
            open(prefix + '_f.csv', 'w') as f3, \
            open(prefix + '_d.csv', 'w') as f4, \
            open(prefix + '_q.csv', 'w') as f5:
        for mol in TMPSet.mols:
            EGCM = (mol.Cal_EGCM() - GPARAMS.Esoinn_setting.scalemin) / (
                GPARAMS.Esoinn_setting.scalemax -
                GPARAMS.Esoinn_setting.scalemin)
            # Components with a zero scale range come out as inf/nan.
            EGCM[~np.isfinite(EGCM)] = 0
            mol.belongto = GPARAMS.Esoinn_setting.Model.find_closest_cluster(
                3, EGCM)
            mol.properties['clabel'] = int(mol.totalcharge)
        NNpredict, ERRORmols, Avgerr, ERROR_str, method = Cal_NN_EFQ(TMPSet)
        print(NNpredict)

        for j, mol in enumerate(TMPSet.mols):
            # 627.51 is presumably the Hartree <-> kcal/mol factor; the
            # predictions are divided by it and everything is multiplied
            # back when written -- confirm the units with Cal_NN_EFQ.
            NNe = NNpredict[j][0] / 627.51
            NNf = NNpredict[j][1] / 627.51
            NNq = NNpredict[j][3]
            NNd = NNpredict[j][2]
            refe = mol.properties["atomization"]
            reff = mol.properties["force"]
            refd = mol.properties["dipole"]
            if GPARAMS.Esoinn_setting.Ifresp == True:
                try:
                    print("RESP charge")
                    refq = mol.properties["resp_charge"]
                except KeyError:
                    print("Other charge")
                    refq = mol.properties["charge"]
            elif GPARAMS.Esoinn_setting.Ifadch == True:
                try:
                    # The original logged "RESP charge" here (copy-paste).
                    print("ADCH charge")
                    refq = mol.properties["adch_charge"]
                except KeyError:
                    print("Other charge")
                    refq = mol.properties["charge"]

            rmsde = refe - NNe
            print("HHHHHHHHHJJJJJJJJJJJJJKKKKKKKKKKKK")
            print(NNf, reff)
            print("HHHHHHHHHJJJJJJJJJJJJJKKKKKKKKKKKK")
            df = np.reshape(np.square(reff - NNf), -1)
            maxdf = np.max(np.sqrt(df))
            rmsdf = np.sqrt(np.sum(df) / len(df))
            dd = refd - NNd
            rmsdd = np.sqrt(np.sum(np.square(dd)))
            # Largest absolute dipole deviation, matching how maxdf is
            # defined.  The original took np.sort(dd)[0], i.e. the minimum.
            maxdd = np.max(np.abs(dd))
            f1.write(
                "%d %s Deviation E:%.3f F Max:%.3f Rmse %.3f D Max: %.3f Rmse %.3f\n"
                % (j, mol.name, rmsde * 627.51, maxdf * 627.51,
                   rmsdf * 627.51, maxdd, rmsdd))
            f2.write("%.3f %.3f\n" % (refe * 627.51, NNe * 627.51))
            for k in range(len(mol.atoms)):
                for l in range(3):
                    f3.write("%.3f %.3f\n" %
                             (reff[k][l] * 627.51, NNf[k][l] * 627.51))
                if with_charge:
                    f5.write("%.3f %.3f\n" % (refq[k], NNq[k]))
            for k in range(3):
                f4.write("%.3f %.3f\n" % (refd[k], NNd[k]))

    def _column_rmse(filename):
        # ndmin=2 keeps a one-row file two-dimensional so that column
        # indexing also works for a single-molecule set.
        data = np.loadtxt(filename, ndmin=2)
        ref, pred = data[:, 0], data[:, 1]
        return np.sqrt(np.sum((ref - pred)**2) / len(ref))

    rmse = [_column_rmse(prefix + '_f.csv'), _column_rmse(prefix + '_e.csv')]
    if with_charge:
        rmse.append(_column_rmse(prefix + '_q.csv'))

    with open(path + 'rmse.result', 'w') as summary:
        summary.write('F rmse: %f\n ' % rmse[0])
        summary.write('E rmse: %f\n ' % rmse[1])
        if with_charge:
            summary.write('Q rmse: %f\n ' % rmse[2])
    return rmse
Exemple #9
0
# Control file given on the command line.
jsonfile = args.input

if __name__ == "__main__":
    # Shared queues used by the worker processes of the training workflow.
    manager = Manager()
    QMQueue = manager.Queue()
    DataQueue = manager.Queue()
    GPUQueue = manager.Queue()
    NetstrucQueue = manager.Queue()
    # Refresh the backup copy of the ESOINN model and scale factors.
    if os.path.exists('./networks/lastsave'):
        os.system("rm ./networks/lastsave/* -r")
        os.system("cp *.ESOINN Sfactor.in ./networks/lastsave ")
    UpdateGPARAMS(jsonfile)
    for i in GPARAMS.Compute_setting.Gpulist:
        GPUQueue.put(i)

    bigset = MSet('Bigset')
    # Use at most 8 randomly chosen input data sets.  random.sample raises
    # ValueError when asked for more items than exist, so the sample size is
    # capped at the number of configured data sets.
    GPARAMS.Dataset_setting.Inputdatasetlist = random.sample(
        GPARAMS.Dataset_setting.Inputdatasetlist,
        min(8, len(GPARAMS.Dataset_setting.Inputdatasetlist)))
    for name in GPARAMS.Dataset_setting.Inputdatasetlist:
        tmpset = MSet(name)
        tmpset.Load()
        bigset.mols += tmpset.mols
    # One checker set per checker: every (i+1)-th molecule of the merged
    # set, randomly thinned to Checkerstep molecules, with the first
    # molecule of the merged set always placed in front.
    for i in range(GPARAMS.Compute_setting.Checkernum):
        checker_set = MSet('Bigset_%d' % i)
        candidates = bigset.mols[::i + 1]
        checker_set.mols = [bigset.mols[0]] + random.sample(
            candidates,
            min(GPARAMS.Compute_setting.Checkerstep, len(candidates)))
        checker_set.Save()
    bigset = None
    for stage in range(GPARAMS.Train_setting.Trainstage,\
                       GPARAMS.Train_setting.Stagenum+GPARAMS.Train_setting.Trainstage):
        LoadModel()
        #==Main MD process with productor and Consumer model==
# Control file given on the command line.
jsonfile = args.input

if __name__ == "__main__":
    # Shared queues created through the multiprocessing manager.
    manager = Manager()
    QMQueue = manager.Queue()
    DataQueue = manager.Queue()
    GPUQueue = manager.Queue()
    NetstrucQueue = manager.Queue()
    # When a backup directory exists, empty it and copy the current
    # *.ESOINN files and Sfactor.in into it.
    if os.path.exists('./networks/lastsave'):
        os.system("rm ./networks/lastsave/* -r")
        os.system("cp *.ESOINN Sfactor.in ./networks/lastsave ")
    UpdateGPARAMS(jsonfile)
    # Make every configured GPU id available through the queue.
    for i in GPARAMS.Compute_setting.Gpulist:
        GPUQueue.put(i)

    bigset = MSet('Bigset')
    # Disabled: building the 'Bigset_<i>' checker sets from the input data
    # sets.  With this block commented out, ``bigset`` stays empty here.
    #    GPARAMS.Dataset_setting.Inputdatasetlist=random.sample(GPARAMS.Dataset_setting.Inputdataset
    #    for name in GPARAMS.Dataset_setting.Inputdatasetlist:
    #        tmpset=MSet(name)
    #        tmpset.Load()
    #        bigset.mols+=tmpset.mols
    #    for i in range(GPARAMS.Compute_setting.Checkernum):
    #        checker_set=MSet('Bigset_%d'%i)
    #        checker_set.mols=[bigset.mols[j] for j in range(len(bigset.mols)) if j%(i+1)==0]
    #        checker_set.mols=[bigset.mols[0]]+random.sample(checker_set.mols,min(GPARAMS.Compute_setting.Checkerstep,len(checker_set.mols)))
    #        checker_set.Save()
    #    bigset=None
    # Run ``Stagenum`` stages, numbered from the configured ``Trainstage``.
    for stage in range(GPARAMS.Train_setting.Trainstage,\
                       GPARAMS.Train_setting.Stagenum+GPARAMS.Train_setting.Trainstage):
        LoadModel()
        # == Main MD process with producer and consumer model ==
Exemple #11
0
def _caljob_on_helper(TMPSet,MSetname,ctrlfile):
    """Compute ``TMPSet.mols`` on the remote helper machine and store the results.

    The molecules are split into parts, each part is saved locally as its own
    set, uploaded over SFTP together with a generated ``cpu.run`` script and
    the control file, and submitted to the batch queue.  The remote part
    directories are then polled every 30 seconds until each one contains a
    ``finished`` marker.  Finally the part files are downloaded, reloaded and
    merged back into ``TMPSet``, which is saved.

    NOTE(review): ``pko`` is presumably ``paramiko`` -- confirm against the
    file's imports.
    """
    train=GPARAMS.Train_setting
    queuetype=train.cpuqueuetype
    mols=TMPSet.mols
    # Frames per remote job: spread over the helper CPUs, but never fewer
    # than framenumperjob.
    dftpercpu=max(math.ceil(len(mols)/train.helpcpunum),train.framenumperjob)
    nstage=math.ceil(len(mols)/dftpercpu)
    print (nstage)
    partnames=[MSetname+'_part%d'%i for i in range(nstage)]
    remotepaths=[train.helpcpupath+'/'+MSetname+'/part%d'%i for i in range(nstage)]
    subMSetresult=[False for i in range(nstage)]
    # Slice with the same chunk size nstage was derived from; slicing with
    # framenumperjob dropped every molecule past nstage*framenumperjob.
    for i in range(nstage):
        partset=MSet(partnames[i])
        partset.mols=mols[i*dftpercpu:(i+1)*dftpercpu]
        partset.Save()
    # Retry the connection every 5 seconds until it succeeds.
    connect_num=0
    while True:
        try:
            trans=pko.Transport((train.helpcpunodeip,train.helpcpuport))
            #trans.banner_timeout=300
            trans.connect(username=train.helpcpuaccount,password=train.helpcpupasswd)
            break
        except Exception as e:
            print (e)
            connect_num+=1
            print (f"{connect_num} th reconnect to {((GPARAMS.Train_setting.helpcpunodeip,GPARAMS.Train_setting.helpcpuport))} for {MSetname}")
            time.sleep(5)
    # The SSH client reuses the already authenticated transport instead of
    # opening a second connection.
    ssh=pko.SSHClient()
    ssh._transport=trans
    sftp=None
    try:
        sftp=pko.SFTPClient.from_transport(trans)
        workpath=os.getcwd()
        print (workpath)
        jobidlist=[]
        for i in range(nstage):
            remotepath=remotepaths[i]
            srcpath=workpath+'/datasets/%s.pdb'%partnames[i]
            print (" Put pdb file:")
            print (remotepath,srcpath)
            stdin,stdout,stderr=ssh.exec_command('mkdir -p %s/datasets'%remotepath)
            print (stdout.read().decode())
            sftp.put(srcpath,remotepath+'/datasets/%s.pdb'%partnames[i])
            # The job script removes the 'queue' marker, creates 'started',
            # runs Qmcal.py and finally swaps 'started' for 'finished'; the
            # polling loop below relies on these marker files.
            with open('cpu.run','w') as cpurun:
                if queuetype=='PBS':
                    cpurun.write(pbscpustr%(GPARAMS.Compute_setting.Ncoresperthreads,GPARAMS.Compute_setting.Traininglevel+"_%d"%i))
                elif queuetype=='LSF':
                    cpurun.write(lsfcpustr%(GPARAMS.Compute_setting.Ncoresperthreads,GPARAMS.Compute_setting.Traininglevel+"_%d"%i))
                cpurun.write(train.helpcpuenv)
                cpurun.write("rm queue\n")
                cpurun.write('touch started\n')
                cpurun.write("python -u $ESOIHOME/bin/Qmcal.py -i %s -d %s> %s.qmout\n"%(ctrlfile,partnames[i],partnames[i]))
                cpurun.write("rm *.chk started\n")
                cpurun.write("touch finished\n")
            sftp.put(localpath=workpath+'/cpu.run',remotepath=remotepath+'/cpu.run')
            sftp.put(localpath=workpath+'/'+ctrlfile,remotepath=remotepath+'/'+ctrlfile)
            # jobidlist always gets exactly one entry per part so that it can
            # be indexed by part number later on.
            jobid=''
            if queuetype=='PBS':
                stdin,stdout,stderr=ssh.exec_command('cd %s &&touch queue &&qsub cpu.run'%remotepath)
                jobid=stdout.read().decode().strip()
                print (jobid)
            elif queuetype=='LSF':
                stdin,stdout,stderr=ssh.exec_command('cd %s && bsub <cpu.run'%remotepath)
                submitout=stdout.read().decode()
                print (submitout,stderr,stdin)
                jobid=submitout.strip()
            jobidlist.append(jobid)
        t=0
        while False in subMSetresult:
            time.sleep(30)
            t+=30
            for i in range(nstage):
                if subMSetresult[i]:
                    continue
                stdin,stdout,stderr=ssh.exec_command("cd %s && ls "%(remotepaths[i]))
                listing=stdout.read().decode()
                if 'finished' in listing:
                    subMSetresult[i]=True
                    continue
                # Still waiting in the queue: 'queue' marker present and the
                # job script has not created 'started' yet.
                queued='started' not in listing and 'queue' in listing
                # Only PBS jobs create the 'queue' marker and are tracked
                # with qstat/qsub, so resubmission is limited to PBS.
                if queued and queuetype=='PBS':
                    stdin,stdout,stderr=ssh.exec_command('qstat')
                    if jobidlist[i] not in stdout.read().decode():
                        # The job vanished from the queue without starting.
                        stdin,stdout,stderr=ssh.exec_command('cd %s && qsub cpu.run'%remotepaths[i])
                        jobidlist[i]=stdout.read().decode().strip()
            print (t,subMSetresult)
        finishmols=[]
        for i in range(nstage):
            srcpath=workpath+'/datasets/%s.pdb'%partnames[i]
            # Drop the uploaded copy before fetching the computed one.
            try:
                os.remove(srcpath)
            except FileNotFoundError:
                pass
            sftp.get(localpath=srcpath,remotepath=remotepaths[i]+'/datasets/'+partnames[i]+'.pdb')
            partset=MSet(partnames[i])
            partset.Load()
            finishmols+=partset.mols
        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols=finishmols
        TMPSet.Save()
    finally:
        # Release the connection even when a transfer or remote command fails.
        if sftp is not None:
            sftp.close()
        trans.close()


def _caljob_on_local_pool(TMPSet,manager,input_path,para_path):
    """Compute ``TMPSet.mols`` with a local process pool and store the results.

    Every molecule is passed to ``calculator`` as a tuple
    ``(mol, input_path, para_path, keywords, cores_per_job, atomization_level)``.
    Only results whose first element equals ``True`` are kept; the molecule in
    their second element gets ``Cal_EGCM()`` called on it before the set is
    saved.
    """
    compute=GPARAMS.Compute_setting
    corenumperjob=math.ceil(compute.Ncoresperthreads/compute.Consumerprocessnum)
    keywords=compute.Gaussiankeywords
    atomization=compute.Atomizationlevel
    inputlist=[(mol,input_path,para_path,keywords,corenumperjob,atomization) for mol in TMPSet.mols]
    paracal_pool=manager.Pool(compute.Consumerprocessnum)
    results=paracal_pool.map(calculator,inputlist)
    paracal_pool.close()
    paracal_pool.join()
    mollist=[]
    for result in results:
        if result[0]==True:
            mollist.append(result[1])
            mollist[-1].Cal_EGCM()
    TMPSet.mols=mollist
    TMPSet.Save()
    print ("HHHHHHHHHHHHHHHHHHHH")
    print ("HHHHHHHHHHHHHHHHHHHH")
    print (len(TMPSet.mols))
    print ("HHHHHHHHHHHHHHHHHHHH")
    print ("HHHHHHHHHHHHHHHHHHHH")


def parallel_caljob(MSetname,manager,ctrlfile):
    """Run the reference calculation for every molecule of the set ``MSetname``.

    The set is loaded, its molecules are computed either on the remote helper
    machine (when ``GPARAMS.Train_setting.Ifcpuwithhelp`` equals ``True``) or
    in a local process pool, and the set is saved again with the computed
    molecules.

    Args:
        MSetname: name of the molecule set to load, compute and overwrite.
        manager: object providing ``Pool(n)``; only used for the local pool.
        ctrlfile: control file name, relative to the working directory; it is
            uploaded next to every remote job and passed to ``Qmcal.py``.
    """
    para_path='./'
    if GPARAMS.Compute_setting.Traininglevel=="DFTB+":
        # os.environ only accepts strings, while Ncoresperthreads is used as
        # a number elsewhere in this block.
        os.environ["OMP_NUM_THREADS"]=str(GPARAMS.Compute_setting.Ncoresperthreads)
        para_path=GPARAMS.Software_setting.Dftbparapath
    input_path='./'+GPARAMS.Compute_setting.Traininglevel+'/Consumer/'
    os.makedirs(input_path,exist_ok=True)
    TMPSet=MSet(MSetname)
    TMPSet.Load()
    print ('Nmols in Newaddedset:',len(TMPSet.mols))
    if GPARAMS.Train_setting.Ifcpuwithhelp==True:
        _caljob_on_helper(TMPSet,MSetname,ctrlfile)
    else:
        _caljob_on_local_pool(TMPSet,manager,input_path,para_path)
    return
Exemple #12
0
def consumer(Queue):
    """Collect molecules from ``Queue`` and save the selected ones as a new set.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives.  Each
    batch is a sequence of pairs: element 0 is a molecule, which is renamed
    and appended to the set ``'Stage_%d_Newadded'``, and element 1 is a
    numeric value used for ranking (presumably the prediction error -- confirm
    against the producer).  After the sentinel the molecules are ordered by
    that value, largest first, filtered through ``Check_MSet`` and reduced to
    at most ``samplebasenum * sysnum`` molecules before the set is saved.

    When the ESOINN model has nodes and more classes than
    ``Modelnumperpoint``, the reduction keeps the first
    ``ceil(0.3 * samplebasenum) * sysnum`` 'Edge' and 'Normal' molecules each
    and fills the remaining slots with a random sample of 'Noise' molecules.
    Otherwise a plain random sample is drawn.

    Args:
        Queue: object whose ``get()`` returns a batch or ``None`` to stop.
    """
    # NOTE(review): Molnew is not referenced in this function; the import is
    # kept in case it is needed for its side effects -- confirm.
    from ..Base import Molnew
    from TensorMol import MSet
    print ("Consumer start")
    Newaddedset=MSet('Stage_%d_Newadded'%GPARAMS.Train_setting.Trainstage)
    num=0
    Error_list=[]
    while True:
        ERROR_mols=Queue.get()
        if ERROR_mols is None:
            break
        for i,item in enumerate(ERROR_mols):
            item[0].name="Stage_%d_Mol_%d_%d"%(GPARAMS.Train_setting.Trainstage,num,i)
            Error_list.append(item[1])
            Newaddedset.mols.append(item[0])
        num+=1
        # Save an intermediate copy every 2000 batches.
        if num%2000==0:
            Newaddedset.Save()
    # argsort of the negated values gives a largest-first ordering.
    Error_list=-np.array(Error_list)
    Newaddedset.mols=[Newaddedset.mols[i] for i in np.argsort(Error_list)]

    Newaddedset.mols=Check_MSet(Newaddedset.mols)
    sysnum=(len(GPARAMS.System_setting)+GPARAMS.Compute_setting.Checkernum)
    maxmolnum=GPARAMS.Compute_setting.samplebasenum*sysnum
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0 and GPARAMS.Esoinn_setting.Model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset=[]
        for mol in Newaddedset.mols:
            # Fall back to computing the descriptor when reading it fails.
            try:
                Dataset.append(mol.EGCM)
            except Exception:
                Dataset.append(mol.Cal_EGCM())
        _,_,_,_,signalmask=GPARAMS.Esoinn_setting.Model.predict(Dataset)
        normalmollist=[];edgemollist=[];noisemollist=[]
        for i,mol in enumerate(Newaddedset.mols):
            if signalmask[i]=='Noise':
                noisemollist.append(mol)
            elif signalmask[i]=='Edge':
                edgemollist.append(mol)
            elif signalmask[i]=='Normal':
                normalmollist.append(mol)
        print ("Select Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
        if len(Newaddedset.mols)>maxmolnum:
            quota=math.ceil(GPARAMS.Compute_setting.samplebasenum*0.3)*sysnum
            edgemollist=edgemollist[:quota]
            normalmollist=normalmollist[:quota]
            # Noise molecules fill whatever the other two classes left over.
            # Both clamps keep random.sample from raising ValueError when the
            # remainder is negative or the noise pool is too small.
            noisemolnum=max(0,maxmolnum-len(normalmollist)-len(edgemollist))
            noisepool=noisemollist[:noisemolnum*5]
            noisemollist=random.sample(noisepool,min(noisemolnum,len(noisepool)))
            Newaddedset.mols=edgemollist+noisemollist+normalmollist
        print ("After selecting Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
    else:
        print ("================================")
        print ("samplebasnum&sysnum",GPARAMS.Compute_setting.samplebasenum,sysnum)
        print ("================================")
        if len(Newaddedset.mols)>maxmolnum:
            Newaddedset.mols=random.sample(Newaddedset.mols,maxmolnum)
    Newaddedset.Save()
    return