Ejemplo n.º 1
0
def dataer(Dataqueue):
    """Partition the training set into per-cluster subsets and queue them.

    Loads the MSet named after ``GPARAMS.Compute_setting.Traininglevel``,
    filters it through ``Check_MSet`` (level 1) and saves it back; the same
    is done for the ``HF_resp`` set when ``Esoinn_setting.Ifresp`` is set.
    Every molecule is then appended to one or more
    ``<Traininglevel>_Cluster<i>`` subsets, each subset is topped up with a
    random sample drawn from the other clusters, saved, and finally put on
    ``Dataqueue``.

    Args:
        Dataqueue: queue-like object; receives one
            ``(subset, cluster_index, Maxsteps)`` tuple per cluster.
    """
    from TensorMol import MSet

    level = GPARAMS.Compute_setting.Traininglevel
    models_per_point = GPARAMS.Train_setting.Modelnumperpoint

    Trainingset = MSet(level)
    Trainingset.Load()
    Trainingset.mols = Check_MSet(Trainingset.mols, level=1)
    Trainingset.Save()
    if GPARAMS.Esoinn_setting.Ifresp:
        respset = MSet('HF_resp')
        respset.Load()
        respset.mols = Check_MSet(respset.mols, level=1)
        respset.Save()
    print("Trainingset.mols :", len(Trainingset.mols))

    ClusNum = max(GPARAMS.Esoinn_setting.Model.class_id, models_per_point)
    print("++++++++++++++++++Dataer++++++++++++++++++++++")
    print("ClusNum:", ClusNum)
    SubTrainList = [MSet(level + '_Cluster%d' % i) for i in range(ClusNum)]

    def _scaled(raw):
        # Min-max scaling with the bounds stored in the ESOINN settings.
        return (raw - GPARAMS.Esoinn_setting.scalemin) / \
            (GPARAMS.Esoinn_setting.scalemax - GPARAMS.Esoinn_setting.scalemin)

    print('start make cluster for training set')
    for mol in Trainingset.mols:
        try:
            EGCM = _scaled(mol.EGCM)
        except Exception:
            # The cached descriptor is missing or unusable: recompute it.
            EGCM = _scaled(mol.Cal_EGCM())
        # Non-finite entries (e.g. from a zero scaling range) are zeroed.
        EGCM[~np.isfinite(EGCM)] = 0
        if GPARAMS.Esoinn_setting.Model.class_id >= models_per_point:
            targets = GPARAMS.Esoinn_setting.Model.find_closest_cluster(
                min(models_per_point, GPARAMS.Esoinn_setting.Model.class_id),
                EGCM)
        else:
            # Fewer ESOINN classes than requested models: every model gets
            # every molecule.
            targets = range(models_per_point)
        for j in targets:
            SubTrainList[j].mols.append(mol)

    for i, subset in enumerate(SubTrainList):
        print("Cluster %d has %d mols" % (i, len(subset.mols)))

    # NOTE: clusters are augmented in place and in index order, so a cluster
    # processed later samples from already-augmented earlier clusters.
    for i, subset in enumerate(SubTrainList):
        othermollist = []
        for j, other in enumerate(SubTrainList):
            if j != i and len(other.mols) > 2:
                othermollist += other.mols
        print("Other mol list for Cluster %d" % i, len(othermollist))
        if len(othermollist) > 0:
            samplenum = min(
                math.ceil((len(Trainingset.mols) - len(subset.mols)) *
                          GPARAMS.Esoinn_setting.Mixrate),
                len(othermollist))
            print(len(othermollist), samplenum)
            subset.mols += random.sample(othermollist, samplenum)
        subset.Save()

    for i, subset in enumerate(SubTrainList):
        Dataqueue.put((subset, i, GPARAMS.Train_setting.Maxsteps))
        print('%dth cluster is put in queue, mol num: %d!' %
              (i, len(subset.mols)))
Ejemplo n.º 2
0
def consumer(Queue):
    """Collect molecules from ``Queue`` into the stage's "Newadded" set.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives. The
    first element of every batch entry is renamed and appended to the
    ``Stage_<Trainstage>_Newadded`` MSet. The collected molecules are then
    filtered through ``Check_MSet`` and down-sampled before the set is saved.

    Args:
        Queue: queue-like object yielding sequences of entries whose item 0
            is a molecule object, terminated by ``None``.
    """
    import time
    from ..Base import Molnew
    import os
    from TensorMol import MSet
    print("Consumer start")
    Newaddedset = MSet('Stage_%d_Newadded' % GPARAMS.Train_setting.Trainstage)
    num = 0
    while True:
        ERROR_mols = Queue.get()
        # Identity test: "== None" may be evaluated element-wise by
        # array-like batches.
        if ERROR_mols is None:
            break
        # NOTE(review): `num` only advances once per batch, so all molecules
        # of one batch share the same name -- confirm this is intended.
        for entry in ERROR_mols:
            entry[0].name = "Stage_%d_Mol_%d" % (
                GPARAMS.Train_setting.Trainstage, num)
            Newaddedset.mols.append(entry[0])
        num += 1
        if num % 2000 == 0:
            # Periodic checkpoint of what has been collected so far.
            Newaddedset.Save()

    Newaddedset.mols = Check_MSet(Newaddedset.mols)
    model = GPARAMS.Esoinn_setting.Model
    if len(model.nodes) != 0 and \
            model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset = []
        for mol in Newaddedset.mols:
            try:
                Dataset.append(mol.EGCM)
            except Exception:
                # No cached descriptor on this molecule: compute it.
                Dataset.append(mol.Cal_EGCM())
        # Only the last item returned by predict() is used here.
        _, _, _, _, signalmask = model.predict(Dataset)
        normalmollist = []
        edgemollist = []
        noisemollist = []
        for i, mol in enumerate(Newaddedset.mols):
            label = signalmask[i]
            if label == 'Noise':
                noisemollist.append(mol)
            elif label == 'Edge':
                edgemollist.append(mol)
            elif label == 'Normal':
                normalmollist.append(mol)
        if len(Newaddedset.mols) > 1000:
            # Keep at most 600 edge, 200 noise and 20 normal molecules.
            edgemollist = random.sample(edgemollist,
                                        min(600, len(edgemollist)))
            noisemollist = random.sample(noisemollist,
                                         min(200, len(noisemollist)))
            normalmollist = random.sample(normalmollist,
                                          min(20, len(normalmollist)))
            Newaddedset.mols = edgemollist + noisemollist + normalmollist
    elif len(Newaddedset.mols) > 1000:
        Newaddedset.mols = random.sample(Newaddedset.mols, 1000)
    Newaddedset.Save()
    return
Ejemplo n.º 3
0
def parallel_caljob(MSetname, manager, ctrlfile):
    """Run the reference calculation for every molecule of an MSet.

    With ``GPARAMS.Train_setting.Ifwithhelp`` set, the set is split into
    parts of ``framenumperjob`` molecules, each part is uploaded to the
    helper node over SSH/SFTP together with a job script, and the function
    polls until every part directory contains a ``finished`` marker before
    downloading the results. Otherwise the molecules are processed locally
    by ``calculator`` in a process pool. In both cases the set named
    ``MSetname`` is overwritten with the resulting molecules.

    Args:
        MSetname: name of the MSet to load, compute and save.
        manager: object providing ``Pool(n)`` (local mode only).
        ctrlfile: control file name; uploaded next to each remote job and
            passed to ``Qmcal.py``.
    """
    para_path = './'
    if GPARAMS.Compute_setting.Traininglevel == "DFTB+":
        # os.environ only accepts str values.
        os.environ["OMP_NUM_THREADS"] = str(
            GPARAMS.Compute_setting.Ncoresperthreads)
        para_path = GPARAMS.Software_setting.Dftbparapath
    input_path = './' + GPARAMS.Compute_setting.Traininglevel + '/Consumer/'
    if not os.path.exists(input_path):
        os.makedirs(input_path, exist_ok=True)
    TMPSet = MSet(MSetname)
    TMPSet.Load()
    mols = TMPSet.mols
    print('Nmols in Newaddedset:', len(mols))
    if GPARAMS.Train_setting.Ifwithhelp == True:
        chunk = GPARAMS.Train_setting.framenumperjob
        nstage = math.ceil(len(mols) / chunk)
        print(nstage)
        partnames = [MSetname + '_part%d' % i for i in range(nstage)]
        subMSetlist = [MSet(name) for name in partnames]
        subMSetresult = [False] * nstage
        for i, subset in enumerate(subMSetlist):
            subset.mols = mols[i * chunk:(i + 1) * chunk]
            subset.Save()
        workpath = os.getcwd()
        trans = pko.Transport((GPARAMS.Train_setting.helpcpunodeip,
                               GPARAMS.Train_setting.helpcpuport))
        try:
            trans.connect(username=GPARAMS.Train_setting.helpcpuaccount,
                          password=GPARAMS.Train_setting.helpcpupasswd)
            ssh = pko.SSHClient()
            # Reuse the authenticated transport for command execution.
            ssh._transport = trans
            sftp = pko.SFTPClient.from_transport(trans)
            print(workpath)
            for i, partname in enumerate(partnames):
                remotepath = GPARAMS.Train_setting.helpcpupath + '/' + \
                    MSetname + '/part%d' % i
                srcpath = workpath + '/datasets/%s.pdb' % partname
                print(" Put pdb file:")
                print(remotepath, srcpath)
                _, stdout, _ = ssh.exec_command('mkdir -p %s/datasets' %
                                                remotepath)
                # Reading stdout also waits for mkdir to finish before the
                # upload starts.
                print(stdout.read().decode())
                sftp.put(srcpath,
                         remotepath + '/datasets/%s.pdb' % partname)
                if GPARAMS.Train_setting.queuetype == 'PBS':
                    with open('pbs.run', 'w') as pbsrun:
                        pbsrun.write(
                            pbsstr %
                            (GPARAMS.Compute_setting.Ncoresperthreads,
                             GPARAMS.Compute_setting.Traininglevel +
                             "_%d" % i))
                        pbsrun.write(GPARAMS.Train_setting.helpcpuenv)
                        pbsrun.write(
                            "python -u Qmcal.py -i %s -d %s> %s.qmout\n" %
                            (ctrlfile, partname, partname))
                        pbsrun.write("rm *.chk\n")
                        pbsrun.write("touch finished\n")
                    sftp.put(localpath=workpath + '/pbs.run',
                             remotepath=remotepath + '/pbs.run')
                    sftp.put(localpath=workpath + '/' + ctrlfile,
                             remotepath=remotepath + '/' + ctrlfile)
                    # NOTE(review): the job is not submitted from here (the
                    # original qsub call was commented out) -- presumably it
                    # is started by hand on the helper node; confirm.
            t = 0
            while False in subMSetresult:
                time.sleep(300)
                t += 300
                for i in range(nstage):
                    if subMSetresult[i]:
                        continue
                    remotepath = GPARAMS.Train_setting.helpcpupath + '/' + \
                        MSetname + '/part%d' % i
                    # `ls finished` prints the name on stdout only when the
                    # marker written by the job script exists.
                    _, stdout, _ = ssh.exec_command(
                        "cd %s && ls finished" % (remotepath))
                    if 'finished' in stdout.read().decode():
                        subMSetresult[i] = True
                print(t, subMSetresult)
            finishmols = []
            subMSetlist = [MSet(name) for name in partnames]
            for i, partname in enumerate(partnames):
                srcpath = workpath + '/datasets/%s.pdb' % partname
                remotepath = GPARAMS.Train_setting.helpcpupath + '/' + \
                    MSetname + '/part%d' % i
                # Drop the local input copy before fetching the result.
                if os.path.exists(srcpath):
                    os.remove(srcpath)
                # The part was uploaded to <remotepath>/datasets/; fetch the
                # result from there too (assumes the remote job saves the
                # set back into its datasets/ directory -- TODO confirm).
                sftp.get(localpath=srcpath,
                         remotepath=remotepath + '/datasets/' + partname +
                         '.pdb')
                subMSetlist[i].Load()
                finishmols += subMSetlist[i].mols
        finally:
            # Closing the transport also closes the channels opened on it.
            trans.close()
        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols = finishmols
        TMPSet.Save()
    else:
        nmols = len(mols)
        inputlist = list(
            zip(mols,
                [input_path] * nmols,
                [para_path] * nmols,
                [GPARAMS.Compute_setting.Gaussiankeywords] * nmols,
                [GPARAMS.Compute_setting.Ncoresperthreads] * nmols,
                [GPARAMS.Compute_setting.Atomizationlevel] * nmols))
        paracal_pool = manager.Pool(GPARAMS.Compute_setting.Consumerprocessnum)
        results = paracal_pool.map(calculator, inputlist)
        paracal_pool.close()
        paracal_pool.join()
        mollist = []
        for result in results:
            # result[0] is the success flag, result[1] the molecule.
            if result[0] == True:
                mollist.append(result[1])
                mollist[-1].Cal_EGCM()
        TMPSet.mols = mollist
        TMPSet.Save()
    return
Ejemplo n.º 4
0
def chargenet_train(MSetname, GPUQueue, jsonfile):
    """Train the charge network, locally or on the helper GPU node.

    A three-entry network structure is derived from either the best known
    structures (``get_best_struc(2)``) or the configured initial structure,
    then randomly perturbed. With ``Train_setting.Ifgpuwithhelp`` false the
    network is trained in-process on a GPU id taken from ``GPUQueue``;
    otherwise the dataset, ``jsonfile`` and a job script are uploaded over
    SSH/SFTP, the job is submitted, and the function polls for a
    ``finished`` marker before downloading the trained network.

    Args:
        MSetname: dataset name; ``datasets/<MSetname>.pdb`` is uploaded in
            remote mode.
        GPUQueue: queue of free GPU ids (local mode only).
        jsonfile: settings file uploaded and passed to ``TrainNN.py``.

    Raises:
        ValueError: in remote mode when neither ``Esoinn_setting.Ifresp``
            nor ``Esoinn_setting.Ifadch`` is set.
    """
    print("RESP coming")
    if len(GPARAMS.Neuralnetwork_setting.NNstrucselect) != 0:
        candidate_struc = get_best_struc(2)
        print("Candidate_NNSTRUC:", candidate_struc)
        basestruc = [math.ceil(i) for i in np.mean(candidate_struc, axis=0)]
    else:
        basestruc = GPARAMS.Neuralnetwork_setting.Initstruc
    deltastruc = [math.ceil(i * 0.10) for i in basestruc]
    print("Delta struc:", deltastruc)
    # Each of the three entries moves by up to +/-5 steps of ~10 % of its
    # base value.
    changevector = [random.randint(-5, 5) for i in range(3)]
    evostruc = [
        basestruc[i] + deltastruc[i] * changevector[i] for i in range(3)
    ]
    print("evo struc:", evostruc)

    if GPARAMS.Train_setting.Ifgpuwithhelp == False:
        GPUID = GPUQueue.get()
        try:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUID)
            if GPARAMS.Esoinn_setting.Ifresp == True:
                Chargeset = MSet("HF_resp")
            else:
                Chargeset = MSet(GPARAMS.Compute_setting.Traininglevel)
            Chargeset.Load()
            GPARAMS.Neuralnetwork_setting.Switchrate = 0.9
            min_mols = GPARAMS.Neuralnetwork_setting.Batchsize * 20
            # Replicate a small set until it holds at least 20 batches
            # (an empty set is left alone to avoid dividing by zero).
            if 0 < len(Chargeset.mols) < min_mols:
                num = math.ceil(min_mols / len(Chargeset.mols))
                Chargeset.mols = Chargeset.mols * num
            TreatedAtoms = Chargeset.AtomTypes()
            d = MolDigester(TreatedAtoms,
                            name_="ANI1_Sym_Direct",
                            OType_="EnergyAndDipole")
            tset = TData_BP_Direct_EE_WithCharge(Chargeset,
                                                 d,
                                                 order_=1,
                                                 num_indis_=1,
                                                 type_="mol",
                                                 WithGrad_=True,
                                                 MaxNAtoms=100)
            SUBNet = BP_HDNN_charge(tset, None, Structure=evostruc)
            SUBNet.train(SUBNet.max_steps, continue_training=False)
        finally:
            # Always hand the GPU back, even when training fails.
            GPUQueue.put(GPUID)
        return

    # ---- remote training on the helper GPU node ----
    if GPARAMS.Esoinn_setting.Ifresp == True:
        mode_tag, subdir, script_name = 'Resp', 'resp', 'gpu_resp.run'
    elif GPARAMS.Esoinn_setting.Ifadch == True:
        mode_tag, subdir, script_name = 'Adch', 'adch', 'gpu_adch.run'
    else:
        raise ValueError(
            "chargenet_train: remote training needs Esoinn_setting.Ifresp "
            "or Esoinn_setting.Ifadch to be set")
    remotepath = GPARAMS.Train_setting.helpgpupath + '/Stage%d/%s' % (
        GPARAMS.Train_setting.Trainstage, subdir)
    poll_interval = 30  # seconds between two status checks

    trans = None
    connect_num = 0
    while True:
        try:
            trans = pko.Transport((GPARAMS.Train_setting.helpgpunodeip,
                                   GPARAMS.Train_setting.helpgpuport))
            trans.connect(username=GPARAMS.Train_setting.helpgpuaccount,
                          password=GPARAMS.Train_setting.helpgpupasswd)
            break
        except Exception as e:
            print(e)
            if trans is not None:
                trans.close()
                trans = None
            connect_num += 1
            print(
                f"{connect_num} th reconnect to {((GPARAMS.Train_setting.helpgpunodeip,GPARAMS.Train_setting.helpgpuport))} for {MSetname}"
            )
            time.sleep(5)

    try:
        ssh = pko.SSHClient()
        # Reuse the authenticated transport for command execution.
        ssh._transport = trans
        sftp = pko.SFTPClient.from_transport(trans)
        workpath = os.getcwd()
        print(workpath)
        srcpath = workpath + '/datasets/%s.pdb' % (MSetname)
        print(remotepath, srcpath)
        # NOTE(review): plain `rm` does not remove a directory; kept as in
        # the original -- confirm whether `rm -r` was intended.
        ssh.exec_command('rm %s' % (remotepath))
        _, stdout, _ = ssh.exec_command('mkdir -p %s' %
                                        (remotepath + '/datasets'))
        print(stdout.read().decode())
        sftp.put(srcpath, remotepath + '/datasets/%s.pdb' % (MSetname))

        queuetype = GPARAMS.Train_setting.gpuqueuetype
        # The script is written to exactly one local file, and that same
        # file is the one uploaded below.
        with open(script_name, 'w') as shellrun:
            header = None
            if queuetype == 'LSF':
                header = lsfgpustr % (GPARAMS.Train_setting.gpuqueuename,
                                      mode_tag)
            elif queuetype == "PBS":
                header = pbsgpustr % (4, GPARAMS.Train_setting.gpuqueuename,
                                      mode_tag)
            if header is not None:
                shellrun.write(header)
                print(header, MSetname)
                shellrun.write(GPARAMS.Train_setting.helpgpuenv)
            strucstr = "_".join([str(i) for i in evostruc])
            shellrun.write(
                'python -u $ESOIHOME/bin/TrainNN.py -i %s -d %s -s %s -t bpcharge \n'
                % (jsonfile, MSetname, strucstr))
            shellrun.write('touch finished\n')

        sftp.put(localpath=workpath + '/' + script_name,
                 remotepath=remotepath + '/gpu.run')
        sftp.put(localpath=workpath + '/%s' % jsonfile,
                 remotepath=remotepath + '/%s' % jsonfile)
        if queuetype == 'LSF':
            ssh.exec_command("cd %s && bsub <gpu.run" % remotepath)
        elif queuetype == "PBS":
            ssh.exec_command("cd %s && qsub <gpu.run" % remotepath)

        while True:
            _, stdout, _ = ssh.exec_command("cd %s&& ls" % remotepath)
            finished = 'finished' in stdout.read().decode()
            if queuetype == "PBS":
                _, stdout, _ = ssh.exec_command(
                    "cd %s && grep 'CUDA_ERROR_OUT_OF_MEMORY' Cluster*.o*" %
                    remotepath)
                if 'CUDA_ERROR_OUT_OF_MEMORY' in stdout.read().decode():
                    # The job ran out of GPU memory: archive its output and
                    # resubmit with the PBS submit command (`mkdir -p` so
                    # a second retry does not abort the && chain).
                    ssh.exec_command(
                        "cd %s && mkdir -p old && mv Cluster*.o* finished old && qsub <gpu.run"
                        % remotepath)
                    finished = False
            if finished:
                break
            # Avoid hammering the helper node with back-to-back commands.
            time.sleep(poll_interval)

        _, stdout, _ = ssh.exec_command(
            "cd %s && mv %s/%s/*.record networks/chargenet.record" %
            (remotepath, remotepath, GPARAMS.Compute_setting.Traininglevel))
        print(stdout.read().decode())
        _, stdout, _ = ssh.exec_command(
            "cd %s/networks && tar zcvf chargenet.tar.gz * && mv chargenet.tar.gz .."
            % remotepath)
        print(stdout.read().decode())
        sftp.get(localpath=workpath + '/networks/chargenet.tar.gz',
                 remotepath=remotepath + '/chargenet.tar.gz')
    finally:
        # Closing the transport also closes the channels opened on it.
        trans.close()

    os.system(
        'cd ./networks && tar zxvf chargenet.tar.gz && mv *.record ../%s/Stage%d/ && rm chargenet.tar.gz'
        % (GPARAMS.Compute_setting.Traininglevel,
           GPARAMS.Train_setting.Trainstage))
    os.system('rm gpu_*.run')
    return
Ejemplo n.º 5
0
    if os.path.exists('./networks/lastsave'):
        os.system("rm ./networks/lastsave/* -r")
        os.system("cp *.ESOINN Sfactor.in ./networks/lastsave ")
    UpdateGPARAMS(jsonfile)
    for i in GPARAMS.Compute_setting.Gpulist:
        GPUQueue.put(i)

    bigset=MSet('Bigset')
    GPARAMS.Dataset_setting.Inputdatasetlist=random.sample(GPARAMS.Dataset_setting.Inputdatasetlist,8)
    for name in GPARAMS.Dataset_setting.Inputdatasetlist:
        tmpset=MSet(name)
        tmpset.Load()
        bigset.mols+=tmpset.mols
    for i in range(GPARAMS.Compute_setting.Checkernum):
        checker_set=MSet('Bigset_%d'%i)
        checker_set.mols=[bigset.mols[j] for j in range(len(bigset.mols)) if j%(i+1)==0]
        checker_set.mols=[bigset.mols[0]]+random.sample(checker_set.mols,min(GPARAMS.Compute_setting.Checkerstep,len(checker_set.mols)))
        checker_set.Save()
    bigset=None 
    for stage in range(GPARAMS.Train_setting.Trainstage,\
                       GPARAMS.Train_setting.Stagenum+GPARAMS.Train_setting.Trainstage):
        LoadModel()
        #==Main MD process with productor and Consumer model==
        ProductPool=Pool(len(GPARAMS.Compute_setting.Gpulist))
        Resultlist=[]
        for i in range(len(GPARAMS.System_setting)):
            result=ProductPool.apply_async(productor,(i,QMQueue,GPUQueue))
            Resultlist.append(result)
        for i in range(GPARAMS.Compute_setting.Checkernum):
            result=ProductPool.apply_async(checker,(i,QMQueue,GPUQueue))
            Resultlist.append(result)
Ejemplo n.º 6
0
def parallel_caljob(MSetname,manager,ctrlfile):
    """Run the reference calculation for every molecule of an MSet.

    With ``GPARAMS.Train_setting.Ifcpuwithhelp`` set, the set is split into
    parts, each part is uploaded to the helper node over SSH/SFTP with a
    ``cpu.run`` job script and submitted (PBS or LSF). The part directories
    are polled for the ``queue`` / ``started`` / ``finished`` marker files
    maintained by the job script, and results are downloaded once all parts
    are finished. Otherwise the molecules are processed locally by
    ``calculator`` in a process pool. In both cases the set named
    ``MSetname`` is overwritten with the resulting molecules.

    Args:
        MSetname: name of the MSet to load, compute and save.
        manager: object providing ``Pool(n)`` (local mode only).
        ctrlfile: control file name; uploaded next to each remote job and
            passed to ``Qmcal.py``.
    """
    para_path='./'
    if GPARAMS.Compute_setting.Traininglevel=="DFTB+":
        # os.environ only accepts str values.
        os.environ["OMP_NUM_THREADS"]=str(GPARAMS.Compute_setting.Ncoresperthreads)
        para_path=GPARAMS.Software_setting.Dftbparapath
    input_path='./'+GPARAMS.Compute_setting.Traininglevel+'/Consumer/'
    if not os.path.exists(input_path):
        os.makedirs(input_path,exist_ok=True)
    TMPSet=MSet(MSetname)
    TMPSet.Load()
    mols=TMPSet.mols
    print ('Nmols in Newaddedset:',len(mols))
    if GPARAMS.Train_setting.Ifcpuwithhelp==True:
        # Molecules per remote job: spread over the helper CPUs, but never
        # fewer than framenumperjob per job.
        dftpercpu=math.ceil(len(mols)/GPARAMS.Train_setting.helpcpunum)
        if dftpercpu<GPARAMS.Train_setting.framenumperjob:
            dftpercpu=GPARAMS.Train_setting.framenumperjob
        nstage=math.ceil(len(mols)/dftpercpu)
        print (nstage)
        partnames=[MSetname+'_part%d'%i for i in range(nstage)]
        subMSetlist=[MSet(name) for name in partnames]
        subMSetresult=[False]*nstage
        for i,subset in enumerate(subMSetlist):
            # Slice with the same size nstage was derived from, so that no
            # molecule is left out.
            subset.mols=mols[i*dftpercpu:(i+1)*dftpercpu]
            subset.Save()
        is_pbs=GPARAMS.Train_setting.cpuqueuetype=='PBS'
        is_lsf=GPARAMS.Train_setting.cpuqueuetype=='LSF'
        trans=None
        connect_num=0
        while True:
            try:
                trans=pko.Transport((GPARAMS.Train_setting.helpcpunodeip,GPARAMS.Train_setting.helpcpuport))
                trans.connect(username=GPARAMS.Train_setting.helpcpuaccount,password=GPARAMS.Train_setting.helpcpupasswd)
                break
            except Exception as e:
                print (e)
                if trans is not None:
                    trans.close()
                    trans=None
                connect_num+=1
                print (f"{connect_num} th reconnect to {((GPARAMS.Train_setting.helpcpunodeip,GPARAMS.Train_setting.helpcpuport))} for {MSetname}")
                time.sleep(5)
        try:
            ssh=pko.SSHClient()
            # Reuse the authenticated transport for command execution.
            ssh._transport=trans
            sftp=pko.SFTPClient.from_transport(trans)
            workpath=os.getcwd()
            print (workpath)
            jobidlist=[]
            for i,partname in enumerate(partnames):
                remotepath=GPARAMS.Train_setting.helpcpupath+'/'+MSetname+'/part%d'%i
                srcpath=workpath+'/datasets/%s.pdb'%partname
                print (" Put pdb file:")
                print (remotepath,srcpath)
                _,stdout,_=ssh.exec_command('mkdir -p %s/datasets'%remotepath)
                # Reading stdout also waits for mkdir to finish before the
                # upload starts.
                print (stdout.read().decode())
                sftp.put(srcpath,remotepath+'/datasets/%s.pdb'%partname)
                with open('cpu.run','w') as cpurun:
                    if is_pbs:
                        cpurun.write(pbscpustr%(GPARAMS.Compute_setting.Ncoresperthreads,GPARAMS.Compute_setting.Traininglevel+"_%d"%i))
                    elif is_lsf:
                        cpurun.write(lsfcpustr%(GPARAMS.Compute_setting.Ncoresperthreads,GPARAMS.Compute_setting.Traininglevel+"_%d"%i))
                    cpurun.write(GPARAMS.Train_setting.helpcpuenv)
                    # Marker files: queue -> started -> finished.
                    cpurun.write("rm queue\n")
                    cpurun.write('touch started\n')
                    cpurun.write("python -u $ESOIHOME/bin/Qmcal.py -i %s -d %s> %s.qmout\n"%(ctrlfile,partname,partname))
                    cpurun.write("rm *.chk started\n")
                    cpurun.write("touch finished\n")
                sftp.put(localpath=workpath+'/cpu.run',remotepath=remotepath+'/cpu.run')
                sftp.put(localpath=workpath+'/'+ctrlfile,remotepath=remotepath+'/'+ctrlfile)
                # Keep jobidlist aligned with the part index for every
                # scheduler so it can be indexed safely while polling.
                jobid=''
                if is_pbs:
                    _,stdout,_=ssh.exec_command('cd %s &&touch queue &&qsub cpu.run'%remotepath)
                    jobid=stdout.read().decode().strip()
                    print (jobid)
                elif is_lsf:
                    _,stdout,_=ssh.exec_command('cd %s && bsub <cpu.run'%remotepath)
                    jobid=stdout.read().decode().strip()
                    print (jobid)
                jobidlist.append(jobid)
            t=0
            while False in subMSetresult:
                time.sleep(30)
                t+=30
                for i in range(nstage):
                    if subMSetresult[i]:
                        continue
                    remotepath=GPARAMS.Train_setting.helpcpupath+'/'+MSetname+'/part%d'%i
                    _,stdout,_=ssh.exec_command("cd %s && ls "%(remotepath))
                    tmpstr=stdout.read().decode()
                    if 'finished' in tmpstr:
                        subMSetresult[i]=True
                        continue
                    if 'started' in tmpstr:
                        state='cal'
                    elif 'queue' in tmpstr:
                        state='queue'
                    else:
                        state='unknown'
                    # A job that is still marked as queued but no longer
                    # listed by qstat was lost: submit it again (PBS only,
                    # qstat/qsub are PBS commands).
                    if is_pbs and state=='queue':
                        _,stdout,_=ssh.exec_command('qstat')
                        if jobidlist[i] not in stdout.read().decode():
                            _,stdout,_=ssh.exec_command('cd %s && qsub cpu.run'%remotepath)
                            jobidlist[i]=stdout.read().decode().strip()
                print (t,subMSetresult)
            finishmols=[]
            subMSetlist=[MSet(name) for name in partnames]
            for i,partname in enumerate(partnames):
                srcpath=workpath+'/datasets/%s.pdb'%partname
                remotepath=GPARAMS.Train_setting.helpcpupath+'/'+MSetname+'/part%d'%i
                # Drop the local input copy before fetching the result.
                if os.path.exists(srcpath):
                    os.remove(srcpath)
                sftp.get(localpath=srcpath,remotepath=remotepath+'/datasets/'+partname+'.pdb')
                subMSetlist[i].Load()
                finishmols+=subMSetlist[i].mols
        finally:
            # Closing the transport also closes the channels opened on it.
            trans.close()
        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols=finishmols
        TMPSet.Save()
    else:
        nmols=len(mols)
        # The configured cores are shared between the pool's worker processes.
        corespermol=math.ceil(GPARAMS.Compute_setting.Ncoresperthreads/GPARAMS.Compute_setting.Consumerprocessnum)
        inputlist=list(zip(mols,
                           [input_path]*nmols,
                           [para_path]*nmols,
                           [GPARAMS.Compute_setting.Gaussiankeywords]*nmols,
                           [corespermol]*nmols,
                           [GPARAMS.Compute_setting.Atomizationlevel]*nmols))
        paracal_pool=manager.Pool(GPARAMS.Compute_setting.Consumerprocessnum)
        results=paracal_pool.map(calculator,inputlist)
        paracal_pool.close()
        paracal_pool.join()
        mollist=[]
        for result in results:
            # result[0] is the success flag, result[1] the molecule.
            if result[0]==True:
                mollist.append(result[1])
                mollist[-1].Cal_EGCM()
        TMPSet.mols=mollist
        TMPSet.Save()
        print ("HHHHHHHHHHHHHHHHHHHH")
        print ("HHHHHHHHHHHHHHHHHHHH")
        print (len(TMPSet.mols))
        print ("HHHHHHHHHHHHHHHHHHHH")
        print ("HHHHHHHHHHHHHHHHHHHH")
    return
Ejemplo n.º 7
0
def consumer(Queue):
    """Collect molecules from ``Queue`` into the stage's "Newadded" set.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives. For
    every batch entry, item 0 (a molecule) is renamed and stored and item 1
    is recorded as its sort key. The molecules are ordered by descending
    key, filtered through ``Check_MSet`` and down-sampled to at most
    ``samplebasenum * (number of systems + Checkernum)`` before the
    ``Stage_<Trainstage>_Newadded`` set is saved.

    Args:
        Queue: queue-like object yielding sequences of ``(mol, value, ...)``
            entries, terminated by ``None``.
    """
    import time
    from ..Base import Molnew
    import os
    from TensorMol import MSet
    print ("Consumer start")
    Newaddedset=MSet('Stage_%d_Newadded'%GPARAMS.Train_setting.Trainstage)
    num=0
    Error_list=[]
    while True:
        ERROR_mols=Queue.get()
        # Identity test: "== None" may be evaluated element-wise by
        # array-like batches.
        if ERROR_mols is None:
            break
        for i,entry in enumerate(ERROR_mols):
            entry[0].name="Stage_%d_Mol_%d_%d"%(GPARAMS.Train_setting.Trainstage,num,i)
            Error_list.append(entry[1])
            Newaddedset.mols.append(entry[0])
        num+=1
        if num%2000==0:
            # Periodic checkpoint of what has been collected so far.
            Newaddedset.Save()
    # Negate so that argsort yields the largest recorded values first.
    Error_list=-np.array(Error_list)
    Newaddedset.mols=[Newaddedset.mols[i] for i in np.argsort(Error_list)]

    Newaddedset.mols=Check_MSet(Newaddedset.mols)
    sysnum=(len(GPARAMS.System_setting)+GPARAMS.Compute_setting.Checkernum)
    budget=GPARAMS.Compute_setting.samplebasenum*sysnum
    model=GPARAMS.Esoinn_setting.Model
    if len(model.nodes)!=0 and model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset=[]
        for mol in Newaddedset.mols:
            try:
                Dataset.append(mol.EGCM)
            except Exception:
                # No cached descriptor on this molecule: compute it.
                Dataset.append(mol.Cal_EGCM())
        # Only the last item returned by predict() is used here.
        _,_,_,_,signalmask=model.predict(Dataset)
        normalmollist=[];edgemollist=[];noisemollist=[]
        for i,mol in enumerate(Newaddedset.mols):
            label=signalmask[i]
            if label=='Noise':
                noisemollist.append(mol)
            elif label=='Edge':
                edgemollist.append(mol)
            elif label=='Normal':
                normalmollist.append(mol)
        print ("Select Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
        if len(Newaddedset.mols)>budget:
            # Edge and normal molecules each get up to 30 % of the budget
            # (first entries of the ordered lists); noise fills the rest.
            quota=math.ceil(GPARAMS.Compute_setting.samplebasenum*0.3)*sysnum
            edgemollist=edgemollist[:quota]
            normalmollist=normalmollist[:quota]
            # Clamp: rounding up the two quotas can exceed a tiny budget.
            noisemolnum=max(0,budget-len(normalmollist)-len(edgemollist))
            # Sample from the first 5x candidates, never asking for more
            # items than the pool actually holds.
            noisepool=noisemollist[:noisemolnum*5]
            noisemollist=random.sample(noisepool,min(noisemolnum,len(noisepool)))
            Newaddedset.mols=edgemollist+noisemollist+normalmollist
        print ("After selecting Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
    else:
        print ("================================")
        print ("samplebasnum&sysnum",GPARAMS.Compute_setting.samplebasenum,sysnum)
        print ("================================")
        if len(Newaddedset.mols)>budget:
            Newaddedset.mols=random.sample(Newaddedset.mols,budget)
    Newaddedset.Save()
    return