def parameter_selection():
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run = 0
    #filenames = ['emotions','yeast','scene','enron','medical','toy10','toy50']  #,'cal500','fp','cancer']
    filenames = ['toy10', 'toy50', 'emotions', 'yeast', 'scene', 'enron', 'medical']
    n = 0
    for filename in filenames:
        for graph_type in ['tree']:
            for l_norm in ['2']:
                in_i = 0
                for in_c in ['100', '75', '50', '20', '10', '5', '1', '0.5', '0.25', '0.1', '0.01']:
                    in_i += 1
                    for kth_fold in ['1', '2', '3', '4', '5']:
                        node = cluster[n % len(cluster)]
                        n += 1
                        p = multiprocessing.Process(target=singleRSTA,
                                                    args=(filename, graph_type, '1', node,
                                                          kth_fold, l_norm, "%d" % in_i, in_c,))
                        jobs.append(p)
                        p.start()
                        time.sleep(2)  # fold
                        pass
                    time.sleep(2)  # c
                    pass
                time.sleep(2 * is_main_run)  # lnorm
                pass
            time.sleep(60 * is_main_run)  # tree
    for job in jobs:
        job.join()
        pass
    pass
def run():
    logging.info('\t\tGenerating priority queue.')
    # get list of files
    filelist = os.listdir('../../FeatureExtraction/Results/bids/')
    # generate job queue
    jobind = 0
    n = 1
    nodefilelist = []
    for filename in filelist:
        if checkfile(filename):
            continue
        if n > 5:
            job_queue.put((jobind, (nodefilelist)))
            jobind += 1
            n = 1
            nodefilelist = []
        nodefilelist.append(filename)
        n += 1
    if len(nodefilelist) > 0:
        job_queue.put((jobind, (nodefilelist)))
    # get computing node
    cluster = get_free_nodes()[0]
    #cluster = ['melkinkari']  # if you don't have access to any computer cluster, just use your machine as the only computing node
    #cluster = ['ukko133.hpc']  # if you don't have access to any computer cluster, just use your machine as the only computing node
    # run jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    is_main_run_factor = 1
    # running job_queue: launch one Worker thread per free node and wait for the queue to drain
    threads = []
    for i in range(len(cluster)):
        if job_queue.empty():
            break
        t = Worker(job_queue, cluster[i])
        time.sleep(is_main_run_factor)
        try:
            t.start()
            threads.append(t)
        except ThreadError:
            logging.warning("\t\tError: thread error caught!")
        pass
    for t in threads:
        t.join()
    pass
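# The wrappers in this file all lean on a small scaffold defined elsewhere: a
# module-level job_queue, a Worker thread that drains it on one cluster node,
# get_free_nodes() for picking lightly loaded hosts, plus checkfile() and the
# per-job entry points (singleRSTA, singlerun, ...). The sketch below shows one
# minimal way that scaffold could look; only the names job_queue, Worker,
# get_free_nodes, and ThreadError come from the code above, while the bodies and
# the run_single_job.py dispatch command are illustrative assumptions, not the
# project's actual implementation.
import Queue
import threading
import subprocess
import logging
from threading import ThreadError  # caught by the wrappers when t.start() fails

job_queue = Queue.PriorityQueue()  # items are (priority, payload...) tuples


def get_free_nodes():
    # Placeholder: a real implementation would rank cluster hosts by load
    # (e.g. by probing them over SSH); here the local machine is the only node.
    return (['localhost'], [])


class Worker(threading.Thread):
    """Pop jobs from the shared queue and run each one on the assigned node."""

    def __init__(self, queue, node):
        threading.Thread.__init__(self)
        self.queue = queue
        self.node = node

    def run(self):
        while True:
            try:
                job = self.queue.get(block=False)
            except Queue.Empty:
                break
            # job[0] is the priority/index used by the PriorityQueue; the rest is the payload.
            # Hypothetical dispatch: run a per-job script on the node over SSH;
            # 'run_single_job.py' stands in for whichever worker script a wrapper calls.
            cmd = "ssh %s 'python run_single_job.py %s'" % (
                self.node, ' '.join(str(x) for x in job[1:]))
            logging.info("node %s -> job %s", self.node, job[0])
            subprocess.call(cmd, shell=True)
            self.queue.task_done()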
def run():
    cluster = get_free_nodes()[0]
    jobs = []
    n = 0
    filenames = ['memeS', 'memeM', 'memeL']
    filenames = []
    for year in [2000]:
        for copaper in [10]:  #,10,15]:
            for v in [1000]:  #,300,500,700,1000,2000]:
                filenames.append('%d_%d_%d' % (year, copaper, v))
    for filename in filenames:
        c = '100'
        g = '0.8'
        for nb in ['1', '3', '5', '7']:
            for losstype in ['dif', 'exp']:
                if losstype == 'exp':
                    penaltyrange = ['0.3', '0.5', '0.7', '1', '3', '5', '7']
                else:
                    penaltyrange = ['0.1', '0.5', '0.8']
                for penalty in penaltyrange:
                    node = cluster[n % len(cluster)]
                    n += 1
                    p = multiprocessing.Process(target=singlerun,
                                                args=(filename, c, g, nb, penalty, losstype, node,))
                    jobs.append(p)
                    p.start()
                    time.sleep(15)
                time.sleep(60)
            time.sleep(300)
    for job in jobs:
        job.join()
def run():
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run = 0.01
    filenames = ['emotions', 'yeast', 'scene', 'enron', 'cal500', 'fp', 'cancer',
                 'medical', 'toy10', 'toy50', 'toy100']
    n = 0
    for filename in filenames:
        for graph_type in ['pair', 'tree']:
            for t in range(180):
                para_t = "%d" % (t + 1)
                node = cluster[n % len(cluster)]
                n += 1
                p = multiprocessing.Process(target=singleMAVAMMlearner,
                                            args=(filename, graph_type, para_t, node,))
                jobs.append(p)
                p.start()
                time.sleep(1 * is_main_run)
            time.sleep(60 * is_main_run)
    for job in jobs:
        job.join()
        pass
def compute_graph_kernels_in_parallel():
    cluster = get_free_nodes()[0]
    kernelnamelist = ['lRWkernel', 'WL', 'WLedge', 'WLspdelta', 'RGkernel1',
                      'l3graphletkernel', 'untilpRWkernel4', 'untilpRWkernel6',
                      'untilpRWkernel8', 'untilpRWkernel10', 'spkernel', 'SPkernel',
                      'RWkernel', 'gestkernel3', 'gestkernel4', 'connectedkernel3',
                      'connectedkernel4', 'connectedkernel5']
    # generate jobs and so on
    job_id = 0
    job_size = 1
    for kernelname in kernelnamelist:
        kernelresultfile = "../DTPNCI2015/results/ncicancer_kernel_graph_%s" % (kernelname)
        if os.path.exists(kernelresultfile):
            continue
        job_id = job_id + 1
        job_content = kernelname
        job_queue.put((job_id, job_content))
    logging.info("Processing jobs ...")
    # processing jobs
    job_size = job_queue.qsize()
    logging.info("In total %d jobs" % job_size)
    loadpernode = 1
    threads = []
    counter = 0
    for node in cluster:
        for i in range(loadpernode):
            t = Worker(job_queue, node)
            time.sleep(0.5)
            counter = counter + 1
            if counter > job_size:
                break
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tThread error!")
    for t in threads:
        t.join()
    pass
def run():
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run = 1
    filenames = ['emotions', 'yeast', 'scene', 'enron', 'cal500', 'fp', 'cancer',
                 'medical', 'toy10', 'toy50', 'toy100']
    filenames = ['fp']
    n = 0
    for filename in filenames:
        for graph_type in ['pair']:
            for t in range(180):
                para_t = "%d" % (t + 1)
                node = cluster[n % len(cluster)]
                n += 1
                p = multiprocessing.Process(target=singleMAMlearner,
                                            args=(filename, graph_type, para_t, node,))
                jobs.append(p)
                p.start()
                time.sleep(1 * is_main_run)
            time.sleep(30 * is_main_run)
        time.sleep(600 * is_main_run)
    for job in jobs:
        job.join()
        pass
def compute_fingerprints_in_parallel():
    # cluster is a list of nodes in an interactive cluster, e.g. ['ukko001.hpc','ukko002.hpc']
    cluster = get_free_nodes()[0]
    #cluster = ['melkinkari']
    logging.info("Read in list of molecules ...")
    moleculelist = []
    fin = open('../DTPNCI2015/results/ncicancer_labels')
    for line in fin:
        moleculelist.append(line.strip())
    fin.close()
    if fp_flag == 1:
        fppath = '../structures/FPfiles/'
    elif fp_flag == 2:
        fppath = '../structures/FPfiles/'
    elif fp_flag == 3:
        fppath = '../structures/FPfiles/'
    else:
        fppath = '../structures/MATLABfiles/'
    if not os.path.exists(fppath):
        os.makedirs(fppath)
    logging.info("Generate jobs ...")
    # generate jobs and so on
    job_id = 0
    job_size = 1
    for molecule in moleculelist:
        if fp_flag == 1:
            sdffilename = "%s%s.fp" % (fppath, molecule)
        elif fp_flag == 2:
            sdffilename = "%s%s.fp3" % (fppath, molecule)
        elif fp_flag == 3:
            sdffilename = "%s%s.fp4" % (fppath, molecule)
        else:
            sdffilename = "%s%s.mat" % (fppath, molecule)
        if os.path.exists(sdffilename):
            continue
        job_id = job_id + 1
        job_content = molecule
        job_queue.put((job_id, job_content))
    logging.info("Processing jobs ...")
    # processing jobs
    job_size = job_queue.qsize()
    logging.info("In total %d jobs" % job_size)
    loadpernode = 8
    threads = []
    counter = 0
    for node in cluster:
        for i in range(loadpernode):
            t = Worker(job_queue, node)
            time.sleep(0.5)
            counter = counter + 1
            if counter > job_size:
                break
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tThread error!")
    for t in threads:
        t.join()
    pass
def run():
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run_factor = 5
    filenames = ['cancer', 'ArD20', 'ArD30', 'toy10', 'toy50', 'emotions',
                 'yeast', 'medical', 'scene', 'enron', 'cal500', 'fp']
    n = 0
    # generate jobs
    for kth_fold in ['1']:  #,'2','3','4','5']:
        for filename in filenames:
            graph_type = 'tree'
            for kappa in [180, 280]:  #['2','4','8','16','20','32','40','50','60']:
                for l_norm in ['2']:
                    #for t in [5]:  #range(0,41,10):
                    for t in [1, 5] + range(10, 41, 10):
                        if t == 0:
                            t = 1
                        para_t = "%d" % (t)
                        try:
                            with open("../outputs/%s_%s_%s_f%s_l%s_k%s_pfRSTAs.log" %
                                      (filename, graph_type, para_t, kth_fold, l_norm, kappa)):
                                pass
                            continue
                        except:
                            n = n + 1
                            job_queue.put((n, filename, graph_type, para_t,
                                           kth_fold, l_norm, kappa))
                        pass  # for |T|
                    pass  # for l
                pass  # for kappa
            pass  # for datasets
        pass  # for k fold
    # running jobs
    job_size = job_queue.qsize()
    logging.info("\t\tprocessing %d jobs" % (job_size))
    threads = []
    for i in range(len(cluster)):
        if job_queue.empty():
            break
        t = Worker(job_queue, cluster[i])
        time.sleep(is_main_run_factor)
        try:
            t.start()
            threads.append(t)
        except ThreadError:
            logging.warning("\t\tError: thread error caught!")
        pass
    for t in threads:
        t.join()
    pass
    pass  # def
def run():
    '''
    a wrapper function to pick cluster nodes with good performance, to define a
    job space, and to launch jobs on the computing nodes of the cluster
    '''
    logging.info('\t\tGenerating priority queue.')
    paramInd = 0
    kFold = 5
    numLabel = 3200
    suffix = 'val'
    isTest = '0'
    # iterate over the lists
    xFilenameList = ['../Data/tcdb.TB', '../Data/tcdb.TICoils', '../Data/tcdb.TIGene3D',
                     '../Data/tcdb.TIHamap', '../Data/tcdb.TIPANTHER', '../Data/tcdb.TIPfam',
                     '../Data/tcdb.TIPhobius', '../Data/tcdb.TIPIRSF', '../Data/tcdb.TIPRINTS',
                     '../Data/tcdb.TIProDom', '../Data/tcdb.TIProSitePatterns',
                     '../Data/tcdb.TIProSiteProfiles', '../Data/tcdb.TISignalP_EUK',
                     '../Data/tcdb.TISignalP_GRAM_NEGATIVE', '../Data/tcdb.TISignalP_GRAM_POSITIVE',
                     '../Data/tcdb.TISMART', '../Data/tcdb.TISUPERFAMILY', '../Data/tcdb.TITIGRFAM',
                     '../Data/tcdb.TITMHMM', '../Data/tcdb.TPSI', '../Data/tcdb.TRPSCDD',
                     '../Data/tcdb.TRPSCDDNCBI', '../Data/tcdb.TRPSCOG', '../Data/tcdb.TRPSKOG',
                     '../Data/tcdb.TRPSPFAM', '../Data/tcdb.TRPSPRK', '../Data/tcdb.TRPSSMART',
                     '../Data/tcdb.TRPSTCDB201509PSSM', '../Data/tcdb.TRPSTIGR']
    #xFilenameList = ['../Data/tcdb.TICoils']
    yFilenameList = ['../Data/tcdb.TC']
    labelIndexList = xrange(1, numLabel + 1)
    foldIndexList = xrange(1, kFold + 1)
    # generate job queue, will iterate over c, k, label
    for xFilename, yFilename, labelIndex, foldIndex in list(
            itertools.product(xFilenameList, yFilenameList, labelIndexList, foldIndexList)):
        for line in open('parameter_setting'):
            words = line.strip().split(' ')
            if words[0] == xFilename and words[1] == yFilename:
                svmC = words[2]
        tmpDir = '../ResultsSVM/%s_%s/' % (re.sub('.*/', '', xFilename),
                                           re.sub('.*/', '', yFilename))
        if not os.path.exists(tmpDir):
            os.mkdir(tmpDir)
        paramInd += 1
        outputFilename = (tmpDir + '/' + re.sub('.*/', '', xFilename) + '_' +
                          re.sub('.*/', '', yFilename) + '_l' + str(labelIndex) +
                          '_f' + str(foldIndex) + '_c' + svmC + '_t' + isTest + '_' + suffix)
        ## check if result is ready already
        if checkfile(outputFilename):
            continue
        ## put parameter into queue
        job_queue.put((paramInd, (str(paramInd), xFilename, yFilename, str(labelIndex),
                                  str(foldIndex), svmC, outputFilename, isTest)))
    # get computing node
    logging.info('\t\tObtain cluster node')
    cluster = get_free_nodes()[0]
    #cluster = ['ukko133.hpc']
    # run jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    is_main_run_factor = 1
    # running job_queue
    threads = []
    workerload = 3
    for i in range(len(cluster)):
        for j in range(workerload):
            if job_queue.empty():
                break
            t = Worker(job_queue, cluster[i])
            time.sleep(is_main_run_factor)
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tError: thread error caught!")
            pass
    for t in threads:
        t.join()
    pass
    pass
def run():
    is_main_run_factor = 5
    #filenames = ['toy10','toy50','emotions','medical','enron','yeast','scene','cal500','fp','cancer']
    #filenames = ['cancer']
    filenames = ['toy10', 'toy50', 'emotions', 'yeast', 'scene', 'enron', 'fp', 'medical']
    n = 0
    # generate job_queue
    logging.info('\t\tGenerating priority queue.')
    for newton_method in ['1', '0']:
        for filename in filenames:
            for slack_c in ['1', '10', '0.1']:
                for t in [1, 5, 10, 20, 30]:
                    para_t = "%d" % (t)
                    graph_type = 'tree'
                    for kappa in ['1', '2', '3', '4', '5', '6', '8', '10', '12', '14', '16']:
                        for l_norm in ['2']:
                            #for kth_fold in ['1','2','3','4','5']:
                            for kth_fold in ['1']:
                                for loss_scaling_factor in ['0.1', '1']:
                                    if checkfile(filename, graph_type, para_t, kth_fold,
                                                 l_norm, kappa, slack_c,
                                                 loss_scaling_factor, newton_method):
                                        continue
                                    else:
                                        n = n + 1
                                        job_queue.put((n, (filename, graph_type, para_t,
                                                           kth_fold, l_norm, kappa, slack_c,
                                                           loss_scaling_factor, newton_method)))
                                    pass  # for loss_scaling_factor
                                pass  # for k fold
                            pass  # for l
                        pass  # for kappa
                    pass  # for |T|
                pass  # for slack_c
            pass  # for datasets
        pass  # for newton_method
    # get computing nodes
    cluster = get_free_nodes()[0]  # if you have access to an interactive computer cluster, get the list of hostnames of the cluster
    #cluster = ['melkinkari']  # if you don't have access to any computer cluster, just use your machine as the only computing node
    # running job_queue
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    threads = []
    for i in range(len(cluster)):
        if job_queue.empty():
            break
        t = Worker(job_queue, cluster[i])
        time.sleep(is_main_run_factor)
        try:
            t.start()
            threads.append(t)
        except ThreadError:
            logging.warning("\t\tError: thread error caught!")
        pass
    for t in threads:
        t.join()
    pass
    pass  # def
def run():
    logging.info('\t\tGenerating priority queue.')
    paramInd = 0
    kFold = 5
    numLabel = 3200
    suffix = 'val'
    isTest = '0'
    # iterate over the lists
    xFilenameList = ['../Data/tcdb.all.KUNIMKL', '../Data/tcdb.all.KALIGN', '../Data/tcdb.all.KALIGNF',
                     '../Data/tcdb.all.GUNIMKL', '../Data/tcdb.all.GALIGN', '../Data/tcdb.all.GALIGNF']
    yFilenameList = ['../Data/tcdb.TC']
    labelIndexList = xrange(1, numLabel + 1)
    foldIndexList = xrange(1, kFold + 1)
    # generate job queue, will iterate over c, k, label
    for xFilename, yFilename, labelIndex, foldIndex in list(
            itertools.product(xFilenameList, yFilenameList, labelIndexList, foldIndexList)):
        for line in open('parameter_setting'):
            words = line.strip().split(' ')
            if words[0] == xFilename and words[1] == yFilename:
                svmC = words[2]
        tmpDir = '../ResultsMKL/%s_%s/' % (re.sub('.*/', '', xFilename),
                                           re.sub('.*/', '', yFilename))
        if not os.path.exists(tmpDir):
            os.mkdir(tmpDir)
        paramInd += 1
        outputFilename = (tmpDir + '/' + re.sub('.*/', '', xFilename) + '_' +
                          re.sub('.*/', '', yFilename) + '_l' + str(labelIndex) +
                          '_f' + str(foldIndex) + '_c' + svmC + '_t' + isTest + '_' + suffix)
        ## check if result is ready already
        if checkfile(outputFilename):
            continue
        ## put parameter into queue
        job_queue.put((paramInd, (str(paramInd), xFilename, yFilename, str(labelIndex),
                                  str(foldIndex), svmC, outputFilename, isTest)))
    # get computing node
    logging.info('\t\tObtain cluster node')
    cluster = get_free_nodes()[0]
    #cluster = ['ukko133.hpc']
    # run jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    is_main_run_factor = 1
    # running job_queue
    threads = []
    workerload = 3
    for i in range(len(cluster)):
        for j in range(workerload):
            if job_queue.empty():
                break
            t = Worker(job_queue, cluster[i])
            time.sleep(is_main_run_factor)
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tError: thread error caught!")
            pass
    for t in threads:
        t.join()
    pass
    pass
def run():
    jobs = []
    n = 0
    is_main_run_factor = 5
    filenames = ['cancer', 'ArD20', 'ArD30', 'toy10', 'toy50', 'emotions',
                 'yeast', 'medical', 'scene', 'enron', 'cal500', 'fp']
    #filenames = ['scene']
    n = 0
    # generate jobs
    logging.info('\t\tGenerating job queue.')
    for slack_c in ['100', '1', '0.1', '10', '0.01', '50', '0.5', '20', '0.05', '5']:
        for kth_fold in ['1', '2', '3', '4', '5']:
            for filename in filenames:
                graph_type = 'tree'
                for kappa in ['2', '8', '16', '20']:
                    for l_norm in ['2']:
                        for t in range(0, 41, 10):
                            if t == 0:
                                t = 1
                            para_t = "%d" % (t)
                            if checkfile(filename, graph_type, para_t, kth_fold,
                                         l_norm, kappa, slack_c):
                                continue
                            else:
                                n = n + 1
                                job_queue.put((n, filename, graph_type, para_t,
                                               kth_fold, l_norm, kappa, slack_c))
                            pass  # for |T|
                        pass  # for l
                    pass  # for kappa
                pass  # for datasets
            pass  # for k fold
        pass  # for slack_c
    # get computing nodes
    cluster = get_free_nodes()[0]
    # running jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    threads = []
    for i in range(len(cluster)):
        if job_queue.empty():
            break
        t = Worker(job_queue, cluster[i])
        time.sleep(is_main_run_factor)
        try:
            t.start()
            threads.append(t)
        except ThreadError:
            logging.warning("\t\tError: thread error caught!")
        pass
    for t in threads:
        t.join()
    pass
    pass  # def
def run():
    logging.info('\t\tGenerating priority queue.')
    paramind = 0
    K = 10
    for c in ['0.01', '0.05', '0.1', '0.5', '1', '5', '10', '50', '100', '1000']:
        for g in ['0.0001', '0.001', '0.005', '0.01', '0.05', '0.1', '0.5', '1', '5', '10', '50', '100']:
            for k in range(1, (K + 1)):
                paramind += 1
                outfilename = '../../Learning/Results/ParameterSelection/%d' % paramind
                if checkfile(outfilename):
                    continue
                job_queue.put((paramind, (str(paramind), str(K), str(k), c, g, outfilename)))
    # get computing node
    cluster = get_free_nodes()[0]
    #cluster = ['ukko133.hpc']
    # run jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    is_main_run_factor = 1
    # running job_queue
    threads = []
    workerload = 10
    for i in range(len(cluster)):
        for j in range(workerload):
            if job_queue.empty():
                break
            t = Worker(job_queue, cluster[i])
            time.sleep(is_main_run_factor)
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tError: thread error caught!")
            pass
    for t in threads:
        t.join()
    pass
    pass  # def
def simulationAndRealignment():
    '''
    #simulation
    print "start simulation"
    os.system("echo 'start simulation' > %s" % (logFile))
    block_size = cpu_num * max_thread
    for block_id in range(block):
        if (block_id + 1) * block_size > seq_num:
            block_size = seq_num - block_id * block_size
        os.system("./pair_hmm.py len=%s a=%s e=%s g=%s l=%s rep=%d tree='%s' id=%d in=%s simul=1 >> %s"
                  % (options.length, options.indel, options.extension, options.gamma, options.Lambda,
                     block_size, options.tree, block_id, options.ifname, logFile))
    '''
    #realignment
    #get job queue
    t_range = 12
    print "get the job queue"
    job_id = 0
    for i in range(t_range):
        for j in range(block):
            job_queue.put((job_id, 0.05 + 0.05 * i, j))
            job_id += 1
    #processing
    job_size = job_queue.qsize()
    print "processing %d jobs" % (job_size)
    start = time.time()
    os.system("echo 'processing %d jobs' >> %s" % (job_size, logFile))
    threads = []
    cluster = get_free_nodes()[0]
    #cluster = ['ukko003.hpc','ukko004.hpc','ukko005.hpc','ukko006.hpc','ukko007.hpc','ukko008.hpc']
    load = 1
    for i in range(load):
        if job_queue.empty():
            break
        nodes_num = 0
        for j in range(max_node):
            if job_queue.empty():
                break
            os.system("echo '---->%d %s' >> %s" % (j, cluster[j % len(cluster)], logFile))
            t = Worker(job_queue, cluster[j % len(cluster)])
            nodes_num += 1
            time.sleep(1)
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                os.system("echo '\t\tError: thread error caught!' >> %s" % (logFile))
            time.sleep(random.randint(5000, 6000) / 1000.0)
        os.system("echo 'using %d nodes in cluster' >> %s" % (nodes_num, logFile))
    for t in threads:
        t.join()
    #combine results
    os.system("echo 'combining results' >> %s" % (logFile))
    accuracy = []
    score = []
    for i in range(t_range):
        acc = []
        lScore = []
        for j in range(block):
            jobFName = "%s/result_for_%s_%d_with_t%.2f" % (
                dataDir, os.path.splitext(os.path.basename(options.ifname))[0], j, 0.05 + 0.05 * i)
            s = commands.getoutput("tail -1 %s |tr -d '(|)|'|tr ',' ' '" % (jobFName))
            s = s.split()
            acc.append(float(s[0]))
            lScore.append(float(s[1]))
            #os.system("rm " + jobFName)
        accuracy.append(np.array(acc).mean())
        score.append(np.array(lScore).mean())
    os.system("echo %s > %s/finalResult" % (' '.join([str(i) for i in accuracy]), dataDir))
    os.system("echo %s > %s/finalScore" % (' '.join([str(i) for i in score]), dataDir))
    os.system("echo '%.4gs' >> %s" % (time.time() - start, logFile))
def run():
    '''
    a wrapper function to pick cluster nodes with good performance, to define a
    job space, and to launch jobs on the computing nodes of the cluster
    '''
    logging.info('\t\tGenerating priority queue.')
    paramInd = 0
    kFold = 5
    suffix = 'sel'
    isTest = '0'
    # iterate over the lists
    xFilenameList = ['../Data/tcdb.all.KUNIMKL', '../Data/tcdb.all.KALIGN', '../Data/tcdb.all.KALIGNF']
    xFilenameList = ['../Data/tcdb.all.GUNIMKL', '../Data/tcdb.all.GALIGN', '../Data/tcdb.all.GALIGNF']
    foldIndexList = xrange(1, kFold + 1)
    cList = ['1000', '5000', '10000', '50000']
    stepSize1List = ['7', '9', '11']
    stepSize2List = ['7', '9', '11']
    cList.reverse()
    stepSize1List.reverse()
    stepSize2List.reverse()
    yFilename = '../Data/tcdb.TC'
    EFilename = '../Data/tcdb.TC.E'
    SFilename = '../Data/tcdb.TC.SrcSpc'
    for xFilename, foldIndex, sopC, stepSize1, stepSize2 in list(
            itertools.product(xFilenameList, foldIndexList, cList, stepSize1List, stepSize2List)):
        tmpDir = '../ResultsSOP/tmp_%s_%s/' % (re.sub('.*/', '', xFilename),
                                               re.sub('.*/', '', yFilename))
        if not os.path.exists(tmpDir):
            os.mkdir(tmpDir)
        paramInd += 1
        outputFilename = (tmpDir + '/' + re.sub('.*/', '', xFilename) + '_' +
                          re.sub('.*/', '', yFilename) + '_f' + str(foldIndex) + '_c' + sopC +
                          '_s1' + stepSize1 + '_s2' + stepSize2 + '_t' + isTest + '_' + suffix + '.mat')
        logFilename = (tmpDir + '/' + re.sub('.*/', '', xFilename) + '_' +
                       re.sub('.*/', '', yFilename) + '_f' + str(foldIndex) + '_c' + sopC +
                       '_s1' + stepSize1 + '_s2' + stepSize2 + '_t' + isTest + '_' + suffix + '.log')
        ## check if result is ready already
        if checkfile(outputFilename):
            continue
        ## put parameter into queue
        job_queue.put((paramInd, (str(paramInd), xFilename, yFilename, EFilename, SFilename,
                                  str(foldIndex), sopC, outputFilename, logFilename,
                                  stepSize1, stepSize2, isTest, suffix)))
    # get computing node
    logging.info('\t\tObtain cluster node')
    cluster = get_free_nodes()[0]
    #cluster = ['ukko133']
    # run jobs
    job_size = job_queue.qsize()
    logging.info("\t\tProcessing %d jobs" % (job_size))
    is_main_run_factor = 1
    # running job_queue
    threads = []
    workerload = 1
    for i in range(len(cluster)):
        for j in range(workerload):
            if job_queue.empty():
                break
            t = Worker(job_queue, cluster[i])
            time.sleep(is_main_run_factor)
            try:
                t.start()
                threads.append(t)
            except ThreadError:
                logging.warning("\t\tError: thread error caught!")
            pass
    for t in threads:
        t.join()
    pass
    pass
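# Each wrapper above is meant to be invoked as a script entry point. A minimal,
# hypothetical main guard is sketched below; the logging setup is an assumption,
# only the call to run() comes from the code above.
if __name__ == '__main__':
    import logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    run()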