def __algorithm__(self, run_args=None):
    """Run ``binarysankoff`` on this run's input file and render the tree.

    Args:
        run_args: Mutable mapping of arguments handed to
            ``binarysankoff.main``. It is modified in place: ``'strap'`` is
            set to 1 and ``'file_name'`` to ``self.input_file_name``. When
            omitted, an empty dict is used.

    Side effects:
        Calls ``newick2img`` with ``self.newick_path`` and
        ``self.image_path``, then sets ``self._stop.value`` to 1.
    """
    if run_args is None:
        # The None default used to be subscripted directly, which raised
        # TypeError before the algorithm was even started.
        run_args = {}

    run_args['strap'] = 1
    run_args['file_name'] = self.input_file_name
    binarysankoff.main(run_args)

    # Function-scope import kept from the original.
    # NOTE(review): presumably this avoids an import cycle - confirm.
    from Stemweb.algorithms.utils import newick2img
    newick2img(self.newick_path, self.image_path, radial=self.radial_image)

    # NOTE(review): the short pause before raising the stop flag is
    # presumably there to let a consumer of the run catch up - confirm.
    sleep(0.1)
    self._stop.value = 1
def __algorithm__(self, run_args=None):
    """Run ``binarysankoff`` on this run's input file and render the tree.

    Args:
        run_args: Mutable mapping of arguments handed to
            ``binarysankoff.main``. It is modified in place: ``'strap'`` is
            set to 1 and ``'file_name'`` to ``self.input_file_name``. When
            omitted, an empty dict is used.

    Side effects:
        Calls ``newick2img`` with ``self.newick_path`` and
        ``self.image_path``, then sets ``self._stop.value`` to 1.
    """
    if run_args is None:
        # The None default used to be subscripted directly, which raised
        # TypeError before the algorithm was even started.
        run_args = {}

    run_args['strap'] = 1
    run_args['file_name'] = self.input_file_name
    binarysankoff.main(run_args)

    # Function-scope import kept from the original.
    # NOTE(review): presumably this avoids an import cycle - confirm.
    from Stemweb.algorithms.utils import newick2img
    newick2img(self.newick_path, self.image_path, radial=self.radial_image)

    # NOTE(review): the short pause before raising the stop flag is
    # presumably there to let a consumer of the run catch up - confirm.
    sleep(0.1)
    self._stop.value = 1
def save_tree(node):
    """Write the tree under ``node`` as Newick text and render it as an image.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    Args:
        node: Tree node passed to ``_printtree`` to produce the Newick text.

    Returns:
        -1 if the Newick file could not be written (the failure is logged
        to the ``stemweb.algorithm_run`` logger and no image is rendered),
        otherwise None.
    """
    newick = _printtree(node, '') + ";"
    try:
        # The context manager closes the handle even when write() fails;
        # previously the handle leaked in that case.
        with open(self.newick_path, 'w') as newick_file:
            newick_file.write(newick)
    except Exception:
        # Best-effort behaviour kept: log and report failure through the
        # return value. Unlike the former bare ``except:``, this no longer
        # swallows KeyboardInterrupt / SystemExit.
        logger = logging.getLogger('stemweb.algorithm_run')
        logger.error('AlgorithmRun %s:%s could\'t write in file %s.' %
                     (self.algorithm_run.algorithm.name,
                      self.algorithm_run.id,
                      self.newick_path))
        return -1

    # Function-scope import kept from the original.
    from Stemweb.algorithms.utils import newick2img
    newick2img(self.newick_path, self.image_path,
               branch_length=False, radial=self.radial_image, width=800)
def semuniform(inputfile, iterationmax):
    """Iteratively refine a tree for ``inputfile`` and save the results.

    The input is read with ``readfile`` and an initial tree is built with
    ``njtree``. Each iteration accumulates pairwise weights from
    ``messagepassingu``, perturbs them with Gaussian noise, rebuilds the
    tree with ``mst`` and records a score. The loop ends after
    ``iterationmax`` iterations, or earlier once the score has been stable
    for two consecutive steps.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation had been lost. In particular, confirm that the
    ``treediclastbackup`` snapshot belongs inside the convergence branch.

    Args:
        inputfile: Passed unchanged to ``readfile``.
        iterationmax: Maximum number of iterations; must be at least 1.

    Returns:
        Tuple ``(qscore, bestiteration)``: the score of the last executed
        iteration, and the number of the iteration with the highest
        ``probtreeall`` minus one.

    Raises:
        ValueError: If ``iterationmax`` is smaller than 1.

    Side effects:
        Deletes ``self.run_args['outfolder']`` if it exists and recreates
        it with a ``tree`` sub-folder; writes ``treelast`` and ``treebest``
        through ``tree2newick``; calls ``newick2img``; writes a ``log``
        file into the output folder.
    """
    import shutil

    if iterationmax < 1:
        # Previously the empty loop left ``treediclastbackup`` unbound and
        # the function died with NameError after wiping the output folder.
        raise ValueError('iterationmax must be at least 1')

    probsame = 0.9
    printtime = False
    resfolder = self.run_args['outfolder']

    # Step 1: read file.
    namelist, datadic, textdata = readfile(inputfile)

    # Step 2: initial tree from njtree (variant with hidden nodes; the
    # original comments name nohiddeninitial(textdata) as the alternative
    # without hidden nodes).
    treeroot, nodeorder, nodehidden, nodeleaf, treedic = njtree(textdata)

    # Start from an empty result folder. The 'tree' sub-folder lives inside
    # it, so it never pre-exists once the parent has been recreated.
    resfoldertree = os.path.join(resfolder, 'tree')
    if os.path.exists(resfolder):
        shutil.rmtree(resfolder)
    os.makedirs(resfolder)
    os.makedirs(resfoldertree)

    # The result was only consumed by plotting code that is disabled.
    # NOTE(review): call kept because removehidden is defined elsewhere and
    # may modify its arguments - confirm, then drop.
    removehidden(nodehidden, treedic)

    # Step 3: iterate on the weight matrix.
    logstr = ('The resulting folder is ' + resfolder + '\n'
              + 'Start at ' + str(time.gmtime()) + '\n')
    # <parameter> Noise level; derived from the weight matrix after the
    # first iteration.
    sigma = 0

    # float('-inf') instead of the star-imported NumPy alias ``Inf``, which
    # NumPy 2.0 removed.
    probtreeallbest = float('-inf')
    bestiteration = 0
    treedicbest = copy.deepcopy(treedic)

    # One column per quantity, header first. Only the first three columns
    # get a zero seed row, so the probability column is one entry shorter;
    # the log writer below emits exactly ``iteration + 1`` rows, which is
    # the length of that shortest column.
    logvector = [['iteration'], ['sigma'], ['qscore'], ['probability']]
    logvector[0].append(0)
    logvector[1].append(0)
    logvector[2].append(0)

    stopsign = 0
    for iteration in range(1, iterationmax + 1):
        # Snapshot of the tree the weights of this iteration are based on.
        treedicprevious = copy.deepcopy(treedic)
        if printtime and (iteration < 4):
            timestart = time.time()
        if iterationmax > 100 and (iteration % 100) == 0:
            print('Now is iteration ' + str(iteration))
        if iterationmax <= 100 and (iteration % 10) == 0:
            print('Now is iteration ' + str(iteration))

        weightmatrix = zeros((len(nodeorder), len(nodeorder)))
        weightmatrixwithnoise = zeros((len(nodeorder), len(nodeorder)))
        weightmatrixindex = list(nodeorder)

        # Accumulate weights and probability over every data group whose
        # key is greater than 1.
        probtreeall = 0
        for datadickey in datadic.keys():
            if datadickey > 1:
                # Collapse identical entries, counting how often each
                # occurs.
                textbyline = []
                linerepeat = []
                for textbylinewithrepeati in datadic[datadickey]:
                    if textbylinewithrepeati not in textbyline:
                        textbyline.append(textbylinewithrepeati)
                        linerepeat.append(1.0)
                    else:
                        lineindex = textbyline.index(textbylinewithrepeati)
                        linerepeat[lineindex] = linerepeat[lineindex] + 1.0

                weightdic, probtree = messagepassingu(
                    treeroot, nodeorder, nodehidden, nodeleaf, treedic,
                    textbyline, linerepeat, namelist, probsame)
                probtreeall = probtreeall + probtree

                # Fill the upper triangle and mirror it.
                for ni in range(0, len(nodeorder) - 1):
                    for nj in range(ni + 1, len(nodeorder)):
                        weightmatrix[ni, nj] = (
                            weightmatrix[ni, nj]
                            + weightdic[(nodeorder[ni], nodeorder[nj])])
                        weightmatrix[nj, ni] = weightmatrix[ni, nj]

        # Add Gaussian noise. Every ordered pair is visited, so each
        # unordered pair is drawn twice and the later draw wins; the
        # iteration order is left untouched to keep the sequence of random
        # draws identical.
        for ni in range(len(nodeorder)):
            for nj in range(len(nodeorder)):
                if ni != nj:
                    weightmatrixwithnoise[ni, nj] = (
                        weightmatrix[ni, nj] + random.gauss(0, sigma))
                    weightmatrixwithnoise[nj, ni] = \
                        weightmatrixwithnoise[ni, nj]
                else:
                    weightmatrixwithnoise[ni, ni] = float('Inf')

        # Update the tree by mst.
        treeroot, nodeorder, nodeleaf, treedic = mst(
            weightmatrixwithnoise, weightmatrixindex)

        # qscore: sum of the noise-free weights between each node (all but
        # the last in nodeorder) and its first listed parent.
        qscore = 0.0
        for nodei in nodeorder[0:-1]:
            qscore = qscore + weightmatrix[
                weightmatrixindex.index(nodei),
                weightmatrixindex.index(treedic[nodei]['parent'][0])]

        logvector[0].append(iteration)
        logvector[1].append(sigma)
        logvector[2].append(qscore)
        logvector[3].append(probtreeall)

        if printtime and (iteration < 4):
            timeend = time.time()
            print('The time for iteration ' + str(iteration) + ' is '
                  + str(timeend - timestart))

        # Convergence test: once the last two qscore changes are both
        # below 0.001 the current tree is kept as the final one and the
        # loop stops at the convergence check of the next iteration.
        if (iteration > 10) and (iteration < iterationmax):
            if stopsign == 1:
                print('stop at ' + str(iteration - 1) + '\n')
                break
            if (abs(logvector[2][-2] - logvector[2][-3]) < 0.001) and (
                    abs(logvector[2][-1] - logvector[2][-2]) < 0.001):
                stopsign = 1
                treediclastbackup = copy.deepcopy(treedic)
        if (iteration == iterationmax) and (stopsign == 0):
            print('stop at ' + str(iteration - 1) + '\n')
            treediclastbackup = copy.deepcopy(treedicprevious)

        # Track the best tree. probtreeall was computed on the tree from
        # the start of this iteration, hence treedicprevious and
        # ``iteration - 1``.
        if probtreeall > probtreeallbest:
            treedicbest = copy.deepcopy(treedicprevious)
            probtreeallbest = probtreeall
            bestiteration = iteration - 1
            # Result only fed disabled plotting code.
            # NOTE(review): call kept in case removehidden modifies its
            # arguments - confirm, then drop.
            removehidden(nodehidden, treedicbest)

        # <parameter> Noise schedule: sigma0 is 10% of the largest absolute
        # weight of the first iteration; afterwards sigma shrinks
        # quadratically and reaches 0 at iterationmax.
        if iteration >= 2:
            sigma = sigma0 * (
                (1.0 - float(iteration) / float(iterationmax)) ** 2.0)
        elif iteration == 1:
            sigma0 = 0.1 * max(abs(weightmatrix.min()),
                               abs(weightmatrix.max()))
            sigma = sigma0

    # Save the last and the best tree, both with hidden nodes removed.
    treediclast, nodehiddenlast = removehidden(nodehidden, treediclastbackup)
    tree2newick(treediclast, treediclast.keys(), nodehiddenlast,
                resfoldertree, 'treelast')
    treedicbestrh, nodehiddenbest = removehidden(nodehidden, treedicbest)
    tree2newick(treedicbestrh, treedicbestrh.keys(), nodehiddenbest,
                resfoldertree, 'treebest')

    # NOTE(review): presumably one of the files written above is what
    # self.newick_path points at, and the third positional argument is
    # presumably branch_length - confirm against newick2img and the caller.
    from Stemweb.algorithms.utils import newick2img
    newick2img(self.newick_path, self.image_path, False, radial=False)

    # Write the log: summary header, then one tab-separated row per entry.
    logstr = (logstr + 'End at ' + str(time.gmtime()) + '\n'
              + 'best iteration is ' + str(bestiteration) + '\n'
              + 'best probability is ' + str(probtreeallbest) + '\n\n\n')
    rows = ['\t'.join(str(column[j]) for column in logvector)
            for j in range(iteration + 1)]
    logstr = logstr + '\n'.join(rows)
    with open(os.path.join(resfolder, 'log'), 'w') as logfile:
        logfile.write(logstr)

    return (qscore, bestiteration)
def semuniform(inputfile, iterationmax):
    """Iteratively refine a tree for ``inputfile`` and save the results.

    The input is read with ``readfile`` and an initial tree is built with
    ``njtree``. Each iteration accumulates pairwise weights from
    ``messagepassingu``, perturbs them with Gaussian noise, rebuilds the
    tree with ``mst`` and records a score. The loop ends after
    ``iterationmax`` iterations, or earlier once the score has been stable
    for two consecutive steps.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation had been lost. In particular, confirm that the
    ``treediclastbackup`` snapshot belongs inside the convergence branch.

    Args:
        inputfile: Passed unchanged to ``readfile``.
        iterationmax: Maximum number of iterations; must be at least 1.

    Returns:
        Tuple ``(qscore, bestiteration)``: the score of the last executed
        iteration, and the number of the iteration with the highest
        ``probtreeall`` minus one.

    Raises:
        ValueError: If ``iterationmax`` is smaller than 1.

    Side effects:
        Deletes ``self.run_args['outfolder']`` if it exists and recreates
        it with a ``tree`` sub-folder; writes ``treelast`` and ``treebest``
        through ``tree2newick``; calls ``newick2img``; writes a ``log``
        file into the output folder.
    """
    import shutil

    if iterationmax < 1:
        # Previously the empty loop left ``treediclastbackup`` unbound and
        # the function died with NameError after wiping the output folder.
        raise ValueError('iterationmax must be at least 1')

    probsame = 0.9
    printtime = False
    resfolder = self.run_args['outfolder']

    # Step 1: read file.
    namelist, datadic, textdata = readfile(inputfile)

    # Step 2: initial tree from njtree (variant with hidden nodes; the
    # original comments name nohiddeninitial(textdata) as the alternative
    # without hidden nodes).
    treeroot, nodeorder, nodehidden, nodeleaf, treedic = njtree(textdata)

    # Start from an empty result folder. The 'tree' sub-folder lives inside
    # it, so it never pre-exists once the parent has been recreated.
    resfoldertree = os.path.join(resfolder, 'tree')
    if os.path.exists(resfolder):
        shutil.rmtree(resfolder)
    os.makedirs(resfolder)
    os.makedirs(resfoldertree)

    # The result was only consumed by plotting code that is disabled.
    # NOTE(review): call kept because removehidden is defined elsewhere and
    # may modify its arguments - confirm, then drop.
    removehidden(nodehidden, treedic)

    # Step 3: iterate on the weight matrix.
    logstr = ('The resulting folder is ' + resfolder + '\n'
              + 'Start at ' + str(time.gmtime()) + '\n')
    # <parameter> Noise level; derived from the weight matrix after the
    # first iteration.
    sigma = 0

    # float('-inf') instead of the star-imported NumPy alias ``Inf``, which
    # NumPy 2.0 removed.
    probtreeallbest = float('-inf')
    bestiteration = 0
    treedicbest = copy.deepcopy(treedic)

    # One column per quantity, header first. Only the first three columns
    # get a zero seed row, so the probability column is one entry shorter;
    # the log writer below emits exactly ``iteration + 1`` rows, which is
    # the length of that shortest column.
    logvector = [['iteration'], ['sigma'], ['qscore'], ['probability']]
    logvector[0].append(0)
    logvector[1].append(0)
    logvector[2].append(0)

    stopsign = 0
    for iteration in range(1, iterationmax + 1):
        # Snapshot of the tree the weights of this iteration are based on.
        treedicprevious = copy.deepcopy(treedic)
        if printtime and (iteration < 4):
            timestart = time.time()
        if iterationmax > 100 and (iteration % 100) == 0:
            print('Now is iteration ' + str(iteration))
        if iterationmax <= 100 and (iteration % 10) == 0:
            print('Now is iteration ' + str(iteration))

        weightmatrix = zeros((len(nodeorder), len(nodeorder)))
        weightmatrixwithnoise = zeros((len(nodeorder), len(nodeorder)))
        weightmatrixindex = list(nodeorder)

        # Accumulate weights and probability over every data group whose
        # key is greater than 1.
        probtreeall = 0
        for datadickey in datadic.keys():
            if datadickey > 1:
                # Collapse identical entries, counting how often each
                # occurs.
                textbyline = []
                linerepeat = []
                for textbylinewithrepeati in datadic[datadickey]:
                    if textbylinewithrepeati not in textbyline:
                        textbyline.append(textbylinewithrepeati)
                        linerepeat.append(1.0)
                    else:
                        lineindex = textbyline.index(textbylinewithrepeati)
                        linerepeat[lineindex] = linerepeat[lineindex] + 1.0

                weightdic, probtree = messagepassingu(
                    treeroot, nodeorder, nodehidden, nodeleaf, treedic,
                    textbyline, linerepeat, namelist, probsame)
                probtreeall = probtreeall + probtree

                # Fill the upper triangle and mirror it.
                for ni in range(0, len(nodeorder) - 1):
                    for nj in range(ni + 1, len(nodeorder)):
                        weightmatrix[ni, nj] = (
                            weightmatrix[ni, nj]
                            + weightdic[(nodeorder[ni], nodeorder[nj])])
                        weightmatrix[nj, ni] = weightmatrix[ni, nj]

        # Add Gaussian noise. Every ordered pair is visited, so each
        # unordered pair is drawn twice and the later draw wins; the
        # iteration order is left untouched to keep the sequence of random
        # draws identical.
        for ni in range(len(nodeorder)):
            for nj in range(len(nodeorder)):
                if ni != nj:
                    weightmatrixwithnoise[ni, nj] = (
                        weightmatrix[ni, nj] + random.gauss(0, sigma))
                    weightmatrixwithnoise[nj, ni] = \
                        weightmatrixwithnoise[ni, nj]
                else:
                    weightmatrixwithnoise[ni, ni] = float('Inf')

        # Update the tree by mst.
        treeroot, nodeorder, nodeleaf, treedic = mst(
            weightmatrixwithnoise, weightmatrixindex)

        # qscore: sum of the noise-free weights between each node (all but
        # the last in nodeorder) and its first listed parent.
        qscore = 0.0
        for nodei in nodeorder[0:-1]:
            qscore = qscore + weightmatrix[
                weightmatrixindex.index(nodei),
                weightmatrixindex.index(treedic[nodei]['parent'][0])]

        logvector[0].append(iteration)
        logvector[1].append(sigma)
        logvector[2].append(qscore)
        logvector[3].append(probtreeall)

        if printtime and (iteration < 4):
            timeend = time.time()
            print('The time for iteration ' + str(iteration) + ' is '
                  + str(timeend - timestart))

        # Convergence test: once the last two qscore changes are both
        # below 0.001 the current tree is kept as the final one and the
        # loop stops at the convergence check of the next iteration.
        if (iteration > 10) and (iteration < iterationmax):
            if stopsign == 1:
                print('stop at ' + str(iteration - 1) + '\n')
                break
            if (abs(logvector[2][-2] - logvector[2][-3]) < 0.001) and (
                    abs(logvector[2][-1] - logvector[2][-2]) < 0.001):
                stopsign = 1
                treediclastbackup = copy.deepcopy(treedic)
        if (iteration == iterationmax) and (stopsign == 0):
            print('stop at ' + str(iteration - 1) + '\n')
            treediclastbackup = copy.deepcopy(treedicprevious)

        # Track the best tree. probtreeall was computed on the tree from
        # the start of this iteration, hence treedicprevious and
        # ``iteration - 1``.
        if probtreeall > probtreeallbest:
            treedicbest = copy.deepcopy(treedicprevious)
            probtreeallbest = probtreeall
            bestiteration = iteration - 1
            # Result only fed disabled plotting code.
            # NOTE(review): call kept in case removehidden modifies its
            # arguments - confirm, then drop.
            removehidden(nodehidden, treedicbest)

        # <parameter> Noise schedule: sigma0 is 10% of the largest absolute
        # weight of the first iteration; afterwards sigma shrinks
        # quadratically and reaches 0 at iterationmax.
        if iteration >= 2:
            sigma = sigma0 * (
                (1.0 - float(iteration) / float(iterationmax)) ** 2.0)
        elif iteration == 1:
            sigma0 = 0.1 * max(abs(weightmatrix.min()),
                               abs(weightmatrix.max()))
            sigma = sigma0

    # Save the last and the best tree, both with hidden nodes removed.
    treediclast, nodehiddenlast = removehidden(nodehidden, treediclastbackup)
    tree2newick(treediclast, treediclast.keys(), nodehiddenlast,
                resfoldertree, 'treelast')
    treedicbestrh, nodehiddenbest = removehidden(nodehidden, treedicbest)
    tree2newick(treedicbestrh, treedicbestrh.keys(), nodehiddenbest,
                resfoldertree, 'treebest')

    # NOTE(review): presumably one of the files written above is what
    # self.newick_path points at, and the third positional argument is
    # presumably branch_length - confirm against newick2img and the caller.
    from Stemweb.algorithms.utils import newick2img
    newick2img(self.newick_path, self.image_path, False, radial=False)

    # Write the log: summary header, then one tab-separated row per entry.
    logstr = (logstr + 'End at ' + str(time.gmtime()) + '\n'
              + 'best iteration is ' + str(bestiteration) + '\n'
              + 'best probability is ' + str(probtreeallbest) + '\n\n\n')
    rows = ['\t'.join(str(column[j]) for column in logvector)
            for j in range(iteration + 1)]
    logstr = logstr + '\n'.join(rows)
    with open(os.path.join(resfolder, 'log'), 'w') as logfile:
        logfile.write(logstr)

    return (qscore, bestiteration)