Exemple #1
0
    def __algorithm__(self, run_args=None):
        """Run ``binarysankoff.main`` on this run's input file and render the tree.

        Args:
            run_args: mapping handed to ``binarysankoff.main``. It is updated
                in place: ``'strap'`` is set to 1 and ``'file_name'`` to
                ``self.input_file_name``. When omitted (``None``) a fresh dict
                is created; the ``None`` default used to raise ``TypeError``
                on the first item assignment.
        """
        if run_args is None:
            run_args = {}
        run_args['strap'] = 1
        run_args['file_name'] = self.input_file_name

        binarysankoff.main(run_args)

        # Imported at call time rather than at module level (kept from the
        # original). NOTE(review): presumably binarysankoff.main has written
        # self.newick_path by now -- confirm.
        from Stemweb.algorithms.utils import newick2img
        newick2img(self.newick_path, self.image_path, radial=self.radial_image)

        # Short pause, then raise the shared stop flag.
        sleep(0.1)
        self._stop.value = 1
Exemple #2
0
	def __algorithm__(self, run_args = None):
		"""Run ``binarysankoff.main`` on this run's input file and render the tree.

		Args:
			run_args: mapping handed to ``binarysankoff.main``. It is updated
				in place: ``'strap'`` is set to 1 and ``'file_name'`` to
				``self.input_file_name``. When omitted (``None``) a fresh dict
				is created; the ``None`` default used to raise ``TypeError``
				on the first item assignment.
		"""
		if run_args is None:
			run_args = {}
		run_args['strap'] = 1
		run_args['file_name'] = self.input_file_name

		binarysankoff.main(run_args)

		# Imported at call time rather than at module level (kept from the
		# original). NOTE(review): presumably binarysankoff.main has written
		# self.newick_path by now -- confirm.
		from Stemweb.algorithms.utils import newick2img
		newick2img(self.newick_path, self.image_path, radial = self.radial_image)

		# Short pause, then raise the shared stop flag.
		sleep(0.1)
		self._stop.value = 1
		
Exemple #3
0
        def save_tree(node):
            """Write the tree under ``node`` as Newick text and render an image.

            The text written to ``self.newick_path`` is ``_printtree(node, '')``
            followed by ``';'``.

            Returns:
                -1 if the Newick file could not be written (an error is
                logged to ``stemweb.algorithm_run``); otherwise ``None`` after
                ``newick2img`` has been called.
            """
            newick = _printtree(node, '') + ";"
            try:
                # The context manager closes the handle even if write() fails.
                with open(self.newick_path, 'w') as newick_file:
                    newick_file.write(newick)
            except Exception:
                # Still best-effort (log and return -1), but no longer swallows
                # KeyboardInterrupt/SystemExit, and the traceback is recorded.
                logger = logging.getLogger('stemweb.algorithm_run')
                logger.error(
                    'AlgorithmRun %s:%s could\'t write in file %s.' %
                    (self.algorithm_run.algorithm.name,
                     self.algorithm_run.id, self.newick_path),
                    exc_info=True)
                return -1

            # Imported at call time rather than at module level (kept from the
            # original).
            from Stemweb.algorithms.utils import newick2img
            newick2img(self.newick_path, self.image_path,
                       branch_length=False, radial=self.radial_image,
                       width=800)
Exemple #4
0
		def save_tree(node):
			"""Write the tree under ``node`` as Newick text and render an image.

			The text written to ``self.newick_path`` is ``_printtree(node, '')``
			followed by ``';'``.

			Returns:
				-1 if the Newick file could not be written (an error is
				logged to ``stemweb.algorithm_run``); otherwise ``None`` after
				``newick2img`` has been called.
			"""
			newick = _printtree(node, '') + ";"
			try:
				# The context manager closes the handle even if write() fails.
				with open(self.newick_path, 'w') as newick_file:
					newick_file.write(newick)
			except Exception:
				# Still best-effort (log and return -1), but no longer swallows
				# KeyboardInterrupt/SystemExit, and the traceback is recorded.
				logger = logging.getLogger('stemweb.algorithm_run')
				logger.error(
					'AlgorithmRun %s:%s could\'t write in file %s.' %
					(self.algorithm_run.algorithm.name,
					self.algorithm_run.id, self.newick_path),
					exc_info = True)
				return -1

			# Imported at call time rather than at module level (kept from the
			# original).
			from Stemweb.algorithms.utils import newick2img
			newick2img(self.newick_path, self.image_path,
				branch_length = False, radial = self.radial_image, width = 800)
Exemple #5
0
		def semuniform (inputfile, iterationmax):
			"""Iteratively rebuild a tree from noise-perturbed pairwise weights.

			Steps, as performed below:
			  1. ``readfile(inputfile)`` loads the data and ``njtree`` builds the
			     initial tree (with hidden nodes).
			  2. The result folder ``self.run_args['outfolder']`` and its ``tree``
			     sub-folder are emptied and recreated.
			  3. In every iteration the pairwise weights returned by
			     ``messagepassingu`` are summed over all ``datadic`` entries whose
			     key is > 1, Gaussian noise with standard deviation ``sigma`` is
			     added, and ``mst`` builds the next tree from the noisy matrix.
			  4. The loop ends after ``iterationmax`` iterations, or one iteration
			     after the qscore changed by less than 0.001 twice in a row
			     (only checked once ``iteration > 10``).
			  5. The last and the best tree are written with ``tree2newick``,
			     ``newick2img`` is called, and a ``log`` file is written.

			Args:
				inputfile: value passed unchanged to ``readfile``.
				iterationmax: maximum number of iterations; must be >= 1.

			Returns:
				``(qscore, bestiteration)``: the qscore of the final iteration
				and the (zero-based) iteration whose tree had the highest
				summed ``probtree``.

			Raises:
				ValueError: if ``iterationmax`` is smaller than 1 (previously
					this surfaced as an UnboundLocalError after the result
					folders had already been wiped).
			"""
			import shutil

			if iterationmax < 1:
				raise ValueError('iterationmax must be at least 1')

			probsame = 0.9
			printtime = False
			resfolder = self.run_args['outfolder']
			#step 1 read file
			namelist, datadic, textdata = readfile(inputfile)
			#step 2 initiation by nj tree (with hidden nodes)
			treeroot, nodeorder, nodehidden, nodeleaf, treedic = njtree(textdata)
			#folder for resulting trees
			resfoldertree = os.path.join(resfolder, 'tree')
			#start from empty result folders
			for folder in (resfolder, resfoldertree):
				if os.path.exists(folder):
					shutil.rmtree(folder)
				os.makedirs(folder)
			#Result unused (the plotting that consumed it is disabled); the call is
			#kept in case removehidden has side effects -- TODO confirm and drop.
			removehidden(nodehidden, treedic)
			#step 3 calculate weight matrix
			logstr = 'The resulting folder is ' + resfolder + '\n' + 'Start at ' + str(time.gmtime()) + '\n'
			#sigma is derived from the first weight matrix at the end of iteration 1
			sigma = 0 #<parameter>
			#NOTE: the former unused 'rho' parameter was removed; computing it
			#raised ZeroDivisionError for iterationmax == 5.
			probtreeallbest = float('-inf')
			bestiteration = 0
			treedicbest = copy.deepcopy(treedic)
			#logvector stores one column per quantity. The probability column has
			#no initial 0 on purpose: the probability computed in iteration k
			#belongs to the tree of iteration k-1, so it lines up with that row.
			logvector = [['iteration'], ['sigma'], ['qscore'], ['probability']] #<log>
			logvector[0].append(0)
			logvector[1].append(0)
			logvector[2].append(0)
			#progress is reported every 100 iterations for long runs, else every 10
			if iterationmax > 100:
				progressstep = 100
			else:
				progressstep = 10
			stopsign = 0
			for iteration in range(1, iterationmax + 1):
				treedicprevious = copy.deepcopy(treedic)
				if printtime and (iteration < 4):
					timestart = time.time()
				if (iteration % progressstep) == 0:
					print ('Now is iteration '+ str(iteration))
				weightmatrix = zeros((len(nodeorder), len(nodeorder)))
				weightmatrixwithnoise = zeros((len(nodeorder), len(nodeorder)))
				weightmatrixindex = list(nodeorder)
				#calculate weight matrix
				probtreeall = 0
				for datadickey in datadic.keys():
					if datadickey > 1:
						#collapse repeated lines into (line, repeat count) pairs
						textbylinewithrepeat = datadic[datadickey]
						linerepeat = []
						textbyline = []
						for textbylinewithrepeati in textbylinewithrepeat:
							if textbylinewithrepeati not in textbyline:
								textbyline.append(textbylinewithrepeati)
								linerepeat.append(1.0)
							else:
								lineindex = textbyline.index(textbylinewithrepeati)
								linerepeat[lineindex] = linerepeat[lineindex] + 1.0
						weightdic, probtree = messagepassingu(treeroot, nodeorder, nodehidden, nodeleaf, treedic, textbyline, linerepeat, namelist, probsame)
						probtreeall = probtreeall + probtree
						#accumulate into the symmetric weight matrix
						for ni in range(0, (len(nodeorder) - 1)):
							for nj in range((ni + 1), len(nodeorder)):
								weightmatrix[ni, nj] = weightmatrix[ni, nj] + weightdic[(nodeorder[ni], nodeorder[nj])]
								weightmatrix[nj, ni] = weightmatrix[ni, nj]
				#add noise to weight matrix
				#Every ordered pair (ni, nj) is visited, so each symmetric entry is
				#drawn twice and the later draw wins. Left as is so the sequence of
				#random.gauss calls (and thus seeded results) does not change.
				for ni in range(len(nodeorder)):
					for nj in range(len(nodeorder)):
						if ni != nj:
							weightmatrixwithnoise[ni, nj] = weightmatrix[ni, nj] + random.gauss(0, sigma)
							weightmatrixwithnoise[nj, ni] = weightmatrixwithnoise[ni, nj]
						else:
							weightmatrixwithnoise[ni, ni] = float('Inf')
				#update tree by mst
				treeroot, nodeorder, nodeleaf, treedic = mst(weightmatrixwithnoise, weightmatrixindex)
				#calculate qscore: noise-free weight between each node (all but the
				#last in nodeorder) and its first parent
				qscore = 0.0
				for nodei in nodeorder[0:(-1)]:
					qscore = qscore + weightmatrix[weightmatrixindex.index(nodei), weightmatrixindex.index(treedic[nodei]['parent'][0])]
				#save results in logvector
				logvector[0].append(iteration)
				logvector[1].append(sigma)
				logvector[2].append(qscore)
				logvector[3].append(probtreeall)
				if printtime and (iteration < 4):
					timeend = time.time()
					print ('The time for iteration ' +  str(iteration) + ' is '+str(timeend-timestart))
				#test converge
				if (iteration > 10) and (iteration < iterationmax):
					if stopsign == 1:
						#convergence was detected in the previous iteration
						print ('stop at ' + str(iteration-1) + '\n')
						break
					if (abs(logvector[2][-2] - logvector[2][-3]) < 0.001) and (abs(logvector[2][-1] - logvector[2][-2]) < 0.001):
						stopsign = 1
						treediclastbackup = copy.deepcopy(treedic)
				if (iteration == iterationmax) and (stopsign == 0):
					print ('stop at ' + str(iteration-1) + '\n')
					treediclastbackup = copy.deepcopy(treedicprevious)

				#find the iteration with best probtreeall; probtreeall was computed
				#on the tree this iteration started from, hence "iteration - 1"
				if probtreeall > probtreeallbest:
					treedicbest = copy.deepcopy(treedicprevious)
					probtreeallbest = probtreeall
					bestiteration = iteration - 1
					#Result unused; call kept in case removehidden has side
					#effects -- TODO confirm and drop.
					removehidden(nodehidden, treedicbest)
				#update sigma: fixed from the first weight matrix, then decayed
				if iteration >= 2:
					sigma = sigma0*((1.0-float(iteration)/float(iterationmax))**2.0) #<parameter>
				elif iteration == 1:
					sigma0 = 0.1* max(abs(weightmatrix.min()),abs(weightmatrix.max()))
					sigma = sigma0
			#save last and best tree (hidden nodes removed)
			treediclast, nodehiddenlast = removehidden(nodehidden, treediclastbackup)
			tree2newick(treediclast, treediclast.keys(), nodehiddenlast, resfoldertree, 'treelast')
			treedicbestrh, nodehiddenbest = removehidden(nodehidden, treedicbest)
			tree2newick(treedicbestrh, treedicbestrh.keys(), nodehiddenbest, resfoldertree, 'treebest')
			from Stemweb.algorithms.utils import newick2img
			newick2img(self.newick_path, self.image_path, False, radial = False)
			#save log
			logstr = logstr + 'End at '+ str (time.gmtime()) + '\n' + 'best iteration is ' + str(bestiteration) +'\n' + 'best probability is ' + str(probtreeallbest) + '\n\n\n'
			#One tab-separated row per logged index 0..iteration; joined once
			#instead of re-stripping the whole string for every row.
			logrows = ['\t'.join(str(column[j]) for column in logvector) for j in range(iteration + 1)]
			logstr = (logstr + '\n'.join(logrows)).strip()
			with open(os.path.join(resfolder, 'log'), 'w') as logfile:
				logfile.write(logstr)
			return (qscore, bestiteration)
Exemple #6
0
        def semuniform(inputfile, iterationmax):
            """Iteratively rebuild a tree from noise-perturbed pairwise weights.

            Steps, as performed below:
              1. ``readfile(inputfile)`` loads the data and ``njtree`` builds
                 the initial tree (with hidden nodes).
              2. The result folder ``self.run_args['outfolder']`` and its
                 ``tree`` sub-folder are emptied and recreated.
              3. In every iteration the pairwise weights returned by
                 ``messagepassingu`` are summed over all ``datadic`` entries
                 whose key is > 1, Gaussian noise with standard deviation
                 ``sigma`` is added, and ``mst`` builds the next tree from the
                 noisy matrix.
              4. The loop ends after ``iterationmax`` iterations, or one
                 iteration after the qscore changed by less than 0.001 twice
                 in a row (only checked once ``iteration > 10``).
              5. The last and the best tree are written with ``tree2newick``,
                 ``newick2img`` is called, and a ``log`` file is written.

            Args:
                inputfile: value passed unchanged to ``readfile``.
                iterationmax: maximum number of iterations; must be >= 1.

            Returns:
                ``(qscore, bestiteration)``: the qscore of the final iteration
                and the (zero-based) iteration whose tree had the highest
                summed ``probtree``.

            Raises:
                ValueError: if ``iterationmax`` is smaller than 1 (previously
                    this surfaced as an UnboundLocalError after the result
                    folders had already been wiped).
            """
            import shutil

            if iterationmax < 1:
                raise ValueError('iterationmax must be at least 1')

            probsame = 0.9
            printtime = False
            resfolder = self.run_args['outfolder']
            #step 1 read file
            namelist, datadic, textdata = readfile(inputfile)
            #step 2 initiation by nj tree (with hidden nodes)
            treeroot, nodeorder, nodehidden, nodeleaf, treedic = njtree(
                textdata)
            #folder for resulting trees
            resfoldertree = os.path.join(resfolder, 'tree')
            #start from empty result folders
            for folder in (resfolder, resfoldertree):
                if os.path.exists(folder):
                    shutil.rmtree(folder)
                os.makedirs(folder)
            #Result unused (the plotting that consumed it is disabled); the
            #call is kept in case removehidden has side effects -- TODO
            #confirm and drop.
            removehidden(nodehidden, treedic)
            #step 3 calculate weight matrix
            logstr = 'The resulting folder is ' + resfolder + '\n' + 'Start at ' + str(
                time.gmtime()) + '\n'
            #sigma is derived from the first weight matrix at the end of
            #iteration 1
            sigma = 0  #<parameter>
            #NOTE: the former unused 'rho' parameter was removed; computing it
            #raised ZeroDivisionError for iterationmax == 5.
            probtreeallbest = float('-inf')
            bestiteration = 0
            treedicbest = copy.deepcopy(treedic)
            #logvector stores one column per quantity. The probability column
            #has no initial 0 on purpose: the probability computed in
            #iteration k belongs to the tree of iteration k-1, so it lines up
            #with that row.
            logvector = [['iteration'], ['sigma'], ['qscore'],
                         ['probability']]  #<log>
            logvector[0].append(0)
            logvector[1].append(0)
            logvector[2].append(0)
            #progress is reported every 100 iterations for long runs, else
            #every 10
            if iterationmax > 100:
                progressstep = 100
            else:
                progressstep = 10
            stopsign = 0
            for iteration in range(1, iterationmax + 1):
                treedicprevious = copy.deepcopy(treedic)
                if printtime and (iteration < 4):
                    timestart = time.time()
                if (iteration % progressstep) == 0:
                    print('Now is iteration ' + str(iteration))
                weightmatrix = zeros((len(nodeorder), len(nodeorder)))
                weightmatrixwithnoise = zeros((len(nodeorder), len(nodeorder)))
                weightmatrixindex = list(nodeorder)
                #calculate weight matrix
                probtreeall = 0
                for datadickey in datadic.keys():
                    if datadickey > 1:
                        #collapse repeated lines into (line, repeat count) pairs
                        textbylinewithrepeat = datadic[datadickey]
                        linerepeat = []
                        textbyline = []
                        for textbylinewithrepeati in textbylinewithrepeat:
                            if textbylinewithrepeati not in textbyline:
                                textbyline.append(textbylinewithrepeati)
                                linerepeat.append(1.0)
                            else:
                                lineindex = textbyline.index(
                                    textbylinewithrepeati)
                                linerepeat[
                                    lineindex] = linerepeat[lineindex] + 1.0
                        weightdic, probtree = messagepassingu(
                            treeroot, nodeorder, nodehidden, nodeleaf, treedic,
                            textbyline, linerepeat, namelist, probsame)
                        probtreeall = probtreeall + probtree
                        #accumulate into the symmetric weight matrix
                        for ni in range(0, (len(nodeorder) - 1)):
                            for nj in range((ni + 1), len(nodeorder)):
                                weightmatrix[
                                    ni, nj] = weightmatrix[ni, nj] + weightdic[
                                        (nodeorder[ni], nodeorder[nj])]
                                weightmatrix[nj, ni] = weightmatrix[ni, nj]
                #add noise to weight matrix
                #Every ordered pair (ni, nj) is visited, so each symmetric
                #entry is drawn twice and the later draw wins. Left as is so
                #the sequence of random.gauss calls (and thus seeded results)
                #does not change.
                for ni in range(len(nodeorder)):
                    for nj in range(len(nodeorder)):
                        if ni != nj:
                            weightmatrixwithnoise[
                                ni, nj] = weightmatrix[ni, nj] + random.gauss(
                                    0, sigma)
                            weightmatrixwithnoise[
                                nj, ni] = weightmatrixwithnoise[ni, nj]
                        else:
                            weightmatrixwithnoise[ni, ni] = float('Inf')
                #update tree by mst
                treeroot, nodeorder, nodeleaf, treedic = mst(
                    weightmatrixwithnoise, weightmatrixindex)
                #calculate qscore: noise-free weight between each node (all but
                #the last in nodeorder) and its first parent
                qscore = 0.0
                for nodei in nodeorder[0:(-1)]:
                    qscore = qscore + weightmatrix[
                        weightmatrixindex.index(nodei),
                        weightmatrixindex.index(treedic[nodei]['parent'][0])]
                #save results in logvector
                logvector[0].append(iteration)
                logvector[1].append(sigma)
                logvector[2].append(qscore)
                logvector[3].append(probtreeall)
                if printtime and (iteration < 4):
                    timeend = time.time()
                    print('The time for iteration ' + str(iteration) + ' is ' +
                          str(timeend - timestart))
                #test converge
                if (iteration > 10) and (iteration < iterationmax):
                    if stopsign == 1:
                        #convergence was detected in the previous iteration
                        print('stop at ' + str(iteration - 1) + '\n')
                        break
                    if (abs(logvector[2][-2] - logvector[2][-3]) < 0.001) and (
                            abs(logvector[2][-1] - logvector[2][-2]) < 0.001):
                        stopsign = 1
                        treediclastbackup = copy.deepcopy(treedic)
                if (iteration == iterationmax) and (stopsign == 0):
                    print('stop at ' + str(iteration - 1) + '\n')
                    treediclastbackup = copy.deepcopy(treedicprevious)

                #find the iteration with best probtreeall; probtreeall was
                #computed on the tree this iteration started from, hence
                #"iteration - 1"
                if probtreeall > probtreeallbest:
                    treedicbest = copy.deepcopy(treedicprevious)
                    probtreeallbest = probtreeall
                    bestiteration = iteration - 1
                    #Result unused; call kept in case removehidden has side
                    #effects -- TODO confirm and drop.
                    removehidden(nodehidden, treedicbest)
                #update sigma: fixed from the first weight matrix, then decayed
                if iteration >= 2:
                    sigma = sigma0 * (
                        (1.0 - float(iteration) / float(iterationmax))**2.0
                    )  #<parameter>
                elif iteration == 1:
                    sigma0 = 0.1 * max(abs(weightmatrix.min()),
                                       abs(weightmatrix.max()))
                    sigma = sigma0
            #save last and best tree (hidden nodes removed)
            treediclast, nodehiddenlast = removehidden(nodehidden,
                                                       treediclastbackup)
            tree2newick(treediclast, treediclast.keys(), nodehiddenlast,
                        resfoldertree, 'treelast')
            treedicbestrh, nodehiddenbest = removehidden(
                nodehidden, treedicbest)
            tree2newick(treedicbestrh, treedicbestrh.keys(), nodehiddenbest,
                        resfoldertree, 'treebest')
            from Stemweb.algorithms.utils import newick2img
            newick2img(self.newick_path, self.image_path, False, radial=False)
            #save log
            logstr = logstr + 'End at ' + str(
                time.gmtime()) + '\n' + 'best iteration is ' + str(
                    bestiteration) + '\n' + 'best probability is ' + str(
                        probtreeallbest) + '\n\n\n'
            #One tab-separated row per logged index 0..iteration; joined once
            #instead of re-stripping the whole string for every row.
            logrows = [
                '\t'.join(str(column[j]) for column in logvector)
                for j in range(iteration + 1)
            ]
            logstr = (logstr + '\n'.join(logrows)).strip()
            with open(os.path.join(resfolder, 'log'), 'w') as logfile:
                logfile.write(logstr)
            return (qscore, bestiteration)