def findTrueAverageTableAnchoringAddDistancesOverall(frq, anch, list_taxa, N, method, met):
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    lst_taxa = list_taxa.keys()
    TotalKey = dict()
    n = len(lst_taxa)
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i+1, n):
            if lst_taxa[j] in skipClades:
                continue
            l = sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]])
            key_inv = "/".join(l)
            key_orig = genKey(anch, sorted([lst_taxa[i], lst_taxa[j]]))
            v = frq[key_orig]
            if len(v) == 1:
                v.append(1)
            # accumulate [numerator, denominator] per quartet key
            if key_inv in TotalKey:
                vt = TotalKey[key_inv]
                vt[0] += v[0]
                vt[1] += v[1]
            else:
                vt = v
            TotalKey[key_inv] = vt
    # divide only once per key when filling the final table
    for q, v2 in TotalKey.iteritems():
        vtt = v2[0] / v2[1]
        TotalKeyf[q] = vtt
    tm.toc()
    return TotalKeyf
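# findTrueAverageTableAnchoringAddDistancesOverall keeps each quartet entry as a
# running [numerator, denominator] pair and only takes the ratio once all pairs
# have been accumulated. A minimal, self-contained illustration of that
# accumulation pattern (the key string and values below are made up, not project data):
def _demo_ratio_accumulation():
    totals = dict()
    observations = [("A/B/Z1/Z2", (3.0, 4)), ("A/B/Z1/Z2", (1.0, 2))]
    for key, (num, den) in observations:
        acc = totals.setdefault(key, [0.0, 0])
        acc[0] += num   # accumulate numerators
        acc[1] += den   # accumulate denominators
    # Divide once per key, as TotalKeyf is filled above.
    return dict((k, v[0] / v[1]) for k, v in totals.items())  # {"A/B/Z1/Z2": ~0.667}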
def findAllChildrenPairsOverall(listTaxa, taxaDict, L1, L2, debugFlag):
    listTaxaLabels = list()
    if debugFlag:
        tm.tic()
    for t in listTaxa:
        listTaxaLabels.append(taxaDict[t.taxon.label])
    if debugFlag:
        print "Time to find indices"
        tm.toc()
    return listTaxaLabels
def findAllChildrenPairs(listTaxa, taxaDict, debugFlag):
    listTaxaLabels = [0 for _ in range(len(listTaxa))]
    i = 0
    if debugFlag:
        tm.tic()
    for t in listTaxa:
        listTaxaLabels[i] = taxaDict[t.taxon.label]
        i += 1
    if debugFlag:
        print "Time to find indices"
        tm.toc()
    return listTaxaLabels
def test_pyomo_with_sort(self):
    n = 100
    p = 1
    normal1 = np.random.randn(n)
    # Alternative test inputs (unused in this comparison).
    normal2 = np.random.randn(n)
    uniform1 = np.random.rand(n)
    uniform2 = np.random.rand(n)
    linearprog = np.asarray(range(n)) / n
    U = linearprog
    V = normal1
    iter = []
    for i in range(n):
        for j in range(n):
            iter.append((i, j))
    print('Unsorted')
    print('EMD sort')
    tic()
    print(emd_sort(U, V, p))
    toc()
    print('EMD pyomo')
    tic()
    print(emd_pyomo(U, V, p)[0])
    toc()
    print(' ')
    print('Sorted')
    print('EMD sort')
    tic()
    print(emd_sort(np.sort(U), np.sort(V), p))
    toc()
    print('EMD pyomo')
    tic()
    print(emd_pyomo(np.sort(U), np.sort(V), p)[0])
    toc()
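# For equal-size 1-D samples, the earth mover's distance has a closed form once
# both samples are sorted: match order statistics and take the l_p mean of their
# gaps. The sketch below (emd_sort_sketch is a hypothetical name, not the emd_sort
# under test) only illustrates that formula; whether emd_sort itself sorts its
# inputs or expects pre-sorted data is exactly what test_pyomo_with_sort probes.
def emd_sort_sketch(u, v, p=1):
    import numpy as np
    u_sorted = np.sort(np.asarray(u, dtype=float))
    v_sorted = np.sort(np.asarray(v, dtype=float))
    # ( mean_i |u_(i) - v_(i)|^p )^(1/p)
    return np.mean(np.abs(u_sorted - v_sorted) ** p) ** (1.0 / p)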
def test_quick(self):
    print('Warning : this code must be called with runner.py')
    # Copy this code at the beginning of copula_test to see if it works
    # and enter python3 runner.py copula_experiments/run_test.txt
    gosm_options.set_globals()

    # Create output directory.
    if not os.path.isdir(gosm_options.output_directory):
        os.mkdir(gosm_options.output_directory)

    X = np.arange(300)
    tic()
    mydistr = UnivariateEpiSplineDistribution(X)
    for i in range(10):
        print(mydistr.cdf(i))
    toc()
def addQuartets(ch, listTaxa, Q, taxaDict, debugFlag):
    if debugFlag:
        tm.tic()
    pairs = findAllChildrenPairs(listTaxa, taxaDict, debugFlag)
    if debugFlag:
        print "Time to find all pairs: "
        tm.toc()
    if debugFlag:
        tm.tic()
        print "number of pairs is: " + str(len(pairs)*(len(pairs)-1)/2)
    for i in range(0, len(pairs)):
        for j in range(i+1, len(pairs)):
            Q[pairs[i]][pairs[j]] += 1
            Q[pairs[j]][pairs[i]] += 1
    if debugFlag:
        print "Time to add found quartets to the dictionary: "
        tm.toc()
    return
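# addQuartets counts one quartet for every unordered pair of taxa below the current
# child node and records it symmetrically in Q. An equivalent, purely illustrative
# way to write the double loop with itertools, assuming Q is the same symmetric
# count structure indexed by the labels returned by findAllChildrenPairs:
def addQuartetsSketch(pairs, Q):
    import itertools
    for a, b in itertools.combinations(pairs, 2):
        Q[a][b] += 1
        Q[b][a] += 1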
def findTrueAverageTableAnchoringAddDistancesOverallFromFile(frq, anch, list_taxa, N, method, met):
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    lst_taxa = list_taxa.keys()
    TotalKey = dict()
    n = len(lst_taxa)
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i+1, n):
            if lst_taxa[j] in skipClades:
                continue
            for taxon_i in list_taxa[lst_taxa[i]]:
                for taxon_j in list_taxa[lst_taxa[j]]:
                    lab_taxon_i = taxon_i
                    lab_taxon_j = taxon_j
                    p = sorted([lab_taxon_i, lab_taxon_j])
                    key_orig = genKey(p, anch)
                    l = sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]])
                    key_inv = "/".join(l)
                    v = frq[key_orig]
                    if len(v) == 1:
                        v.append(1)
                    else:
                        v[0] -= 0.5
                        v[1] -= 1.5
                    # accumulate [numerator, denominator] per quartet key
                    if key_inv in TotalKey:
                        vt = TotalKey[key_inv]
                        vt[0] += v[0]
                        vt[1] += v[1]
                    else:
                        vt = v
                    TotalKey[key_inv] = vt
    for q, v2 in TotalKey.iteritems():
        vtt = (v2[0] + 0.5) / (v2[1] + 1.5)
        TotalKeyf[q] = vtt
    tm.toc()
    return TotalKeyf
def addQuartetsAnchoredOverall(listTaxa, Q, taxaDict, e, anch, L1, L2, m, debugFlag):
    if debugFlag:
        tm.tic()
    pairs = findAllChildrenPairsOverall(listTaxa, taxaDict, L1, L2, debugFlag)
    if debugFlag:
        print "Time to find all pairs: "
        tm.toc()
    if debugFlag:
        tm.tic()
        print "number of pairs is: " + str(len(pairs)*(len(pairs)-1)/2)
    l = countNum(pairs, m)
    for i in range(0, len(l)):
        if l[i] == 0:
            continue
        for j in range(i+1, len(l)):
            if l[j] == 0:
                continue
            Q[i][j] += l[i]*l[j]
    if debugFlag:
        print "Time to add found quartets to the dictionary: "
        tm.toc()
    return
def findAnchoredQuartetsOverall(anchPoly, trees, taxa, outpath, debugFlag):
    n = len(trees)
    [_, _, anch, _, _] = anchPoly[0]
    anch = sorted(anch)
    frq = list()
    if debugFlag:
        tm.tic()
    Q = list()
    T = list()
    taxaDict = list()
    L1 = list()
    L2 = list()
    e = list()
    m = list()
    clades = list()
    listPoly = list()
    taxa_list = list()
    taxa_inv = list()
    C = list()
    for s in range(len(anchPoly)):
        [ett, Ctt, anch, taxa_listtt, taxa_invtt] = anchPoly[s]
        [Qt, Tt, taxaDictt, cladest, L1t, L2t, mt, listPolyt] = buildEmptyQuartetsOverall(anch, taxa_listtt, taxa_invtt, Ctt, ett, taxa, n)
        Q.append(Qt)
        T.append(Tt)
        taxa_inv.append(taxa_invtt)
        taxa_list.append(taxa_listtt)
        taxaDict.append(taxaDictt)
        clades.append(cladest)
        L1.append(L1t)
        L2.append(L2t)
        e.append(ett)
        m.append(mt)
        C.append(Ctt)
        listPoly.append(listPolyt)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
        rerooted = reroot(tree, anch)
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
        # listTaxaTmp = list()
        # walk from the anchor up to the root, adding quartets around each node
        while node.parent_node is not root:
            if debugFlag:
                tm.tic()
            node_pre = node
            # if node_pre.is_leaf():
            #     listTaxaTmp.append(node_pre)
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            if len(chs) > 2:
                for i in range(0, chs_n):
                    ch = chs[i]
                    if ch == node_pre:
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        for s in range(0, len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa, Q[s], taxaDict[s], e[s], anch, L1[s], L2[s], m[s], debugFlag)
                        if debugFlag:
                            tm.toc()
                        for j in range(i+1, chs_n):
                            if (chs[i] == chs[j]) or (chs[j] == node_pre):
                                continue
                            else:
                                if debugFlag:
                                    tm.tic()
                                listTaxatmp = [listTaxa, chs[j].leaf_nodes()]
                                for s in range(len(anchPoly)):
                                    removeFromQuartetLentreeshAnchoredOverall(T[s], listTaxatmp, taxaDict[s], e[s], m[s])
                                if debugFlag:
                                    print "adding quartets around this node takes (more than 2 children): "
                                    tm.toc()
            else:
                for ch in chs:
                    if ch == node_pre:
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        if len(listTaxa) == 1:
                            continue
                        for s in range(0, len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa, Q[s], taxaDict[s], e[s], anch, L1[s], L2[s], m[s], debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (two or fewer children): "
                            tm.toc()
        if debugFlag:
            print "finding quartets on this tree is finished!"
            tm.toc()
    for s in range(len(anchPoly)):
        frqTmp = makeTrueFrqOverall(Q[s], T[s], clades[s], anch, C[s], listPoly[s])
        h = [e[s], frqTmp, anch, taxa_list[s], C[s]]
        frq.append(h)
    if debugFlag:
        print "time for counting is: "
        tm.toc()
    return frq
def findAnchoredQuartets(anch, trees, taxa, out, debugFlag):
    anch = sorted(anch)
    n = len(trees)
    if debugFlag:
        tm.tic()
    [Q, T, taxaDict, taxaT] = buildEmptyQuartets(anch, taxa, n)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
        # print "time for re-rooting is: "
        # tm.tic()
        rerooted = reroot(tree, anch)
        # tm.toc()
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
        # listTaxaTmp = list()
        # walk from the anchor up to the root, adding quartets around each node
        while node.parent_node is not root:
            if debugFlag:
                tm.tic()
            node_pre = node
            # if node_pre.is_leaf():
            #     listTaxaTmp.append(node_pre)
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            if len(chs) > 2:
                for i in range(0, chs_n):
                    ch = chs[i]
                    if ch == node_pre:
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        addQuartetsAnchored(ch, listTaxa, Q, taxaDict, debugFlag)
                        if debugFlag:
                            tm.toc()
                        for j in range(i+1, chs_n):
                            if (chs[i] == chs[j]) or (chs[j] == node_pre):
                                continue
                            else:
                                if debugFlag:
                                    tm.tic()
                                listTaxatmp = [listTaxa, chs[j].leaf_nodes()]
                                removeFromQuartetLentreeshAnchored(T, listTaxatmp, taxaDict)
                                if debugFlag:
                                    print "adding quartets around this node takes (more than 2 children): "
                                    tm.toc()
            else:
                for ch in chs:
                    if ch == node_pre:
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        addQuartetsAnchored(ch, listTaxa, Q, taxaDict, debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (two or fewer children): "
                            tm.toc()
        if debugFlag:
            print "finding quartets on this tree is finished!"
            tm.toc()
    if debugFlag:
        print "time for counting is: "
        tm.toc()
    frq = makeTrueFrq(Q, T, taxaT, anch)
    return frq
def print_contours(df=pd.DataFrame.from_csv(data_file), dimkeys=dimkeys, marginal_string=marginal_string, copula_list=copula_list, segment_marginal=None):
    """
    Print the contours of the pdf of each copula in copula_list fitted to the data in df.
    """
    df = df.convert_objects(convert_numeric=True)
    subset = []
    for i in dimkeys:
        subset.append(i)
        subset.append('FH' + i)
    df = df.dropna(axis=0, how='any', subset=subset)
    mydt = df.index[len(df.index) - 1]

    if segment_marginal == 'segmented':
        input = dict.fromkeys(dimkeys)
        for i in dimkeys:
            segmented_df = segmenter.OuterSegmenter(df.loc[df.index < mydt], df,
                                                    'copula_experiments/segment_input_wind_FH' + str(i) + '.txt',
                                                    mydt).retval_dataframe()
            input[i] = segmented_df[i].values.tolist()
    else:
        input = dict.fromkeys(dimkeys)
        for i in dimkeys:
            input[i] = df[i].loc[df.index < mydt].values.tolist()

    marginals = dict.fromkeys(dimkeys)
    for i in dimkeys:
        marg_class = distribution_factory(marginal_string)
        marginals[i] = marg_class(input[i])
    # copula_string is assumed to be defined at module level.
    distr_class = distribution_factory(copula_string)
    mydistr = distr_class(dimkeys, input, marginals)

    xedges = np.arange(-100, 100)
    yedges = np.arange(-100, 100)
    H, xedges, yedges = np.histogram2d(input[dimkeys[0]], input[dimkeys[1]], bins=(xedges, yedges))
    H = H.T  # Let each row list bins with common y range.
    plt.imshow(H, interpolation='nearest', origin='lower',
               extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
    plt.xlabel('Errors at ' + str(dimkeys[0]) + ' forecast')
    plt.ylabel('Errors at ' + str(dimkeys[1]) + ' forecast')
    plt.title('Histogram of errors')
    plt.savefig('./copula_experiments/BPA/histogram2d_errors.png')
    plt.clf()

    for mydistr in copula_list:
        print(mydistr)
        distr_class = distribution_factory(mydistr)
        copula = distr_class(dimkeys, input, marginals)
        x, y = np.meshgrid(np.linspace(-100, 100, 100), np.linspace(-100, 100, 100))
        z = np.zeros((len(x), len(y)))
        tic()
        for i in range(len(x)):
            for j in range(len(y)):
                z[i][j] = copula.pdf({dimkeys[0]: x[i][j], dimkeys[1]: y[i][j]})
        toc()
        graphe = plt.contour(x, y, z, 10)
        plt.xlabel('Errors at ' + str(dimkeys[0]))
        plt.ylabel('Errors at ' + str(dimkeys[1]))
        plt.title('PDF Contours with ' + mydistr + ' and ' + marginal_string + ' marginals')
        plt.savefig('./copula_experiments/BPA/pdf_contours_' + mydistr + '_' + marginal_string + '.png')
        plt.clf()
def findTrueAverageTableAnchoringAddDistances(frq, anch, list_taxa, N, method, met):
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    lst_taxa = list_taxa.keys()
    TotalKey = dict()
    n = len(lst_taxa)
    numG = max(v[1] for v in frq.values())
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i+1, n):
            if lst_taxa[j] in skipClades:
                continue
            for taxon_i in list_taxa[lst_taxa[i]]:
                for taxon_j in list_taxa[lst_taxa[j]]:
                    lab_taxon_i = taxon_i
                    lab_taxon_j = taxon_j
                    p = sorted([lab_taxon_i, lab_taxon_j])
                    key_orig = genKey(p, anch)
                    l = sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]])
                    key_inv = "/".join(l)
                    if key_orig in frq:
                        v = frq[key_orig]
                    else:
                        v = [0.5, numG]
                    v_inv = float(v[0]) / v[1]
                    if key_inv in TotalKey:
                        vt = TotalKey[key_inv]
                    else:
                        vt = list()
                    if met == "freq":
                        vt.append(v_inv)
                    elif met == "log":
                        vt.append(-np.log(1.*v_inv))
                    TotalKey[key_inv] = vt
    for q, v2 in TotalKey.iteritems():
        if met == "log":
            if method == "gmean":
                vtt = np.exp(-stats.gmean(v2))
            elif method == "mean":
                vtt = np.exp(-mean(v2))
            else:
                vtt = np.exp(-sqrt(mean(square(v2))))
        elif met == "freq":
            if method == "gmean":
                vtt = stats.gmean(v2)
            elif method == "mean":
                vtt = mean(v2)
            else:
                vtt = sqrt(mean(square(v2)))
        TotalKeyf[q] = vtt
    tm.toc()
    return TotalKeyf
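# findTrueAverageTableAnchoringAddDistances either averages quartet frequencies
# directly (met == "freq") or averages their negative logs and maps the result back
# (met == "log"). A small check with made-up frequencies (not project data): with
# method == "mean", the "log" route reproduces the geometric mean of the
# frequencies, since exp(-mean(-log(v))) == gmean(v).
def _demo_log_mean_equals_gmean():
    import numpy as np
    from scipy import stats
    demo = np.array([0.2, 0.4, 0.8])            # hypothetical quartet frequencies
    via_log = np.exp(-np.mean(-np.log(demo)))   # "log" + "mean" route
    via_gmean = stats.gmean(demo)               # "freq" + "gmean" route
    return via_log, via_gmean                   # both are 0.4 up to rounding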
    readFromFile = True
else:
    readFromFile = False
if not options.gt or not options.out:
    sys.exit("Please enter genetrees file, and output folder location")
src_fpath = os.path.expanduser(os.path.expandvars(gt))
trees = dendropy.TreeList.get_from_path(src_fpath, 'newick')
(converted_labels, new_labels) = tstt.changeLabelsToNumbers(trees, verbose)
print "time to compute consensus is: "
tm.tic()
con_tree = trees.consensus(min_freq=thr)
tm.toc()
ftmpt = tempfile.mkstemp(suffix='.nwk', prefix="consensusTree", dir=outpath, text=None)
con_tree2 = copy.deepcopy(con_tree)
tstt.changeLabelsToNames(con_tree2, new_labels, verbose)
con_tree2.write(path=ftmpt[1], schema="newick", suppress_rooting=True)
os.close(ftmpt[0])
tstt.labelNodes(con_tree)
(to_resolve, maxPolyOrder) = tstt.findPolytomies(con_tree)
taxa = list()
for e in con_tree.leaf_nodes():
    taxa.append(e.taxon.label)
n = len(con_tree.leaf_nodes())