def findTrueAverageTableAnchoringAddDistancesOverall(frq, anch, list_taxa, N,method, met):
    """Fold per-quartet ``[total, count]`` entries of ``frq`` into ratios.

    For every unordered pair of keys of ``list_taxa`` that are not in ``N``,
    the entry ``frq[genKey(sorted_anchors, sorted_pair)]`` is added to an
    accumulator keyed by the "/"-joined, sorted labels of the pair plus the
    first two anchors. Each accumulated total is then divided by its count
    and written into the table returned by ``initializeQuartetTable``.

    Args:
        frq: mapping from ``genKey`` keys to ``[total]`` or ``[total, count]``
            lists; a one-element entry is treated as having count 1.
            The mapping is read only (it is no longer mutated).
        anch: iterable of anchor labels; it is sorted and its first two
            items are used.
        list_taxa: mapping whose keys are the labels to pair up.
        N: container of labels to skip.
        method: unused here; present in the sibling averaging functions.
        met: unused here; present in the sibling averaging functions.

    Returns:
        The table from ``initializeQuartetTable`` with one ratio stored per
        visited key.

    Raises:
        KeyError: if a pair's ``genKey`` key is missing from ``frq``.
    """
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)
    skipClades = N
    # Materialise the keys so they can be indexed on Python 3 as well.
    clades = [c for c in list_taxa if c not in skipClades]
    TotalKey = dict()
    for i, clade_i in enumerate(clades):
        for clade_j in clades[i + 1:]:
            pair = sorted([clade_i, clade_j])
            key_inv = "/".join(sorted(pair + [anch[0], anch[1]]))
            entry = frq[genKey(anch, pair)]
            total = entry[0]
            count = entry[1] if len(entry) > 1 else 1
            # Accumulate into a fresh list so frq's own lists stay untouched.
            acc = TotalKey.setdefault(key_inv, [0, 0])
            acc[0] += total
            acc[1] += count
    for q, acc in TotalKey.items():
        # float() keeps this a true division even for integer entries,
        # matching findTrueAverageTableAnchoringAddDistances.
        TotalKeyf[q] = float(acc[0]) / acc[1]
    tm.toc()
    return TotalKeyf
def findAllChildrenPairsOverall(listTaxa,taxaDict,L1,L2,debugFlag):
    """Map each node in ``listTaxa`` to its ``taxaDict`` value.

    Args:
        listTaxa: iterable of objects exposing ``.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        L1: unused by this function.
        L2: unused by this function.
        debugFlag: when truthy, time the lookup with ``tm.tic()``/``tm.toc()``
            and print a progress message.

    Returns:
        A list with ``taxaDict[t.taxon.label]`` for every ``t``, in order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
def traverseAndFindPoly(node,root,con_tree,setPolyNodes,debugFlag):
    """Collect the labels of flagged ancestors between ``node`` and ``root``.

    Starting at ``node``, the walk moves to ``parent_node`` repeatedly and
    stops once the current node's parent is ``root``. Every ancestor visited
    on the way whose ``label`` is in ``setPolyNodes`` is recorded. The
    starting ``node`` and ``root`` themselves are never tested.

    Args:
        node: object exposing ``parent_node`` and ``label``.
        root: the node at which the upward walk stops (compared by identity).
        con_tree: unused by this function.
        setPolyNodes: container of labels of interest.
        debugFlag: when truthy, emit timing calls and a progress message.

    Returns:
        A set of the matching ancestor labels.
    """
    to_cover = set()
    while node.parent_node is not root:
        if debugFlag:
            tm.tic()
        node = node.parent_node
        if debugFlag:
            print("finding children of this node takes: ")
            # NOTE(review): a second tic() with no matching toc() is kept
            # from the original; possibly meant to be tm.toc() - confirm.
            tm.tic()
        if node.label in setPolyNodes:
            to_cover.add(node.label)
    return to_cover
def findAllChildrenPairs(listTaxa,taxaDict,debugFlag=False):
    """Map each node in ``listTaxa`` to its ``taxaDict`` value.

    Args:
        listTaxa: iterable of objects exposing ``.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        debugFlag: when truthy, time the lookup with ``tm.tic()``/``tm.toc()``
            and print a progress message. Defaults to ``False`` so the
            two-argument call made by ``addQuartets`` is valid.

    Returns:
        A list with ``taxaDict[t.taxon.label]`` for every ``t``, in order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
def test_pyomo_with_sort(self):
    """Print and time ``emd_sort`` against ``emd_pyomo``.

    Both routines are run on an evenly spaced sample ``U`` and a standard
    normal sample ``V`` of 100 points each, first as drawn and then with
    both inputs passed through ``np.sort``. Results and ``tic()``/``toc()``
    timings are printed; nothing is asserted.
    """
    n = 100
    p = 1
    # float(n) keeps this a true division regardless of the division mode.
    U = np.arange(n) / float(n)
    V = np.random.randn(n)

    print('Unsorted')
    print('EMD sort')
    tic()
    print(emd_sort(U, V, p))
    toc()
    print('EMD pyomo')
    tic()
    print(emd_pyomo(U, V, p)[0])
    toc()
    print(' ')
    # The sorts stay inside the timed region, as in the original.
    print('EMD sort')
    tic()
    print(emd_sort(np.sort(U), np.sort(V), p))
    toc()
    print("sorted")
    print('EMD pyomo')
    tic()
    print(emd_pyomo(np.sort(U), np.sort(V), p)[0])
    toc()
def test_quick(self):
    """Build an epi-spline distribution on 0..299 and print ten CDF values.

    Calls ``gosm_options.set_globals()``, makes sure the configured output
    directory exists, fits ``UnivariateEpiSplineDistribution`` on
    ``np.arange(300)`` and prints ``cdf(i)`` for ``i`` in 0..9, timing the
    fit and the evaluations with ``tic()``/``toc()``. Nothing is asserted.
    """
    print('Warning : this code must be called with runner.py')
    # Copy this code at the beginning of copula_test to see if it works
    # And enter python3 runner.py copula_experiments/run_test.txt
    gosm_options.set_globals()

    # Create output directory. makedirs(exist_ok=True) avoids the
    # check-then-create race and also creates missing parent directories.
    os.makedirs(gosm_options.output_directory, exist_ok=True)

    X = np.arange(300)
    tic()
    mydistr = UnivariateEpiSplineDistribution(X)
    for i in range(10):
        print(mydistr.cdf(i))
    toc()
def addQuartets( ch, listTaxa,Q,taxaDict,debugFlag):
    """Increment ``Q`` symmetrically for every pair of taxa in ``listTaxa``.

    The taxa are first translated to indices with ``findAllChildrenPairs``;
    for each position pair ``i < j`` both ``Q[a][b]`` and ``Q[b][a]`` are
    incremented by one, where ``a``/``b`` are the translated values.

    Args:
        ch: unused by this function.
        listTaxa: iterable of objects exposing ``.taxon.label``.
        Q: doubly indexable table of counters, updated in place.
        taxaDict: mapping from taxon label to an index into ``Q``.
        debugFlag: when truthy, print timing/progress information.

    Returns:
        None.
    """
    if debugFlag:
        tm.tic()
    # findAllChildrenPairs takes debugFlag as its third argument; the
    # original two-argument call raised TypeError.
    pairs = findAllChildrenPairs(listTaxa, taxaDict, debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    count = len(pairs)
    if debugFlag:
        tm.tic()
        # n*(n-1) is always even, so // is exact and stays an int on Py3.
        print("length of these pairs is: " + str(count * (count - 1) // 2))
    for i in range(count):
        first = pairs[i]
        for j in range(i + 1, count):
            second = pairs[j]
            Q[first][second] += 1
            Q[second][first] += 1
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
def findTrueAverageTableAnchoringAddDistancesOverallFromFile(frq, anch, list_taxa, N,method, met):
    """Fold per-taxon-pair ``frq`` entries into one ratio per clade pair.

    For every unordered pair of keys of ``list_taxa`` not in ``N``, and for
    every combination of one member from each key's value, the entry
    ``frq[genKey(sorted_taxon_pair, sorted_anchors)]`` is accumulated under
    the "/"-joined, sorted labels of the two keys plus the first two
    anchors. A one-element entry contributes ``(value, 1)``; a longer entry
    contributes ``(entry[0] - 0.5, entry[1] - 1.5)``. The stored result per
    key is ``(total + 0.5) / (count + 1.5)``.

    Args:
        frq: mapping from ``genKey`` keys to ``[total]`` or ``[total, count]``
            lists. It is read only: the original subtracted the offsets in
            place, so repeated calls produced different results.
        anch: iterable of anchor labels; sorted, first two items used.
        list_taxa: mapping from a clade label to an iterable of taxon labels.
        N: container of clade labels to skip.
        method: unused here; present in the sibling averaging functions.
        met: unused here; present in the sibling averaging functions.

    Returns:
        The table from ``initializeQuartetTable`` with one ratio stored per
        visited key.

    Raises:
        KeyError: if a taxon pair's ``genKey`` key is missing from ``frq``.
    """
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)
    skipClades = N
    # Materialise the keys so they can be indexed on Python 3 as well.
    clades = [c for c in list_taxa if c not in skipClades]
    TotalKey = dict()
    for i, clade_i in enumerate(clades):
        for clade_j in clades[i + 1:]:
            # The aggregate key depends only on the clade pair.
            key_inv = "/".join(sorted([clade_i, clade_j, anch[0], anch[1]]))
            for taxon_i in list_taxa[clade_i]:
                for taxon_j in list_taxa[clade_j]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    entry = frq[key_orig]
                    if len(entry) == 1:
                        total, count = entry[0], 1
                    else:
                        total, count = entry[0] - 0.5, entry[1] - 1.5
                    # Fresh accumulator: never alias or modify frq's lists.
                    acc = TotalKey.setdefault(key_inv, [0, 0])
                    acc[0] += total
                    acc[1] += count
    for q, acc in TotalKey.items():
        TotalKeyf[q] = (acc[0] + 0.5) / (acc[1] + 1.5)
    tm.toc()
    return TotalKeyf
def addQuartetsAnchoredOverall(listTaxa,Q,taxaDict,e,anch,L1,L2,m,debugFlag):
    """Add products of per-index counts to the upper triangle of ``Q``.

    The taxa are translated with ``findAllChildrenPairsOverall`` and then
    summarised by ``countNum(pairs, m)``. For every index pair ``i < j``
    whose counts are both non-zero, ``Q[i][j]`` is increased by the product
    of the two counts. Only ``Q[i][j]`` with ``i < j`` is touched.

    Args:
        listTaxa: iterable of objects exposing ``.taxon.label``.
        Q: doubly indexable table of counters, updated in place.
        taxaDict: mapping from taxon label to the value passed to countNum.
        e: unused by this function.
        anch: unused by this function.
        L1: forwarded to ``findAllChildrenPairsOverall``.
        L2: forwarded to ``findAllChildrenPairsOverall``.
        m: forwarded to ``countNum``.
        debugFlag: when truthy, print timing/progress information.

    Returns:
        None.
    """
    if debugFlag:
        tm.tic()
    pairs = findAllChildrenPairsOverall(listTaxa, taxaDict, L1, L2, debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    if debugFlag:
        tm.tic()
        # n*(n-1) is always even, so // is exact and stays an int on Py3.
        print("length of these pairs is: " + str(len(pairs) * (len(pairs) - 1) // 2))
    counts = countNum(pairs, m)
    size = len(counts)
    for i in range(size):
        if counts[i] == 0:
            continue
        for j in range(i + 1, size):
            if counts[j] == 0:
                continue
            Q[i][j] += counts[i] * counts[j]
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
def findAnchoredQuartetsOverall(anchPoly, trees,taxa, outpath,debugFlag):
    """Count anchored quartets over ``trees`` for every entry of ``anchPoly``.

    For each ``anchPoly`` entry a set of working tables is created with
    ``buildEmptyQuartetsOverall``. Every tree is rerooted with ``reroot`` and
    walked upward from the returned node until the node whose parent is the
    returned root. At each ancestor, the leaf sets of the children other
    than the one just left are passed to ``addQuartetsAnchoredOverall``; at
    ancestors with more than two children, pairs of such leaf sets are also
    passed to ``removeFromQuartetLentreeshAnchoredOverall``. Finally
    ``makeTrueFrqOverall`` is applied per entry.

    Args:
        anchPoly: sequence of 5-item entries unpacked as
            ``[e, C, anch, taxa_list, taxa_inv]``.
        trees: sized iterable of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartetsOverall``.
        outpath: unused by this function.
        debugFlag: when truthy, print timing/progress information.

    Returns:
        A list with one ``[e, frq, anch, taxa_list, C]`` item per
        ``anchPoly`` entry, where ``frq`` is the ``makeTrueFrqOverall``
        result.
    """
    n = len(trees)
    [_, _, anch, _, _] = anchPoly[0]
    anch = sorted(anch)
    frq = list()
    if debugFlag:
        tm.tic()
    Q = list()
    T = list()
    taxaDict = list()
    L1 = list()
    L2 = list()
    e = list()
    m = list()
    clades = list()
    listPoly = list()
    taxa_list = list()
    taxa_inv = list()
    C = list()
    for s in range(len(anchPoly)):
        # NOTE(review): this unpacking rebinds anch to the (unsorted) anchors
        # of entry s, so the sorted value above is replaced and the last
        # entry's anchors are what the code below uses. Presumably every
        # entry carries the same anchors - confirm.
        [ett, Ctt, anch, taxa_listtt, taxa_invtt] = anchPoly[s]
        [Qt, Tt, taxaDictt, cladest, L1t, L2t, mt, listPolyt] = buildEmptyQuartetsOverall(
            anch, taxa_listtt, taxa_invtt, Ctt, ett, taxa, n)
        Q.append(Qt)
        T.append(Tt)
        taxa_inv.append(taxa_invtt)
        taxa_list.append(taxa_listtt)
        taxaDict.append(taxaDictt)
        clades.append(cladest)
        L1.append(L1t)
        L2.append(L2t)
        e.append(ett)
        m.append(mt)
        C.append(Ctt)
        listPoly.append(listPolyt)
    if debugFlag:
        print("Initializing arrays takes: ")
        tm.toc()
    for tree in trees:
        rerooted = reroot(tree, anch)
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
        while node.parent_node is not root:
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped.
            node_pre = node
            node = node.parent_node
            if debugFlag:
                print("finding children of this node takes: ")
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            if chs_n > 2:
                for i in range(0, chs_n):
                    ch = chs[i]
                    if ch == node_pre:
                        continue
                    if debugFlag:
                        tm.tic()
                    listTaxa = ch.leaf_nodes()
                    if debugFlag:
                        print("adding quartets around this node takes (more than 2 children): ")
                    for s in range(0, len(anchPoly)):
                        addQuartetsAnchoredOverall(listTaxa, Q[s], taxaDict[s], e[s], anch, L1[s], L2[s], m[s], debugFlag)
                    if debugFlag:
                        tm.toc()
                    for j in range(i + 1, chs_n):
                        if (chs[i] == chs[j]) or (chs[j] == node_pre):
                            continue
                        if debugFlag:
                            tm.tic()
                        listTaxatmp = [listTaxa, chs[j].leaf_nodes()]
                        for s in range(len(anchPoly)):
                            removeFromQuartetLentreeshAnchoredOverall(T[s], listTaxatmp, taxaDict[s], e[s], m[s])
                        if debugFlag:
                            print("adding quartets around this node takes (more than 2 children): ")
                            tm.toc()
            else:
                for ch in chs:
                    if ch == node_pre:
                        continue
                    if debugFlag:
                        tm.tic()
                    listTaxa = ch.leaf_nodes()
                    # A single leaf yields no pair, so there is nothing to add.
                    if len(listTaxa) == 1:
                        continue
                    for s in range(0, len(anchPoly)):
                        addQuartetsAnchoredOverall(listTaxa, Q[s], taxaDict[s], e[s], anch, L1[s], L2[s], m[s], debugFlag)
                    if debugFlag:
                        print("adding quartets around this node takes (less than two children): ")
                        tm.toc()
            if debugFlag:
                print("finding quartets on this node is finished!")
                tm.toc()
    for s in range(len(anchPoly)):
        frqTmp = makeTrueFrqOverall(Q[s], T[s], clades[s], anch, C[s], listPoly[s])
        h = [e[s], frqTmp, anch, taxa_list[s], C[s]]
        frq.append(h)
    if debugFlag:
        print("time for counting is: ")
        tm.toc()
    return frq
def findAnchoredQuartets(anch,trees,taxa,out,debugFlag):
    """Count anchored quartets over ``trees`` for one anchor pair.

    Working tables are created with ``buildEmptyQuartets``. Every tree is
    rerooted with ``reroot`` and walked upward from the returned node until
    the node whose parent is the returned root. At each ancestor, the leaf
    set of every child other than the one just left is passed to
    ``addQuartetsAnchored``; at ancestors with more than two children, pairs
    of such leaf sets are also passed to
    ``removeFromQuartetLentreeshAnchored``. The tables are finally converted
    with ``makeTrueFrq``.

    Args:
        anch: iterable of anchor labels; sorted before use.
        trees: sized iterable of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartets``.
        out: unused by this function.
        debugFlag: when truthy, print timing/progress information.

    Returns:
        The value returned by ``makeTrueFrq(Q, T, taxaT, anch)``.
    """
    anch = sorted(anch)
    n = len(trees)
    if debugFlag:
        tm.tic()
    [Q, T, taxaDict, taxaT] = buildEmptyQuartets(anch, taxa, n)
    if debugFlag:
        print("Initializing arrays takes: ")
        tm.toc()
    for tree in trees:
        rerooted = reroot(tree, anch)
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
        while node.parent_node is not root:
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped.
            node_pre = node
            node = node.parent_node
            if debugFlag:
                print("finding children of this node takes: ")
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            if chs_n > 2:
                for i in range(0, chs_n):
                    ch = chs[i]
                    if ch == node_pre:
                        continue
                    if debugFlag:
                        tm.tic()
                    listTaxa = ch.leaf_nodes()
                    if debugFlag:
                        print("adding quartets around this node takes (more than 2 children): ")
                    addQuartetsAnchored(ch, listTaxa, Q, taxaDict, debugFlag)
                    if debugFlag:
                        tm.toc()
                    for j in range(i + 1, chs_n):
                        if (chs[i] == chs[j]) or (chs[j] == node_pre):
                            continue
                        if debugFlag:
                            tm.tic()
                        listTaxatmp = [listTaxa, chs[j].leaf_nodes()]
                        removeFromQuartetLentreeshAnchored(T, listTaxatmp, taxaDict)
                        if debugFlag:
                            print("adding quartets around this node takes (more than 2 children): ")
                            tm.toc()
            else:
                for ch in chs:
                    if ch == node_pre:
                        continue
                    if debugFlag:
                        tm.tic()
                    listTaxa = ch.leaf_nodes()
                    addQuartetsAnchored(ch, listTaxa, Q, taxaDict, debugFlag)
                    if debugFlag:
                        print("adding quartets around this node takes (less than two children): ")
                        tm.toc()
            if debugFlag:
                print("finding quartets on this node is finished!")
                tm.toc()
    # NOTE(review): placed after the tree loop, mirroring
    # findAnchoredQuartetsOverall; it only affects debug timing output.
    if debugFlag:
        print("time for counting is: ")
        tm.toc()
    frq = makeTrueFrq(Q, T, taxaT, anch)
    return frq
def print_contours(df=pd.DataFrame.from_csv(data_file),dimkeys=dimkeys,marginal_string = marginal_string,copula_list=copula_list,segment_marginal=None):
    """Save a 2-D histogram of the data and PDF contour plots per copula.

    Rows of ``df`` with a missing value in any ``dimkeys`` column or its
    ``'FH' + key`` counterpart are dropped. The samples for each key are
    taken from rows strictly before the last index value (or from
    ``segmenter.OuterSegmenter`` when ``segment_marginal == 'segmented'``),
    a marginal is fitted per key, and for every name in ``copula_list`` the
    copula PDF is evaluated on a 100 x 100 grid over [-100, 100]^2 and its
    contours are written to ``./copula_experiments/BPA/``.

    Args:
        df: input DataFrame (the default is loaded once, at definition time).
        dimkeys: sequence of column names; the first two are plotted.
        marginal_string: name passed to ``distribution_factory`` for the
            marginals.
        copula_list: iterable of copula names passed to
            ``distribution_factory``.
        segment_marginal: ``'segmented'`` to build the samples with the
            segmenter; any other value uses the plain history.

    Returns:
        None. Figures are written to disk and progress is printed.
    """
    df = df.convert_objects(convert_numeric=True)
    subset = []
    for key in dimkeys:
        subset.append(key)
        subset.append('FH' + key)
    df = df.dropna(axis=0, how='any', subset=subset)
    mydt = df.index[-1]

    # Both branches below fully define the samples, so the original's
    # preliminary fill from the whole frame was dead code and is dropped.
    samples = dict.fromkeys(dimkeys)
    if segment_marginal == 'segmented':
        for key in dimkeys:
            segmented_df = segmenter.OuterSegmenter(
                df.loc[df.index < mydt], df,
                'copula_experiments/segment_input_wind_FH' + str(key) + '.txt',
                mydt).retval_dataframe()
            samples[key] = segmented_df[key].values.tolist()
    else:
        for key in dimkeys:
            samples[key] = df[key].loc[df.index < mydt].values.tolist()

    marginals = dict.fromkeys(dimkeys)
    for key in dimkeys:
        marg_class = distribution_factory(marginal_string)
        marginals[key] = marg_class(samples[key])

    # NOTE(review): this object is never used afterwards and copula_string
    # is not a parameter (presumably a module-level name); kept so any
    # side effects or errors of the construction are unchanged - confirm
    # whether it can be removed.
    distr_class = distribution_factory(copula_string)
    mydistr = distr_class(dimkeys, samples, marginals)

    xedges = np.arange(-100, 100)
    yedges = np.arange(-100, 100)
    H, xedges, yedges = np.histogram2d(samples[dimkeys[0]], samples[dimkeys[1]], bins=(xedges, yedges))
    H = H.T  # Let each row list bins with common y range.
    # 'lower' is the documented spelling; 'low' is rejected by current
    # matplotlib releases.
    plt.imshow(H, interpolation='nearest', origin='lower',
               extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
    plt.xlabel('Error at '+str(dimkeys[0])+' forecast')
    plt.ylabel('Errors at '+str(dimkeys[1])+ ' forecast')
    plt.title('Histogram or errors')
    plt.savefig('./copula_experiments/BPA/histogram2d_errors.png')
    plt.clf()

    # The evaluation grid does not depend on the copula: build it once.
    x, y = np.meshgrid(np.linspace(-100, 100, 100), np.linspace(-100, 100, 100))
    for mydistr in copula_list:
        print(mydistr)
        distr_class = distribution_factory(mydistr)
        copula = distr_class(dimkeys, samples, marginals)
        z = np.zeros((len(x), len(y)))
        tic()
        for i in range(len(x)):
            for j in range(len(y)):
                z[i][j] = copula.pdf({dimkeys[0]: x[i][j], dimkeys[1]: y[i][j]})
        toc()
        plt.contour(x, y, z, 10)
        plt.xlabel('Errors at '+str(dimkeys[0]))
        plt.ylabel('Errors at '+str(dimkeys[1]))
        plt.title('PDF Contours with '+mydistr+' and '+marginal_string+' marginals')
        plt.savefig('./copula_experiments/BPA/pdf_contours_'+mydistr+'_'+marginal_string+'.png')
        plt.clf()
def findTrueAverageTableAnchoringAddDistances(frq, anch, list_taxa, N,method, met):
    """Average per-taxon-pair quartet frequencies into one value per clade pair.

    For every unordered pair of keys of ``list_taxa`` not in ``N``, and for
    every combination of one member from each key's value, the ratio
    ``frq[key][0] / frq[key][1]`` is collected (``0.5 / numG`` when the key
    is absent, ``numG`` being the largest second element in ``frq``). With
    ``met == "log"`` the negated natural log of the ratio is collected
    instead. The values collected for a clade pair are then combined and
    stored in the table returned by ``initializeQuartetTable``.

    Args:
        frq: mapping from ``genKey`` keys to ``[total, count]`` sequences.
        anch: iterable of anchor labels; sorted, first two items used.
        list_taxa: mapping from a clade label to an iterable of taxon labels.
        N: container of clade labels to skip.
        method: ``"gmean"`` (``stats.gmean``), ``"mean"`` (``mean``), or
            anything else for the root mean square.
        met: ``"freq"`` to combine the ratios directly, or ``"log"`` to
            combine their negated logs and return ``exp(-combined)``.

    Returns:
        The table from ``initializeQuartetTable`` with one combined value
        stored per visited key.

    Raises:
        ValueError: if ``met`` is neither ``"freq"`` nor ``"log"`` (the
            original failed later with an unbound local), or if ``frq`` is
            empty (from ``max``).
    """
    if met not in ("freq", "log"):
        raise ValueError('met must be "freq" or "log", got %r' % (met,))
    use_log = met == "log"

    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)
    skipClades = N
    # Materialise the keys so they can be indexed on Python 3 as well.
    clades = [c for c in list_taxa if c not in skipClades]
    numG = max(v[1] for v in frq.values())
    TotalKey = dict()
    for i, clade_i in enumerate(clades):
        for clade_j in clades[i + 1:]:
            # The aggregate key depends only on the clade pair.
            key_inv = "/".join(sorted([clade_i, clade_j, anch[0], anch[1]]))
            for taxon_i in list_taxa[clade_i]:
                for taxon_j in list_taxa[clade_j]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    if key_orig in frq:
                        v = frq[key_orig]
                    else:
                        # Unseen quartet: pseudo-count of 0.5 out of numG.
                        v = [0.5, numG]
                    v_inv = float(v[0]) / v[1]
                    value = -np.log(1. * v_inv) if use_log else v_inv
                    TotalKey.setdefault(key_inv, []).append(value)

    for q, v2 in TotalKey.items():
        if method == "gmean":
            combined = stats.gmean(v2)
        elif method == "mean":
            combined = mean(v2)
        else:
            combined = sqrt(mean(square(v2)))
        TotalKeyf[q] = np.exp(-combined) if use_log else combined
    tm.toc()
    return TotalKeyf
if options.filename: readFromFile = True else: readFromFile = False if ( not options.gt or not options.out): sys.exit("Please enter genetrees file, and output folder location") src_fpath = os.path.expanduser(os.path.expandvars(gt)) trees = dendropy.TreeList.get_from_path(src_fpath, 'newick') (converted_labels,new_labels) = tstt.changeLabelsToNumbers(trees,verbose) print "time to compute consensus is: " tm.tic() con_tree = trees.consensus(min_freq=thr) tm.toc() ftmpt=tempfile.mkstemp(suffix='.nwk', prefix="consensusTree", dir=outpath, text=None) con_tree2 = copy.deepcopy(con_tree) tstt.changeLabelsToNames(con_tree2,new_labels,verbose) con_tree2.write(path=ftmpt[1],schema="newick",suppress_rooting=True) os.close(ftmpt[0]) tstt.labelNodes(con_tree) (to_resolve,maxPolyOrder) = tstt.findPolytomies(con_tree) taxa = list() for e in con_tree.leaf_nodes():
import timer

#modeling
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, concatenate, Conv2DTranspose
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import TensorBoard

mpl.rcParams['figure.figsize'] = (10, 10)
timer.tic()

data_path = '/home/jliu0604/AML/satelite_image/data'
PATCH_SIZE = 160


def superimpose_stlite_mask(st_img, mask, color=(10, 0, 0)):
    """Overlay a colored mask on an image and return it as ``uint8``.

    The image is rescaled so that its maximum becomes 255, three copies of
    ``mask`` scaled by the components of ``color`` are stacked along a new
    leading axis and added, and the sum is clipped to [0, 255].

    Args:
        st_img: numeric array; assumed to broadcast against the
            ``(3,) + mask.shape`` color mask, i.e. channels-first with
            three channels - TODO confirm against callers.
        mask: numeric array that is multiplied by each color component.
        color: three multipliers applied to ``mask``, one per stacked plane.

    Returns:
        ``np.uint8`` array holding the clipped sum.
    """
    # Normalize the image to the 0..255 range. An all-zero image would
    # divide by zero and turn into NaN, so it is kept as zeros instead.
    peak = st_img.max()
    if peak:
        st_normed = 255.0 * st_img / peak
    else:
        st_normed = np.zeros_like(st_img, dtype=float)

    # create color mask using RGB channels
    colored_mask = np.stack(
        [mask * color[0], mask * color[1], mask * color[2]])

    # combine the colored_mask and st_img together
    combined = (st_normed + colored_mask).clip(0, 255).astype(np.uint8)
    return combined