def findTrueAverageTableAnchoringAddDistancesOverall(frq, anch, list_taxa, N, method, met):
    """Fill the anchored quartet table with one ratio per pair of clades.

    For every unordered pair of keys of ``list_taxa`` (skipping any key that
    is in ``N``) the entry ``frq[genKey(anch, sorted(pair))]`` is looked up.
    An entry is a list ``[numerator]`` or ``[numerator, denominator]``; a
    missing denominator counts as 1.  Numerators and denominators are summed
    per table key (the two clade names plus the two anchors, sorted and
    joined with "/") and the table value is ``numerator / denominator``.

    Parameters:
        frq: mapping from ``genKey`` keys to one- or two-element lists.
            It is only read; its lists are copied, never modified.
        anch: the two anchor names (any iterable; it is sorted here).
        list_taxa: dict whose keys are the clade names to pair up.
        N: container of clade names to skip.
        method, met: unused; kept for signature compatibility with the
            sibling ``findTrueAverageTable...`` functions.

    Returns:
        The table created by ``initializeQuartetTable(anch, list_taxa)`` with
        the computed ratios stored under the "/"-joined keys.
    """
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    # list() so the keys can be indexed on Python 3 as well as Python 2.
    lst_taxa = list(list_taxa.keys())
    TotalKey = dict()

    n = len(lst_taxa)
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i + 1, n):
            if lst_taxa[j] in skipClades:
                continue
            key_inv = "/".join(sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]]))
            key_orig = genKey(anch, sorted([lst_taxa[i], lst_taxa[j]]))
            v = frq[key_orig]
            numerator = v[0]
            denominator = 1 if len(v) == 1 else v[1]
            if key_inv in TotalKey:
                acc = TotalKey[key_inv]
                acc[0] += numerator
                acc[1] += denominator
            else:
                # A fresh list: the accumulator must not alias the list
                # stored in frq, or later sums would corrupt the input.
                TotalKey[key_inv] = [numerator, denominator]
    for q, v2 in TotalKey.items():
        # float() keeps this a true division on Python 2 even for integer
        # counts, matching findTrueAverageTableAnchoringAddDistances.
        TotalKeyf[q] = float(v2[0]) / v2[1]
    tm.toc()
    return TotalKeyf
def findAllChildrenPairsOverall(listTaxa, taxaDict, L1, L2, debugFlag):
    """Map each leaf node to its entry in ``taxaDict``.

    Parameters:
        listTaxa: iterable of nodes exposing ``.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        L1, L2: unused; kept for signature compatibility.
        debugFlag: when true, time the lookup with ``tm`` and print a header.

    Returns:
        A list with ``taxaDict[node.taxon.label]`` for every node, in input
        order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
def traverseAndFindPoly(node, root, con_tree, setPolyNodes, debugFlag):
    """Collect the labels of ancestors of ``node`` that are in ``setPolyNodes``.

    Starting from ``node`` the walk steps to ``parent_node`` for as long as
    the current node's parent is not ``root``.  Each node stepped onto is
    checked; the starting node itself and ``root`` are never checked.

    Parameters:
        node: start node; must expose ``parent_node`` and ``label``.
        root: node at which the walk stops (compared by identity).
        con_tree: unused; kept for signature compatibility.
        setPolyNodes: container of labels of interest.
        debugFlag: when true, call the ``tm`` timer and print a header on
            every step.

    Returns:
        The set of matching labels found on the path.
    """
    to_cover = set()
    while node.parent_node is not root:
        if debugFlag:
            tm.tic()
        node = node.parent_node
        if debugFlag:
            # Parenthesised so the line is valid on both Python 2 and Python 3.
            print("finding children of this node takes: ")
            # NOTE(review): a second tic() with no toc() in this loop; this
            # looks like it was meant to be tm.toc() -- confirm against tm.
            tm.tic()
        if node.label in setPolyNodes:
            to_cover.add(node.label)
    return to_cover
def findAllChildrenPairs(listTaxa, taxaDict, debugFlag=False):
    """Map each leaf node to its entry in ``taxaDict``.

    Parameters:
        listTaxa: iterable of nodes exposing ``.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        debugFlag: when true, time the lookup with ``tm`` and print a header.
            Defaults to False so callers that pass only two arguments work.

    Returns:
        A list with ``taxaDict[node.taxon.label]`` for every node, in input
        order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
# Example #5
# 0
    def test_pyomo_with_sort(self):
        """Print and time ``emd_sort`` and ``emd_pyomo`` on the same samples.

        ``U`` is the evenly spaced grid ``0, 1/n, ..., (n-1)/n`` and ``V`` is
        a standard-normal sample of the same size.  Both distances are
        printed for the samples as drawn and again for the sorted samples.
        The method makes no assertions; results are compared by eye.
        """
        n = 100
        p = 1
        U = np.asarray(range(n)) / n
        V = np.random.randn(n)

        print('Unsorted')
        print('EMD sort')
        tic()
        print(emd_sort(U, V, p))
        toc()
        print('EMD pyomo')
        tic()
        print(emd_pyomo(U, V, p)[0])
        toc()
        print(' ')
        # Section header goes before both sorted runs, mirroring 'Unsorted'.
        print("sorted")
        print('EMD sort')
        tic()
        print(emd_sort(np.sort(U), np.sort(V), p))
        toc()
        print('EMD pyomo')
        tic()
        print(emd_pyomo(np.sort(U), np.sort(V), p)[0])
        toc()
# Example #6
# 0
    def test_quick(self):
        """Smoke-test the epi-spline distribution by printing ten CDF values.

        Sets the gosm globals, creates the configured output directory when
        it does not exist, fits ``UnivariateEpiSplineDistribution`` to
        ``0..299`` and prints its CDF at ``0..9`` inside a tic/toc timing
        pair.  The method makes no assertions.
        """
        print('Warning : this code must be called with runner.py')
        # Copy this code at the beginning of copula_test to see if it works
        # And enter python3 runner.py copula_experiments/run_test.txt
        gosm_options.set_globals()

        # Create output directory.
        output_exists = os.path.isdir(gosm_options.output_directory)
        if not output_exists:
            os.mkdir(gosm_options.output_directory)

        sample_points = np.arange(300)
        tic()
        distribution = UnivariateEpiSplineDistribution(sample_points)
        for point in range(10):
            cdf_value = distribution.cdf(point)
            print(cdf_value)
        toc()
def addQuartets(ch, listTaxa, Q, taxaDict, debugFlag):
    """Increment ``Q`` symmetrically for every pair of leaves in ``listTaxa``.

    The leaves are first mapped to indices with ``findAllChildrenPairs``;
    then for every pair of positions ``i < j`` both ``Q[a][b]`` and
    ``Q[b][a]`` are increased by one, where ``a`` and ``b`` are the indices
    at those positions.

    Parameters:
        ch: unused; kept for signature compatibility.
        listTaxa: leaf nodes exposing ``.taxon.label``.
        Q: two-level indexable table of counters, updated in place.
        taxaDict: mapping from taxon label to index into ``Q``.
        debugFlag: when true, print timing information using ``tm``.

    Returns:
        None.
    """
    if debugFlag:
        tm.tic()
    # debugFlag is forwarded: findAllChildrenPairs takes it as third argument.
    pairs = findAllChildrenPairs(listTaxa, taxaDict, debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    count = len(pairs)
    if debugFlag:
        tm.tic()
        # Floor division keeps the pair count an integer on Python 3 too.
        print("length of these pairs is: " + str(count * (count - 1) // 2))
    for i in range(0, count):
        for j in range(i + 1, count):
            Q[pairs[i]][pairs[j]] += 1
            Q[pairs[j]][pairs[i]] += 1
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
def findTrueAverageTableAnchoringAddDistancesOverallFromFile(frq, anch, list_taxa, N, method, met):
    """Fill the anchored quartet table from per-taxon-pair entries of ``frq``.

    For every unordered pair of clades (keys of ``list_taxa`` not in ``N``)
    and every taxon pair drawn one from each clade, the entry
    ``frq[genKey(sorted([taxon_i, taxon_j]), anch)]`` is read.  A one-element
    entry ``[x]`` contributes ``(x, 1)``; any other entry contributes
    ``(v[0] - 0.5, v[1] - 1.5)``.  Contributions are summed per clade pair
    and the table value is ``(sum0 + 0.5) / (sum1 + 1.5)``.
    NOTE(review): presumably this removes a per-entry pseudocount so that it
    is applied once per clade pair -- confirm against the producer of ``frq``.

    Parameters:
        frq: mapping from ``genKey`` keys to lists.  It is only read; its
            lists are never modified, so repeated calls give the same result.
        anch: the two anchor names (any iterable; it is sorted here).
        list_taxa: dict mapping clade name to an iterable of its taxa.
        N: container of clade names to skip.
        method, met: unused; kept for signature compatibility.

    Returns:
        The table created by ``initializeQuartetTable(anch, list_taxa)`` with
        the computed ratios stored under "/"-joined sorted keys.
    """
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    # list() so the keys can be indexed on Python 3 as well as Python 2.
    lst_taxa = list(list_taxa.keys())
    TotalKey = dict()
    n = len(lst_taxa)
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i + 1, n):
            if lst_taxa[j] in skipClades:
                continue
            # The table key depends only on the clade pair, not on the taxa.
            key_inv = "/".join(sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]]))
            for taxon_i in list_taxa[lst_taxa[i]]:
                for taxon_j in list_taxa[lst_taxa[j]]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    v = frq[key_orig]
                    if len(v) == 1:
                        numerator, denominator = v[0], 1
                    else:
                        # Work on local values: subtracting in place would
                        # change the caller's frq on every call.
                        numerator, denominator = v[0] - 0.5, v[1] - 1.5
                    if key_inv in TotalKey:
                        acc = TotalKey[key_inv]
                        acc[0] += numerator
                        acc[1] += denominator
                    else:
                        TotalKey[key_inv] = [numerator, denominator]
    for q, v2 in TotalKey.items():
        TotalKeyf[q] = (v2[0] + 0.5) / (v2[1] + 1.5)
    tm.toc()
    return TotalKeyf
def addQuartetsAnchoredOverall(listTaxa, Q, taxaDict, e, anch, L1, L2, m, debugFlag):
    """Add the products of per-index leaf counts to the upper triangle of ``Q``.

    The leaves are mapped to indices with ``findAllChildrenPairsOverall`` and
    summarised by ``countNum(pairs, m)`` into a sequence ``l``.  For every
    ``i < j`` with both ``l[i]`` and ``l[j]`` non-zero, ``Q[i][j]`` is
    increased by ``l[i] * l[j]``.

    Parameters:
        listTaxa: leaf nodes exposing ``.taxon.label``.
        Q: two-level indexable table of counters, updated in place.
        taxaDict: mapping from taxon label to index.
        e, anch: unused; kept for signature compatibility.
        L1, L2: forwarded to ``findAllChildrenPairsOverall``.
        m: forwarded to ``countNum``.
        debugFlag: when true, print timing information using ``tm``.

    Returns:
        None.
    """
    if debugFlag:
        tm.tic()
    pairs = findAllChildrenPairsOverall(listTaxa, taxaDict, L1, L2, debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    if debugFlag:
        tm.tic()
        # Floor division keeps the pair count an integer on Python 3 too.
        print("length of these pairs is: " + str(len(pairs) * (len(pairs) - 1) // 2))
    l = countNum(pairs, m)
    size = len(l)
    for i in range(0, size):
        if l[i] == 0:
            continue
        for j in range(i + 1, size):
            if l[j] == 0:
                continue
            Q[i][j] += l[i] * l[j]
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
def findAnchoredQuartetsOverall(anchPoly, trees,taxa, outpath,debugFlag):
    """Count anchored quartets over all trees for every entry of ``anchPoly``.

    Each entry of ``anchPoly`` is a five-element sequence
    ``[e, C, anch, taxa_list, taxa_inv]``.  For every entry a set of empty
    tables is built with ``buildEmptyQuartetsOverall``.  Every tree is then
    rerooted with ``reroot(tree, anch)`` and walked from the returned start
    node up to the returned root; at each ancestor the leaves under the
    children other than the one the walk came from are fed to
    ``addQuartetsAnchoredOverall`` (and, for nodes with more than two
    children, pairs of such children to
    ``removeFromQuartetLentreeshAnchoredOverall``).  Finally
    ``makeTrueFrqOverall`` turns the tables of each entry into frequencies.

    Parameters:
        anchPoly: non-empty sequence of ``[e, C, anch, taxa_list, taxa_inv]``.
        trees: sequence of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartetsOverall``.
        outpath: not used in this function.
        debugFlag: when true, print timing information using ``tm``.

    Returns:
        A list with one ``[e, frq, anch, taxa_list, C]`` per entry of
        ``anchPoly``.
    """
    n = len(trees)
#     if debugFlag:
    [_,_,anch,_,_]  = anchPoly[0]
    anch = sorted(anch)
    frq = list()
    if debugFlag:
        tm.tic()
    # One slot per anchPoly entry in each of the parallel lists below.
    Q = list()
    T = list()
    taxaDict = list()
    L1 = list()
    L2 = list()
    e = list()
    m = list()
    clades = list()
    listPoly = list()
    taxa_list = list()
    taxa_inv = list()
    C = list()
    for s in range(len(anchPoly)):
        # NOTE(review): this rebinds anch on every iteration, so after the
        # loop anch is the (unsorted) anchor pair of the LAST entry, not the
        # sorted pair computed above -- presumably all entries share the same
        # anchors; confirm.
        [ett,Ctt,anch,taxa_listtt,taxa_invtt]  = anchPoly[s]
        [Qt,Tt,taxaDictt,cladest,L1t,L2t,mt,listPolyt] = buildEmptyQuartetsOverall(anch,taxa_listtt,taxa_invtt,Ctt,ett,taxa,n)
        Q.append(Qt)
        T.append(Tt)
        taxa_inv.append(taxa_invtt)
        taxa_list.append(taxa_listtt)
        taxaDict.append(taxaDictt)
        clades.append(cladest)
        L1.append(L1t)
        L2.append(L2t)
        e.append(ett)
        m.append(mt)
        C.append(Ctt)
        listPoly.append(listPolyt)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
        # reroot returns a pair: [0] is the node to start from, [1] the root.
        rerooted=reroot(tree,anch)
#         tm.toc()
        node = rerooted[0]
        root = rerooted[1]
        
        if debugFlag:
            tm.tic()
#         listTaxaTmp=list()
        # Walk up until the current node is a direct child of root.
        while(node.parent_node is not root):
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped below.
            node_pre = node
#             if node_pre.is_leaf():
#                 listTaxaTmp.append(node_pre)
#                 
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            
            if len(chs)>2:
                # More than two children: handle every side child, then every
                # pair of side children.
                for i in range(0,chs_n):
                    ch = chs[i]
                    if (ch == node_pre):
                        # 'continue' also skips the pair loop below for this i.
                        continue
                    else:
                        if debugFlag:
                            tm.tic()    
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        for s in range(0,len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa,Q[s],taxaDict[s],e[s],anch,L1[s],L2[s],m[s],debugFlag)
                        if debugFlag:
                            tm.toc()
                    # Pair child i (leaves in listTaxa) with each later child.
                    for j in range(i+1,chs_n):
                        if (chs[i] == chs[j]) or (chs[j]==node_pre):
                            continue
                        else:
                            if debugFlag:
                                tm.tic()
                            listTaxatmp = [listTaxa,chs[j].leaf_nodes()]
                            for s in range(len(anchPoly)):
                                removeFromQuartetLentreeshAnchoredOverall(T[s],listTaxatmp,taxaDict[s],e[s],m[s])
                            if debugFlag:
                                print "adding quartets around this node takes (more than 2 children): "
                                tm.toc()
            else:
                for ch in chs:
                    if (ch==node_pre):
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        # A single leaf is skipped (no pair can be formed).
                        # NOTE(review): this skips the matching tm.toc() below
                        # when debugFlag is set.
                        if len(listTaxa) == 1:
                            continue
                        for s in range(0,len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa,Q[s],taxaDict[s],e[s],anch,L1[s],L2[s],m[s],debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (less than two children): "
                            tm.toc()
            if debugFlag:
                print "finding quartets on this node is finished!"
                tm.toc()
    # Convert the accumulated tables of every entry into frequencies.
    for s in range(len(anchPoly)):
        frqTmp=makeTrueFrqOverall(Q[s],T[s],clades[s],anch,C[s],listPoly[s])
        h = [e[s],frqTmp,anch,taxa_list[s],C[s]]
        frq.append(h)
    if debugFlag:
        print "time for counting is: "
        tm.toc()
    
    return frq
def findAnchoredQuartets(anch,trees,taxa,out,debugFlag):
    """Count quartets anchored at ``anch`` over all ``trees``.

    Empty tables are built with ``buildEmptyQuartets``.  Every tree is
    rerooted with ``reroot(tree, anch)`` and walked from the returned start
    node up to the returned root; at each ancestor the leaves under the
    children other than the one the walk came from are fed to
    ``addQuartetsAnchored`` (and, for nodes with more than two children,
    pairs of such children to ``removeFromQuartetLentreeshAnchored``).
    ``makeTrueFrq`` then turns the tables into the returned frequencies.

    Parameters:
        anch: the anchors (sorted here before use).
        trees: sequence of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartets``.
        out: not used in this function.
        debugFlag: when true, print timing information using ``tm``.

    Returns:
        The value returned by ``makeTrueFrq(Q, T, taxaT, anch)``.
    """
    anch = sorted(anch)
    n = len(trees)
    if debugFlag:
        tm.tic()
    [Q,T,taxaDict,taxaT] = buildEmptyQuartets(anch,taxa,n)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
#         print "time for re-rooting is: "
#         tm.tic()
        # reroot returns a pair: [0] is the node to start from, [1] the root.
        rerooted=reroot(tree,anch)
#         tm.toc()
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
#         listTaxaTmp=list()
        # Walk up until the current node is a direct child of root.
        while(node.parent_node is not root):
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped below.
            node_pre = node
#             if node_pre.is_leaf():
#                 listTaxaTmp.append(node_pre)
#                 
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            
            if len(chs)>2:
                # More than two children: handle every side child, then every
                # pair of side children.
                for i in range(0,chs_n):
                    ch = chs[i]
                    if (ch == node_pre):
                        # 'continue' also skips the pair loop below for this i.
                        continue
                    else:
                        if debugFlag:
                            tm.tic()    
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        addQuartetsAnchored(ch, listTaxa,Q,taxaDict,debugFlag)
                        if debugFlag:
                            tm.toc()
                    # Pair child i (leaves in listTaxa) with each later child.
                    for j in range(i+1,chs_n):
                        if (chs[i] == chs[j]) or (chs[j]==node_pre):
                            continue
                        else:
                            if debugFlag:
                                tm.tic()
                            listTaxatmp = [listTaxa,chs[j].leaf_nodes()]
                            removeFromQuartetLentreeshAnchored(T,listTaxatmp,taxaDict)
                            if debugFlag:
                                print "adding quartets around this node takes (more than 2 children): "
                                tm.toc()
            else:
                for ch in chs:
                    if (ch==node_pre):
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        addQuartetsAnchored(ch, listTaxa,Q,taxaDict,debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (less than two children): "
                            tm.toc()
            if debugFlag:
                print "finding quartets on this node is finished!"
                tm.toc()
        # Closes the per-tree tic() issued before the walk.
        if debugFlag:
            print "time for counting is: "
            tm.toc()
    frq=makeTrueFrq(Q,T,taxaT,anch)
    return frq
# Example #12
# 0
def print_contours(df=pd.DataFrame.from_csv(data_file),dimkeys=dimkeys,marginal_string = marginal_string,copula_list=copula_list,segment_marginal=None):
    """
    Save a 2-D histogram of the data and PDF contour plots for each copula.

    The frame is coerced to numeric, rows with missing values in any column
    named in ``dimkeys`` (or its ``'FH' + key`` counterpart) are dropped, and
    the last remaining index value is used as the cut-off date.  The samples
    for every key are the rows strictly before that date, either taken
    directly from ``df`` or, when ``segment_marginal == 'segmented'``, from
    ``segmenter.OuterSegmenter``.  A marginal is fitted per key, a histogram
    of the first two keys is written to
    ``./copula_experiments/BPA/histogram2d_errors.png`` and, for every name
    in ``copula_list``, the copula PDF is evaluated on a 100 x 100 grid over
    [-100, 100]^2 and its contours are written to a PNG in the same folder.

    NOTE: the default values (including the CSV load for ``df``) are
    evaluated once, when the function is defined, from module-level names.

    Parameters:
        df: input data frame.
        dimkeys: sequence of at least two column names (string keys).
        marginal_string: name passed to ``distribution_factory`` for the
            marginals.
        copula_list: iterable of copula names for ``distribution_factory``.
        segment_marginal: ``'segmented'`` to build the samples with the
            segmenter; any other value uses the plain history.
    """
    df = df.convert_objects(convert_numeric=True)
    subset = []
    for key in dimkeys:
        subset.append(key)
        subset.append('FH' + key)
    df = df.dropna(axis=0, how='any', subset=subset)
    mydt = df.index[-1]

    # Samples per dimension, restricted to the rows before the cut-off date.
    if segment_marginal == 'segmented':
        samples = dict.fromkeys(dimkeys)
        for key in dimkeys:
            segmented_df = segmenter.OuterSegmenter(df.loc[df.index < mydt],df,
                                                    'copula_experiments/segment_input_wind_FH' + str(key) + '.txt',
                                                    mydt).retval_dataframe()
            samples[key] = segmented_df[key].values.tolist()
    else:
        samples = dict.fromkeys(dimkeys)
        for key in dimkeys:
            samples[key] = df[key].loc[df.index < mydt].values.tolist()

    marginals = dict.fromkeys(dimkeys)
    for key in dimkeys:
        marg_class = distribution_factory(marginal_string)
        marginals[key] = marg_class(samples[key])

    # NOTE(review): copula_string is not a parameter and must exist at module
    # level; the object built here is never used afterwards.  Kept in case
    # the construction is relied on as a sanity check -- confirm and remove.
    distr_class = distribution_factory(copula_string)
    distr_class(dimkeys, samples, marginals)

    xedges = np.arange(-100, 100)
    yedges = np.arange(-100, 100)

    H, xedges, yedges = np.histogram2d(samples[dimkeys[0]],samples[dimkeys[1]], bins=(xedges, yedges))
    H = H.T  # Let each row list bins with common y range.

    # 'lower' is the valid spelling of the origin option ('low' is rejected
    # by current matplotlib).
    plt.imshow(H, interpolation='nearest', origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
    plt.xlabel('Error at '+str(dimkeys[0])+' forecast')
    plt.ylabel('Errors at '+str(dimkeys[1])+ ' forecast')
    plt.title('Histogram or errors')
    plt.savefig('./copula_experiments/BPA/histogram2d_errors.png')
    plt.clf()

    # The evaluation grid is the same for every copula, so build it once.
    x, y = np.meshgrid(np.linspace(-100, 100, 100), np.linspace(-100, 100, 100))

    for copula_name in copula_list:
        print(copula_name)
        distr_class = distribution_factory(copula_name)
        copula = distr_class(dimkeys,samples,marginals)

        z = np.zeros((len(x),len(y)))
        tic()
        for i in range(len(x)):
            for j in range(len(y)):
                z[i][j]= copula.pdf({dimkeys[0]:x[i][j],dimkeys[1]:y[i][j]})
        toc()

        plt.contour(x, y, z, 10)
        plt.xlabel('Errors at '+str(dimkeys[0]))
        plt.ylabel('Errors at '+str(dimkeys[1]))
        plt.title('PDF Contours with '+copula_name+' and '+marginal_string+' marginals')
        plt.savefig('./copula_experiments/BPA/pdf_contours_'+copula_name+'_'+marginal_string+'.png')
        plt.clf()
def findTrueAverageTableAnchoringAddDistances(frq, anch, list_taxa, N, method, met):
    """Fill the anchored quartet table with an average over taxon pairs.

    For every unordered pair of clades (keys of ``list_taxa`` not in ``N``)
    and every taxon pair drawn one from each clade, the ratio
    ``v[0] / v[1]`` of ``frq[genKey(sorted([taxon_i, taxon_j]), anch)]`` is
    collected; a key missing from ``frq`` counts as ``[0.5, numG]`` where
    ``numG`` is the largest ``v[1]`` in ``frq``.  With ``met == "log"`` the
    collected value is ``-log(ratio)`` instead of the ratio.  The values of
    each clade pair are then combined according to ``method`` and, for
    ``"log"``, mapped back with ``exp(-x)``.

    Parameters:
        frq: non-empty mapping from ``genKey`` keys to ``[num, den]`` lists.
        anch: the two anchor names (any iterable; it is sorted here).
        list_taxa: dict mapping clade name to an iterable of its taxa.
        N: container of clade names to skip.
        method: ``"gmean"`` (geometric mean), ``"mean"`` (arithmetic mean);
            any other value selects the root mean square.
        met: ``"freq"`` or ``"log"``.

    Returns:
        The table created by ``initializeQuartetTable(anch, list_taxa)`` with
        the combined values stored under "/"-joined sorted keys.

    Raises:
        ValueError: if a value has to be recorded and ``met`` is neither
            ``"freq"`` nor ``"log"``; also raised by ``max`` when ``frq`` is
            empty.
    """
    tm.tic()
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(list(anch))
    # list() so the keys can be indexed on Python 3 as well as Python 2.
    lst_taxa = list(list_taxa.keys())
    TotalKey = dict()
    n = len(lst_taxa)
    numG = max(v[1] for v in frq.values())
    skipClades = N
    for i in range(0, n):
        if lst_taxa[i] in skipClades:
            continue
        for j in range(i + 1, n):
            if lst_taxa[j] in skipClades:
                continue
            # The table key depends only on the clade pair, not on the taxa.
            key_inv = "/".join(sorted([lst_taxa[i], lst_taxa[j], anch[0], anch[1]]))
            for taxon_i in list_taxa[lst_taxa[i]]:
                for taxon_j in list_taxa[lst_taxa[j]]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    if key_orig in frq:
                        v = frq[key_orig]
                    else:
                        v = [0.5, numG]
                    v_inv = float(v[0]) / v[1]
                    if met == "freq":
                        value = v_inv
                    elif met == "log":
                        value = -np.log(1. * v_inv)
                    else:
                        # Previously this surfaced as an UnboundLocalError.
                        raise ValueError("met must be 'freq' or 'log', got %r" % (met,))
                    TotalKey.setdefault(key_inv, []).append(value)
    for q, v2 in TotalKey.items():
        if method == "gmean":
            combined = stats.gmean(v2)
        elif method == "mean":
            combined = mean(v2)
        else:
            combined = sqrt(mean(square(v2)))
        # met is known to be valid here because TotalKey is non-empty.
        if met == "log":
            TotalKeyf[q] = np.exp(-combined)
        else:
            TotalKeyf[q] = combined
    tm.toc()
    return TotalKeyf
# Example #14
# 0
# Script fragment: validate the command-line options, load the gene trees,
# build and save their consensus tree, then locate its polytomies.
# `options`, `gt`, `verbose`, `thr` and `outpath` are defined earlier in the
# script (not visible in this fragment).

# Remember whether an input file name was supplied.
if options.filename:
	readFromFile = True
else:
	readFromFile = False
# Both the gene-tree file and the output location are mandatory.
if ( not options.gt  or not options.out):
	sys.exit("Please enter genetrees file, and output folder location")

# Expand environment variables and "~" in the gene-tree path.
src_fpath = os.path.expanduser(os.path.expandvars(gt))

# Read the trees using the 'newick' schema.
trees = dendropy.TreeList.get_from_path(src_fpath, 'newick')

# new_labels is reused below by changeLabelsToNames on the consensus copy.
(converted_labels,new_labels) = tstt.changeLabelsToNumbers(trees,verbose)

print "time to compute consensus is: "
tm.tic()
con_tree = trees.consensus(min_freq=thr)   
tm.toc()

# mkstemp returns (file descriptor, path); the consensus tree is written to
# that path from a deep copy, so con_tree itself is not relabelled here.
ftmpt=tempfile.mkstemp(suffix='.nwk', prefix="consensusTree", dir=outpath, text=None)
con_tree2 = copy.deepcopy(con_tree)
tstt.changeLabelsToNames(con_tree2,new_labels,verbose)

con_tree2.write(path=ftmpt[1],schema="newick",suppress_rooting=True)

# Close the descriptor opened by mkstemp.
os.close(ftmpt[0])
tstt.labelNodes(con_tree)

(to_resolve,maxPolyOrder) = tstt.findPolytomies(con_tree)
taxa = list()
for e in con_tree.leaf_nodes():
import timer

#modeling
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, concatenate, Conv2DTranspose

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import TensorBoard

# Default matplotlib figure size.
mpl.rcParams['figure.figsize'] = (10, 10)
# Start the timer at import time.
timer.tic()

# NOTE(review): hard-coded, machine-specific data location.
data_path = '/home/jliu0604/AML/satelite_image/data'
# presumably the edge length of the image patches -- confirm against its users
PATCH_SIZE = 160


def superimpose_stlite_mask(st_img, mask, color=(10, 0, 0)):
    """Overlay a colour-scaled mask on an image rescaled to the 0-255 range.

    The image is multiplied by ``255 / st_img.max()``; the mask is turned
    into three planes ``mask * color[k]`` stacked along a new leading axis;
    the two are added, clipped to [0, 255] and converted to ``uint8``.

    Parameters:
        st_img: numeric array that broadcasts against ``(3,) + mask.shape``
            (presumably a channel-first three-band image -- confirm).
        mask: numeric array; non-zero entries receive the colour offset.
        color: three per-plane multipliers applied to the mask.

    Returns:
        A ``uint8`` array with the broadcast shape of the image and the
        stacked mask.  An image whose maximum is 0 is treated as all black
        instead of producing NaN from a division by zero.
    """
    peak = st_img.max()
    if peak == 0:
        # Nothing to rescale; avoids 0/0 -> NaN and the undefined uint8 cast.
        st_normed = np.zeros_like(st_img, dtype=float)
    else:
        st_normed = 255.0 * st_img / peak
    # One plane per colour component, stacked on a new first axis.
    colored_mask = np.stack(
        [mask * color[0], mask * color[1], mask * color[2]])
    combined = (st_normed + colored_mask).clip(0, 255).astype(np.uint8)
    return combined