def findTrueAverageTableAnchoringAddDistancesOverall(frq, anch, list_taxa, N,method, met):
    """Build the per-clade-pair average table from pre-aggregated frequencies.

    For every unordered pair of clades (keys of ``list_taxa``) in which
    neither clade is in ``N``, the entry ``frq[genKey(anch, pair)]`` is read
    as ``[total]`` or ``[total, count]`` (a missing count is treated as 1)
    and ``total / count`` is stored in the table returned by
    ``initializeQuartetTable`` under the "/"-joined sorted key of the two
    clades and the two anchors.

    Parameters:
        frq: mapping from ``genKey`` keys to ``[total]`` / ``[total, count]``.
            It is only read; the entries are never modified.
        anch: the two anchors (any iterable; sorted internally).
        list_taxa: mapping whose keys are the clade names.
        N: container of clade names to skip.
        method, met: unused here; kept for signature compatibility with the
            sibling table builders.

    Returns:
        The table from ``initializeQuartetTable`` with one averaged value
        per visited clade pair.

    Raises:
        KeyError: if a required key is missing from ``frq``.
    """
    tm.tic()
    # The table is initialised with the anchors exactly as passed in
    # (before sorting), as in the original code.
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)

    # list(...) keeps this working on Python 3, where keys() is a view.
    kept = [clade for clade in list(list_taxa.keys()) if clade not in N]

    sums = dict()
    for pos, clade_i in enumerate(kept):
        for clade_j in kept[pos + 1:]:
            key_inv = "/".join(sorted([clade_i, clade_j, anch[0], anch[1]]))
            key_orig = genKey(anch, sorted([clade_i, clade_j]))
            entry = frq[key_orig]
            total = entry[0]
            count = 1 if len(entry) == 1 else entry[1]
            # Accumulate into a fresh list so the caller's frq entries are
            # never appended to or summed into.
            acc = sums.setdefault(key_inv, [0, 0])
            acc[0] += total
            acc[1] += count

    for key_inv, (total, count) in sums.items():
        # float() forces true division even for integer totals on Python 2.
        TotalKeyf[key_inv] = float(total) / count
    tm.toc()
    return TotalKeyf
Ejemplo n.º 2
0
def findAllChildrenPairsOverall(listTaxa,taxaDict,L1,L2,debugFlag):
    """Map every node in ``listTaxa`` to its value in ``taxaDict``.

    Parameters:
        listTaxa: iterable of nodes exposing ``node.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        L1, L2: unused; kept for signature compatibility.
        debugFlag: when true, time the lookup with ``tm`` and print a header.

    Returns:
        A list with ``taxaDict[node.taxon.label]`` for each node, in order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Call form prints the same text on Python 2 and Python 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
Ejemplo n.º 3
0
def findAllChildrenPairs(listTaxa,taxaDict,debugFlag=False):
    """Map every node in ``listTaxa`` to its value in ``taxaDict``.

    Parameters:
        listTaxa: iterable of nodes exposing ``node.taxon.label``.
        taxaDict: mapping from taxon label to the value to return.
        debugFlag: when true, time the lookup with ``tm`` and print a header.
            Defaults to False so two-argument callers keep working.

    Returns:
        A list with ``taxaDict[node.taxon.label]`` for each node, in order.

    Raises:
        KeyError: if a label is missing from ``taxaDict``.
    """
    if debugFlag:
        tm.tic()
    listTaxaLabels = [taxaDict[t.taxon.label] for t in listTaxa]
    if debugFlag:
        # Call form prints the same text on Python 2 and Python 3.
        print("Time to find indeces")
        tm.toc()
    return listTaxaLabels
Ejemplo n.º 4
0
    def test_pyomo_with_sort(self):
        """Print and time ``emd_sort`` against ``emd_pyomo`` on the same samples.

        Both functions are run first on the samples as generated and then on
        the sorted samples; each result is printed between ``tic()`` and
        ``toc()``. Nothing is asserted, so the output is for manual comparison.
        """
        n = 100
        p = 1
        normal1 = np.random.randn(n)
        # The next three samples are not used below; they are still drawn so
        # the number of values taken from NumPy's global RNG is unchanged.
        normal2 = np.random.randn(n)
        uniform1 = np.random.rand(n)
        uniform2 = np.random.rand(n)
        linearprog = np.asarray(range(n)) / n
        U = linearprog
        V = normal1

        print('Unsorted')
        print('EMD sort')
        tic()
        print(emd_sort(U, V, p))
        toc()
        print('EMD pyomo')
        tic()
        print(emd_pyomo(U, V, p)[0])
        toc()
        print(' ')
        # Header now precedes both sorted measurements, mirroring 'Unsorted'.
        print("sorted")
        print('EMD sort')
        tic()
        print(emd_sort(np.sort(U), np.sort(V), p))
        toc()
        print('EMD pyomo')
        tic()
        print(emd_pyomo(np.sort(U), np.sort(V), p)[0])
        toc()
Ejemplo n.º 5
0
    def test_quick(self):
        """Fit a ``UnivariateEpiSplineDistribution`` and print ten CDF values.

        Sets the ``gosm_options`` globals, makes sure the configured output
        directory exists, fits the distribution on ``np.arange(300)`` and
        prints ``cdf(i)`` for ``i`` in 0..9 between ``tic()`` and ``toc()``.
        Nothing is asserted.
        """
        print('Warning : this code must be called with runner.py')
        # Copy this code at the beginning of copula_test to see if it works
        # And enter python3 runner.py copula_experiments/run_test.txt
        gosm_options.set_globals()

        # Create the output directory. makedirs(exist_ok=True) also creates
        # missing parents and has no gap between the check and the creation.
        os.makedirs(gosm_options.output_directory, exist_ok=True)

        X = np.arange(300)
        tic()
        mydistr = UnivariateEpiSplineDistribution(X)
        for i in range(10):
            print(mydistr.cdf(i))
        toc()
Ejemplo n.º 6
0
def addQuartets( ch, listTaxa,Q,taxaDict,debugFlag):
    """Increment ``Q`` symmetrically for every pair of taxa in ``listTaxa``.

    Parameters:
        ch: unused; kept for signature compatibility.
        listTaxa: nodes exposing ``node.taxon.label``.
        Q: two-level indexable table; ``Q[a][b]`` and ``Q[b][a]`` are each
            incremented by 1 for every pair of positions in the index list.
        taxaDict: mapping from taxon label to the index used in ``Q``.
        debugFlag: when true, print timing information through ``tm``.

    Returns:
        None; ``Q`` is updated in place.
    """
    if debugFlag:
        tm.tic()
    # findAllChildrenPairs requires debugFlag in its visible definition; the
    # original two-argument call raised TypeError against it.
    pairs = findAllChildrenPairs(listTaxa, taxaDict, debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    if debugFlag:
        tm.tic()
        # Floor division keeps the count an integer on Python 3 as well.
        print("length of these pairs is: "+str(len(pairs)*(len(pairs)-1)//2))
    for pos, first in enumerate(pairs):
        for second in pairs[pos + 1:]:
            Q[first][second] += 1
            Q[second][first] += 1
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
def findTrueAverageTableAnchoringAddDistancesOverallFromFile(frq, anch, list_taxa, N,method, met):
    """Build the per-clade-pair average table from per-taxon-pair frequencies.

    For every unordered pair of clades (keys of ``list_taxa``) in which
    neither clade is in ``N``, every combination of one taxon from each clade
    is looked up as ``frq[genKey(sorted_pair, anch)]``. A one-element entry
    ``[total]`` contributes ``(total, 1)``; a longer entry contributes
    ``(entry[0] - 0.5, entry[1] - 1.5)``. The contributions are summed per
    clade pair and the stored value is ``(sum_total + 0.5) / (sum_count + 1.5)``.

    Parameters:
        frq: mapping from ``genKey`` keys to ``[total]`` / ``[total, count]``.
            It is only read; the entries are never modified.
        anch: the two anchors (any iterable; sorted internally).
        list_taxa: mapping from clade name to an iterable of its taxa.
        N: container of clade names to skip.
        method, met: unused here; kept for signature compatibility with the
            sibling table builders.

    Returns:
        The table from ``initializeQuartetTable`` with one value per visited
        clade pair, keyed by the "/"-joined sorted clade and anchor names.

    Raises:
        KeyError: if a required key is missing from ``frq``.
    """
    tm.tic()
    # The table is initialised with the anchors exactly as passed in
    # (before sorting), as in the original code.
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)

    # list(...) keeps this working on Python 3, where keys() is a view.
    kept = [clade for clade in list(list_taxa.keys()) if clade not in N]

    sums = dict()
    for pos, clade_i in enumerate(kept):
        for clade_j in kept[pos + 1:]:
            # The output key depends only on the clade pair, so build it once.
            key_inv = "/".join(sorted([clade_i, clade_j, anch[0], anch[1]]))
            for taxon_i in list_taxa[clade_i]:
                for taxon_j in list_taxa[clade_j]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    entry = frq[key_orig]
                    if len(entry) == 1:
                        total, count = entry[0], 1
                    else:
                        # Work on local copies: the original subtracted in
                        # place, so a second call saw already-shifted input.
                        total, count = entry[0] - 0.5, entry[1] - 1.5
                    acc = sums.setdefault(key_inv, [0, 0])
                    acc[0] += total
                    acc[1] += count

    for key_inv, (total, count) in sums.items():
        TotalKeyf[key_inv] = (total + 0.5) / (count + 1.5)
    tm.toc()
    return TotalKeyf
Ejemplo n.º 8
0
def addQuartetsAnchoredOverall(listTaxa,Q,taxaDict,e,anch,L1,L2,m,debugFlag):
    """Add the pairwise products of the ``countNum`` counts into ``Q``.

    The taxa in ``listTaxa`` are mapped through ``taxaDict`` by
    ``findAllChildrenPairsOverall`` and passed to ``countNum(pairs, m)``.
    For every ``i < j`` whose counts are both non-zero, ``Q[i][j]`` is
    increased by ``counts[i] * counts[j]``.

    Parameters:
        listTaxa: nodes exposing ``node.taxon.label``.
        Q: two-level indexable table, updated in place (upper triangle only).
        taxaDict: mapping from taxon label to index.
        e, anch: unused here; kept for signature compatibility.
        L1, L2: forwarded to ``findAllChildrenPairsOverall``.
        m: forwarded to ``countNum``.
        debugFlag: when true, print timing information through ``tm``.

    Returns:
        None.
    """
    if debugFlag:
        tm.tic()
    pairs = findAllChildrenPairsOverall(listTaxa,taxaDict,L1,L2,debugFlag)
    if debugFlag:
        print("Time to find all pairs: ")
        tm.toc()
    if debugFlag:
        tm.tic()
        # Floor division keeps the count an integer on Python 3 as well.
        print("length of these pairs is: "+str(len(pairs)*(len(pairs)-1)//2))
    counts = countNum(pairs,m)
    total = len(counts)
    for i in range(total):
        count_i = counts[i]
        if count_i == 0:
            continue
        for j in range(i + 1, total):
            count_j = counts[j]
            if count_j == 0:
                continue
            Q[i][j] += count_i * count_j
    if debugFlag:
        print("Time to add found quartets to the dictionary: ")
        tm.toc()
    return
Ejemplo n.º 9
0
def findAnchoredQuartetsOverall(anchPoly, trees,taxa, outpath,debugFlag):
    """Count anchored quartets over all trees for every entry of ``anchPoly``.

    Each entry of ``anchPoly`` is unpacked as
    ``[e, C, anch, taxa_list, taxa_inv]``. For every entry an empty set of
    tables is created with ``buildEmptyQuartetsOverall``. Every tree is then
    re-rooted with ``reroot(tree, anch)`` and walked from the returned start
    node towards the returned root; at each step the tables of every entry
    are updated through ``addQuartetsAnchoredOverall`` and, at nodes with
    more than two children, ``removeFromQuartetLentreeshAnchoredOverall``.

    Parameters:
        anchPoly: non-empty sequence of 5-element entries (see above).
        trees: sized iterable of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartetsOverall``.
        outpath: not used in this function.
        debugFlag: when true, print timing information through ``tm``.

    Returns:
        A list with one ``[e, frq, anch, taxa_list, C]`` element per entry of
        ``anchPoly``, where ``frq`` is the result of ``makeTrueFrqOverall``.
    """
    n = len(trees)
#     if debugFlag:
    [_,_,anch,_,_]  = anchPoly[0]
    anch = sorted(anch)
    frq = list()
    if debugFlag:
        tm.tic()
    # Parallel lists: position s holds the state belonging to anchPoly[s].
    Q = list()
    T = list()
    taxaDict = list()
    L1 = list()
    L2 = list()
    e = list()
    m = list()
    clades = list()
    listPoly = list()
    taxa_list = list()
    taxa_inv = list()
    C = list()
    for s in range(len(anchPoly)):
        # NOTE(review): this rebinds `anch`, so after the loop it holds the
        # (unsorted) anchors of the LAST entry, not the sorted value computed
        # above. That value is what reroot() and the result below receive.
        # Presumably every entry shares the same anchors - confirm.
        [ett,Ctt,anch,taxa_listtt,taxa_invtt]  = anchPoly[s]
        [Qt,Tt,taxaDictt,cladest,L1t,L2t,mt,listPolyt] = buildEmptyQuartetsOverall(anch,taxa_listtt,taxa_invtt,Ctt,ett,taxa,n)
        Q.append(Qt)
        T.append(Tt)
        taxa_inv.append(taxa_invtt)
        taxa_list.append(taxa_listtt)
        taxaDict.append(taxaDictt)
        clades.append(cladest)
        L1.append(L1t)
        L2.append(L2t)
        e.append(ett)
        m.append(mt)
        C.append(Ctt)
        listPoly.append(listPolyt)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
        # reroot() returns an indexable pair: [0] the node the walk starts
        # from, [1] the root at which the walk stops.
        rerooted=reroot(tree,anch)
#         tm.toc()
        node = rerooted[0]
        root = rerooted[1]
        
        if debugFlag:
            tm.tic()
#         listTaxaTmp=list()
        # Climb one parent per iteration until the parent is the root.
        while(node.parent_node is not root):
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped below.
            node_pre = node
#             if node_pre.is_leaf():
#                 listTaxaTmp.append(node_pre)
#                 
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            
            if len(chs)>2:
                # Node with more than two children.
                for i in range(0,chs_n):
                    ch = chs[i]
                    if (ch == node_pre):
                        # This `continue` also skips the j-loop below for the
                        # child we arrived from.
                        continue
                    else:
                        if debugFlag:
                            tm.tic()    
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        for s in range(0,len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa,Q[s],taxaDict[s],e[s],anch,L1[s],L2[s],m[s],debugFlag)
                        if debugFlag:
                            tm.toc()
                    # Pair this child's leaves with the leaves of every later
                    # sibling (except the one we came from) and update T.
                    for j in range(i+1,chs_n):
                        if (chs[i] == chs[j]) or (chs[j]==node_pre):
                            continue
                        else:
                            if debugFlag:
                                tm.tic()
                            listTaxatmp = [listTaxa,chs[j].leaf_nodes()]
                            for s in range(len(anchPoly)):
                                removeFromQuartetLentreeshAnchoredOverall(T[s],listTaxatmp,taxaDict[s],e[s],m[s])
                            if debugFlag:
                                print "adding quartets around this node takes (more than 2 children): "
                                tm.toc()
            else:
                # Node with at most two children.
                for ch in chs:
                    if (ch==node_pre):
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        if len(listTaxa) == 1:
                            # Single-leaf sibling is skipped.
                            # NOTE(review): with debugFlag set, the tm.tic()
                            # just above has no matching tm.toc() on this path.
                            continue
                        for s in range(0,len(anchPoly)):
                            addQuartetsAnchoredOverall(listTaxa,Q[s],taxaDict[s],e[s],anch,L1[s],L2[s],m[s],debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (less than two children): "
                            tm.toc()
            if debugFlag:
                print "finding quartets on this node is finished!"
                tm.toc()
    # Turn the accumulated tables of each entry into its frequency result.
    for s in range(len(anchPoly)):
        frqTmp=makeTrueFrqOverall(Q[s],T[s],clades[s],anch,C[s],listPoly[s])
        h = [e[s],frqTmp,anch,taxa_list[s],C[s]]
        frq.append(h)
    if debugFlag:
        print "time for counting is: "
        tm.toc()
    
    return frq
Ejemplo n.º 10
0
def findAnchoredQuartets(anch,trees,taxa,out,debugFlag):
    """Count quartets anchored on ``anch`` over all trees.

    Empty tables are created with ``buildEmptyQuartets``. Every tree is
    re-rooted with ``reroot(tree, anch)`` and walked from the returned start
    node towards the returned root; at each step the tables are updated
    through ``addQuartetsAnchored`` and, at nodes with more than two
    children, ``removeFromQuartetLentreeshAnchored``.

    Parameters:
        anch: the anchors (sorted internally before use).
        trees: sized iterable of trees accepted by ``reroot``.
        taxa: forwarded to ``buildEmptyQuartets``.
        out: not used in this function.
        debugFlag: when true, print timing information through ``tm``.

    Returns:
        The result of ``makeTrueFrq(Q, T, taxaT, anch)``.
    """
    anch = sorted(anch)
    n = len(trees)
    if debugFlag:
        tm.tic()
    [Q,T,taxaDict,taxaT] = buildEmptyQuartets(anch,taxa,n)
    if debugFlag:
        print "Initializing arrays takes: "
        tm.toc()
    for tree in trees:
#         print "time for re-rooting is: "
#         tm.tic()
        # reroot() returns an indexable pair: [0] the node the walk starts
        # from, [1] the root at which the walk stops.
        rerooted=reroot(tree,anch)
#         tm.toc()
        node = rerooted[0]
        root = rerooted[1]
        if debugFlag:
            tm.tic()
#         listTaxaTmp=list()
        # Climb one parent per iteration until the parent is the root.
        while(node.parent_node is not root):
            if debugFlag:
                tm.tic()
            # Remember the child we came from so its subtree is skipped below.
            node_pre = node
#             if node_pre.is_leaf():
#                 listTaxaTmp.append(node_pre)
#                 
            node = node.parent_node
            if debugFlag:
                print "finding children of this node takes: "
                tm.tic()
            chs = node.child_nodes()
            if debugFlag:
                tm.toc()
            chs_n = len(chs)
            
            if len(chs)>2:
                # Node with more than two children.
                for i in range(0,chs_n):
                    ch = chs[i]
                    if (ch == node_pre):
                        # This `continue` also skips the j-loop below for the
                        # child we arrived from.
                        continue
                    else:
                        if debugFlag:
                            tm.tic()    
                        listTaxa = ch.leaf_nodes()
                        if debugFlag:
                            print "adding quartets around this node takes (more than 2 children): "
                        addQuartetsAnchored(ch, listTaxa,Q,taxaDict,debugFlag)
                        if debugFlag:
                            tm.toc()
                    # Pair this child's leaves with the leaves of every later
                    # sibling (except the one we came from) and update T.
                    for j in range(i+1,chs_n):
                        if (chs[i] == chs[j]) or (chs[j]==node_pre):
                            continue
                        else:
                            if debugFlag:
                                tm.tic()
                            listTaxatmp = [listTaxa,chs[j].leaf_nodes()]
                            removeFromQuartetLentreeshAnchored(T,listTaxatmp,taxaDict)
                            if debugFlag:
                                print "adding quartets around this node takes (more than 2 children): "
                                tm.toc()
            else:
                # Node with at most two children.
                for ch in chs:
                    if (ch==node_pre):
                        continue
                    else:
                        if debugFlag:
                            tm.tic()
                        listTaxa = ch.leaf_nodes()
                        addQuartetsAnchored(ch, listTaxa,Q,taxaDict,debugFlag)
                        if debugFlag:
                            print "adding quartets around this node takes (less than two children): "
                            tm.toc()
            if debugFlag:
                print "finding quartets on this node is finished!"
                tm.toc()
        # Printed once per tree (this block is inside the `for tree` loop).
        if debugFlag:
            print "time for counting is: "
            tm.toc()
    frq=makeTrueFrq(Q,T,taxaT,anch)
    return frq
Ejemplo n.º 11
0
def print_contours(df=pd.DataFrame.from_csv(data_file),dimkeys=dimkeys,marginal_string = marginal_string,copula_list=copula_list,segment_marginal=None):
    """
    Save a 2-D histogram of the data and the PDF contours of each copula.

    Rows of ``df`` with a missing value in any ``dimkeys`` column or its
    ``'FH' + key`` companion are dropped. The last remaining index value is
    taken as the reference datetime, and the samples of each dimension are
    the rows strictly before it (or the output of
    ``segmenter.OuterSegmenter`` when ``segment_marginal == 'segmented'``).
    One marginal of type ``marginal_string`` is fitted per dimension. A
    histogram of the first two dimensions is written to
    ``./copula_experiments/BPA/histogram2d_errors.png``. For each name in
    ``copula_list``, the copula PDF is evaluated on a 100 x 100 grid over
    [-100, 100]^2 and its contours are written to
    ``./copula_experiments/BPA/pdf_contours_<copula>_<marginal>.png``.

    Parameters:
        df: input data frame.
        dimkeys: column names; at least two are required.
        marginal_string: name passed to ``distribution_factory`` for marginals.
        copula_list: names passed to ``distribution_factory`` for copulas.
        segment_marginal: ``'segmented'`` to segment the samples first.

    NOTE(review): the defaults are evaluated when this ``def`` statement
    runs, so ``pd.DataFrame.from_csv(data_file)`` is read at that point.
    ``from_csv`` and ``convert_objects`` look like legacy pandas APIs that
    newer releases no longer provide - confirm the pinned pandas version.
    """
    df = df.convert_objects(convert_numeric=True)
    subset = []
    for key in dimkeys:
        subset.append(key)
        subset.append('FH' + key)
    df = df.dropna(axis=0, how='any', subset=subset)
    mydt = df.index[len(df.index)-1]

    # Build the per-dimension samples exactly once. The original filled this
    # dict from the full frame first and then discarded it in both branches.
    samples = dict.fromkeys(dimkeys)
    if segment_marginal == 'segmented':
        for key in dimkeys:
            segmented_df = segmenter.OuterSegmenter(df.loc[df.index < mydt],df,
                                                    'copula_experiments/segment_input_wind_FH' + str(key) + '.txt',
                                                    mydt).retval_dataframe()
            samples[key] = segmented_df[key].values.tolist()
    else:
        for key in dimkeys:
            samples[key] = df[key].loc[df.index < mydt].values.tolist()

    marginals = dict.fromkeys(dimkeys)
    for key in dimkeys:
        marg_class = distribution_factory(marginal_string)
        marginals[key] = marg_class(samples[key])

    # NOTE(review): `copula_string` is not a parameter (presumably a
    # module-level name) and the fitted object is never used; the call is
    # kept only so any side effect or error it produces is preserved.
    distr_class = distribution_factory(copula_string)
    distr_class(dimkeys, samples, marginals)

    xedges = np.arange(-100, 100)
    yedges = np.arange(-100, 100)

    H, xedges, yedges = np.histogram2d(samples[dimkeys[0]],samples[dimkeys[1]], bins=(xedges, yedges))
    H = H.T  # Let each row list bins with common y range.

    # 'lower' is the documented spelling; 'low' is not an accepted value.
    plt.imshow(H, interpolation='nearest', origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
    plt.xlabel('Error at '+str(dimkeys[0])+' forecast')
    plt.ylabel('Errors at '+str(dimkeys[1])+ ' forecast')
    plt.title('Histogram or errors')
    plt.savefig('./copula_experiments/BPA/histogram2d_errors.png')
    plt.clf()

    for copula_name in copula_list:
        print(copula_name)
        distr_class = distribution_factory(copula_name)
        copula = distr_class(dimkeys,samples,marginals)

        x, y = np.meshgrid(np.linspace(-100, 100, 100), np.linspace(-100, 100, 100))

        z = np.zeros((len(x),len(y)))
        tic()
        for i in range(len(x)):
            for j in range(len(y)):
                z[i][j] = copula.pdf({dimkeys[0]:x[i][j],dimkeys[1]:y[i][j]})
        toc()

        plt.contour(x, y, z, 10)
        plt.xlabel('Errors at '+str(dimkeys[0]))
        plt.ylabel('Errors at '+str(dimkeys[1]))
        plt.title('PDF Contours with '+copula_name+' and '+marginal_string+' marginals')
        plt.savefig('./copula_experiments/BPA/pdf_contours_'+copula_name+'_'+marginal_string+'.png')
        plt.clf()
def findTrueAverageTableAnchoringAddDistances(frq, anch, list_taxa, N,method, met):
    """Build the per-clade-pair table by averaging per-taxon-pair frequencies.

    For every unordered pair of clades (keys of ``list_taxa``) in which
    neither clade is in ``N``, every combination of one taxon from each clade
    is looked up as ``frq[genKey(sorted_pair, anch)]`` and turned into the
    ratio ``entry[0] / entry[1]``. A missing key is treated as
    ``[0.5, numG]``, where ``numG`` is the largest ``entry[1]`` in ``frq``.
    The ratios (or their negative logs) of each clade pair are then
    aggregated.

    Parameters:
        frq: mapping from ``genKey`` keys to ``[count, total]`` entries.
        anch: the two anchors (any iterable; sorted internally).
        list_taxa: mapping from clade name to an iterable of its taxa.
        N: container of clade names to skip.
        method: ``"gmean"`` (``stats.gmean``), ``"mean"``, or anything else
            for the root mean square.
        met: ``"freq"`` to aggregate the ratios directly, or ``"log"`` to
            aggregate ``-log(ratio)`` and store ``exp(-aggregate)``.

    Returns:
        The table from ``initializeQuartetTable`` with one aggregated value
        per visited clade pair, keyed by the "/"-joined sorted clade and
        anchor names.

    Raises:
        ValueError: if ``frq`` is empty (from ``max``), or if ``met`` is
            neither ``"freq"`` nor ``"log"`` once a taxon pair is processed.
    """
    tm.tic()
    # The table is initialised with the anchors exactly as passed in
    # (before sorting), as in the original code.
    [TotalKeyf, _] = initializeQuartetTable(anch, list_taxa)
    anch = sorted(anch)
    numG = max(v[1] for v in frq.values())

    # list(...) keeps this working on Python 3, where keys() is a view.
    kept = [clade for clade in list(list_taxa.keys()) if clade not in N]

    collected = dict()
    for pos, clade_i in enumerate(kept):
        for clade_j in kept[pos + 1:]:
            # The output key depends only on the clade pair, so build it once.
            key_inv = "/".join(sorted([clade_i, clade_j, anch[0], anch[1]]))
            for taxon_i in list_taxa[clade_i]:
                for taxon_j in list_taxa[clade_j]:
                    key_orig = genKey(sorted([taxon_i, taxon_j]), anch)
                    if key_orig in frq:
                        entry = frq[key_orig]
                    else:
                        entry = [0.5, numG]
                    ratio = float(entry[0]) / entry[1]
                    if met == "freq":
                        value = ratio
                    elif met == "log":
                        value = -np.log(1. * ratio)
                    else:
                        # The original fell through here and hit a NameError
                        # or re-stored the previous pair's list.
                        raise ValueError("met must be 'freq' or 'log', got %r" % (met,))
                    collected.setdefault(key_inv, []).append(value)

    for key_inv, values in collected.items():
        if method == "gmean":
            aggregate = stats.gmean(values)
        elif method == "mean":
            aggregate = mean(values)
        else:
            aggregate = sqrt(mean(square(values)))
        # In "log" mode the aggregate is mapped back with exp(-x).
        TotalKeyf[key_inv] = np.exp(-aggregate) if met == "log" else aggregate
    tm.toc()
    return TotalKeyf
Ejemplo n.º 13
0
	readFromFile = True
else:
	readFromFile = False
# Both the gene-tree file and the output location are required.
if ( not options.gt  or not options.out):
	sys.exit("Please enter genetrees file, and output folder location")

# Expand environment variables and a leading "~" in the gene-tree path.
src_fpath = os.path.expanduser(os.path.expandvars(gt))

# Read the whole tree list from the file in newick format.
trees = dendropy.TreeList.get_from_path(src_fpath, 'newick')

# presumably relabels the taxa with numbers and returns the mappings needed
# to restore the names later - verify against tstt.changeLabelsToNumbers
(converted_labels,new_labels) = tstt.changeLabelsToNumbers(trees,verbose)

# Time the construction of the consensus tree (threshold `thr`).
print "time to compute consensus is: "
tm.tic()
con_tree = trees.consensus(min_freq=thr)   
tm.toc()

# mkstemp returns (file descriptor, path); the file is created in `outpath`.
ftmpt=tempfile.mkstemp(suffix='.nwk', prefix="consensusTree", dir=outpath, text=None)
# Work on a deep copy so `con_tree` itself is not passed to
# changeLabelsToNames before the copy is written out.
con_tree2 = copy.deepcopy(con_tree)
tstt.changeLabelsToNames(con_tree2,new_labels,verbose)

con_tree2.write(path=ftmpt[1],schema="newick",suppress_rooting=True)

# Close the descriptor opened by mkstemp; the file itself is kept.
os.close(ftmpt[0])
tstt.labelNodes(con_tree)

(to_resolve,maxPolyOrder) = tstt.findPolytomies(con_tree)
# Collect the label of every leaf of the consensus tree.
taxa = list()
for e in con_tree.leaf_nodes():
	taxa.append(e.taxon.label)
n = len(con_tree.leaf_nodes())