Example #1
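    # assumed imports for this excerpt (not shown in the snippet): Tree (ete2/ete3), SR, getAdjacencies, globalAdjacencyGraph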
    def test_sampleLabelings(self):
        tree = Tree("(A:1,(B:1,(C:1,(E:1,D:1)Int_1:0.5[&&NHX:ancient=1])Int_2:0.5[&&NHX:ancient=0])Int_3:1)Root;", format=1)
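        # extant gene content: C and D carry markers 3 and 4 on chromosome "one"; A, B and E are empty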
        chrom = {}
        chrom["one"] = ["3","4"]
        species = {}
        species["C"] = chrom
        chrom = {}
        chrom["one"] = ["3","4"]
        species["D"] = chrom
        chrom = {}
        chrom["one"] = []
        species["E"] = chrom
        chrom = {}
        chrom["one"] = []
        species["A"] = chrom
        chrom = {}
        chrom["one"] = []
        species["B"] = chrom

        adj = getAdjacencies.findAdjacencies(species)
        paths = getAdjacencies.findTreePaths(tree)
        internal,adjacenciesAncestral = getAdjacencies.assignAncestralAdjacencies(paths,adj,tree)
        graphs = globalAdjacencyGraph.createGraph(adj,adjacenciesAncestral)
        jointLabels, first = SR.enumJointLabelings(graphs)
        probs={"Int_1":{(6, 7):0.1},"Int_2":{(6, 7):0.1},"Int_3":{(6, 7):0.1},"Root":{(6, 7):0.1}}
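        # draw 10 independent samples (alpha=0) and print the reconstructed adjacencies of each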
        for i in range(0,10):
            validLabels, validAtNode = SR.validLabels(jointLabels,first)
            resolvedCCs = SR.sampleLabelings(tree, graphs, validAtNode, adj,probs, alpha=0)
            reconstructedAdj = SR.reconstructedAdjacencies(resolvedCCs)
            print(reconstructedAdj)
Example #2
    def test_enumJointLabelings(self):
        #print(nx.maximal_matching(self.graph))
        #print(nx.max_weight_matching(self.graph))
        joint, first = SR.enumJointLabelings([self.graph])
        self.assertEqual(len(joint[self.graph]), 15)
        valid, validAtNode = SR.validLabels(joint, first)
        self.assertEqual(len(valid[self.graph]), 8)
Example #3
def runSample(params):
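        """Run a single sampling iteration: enumerate and validate joint labelings,
        sample ancestral adjacencies, build scaffolds for each internal node and
        compute the SCJ distance for sample i."""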
        # unpack the given parameters
        ccs=params[0]
        tree=params[1]
        extantAdjacencies=params[2]
        adjacencyProbs=params[3]
        alpha=params[4]
        i=params[5]
        extantAdjacencies_species_adj=params[6]
        outputDirectory=params[7]
        reconstructedMarkerCount=params[8]
        allSampleReconstructionStatistic={}
        dict_SCJ={}
        #lock = multiprocessing.Lock()
        #output text log
        outLog="Sample: "+str(i)+"\n"

        #start sampling method like in the Main.py
        outLog+="Enumerate joint labelings...\n"
        jointLabels, first = SR.enumJointLabelings(ccs)
        outLog+="Check valid labels...\n"
        validLabels, validAtNode = SR.validLabels(jointLabels, first)

        #lock.acquire()
        outLog+= "Compute ancestral labels with SR...\n"
        topDown = SR.sampleLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, alpha)
        #lock.release()
        reconstructedAdj = SR.reconstructedAdjacencies(topDown)
        SR.outputReconstructedAdjacencies(reconstructedAdj, outputDirectory+"/reconstructed_adjacencies_" + str(i))

        for node in reconstructedAdj:
            # count, for each adjacency at each internal node, how often that adjacency occurs across all samples
            for adjacency in reconstructedAdj[node]:
                #lock.acquire()
                if (node,adjacency) in allSampleReconstructionStatistic:
                    allSampleReconstructionStatistic[(node,adjacency)] += 1
                else:
                    allSampleReconstructionStatistic.update({(node,adjacency):1})
                #lock.release()
        outLog+="Scaffolding...\n"
        scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
        undoubled = scaffolding.undoubleScaffolds(scaffolds)
        scaffolding.outputUndoubledScaffolds(undoubled, outputDirectory+"/undoubled_scaffolds_" + str(i))
        scaffolding.outputScaffolds(scaffolds, outputDirectory+"/doubled_scaffolds_" + str(i))
        log=scaffolding.sanityCheckScaffolding(undoubled)
        outLog+=log
        for node in undoubled:
            outLog+= str(node)+'\n'
            markerCounter = 0
            for scaffold in undoubled[node]:
                first = scaffold[0]
                last = scaffold[-1]
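                # if first and last entry coincide, the scaffold is circular and the shared marker is counted only once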
                if not first == last:
                    markerCounter = markerCounter + len(scaffold)
                else:
                    markerCounter = markerCounter + len(scaffold) - 1
            outLog+= str(node) + " number of reconstructed undoubled marker in scaffolds: " + str(markerCounter)+'\n'
            # number of reconstructed markerIds given by reconstructedMarkerCount
            # singleton scaffolds number / number of not reconstructed marker
            notReconstructedMarkerCount = reconstructedMarkerCount - markerCounter
            # number of all scaffolds
            allScaffoldCount = len(undoubled[node]) + notReconstructedMarkerCount
            outLog+= str(node) + " number of singleton scaffolds (not reconstructed marker): " + str(
                notReconstructedMarkerCount)+'\n'
            outLog+= str(node) + " number of scaffolds: " + str(allScaffoldCount)+'\n'


        #lock.acquire()
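        # single-cut-or-join (SCJ) distance between the reconstructed and the extant adjacencies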
        scj = calculate_SCJ(tree, reconstructedAdj, extantAdjacencies_species_adj)
        outLog+="Single-Cut-or-Join-Distance: " + str(scj)+'\n'
        dict_SCJ.update({'Sample_' + str(i): scj})
        #lock.release()
        return (allSampleReconstructionStatistic,dict_SCJ,outLog)
Example #4
            adjacencyProbs[species]={adj:weight}

    line=f.readline()
f.close()


#dictionary for all scj distances
dict_SCJ={}

#compute CCs in global adjacency graph
ccs = globalAdjacencyGraph.createGraph(extantAdjacencies,nodesPerAdjacency)
if not args.skip_first:
    conflicts = globalAdjacencyGraph.analyseConnectedComponents(ccs)
    globalAdjacencyGraph.outputConflicts(conflicts,args.output+"/conflicts")

    jointLabels, first = SR.enumJointLabelings(ccs)
    validLabels, validAtNode = SR.validLabels(jointLabels,first)

    topDown = SR.computeLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, args.alpha)
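    # computeLabelings is used here, in contrast to SR.sampleLabelings in the per-sample examples above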

    reconstructedAdj = SR.reconstructedAdjacencies(topDown)
    SR.outputReconstructedAdjacencies(reconstructedAdj,args.output+"/reconstructed_adjacencies")
    for node in reconstructedAdj:
        print(node)
        print("Number of reconstructed adjacencies: " + str(len(reconstructedAdj[node])))

    scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
    undoubled = scaffolding.undoubleScaffolds(scaffolds)
    scaffolding.outputUndoubledScaffolds(undoubled,args.output+"/undoubled_scaffolds")
    scaffolding.outputScaffolds(scaffolds,args.output+"/doubled_scaffolds")
    scaffolding.sanityCheckScaffolding(undoubled)