def runSample(params):
    # retrieve the given parameters
    ccs = params[0]
    tree = params[1]
    extantAdjacencies = params[2]
    adjacencyProbs = params[3]
    alpha = params[4]
    i = params[5]
    extantAdjacencies_species_adj = params[6]
    outputDirectory = params[7]
    reconstructedMarkerCount = params[8]

    allSampleReconstructionStatistic = {}
    dict_SCJ = {}
    #lock = multiprocessing.Lock()

    # output text log
    outLog = "Sample: " + str(i) + "\n"

    # start sampling method like in Main.py
    outLog += "Enumerate joint labelings...\n"
    jointLabels, first = SR.enumJointLabelings(ccs)
    outLog += "Check valid labels...\n"
    validLabels, validAtNode = SR.validLabels(jointLabels, first)

    #lock.acquire()
    outLog += "Compute ancestral labels with SR...\n"
    topDown = SR.sampleLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, alpha)
    #lock.release()

    reconstructedAdj = SR.reconstructedAdjacencies(topDown)
    SR.outputReconstructedAdjacencies(reconstructedAdj, outputDirectory + "/reconstructed_adjacencies_" + str(i))

    for node in reconstructedAdj:
        # count, for each adjacency at each internal node, how often it occurs there over all samples
        for adjacency in reconstructedAdj[node]:
            #lock.acquire()
            if (node, adjacency) in allSampleReconstructionStatistic:
                allSampleReconstructionStatistic[(node, adjacency)] += 1
            else:
                allSampleReconstructionStatistic.update({(node, adjacency): 1})
            #lock.release()

    outLog += "Scaffolding...\n"
    scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
    undoubled = scaffolding.undoubleScaffolds(scaffolds)
    scaffolding.outputUndoubledScaffolds(undoubled, outputDirectory + "/undoubled_scaffolds_" + str(i))
    scaffolding.outputScaffolds(scaffolds, outputDirectory + "/doubled_scaffolds_" + str(i))
    log = scaffolding.sanityCheckScaffolding(undoubled)
    outLog += log

    for node in undoubled:
        outLog += str(node) + '\n'
        markerCounter = 0
        for scaffold in undoubled[node]:
            first = scaffold[0]
            last = scaffold[-1]
            if not first == last:
                # linear scaffold: every marker counts once
                markerCounter = markerCounter + len(scaffold)
            else:
                # circular scaffold: first and last marker are identical, count it only once
                markerCounter = markerCounter + len(scaffold) - 1
        outLog += str(node) + " number of reconstructed undoubled marker in scaffolds: " + str(markerCounter) + '\n'
        # number of reconstructed marker IDs is given by reconstructedMarkerCount;
        # markers not placed in any scaffold are counted as singleton scaffolds
        notReconstructedMarkerCount = reconstructedMarkerCount - markerCounter
        # number of all scaffolds
        allScaffoldCount = len(undoubled[node]) + notReconstructedMarkerCount
        outLog += str(node) + " number of singleton scaffolds (not reconstructed marker): " + str(notReconstructedMarkerCount) + '\n'
        outLog += str(node) + " number of scaffolds: " + str(allScaffoldCount) + '\n'

    #lock.acquire()
    scj = calculate_SCJ(tree, reconstructedAdj, extantAdjacencies_species_adj)
    outLog += "Single-Cut-or-Join-Distance: " + str(scj) + '\n'
    dict_SCJ.update({'Sample_' + str(i): scj})
    #lock.release()

    return (allSampleReconstructionStatistic, dict_SCJ, outLog)
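# --- Illustrative driver (assumption, not part of the original pipeline) ---
# runSample unpacks its arguments from a single 9-tuple, and the commented-out
# multiprocessing.Lock calls suggest it is meant to be mapped over a worker
# pool. A minimal sketch of such a driver is shown below; the names
# runAllSamples and sampleParams are hypothetical and only illustrate how the
# parameter tuples expected above could be dispatched.
#
# import multiprocessing
#
# def runAllSamples(sampleParams, processes=4):
#     # sampleParams: list of 9-tuples (ccs, tree, extantAdjacencies,
#     # adjacencyProbs, alpha, i, extantAdjacencies_species_adj,
#     # outputDirectory, reconstructedMarkerCount), one per sample index i
#     pool = multiprocessing.Pool(processes)
#     try:
#         results = pool.map(runSample, sampleParams)
#     finally:
#         pool.close()
#         pool.join()
#     return results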
globalAdjacencyGraph.outputConflicts(conflicts, args.output + "/conflicts")
jointLabels, first = SR.enumJointLabelings(ccs)
validLabels, validAtNode = SR.validLabels(jointLabels, first)
topDown = SR.computeLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, args.alpha)
reconstructedAdj = SR.reconstructedAdjacencies(topDown)
SR.outputReconstructedAdjacencies(reconstructedAdj, args.output + "/reconstructed_adjacencies")

for node in reconstructedAdj:
    print node
    print "Number of reconstructed adjacencies: " + str(len(reconstructedAdj[node]))

scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
undoubled = scaffolding.undoubleScaffolds(scaffolds)
scaffolding.outputUndoubledScaffolds(undoubled, args.output + "/undoubled_scaffolds")
scaffolding.outputScaffolds(scaffolds, args.output + "/doubled_scaffolds")
scaffolding.sanityCheckScaffolding(undoubled)

# reconstruct marker IDs out of extantAdjacencies
reconstructedMarker = set()
for adj in extantAdjacencies:
    # each adjacency consists of two marker extremities
    adj_list = [adj[0], adj[1]]
    for adjpart in adj_list:
        # map an extremity back to its marker ID in the doubled-marker encoding
        if (adjpart % 2 == 0):
            markerId = adjpart / 2
        else:
            markerId = (adjpart + 1) / 2
        reconstructedMarker.add(markerId)
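# Worked example (illustration only, derived from the mapping above): in the
# doubled-marker encoding, marker k is represented by the two extremities
# 2k-1 and 2k. For an adjacency adj = (5, 8):
#   5 is odd  -> markerId = (5 + 1) / 2 = 3
#   8 is even -> markerId = 8 / 2       = 4
# so this adjacency adds markers 3 and 4 to reconstructedMarker.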