Esempi in Python per getInput, esempi in Python per newickFormatReader.getInput

Esempio n. 1

0

Mostra file

File: DP.py Progetto: WeiyunMa/Reconciliation-Repair

def reconcile(fileName, D, T, L):
    """Build the DTL reconciliation graph for the newick file fileName.

    D, T and L are the duplication, transfer and loss costs. The host tree,
    parasite tree and tip mapping are read from the file with
    newickFormatReader.getInput and passed, together with the three costs,
    to DP; whatever DP returns is returned unchanged."""
    parsed = newickFormatReader.getInput(fileName)
    host, paras, phi = parsed
    return DP(host, paras, phi, D, T, L)

Esempio n. 2

0

Mostra file

File: DP.py Progetto: WeiyunMa/Reconciliation-Repair

def reconcile(fileName, D, T, L):
    """Reconcile the trees stored in a newick file.

    fileName is the newick file to read; D, T and L are the duplication,
    transfer and loss costs. newickFormatReader.getInput supplies the host
    tree, the parasite tree and the tip mapping, and the value computed by
    DP from those and the costs is returned as-is."""
    hostTree, parasiteTree, tipMapping = newickFormatReader.getInput(fileName)
    reconGraph = DP(hostTree, parasiteTree, tipMapping, D, T, L)
    return reconGraph

Esempio n. 3

0

Mostra file

File: empress.py Progetto: newsatit/eMPRess

def main():
    """Parse the command line, load the newick input and run the chosen tool.

    The arguments come from process_arg() and the input data from
    getInput(args.filename). args.functionality selects one of "costscape",
    "reconcile", "histogram" or "clumpr"; any other value does nothing.
    """
    args = process_arg()
    newick_data = getInput(args.filename)
    task = args.functionality

    if task == "costscape":
        costscape.solve(newick_data, args.tl, args.th, args.ll, args.lh, args)
        return
    if task == "reconcile":
        DTLReconGraph.reconcile_noninter(newick_data, args.d, args.t, args.l)
        return
    if task == "histogram":
        HistogramMain.compute_pdv(args.filename, newick_data, args.d, args.t, args.l, args)
        return
    if task == "clumpr":
        ClusterMain.perform_clustering(newick_data, args.d, args.t, args.l, args.k, args)

Esempio n. 4

0

Mostra file

def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """Return the DTL graph of newickFile rescored from costscape.

    newickFile is the newick file to read. switchLo/switchHi and
    lossLo/lossHi are the switch and loss cost ranges handed to findCenters;
    D, T and L are the duplication, transfer and loss costs handed to DP.

    Returns a tuple (newDTL, numRecon, leaves): newDTL is the result of
    changeDTLScores applied to DP's graph and the costscape DTLs, while
    numRecon and leaves are the second and third values returned by DP."""
    H, P, phi = newickFormatReader.getInput(newickFile)
    originalDTL, numRecon, leaves = DP(H, P, phi, D, T, L)

    centers = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    costscapeDTLs = getCostscapeDTLs(getDTLVals(centers), H, P, phi)
    return changeDTLScores(originalDTL, costscapeDTLs), numRecon, leaves

Esempio n. 5

0

Mostra file

File: calcCostscapeScore.py Progetto: alex-ozdemir/phylogenetic-reconciliation

def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """Rescore the reconciliation graph of a newick file using costscape.

    The host tree, parasite tree and tip mapping are read from newickFile
    and reconciled by DP with costs D (duplication), T (transfer) and L
    (loss). findCenters is called with the switch range
    [switchLo, switchHi] and loss range [lossLo, lossHi]; its points are
    turned into DTL values and costscape DTLs, which changeDTLScores then
    combines with DP's graph.

    Returns (newDTL, numRecon, leaves), where numRecon and leaves are passed
    through from DP's return value."""
    hostTree, parasiteTree, phi = newickFormatReader.getInput(newickFile)
    dpGraph, numRecon, leaves = DP(hostTree, parasiteTree, phi, D, T, L)

    pointList = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    pairs = getDTLVals(pointList)
    rescored = changeDTLScores(
        dpGraph, getCostscapeDTLs(pairs, hostTree, parasiteTree, phi))
    return rescored, numRecon, leaves

Esempio n. 6

0

Mostra file

File: FromNewick.py Progetto: schnappi-wkl/CompBioSummer2015

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write(
            'This directory holds a cache of reconciliation graph for the TreeLife data set'
        )
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)

        f = open(cache_location, 'w+')
        f.write(repr(DictGraph))
        f.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    DictGraph = eval(f.read())
    f.close()

    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    print >> sys.stderr, 'Found cluster representatives using point-collecting'

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [
        ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps
    ]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]

    representatives = setReps + extra_reps

    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'

    for i in xrange(1, max_k + 1):
        print 'k = %d' % i
        KMeans.k_means(graph, 10, i, 0, representatives[:i])

Esempio n. 7

0

Mostra file

File: reconConversion.py Progetto: schnappi-wkl/CompBioSummer2015

def freqSummation(argList):
    """Write a score/cost summary for the reconciliations of a newick file.

    argList is indexed like a command line (argList[0] is not read):
      argList[1]    newick file holding the host/parasite trees and mapping
      argList[2:5]  duplication, transfer and loss costs
      argList[5]    scoring type: "Frequency", "xscape" or "unit"
      argList[6:10] switch low/high and loss low/high bounds for xscape

    The output file is named after the newick path with its last seven
    characters (the length of ".newick") removed and "freqFile.txt"
    appended. It holds four lines: the list of scores returned by
    Greedy.Greedy, their sum, the cost of the last reconciliation in the
    returned list (0 if the list is empty), and the number of
    reconciliations reported by DP.DP.

    Raises ValueError if the scoring type is not one of the three above.
    """
    newickFile = argList[1]
    D = float(argList[2])
    T = float(argList[3])
    L = float(argList[4])
    freqType = argList[5]
    switchLo = float(argList[6])
    switchHi = float(argList[7])
    lossLo = float(argList[8])
    lossHi = float(argList[9])
    fileName = newickFile[:-7]
    # The output file is opened up front; 'with' makes sure it is closed
    # even when one of the computation steps below raises.
    with open(fileName + "freqFile.txt", 'w') as f:
        host, paras, phi = newickFormatReader.getInput(newickFile)
        DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
        if freqType == "Frequency":
            newDTL = DTL
        elif freqType == "xscape":
            newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo,
                                                        switchHi, lossLo, lossHi,
                                                        D, T, L)
        elif freqType == "unit":
            newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)
        else:
            # Fail with a clear message instead of a NameError on newDTL.
            raise ValueError("unknown frequency type: %r" % (freqType,))
        scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
        totalSum = sum(scoresList)
        # Defined before the loop so an empty reconciliation list yields 0
        # rather than an unbound variable.
        totalCost = 0
        for index in reconciliation:
            # The cost is restarted for each reconciliation, so the value
            # written below is that of the last one.
            totalCost = 0
            for key in index:
                event = index[key][0]
                if event == "L":
                    totalCost += L
                elif event == "T":
                    totalCost += T
                elif event == "D":
                    totalCost += D
        f.write(str(scoresList) + '\n')
        f.write(str(totalSum) + '\n')
        f.write(str(totalCost) + '\n')
        f.write(str(numRecon))

Esempio n. 8

0

Mostra file

File: ReconConversion.py Progetto: alex-ozdemir/phylogenetic-reconciliation

def freqSummation(argList):
	"""Takes as input an argument list containing a newick file of host and 
	parasite trees as well as their phi mapping, duplication, transfer, and 
	loss costs, the type of frequency scoring to be used, as well as switch 
	and loss cost ranges for xscape scoring, and returns a file containing the
	list of scores for each individual reconciliation, the sum of the those 
	scores, the total cost of those reconciliations and the number of 
	reconciliations of those trees."""
	newickFile = argList[1]
	D = float(argList[2])
	T = float(argList[3])
	L = float(argList[4])
	freqType = argList[5]
	switchLo = float(argList[6])
	switchHi = float(argList[7])
	lossLo = float(argList[8])
	lossHi = float(argList[9])
	fileName = newickFile[:-7]
	f = open(fileName+"freqFile.txt", 'w')
	host, paras, phi = newickFormatReader.getInput(newickFile)
	DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
	print numRecon
	if freqType == "Frequency":
		newDTL = DTL
	elif freqType == "xscape":
		newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo, \
			switchHi, lossLo, lossHi, D, T, L)
	elif freqType == "unit":
		newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)
	scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
	totalSum = 0
	for score in scoresList:
		totalSum +=score
	for index in reconciliation:
		totalCost = 0
		for key in index:
			if index[key][0] == "L":
				totalCost+=L
			elif index[key][0] == "T":
				totalCost+=T
			elif index[key][0] == "D":
				totalCost+=D
	f.write(str(scoresList)+'\n')
	f.write(str(totalSum)+'\n')
	f.write(str(totalCost)+'\n')
	f.write(str(numRecon))
	f.close()

Esempio n. 9

0

Mostra file

File: FromNewick.py Progetto: alex-ozdemir/phylogenetic-reconciliation

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)

        f = open(cache_location, 'w+')
        f.write(repr(DictGraph))
        f.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    DictGraph = eval(f.read())
    f.close()

    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    print >> sys.stderr, 'Found cluster representatives using point-collecting'

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]

    representatives = setReps + extra_reps

    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'

    for i in xrange(1, max_k + 1):
        print 'k = %d' % i
        KMeans.k_means(graph, 10, i, 0, representatives[:i])

Esempio n. 10

0

Mostra file

def Reconcile(argList):
    """Takes command-line arguments of a .newick file, duplication, transfer, 
	and loss costs, the type of scoring desired and possible switch and loss 
	ranges. Creates Files for the host, parasite, and reconciliations"""
    fileName = argList[1]  #.newick file
    D = float(argList[2])  # Duplication cost
    T = float(argList[3])  # Transfer cost
    L = float(argList[4])  # Loss cost
    freqType = argList[5]  # Frequency type
    # Optional inputs if freqType == xscape
    switchLo = float(argList[6])  # Switch lower boundary
    switchHi = float(argList[7])  # Switch upper boundary
    lossLo = float(argList[8])  # Loss lower boundary
    lossHi = float(argList[9])  # Loss upper boundary

    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    Order = orderGraph.date(hostv)
    # Default scoring function (if freqtype== Frequency scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    print DTLReconGraph, numRecon
    #uses xScape scoring function
    if freqType == "xscape":
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(fileName, switchLo, \
         switchHi, lossLo, lossHi, D, T, L)
    #uses Unit scoring function
    elif freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)
    for n in range(len(rec)):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, rec[n])
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            rec[n], currentOrder = detectCycles.detectCyclesWrapper(
                host, paras, rec[n])
            currentOrder = orderGraph.date(currentOrder)
        hostOrder = hOrder(hostv, currentOrder)
        hostBranchs = branch(hostv, hostOrder)
        if n == 0:
            newickToVis.convert(fileName, hostBranchs, n, 1)
        else:
            newickToVis.convert(fileName, hostBranchs, n, 0)
        # filename[:-7] is the file name minus the .newick
        reconConversion.convert(rec[n], DTLReconGraph, paras, fileName[:-7], n)

Esempio n. 11

0

Mostra file

File: fixer.py Progetto: WeiyunMa/Reconciliation-Repair

def main():

    if not os.path.exists("fixerOut"):
        os.mkdir("fixerOut")

    for i in xrange(fileNum):

        index = str(i + 1)
        for j in xrange(4 - len(str(i + 1))):
            index = "0" + index

        fileName = "real-100taxa/COG" + index + ".newick"
        if not os.path.isfile(fileName):
            continue

        outFile = open("fixerOut/COG" + index + ".txt", 'w')

        print fileName[13:]
        outFile.write(fileName[13:] + "\n")

        S_dict, G_dict, _ = newickFormatReader.getInput(fileName)
        S, G = eteTreeReader(fileName)
        recs, allRecs = MasterReconciliation.Reconcile(["", fileName, str(dVal), str(tVal), str(lVal), "unit", "0", "1", "0", "1"])

        totRecs = len(allRecs)

        print "# of Infeasible Reconciliations: {0}".format(len(recs))
        outFile.write("# of Reconciliations: {0}\n".format(totRecs))
        outFile.write("# of Infeasible Reconciliations: {0}\n".format(len(recs)))

        min_cost = None

        for T in recs:
            alpha = recon_tree_to_dtl(T)
            out(S, G, alpha, outFile)
            alpha, pull_up = temporal_consistency_fixer(G, G_dict, S, S_dict, alpha)
            cost = out(S, G, alpha, outFile)
            if min_cost is None or cost < min_cost:
                min_cost = cost
            print "number of operations: {0}".format(pull_up)
            outFile.write("number of operations: {0}\n".format(pull_up))

        print "min total:", min_cost
        outFile.write("min total: " + str(min_cost) + "\n")

        outFile.close()

Esempio n. 12

0

Mostra file

File: MasterReconciliation.py Progetto: alex-ozdemir/phylogenetic-reconciliation

def Reconcile(argList):
    """Takes command-line arguments of a .newick file, duplication, transfer, 
    and loss costs, the type of scoring desired and possible switch and loss 
    ranges. Creates Files for the host, parasite, and reconciliations"""
    fileName = argList[1] #.newick file
    D = float(argList[2]) # Duplication cost
    T = float(argList[3]) # Transfer cost
    L = float(argList[4]) # Loss cost
    freqType = argList[5] # Frequency type
    # Optional inputs if freqType == xscape
    switchLo = float(argList[6]) # Switch lower boundary
    switchHi = float(argList[7]) # Switch upper boundary
    lossLo = float(argList[8]) # Loss lower boundary
    lossHi = float(argList[9]) # Loss upper boundary

    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    Order = orderGraph.date(hostv)
    # Default scoring function (if freqtype== Frequency scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    print DTLReconGraph, numRecon
    #uses xScape scoring function
    if freqType == "xscape":
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(fileName, switchLo, \
            switchHi, lossLo, lossHi, D, T, L)
    #uses Unit scoring function
    elif freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)
    for n in range(len(rec)):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, rec[n])
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            rec[n], currentOrder = detectCycles.detectCyclesWrapper(host, paras, rec[n])
            currentOrder = orderGraph.date(currentOrder)
        hostOrder = hOrder(hostv,currentOrder)
        hostBranchs = branch(hostv,hostOrder)
        if n == 0:
            newickToVis.convert(fileName,hostBranchs, n, 1)
        else:
            newickToVis.convert(fileName,hostBranchs, n, 0)
        # filename[:-7] is the file name minus the .newick
        reconConversion.convert(rec[n], DTLReconGraph, paras, fileName[:-7], n)

Esempio n. 13

0

Mostra file

File: DTLReconGraph.py Progetto: rlhadas/medoids

def reconcile(file_name, dup_cost, transfer_cost, loss_cost):
    """
    Read a data file and compute its DTL reconciliation graph.

    :param file_name: path, as a string, of the file holding the data set; it is
    read with the newick format reader
    :param dup_cost: the cost associated with a duplication event
    :param transfer_cost: the cost associated with a transfer event
    :param loss_cost: the cost associated with a loss event
    :return: a 5-tuple of the host tree, the parasite tree, the graph produced by DP,
    the number of reconciliations reported by DP, and the best roots reported by DP.
    The best cost that DP also reports is not returned.
    """
    # The host and parasite trees are part of the return value so that callers
    # can reuse them without reading the file again.
    host, paras, phi = newickFormatReader.getInput(file_name)
    dp_result = DP(host, paras, phi, dup_cost, transfer_cost, loss_cost)
    graph, best_cost, num_recon, best_roots = dp_result
    return host, paras, graph, num_recon, best_roots

Esempio n. 14

0

Mostra file

File: reconConversion.py Progetto: dmsm/CompBioSummer2015

def freqSummation(argList):
    """Write a score/cost summary for the reconciliations of a newick file.

    argList holds, in order: the newick file, the duplication, transfer and
    loss costs, the scoring type ("Frequency", "xscape" or "unit"), and the
    switch low/high and loss low/high bounds used for xscape scoring.

    The output file is named after the newick path with its last seven
    characters (the length of ".newick") removed and "freqFile.txt"
    appended. It holds four lines: the list of scores returned by
    greedy.Greedy, their sum, the cost of the first reconciliation (0 when
    none are returned), and the number of reconciliations reported by dp.DP.

    Raises ValueError if the scoring type is not one of the three above.
    """
    newickFile = argList[0]
    costs = {}
    costs['D'] = float(argList[1])
    costs['T'] = float(argList[2])
    costs['L'] = float(argList[3])
    freqType = argList[4]
    switchLo = float(argList[5])
    switchHi = float(argList[6])
    lossLo = float(argList[7])
    lossHi = float(argList[8])
    fileName = newickFile[:-7]
    # The output file is opened up front; 'with' makes sure it is closed
    # even when one of the computation steps below raises.
    with open("{}freqFile.txt".format(fileName), 'w') as f:
        host, paras, phi = newickFormatReader.getInput(newickFile)
        DTL, numRecon = dp.DP(host, paras, phi, costs['D'], costs['T'], costs['L'])
        if freqType == "Frequency":
            newDTL = DTL
        elif freqType == "xscape":
            newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, costs['D'],
                                                        costs['T'], costs['L'])
        elif freqType == "unit":
            newDTL = masterReconciliation.unitScoreDTL(host, paras, phi, costs['D'], costs['T'], costs['L'])
        else:
            # Fail with a clear message instead of a NameError on newDTL.
            raise ValueError("unknown frequency type: {!r}".format(freqType))
        scoresList, reconciliation = greedy.Greedy(newDTL, paras)
        totalSum = sum(scoresList)
        totalCost = 0
        # Only the first reconciliation is costed; with none returned the
        # cost stays 0 instead of raising IndexError.
        if reconciliation:
            index = reconciliation[0]
            for key in index:
                # Event types other than 'D', 'T' and 'L' add nothing.
                totalCost += costs.get(index[key][0], 0)

        f.write("{}\n".format(scoresList))
        f.write("{}\n".format(totalSum))
        f.write("{}\n".format(totalCost))
        f.write("{}".format(numRecon))

Esempio n. 15

0

Mostra file

File: calc_recon.py Progetto: jeansung/ClusteringAlgorithmsDTL

def run_test(fileName, max_k):
    """Report whether a newick file reaches the reconciliation threshold.

    The reconciliation graph and count are computed by DP.DP with fixed
    costs (D=2, T=3, L=1) and cached in ../cache/<basename>.graph and
    ../cache/<basename>.count unless both files already exist. The cached
    count is then compared with the module-level recon_threshold and a
    'FALSE:' (below the threshold) or 'TRUE:' line is written to stderr.

    fileName -- path of the newick file to process
    max_k    -- accepted but not used by this function
    """
    cache_dir = '../cache'
    D = 2.
    T = 3.
    L = 1.
    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        # 'with' closes each handle even if the I/O call raises.
        with open('%s/README' % cache_dir, 'w') as f:
            f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1])

    if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        with open(cache_location, 'w+') as f:
            f.write(repr(DictGraph))
        with open(recon_count_location, 'w+') as g:
            g.write(str(numRecon))

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    # Only the count is needed here, so the cached graph is not read back.
    with open(recon_count_location) as g:
        numRecon = float(g.read())

    if (numRecon < recon_threshold):
        print >> sys.stderr, 'FALSE:\t', fileName, numRecon
    else:
        print >> sys.stderr, 'TRUE: \t', fileName, numRecon

Esempio n. 16

0

Mostra file

File: newickToVis.py Progetto: schnappi-wkl/CompBioSummer2015

def convert(fileName, HostOrder, n, writeParasite):
    """Create the host (and optionally parasite) tree files for a .newick file.

    fileName      -- the original .newick file; its contents must split on
                     ';' into exactly three parts (host tree, parasite tree,
                     mapping)
    HostOrder     -- dictionary of host tree branch lengths, passed to
                     treelib1.parse_newick
    n             -- number appended to the host tree file name
    writeParasite -- when true, the parasite tree file is written as well

    The host tree is written to <name><n>.stree and the parasite tree to
    <name>.tree, where <name> is fileName without its last seven characters
    (the length of ".newick")."""
    # Read the whole file first so the handle is closed before any parsing
    # step gets a chance to raise.
    with open(fileName, 'r') as f:
        contents = f.read()
    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    H, P, phi = contents.split(";")
    P = P.strip()
    H = H.strip()
    H = H + ';'
    host = treelib1.parse_newick(H, HostOrder)
    # NOTE(review): H is not read again after this loop, so the annotated
    # string it builds is discarded; the tree written below comes from the
    # parse_newick call above.
    for key in HostOrder:
        H = H.replace(str(key), str(key) + ':' + str(HostOrder[key]))
    with open(fileName[:-7] + str(n) + ".stree", 'w') as f:
        treelib1.write_newick(host, f, root_data=True)
    if writeParasite:
        with open(fileName[:-7] + '.tree', 'w') as f:
            f.write(P + ";")

Esempio n. 17

0

Mostra file

File: newickToVis.py Progetto: aschweickart/CompBioSummer2015

def convert(fileName, HostOrder, n, writeParasite):
    """Write visualisation tree files derived from a .newick file.

    The file contents are split on ';' into the host tree, the parasite
    tree and the mapping (exactly three parts are required). The host tree
    is parsed by treelib1.parse_newick together with HostOrder, the
    dictionary of host tree branch lengths, and written to
    <name><n>.stree. When writeParasite is true the parasite tree text is
    written to <name>.tree. <name> is fileName without its last seven
    characters (the length of ".newick")."""
    # The input handle is released as soon as the text has been read, so it
    # cannot stay open if one of the parsing calls below raises.
    with open(fileName, 'r') as f:
        contents = f.read()
    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    H,P,phi = contents.split(";")
    P = P.strip()
    H = H.strip()
    H = H + ';'
    host = treelib1.parse_newick(H, HostOrder)
    # NOTE(review): the string built by this loop is never read afterwards;
    # the tree written below is the one parsed above.
    for key in HostOrder:
        H = H.replace(str(key), str(key) + ':' + str(HostOrder[key]))
    with open(fileName[:-7]+ str(n) +".stree", 'w') as f:
        treelib1.write_newick(host, f, root_data = True)
    if writeParasite:
        with open(fileName[:-7] + '.tree', 'w') as f:
            f.write(P + ";")

Esempio n. 18

0

Mostra file

def Reconcile(argList):
	"""Return the infeasible and the complete reconciliations of a .newick file.

	argList follows the command-line layout (argList[0] is not read): the
	.newick file, the duplication, transfer and loss costs, the scoring type,
	then the switch low/high and loss low/high bounds. The bounds are parsed
	but not used, because xscape scoring is disabled in this version; "unit"
	is the only scoring type that replaces the graph computed by DP.DP.

	Returns (infeasible_recs, recs): recs is the list produced by
	Greedy.Greedy and infeasible_recs holds those entries for which
	orderGraph.date of the built reconciliation compares equal to False."""
	fileName = argList[1]
	D, T, L = float(argList[2]), float(argList[3]), float(argList[4])
	freqType = argList[5]
	switchLo, switchHi = float(argList[6]), float(argList[7])
	lossLo, lossHi = float(argList[8]), float(argList[9])

	host, paras, phi = newickFormatReader.getInput(fileName)
	hostRoot = ReconciliationGraph.findRoot(host)

	# Frequency scoring is the default; unit scoring replaces the graph.
	DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
	if freqType == "unit":
		DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

	# Greedy is given a deep copy, leaving DTLReconGraph itself untouched.
	scoresList, recs = Greedy.Greedy(copy.deepcopy(DTLReconGraph), paras)

	infeasible_recs = [
		rec for rec in recs
		if orderGraph.date(ReconciliationGraph.buildReconciliation(host, paras, rec)) == False
	]
	return infeasible_recs, recs

Esempio n. 19

0

Mostra file

File: k_medoids_random.py Progetto: jeansung/ClusteringAlgorithmsDTL

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    print >> sys.stderr, "FILE: ", fileName
    print fileName


    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1])
    if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        f = open(cache_location, 'w+')
        g = open(recon_count_location, 'w+')
        f.write(repr(DictGraph))
        g.write(str(numRecon))
        f.close()
        g.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    g = open(recon_count_location)
    DictGraph = eval(f.read())
    numRecon = float(g.read())
    f.close()
    g.close()

    ## Only consider running algorithm for reconciliations with more than 
    # threshold MPRs
    if (numRecon < recon_threshold):
        print >> sys.stderr, 'Too few reconciliations: ', numRecon
        return 
    else:
        print >> sys.stderr, 'Reconciliation Count: ', numRecon



    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    print >> sys.stderr, 'Found cluster representatives using point-collecting'

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]

    representatives = setReps + extra_reps

    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'

    for seed in xrange(5):
        for i in xrange(1, max_k + 1):
            # print 'k = %d' % i
            # KMeans.k_means(graph, 10, i, 0, representatives[:i])
            KMeans.k_means(graph, 10, i, seed, None)
            print

Esempio n. 20

0

Mostra file

def main():
    """Convert the COG newick files into tab-separated .tree files.

    For numbers 1..6000, the file real-100taxa/COG<4-digit number>.newick is
    converted, if it exists, into treeFiles/COG<number>.tree (the directory
    is created when missing). Each output file has five sections: HOSTTREE,
    HOSTNAMES, PARASITETREE, PARASITENAMES and PHI. Nodes are referred to by
    integer indices; host nodes are numbered first, starting at 1, and
    parasite nodes continue from there.
    """
    if not os.path.exists("treeFiles"):
        os.mkdir("treeFiles")

    for i in xrange(6000):

        # Zero-pad the 1-based file number to at least four digits.
        index = str(i + 1).zfill(4)

        inFile = "real-100taxa/COG" + index + ".newick"

        if not os.path.isfile(inFile):
            continue

        # 'with' closes the output even if reading or formatting raises.
        with open("treeFiles/COG" + index + ".tree", 'w') as outFile:

            host, parasite, phi = newickFormatReader.getInput(inFile)
            H = treeFormat(host)
            P = treeFormat(parasite)

            H_dict = {}  # name:index
            P_dict = {}  # name:index

            # One running counter, so parasite indices follow the host ones.
            count = 0
            for key in H:
                count += 1
                H_dict[key] = count

            for key in P:
                count += 1
                P_dict[key] = count

            outFile.write("HOSTTREE\n")
            for key in H:
                outFile.write(str(H_dict[key]) + "\t")
                # A [None, None] entry is written as a pair of nulls.
                if H[key] == [None, None]:
                    outFile.write("null\tnull\n")
                else:
                    outFile.write(
                        str(H_dict[H[key][0]]) + "\t" + str(H_dict[H[key][1]]) +
                        "\n")

            outFile.write("\nHOSTNAMES\n")
            for key in H:
                outFile.write(str(H_dict[key]) + "\t" + key + "\n")

            outFile.write("\nPARASITETREE\n")
            for key in P:
                outFile.write(str(P_dict[key]) + "\t")
                if P[key] == [None, None]:
                    outFile.write("null\tnull\n")
                else:
                    outFile.write(
                        str(P_dict[P[key][0]]) + "\t" + str(P_dict[P[key][1]]) +
                        "\n")

            outFile.write("\nPARASITENAMES\n")
            for key in P:
                outFile.write(str(P_dict[key]) + "\t" + key + "\n")

            # Each PHI line is <host index> TAB <parasite index>.
            outFile.write("\nPHI\n")
            for key in phi:
                outFile.write(
                    str(H_dict[phi[key]]) + "\t" + str(P_dict[key]) + "\n")

Esempio n. 21

0

Mostra file

File: k_centers.py Progetto: jeansung/ClusteringAlgorithmsDTL

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    print >> sys.stderr, "FILE: ", fileName
    print fileName

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1])
    if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        f = open(cache_location, 'w+')
        g = open(recon_count_location, 'w+')
        f.write(repr(DictGraph))
        g.write(str(numRecon))
        f.close()
        g.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    g = open(recon_count_location)
    DictGraph = eval(f.read())
    numRecon = float(g.read())
    f.close()
    g.close()

    
    
    ## Only consider running algorithm for reconciliations with more than 
    # threshold MPRs
    if (numRecon < recon_threshold):
        print >> sys.stderr, 'Too few reconciliations: ', numRecon
        return 
    else:
        print >> sys.stderr, 'Reconciliation Count: ', numRecon



    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)
    graph = ReconGraph.ReconGraph(DictGraph)
    representatives = [ReconGraph.dictRecToSetRec(graph, dictReps[0])]

    ## Debug info
    ## Modifies the graph 
    ## Checking for the case when there is an error in likelihood 
    print >> sys.stderr, "== Checking for likelihoods over 1 =="
    found = False 
    for key in DictGraph.keys():
        children = DictGraph[key]
        for child in children[:-1]:
            if child[-1] > 1:
                # Attempt to round to fix large float math errors
                roundedValue = round(child[-1])
                if roundedValue != 1.0:
                    print >> sys.stderr, "ERR FOUND: ", key, child 
                    found = True 
                
    if not(found):
        print >> sys.stderr, "NO ERR(s)"
    print >> sys.stderr, "== End of over 1 checks. =="



    print >> sys.stderr, 'Starting K-centers algorithm ... '
    for i in xrange(2, max_k + 2):
        d, newrep = maximize(graph,representatives)
        if not all(d_i > 0 for d_i in d):
            print >> sys.stderr, "Distance vector contains 0", d 
            break

        print i-1, min(d),
        representatives.append(newrep)
        dist_sum = 0
        n = 10
        for _ in xrange(n):
            reps = [KMeans.get_weighted_template(graph) for _ in xrange(i-1)]
            dist_sum += min_d(maximize(graph,reps))
        print float(dist_sum) / n

    print  >> sys.stderr, "Finished k centers algorithm ..."

Esempio n. 22

0

Mostra file

File: newickToTreeParser.py Progetto: WeiyunMa/Reconciliation-Repair

def main():
	"""Convert the COG newick files into tab-separated .tree files.

	For numbers 1..6000, the file real-100taxa/COG<4-digit number>.newick is
	converted, if it exists, into treeFiles/COG<number>.tree (the directory
	is created when missing). Each output file has five sections: HOSTTREE,
	HOSTNAMES, PARASITETREE, PARASITENAMES and PHI. Nodes are referred to by
	integer indices; host nodes are numbered first, starting at 1, and
	parasite nodes continue from there.
	"""
	if not os.path.exists("treeFiles"):
		os.mkdir("treeFiles")

	for i in xrange(6000):

		# Zero-pad the 1-based file number to at least four digits.
		index = str(i + 1).zfill(4)

		inFile = "real-100taxa/COG" + index + ".newick"

		if not os.path.isfile(inFile):
			continue

		# 'with' closes the output even if reading or formatting raises.
		with open("treeFiles/COG" + index + ".tree", 'w') as outFile:

			host, parasite, phi = newickFormatReader.getInput(inFile)
			H = treeFormat(host)
			P = treeFormat(parasite)

			H_dict = {}   # name:index
			P_dict = {}   # name:index

			# One running counter, so parasite indices follow the host ones.
			count = 0
			for key in H:
				count += 1
				H_dict[key] = count

			for key in P:
				count += 1
				P_dict[key] = count

			outFile.write("HOSTTREE\n")
			for key in H:
				outFile.write(str(H_dict[key]) + "\t")
				# A [None, None] entry is written as a pair of nulls.
				if H[key] == [None, None]:
					outFile.write("null\tnull\n")
				else:
					outFile.write(str(H_dict[H[key][0]]) + "\t" + str(H_dict[H[key][1]]) + "\n")

			outFile.write("\nHOSTNAMES\n")
			for key in H:
				outFile.write(str(H_dict[key]) + "\t" + key + "\n")

			outFile.write("\nPARASITETREE\n")
			for key in P:
				outFile.write(str(P_dict[key]) + "\t")
				if P[key] == [None, None]:
					outFile.write("null\tnull\n")
				else:
					outFile.write(str(P_dict[P[key][0]]) + "\t" + str(P_dict[P[key][1]]) + "\n")

			outFile.write("\nPARASITENAMES\n")
			for key in P:
				outFile.write(str(P_dict[key]) + "\t" + key + "\n")

			# Each PHI line is <host index> TAB <parasite index>.
			outFile.write("\nPHI\n")
			for key in phi:
				outFile.write(str(H_dict[phi[key]]) + "\t" + str(P_dict[key]) + "\n")