        savesort.write("\t" + str(used))
        savesort.write("\t" + str(skipped))
        savesort.write("\t" + str(split))
        savesort.write("\n")
        writeTrees(trees, label)
    savesort.close()


codeToSampleMap = getCodeToSampleMap()

#Read in the 'optim' input files, and parse them to figure out where the splits are.
allsets, labelSamples = readOptimInputs(codeToSampleMap)
inputSplits = getOptimInputSplits(allsets)

#Read in the deletions/CNVs
(patientSampleMap, samplePatientMap) = lps.getPatientSampleMap()
deletions, CNVs = lps.loadDeletionsAndCNVs(samplePatientMap)

#Read in the mutations
mutations = readMutations()
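# A minimal sketch of the nested layout this script appears to assume for the
# mutations dictionary (patient -> chromosome -> position -> collection of
# calls), inferred from the loops further down; the names and values here are
# hypothetical illustration only.
_example_mutations = {
    "Patient01": {
        "1": {
            1234567: {"callerA", "callerB"},
        },
    },
}
assert len(list(_example_mutations["Patient01"]["1"][1234567])) > 1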

sortMutations(mutations, allsets, inputSplits, deletions, CNVs, labelSamples)

#writeAllSampleVAFs(mutations, patientSampleMap, deletions)

count = 0
countByCall = {}
for patient in mutations:
    for chrom in mutations[patient]:
        for pos in mutations[patient][chrom]:
            if len(list(mutations[patient][chrom][pos])) > 1:
                count += 1
        elif direction == "down":
            if val < localmin:
                localmin = val
                distance = 0
            else:
                distance += 1
            if distance >= 40 and val - localmin > 0.25:
                #print("Switched directions: going up at", key, distance, localmax, val)
                direction = "up"
                localmax = val
                maxkey = key
    return ret


(patientSampleMap, samplePatientMap) = lsl.getPatientSampleMap(
    dipvtet_file="calling_evidence_odds.tsv")
deletions, CNVs = lsl.loadDeletionsAndCNVs(samplePatientMap)

summary = open("summary_smoothed_and_fit.tsv", "w")
summary.write("Patient")
summary.write("\tSample")
summary.write("\tnPoints")
summary.write("\tCall")
summary.write("\tGroup")
#summary.write("\tMean x2")
#summary.write("\tStdev x2")
summary.write("\tHistMax x2")
#summary.write("\tHistMax height")
summary.write("\tFitNormal x2")
summary.write("\tFitNormal weight")
summary.write("\tHistMax x2")
    Return True if the given position falls within a deletion for this
    patient and sample on the given chromosome, False if not.
    """
    if patient not in deletions:
        return False
    if sample not in deletions[patient]:
        return False
    if chrom not in deletions[patient][sample]:
        return False
    for (start, end) in deletions[patient][sample][chrom]:
        if start <= pos and end >= pos:
            return True
    return False
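# A self-contained sketch of the deletions structure the check above relies on
# (patient -> sample -> chromosome -> list of (start, end) tuples). The helper
# name, sample IDs, and coordinates below are hypothetical, not taken from the
# real pipeline.
_example_deletions = {
    "Patient01": {
        "SampleA": {
            "5": [(1000000, 2000000)],
        },
    },
}

def _position_is_deleted(deletions, patient, sample, chrom, pos):
    """Hypothetical wrapper around the same interval-containment test."""
    for (start, end) in deletions.get(patient, {}).get(sample, {}).get(chrom, []):
        if start <= pos <= end:
            return True
    return False

assert _position_is_deleted(_example_deletions, "Patient01", "SampleA", "5", 1500000)
assert not _position_is_deleted(_example_deletions, "Patient01", "SampleA", "5", 2500000)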


mutations = {}
(__, samplePatientMap) = lsl.getPatientSampleMap()
patientSampleMap = {}

with open(mutation_file, 'r') as csvfile:
    for lvec in csv.reader(csvfile):
        if "DNANum" in lvec[0]:
            continue
        (sample, __, __, chr, pos, ref, alt, is_snv, is_2p) = lvec[0:9]
        if (is_snv == "f"):
            continue
        if (is_2p == "f"):
            continue
    #    if ("N" in sample):
    #        continue
        refcnt = int(lvec[-2])
        bafcnt = int(lvec[-1])
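        # A hedged sketch of how a variant allele fraction could be derived at
        # this point, assuming bafcnt holds alternate-allele read counts and
        # refcnt reference read counts; _example_vaf is a hypothetical name,
        # not the script's own variable.
        _example_vaf = bafcnt / (refcnt + bafcnt) if (refcnt + bafcnt) > 0 else 0.0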
for patient in groupdata:
    for samples in groupdata[patient]:
        outfile.write(patient)
        outfile.write("\t" + groupdata[patient][samples]["matches_tree"])
        outfile.write("\t" + str(groupdata[patient][samples]["count"]))
        outfile.write("\t" + str(groupdata[patient][samples]["percentage"]))
        outfile.write("\t" + str(groupdata[patient][samples]["cnv_count"]))
        outfile.write("\t" +
                      str(groupdata[patient][samples]["cnv_percentage"]))
        for sample in samples:
            outfile.write("\t" + sample)
        outfile.write("\n")
outfile.close()
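# A minimal sketch of the groupdata layout the writer above expects: patient ->
# a tuple (or similar iterable) of sample names -> a dict holding the fields
# written per row. Purely hypothetical values for illustration.
_example_groupdata = {
    "Patient01": {
        ("SampleA", "SampleB"): {
            "matches_tree": "True",
            "count": 42,
            "percentage": 84.0,
            "cnv_count": 3,
            "cnv_percentage": 6.0,
        },
    },
}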

patientSampleMap, __ = lsl.getPatientSampleMap()
#Now do some analysis
types = ["Singleton", "Root", "Grouped", "Ungrouped"]
outfile = open(groupdir + outfilename, "w")
outfile.write("Patient\tSubclone SNV Threshhold\tnSNVmax\tGD Samples")
for type in types:
    outfile.write("\t" + type + " counts")
    outfile.write("\t" + type + " total")
outfile.write(
    "\tUngrouped potential subclone counts\tUngrouped potential subclone total\n"
)
for patient in groupdata:
    smallestSNVcount = 100000
    maxSNVcount = 0
    GDsamples = set()
    for sample in patientSampleMap[patient]:
Example #5
        print("Invalid match grid for patient", patient, "samples",
              str(samples), "at chr", str(chr), str(segpair))
    return (Nvec, Svec, allBalanced)


#Main routine:
CNlist = []
for (_, _, f) in walk(CN_input):
    CNlist += f

BAFlist = []
for bafin in BAF_input:
    for (_, _, f) in walk(bafin):
        BAFlist += f

(s2p, p2s) = lsl.getPatientSampleMap(dipvtet_file)
for patient in p2s:
    if somepatientsonly and patient not in somepatients:
        continue
    samples = p2s[patient]
    samples.sort()
    segments = getSegmentCalls(patient, samples, s2p, CNlist)
    allA = open(BEAST_output + patient + "_allA.txt", "w")
    allB = open(BEAST_output + patient + "_allB.txt", "w")
    writeHeader(allA, samples)
    writeHeader(allB, samples)
    chrs = list(segments.keys())
    chrs.sort()
    for chr in chrs:
        shouldSwitch = True
        prevN = [-1] * len(samples)