Ejemplo n.º 1
0
def getGenes(inputFiles):
    """Collect the highest log2 value per gene, variation type and patient.

    The first line of each file is turned into a header with
    ``utility.lineToHeader`` and every following line into a record with
    ``utility.lineToDict``. Only the ``gene``, ``patient`` and ``log2``
    fields of each record are read.

    Args:
        inputFiles: Iterable of file paths. The text after the last ``.`` in
            each path is used as that file's variation type.

    Returns:
        A ``(genes, variationTypes, patients)`` tuple:

        * ``genes`` maps ``gene -> variationType -> patient -> log2``, keeping
          the maximum ``log2`` seen for each combination.
        * ``variationTypes`` has one entry per input file, in input order
          (two files with the same suffix produce a repeated entry).
        * ``patients`` lists every patient once, in order of first
          appearance.
    """
    genes = {}
    variationTypes = []
    patients = []
    # Companion set for O(1) "already listed?" checks; ``patients`` itself
    # stays a list so the first-appearance order is preserved for callers.
    seenPatients = set()
    headsList = ['patient', 'gene', 'log2']

    for geneFile in inputFiles:
        with open(geneFile, 'r') as inFile:
            variationType = geneFile.split('.')[-1]
            variationTypes.append(variationType)
            header = utility.lineToHeader(inFile.readline(), headsList)

            for line in inFile:
                patGen = utility.lineToDict(line, header)
                gene = patGen['gene']
                patient = patGen['patient']
                log2 = patGen['log2']

                byPatient = genes.setdefault(gene, {}).setdefault(
                    variationType, {})

                if patient in byPatient:
                    # NOTE(review): no numeric conversion is requested from
                    # lineToDict here; if it returns strings this max() is
                    # lexicographic, not numeric -- confirm.
                    byPatient[patient] = max(byPatient[patient], log2)
                    continue

                byPatient[patient] = log2
                if patient not in seenPatients:
                    seenPatients.add(patient)
                    patients.append(patient)

    return genes, variationTypes, patients
Ejemplo n.º 2
0
def getSegments(segmentFile):
    """Return every data line of ``segmentFile`` as a parsed record.

    The first line is converted to a header with ``utility.lineToHeader``;
    each remaining line is converted with ``utility.lineToDict`` using that
    header.

    Args:
        segmentFile: Path of the file to read.

    Returns:
        A list with one ``utility.lineToDict`` result per data line, in file
        order.
    """
    with open(segmentFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())
        records = [utility.lineToDict(row, header) for row in inFile]
    return records
Ejemplo n.º 3
0
def getGenes(genesFile):
    """Return the ``name`` field of every data line in ``genesFile``.

    The header is built from the first line with ``utility.lineToHeader``,
    passing ``['name']`` as its second argument (presumably the columns of
    interest -- confirm against ``utility``).

    Args:
        genesFile: Path of the file to read.

    Returns:
        A list of ``name`` values, one per data line, in file order.
        Duplicates are kept.
    """
    wantedColumns = ['name']
    with open(genesFile, 'r') as genFile:
        header = utility.lineToHeader(genFile.readline(), wantedColumns)
        names = [utility.lineToDict(row, header)['name'] for row in genFile]
    return names
Ejemplo n.º 4
0
def genesFromFile(inputFile):
    """Return the distinct genes found in the ``genes`` field of a file.

    Each data line is parsed with ``utility.lineToDict`` and its ``genes``
    field is iterated.

    NOTE(review): this assumes ``utility.lineToDict`` yields ``genes`` as a
    collection of gene names; if it were a plain string the loop would walk
    its characters -- confirm against ``utility``.

    Args:
        inputFile: Path of the file to read.

    Returns:
        A list of genes without repetitions, in order of first appearance.
    """
    uniqueGenes = []

    with open(inputFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())
        for row in inFile:
            record = utility.lineToDict(row, header)
            for candidate in record['genes']:
                if candidate in uniqueGenes:
                    continue
                uniqueGenes.append(candidate)

    return uniqueGenes
Ejemplo n.º 5
0
def filterSegmentGenes(segmentsFile, geneList):
    """Keep only the segments that mention at least one gene of ``geneList``.

    Each data line is parsed with ``utility.lineToDict``. Its ``genes`` field
    is narrowed to the entries that are also in ``geneList``; segments left
    with no genes are dropped.

    Args:
        segmentsFile: Path of the file to read.
        geneList: Container of genes to keep (tested with ``in``).

    Returns:
        A list of the retained segments, in file order, each with its
        ``genes`` field replaced by the filtered list.
    """
    kept = []
    with open(segmentsFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())
        for row in inFile:
            record = utility.lineToDict(row, header)
            matching = [name for name in record['genes'] if name in geneList]
            if not matching:
                continue
            record['genes'] = matching
            kept.append(record)
    return kept
Ejemplo n.º 6
0
def getMergedSegments(inputFile, armsDict, maximum, minimum):
    """Read segments from a file and annotate each with arm and event type.

    Despite the name, nothing is merged here: every data line produces
    exactly one entry in the result. Each line is parsed with
    ``utility.lineToDict`` and then passed through ``addArm`` and
    ``addEventType``, in that order.

    Args:
        inputFile: Path of the file to read.
        armsDict: Forwarded unchanged to ``addArm``.
        maximum: Forwarded unchanged to ``addEventType``.
        minimum: Forwarded unchanged to ``addEventType``.

    Returns:
        A list holding the value returned by ``addEventType`` for each data
        line, in file order.
    """
    annotated = []

    with open(inputFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())

        for line in inFile:
            segment = utility.lineToDict(line, header)
            segment = addArm(segment, armsDict)
            annotated.append(addEventType(segment, maximum, minimum))

    return annotated
Ejemplo n.º 7
0
def getArmsInfo(armInputFile):
    """Load chromosome-arm records grouped by chromosome.

    Each data line is parsed with ``utility.lineToDict`` (``start`` and
    ``end`` are passed as its ``ints`` argument). Two fields are then
    derived on the record:

    * ``length`` is ``end - start``.
    * ``name`` is replaced by ``chrom`` concatenated with the original
      ``name``.

    Args:
        armInputFile: Path of the file to read.

    Returns:
        A dict mapping ``chrom -> arm name -> arm record``. A later line
        with the same chromosome and name overwrites the earlier one.
    """
    wantedColumns = ['chrom', 'start', 'end', 'name']
    intColumns = ['start', 'end']
    armsByChrom = {}

    with open(armInputFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline(), wantedColumns)

        for row in inFile:
            arm = utility.lineToDict(row, header, ints=intColumns)
            arm['length'] = arm['end'] - arm['start']
            chrom = arm['chrom']
            fullName = chrom + arm['name']
            arm['name'] = fullName
            armsByChrom.setdefault(chrom, {})[fullName] = arm

    return armsByChrom
Ejemplo n.º 8
0
def annoteSegments(segmentFile, annoteDict):
    """Attach genes to the non-neutral segments of a file.

    Each data line is parsed with ``utility.lineToDict`` and given an empty
    ``genes`` list. Segments whose ``type`` is ``'Neu'`` are skipped. Every
    other segment is passed through ``geneOverlap`` once per entry of
    ``annoteDict[segment['chrom']]``; the segment is kept only if its
    ``genes`` field is non-empty afterwards.

    Args:
        segmentFile: Path of the file to read.
        annoteDict: Mapping indexed by the segment's ``chrom`` value; each
            value is iterated and its items are handed to ``geneOverlap``.
            A chromosome missing from the mapping is not handled here, so
            the lookup error propagates.

    Returns:
        A list of the retained segments, in file order.
    """
    annotated = []
    with open(segmentFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())

        for row in inFile:
            segment = utility.lineToDict(row, header)
            segment['genes'] = []

            if segment['type'] != 'Neu':
                for candidate in annoteDict[segment['chrom']]:
                    # geneOverlap returns the (possibly updated) segment.
                    segment = geneOverlap(segment, candidate)
                if segment['genes']:
                    annotated.append(segment)
    return annotated
Ejemplo n.º 9
0
def getSegmentsFromFile(inputFile, segs, patientId):
    """Accumulate one patient's segments into ``segs``, keyed by start/end.

    ``segs`` is a two-level mapping ``start -> end -> segment``. For each
    data line:

    * If a segment with the same ``start`` and ``end`` is already stored,
      the line's ``log2`` and ``pVal`` are appended to the stored segment's
      lists. The stored segment keeps its other fields, including the
      ``patient`` it was first stored with.
    * Otherwise the new segment is stored with ``log2`` and ``pVal`` each
      wrapped in a one-element list, and ``patient`` set to ``patientId``.

    Args:
        inputFile: Path of the file to read.
        segs: Mapping to update; it is mutated in place.
        patientId: Value stored under ``patient`` on each parsed segment.

    Returns:
        The same ``segs`` object that was passed in.
    """
    with open(inputFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())
        for row in inFile:
            record = utility.lineToDict(row, header)
            record['patient'] = patientId
            start = record['start']
            end = record['end']

            startKnown = start in segs
            if startKnown and end in segs[start]:
                stored = segs[start][end]
                stored['log2'].append(record['log2'])
                stored['pVal'].append(record['pVal'])
                continue

            # First time this (start, end) pair is seen: the values become
            # lists so that later files can append to them.
            record['log2'] = [record['log2']]
            record['pVal'] = [record['pVal']]
            if startKnown:
                segs[start][end] = record
            else:
                segs[start] = {end: record}
    return segs
Ejemplo n.º 10
0
def getAnnotationInfo(annoteFile):
    """Load gene annotation records grouped by chromosome.

    Each data line is parsed with ``utility.lineToDict`` (``exonCount``,
    ``start`` and ``end`` are passed as its third argument). The record is
    then normalised:

    * ``chrom`` loses its first three characters (presumably a ``chr``
      prefix -- confirm against the input format).
    * ``exonStarts`` and ``exonEnds`` are split on ``,``, the last piece is
      discarded, and the remaining pieces are converted to ``int``.

    Args:
        annoteFile: Path of the annotation file to read.

    Returns:
        A dict mapping the trimmed ``chrom`` value to the list of gene
        records on that chromosome, in file order.
    """
    wantedColumns = [
        'name2', 'chrom', 'start', 'end', 'exonCount', 'exonStarts', 'exonEnds'
    ]
    intColumns = ['exonCount', 'start', 'end']
    genesByChrom = {}

    with open(annoteFile, 'r') as annFile:
        header = utility.lineToHeader(annFile.readline(), wantedColumns)

        for row in annFile:
            record = utility.lineToDict(row, header, intColumns)
            record['chrom'] = record['chrom'][3:]
            for field in ('exonStarts', 'exonEnds'):
                # [:-1] drops whatever follows the final comma.
                pieces = record[field].split(',')[:-1]
                record[field] = [int(piece) for piece in pieces]

            genesByChrom.setdefault(record['chrom'], []).append(record)
    return genesByChrom