def getGenes(inputFiles):
    """Collect the largest log2 value per gene, variation type and patient.

    Every file in ``inputFiles`` is read with the ``utility`` header/row
    helpers. The text after the last ``.`` of the file name is used as the
    variation type for all rows of that file.

    NOTE(review): another ``getGenes`` (taking a single genes file) appears
    later in this file; if both are module-level definitions the later one
    replaces this one at import time -- confirm which is intended.

    Args:
        inputFiles: iterable of paths to the gene files to read.

    Returns:
        A tuple ``(genes, variationTypes, patients)``:
        ``genes`` maps gene -> variation type -> patient -> log2,
        ``variationTypes`` has one entry per input file (in input order),
        ``patients`` lists every patient once, in first-seen order.
    """
    genes = {}
    variationTypes = []
    patients = []
    # Companion set so the "already recorded?" check is O(1) per row instead
    # of scanning the ``patients`` list; patients are already used as dict
    # keys below, so they are hashable.
    seenPatients = set()
    headsList = ['patient', 'gene', 'log2']
    for geneFile in inputFiles:
        with open(geneFile, 'r') as inFile:
            variationType = geneFile.split('.')[-1]
            variationTypes.append(variationType)
            header = utility.lineToHeader(inFile.readline(), headsList)
            for line in inFile:
                patGen = utility.lineToDict(line, header)
                gene = patGen['gene']
                patient = patGen['patient']
                log2 = patGen['log2']
                perPatient = genes.setdefault(gene, {}).setdefault(
                    variationType, {})
                if patient in perPatient:
                    # NOTE(review): max() compares whatever type lineToDict
                    # yields for 'log2'; if that is str the comparison is
                    # lexicographic -- confirm the values are numeric.
                    perPatient[patient] = max(perPatient[patient], log2)
                else:
                    perPatient[patient] = log2
                    if patient not in seenPatients:
                        seenPatients.add(patient)
                        patients.append(patient)
    return genes, variationTypes, patients
def getSegments(segmentFile):
    """Load all data rows of a segment file.

    The first line is turned into a header with ``utility.lineToHeader``;
    every following line is converted with ``utility.lineToDict``.

    Args:
        segmentFile: path of the file to read.

    Returns:
        A list with one ``utility.lineToDict`` result per data line, in
        file order.
    """
    with open(segmentFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline())
        return [utility.lineToDict(row, columns) for row in handle]
def getGenes(genesFile):
    """Return the ``name`` value of every data row in ``genesFile``.

    NOTE(review): an earlier ``getGenes`` (taking a list of input files)
    appears above in this file; if both are module-level definitions this
    one replaces it at import time -- confirm which is intended.

    Args:
        genesFile: path of the file to read; its first line is the header.

    Returns:
        A list of the ``'name'`` entries, in file order (duplicates kept).
    """
    wantedColumns = ['name']
    with open(genesFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline(), wantedColumns)
        return [utility.lineToDict(row, columns)['name'] for row in handle]
def genesFromFile(inputFile):
    """List each distinct gene found in the ``genes`` field of a file.

    Args:
        inputFile: path of the file to read; its first line is the header.

    Returns:
        A list of the items found while iterating each row's ``'genes'``
        value, without repeats and in first-seen order.
    """
    uniqueGenes = []
    with open(inputFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline())
        for row in handle:
            record = utility.lineToDict(row, columns)
            # NOTE(review): assumes lineToDict yields an iterable of gene
            # entries for 'genes' (not a plain string) -- confirm.
            for name in record['genes']:
                if name in uniqueGenes:
                    continue
                uniqueGenes.append(name)
    return uniqueGenes
def filterSegmentGenes(segmentsFile, geneList):
    """Keep only segments that carry at least one gene from ``geneList``.

    Args:
        segmentsFile: path of the segment file; its first line is the header.
        geneList: container of genes to keep (tested with ``in``).

    Returns:
        A list of row dicts whose ``'genes'`` entry has been replaced by the
        subset present in ``geneList``; rows with no match are left out.
    """
    kept = []
    with open(segmentsFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline())
        for row in handle:
            record = utility.lineToDict(row, columns)
            matching = [name for name in record['genes'] if name in geneList]
            if not matching:
                # Nothing of interest on this segment: drop it.
                continue
            record['genes'] = matching
            kept.append(record)
    return kept
def getMergedSegments(inputFile, armsDict, maximum, minimum):
    """Read segments and tag each one via ``addArm`` and ``addEventType``.

    Despite the name, this function does not merge anything itself: every
    data row is passed through ``addArm(segment, armsDict)`` and then
    ``addEventType(segment, maximum, minimum)`` and collected as-is.

    Args:
        inputFile: path of the segment file; its first line is the header.
        armsDict: forwarded unchanged to ``addArm``.
        maximum: forwarded unchanged to ``addEventType``.
        minimum: forwarded unchanged to ``addEventType``.

    Returns:
        A list with one processed segment per data line, in file order.
    """
    mergedSegs = []
    with open(inputFile, 'r') as inFile:
        header = utility.lineToHeader(inFile.readline())
        for line in inFile:
            segment = utility.lineToDict(line, header)
            segment = addArm(segment, armsDict)
            segment = addEventType(segment, maximum, minimum)
            mergedSegs.append(segment)
    return mergedSegs
def getArmsInfo(armInputFile):
    """Index the rows of an arm file by chromosome and arm name.

    For every row, ``'length'`` is set to ``end - start`` and ``'name'`` is
    rewritten as the chromosome value followed by the original name.

    Args:
        armInputFile: path of the file; its first line is the header.

    Returns:
        A dict mapping chrom -> (prefixed) arm name -> row dict.
    """
    wantedColumns = ['chrom', 'start', 'end', 'name']
    intColumns = ['start', 'end']
    byChrom = {}
    with open(armInputFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline(), wantedColumns)
        for row in handle:
            record = utility.lineToDict(row, columns, ints=intColumns)
            record['length'] = record['end'] - record['start']
            chrom = record['chrom']
            fullName = chrom + record['name']
            record['name'] = fullName
            byChrom.setdefault(chrom, {})[fullName] = record
    return byChrom
def annoteSegments(segmentFile, annoteDict):
    """Attach overlapping genes to the non-neutral segments of a file.

    Rows whose ``'type'`` equals ``'Neu'`` are skipped. Every other row is
    passed, together with each gene listed for its chromosome, through
    ``geneOverlap``; the row is kept only if its ``'genes'`` entry ends up
    non-empty.

    Args:
        segmentFile: path of the segment file; its first line is the header.
        annoteDict: mapping chrom -> iterable of genes. A chromosome absent
            from this mapping raises ``KeyError``.

    Returns:
        A list of the segments that ended up with at least one gene.
    """
    annotated = []
    with open(segmentFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline())
        for row in handle:
            record = utility.lineToDict(row, columns)
            record['genes'] = []
            if record['type'] == 'Neu':
                continue
            candidates = annoteDict[record['chrom']]
            for candidate in candidates:
                # geneOverlap hands back the (possibly updated) segment.
                record = geneOverlap(record, candidate)
            if record['genes']:
                annotated.append(record)
    return annotated
def getSegmentsFromFile(inputFile, segs, patientId):
    """Fold the segments of one file into the ``segs`` index.

    ``segs`` is keyed by start, then by end. The first row seen for a given
    (start, end) pair is stored with its ``'log2'`` and ``'pVal'`` values
    wrapped in one-element lists; later rows with the same pair only append
    their ``'log2'`` and ``'pVal'`` to those lists.

    Args:
        inputFile: path of the segment file; its first line is the header.
        segs: dict of dicts to update in place (start -> end -> segment).
        patientId: value stored under ``'patient'`` on every row read.

    Returns:
        The same ``segs`` object, after the update.
    """
    with open(inputFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline())
        for row in handle:
            record = utility.lineToDict(row, columns)
            record['patient'] = patientId
            start = record['start']
            end = record['end']

            if start in segs and end in segs[start]:
                # Known coordinates: only accumulate the measurements.
                segs[start][end]['log2'].append(record['log2'])
                segs[start][end]['pVal'].append(record['pVal'])
                continue

            # New coordinates: this row becomes the stored segment.
            record['log2'] = [record['log2']]
            record['pVal'] = [record['pVal']]
            if start in segs:
                segs[start][end] = record
            else:
                segs[start] = {end: record}
    return segs
def getAnnotationInfo(annoteFile):
    """Group the gene rows of an annotation file by chromosome.

    For each row the first three characters of ``'chrom'`` are removed
    (presumably a ``chr`` prefix -- confirm against the input format), and
    ``'exonStarts'`` / ``'exonEnds'`` are split on commas, the last piece is
    discarded (presumably the empty string left by a trailing comma --
    confirm), and the rest are converted to ``int``.

    Args:
        annoteFile: path of the annotation file; its first line is the
            header.

    Returns:
        A dict mapping the shortened chrom value to the list of its gene
        rows, in file order.
    """
    wantedColumns = [
        'name2', 'chrom', 'start', 'end',
        'exonCount', 'exonStarts', 'exonEnds'
    ]
    intColumns = ['exonCount', 'start', 'end']
    byChrom = {}
    with open(annoteFile, 'r') as handle:
        columns = utility.lineToHeader(handle.readline(), wantedColumns)
        for row in handle:
            record = utility.lineToDict(row, columns, intColumns)
            record['chrom'] = record['chrom'][3:]
            for key in ('exonStarts', 'exonEnds'):
                pieces = record[key].split(',')[:-1]
                record[key] = [int(piece) for piece in pieces]
            byChrom.setdefault(record['chrom'], []).append(record)
    return byChrom