def _writeMuts2Tsv(self, muts, path):
    """
    Given a mutation generator, this method writes a tab separated file for all mutations in the
    mutation generator. In addition, this method computes the appropriate sample name in scenarios
    where the mutation is missing the sample name annotation. It also collects all chromosomes and
    sample names seen in the generator.

    :param muts: generator object with mutations
    :param path: directory in which the temporary tsv file is created
    :return: tuple of (list of chromosomes, sorted list of sample names, name of the temporary tsv file)
    """
    sampleNames = set()
    chroms = set()
    writer = None

    # Reserve a uniquely named file inside path. Only its name is needed, so the handle is closed
    # immediately (delete=False keeps the file on disk); this avoids leaking the descriptor and
    # holding two open handles on the same file while it is being written below.
    tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
    tempTsvFile.close()
    tempTsvFilename = tempTsvFile.name
    self.logger.debug("Creating intermediate tsv file at %s" % tempTsvFilename)

    sampleNameSelector = SampleNameSelector(self.mutation,
                                            configFile=self.configTable.getConfigFilename(),
                                            section="OTHER")
    sampleNameAnnotationName = sampleNameSelector.getOutputAnnotationName()
    sampleNameSource = sampleNameSelector.getAnnotationSource()

    with open(tempTsvFilename, 'w') as fptr:
        for ctr, mut in enumerate(muts):
            sampleName = sampleNameSelector.getSampleName(mut)
            if sampleName is not None:
                # Only annotate when the mutation does not already carry a sample name value.
                if mut.get(sampleNameAnnotationName, None) is None:
                    mut.createAnnotation(sampleNameAnnotationName, sampleName, sampleNameSource)
                sampleNames.add(sampleName)

            # Parse chromosome
            chroms.add(mut.chr)

            # Replace the coordinates/alleles with the values used for rendering.
            updated_start, updated_ref_allele, updated_alt_allele = \
                MutUtils.retrieveMutCoordinatesForRendering(mut)
            mut.ref_allele = updated_ref_allele
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start

            if writer is None:
                # The header is derived from the first mutation only. Work on a copy so the list
                # returned by MutUtils is not modified.
                fieldnames2Render = list(MutUtils.getAllAttributeNames(mut))
                if sampleNameAnnotationName is not None:
                    fieldnames2Render.append(sampleNameAnnotationName)
                # fieldnames that start "_" aren't rendered. Build a new list instead of removing
                # entries while iterating, which would skip the name after each removed one.
                fieldnames2Render = [fieldname for fieldname in fieldnames2Render
                                     if not fieldname.startswith("_")]
                writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore',
                                        delimiter=self.delimiter,
                                        lineterminator=self.lineterminator)
                writer.writeheader()

            writer.writerow(mut)

            numWritten = ctr + 1
            if (numWritten % 1000) == 0:
                self.logger.info("Wrote " + str(numWritten) + " mutations to tsv.")

    return list(chroms), sorted(sampleNames), tempTsvFilename
def _writeMuts2Tsv(self, muts, path):
    """
    Given a mutation generator, this method writes a tab separated file for all mutations in the
    mutation generator. In addition, this method computes the appropriate sample name in scenarios
    where the mutation is missing the sample name annotation. It also collects all chromosomes and
    sample names seen in the generator.

    The sample name is resolved in this order of precedence:
      1. the first "SAMPLE_NAME" annotation, used as is;
      2. the first normal and tumor sample name annotations, joined as "<normal>-<tumor>";
      3. the first tumor sample name annotation alone;
      4. the first normal sample name annotation alone.
    In cases 2-4 the resolved name is stored on the mutation under
    MutUtils.SAMPLE_NAME_ANNOTATION_NAME.

    :param muts: generator object with mutations
    :param path: directory in which the temporary tsv file is created
    :return: tuple of (list of chromosomes, sorted list of sample names, name of the temporary tsv file)
    """
    sampleNames = set()
    chroms = set()
    writer = None

    # Reserve a uniquely named file inside path. Only its name is needed, so the handle is closed
    # immediately (delete=False keeps the file on disk); this avoids leaking the descriptor and
    # holding two open handles on the same file while it is being written below.
    tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
    tempTsvFile.close()
    tempTsvFilename = tempTsvFile.name
    self.logger.info("Creating intermediate tsv file...")

    sampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_NAME")
    tumorSampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_TUMOR_NAME")
    normalSampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_NORMAL_NAME")

    with open(tempTsvFilename, 'w') as fptr:
        for ctr, mut in enumerate(muts):
            # Informational messages about the sample name source are logged once only.
            isFirstMut = (ctr == 0)
            sampleName = None
            sampleNameAnnotationName = None

            # Sample name annotation is present
            if sampleNameAnnotationNames:
                sampleNameAnnotationName = sampleNameAnnotationNames[0]
                sampleName = mut[sampleNameAnnotationName]
            # Both, tumor and normal sample name annotations are present
            elif tumorSampleNameAnnotationNames and normalSampleNameAnnotationNames:
                tumorSampleNameAnnotationName = tumorSampleNameAnnotationNames[0]
                normalSampleNameAnnotationName = normalSampleNameAnnotationNames[0]
                # str.join replaces the Python 2 only string.join(); the result is identical.
                sampleName = "-".join([mut[normalSampleNameAnnotationName],
                                       mut[tumorSampleNameAnnotationName]])
                sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                mut.createAnnotation(sampleNameAnnotationName, sampleName, "OUTPUT")
                if isFirstMut:
                    self.logger.info("Sample name is the concatenation of %s and %s columns."
                                     % (normalSampleNameAnnotationName, tumorSampleNameAnnotationName))
            # Only tumor sample name is present
            elif tumorSampleNameAnnotationNames:
                tumorSampleNameAnnotationName = tumorSampleNameAnnotationNames[0]
                sampleName = mut[tumorSampleNameAnnotationName]
                sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                mut.createAnnotation(sampleNameAnnotationName, sampleName, "INPUT")
                if isFirstMut:
                    self.logger.info("Sample name is %s column." % tumorSampleNameAnnotationName)
            # Only normal sample name is present
            elif normalSampleNameAnnotationNames:
                normalSampleNameAnnotationName = normalSampleNameAnnotationNames[0]
                sampleName = mut[normalSampleNameAnnotationName]
                sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                mut.createAnnotation(sampleNameAnnotationName, sampleName, "INPUT")
                if isFirstMut:
                    self.logger.info("Sample name is %s column." % normalSampleNameAnnotationName)

            if sampleName is not None:
                sampleNames.add(sampleName)

            # Parse chromosome
            chroms.add(mut.chr)

            # Replace the coordinates/alleles with the values used for rendering.
            updated_start, updated_ref_allele, updated_alt_allele = \
                MutUtils.retrieveMutCoordinatesForRendering(mut)
            mut.ref_allele = updated_ref_allele
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start

            if writer is None:
                # The header is derived from the first mutation only. Work on a copy so the list
                # returned by MutUtils is not modified.
                fieldnames2Render = list(MutUtils.getAllAttributeNames(mut))
                if sampleNameAnnotationName is not None:
                    fieldnames2Render.append(sampleNameAnnotationName)
                # fieldnames that start "_" aren't rendered. Build a new list instead of removing
                # entries while iterating, which would skip the name after each removed one.
                fieldnames2Render = [fieldname for fieldname in fieldnames2Render
                                     if not fieldname.startswith("_")]
                writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore',
                                        delimiter=self.delimiter,
                                        lineterminator=self.lineterminator)
                writer.writeheader()

            writer.writerow(mut)

            numWritten = ctr + 1
            if (numWritten % 1000) == 0:
                self.logger.info("Wrote " + str(numWritten) + " mutations to tsv.")

    return list(chroms), sorted(sampleNames), tempTsvFilename