def convertReadAlignment(self, read):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment.

    :param read: the pysam read to convert. Its flag, position, CIGAR,
        tag and mate attributes are read; the read is not modified.
    :return: a newly created ``protocol.ReadAlignment``.

    Notes on the produced record:

    * ``alignedQuality`` is an empty list when the read has no base
      qualities.
    * ``alignment`` is None when the read is flagged as unmapped.
    * ``nextMatePosition`` is None when ``read.next_reference_id`` is -1.
    * ``numberReads`` and ``readNumber`` stay None unless the
      corresponding SAM flags are set.
    """
    def strandFor(isReverse):
        # Map a reverse-strand flag bit onto the GA4GH strand enum.
        if isReverse:
            return protocol.Strand.NEG_STRAND
        return protocol.Strand.POS_STRAND

    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()

    # query_qualities is None for reads stored without qualities, and
    # list(None) would raise a TypeError.
    qualities = read.query_qualities
    ret.alignedQuality = [] if qualities is None else list(qualities)
    ret.alignedSequence = read.query_sequence

    if read.is_unmapped:
        # An unmapped read has no linear alignment to report; looking up
        # a reference name for it may also fail outright.
        ret.alignment = None
    else:
        ret.alignment = protocol.LinearAlignment()
        ret.alignment.mappingQuality = read.mapping_quality
        ret.alignment.position = protocol.Position()
        ret.alignment.position.referenceName = self._samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = strandFor(read.is_reverse)
        ret.alignment.cigar = []
        for operation, length in read.cigar:
            gaCigarUnit = protocol.CigarUnit()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operationLength = length
            gaCigarUnit.referenceSequence = None  # TODO fix this!
            ret.alignment.cigar.append(gaCigarUnit)

    ret.duplicateFragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_FRAGMENT)
    ret.failedVendorQualityChecks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_VENDOR_QUALITY_CHECKS)
    ret.fragmentLength = read.template_length
    ret.fragmentName = read.query_name
    ret.id = "{}:{}".format(self._id, read.query_name)
    # Every tag value is stringified and wrapped in a one-element list.
    ret.info = {key: [str(value)] for key, value in read.tags}

    ret.nextMatePosition = None
    if read.next_reference_id != -1:
        ret.nextMatePosition = protocol.Position()
        ret.nextMatePosition.referenceName = self._samFile.getrname(
            read.next_reference_id)
        ret.nextMatePosition.position = read.next_reference_start
        ret.nextMatePosition.strand = strandFor(read.mate_is_reverse)

    # TODO Is this the correct mapping between numberReads and
    # sam flag 0x1? What about the mapping between numberReads
    # and 0x40 and 0x80?
    ret.numberReads = None
    ret.readNumber = None
    if SamFlags.isFlagSet(read.flag, SamFlags.NUMBER_READS):
        ret.numberReads = 2
        # The position within the fragment is only filled in for reads
        # that are flagged as belonging to a multi-read fragment.
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_ONE):
            ret.readNumber = 0
        elif SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_TWO):
            ret.readNumber = 1

    ret.properPlacement = SamFlags.isFlagSet(
        read.flag, SamFlags.PROPER_PLACEMENT)
    ret.readGroupId = self._id
    ret.secondaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    return ret
def convertReadAlignment(self, read):
    """
    Build the GA4GH ReadAlignment corresponding to a pysam read.

    :param read: the pysam read to translate; it is only read from.
    :return: a new ``protocol.ReadAlignment`` whose ``id`` comes from
        ``self.getReadAlignmentId`` and whose ``readGroupId`` comes from
        ``self.getId``.

    ``alignment`` is None for reads flagged unmapped, and
    ``nextMatePosition`` is None for reads whose mate is flagged unmapped.
    """
    def hasFlag(bit):
        # Test a single SamFlags bit against this read's flag field.
        return SamFlags.isFlagSet(read.flag, bit)

    def strandFor(reverseBit):
        # Negative strand when the given reverse bit is set, else positive.
        if hasFlag(reverseBit):
            return protocol.Strand.NEG_STRAND
        return protocol.Strand.POS_STRAND

    def toCigarUnit(operation, length):
        unit = protocol.CigarUnit()
        unit.operation = SamCigar.int2ga(operation)
        unit.operationLength = length
        unit.referenceSequence = None  # TODO fix this!
        return unit

    samFile = self._parentContainer.getFileHandle(self._parentSamFilePath)
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    gaRead = protocol.ReadAlignment()
    gaRead.fragmentId = 'TODO'

    # Reads without base qualities yield an empty quality list.
    baseQualities = read.query_qualities
    gaRead.alignedQuality = (
        [] if baseQualities is None else list(baseQualities))
    gaRead.alignedSequence = read.query_sequence

    if hasFlag(SamFlags.READ_UNMAPPED):
        gaRead.alignment = None
    else:
        position = protocol.Position()
        position.referenceName = samFile.getrname(read.reference_id)
        position.position = read.reference_start
        position.strand = strandFor(SamFlags.READ_REVERSE_STRAND)

        alignment = protocol.LinearAlignment()
        alignment.mappingQuality = read.mapping_quality
        alignment.position = position
        alignment.cigar = [
            toCigarUnit(operation, length)
            for operation, length in read.cigar]
        gaRead.alignment = alignment

    gaRead.duplicateFragment = hasFlag(SamFlags.DUPLICATE_READ)
    gaRead.failedVendorQualityChecks = hasFlag(
        SamFlags.FAILED_QUALITY_CHECK)
    gaRead.fragmentLength = read.template_length
    gaRead.fragmentName = read.query_name
    # Each tag value is stringified and wrapped in a one-element list.
    gaRead.info = {tag: [str(tagValue)] for tag, tagValue in read.tags}

    if hasFlag(SamFlags.MATE_UNMAPPED):
        gaRead.nextMatePosition = None
    else:
        matePosition = protocol.Position()
        mateReferenceId = read.next_reference_id
        if mateReferenceId == -1:
            # No mate reference recorded: report an empty name.
            matePosition.referenceName = ""
        else:
            matePosition.referenceName = samFile.getrname(mateReferenceId)
        matePosition.position = read.next_reference_start
        matePosition.strand = strandFor(SamFlags.MATE_REVERSE_STRAND)
        gaRead.nextMatePosition = matePosition

    gaRead.numberReads = 2 if hasFlag(SamFlags.READ_PAIRED) else 1

    # readNumber: 0 for first only, 1 for second only, 2 when both bits
    # are set, None when neither is.
    isFirst = hasFlag(SamFlags.FIRST_IN_PAIR)
    isSecond = hasFlag(SamFlags.SECOND_IN_PAIR)
    if isFirst and isSecond:
        gaRead.readNumber = 2
    elif isFirst:
        gaRead.readNumber = 0
    elif isSecond:
        gaRead.readNumber = 1
    else:
        gaRead.readNumber = None

    gaRead.properPlacement = hasFlag(SamFlags.READ_PROPER_PAIR)
    gaRead.readGroupId = self.getId()
    gaRead.secondaryAlignment = hasFlag(SamFlags.SECONDARY_ALIGNMENT)
    gaRead.supplementaryAlignment = hasFlag(
        SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is derived from the populated record, so it is assigned last.
    gaRead.id = self.getReadAlignmentId(gaRead)
    return gaRead