def convertReadAlignment(self, read):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    Reads that carry no base qualities get an empty alignedQuality list,
    and reads whose reference_id is -1 get alignment set to None rather
    than a reference-name lookup for id -1. Both positions are always
    reported on POS_STRAND and every cigar unit has referenceSequence
    None (see the TODOs below).
    """
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # query_qualities is None when the record stores no qualities;
    # list(None) would raise TypeError.
    qualities = read.query_qualities
    ret.alignedQuality = [] if qualities is None else list(qualities)
    ret.alignedSequence = read.query_sequence
    if read.reference_id == -1:
        # No reference to name: same guard as used for the mate below.
        ret.alignment = None
    else:
        ret.alignment = protocol.LinearAlignment()
        ret.alignment.mappingQuality = read.mapping_quality
        ret.alignment.position = protocol.Position()
        ret.alignment.position.referenceName = self._samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = \
            protocol.Strand.POS_STRAND  # TODO fix this!
        ret.alignment.cigar = []
        for operation, length in read.cigar:
            gaCigarUnit = protocol.CigarUnit()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operationLength = length
            gaCigarUnit.referenceSequence = None  # TODO fix this!
            ret.alignment.cigar.append(gaCigarUnit)
    ret.duplicateFragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_FRAGMENT)
    ret.failedVendorQualityChecks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_VENDOR_QUALITY_CHECKS)
    ret.fragmentLength = read.template_length
    ret.fragmentName = read.query_name
    ret.id = "{}:{}".format(self._id, read.query_name)
    ret.info = {key: [str(value)] for key, value in read.tags}
    ret.nextMatePosition = None
    if read.next_reference_id != -1:
        ret.nextMatePosition = protocol.Position()
        ret.nextMatePosition.referenceName = self._samFile.getrname(
            read.next_reference_id)
        ret.nextMatePosition.position = read.next_reference_start
        ret.nextMatePosition.strand = \
            protocol.Strand.POS_STRAND  # TODO fix this!
    # TODO Is this the correct mapping between numberReads and
    # sam flag 0x1? What about the mapping between numberReads
    # and 0x40 and 0x80?
    # Both fields stay None unless the NUMBER_READS flag is set.
    ret.numberReads = None
    ret.readNumber = None
    if SamFlags.isFlagSet(read.flag, SamFlags.NUMBER_READS):
        ret.numberReads = 2
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_ONE):
            ret.readNumber = 0
        elif SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_TWO):
            ret.readNumber = 1
    ret.properPlacement = SamFlags.isFlagSet(
        read.flag, SamFlags.PROPER_PLACEMENT)
    ret.readGroupId = self._id
    ret.secondaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    return ret
def _createReadAlignment(self, i, seed):
    """
    Build a simulated ReadAlignment whose random content is driven by seed.

    The fragment length (10 to 100), the per-base qualities (1 to 20) and
    the bases (drawn from "ACGT") all come from random.Random(seed), so
    the same seed yields the same length, qualities and sequence.

    :param i: index used to build fragmentName ("simulated<i>")
    :param seed: seed for the random generator; also used in fragmentId
    :return: the populated protocol.ReadAlignment
    """
    # TODO fill out a bit more
    rng = random.Random(seed)
    alignment = protocol.ReadAlignment()
    alignment.fragmentLength = rng.randint(10, 100)
    qualities = []
    bases = []
    # The loop variable must not be named ``i``: that would overwrite the
    # parameter and make fragmentName depend on the fragment length.
    for _ in range(alignment.fragmentLength):
        # Keep the quality/base draws interleaved so the values produced
        # for a given seed stay the same.
        # TODO: are these reasonable quality values?
        qualities.append(rng.randint(1, 20))
        bases.append(rng.choice("ACGT"))
    alignment.alignedQuality = qualities
    alignment.alignedSequence = "".join(bases)
    alignment.fragmentId = "frag{}".format(seed)
    gaPosition = protocol.Position()
    gaPosition.position = 0
    gaPosition.referenceName = "NotImplemented"
    gaPosition.strand = protocol.Strand.POS_STRAND
    gaLinearAlignment = protocol.LinearAlignment()
    gaLinearAlignment.position = gaPosition
    alignment.alignment = gaLinearAlignment
    alignment.duplicateFragment = False
    alignment.failedVendorQualityChecks = False
    alignment.fragmentName = "simulated{}".format(i)
    alignment.info = {}
    alignment.nextMatePosition = None
    alignment.numberReads = None
    alignment.properPlacement = False
    alignment.readGroupId = self.getId()
    alignment.readNumber = None
    alignment.secondaryAlignment = False
    alignment.supplementaryAlignment = False
    # Computed last, once every other field has been set.
    alignment.id = self.getReadAlignmentId(alignment)
    return alignment
def _createReadAlignment(self, i):
    """
    Build a fixed, three-base simulated ReadAlignment.

    Only fragmentName ("simulated<i>"), readGroupId and id vary between
    calls; every other field is a constant placeholder.
    """
    # TODO fill out a bit more
    simulated = protocol.ReadAlignment()
    simulated.alignedQuality = [1, 2, 3]
    simulated.alignedSequence = "ACT"
    simulated.fragmentId = 'TODO'

    # Placeholder position wrapped in a linear alignment.
    origin = protocol.Position()
    origin.position = 0
    origin.referenceName = "NotImplemented"
    origin.strand = protocol.Strand.POS_STRAND
    linear = protocol.LinearAlignment()
    linear.position = origin
    simulated.alignment = linear

    simulated.fragmentLength = 3
    simulated.fragmentName = "simulated{}".format(i)
    simulated.info = {}
    simulated.readGroupId = self.getId()

    # Boolean properties are all off for simulated reads.
    for flagField in (
            "duplicateFragment", "failedVendorQualityChecks",
            "properPlacement", "secondaryAlignment",
            "supplementaryAlignment"):
        setattr(simulated, flagField, False)

    # Mate and pairing information is left unset.
    for emptyField in ("nextMatePosition", "numberReads", "readNumber"):
        setattr(simulated, emptyField, None)

    # Computed last, once every other field has been set.
    simulated.id = self.getReadAlignmentId(simulated)
    return simulated
def generateReadAlignment(position=0, sequence='abc'):
    """
    Return a ReadAlignment holding only a start position and a sequence.

    :param position: stored in alignment.position.position
    :param sequence: stored in alignedSequence
    """
    readAlignment = protocol.ReadAlignment()
    linear = protocol.LinearAlignment()
    start = protocol.Position()
    start.position = position
    linear.position = start
    readAlignment.alignment = linear
    readAlignment.alignedSequence = sequence
    return readAlignment
def convertReadAlignment(self, read):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    alignment is None when the READ_UNMAPPED flag is set, and
    nextMatePosition is None when the MATE_UNMAPPED flag is set.
    readNumber is 0 for first-in-pair only, 1 for second-in-pair only,
    2 when both flags are set and None when neither is.
    """
    samFile = self._parentContainer.getFileHandle(self._parentSamFilePath)

    def hasFlag(samFlag):
        # Shorthand for testing one SAM flag bit on this read.
        return SamFlags.isFlagSet(read.flag, samFlag)

    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    ret.fragmentId = 'TODO'
    baseQualities = read.query_qualities
    ret.alignedQuality = [] if baseQualities is None else list(baseQualities)
    ret.alignedSequence = read.query_sequence

    # Linear alignment: only built for mapped reads.
    linearAlignment = None
    if not hasFlag(SamFlags.READ_UNMAPPED):
        linearAlignment = protocol.LinearAlignment()
        linearAlignment.mappingQuality = read.mapping_quality
        mappedPosition = protocol.Position()
        mappedPosition.referenceName = samFile.getrname(read.reference_id)
        mappedPosition.position = read.reference_start
        if hasFlag(SamFlags.READ_REVERSE_STRAND):
            mappedPosition.strand = protocol.Strand.NEG_STRAND
        else:
            mappedPosition.strand = protocol.Strand.POS_STRAND
        linearAlignment.position = mappedPosition
        cigarUnits = []
        for cigarOp, opLength in read.cigar:
            unit = protocol.CigarUnit()
            unit.operation = SamCigar.int2ga(cigarOp)
            unit.operationLength = opLength
            unit.referenceSequence = None  # TODO fix this!
            cigarUnits.append(unit)
        linearAlignment.cigar = cigarUnits
    ret.alignment = linearAlignment

    ret.duplicateFragment = hasFlag(SamFlags.DUPLICATE_READ)
    ret.failedVendorQualityChecks = hasFlag(SamFlags.FAILED_QUALITY_CHECK)
    ret.fragmentLength = read.template_length
    ret.fragmentName = read.query_name
    ret.info = dict((key, [str(value)]) for key, value in read.tags)

    # Mate position: only built when the mate is not flagged unmapped.
    matePosition = None
    if not hasFlag(SamFlags.MATE_UNMAPPED):
        matePosition = protocol.Position()
        if read.next_reference_id == -1:
            matePosition.referenceName = ""
        else:
            matePosition.referenceName = samFile.getrname(
                read.next_reference_id)
        matePosition.position = read.next_reference_start
        if hasFlag(SamFlags.MATE_REVERSE_STRAND):
            matePosition.strand = protocol.Strand.NEG_STRAND
        else:
            matePosition.strand = protocol.Strand.POS_STRAND
    ret.nextMatePosition = matePosition

    ret.numberReads = 2 if hasFlag(SamFlags.READ_PAIRED) else 1

    firstInPair = hasFlag(SamFlags.FIRST_IN_PAIR)
    secondInPair = hasFlag(SamFlags.SECOND_IN_PAIR)
    if firstInPair and secondInPair:
        ret.readNumber = 2
    elif firstInPair:
        ret.readNumber = 0
    elif secondInPair:
        ret.readNumber = 1
    else:
        ret.readNumber = None

    ret.properPlacement = hasFlag(SamFlags.READ_PROPER_PAIR)
    ret.readGroupId = self.getId()
    ret.secondaryAlignment = hasFlag(SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementaryAlignment = hasFlag(SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # Computed last, once every other field has been set.
    ret.id = self.getReadAlignmentId(ret)
    return ret
def convertReadAlignment(self, read, readGroupSet, readGroupId):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam read to convert
    :param readGroupSet: object whose getReadAlignmentId(ret) supplies
        the id of the converted read
    :param readGroupId: value stored in read_group_id
    :return: the populated protocol.ReadAlignment

    Reads without base qualities get an empty aligned_quality. The
    alignment field is only filled in when READ_UNMAPPED is not set,
    and next_mate_position only when MATE_UNMAPPED is not set.
    read_number is 0 for first-in-pair only, 1 for second-in-pair only,
    2 when both flags are set and -1 when neither is.
    """
    samFile = self.getFileHandle(self._dataUrl)
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # ret.fragmentId = 'TODO'
    # query_qualities is None when the record stores no qualities;
    # extend(None) would raise TypeError.
    if read.query_qualities is not None:
        ret.aligned_quality.extend(read.query_qualities)
    ret.aligned_sequence = read.query_sequence
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
        ret.ClearField("alignment")
    else:
        ret.alignment.CopyFrom(protocol.LinearAlignment())
        ret.alignment.mapping_quality = read.mapping_quality
        ret.alignment.position.CopyFrom(protocol.Position())
        ret.alignment.position.reference_name = samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
            ret.alignment.position.strand = protocol.NEG_STRAND
        for operation, length in read.cigar:
            gaCigarUnit = ret.alignment.cigar.add()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operation_length = length
            gaCigarUnit.reference_sequence = ""  # TODO fix this!
    ret.duplicate_fragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_READ)
    ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_QUALITY_CHECK)
    ret.fragment_length = read.template_length
    ret.fragment_name = read.query_name
    for key, value in read.tags:
        ret.info[key].values.add().string_value = str(value)
    # The original cleared the mate position in both branches; do it once
    # and only populate it when the mate is not flagged unmapped.
    ret.next_mate_position.Clear()
    if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
        if read.next_reference_id != -1:
            ret.next_mate_position.reference_name = samFile.getrname(
                read.next_reference_id)
        else:
            ret.next_mate_position.reference_name = ""
        ret.next_mate_position.position = read.next_reference_start
        ret.next_mate_position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
            ret.next_mate_position.strand = protocol.NEG_STRAND
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
        ret.number_reads = 2
    else:
        ret.number_reads = 1
    ret.read_number = -1
    if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
        if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 2
        else:
            ret.read_number = 0
    elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
        ret.read_number = 1
    ret.improper_placement = not SamFlags.isFlagSet(
        read.flag, SamFlags.READ_PROPER_PAIR)
    ret.read_group_id = readGroupId
    ret.secondary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # Computed last, once every other field has been set.
    ret.id = readGroupSet.getReadAlignmentId(ret)
    return ret