def _createReadAlignment(self, i, seed):
    """
    Returns a simulated ReadAlignment whose random content (fragment
    length, qualities and bases) is drawn from a private random number
    generator initialised with the specified seed.

    :param i: index of this alignment; embedded in the fragment name.
    :param seed: seed for the random.Random instance used here.
    :return: the populated protocol.ReadAlignment.
    """
    # TODO fill out a bit more
    rng = random.Random(seed)
    alignment = protocol.ReadAlignment()
    alignment.fragment_length = rng.randint(10, 100)
    bases = []
    # The loop variable is deliberately not called ``i``: reusing that
    # name would overwrite the ``i`` argument that the fragment name
    # below is built from.
    for _ in range(alignment.fragment_length):
        # TODO: are these reasonable quality values?
        # One quality draw followed by one base draw per position, so
        # the sequence of rng calls stays the same for a given seed.
        alignment.aligned_quality.append(rng.randint(1, 20))
        bases.append(rng.choice("ACGT"))
    alignment.aligned_sequence = "".join(bases)
    alignment.alignment.position.position = 0
    alignment.alignment.position.reference_name = "NotImplemented"
    alignment.alignment.position.strand = protocol.POS_STRAND
    alignment.duplicate_fragment = False
    alignment.failed_vendor_quality_checks = False
    alignment.fragment_name = "{}$simulated{}".format(self.getLocalId(), i)
    alignment.number_reads = 0
    alignment.improper_placement = False
    alignment.read_group_id = self.getId()
    alignment.read_number = -1
    alignment.secondary_alignment = False
    alignment.supplementary_alignment = False
    # The id is computed last, from the otherwise complete alignment.
    alignment.id = self._parentContainer.getReadAlignmentId(alignment)
    return alignment
def testSetAttribute(self):
    """
    Checks that protocol.setAttribute stores an integer so that it can
    be read back through the int32_value field of the first value.
    """
    expected = 5
    read = protocol.ReadAlignment()
    values = read.attributes.attr['key'].values
    protocol.setAttribute(values, expected)
    # items() is only subscriptable when it returns a list; taking the
    # first element through an iterator works for lists and views alike.
    _, value = next(iter(read.attributes.attr.items()))
    result = value.values[0].int32_value
    self.assertEqual(result, expected)
def search_reads(request):
    """
    Mock function.

    Ignores ``request`` and returns a tuple of ten ReadAlignment
    messages (ids "0".."9", positions 123..132) and the fixed token
    "tokentoken".
    """
    alignments = []
    # range() rather than xrange(): identical iteration for ten items,
    # and it does not raise NameError on Python 3.
    for i in range(10):
        ga_alignment = protocol.ReadAlignment()
        ga_alignment.id = str(i)
        ga_alignment.alignment.position.position = 123 + i
        alignments.append(ga_alignment)
    return (alignments, "tokentoken")
def testGetValueFromValue(self):
    """
    Checks protocol.getValueFromValue: a plain int is rejected with
    TypeError, an AttributeValue with nothing set is rejected with
    AttributeError, and a value holding a string returns that string.
    """
    with self.assertRaises(TypeError):
        protocol.getValueFromValue(5)
    val = common.AttributeValue()
    with self.assertRaises(AttributeError):
        protocol.getValueFromValue(val)
    read = protocol.ReadAlignment()
    expected = "1"
    read.attributes.attr['key'].values.add().string_value = expected
    # items() is only subscriptable when it returns a list; taking the
    # first element through an iterator works for lists and views alike.
    _, value = next(iter(read.attributes.attr.items()))
    result = protocol.getValueFromValue(value.values[0])
    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(result, expected)
def _convert_alignment(ngs_alignment):
    """
    Accepts an NCBI/NGS alignment and returns the equivalent GA4GH
    ReadAlignment protobuf.

    The following ReadAlignment fields are left untouched here:
    improper_placement, duplicate_fragment, fragment_length,
    aligned_quality and attributes (source not yet determined), and
    read_number, failed_vendor_quality_checks and
    supplementary_alignment (not available through NGS).
    """
    # The reference sequence this alignment is placed on.
    ref_spec = ngs_alignment.getReferenceSpec()

    converted = protocol.ReadAlignment()
    # Unique within read collection.
    converted.id = ngs_alignment.getAlignmentId()
    converted.read_group_id = ngs_alignment.getReadGroup()
    # Alignment extends Fragment. A "Fragment" is an individual read.
    converted.fragment_name = ngs_alignment.getFragmentId()
    converted.number_reads = _get_num_reads(ngs_alignment)

    own_position = converted.alignment.position
    own_position.reference_name = ref_spec
    # Both zero-based.
    own_position.position = ngs_alignment.getAlignmentPosition()
    own_position.strand = _get_strand(ngs_alignment)
    converted.alignment.mapping_quality = ngs_alignment.getMappingQuality()
    _set_cigar(converted, ngs_alignment.getLongCigar(False))

    category = ngs_alignment.getAlignmentCategory()
    converted.secondary_alignment = (
        category == Alignment.secondaryAlignment)
    converted.aligned_sequence = ngs_alignment.getClippedFragmentBases()

    # Mate information is only filled in when there is more than one read.
    if not converted.number_reads > 1:
        return converted

    mate = ngs_alignment.getMateAlignment()
    mate_position = converted.next_mate_position
    mate_position.reference_name = mate.getReferenceSpec()
    mate_position.position = mate.getAlignmentPosition()
    mate_position.strand = _get_strand(mate)
    return converted
def convertReadAlignment(self, read, readGroupSet, readGroupId):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam read to convert.
    :param readGroupSet: object whose getReadAlignmentId() supplies the
        id of the converted alignment.
    :param readGroupId: value stored in the result's read_group_id.
    :return: the populated protocol.ReadAlignment.
    """
    samFile = self.getFileHandle(self._dataUrl)
    flag = read.flag

    def isSet(bit):
        # Shorthand for testing one SAM flag bit of this read.
        return SamFlags.isFlagSet(flag, bit)

    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # ret.fragmentId = 'TODO'
    # query_qualities can be None; extending with None raises
    # TypeError, so in that case the quality list is simply left empty.
    if read.query_qualities is not None:
        ret.aligned_quality.extend(read.query_qualities)
    ret.aligned_sequence = read.query_sequence

    if isSet(SamFlags.READ_UNMAPPED):
        ret.ClearField("alignment")
    else:
        ret.alignment.CopyFrom(protocol.LinearAlignment())
        ret.alignment.mapping_quality = read.mapping_quality
        ret.alignment.position.CopyFrom(protocol.Position())
        ret.alignment.position.reference_name = samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        if isSet(SamFlags.READ_REVERSE_STRAND):
            ret.alignment.position.strand = protocol.NEG_STRAND
        else:
            ret.alignment.position.strand = protocol.POS_STRAND
        for operation, length in read.cigar:
            gaCigarUnit = ret.alignment.cigar.add()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operation_length = length
            gaCigarUnit.reference_sequence = ""  # TODO fix this!

    ret.duplicate_fragment = isSet(SamFlags.DUPLICATE_READ)
    ret.failed_vendor_quality_checks = isSet(SamFlags.FAILED_QUALITY_CHECK)
    ret.fragment_length = read.template_length
    ret.fragment_name = read.query_name
    for key, value in read.tags:
        # Useful for inspecting the structure of read tags
        # print("{key} {ktype}: {value}, {vtype}".format(
        #     key=key, ktype=type(key), value=value, vtype=type(value)))
        protocol.setAttribute(ret.attributes.attr[key].values, value)

    # The mate position is reset in every case; it is only filled in
    # when the mate is mapped.
    ret.next_mate_position.Clear()
    if not isSet(SamFlags.MATE_UNMAPPED):
        if read.next_reference_id != -1:
            ret.next_mate_position.reference_name = samFile.getrname(
                read.next_reference_id)
        else:
            ret.next_mate_position.reference_name = ""
        ret.next_mate_position.position = read.next_reference_start
        if isSet(SamFlags.MATE_REVERSE_STRAND):
            ret.next_mate_position.strand = protocol.NEG_STRAND
        else:
            ret.next_mate_position.strand = protocol.POS_STRAND

    ret.number_reads = 2 if isSet(SamFlags.READ_PAIRED) else 1

    # read_number: -1 when neither pair flag is set, 0 for first only,
    # 1 for second only, 2 when both flags are set.
    firstInPair = isSet(SamFlags.FIRST_IN_PAIR)
    secondInPair = isSet(SamFlags.SECOND_IN_PAIR)
    if firstInPair and secondInPair:
        ret.read_number = 2
    elif firstInPair:
        ret.read_number = 0
    elif secondInPair:
        ret.read_number = 1
    else:
        ret.read_number = -1

    ret.improper_placement = not isSet(SamFlags.READ_PROPER_PAIR)
    ret.read_group_id = readGroupId
    ret.secondary_alignment = isSet(SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementary_alignment = isSet(SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is computed last, from the otherwise complete alignment.
    ret.id = readGroupSet.getReadAlignmentId(ret)
    return ret
def assertAlignmentsEqual(self, gaAlignment, pysamAlignment, readGroupInfo):
    """
    Asserts that a GA4GH ReadAlignment agrees, field by field, with the
    pysam alignment it was converted from.

    :param gaAlignment: the converted protocol.ReadAlignment.
    :param pysamAlignment: the pysam read used as the reference.
    :param readGroupInfo: provides the samFile used to resolve
        reference names and the expected read group id.
    """
    samFlags = reads.SamFlags
    flag = pysamAlignment.flag

    if pysamAlignment.query_qualities is None:
        self.assertEqual(gaAlignment.aligned_quality, [])
    else:
        self.assertEqual(
            gaAlignment.aligned_quality,
            list(pysamAlignment.query_qualities))
    self.assertEqual(
        gaAlignment.aligned_sequence, pysamAlignment.query_sequence)

    if samFlags.isFlagSet(flag, samFlags.READ_UNMAPPED):
        # An unmapped read must carry no alignment data at all.
        self.assertEqual(0, gaAlignment.alignment.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.alignment.mapping_quality,
            pysamAlignment.mapping_quality)
        self.assertEqual(
            gaAlignment.alignment.position.reference_name,
            readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
        self.assertEqual(
            gaAlignment.alignment.position.position,
            pysamAlignment.reference_start)
        # TODO test reverseStrand on position and on
        # nextMatePosition once it has been implemented.
        self.assertCigarEqual(
            gaAlignment.alignment.cigar, pysamAlignment.cigar)

    self.assertFlag(
        gaAlignment.duplicate_fragment,
        pysamAlignment, samFlags.DUPLICATE_READ)
    self.assertFlag(
        gaAlignment.failed_vendor_quality_checks,
        pysamAlignment, samFlags.FAILED_QUALITY_CHECK)
    self.assertEqual(
        gaAlignment.fragment_length, pysamAlignment.template_length)
    self.assertEqual(gaAlignment.fragment_name, pysamAlignment.query_name)
    compoundId = datamodel.ReadAlignmentCompoundId(
        self._gaObject.getCompoundId(), pysamAlignment.query_name)
    self.assertEqual(gaAlignment.id, str(compoundId))

    # Rebuild the attributes from the pysam tags and compare as JSON.
    ret = protocol.ReadAlignment()
    for key, value in pysamAlignment.tags:
        protocol.setAttribute(ret.attributes.attr[key].values, value)
    self.assertEqual(
        protocol.toJson(gaAlignment.attributes),
        protocol.toJson(ret.attributes))

    if samFlags.isFlagSet(flag, samFlags.MATE_UNMAPPED):
        self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.next_mate_position.position,
            pysamAlignment.next_reference_start)
        if pysamAlignment.next_reference_id != -1:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name,
                readGroupInfo.samFile.getrname(
                    pysamAlignment.next_reference_id))
        else:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name, "")

    if gaAlignment.number_reads == 1:
        self.assertFlag(False, pysamAlignment, samFlags.READ_PAIRED)
    elif gaAlignment.number_reads == 2:
        self.assertFlag(True, pysamAlignment, samFlags.READ_PAIRED)
    else:
        # we shouldn't be setting numberReads to anything else
        self.fail("unexpected number_reads: {}".format(
            gaAlignment.number_reads))

    # Compare by value: ``is`` against an int literal tests identity
    # and only works by accident of integer caching.
    if gaAlignment.read_number == -1:
        self.assertFlag(False, pysamAlignment, samFlags.FIRST_IN_PAIR)
        self.assertFlag(False, pysamAlignment, samFlags.SECOND_IN_PAIR)
    elif gaAlignment.read_number == 0:
        self.assertFlag(True, pysamAlignment, samFlags.FIRST_IN_PAIR)
        self.assertFlag(False, pysamAlignment, samFlags.SECOND_IN_PAIR)
    elif gaAlignment.read_number == 1:
        self.assertFlag(False, pysamAlignment, samFlags.FIRST_IN_PAIR)
        self.assertFlag(True, pysamAlignment, samFlags.SECOND_IN_PAIR)
    elif gaAlignment.read_number == 2:
        self.assertFlag(True, pysamAlignment, samFlags.FIRST_IN_PAIR)
        self.assertFlag(True, pysamAlignment, samFlags.SECOND_IN_PAIR)
    else:
        # we shouldn't be setting readNumber to anything else
        self.fail("unexpected read_number: {}".format(
            gaAlignment.read_number))

    self.assertFlag(
        not gaAlignment.improper_placement,
        pysamAlignment, samFlags.READ_PROPER_PAIR)
    self.assertEqual(gaAlignment.read_group_id, readGroupInfo.id)
    self.assertFlag(
        gaAlignment.secondary_alignment,
        pysamAlignment, samFlags.SECONDARY_ALIGNMENT)
    self.assertFlag(
        gaAlignment.supplementary_alignment,
        pysamAlignment, samFlags.SUPPLEMENTARY_ALIGNMENT)