Beispiel #1
0
 def testSetAttribute(self):
     """
     Checks that a value written with protocol.setAttribute onto a
     read's attribute values can be read back as an int32.
     """
     expected = 5
     read = protocol.ReadAlignment()
     values = read.attributes.attr['key'].values
     protocol.setAttribute(values, expected)
     # items() returns a non-indexable view on Python 3, so take the
     # single entry through an iterator rather than with items()[0].
     _tag, value = next(iter(read.attributes.attr.items()))
     result = value.values[0].int32_value
     self.assertEqual(result, expected)
Beispiel #2
0
 def testSetAttribute(self):
     """
     Checks that a value written with protocol.setAttribute onto a
     read's attribute values can be read back as an int32.
     """
     expected = 5
     read = protocol.ReadAlignment()
     values = read.attributes.attr['key'].values
     protocol.setAttribute(values, expected)
     # items() returns a non-indexable view on Python 3, so take the
     # single entry through an iterator rather than with items()[0].
     _tag, value = next(iter(read.attributes.attr.items()))
     result = value.values[0].int32_value
     self.assertEqual(result, expected)
Beispiel #3
0
 def serializeAttributes(self, msg):
     """
     Copies each attribute returned by getAttributes() onto the
     attributes of the message being serialized, then returns the
     message.
     """
     attrs = self.getAttributes()
     for name in attrs:
         target = msg.attributes.attr[name].values
         protocol.setAttribute(target, attrs[name])
     return msg
Beispiel #4
0
 def serializeAttributes(self, msg):
     """
     Copies each attribute returned by getAttributes() onto the
     attributes of the message being serialized, then returns the
     message.
     """
     attrs = self.getAttributes()
     for name in attrs:
         target = msg.attributes.attr[name].values
         protocol.setAttribute(target, attrs[name])
     return msg
Beispiel #5
0
    def serializeMetadataAttributes(self, msg, tier=0):
        """
        Sets the metadata attributes of a message during serialization.

        Only attributes for which validateAttribute(name, attributes, tier)
        returns True are written. Each such attribute is iterated as a
        collection of dictionaries, and the values of all of those
        dictionaries are flattened into one list that is set on the
        message. The message is returned.
        """
        attributes = self.getAttributes()

        for name in attributes:
            # Identity check on purpose: anything other than the literal
            # True skips the attribute.
            if self.validateAttribute(name, attributes, tier) is not True:
                continue
            flattened = [
                entry[field]
                for entry in attributes[name]
                for field in entry]
            protocol.setAttribute(
                msg.attributes.attr[name].values, flattened)

        return msg
Beispiel #6
0
    def convertReadAlignment(self, read, readGroupSet, readGroupId):
        """
        Convert a pysam ReadAlignment to a GA4GH ReadAlignment

        :param read: the pysam read to convert; its flag, tags, cigar and
            query/reference fields are copied into the result.
        :param readGroupSet: object whose getReadAlignmentId(ret) is used
            to assign the id of the result.
        :param readGroupId: value stored as the result's read_group_id.
        :return: the populated protocol.ReadAlignment message.
        """
        samFile = self.getFileHandle(self._dataUrl)
        # TODO fill out remaining fields
        # TODO refine in tandem with code in converters module
        ret = protocol.ReadAlignment()
        # ret.fragmentId = 'TODO'
        # query_qualities is None when the read carries no base qualities;
        # extending with None would raise, so leave the field empty then.
        if read.query_qualities is not None:
            ret.aligned_quality.extend(read.query_qualities)
        ret.aligned_sequence = read.query_sequence
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
            ret.ClearField("alignment")
        else:
            ret.alignment.CopyFrom(protocol.LinearAlignment())
            ret.alignment.mapping_quality = read.mapping_quality
            ret.alignment.position.CopyFrom(protocol.Position())
            ret.alignment.position.reference_name = samFile.getrname(
                read.reference_id)
            ret.alignment.position.position = read.reference_start
            # Forward strand unless the reverse-strand flag is set.
            ret.alignment.position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
                ret.alignment.position.strand = protocol.NEG_STRAND
            for operation, length in read.cigar:
                gaCigarUnit = ret.alignment.cigar.add()
                gaCigarUnit.operation = SamCigar.int2ga(operation)
                gaCigarUnit.operation_length = length
                gaCigarUnit.reference_sequence = ""  # TODO fix this!
        ret.duplicate_fragment = SamFlags.isFlagSet(
            read.flag, SamFlags.DUPLICATE_READ)
        ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
            read.flag, SamFlags.FAILED_QUALITY_CHECK)
        ret.fragment_length = read.template_length
        ret.fragment_name = read.query_name
        for key, value in read.tags:
            # Useful for inspecting the structure of read tags
            # print("{key} {ktype}: {value}, {vtype}".format(
            #     key=key, ktype=type(key), value=value, vtype=type(value)))
            protocol.setAttribute(ret.attributes.attr[key].values, value)

        # The mate position always starts out empty; it is only filled in
        # when the mate is not flagged as unmapped.
        ret.next_mate_position.Clear()
        if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
            if read.next_reference_id != -1:
                ret.next_mate_position.reference_name = samFile.getrname(
                    read.next_reference_id)
            else:
                ret.next_mate_position.reference_name = ""
            ret.next_mate_position.position = read.next_reference_start
            ret.next_mate_position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
                ret.next_mate_position.strand = protocol.NEG_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
            ret.number_reads = 2
        else:
            ret.number_reads = 1
        # read_number encoding: -1 when neither pair flag is set, 0 for
        # first-in-pair only, 1 for second-in-pair only, 2 when both are set.
        ret.read_number = -1
        if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
            if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
                ret.read_number = 2
            else:
                ret.read_number = 0
        elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 1
        ret.improper_placement = not SamFlags.isFlagSet(
            read.flag, SamFlags.READ_PROPER_PAIR)
        ret.read_group_id = readGroupId
        ret.secondary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SECONDARY_ALIGNMENT)
        ret.supplementary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
        # The id is assigned last, from the otherwise complete message.
        ret.id = readGroupSet.getReadAlignmentId(ret)
        return ret
Beispiel #7
0
 def assertAlignmentsEqual(self, gaAlignment, pysamAlignment,
                           readGroupInfo):
     """
     Asserts that each field of the GA4GH alignment agrees with the
     corresponding value of the pysam alignment.

     :param gaAlignment: the GA4GH ReadAlignment under test.
     :param pysamAlignment: the pysam read supplying the expected values.
     :param readGroupInfo: provides the samFile used to resolve reference
         names and the expected read group id.
     """
     if pysamAlignment.query_qualities is None:
         self.assertEqual(gaAlignment.aligned_quality, [])
     else:
         self.assertEqual(gaAlignment.aligned_quality,
                          list(pysamAlignment.query_qualities))
     self.assertEqual(gaAlignment.aligned_sequence,
                      pysamAlignment.query_sequence)
     if reads.SamFlags.isFlagSet(pysamAlignment.flag,
                                 reads.SamFlags.READ_UNMAPPED):
         # An unmapped read must carry no alignment data at all.
         self.assertEqual(0, gaAlignment.alignment.ByteSize())
     else:
         self.assertEqual(gaAlignment.alignment.mapping_quality,
                          pysamAlignment.mapping_quality)
         self.assertEqual(
             gaAlignment.alignment.position.reference_name,
             readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
         self.assertEqual(gaAlignment.alignment.position.position,
                          pysamAlignment.reference_start)
         # TODO test reverseStrand on position and on
         # nextMatePosition once it has been implemented.
         self.assertCigarEqual(gaAlignment.alignment.cigar,
                               pysamAlignment.cigar)
     self.assertFlag(gaAlignment.duplicate_fragment, pysamAlignment,
                     reads.SamFlags.DUPLICATE_READ)
     self.assertFlag(gaAlignment.failed_vendor_quality_checks,
                     pysamAlignment, reads.SamFlags.FAILED_QUALITY_CHECK)
     self.assertEqual(gaAlignment.fragment_length,
                      pysamAlignment.template_length)
     self.assertEqual(gaAlignment.fragment_name, pysamAlignment.query_name)
     compoundId = datamodel.ReadAlignmentCompoundId(
         self._gaObject.getCompoundId(), pysamAlignment.query_name)
     self.assertEqual(gaAlignment.id, str(compoundId))
     # Rebuild the expected attributes from the pysam tags and compare
     # the JSON forms of both attribute messages.
     ret = protocol.ReadAlignment()
     for key, value in pysamAlignment.tags:
         protocol.setAttribute(ret.attributes.attr[key].values, value)
     self.assertEqual(protocol.toJson(gaAlignment.attributes),
                      protocol.toJson(ret.attributes))
     if reads.SamFlags.isFlagSet(pysamAlignment.flag,
                                 reads.SamFlags.MATE_UNMAPPED):
         self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
     else:
         self.assertEqual(gaAlignment.next_mate_position.position,
                          pysamAlignment.next_reference_start)
         if pysamAlignment.next_reference_id != -1:
             self.assertEqual(
                 gaAlignment.next_mate_position.reference_name,
                 readGroupInfo.samFile.getrname(
                     pysamAlignment.next_reference_id))
         else:
             self.assertEqual(gaAlignment.next_mate_position.reference_name,
                              "")
     if gaAlignment.number_reads == 1:
         self.assertFlag(False, pysamAlignment, reads.SamFlags.READ_PAIRED)
     elif gaAlignment.number_reads == 2:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.READ_PAIRED)
     else:
         # we shouldn't be setting numberReads to anything else
         self.fail("unexpected number_reads: {}".format(
             gaAlignment.number_reads))
     # Compare by value: identity comparison against an int literal is
     # not guaranteed to hold for equal integers.
     if gaAlignment.read_number == -1:
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 0:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 1:
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(True, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 2:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(True, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     else:
         # we shouldn't be setting readNumber to anything else
         self.fail("unexpected read_number: {}".format(
             gaAlignment.read_number))
     self.assertFlag(not gaAlignment.improper_placement, pysamAlignment,
                     reads.SamFlags.READ_PROPER_PAIR)
     self.assertEqual(gaAlignment.read_group_id, readGroupInfo.id)
     self.assertFlag(gaAlignment.secondary_alignment, pysamAlignment,
                     reads.SamFlags.SECONDARY_ALIGNMENT)
     self.assertFlag(gaAlignment.supplementary_alignment, pysamAlignment,
                     reads.SamFlags.SUPPLEMENTARY_ALIGNMENT)
Beispiel #8
0
    def convertReadAlignment(self, read, readGroupSet, readGroupId):
        """
        Convert a pysam ReadAlignment to a GA4GH ReadAlignment

        :param read: the pysam read to convert; its flag, tags, cigar and
            query/reference fields are copied into the result.
        :param readGroupSet: object whose getReadAlignmentId(ret) is used
            to assign the id of the result.
        :param readGroupId: value stored as the result's read_group_id.
        :return: the populated protocol.ReadAlignment message.
        """
        samFile = self.getFileHandle(self._dataUrl)
        # TODO fill out remaining fields
        # TODO refine in tandem with code in converters module
        ret = protocol.ReadAlignment()
        # ret.fragmentId = 'TODO'
        # query_qualities is None when the read carries no base qualities;
        # extending with None would raise, so leave the field empty then.
        if read.query_qualities is not None:
            ret.aligned_quality.extend(read.query_qualities)
        ret.aligned_sequence = read.query_sequence
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
            ret.ClearField("alignment")
        else:
            ret.alignment.CopyFrom(protocol.LinearAlignment())
            ret.alignment.mapping_quality = read.mapping_quality
            ret.alignment.position.CopyFrom(protocol.Position())
            ret.alignment.position.reference_name = samFile.getrname(
                read.reference_id)
            ret.alignment.position.position = read.reference_start
            # Forward strand unless the reverse-strand flag is set.
            ret.alignment.position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
                ret.alignment.position.strand = protocol.NEG_STRAND
            for operation, length in read.cigar:
                gaCigarUnit = ret.alignment.cigar.add()
                gaCigarUnit.operation = SamCigar.int2ga(operation)
                gaCigarUnit.operation_length = length
                gaCigarUnit.reference_sequence = ""  # TODO fix this!
        ret.duplicate_fragment = SamFlags.isFlagSet(
            read.flag, SamFlags.DUPLICATE_READ)
        ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
            read.flag, SamFlags.FAILED_QUALITY_CHECK)
        ret.fragment_length = read.template_length
        ret.fragment_name = read.query_name
        for key, value in read.tags:
            # Useful for inspecting the structure of read tags
            # print("{key} {ktype}: {value}, {vtype}".format(
            #     key=key, ktype=type(key), value=value, vtype=type(value)))
            protocol.setAttribute(ret.attributes.attr[key].values, value)

        # The mate position always starts out empty; it is only filled in
        # when the mate is not flagged as unmapped.
        ret.next_mate_position.Clear()
        if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
            if read.next_reference_id != -1:
                ret.next_mate_position.reference_name = samFile.getrname(
                    read.next_reference_id)
            else:
                ret.next_mate_position.reference_name = ""
            ret.next_mate_position.position = read.next_reference_start
            ret.next_mate_position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
                ret.next_mate_position.strand = protocol.NEG_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
            ret.number_reads = 2
        else:
            ret.number_reads = 1
        # read_number encoding: -1 when neither pair flag is set, 0 for
        # first-in-pair only, 1 for second-in-pair only, 2 when both are set.
        ret.read_number = -1
        if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
            if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
                ret.read_number = 2
            else:
                ret.read_number = 0
        elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 1
        ret.improper_placement = not SamFlags.isFlagSet(
            read.flag, SamFlags.READ_PROPER_PAIR)
        ret.read_group_id = readGroupId
        ret.secondary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SECONDARY_ALIGNMENT)
        ret.supplementary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
        # The id is assigned last, from the otherwise complete message.
        ret.id = readGroupSet.getReadAlignmentId(ret)
        return ret
Beispiel #9
0
 def assertAlignmentsEqual(self, gaAlignment, pysamAlignment,
                           readGroupInfo):
     """
     Asserts that each field of the GA4GH alignment agrees with the
     corresponding value of the pysam alignment.

     :param gaAlignment: the GA4GH ReadAlignment under test.
     :param pysamAlignment: the pysam read supplying the expected values.
     :param readGroupInfo: provides the samFile used to resolve reference
         names and the expected read group id.
     """
     if pysamAlignment.query_qualities is None:
         self.assertEqual(gaAlignment.aligned_quality, [])
     else:
         self.assertEqual(
             gaAlignment.aligned_quality,
             list(pysamAlignment.query_qualities))
     self.assertEqual(
         gaAlignment.aligned_sequence,
         pysamAlignment.query_sequence)
     if reads.SamFlags.isFlagSet(
             pysamAlignment.flag, reads.SamFlags.READ_UNMAPPED):
         # An unmapped read must carry no alignment data at all.
         self.assertEqual(0, gaAlignment.alignment.ByteSize())
     else:
         self.assertEqual(
             gaAlignment.alignment.mapping_quality,
             pysamAlignment.mapping_quality)
         self.assertEqual(
             gaAlignment.alignment.position.reference_name,
             readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
         self.assertEqual(
             gaAlignment.alignment.position.position,
             pysamAlignment.reference_start)
         # TODO test reverseStrand on position and on
         # nextMatePosition once it has been implemented.
         self.assertCigarEqual(
             gaAlignment.alignment.cigar,
             pysamAlignment.cigar)
     self.assertFlag(
         gaAlignment.duplicate_fragment,
         pysamAlignment, reads.SamFlags.DUPLICATE_READ)
     self.assertFlag(
         gaAlignment.failed_vendor_quality_checks,
         pysamAlignment, reads.SamFlags.FAILED_QUALITY_CHECK)
     self.assertEqual(
         gaAlignment.fragment_length,
         pysamAlignment.template_length)
     self.assertEqual(
         gaAlignment.fragment_name,
         pysamAlignment.query_name)
     compoundId = datamodel.ReadAlignmentCompoundId(
         self._gaObject.getCompoundId(),
         pysamAlignment.query_name)
     self.assertEqual(gaAlignment.id, str(compoundId))
     # Rebuild the expected attributes from the pysam tags and compare
     # the JSON forms of both attribute messages.
     ret = protocol.ReadAlignment()
     for key, value in pysamAlignment.tags:
         protocol.setAttribute(ret.attributes.attr[key].values, value)
     self.assertEqual(
         protocol.toJson(gaAlignment.attributes),
         protocol.toJson(ret.attributes))
     if reads.SamFlags.isFlagSet(
             pysamAlignment.flag, reads.SamFlags.MATE_UNMAPPED):
         self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
     else:
         self.assertEqual(
             gaAlignment.next_mate_position.position,
             pysamAlignment.next_reference_start)
         if pysamAlignment.next_reference_id != -1:
             self.assertEqual(
                 gaAlignment.next_mate_position.reference_name,
                 readGroupInfo.samFile.getrname(
                     pysamAlignment.next_reference_id))
         else:
             self.assertEqual(
                 gaAlignment.next_mate_position.reference_name, "")
     if gaAlignment.number_reads == 1:
         self.assertFlag(
             False, pysamAlignment, reads.SamFlags.READ_PAIRED)
     elif gaAlignment.number_reads == 2:
         self.assertFlag(
             True, pysamAlignment, reads.SamFlags.READ_PAIRED)
     else:
         # we shouldn't be setting numberReads to anything else
         self.fail("unexpected number_reads: {}".format(
             gaAlignment.number_reads))
     # Compare by value: identity comparison against an int literal is
     # not guaranteed to hold for equal integers.
     if gaAlignment.read_number == -1:
         self.assertFlag(
             False, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(
             False, pysamAlignment, reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 0:
         self.assertFlag(
             True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(
             False, pysamAlignment, reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 1:
         self.assertFlag(
             False, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(
             True, pysamAlignment, reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 2:
         self.assertFlag(
             True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(
             True, pysamAlignment, reads.SamFlags.SECOND_IN_PAIR)
     else:
         # we shouldn't be setting readNumber to anything else
         self.fail("unexpected read_number: {}".format(
             gaAlignment.read_number))
     self.assertFlag(
         not gaAlignment.improper_placement,
         pysamAlignment, reads.SamFlags.READ_PROPER_PAIR)
     self.assertEqual(
         gaAlignment.read_group_id,
         readGroupInfo.id)
     self.assertFlag(
         gaAlignment.secondary_alignment,
         pysamAlignment, reads.SamFlags.SECONDARY_ALIGNMENT)
     self.assertFlag(
         gaAlignment.supplementary_alignment,
         pysamAlignment, reads.SamFlags.SUPPLEMENTARY_ALIGNMENT)