예제 #1
0
    def _createReadAlignment(self, i, seed):
        """
        Build a simulated ReadAlignment with pseudo-random content.

        ``i`` is the index embedded in the fragment name. ``seed`` seeds a
        private random.Random instance that drives the fragment length, the
        quality values and the bases, so equal seeds give equal content.
        Returns the populated protocol.ReadAlignment.
        """
        # TODO fill out a bit more
        rng = random.Random(seed)
        alignment = protocol.ReadAlignment()
        alignment.fragment_length = rng.randint(10, 100)
        bases = []
        # The loop variable is deliberately not named ``i``: reusing that name
        # would overwrite the ``i`` argument needed for the fragment name.
        for _ in range(alignment.fragment_length):
            # TODO: are these reasonable quality values?
            # Quality first, then base: keeps the rng call order stable.
            alignment.aligned_quality.append(rng.randint(1, 20))
            bases.append(rng.choice("ACGT"))
        alignment.aligned_sequence = "".join(bases)

        alignment.alignment.position.position = 0
        alignment.alignment.position.reference_name = "NotImplemented"
        alignment.alignment.position.strand = protocol.POS_STRAND
        alignment.duplicate_fragment = False
        alignment.failed_vendor_quality_checks = False

        alignment.fragment_name = "{}$simulated{}".format(self.getLocalId(), i)
        alignment.number_reads = 0
        alignment.improper_placement = False
        alignment.read_group_id = self.getId()
        alignment.read_number = -1
        alignment.secondary_alignment = False
        alignment.supplementary_alignment = False
        # The id is derived from the finished message, so assign it last.
        alignment.id = self._parentContainer.getReadAlignmentId(alignment)
        return alignment
예제 #2
0
 def testSetAttribute(self):
     """
     Check that protocol.setAttribute stores an int in the 'key' entry of
     a ReadAlignment's attributes and that it reads back as int32_value.
     """
     expected = 5
     read = protocol.ReadAlignment()
     values = read.attributes.attr['key'].values
     protocol.setAttribute(values, expected)
     # list() keeps the indexing valid where items() returns a view
     # instead of a list (Python 3).
     _, value = list(read.attributes.attr.items())[0]
     result = value.values[0].int32_value
     self.assertEqual(result, expected)
예제 #3
0
def search_reads(request):
    """
    Mock function.

    ``request`` is not inspected. Returns a tuple of a list holding ten
    ReadAlignment messages and the fixed string "tokentoken". The k-th
    alignment has id ``str(k)`` and position ``123 + k``.
    """
    alignments = []
    # range() instead of xrange(): xrange does not exist on Python 3, and
    # for ten items the two behave the same on Python 2.
    for i in range(10):
        ga_alignment = protocol.ReadAlignment()
        ga_alignment.id = str(i)
        ga_alignment.alignment.position.position = 123 + i
        alignments.append(ga_alignment)
    return (alignments, "tokentoken")
예제 #4
0
 def testGetValueFromValue(self):
     """
     Check protocol.getValueFromValue: it must raise TypeError for a plain
     int, raise AttributeError for a freshly constructed AttributeValue,
     and return the stored string for a value with string_value set.
     """
     with self.assertRaises(TypeError):
         protocol.getValueFromValue(5)
     val = common.AttributeValue()
     with self.assertRaises(AttributeError):
         protocol.getValueFromValue(val)
     read = protocol.ReadAlignment()
     expected = "1"
     read.attributes.attr['key'].values.add().string_value = expected
     # list() keeps the indexing valid where items() returns a view
     # instead of a list (Python 3).
     _, value = list(read.attributes.attr.items())[0]
     result = protocol.getValueFromValue(value.values[0])
     # assertEqual, not the deprecated assertEquals alias.
     self.assertEqual(result, expected)
예제 #5
0
def _convert_alignment(ngs_alignment):
    """
    Translate one NCBI/NGS alignment into a GA4GH ReadAlignment protobuf
    message and return it.

    Fields for which no source value is used here are left untouched; they
    are listed in the inline notes below.
    """
    ref_spec = ngs_alignment.getReferenceSpec()  # name of the reference
    result = protocol.ReadAlignment()

    # --- identity of the read -------------------------------------------
    # The alignment id is unique within the read collection.
    result.id = ngs_alignment.getAlignmentId()
    result.read_group_id = ngs_alignment.getReadGroup()
    # Alignment extends Fragment. A "Fragment" is an individual read.
    result.fragment_name = ngs_alignment.getFragmentId()
    result.number_reads = _get_num_reads(ngs_alignment)
    # Not populated (source unknown): improper_placement,
    # duplicate_fragment, fragment_length.
    # Not populated (not available through NGS): read_number,
    # failed_vendor_quality_checks.

    # --- linear alignment -------------------------------------------------
    linear = result.alignment
    where = linear.position
    where.reference_name = ref_spec
    # Both the NGS and the GA4GH positions are zero-based.
    where.position = ngs_alignment.getAlignmentPosition()
    where.strand = _get_strand(ngs_alignment)
    linear.mapping_quality = ngs_alignment.getMappingQuality()
    _set_cigar(result, ngs_alignment.getLongCigar(False))

    category = ngs_alignment.getAlignmentCategory()
    result.secondary_alignment = category == Alignment.secondaryAlignment
    # Not populated (not available through NGS): supplementary_alignment.
    result.aligned_sequence = ngs_alignment.getClippedFragmentBases()
    # Not populated (source unknown): aligned_quality.

    # --- mate, only when the fragment has more than one read --------------
    if result.number_reads > 1:
        mate = ngs_alignment.getMateAlignment()
        mate_where = result.next_mate_position
        mate_where.reference_name = mate.getReferenceSpec()
        mate_where.position = mate.getAlignmentPosition()
        mate_where.strand = _get_strand(mate)
    # Not populated (source unknown): attributes.
    return result
예제 #6
0
    def convertReadAlignment(self, read, readGroupSet, readGroupId):
        """
        Convert a pysam ReadAlignment to a GA4GH ReadAlignment

        ``read`` is the pysam read to convert. ``readGroupSet`` provides
        the id of the result through getReadAlignmentId, and
        ``readGroupId`` is stored as the read_group_id. Returns the
        populated protocol.ReadAlignment.
        """
        samFile = self.getFileHandle(self._dataUrl)
        # TODO fill out remaining fields
        # TODO refine in tandem with code in converters module
        ret = protocol.ReadAlignment()
        # ret.fragmentId = 'TODO'
        # pysam gives None when a read carries no base qualities; extend(None)
        # would raise, so aligned_quality is left empty in that case.
        if read.query_qualities is not None:
            ret.aligned_quality.extend(read.query_qualities)
        ret.aligned_sequence = read.query_sequence
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
            ret.ClearField("alignment")
        else:
            ret.alignment.CopyFrom(protocol.LinearAlignment())
            ret.alignment.mapping_quality = read.mapping_quality
            ret.alignment.position.CopyFrom(protocol.Position())
            ret.alignment.position.reference_name = samFile.getrname(
                read.reference_id)
            ret.alignment.position.position = read.reference_start
            ret.alignment.position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
                ret.alignment.position.strand = protocol.NEG_STRAND
            for operation, length in read.cigar:
                gaCigarUnit = ret.alignment.cigar.add()
                gaCigarUnit.operation = SamCigar.int2ga(operation)
                gaCigarUnit.operation_length = length
                gaCigarUnit.reference_sequence = ""  # TODO fix this!
        ret.duplicate_fragment = SamFlags.isFlagSet(
            read.flag, SamFlags.DUPLICATE_READ)
        ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
            read.flag, SamFlags.FAILED_QUALITY_CHECK)
        ret.fragment_length = read.template_length
        ret.fragment_name = read.query_name
        for key, value in read.tags:
            # Useful for inspecting the structure of read tags
            # print("{key} {ktype}: {value}, {vtype}".format(
            #     key=key, ktype=type(key), value=value, vtype=type(value)))
            protocol.setAttribute(ret.attributes.attr[key].values, value)

        # The mate position always starts from a cleared message and is only
        # filled in when the mate is not flagged as unmapped.
        ret.next_mate_position.Clear()
        if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
            if read.next_reference_id != -1:
                ret.next_mate_position.reference_name = samFile.getrname(
                    read.next_reference_id)
            else:
                ret.next_mate_position.reference_name = ""
            ret.next_mate_position.position = read.next_reference_start
            ret.next_mate_position.strand = protocol.POS_STRAND
            if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
                ret.next_mate_position.strand = protocol.NEG_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
            ret.number_reads = 2
        else:
            ret.number_reads = 1
        # read_number encoding: -1 neither flag, 0 first only, 1 second
        # only, 2 both first and second flags set.
        isFirst = SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR)
        isSecond = SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR)
        if isFirst and isSecond:
            ret.read_number = 2
        elif isFirst:
            ret.read_number = 0
        elif isSecond:
            ret.read_number = 1
        else:
            ret.read_number = -1
        ret.improper_placement = not SamFlags.isFlagSet(
            read.flag, SamFlags.READ_PROPER_PAIR)
        ret.read_group_id = readGroupId
        ret.secondary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SECONDARY_ALIGNMENT)
        ret.supplementary_alignment = SamFlags.isFlagSet(
            read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
        # The id is derived from the finished message, so assign it last.
        ret.id = readGroupSet.getReadAlignmentId(ret)
        return ret
예제 #7
0
 def assertAlignmentsEqual(self, gaAlignment, pysamAlignment,
                           readGroupInfo):
     """
     Assert that a GA4GH ReadAlignment matches the pysam alignment it was
     converted from.

     ``gaAlignment`` is the converted message, ``pysamAlignment`` the
     source read, and ``readGroupInfo`` supplies the ``samFile`` used to
     resolve reference names and the expected read group ``id``.
     """
     # A pysam read without qualities must give an empty quality list.
     if pysamAlignment.query_qualities is None:
         self.assertEqual(gaAlignment.aligned_quality, [])
     else:
         self.assertEqual(gaAlignment.aligned_quality,
                          list(pysamAlignment.query_qualities))
     self.assertEqual(gaAlignment.aligned_sequence,
                      pysamAlignment.query_sequence)
     if reads.SamFlags.isFlagSet(pysamAlignment.flag,
                                 reads.SamFlags.READ_UNMAPPED):
         # Unmapped reads must carry no alignment data at all.
         self.assertEqual(0, gaAlignment.alignment.ByteSize())
     else:
         self.assertEqual(gaAlignment.alignment.mapping_quality,
                          pysamAlignment.mapping_quality)
         self.assertEqual(
             gaAlignment.alignment.position.reference_name,
             readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
         self.assertEqual(gaAlignment.alignment.position.position,
                          pysamAlignment.reference_start)
         # TODO test reverseStrand on position and on
         # nextMatePosition once it has been implemented.
         self.assertCigarEqual(gaAlignment.alignment.cigar,
                               pysamAlignment.cigar)
     self.assertFlag(gaAlignment.duplicate_fragment, pysamAlignment,
                     reads.SamFlags.DUPLICATE_READ)
     self.assertFlag(gaAlignment.failed_vendor_quality_checks,
                     pysamAlignment, reads.SamFlags.FAILED_QUALITY_CHECK)
     self.assertEqual(gaAlignment.fragment_length,
                      pysamAlignment.template_length)
     self.assertEqual(gaAlignment.fragment_name, pysamAlignment.query_name)
     compoundId = datamodel.ReadAlignmentCompoundId(
         self._gaObject.getCompoundId(), pysamAlignment.query_name)
     self.assertEqual(gaAlignment.id, str(compoundId))
     # Rebuild the attributes from the pysam tags and compare as JSON.
     ret = protocol.ReadAlignment()
     for key, value in pysamAlignment.tags:
         protocol.setAttribute(ret.attributes.attr[key].values, value)
     self.assertEqual(protocol.toJson(gaAlignment.attributes),
                      protocol.toJson(ret.attributes))
     if reads.SamFlags.isFlagSet(pysamAlignment.flag,
                                 reads.SamFlags.MATE_UNMAPPED):
         self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
     else:
         self.assertEqual(gaAlignment.next_mate_position.position,
                          pysamAlignment.next_reference_start)
         if pysamAlignment.next_reference_id != -1:
             self.assertEqual(
                 gaAlignment.next_mate_position.reference_name,
                 readGroupInfo.samFile.getrname(
                     pysamAlignment.next_reference_id))
         else:
             self.assertEqual(gaAlignment.next_mate_position.reference_name,
                              "")
     if gaAlignment.number_reads == 1:
         self.assertFlag(False, pysamAlignment, reads.SamFlags.READ_PAIRED)
     elif gaAlignment.number_reads == 2:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.READ_PAIRED)
     else:
         # we shouldn't be setting numberReads to anything else
         self.fail("unexpected number_reads: {}".format(
             gaAlignment.number_reads))
     # Compare by value: ``is`` on an int literal tests object identity,
     # which only happens to work for small ints on some interpreters.
     if gaAlignment.read_number == -1:
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 0:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 1:
         self.assertFlag(False, pysamAlignment,
                         reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(True, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     elif gaAlignment.read_number == 2:
         self.assertFlag(True, pysamAlignment, reads.SamFlags.FIRST_IN_PAIR)
         self.assertFlag(True, pysamAlignment,
                         reads.SamFlags.SECOND_IN_PAIR)
     else:
         # we shouldn't be setting readNumber to anything else
         self.fail("unexpected read_number: {}".format(
             gaAlignment.read_number))
     self.assertFlag(not gaAlignment.improper_placement, pysamAlignment,
                     reads.SamFlags.READ_PROPER_PAIR)
     self.assertEqual(gaAlignment.read_group_id, readGroupInfo.id)
     self.assertFlag(gaAlignment.secondary_alignment, pysamAlignment,
                     reads.SamFlags.SECONDARY_ALIGNMENT)
     self.assertFlag(gaAlignment.supplementary_alignment, pysamAlignment,
                     reads.SamFlags.SUPPLEMENTARY_ALIGNMENT)