def _createReadAlignment(self, i, seed):
    """
    Returns a simulated ReadAlignment whose length, qualities and bases
    are drawn from a random number generator seeded with the specified
    seed.

    :param i: index of this alignment, used to build its fragmentName.
    :param seed: seed for the private random.Random instance; also
        used to build the fragmentId.
    :return: the populated protocol.ReadAlignment.
    """
    # TODO fill out a bit more
    rng = random.Random(seed)
    alignment = protocol.ReadAlignment()
    alignment.fragmentLength = rng.randint(10, 100)
    alignment.alignedQuality = []
    bases = []
    # The loop variable is deliberately not called `i`: reusing that
    # name would overwrite the parameter and make fragmentName depend
    # on the fragment length instead of the alignment index.
    for _ in range(alignment.fragmentLength):
        # TODO: are these reasonable quality values?
        # The quality and base draws are kept interleaved so that the
        # values generated for a given seed do not change.
        alignment.alignedQuality.append(rng.randint(1, 20))
        bases.append(rng.choice("ACGT"))
    alignment.alignedSequence = "".join(bases)
    alignment.fragmentId = "frag{}".format(seed)
    gaPosition = protocol.Position()
    gaPosition.position = 0
    gaPosition.referenceName = "NotImplemented"
    gaPosition.strand = protocol.Strand.POS_STRAND
    gaLinearAlignment = protocol.LinearAlignment()
    gaLinearAlignment.position = gaPosition
    alignment.alignment = gaLinearAlignment
    alignment.duplicateFragment = False
    alignment.failedVendorQualityChecks = False
    alignment.fragmentName = "simulated{}".format(i)
    alignment.info = {}
    alignment.nextMatePosition = None
    alignment.numberReads = None
    alignment.properPlacement = False
    alignment.readGroupId = self.getId()
    alignment.readNumber = None
    alignment.secondaryAlignment = False
    alignment.supplementaryAlignment = False
    # The id is computed last, from the populated alignment.
    alignment.id = self.getReadAlignmentId(alignment)
    return alignment
def generateReadAlignment(position=0, sequence='abc'):
    """
    Returns a ReadAlignment holding the given aligned sequence, with a
    linear alignment whose position is set to the given value.
    """
    readAlignment = protocol.ReadAlignment()
    linearAlignment = protocol.LinearAlignment()
    gaPosition = protocol.Position()
    # Wire the objects together from the innermost outwards.
    gaPosition.position = position
    linearAlignment.position = gaPosition
    readAlignment.alignment = linearAlignment
    readAlignment.alignedSequence = sequence
    return readAlignment
def _createReadAlignment(self, i):
    """
    Returns a fixed simulated ReadAlignment of three bases whose
    fragmentName is derived from the specified index.
    """
    # TODO fill out a bit more
    alignment = protocol.ReadAlignment()

    # Placeholder position on the positive strand.
    readPosition = protocol.Position()
    readPosition.strand = protocol.Strand.POS_STRAND
    readPosition.referenceName = "NotImplemented"
    readPosition.position = 0
    linearAlignment = protocol.LinearAlignment()
    linearAlignment.position = readPosition
    alignment.alignment = linearAlignment

    # Fixed read content.
    alignment.alignedSequence = "ACT"
    alignment.alignedQuality = [1, 2, 3]
    alignment.fragmentLength = 3
    alignment.fragmentId = 'TODO'
    alignment.fragmentName = "simulated{}".format(i)
    alignment.info = {}
    alignment.readGroupId = self.getId()

    # Boolean fields are all off; the remaining fields are left unset.
    for flagName in (
            "duplicateFragment", "failedVendorQualityChecks",
            "properPlacement", "secondaryAlignment",
            "supplementaryAlignment"):
        setattr(alignment, flagName, False)
    for unsetName in ("nextMatePosition", "numberReads", "readNumber"):
        setattr(alignment, unsetName, None)

    # The id is computed last, from the populated alignment.
    alignment.id = self.getReadAlignmentId(alignment)
    return alignment
def _createReadAlignment(self, i, seed):
    """
    Returns a simulated ReadAlignment whose length, qualities and bases
    are drawn from a random number generator seeded with the specified
    seed.

    :param i: index of this alignment, used to build its fragment_name.
    :param seed: seed for the private random.Random instance.
    :return: the populated protocol.ReadAlignment.
    """
    # TODO fill out a bit more
    rng = random.Random(seed)
    alignment = protocol.ReadAlignment()
    alignment.fragment_length = rng.randint(10, 100)
    bases = []
    # The loop variable is deliberately not called `i`: reusing that
    # name would overwrite the parameter and make fragment_name depend
    # on the fragment length instead of the alignment index.
    for _ in range(alignment.fragment_length):
        # TODO: are these reasonable quality values?
        # The quality and base draws are kept interleaved so that the
        # values generated for a given seed do not change.
        alignment.aligned_quality.append(rng.randint(1, 20))
        bases.append(rng.choice("ACGT"))
    alignment.aligned_sequence = "".join(bases)
    alignment.alignment.position.position = 0
    alignment.alignment.position.reference_name = "NotImplemented"
    alignment.alignment.position.strand = protocol.POS_STRAND
    alignment.duplicate_fragment = False
    alignment.failed_vendor_quality_checks = False
    alignment.fragment_name = "{}$simulated{}".format(
        self.getLocalId(), i)
    alignment.number_reads = 0
    alignment.improper_placement = False
    alignment.read_group_id = self.getId()
    alignment.read_number = -1
    alignment.secondary_alignment = False
    alignment.supplementary_alignment = False
    # The id is computed last, from the populated alignment.
    alignment.id = self._parentContainer.getReadAlignmentId(alignment)
    return alignment
def testInstantiation(self):
    """
    Checks that every exception class can be instantiated, and that
    each instance is a BaseServerException with a non-empty string
    message and the error code declared by its class.
    """
    # Defined before the loop because both validation failure branches
    # use it; defining it inside the request branch only would raise a
    # NameError whenever the response exception class is visited first.
    wrongString = "thisIsWrong"
    for class_ in self._getExceptionClasses():
        # some exceptions are becoming too complicated to instantiate
        # like the rest of the exceptions; just do them manually
        if class_ == exceptions.RequestValidationFailureException:
            objClass = protocol.SearchReadsRequest
            obj = objClass()
            obj.start = wrongString
            jsonDict = obj.toJsonDict()
            args = (jsonDict, objClass)
        elif class_ == exceptions.ResponseValidationFailureException:
            objClass = protocol.SearchReadsResponse
            obj = objClass()
            obj.alignments = [protocol.ReadAlignment()]
            obj.alignments[0].alignment = protocol.LinearAlignment()
            obj.alignments[0].alignment.mappingQuality = wrongString
            jsonDict = obj.toJsonDict()
            args = (jsonDict, objClass)
        else:
            # Every constructor argument except self gets a dummy value.
            numInitArgs = len(inspect.getargspec(class_.__init__).args) - 1
            args = ['arg'] * numInitArgs
        instance = class_(*args)
        self.assertIsInstance(instance, exceptions.BaseServerException)
        message = instance.getMessage()
        self.assertIsInstance(message, basestring)
        self.assertGreater(len(message), 0)
        self.assertEqual(instance.getErrorCode(), class_.getErrorCode())
def convertReadAlignment(self, read):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam aligned read to convert.
    :return: the populated protocol.ReadAlignment.
    """
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # pysam reports missing base qualities as None, which list() cannot
    # consume; fall back to an empty list in that case.
    if read.query_qualities is None:
        ret.alignedQuality = []
    else:
        ret.alignedQuality = list(read.query_qualities)
    ret.alignedSequence = read.query_sequence
    ret.alignment = protocol.LinearAlignment()
    ret.alignment.mappingQuality = read.mapping_quality
    ret.alignment.position = protocol.Position()
    ret.alignment.position.referenceName = self._samFile.getrname(
        read.reference_id)
    ret.alignment.position.position = read.reference_start
    ret.alignment.position.strand = \
        protocol.Strand.POS_STRAND  # TODO fix this!
    ret.alignment.cigar = []
    for operation, length in read.cigar:
        gaCigarUnit = protocol.CigarUnit()
        gaCigarUnit.operation = SamCigar.int2ga(operation)
        gaCigarUnit.operationLength = length
        gaCigarUnit.referenceSequence = None  # TODO fix this!
        ret.alignment.cigar.append(gaCigarUnit)
    ret.duplicateFragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_FRAGMENT)
    ret.failedVendorQualityChecks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_VENDOR_QUALITY_CHECKS)
    ret.fragmentLength = read.template_length
    ret.fragmentName = read.query_name
    ret.id = "{}:{}".format(self._id, read.query_name)
    ret.info = {key: [str(value)] for key, value in read.tags}
    # The mate position is only filled in when the read names a mate
    # reference (next_reference_id is not -1).
    ret.nextMatePosition = None
    if read.next_reference_id != -1:
        ret.nextMatePosition = protocol.Position()
        ret.nextMatePosition.referenceName = self._samFile.getrname(
            read.next_reference_id)
        ret.nextMatePosition.position = read.next_reference_start
        ret.nextMatePosition.strand = \
            protocol.Strand.POS_STRAND  # TODO fix this!
    # TODO Is this the correct mapping between numberReads and
    # sam flag 0x1? What about the mapping between numberReads
    # and 0x40 and 0x80?
    ret.numberReads = None
    ret.readNumber = None
    if SamFlags.isFlagSet(read.flag, SamFlags.NUMBER_READS):
        ret.numberReads = 2
        # NOTE(review): the read-number flags are only consulted for
        # reads flagged as having multiple reads - confirm this nesting
        # matches the intended mapping in the TODO above.
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_ONE):
            ret.readNumber = 0
        elif SamFlags.isFlagSet(read.flag, SamFlags.READ_NUMBER_TWO):
            ret.readNumber = 1
    ret.properPlacement = SamFlags.isFlagSet(
        read.flag, SamFlags.PROPER_PLACEMENT)
    ret.readGroupId = self._id
    ret.secondaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementaryAlignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    return ret
def testValidationFailureExceptionMessages(self):
    """
    Checks the messages produced by the request and response
    validation failure exceptions for objects holding a bad value.
    """
    badValue = "thisIsWrong"

    # RequestValidationFailureException
    requestClass = protocol.SearchReadsRequest
    request = requestClass()
    request.start = badValue
    requestError = exceptions.RequestValidationFailureException(
        request.toJsonDict(), requestClass)
    requestMessage = requestError.message
    self.assertIn("invalid fields:", requestMessage)
    self.assertIn("u'start': u'thisIsWrong'", requestMessage)
    self.assertEqual(requestMessage.count(badValue), 2)

    # ResponseValidationFailureException
    responseClass = protocol.SearchReadsResponse
    response = responseClass()
    response.alignments.extend([protocol.ReadAlignment()])
    firstAlignment = response.alignments[0]
    firstAlignment.alignment.mapping_quality = badValue
    responseError = exceptions.ResponseValidationFailureException(
        response.toJsonDict(), responseClass)
    responseMessage = responseError.message
    self.assertIn("Invalid fields", responseMessage)
    self.assertEqual(responseMessage.count(badValue), 2)
def convertReadAlignment(self, read):
    """
    Builds the GA4GH ReadAlignment corresponding to the specified pysam
    read, using the SAM file handle obtained from the parent container.
    """
    samFile = self._parentContainer.getFileHandle(self._parentSamFilePath)
    flags = read.flag

    def hasFlag(samFlag):
        # Shorthand for testing one SAM flag on this read.
        return SamFlags.isFlagSet(flags, samFlag)

    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    ret.fragmentId = 'TODO'
    qualities = read.query_qualities
    ret.alignedQuality = [] if qualities is None else list(qualities)
    ret.alignedSequence = read.query_sequence

    # Unmapped reads carry no linear alignment at all.
    if hasFlag(SamFlags.READ_UNMAPPED):
        ret.alignment = None
    else:
        linearAlignment = protocol.LinearAlignment()
        ret.alignment = linearAlignment
        linearAlignment.mappingQuality = read.mapping_quality
        readPosition = protocol.Position()
        linearAlignment.position = readPosition
        readPosition.referenceName = samFile.getrname(read.reference_id)
        readPosition.position = read.reference_start
        if hasFlag(SamFlags.READ_REVERSE_STRAND):
            readPosition.strand = protocol.Strand.NEG_STRAND
        else:
            readPosition.strand = protocol.Strand.POS_STRAND
        cigarUnits = []
        linearAlignment.cigar = cigarUnits
        for samOperation, operationLength in read.cigar:
            unit = protocol.CigarUnit()
            unit.operation = SamCigar.int2ga(samOperation)
            unit.operationLength = operationLength
            unit.referenceSequence = None  # TODO fix this!
            cigarUnits.append(unit)

    ret.duplicateFragment = hasFlag(SamFlags.DUPLICATE_READ)
    ret.failedVendorQualityChecks = hasFlag(SamFlags.FAILED_QUALITY_CHECK)
    ret.fragmentLength = read.template_length
    ret.fragmentName = read.query_name
    ret.info = {key: [str(value)] for key, value in read.tags}

    # The mate position is only described when the mate is mapped.
    if hasFlag(SamFlags.MATE_UNMAPPED):
        ret.nextMatePosition = None
    else:
        matePosition = protocol.Position()
        ret.nextMatePosition = matePosition
        if read.next_reference_id == -1:
            matePosition.referenceName = ""
        else:
            matePosition.referenceName = samFile.getrname(
                read.next_reference_id)
        matePosition.position = read.next_reference_start
        if hasFlag(SamFlags.MATE_REVERSE_STRAND):
            matePosition.strand = protocol.Strand.NEG_STRAND
        else:
            matePosition.strand = protocol.Strand.POS_STRAND

    ret.numberReads = 2 if hasFlag(SamFlags.READ_PAIRED) else 1

    # readNumber: 0 for first in pair, 1 for second, 2 when both flags
    # are set, and None when neither is.
    isFirst = hasFlag(SamFlags.FIRST_IN_PAIR)
    isSecond = hasFlag(SamFlags.SECOND_IN_PAIR)
    if isFirst and isSecond:
        ret.readNumber = 2
    elif isFirst:
        ret.readNumber = 0
    elif isSecond:
        ret.readNumber = 1
    else:
        ret.readNumber = None

    ret.properPlacement = hasFlag(SamFlags.READ_PROPER_PAIR)
    ret.readGroupId = self.getId()
    ret.secondaryAlignment = hasFlag(SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementaryAlignment = hasFlag(SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is computed last, from the populated alignment.
    ret.id = self.getReadAlignmentId(ret)
    return ret
def convertReadAlignment(self, read, readGroupSet, readGroupId):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam aligned read to convert.
    :param readGroupSet: object whose getReadAlignmentId() supplies the
        id of the converted alignment.
    :param readGroupId: value stored as the alignment's read_group_id.
    :return: the populated protocol.ReadAlignment.
    """
    samFile = self.getFileHandle(self._dataUrl)
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # ret.fragmentId = 'TODO'
    # pysam reports missing qualities or bases as None, which cannot be
    # stored in the message fields; leave those fields at their empty
    # defaults in that case.
    if read.query_qualities is not None:
        ret.aligned_quality.extend(read.query_qualities)
    if read.query_sequence is not None:
        ret.aligned_sequence = read.query_sequence
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
        # Unmapped reads carry no linear alignment.
        ret.ClearField("alignment")
    else:
        ret.alignment.CopyFrom(protocol.LinearAlignment())
        ret.alignment.mapping_quality = read.mapping_quality
        ret.alignment.position.CopyFrom(protocol.Position())
        ret.alignment.position.reference_name = samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
            ret.alignment.position.strand = protocol.NEG_STRAND
        for operation, length in read.cigar:
            gaCigarUnit = ret.alignment.cigar.add()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operation_length = length
            gaCigarUnit.reference_sequence = ""  # TODO fix this!
    ret.duplicate_fragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_READ)
    ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_QUALITY_CHECK)
    ret.fragment_length = read.template_length
    ret.fragment_name = read.query_name
    for key, value in read.tags:
        ret.info[key].values.add().string_value = str(value)
    # The mate position is cleared in every case and only filled in
    # when the mate is mapped.
    ret.next_mate_position.Clear()
    if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
        if read.next_reference_id != -1:
            ret.next_mate_position.reference_name = samFile.getrname(
                read.next_reference_id)
        else:
            ret.next_mate_position.reference_name = ""
        ret.next_mate_position.position = read.next_reference_start
        ret.next_mate_position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
            ret.next_mate_position.strand = protocol.NEG_STRAND
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
        ret.number_reads = 2
    else:
        ret.number_reads = 1
    # read_number: 0 for first in pair, 1 for second, 2 when both flags
    # are set, and -1 when neither is.
    ret.read_number = -1
    if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
        if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 2
        else:
            ret.read_number = 0
    elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
        ret.read_number = 1
    # The message stores the inverse of the SAM proper-pair flag.
    ret.improper_placement = not SamFlags.isFlagSet(
        read.flag, SamFlags.READ_PROPER_PAIR)
    ret.read_group_id = readGroupId
    ret.secondary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is computed last, from the populated alignment.
    ret.id = readGroupSet.getReadAlignmentId(ret)
    return ret