Example #1
0
    def extractFeatures(cls, aln):
        """
        Build a ConsensusCore `QvSequenceFeatures` object from an
        alignment record.

        The constructor receives the ungapped read sequence followed by
        one feature array per entry of `ALL_FEATURES`, in that order.
        Features named in `cls.requiredFeatures` are read from the
        record; every other slot gets a fresh `cc.FloatFeature` of
        length `aln.readLength` (i.e. a zero-filled stand-in).
        """
        def blankFeature():
            # Stand-in for a feature this model does not consume.
            return cc.FloatFeature(int(aln.readLength))

        if not isinstance(aln, CmpH5Alignment):
            # Non-cmp.h5 records are asked for unaligned data directly,
            # so no gap filtering is needed.
            sequence = aln.read(aligned=False, orientation="native")
            featureArgs = [
                asFloatFeature(aln.pulseFeature(name, aligned=False))
                if name in cls.requiredFeatures else blankFeature()
                for name in ALL_FEATURES
            ]
            return cc.QvSequenceFeatures(sequence, *featureArgs)

        # cmp.h5 input: gap positions are recovered from the aligned
        # read itself (see bug 20752), in order to support old files.
        # NOTE(review): np.fromstring (binary mode) and ndarray.tostring
        # are deprecated in newer NumPy -- confirm the supported NumPy
        # range before swapping in frombuffer/tobytes.
        baseCodes = np.fromstring(aln.read(), dtype=np.int8)
        keep = baseCodes != ord("-")
        sequence = baseCodes[keep].tostring()
        featureArgs = [
            asFloatFeature(aln.pulseFeature(name)[keep])
            if name in cls.requiredFeatures else blankFeature()
            for name in ALL_FEATURES
        ]
        return cc.QvSequenceFeatures(sequence, *featureArgs)
Example #2
0
    def extractFeatures(cls, aln):
        """
        Convert an alignment record into a ConsensusCore
        `QvSequenceFeatures` object.

        The ungapped read sequence is passed first, then one feature
        array for each name in `ALL_FEATURES` (in that order).  Only
        the features listed in `cls.requiredFeatures` are pulled from
        the record via `aln.baseFeature`; the remaining slots receive a
        new `cc.FloatFeature` of length `aln.readLength` (zero-filled
        stand-ins).
        """
        if isinstance(aln, CmpH5Alignment):
            # For cmp.h5 input the aligned read is inspected to find the
            # gap columns (see bug 20752), in order to support old files.
            # NOTE(review): np.fromstring (binary mode) and
            # ndarray.tostring are deprecated in newer NumPy -- confirm
            # the supported NumPy range before replacing them.
            codes = np.fromstring(aln.read(), dtype=np.int8)
            ungapped = ~(codes == ord("-"))
            sequence = codes[ungapped].tostring()

            def fetch(name):
                # Aligned feature values with the gap columns removed.
                return aln.baseFeature(name)[ungapped]
        else:
            sequence = aln.read(aligned=False, orientation="native")

            def fetch(name):
                # Unaligned feature values need no gap filtering.
                return aln.baseFeature(name, aligned=False)

        ctorArgs = [sequence]
        for name in ALL_FEATURES:
            if name not in cls.requiredFeatures:
                ctorArgs.append(cc.FloatFeature(int(aln.readLength)))
                continue
            ctorArgs.append(asFloatFeature(fetch(name)))
        return cc.QvSequenceFeatures(*ctorArgs)
Example #3
0
    def extractFeatures(cls, aln):
        """
        Turn a cmp.h5 alignment record into a ConsensusCore
        `QvSequenceFeatures` object.

        The ungapped read sequence is passed first, followed by the
        InsertionQV, SubstitutionQV, DeletionQV, DeletionTag and MergeQV
        slots in that order.  A slot is filled from the record only if
        its name appears in `cls.requiredFeatures`; otherwise it gets a
        new `cc.FloatFeature` of length `aln.readLength` (a zero-filled
        stand-in).

        Gap positions are found by scanning the aligned read for "-",
        at least for the moment (see bug 20752).
        """
        featureOrder = ("InsertionQV",
                        "SubstitutionQV",
                        "DeletionQV",
                        "DeletionTag",
                        "MergeQV")

        # NOTE(review): np.fromstring (binary mode) and ndarray.tostring
        # are deprecated in newer NumPy -- confirm the supported NumPy
        # range before replacing them.
        codes = np.fromstring(aln.read(), dtype=np.int8)
        isBase = codes != ord("-")

        ctorArgs = [codes[isBase].tostring()]
        ctorArgs.extend(
            asFloatFeature(aln.pulseFeature(name)[isBase])
            if name in cls.requiredFeatures
            else cc.FloatFeature(int(aln.readLength))
            for name in featureOrder)
        return cc.QvSequenceFeatures(*ctorArgs)