def extractFeatures(cls, aln):
    """
    Extract the data in an alignment record into a ConsensusCore-friendly
    `QvSequenceFeatures` object.

    Only the features listed in `cls.requiredFeatures` are read from the
    record; every other feature in `ALL_FEATURES` gets a placeholder
    `cc.FloatFeature` sized to `aln.readLength` (presumably zero-filled,
    per the original documentation -- confirm against ConsensusCore).

    Parameters:
        cls: the model class; must expose `requiredFeatures`.
        aln: the alignment record.  `CmpH5Alignment` records are read in
            aligned form and have their gap columns stripped; any other
            record type is asked for its unaligned read and features
            directly.

    Returns:
        A `cc.QvSequenceFeatures` built from the read sequence followed by
        one feature array per entry of `ALL_FEATURES`, in that order.
    """
    if isinstance(aln, CmpH5Alignment):
        #
        # For cmp.h5 input, we have to use the AlnArray to see where the
        # gaps are (see bug 20752), in order to support old files.
        #
        rawRead = aln.read()
        if not isinstance(rawRead, bytes):
            # np.frombuffer needs a bytes-like object; the aligned read is
            # assumed to be plain ASCII -- TODO confirm for non-bytes input.
            rawRead = rawRead.encode("ascii")
        # np.frombuffer / tobytes are the supported replacements for the
        # deprecated binary-mode np.fromstring / ndarray.tostring.
        alnRead = np.frombuffer(rawRead, dtype=np.int8)
        notGap = alnRead != ord("-")
        sequence = alnRead[notGap].tobytes()

        def fetchFeature(feature):
            # Aligned feature array with the gap columns removed.
            return aln.pulseFeature(feature)[notGap]
    else:
        sequence = aln.read(aligned=False, orientation="native")

        def fetchFeature(feature):
            return aln.pulseFeature(feature, aligned=False)

    # The feature order is fixed by ALL_FEATURES because the arguments are
    # passed positionally to the QvSequenceFeatures constructor.
    _args = [sequence]
    for feature in ALL_FEATURES:
        if feature in cls.requiredFeatures:
            _args.append(asFloatFeature(fetchFeature(feature)))
        else:
            _args.append(cc.FloatFeature(int(aln.readLength)))
    return cc.QvSequenceFeatures(*_args)
def extractFeatures(cls, aln):
    """
    Extract the data in an alignment record into a ConsensusCore-friendly
    `QvSequenceFeatures` object.

    Only the features listed in `cls.requiredFeatures` are read from the
    record; every other feature in `ALL_FEATURES` gets a placeholder
    `cc.FloatFeature` sized to `aln.readLength` (presumably zero-filled,
    per the original documentation -- confirm against ConsensusCore).

    Parameters:
        cls: the model class; must expose `requiredFeatures`.
        aln: the alignment record.  `CmpH5Alignment` records are read in
            aligned form and have their gap columns stripped; any other
            record type is asked for its unaligned read and features
            directly.

    Returns:
        A `cc.QvSequenceFeatures` built from the read sequence followed by
        one feature array per entry of `ALL_FEATURES`, in that order.
    """
    if isinstance(aln, CmpH5Alignment):
        #
        # For cmp.h5 input, we have to use the AlnArray to see where the
        # gaps are (see bug 20752), in order to support old files.
        #
        rawRead = aln.read()
        if not isinstance(rawRead, bytes):
            # np.frombuffer needs a bytes-like object; the aligned read is
            # assumed to be plain ASCII -- TODO confirm for non-bytes input.
            rawRead = rawRead.encode("ascii")
        # np.frombuffer / tobytes are the supported replacements for the
        # deprecated binary-mode np.fromstring / ndarray.tostring.
        alnRead = np.frombuffer(rawRead, dtype=np.int8)
        notGap = alnRead != ord("-")
        sequence = alnRead[notGap].tobytes()

        def fetchFeature(feature):
            # Aligned feature array with the gap columns removed.
            return aln.baseFeature(feature)[notGap]
    else:
        sequence = aln.read(aligned=False, orientation="native")

        def fetchFeature(feature):
            return aln.baseFeature(feature, aligned=False)

    # The feature order is fixed by ALL_FEATURES because the arguments are
    # passed positionally to the QvSequenceFeatures constructor.
    _args = [sequence]
    for feature in ALL_FEATURES:
        if feature in cls.requiredFeatures:
            _args.append(asFloatFeature(fetchFeature(feature)))
        else:
            _args.append(cc.FloatFeature(int(aln.readLength)))
    return cc.QvSequenceFeatures(*_args)
def extractFeatures(cls, aln):
    """
    Extract the data in a cmp.h5 alignment record into a
    ConsensusCore-friendly `QvSequenceFeatures` object.

    Only the features listed in `cls.requiredFeatures` are read from the
    record; every other feature gets a placeholder `cc.FloatFeature` sized
    to `aln.readLength` (presumably zero-filled, per the original
    documentation -- confirm against ConsensusCore).

    Note that we have to use the AlnArray to see where the gaps are, at
    least for the moment (see bug 20752).

    Parameters:
        cls: the model class; must expose `requiredFeatures`.
        aln: the alignment record; its aligned read and aligned pulse
            features are fetched and stripped of gap columns.

    Returns:
        A `cc.QvSequenceFeatures` built from the gap-free read followed by
        the InsertionQV, SubstitutionQV, DeletionQV, DeletionTag and
        MergeQV feature arrays, in that order.
    """
    rawRead = aln.read()
    if not isinstance(rawRead, bytes):
        # np.frombuffer needs a bytes-like object; the aligned read is
        # assumed to be plain ASCII -- TODO confirm for non-bytes input.
        rawRead = rawRead.encode("ascii")
    # np.frombuffer / tobytes are the supported replacements for the
    # deprecated binary-mode np.fromstring / ndarray.tostring.
    alnRead = np.frombuffer(rawRead, dtype=np.int8)
    notGap = alnRead != ord("-")

    # The order matters: the arguments are passed positionally to the
    # QvSequenceFeatures constructor.
    featureNames = ("InsertionQV",
                    "SubstitutionQV",
                    "DeletionQV",
                    "DeletionTag",
                    "MergeQV")
    _args = [alnRead[notGap].tobytes()]
    for feature in featureNames:
        if feature in cls.requiredFeatures:
            _args.append(asFloatFeature(aln.pulseFeature(feature)[notGap]))
        else:
            _args.append(cc.FloatFeature(int(aln.readLength)))
    return cc.QvSequenceFeatures(*_args)