def testFindAllClasses(self):
    """
    Looking up any trig point class by its C{NAME} must return that very
    class.
    """
    for trigPointClass in ALL_TRIG_CLASSES:
        found = findTrigPoint(trigPointClass.NAME)
        self.assertIs(trigPointClass, found)
def extractNonPairFeatures(self):
    """
    Generate the features found on template lines that name a single
    feature (i.e., lines that do not contain a comma).

    The first template line is skipped; every other comma-free line is
    split at C{self.indentLength} into a feature name and the text holding
    the feature sequences.

    @raise ValueError: If the feature name is neither a known landmark nor
        a known trig point, or if a feature's sequence differs from
        C{self.read.sequence} at the same offsets.
    @return: A generator that yields 2-tuples containing:
        1. A C{str} feature name
        2. A C{light.features.Landmark} or a C{light.features.TrigPoint}
           instance
        in the order the features appear in the template lines.
    """
    for templateLine in self.noPipes[1:]:
        if ',' in templateLine:
            # A comma means the line names a pair of features from the
            # matched region. Those lines are handled by
            # extractPairedFeatures, so skip them here.
            continue

        featureName = templateLine[:self.indentLength].strip()
        featureStr = templateLine[self.indentLength:]

        # Try the name as a landmark first and only fall back to a trig
        # point lookup if that fails.
        landmark = findLandmark(featureName)
        if landmark is None:
            trigPoint = findTrigPoint(featureName)
            if trigPoint is None:
                raise ValueError('Unknown feature name %r found in %s '
                                 'template' % (featureName, self.TYPE))

        # Each run of non-whitespace characters is one occurrence of the
        # feature.
        for match in _NONWHITE_REGEXP.finditer(featureStr):
            offset, end = match.span()
            length = end - offset
            sequence = featureStr[offset:end]

            if sequence != self.read.sequence[offset:end]:
                raise ValueError(
                    '%s feature sequence %r found in %s template (offset '
                    '%d, length %d) does not match the full sequence for '
                    'the %s at those offsets' %
                    (featureName, sequence, self.TYPE, offset, length,
                     self.TYPE))

            if landmark:
                feature = Landmark(landmark.NAME, landmark.SYMBOL, offset,
                                   length)
            else:
                feature = TrigPoint(trigPoint.NAME, trigPoint.SYMBOL,
                                    offset)
            yield (featureName, feature)
def __init__(self, landmarks=None, trigPoints=None, limitPerLandmark=None,
             maxDistance=None, minDistance=None, distanceBase=None,
             featureLengthBase=None, randomLandmarkDensity=None,
             randomTrigPointDensity=None, acAlphaHelixFilename=None,
             acAlphaHelix310Filename=None,
             acAlphaHelixCombinedFilename=None,
             acAlphaHelixPiFilename=None, acExtendedStrandFilename=None):
    """
    Store the given parameters, falling back to the class-level
    C{DEFAULT_*} value for every argument that is C{None}.

    @param landmarks: Either C{None} (use C{DEFAULT_LANDMARK_CLASSES}) or
        an iterable whose items are landmark finder names (strings,
        resolved with C{findLandmark}) or anything else, which is taken
        to already be a landmark finder class.
    @param trigPoints: Either C{None} (use C{DEFAULT_TRIG_CLASSES}) or an
        iterable whose items are trig point finder names (strings,
        resolved with C{findTrigPoint}) or anything else, which is taken
        to already be a trig point finder class.
    @param distanceBase: Must be greater than zero.
    @param featureLengthBase: Must be greater than zero.
    @raise ValueError: If C{distanceBase} or C{featureLengthBase} is not
        greater than zero, or if a landmark or trig point finder name
        cannot be resolved to a class.

    All remaining arguments are stored unchanged on attributes with the
    same name.
    """
    # Plain scalar settings come first.
    if limitPerLandmark is None:
        limitPerLandmark = self.DEFAULT_LIMIT_PER_LANDMARK
    self.limitPerLandmark = limitPerLandmark

    if maxDistance is None:
        maxDistance = self.DEFAULT_MAX_DISTANCE
    self.maxDistance = maxDistance

    if minDistance is None:
        minDistance = self.DEFAULT_MIN_DISTANCE
    self.minDistance = minDistance

    if distanceBase is None:
        distanceBase = self.DEFAULT_DISTANCE_BASE
    self.distanceBase = distanceBase
    if self.distanceBase <= 0:
        raise ValueError('distanceBase must be > 0.')

    if featureLengthBase is None:
        featureLengthBase = self.DEFAULT_FEATURE_LENGTH_BASE
    self.featureLengthBase = featureLengthBase
    if self.featureLengthBase <= 0:
        raise ValueError('featureLengthBase must be > 0.')

    if randomLandmarkDensity is None:
        randomLandmarkDensity = self.DEFAULT_RANDOM_LANDMARK_DENSITY
    self.randomLandmarkDensity = randomLandmarkDensity

    if randomTrigPointDensity is None:
        randomTrigPointDensity = self.DEFAULT_RANDOM_TRIG_POINT_DENSITY
    self.randomTrigPointDensity = randomTrigPointDensity

    if acAlphaHelixFilename is None:
        acAlphaHelixFilename = self.DEFAULT_AC_ALPHAHELIX_FILENAME
    self.acAlphaHelixFilename = acAlphaHelixFilename

    if acAlphaHelix310Filename is None:
        acAlphaHelix310Filename = self.DEFAULT_AC_ALPHAHELIX_3_10_FILENAME
    self.acAlphaHelix310Filename = acAlphaHelix310Filename

    if acAlphaHelixCombinedFilename is None:
        acAlphaHelixCombinedFilename = (
            self.DEFAULT_AC_ALPHAHELIX_COMBINED_FILENAME)
    self.acAlphaHelixCombinedFilename = acAlphaHelixCombinedFilename

    if acAlphaHelixPiFilename is None:
        acAlphaHelixPiFilename = self.DEFAULT_AC_ALPHAHELIX_PI_FILENAME
    self.acAlphaHelixPiFilename = acAlphaHelixPiFilename

    if acExtendedStrandFilename is None:
        acExtendedStrandFilename = self.DEFAULT_AC_EXTENDED_STRAND_FILENAME
    self.acExtendedStrandFilename = acExtendedStrandFilename

    # Work out which landmark finder classes to use.
    if landmarks is None:
        landmarkClasses = DEFAULT_LANDMARK_CLASSES
    else:
        landmarkClasses = set()
        for item in landmarks:
            if not isinstance(item, string_types):
                # Not a name, so take it to be a landmark class already.
                landmarkClasses.add(item)
                continue
            found = findLandmark(item)
            if not found:
                raise ValueError(
                    'Could not find landmark finder class %r.' % item)
            landmarkClasses.add(found)

    # Work out which trig point finder classes to use.
    if trigPoints is None:
        trigPointClasses = DEFAULT_TRIG_CLASSES
    else:
        trigPointClasses = set()
        for item in trigPoints:
            if not isinstance(item, string_types):
                # Not a name, so take it to be a trig point class already.
                trigPointClasses.add(item)
                continue
            found = findTrigPoint(item)
            if not found:
                raise ValueError(
                    'Could not find trig point finder class %r.' % item)
            trigPointClasses.add(found)

    # This file does not use the finder instances itself; the backend
    # does. They are kept in sorted lists so that they are always
    # processed in the same order (when printing, computing checksums,
    # etc).
    landmarkFinders = [finderClass(self) for finderClass in landmarkClasses]
    landmarkFinders.sort()
    self.landmarkFinders = landmarkFinders

    trigPointFinders = [finderClass(self)
                        for finderClass in trigPointClasses]
    trigPointFinders.sort()
    self.trigPointFinders = trigPointFinders
def extractPairedFeatures(self):
    """
    Find all paired features in the template.

    Only template lines (after the first) that contain a comma are
    examined. Each such line is split at C{self.indentLength} into the two
    comma-separated feature names and the text holding the two feature
    sequences.

    @raise ValueError: If a line contains an unknown landmark or trig
        point name, doesn't have exactly one comma separating two names
        in its name column, has a feature whose sequence differs from
        C{self.read.sequence} at the same offsets, or doesn't contain
        exactly two features.
    @return: A generator that yields 4-tuples containing
        1. A C{str} landmark feature name
        2. A C{light.features.Landmark} instance
        3. A C{str} trig point feature name (this may actually be a
           landmark, but we typically call it a trig point).
        4. A C{light.features.TrigPoint} instance (may actually be a
           C{light.features.Landmark} instance)
        as they are found in the template lines.
    """
    for line in self.noPipes[1:]:
        if ',' not in line:
            # This line contains non-paired features (it has just one
            # feature name - implied by the lack of a comma). Ignore it
            # as it will be processed in extractNonPairFeatures.
            continue

        if line.count(',') != 1:
            raise ValueError('%s template line %r contains multiple '
                             'commas' % (self.TYPE, line))

        names = [name.strip()
                 for name in line[:self.indentLength].split(',')]
        if len(names) != 2:
            # The line's only comma is not in the name column, so there
            # are not two names to unpack. Say so explicitly instead of
            # failing with an opaque tuple-unpacking error.
            raise ValueError(
                '%s template line %r does not have its comma between two '
                'feature names' % (self.TYPE, line))
        landmarkName, secondFeatureName = names
        featureStr = line[self.indentLength:]

        # The first feature of a matched pair must be a landmark.
        landmark = findLandmark(landmarkName)
        if landmark is None:
            raise ValueError('Unknown landmark name %r found in %s '
                             'template line %r' %
                             (landmarkName, self.TYPE, line))

        # The second feature of a matched pair can be a trig point or
        # another landmark.
        landmark2 = findLandmark(secondFeatureName)
        if landmark2 is None:
            trigPoint = findTrigPoint(secondFeatureName)
            if trigPoint is None:
                raise ValueError('Unknown feature name %r found in %s '
                                 'template' %
                                 (secondFeatureName, self.TYPE))

        # Reset for every line so that a feature built for an earlier
        # line can never be yielded for this one.
        landmarkFeature = secondFeature = None

        for count, match in enumerate(
                _NONWHITE_REGEXP.finditer(featureStr)):
            if count > 1:
                raise ValueError(
                    'More than two features found in matched region pair '
                    'line in %s template. Line was %r' %
                    (self.TYPE, line))

            first = count == 0
            offset = match.start()
            length = match.end() - offset
            sequence = match.group()

            if sequence != self.read.sequence[offset:offset + length]:
                raise ValueError(
                    '%s feature sequence %r found in %s template '
                    '(offset %d, length %d) does not match the full '
                    'sequence for the %s at those offsets' %
                    (landmarkName if first else secondFeatureName,
                     sequence, self.TYPE, offset, length, self.TYPE))

            if first:
                landmarkFeature = Landmark(landmark.NAME, landmark.SYMBOL,
                                           offset, length)
            elif landmark2:
                secondFeature = Landmark(landmark2.NAME, landmark2.SYMBOL,
                                         offset, length)
            else:
                secondFeature = TrigPoint(trigPoint.NAME, trigPoint.SYMBOL,
                                          offset)

        if secondFeature is None:
            # Zero or one feature was found, so there is no pair to yield.
            raise ValueError(
                'Fewer than two features found in matched region pair '
                'line in %s template. Line was %r' % (self.TYPE, line))

        yield (landmarkName, landmarkFeature, secondFeatureName,
               secondFeature)
def testFindDevTrigPoint(self):
    """
    Development trig point classes must also be found by name.
    """
    found = findTrigPoint('RandomTrigPoint')
    self.assertIs(RandomTrigPoint, found)
def testFindTrigPointFails(self):
    """
    Asking for a trig point class that doesn't exist must give C{None}.
    """
    result = findTrigPoint('silly')
    self.assertIs(None, result)