def testOneElementBinWidth(self):
    """
    A histogram that holds only a single element must report a bin width
    of zero once it has been finalized.
    """
    histogram = Histogram()
    histogram.add(3)
    histogram.finalize()
    observedWidth = histogram.binWidth
    self.assertEqual(0.0, observedWidth)
def testRepeatedFinalize(self):
    """
    Finalizing a histogram that has already been finalized must raise a
    RuntimeError.
    """
    expectedMessage = '^Histogram already finalized$'
    histogram = Histogram()
    histogram.add(3)
    histogram.finalize()
    # The second call to finalize is the one that has to fail.
    with six.assertRaisesRegex(self, RuntimeError, expectedMessage):
        histogram.finalize()
def testNoDataValue(self):
    """
    When a value is added without an accompanying datum, the value itself
    must be what ends up stored in the bin.
    """
    histogram = Histogram(1)
    histogram.add(3)
    histogram.finalize()
    expectedBins = [[3]]
    self.assertEqual(expectedBins, histogram.bins)
def testTwoElementsBinWidth(self):
    """
    With 5 bins and two elements that are 1.0 apart, the bin width of the
    finalized histogram must be 0.2.
    """
    histogram = Histogram(5)
    for value in (3, 4):
        histogram.add(value)
    histogram.finalize()
    self.assertEqual(0.2, histogram.binWidth)
def testOneElementMaxMin(self):
    """
    A histogram that holds only a single element must report that element
    as both its maximum and its minimum.
    """
    onlyValue = 3
    histogram = Histogram()
    histogram.add(onlyValue)
    histogram.finalize()
    self.assertEqual(3, histogram.max)
    self.assertEqual(3, histogram.min)
def testElementIsStoredInBin(self):
    """
    With one bin and one element, the very object that was passed along
    with the value must be what is found in the bin.
    """
    datum = object()
    histogram = Histogram(1)
    histogram.add(3, datum)
    histogram.finalize()
    firstBin = histogram.bins[0]
    # Identity, not mere equality, is what is being checked.
    self.assertIs(datum, firstBin[0])
def testAddDataAfterFinalized(self):
    """
    Adding to a histogram after it has been finalized must raise a
    RuntimeError.
    """
    expectedMessage = (
        '^Additional data cannot be added: histogram already finalized$')
    histogram = Histogram()
    histogram.add(3)
    histogram.finalize()
    # Only this late addition is expected to fail.
    with six.assertRaisesRegex(self, RuntimeError, expectedMessage):
        histogram.add(3)
def testTwoElementsMaxMin(self):
    """
    A histogram holding two elements must report the larger one as its
    maximum and the smaller one as its minimum.
    """
    histogram = Histogram()
    for value in (3, 4):
        histogram.add(value)
    histogram.finalize()
    self.assertEqual(4, histogram.max)
    self.assertEqual(3, histogram.min)
def testAAFractionWhenSignificant(self):
    """
    isSignificant must return True when it is asked about a bin that is
    significant.
    """
    subjectLandmark = Landmark('AlphaHelix', 'A', 0, 9)
    subjectTrigPoint = TrigPoint('Peaks', 'P', 21)
    queryLandmark = Landmark('AlphaHelix', 'A', 10, 9)
    queryTrigPoint = TrigPoint('Peaks', 'P', 25)
    pairing = {
        'subjectLandmark': subjectLandmark,
        'subjectTrigPoint': subjectTrigPoint,
        'queryLandmark': queryLandmark,
        'queryTrigPoint': queryTrigPoint,
    }

    hist = Histogram(1)
    hist.add(0, pairing)
    hist.finalize()

    checker = AAFraction(hist, 10, 0.75)
    self.assertTrue(checker.isSignificant(0))
def testAAFractionSignificanceAnalysis(self):
    """
    The analysis made available by AAFraction must contain the expected
    significance cutoff and method name.
    """
    subjectLandmark = Landmark('AlphaHelix', 'A', 0, 9)
    subjectTrigPoint = TrigPoint('Peaks', 'P', 21)
    queryLandmark = Landmark('AlphaHelix', 'A', 10, 9)
    queryTrigPoint = TrigPoint('Peaks', 'P', 25)
    pairing = {
        'subjectLandmark': subjectLandmark,
        'subjectTrigPoint': subjectTrigPoint,
        'queryLandmark': queryLandmark,
        'queryTrigPoint': queryTrigPoint,
    }

    hist = Histogram(3)
    # The same match is stored under three different values.
    for value in (0, 1, 2):
        hist.add(value, pairing)
    hist.finalize()

    checker = AAFraction(hist, 10, 0.75)
    self.assertTrue(checker.isSignificant(0))

    expectedAnalysis = {
        'significanceCutoff': 7.5,
        'significanceMethod': 'AAFraction',
    }
    self.assertEqual(expectedAnalysis, checker.analysis)
def _checkPositiveNegative(self, nBins, values):
    """
    When a set of values is put into a histogram, the bin counts that
    result must be the same (just with the order reversed) as those that
    result from a histogram made with the same set of values but with
    opposite sign.

    @param nBins: The C{int} number of bins to use in the histogram.
    @param values: A C{list} of values to insert into the histogram.
    """
    def binCounts(data):
        # Build and finalize a histogram of C{data}, returning it along
        # with the number of items in each of its bins.
        histogram = Histogram(nBins)
        for value in data:
            histogram.add(value)
        histogram.finalize()
        return histogram, [len(bin_) for bin_ in histogram.bins]

    # Make a histogram of the values and get all the bin counts.
    h1, counts1 = binCounts(values)

    # Make a histogram of the negative values and get all the bin counts,
    # reversed so that mirrored bins line up index by index.
    h2, counts2 = binCounts([-x for x in values])
    counts2.reverse()

    # Prepare a useful error message, in case there are any differences.
    differences = ['Counts differ']
    lastBin = len(counts2) - 1
    for i, (count1, count2) in enumerate(zip(counts1, counts2)):
        if count1 != count2:
            h1Low = h1.min + i * h1.binWidth
            h1High = h1Low + h1.binWidth
            # counts2 was reversed above, so the count at index i came
            # from h2 bin (lastBin - i). Report the range of that bin,
            # not of h2 bin i.
            h2Low = h2.min + (lastBin - i) * h2.binWidth
            h2High = h2Low + h2.binWidth
            differences.append(
                '  bin %d (h1 bin range: %.7f to %.7f, h2 bin range: '
                '%.7f to %.7f): count %d != count %d' %
                (i, h1Low, h1High, h2Low, h2High, count1, count2))

    # Bin counts must be the same.
    self.assertEqual(counts1, counts2, '\n'.join(differences))
def testFiveBinsMinusTwoPointFiveToPlusTwoPointFiveIntermediates(self):
    """
    In a 5-bin histogram whose data ranges from -2.5 to +2.5, a value that
    lies between bin boundaries must be placed in the expected bin.
    """
    lowest, highest = -2.5, 2.5
    cases = (
        (-2, [1, 0, 0, 0, 0]),
        (-1, [0, 1, 0, 0, 0]),
        (0, [0, 0, 1, 0, 0]),
        (1, [0, 0, 0, 1, 0]),
        (2, [0, 0, 0, 0, 1]),
    )
    for value, expectedCounts in cases:
        histogram = Histogram(5)
        # The two extremes are added first to fix the min and max, and
        # then the value under test.
        for item in (lowest, highest, value):
            histogram.add(item)
        histogram.finalize()
        counts = [len(bin_) for bin_ in histogram.bins]
        # Discount the manually added -2.5 and 2.5 boundary values from
        # the first and last bins.
        counts[0] -= 1
        counts[-1] -= 1
        self.assertEqual(expectedCounts, counts)
class Template(object):
    """
    Parse an ASCII art picture of a light matter match and provide access to
    it.

    @param template: A C{str} template picture of the match.
    @raise ValueError: If the query and subject do not have the same number
        of paired features.
    """

    def __init__(self, template):
        self.template = self.templateToList(template)
        self.query = Query(self.template)
        self.subject = Subject(self.template)

        if len(self.query.pairedFeatures) != len(
                self.subject.pairedFeatures):
            raise ValueError(
                'The query and subject do not have the same number of paired '
                'features (%d != %d)' %
                (len(self.query.pairedFeatures),
                 len(self.subject.pairedFeatures)))

        # Union the landmark and trig point names from the query and subject.
        self.landmarks = self.query.landmarks | self.subject.landmarks
        self.trigPoints = self.query.trigPoints | self.subject.trigPoints

        # All paired features are put into the single bin of a one-bin
        # histogram (every pair is added with the value 0).
        self.histogram = Histogram(1)

        for queryPair, subjectPair in zip(self.query.pairedFeatures,
                                          self.subject.pairedFeatures):
            _, queryLandmark, _, queryTrigPoint = queryPair
            _, subjectLandmark, _, subjectTrigPoint = subjectPair

            self.histogram.add(0, {
                'queryLandmark': queryLandmark,
                'queryTrigPoint': queryTrigPoint,
                'subjectLandmark': subjectLandmark,
                'subjectTrigPoint': subjectTrigPoint,
            })

        self.histogram.finalize()

    @staticmethod
    def templateToList(template):
        """
        Convert a picture to a list of trimmed non-blank lines.

        @param template: A C{str} template picture of the match.
        @return: A C{list} of the non-blank lines of C{template}, each with
            its trailing whitespace removed. Leading whitespace is kept.
        """
        # A raw string is needed so that the regex escape is not treated as
        # an (invalid) string escape.
        blank = re.compile(r'^\s*$')
        return [line.rstrip() for line in template.split('\n')
                if blank.match(line) is None]

    def calculateScore(self, dbParams=None, findParams=None):
        """
        Using a given scoring method, calculate the score of the alignment
        between the query and subject in the template.

        @param dbParams: A C{DatabaseParameters} instance, or C{None} to
            use one built from the landmark and trig point names found in
            the template.
        @param findParams: An instance of C{light.parameters.FindParameters}
            or C{None} to use default find parameters.
        @raises ValueError: If C{dbParams} is passed and the landmarks and
            trig points it specifies do not include all the landmarks and
            trig points named in the template. Or if the C{binScoreMethod}
            in C{findParams} is unknown.
        @return: A 2-tuple, being the result of calling the
            C{calculateScore} method of the C{binScoreMethod} class. The
            tuple contains a C{float} score of the bin and a C{dict} with
            the analysis leading to the score (see light/bin_score.py).
        """
        findParams = findParams or FindParameters()
        if dbParams is None:
            dbParams = DatabaseParameters(landmarks=self.landmarks,
                                          trigPoints=self.trigPoints)
        else:
            # A caller-supplied set of parameters must cover every finder
            # the template mentions.
            missing = self.landmarks - set(dbParams.landmarkFinderNames())
            if missing:
                raise ValueError(
                    'The template mentions landmark finders (%s) that are '
                    'not present in the passed DatabaseParameters instance' %
                    ', '.join(sorted(missing)))

            missing = self.trigPoints - set(dbParams.trigPointFinderNames())
            if missing:
                raise ValueError(
                    'The template mentions trig point finders (%s) that are '
                    'not present in the passed DatabaseParameters instance' %
                    ', '.join(sorted(missing)))

        database = Database(dbParams=dbParams)
        _, subjectIndex, subjectHashCount = database.addSubject(
            self.subject.read)
        dbSubject = database.getSubjectByIndex(subjectIndex)

        binScoreMethod = findParams.binScoreMethod
        if binScoreMethod == 'NoneScore':
            scorer = NoneScore()
        elif binScoreMethod == 'MinHashesScore':
            # NOTE(review): this reaches into private attributes of the
            # database to scan the query and count its hashes.
            be = database._connector._backend
            scannedQuery = be.scan(self.query.read)
            queryHashCount = sum(
                len(hashInfo)
                for hashInfo in be.getHashes(scannedQuery).values())
            scorer = MinHashesScore(self.histogram,
                                    min(queryHashCount, subjectHashCount))
        elif binScoreMethod == 'FeatureMatchingScore':
            scorer = FeatureMatchingScore(
                self.histogram, self.query.read, dbSubject, dbParams,
                findParams)
        elif binScoreMethod == 'FeatureAAScore':
            scorer = FeatureAAScore(
                self.histogram, self.query.read, dbSubject, dbParams)
        elif binScoreMethod == 'WeightedFeatureAAScore':
            scorer = WeightedFeatureAAScore(
                self.histogram, self.query.read, dbSubject, dbParams,
                findParams.weights)
        else:
            raise ValueError('Unknown bin score method %r' % binScoreMethod)

        # The histogram has a single bin, so bin 0 is the one to score.
        return scorer.calculateScore(0)