Beispiel #1
0
 def testOneElementBinWidth(self):
     """
     A finalized histogram that holds only a single element must report a
     bin width of zero.
     """
     histogram = Histogram()
     histogram.add(3)
     histogram.finalize()
     width = histogram.binWidth
     self.assertEqual(0.0, width)
Beispiel #2
0
 def testRepeatedFinalize(self):
     """
     Calling finalize on a histogram that has already been finalized must
     raise a RuntimeError.
     """
     histogram = Histogram()
     histogram.add(3)
     histogram.finalize()
     expectedMessage = '^Histogram already finalized$'
     six.assertRaisesRegex(self, RuntimeError, expectedMessage,
                           histogram.finalize)
Beispiel #3
0
 def testNoDataValue(self):
     """
     When an element is added without an associated datum, the value that
     was passed must itself be what is stored in the bin.
     """
     histogram = Histogram(1)
     histogram.add(3)
     histogram.finalize()
     expectedBins = [[3]]
     self.assertEqual(expectedBins, histogram.bins)
Beispiel #4
0
 def testTwoElementsBinWidth(self):
     """
     A 5-bucket histogram holding two elements that are 1.0 apart must end
     up with a bin width of 0.2.
     """
     histogram = Histogram(5)
     for value in (3, 4):
         histogram.add(value)
     histogram.finalize()
     self.assertEqual(0.2, histogram.binWidth)
Beispiel #5
0
 def testOneElementMaxMin(self):
     """
     With only one element in the histogram, both the max and the min must
     be equal to that element's value.
     """
     histogram = Histogram()
     histogram.add(3)
     histogram.finalize()
     # Check max before min, so a failure is reported in that order.
     self.assertEqual(3, histogram.max)
     self.assertEqual(3, histogram.min)
Beispiel #6
0
 def testElementIsStoredInBin(self):
     """
     With one bin and one added element, the very object that was passed
     (not a copy) must be what is found in the bin.
     """
     datum = object()
     histogram = Histogram(1)
     histogram.add(3, datum)
     histogram.finalize()
     stored = histogram.bins[0][0]
     self.assertIs(datum, stored)
Beispiel #7
0
 def testAddDataAfterFinalized(self):
     """
     Adding to a histogram after it has been finalized must raise a
     RuntimeError.
     """
     histogram = Histogram()
     histogram.add(3)
     histogram.finalize()
     expectedMessage = (
         '^Additional data cannot be added: '
         'histogram already finalized$')
     six.assertRaisesRegex(self, RuntimeError, expectedMessage,
                           histogram.add, 3)
Beispiel #8
0
 def testTwoElementsMaxMin(self):
     """
     With two elements in the histogram, the max must be the larger value
     and the min must be the smaller one.
     """
     histogram = Histogram()
     for value in (3, 4):
         histogram.add(value)
     histogram.finalize()
     # Check max before min, so a failure is reported in that order.
     self.assertEqual(4, histogram.max)
     self.assertEqual(3, histogram.min)
 def testAAFractionWhenSignificant(self):
     """
     isSignificant must return True when it is asked about a bin that is
     significant.
     """
     # Build the features in the same order as the match keys below.
     subjectLandmark = Landmark('AlphaHelix', 'A', 0, 9)
     subjectTrigPoint = TrigPoint('Peaks', 'P', 21)
     queryLandmark = Landmark('AlphaHelix', 'A', 10, 9)
     queryTrigPoint = TrigPoint('Peaks', 'P', 25)
     match = {
         'subjectLandmark': subjectLandmark,
         'subjectTrigPoint': subjectTrigPoint,
         'queryLandmark': queryLandmark,
         'queryTrigPoint': queryTrigPoint,
     }
     # A single bin holding the one match.
     histogram = Histogram(1)
     histogram.add(0, match)
     histogram.finalize()
     significance = AAFraction(histogram, 10, 0.75)
     result = significance.isSignificant(0)
     self.assertTrue(result)
Beispiel #10
0
 def testAAFractionSignificanceAnalysis(self):
     """
     After isSignificant has been called, the analysis attribute must hold
     the expected significance cutoff and significance method name.
     """
     # Build the features in the same order as the match keys below.
     subjectLandmark = Landmark('AlphaHelix', 'A', 0, 9)
     subjectTrigPoint = TrigPoint('Peaks', 'P', 21)
     queryLandmark = Landmark('AlphaHelix', 'A', 10, 9)
     queryTrigPoint = TrigPoint('Peaks', 'P', 25)
     match = {
         'subjectLandmark': subjectLandmark,
         'subjectTrigPoint': subjectTrigPoint,
         'queryLandmark': queryLandmark,
         'queryTrigPoint': queryTrigPoint,
     }
     # The same match is added three times, at values 0, 1 and 2.
     histogram = Histogram(3)
     for value in (0, 1, 2):
         histogram.add(value, match)
     histogram.finalize()
     significance = AAFraction(histogram, 10, 0.75)
     self.assertTrue(significance.isSignificant(0))
     expectedAnalysis = {
         'significanceCutoff': 7.5,
         'significanceMethod': 'AAFraction',
     }
     self.assertEqual(expectedAnalysis, significance.analysis)
Beispiel #11
0
    def _checkPositiveNegative(self, nBins, values):
        """
        When a set of values is put into a histogram, the bin counts that
        result must be the same (just with the order reversed) as those that
        result from a histogram made with the same set of values but with
        opposite sign.

        If the counts differ, the assertion message lists every differing
        bin along with the value range of the bin in each histogram.

        @param nBins: The C{int} number of bins to use in the histogram.
        @param values: A C{list} of values to insert into the histogram.
        """
        # Make a histogram of the values and get all the bin counts.
        h1 = Histogram(nBins)
        for value in values:
            h1.add(value)
        h1.finalize()
        counts1 = [len(bin_) for bin_ in h1.bins]

        # Make a histogram of the negative values and get all the bin counts.
        h2 = Histogram(nBins)
        for value in values:
            h2.add(-value)
        h2.finalize()
        counts2 = [len(bin_) for bin_ in h2.bins]
        counts2.reverse()

        # Prepare a useful error message, in case there are any differences.
        differences = ['Counts differ']
        lastBin = len(counts2) - 1
        for i, (count1, count2) in enumerate(zip(counts1, counts2)):
            if count1 != count2:
                h1Low = h1.min + i * h1.binWidth
                h1High = h1Low + h1.binWidth
                # counts2 has been reversed, so the count at position i
                # came from bin (lastBin - i) of h2. Report the range of
                # that bin, not of h2's bin i.
                h2Low = h2.min + (lastBin - i) * h2.binWidth
                h2High = h2Low + h2.binWidth
                differences.append(
                    '  bin %d (h1 bin range: %.7f to %.7f, h2 bin range: '
                    '%.7f to %.7f): count %d != count %d' %
                    (i, h1Low, h1High, h2Low, h2High, count1, count2))

        # Bin counts must be the same.
        self.assertEqual(counts1, counts2, '\n'.join(differences))
Beispiel #12
0
 def testFiveBinsMinusTwoPointFiveToPlusTwoPointFiveIntermediates(self):
     """
     In a 5-bin histogram whose data range is -2.5 to +2.5, an item whose
     value lies between two histogram boundaries must be placed in the
     expected bin.
     """
     # Each case is (value to add, expected per-bin counts for that value).
     cases = (
         (-2, [1, 0, 0, 0, 0]),
         (-1, [0, 1, 0, 0, 0]),
         (0, [0, 0, 1, 0, 0]),
         (1, [0, 0, 0, 1, 0]),
         (2, [0, 0, 0, 0, 1]),
     )
     for value, expectedCounts in cases:
         histogram = Histogram(5)
         histogram.add(-2.5)  # Set min value.
         histogram.add(2.5)  # Set max value.
         histogram.add(value)
         histogram.finalize()
         counts = [len(bin_) for bin_ in histogram.bins]
         # Subtract 1 from the first and last bin counts, to adjust for
         # the -2.5 and 2.5 boundary values we added manually.
         counts[0] -= 1
         counts[-1] -= 1
         self.assertEqual(expectedCounts, counts)
Beispiel #13
0
class Template(object):
    """
    Parse an ASCII art picture of a light matter match and provide access to
    it.

    All paired features found in the template are placed into a single-bin
    histogram (C{self.histogram}) that is finalized on construction.

    @param template: A C{str} template picture of the match.
    @raise ValueError: If the query and subject do not have the same number of
        paired features.
    """
    def __init__(self, template):
        self.template = self.templateToList(template)
        self.query = Query(self.template)
        self.subject = Subject(self.template)

        if len(self.query.pairedFeatures) != len(self.subject.pairedFeatures):
            raise ValueError(
                'The query and subject do not have the same number of paired '
                'features (%d != %d)' % (len(self.query.pairedFeatures),
                                         len(self.subject.pairedFeatures)))

        # Union the landmark and trig point names from the query and subject.
        self.landmarks = self.query.landmarks | self.subject.landmarks
        self.trigPoints = self.query.trigPoints | self.subject.trigPoints

        # One bin only: every match is added at value 0 and so lands in
        # bin 0, which is the bin that calculateScore scores.
        self.histogram = Histogram(1)

        for queryPair, subjectPair in zip(self.query.pairedFeatures,
                                          self.subject.pairedFeatures):
            _, queryLandmark, _, queryTrigPoint = queryPair
            _, subjectLandmark, _, subjectTrigPoint = subjectPair

            self.histogram.add(0, {
                'queryLandmark': queryLandmark,
                'queryTrigPoint': queryTrigPoint,
                'subjectLandmark': subjectLandmark,
                'subjectTrigPoint': subjectTrigPoint,
            })

        self.histogram.finalize()

    @staticmethod
    def templateToList(template):
        """
        Convert a picture to a list of non-blank lines with trailing
        whitespace removed.

        @param template: A C{str} template picture of the match.
        @return: A C{list} of the newline-separated lines of C{template}
            that are not empty or all whitespace, each with its trailing
            whitespace stripped (leading whitespace is kept).
        """
        # A raw string is needed so that \s reaches the regex engine as
        # written, instead of being an invalid string escape sequence.
        blank = re.compile(r'^\s*$')
        return [line.rstrip() for line in template.split('\n')
                if blank.match(line) is None]

    def calculateScore(self, dbParams=None, findParams=None):
        """
        Using a given scoring method, calculate the score of the alignment
        between the query and subject in the template.

        @param dbParams: A C{DatabaseParameters} instance, or C{None} to
            build one from the landmark and trig point names found in the
            template.
        @param findParams: An instance of C{light.parameters.FindParameters} or
            C{None} to use default find parameters.
        @raises ValueError: If C{dbParams} is passed and the landmarks and
            trig points it specifies do not include all the landmarks and trig
            points named in the template. Or if the C{binScoreMethod} in
            C{findParams} is unknown.
        @return: A 2-tuple, being the result of calling the C{calculateScore}
            method of the C{binScoreMethod} class. The tuple contains a
            C{float} score of the bin and a C{dict} with the analysis leading
            to the score (see light/bin_score.py).
        """
        findParams = findParams or FindParameters()
        if dbParams is None:
            dbParams = DatabaseParameters(landmarks=self.landmarks,
                                          trigPoints=self.trigPoints)
        else:
            # The passed parameters must cover every finder the template
            # mentions.
            missing = self.landmarks - set(dbParams.landmarkFinderNames())
            if missing:
                raise ValueError(
                    'The template mentions landmark finders (%s) that are '
                    'not present in the passed DatabaseParameters instance' %
                    ', '.join(sorted(missing)))

            missing = self.trigPoints - set(dbParams.trigPointFinderNames())
            if missing:
                raise ValueError(
                    'The template mentions trig point finders (%s) that are '
                    'not present in the passed DatabaseParameters instance' %
                    ', '.join(sorted(missing)))

        database = Database(dbParams=dbParams)
        _, subjectIndex, subjectHashCount = database.addSubject(
            self.subject.read)
        dbSubject = database.getSubjectByIndex(subjectIndex)

        binScoreMethod = findParams.binScoreMethod
        if binScoreMethod == 'NoneScore':
            scorer = NoneScore()
        elif binScoreMethod == 'MinHashesScore':
            # NOTE(review): this reaches into private attributes of the
            # database to scan the query and count its hashes.
            be = database._connector._backend
            scannedQuery = be.scan(self.query.read)
            queryHashCount = sum(
                len(hashInfo)
                for hashInfo in be.getHashes(scannedQuery).values())
            scorer = MinHashesScore(self.histogram,
                                    min(queryHashCount, subjectHashCount))
        elif binScoreMethod == 'FeatureMatchingScore':
            scorer = FeatureMatchingScore(
                self.histogram, self.query.read, dbSubject, dbParams,
                findParams)
        elif binScoreMethod == 'FeatureAAScore':
            scorer = FeatureAAScore(
                self.histogram, self.query.read, dbSubject, dbParams)
        elif binScoreMethod == 'WeightedFeatureAAScore':
            scorer = WeightedFeatureAAScore(
                self.histogram, self.query.read, dbSubject, dbParams,
                findParams.weights)
        else:
            raise ValueError('Unknown bin score method %r' % binScoreMethod)

        # The histogram has just one bin (index 0), holding all matches.
        return scorer.calculateScore(0)