class TestReferenceValidator(unittest.TestCase): def setUp(self): self.rv = ReferenceValidator(weights={'title':0.75, 'author':0.25}) def tearDown(self): pass def test_validate_correct_reference(self): correct_ref = Reference() correct_ref.set_field('author', [{'first_name':'Jose-Luis', 'last_name':'Sancho', 'middle_name':''}], True) correct_ref.set_field('title', ('Class separability estimation and ' 'incremental learning using boundary methods'), True) self.rv.validate(correct_ref) self.failUnless(correct_ref.validity == 1.0) def test_validate_incorrect_reference(self): incorrect_ref = Reference() incorrect_ref.set_field('title', ('some arbitrary text'), False) incorrect_ref.set_field('author', [{'first_name':'Jose-Luis', 'last_name':'Sancho', 'middle_name':''}], True) self.rv.validate(incorrect_ref) self.failUnless(incorrect_ref.validity < 0.5)
class TestReferenceValidator(unittest.TestCase): def setUp(self): self.rv = ReferenceValidator(weights={'title': 0.75, 'author': 0.25}) def tearDown(self): pass def test_validate_correct_reference(self): correct_ref = Reference() correct_ref.set_field('author', [{ 'first_name': 'Jose-Luis', 'last_name': 'Sancho', 'middle_name': '' }], True) correct_ref.set_field('title', ('Class separability estimation and ' 'incremental learning using boundary methods'), True) self.rv.validate(correct_ref) self.failUnless(correct_ref.validity == 1.0) def test_validate_incorrect_reference(self): incorrect_ref = Reference() incorrect_ref.set_field('title', ('some arbitrary text'), False) incorrect_ref.set_field('author', [{ 'first_name': 'Jose-Luis', 'last_name': 'Sancho', 'middle_name': '' }], True) self.rv.validate(incorrect_ref) self.failUnless(incorrect_ref.validity < 0.5)
def make_reference(self, file, target_format):
    """
    Drives the controllers through the whole pipeline for one file:
    extract its content, derive query strings, fetch results from a
    search engine and extract the reference from them.

    The Extraction object is filled in stage by stage and is always the
    return value. When a stage produces nothing (no raw text, no query
    strings, no top results) the partially filled Extraction is returned
    right away. Otherwise every extracted entry is handed to a
    ReferenceValidator together with the raw text before returning.
    """
    extraction = Extraction()
    extraction.file_path = file
    extraction.target_format = target_format

    log.info("Making reference for file: %s" % file) #@UndefinedVariable

    # Stage 1: raw content extraction.
    content_controller = RCEController(self.factory)
    raw_text = content_controller.extract_content(file, FileFormat.TXT)
    if not raw_text:
        return extraction

    # Stage 2: query strings derived from the raw text.
    extraction.query_strings = content_controller.get_query_strings(raw_text)
    if not extraction.query_strings:
        log.error('No query strings extracted') #@UndefinedVariable
        return extraction
    log.debug("Query strings %s" % str(extraction.query_strings)) #@UndefinedVariable

    # Stage 3: retrieve results for the queries.
    retrieval_controller = IRController(self.factory)
    results_and_query = retrieval_controller.get_top_results(
        extraction.query_strings)
    extraction.top_results, extraction.used_query = results_and_query
    if not extraction.top_results:
        queries_tried = len(extraction.query_strings)
        log.error('No top results to use with the available wrappers ' #@UndefinedVariable
                  'after trying %d queries' % queries_tried)
        return extraction

    # The query that was used is taken out of the stored query strings.
    extraction.query_strings.remove(extraction.used_query)
    log.debug("Used query %s" % str(extraction.used_query)) #@UndefinedVariable
    log.debug("Query returned %d top results" % len(extraction.top_results)) #@UndefinedVariable

    # Stage 4: extract the reference entries from the top results.
    extraction_controller = IEController(self.factory, target_format)
    entries_and_result = extraction_controller.extract_reference(
        extraction.top_results, raw_text)
    extraction.entries, extraction.used_result = entries_and_result

    # Likewise, the result that was used is taken out of the top results.
    extraction.top_results.remove(extraction.used_result)
    log.info("Used result: %s" % str(extraction.used_result)) #@UndefinedVariable

    # Stage 5: validate each extracted entry.
    entry_validator = ReferenceValidator(FIELD_WEIGHTS)
    for entry in extraction.entries:
        entry_validator.validate(entry, raw_text)

    return extraction
def setUp(self):
    """Build the validator under test, weighting title 0.75 and author 0.25."""
    field_weights = {'title': 0.75, 'author': 0.25}
    self.rv = ReferenceValidator(weights=field_weights)
def setUp(self):
    """Create a fresh ReferenceValidator (title 0.75, author 0.25) per test."""
    weights_by_field = {'title': 0.75, 'author': 0.25}
    self.rv = ReferenceValidator(weights=weights_by_field)