def make_reference(self, file, target_format):
    """
    Runs the reference-making pipeline for a file: extracts its text and
    some query strings, asks a search engine for results and extracts the
    reference out of them.

    An Extraction describing how far the pipeline got is always returned.
    It is returned early when no text can be extracted, when no query
    strings are obtained or when the search yields no top results.
    Otherwise every extracted entry is handed to a ReferenceValidator
    together with the extracted text before returning.
    """
    extraction = Extraction()
    extraction.file_path = file
    extraction.target_format = target_format

    log.info("Making reference for file: %s" % file) #@UndefinedVariable

    # Content extraction: raw text first, then the query strings
    extractor = RCEController(self.factory)
    raw_text = extractor.extract_content(file, FileFormat.TXT)
    if not raw_text:
        return extraction

    extraction.query_strings = extractor.get_query_strings(raw_text)
    if not extraction.query_strings:
        log.error('No query strings extracted') #@UndefinedVariable
        return extraction

    log.debug("Query strings %s" % str(extraction.query_strings)) #@UndefinedVariable

    # Retrieval: top results plus the query that produced them
    retriever = IRController(self.factory)
    results, query = retriever.get_top_results(extraction.query_strings)
    extraction.top_results = results
    extraction.used_query = query
    if not extraction.top_results:
        tried = len(extraction.query_strings)
        log.error('No top results to use with the available wrappers ' #@UndefinedVariable
                  'after trying %d queries' % tried)
        return extraction

    # From here on query_strings only holds the queries that were not used
    extraction.query_strings.remove(extraction.used_query)

    log.debug("Used query %s" % str(extraction.used_query)) #@UndefinedVariable
    log.debug("Query returned %d top results" % len(extraction.top_results)) #@UndefinedVariable

    # Reference extraction: entries plus the result they came from
    reference_extractor = IEController(self.factory, target_format)
    entries, used = reference_extractor.extract_reference(
        extraction.top_results, raw_text)
    extraction.entries = entries
    extraction.used_result = used

    # Likewise, top_results only keeps the results that were not used
    extraction.top_results.remove(extraction.used_result)

    log.info("Used result: %s" % str(extraction.used_result)) #@UndefinedVariable

    validator = ReferenceValidator(FIELD_WEIGHTS)
    for reference in extraction.entries:
        validator.validate(reference, raw_text)

    return extraction
class TestRCEController(unittest.TestCase):
    """
    Tests for RCEController: content extraction from a file and query
    string generation from a piece of text.
    """

    # Sample input for get_query_strings. It looks like the raw text of an
    # article's first page (odd characters included); kept as a single
    # line of text, split here only for readability.
    some_text = (
        'Neurocomputing 35 (2000) 3}26 '
        'Class separability estimation and incremental learning using '
        'boundary methods '
        'Jose-Luis Sancho *, William E. Pierson , Batu Ulug , '
        'H AnmH bal R. Figueiras-Vidal , Stanley C. Ahalt '
        'ATSC-DI, Escuela Politecnica Superior. Universidad Carlos III '
        'Leganes-Madrid, Spain & & '
        'Department of Electrical Engineering, he Ohio State University '
        'Columbus, OH 43210, USA '
        'Received 7 January 1999; revised 5 April 1999; '
        'accepted 10 April 2000 '
        'Abstract '
        'In this paper we discuss the use of boundary methods (BMs) for '
        'distribution analysis. We view these methods as tools which can '
        'be used to extract useful information from sample distributions. '
        'We believe that the information thus extracted has utility for a '
        'number of applications, but in particular we discuss the use of '
        'BMs as a mechanism for class separability estimation and as an '
        'aid to constructing robust and e$cient neural networks (NNs) to '
        'solve classi"cation problems. In the "rst case, BMs can '
        'establish the utili... '
    )

    def setUp(self):
        # Fresh controller per test, plus the normalized path of the PDF
        # fixture, which is located relative to this module's directory.
        factory = UtilFactory()
        self.rcec = RCEController(factory)
        self.pdf = normpath(join(dirname(__file__), ('../../../../tests/'
                            'fixtures/extraction/article.pdf')))

    def tearDown(self):
        pass

    # NOTE: assertTrue is used instead of the failUnless alias, which is
    # deprecated and no longer available in recent Python versions.

    def test_extract_content_from_non_existent_file(self):
        # A path that does not exist is expected to yield no content
        content = self.rcec.extract_content('somefile.pdf', FileFormat.TXT)
        self.assertTrue(content is None)

    def test_extract_content_to_invalid_target_format(self):
        # An unknown target format is expected to yield no content
        content = self.rcec.extract_content(self.pdf, 'invalid format')
        self.assertTrue(content is None)

    def test_extract_content_from_pdf(self):
        # The PDF fixture is expected to yield some content as text
        content = self.rcec.extract_content(self.pdf, FileFormat.TXT)
        self.assertTrue(content is not None)

    def test_get_query_strings(self):
        # At least one query string is expected for the sample text
        strings = self.rcec.get_query_strings(self.some_text)
        self.assertTrue(len(strings) > 0)
class TestRCEController(unittest.TestCase):
    """
    Tests for RCEController: content extraction from a file and query
    string generation from a piece of text.
    """

    # Sample input for get_query_strings. It looks like the raw text of an
    # article's first page (odd characters included); kept as a single
    # line of text, split here only for readability.
    some_text = (
        "Neurocomputing 35 (2000) 3}26 "
        "Class separability estimation and incremental learning using "
        "boundary methods "
        "Jose-Luis Sancho *, William E. Pierson , Batu Ulug , "
        "H AnmH bal R. Figueiras-Vidal , Stanley C. Ahalt "
        "ATSC-DI, Escuela Politecnica Superior. Universidad Carlos III "
        "Leganes-Madrid, Spain & & "
        "Department of Electrical Engineering, he Ohio State University "
        "Columbus, OH 43210, USA "
        "Received 7 January 1999; revised 5 April 1999; "
        "accepted 10 April 2000 "
        "Abstract "
        "In this paper we discuss the use of boundary methods (BMs) for "
        "distribution analysis. We view these methods as tools which can "
        "be used to extract useful information from sample distributions. "
        "We believe that the information thus extracted has utility for a "
        "number of applications, but in particular we discuss the use of "
        "BMs as a mechanism for class separability estimation and as an "
        "aid to constructing robust and e$cient neural networks (NNs) to "
        'solve classi"cation problems. In the "rst case, BMs can '
        "establish the utili... "
    )

    def setUp(self):
        # Fresh controller per test, plus the normalized path of the PDF
        # fixture, which is located relative to this module's directory.
        factory = UtilFactory()
        self.rcec = RCEController(factory)
        self.pdf = normpath(join(dirname(__file__), ("../../../../tests/"
                            "fixtures/extraction/article.pdf")))

    def tearDown(self):
        pass

    # NOTE: assertTrue is used instead of the failUnless alias, which is
    # deprecated and no longer available in recent Python versions.

    def test_extract_content_from_non_existent_file(self):
        # A path that does not exist is expected to yield no content
        content = self.rcec.extract_content("somefile.pdf", FileFormat.TXT)
        self.assertTrue(content is None)

    def test_extract_content_to_invalid_target_format(self):
        # An unknown target format is expected to yield no content
        content = self.rcec.extract_content(self.pdf, "invalid format")
        self.assertTrue(content is None)

    def test_extract_content_from_pdf(self):
        # The PDF fixture is expected to yield some content as text
        content = self.rcec.extract_content(self.pdf, FileFormat.TXT)
        self.assertTrue(content is not None)

    def test_get_query_strings(self):
        # At least one query string is expected for the sample text
        strings = self.rcec.get_query_strings(self.some_text)
        self.assertTrue(len(strings) > 0)
def setUp(self):
    """
    Creates the controller under test and works out the normalized path
    of the PDF fixture.
    """
    self.rcec = RCEController(UtilFactory())

    # The fixture is addressed relative to the directory of this module
    module_dir = dirname(__file__)
    fixture = "../../../../tests/fixtures/extraction/article.pdf"
    self.pdf = normpath(join(module_dir, fixture))
def setUp(self):
    """
    Creates the controller under test and works out the normalized path
    of the PDF fixture.
    """
    self.rcec = RCEController(UtilFactory())

    # The fixture is addressed relative to the directory of this module
    module_dir = dirname(__file__)
    fixture = '../../../../tests/fixtures/extraction/article.pdf'
    self.pdf = normpath(join(module_dir, fixture))