def test_matchPub(self): self.extractor = Extractor().getInstance() pubdao = PublicationDao() person_id = 13419 person_name = 'jie tang' # Read sources from files all_models = {} for page in range(0, 3): filename = "".join((person_name, '_page_', str(page), '.html')) f = file(os.path.join(self.settings.source_dir, filename), 'r') html = f.read() models = self.extractor.extract_from_source(html) if models is not None: self.extractor._Extractor__merge_into_extractedmap( all_models, models) print 'Total found DEBUG %s items.' % len(all_models) # part 2 pubs = pubdao.getPublicationByPerson(person_id, self.settings.generation) printout = False if printout: for key, models in all_models.items(): print key, " --> ", models print '===================' for pub in pubs: print pub (pubs_matched, pubs_not_matched) = self.matchPub(pubs, all_models) print '- test done -', len(pubs_matched), len(pubs_not_matched) return pubs_not_matched
def test_getNodesByPersonName(self): '''Test method getNodesByPersonName.''' print '-TEST-:', self.test_extractFromPage.__doc__.strip() e = Extractor() models = e.getNodesByPersonName('jie tang') for model in models: print model print '-END TEST-'
def test_fetchByPubs(self, pubs): '''Test use a list of pubs that not found in person search''' print '-- test fetchByPubs %s pubs', len(pubs) new_pubs = [] for pub in pubs: new_pubs.append((pub, 'jie tang')) extractor = Extractor() extractor.getNodesByPubs(new_pubs) print '- test done -'
def __init__(self):
    """Store the object returned by ``Extractor().getInstance()``."""
    extractor_instance = Extractor().getInstance()
    self.extractor = extractor_instance
def __init__(self):
    """Store the extractor and settings objects used by the methods.

    The extractor is obtained first and the settings second.
    """
    extractor_instance = Extractor().getInstance()
    self.extractor = extractor_instance
    settings_instance = Settings.getInstance()
    self.settings = settings_instance