예제 #1
0
    def test_matchPub(self):
        '''Match stored publications of one person against saved result pages.

        Reads three saved HTML pages named "<person_name>_page_<n>.html"
        (n = 0..2) from self.settings.source_dir, extracts models from each
        page and merges them into a single map.  The publications of the
        hard-coded person are then loaded through PublicationDao and handed
        to self.matchPub together with the merged map.

        Returns the list of publications that self.matchPub reported as
        not matched.
        '''
        self.extractor = Extractor().getInstance()
        pubdao = PublicationDao()
        person_id = 13419
        person_name = 'jie tang'

        # Part 1: read the saved sources from files and merge what they yield.
        all_models = {}
        for page in range(3):
            filename = "".join((person_name, '_page_', str(page), '.html'))
            path = os.path.join(self.settings.source_dir, filename)
            # Use a context manager so the handle is closed even if reading
            # fails; the previous code never closed the file.
            with open(path, 'r') as source_file:
                html = source_file.read()
            models = self.extractor.extract_from_source(html)
            if models is not None:
                # Name-mangled access to Extractor's private merge helper.
                self.extractor._Extractor__merge_into_extractedmap(
                    all_models, models)
        print('Total found DEBUG  %s items.' % len(all_models))

        # Part 2: load the person's publications for the current generation.
        pubs = pubdao.getPublicationByPerson(person_id,
                                             self.settings.generation)

        # Flip to True to dump both inputs of the matching step.
        printout = False
        if printout:
            for key, models in all_models.items():
                print('%s  -->  %s' % (key, models))
            print('===================')
            for pub in pubs:
                print(pub)

        (pubs_matched, pubs_not_matched) = self.matchPub(pubs, all_models)
        print('- test done - %d %d' % (len(pubs_matched),
                                       len(pubs_not_matched)))
        return pubs_not_matched
 def test_getNodesByPersonName(self):
     '''Test method getNodesByPersonName.'''
     # Announce this test with its own docstring.  The banner used to read
     # the docstring of a different test method, which mislabelled the
     # output and failed when that method was absent or undocumented.
     print('-TEST-: %s' % self.test_getNodesByPersonName.__doc__.strip())
     e = Extractor()
     models = e.getNodesByPersonName('jie tang')
     # Dump every returned model for manual inspection.
     for model in models:
         print(model)
     print('-END TEST-')
예제 #3
0
    def test_fetchByPubs(self, pubs):
        '''Test fetching with a list of pubs that were not found in person search.

        pubs -- sequence of publications; each one is paired with the
                author name 'jie tang' and the resulting list is passed to
                Extractor.getNodesByPubs.
        '''
        # Apply the format operator: the old statement used a comma, so the
        # literal "%s" was printed followed by the count.
        print('-- test fetchByPubs %s pubs' % len(pubs))
        new_pubs = [(pub, 'jie tang') for pub in pubs]

        extractor = Extractor()
        extractor.getNodesByPubs(new_pubs)
        print('- test done -')
 def __init__(self):
     '''Store the object returned by Extractor().getInstance() on self.'''
     factory = Extractor()
     self.extractor = factory.getInstance()
예제 #5
0
 def __init__(self):
     '''Attach the extractor and settings objects used by this instance.'''
     factory = Extractor()
     # Extractor is instantiated before getInstance() is called on it,
     # whereas Settings.getInstance() is called on the class directly.
     self.extractor = factory.getInstance()
     self.settings = Settings.getInstance()