Ejemplo n.º 1
0
class TestPubMatcher:
    def __init__(self):
        self.matcher = PubMatcher()

    #
    # Test
    #
    def test_matchPub(self):
        self.extractor = Extractor().getInstance()
        pubdao = PublicationDao()
        person_id = 13419
        person_name = 'jie tang'
        # Read sources from files
        all_models = {}
        for page in range(0, 3):
            filename = "".join((person_name, '_page_', str(page), '.html'))
            f = file(os.path.join(self.settings.source_dir, filename), 'r')
            html = f.read()
            models = self.extractor.extract_from_source(html)
            if models is not None:
                self.extractor._Extractor__merge_into_extractedmap(
                    all_models, models)
        print 'Total found DEBUG  %s items.' % len(all_models)

        # part 2
        pubs = pubdao.getPublicationByPerson(person_id,
                                             self.settings.generation)

        printout = False
        if printout:
            for key, models in all_models.items():
                print key, " --> ", models
            print '==================='
            for pub in pubs:
                print pub

        (pubs_matched, pubs_not_matched) = self.matchPub(pubs, all_models)
        print '- test done -', len(pubs_matched), len(pubs_not_matched)
        return pubs_not_matched

    def test_fetchByPubs(self, pubs):
        '''Test use a list of pubs that not found in person search'''
        print '-- test fetchByPubs %s pubs', len(pubs)
        new_pubs = []
        for pub in pubs:
            new_pubs.append((pub, 'jie tang'))

        extractor = Extractor()
        extractor.getNodesByPubs(new_pubs)
        print '- test done -'

    def test_match_with_authors(self):
        data_test = ((
            '… DeSmedt, W Du, W <b>Kent</b>, MA Ketabchi, WA … - …, 1991 - doi.ieeecomputersociety.org',
            'Rafi Ahmed,Philippe De Smedt,Weimin Du,William Kent,Mohammad A. Ketabchi,Witold Litwin,Abbas Rafii,Ming-Chien Shan'
        ), (
            'R Ahmed, P DeSmedt, W Du, W Kent, MA … - …, 1991 - doi.ieeecomputersociety.org',
            'Rafi Ahmed,Philippe De Smedt,Weimin Du,William Kent,Mohammad A. Ketabchi,Witold Litwin,Abbas Rafii,Ming-Chien Shan'
        ), (
            'P Lyngbaek, W Kent - … on the 1986 international workshop on Object …, 1986 - portal.acm.org',
            'Peter Lyngbak,William Kent'
        ), (
            'W Kent - Proceedings of the 8th Bristish National …, 1990 - fog.hpl.external.hp.com',
            'William Kent'
        ), (
            'DE Neiman, DW Hildum, VR Lessef, T  &hellip;',
            'Daniel E. Neiman,David W. Hildum,Victor R. Lesser,Tuomas Sandholm'
        ), (
            'M Esmaili, R Safavi-Naini, J Pieprzyk',
            'Mansour Esmaili,Reihaneh Safavi-Naini,Josef Pieprzyk'
        ), ('DH Fishman, J Annevelink, E Chow, T  &hellip;',
            'Daniel H. Fishman,Jurgen Annevelink,David Beech,E. C. Chow,Tim Connors,J. W. Davis,Waqar Hasan,C. G. Hoch,William Kent,S. Leichner,Peter Lyngbak,Brom Mahbod,Marie-Anne Neimat,Tore Risch,Ming-Chien Shan,W. Kevin Wilkinson'
            ))
        data_debug = ((
            'DH Fishman, J Annevelink, E Chow, T  &hellip;',
            'Daniel H. Fishman,Jurgen Annevelink,David Beech,E. C. Chow,Tim Connors,J. W. Davis,Waqar Hasan,C. G. Hoch,William Kent,S. Leichner,Peter Lyngbak,Brom Mahbod,Marie-Anne Neimat,Tore Risch,Ming-Chien Shan,W. Kevin Wilkinson'
        ), )
        for ga, da in data_debug:
            print "match: %s \n with: %s \n   is: %s" % (ga, da, \
               self.matcher.matchAuthors(ga, da, debug_output=True))
class TestPubMatcher:
	def __init__(self):
		self.matcher = PubMatcher()
		
	#
	# Test
	#
	def test_matchPub(self):
		self.extractor = Extractor().getInstance()
		pubdao = PublicationDao()
		person_id = 13419
		person_name = 'jie tang'
		# Read sources from files
		all_models = {}
		for page in range(0, 3):
			filename = "".join((person_name, '_page_', str(page), '.html'))
			f = file(os.path.join(self.settings.source_dir, filename), 'r')
			html = f.read()
			models = self.extractor.extract_from_source(html)
			if models is not None:
				self.extractor._Extractor__merge_into_extractedmap(all_models, models)
		print 'Total found DEBUG  %s items.' % len(all_models)

		# part 2
		pubs = pubdao.getPublicationByPerson(person_id, self.settings.generation)

		printout = False
		if printout:
			for key, models in all_models.items():
				print key, " --> ", models
			print '==================='
			for pub in pubs:
				print pub

		(pubs_matched, pubs_not_matched) = self.matchPub(pubs, all_models)
		print '- test done -', len(pubs_matched), len(pubs_not_matched)
		return pubs_not_matched

	def test_fetchByPubs(self, pubs):
		'''Test use a list of pubs that not found in person search'''
		print '-- test fetchByPubs %s pubs', len(pubs)
		new_pubs = []
		for pub in pubs:
			new_pubs.append((pub, 'jie tang'))

		extractor = Extractor()
		extractor.getNodesByPubs(new_pubs)
		print '- test done -'

	def test_match_with_authors(self):
		data_test = (
			('… DeSmedt, W Du, W <b>Kent</b>, MA Ketabchi, WA … - …, 1991 - doi.ieeecomputersociety.org',
			 'Rafi Ahmed,Philippe De Smedt,Weimin Du,William Kent,Mohammad A. Ketabchi,Witold Litwin,Abbas Rafii,Ming-Chien Shan'),
			('R Ahmed, P DeSmedt, W Du, W Kent, MA … - …, 1991 - doi.ieeecomputersociety.org',
			 'Rafi Ahmed,Philippe De Smedt,Weimin Du,William Kent,Mohammad A. Ketabchi,Witold Litwin,Abbas Rafii,Ming-Chien Shan'),
			('P Lyngbaek, W Kent - … on the 1986 international workshop on Object …, 1986 - portal.acm.org',
			 'Peter Lyngbak,William Kent'),
			('W Kent - Proceedings of the 8th Bristish National …, 1990 - fog.hpl.external.hp.com',
			 'William Kent'),
			('DE Neiman, DW Hildum, VR Lessef, T  &hellip;',
			 'Daniel E. Neiman,David W. Hildum,Victor R. Lesser,Tuomas Sandholm'),
			('M Esmaili, R Safavi-Naini, J Pieprzyk',
			 'Mansour Esmaili,Reihaneh Safavi-Naini,Josef Pieprzyk'),
			 ('DH Fishman, J Annevelink, E Chow, T  &hellip;',
			 'Daniel H. Fishman,Jurgen Annevelink,David Beech,E. C. Chow,Tim Connors,J. W. Davis,Waqar Hasan,C. G. Hoch,William Kent,S. Leichner,Peter Lyngbak,Brom Mahbod,Marie-Anne Neimat,Tore Risch,Ming-Chien Shan,W. Kevin Wilkinson')
		)
		data_debug = (
			('DH Fishman, J Annevelink, E Chow, T  &hellip;',
			 'Daniel H. Fishman,Jurgen Annevelink,David Beech,E. C. Chow,Tim Connors,J. W. Davis,Waqar Hasan,C. G. Hoch,William Kent,S. Leichner,Peter Lyngbak,Brom Mahbod,Marie-Anne Neimat,Tore Risch,Ming-Chien Shan,W. Kevin Wilkinson'),
		)
		for ga, da in data_debug:
			print "match: %s \n with: %s \n   is: %s" % (ga, da, \
					 self.matcher.matchAuthors(ga, da, debug_output=True))