def test_retrieve_html(self):
    """Download one hard-coded Google Scholar result page and print it.

    The fetch is made with ``HtmlRetriever(use_proxy=False)``; whatever
    ``getHtmlRetry`` returns is printed as-is.
    """
    # Single-argument, parenthesised prints produce the same output under
    # the Python 2 print statement and the Python 3 print function.
    print('Test1: test_retrieve_html()')

    title_query = (
        'allintitle:"Augmenting Branching Temporal Logics with Existential '
        'Quantification over Atomic Propositions" OR '
        '"Branching-Depth Hierarchies" OR '
        '"On the Relative Succinctness of Nondeterministic Buchi and '
        'co-Buchi Word Automata"'
    )
    # NOTE(review): the encoded form of this query URL is never used -- the
    # hard-coded URL below is what actually gets downloaded. The call is kept
    # so that any effect of encodeUrlForDownload itself is preserved; confirm
    # whether it can simply be dropped.
    URLCleaner.encodeUrlForDownload(
        "http://scholar.google.com/scholar?hl=en&num=100&q=%s" % title_query
    )

    # Pre-encoded query URL; the literal pieces concatenate to one string.
    download_url = (
        'http://scholar.google.com/scholar?hl=en&num=100&as_subj=eng&q='
        '%22Finding%20the%20Number%20of%20Factors%20of%20a%20Polynomial%22'
        'OR%22Probabilistic%20Models%20of%20Database%20Locking:%20Solutions,'
        '%20Computational%20Algorithms,%20and%20Asymptotics%22'
        'OR%22The%20AWK%20Programming%20Language%22'
        'OR%22Factoring%20Polynomials%20Over%20Algebraic%20Number%20Fields%22'
    )

    retriever = HtmlRetriever(use_proxy=False)
    # Second argument is presumably the retry count -- TODO confirm.
    print(retriever.getHtmlRetry(download_url, 1))
# NOTE(review): a second `class TestCase` with the same body appears right
# after this one in the visible source; if both live in the same scope the
# later definition is the one that stays bound -- confirm and deduplicate.
class TestCase:
    """Manual smoke test: build a query, fetch it, parse it, print each step."""

    def __init__(self):
        # Collaborators are created in this order because the retriever
        # needs the proxy flag read from the settings object.
        self.settings = Settings.getInstance()
        self.parsegoogle = GoogleResultParser()
        self.htmlRetriever = HtmlRetriever(self.settings.use_proxy)
        self.checker = checker()

    def test_parse_google_result(self, title1, title2):
        '''Test method extract_from_source.'''
        # The docstring above is printed at runtime through __doc__, so its
        # text is part of the program output -- do not reword it.
        #
        # Each banner is emitted as one pre-joined string; this prints the
        # same text as the Python 2 `print a, b` form (items separated by a
        # single space) and is also valid under Python 3.
        rule = '-' * 100
        print('-TEST-: ' + self.test_parse_google_result.__doc__.strip())

        query_url = self.checker.pinQuery(title1, title2)
        print('> url ' + rule)
        print(query_url)

        # Extra arguments are passed through unchanged; presumably a retry
        # count and a flag -- TODO confirm against HtmlRetriever.
        page = self.htmlRetriever.getHtmlRetry(query_url, 3, False)
        print('> html ' + rule)
        print(page[0:100])  # only the first 100 items of the response
        print('\n')

        print('> blocks ' + rule)
        for parsed_entry in self.parsegoogle.extract_from_source(page):
            print(parsed_entry)
        print('-END TEST-')
class TestCase:
    """Manual end-to-end check of the query -> download -> parse pipeline.

    Every intermediate value (query URL, start of the downloaded HTML, each
    parsed model) is printed to stdout; nothing is asserted or returned.
    """

    def __init__(self):
        # Settings must be read first: the retriever is constructed with
        # the `use_proxy` value taken from it.
        self.settings = Settings.getInstance()
        self.parsegoogle = GoogleResultParser()
        self.htmlRetriever = HtmlRetriever(self.settings.use_proxy)
        self.checker = checker()

    def test_parse_google_result(self, title1, title2):
        '''Test method extract_from_source.'''
        # NOTE: the docstring is read back via __doc__ and printed below, so
        # it is runtime text and must stay exactly as written.
        #
        # Banners are built with %-formatting into a single string so the
        # output matches the Python 2 `print a, b` form and the statement is
        # also valid Python 3.
        separator = '-' * 100
        description = self.test_parse_google_result.__doc__.strip()
        print('%s %s' % ('-TEST-:', description))

        target = self.checker.pinQuery(title1, title2)
        print('%s %s' % ('> url', separator))
        print(target)

        # Arguments 3 and False are forwarded as-is; presumably retry count
        # and a flag -- verify against HtmlRetriever.getHtmlRetry.
        source = self.htmlRetriever.getHtmlRetry(target, 3, False)
        print('%s %s' % ('> html', separator))
        print(source[0:100])  # preview only the head of the response
        print('\n')

        print('%s %s' % ('> blocks', separator))
        extracted = self.parsegoogle.extract_from_source(source)
        for item in extracted:
            print(item)
        print('-END TEST-')
def test_retrieve_html2(self):
    """Download an ``allintitle:`` Scholar query and print the result.

    The query URL is passed through ``URLCleaner.encodeUrlForDownload`` and
    fetched with ``HtmlRetriever(use_proxy=True)``.
    """
    # Adjacent literals concatenate into the single query string.
    title_query = (
        'allintitle:"Augmenting Branching Temporal Logics with Existential '
        'Quantification over Atomic Propositions" OR '
        '"Branching-Depth Hierarchies" OR '
        '"On the Relative Succinctness of Nondeterministic Buchi and '
        'co-Buchi Word Automata"'
    )
    search_url = "http://scholar.google.com/scholar?hl=en&num=100&q=%s" % title_query
    download_url = URLCleaner.encodeUrlForDownload(search_url)

    retriever = HtmlRetriever(use_proxy=True)
    # Second argument is presumably the retry count -- TODO confirm.
    # A single parenthesised argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print(retriever.getHtmlRetry(download_url, 1))