def setUp(self):
    '''Register the default mocked OAC search response and build
    ``self.fetcher`` against it.

    The fixture is read inside a ``with`` block so the file handle is
    closed deterministically instead of being left to the garbage
    collector.
    '''
    url = ('http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
           'relation=ark:/13030/tf0c600134')
    with open(DIR_FIXTURES + '/testOAC-url_next-0.xml') as fixture:
        body = fixture.read()
    # The mock is registered before the parent setUp runs and before the
    # fetcher is constructed, matching the original ordering.
    httpretty.register_uri(httpretty.GET, url, body=body)
    super(OAC_XML_FetcherTestCase, self).setUp()
    self.fetcher = fetcher.OAC_XML_Fetcher(url, 'extra_data')
def testAmpersandInDoc(self):
    '''Fetch the ``testOAC-utf8-content.xml`` fixture and pull one page.

    Checks the document counters reported right after construction, then
    calls ``next()`` once; that call carries no assertion, so the test
    passes as long as it does not raise.
    '''
    url = ('http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
           'relation=ark:/13030/hb5d5nb7dj')
    # Close the fixture file explicitly rather than leaking the handle.
    with open(DIR_FIXTURES + '/testOAC-utf8-content.xml') as fixture:
        body = fixture.read()
    httpretty.register_uri(httpretty.GET, url, body=body)
    h = fetcher.OAC_XML_Fetcher(url, 'extra_data')
    self.assertEqual(h.totalDocs, 25)
    self.assertEqual(h.currentDoc, 0)
    h.next()
def testOnlyTextResults(self):
    '''Test when only texts are in result.

    Registers the ``testOAC-noimages-in-results.xml`` fixture, checks the
    total document count on a freshly built fetcher, then checks the first
    page of records and the end index of the ``text`` group.
    '''
    url = ('http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
           'relation=ark:/13030/hb5d5nb7dj')
    # Close the fixture file explicitly rather than leaking the handle.
    with open(DIR_FIXTURES + '/testOAC-noimages-in-results.xml') as fixture:
        body = fixture.read()
    httpretty.register_uri(httpretty.GET, url, body=body)
    h = fetcher.OAC_XML_Fetcher(url, 'extra_data')
    self.assertEqual(h.totalDocs, 11)
    # NOTE(review): the page is pulled from self.fetcher (built in setUp),
    # not from the local fetcher ``h`` created above -- confirm whether
    # ``h.next()`` / ``h.groups`` was intended. Left unchanged here.
    recs = self.fetcher.next()
    self.assertEqual(self.fetcher.groups['text']['end'], 10)
    self.assertEqual(len(recs), 10)
def testFetchTextOnlyContent(self):
    '''Page through a text-only result set: 10 records, then 1, then stop.

    After each page the fetcher's ``_url_current`` is compared with the
    expected paging URL, and a third ``next()`` must raise StopIteration.
    '''
    base_url = ('http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
                'relation=ark:/13030/hb5d5nb7dj')

    # Fixtures are read via ``with`` so each file handle is closed
    # immediately instead of being leaked.
    with open(DIR_FIXTURES + '/testOAC-noimages-in-results.xml') as fixture:
        first_page = fixture.read()

    # NOTE(review): the registered URLs spell the parameter "DocsPerPage"
    # while the asserted URLs below spell it "docsPerPage". Presumably
    # harmless because httpretty does not match on the query string unless
    # asked to -- confirm. Strings are kept exactly as they were.
    httpretty.register_uri(
        httpretty.GET,
        base_url + '&DocsPerPage=10',
        body=first_page)
    httpretty.register_uri(
        httpretty.GET,
        base_url + '&DocsPerPage=10&startDoc=1&group=text',
        body=first_page)

    oac_fetcher = fetcher.OAC_XML_Fetcher(
        base_url, 'extra_data', docsPerPage=10)

    first_set = oac_fetcher.next()
    self.assertEqual(len(first_set), 10)
    self.assertEqual(
        oac_fetcher._url_current,
        base_url + '&docsPerPage=10&startDoc=1&group=text')

    # The second page is registered only after the first has been consumed,
    # preserving the original registration order.
    with open(DIR_FIXTURES + '/testOAC-noimages-in-results-1.xml') as fixture:
        second_page = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        base_url + '&DocsPerPage=10&startDoc=11&group=text',
        body=second_page)

    second_set = oac_fetcher.next()
    self.assertEqual(len(second_set), 1)
    self.assertEqual(
        oac_fetcher._url_current,
        base_url + '&docsPerPage=10&startDoc=11&group=text')

    self.assertRaises(StopIteration, oac_fetcher.next)
def testFetchMixedContent(self):
    '''This interface gets tricky when image & text data are in the
    collection. The mocked responses return an xml with 10 images, then
    with 3 images, then 10 texts, then 1 text, then the fetcher must stop.

    After every page the fetcher's ``_url_current`` is compared with the
    expected paging URL for that group and start offset.
    '''
    base_url = ('http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
                'relation=ark:/13030/hb5d5nb7dj')
    paged_url = base_url + '&docsPerPage=10'

    def read_fixture(name):
        # Read a fixture and close its handle right away (the previous
        # open(...).read() calls left every handle unclosed).
        with open(DIR_FIXTURES + name) as fixture:
            return fixture.read()

    def register(url, fixture_name):
        httpretty.register_uri(
            httpretty.GET, url, body=read_fixture(fixture_name))

    # Both initial registrations happen before the fetcher is constructed,
    # as in the original ordering.
    register(paged_url, '/testOAC-url_next-0.xml')
    register(paged_url + '&startDoc=1&group=image',
             '/testOAC-url_next-0.xml')

    oac_fetcher = fetcher.OAC_XML_Fetcher(
        base_url, 'extra_data', docsPerPage=10)

    # Page 1: first ten images.
    first_set = oac_fetcher.next()
    self.assertEqual(len(first_set), 10)
    self.assertEqual(
        oac_fetcher._url_current,
        paged_url + '&startDoc=1&group=image')

    # Page 2: remaining three images.
    register(paged_url + '&startDoc=11&group=image',
             '/testOAC-url_next-1.xml')
    second_set = oac_fetcher.next()
    self.assertEqual(len(second_set), 3)
    self.assertEqual(
        oac_fetcher._url_current,
        paged_url + '&startDoc=11&group=image')

    # Page 3: first ten texts.
    register(paged_url + '&startDoc=1&group=text',
             '/testOAC-url_next-2.xml')
    third_set = oac_fetcher.next()
    self.assertEqual(len(third_set), 10)
    self.assertEqual(
        oac_fetcher._url_current,
        paged_url + '&startDoc=1&group=text')

    # Page 4: the single remaining text.
    register(paged_url + '&startDoc=11&group=text',
             '/testOAC-url_next-3.xml')
    fourth_set = oac_fetcher.next()
    self.assertEqual(len(fourth_set), 1)
    self.assertEqual(
        oac_fetcher._url_current,
        paged_url + '&startDoc=11&group=text')

    # Nothing left in either group.
    self.assertRaises(StopIteration, oac_fetcher.next)