Beispiel #1
0
 def testIterateOverResults(self):
     '''Iterate a SolrFetcher over a mocked sequence of responses.

     Five XML fixtures are registered as successive POST responses for
     the select URL. The test checks the size of the first result set,
     the number of records yielded by iteration and the title of the
     last record yielded.
     '''
     def read_fixture(name):
         # Use a context manager so the fixture handle is closed right
         # away instead of being left for the garbage collector.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     pages = (
         '/ucsd-new-feed-missions-bb3038949s-0.xml',
         '/ucsd-new-feed-missions-bb3038949s-1.xml',
         '/ucsd-new-feed-missions-bb3038949s-2.xml',
         '/ucsd-new-feed-missions-bb3038949s-3.xml',
         '/ucsd-new-feed-missions-bb3038949s-4.xml',
     )
     httpretty.register_uri(
         httpretty.POST,
         'http://example.edu/solr/select',
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     h = fetcher.SolrFetcher('http://example.edu/solr',
                             'extra_data',
                             rows=3)
     self.assertEqual(len(h.resp.results), 3)
     n = 0
     for r in h:
         n += 1
     # ``r`` still holds the last record produced by the loop above.
     self.assertEqual(['Mission at Santa Barbara'], r['title_tesim'])
     self.assertEqual(n, 10)
 def testFetch(self):
     '''Fetch mocked eMuseum pages and inspect one parsed record.

     Three XML fixtures are registered as successive GET responses. The
     first batch is pulled with an explicit ``next()`` call and the rest
     by iteration; 24 documents are expected in total, and the document
     at index 12 is checked field by field.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     pages = (
         '/eMuseum-page-1.xml',
         '/eMuseum-page-2.xml',
         '/eMuseum-page-3.xml',
     )
     httpretty.register_uri(
         httpretty.GET,
         'http://digitalcollections.hoover.org/search/*/objects/xml?filter=approved:true&page=1',
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     url = 'http://digitalcollections.hoover.org'
     h = fetcher.eMuseum_Fetcher(url, None)
     self.assertEqual(h.url_base, url)
     docs = []
     docs.extend(h.next())
     for d in h:
         docs.extend(d)
     self.assertEqual(len(docs), 24)
     test1 = docs[12]
     self.assertIn('title', test1)
     self.assertEqual(
         test1['title']['text'],
         'Money is power.  A war savings certificate in every Canadian home.  Get yours now at post offices or banks.'
     )
     self.assertIn('unknown2', test1)
     self.assertIn('text2', test1['primaryMaker'])
     self.assertNotIn('attrib', test1['unknown1'])
Beispiel #3
0
 def testSolrHarvest(self, mock_boto3):
     '''Test the function of the Solr harvest with <date> objects
     in stream.

     Five XML fixtures are registered as successive POST responses, the
     controller's ``harvest`` is run, and the two captured log records
     are checked.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     pages = (
         '/ucsd-new-feed-missions-bb3038949s-0.xml',
         '/ucsd-new-feed-missions-bb3038949s-1.xml',
         '/ucsd-new-feed-missions-bb3038949s-2.xml',
         '/ucsd-new-feed-missions-bb3038949s-3.xml',
         '/ucsd-new-feed-missions-bb3038949s-4.xml',
     )
     httpretty.register_uri(
         httpretty.POST,
         'http://example.edu/solr/blacklight/select',
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     self.assertTrue(hasattr(self.controller, 'harvest'))
     self.controller.harvest()
     # Parenthesised single-argument form prints the same text whether
     # ``print`` is a statement or a function.
     print("LOGS:{}".format(self.test_log_handler.formatted_records))
     self.assertEqual(len(self.test_log_handler.records), 2)
     self.assertTrue(
         'UC San Diego' in self.test_log_handler.formatted_records[0])
     self.assertEqual(self.test_log_handler.formatted_records[1],
                      '[INFO] HarvestController: 13 records harvested')
Beispiel #4
0
 def testIterateOverResults(self):
     '''Iterate a PySolrQueryFetcher over a mocked sequence of responses.

     Four JSON fixtures are registered as successive GET responses. The
     test checks that ``PySolrFetcher`` raises ``TypeError`` when called
     with no arguments, the query path built by the fetcher, the number
     of records yielded and the title of the last record yielded.
     '''
     def read_fixture(name):
         # Context manager closes the fixture handle right after reading.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     pages = (
         '/ucsd-new-feed-missions-bb3038949s-0.json',
         '/ucsd-new-feed-missions-bb3038949s-1.json',
         '/ucsd-new-feed-missions-bb3038949s-2.json',
         '/ucsd-new-feed-missions-bb3038949s-3.json',
     )
     httpretty.register_uri(
         httpretty.GET,
         'http://example.edu/solr/query',
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     self.assertRaises(TypeError, fetcher.PySolrFetcher)
     h = fetcher.PySolrQueryFetcher('http://example.edu/solr', 'extra_data',
                                    **{'rows': 3})
     self.assertEqual(
         h._query_path,
         'query?q=extra_data&sort=id+asc&cursorMark=%2A&wt=json&rows=3')
     n = 0
     for r in h:
         n += 1
     self.assertEqual(n, 10)
     # ``r`` still holds the last record produced by the loop above.
     self.assertEqual(['Mission Santa Ynez'], r['title_tesim'])
Beispiel #5
0
 def testFetch(self):
     '''Test the httpretty mocked fetching of documents.

     Four responses are queued for the search URL (page 1 is served
     twice, then pages 2 and 3). Seven documents are expected, and the
     first one is checked for its ``tid``, ``uri`` and ``aup`` metadata.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'https://example.edu/action/search/xml?q=ddu%3A20*&' \
           'asf=ddu&asd=&fd=1&_hd=&hd=on&sf=&_rs=&_ef=&ef=on&sd=&ed=&c=ga'
     # Page 1 appears twice on purpose, mirroring the queued responses.
     pages = (
         '/ucsf-page-1.xml',
         '/ucsf-page-1.xml',
         '/ucsf-page-2.xml',
         '/ucsf-page-3.xml',
     )
     httpretty.register_uri(
         httpretty.GET,
         url,
         responses=[
             httpretty.Response(read_fixture(page), status=200)
             for page in pages
         ])
     h = fetcher.UCSF_XML_Fetcher(url, None, page_size=3)
     docs = []
     for d in h:
         docs.extend(d)
     self.assertEqual(len(docs), 7)
     testy = docs[0]
     self.assertIn('tid', testy)
     self.assertEqual(testy['tid'], "nga13j00")
     self.assertEqual(testy['uri'],
                      'http://legacy.library.ucsf.edu/tid/nga13j00')
     self.assertIn('aup', testy['metadata'])
     self.assertEqual(testy['metadata']['aup'], ['Whent, Peter'])
    def testFetch(self, mock_deepharvest, mock_boto):
        '''Test the httpretty mocked fetching of documents.

        The mocked boto key returns the media structmap JSON fixture, and
        httpretty serves the folder listing, document and children
        endpoints. The test checks how the deep-harvest mock was called,
        the number of documents yielded and several fields of the first
        document.
        '''
        def read_fixture(name):
            # Context manager guarantees the fixture handle is closed.
            with open(DIR_FIXTURES + name) as fixture:
                return fixture.read()

        media_json = read_fixture('/nuxeo_media_structmap.json')
        deepharvest_mocker(mock_deepharvest)
        mocked_key = mock_boto.return_value.get_bucket.return_value.\
            get_key.return_value
        mocked_key.get_contents_as_string.return_value = media_json
        folder_pages = ('/nuxeo_folder.json', '/nuxeo_folder-1.json')
        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/path/path-to-asset/here/@children',
            responses=[
                httpretty.Response(body=read_fixture(page), status=200)
                for page in folder_pages
            ])
        httpretty.register_uri(httpretty.GET,
                               re.compile('https://example.edu/api/v1/id/.*'),
                               body=read_fixture('/nuxeo_doc.json'))

        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/path/asset-library/UCI/Cochems'
            '/MS-R016_1092.tif/@children?currentPageIndex=0',
            responses=[
                httpretty.Response(
                    body=read_fixture('/nuxeo_no_children.json'),
                    status=200),
            ])
        h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                                 'path-to-asset/here')
        mock_deepharvest.assert_called_with(
            'path-to-asset/here',
            '',
            conf_pynux={'api': 'https://example.edu/api/v1'})
        docs = []
        for d in h:
            docs.append(d)
        self.assertEqual(3, len(docs))
        first = docs[0]
        self.assertIn('picture:views', first['properties'])
        self.assertIn('dc:subjects', first['properties'])
        self.assertIn('structmap_url', first)
        self.assertIn('structmap_text', first)
        self.assertEqual(
            first['structmap_text'],
            "Angela Davis socializing with students at UC Irvine "
            "AS-061_A69-013_001.tif AS-061_A69-013_002.tif "
            "AS-061_A69-013_003.tif AS-061_A69-013_004.tif "
            "AS-061_A69-013_005.tif AS-061_A69-013_006.tif "
            "AS-061_A69-013_007.tif")
        self.assertEqual(
            first['isShownBy'],
            'https://nuxeo.cdlib.org/Nuxeo/nxpicsfile/default/'
            '40677ed1-f7c2-476f-886d-bf79c3fec8c4/Medium:content/')
Beispiel #7
0
 def test_fetching_range(self):
     '''Exercise Flickr_Fetcher when built with ``page_range='3,5'``.

     First the fetcher's counters are set by hand and ``next`` is
     expected to raise ``ValueError`` or ``StopIteration``. Then a fresh
     fetcher is iterated to completion and four objects are expected in
     total.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'https://example.edu'
     user_id = 'testuser'
     page_size = 3
     url_first = fetcher.Flickr_Fetcher.url_get_user_photos_template.format(
         api_key='boguskey', user_id=user_id, per_page=page_size, page=1)
     # All responses are queued on one URL, in the order they are
     # expected to be consumed. Ugly but works.
     queued = (
         '/flickr-public-photos-1.xml',
         '/flickr-public-photos-1.xml',
         '/flickr-public-photos-1.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photo-info-0.xml',
     )
     httpretty.register_uri(
         httpretty.GET,
         url_first,
         responses=[
             httpretty.Response(body=read_fixture(name), status=200)
             for name in queued
         ])
     h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size,
                                page_range='3,5')
     h.doc_current = 10
     self.assertRaises(ValueError, h.next)
     h.docs_fetched = 4
     h.doc_current = 4
     self.assertRaises(StopIteration, h.next)
     h.docs_fetched = 2
     h.doc_current = 2
     h.page_current = 5
     self.assertRaises(StopIteration, h.next)
     # Start over with an untouched fetcher for the full iteration.
     h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size,
                                page_range='3,5')
     total = 0
     all_objs = []
     for objs in h:
         total += len(objs)
         all_objs.extend(objs)
     self.assertEqual(total, 4)
     self.assertEqual(len(all_objs), 4)
 def test_fetching(self):
     '''Fetch a mocked YouTube playlist and compare the first video.

     Two playlist responses are queued for the playlist-items URL and
     one video fixture is served for the video URL. Six videos are
     expected in total.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'https://example.edu'
     playlist_id = 'testplaylist'
     page_size = 3
     url_first = fetcher.YouTube_Fetcher.url_playlistitems.format(
         api_key='boguskey',
         page_size=page_size,
         playlist_id=playlist_id,
         page_token='')
     url_vids = fetcher.YouTube_Fetcher.url_video
     # Both playlist pages are queued on the first URL. Ugly but works.
     playlist_pages = (
         '/youtube_playlist_with_next.json',
         '/youtube_playlist_no_next.json',
     )
     httpretty.register_uri(
         httpretty.GET,
         url_first,
         responses=[
             httpretty.Response(body=read_fixture(page), status=200)
             for page in playlist_pages
         ])
     httpretty.register_uri(httpretty.GET,
                            url_vids,
                            body=read_fixture('/youtube_video.json'),
                            status=200)
     h = fetcher.YouTube_Fetcher(url, playlist_id, page_size=page_size)
     vids = []
     for v in h:
         vids.extend(v)
     self.assertEqual(len(vids), 6)
     self.assertEqual(
         vids[0], {
             u'contentDetails': {
                 u'definition': u'sd',
                 u'projection': u'rectangular',
                 u'caption': u'false',
                 u'duration': u'PT19M35S',
                 u'licensedContent': True,
                 u'dimension': u'2d'
             },
             u'kind': u'youtube#video',
             u'etag':
             u'"m2yskBQFythfE4irbTIeOgYYfBU/-3AtVAYcRLEynWZprpf0OGaY8zo"',
             u'id': u'0Yx8zrbsUu8'
         })
    def test_get_isShownBy_video(self, mock_deepharvest, mock_boto):
        '''Test getting the correct isShownBy value for a Nuxeo video
        object.

        The child-search URL is mocked to return the "no children"
        fixture; the video document fixture is then passed to
        ``_get_isShownBy`` and the resulting URL is compared.
        '''
        def read_fixture(name):
            # Close the fixture handle deterministically.
            with open(DIR_FIXTURES + name) as fixture:
                return fixture.read()

        deepharvest_mocker(mock_deepharvest)

        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/path/@search?query=SELECT+%2A+FROM+'
            'Document+WHERE+ecm%3AparentId+%3D+'
            '%274c80e254-6def-4230-9f28-bc48878568d4%27+'
            'AND+ecm%3AcurrentLifeCycleState+%21%3D+%27deleted%27+ORDER+BY+'
            'ecm%3Apos&currentPageIndex=0&pageSize=100',
            responses=[
                httpretty.Response(
                    body=read_fixture('/nuxeo_no_children.json'),
                    status=200),
            ])

        h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                                 'path-to-asset/here')

        nuxeo_metadata = json.loads(read_fixture('/nuxeo_doc_video.json'))
        isShownBy = h._get_isShownBy(nuxeo_metadata)

        self.assertEqual(
            isShownBy, 'https://s3.amazonaws.com/static.ucldc.cdlib.org/'
            'ucldc-nuxeo-thumb-media/4c80e254-6def-4230-9f28-bc48878568d4')
    def test_get_isShownBy_pdf(self, mock_deepharvest, mock_boto):
        '''Test getting the correct isShownBy value for a Nuxeo doc
        with no images and a PDF at parent level.

        The child-search URL is mocked to return the "no children"
        fixture; the PDF parent document fixture is then passed to
        ``_get_isShownBy`` and the resulting URL is compared.
        '''
        def read_fixture(name):
            # Context manager guarantees the fixture handle is closed.
            with open(DIR_FIXTURES + name) as fixture:
                return fixture.read()

        deepharvest_mocker(mock_deepharvest)

        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/path/@search?query=SELECT+%2A+FROM+'
            'Document+WHERE+ecm%3AparentId+%3D+'
            '%2700d55837-01b6-4211-80d8-b966a15c257e%27+ORDER+BY+'
            'ecm%3Apos&currentPageIndex=0&pageSize=100',
            responses=[
                httpretty.Response(
                    body=read_fixture('/nuxeo_no_children.json'),
                    status=200),
            ])

        h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                                 'path-to-asset/here')

        nuxeo_metadata = json.loads(
            read_fixture('/nuxeo_doc_pdf_parent.json'))
        isShownBy = h._get_isShownBy(nuxeo_metadata)
        self.assertEqual(
            isShownBy, 'https://s3.amazonaws.com/static.ucldc.cdlib.org/'
            'ucldc-nuxeo-thumb-media/00d55837-01b6-4211-80d8-b966a15c257e')
Beispiel #11
0
 def test_single_fetching(self):
     '''Fetch with only the video URL mocked and expect one video.

     A single video fixture is queued for the video URL; the fetcher is
     built without a ``page_size`` and must yield exactly one video,
     which is compared against the expected dictionary.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'http://single.edu'
     playlist_id = 'PLwtrWl_IBMJtjP5zMk6dVR-BRjzKqCPOM'
     url_vids = fetcher.YouTube_Fetcher.url_video
     httpretty.register_uri(
         httpretty.GET,
         url_vids,
         responses=[
             httpretty.Response(
                 body=read_fixture('/youtube_single_video.json'),
                 status=200)
         ])
     h = fetcher.YouTube_Fetcher(url, playlist_id)
     vids = []
     for v in h:
         vids.extend(v)
     self.assertEqual(len(vids), 1)
     self.assertEqual(
         vids[0], {
             u'contentDetails': {
                 u'definition': u'sd',
                 u'projection': u'rectangular',
                 u'caption': u'false',
                 u'duration': u'PT19M35S',
                 u'licensedContent': True,
                 u'dimension': u'2d'
             },
             u'kind': u'youtube#video',
             u'etag':
             u'"m2yskBQFythfE4irbTIeOgYYfBU/-3AtVAYcRLEynWZprpf0OGaY8zo"',
             u'id': u'0Yx8zrbsUu8'
         })
Beispiel #12
0
 def testIterateOverResults(self):
     '''Test the RequestsSolrFetcher iteration over a mock set of data.

     Four JSON fixtures are queued for the Solr URL. The test checks the
     parsed query parameters and headers, then steps through four
     ``next()`` calls while comparing the cursor attributes.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     pages = (
         '/ucb-cursor-results-0.json',
         '/ucb-cursor-results-1.json',
         '/ucb-cursor-results-2.json',
         '/ucb-cursor-results-3.json',
     )
     httpretty.register_uri(
         httpretty.GET,
         'http://example.edu/solr',
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     h = fetcher.RequestsSolrFetcher(
         'http://example.edu/solr',
         'q=extra:data&header=app-name:Value-with:in-it'
         '&header=app_key:111222333')
     h._page_size = 1
     self.assertEqual(h._query_params['q'], ['extra:data'])
     self.assertEqual(h._headers, {
         'app-name': 'Value-with:in-it',
         'app_key': '111222333'
     })
     cursor = h._nextCursorMark
     docs = []
     docs.append(h.next())  # gets the one from init, no get_next_results
     self.assertEqual(cursor, h._cursorMark)
     docs.append(h.next())  # get_next_results
     self.assertNotEqual(cursor, h._nextCursorMark)
     cursor = h._nextCursorMark
     docs.append(h.next())  # get_next_results
     self.assertEqual(cursor, h._cursorMark)
     cursor = h._nextCursorMark
     docs.append(h.next())  # get_next_results
     self.assertEqual(cursor, h._cursorMark)
     self.assertEqual(len(docs), 4)
 def testFetch_missing_media_json(self, mock_deepharvest, mock_boto):
     '''Test fetching when the mocked bucket has no media JSON key.

     ``get_key`` on the mocked bucket returns ``None``; the first three
     documents yielded are then expected to carry an empty
     ``structmap_text``.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     deepharvest_mocker(mock_deepharvest)
     mocked_bucket = mock_boto.return_value.get_bucket.return_value
     mocked_bucket.get_key.return_value = None
     folder_pages = ('/nuxeo_folder.json', '/nuxeo_folder-1.json')
     httpretty.register_uri(
         httpretty.GET,
         'https://example.edu/api/v1/path/path-to-asset/here/@children',
         responses=[
             httpretty.Response(body=read_fixture(page), status=200)
             for page in folder_pages
         ])
     httpretty.register_uri(httpretty.GET,
                            re.compile('https://example.edu/api/v1/id/.*'),
                            body=read_fixture('/nuxeo_doc.json'))
     httpretty.register_uri(
         httpretty.GET,
         'https://example.edu/api/v1/path/asset-library/UCI/Cochems/'
         'MS-R016_1092.tif/@children?currentPageIndex=0',
         responses=[
             httpretty.Response(
                 body=read_fixture('/nuxeo_no_children.json'),
                 status=200),
         ])
     h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                              'path-to-asset/here')
     mock_deepharvest.assert_called_with(
         'path-to-asset/here',
         '',
         conf_pynux={'api': 'https://example.edu/api/v1'})
     docs = []
     for d in h:
         docs.append(d)
     self.assertEqual(docs[0]['structmap_text'], '')
     self.assertEqual(docs[1]['structmap_text'], '')
     self.assertEqual(docs[2]['structmap_text'], '')
Beispiel #14
0
 def test_getMetadataPrefix(self):
     '''Check the metadata prefix chosen by OAIFetcher.

     The ListMetadataFormats URL serves the plain formats fixture three
     times and then the ``qdc`` fixture. The test checks the prefix set
     at construction, the prefix returned for an empty argument, an
     explicit ``metadataPrefix=`` override, and the prefix returned once
     the ``qdc`` fixture is served.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     fmts = read_fixture('/oai-fmts.xml')
     fmts_qdc = read_fixture('/oai-fmts-qdc.xml')
     httpretty.register_uri(
         httpretty.GET,
         'http://xxxx.cdlib.org/oai?verb=ListMetadataFormats',
         responses=[
             httpretty.Response(body=body, status=200)
             for body in (fmts, fmts, fmts, fmts_qdc)
         ])
     set_fetcher = fetcher.OAIFetcher('http://xxxx.cdlib.org/oai',
                                      'set=bogus')
     self.assertEqual(set_fetcher._metadataPrefix, 'oai_dc')
     prefix = set_fetcher.get_metadataPrefix('')
     self.assertEqual(prefix, 'oai_dc')
     prefix = set_fetcher.get_metadataPrefix('metadataPrefix=override')
     self.assertEqual(prefix, 'override')
     prefix = set_fetcher.get_metadataPrefix('')
     self.assertEqual(prefix, 'oai_qdc')
Beispiel #15
0
    def test_get_isShownBy_component_image(self, mock_deepharvest, mock_boto):
        '''Test getting the correct isShownBy value for a Nuxeo doc
        with no image at parent level, but an image at the component level.

        The child-search URL serves the image-components fixture and the
        component's id URL serves the first-image-component fixture; the
        imageless parent document fixture is then passed to
        ``_get_isShownBy`` and the resulting URL is compared.
        '''
        def read_fixture(name):
            # Close the fixture handle deterministically.
            with open(DIR_FIXTURES + name) as fixture:
                return fixture.read()

        deepharvest_mocker(mock_deepharvest)

        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/path/@search?query='
            'SELECT+%2A+FROM+Document+WHERE+ecm%3AparentId+%3D+'
            '%27d400bb29-98d4-429c-a0b8-119acdb92006%27+ORDER+BY+'
            'ecm%3Apos&currentPageIndex=0&pageSize=100',
            responses=[
                httpretty.Response(
                    body=read_fixture('/nuxeo_image_components.json'),
                    status=200),
            ])

        httpretty.register_uri(
            httpretty.GET,
            'https://example.edu/api/v1/id/'
            'e8af2d74-0c8b-4d18-b86c-4067b9e16159',
            responses=[
                httpretty.Response(
                    body=read_fixture('/nuxeo_first_image_component.json'),
                    status=200),
            ])

        h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                                 'path-to-asset/here')

        nuxeo_metadata = json.loads(
            read_fixture('/nuxeo_doc_imageless_parent.json'))
        isShownBy = h._get_isShownBy(nuxeo_metadata)
        self.assertEqual(
            isShownBy, 'https://nuxeo.cdlib.org/Nuxeo/nxpicsfile/default/'
            'e8af2d74-0c8b-4d18-b86c-4067b9e16159/Medium:content/')
Beispiel #16
0
 def testPreservicaFetch(self):
     '''Fetch from mocked Preservica endpoints and count the documents.

     Six URLs are mocked, one XML fixture each: two children listings and
     four information-object or metadata documents. The first batch is
     pulled with an explicit ``next()`` call and the rest by iteration;
     17 documents are expected in total.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     def single_response(name):
         # Every endpoint below serves exactly one fixture.
         return [httpretty.Response(body=read_fixture(name))]

     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/structural-objects/eb2416ec-ac1e-4e5e-baee-84e3371c03e9/children',
         responses=single_response('/preservica-page-1.xml'))
     # NOTE(review): the query string holds a literal '&amp;' and is
     # matched exactly (match_querystring=True). Presumably intentional;
     # confirm against the fetcher and the page-1 fixture.
     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/structural-objects/eb2416ec-ac1e-4e5e-baee-84e3371c03e9/children/?start=100&amp;max=100',
         match_querystring=True,
         responses=single_response('/preservica-page-2.xml'))
     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/information-objects/8c81f065-b6e4-457e-8b76-d18176f74bee',
         responses=single_response('/preservica-child-1.xml'))
     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/information-objects/8c81f065-b6e4-457e-8b76-d18176f74bee/metadata/37db4583-8e8e-4778-ac90-ad443664c5cb',
         responses=single_response('/preservica-child-2.xml'))
     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/information-objects/9501e09f-1ae8-4abc-a9ec-6c705ff8fdbe',
         responses=single_response('/preservica-child-3.xml'))
     httpretty.register_uri(
         httpretty.GET,
         'https://us.preservica.com/api/entity/v6.0/information-objects/9501e09f-1ae8-4abc-a9ec-6c705ff8fdbe/metadata/ec5c46e5-443e-4b6d-81b9-ec2a5252a50c',
         responses=single_response('/preservica-child-4.xml'))
     h = fetcher.PreservicaFetcher(
         'https://oakland.access.preservica.com/v6.0/uncategorized/SO_eb2416ec-ac1e-4e5e-baee-84e3371c03e9/',
         'usr, pwd')
     docs = []
     docs.extend(h.next())
     logger.error(docs[0])
     for d in h:
         docs.extend(d)
     self.assertEqual(len(docs), 17)
Beispiel #17
0
 def test_photoset_fetching(self):
     '''Fetch a mocked Flickr photoset and compare the first photo.

     Nine responses are queued on the first photoset URL. The fetcher's
     counters are first set by hand and ``next`` is expected to raise
     ``ValueError`` and then ``StopIteration``. A fresh fetcher is then
     iterated to completion, six objects are expected, and the first one
     is compared key by key with the expected values.
     '''
     def read_fixture(name):
         # Close the fixture handle deterministically.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'https://example.edu'
     user_id = 'testphotoset'
     page_size = 6
     url_first = fetcher.Flickr_Fetcher.url_get_photoset_template.format(
         api_key='boguskey', user_id=user_id, per_page=page_size, page=1)
     # All responses are queued on one URL, in the order they are
     # expected to be consumed. Ugly but works.
     queued = (
         '/flickr-photoset-1.xml',
         '/flickr-photoset-1.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photoset-2.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photo-info-0.xml',
         '/flickr-photo-info-0.xml',
     )
     httpretty.register_uri(
         httpretty.GET,
         url_first,
         responses=[
             httpretty.Response(body=read_fixture(name), status=200)
             for name in queued
         ])
     h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size)
     h.doc_current = 6
     self.assertRaises(ValueError, h.next)
     h.docs_fetched = 6
     self.assertRaises(StopIteration, h.next)
     # Start over with an untouched fetcher for the full iteration.
     h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size)
     total = 0
     all_objs = []
     for objs in h:
         total += len(objs)
         all_objs.extend(objs)
     self.assertEqual(total, 6)
     self.assertEqual(len(all_objs), 6)
     photo_obj = all_objs[0]
     key_list_values = {
         'description': {
             'text':
             'PictionID:56100666 - Catalog:C87-047-040.tif - '
             'Title:Ryan Aeronautical Negative Collection Image - '
             'Filename:C87-047-040.tif - - Image from the Teledyne Ryan '
             'Archives, donated to SDASM in the 1990s. Many of these '
             'images are from Ryan\'s UAV program-----Please Tag these '
             'images so that the information can be permanently stored '
             'with the digital file.---Repository: <a href='
             '"http://www.sandiegoairandspace.org/library/stillimages.'
             'html" rel="nofollow">San Diego Air and Space Museum </a>'
         },
         'isfavorite': '0',
         'views': '499',
         'farm': '5',
         'people': {
             'haspeople': '0',
             'text': None
         },
         'visibility': {
             'text': None,
             'isfamily': '0',
             'isfriend': '0',
             'ispublic': '1'
         },
         'originalformat': 'jpg',
         'owner': {
             'text': None,
             'nsid': "49487266@N07",
             'username': "******",
             'realname': "SDASM Archives",
             'location': "",
             'iconserver': "4070",
             'iconfarm': "5",
             'path_alias': "sdasmarchives",
         },
         'rotation': '0',
         'id': '34394586825',
         'dates': {
             'text': None,
             'lastupdate': '1493683351',
             'posted': '1493683350',
             'taken': '2017-05-01 17:02:30',
             'takengranularity': '0',
             'takenunknown': '1',
         },
         'originalsecret': 'd46e9b19cc',
         'license': '7',
         'title': {
             'text': 'Ryan Aeronautical Image'
         },
         'media': 'photo',
         'notes': [{
             'x': '10',
             'authorname': 'Bees',
             'text': 'foo',
             'w': '50',
             'author': '12037949754@N01',
             'y': '10',
             'h': '50',
             'id': '313'
         }],
         'tags': [{
             'raw': 'woo yay',
             'text': 'wooyay',
             'id': '1234',
             'author': '12037949754@N01'
         }, {
             'raw': 'hoopla',
             'text': 'hoopla',
             'id': '1235',
             'author': '12037949754@N01'
         }],
         'publiceditability': {
             'text': None,
             'cancomment': '1',
             'canaddmeta': '1'
         },
         'comments': {
             'text': '0'
         },
         'server': '4169',
         'dateuploaded': '1493683350',
         'secret': '375e0b1706',
         'safety_level': '0',
         'urls': [{
             'text':
             'https://www.flickr.com/photos/sdasmarchives/34394586825/',
             'type': 'photopage'
         }],
         'usage': {
             'text': None,
             'canblog': '0',
             'candownload': '1',
             'canprint': '0',
             'canshare': '1'
         },
         'editability': {
             'text': None,
             'cancomment': '0',
             'canaddmeta': '0'
         },
     }
     # Same key count plus per-key equality: no extra or missing fields.
     self.assertEqual(len(photo_obj.keys()), len(key_list_values))
     for k, v in key_list_values.items():
         self.assertEqual(photo_obj[k], v)
Beispiel #18
0
 def test_fetching(self):
     '''Fetch mocked Internet Archive search results and check the last.

     Three JSON fixtures are queued on the first advanced-search URL.
     After iterating, the test checks ``url_base``, the
     ``url_advsearch`` template, the total of 1285 results and the
     content of the final result.
     '''
     def read_fixture(name):
         # Context manager guarantees the fixture handle is closed.
         with open(DIR_FIXTURES + name) as fixture:
             return fixture.read()

     url = 'https://example.edu'
     extra_data = 'collection:environmentaldesignarchive AND subject:"edith heath"'
     page_current = 1
     url_first = fetcher.IA_Fetcher.url_advsearch.format(
         page_current=page_current, search_query=extra_data)
     pages = (
         '/ia-results-1.json',
         '/ia-results-2.json',
         '/ia-results-3.json',
     )
     httpretty.register_uri(
         httpretty.GET,
         url_first,
         responses=[
             httpretty.Response(body=read_fixture(page)) for page in pages
         ])
     h = fetcher.IA_Fetcher(url, extra_data)
     results = []
     for v in h:
         results.extend(v)
     self.assertEqual(h.url_base, url)
     self.assertEqual(
         h.url_advsearch, 'https://archive.org/advancedsearch.php?'
         'q={search_query}&rows=500&page={page_current}&output=json')
     self.assertEqual(len(results), 1285)
     self.assertEqual(
         results[1284], {
             u'week':
             0,
             u'publicdate':
             u'2014-02-28T03:17:59Z',
             u'format': [
                 u'Archive BitTorrent', u'JPEG', u'JPEG Thumb', u'JSON',
                 u'Metadata'
             ],
             u'title':
             u'Upright Cabinet Piano',
             u'downloads':
             68,
             u'indexflag': [u'index', u'nonoindex'],
             u'mediatype':
             u'image',
             u'collection': [
                 u'metropolitanmuseumofart-gallery',
                 u'fav-mar_a_luisa_guevara_tirado', u'fav-drewblanco'
             ],
             u'month':
             1,
             u'btih':
             u'e16555eb5474d2543c7ad27a1cfd145195ce05bf',
             u'item_size':
             353871,
             u'backup_location':
             u'ia905804_31',
             u'year':
             u'1835',
             u'date':
             u'1835-01-01T00:00:00Z',
             u'oai_updatedate': [
                 u'2014-02-28T03:17:59Z', u'2014-02-28T03:17:59Z',
                 u'2016-08-31T20:56:29Z'
             ],
             u'identifier':
             u'mma_upright_cabinet_piano_504395',
             u'subject': [
                 u'North and Central America', u'Wood, various materials',
                 u'Cabinets', u'Case furniture', u'1835', u'Pianos',
                 u'New York City', u'Metropolitan Museum of Art',
                 u'Zithers', u'United States', u'Brooklyn',
                 u'Musical instruments', u'Chordophones', u'New York',
                 u'Furniture'
             ]
         })