Example #1
0
 def testLoggingMoreThan1000(self):
     '''Check the progress messages logged while harvesting 2400 records.

     The assertions expect 13 log records in total, with "records
     harvested" messages for 100, 1000, 2000 and finally 2400 records.
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_big_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAI-2400-records.xml') as fixture:
         oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/198/",
         body=collection_json)
     # NOTE(review): '?' is unescaped, so it makes the 'i' optional rather
     # than matching a literal '?'; left as-is to keep matching unchanged.
     httpretty.register_uri(httpretty.GET,
                            re.compile("http://content.cdlib.org/oai?.*"),
                            body=oai_xml)
     collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/198/')
     controller = fetcher.HarvestController('*****@*****.**',
                                            collection,
                                            config_file=self.config_file,
                                            profile_path=self.profile_path)
     try:
         controller.harvest()
         self.assertEqual(len(self.test_log_handler.records), 13)
         logged = self.test_log_handler.formatted_records
         self.assertEqual(
             logged[1], '[INFO] HarvestController: 100 records harvested')
         self.assertEqual(
             logged[10], '[INFO] HarvestController: 1000 records harvested')
         self.assertEqual(
             logged[11], '[INFO] HarvestController: 2000 records harvested')
         self.assertEqual(
             logged[12], '[INFO] HarvestController: 2400 records harvested')
     finally:
         # Remove the harvest output directory even if an assertion fails.
         shutil.rmtree(controller.dir_save)
Example #2
0
 def testAddRegistryData(self):
     '''Unittest the _add_registry_data function.

     Checks that the collection data, including its campus and repository
     entries, is added under obj['collection'] and that campus/repository
     are not added at the top level of the object.
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
         oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/197/",
         body=collection_json)
     # NOTE(review): '?' is unescaped, so it makes the 'i' optional rather
     # than matching a literal '?'; left as-is to keep matching unchanged.
     httpretty.register_uri(httpretty.GET,
                            re.compile("http://content.cdlib.org/oai?.*"),
                            body=oai_xml)
     collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/197/')
     self.tearDown_config()  # remove ones setup in setUp
     self.setUp_config(collection)
     controller = fetcher.HarvestController('*****@*****.**',
                                            collection,
                                            config_file=self.config_file,
                                            profile_path=self.profile_path)
     obj = {'id': 'fakey', 'otherdata': 'test'}
     self.assertNotIn('collection', obj)
     controller._add_registry_data(obj)
     self.assertIn('collection', obj)
     registry_coll = obj['collection'][0]
     self.assertEqual(registry_coll['@id'],
                      'https://registry.cdlib.org/api/v1/collection/197/')
     # campus and repository live on the collection entry, not on obj
     self.assertNotIn('campus', obj)
     self.assertIn('campus', registry_coll)
     self.assertNotIn('repository', obj)
     self.assertIn('repository', registry_coll)
     # TODO: need to test one without campus
     self.assertEqual(registry_coll['campus'][0]['@id'],
                      'https://registry.cdlib.org/api/v1/campus/12/')
     self.assertEqual(registry_coll['repository'][0]['@id'],
                      'https://registry.cdlib.org/api/v1/repository/37/')
Example #3
0
    def setUp(self):
        '''Create an OAI HarvestController for collection 197 and pin now().

        Registers mocked registry and OAI responses, builds
        self.controller_oai, loads self.objset_test_doc and replaces
        datetime.datetime with a subclass whose now() always returns
        2017-07-14 12:01. The original class is kept in self.old_dt,
        presumably so tearDown can restore it (tearDown is not shown here).
        '''
        super(HarvestControllerTestCase, self).setUp()
        # Read fixtures through context managers so the handles get closed.
        with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
            collection_json = fixture.read()
        with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
            oai_xml = fixture.read()
        httpretty.register_uri(
            httpretty.GET,
            "https://registry.cdlib.org/api/v1/collection/197/",
            body=collection_json)
        # NOTE(review): '?' is unescaped, so it makes the 'i' optional
        # rather than matching a literal '?'; left as-is on purpose.
        httpretty.register_uri(httpretty.GET,
                               re.compile("http://content.cdlib.org/oai?.*"),
                               body=oai_xml)
        self.collection = Collection(
            'https://registry.cdlib.org/api/v1/collection/197/')
        config_file, profile_path = self.setUp_config(self.collection)
        self.controller_oai = fetcher.HarvestController(
            '*****@*****.**',
            self.collection,
            profile_path=profile_path,
            config_file=config_file)
        with open(DIR_FIXTURES + '/objset_test_doc.json') as fixture:
            self.objset_test_doc = json.load(fixture)

        class myNow(datetime.datetime):
            '''datetime subclass with a fixed now() for repeatable tests.'''

            @classmethod
            def now(cls):
                return cls(2017, 7, 14, 12, 1)

        # Keep the real class around before swapping in the frozen clock.
        self.old_dt = datetime.datetime
        datetime.datetime = myNow
Example #4
0
    def testNuxeoHarvest(self, mock_deepharvest, mock_boto, mock_boto3):
        '''Test the function of the Nuxeo harvest.

        The mock arguments are supplied by patch decorators that are not
        part of this block. The test expects harvest() to return 5 and then
        checks the collection data and title of the first saved record.
        '''
        # Read fixtures through context managers so the handles get closed.
        with open(DIR_FIXTURES + '/nuxeo_media_structmap.json') as fixture:
            media_json = fixture.read()
        with open(DIR_FIXTURES + '/collection_api_test_nuxeo.json') as fixture:
            collection_json = fixture.read()
        with open(DIR_FIXTURES + '/nuxeo_object_list.json') as fixture:
            nuxeo_objects = json.load(fixture)
        with open(DIR_FIXTURES + '/nuxeo_doc.json') as fixture:
            nuxeo_doc_json = fixture.read()

        mock_boto.return_value.get_bucket.return_value.\
            get_key.return_value.\
            get_contents_as_string.return_value = media_json
        httpretty.register_uri(
            httpretty.GET,
            'http://registry.cdlib.org/api/v1/collection/19/',
            body=collection_json)
        mock_deepharvest.return_value.fetch_objects.return_value = \
            nuxeo_objects
        httpretty.register_uri(
            httpretty.GET,
            re.compile('https://example.edu/Nuxeo/site/api/v1/id/.*'),
            body=nuxeo_doc_json)

        self.collection = Collection(
            'http://registry.cdlib.org/api/v1/collection/19/')
        with patch(
                'ConfigParser.SafeConfigParser',
                autospec=True) as mock_configparser:
            config_inst = mock_configparser.return_value
            config_inst.get.return_value = 'dublincore,ucldc_schema,picture'
            self.setUp_config(self.collection)
            self.controller = fetcher.HarvestController(
                '*****@*****.**',
                self.collection,
                config_file=self.config_file,
                profile_path=self.profile_path)
        self.assertTrue(hasattr(self.controller, 'harvest'))
        num = self.controller.harvest()
        self.assertEqual(num, 5)
        self.tearDown_config()
        # verify one record has collection and such filled in
        fname = os.listdir(self.controller.dir_save)[0]
        with open(os.path.join(self.controller.dir_save, fname)) as saved:
            saved_objset = json.load(saved)
        saved_obj = saved_objset[0]
        saved_coll = saved_obj['collection'][0]
        self.assertEqual(saved_coll['@id'],
                         u'http://registry.cdlib.org/api/v1/collection/19/')
        self.assertEqual(saved_coll['name'],
                         u'Cochems (Edward W.) Photographs')
        self.assertEqual(saved_coll['title'],
                         u'Cochems (Edward W.) Photographs')
        self.assertEqual(saved_coll['id'], u'19')
        self.assertEqual(saved_coll['dcmi_type'], 'I')
        self.assertEqual(saved_coll['rights_statement'],
                         'a sample rights statement')
        self.assertEqual(saved_coll['rights_status'], 'PD')
        self.assertEqual(saved_obj['state'], 'project')
        self.assertEqual(
            saved_obj['title'],
            'Adeline Cochems having her portrait taken by her father '
            'Edward W, Cochems in Santa Ana, California: Photograph')
Example #5
0
 def testMARCHarvest(self, mock_boto3):
     '''Test the function of the MARC harvest.

     The collection's url_harvest is pointed at the local marc-test
     fixture through a file: URL; the test expects harvest() to return 10.
     mock_boto3 is supplied by a patch decorator outside this block.
     '''
     # Read the fixture through a context manager so the handle is closed.
     with open(DIR_FIXTURES + '/collection_api_test_marc.json') as fixture:
         collection_json = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         'http://registry.cdlib.org/api/v1/collection/',
         body=collection_json)
     self.collection = Collection(
         'http://registry.cdlib.org/api/v1/collection/')
     self.collection.url_harvest = 'file:' + DIR_FIXTURES + '/marc-test'
     self.setUp_config(self.collection)
     self.controller = fetcher.HarvestController(
         '*****@*****.**',
         self.collection,
         config_file=self.config_file,
         profile_path=self.profile_path)
     self.assertTrue(hasattr(self.controller, 'harvest'))
     num = self.controller.harvest()
     self.assertEqual(num, 10)
     self.tearDown_config()
Example #6
0
 def testFailsIfNoRecords(self):
     '''Test that the Controller throws an error if no records come back
     from fetcher.

     The OAI endpoint is mocked with the no-records fixture and harvest()
     is expected to raise fetcher.NoRecordsFetchedException.
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAI-no-records.xml') as fixture:
         empty_oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/101/",
         body=collection_json)
     # NOTE(review): '?' is unescaped, so it makes the 'i' optional rather
     # than matching a literal '?'; left as-is to keep matching unchanged.
     httpretty.register_uri(httpretty.GET,
                            re.compile("http://content.cdlib.org/oai?.*"),
                            body=empty_oai_xml)
     collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/101/')
     controller = fetcher.HarvestController('*****@*****.**',
                                            collection,
                                            config_file=self.config_file,
                                            profile_path=self.profile_path)
     self.assertRaises(fetcher.NoRecordsFetchedException,
                       controller.harvest)
Example #7
0
 def testOAIHarvest(self):
     '''Test the function of the OAI harvest.

     Currently this only checks that the controller built for the mocked
     collection exposes a harvest attribute; harvest() is not called.
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAC-url_next-0.xml') as fixture:
         oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         'http://registry.cdlib.org/api/v1/collection/',
         body=collection_json)
     httpretty.register_uri(
         httpretty.GET,
         'http://content.cdlib.org/oai',
         body=oai_xml)
     self.collection = Collection(
         'http://registry.cdlib.org/api/v1/collection/')
     self.setUp_config(self.collection)
     self.controller = fetcher.HarvestController(
         '*****@*****.**',
         self.collection,
         config_file=self.config_file,
         profile_path=self.profile_path)
     self.assertTrue(hasattr(self.controller, 'harvest'))
     # TODO: fix why logbook.TestHandler not working for previous logging
     # self.assertEqual(len(self.test_log_handler.records), 2)
     self.tearDown_config()
Example #8
0
 def setUp(self):
     '''Create a HarvestController for the OAC JSON test collection (178).

     Registers mocked responses for the registry collection URL and the
     OAC search URL, then stores the collection and controller on self.
     '''
     super(HarvestOAC_JSON_ControllerTestCase, self).setUp()
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test_oac.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAC.json') as fixture:
         oac_json = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/178/",
         body=collection_json)
     httpretty.register_uri(
         httpretty.GET,
         'http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
         'relation=ark:/13030/tf2v19n928',
         body=oac_json)
     self.collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/178/')
     self.setUp_config(self.collection)
     self.controller = fetcher.HarvestController(
         '*****@*****.**',
         self.collection,
         config_file=self.config_file,
         profile_path=self.profile_path)
Example #9
0
 def testSaveToS3(self, mock_boto3):
     '''Check that save_objset_s3 writes one JSON line to the S3 bucket.

     mock_boto3 is supplied by a patch decorator outside this block. The
     expected key contains the timestamp 2017-07-14-1201, so this
     presumably relies on now() being pinned by the test setUp.
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
         oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/197/",
         body=collection_json)
     # NOTE(review): '?' is unescaped, so it makes the 'i' optional rather
     # than matching a literal '?'; left as-is to keep matching unchanged.
     httpretty.register_uri(httpretty.GET,
                            re.compile("http://content.cdlib.org/oai?.*"),
                            body=oai_xml)
     collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/197/')
     controller = fetcher.HarvestController('*****@*****.**',
                                            collection,
                                            config_file=self.config_file,
                                            profile_path=self.profile_path)
     controller.save_objset_s3({"xxxx": "yyyy"})
     mock_boto3.assert_called_with('s3')
     mock_boto3().Bucket.assert_called_with('ucldc-ingest')
     mock_boto3().Bucket().put_object.assert_called_with(
         Body='{"xxxx": "yyyy"}\n',
         Key='data-fetched/197/2017-07-14-1201/page-0.jsonl')
Example #10
0
 def setUp(self):
     '''Create a HarvestController for the Solr test collection (183).

     Registers mocked responses for the registry collection URL (GET) and
     the Solr select URL (POST), then stores the collection and
     controller on self.
     '''
     super(HarvestSolr_ControllerTestCase, self).setUp()
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_solr_harvest.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES +
               '/ucsd-new-feed-missions-bb3038949s-0.xml') as fixture:
         solr_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/183/",
         body=collection_json)
     httpretty.register_uri(
         httpretty.POST,
         'http://example.edu/solr/blacklight/select',
         body=solr_xml)
     self.collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/183/')
     self.setUp_config(self.collection)
     self.controller = fetcher.HarvestController(
         '*****@*****.**',
         self.collection,
         config_file=self.config_file,
         profile_path=self.profile_path)
     # Single-argument call form prints the same text on Python 2 and 3.
     print("DIR SAVE::::: {}".format(self.controller.dir_save))
Example #11
0
 def setUp(self):
     '''Create a HarvestController for the OAC XML test collection (178).

     Registers mocked responses for the registry collection URL and the
     OAC search URL, then stores the collection and controller on self.
     '''
     super(HarvestOAC_XML_ControllerTestCase, self).setUp()
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test_oac_xml.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAC-url_next-0.xml') as fixture:
         oac_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/178/",
         body=collection_json)
     httpretty.register_uri(
         httpretty.GET,
         'http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
         'relation=ark:/13030/tf0c600134',
         body=oac_xml)
     self.collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/178/')
     self.setUp_config(self.collection)
     self.controller = fetcher.HarvestController(
         '*****@*****.**',
         self.collection,
         config_file=self.config_file,
         profile_path=self.profile_path)
     # Single-argument call form prints the same text on Python 2 and 3.
     print("DIR SAVE::::: {}".format(self.controller.dir_save))
Example #12
0
 def testHarvestControllerExists(self):
     '''Check the attributes of a freshly constructed HarvestController.

     Expects an OAIFetcher as the fetcher, the campus_valid, dc_elements
     and datetime_start attributes, and an s3path of
     'data-fetched/197/2017-07-14-1201/' (the timestamp presumably comes
     from now() being pinned by the test setUp).
     '''
     # Read fixtures through context managers so the handles get closed.
     with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
         collection_json = fixture.read()
     with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
         oai_xml = fixture.read()
     httpretty.register_uri(
         httpretty.GET,
         "https://registry.cdlib.org/api/v1/collection/197/",
         body=collection_json)
     # NOTE(review): '?' is unescaped, so it makes the 'i' optional rather
     # than matching a literal '?'; left as-is to keep matching unchanged.
     httpretty.register_uri(httpretty.GET,
                            re.compile("http://content.cdlib.org/oai?.*"),
                            body=oai_xml)
     collection = Collection(
         'https://registry.cdlib.org/api/v1/collection/197/')
     controller = fetcher.HarvestController('*****@*****.**',
                                            collection,
                                            config_file=self.config_file,
                                            profile_path=self.profile_path)
     try:
         self.assertTrue(hasattr(controller, 'fetcher'))
         self.assertIsInstance(controller.fetcher, fetcher.OAIFetcher)
         for attr_name in ('campus_valid', 'dc_elements',
                           'datetime_start'):
             self.assertTrue(hasattr(controller, attr_name))
         print(controller.s3path)
         self.assertEqual(controller.s3path,
                          'data-fetched/197/2017-07-14-1201/')
     finally:
         # Remove the controller's save directory even if a check fails.
         shutil.rmtree(controller.dir_save)