def _get_xml_tree_given_xml_string(self, xml_string: str, id_url: str) -> ElementTree:
    """ Parse xml_string and return its root element for further use.

    When xml_string is not well-formed XML, the problem is printed (id_url
    identifies where the XML came from), reported through capture_exception,
    and the root element of "<empty/>" is returned instead, so the caller
    always receives an element.
    """
    try:
        return ElementTree.fromstring(xml_string)
    except ElementTree.ParseError as parse_error:
        print("Error converting to xml results of this url: " + id_url)
        capture_exception(parse_error)
    # Only reached after a parse failure: hand back the placeholder tree.
    return ElementTree.fromstring("<empty/>")
def _get_json_given_url(url: str, config: dict) -> dict:
    """ Return the decoded JSON body fetched from url, or {} on any failure.

    Args:
        url: address to GET.
        config: settings dict; config["bendo-token"] is sent as the
            basic-auth user name with an empty password.

    Returns:
        The decoded JSON response. If the request, the token lookup, or the
        JSON decoding fails, the failure is printed and reported through
        capture_exception and an empty dict is returned.
    """
    json_response = {}
    try:
        # Bound the wait so an unresponsive server cannot stall the caller
        # indefinitely; a timeout is handled by the generic clause below.
        json_response = requests.get(
            url, auth=(config["bendo-token"], ""), timeout=60).json()
    except ConnectionRefusedError as e:
        # NOTE(review): requests normally raises its own
        # requests.exceptions.ConnectionError, which would land in the generic
        # clause below - confirm whether this branch is ever reached.
        print('Connection refused on url ' + url)
        capture_exception(e)
    except Exception as e:  # deliberately broad: report the failure and fall back to {}
        print('Error caught trying to process url ' + url)
        capture_exception(e)
    return json_response
def _read_site_to_harvest_control_json(self, site_to_harvest: str) -> dict:
    """ Load the harvest-control settings for one site from its JSON file.

    The file is looked up at self.local_folder + site_to_harvest + '.json'.

    Args:
        site_to_harvest: site name, which is also the file's base name.

    Returns:
        The parsed JSON content, or {} when the file is missing or cannot be
        read (the error is reported through capture_exception). Content that
        is not valid JSON is not handled here and propagates to the caller.
    """
    site_control_json = {}
    filename = self.local_folder + site_to_harvest + '.json'
    try:
        # JSON text is UTF-8 by specification; decode it explicitly instead of
        # depending on whatever the platform's locale encoding happens to be.
        with open(filename, 'r', encoding='utf-8') as input_source:
            site_control_json = json.load(input_source)
    except FileNotFoundError as e:
        print('Unable to load site_harvest_control_json (' + filename + ').')
        capture_exception(e)
    except OSError as e:  # any other I/O problem: report it and keep the empty default
        capture_exception(e)
    return site_control_json
def _get_xml_string_given_oai_url(self, oai_url: str) -> str:
    """ Fetch oai_url and return the response text with its namespace information stripped.

    Any failure while fetching or stripping is reported through
    capture_exception, announced on stdout, and results in "" being returned.
    """
    try:
        raw_xml = requests.get(oai_url, timeout=60).text
        return self._strip_namespaces(raw_xml)
    except ConnectionError as connection_error:
        capture_exception(connection_error)
        print("ConnectionError calling " + oai_url)
    except Exception as unexpected_error:
        capture_exception(unexpected_error)
        print("Error calling " + oai_url)
    # Reached only after one of the handlers above ran.
    return ""
def _get_json_given_url(self, url: str) -> dict:
    """ Return the decoded JSON body fetched from url, or {} on any failure.

    Before the request, the url and the whole seconds elapsed since
    self.start_time are printed. The request is sent with self.curate_header
    as its headers.

    Args:
        url: address to GET.

    Returns:
        The decoded JSON response. If the request or the JSON decoding fails,
        the failure is printed and reported through capture_exception and an
        empty dict is returned.
    """
    json_response = {}
    print("calling url =", url, int(time.time() - self.start_time), 'seconds.')
    try:
        # Bound the wait so an unresponsive server cannot stall the caller
        # indefinitely; a timeout is handled by the generic clause below.
        json_response = dependencies.requests.get(
            url, headers=self.curate_header, timeout=60).json()
    except ConnectionRefusedError as e:
        # NOTE(review): requests normally raises its own
        # requests.exceptions.ConnectionError, which would land in the generic
        # clause below - confirm whether this branch is ever reached.
        print('Connection refused on url ' + url)
        capture_exception(e)
    except Exception as e:  # deliberately broad: report the failure and fall back to {}
        print('Error caught trying to process url ' + url)
        capture_exception(e)
    return json_response
def _get_metadata_given_url(self, url: str) -> dict:
    """ Return the JSON document fetched from url, or {} on any failure.

    The response text is decoded with json.loads.

    Args:
        url: address to GET.

    Returns:
        The decoded JSON content. If the request or the decoding fails, the
        failure is printed and reported through capture_exception and an
        empty dict is returned.
    """
    json_response = {}
    try:
        # Bound the wait so an unresponsive server cannot stall the caller
        # indefinitely; a timeout is handled by the generic clause below.
        response = dependencies.requests.get(url, timeout=60)
        json_response = json.loads(response.text)
    except ConnectionRefusedError as e:
        # NOTE(review): requests normally raises its own
        # requests.exceptions.ConnectionError, which would land in the generic
        # clause below - confirm whether this branch is ever reached.
        print(
            'Connection refused in process_web_kiosk_json_metadata/_get_metadata_given_url on url ', url)
        capture_exception(e)
    except Exception as e:  # deliberately broad: report the failure and fall back to {}
        print(
            'Error caught in process_web_kiosk_json_metadata/_get_metadata_given_url trying to process url ' + url)
        capture_exception(e)
    return json_response
def run(event, context):
    """ Harvest the Google spreadsheet info for every requested site, then copy the site files.

    Steps, in order:
      1. Supplement the event and build the pipeline config from it.
      2. Merge in the settings loaded from config['google_keys_ssm_base'].
      3. Build a HarvestMetadataRules from the JSON credentials stored in
         config["museum-google-credentials"] and call it once for each
         name in event['sites'].
      4. Call s3_sync with the process bucket, the "sites" prefix and the
         local "sites" folder next to this file.
      5. Copy that "sites" folder into ../process_manifest/sites/; a failed
         copy is printed and reported through capture_exception.

    context is accepted as part of the handler signature but is not used.
    """
    _suplement_event(event)
    config = setup_pipeline_config(event)
    config.update(load_config_ssm(config['google_keys_ssm_base']))
    rules_harvester = HarvestMetadataRules(
        json.loads(config["museum-google-credentials"]))
    local_folder = os.path.dirname(os.path.realpath(__file__)) + '/'
    for site_name in event['sites']:
        rules_harvester.harvest_google_spreadsheet_info(site_name)
    # NOTE(review): the sync and copy are assumed to run once, after every
    # site has been harvested - confirm against the original indentation.
    s3_sync(config["process-bucket"], "sites", local_folder + "sites")
    try:
        copy_tree(local_folder + "sites/", local_folder + "../process_manifest/sites/")
    except EnvironmentError as copy_error:
        print('Unable to sync sites files to process_manifest ')
        capture_exception(copy_error)
def get_bendo_info(bendo_base_url, bendo_item, filename):
    """ Return the response headers of a HEAD request for one file of a bendo item.

    The request goes to bendo_base_url + "/item/" + bendo_item + "/" + filename.

    Args:
        bendo_base_url: base address of the bendo service (no trailing slash
            is added or removed here).
        bendo_item: item identifier placed after "/item/".
        filename: file name (or path) within the item.

    Returns:
        The headers of the response when the response object is truthy
        (requests treats non-error status codes as true); otherwise {}.
        Every failure is printed and reported through capture_exception and
        also yields {}.
    """
    results = {}
    url = bendo_base_url + "/item/" + bendo_item + "/" + filename
    try:
        # Bound the wait so an unresponsive server cannot stall the caller
        # indefinitely; a timeout is handled by the generic clause below.
        response = dependencies.requests.head(url, timeout=60)
        if response:
            results = response.headers
    except dependencies.requests.exceptions.InvalidURL as e:
        print("invalid url in get_bendo_info: ", url)
        capture_exception(e)
    except ConnectionRefusedError as e:
        # NOTE(review): requests normally raises its own
        # requests.exceptions.ConnectionError, which would land in the generic
        # clause below - confirm whether this branch is ever reached.
        print('Connection refused in get_bendo_info on url ', url)
        capture_exception(e)
    except Exception as e:  # deliberately broad: report the failure and fall back to {}
        print('Error caught in get_bendo_info trying to process url ' + url)
        capture_exception(e)
    return results