def _get_xml_tree_given_xml_string(self, xml_string: str, id_url: str) -> ElementTree:
    """Parse xml_string and return the resulting root element.

    When the text is not well-formed XML, a message naming id_url is printed,
    the ParseError is handed to capture_exception, and the element parsed
    from "<empty/>" is returned instead so callers always receive an element.

    id_url is used only in the error message.
    """
    try:
        return ElementTree.fromstring(xml_string)
    except ElementTree.ParseError as parse_error:
        print("Error converting to xml results of this url: " + id_url)
        capture_exception(parse_error)
    # Only reached after a parse failure: hand back the placeholder element.
    return ElementTree.fromstring("<empty/>")
def _get_json_given_url(url: str, config: dict, timeout: float = 60) -> dict:
    """Fetch url with basic auth and return the decoded JSON body.

    Args:
        url: Address to request.
        config: Must contain the key "bendo-token"; its value is sent as the
            basic-auth user name with an empty password.
        timeout: Seconds passed to requests.get as its timeout, so a server
            that never answers cannot block the caller indefinitely.

    Returns:
        Whatever the response's .json() yields, or an empty dict when the
        request, the lookup of "bendo-token", or the JSON decoding raises.
        Every caught exception is printed and passed to capture_exception.
    """
    json_response = {}
    try:
        json_response = requests.get(url,
                                     auth=(config["bendo-token"], ""),
                                     timeout=timeout).json()
    except ConnectionRefusedError as e:
        # NOTE(review): requests usually wraps socket errors in its own
        # exception types, so this branch may rarely fire - confirm.
        print('Connection refused on url ' + url)
        capture_exception(e)
    except Exception as e:  # best effort: report, then fall back to {}
        print('Error caught trying to process url ' + url)
        capture_exception(e)
    return json_response
Example #3
0
 def _read_site_to_harvest_control_json(self, site_to_harvest: str) -> dict:
     """Load the JSON control file for one site from self.local_folder.

     The file read is self.local_folder + site_to_harvest + '.json'.
     A missing file is reported on stdout and passed to capture_exception;
     any other OSError is passed to capture_exception without a message.
     In both failure cases an empty dict is returned.
     """
     control_path = self.local_folder + site_to_harvest + '.json'
     loaded_control = {}
     try:
         with open(control_path, 'r') as control_file:
             loaded_control = json.load(control_file)
     except FileNotFoundError as missing_file:
         print(f'Unable to load site_harvest_control_json ({control_path}).')
         capture_exception(missing_file)
     except OSError as os_error:  # EnvironmentError is an alias of OSError
         capture_exception(os_error)
     return loaded_control
 def _get_xml_string_given_oai_url(self, oai_url: str) -> str:
     """ Given the oai url, return the xml string, stripped of its namespace information.

     The url is fetched with a 60 second timeout and the response text is
     passed through self._strip_namespaces.  Any exception raised along the
     way is handed to capture_exception, a message is printed, and an empty
     string is returned.
     """
     try:
         response = requests.get(oai_url, timeout=60)
         return self._strip_namespaces(response.text)
     except ConnectionError as connection_error:
         capture_exception(connection_error)
         print("ConnectionError calling " + oai_url)
     except Exception as other_error:
         capture_exception(other_error)
         print("Error calling " + oai_url)
     # Only reached after a failure above.
     return ""
 def _get_json_given_url(self, url: str, timeout: float = 60) -> dict:
     """Fetch url with self.curate_header and return the decoded JSON body.

     Args:
         url: Address to request.
         timeout: Seconds passed to the get call as its timeout, so a server
             that never answers cannot block the caller indefinitely.

     Returns:
         Whatever the response's .json() yields, or an empty dict when the
         request or the JSON decoding raises.  Every caught exception is
         printed and passed to capture_exception.
     """
     json_response = {}
     # Progress trace: seconds elapsed since self.start_time.
     print("calling url =", url, int(time.time() - self.start_time),
           'seconds.')
     try:
         json_response = dependencies.requests.get(
             url, headers=self.curate_header, timeout=timeout).json()
     except ConnectionRefusedError as e:
         # NOTE(review): requests usually wraps socket errors in its own
         # exception types, so this branch may rarely fire - confirm.
         print('Connection refused on url ' + url)
         capture_exception(e)
     except Exception as e:  # best effort: report, then fall back to {}
         print('Error caught trying to process url ' + url)
         capture_exception(e)
     return json_response
 def _get_metadata_given_url(self, url: str, timeout: float = 60) -> dict:
     """Fetch url and return its body decoded with json.loads.

     Args:
         url: Address to request.
         timeout: Seconds passed to the get call as its timeout, so a server
             that never answers cannot block the caller indefinitely.

     Returns:
         The decoded JSON, or an empty dict when the request or the decoding
         raises.  Every caught exception is printed and passed to
         capture_exception.
     """
     json_response = {}
     try:
         json_response = json.loads(
             dependencies.requests.get(url, timeout=timeout).text)
     except ConnectionRefusedError as e:
         # NOTE(review): requests usually wraps socket errors in its own
         # exception types, so this branch may rarely fire - confirm.
         print(
             'Connection refused in process_web_kiosk_json_metadata/_get_metadata_given_url on url ',
             url)
         capture_exception(e)
     except Exception as e:  # best effort: report, then fall back to {}
         print(
             'Error caught in process_web_kiosk_json_metadata/_get_metadata_given_url trying to process url '
             + url)
         capture_exception(e)
     return json_response
Example #7
0
def run(event, context):
    """Harvest Google spreadsheet info for each requested site, then sync the sites folder.

    Steps, in order:
      1. Supplement the event and build the pipeline config from it.
      2. Merge in the settings loaded from config['google_keys_ssm_base'].
      3. Build HarvestMetadataRules from the JSON credentials stored under
         "museum-google-credentials" and call it once per name in
         event['sites'].
      4. Call s3_sync with the "process-bucket" value, "sites", and the
         local "sites" folder next to this file (the transfer direction is
         decided by s3_sync, which is not visible here).
      5. Copy the local "sites" folder into ../process_manifest/sites/.
         An OSError during the copy is reported and captured, not raised.

    context is accepted for the handler signature and is not used.
    """
    _suplement_event(event)
    pipeline_config = setup_pipeline_config(event)
    google_keys = load_config_ssm(pipeline_config['google_keys_ssm_base'])
    pipeline_config.update(google_keys)
    credentials = json.loads(pipeline_config["museum-google-credentials"])
    rules_harvester = HarvestMetadataRules(credentials)

    here = os.path.dirname(os.path.realpath(__file__)) + '/'
    sites_folder = here + "sites"

    for site_name in event['sites']:
        rules_harvester.harvest_google_spreadsheet_info(site_name)

    s3_sync(pipeline_config["process-bucket"], "sites", sites_folder)
    try:
        copy_tree(sites_folder + "/", here + "../process_manifest/sites/")
    except OSError as copy_error:  # EnvironmentError is an alias of OSError
        print('Unable to sync sites files to process_manifest ')
        capture_exception(copy_error)
Example #8
0
def get_bendo_info(bendo_base_url, bendo_item, filename, timeout=60):
    """Issue a HEAD request for one file of a Bendo item and return its headers.

    Args:
        bendo_base_url: Base address; "/item/<bendo_item>/<filename>" is
            appended to it.
        bendo_item: Item identifier used in the url path.
        filename: File name used in the url path.
        timeout: Seconds passed to the head call as its timeout, so a server
            that never answers cannot block the caller indefinitely.

    Returns:
        response.headers when the response object is truthy, otherwise an
        empty dict.  An empty dict is also returned when the request raises;
        every caught exception is printed and passed to capture_exception.
    """
    results = {}
    url = bendo_base_url + "/item/" + bendo_item + "/" + filename
    try:
        response = dependencies.requests.head(url, timeout=timeout)
        # NOTE(review): for a requests Response, truthiness presumably means
        # a non-error status, so error responses yield {} - confirm.
        if response:
            results = response.headers
    except dependencies.requests.exceptions.InvalidURL as e:
        print("invalid url in get_bendo_info: ", url)
        capture_exception(e)
    except ConnectionRefusedError as e:
        # NOTE(review): requests usually wraps socket errors in its own
        # exception types, so this branch may rarely fire - confirm.
        print('Connection refused in get_bendo_info on url ', url)
        capture_exception(e)
    except Exception as e:  # best effort: report, then fall back to {}
        print('Error caught in get_bendo_info trying to process url ' + url)
        capture_exception(e)
    return results