def pre_tree(func_db, module, config=None, cache=None):
    """
    Scan the modules ahead of time and make a map of all the document-its
    for future use. Must be called before tree(). This database of functions
    will then be called by tree() to query and walk the 'froms'

    Parameters:
        func_db (dictionary): accumulator, populated via add_to_tree_database()
        module: python module object to scan; sub-modules of the same package
            are scanned recursively
        config (dictionary): configuration settings, passed through to
            recursive calls
        cache (dictionary): records modules already processed so circular
            imports do not cause infinite recursion
    """
    config = common.always(config)
    cache = common.always(cache)
    if module.__name__ in cache:
        # Been here before, nothing to do
        return
    cache[module.__name__] = {'processed': True}
    for member_name, obj in inspect.getmembers(module):
        if inspect.ismodule(obj):
            # __package__ can be None (builtin or namespace modules);
            # the unguarded startswith() call would raise in that case.
            obj_pkg = getattr(obj, '__package__', None)
            mod_pkg = module.__package__
            if obj_pkg is not None and mod_pkg is not None \
                    and obj_pkg.startswith(mod_pkg):
                pre_tree(func_db, obj, config, cache)
        elif inspect.isfunction(obj):
            comments = inspect.getcomments(obj)
            if comments is not None and len(comments) > 0:
                for raw_item in comments.strip().split("\n"):
                    item = raw_item.strip()
                    if item.startswith(DOCUMENT_IT):
                        # everything after the marker is a JSON payload
                        key = module.__name__ + "." + member_name
                        value = json.loads(item[len(DOCUMENT_IT):])
                        fix_local_from(module, value)
                        add_to_tree_database(func_db, key, value)
def clear_scroll(scroll_id, config: dict = None):
    """
    This action is called to clear a scroll ID from CMR allowing CMR to free
    up memory associated with the current search.

    This call is the same as calling the following CURL command:
    curl -i -XPOST -H "Content-Type: application/json" \
        https://cmr.earthdata.nasa.gov/search/clear-scroll \
        -d '{ "scroll_id" : "xxxx"}'
    This API call must send " and not '
    API call returns HTTP status code 204 when successful.

    Parameters:
        scroll_id(string/number): CMR Scroll ID
        config(dictionary) - used to make configurations changes
    Returns:
        error dictionary if there was a problem, otherwise a JSON object of
        response headers
    """
    config = common.always(config)
    # Build headers
    headers = _standard_headers_from_config(config)
    headers = common.conj(headers, {'Content-Type': 'application/json'})
    url = _cmr_basic_url('clear-scroll', None, config)
    # json.dumps guarantees double quotes (required by CMR, see docstring)
    # and escapes any special characters in scroll_id; the hand-built
    # concatenation it replaces did neither.
    data = json.dumps({"scroll_id": str(scroll_id)})
    logger.info(" - %s: %s", 'POST', url)
    obj_json = net.post(url, data, headers=headers)
    if 'errors' in obj_json:
        for err in obj_json['errors']:
            logger.warning(" Error while clearing scroll: %s", err)
    return obj_json
def _cmr_basic_url(base: str, query: dict, config: dict = None):
    """
    Create a url for calling any CMR search end point, should not make any
    assumption, beyond the search directory. Will auto set the environment
    based on how config is set

    Parameters:
        base: CMR endpoint
        query: dictionary url parameters
        config: configurations, responds to:
            * env - sit, uat, ops, prod, production, or blank for production
    Returns:
        full URL string for the requested CMR search endpoint
    """
    expanded = ""
    if query is not None and len(query) > 0:
        expanded = "?" + net.expand_query_to_parameters(query)
    config = common.always(config)
    env = config.get('env', '').lower().strip()
    # Normalize first (drop a trailing dot), THEN test for production
    # aliases. The previous code appended the dot before the alias check,
    # so 'ops'/'prod'/'production' became 'ops.' etc., never matched, and
    # produced a non-existent host such as cmr.ops.earthdata.nasa.gov.
    # This now matches the handling in _env_to_extention()/_env_to_edl_url().
    if env.endswith("."):
        env = env[:-1]
    if env in ('', 'ops', 'prod', 'production'):
        env = ""
    else:
        env += "."
    url = 'https://cmr.{}earthdata.nasa.gov/search/{}{}'.format(
        env, base, expanded)
    return url
def config_to_header(config, source_key, headers, destination_key=None,
                     default=None):
    """
    Copy a value in the config into a header dictionary for use by urllib.
    Written to reduce boiler plate code

    config[key] -> [or default] -> [rename] -> headers[key]

    Parameters:
        config(dictionary): where to look for values
        source_key(string): name if configuration in config
        headers(dictionary): where to copy values to
        destination_key(string): name of key to save to in headers
        default(string): value to use if value can not be found in config
    Returns:
        the headers dictionary (created on demand if it was None)
    """
    config = common.always(config)
    # fall back to the source name when no rename was requested
    target = source_key if destination_key is None else destination_key
    value = config.get(source_key, default)
    if target is None or value is None:
        # nothing to record; hand back headers untouched
        return headers
    if headers is None:
        headers = {}
    headers[target] = value
    return headers
def search_by_page(base, query = None, filters = None, page_state = None, config: dict = None):
    """
    Recursive function to download all the pages of data. Note, this function
    will only run for 5 minutes and then will refuse to pull more pages
    returning what was found in that amount of time.

    Parameters:
        base: CMR search endpoint to request, passed to _make_search_request()
        query (dictionary): CMR parameters and their values
        filters (list): A list of lambda functions to reduce the number of columns
        page_state (dictionary): the current page to download
        config (dictionary): configurations settings responds to:
            * accept - the format for the return defaults to UMM-JSON
            * max-time - total processing time allowed for all calls
    return collected items
    """
    config = common.always(config)
    if page_state is None:
        page_state = create_page_state()  # must be the first page
    obj_json = _make_search_request(base, query, page_state, config)
    if isinstance(obj_json, str):
        # response could not be interpreted; wrap it as an error object
        return _error_object(0, "unknown response: " + obj_json)
    if 'errors' in obj_json:
        return obj_json
    resp_stats = {'hits': obj_json['hits'], 'took': obj_json['took']}
    items = obj_json['items']
    if 'http-headers' in obj_json:
        http_headers = obj_json['http-headers']
        # scroll mode is only engaged for large requests (limit over 2000);
        # stash the scroll id so subsequent pages reuse the CMR session
        if 'CMR-Scroll-Id' in http_headers and page_state['limit']>2000:
            page_state['CMR-Scroll-Id'] = http_headers['CMR-Scroll-Id']
    items = apply_filters(filters, items)
    if _continue_download(page_state):
        # track cumulative server processing time against the allowed budget
        accumulated_took_time = page_state['took'] + resp_stats['took']
        max_allowed_time = config.get('max-time', 300000)  # 300000ms = 5 min
        if accumulated_took_time > max_allowed_time:
            # Do not allow searches to go on forever, put an end to this and
            # return what has been found so far, but leave a log message
            logger.warning("max search time exceeded")
            return items[:page_state['limit']]
        next_page_state = _next_page_state(page_state, resp_stats['took'])
        recursive_items = search_by_page(base,
                                         query=query,
                                         filters=filters,
                                         page_state=next_page_state,
                                         config=config)
        items = items + recursive_items
    else:
        # last page: release the server-side scroll session if one was used
        if 'CMR-Scroll-Id' in page_state and page_state['limit']>2000:
            scroll_ret = clear_scroll(page_state['CMR-Scroll-Id'], config)
            if 'errors' in scroll_ret:
                for err in scroll_ret['errors']:
                    logger.warning('Error processing scroll: %s', err)
        logger.info("Total records downloaded was %d of %d which took %dms.",
                    len(items), resp_stats['hits'], resp_stats['took'])
    return items[:page_state['limit']]
def token_config(config: dict = None):
    """
    Pull a token from the configuration dictionary

    Parameters:
        config: Responds to:
            "cmr.token.value": value of token, defaults to 'None'
    Returns:
        the configured token value, or None when not set
    """
    return common.always(config).get('cmr.token.value', None)
def _token_file_path(config: dict = None):
    """
    Return the path to the file which stores a CMR token. This path can be
    different for each environment if specified with the env config.
    Returns ~/.cmr_token<.env>, no env if production
    """
    config = common.always(config)
    # default path carries an environment suffix (empty for production)
    default_path = '~/.cmr_token' + _env_to_extention(config)
    return config.get('cmr.token.file', default_path)
def token_file(config: dict = None):
    """
    Load a token from a local user file assumed to be ~/.cmr_token

    Parameters:
        config: Responds to:
            "cmr.token.file": location of token file, defaults to ~/.cmr_token
    Returns
        token from file
    """
    config = common.always(config)
    raw_path = config.get('cmr.token.file', '~/.cmr_token')
    raw_clear_text = common.read_file(os.path.expanduser(raw_path))
    if raw_clear_text is None:
        return None
    for line in raw_clear_text.splitlines():
        # the token is the first line that is not a comment
        if not line.startswith("#"):
            return line
    return None
def token_manager(config: dict = None):
    """
    Use a system like the MacOS X Keychain app. Any os which also has the
    security app would also work.

    Parameters:
        config: Responds to the following:
            'token.manager.account': account field in Keychain
            'token.manager.app': Keychain command - defaults to /usr/bin/security
            'token.manager.service' defaults to 'cmr-lib-token'
    Returns:
        token from Keychain
    """
    config = common.always(config)
    account = config.get('token.manager.account', 'user')
    service = config.get('token.manager.service', 'cmr-lib-token')
    app = config.get('token.manager.app', '/usr/bin/security')
    # only the security call itself can raise CalledProcessError
    try:
        return common.call_security(account, service, app)
    except subprocess.CalledProcessError:
        return None
def _env_to_extention(config: dict = None):
    """
    Allow different files to be loaded for each environment, make an env
    extension which will be appended to the token file path

    Parameters:
        config dictionary containing an env value
    Return:
        empty string or a dot followed by the environment.
    """
    config = common.always(config)
    # config may store env as None; treat that the same as missing
    env = (config.get('env', '') or '').lower().strip()
    if env.endswith("."):
        env = env[:-1]
    if env in ('', 'ops', 'prod', 'production'):
        return ""  # production gets no extension
    return "." + env
def _env_to_edl_url(endpoint, config: dict = None):
    """
    Pull out parameters from the config and build an EDL endpoint URL

    Parameters:
        endpoint: part of the URL after 'api/users' such as token, tokens,
            revoke_token
        config: responds to 'env'
    Return:
        URL
    """
    config = common.always(config)
    env = config.get('env', '')
    if env is None:
        env = ''
    env = env.lower().strip()
    # Normalize a trailing dot (e.g. 'uat.') the same way the other env
    # helpers do; previously 'uat.' produced 'https://uat..urs...'.
    if env.endswith("."):
        env = env[:-1]
    if env in ['', 'ops', 'prod', 'production']:
        env = ""  # production uses the bare urs host
    url = 'https://{}.urs.earthdata.nasa.gov/api/users/{}'.format(
        env, endpoint)
    # drop the empty-subdomain dot left behind for production
    url = url.replace("://.urs", "://urs")
    return url
def test_always(self):
    """Test the always function"""
    # (expected, positional args, keyword args, assertion message)
    cases = [
        ({}, ("wrong type",), {}, 'wrong thing'),
        ({}, ([],), {}, 'wrong type'),
        ({}, ({},), {}, 'same type'),
        ({'a': 'b'}, ({'a': 'b'},), {}, 'populated dict, assumed'),
        ({'a': 'b'}, ({'a': 'b'},), {'otype': dict}, 'populated dict'),
        (['a', 'b'], (['a', 'b'],), {'otype': list}, 'populated list'),
        ((1, 2, 3), ((1, 2, 3),), {'otype': tuple}, 'populated tuple'),
        ((1, 2, 3), ((1, 2, 3), tuple), {}, 'populated tuple, positional'),
        # None use cases
        ({}, (None,), {}, 'assumed, none, dict'),
        ({}, (None,), {'otype': dict}, 'None, dict'),
        ([], (None,), {'otype': list}, 'None, list'),
        ((), (None,), {'otype': tuple}, 'None, tuple'),
        ((), (None, tuple), {}, 'None, tuple, positional'),
    ]
    for expected, args, kwargs, msg in cases:
        self.assertEqual(expected, com.always(*args, **kwargs), msg)