def test_isotime_iso():
    iso_date = now_as_iso()
    iso_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z')
    assert isinstance(iso_date, str)
    assert iso_format.match(iso_date)
    assert epoch_to_iso(iso_to_epoch(iso_date)) == iso_date
    assert iso_date == epoch_to_iso(local_to_epoch(epoch_to_local(iso_to_epoch(iso_date))))
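# A minimal sketch of the isotime helpers the test above exercises, assuming
# they are thin datetime wrappers over UTC (the real isotime module may differ
# in implementation details; names prefixed _sketch_ are hypothetical):
from datetime import datetime, timedelta, timezone

def _sketch_now_as_iso(offset_seconds: float = 0.0) -> str:
    # ISO-8601 UTC timestamp with microseconds and a trailing 'Z'
    dt = datetime.now(timezone.utc) + timedelta(seconds=offset_seconds)
    return dt.strftime('%Y-%m-%dT%H:%M:%S.%f') + 'Z'

def _sketch_iso_to_epoch(iso: str) -> float:
    dt = datetime.strptime(iso, '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=timezone.utc)
    return dt.timestamp()

def _sketch_epoch_to_iso(epoch: float) -> str:
    dt = datetime.fromtimestamp(epoch, tz=timezone.utc)
    return dt.strftime('%Y-%m-%dT%H:%M:%S.%f') + 'Z'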
def main():
    for day in range(31):
        today = now_as_iso(24 * 60 * 60 * day)
        query = "__expiry_ts__:[%s TO %s+1DAY]" % (today, today)
        minutes_params = (
            ("rows", "0"),
            ("facet", "on"),
            ("facet.date", "__expiry_ts__"),
            ("facet.date.start", today),
            ("facet.date.end", today + "+1DAY"),
            ("facet.date.gap", "+1MINUTE"),
            ("facet.mincount", "1"),
        )
        res_minutes = datastore.direct_search("emptyresult", query, args=minutes_params)
        minutes = res_minutes.get("facet_counts", {}).get("facet_dates", {}).get("__expiry_ts__", {})
        for minute, minute_count in minutes.iteritems():
            if minute in ['end', 'gap', 'start']:
                continue
            if minute_count > 0:
                for x in datastore.stream_search('emptyresult', "__expiry_ts__:[%s TO %s+1MINUTE]" % (minute, minute)):
                    try:
                        created = epoch_to_iso(iso_to_epoch(today) - (15 * 24 * 60 * 60))
                        riak_key = x['_yz_rk']
                        path = os.path.join(directory, created[:10]) + '.index'
                        fh = get_filehandle(path)
                        fh.write(riak_key + "\n")
                        fh.flush()
                    except:  # pylint: disable=W0702
                        logger.exception('Unhandled exception:')
def load_journal(name, delete_queue):
    working_dir = config.core.expiry.journal.directory
    expiry_ttl = config.core.expiry.journal.ttl * 24 * 60 * 60
    log.debug("Expiry will load journal in %s for %s bucket." % (working_dir, name))
    while True:
        try:
            for listed_file in os.listdir(working_dir):
                journal_file = os.path.join(working_dir, listed_file)
                if os.path.isfile(journal_file):
                    if journal_file.endswith(name):
                        cur_time = now()
                        day = "%sT00:00:00Z" % listed_file.split(".")[0]
                        file_time = iso_to_epoch(day)
                        if file_time + expiry_ttl <= cur_time:
                            with open(journal_file) as to_delete_journal:
                                count = 0
                                for line in to_delete_journal:
                                    if count % 1000 == 0:
                                        while delete_queue.length() > MAX_QUEUE_LENGTH:
                                            time.sleep(SLEEP_TIME)
                                    line = line.strip()
                                    if line:
                                        delete_queue.push(line)
                                    count += 1
                            os.unlink(journal_file)
        except OSError:
            pass
        time.sleep(SLEEP_TIME)
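# The journal filename convention assumed by load_journal() above is
# "<YYYY-MM-DD>.<bucket>" (a hypothetical example, inferred from the parsing):
#   "2024-01-01.emptyresult".split(".")[0]  -> "2024-01-01"
#   day = "2024-01-01T00:00:00Z"            -> iso_to_epoch(day) for the TTL check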
def seconds(t, default=0):
    try:
        try:
            return float(t)
        except ValueError:
            return iso_to_epoch(t)
    except:  # pylint:disable=W0702
        return default
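# Usage sketch for seconds(): numeric strings pass straight through as floats,
# ISO timestamps fall back to iso_to_epoch(), and anything unparseable yields
# the default (assuming iso_to_epoch treats the timestamp as UTC):
assert seconds("12.5") == 12.5
assert seconds("1970-01-01T00:01:00.000000Z") == 60.0
assert seconds("not-a-time", default=-1) == -1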
def test_isotime_local():
    local_date = now_as_local()
    local_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}.*')
    assert isinstance(local_date, str)
    assert local_format.match(local_date)
    assert epoch_to_local(local_to_epoch(local_date)) == local_date
    assert local_date == epoch_to_local(iso_to_epoch(epoch_to_iso(local_to_epoch(local_date))))
def git_clone_repo(source: Dict[str, Any], previous_update=None) -> List:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)
    ssl_ignore = source.get("ssl_ignore_errors", False)
    ca_cert = source.get("ca_cert")

    git_env = {}
    if ssl_ignore:
        # Environment variable values must be strings
        git_env['GIT_SSL_NO_VERIFY'] = '1'

    if ca_cert:
        LOGGER.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)
        git_env['GIT_SSL_CAINFO'] = certifi.where()

    clone_dir = os.path.join(UPDATE_DIR, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)

    if key:
        LOGGER.info(f"key found for {url}")
        # Save the key to a file
        git_ssh_identity_file = os.path.join(tempfile.gettempdir(), 'id_rsa')
        with open(git_ssh_identity_file, 'w') as key_fh:
            key_fh.write(key)
        os.chmod(git_ssh_identity_file, 0o0400)

        git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file}"
        git_env['GIT_SSH_COMMAND'] = git_ssh_cmd

    repo = Repo.clone_from(url, clone_dir, env=git_env)

    # Check repo last commit; only the most recent commit needs to be inspected
    if previous_update:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)
        for c in repo.iter_commits():
            if c.committed_date < previous_update:
                return []
            break

    sigma_files = []
    for path_in_dir, _, files in os.walk(clone_dir):
        for filename in files:
            filepath = os.path.join(clone_dir, path_in_dir, filename)
            if pattern:
                if re.match(pattern, filepath):
                    sigma_files.append((filepath, get_sha256_for_file(filepath)))
            else:
                if re.match(r'.*\.yml', filepath):
                    sigma_files.append((filepath, get_sha256_for_file(filepath)))

    return sigma_files
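# add_cacert() is referenced here but not shown in these snippets; a minimal
# sketch based on the inline version in url_update() further below, which
# appends the PEM block to the certifi CA bundle so both requests and git
# (via GIT_SSL_CAINFO) will trust it:
def _sketch_add_cacert(cert: str) -> None:
    with open(certifi.where(), 'a') as ca_editor:
        ca_editor.write(f"\n{cert}")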
def _get_cached_signatures(signature_cache, last_modified, query_hash):
    try:
        if signature_cache.getmtime(query_hash) > iso_to_epoch(last_modified):
            s = signature_cache.get(query_hash)
            return make_file_response(
                s, "al_yara_signatures.yar", len(s), content_type="text/yara"
            )
    except:  # pylint: disable=W0702
        LOGGER.exception('Failed to read cached signatures:')

    return None
def update_available(**_):  # pylint: disable=W0613
    """
    Check if updated signatures are available.

    Variables:
    None

    Arguments:
    last_update    => ISO time of last update.

    Data Block:
    None

    Result example:
    { "update_available" : true }      # If updated rules are available.
    """
    last_update = iso_to_epoch(request.args.get('last_update'))
    last_modified = iso_to_epoch(STORAGE.get_signatures_last_modified())

    return make_api_response({"update_available": last_modified > last_update})
def _get_version_map(self):
    self.engine_map = {}
    engine_list = []
    newest_dat = 0
    oldest_dat = now()

    url = self.cfg.get('BASE_URL') + "stat/engines"
    try:
        r = self.session.get(url=url, timeout=self.timeout)
    except requests.exceptions.Timeout:
        raise Exception("Metadefender service timeout.")

    engines = r.json()
    for engine in engines:
        if self.cfg.get("MD_VERSION") == 4:
            name = self._format_engine_name(engine["eng_name"])
            version = engine['eng_ver']
            def_time = engine['def_time']
            etype = engine['engine_type']
        elif self.cfg.get("MD_VERSION") == 3:
            name = self._format_engine_name(engine["eng_name"]).replace("scanengine", "")
            version = engine['eng_ver']
            def_time = engine['def_time'].replace(" AM", "").replace(" PM", "").replace("/", "-").replace(" ", "T")
            def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
            etype = engine['eng_type']
        else:
            raise Exception("Unknown metadefender version")

        # Compute newest DAT
        dat_epoch = iso_to_epoch(def_time)
        if dat_epoch > newest_dat:
            newest_dat = dat_epoch

        if dat_epoch < oldest_dat and dat_epoch != 0 and etype in ["av", "Bundled engine"]:
            oldest_dat = dat_epoch

        self.engine_map[name] = {
            'version': version,
            'def_time': iso_to_local(def_time)[:19]
        }
        engine_list.append(name)
        engine_list.append(version)
        engine_list.append(def_time)

    self.newest_dat = epoch_to_local(newest_dat)[:19]
    self.oldest_dat = epoch_to_local(oldest_dat)[:19]
    self.dat_hash = hashlib.md5("".join(engine_list)).hexdigest()
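# Worked example of the MetaDefender v3 def_time mangling above (assuming the
# API returns US-style dates such as "12/25/2020 10:30:45 AM"):
#   strip AM/PM, "/"->"-", " "->"T"   -> "12-25-2020T10:30:45"
#   [6:10] + "-" + [:5] + [10:] + "Z" -> "2020" + "-" + "12-25" + "T10:30:45" + "Z"
#   result                            -> "2020-12-25T10:30:45Z"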
def update_available(**_):
    """
    Check if updated signatures are available.

    Variables:
    None

    Arguments:
    last_update    => ISO time of last update.
    type           => Signature type to check

    Data Block:
    None

    Result example:
    { "update_available" : true }      # If updated rules are available.
    """
    sig_type = request.args.get('type', '*')
    last_update = iso_to_epoch(request.args.get('last_update', '1970-01-01T00:00:00.000000Z'))
    last_modified = iso_to_epoch(STORAGE.get_signature_last_modified(sig_type))

    return make_api_response({"update_available": last_modified > last_update})
def git_clone_repo(download_directory: str, source: Dict[str, Any], cur_logger, previous_update=None) -> List[str]:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)

    clone_dir = os.path.join(download_directory, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)
    os.makedirs(clone_dir)

    if key:
        cur_logger.info(f"key found for {url}")
        # Save the key to a file
        git_ssh_identity_file = os.path.join(tempfile.gettempdir(), 'id_rsa')
        if os.path.exists(git_ssh_identity_file):
            os.unlink(git_ssh_identity_file)
        with open(git_ssh_identity_file, 'w') as key_fh:
            key_fh.write(key)
        os.chmod(git_ssh_identity_file, 0o0400)

        git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file}"
        repo = Repo.clone_from(url, clone_dir, env={"GIT_SSH_COMMAND": git_ssh_cmd})
    else:
        repo = Repo.clone_from(url, clone_dir)

    # Check repo last commit; only the most recent commit needs to be inspected
    if previous_update:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)
        for c in repo.iter_commits():
            if c.committed_date < previous_update:
                cur_logger.info("There are no new commits, skipping repository...")
                return []
            break

    if pattern:
        files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(clone_dir)
                 for f in filenames if re.match(pattern, f)]
    else:
        files = glob.glob(os.path.join(clone_dir, '*.yar*'))

    if not files:
        cur_logger.warning(f"Could not find any yara file matching pattern: {pattern or '*.yar*'}")

    return files
def git_clone_repo(source: Dict[str, Any], previous_update: int = None, default_pattern: str = "*",
                   logger=None, output_dir: str = None) -> List[Tuple[str, str]]:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)
    username = source.get('username', None)
    password = source.get('password', None)

    ignore_ssl_errors = source.get("ssl_ignore_errors", False)
    ca_cert = source.get("ca_cert")
    proxy = source.get('proxy', None)
    auth = f'{username}:{password}@' if username and password else None

    git_config = None
    git_env = {}

    if ignore_ssl_errors:
        git_env['GIT_SSL_NO_VERIFY'] = '1'

    # Let https requests go through proxy
    if proxy:
        os.environ['https_proxy'] = proxy

    if ca_cert:
        logger.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)
        git_env['GIT_SSL_CAINFO'] = certifi.where()

    if auth:
        logger.info("Credentials provided for auth..")
        url = re.sub(r'^(?P<scheme>https?://)', fr'\g<scheme>{auth}', url)

    clone_dir = os.path.join(output_dir, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)

    with tempfile.NamedTemporaryFile() as git_ssh_identity_file:
        if key:
            logger.info(f"key found for {url}")
            # Save the key to a file
            git_ssh_identity_file.write(key.encode())
            git_ssh_identity_file.seek(0)
            os.chmod(git_ssh_identity_file.name, 0o0400)

            git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file.name}"
            git_env['GIT_SSH_COMMAND'] = git_ssh_cmd

        repo = Repo.clone_from(url, clone_dir, env=git_env, git_config=git_config)

        # Check repo last commit; only the most recent commit needs to be inspected
        if previous_update:
            if isinstance(previous_update, str):
                previous_update = iso_to_epoch(previous_update)
            for c in repo.iter_commits():
                if c.committed_date < previous_update and not FORCE_UPDATE:
                    raise SkipSource()
                break

    # Clear proxy setting
    if proxy:
        del os.environ['https_proxy']

    return filter_downloads(clone_dir, pattern, default_pattern)
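# filter_downloads() is referenced above but not shown in these snippets; a
# plausible sketch, assuming it walks a directory and returns (path, sha256)
# tuples for files whose path matches the source pattern (a regex), falling
# back to the default pattern treated as a glob. The name is hypothetical
# shorthand for whatever the real helper does:
import fnmatch

def _sketch_filter_downloads(output_path: str, pattern: str,
                             default_pattern: str = "*") -> List[Tuple[str, str]]:
    regex = re.compile(pattern) if pattern else re.compile(fnmatch.translate(default_pattern))
    matches = []
    for dirpath, _, filenames in os.walk(output_path):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            if regex.match(filepath):
                matches.append((filepath, get_sha256_for_file(filepath)))
    return matches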
def _get_version_map(self, node: str) -> None:
    """
    Get the versions of all engines running on a given node

    :param node: The IP of the MetaDefender node
    :return: None
    """
    newest_dat = 0
    oldest_dat = now()
    engine_list = []
    active_engines = 0
    failed_states = ["removed", "temporary failed", "permanently failed"]
    url = urljoin(node, 'stat/engines')

    try:
        self.log.debug(f"_get_version_map: GET {url}")
        r = self.session.get(url=url, timeout=self.timeout)
        engines = r.json()

        for engine in engines:
            if engine['active'] and engine["state"] not in failed_states:
                active_engines += 1

            if self.config.get("md_version") == 4:
                name = self._format_engine_name(engine["eng_name"])
                version = engine['eng_ver']
                def_time = engine['def_time']
                etype = engine['engine_type']
            elif self.config.get("md_version") == 3:
                name = self._format_engine_name(engine["eng_name"]).replace("scanengine", "")
                version = engine['eng_ver']
                def_time = engine['def_time'].replace(" AM", "").replace(" PM", "").replace("/", "-").replace(" ", "T")
                def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
                etype = engine['eng_type']
            else:
                raise Exception("Unknown version of MetaDefender")

            # Compute newest DAT
            dat_epoch = iso_to_epoch(def_time)
            if dat_epoch > newest_dat:
                newest_dat = dat_epoch

            if dat_epoch < oldest_dat and dat_epoch != 0 and etype in ["av", "Bundled engine"]:
                oldest_dat = dat_epoch

            self.nodes[node]['engine_map'][name] = {
                'version': version,
                'def_time': iso_to_local(def_time)[:19]
            }
            engine_list.append(name)
            engine_list.append(version)
            engine_list.append(def_time)

        self.nodes[node]['engine_count'] = active_engines
        self.nodes[node]['newest_dat'] = epoch_to_local(newest_dat)[:19]
        self.nodes[node]['oldest_dat'] = epoch_to_local(oldest_dat)[:19]
        self.nodes[node]['engine_list'] = "".join(engine_list)
    except exceptions.Timeout:
        raise Exception(f"Node ({node}) timed out after {self.timeout}s while trying to get engine version map")
    except ConnectionError:
        raise Exception(f"Unable to connect to node ({node}) while trying to get engine version map")
def url_download(source: Dict[str, Any], previous_update=None) -> List:
    """
    :param source: Source configuration block (name, uri, credentials, ...)
    :param previous_update: Epoch or ISO time of the last successful update
    :return: List of (file path, sha256) tuples for the downloaded rule files
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    headers = source.get('headers', None)

    LOGGER.info(f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}.")
    if ca_cert:
        LOGGER.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update:
                # File has not been modified since last update, do nothing
                return []

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified']:
            # File has not been modified since last update, do nothing
            return []
        elif response.ok:
            if not os.path.exists(UPDATE_DIR):
                os.makedirs(UPDATE_DIR)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(UPDATE_DIR, file_name)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            rules_files = None
            if file_name.endswith('tar.gz'):
                extract_dir = os.path.join(UPDATE_DIR, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)
                rules_files = set()
                for path_in_dir, _, files in os.walk(extract_dir):
                    for filename in files:
                        filepath = os.path.join(extract_dir, path_in_dir, filename)
                        if pattern:
                            if re.match(pattern, filepath):
                                rules_files.add(filepath)
                        else:
                            rules_files.add(filepath)

            return [(f, get_sha256_for_file(f)) for f in rules_files or [file_path]]
    except requests.Timeout:
        # TODO: should we retry?
        pass
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        LOGGER.info(str(e))
        exit()
        # TODO: Should we exit even if one file fails to download?
        # Or should we continue downloading other files?
    finally:
        # Close the requests session
        session.close()
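# A caveat on the Last-Modified handling used throughout these downloaders:
# the header is always GMT, but time.mktime() interprets the parsed
# struct_time in *local* time, so the comparison can drift by the UTC offset.
# A timezone-safe sketch of the same round-trip, using only stdlib calls:
import calendar
from email.utils import formatdate, parsedate

def _sketch_http_date_to_epoch(http_date: str) -> float:
    # calendar.timegm treats the struct_time as UTC, matching the header
    return calendar.timegm(parsedate(http_date))

def _sketch_epoch_to_http_date(epoch: float) -> str:
    # RFC 1123 date, e.g. "Mon, 01 Jan 2024 00:00:00 GMT"
    return formatdate(epoch, usegmt=True)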
def url_download(download_directory: str, source: Dict[str, Any], cur_logger, previous_update=None) -> Optional[str]:
    if os.path.exists(download_directory):
        shutil.rmtree(download_directory)
    os.makedirs(download_directory)

    name = source['name']
    uri = source['uri']
    username = source.get('username', None)
    password = source.get('password', None)
    auth = (username, password) if username and password else None

    headers = source.get('headers', None)

    # Create a requests session
    session = requests.Session()

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update:
                # File has not been modified since last update, do nothing
                cur_logger.info("The file has not been modified since last run, skipping...")
                return

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified']:
            # File has not been modified since last update, do nothing
            cur_logger.info("The file has not been modified since last run, skipping...")
            return
        elif response.ok:
            file_name = os.path.basename(f"{name}.yar")  # TODO: make filename as source name with extension .yar
            file_path = os.path.join(download_directory, file_name)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            # Return file_path
            return file_path
    except requests.Timeout:
        # TODO: should we retry?
        pass
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        cur_logger.info(str(e))
        exit()
        # TODO: Should we exit even if one file fails to download?
        # Or should we continue downloading other files?
    finally:
        # Close the requests session
        session.close()
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis

    Variables:
    sha256         => Resource locator (SHA256)

    Arguments (Optional):
    copy_sid       => Mimic the attributes of this SID.
    name           => Name of the file for the submission

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    file_info = STORAGE.file.get(sha256, as_obj=False)
    if not file_info:
        return make_api_response(
            {},
            f"File {sha256} cannot be found on the server therefore it cannot be resubmitted.",
            status_code=404)

    if not Classification.is_accessible(user['classification'], file_info['classification']):
        return make_api_response(
            "", "You are not allowed to re-submit a file that you don't have access to", 403)

    submit_result = None
    try:
        copy_sid = request.args.get('copy_sid', None)
        name = safe_str(request.args.get('name', sha256))

        if copy_sid:
            submission = STORAGE.submission.get(copy_sid, as_obj=False)
        else:
            submission = None

        if submission:
            if not Classification.is_accessible(user['classification'], submission['classification']):
                return make_api_response(
                    "", "You are not allowed to re-submit a submission that you don't have access to", 403)

            submission_params = submission['params']
            submission_params['classification'] = submission['classification']
            expiry = submission['expiry_ts']
        else:
            submission_params = ui_to_submission_params(load_user_settings(user))
            submission_params['classification'] = file_info['classification']
            expiry = file_info['expiry_ts']

        # Calculate original submit time
        if submission_params['ttl'] and expiry:
            submit_time = epoch_to_iso(iso_to_epoch(expiry) - submission_params['ttl'] * 24 * 60 * 60)
        else:
            submit_time = None

        if not FILESTORE.exists(sha256):
            return make_api_response(
                {},
                "File %s cannot be found on the server therefore it cannot be resubmitted." % sha256,
                status_code=404)

        files = [{'name': name, 'sha256': sha256, 'size': file_info['size']}]

        submission_params['submitter'] = user['uname']
        submission_params['quota_item'] = True
        if 'priority' not in submission_params:
            submission_params['priority'] = 500
        submission_params['description'] = "Resubmit %s for Dynamic Analysis" % name
        if "Dynamic Analysis" not in submission_params['services']['selected']:
            submission_params['services']['selected'].append("Dynamic Analysis")

        try:
            submission_obj = Submission({
                "files": files,
                "params": submission_params,
                "time": submit_time
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        submit_result = SubmissionClient(datastore=STORAGE, filestore=FILESTORE,
                                         config=config, identify=IDENTIFY).submit(submission_obj)
        submission_received(submission_obj)
        return make_api_response(submit_result.as_primitives())

    except SubmissionException as e:
        return make_api_response("", err=str(e), status_code=400)
    finally:
        if submit_result is None:
            decrement_submission_quota(user)
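# The "original submit time" arithmetic used by both resubmit endpoints:
# expiry_ts was set at submission to submit time plus ttl days, so subtracting
# ttl days of seconds recovers the submit time. A hypothetical example:
#   expiry = "2024-01-16T00:00:00.000000Z", ttl = 15
#   iso_to_epoch(expiry) - 15 * 24 * 60 * 60  -> epoch for 2024-01-01
#   epoch_to_iso(...)                         -> "2024-01-01T00:00:00.000000Z"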
def resubmit_submission_for_analysis(sid, *args, **kwargs):
    """
    Resubmit a submission for analysis with the exact same parameters as before

    Variables:
    sid         => Submission ID to re-submit

    Arguments:
    None

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    submit_result = None
    try:
        submission = STORAGE.submission.get(sid, as_obj=False)

        if submission:
            if not Classification.is_accessible(user['classification'], submission['classification']):
                return make_api_response(
                    "", "You are not allowed to re-submit a submission that you don't have access to", 403)

            submission_params = submission['params']
            submission_params['classification'] = submission['classification']
        else:
            return make_api_response({}, "Submission %s does not exist." % sid, status_code=404)

        submission_params['submitter'] = user['uname']
        submission_params['quota_item'] = True
        submission_params['description'] = "Resubmit %s for analysis" % ", ".join(
            [x['name'] for x in submission["files"]])

        # Calculate original submit time
        if submission_params['ttl'] and submission['expiry_ts']:
            submit_time = epoch_to_iso(
                iso_to_epoch(submission['expiry_ts']) - submission_params['ttl'] * 24 * 60 * 60)
        else:
            submit_time = None

        try:
            submission_obj = Submission({
                "files": submission["files"],
                "metadata": submission['metadata'],
                "params": submission_params,
                "time": submit_time
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        submit_result = SubmissionClient(datastore=STORAGE, filestore=FILESTORE,
                                         config=config, identify=IDENTIFY).submit(submission_obj)
        submission_received(submission_obj)
        return make_api_response(submit_result.as_primitives())

    except SubmissionException as e:
        return make_api_response("", err=str(e), status_code=400)
    finally:
        if submit_result is None:
            decrement_submission_quota(user)
        if now - epoch >= time_diff:
            user = key.split(" ")[0]
            client.hdel('c-tracker-quota', key)
            client.decr('quota-{user}'.format(user=user))
            logger.warning(
                "API request: \"{key}\" was removed from ongoing "
                "request because it reached the timeout.".format(key=key))
        else:
            logger.debug(
                "{key} is ok. [{now} - {value} < {time_diff}]".format(
                    key=key, now=now, value=epoch, time_diff=time_diff))

    # Submission Quota tracking
    for key in persist.keys('submissions-*'):
        data = persist.hgetall(key)
        for sid, t in data.iteritems():
            epoch = isotime.iso_to_epoch(json.loads(t))
            now = time.time()
            if now - epoch >= quota_time_diff:
                user = key.split('-')[1]
                logger.warning(
                    'Quota item "{sid}" for user "{user}" was removed'.format(
                        sid=sid, user=user))
                persist.hdel(key, sid)

    # Web sessions tracking
    sessions = client.hgetall('flask_sessions')
    if sessions:
        for k, v in sessions.iteritems():
            v = json.loads(v)
            now = time.time()
            expire_at = v.get('expire_at', 0)
def url_update(test_func=test_file) -> None:
    """
    Using an update configuration file as an input, which contains a list of sources,
    download all the file(s) which have been modified since the last update.
    """
    update_config = {}

    # Load configuration
    if UPDATE_CONFIGURATION_PATH and os.path.exists(UPDATE_CONFIGURATION_PATH):
        with open(UPDATE_CONFIGURATION_PATH, 'r') as yml_fh:
            update_config = yaml.safe_load(yml_fh)
    else:
        LOGGER.warning("Could not find update configuration file.")
        exit(1)

    # Cleanup output path
    if os.path.exists(UPDATE_OUTPUT_PATH):
        if os.path.isdir(UPDATE_OUTPUT_PATH):
            shutil.rmtree(UPDATE_OUTPUT_PATH)
        else:
            os.unlink(UPDATE_OUTPUT_PATH)
    os.makedirs(UPDATE_OUTPUT_PATH)

    # Get sources
    sources = update_config.get('sources', None)
    # Exit if no update sources given
    if not sources:
        exit()

    # Parse updater configuration
    previous_update = update_config.get('previous_update', None)
    previous_hash = update_config.get('previous_hash', None) or {}
    if previous_hash:
        previous_hash = json.loads(previous_hash)
    if isinstance(previous_update, str):
        previous_update = iso_to_epoch(previous_update)

    # Create a requests session
    session = requests.Session()

    files_sha256 = {}

    # Go through each source and download file
    for source in sources:
        uri = source['uri']
        name = source['name']

        if not uri or not name:
            LOGGER.warning(f"Invalid source: {source}")
            continue

        LOGGER.info(f"Downloading file '{name}' from uri '{uri}' ...")

        username = source.get('username', None)
        password = source.get('password', None)
        auth = (username, password) if username and password else None
        ca_cert = source.get('ca_cert', None)
        ignore_ssl_errors = source.get('ssl_ignore_errors', False)

        headers = source.get('headers', None)

        if ca_cert:
            # Add certificate to requests
            cafile = certifi.where()
            with open(cafile, 'a') as ca_editor:
                ca_editor.write(f"\n{ca_cert}")

        session.verify = not ignore_ssl_errors

        try:
            # Check the response header for the last modified date
            response = session.head(uri, auth=auth, headers=headers)
            last_modified = response.headers.get('Last-Modified', None)
            if last_modified:
                # Convert the last modified time to epoch
                last_modified = time.mktime(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

                # Compare the last modified time with the last updated time
                if update_config.get('previous_update', None) and last_modified <= previous_update:
                    # File has not been modified since last update, do nothing
                    LOGGER.info("File has not changed since last time. Skipping...")
                    continue

            if update_config.get('previous_update', None):
                # Use a separate variable so the epoch value survives the next loop iteration
                if_modified_since = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime(previous_update))
                if headers:
                    headers['If-Modified-Since'] = if_modified_since
                else:
                    headers = {'If-Modified-Since': if_modified_since}

            response = session.get(uri, auth=auth, headers=headers)

            # Check the response code
            if response.status_code == requests.codes['not_modified']:
                # File has not been modified since last update, do nothing
                LOGGER.info("File has not changed since last time. Skipping...")
                continue
            elif response.ok:
                file_path = os.path.join(UPDATE_OUTPUT_PATH, name)
                with open(file_path, 'wb') as f:
                    f.write(response.content)

                if not test_func(file_path):
                    os.unlink(file_path)
                    LOGGER.warning("The downloaded file was invalid. It will not be part of this update...")
                    continue

                # Append the SHA256 of the file to a list of downloaded files
                sha256 = get_sha256_for_file(file_path)
                if previous_hash.get(name, None) != sha256:
                    files_sha256[name] = sha256
                else:
                    LOGGER.info("File has the same hash as last time. Skipping...")

                LOGGER.info("File successfully downloaded!")
        except requests.Timeout:
            LOGGER.warning(f"Cannot find the file for source {name} with url {uri} - (Timeout)")
            continue
        except Exception as e:
            # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
            LOGGER.warning(f"Source {name} failed with error: {str(e)}")

    if files_sha256:
        new_hash = deepcopy(previous_hash)
        new_hash.update(files_sha256)

        # Check if the new update hash matches the previous update hash
        if new_hash == previous_hash:
            # Update file(s) not changed, delete the downloaded files and exit
            shutil.rmtree(UPDATE_OUTPUT_PATH, ignore_errors=True)
            exit()

        # Create the response yaml
        with open(os.path.join(UPDATE_OUTPUT_PATH, 'response.yaml'), 'w') as yml_fh:
            yaml.safe_dump(dict(hash=json.dumps(new_hash)), yml_fh)

        LOGGER.info("Service update file(s) successfully downloaded")

    # Close the requests session
    session.close()
def test_isotime_epoch():
    epoch_date = now(200)
    assert epoch_date == local_to_epoch(epoch_to_local(epoch_date))
    assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date))
    assert isinstance(epoch_date, float)
def url_download(source: Dict[str, Any], previous_update: int = None, logger=None,
                 output_dir: str = None) -> List[Tuple[str, str]]:
    """
    :param source: Source configuration block (name, uri, credentials, ...)
    :param previous_update: Epoch or ISO time of the last successful update
    :return: List of (file path, sha256) tuples for the downloaded rule files
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    proxy = source.get('proxy', None)
    headers_list = source.get('headers', [])
    headers = {header['name']: header['value'] for header in headers_list}

    logger.info(f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}.")
    if ca_cert:
        logger.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    # Let https requests go through proxy
    if proxy:
        os.environ['https_proxy'] = proxy

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update and not FORCE_UPDATE:
                # File has not been modified since last update, do nothing
                raise SkipSource()

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified'] and not FORCE_UPDATE:
            # File has not been modified since last update, do nothing
            raise SkipSource()
        elif response.ok:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(output_dir, file_name)
            with open(file_path, 'wb') as f:
                for content in response.iter_content(BLOCK_SIZE):
                    f.write(content)

            # Clear proxy setting
            if proxy:
                del os.environ['https_proxy']

            if file_name.endswith('tar.gz') or file_name.endswith('zip'):
                extract_dir = os.path.join(output_dir, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)
                return filter_downloads(extract_dir, pattern)
            else:
                return [(file_path, get_sha256_for_file(file_path))]
        else:
            logger.warning(f"Download not successful: {response.content}")
            return []
    except SkipSource:
        # Raise to calling function for handling
        raise
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        logger.warning(str(e))
        exit()
    finally:
        # Close the requests session
        session.close()