def test_create_dir_doesnt_exist_no_raises(self, mock_os, mock_path):
    mock_path.exists.return_value = False
    mock_os.makedirs.side_effect = OSError(errno.EEXIST, "exists")
    create_dir("testpath")
    self.assertTrue(
        mock_os.makedirs.called,
        "Failed to make directories if path not present."
    )
def update_defs_from_s3(s3_client, bucket, prefix):
    create_dir(AV_DEFINITION_PATH)
    to_download = {}
    for file_prefix in AV_DEFINITION_FILE_PREFIXES:
        s3_best_time = None
        for file_suffix in AV_DEFINITION_FILE_SUFFIXES:
            filename = file_prefix + "." + file_suffix
            s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename)
            local_path = os.path.join(AV_DEFINITION_PATH, filename)
            s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path)
            s3_time = time_from_s3(s3_client, bucket, s3_path)
            if s3_best_time is not None and s3_time < s3_best_time:
                print("Not downloading older file in series: %s" % filename)
                continue
            else:
                s3_best_time = s3_time
            if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5:
                print("Not downloading %s because local md5 matches s3." % filename)
                continue
            if s3_md5:
                print("Downloading definition file %s from s3://%s"
                      % (filename, os.path.join(bucket, prefix)))
                to_download[file_prefix] = {
                    "s3_path": s3_path,
                    "local_path": local_path,
                }
    return to_download
def _init(self): """ Configs for the configuration file """ self._modules_dir = os.path.dirname(os.path.realpath(__file__)) self._source_dir = os.path.abspath( os.path.join(self._modules_dir, os.pardir)) self._home_dir = os.path.abspath( os.path.join(self._source_dir, os.pardir)) self._config_file = "{}/{}".format(self._home_dir, USER_CONFIG_FILE) """ Configs for the logging """ self.log_dir = "{}/{}".format(self._home_dir, "logs") create_dir(self.log_dir) LogConfig(self.log_dir, LOG_FILE_NAME, logging.ERROR) """ Configs for the db directory """ self.db_dir = "{}/{}".format(self._home_dir, "db") create_dir(self.db_dir) """ User defined Configs """ self.user_config = get_yaml_content(self._config_file) # overwrite cfg.yml values into env values if they exist. self.user_config = get_user_config_env_values(self.user_config, USER_CONFIGS_LIST) # Defaults self.user_config[SSL_CERTFILE] = get_user_defined_configuration( self.user_config, SSL_CERTFILE, SSL_CERTFILE_DEFAULT) self.user_config[SSL_KEYFILE] = get_user_defined_configuration( self.user_config, SSL_KEYFILE, SSL_KEYFILE_DEFAULT) self.user_config[SSL_PASSWORD] = get_user_defined_configuration( self.user_config, SSL_PASSWORD, NO_VALUE_PROVIDED) self.user_config[SSL_CAFILE] = get_user_defined_configuration( self.user_config, SSL_CAFILE, SSL_CAFILE_DEFAULT) self._user_defind_required_configs(self.user_config) self._user_defind_runtime_safeguard(self.user_config)
def configure():
    if not common.is_cmd_installed("docker"):
        common.msg("System", "docker is not installed", "warn")
        return False
    common.msg("Perform ", "docker config")
    proxy_dir = '/etc/systemd/system/docker.service.d/'
    http_proxy_file = 'http-proxy.conf'
    https_proxy_file = 'https-proxy.conf'
    proxy = apps['docker']
    # comment out the Environment line when the proxy is disabled
    if proxy['use_proxy']:
        comment = ""
    else:
        comment = "#"
    # Data
    http_proxy_content = """[Service]
{1}Environment="HTTP_PROXY={0}"
""".format(proxy['http_proxy_target'], comment)
    https_proxy_content = """[Service]
{1}Environment="HTTPS_PROXY={0}"
""".format(proxy['https_proxy_target'], comment)
    # action
    common.create_dir(proxy_dir)
    common.create_file(proxy_dir, http_proxy_file, http_proxy_content)
    common.create_file(proxy_dir, https_proxy_file, https_proxy_content)
    # reload unit files before restarting so the new drop-ins take effect
    call(["systemctl", "daemon-reload"])
    call(["service", "docker", "restart"])
def get_local_file(input, output):
    black_list = ["package.yaml", ".stage", "release", "control"]
    valid_components = []
    for path in input.split("/"):
        if path == ".":
            pass
        elif path == "..":
            raise RuntimeError("'..' is not allowed when specifying a local path")
        else:
            valid_components.append(path)
    if len(valid_components) > 0 and valid_components[0] in black_list:
        raise RuntimeError(
            "'./{}' is reserved for system use and cannot be used to specify a local file"
            .format(valid_components[0]))
    if len(valid_components) == 0:
        url = "*"
    else:
        url = os.path.join(*valid_components)
    file_list = glob.glob(url)
    if len(file_list) == 0:
        print_error("Cannot find any file in '{}'".format(url))
    create_dir(output)
    for file in file_list:
        if file in black_list:
            continue
        dest_path = os.path.join(output, file)
        # print("{} -> {}".format(file, dest_path))
        if os.path.isdir(file):
            shutil.copytree(file, dest_path)
        else:
            shutil.copy(file, dest_path)
def generate_hive_queries(fields, tablename):
    # Create dir where we will write the results
    RESULTS_DIR = HIVE_QUERY_SCRIPTS_PATH + '/' + tablename
    create_dir(RESULTS_DIR)
    query_file = open(HIVE_QUERY_SCRIPTS_PATH + '/' + tablename + '.sh', "w")
    query_file.write("#!/bin/bash \n")
    file_name = RESULTS_DIR + '/' + tablename + '.txt'
    to_execute = "hive -e \"" + HIVE_SETUP + " select count(*) as all_rows from " + tablename + ";\" > " + file_name + '\n'
    to_execute += "echo \"----\" >>" + file_name + '\n'
    # append (>>) so the row count written above is not clobbered
    to_execute += "hive -e \"" + HIVE_SETUP + " describe " + tablename + ";\" >> " + file_name + '\n'
    query_file.write(to_execute)
    for field in fields:
        column_name = field[0]
        file_name = RESULTS_DIR + '/' + column_name + '.txt'
        to_execute = ''
        if field[1] == 'STRING':
            to_execute += "hive -e \"" + HIVE_SETUP + " select count(distinct " + column_name + ") as distinct_col from " + tablename + ";\" > " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "echo \"Least frequent:\">>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select " + column_name + ", count(*) as count from " + tablename + " GROUP BY " + column_name + " ORDER BY count asc LIMIT 10;\" | sed 's/\\t/|/g' >> " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "echo \"Most frequent:\">>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select " + column_name + ", count(*) as count from " + tablename + " GROUP BY " + column_name + " ORDER BY count desc LIMIT 10;\" | sed 's/\\t/|/g' >> " + file_name + '\n'
        else:
            to_execute += "hive -e \"" + HIVE_SETUP + " select max(" + column_name + ") as max from " + tablename + ";\" > " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select min(" + column_name + ") as min from " + tablename + ";\" >> " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select percentile_approx(" + column_name + ", 0.5) as median from " + tablename + ";\" >> " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select avg(" + column_name + ") as mean from " + tablename + ";\" >> " + file_name + '\n'
            to_execute += "echo \"----\" >>" + file_name + '\n'
            to_execute += "hive -e \"" + HIVE_SETUP + " select stddev_pop(" + column_name + ") as stdev from " + tablename + ";\" >> " + file_name + '\n'
        query_file.write(to_execute)
    print color.GREEN + "Generated hive queries for table " + tablename + color.END
def update_defs_from_freshclam(path, library_path=""):
    create_dir(path)
    fc_env = os.environ.copy()
    if library_path:
        fc_env["LD_LIBRARY_PATH"] = "%s:%s" % (
            ":".join(current_library_search_path()),
            CLAMAVLIB_PATH,
        )
    print("Starting freshclam with defs in %s." % path)
    fc_proc = subprocess.Popen(
        [
            FRESHCLAM_PATH,
            "--config-file=./bin/freshclam.conf",
            "-u %s" % pwd.getpwuid(os.getuid())[0],
            "--datadir=%s" % path,
        ],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        env=fc_env,
    )
    output = fc_proc.communicate()[0]
    print("freshclam output:\n%s" % output)
    if fc_proc.returncode != 0:
        print("Unexpected exit code from freshclam: %s." % fc_proc.returncode)
    return fc_proc.returncode
def test_create_dir_doesnt_exist_but_raises(self, mock_os, mock_path):
    mock_path.exists.return_value = False
    mock_os.makedirs.side_effect = OSError(errno.ENAMETOOLONG, "nametoolong")
    with self.assertRaises(OSError):
        create_dir("testpath")
    self.assertTrue(
        mock_os.makedirs.called,
        "Failed to make directories if path not present."
    )
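# The two tests above pin down create_dir's contract: an OSError carrying
# errno.EEXIST is swallowed, while any other OSError propagates. A minimal
# sketch of a helper satisfying that contract (hypothetical; the snippets
# below import their own create_dir from a project-specific common module):
import errno
import os


def create_dir(path):
    """Create path (and any missing parents) if it does not already exist."""
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as exc:
            # Another process may create the directory between the exists()
            # check and makedirs(); "already exists" is benign, anything
            # else (e.g. ENAMETOOLONG) is re-raised.
            if exc.errno != errno.EEXIST:
                raise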
def lambda_handler(event, context): s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT) start_clamd(s3, s3_client) # Get some environment variables EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") start_time = get_timestamp() logging.debug("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): verify_s3_object_version(s3, s3_object) # Publish the start time of the scan if AV_SCAN_START_SNS_ARN not in [None, ""]: start_scan_time = get_timestamp() sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) with tempfile.TemporaryDirectory(prefix=EFS_SCAN_FILE_PATH) as tmpdirname: file_path = get_local_path(s3_object, tmpdirname) create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) scan_result, scan_signature = clamav.scan_file(file_path) logging.info( "Scan of s3://%s resulted in %s\n" % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)) result_time = get_timestamp() # Set the properties on the object with the scan results if "AV_UPDATE_METADATA" in os.environ: set_av_metadata(s3_object, scan_result, scan_signature, result_time) set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time) # Publish the scan results if AV_STATUS_SNS_ARN not in [None, ""]: sns_scan_results( sns_client, s3_object, AV_STATUS_SNS_ARN, scan_result, scan_signature, result_time, ) stop_scan_time = get_timestamp() logging.debug("Script finished at %s\n" % stop_scan_time)
def create_big_hive_query():
    print color.UNDERLINE + "Now starting with creating big hive query" + color.END
    create_dir(os.path.dirname(BIG_HIVE_SCRIPT))
    if os.path.exists(BIG_HIVE_SCRIPT):
        os.remove(BIG_HIVE_SCRIPT)
    scripts = [
        script for script in os.listdir(HIVE_SCRIPT_PATH)
        if not os.path.isdir(HIVE_SCRIPT_PATH + '/' + script)
    ]
    big_script = open(BIG_HIVE_SCRIPT, "w")
    for script in scripts:
        print "Now adding: " + script + color.END
        small_script = open(HIVE_SCRIPT_PATH + '/' + script, "r")
        big_script.write(small_script.read() + '\n\n')
        small_script.close()
    big_script.close()
    print color.GREEN + "Congratulations. It seems like all hive scripts have been made." + color.END
    print "You can find the results in " + color.UNDERLINE + BIG_HIVE_SCRIPT + color.END
def update_defs_from_s3(bucket, prefix):
    create_dir(AV_DEFINITION_PATH)
    for filename in AV_DEFINITION_FILENAMES:
        s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename)
        local_path = os.path.join(AV_DEFINITION_PATH, filename)
        s3_md5 = md5_from_s3_tags(bucket, s3_path)
        if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5:
            print("Not downloading %s because local md5 matches s3." % filename)
            continue
        if s3_md5:
            print("Downloading definition file %s from s3://%s"
                  % (filename, os.path.join(bucket, prefix)))
            s3.Bucket(bucket).download_file(s3_path, local_path)
def lambda_handler(event, context): s3 = boto3.resource("s3") sns_client = boto3.client("sns") # Get some environment variables EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") start_time = get_timestamp() print("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): verify_s3_object_version(s3, s3_object) # Publish the start time of the scan if AV_SCAN_START_SNS_ARN not in [None, ""]: start_scan_time = get_timestamp() sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) file_path = get_local_path(s3_object, "/tmp") create_dir(os.path.dirname(file_path)) try: s3_object.download_file(file_path) except OSError as e: remove_file(file_path) if e.errno == 28: print("Ran out of disk space. Scan failed") publish_results(s3_object, AV_STATUS_FAILED, "File too large to scan") return else: raise download_clamav_databases() scan_result, scan_signature = clamav.scan_file(file_path) print("Scan of s3://%s resulted in %s\n" % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)) publish_results(s3_object, scan_result, scan_signature) # Delete downloaded file to free up room on re-usable lambda function container remove_file(file_path) if str_to_bool( AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() print("Script finished at %s\n" % stop_scan_time)
def build_part(part):
    install_dir = part.install_path()
    source_dir = part.source_path()
    build_dir = part.build_path()
    if part.gconfig.clean_build:
        remove_dir(build_dir)
    # copy tree: source -> build
    if not os.path.exists(build_dir):
        shutil.copytree(source_dir, build_dir)
        part.set_build_state("CLEAN")
    else:
        print_warn("Part '{}' has already been built,".format(part.name),
                   "in order to perform a clean build, use --clean-build")
    remove_dir(install_dir)
    # get module parameters and put them in 'module_params'
    var_name_prefix = part.build + "-"
    prefix_len = len(var_name_prefix)
    module_params = dict(
        (k[prefix_len:], v) for k, v in part.doc.iteritems()
        if len(k) > prefix_len and k[:prefix_len] == var_name_prefix)
    env = {
        "BUILD_ROOT": build_path(part.gconfig),
        "INSTALL_ROOT": install_path(part.gconfig),
        "BUILD_DIR": build_dir,
        "INSTALL_DIR": install_dir
    }
    # do the build prepare
    os.chdir(build_dir)
    build_prepare(part, env)
    # do the make main
    os.chdir(build_dir)
    if build_override(part, env) is False:
        part.build_module.build(part, module_params, env.copy())
    part.set_build_state("BUILT")
    # do the make install
    os.chdir(build_dir)
    create_dir(install_dir)
    if install_override(part, env) is False:
        part.build_module.install(part, module_params, env.copy())
    part.set_build_state("INSTALLED")
def capture_screen(self):
    try:
        path = create_dir(con_path.pic_path)
        file_path = os.path.join(path, parser_time.strftime_hms() + '.png')
        print(file_path)
        self.driver.save_screenshot(file_path)
        return file_path
    except Exception as e:
        print('Screenshot failed')
        raise e
def update_defs_from_freshclam(path):
    create_dir(path)
    logging.info("Starting freshclam with defs in %s." % path)
    fc_proc = subprocess.Popen(
        [
            FRESHCLAM_PATH,
            "--config-file=./bin/freshclam.conf",
            "-u %s" % pwd.getpwuid(os.getuid())[0],
            "--verbose",
            "--datadir=%s" % path,
        ],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
    )
    output = fc_proc.communicate()[0]
    logging.info("freshclam output:\n%s" % output)
    if fc_proc.returncode != 0:
        logging.info("Unexpected exit code from freshclam: %s." % fc_proc.returncode)
    return fc_proc.returncode
def crawl():
    segs = get_entry_urls()
    logger.write_log("start crawling")
    try:
        common.create_dir(OUTPUT_DIR)
        entries = []
        for entry in crawl_entries(segs):
            if not entry:
                logger.write_log("error occurred")
                return
            entries.append(entry)
        common.write_json(entries, OUTPUT_DIR, ENTRIES_FILE_NAME)
    except Exception:
        logger.write_log("error occurred")
        logger.write_error(traceback.format_exc())
    finally:
        logger.write_log("end crawling")
def run(nav_file): """ Generates monthly sharpe ratio for each fund using a rolling window of the last 12 months. Uses this data to generate a rank file that specifies which fund to invest in each month. The fund chosen each month is the one with the highest sharpe ratio. """ # create data directory common.create_dir(data_dir) # read nav data nav_data = common.read_from_file(nav_file) # generate monthly sharpe ratio sharpe_data = get_sharpe_data(nav_data) sharpe_data_file = os.path.join(data_dir, sharpe_data_file_name) common.write_to_file(sharpe_data_file, sharpe_data) # generate sharpe ranking sharpe_rank_data = get_sharpe_rank_data(nav_data, sharpe_data) sharpe_rank_data_file = os.path.join(data_dir, sharpe_rank_file_name) common.write_to_file(sharpe_rank_data_file, sharpe_rank_data)
def _init(self): """ Configs for the configuration file """ self._modules_dir = os.path.dirname(os.path.realpath(__file__)) self._source_dir = os.path.abspath( os.path.join(self._modules_dir, os.pardir)) self._home_dir = os.path.abspath( os.path.join(self._source_dir, os.pardir)) self._config_file = "{}/{}".format(self._home_dir, "cfg.json") self._init_config_file(self._config_file) """ Configs for the processing directory """ self.process_dir = "{}/{}".format(self._home_dir, "process-files") self.last_process_file = "{}/{}".format(self.process_dir, "last-process-file") create_dir(self.process_dir) """ Configs for the processing missed directory """ self.missed_dir = "{}/{}".format(self._home_dir, "missed-files") create_dir(self.missed_dir) """ Configs for the logging """ self.log_dir = "{}/{}".format(self._home_dir, "logs") create_dir(self.log_dir) """ Configs for AWS resources """ self.aws_resources_dir = "{}/{}".format(self._home_dir, "aws-resources") self.default_insights_file = "{}/{}".format(self.aws_resources_dir, DEFAULT_INSIGHTS_FILE) self.insights_arns_file = "{}/{}".format(self.aws_resources_dir, INSIGHTS_ARNS_FILE) self.cloudFormation_stack_file = "{}/{}".format( self.aws_resources_dir, CLOUDFORMATION_STACK_TEMPLATE_FILE) """ Configs for the user input """ self.user_config_delete_files = self.user_config["deleteSiemFiles"] self.user_config_send_historical_data = self.user_config[ "firstRunSendHistoricalData"] self.user_config_src_path = self.user_config["siemPath"] self.user_config_severity_list = pupulate_cef_severity_list( self.user_config["severityFilterInclude"]) self.user_config_action_list = pupulate_cef_filter_list( self.user_config["actionFilterInclude"]) self.user_config_product_list = pupulate_cef_filter_list( self.user_config["productFilterInclude"])
import re
import time

# parameters
INFILE = input("Enter input xml strings filename: [default: strings.xml]\n")
if not INFILE:
    INFILE = "strings.xml"
OUTFILE = input("Enter output filename base : [default: totranslate.txt]\n")
if not OUTFILE:
    OUTFILE = "totranslate.txt"
OUTFILE_TRANSLATED = input("Enter empty translated filename: [default: translated.txt] (useful for translating several languages)\n")
if not OUTFILE_TRANSLATED:
    OUTFILE_TRANSLATED = "translated.txt"

create_dir(WORK_DIRECTORY)
filename = f = open(WORK_DIRECTORY + "/" + get_filename(OUTFILE), "w")

# initialize empty translated file
filenameTranslated = get_filename(OUTFILE_TRANSLATED)
fTranslated = open(WORK_DIRECTORY + "/" + filenameTranslated, "w").close()
listFilesToTranslate.append(filenameTranslated)

print("==========================\n\n")

# read xml structure
tree = ET.parse(INFILE)
root = tree.getroot()

iElement = 0
for i in range(len(root)):
def download_s3_object(s3_object, local_prefix):
    local_path = "%s/%s/%s" % (local_prefix, s3_object.bucket_name, s3_object.key)
    create_dir(os.path.dirname(local_path))
    s3_object.download_file(local_path)
    return local_path
def run(nav_file):
    # create data directory
    common.create_dir(data_dir)

    # read nav data
    nav_data = common.read_from_file(nav_file)

    # remove first 12 entries in nav_data
    # to compare results with benchmark
    del nav_data[1:13]

    # retrieve fund names
    # the first column (date) is skipped
    fund_names = nav_data[0].split(',')[1:]

    # initialize
    cashflows = []
    returns_halfyr = common.init_array_dict(fund_names)
    returns_annual = common.init_array_dict(fund_names)
    units_dict_halfyr = common.init_dict(fund_names)
    units_dict_annual = common.init_dict(fund_names)
    units_dict_overall = common.init_dict(fund_names)

    # remove header line
    del nav_data[0]

    # compute cashflows and returns
    cnt = len(nav_data)
    for i in range(0, cnt):
        row_data = nav_data[i].split(',')
        dt = datetime.strptime(row_data[0], '%d-%m-%Y')
        fund_nav = row_data[1:]
        fund_nav_dict = common.get_fund_nav_dict(fund_names, fund_nav)

        # half-yearly returns for each fund
        if i % 6 == 0 and i > 0:
            wealth = common.get_fund_wealth(fund_nav_dict, units_dict_halfyr)
            for fund in fund_names:
                cashflows_halfyr = cashflows[i - 6:i]  # slice last 6 months cashflows
                cf = (dt, wealth[fund])
                cashflows_halfyr.append(cf)
                ret = common.xirr(cashflows_halfyr)
                returns_halfyr[fund].append(ret)
            # clean up for next pass
            units_dict_halfyr = common.init_dict(fund_names)

        # annual returns for each fund
        if i % 12 == 0 and i > 0:
            wealth = common.get_fund_wealth(fund_nav_dict, units_dict_annual)
            for fund in fund_names:
                cashflows_annual = cashflows[i - 12:i]  # slice last 12 months cashflows
                cf = (dt, wealth[fund])
                cashflows_annual.append(cf)
                ret = common.xirr(cashflows_annual)
                returns_annual[fund].append(ret)
            # clean up for next pass
            units_dict_annual = common.init_dict(fund_names)

        # no investment on the last date
        if i == cnt - 1:
            break

        # invested units
        for fund in fund_names:
            nav = fund_nav_dict[fund]
            units = mnt_inv / nav
            units_dict_halfyr[fund] += units
            units_dict_annual[fund] += units
            units_dict_overall[fund] += units

        # cash outflow
        cf = (dt, -mnt_inv)
        cashflows.append(cf)

    file_data = []
    header_line = \
        'Fund,Investment,Wealth,Absolute Return,Annualized Return,' + \
        'Half-Yr Return Mean,Half-Yr Return Std Dev,Half-Yr Sharpe,' + \
        'Annual Return Mean,Annual Return Std Dev,Annual Sharpe'
    file_data.append(header_line)

    # total investment
    num_inv = len(cashflows)
    total_inv = num_inv * mnt_inv

    # final wealth
    nav_line = nav_data[cnt - 1].split(',')[1:]
    fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
    wealth = common.get_fund_wealth(fund_nav_dict, units_dict_overall)

    # performance stats for each fund
    last_date = nav_data[cnt - 1].split(',')[0]
    dt = datetime.strptime(last_date, '%d-%m-%Y')
    for fund in sorted(fund_names):
        fund_cashflows = cashflows[:]
        cf = (dt, wealth[fund])
        fund_cashflows.append(cf)
        abs_return = ((wealth[fund] / total_inv) - 1)
        ann_return = common.xirr(fund_cashflows)
        hfr = returns_halfyr[fund]
        halfyr_rf_rate = common.get_rf_rate('half-yearly')
        halfyr_return_mean = numpy.mean(hfr)
        halfyr_return_std = numpy.std(hfr)
        halfyr_sharpe = common.get_sharpe_ratio(hfr, halfyr_rf_rate)
        afr = returns_annual[fund]
        annual_rf_rate = common.get_rf_rate('annual')
        annual_return_mean = numpy.mean(afr)
        annual_return_std = numpy.std(afr)
        annual_sharpe = common.get_sharpe_ratio(afr, annual_rf_rate)
        line_data = \
            fund + ',' + str(total_inv) + ',' + str(wealth[fund]) + ',' + \
            str(abs_return) + ',' + str(ann_return) + ',' + \
            str(halfyr_return_mean) + ',' + str(halfyr_return_std) + ',' + \
            str(halfyr_sharpe) + ',' + str(annual_return_mean) + ',' + \
            str(annual_return_std) + ',' + str(annual_sharpe)
        file_data.append(line_data)

    regular_sip_file = os.path.join(data_dir, regular_sip_file_name)
    common.write_to_file(regular_sip_file, file_data)
# import required packages
import os
import json
import itertools
from common import color, create_dir
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, JSON_DATA_PATH

print "Now starting with parsing the hive script to a table layout."

try:
    os.stat(os.path.dirname(BIG_HIVE_SCRIPT))
except OSError:
    print color.RED + "WARNING! HIVE SCRIPT PATH DOES NOT EXIST!" + color.END
    print "Check if everything went alright during " + color.UNDERLINE + "hivescripts.py" + color.END

create_dir(os.path.dirname(JSON_DATA_PATH))
hive_blob = open(BIG_HIVE_SCRIPT, "r").read()

# this ugly script creates a dict. key = table_name, values = [colnames]
table_blobs = hive_blob.split("DROP TABLE IF EXISTS ")
table_info = {}
for table in table_blobs[1:]:
    name = table[0:table.find(';')]
    print 'I have found a table: ' + color.UNDERLINE + name + color.END
    table_info[name] = []
    colnames = table[table.find('(') + 1:table.find(')')]
    for col in colnames.replace('\n', '').split(','):
        table_info[name].append(col[:col.find(" ")])
# this is the end result for d3
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, dry_run
from common import create_dir, color, get_files_in_dir_with_extension
import os
import subprocess
import sys

# Get files with extension in the defined directory
DATA_FILES = get_files_in_dir_with_extension(PATH, '.csv')
DATA_FILES.extend(get_files_in_dir_with_extension(PATH, '.txt'))

# If we did not find CSV or TXT files in the folder, just exit.
if not DATA_FILES:
    print color.RED + "No csv or txt found in directory " + PATH + color.END
    sys.exit(0)

# Create the Hive script directory if it does not exist
create_dir(HIVE_SCRIPT_PATH)

# define a bunch of nice helper functions

'''
For each file create a directory, because in Hive we need to put the data
into a folder named after the table
'''
def generate_folder(datafile):
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory

'''
Get only the first few lines from each file. This is used to generate the
parser.add_argument('--epsilon', type=float, nargs='+', default=[1e-8],
                    help='Specify the epsilon hyperparameter for Adam.')

args = parser.parse_args()

trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
X, y = preprocess_data(trainData, trainTarget)
N, d = X.shape
Xt, yt = preprocess_data(testData, testTarget)
Xv, yv = preprocess_data(validData, validTarget)

epochs = args.epochs[0]

# Output path
path = args.path[0]
create_dir(path)

batch_size = args.batch_size[0]
optimizer = args.optimizer[0]
loss_type = args.loss[0]

if optimizer == 'gd':
    for alpha, reg in itertools.product(args.alpha, args.reg):
        params = {
            'alpha': alpha,
            'reg': reg,
            'batchsize': batch_size,
        }
        model_file, loss_file, time_file = make_filenames(
            path,
def generate_folder(datafile):
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory
from saveES import ElasticSearch
from stomp_engine import MessSendOrRecv
from common import Asset
from common import create_dir
from common import case_log
from common import unzip
from common import return_xml_status
from common import copy_report_to_local
from common import send_report_to_qloud
from kafka_engine import kafka_engine
from config import *

# create the directories where case results are saved
create_dir(RESULT_PATH, ZIP_PATH)

asset = Asset(host=ASSETS_HOST, port=ASSETS_PORT, user=ASSETS_USER,
              passwd=ASSETS_PWD)

app = Flask(__name__)
test_runner_plugin = TestRunnerPlugin()
test_runner_plugin.enable()

# test engine URL
@app.route('/case/engine', methods=["POST"])
def case_engine_by_rest():
def lambda_handler(event, context):
    if AV_SCAN_ROLE_ARN:
        sts_client = boto3.client("sts")
        sts_response = sts_client.assume_role(
            RoleArn=AV_SCAN_ROLE_ARN, RoleSessionName="AVScanRoleAssumption"
        )
        session = boto3.session.Session(
            aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
            aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
            aws_session_token=sts_response["Credentials"]["SessionToken"],
        )
        s3_cross_account = session.resource("s3")
        s3_cross_account_client = session.client("s3")
        sns_cross_account_client = session.client("sns")
    else:
        s3_cross_account = boto3.resource("s3")
        s3_cross_account_client = boto3.client("s3")
        sns_cross_account_client = boto3.client("sns")

    s3_local_account = boto3.resource("s3")
    s3_local_account_client = boto3.client("s3")
    sns_local_account_client = boto3.client("sns")

    # Get some environment variables
    ENV = os.getenv("ENV", "")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    print("Event received: %s" % event)
    s3_object = event_object(event, s3_resource=s3_cross_account)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3_cross_account, s3_object)

    if object_does_not_require_scan(
        s3_cross_account_client, s3_object.bucket_name, s3_object.key
    ):
        if AV_STATUS_SNS_ARN not in [None, ""]:
            sns_skip_scan(
                sns_local_account_client, s3_object, AV_STATUS_SNS_ARN,
                get_timestamp()
            )
        print(
            "Scan of s3://%s was skipped due to the file being safely generated by a VISO process"
            % os.path.join(s3_object.bucket_name, s3_object.key)
        )
    else:
        # Publish the start time of the scan
        if AV_SCAN_START_SNS_ARN not in [None, ""]:
            start_scan_time = get_timestamp()
            sns_start_scan(
                sns_local_account_client,
                s3_object,
                AV_SCAN_START_SNS_ARN,
                start_scan_time,
            )

        file_path = get_local_path(s3_object, "/tmp")
        create_dir(os.path.dirname(file_path))
        s3_object.download_file(file_path)

        to_download = clamav.update_defs_from_s3(
            s3_local_account_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
        )
        for download in to_download.values():
            s3_path = download["s3_path"]
            local_path = download["local_path"]
            print("Downloading definition file %s from s3://%s"
                  % (local_path, s3_path))
            s3_local_account.Bucket(AV_DEFINITION_S3_BUCKET).download_file(
                s3_path, local_path
            )
            print("Downloading definition file %s complete!" % (local_path))

        scan_result, scan_signature = clamav.scan_file(file_path)
        print(
            "Scan of s3://%s resulted in %s\n"
            % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)
        )

        result_time = get_timestamp()
        # Set the properties on the object with the scan results
        if "AV_UPDATE_METADATA" in os.environ:
            set_av_metadata(s3_object, scan_result, scan_signature, result_time)
        set_av_tags(
            s3_cross_account_client, s3_object, scan_result, scan_signature,
            result_time
        )

        # Publish the scan results
        if AV_STATUS_SNS_ARN not in [None, ""]:
            sns_scan_results(
                sns_local_account_client,
                s3_object,
                AV_STATUS_SNS_ARN,
                scan_result,
                scan_signature,
                result_time,
            )

        # Publish clean scan results cross account
        if (
            scan_result == AV_STATUS_CLEAN
            and str_to_bool(AV_STATUS_SNS_PUBLISH_CLEAN)
            and AV_STATUS_CLEAN_SNS_ARN not in [None, ""]
        ):
            sns_scan_results(
                sns_cross_account_client,
                s3_object,
                AV_STATUS_CLEAN_SNS_ARN,
                scan_result,
                scan_signature,
                result_time,
            )

        metrics.send(
            env=ENV, bucket=s3_object.bucket_name, key=s3_object.key,
            status=scan_result
        )
        # Delete downloaded file to free up room on re-usable lambda function container
        try:
            os.remove(file_path)
        except OSError:
            pass
        if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
            sns_delete_results(s3_object, scan_result)
            delete_s3_object(s3_object)

    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)
def lambda_handler(event, context): s3 = boto3.resource("s3") s3_client = boto3.client("s3") sns_client = boto3.client("sns") # Get some environment variables ENV = os.getenv("ENV", "") EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") start_time = get_timestamp() print("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): verify_s3_object_version(s3, s3_object) # Publish the start time of the scan if AV_SCAN_START_SNS_ARN not in [None, ""]: start_scan_time = get_timestamp() sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) file_path = get_local_path(s3_object, "/tmp") create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) to_download = clamav.update_defs_from_s3(s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX) for download in to_download.values(): s3_path = download["s3_path"] local_path = download["local_path"] print("Downloading definition file %s from s3://%s/%s" % (local_path, AV_DEFINITION_S3_BUCKET, s3_path)) s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) scan_result, scan_signature = clamav.scan_file(file_path) print("Scan of s3://%s resulted in %s\n" % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)) result_time = get_timestamp() # Set the properties on the object with the scan results if "AV_UPDATE_METADATA" in os.environ: set_av_metadata(s3_object, scan_result, scan_signature, result_time) set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time) # Publish the scan results if AV_STATUS_SNS_ARN not in [None, ""]: sns_scan_results( sns_client, s3_object, AV_STATUS_SNS_ARN, scan_result, scan_signature, result_time, ) metrics.send(env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result) # Delete downloaded file to free up room on re-usable lambda function container try: os.remove(file_path) except OSError: pass if str_to_bool( AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() print("Script finished at %s\n" % stop_scan_time)
def run(nav_file, rank_file):
    """
    Generates return statistics based on sharpe ratio ranking data.
    """
    # create data directory
    common.create_dir(data_dir)

    # read data files
    nav_data = common.read_from_file(nav_file)
    rank_data = common.read_from_file(rank_file)

    # remove redundant entries in nav_data
    target_date = rank_data[1].split(',')[0]
    common.trim_data(nav_data, target_date)
    assert len(nav_data) == len(rank_data)

    # retrieve fund names
    # the first column (date) is skipped
    fund_names = nav_data[0].split(',')[1:]

    # initialize
    cashflows_halfyr = []
    cashflows_annual = []
    cashflows_overall = []
    returns_halfyr = []
    returns_annual = []
    units_dict_halfyr = common.init_dict(fund_names)
    units_dict_annual = common.init_dict(fund_names)
    units_dict_overall = common.init_dict(fund_names)

    # remove header line
    del nav_data[0]
    del rank_data[0]

    # compute cashflows and returns
    cnt = len(nav_data)
    for i in range(0, cnt):
        (date, fund, nav) = rank_data[i].split(',')
        dt = datetime.strptime(date, '%d-%m-%Y')

        # half-yearly returns
        if i % 6 == 0 and i > 0:
            nav_line = nav_data[i].split(',')[1:]
            fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
            wealth = get_wealth(fund_nav_dict, units_dict_halfyr)
            cf = (dt, wealth)
            cashflows_halfyr.append(cf)
            ret = common.xirr(cashflows_halfyr)
            returns_halfyr.append(ret)
            # clean up for next pass
            del cashflows_halfyr[:]
            units_dict_halfyr = common.init_dict(fund_names)

        # annual returns
        if i % 12 == 0 and i > 0:
            nav_line = nav_data[i].split(',')[1:]
            nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
            wealth = get_wealth(nav_dict, units_dict_annual)
            cf = (dt, wealth)
            cashflows_annual.append(cf)
            ret = common.xirr(cashflows_annual)
            returns_annual.append(ret)
            # clean up for next pass
            del cashflows_annual[:]
            units_dict_annual = common.init_dict(fund_names)

        # no investment on the last date
        if i == cnt - 1:
            break

        # units invested
        units = mnt_inv / float(nav)
        units_dict_halfyr[fund] += units
        units_dict_annual[fund] += units
        units_dict_overall[fund] += units

        # cash outflow
        cf = (dt, -mnt_inv)
        cashflows_halfyr.append(cf)
        cashflows_annual.append(cf)
        cashflows_overall.append(cf)

    file_data = []

    # investment details
    file_data.append('Investment Details')
    file_data.append('Fund,Units')
    for f in units_dict_overall:
        if units_dict_overall[f] > 0:
            line_data = f + ',' + str(units_dict_overall[f])
            file_data.append(line_data)
    file_data.append('\n')

    # total investment
    num_inv = len(cashflows_overall)
    total_inv = num_inv * mnt_inv
    file_data.append('Investment,' + str(total_inv))

    # final wealth
    nav_line = nav_data[cnt - 1].split(',')[1:]
    fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
    wealth = get_wealth(fund_nav_dict, units_dict_overall)
    file_data.append('Wealth,' + str(wealth))

    # absolute return
    abs_return = ((wealth / total_inv) - 1)
    file_data.append('Absolute Return,' + str(abs_return))

    # annualized return
    last_date = nav_data[cnt - 1].split(',')[0]
    dt = datetime.strptime(last_date, '%d-%m-%Y')
    cf = (dt, wealth)
    cashflows_overall.append(cf)
    annual_return = common.xirr(cashflows_overall)
    file_data.append('Annualized Return,' + str(annual_return))
    file_data.append('\n')

    file_data.append('Stats,Mean,Std Deviation,Sharpe Ratio')

    # half-yearly return stats
    halfyr_rf_rate = common.get_rf_rate('half-yearly')
    halfyr_mean = numpy.mean(returns_halfyr)
    halfyr_stdev = numpy.std(returns_halfyr)
    halfyr_sharpe = common.get_sharpe_ratio(returns_halfyr, halfyr_rf_rate)
    file_data.append('Half-Yearly,' + str(halfyr_mean) + ',' +
                     str(halfyr_stdev) + ',' + str(halfyr_sharpe))

    # annual return stats
    annual_rf_rate = common.get_rf_rate('annual')
    annual_mean = numpy.mean(returns_annual)
    annual_stdev = numpy.std(returns_annual)
    annual_sharpe = common.get_sharpe_ratio(returns_annual, annual_rf_rate)
    file_data.append('Annual,' + str(annual_mean) + ',' +
                     str(annual_stdev) + ',' + str(annual_sharpe))

    # save stats to file
    benchmark_file = os.path.join(data_dir, benchmark_file_name)
    common.write_to_file(benchmark_file, file_data)
def init():
    global connection
    common.create_dir()
    connection = create_connection()
    setup_db()
import os
import sys
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, HIVE_QUERY_SCRIPTS_PATH
from common import create_dir, color, get_files_in_dir_with_extension

# FILES = [file for file in os.listdir(HIVE_SCRIPT_PATH)]
# DATA_FILES = [thing for thing in FILES if os.path.isfile(HIVE_SCRIPT_PATH + "/" + thing) and thing.endswith('.hive')]
DATA_FILES = get_files_in_dir_with_extension(HIVE_SCRIPT_PATH, '.hive')
if not DATA_FILES:
    print color.RED + "No hive files found in directory " + PATH + color.END
    sys.exit(0)

# Hive variables
HIVE_SETUP = "use " + HIVE_DB + "; set hive.cli.print.header=true; "

create_dir(HIVE_QUERY_SCRIPTS_PATH)

def generate_hive_queries(fields, tablename):
    # Create dir where we will write the results
    RESULTS_DIR = HIVE_QUERY_SCRIPTS_PATH + '/' + tablename
    create_dir(RESULTS_DIR)
    query_file = open(HIVE_QUERY_SCRIPTS_PATH + '/' + tablename + '.sh', "w")
    query_file.write("#!/bin/bash \n")
    file_name = RESULTS_DIR + '/' + tablename + '.txt'
    to_execute = "hive -e \"" + HIVE_SETUP + " select count(*) as all_rows from " + tablename + ";\" > " + file_name + '\n'
    to_execute += "echo \"----\" >>" + file_name + '\n'
    # append (>>) so the row count written above is not clobbered
    to_execute += "hive -e \"" + HIVE_SETUP + " describe " + tablename + ";\" >> " + file_name + '\n'
    query_file.write(to_execute)
    for field in fields:
        column_name = field[0]
        file_name = RESULTS_DIR + '/' + column_name + '.txt'
def lambda_handler(event, context): s3 = boto3.resource("s3") s3_client = boto3.client("s3") sns_client = boto3.client("sns") # Get some environment variables ENV = os.getenv("ENV", "") EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") start_time = get_timestamp() print("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): verify_s3_object_version(s3, s3_object) # Publish the start time of the scan if AV_SCAN_START_SNS_ARN not in [None, ""]: start_scan_time = get_timestamp() sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) file_path = get_local_path(s3_object, "/tmp") create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) to_download = clamav.update_defs_from_s3( s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX ) for download in to_download.values(): s3_path = download["s3_path"] local_path = download["local_path"] print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) # calculate the md5 of the virus defintion files definition_md5 = clamav.get_definition_md5() # check the file for an existing defintion md5 hash s3_definition_md5 = clamav.md5_from_s3_tags(s3_client, s3_object.bucket_name, s3_object.key, AV_DEFINITION_MD5_METADATA) # skip if there is a match if definition_md5 == s3_definition_md5: print("Not scanning because local defintion md5 matches s3 defintion md5.") return # Set AV_STATUS_SKIPPED if file exceeds maximum file size s3_object_size_result = check_s3_object_size(s3, s3_object) if s3_object_size_result == AV_STATUS_SKIPPED: scan_result = s3_object_size_result scan_signature = AV_SIGNATURE_UNKNOWN else: scan_result, scan_signature = clamav.scan_file(file_path) print( "Scan of s3://%s resulted in %s\n" % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result) ) result_time = get_timestamp() # Set the properties on the object with the scan results if "AV_UPDATE_METADATA" in os.environ: # AV_UPDATE_METADATA doesn't seem to be set anywhere - likely cant get here set_av_metadata(s3_object, scan_result, scan_signature, result_time) set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time, definition_md5) # Publish the scan results if AV_STATUS_SNS_ARN not in [None, ""]: sns_scan_results( sns_client, s3_object, AV_STATUS_SNS_ARN, scan_result, scan_signature, result_time, ) metrics.send( env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result ) # Delete downloaded file to free up room on re-usable lambda function container try: os.remove(file_path) except OSError: pass if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() print("Script finished at %s\n" % stop_scan_time)