def test_create_dir_doesnt_exist_no_raises(self, mock_os, mock_path):
     mock_path.exists.return_value = False
     mock_os.makedirs.side_effect = OSError(errno.EEXIST, "exists")
     create_dir("testpath")
     self.assertTrue(
         mock_os.makedirs.called, "Failed to make directories if path not present."
     )
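These tests only pin down the helper's contract: os.makedirs is invoked when the path is absent, a concurrent-creation EEXIST error is swallowed, and any other OSError propagates (see the second test further below). A minimal sketch consistent with that contract, not the actual implementation:

import errno
import os


def create_dir(path):
    """Create a directory tree, tolerating a concurrent creation (EEXIST)."""
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            # Another process may have created the path in the meantime.
            if e.errno != errno.EEXIST:
                raise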
Example 2
def update_defs_from_s3(s3_client, bucket, prefix):
    create_dir(AV_DEFINITION_PATH)
    to_download = {}
    for file_prefix in AV_DEFINITION_FILE_PREFIXES:
        s3_best_time = None
        for file_suffix in AV_DEFINITION_FILE_SUFFIXES:
            filename = file_prefix + "." + file_suffix
            s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename)
            local_path = os.path.join(AV_DEFINITION_PATH, filename)
            s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path)
            s3_time = time_from_s3(s3_client, bucket, s3_path)

            if s3_best_time is not None and s3_time < s3_best_time:
                print("Not downloading older file in series: %s" % filename)
                continue
            else:
                s3_best_time = s3_time

            if os.path.exists(local_path) and md5_from_file(
                    local_path) == s3_md5:
                print("Not downloading %s because local md5 matches s3." %
                      filename)
                continue
            if s3_md5:
                print("Downloading definition file %s from s3://%s" %
                      (filename, os.path.join(bucket, prefix)))
                to_download[file_prefix] = {
                    "s3_path": s3_path,
                    "local_path": local_path,
                }
    return to_download
 def _init(self):
     """ Configs for the configuration file """
     self._modules_dir = os.path.dirname(os.path.realpath(__file__))
     self._source_dir = os.path.abspath(
         os.path.join(self._modules_dir, os.pardir))
     self._home_dir = os.path.abspath(
         os.path.join(self._source_dir, os.pardir))
     self._config_file = "{}/{}".format(self._home_dir, USER_CONFIG_FILE)
     """ Configs for the logging """
     self.log_dir = "{}/{}".format(self._home_dir, "logs")
     create_dir(self.log_dir)
     LogConfig(self.log_dir, LOG_FILE_NAME, logging.ERROR)
     """ Configs for the db directory """
     self.db_dir = "{}/{}".format(self._home_dir, "db")
     create_dir(self.db_dir)
     """ User defined Configs """
     self.user_config = get_yaml_content(self._config_file)
     # overwrite cfg.yml values into env values if they exist.
     self.user_config = get_user_config_env_values(self.user_config,
                                                   USER_CONFIGS_LIST)
     # Defaults
     self.user_config[SSL_CERTFILE] = get_user_defined_configuration(
         self.user_config, SSL_CERTFILE, SSL_CERTFILE_DEFAULT)
     self.user_config[SSL_KEYFILE] = get_user_defined_configuration(
         self.user_config, SSL_KEYFILE, SSL_KEYFILE_DEFAULT)
     self.user_config[SSL_PASSWORD] = get_user_defined_configuration(
         self.user_config, SSL_PASSWORD, NO_VALUE_PROVIDED)
     self.user_config[SSL_CAFILE] = get_user_defined_configuration(
         self.user_config, SSL_CAFILE, SSL_CAFILE_DEFAULT)
     self._user_defind_required_configs(self.user_config)
     self._user_defind_runtime_safeguard(self.user_config)
def configure():
    if not common.is_cmd_installed("docker"):
        common.msg("System", "docker is not installed", "warn")
        return False
    common.msg("Perform ", "docker config")
    proxy_dir = '/etc/systemd/system/docker.service.d/'
    http_proxy_file = 'http-proxy.conf'
    https_proxy_file = 'https-proxy.conf'

    proxy = apps['docker']
    if proxy['use_proxy']:
        comment = ""
    else:
        comment = "#"

    # Data
    http_proxy_content = """[Service]
    {1}Environment="HTTP_PROXY={0}"
    """.format(proxy['http_proxy_target'], comment)

    https_proxy_content = """[Service]
    {1}Environment="HTTPS_PROXY={0}"
    """.format(proxy['https_proxy_target'], comment)

    # action
    common.create_dir(proxy_dir)
    common.create_file(proxy_dir, http_proxy_file, http_proxy_content)
    common.create_file(proxy_dir, https_proxy_file, https_proxy_content)
    call(["service", "docker", "restart"])
    call(["systemctl", "daemon-reload"])
Example 5
def get_local_file(input, output):
    black_list = ["package.yaml", ".stage", "release", "control"]
    valid_components = []
    for path in input.split("/"):
        if path == ".":
            pass
        elif path == "..":
            raise RuntimeError("'..' is inhibited in specifying a local path")
        else:
            valid_components.append(path)

    if len(valid_components) > 0 and valid_components[0] in black_list:
        raise RuntimeError(
            "'./{}' is reserved for system use and cannot be used to specify a local file"
            .format(valid_components[0]))
    if len(valid_components) == 0:
        url = "*"
    else:
        url = os.path.join(*valid_components)

    file_list = glob.glob(url)
    if len(file_list) == 0:
        print_error("Cannot find any file in '{}'".format(url))
    create_dir(output)
    for file in file_list:
        if file in black_list:
            continue
        dest_path = os.path.join(output, file)
        #print "{} -> {}".format(file, dest_path)
        if os.path.isdir(file):
            shutil.copytree(file, dest_path)
        else:
            shutil.copy(file, dest_path)
Example 6
def generate_hive_queries(fields, tablename):
    #Create dir where we will write the results
    RESULTS_DIR=HIVE_QUERY_SCRIPTS_PATH+'/'+tablename
    create_dir(RESULTS_DIR)
    query_file = open( HIVE_QUERY_SCRIPTS_PATH + '/' + tablename + '.sh', "w")
    query_file.write("#!/bin/bash \n")
    file_name=RESULTS_DIR+'/'+tablename+'.txt'
    to_execute = "hive -e \""+HIVE_SETUP+" select count(*) as all_rows from "+tablename+";\" > "+file_name+'\n';
    to_execute+="echo \"----\" >>"+file_name+'\n';
    to_execute += "hive -e \""+HIVE_SETUP+" describe "+tablename+";\" > "+file_name+'\n';
    query_file.write(to_execute)
    for field in fields:
        column_name=field[0]
        file_name=RESULTS_DIR+'/'+column_name+'.txt'
        to_execute=''
        if field[1]=='STRING':
            to_execute+="hive -e \""+HIVE_SETUP+" select count(distinct "+column_name+") as distinct_col from "+tablename+";\" > "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n';
            to_execute+="echo \"Least frequent:\">>"+file_name+'\n';
            to_execute+="hive -e \""+HIVE_SETUP+" select "+column_name+", count(*) as count from "+tablename+" GROUP BY "+column_name+" ORDER BY count asc LIMIT 10;\" | sed \'s/\\t/|/g\'  >> "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n';
            to_execute+="echo \"Most frequent:\">>"+file_name+'\n';
            to_execute+="hive -e \""+HIVE_SETUP+" select "+column_name+", count(*) as count from "+tablename+" GROUP BY "+column_name+" ORDER BY count desc LIMIT 10;\" | sed \'s/\\t/|/g\'  >> "+file_name+'\n';
        else:
            to_execute+="hive -e \""+HIVE_SETUP+" select max("+column_name+") as max from "+tablename+";\" > "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n';
            to_execute+="hive -e \""+HIVE_SETUP+" select min("+column_name+") as min from "+tablename+";\" >> "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n';
            to_execute+="hive -e \""+HIVE_SETUP+" select percentile_approx("+column_name+", 0.5) as median from "+tablename+";\" >> "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n'; 
            to_execute+="hive -e \""+HIVE_SETUP+" select avg("+column_name+") as mean from "+tablename+";\" >> "+file_name+'\n';
            to_execute+="echo \"----\" >>"+file_name+'\n';
            to_execute+="hive -e \""+HIVE_SETUP+" select stddev_pop("+column_name+") as stdev from "+tablename+";\" >> "+file_name+'\n';
        query_file.write(to_execute)
    print color.GREEN + "Generated hive queries for table "+ tablename + color.END
Example 7
def update_defs_from_freshclam(path, library_path=""):
    create_dir(path)
    fc_env = os.environ.copy()
    if library_path:
        fc_env["LD_LIBRARY_PATH"] = "%s:%s" % (
            ":".join(current_library_search_path()),
            CLAMAVLIB_PATH,
        )
    print("Starting freshclam with defs in %s." % path)
    fc_proc = subprocess.Popen(
        [
            FRESHCLAM_PATH,
            "--config-file=./bin/freshclam.conf",
            "-u %s" % pwd.getpwuid(os.getuid())[0],
            "--datadir=%s" % path,
        ],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        env=fc_env,
    )
    output = fc_proc.communicate()[0]
    print("freshclam output:\n%s" % output)
    if fc_proc.returncode != 0:
        print("Unexpected exit code from freshclam: %s." % fc_proc.returncode)
    return fc_proc.returncode
 def test_create_dir_doesnt_exist_but_raises(self, mock_os, mock_path):
     mock_path.exists.return_value = False
     mock_os.makedirs.side_effect = OSError(errno.ENAMETOOLONG, "nametoolong")
     with self.assertRaises(OSError):
         create_dir("testpath")
     self.assertTrue(
         mock_os.makedirs.called, "Failed to make directories if path not present."
     )
Example 9
def lambda_handler(event, context):
    s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT)
    s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT)
    sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT)

    start_clamd(s3, s3_client)

    # Get some environment variables
    EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3")

    start_time = get_timestamp()
    logging.debug("Script starting at %s\n" % (start_time))
    s3_object = event_object(event, event_source=EVENT_SOURCE)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3, s3_object)

    # Publish the start time of the scan
    if AV_SCAN_START_SNS_ARN not in [None, ""]:
        start_scan_time = get_timestamp()
        sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN,
                       start_scan_time)

    with tempfile.TemporaryDirectory(prefix=EFS_SCAN_FILE_PATH) as tmpdirname:
        file_path = get_local_path(s3_object, tmpdirname)
        create_dir(os.path.dirname(file_path))
        s3_object.download_file(file_path)

        scan_result, scan_signature = clamav.scan_file(file_path)
        logging.info(
            "Scan of s3://%s resulted in %s\n" %
            (os.path.join(s3_object.bucket_name, s3_object.key), scan_result))

        result_time = get_timestamp()
        # Set the properties on the object with the scan results
        if "AV_UPDATE_METADATA" in os.environ:
            set_av_metadata(s3_object, scan_result, scan_signature,
                            result_time)
        set_av_tags(s3_client, s3_object, scan_result, scan_signature,
                    result_time)

        # Publish the scan results
        if AV_STATUS_SNS_ARN not in [None, ""]:
            sns_scan_results(
                sns_client,
                s3_object,
                AV_STATUS_SNS_ARN,
                scan_result,
                scan_signature,
                result_time,
            )

    stop_scan_time = get_timestamp()
    logging.debug("Script finished at %s\n" % stop_scan_time)
Example 10
def create_big_hive_query(): 
    print color.UNDERLINE + "Now starting with creating big hive query" + color.END
    create_dir(os.path.dirname(BIG_HIVE_SCRIPT))
    if os.path.exists(BIG_HIVE_SCRIPT): os.remove(BIG_HIVE_SCRIPT)
    scripts = [ script for script in os.listdir(HIVE_SCRIPT_PATH) if not os.path.isdir(HIVE_SCRIPT_PATH+'/'+script) ]
    big_script = open(BIG_HIVE_SCRIPT,"w")    
    for script in scripts: 
        print "Now adding: " + script + color.END
        small_script = open(HIVE_SCRIPT_PATH + '/' + script,"r")
        big_script.write(small_script.read()+'\n\n')
        small_script.close()         
    big_script.close()
    print color.GREEN + "Congratulations. It's seems like all hive scripts have been made." + color.END
    print "You can find the results in "+ color.UNDERLINE + BIG_HIVE_SCRIPT + color.END
Example 11
def update_defs_from_s3(bucket, prefix):
    create_dir(AV_DEFINITION_PATH)
    for filename in AV_DEFINITION_FILENAMES:
        s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename)
        local_path = os.path.join(AV_DEFINITION_PATH, filename)
        s3_md5 = md5_from_s3_tags(bucket, s3_path)
        if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5:
            print("Not downloading %s because local md5 matches s3." %
                  filename)
            continue
        if s3_md5:
            print("Downloading definition file %s from s3://%s" %
                  (filename, os.path.join(bucket, prefix)))
            s3.Bucket(bucket).download_file(s3_path, local_path)
Example 12
def lambda_handler(event, context):
    s3 = boto3.resource("s3")
    sns_client = boto3.client("sns")

    # Get some environment variables
    EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    s3_object = event_object(event, event_source=EVENT_SOURCE)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3, s3_object)

    # Publish the start time of the scan
    if AV_SCAN_START_SNS_ARN not in [None, ""]:
        start_scan_time = get_timestamp()
        sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN,
                       start_scan_time)

    file_path = get_local_path(s3_object, "/tmp")
    create_dir(os.path.dirname(file_path))
    try:
        s3_object.download_file(file_path)
    except OSError as e:
        remove_file(file_path)
        if e.errno == 28:
            print("Ran out of disk space. Scan failed")
            publish_results(s3_object, AV_STATUS_FAILED,
                            "File too large to scan")
            return
        else:
            raise

    download_clamav_databases()

    scan_result, scan_signature = clamav.scan_file(file_path)
    print("Scan of s3://%s resulted in %s\n" %
          (os.path.join(s3_object.bucket_name, s3_object.key), scan_result))

    publish_results(s3_object, scan_result, scan_signature)

    # Delete downloaded file to free up room on re-usable lambda function container
    remove_file(file_path)
    if str_to_bool(
            AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
        delete_s3_object(s3_object)
    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)
Example 13
def build_part(part):
    install_dir = part.install_path()
    source_dir = part.source_path()
    build_dir = part.build_path()

    if part.gconfig.clean_build:
        remove_dir(build_dir)

    # copy tree: source -> build
    if not os.path.exists(build_dir):
        shutil.copytree(source_dir, build_dir)
        part.set_build_state("CLEAN")
    else:
        print_warn("Part '{}' has ever been built,".format(part.name),
                   "in order to perform a clean build, use --clean-build")

    remove_dir(install_dir)

    # get module parameters and put them in 'module_params'
    var_name_prefix = part.build + "-"
    prefix_len = len(var_name_prefix)
    module_params = dict(
        (k[prefix_len:], v) for k, v in part.doc.iteritems()
        if len(k) > prefix_len and k[:prefix_len] == var_name_prefix)

    env = {
        "BUILD_ROOT": build_path(part.gconfig),
        "INSTALL_ROOT": install_path(part.gconfig),
        "BUILD_DIR": build_dir,
        "INSTALL_DIR": install_dir
    }

    # do the build prepare
    os.chdir(build_dir)
    build_prepare(part, env)

    # do the make main
    os.chdir(build_dir)
    if build_override(part, env) is False:
        part.build_module.build(part, module_params, env.copy())
    part.set_build_state("BUILT")

    # do the make install
    os.chdir(build_dir)
    create_dir(install_dir)
    if install_override(part, env) is False:
        part.build_module.install(part, module_params, env.copy())
    part.set_build_state("INSTALLED")
Example 14
def create_big_hive_query():
    print color.UNDERLINE + "Now starting with creating big hive query" + color.END
    create_dir(os.path.dirname(BIG_HIVE_SCRIPT))
    if os.path.exists(BIG_HIVE_SCRIPT): os.remove(BIG_HIVE_SCRIPT)
    scripts = [
        script for script in os.listdir(HIVE_SCRIPT_PATH)
        if not os.path.isdir(HIVE_SCRIPT_PATH + '/' + script)
    ]
    big_script = open(BIG_HIVE_SCRIPT, "w")
    for script in scripts:
        print "Now adding: " + script + color.END
        small_script = open(HIVE_SCRIPT_PATH + '/' + script, "r")
        big_script.write(small_script.read() + '\n\n')
        small_script.close()
    big_script.close()
    print color.GREEN + "Congratulations. It's seems like all hive scripts have been made." + color.END
    print "You can find the results in " + color.UNDERLINE + BIG_HIVE_SCRIPT + color.END
Example 15
 def capture_screen(self):
     try:
         path = create_dir(con_path.pic_path)
         file_path = os.path.join(path, parser_time.strftime_hms() + '.png')
         print(file_path)
         self.driver.save_screenshot(file_path)
         return file_path
     except Exception as e:
         print('Screenshot capture failed')
         raise e
def update_defs_from_freshclam(path):
    create_dir(path)
    logging.info("Starting freshclam with defs in %s." % path)
    fc_proc = subprocess.Popen(
        [
            FRESHCLAM_PATH,
            "--config-file=./bin/freshclam.conf",
            "-u %s" % pwd.getpwuid(os.getuid())[0],
            "--verbose",
            "--datadir=%s" % path,
        ],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
    )
    output = fc_proc.communicate()[0]
    logging.info("freshclam output:\n%s" % output)
    if fc_proc.returncode != 0:
        logging.info("Unexpected exit code from freshclam: %s." % fc_proc.returncode)
    return fc_proc.returncode
Example 17
def crawl():
    segs = get_entry_urls()

    logger.write_log("start crawling")

    try:
        common.create_dir(OUTPUT_DIR)
        entries = []
        for entry in crawl_entries(segs):
            if (not entry):
                logger.write_log("error occurred")
                return
            entries.append(entry)
            common.write_json(entries, OUTPUT_DIR, ENTRIES_FILE_NAME)
    except Exception:
        logger.write_log("error occurred")
        logger.write_error(traceback.format_exc())
    finally:
        logger.write_log("end crawling")
Example 18
def run(nav_file):
  """
  Generates monthly sharpe ratio for each fund using a rolling window of the 
  last 12 months. Uses this data to generate a rank file that specifies which 
  fund to invest in each month. The fund chosen each month is the one with the 
  highest sharpe ratio.
  """
  
  # create data directory
  common.create_dir(data_dir)
  
  # read nav data
  nav_data = common.read_from_file(nav_file)
  
  # generate monthly sharpe ratio
  sharpe_data = get_sharpe_data(nav_data)
  sharpe_data_file = os.path.join(data_dir, sharpe_data_file_name)
  common.write_to_file(sharpe_data_file, sharpe_data)

  # generate sharpe ranking
  sharpe_rank_data = get_sharpe_rank_data(nav_data, sharpe_data)
  sharpe_rank_data_file = os.path.join(data_dir, sharpe_rank_file_name)
  common.write_to_file(sharpe_rank_data_file, sharpe_rank_data)
Example 19
def run(nav_file):
    """
  Generates monthly sharpe ratio for each fund using a rolling window of the 
  last 12 months. Uses this data to generate a rank file that specifies which 
  fund to invest in each month. The fund chosen each month is the one with the 
  highest sharpe ratio.
  """

    # create data directory
    common.create_dir(data_dir)

    # read nav data
    nav_data = common.read_from_file(nav_file)

    # generate monthly sharpe ratio
    sharpe_data = get_sharpe_data(nav_data)
    sharpe_data_file = os.path.join(data_dir, sharpe_data_file_name)
    common.write_to_file(sharpe_data_file, sharpe_data)

    # generate sharpe ranking
    sharpe_rank_data = get_sharpe_rank_data(nav_data, sharpe_data)
    sharpe_rank_data_file = os.path.join(data_dir, sharpe_rank_file_name)
    common.write_to_file(sharpe_rank_data_file, sharpe_rank_data)
Example 20
 def _init(self):
     """ Configs for the configuration file """
     self._modules_dir = os.path.dirname(os.path.realpath(__file__))
     self._source_dir = os.path.abspath(
         os.path.join(self._modules_dir, os.pardir))
     self._home_dir = os.path.abspath(
         os.path.join(self._source_dir, os.pardir))
     self._config_file = "{}/{}".format(self._home_dir, "cfg.json")
     self._init_config_file(self._config_file)
     """ Configs for the processing directory """
     self.process_dir = "{}/{}".format(self._home_dir, "process-files")
     self.last_process_file = "{}/{}".format(self.process_dir,
                                             "last-process-file")
     create_dir(self.process_dir)
     """ Configs for the processing missed directory """
     self.missed_dir = "{}/{}".format(self._home_dir, "missed-files")
     create_dir(self.missed_dir)
     """ Configs for the logging """
     self.log_dir = "{}/{}".format(self._home_dir, "logs")
     create_dir(self.log_dir)
     """ Configs for AWS resources """
     self.aws_resources_dir = "{}/{}".format(self._home_dir,
                                             "aws-resources")
     self.default_insights_file = "{}/{}".format(self.aws_resources_dir,
                                                 DEFAULT_INSIGHTS_FILE)
     self.insights_arns_file = "{}/{}".format(self.aws_resources_dir,
                                              INSIGHTS_ARNS_FILE)
     self.cloudFormation_stack_file = "{}/{}".format(
         self.aws_resources_dir, CLOUDFORMATION_STACK_TEMPLATE_FILE)
     """ Configs for the user input """
     self.user_config_delete_files = self.user_config["deleteSiemFiles"]
     self.user_config_send_historical_data = self.user_config[
         "firstRunSendHistoricalData"]
     self.user_config_src_path = self.user_config["siemPath"]
     self.user_config_severity_list = pupulate_cef_severity_list(
         self.user_config["severityFilterInclude"])
     self.user_config_action_list = pupulate_cef_filter_list(
         self.user_config["actionFilterInclude"])
     self.user_config_product_list = pupulate_cef_filter_list(
         self.user_config["productFilterInclude"])
Example 21
import re
import time
import xml.etree.ElementTree as ET

# parameters
INFILE = input("Enter input xml strings filename: [default: strings.xml]\n")
if not INFILE:
    INFILE = "strings.xml"
OUTFILE = input("Enter output filename base : [default: totranslate.txt]\n")
if not OUTFILE:
    OUTFILE = "totranslate.txt"
OUTFILE_TRANSLATED = input("Enter empty translated filename: [default: translated.txt] (useful for translating several languages)\n")
if not OUTFILE_TRANSLATED:
    OUTFILE_TRANSLATED = "translated.txt"


create_dir(WORK_DIRECTORY)
filename = f = open(WORK_DIRECTORY + "/" + get_filename(OUTFILE), "w")

#initialize empty translated file
filenameTranslated = get_filename(OUTFILE_TRANSLATED)
fTranslated = open(WORK_DIRECTORY + "/" + filenameTranslated, "w").close()
listFilesToTranslate.append(filenameTranslated)

print("==========================\n\n")

# read xml structure
tree = ET.parse(INFILE)
root = tree.getroot()
iElement = 0

for i in range(len(root)):
Example 22
def download_s3_object(s3_object, local_prefix):
    local_path = "%s/%s/%s" % (local_prefix, s3_object.bucket_name,
                               s3_object.key)
    create_dir(os.path.dirname(local_path))
    s3_object.download_file(local_path)
    return local_path
Example 23
def run(nav_file):

    # create data directory
    common.create_dir(data_dir)

    # read nav data
    nav_data = common.read_from_file(nav_file)

    # remove first 12 entries in nav_data
    # to compare results with benchmark
    del nav_data[1:13]

    # retrieve fund names
    # the first column (date) is skipped
    fund_names = nav_data[0].split(',')[1:]

    # initialize
    cashflows = []
    returns_halfyr = common.init_array_dict(fund_names)
    returns_annual = common.init_array_dict(fund_names)
    units_dict_halfyr = common.init_dict(fund_names)
    units_dict_annual = common.init_dict(fund_names)
    units_dict_overall = common.init_dict(fund_names)

    # remove header line
    del nav_data[0]

    # compute cashflows and returns
    cnt = len(nav_data)
    for i in range(0, cnt):

        row_data = nav_data[i].split(',')
        dt = datetime.strptime(row_data[0], '%d-%m-%Y')
        fund_nav = row_data[1:]
        fund_nav_dict = common.get_fund_nav_dict(fund_names, fund_nav)

        # half-yearly returns for each fund
        if i % 6 == 0 and i > 0:
            wealth = common.get_fund_wealth(fund_nav_dict, units_dict_halfyr)
            for fund in fund_names:
                cashflows_halfyr = cashflows[
                    i - 6:i]  # slice last 6 months cashflows
                cf = (dt, wealth[fund])
                cashflows_halfyr.append(cf)
                ret = common.xirr(cashflows_halfyr)
                returns_halfyr[fund].append(ret)

            # clean up for next pass
            units_dict_halfyr = common.init_dict(fund_names)

        # annual returns for each fund
        if i % 12 == 0 and i > 0:
            wealth = common.get_fund_wealth(fund_nav_dict, units_dict_annual)
            for fund in fund_names:
                cashflows_annual = cashflows[
                    i - 12:i]  # slice last 12 months cashflows
                cf = (dt, wealth[fund])
                cashflows_annual.append(cf)
                ret = common.xirr(cashflows_annual)
                returns_annual[fund].append(ret)

            # clean up for next pass
            units_dict_annual = common.init_dict(fund_names)

        # no investment on the last date
        if i == cnt - 1:
            break

        # invested units
        for fund in fund_names:
            nav = fund_nav_dict[fund]
            units = mnt_inv / nav
            units_dict_halfyr[fund] += units
            units_dict_annual[fund] += units
            units_dict_overall[fund] += units

        # cash outflow
        cf = (dt, -mnt_inv)
        cashflows.append(cf)

    file_data = []

    header_line = \
      'Fund,Investment,Wealth,Absolute Return,Annualized Return,' + \
      'Half-Yr Return Mean,Half-Yr Return Std Dev,Half-Yr Sharpe,' + \
      'Annual Return Mean,Annual Return Std Dev,Annual Sharpe'
    file_data.append(header_line)

    # total investment
    num_inv = len(cashflows)
    total_inv = num_inv * mnt_inv

    # final wealth
    nav_line = nav_data[cnt - 1].split(',')[1:]
    fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
    wealth = common.get_fund_wealth(fund_nav_dict, units_dict_overall)

    # performance stats for each fund
    last_date = nav_data[cnt - 1].split(',')[0]
    dt = datetime.strptime(last_date, '%d-%m-%Y')
    for fund in sorted(fund_names):
        fund_cashflows = cashflows[:]
        cf = (dt, wealth[fund])
        fund_cashflows.append(cf)
        abs_return = ((wealth[fund] / total_inv) - 1)
        ann_return = common.xirr(fund_cashflows)

        hfr = returns_halfyr[fund]
        halfyr_rf_rate = common.get_rf_rate('half-yearly')
        halfyr_return_mean = numpy.mean(hfr)
        halfyr_return_std = numpy.std(hfr)
        halfyr_sharpe = common.get_sharpe_ratio(hfr, halfyr_rf_rate)

        afr = returns_annual[fund]
        annual_rf_rate = common.get_rf_rate('annual')
        annual_return_mean = numpy.mean(afr)
        annual_return_std = numpy.std(afr)
        annual_sharpe = common.get_sharpe_ratio(afr, annual_rf_rate)

        line_data = \
          fund + ',' + str(total_inv) + ',' + str(wealth[fund]) + ',' + \
          str(abs_return) + ',' + str(ann_return) + ',' + \
          str(halfyr_return_mean) + ',' + str(halfyr_return_std) + ',' + \
          str(halfyr_sharpe) + ',' + str(annual_return_mean) + ',' + \
          str(annual_return_std) + ',' + str(annual_sharpe)
        file_data.append(line_data)

    regular_sip_file = os.path.join(data_dir, regular_sip_file_name)
    common.write_to_file(regular_sip_file, file_data)
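The common.get_sharpe_ratio helper used by these scripts is not shown on this page. A minimal sketch of what it might look like, assuming the conventional definition (mean excess return over its standard deviation) and the call signature seen above (a list of periodic returns plus a risk-free rate):

import numpy


def get_sharpe_ratio(returns, rf_rate):
    """Sharpe ratio of a list of periodic returns against a per-period risk-free rate."""
    if not returns:
        return 0.0
    # excess return of each period over the risk-free rate
    excess = [r - rf_rate for r in returns]
    std = numpy.std(excess)
    if std == 0:
        return 0.0
    return numpy.mean(excess) / std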
# import require packages 
import os 
import json 
import itertools 
from common import color, create_dir
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, JSON_DATA_PATH

print "Now starting with parsing the hive script to a table layout."

try: 
   os.stat(os.path.dirname(BIG_HIVE_SCRIPT))
except OSError: 
   print color.RED + "WARNING! HIVE SCRIPT PATH DOES NOT EXIST!" + color.END
   print "Check if everything went alright during " + color.UNDERLINE + "hivescripts.py" + color.END 

create_dir(os.path.dirname(JSON_DATA_PATH))

hive_blob = open(BIG_HIVE_SCRIPT,"r").read()

# this ugly script creates a dict. key = table_name, values = [colnames]
table_blobs = hive_blob.split("DROP TABLE IF EXISTS ")
table_info = {} 
for table in table_blobs[1:]:
   name = table[0:table.find(';')]
   print 'I have found a table: ' + color.UNDERLINE + name + color.END 
   table_info[ name ] = [] 
   colnames = table[table.find('(')+1:table.find(')')]
   for col in colnames.replace('\n','').split(','): 
      table_info[ name ].append( col[:col.find(" ")] )

# this is the end result for d3 
Example 25
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, dry_run
from common import create_dir, color, get_files_in_dir_with_extension
import os
import subprocess
import sys

#Get files with extension in the defined directory
DATA_FILES = get_files_in_dir_with_extension(PATH, '.csv')
DATA_FILES.extend(get_files_in_dir_with_extension(PATH, '.txt'))

#If we did not find CSV or TXT files in the folder, just exit.
if not DATA_FILES:
    print color.RED + "No csv or txt found in directory "+PATH+ color.END
    sys.exit(0)

#Create the Hive script directory if not exists
create_dir(HIVE_SCRIPT_PATH)

# define a bunch of nice helper functions

'''
For each file create a directory, because in Hive we need to put the data into a folder named the table
'''

def generate_folder(datafile): 
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory

'''
Get only the first few lines from each file. This is used to generate the 
Example 26
    parser.add_argument('--epsilon', type=float, nargs='+',
                        default=[1e-8], help='Specify the epsilon hyperparameter for Adam.')

    args = parser.parse_args()
    trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()

    X, y = preprocess_data(trainData, trainTarget)
    N, d = X.shape
    Xt, yt = preprocess_data(testData, testTarget)
    Xv, yv = preprocess_data(validData, validTarget)

    epochs = args.epochs[0]

    # Output path
    path = args.path[0]
    create_dir(path)

    batch_size = args.batch_size[0]
    optimizer = args.optimizer[0]
    loss_type = args.loss[0]

    if optimizer == 'gd':
        for alpha, reg in itertools.product(args.alpha, args.reg):
            params = {
                'alpha': alpha,
                'reg': reg,
                'batchsize': batch_size,
            }

            model_file, loss_file, time_file = make_filenames(
                path,
Example 27
def generate_folder(datafile):
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory
Example 28
from saveES import ElasticSearch
from stomp_engine import MessSendOrRecv

from common import Asset
from common import create_dir
from common import case_log
from common import unzip
from common import return_xml_status
from common import copy_report_to_local
from common import send_report_to_qloud
from kafka_engine import kafka_engine

from config import *

# Create the directories where cases are saved
create_dir(RESULT_PATH, ZIP_PATH)

asset = Asset(host=ASSETS_HOST,
              port=ASSETS_PORT,
              user=ASSETS_USER,
              passwd=ASSETS_PWD)

app = Flask(__name__)

test_runner_plugin = TestRunnerPlugin()
test_runner_plugin.enable()


# Test engine URL
@app.route('/case/engine', methods=["POST"])
def case_engine_by_rest():
Example 29
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, dry_run
from common import create_dir, color, get_files_in_dir_with_extension
import os
import subprocess
import sys

#Get files with extension in the defined directory
DATA_FILES = get_files_in_dir_with_extension(PATH, '.csv')
DATA_FILES.extend(get_files_in_dir_with_extension(PATH, '.txt'))

#If we did not find CSV or TXT files in the folder, just exit.
if not DATA_FILES:
    print color.RED + "No csv or txt found in directory " + PATH + color.END
    sys.exit(0)

#Create the Hive script directory if not exists
create_dir(HIVE_SCRIPT_PATH)

# define a bunch of nice helper functions
'''
For each file create a directory, because in Hive we need to put the data into a folder named the table
'''


def generate_folder(datafile):
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory


'''
Example 30
def lambda_handler(event, context):
    if AV_SCAN_ROLE_ARN:
        sts_client = boto3.client("sts")
        sts_response = sts_client.assume_role(
            RoleArn=AV_SCAN_ROLE_ARN, RoleSessionName="AVScanRoleAssumption"
        )
        session = boto3.session.Session(
            aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
            aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
            aws_session_token=sts_response["Credentials"]["SessionToken"],
        )
        s3_cross_account = session.resource("s3")
        s3_cross_account_client = session.client("s3")
        sns_cross_account_client = session.client("sns")
    else:
        s3_cross_account = boto3.resource("s3")
        s3_cross_account_client = boto3.client("s3")
        sns_cross_account_client = boto3.client("sns")

    s3_local_account = boto3.resource("s3")
    s3_local_account_client = boto3.client("s3")
    sns_local_account_client = boto3.client("sns")

    # Get some environment variables
    ENV = os.getenv("ENV", "")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    print("Event received: %s" % event)
    s3_object = event_object(event, s3_resource=s3_cross_account)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3_cross_account, s3_object)

    if object_does_not_require_scan(
        s3_cross_account_client, s3_object.bucket_name, s3_object.key
    ):
        if AV_STATUS_SNS_ARN not in [None, ""]:
            sns_skip_scan(
                sns_local_account_client, s3_object, AV_STATUS_SNS_ARN, get_timestamp()
            )
        print(
            "Scan of s3://%s was skipped due to the file being safely generated by a VISO process"
            % os.path.join(s3_object.bucket_name, s3_object.key)
        )
    else:
        # Publish the start time of the scan
        if AV_SCAN_START_SNS_ARN not in [None, ""]:
            start_scan_time = get_timestamp()
            sns_start_scan(
                sns_local_account_client,
                s3_object,
                AV_SCAN_START_SNS_ARN,
                start_scan_time,
            )

        file_path = get_local_path(s3_object, "/tmp")
        create_dir(os.path.dirname(file_path))
        s3_object.download_file(file_path)

        to_download = clamav.update_defs_from_s3(
            s3_local_account_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
        )

        for download in to_download.values():
            s3_path = download["s3_path"]
            local_path = download["local_path"]
            print("Downloading definition file %s from s3://%s" % (local_path, s3_path))
            s3_local_account.Bucket(AV_DEFINITION_S3_BUCKET).download_file(
                s3_path, local_path
            )
            print("Downloading definition file %s complete!" % (local_path))
        scan_result, scan_signature = clamav.scan_file(file_path)
        print(
            "Scan of s3://%s resulted in %s\n"
            % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)
        )

        result_time = get_timestamp()
        # Set the properties on the object with the scan results
        if "AV_UPDATE_METADATA" in os.environ:
            set_av_metadata(s3_object, scan_result, scan_signature, result_time)
        set_av_tags(
            s3_cross_account_client, s3_object, scan_result, scan_signature, result_time
        )

        # Publish the scan results
        if AV_STATUS_SNS_ARN not in [None, ""]:
            sns_scan_results(
                sns_local_account_client,
                s3_object,
                AV_STATUS_SNS_ARN,
                scan_result,
                scan_signature,
                result_time,
            )

        # Publish clean scan results cross account
        if (
            scan_result == AV_STATUS_CLEAN
            and str_to_bool(AV_STATUS_SNS_PUBLISH_CLEAN)
            and AV_STATUS_CLEAN_SNS_ARN not in [None, ""]
        ):
            sns_scan_results(
                sns_cross_account_client,
                s3_object,
                AV_STATUS_CLEAN_SNS_ARN,
                scan_result,
                scan_signature,
                result_time,
            )

        metrics.send(
            env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result
        )
        # Delete downloaded file to free up room on re-usable lambda function container
        try:
            os.remove(file_path)
        except OSError:
            pass
        if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
            sns_delete_results(s3_object, scan_result)
            delete_s3_object(s3_object)

    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)
Example 31
def generate_folder(datafile): 
    directory = os.path.splitext(datafile)[0]
    create_dir(directory)
    print "Created the following directory : " + directory
    return directory
Example 32
# import require packages
import os
import json
import itertools
from common import color, create_dir
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, BIG_HIVE_SCRIPT, JSON_DATA_PATH

print "Now starting with parsing the hive script to a table layout."

try:
    os.stat(os.path.dirname(BIG_HIVE_SCRIPT))
except OSError:
    print color.RED + "WARNING! HIVE SCRIPT PATH DOES NOT EXIST!" + color.END
    print "Check if everything went alright during " + color.UNDERLINE + "hivescripts.py" + color.END

create_dir(os.path.dirname(JSON_DATA_PATH))

hive_blob = open(BIG_HIVE_SCRIPT, "r").read()

# this ugly script creates a dict. key = table_name, values = [colnames]
table_blobs = hive_blob.split("DROP TABLE IF EXISTS ")
table_info = {}
for table in table_blobs[1:]:
    name = table[0:table.find(';')]
    print 'I have found a table: ' + color.UNDERLINE + name + color.END
    table_info[name] = []
    colnames = table[table.find('(') + 1:table.find(')')]
    for col in colnames.replace('\n', '').split(','):
        table_info[name].append(col[:col.find(" ")])

# this is the end result for d3
Example 33
def lambda_handler(event, context):
    s3 = boto3.resource("s3")
    s3_client = boto3.client("s3")
    sns_client = boto3.client("sns")

    # Get some environment variables
    ENV = os.getenv("ENV", "")
    EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    s3_object = event_object(event, event_source=EVENT_SOURCE)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3, s3_object)

    # Publish the start time of the scan
    if AV_SCAN_START_SNS_ARN not in [None, ""]:
        start_scan_time = get_timestamp()
        sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN,
                       start_scan_time)

    file_path = get_local_path(s3_object, "/tmp")
    create_dir(os.path.dirname(file_path))
    s3_object.download_file(file_path)

    to_download = clamav.update_defs_from_s3(s3_client,
                                             AV_DEFINITION_S3_BUCKET,
                                             AV_DEFINITION_S3_PREFIX)

    for download in to_download.values():
        s3_path = download["s3_path"]
        local_path = download["local_path"]
        print("Downloading definition file %s from s3://%s/%s" %
              (local_path, AV_DEFINITION_S3_BUCKET, s3_path))
        s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path)
        print("Downloading definition file %s complete!" % (local_path))
    scan_result, scan_signature = clamav.scan_file(file_path)
    print("Scan of s3://%s resulted in %s\n" %
          (os.path.join(s3_object.bucket_name, s3_object.key), scan_result))

    result_time = get_timestamp()
    # Set the properties on the object with the scan results
    if "AV_UPDATE_METADATA" in os.environ:
        set_av_metadata(s3_object, scan_result, scan_signature, result_time)
    set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time)

    # Publish the scan results
    if AV_STATUS_SNS_ARN not in [None, ""]:
        sns_scan_results(
            sns_client,
            s3_object,
            AV_STATUS_SNS_ARN,
            scan_result,
            scan_signature,
            result_time,
        )

    metrics.send(env=ENV,
                 bucket=s3_object.bucket_name,
                 key=s3_object.key,
                 status=scan_result)
    # Delete downloaded file to free up room on re-usable lambda function container
    try:
        os.remove(file_path)
    except OSError:
        pass
    if str_to_bool(
            AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
        delete_s3_object(s3_object)
    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)
Example 34
def run(nav_file, rank_file):
  """
  Generates return statistics based on sharpe ratio ranking data.
  """

  # create data directory
  common.create_dir(data_dir)

  # read data files
  nav_data = common.read_from_file(nav_file)
  rank_data = common.read_from_file(rank_file)
  
  # remove redundant entries in nav_data
  target_date = rank_data[1].split(',')[0]
  common.trim_data(nav_data, target_date)
  assert len(nav_data) == len(rank_data)

  # retrieve fund names
  # the first column (date) is skipped
  fund_names = nav_data[0].split(',')[1:]

  # initialize
  cashflows_halfyr = []
  cashflows_annual = []
  cashflows_overall = []
  returns_halfyr = []
  returns_annual = []
  units_dict_halfyr = common.init_dict(fund_names)
  units_dict_annual = common.init_dict(fund_names)
  units_dict_overall = common.init_dict(fund_names)

  # remove header line
  del nav_data[0]
  del rank_data[0] 

  # compute cashflows and returns
  cnt = len(nav_data)
  for i in range(0, cnt):
  
    (date, fund, nav) = rank_data[i].split(',')
    dt = datetime.strptime(date, '%d-%m-%Y')
  
    # half-yearly returns
    if i % 6 == 0 and i > 0:
      nav_line = nav_data[i].split(',')[1:]
      fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
      wealth = get_wealth(fund_nav_dict, units_dict_halfyr)
      cf = (dt, wealth)
      cashflows_halfyr.append(cf)
      ret = common.xirr(cashflows_halfyr)
      returns_halfyr.append(ret)

      # clean up for next pass
      del cashflows_halfyr[:]
      units_dict_halfyr = common.init_dict(fund_names)
      
    # annual returns
    if i % 12 == 0 and i > 0:
      nav_line = nav_data[i].split(',')[1:]
      nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
      wealth = get_wealth(nav_dict, units_dict_annual)
      cf = (dt, wealth)
      cashflows_annual.append(cf)
      ret = common.xirr(cashflows_annual)
      returns_annual.append(ret)

      # clean up for next pass
      del cashflows_annual[:]
      units_dict_annual = common.init_dict(fund_names)
    
    # no investment on the last date
    if i == cnt - 1:
      break
    
    # units invested
    units = mnt_inv / float(nav)
    units_dict_halfyr[fund] += units
    units_dict_annual[fund] += units
    units_dict_overall[fund] += units

    # cash outflow
    cf = (dt, -mnt_inv)
    cashflows_halfyr.append(cf)
    cashflows_annual.append(cf)
    cashflows_overall.append(cf)
  
  file_data = []
  
  # investment details
  file_data.append('Investment Details')
  file_data.append('Fund,Units')
  for f in units_dict_overall:
    if units_dict_overall[f] > 0:
      line_data = f + ','  + str(units_dict_overall[f])
      file_data.append(line_data)
  file_data.append('\n')
  
  # total investment
  num_inv = len(cashflows_overall)
  total_inv = num_inv * mnt_inv
  file_data.append('Investment,' + str(total_inv))
  
  # final wealth
  nav_line = nav_data[cnt - 1].split(',')[1:]
  fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
  wealth = get_wealth(fund_nav_dict, units_dict_overall)
  file_data.append('Wealth,' + str(wealth))
  
  # absolute return
  abs_return = ((wealth / total_inv) - 1)
  file_data.append('Absolute Return,' + str(abs_return))
  
  # annualized return
  last_date = nav_data[cnt - 1].split(',')[0]
  dt = datetime.strptime(last_date, '%d-%m-%Y')
  cf = (dt, wealth)
  cashflows_overall.append(cf)
  annual_return = common.xirr(cashflows_overall)
  file_data.append('Annualized Return,' + str(annual_return))
  
  file_data.append('\n')
  file_data.append('Stats,Mean,Std Deviation, Sharpe Ratio')
  
  # half-yearly return stats
  halfyr_rf_rate = common.get_rf_rate('half-yearly')
  halfyr_mean = numpy.mean(returns_halfyr)
  halfyr_stdev = numpy.std(returns_halfyr)
  halfyr_sharpe = common.get_sharpe_ratio(returns_halfyr, halfyr_rf_rate)
  file_data.append('Half-Yearly,' + str(halfyr_mean) + ',' + str(halfyr_stdev) + ',' + str(halfyr_sharpe))
  
  # annual return stats
  annual_rf_rate = common.get_rf_rate('annual')
  annual_mean = numpy.mean(returns_annual)
  annual_stdev = numpy.std(returns_annual)
  annual_sharpe = common.get_sharpe_ratio(returns_annual, annual_rf_rate)
  file_data.append('Annual,' + str(annual_mean) + ',' + str(annual_stdev) + ',' + str(annual_sharpe))
  
  # save stats to file
  benchmark_file = os.path.join(data_dir, benchmark_file_name)
  common.write_to_file(benchmark_file, file_data)
Example 35
def init():
  global connection
  common.create_dir()
  connection = create_connection()
  setup_db()
Example 36
def run(nav_file):
  
  # create data directory
  common.create_dir(data_dir)
  
  # read nav data
  nav_data = common.read_from_file(nav_file)
  
  # remove first 12 entries in nav_data 
  # to compare results with benchmark
  del nav_data[1:13]

  # retrieve fund names
  # the first column (date) is skipped
  fund_names = nav_data[0].split(',')[1:]
  
  # initialize
  cashflows = []
  returns_halfyr = common.init_array_dict(fund_names)
  returns_annual = common.init_array_dict(fund_names)
  units_dict_halfyr = common.init_dict(fund_names)
  units_dict_annual = common.init_dict(fund_names)
  units_dict_overall = common.init_dict(fund_names)
  
  # remove header line
  del nav_data[0]

  # compute cashflows and returns
  cnt = len(nav_data)
  for i in range(0, cnt):

    row_data = nav_data[i].split(',')
    dt = datetime.strptime(row_data[0], '%d-%m-%Y')
    fund_nav = row_data[1:]
    fund_nav_dict = common.get_fund_nav_dict(fund_names, fund_nav)
      
    # half-yearly returns for each fund
    if i % 6 == 0 and i > 0:
      wealth = common.get_fund_wealth(fund_nav_dict, units_dict_halfyr)
      for fund in fund_names:
        cashflows_halfyr = cashflows[i-6:i] # slice last 6 months cashflows
        cf = (dt, wealth[fund])
        cashflows_halfyr.append(cf)
        ret = common.xirr(cashflows_halfyr)
        returns_halfyr[fund].append(ret)

      # clean up for next pass
      units_dict_halfyr = common.init_dict(fund_names)

    # annual returns for each fund
    if i % 12 == 0 and i > 0:
      wealth = common.get_fund_wealth(fund_nav_dict, units_dict_annual)
      for fund in fund_names:
        cashflows_annual = cashflows[i-12:i] # slice last 12 months cashflows
        cf = (dt, wealth[fund])
        cashflows_annual.append(cf)
        ret = common.xirr(cashflows_annual)
        returns_annual[fund].append(ret)
      
      # clean up for next pass
      units_dict_annual = common.init_dict(fund_names)
    
    # no investment on the last date
    if i == cnt - 1:
      break
    
    # invested units
    for fund in fund_names:
      nav = fund_nav_dict[fund]
      units = mnt_inv / nav
      units_dict_halfyr[fund] += units
      units_dict_annual[fund] += units
      units_dict_overall[fund] += units
    
    # cash outflow
    cf = (dt, -mnt_inv)
    cashflows.append(cf)
  
  file_data = []
  
  header_line = \
    'Fund,Investment,Wealth,Absolute Return,Annualized Return,' + \
    'Half-Yr Return Mean,Half-Yr Return Std Dev,Half-Yr Sharpe,' + \
    'Annual Return Mean,Annual Return Std Dev,Annual Sharpe'
  file_data.append(header_line)
    
  # total investment
  num_inv = len(cashflows)
  total_inv = num_inv * mnt_inv
  
  # final wealth
  nav_line = nav_data[cnt - 1].split(',')[1:]
  fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
  wealth = common.get_fund_wealth(fund_nav_dict, units_dict_overall)
  
  # performance stats for each fund
  last_date = nav_data[cnt - 1].split(',')[0]
  dt = datetime.strptime(last_date, '%d-%m-%Y')
  for fund in sorted(fund_names):
    fund_cashflows = cashflows[:]
    cf = (dt, wealth[fund])
    fund_cashflows.append(cf)
    abs_return = ((wealth[fund] / total_inv) - 1)
    ann_return = common.xirr(fund_cashflows)
    
    hfr = returns_halfyr[fund]
    halfyr_rf_rate = common.get_rf_rate('half-yearly')
    halfyr_return_mean = numpy.mean(hfr)
    halfyr_return_std = numpy.std(hfr)
    halfyr_sharpe = common.get_sharpe_ratio(hfr, halfyr_rf_rate)

    afr = returns_annual[fund]
    annual_rf_rate = common.get_rf_rate('annual')
    annual_return_mean = numpy.mean(afr)
    annual_return_std = numpy.std(afr)
    annual_sharpe = common.get_sharpe_ratio(afr, annual_rf_rate)
    
    line_data = \
      fund + ',' + str(total_inv) + ',' + str(wealth[fund]) + ',' + \
      str(abs_return) + ',' + str(ann_return) + ',' + \
      str(halfyr_return_mean) + ',' + str(halfyr_return_std) + ',' + \
      str(halfyr_sharpe) + ',' + str(annual_return_mean) + ',' + \
      str(annual_return_std) + ',' + str(annual_sharpe)
    file_data.append(line_data)

  regular_sip_file = os.path.join(data_dir, regular_sip_file_name)
  common.write_to_file(regular_sip_file, file_data)
Example 37
def run(nav_file, rank_file):
    """
  Generates return statistics based on sharpe ratio ranking data.
  """

    # create data directory
    common.create_dir(data_dir)

    # read data files
    nav_data = common.read_from_file(nav_file)
    rank_data = common.read_from_file(rank_file)

    # remove redundant entries in nav_data
    target_date = rank_data[1].split(',')[0]
    common.trim_data(nav_data, target_date)
    assert len(nav_data) == len(rank_data)

    # retrieve fund names
    # the first column (date) is skipped
    fund_names = nav_data[0].split(',')[1:]

    # initialize
    cashflows_halfyr = []
    cashflows_annual = []
    cashflows_overall = []
    returns_halfyr = []
    returns_annual = []
    units_dict_halfyr = common.init_dict(fund_names)
    units_dict_annual = common.init_dict(fund_names)
    units_dict_overall = common.init_dict(fund_names)

    # remove header line
    del nav_data[0]
    del rank_data[0]

    # compute cashflows and returns
    cnt = len(nav_data)
    for i in range(0, cnt):

        (date, fund, nav) = rank_data[i].split(',')
        dt = datetime.strptime(date, '%d-%m-%Y')

        # half-yearly returns
        if i % 6 == 0 and i > 0:
            nav_line = nav_data[i].split(',')[1:]
            fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
            wealth = get_wealth(fund_nav_dict, units_dict_halfyr)
            cf = (dt, wealth)
            cashflows_halfyr.append(cf)
            ret = common.xirr(cashflows_halfyr)
            returns_halfyr.append(ret)

            # clean up for next pass
            del cashflows_halfyr[:]
            units_dict_halfyr = common.init_dict(fund_names)

        # annual returns
        if i % 12 == 0 and i > 0:
            nav_line = nav_data[i].split(',')[1:]
            nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
            wealth = get_wealth(nav_dict, units_dict_annual)
            cf = (dt, wealth)
            cashflows_annual.append(cf)
            ret = common.xirr(cashflows_annual)
            returns_annual.append(ret)

            # clean up for next pass
            del cashflows_annual[:]
            units_dict_annual = common.init_dict(fund_names)

        # no investment on the last date
        if i == cnt - 1:
            break

        # units invested
        units = mnt_inv / float(nav)
        units_dict_halfyr[fund] += units
        units_dict_annual[fund] += units
        units_dict_overall[fund] += units

        # cash outflow
        cf = (dt, -mnt_inv)
        cashflows_halfyr.append(cf)
        cashflows_annual.append(cf)
        cashflows_overall.append(cf)

    file_data = []

    # investment details
    file_data.append('Investment Details')
    file_data.append('Fund,Units')
    for f in units_dict_overall:
        if units_dict_overall[f] > 0:
            line_data = f + ',' + str(units_dict_overall[f])
            file_data.append(line_data)
    file_data.append('\n')

    # total investment
    num_inv = len(cashflows_overall)
    total_inv = num_inv * mnt_inv
    file_data.append('Investment,' + str(total_inv))

    # final wealth
    nav_line = nav_data[cnt - 1].split(',')[1:]
    fund_nav_dict = common.get_fund_nav_dict(fund_names, nav_line)
    wealth = get_wealth(fund_nav_dict, units_dict_overall)
    file_data.append('Wealth,' + str(wealth))

    # absolute return
    abs_return = ((wealth / total_inv) - 1)
    file_data.append('Absolute Return,' + str(abs_return))

    # annualized return
    last_date = nav_data[cnt - 1].split(',')[0]
    dt = datetime.strptime(last_date, '%d-%m-%Y')
    cf = (dt, wealth)
    cashflows_overall.append(cf)
    annual_return = common.xirr(cashflows_overall)
    file_data.append('Annualized Return,' + str(annual_return))

    file_data.append('\n')
    file_data.append('Stats,Mean,Std Deviation, Sharpe Ratio')

    # half-yearly return stats
    halfyr_rf_rate = common.get_rf_rate('half-yearly')
    halfyr_mean = numpy.mean(returns_halfyr)
    halfyr_stdev = numpy.std(returns_halfyr)
    halfyr_sharpe = common.get_sharpe_ratio(returns_halfyr, halfyr_rf_rate)
    file_data.append('Half-Yearly,' + str(halfyr_mean) + ',' +
                     str(halfyr_stdev) + ',' + str(halfyr_sharpe))

    # annual return stats
    annual_rf_rate = common.get_rf_rate('annual')
    annual_mean = numpy.mean(returns_annual)
    annual_stdev = numpy.std(returns_annual)
    annual_sharpe = common.get_sharpe_ratio(returns_annual, annual_rf_rate)
    file_data.append('Annual,' + str(annual_mean) + ',' + str(annual_stdev) +
                     ',' + str(annual_sharpe))

    # save stats to file
    benchmark_file = os.path.join(data_dir, benchmark_file_name)
    common.write_to_file(benchmark_file, file_data)
import os
import sys
from settings import PATH, HIVE_SCRIPT_PATH, HIVE_DB, HDFS_PATH, HIVE_QUERY_SCRIPTS_PATH
from common import create_dir, color, get_files_in_dir_with_extension
        
#FILES = [file for file in os.listdir(HIVE_SCRIPT_PATH)]
#DATA_FILES = [ thing for thing in FILES if os.path.isfile(HIVE_SCRIPT_PATH+"/"+thing) and thing.endswith('.hive')]
DATA_FILES = get_files_in_dir_with_extension(HIVE_SCRIPT_PATH, '.hive')

if not DATA_FILES:
    print color.RED + "No hive files found in directory "+PATH+ color.END
    sys.exit(0)

#Hive variables
HIVE_SETUP="use "+HIVE_DB+"; set hive.cli.print.header=true; "

create_dir(HIVE_QUERY_SCRIPTS_PATH)

def generate_hive_queries(fields, tablename):
    #Create dir where we will write the results
    RESULTS_DIR=HIVE_QUERY_SCRIPTS_PATH+'/'+tablename
    create_dir(RESULTS_DIR)
    query_file = open( HIVE_QUERY_SCRIPTS_PATH + '/' + tablename + '.sh', "w")
    query_file.write("#!/bin/bash \n")
    file_name=RESULTS_DIR+'/'+tablename+'.txt'
    to_execute = "hive -e \""+HIVE_SETUP+" select count(*) as all_rows from "+tablename+";\" > "+file_name+'\n';
    to_execute+="echo \"----\" >>"+file_name+'\n';
    to_execute += "hive -e \""+HIVE_SETUP+" describe "+tablename+";\" > "+file_name+'\n';
    query_file.write(to_execute)
    for field in fields:
        column_name=field[0]
        file_name=RESULTS_DIR+'/'+column_name+'.txt'
Example 39
def lambda_handler(event, context):
    s3 = boto3.resource("s3")
    s3_client = boto3.client("s3")
    sns_client = boto3.client("sns")

    # Get some environment variables
    ENV = os.getenv("ENV", "")
    EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    s3_object = event_object(event, event_source=EVENT_SOURCE)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3, s3_object)

    # Publish the start time of the scan
    if AV_SCAN_START_SNS_ARN not in [None, ""]:
        start_scan_time = get_timestamp()
        sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time)

    file_path = get_local_path(s3_object, "/tmp")
    create_dir(os.path.dirname(file_path))
    s3_object.download_file(file_path)

    to_download = clamav.update_defs_from_s3(
        s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
    )

    for download in to_download.values():
        s3_path = download["s3_path"]
        local_path = download["local_path"]
        print("Downloading definition file %s from s3://%s" % (local_path, s3_path))
        s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path)
        print("Downloading definition file %s complete!" % (local_path))

    # calculate the md5 of the virus definition files
    definition_md5 = clamav.get_definition_md5()

    # check the file for an existing definition md5 hash
    s3_definition_md5 = clamav.md5_from_s3_tags(s3_client, s3_object.bucket_name, s3_object.key, AV_DEFINITION_MD5_METADATA)

    # skip if there is a match
    if definition_md5 == s3_definition_md5:
        print("Not scanning because local defintion md5 matches s3 defintion md5.")
        return

    # Set AV_STATUS_SKIPPED if file exceeds maximum file size
    s3_object_size_result = check_s3_object_size(s3, s3_object)
    if s3_object_size_result == AV_STATUS_SKIPPED:
        scan_result = s3_object_size_result
        scan_signature = AV_SIGNATURE_UNKNOWN
    else:
        scan_result, scan_signature = clamav.scan_file(file_path)

    print(
        "Scan of s3://%s resulted in %s\n"
        % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)
    )

    result_time = get_timestamp()
    # Set the properties on the object with the scan results
    if "AV_UPDATE_METADATA" in os.environ:
        # AV_UPDATE_METADATA doesn't seem to be set anywhere - likely can't get here
        set_av_metadata(s3_object, scan_result, scan_signature, result_time)
    set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time, definition_md5)

    # Publish the scan results
    if AV_STATUS_SNS_ARN not in [None, ""]:
        sns_scan_results(
            sns_client,
            s3_object,
            AV_STATUS_SNS_ARN,
            scan_result,
            scan_signature,
            result_time,
        )

    metrics.send(
        env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result
    )
    # Delete downloaded file to free up room on re-usable lambda function container
    try:
        os.remove(file_path)
    except OSError:
        pass
    if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
        delete_s3_object(s3_object)
    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)