コード例 #1
0
 def do_download_mainconfig(self, source_maincfg_path, temp_folder,
                            dest_maincfg_filename):
     """Fetch the main config via ``download_from_s3`` into ``temp_folder``.

     A file already present at the destination is removed before the
     download is started.

     Args:
         source_maincfg_path: Source location handed to ``download_from_s3``.
         temp_folder: Directory the destination path is built under.
         dest_maincfg_filename: File name joined onto ``temp_folder``.

     Returns:
         The destination path (``temp_folder`` joined with
         ``dest_maincfg_filename``).
     """
     local_path = os.path.join(temp_folder, dest_maincfg_filename)

     # Remove any previous copy so the download writes to a fresh path.
     stale_copy_present = os.path.exists(local_path)
     if stale_copy_present:
         os.remove(local_path)

     start_msg = ('INFO - start copying s3 main config - %s' %
                  source_maincfg_path)
     logger.info(start_msg)
     download_from_s3(source_maincfg_path, local_path)
     end_msg = ('INFO - end copying s3 main config - %s' %
                source_maincfg_path)
     logger.info(end_msg)

     return local_path
コード例 #2
0
    def copy_configs(self, cfg):
        """Download the data config and the data schema to local folders.

        Both files are fetched with ``download_from_s3`` from fixed S3
        locations. The data config is written to
        ``cfg["data_config_path"]/data_cfg.json`` and the schema to
        ``cfg["schema_path"]/sofr_schema.json``. An existing destination
        file is removed before each download.

        Args:
            cfg: Mapping that provides the ``"data_config_path"`` and
                ``"schema_path"`` destination folders.
        """
        # (remote path, cfg key of the target folder, target file name,
        #  start log format, end log format) -- processed in this order.
        downloads = (
            (
                "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_data_cfg.json",
                "data_config_path",
                "data_cfg.json",
                'INFO - start copying data config - %s to %s ',
                'INFO - end copying data config - %s to %s ',
            ),
            (
                "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/schema/s3_sofr_schema.json",
                "schema_path",
                "sofr_schema.json",
                'INFO - start copying data schema - %s to %s ',
                'INFO - end copying data schema - %s to %s ',
            ),
        )

        for remote_path, folder_key, filename, start_fmt, end_fmt in downloads:
            # The folder is looked up here, not up front, so a missing key
            # for the second entry only surfaces after the first download.
            local_path = os.path.join(cfg[folder_key], filename)
            if os.path.exists(local_path):
                os.remove(local_path)

            endpoints = (remote_path, local_path)
            logger.info(start_fmt % endpoints)
            download_from_s3(remote_path, local_path)
            logger.info(end_fmt % endpoints)
コード例 #3
0
    def do_copy_configs(self, cfg, temp_folder, source_datacfg_path,
                        source_schema_path, dest_datacfg_filename,
                        dest_schema_filename):
        """Download the data config and the data schema under ``temp_folder``.

        The data config goes to
        ``temp_folder/cfg["data_config_path"]/dest_datacfg_filename`` and the
        schema to ``temp_folder/cfg["schema_path"]/dest_schema_filename``.
        An existing destination file is removed before each download.

        Args:
            cfg: Mapping that provides the ``"data_config_path"`` and
                ``"schema_path"`` sub-folders.
            temp_folder: Root folder the destination paths are built under.
            source_datacfg_path: Source of the data config, handed to
                ``download_from_s3``.
            source_schema_path: Source of the schema, handed to
                ``download_from_s3``.
            dest_datacfg_filename: Local file name for the data config.
            dest_schema_filename: Local file name for the schema.
        """
        # (cfg key of the sub-folder, target file name, remote source,
        #  start log format, end log format) -- processed in this order.
        transfers = (
            (
                "data_config_path",
                dest_datacfg_filename,
                source_datacfg_path,
                'INFO - start copying data config - %s to %s ',
                'INFO - end copying data config - %s to %s ',
            ),
            (
                "schema_path",
                dest_schema_filename,
                source_schema_path,
                'INFO - start copying data schema - %s to %s ',
                'INFO - end copying data schema - %s to %s ',
            ),
        )

        for folder_key, filename, remote_path, start_fmt, end_fmt in transfers:
            # The sub-folder is looked up here, not up front, so a missing
            # key for the schema only surfaces after the config download.
            local_path = os.path.join(temp_folder, cfg[folder_key], filename)
            if os.path.exists(local_path):
                os.remove(local_path)

            endpoints = (remote_path, local_path)
            logger.info(start_fmt % endpoints)
            download_from_s3(remote_path, local_path)
            logger.info(end_fmt % endpoints)
コード例 #4
0
import sys
import os
from manager.zip_manager import load_dependencies
from manager.s3_manager import download_from_s3, upload_to_s3_repo_sofr

# Bootstrap step: fetch the deployment zip from S3 into /tmp and hand the
# downloaded file to load_dependencies. This runs at import time, before the
# remaining imports below.
# NOTE(review): presumably load_dependencies unpacks the archive so that the
# later imports can be resolved from /tmp/package -- confirm against
# manager.zip_manager.
download_path = "/tmp/data-collector-repo-sofr-app.zip"
s3_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/deployment/data-collector-repo-sofr-app.zip"
download_from_s3(s3_path, download_path)
load_dependencies(download_path)
# sys.path.insert(0, "/tmp/data-collector-repo-sofr-app")
# user_home = os.environ["HOME"]
# Expose /tmp/package for imports: sys.path.append covers the current
# process; PYTHONPATH is additionally set in this process's environment.
os.environ["PYTHONPATH"] = "/tmp/package:./"
sys.path.append("/tmp/package")

from logger import logger
import json
import requests
import os
from manager.dataframe_manager import read_dataframe
from manager.excel_manager import write_to_excel
from manager.avro_manager import convert_to_avro
# from manager.config_manager import ConfigManager
from manager.base_config_manager import BaseConfigManager
from manager.template_helper import recover_string_template
from manager.crawl_manager import Crawler

# temp_folder = "/tmp"
# source_maincfg_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_main_cfg.json"
# source_datacfg_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_data_cfg.json"
# source_schema_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/schema/s3_sofr_schema.json"
# dest_maincfg_filename = "main_cfg.json"
コード例 #5
0
    def __init__(self, eventobj, is_runningon_s3=False):
        """Build the merged configuration in ``self.cfgobj``.

        Steps, in order: pick (and, on S3, download) the main config, load
        it, optionally prepare temp paths and copy the remaining configs,
        load the data-source config, load the event object, and finally
        expand the ``url``, ``xls_filename`` and ``avro_filename`` templates.

        Args:
            eventobj: Event payload passed to ``self.load_eventobj``.
            is_runningon_s3: When true, the main config is downloaded from
                S3 to ``/tmp/main_cfg.json`` and, once loaded,
                ``self.prepare_tmp_paths`` and ``self.copy_configs`` are
                run. Otherwise the local ``main_cfg.json`` is used.

        Raises:
            Exception: Any error raised while loading the main config, the
                data config or the event object is logged and re-raised
                unchanged.
        """
        self.cfgobj = defaultdict()
        if is_runningon_s3:
            s3_main_config_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_main_cfg.json"
            tmp_main_config_path = "/tmp/main_cfg.json"
            # Remove any previous copy before downloading a new one.
            if os.path.exists(tmp_main_config_path):
                os.remove(tmp_main_config_path)
            logger.info('INFO - start copying s3 config - %s' %
                        s3_main_config_path)
            download_from_s3(s3_main_config_path, tmp_main_config_path)
            general_cfg_path = tmp_main_config_path
            logger.info('INFO - end copying s3 config - %s' %
                        s3_main_config_path)
        else:
            local_main_config_path = "main_cfg.json"
            logger.info('INFO - start copying local config - %s' %
                        local_main_config_path)
            general_cfg_path = local_main_config_path
            logger.info('INFO - end copying local config - %s' %
                        local_main_config_path)

        try:
            logger.info("INFO - start loading main config - %s" %
                        general_cfg_path)
            config_obj = self.load_cfg(general_cfg_path)
            self.cfgobj.update(config_obj)
            logger.info("INFO - end loading main config - %s" %
                        general_cfg_path)
        except Exception as err:
            logger.error("ERROR - fail to load main config - %s" %
                         general_cfg_path)
            logger.error(err)
            raise

        if is_runningon_s3:
            self.prepare_tmp_paths(self.cfgobj)
            self.copy_configs(self.cfgobj)

        # Bind the name before the try block: if building the path itself
        # fails (e.g. a missing config key), the handler below can still
        # format its message instead of raising UnboundLocalError and
        # masking the original error.
        data_cfg_path = None
        try:
            data_cfg_path = os.path.join(self.cfgobj["data_config_path"],
                                         self.cfgobj["source_config_filename"])
            logger.info("INFO - start loading data config - %s" %
                        data_cfg_path)
            config_obj = self.load_cfg(data_cfg_path, is_datasource_cfg=True)
            self.cfgobj.update(config_obj)
            logger.info("INFO - end loading data  config - %s" % data_cfg_path)
        except Exception as err:
            logger.error("ERROR - fail to load data config - %s" %
                         data_cfg_path)
            logger.error(err)
            raise

        try:
            logger.info("INFO - start loading event config - %s" % eventobj)
            self.load_eventobj(eventobj, self.cfgobj)
            logger.info("INFO - end loading event config - %s" % eventobj)
        except Exception as err:
            logger.error("ERROR - fail to load event datetime - %s" % eventobj)
            logger.error(err)
            raise

        # Expand the templated values now that every config source is merged.
        self.cfgobj['url'] = self.recover_string_template(
            self.cfgobj, "url_template")
        self.cfgobj['xls_filename'] = self.recover_string_template(
            self.cfgobj, "xls_filename_template")
        self.cfgobj['avro_filename'] = self.recover_string_template(
            self.cfgobj, "avro_filename_template")