def do_download_mainconfig(self, source_maincfg_path, temp_folder, dest_maincfg_filename):
    """Download the main config from S3 into ``temp_folder``.

    Any file already present at the destination is removed first, so the
    download always writes to a path that does not exist yet.

    Args:
        source_maincfg_path: S3 location of the main config file.
        temp_folder: Local directory that receives the download.
        dest_maincfg_filename: File name to use inside ``temp_folder``.

    Returns:
        The local path the config was downloaded to.
    """
    local_copy = os.path.join(temp_folder, dest_maincfg_filename)

    # Get rid of a leftover copy before fetching a new one.
    leftover_present = os.path.exists(local_copy)
    if leftover_present:
        os.remove(local_copy)

    logger.info('INFO - start copying s3 main config - %s' % source_maincfg_path)
    download_from_s3(source_maincfg_path, local_copy)
    logger.info('INFO - end copying s3 main config - %s' % source_maincfg_path)

    return local_copy
def copy_configs(self, cfg):
    """Download the data config and the SOFR schema from their fixed S3 locations.

    The data config is written to ``cfg["data_config_path"]/data_cfg.json``
    and the schema to ``cfg["schema_path"]/sofr_schema.json``. A file that
    already exists at either destination is deleted before the download.

    Args:
        cfg: Mapping that provides the ``"data_config_path"`` and
            ``"schema_path"`` destination directories.
    """

    def _refresh(remote_uri, local_file, start_fmt, end_fmt):
        # Delete a stale local file, then download, logging before and after.
        if os.path.exists(local_file):
            os.remove(local_file)
        logger.info(start_fmt % (remote_uri, local_file))
        download_from_s3(remote_uri, local_file)
        logger.info(end_fmt % (remote_uri, local_file))

    # Data config first; the schema destination is only looked up afterwards,
    # which keeps the original evaluation order.
    config_uri = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_data_cfg.json"
    config_file = os.path.join(cfg["data_config_path"], "data_cfg.json")
    _refresh(
        config_uri,
        config_file,
        'INFO - start copying data config - %s to %s ',
        'INFO - end copying data config - %s to %s ',
    )

    schema_uri = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/schema/s3_sofr_schema.json"
    schema_file = os.path.join(cfg["schema_path"], "sofr_schema.json")
    _refresh(
        schema_uri,
        schema_file,
        'INFO - start copying data schema - %s to %s ',
        'INFO - end copying data schema - %s to %s ',
    )
def do_copy_configs(self, cfg, temp_folder, source_datacfg_path, source_schema_path,
                    dest_datacfg_filename, dest_schema_filename):
    """Download a data config and a schema file from S3 below ``temp_folder``.

    Destinations are built as
    ``temp_folder / cfg["data_config_path"] / dest_datacfg_filename`` and
    ``temp_folder / cfg["schema_path"] / dest_schema_filename``. A file that
    already exists at a destination is removed before its download starts.

    Args:
        cfg: Mapping that provides ``"data_config_path"`` and ``"schema_path"``.
        temp_folder: Leading path component for both destinations.
        source_datacfg_path: S3 location of the data config.
        source_schema_path: S3 location of the schema.
        dest_datacfg_filename: Local file name for the data config.
        dest_schema_filename: Local file name for the schema.
    """
    # --- data config ---
    datacfg_target = os.path.join(temp_folder, cfg["data_config_path"], dest_datacfg_filename)
    datacfg_is_stale = os.path.exists(datacfg_target)
    if datacfg_is_stale:
        os.remove(datacfg_target)
    datacfg_log_args = (source_datacfg_path, datacfg_target)
    logger.info('INFO - start copying data config - %s to %s ' % datacfg_log_args)
    download_from_s3(source_datacfg_path, datacfg_target)
    logger.info('INFO - end copying data config - %s to %s ' % datacfg_log_args)

    # --- schema ---
    schema_target = os.path.join(temp_folder, cfg["schema_path"], dest_schema_filename)
    schema_is_stale = os.path.exists(schema_target)
    if schema_is_stale:
        os.remove(schema_target)
    schema_log_args = (source_schema_path, schema_target)
    logger.info('INFO - start copying data schema - %s to %s ' % schema_log_args)
    download_from_s3(source_schema_path, schema_target)
    logger.info('INFO - end copying data schema - %s to %s ' % schema_log_args)
import sys
import os
from manager.zip_manager import load_dependencies
from manager.s3_manager import download_from_s3, upload_to_s3_repo_sofr

# Bootstrap step, executed at import time: fetch the deployment archive from
# S3 into /tmp and pass it to load_dependencies() before the remaining imports
# below are executed.
download_path = "/tmp/data-collector-repo-sofr-app.zip"
s3_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/deployment/data-collector-repo-sofr-app.zip"
download_from_s3(s3_path, download_path)
# NOTE(review): presumably unpacks the archive so that /tmp/package exists --
# confirm against manager.zip_manager.
load_dependencies(download_path)
# sys.path.insert(0, "/tmp/data-collector-repo-sofr-app")
# user_home = os.environ["HOME"]
# Changing PYTHONPATH at runtime does not alter this interpreter's import
# path, so /tmp/package is also appended to sys.path explicitly.
os.environ["PYTHONPATH"] = "/tmp/package:./"
sys.path.append("/tmp/package")

# The imports below run only after the path setup above; presumably some of
# them resolve from /tmp/package -- verify before reordering.
from logger import logger
import json
import requests
import os
from manager.dataframe_manager import read_dataframe
from manager.excel_manager import write_to_excel
from manager.avro_manager import convert_to_avro
# from manager.config_manager import ConfigManager
from manager.base_config_manager import BaseConfigManager
from manager.template_helper import recover_string_template
from manager.crawl_manager import Crawler

# Commented-out sample values (temp folder, S3 sources, destination name).
# temp_folder = "/tmp"
# source_maincfg_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_main_cfg.json"
# source_datacfg_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_data_cfg.json"
# source_schema_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/schema/s3_sofr_schema.json"
# dest_maincfg_filename = "main_cfg.json"
def __init__(self, eventobj, is_runningon_s3=False):
    """Build ``self.cfgobj`` from the main config, the data config and the event.

    Steps, in order:

    1. Locate the main config: downloaded from S3 to ``/tmp/main_cfg.json``
       when ``is_runningon_s3`` is true, otherwise the local ``main_cfg.json``.
    2. Load the main config into ``self.cfgobj``.
    3. When running on S3, call ``prepare_tmp_paths`` and ``copy_configs``.
    4. Load the data-source config named by the ``"data_config_path"`` and
       ``"source_config_filename"`` entries and merge it into ``self.cfgobj``.
    5. Apply the event object through ``load_eventobj``.
    6. Fill in ``url``, ``xls_filename`` and ``avro_filename`` from their
       ``*_template`` entries.

    Args:
        eventobj: Event payload handed to ``load_eventobj``.
        is_runningon_s3: When true, configs are fetched from S3 into /tmp.

    Raises:
        Exception: Any error raised while loading the main config, the data
            config or the event is logged and re-raised unchanged.
    """
    self.cfgobj = defaultdict()

    if is_runningon_s3:
        s3_main_config_path = "s3://eternity02.deployment/lambda/data-collector-repo-sofr-app/config/s3_main_cfg.json"
        tmp_main_config_path = "/tmp/main_cfg.json"
        # Remove a stale copy so the download always lands on a fresh path.
        if os.path.exists(tmp_main_config_path):
            os.remove(tmp_main_config_path)
        logger.info('INFO - start copying s3 config - %s' % s3_main_config_path)
        download_from_s3(s3_main_config_path, tmp_main_config_path)
        general_cfg_path = tmp_main_config_path
        logger.info('INFO - end copying s3 config - %s' % s3_main_config_path)
    else:
        local_main_config_path = "main_cfg.json"
        logger.info('INFO - start copying local config - %s' % local_main_config_path)
        general_cfg_path = local_main_config_path
        logger.info('INFO - end copying local config - %s' % local_main_config_path)

    try:
        logger.info("INFO - start loading main config - %s" % general_cfg_path)
        config_obj = self.load_cfg(general_cfg_path)
        self.cfgobj.update(config_obj)
        logger.info("INFO - end loading main config - %s" % general_cfg_path)
    except Exception as err:
        logger.error("ERROR - fail to load main config - %s" % general_cfg_path)
        logger.error(err)
        # Bare raise re-raises the active exception as-is.
        raise

    if is_runningon_s3:
        self.prepare_tmp_paths(self.cfgobj)
        self.copy_configs(self.cfgobj)

    # Bound before the try block: if one of the config keys is missing, the
    # handler below must still be able to log without raising
    # UnboundLocalError and hiding the original KeyError.
    data_cfg_path = None
    try:
        data_cfg_path = os.path.join(
            self.cfgobj["data_config_path"],
            self.cfgobj["source_config_filename"],
        )
        logger.info("INFO - start loading data config - %s" % data_cfg_path)
        config_obj = self.load_cfg(data_cfg_path, is_datasource_cfg=True)
        self.cfgobj.update(config_obj)
        logger.info("INFO - end loading data config - %s" % data_cfg_path)
    except Exception as err:
        logger.error("ERROR - fail to load data config - %s" % data_cfg_path)
        logger.error(err)
        raise

    try:
        # eventobj is wrapped in a 1-tuple so that a tuple-valued event is
        # formatted as a whole instead of being unpacked by the % operator.
        logger.info("INFO - start loading event config - %s" % (eventobj,))
        self.load_eventobj(eventobj, self.cfgobj)
        logger.info("INFO - end loading event config - %s" % (eventobj,))
    except Exception as err:
        logger.error("ERROR - fail to load event datetime - %s" % (eventobj,))
        logger.error(err)
        raise

    # Expand the templates now that main config, data config and event
    # values have all been merged into cfgobj.
    self.cfgobj['url'] = self.recover_string_template(
        self.cfgobj, "url_template")
    self.cfgobj['xls_filename'] = self.recover_string_template(
        self.cfgobj, "xls_filename_template")
    self.cfgobj['avro_filename'] = self.recover_string_template(
        self.cfgobj, "avro_filename_template")