def get_sql_conn():
    """Open and return a new pymysql connection.

    The host, user and password are read from the ``'db'`` section of
    ``configuration.get_config()`` (keys ``dbhost``, ``dbuser`` and
    ``dbpassword``). No database name is passed to ``pymysql.connect``.
    The caller receives the connection object as returned by pymysql.
    """
    settings = configuration.get_config()['db']
    return pymysql.connect(
        host=settings['dbhost'],
        user=settings['dbuser'],
        password=settings['dbpassword'],
    )
def log_info(message, params=None):
    """Print an informational message and, outside AWS, append it to the log file.

    Args:
        message: Text to log. When ``params`` is given it is used as a
            ``%``-style format string.
        params: Optional value (or tuple of values) interpolated into
            ``message`` with the ``%`` operator. ``None`` means ``message``
            is emitted unchanged.

    The message is always printed. When the ``AWS_EXECUTION_ENV``
    environment variable is not set, it is also appended (without an added
    newline) to the file named by
    ``configuration.get_config()['logging']['logFileName']``.
    """
    print_message = message if params is None else message % params
    print(print_message)

    is_aws = os.environ.get("AWS_EXECUTION_ENV") is not None
    if is_aws:
        return

    log_file_name = configuration.get_config()['logging']['logFileName']
    # Use a context manager so the handle is flushed and closed on every
    # call instead of being leaked until garbage collection.
    with open(log_file_name, 'a') as logs:
        logs.write(print_message)
def log_error(error_message, params=None):
    """Print an error message and, outside AWS, append it to the error file.

    Args:
        error_message: Text to log. When ``params`` is given it is used as
            a ``%``-style format string.
        params: Optional value (or tuple of values) interpolated into
            ``error_message`` with the ``%`` operator. ``None`` means
            ``error_message`` is emitted unchanged.

    The message is always printed. When the ``AWS_EXECUTION_ENV``
    environment variable is not set, it is also appended (without an added
    newline) to the file named by
    ``configuration.get_config()['logging']['errorFileName']``.
    """
    print_error = error_message if params is None else error_message % params
    print(print_error)

    is_aws = os.environ.get("AWS_EXECUTION_ENV") is not None
    if is_aws:
        return

    errors_file_name = configuration.get_config()['logging']['errorFileName']
    # Use a context manager so the handle is flushed and closed on every
    # call instead of being leaked until garbage collection.
    with open(errors_file_name, 'a') as errors:
        errors.write(print_error)
def run():
    """Run the configured scraping tasks, logging a start and finish message.

    Tasks come from ``create_tasks_from_config()``. The ``'execution'``
    section of ``configuration.get_config()`` controls the run:

    * ``purgeExistingData`` equal to the string ``'True'`` calls
      ``clear_all_tables()`` before any task runs.
    * ``executionMode``, when not ``None``, is looked up in
      ``ExecutionMode`` and only tasks whose ``executionMode`` attribute
      equals it are passed to ``run_tasks``; otherwise all tasks are.
    """
    # NOTE(review): the original indentation was lost; this layout assumes
    # the ``else`` belongs to the ``executionMode`` check, so nothing runs
    # when the 'execution' section is None — confirm against the original.
    logging.log_info('Starting scraping data from boxofficemojo.com...\n\n')

    tasks = create_tasks_from_config()
    execution = configuration.get_config()['execution']

    if execution is not None:
        purge_flag = execution['purgeExistingData']
        if purge_flag is not None and purge_flag == 'True':
            clear_all_tables()

        mode_name = execution['executionMode']
        if mode_name is None:
            run_tasks(tasks)
        else:
            wanted_mode = ExecutionMode[mode_name]
            selected = [task for task in tasks
                        if task.executionMode == wanted_mode]
            run_tasks(selected)

    logging.log_info('Finished scraping data from boxofficemojo.com.')
def get_data_file_directory():
    """Return the data directory configured for the current environment.

    Reads the ``'data'`` section of ``configuration.get_config()`` and
    returns its ``awsDir`` entry when the ``AWS_EXECUTION_ENV`` environment
    variable is set, or its ``localDir`` entry otherwise.
    """
    data_settings = configuration.get_config()['data']
    if os.environ.get("AWS_EXECUTION_ENV") is None:
        return data_settings['localDir']
    return data_settings['awsDir']