def getLogger(name=None):
    """Configure logging: route records to MySQL or a file, plus Sentry for errors."""
    import logging
    import mySQLHandler
    # Assumes the raven Sentry client, which provides these two helpers.
    from raven.handlers.logging import SentryHandler
    from raven.conf import setup_logging
    # logging_dest, mysql_* and sentry are assumed module-level settings.

    LoggerManager._loggers[name] = logging.getLogger(name)
    LoggerManager._loggers[name].setLevel(logging.INFO)
    if logging_dest == 'mysql':
        db = {'host': mysql_hostname,
              'port': 3306,
              'dbuser': mysql_username,
              'dbpassword': mysql_password,
              'dbname': mysql_database}
        sqlh = mySQLHandler.mySQLHandler(db)
        LoggerManager._loggers[name].addHandler(sqlh)
    else:
        fileh = logging.FileHandler('actions.log')
        fileh.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(module)s - %(message)s'))
        LoggerManager._loggers[name].addHandler(fileh)
    if sentry:
        sentryh = SentryHandler(sentry)
        sentryh.setLevel(logging.ERROR)
        setup_logging(sentryh)
    # Quieten the chatty requests library.
    requests_log = logging.getLogger("requests")
    requests_log.setLevel(logging.WARNING)
    return LoggerManager._loggers[name]
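# getLogger() above assumes a LoggerManager holding a shared cache of named
# loggers. A minimal sketch of that assumed class (hypothetical; the original
# definition is not shown):
class LoggerManager(object):
    # Maps logger name -> configured logging.Logger so handlers are reused
    # instead of being attached again on every getLogger() call.
    _loggers = {}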
def init_mysql_handler(app, level):
    """Logger setting: attach a MySQL handler to the Flask app logger."""
    import logging
    import mySQLHandler
    logger = logging.getLogger('wuaiwow')
    logger.setLevel(logging.DEBUG if app.debug else logging.WARNING)
    handler = mySQLHandler.mySQLHandler(db=app.config["DB"])
    handler.setLevel(level)
    # Note: the handler is attached to app.logger, while the named
    # 'wuaiwow' logger is returned with only its level configured.
    app.logger.addHandler(handler)
    return logger
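# A hedged usage sketch for init_mysql_handler() above, assuming a Flask app
# whose config carries the connection dict (names here are illustrative, not
# from the original source):
import logging
from flask import Flask

app = Flask(__name__)
app.config["DB"] = {'host': 'localhost', 'port': 3306, 'dbuser': '******',
                    'dbpassword': '******', 'dbname': 'logger'}
logger = init_mysql_handler(app, logging.WARNING)
app.logger.warning("this record goes to the MySQL handler")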
def getLogger(name=None):
    """Configure logging: MySQL handler when requested, else a log file."""
    import logging
    import mySQLHandler
    # logging_dest and the mysql_* settings are assumed module-level config.

    LoggerManager._loggers[name] = logging.getLogger(name)
    LoggerManager._loggers[name].setLevel(logging.INFO)
    if logging_dest == 'mysql':
        db = {'host': mysql_hostname,
              'port': 3306,
              'dbuser': mysql_username,
              'dbpassword': mysql_password,
              'dbname': mysql_database}
        sqlh = mySQLHandler.mySQLHandler(db)
        LoggerManager._loggers[name].addHandler(sqlh)
    else:
        fileh = logging.FileHandler('actions.log')
        fileh.setFormatter(logging.Formatter(
            '%(asctime)s - %(module)s - %(message)s'))
        LoggerManager._loggers[name].addHandler(fileh)
    requests_log = logging.getLogger("requests")
    requests_log.setLevel(logging.WARNING)
    return LoggerManager._loggers[name]
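# Every snippet above relies on mySQLHandler.mySQLHandler, which is not shown.
# A minimal sketch of such a handler, assuming pymysql and a pre-created log
# table; the schema and column names are assumptions, not the original code:
import logging
import pymysql

class mySQLHandler(logging.Handler):
    def __init__(self, db):
        logging.Handler.__init__(self)
        self.db = db

    def emit(self, record):
        # Open a short-lived connection and insert one row per log record.
        conn = pymysql.connect(host=self.db['host'], port=self.db['port'],
                               user=self.db['dbuser'],
                               password=self.db['dbpassword'],
                               db=self.db['dbname'])
        try:
            with conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO log (created, name, level, message) "
                    "VALUES (%s, %s, %s, %s)",
                    (record.created, record.name, record.levelname,
                     self.format(record)))
            conn.commit()
        finally:
            conn.close()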
# Standard-library imports implied by the body; timer is assumed to be
# timeit.default_timer. Project modules (utilities, mySQLHandler,
# mySQLMetadata, purgepartitions, LocalValues, ...) are assumed importable.
import logging
import optparse
import os
from datetime import datetime
from platform import system
from time import time
from timeit import default_timer as timer

def main():
    start_time = timer()
    parser = optparse.OptionParser(usage="usage: %prog [options values]",
                                   version="%prog 2.0")
    parser.add_option('-d', '--dbname',
                      help='Database of Table to be fork lifted into Hive',
                      dest='db_name')
    parser.add_option('-t', '--table',
                      help='Table name to be fork lifted into Hive',
                      dest='table_name')
    parser.add_option('-f', '--fromDate',
                      help='Optional: Date used to filter rows from source '
                           'and also in name of extract file',
                      dest='fromDate')
    parser.add_option('-e', '--endDate',
                      help='Optional: End date used to filter rows from source, for a range. '
                           'Passing this fetches rows between start and end dates, '
                           'inclusive of the dates passed',
                      dest='endDate', default=None)
    parser.add_option('-o', '--operator',
                      help='Optional: Operator to be used to filter rows. '
                           'Possible values: >, >=, <, <=, =, <>. '
                           'Used only when a single date value is passed.',
                      dest='operator', default='=')
    parser.add_option('-r', '--runSwitch',
                      help='Optional: Use this to run the script with limited functionality. '
                           'Possible values: G to Generate scripts, '
                           'E to Execute data extraction scripts, '
                           'L to execute Load to target scripts, '
                           'Q to execute data Quality check scripts; '
                           'GELQ does all of the above (default: GEL)',
                      dest='runSwitch', default='GEL')
    parser.add_option('-p', '--persistExtract',
                      help='Optional: Used only for dev or debugging. '
                           'Set to N by default; set to Y during dev calls '
                           'so extracts generated are not lost',
                      dest='persistExtract', default='N')

    current_os = system()
    if current_os != "Windows":
        dir_sep = "/"
        # utilities.check_if_running('extract2Hive')
    else:
        dir_sep = "\\"

    # ---------------------------------------- Parse Arguments -------------------------------------------
    (opts, args) = parser.parse_args()
    if (opts.db_name is None) or (opts.table_name is None) or (opts.fromDate is None):
        parser.print_help()
        exit(-1)

    # ---------------------------------------- Start Logging -------------------------------------------
    base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + dir_sep
    log_dir = os.path.expanduser('~') + dir_sep + "log" + dir_sep
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_filename = (log_dir + opts.db_name + "." + opts.table_name + "." +
                    opts.fromDate + "." +
                    datetime.now().strftime("%Y%m%d%H%M%S") + ".log")
    logging.basicConfig(filename=log_filename, filemode='w', level=logging.INFO)
    logging.info("Logging started")
    utilities.print_info("Log Filename: " + log_filename)
    utilities.print_info("Base Directory is " + base_dir)
    # col_meta_file = base_dir + "config" + dir_sep + "ops_metadata_tbl_cols.txt"
    # tbl_meta_file = base_dir + "config" + dir_sep + "ops_metadata_tbl.txt"
    utilities.print_info("Current OS: " + current_os)
    # utilities.print_info("Table Metadata File: " + tbl_meta_file)
    # utilities.print_info("Column Metadata File: " + col_meta_file)

    # ---------------------------------------- Validate arguments -------------------------------------------
    if opts.db_name is None:
        utilities.abort_with_msg("Please provide Source Database Name")
    if opts.table_name is None:
        utilities.abort_with_msg("Please provide Source Table/View Name")
    if opts.fromDate is None:
        utilities.abort_with_msg("Please provide Date or Date Range to fetch data")
    operators_to_check = [">", ">=", "<", "<=", "=", "<>"]
    if opts.operator != '':
        if not any(oper in opts.operator for oper in operators_to_check):
            parser.print_help()
            utilities.abort_with_msg("Not a valid operator to build a condition")
    legal_switch = ["G", "GE", "GEL", "GL", "Q", "GQ", "GLQ", "GELQ", "GF",
                    "GEF", "GELF", "GLF", "GQF", "GELQF", "GFE", "GEFL",
                    "GEFLQ", "GEQLF", "GFEL"]
    if opts.runSwitch.upper() not in legal_switch:
        # if not any(switch == opts.runSwitch for switch in legal_switch):
        parser.print_help()
        utilities.abort_with_msg("Not a valid runSwitch. Valid switch combinations: " +
                                 str(legal_switch))
    if "G" not in opts.runSwitch and opts.runSwitch is not None:
        parser.print_help()
        utilities.abort_with_msg("Cannot execute Extraction, Load or Quality check "
                                 "without script generation. Include G in switch")

    # -------------------- Store values into Class variable to avoid passing around --------------------------------
    LocalValues.db_nm = opts.db_name.strip().upper()
    LocalValues.tbl_nm = opts.table_name.strip().upper()
    LocalValues.date_to_process = opts.fromDate
    LocalValues.mysql_prop_file = base_dir + 'common' + dir_sep + 'ENV.mySQL.properties'
    LocalValues.process_start = datetime.now()
    print("Prop file: " + LocalValues.mysql_prop_file)
    # mySQL_helper(log_filename, LocalValues.mysql_prop_file)
    utilities.mySQLhdr = mySQLHandler(LocalValues.mysql_prop_file)
    utilities.GlobalValues.mySQLHandler_instance = mySQLHandler(LocalValues.mysql_prop_file)
    utilities.GlobalValues.epv_aim_file = base_dir + 'common' + dir_sep + 'ENV.epvaim.properties'

    # ---------------------------------------- Read Metadata into Arrays -------------------------------------------
    # Fetch Table attributes for Filtering, Partition & Distribution
    mysql_meta = mySQLMetadata(LocalValues.mysql_prop_file)
    tbl_meta = mysql_meta.read_table_metadata(sourceDB=LocalValues.db_nm,
                                              sourceTable=LocalValues.tbl_nm)
    col_meta = mysql_meta.readColumnMetadata(sourceDB=LocalValues.db_nm,
                                             sourceTable=LocalValues.tbl_nm)
    for key, val in sorted(tbl_meta.items()):
        utilities.print_info("\t\t" + key + " : " + str(val))
    # for col_meta_row in col_meta:
    #     utilities.print_info("\t\t" + "".join([str(tpl) for tpl in col_meta_row]))
    # tbl_meta = utilities.read_file(arg_db_nm=LocalValues.db_nm, arg_tbl_nm=LocalValues.tbl_nm,
    #                                arg_input_file=tbl_meta_file, arg_content_type='TABLES')
    # col_meta = utilities.read_file(arg_db_nm=LocalValues.db_nm, arg_tbl_nm=LocalValues.tbl_nm,
    #                                arg_input_file=col_meta_file, arg_content_type='COLS')

    # ---------------------------------------- Generate Scripts and DDL's -------------------------------------------
    ext_time = 0
    LocalValues.job_pid = 0
    # audit_action = 'insert'
    audit_action = set_audit_action()
    audit_log(audit_action, 'OPS_GEN_SCRIPT INITIALIZED')
    if "G" in opts.runSwitch:  # Check for switch to Generate data extraction scripts
        LocalValues.job_pid = int(round(time() * 1000))
        if tbl_meta['delta_col']:  # Check for metadata to determine how to fetch deltas
            l_extract_filter = utilities.build_extract_filter(
                arg_tbl_meta=tbl_meta,
                arg_from_val=opts.fromDate,
                arg_to_val=opts.endDate,
                arg_operator=opts.operator,
                run_switch=opts.runSwitch)
        else:
            utilities.print_warn("No Condition Built for extract. "
                                 "Placeholder condition of 1=1 will be used")
            l_extract_filter = "1 = 1"

        # A bunch of Key Value Pairs used at the time of Running Scripts Generated
        # Extract column metadata attribute values
        col_filter_y = []
        for i, col_meta_row in enumerate(col_meta):
            if col_meta_row[6][1] == 'Y':
                col_filter_y.append(col_meta_row)
        col_meta_values = []
        for i, col_meta_row in enumerate(col_filter_y):
            col_meta_values.append([])
            tuples = col_meta_row[2:len(col_meta_row)]
            for j, v in enumerate(tuples):
                col_meta_values[i].append(v[1])
        print('debug 1')
        helper_dict = utilities.gen_script_from_tmplt(
            arg_base_dir=base_dir,
            arg_tbl_meta=tbl_meta,
            arg_col_meta=col_meta_values,
            arg_dir_sep=dir_sep,
            arg_xtract_filter=l_extract_filter,
            arg_from_val=opts.fromDate,
            arg_to_val=opts.endDate,
            arg_operator=opts.operator,
            run_switch=opts.runSwitch)
        audit_log(audit_action, 'GENERATE EXTRACT SCRIPT')
        print('debug 1')

    gen_time = timer()
    utilities.print_info("Time taken to generate scripts " +
                         str(gen_time - start_time) + " Seconds")
    rows = 0

    # ---------------------------------------- Run all extraction Scripts -------------------------------------------
    if "E" in opts.runSwitch:  # Check for switch to execute data Extraction scripts
        audit_action = set_audit_action()
        audit_log(audit_action, 'START EXECUTE SOURCE DB EXTRACT SCRIPT', 0)
        extract_file, rows = utilities.run_extract(
            arg_tbl_meta=tbl_meta,
            arg_log_dir=log_dir,
            arg_passwd_file=base_dir + 'common' + dir_sep + 'ENV.scriptpwd.properties',
            arg_date_for_extract=opts.fromDate,
            arg_helper_dict=helper_dict)
        LocalValues.rowcount = rows
        audit_log(audit_action, 'COMPLETED EXECUTE SOURCE DB EXTRACT SCRIPT',
                  rows, extract_file)
        # ---- V1 framework takes the extract and uses Shell scripts for further processing. Hence EXIT. ----
        if tbl_meta["v1_support"] == "Y":
            exit(0)
        # Run alter raw table to add partition
        utilities.run_hql(arg_script=helper_dict["alt_raw_tbl"], arg_mode="f")
        ext_time = timer()
        utilities.print_info("Time taken to extract from Source " +
                             str(ext_time - gen_time) + " Seconds")
        if extract_file != '' and opts.persistExtract == 'N':
            utilities.run_shell_cmd("rm -f " + extract_file)  # Delete extract file, if exists
            # added on 2/2/2017
            if tbl_meta["db_type"] == 'TERADATA':
                utilities.del_in_hdfs(tbl_meta["hdfs_basedir"] + "/" +
                                      tbl_meta["hdfs_extract_dir"] + "/" +
                                      os.path.basename(extract_file))

    if "L" in opts.runSwitch:  # Check for switch to execute data Load scripts
        audit_action = set_audit_action()
        # Run insert overwrite to refined tables
        utilities.run_hql(arg_script=helper_dict["ins_hive_rfnd_tbl"],
                          arg_mode="f",
                          arg_param=" --hiveconf inputsrcdt='" + opts.fromDate + "'")
        if '_RAW' in helper_dict["raw_hdfs_partition_location"].upper():
            v_dirpath = tbl_meta["hdfs_raw_dir"] + dir_sep + tbl_meta["tgt_tbl"].lower() + "_raw"
        else:
            v_dirpath = tbl_meta["hdfs_raw_dir"] + dir_sep + tbl_meta["tgt_tbl"].lower()
        # v_part_col = helper_dict["ptn_col_list"][0] + '_PTN' if helper_dict["ptn_col_list"][0] is not None else None
        # LocalValues.raw_size, LocalValues.raw_replica_size = utilities.check_hdfs_space(
        #     arg_hdfs_base_path=v_dirpath, part_col="LOADDATE", from_dt=opts.fromDate)
        # LocalValues.refined_size, LocalValues.refined_replica_size = utilities.check_hdfs_space(
        #     arg_hdfs_base_path=tbl_meta["hdfs_refined_dir"] + dir_sep + tbl_meta["tgt_tbl"].lower(),
        #     part_col=v_part_col, from_dt=opts.fromDate, end_dt=opts.endDate)
        LocalValues.raw_size, LocalValues.raw_replica_size = 0, 0
        LocalValues.refined_size, LocalValues.refined_replica_size = 0, 0
        # Table Metadata contains data retention value.
        if tbl_meta["hive_raw_retention"].strip() != '' and tbl_meta["hive_raw_retention"].strip() != '0':
            purgepartitions.purge_partition(
                dirpath=tbl_meta["hdfs_raw_dir"] + dir_sep + tbl_meta["tgt_tbl"].lower() + "_raw",
                retentiondays=tbl_meta["hive_raw_retention"],
                fromdt=opts.fromDate,
                ptncolnm="loaddate",
                hivedb=tbl_meta["stg_db"],
                hivetbl=tbl_meta["tgt_tbl"] + "_raw")
        if tbl_meta["hive_refined_retention"].strip() != '' and tbl_meta["hive_refined_retention"].strip() != '0':
            purgepartitions.purge_partition(
                dirpath=tbl_meta["hdfs_refined_dir"] + dir_sep + tbl_meta["tgt_tbl"].lower(),
                retentiondays=tbl_meta["hive_refined_retention"],
                fromdt=opts.fromDate,
                ptncolnm=helper_dict["ptn_col_list"][0] + '_PTN',
                hivedb=tbl_meta["tgt_db"],
                hivetbl=tbl_meta["tgt_tbl"])
        audit_log(audit_action, 'EXECUTE DATA LOAD', rows)
        load_time = timer()
        utilities.print_info("Time taken to load to Hive refined table " +
                             str(load_time - ext_time) + " seconds")
        utilities.print_info("Time taken to generate, extract and load to hive " +
                             str(load_time - start_time) + " seconds")

    if "Q" in opts.runSwitch:  # Check for switch to execute data Quality scripts
        utilities.run_dq(arg_tbl_meta=tbl_meta,
                         arg_log_dir=log_dir,
                         from_date=opts.fromDate,
                         end_date=opts.endDate,
                         part_col=helper_dict["ptn_col_list"][0] + '_PTN',
                         which_table="both")
        qual_time = timer()
        utilities.print_info("Time taken for generate till quality check " +
                             str(qual_time - start_time) + " seconds")

    audit_action = 'update'
    audit_log(audit_action, 'COMPLETED SUCCESSFULLY', LocalValues.rowcount)
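# The "G" branch above calls utilities.build_extract_filter() to turn the date
# arguments into a WHERE condition. A hedged sketch of that idea; the real
# helper is not shown, so the name and behavior here are assumptions, and the
# string interpolation is for illustration only:
def build_extract_filter_sketch(delta_col, from_val, to_val=None, operator='='):
    # Range filter when both dates are given; otherwise a single comparison
    # using the operator passed on the command line.
    if to_val is not None:
        return "%s BETWEEN '%s' AND '%s'" % (delta_col, from_val, to_val)
    return "%s %s '%s'" % (delta_col, operator, from_val)

# e.g. build_extract_filter_sketch('LOADDATE', '20170201', '20170203')
#   -> "LOADDATE BETWEEN '20170201' AND '20170203'"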
log.info("%s: %s" % (threadName, time.ctime(time.time()))) counter -= 1 logger = logging.getLogger('thread_example') logger.setLevel(logging.DEBUG) db = { 'host': 'localhost', 'port': 3306, 'dbuser': '******', 'dbpassword': '******', 'dbname': 'logger' } sqlh = mySQLHandler.mySQLHandler(db) logger.addHandler(sqlh) ############## Basic Threads logger.info("Entering Basic Threads") # Create new threads thread1 = myThread_basic(logger, 1, "Thread-1-Basic", 1) thread2 = myThread_basic(logger, 2, "Thread-2-Basic", 2) # Start new Threads thread1.start() thread2.start() logger.info("Exiting Basic Tasks") ############## Synchronizing Threads
    else:
        queueLock.release()
        time.sleep(1)

def print_log(log, threadName, delay, counter):
    while counter:
        time.sleep(delay)
        log.info("%s: %s" % (threadName, time.ctime(time.time())))
        counter -= 1

logger = logging.getLogger('thread_example')
logger.setLevel(logging.DEBUG)
db = {'host': 'localhost',
      'port': 3306,
      'dbuser': '******',
      'dbpassword': '******',
      'dbname': 'logger'}
sqlh = mySQLHandler.mySQLHandler(db)
logger.addHandler(sqlh)

############## Basic Threads
logger.info("Entering Basic Threads")
# Create new threads
thread1 = myThread_basic(logger, 1, "Thread-1-Basic", 1)
thread2 = myThread_basic(logger, 2, "Thread-2-Basic", 2)
# Start new Threads
thread1.start()
thread2.start()
logger.info("Exiting Basic Tasks")

############## Synchronizing Threads
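# Both thread snippets construct myThread_basic, which is not shown. A minimal
# sketch of such a thread class, assuming it drives print_log() above; this is
# a hypothetical reconstruction, not the original class:
import threading

class myThread_basic(threading.Thread):
    def __init__(self, log, threadID, name, counter):
        threading.Thread.__init__(self)
        self.log = log
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        # Log start/stop around the shared worker so the MySQL handler
        # captures the thread lifecycle.
        self.log.info("Starting " + self.name)
        print_log(self.log, self.name, 1, self.counter)
        self.log.info("Exiting " + self.name)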
# Standard-library and lxml imports implied by the body; project helpers
# (utilities, mySQLHandler, mySQLMetadata, LocalValues, checkRC, get_XML_data,
# check_csv_cnt, run_shell_cmd, LOGDIR, ...) are assumed to come from the
# surrounding package.
import csv
import logging
import optparse
import os
from datetime import datetime
from platform import system
from timeit import default_timer as timer
from lxml import etree

def main():
    global base_dir
    global dir_sep
    start_time = timer()
    parser = optparse.OptionParser(usage="usage: %prog [options values]",
                                   version="%prog 2.0")
    parser.add_option('-f', '--FeedName',
                      help='Name of the Feed that needs to be executed',
                      dest='vFeedName')
    parser.add_option('-x', '--XMLName',
                      help='Name of the XML that needs to be parsed and converted to CSV',
                      dest='vXMLName')
    parser.add_option('-s', '--StartDate',
                      help='Date used to create the CSV file',
                      dest='vFromDate', default=None)
    parser.add_option('-k', '--KeyColumn',
                      help='Key column based on which a line needs to be written into the CSV',
                      dest='vKeyCol', default=None)
    parser.add_option('-l', '--OutputFileLocation',
                      help='Output File Location',
                      dest='vOutputDir', default=None)
    parser.add_option('-r', '--RootTagName',
                      help='Root Tag Name where the search for the XML tags should start. '
                           'Give the most granular root tag from where the traversal should start',
                      dest='vRoot', default=None)

    current_os = system()
    if current_os != "Windows":
        dir_sep = '/'
    else:
        dir_sep = '\\'

    (opts, args) = parser.parse_args()
    base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + dir_sep
    log_dir = LOGDIR
    if opts.vFeedName is None:
        parser.print_help()
        checkRC("99", "Please provide Feed Name")
    if opts.vXMLName is None:
        parser.print_help()
        checkRC("99", "Please provide XML File Name with Path")
    if opts.vFromDate is None:
        parser.print_help()
        checkRC("99", "Please provide a From Date")
    if opts.vOutputDir is None:
        parser.print_help()
        checkRC("99", "Please provide an Output Directory where the csv file should be placed")
    if opts.vRoot is None:
        parser.print_help()
        checkRC("99", "Please provide a Root Tag Name, else XML parsing cannot be done")

    vOutputFileName = opts.vOutputDir + dir_sep + opts.vFeedName + "." + opts.vFromDate + ".csv"
    vLogFileName = (log_dir + opts.vFeedName + "." + opts.vFromDate + "." +
                    datetime.now().strftime("%Y%m%d%H%M%S") + ".log")
    logging.basicConfig(filename=vLogFileName, filemode='w', level=logging.DEBUG)
    logging.info("Logging Started")
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Log FileName: " + vLogFileName)
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] XML Parsing Started")

    LocalValues.db_nm = "FILE"
    LocalValues.tbl_nm = opts.vFeedName.strip().upper()
    LocalValues.date_to_process = opts.vFromDate
    LocalValues.mysql_prop_file = base_dir + 'common' + dir_sep + 'ENV.mySQL.properties'
    LocalValues.process_start = datetime.now()
    print(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Prop File: " + LocalValues.mysql_prop_file)
    utilities.mySQLhdr = mySQLHandler(LocalValues.mysql_prop_file)
    utilities.GlobalValues.mySQLHandler_instance = mySQLHandler(LocalValues.mysql_prop_file)
    mysql_meta = mySQLMetadata(LocalValues.mysql_prop_file)
    col_meta = mysql_meta.read_column_metadata(sourceDB=LocalValues.db_nm,
                                               sourceTable=LocalValues.tbl_nm)
    audit_action = set_audit_action()
    audit_log(audit_action, "XML Parsing Started")
    audit_action = set_audit_action()

    # Build the header list
    vTagList = []
    for i, v in enumerate(col_meta):
        vTagList.append(v["SOURCECOLUMN"])
    if not vTagList:
        audit_log(audit_action, 'Header TagList Creation Failed')
        logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[ERROR]] Header TagList Creation Failed")
        checkRC("99", "Header TagList Creation Failed")

    vOutData = []
    parser = etree.XMLParser(recover=True, remove_blank_text=True)
    root = etree.parse(opts.vXMLName, parser)
    print(str(type(root)) + ' ' + str(root))
    if root is None:
        audit_log(audit_action, "Parsing Failed")
        logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[ERROR]] Parsing Failed")
        checkRC("99", "XML Parsing Failed")

    # add field names by copying tag_list
    vRootTag = ".//" + opts.vRoot
    vTags = root.findall(vRootTag)
    for i in vTags:
        vXMLTags = get_XML_data(i, opts.vKeyCol, vTagList)
        if vXMLTags:
            vOutData.append(vXMLTags)
        else:
            audit_log(audit_action, "Parsing Failed")
            logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[ERROR]] Parsing Failed")
            checkRC("99", "XML Parsing Failed")
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] XML Parsing Completed")
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Starting CSV File Write: " + vOutputFileName)

    # Writing to a CSV file
    vOutFile = open(vOutputFileName, "wb")
    csv_writer = csv.writer(vOutFile, quoting=csv.QUOTE_MINIMAL)
    for row in vOutData:
        csv_writer.writerow(row)
    vOutFile.close()
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Completed CSV File Write: " + vOutputFileName)
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Checking if the csv has any data. If not, abort the process")
    vCnt = check_csv_cnt(vOutputFileName)
    LocalValues.rowcount = vCnt[0]
    logging.info(datetime.now().strftime("%Y%m%d%H%M%S") + ": [[INFO]] Remove the XML file from Edge Node")
    run_shell_cmd("rm -f -r " + opts.vXMLName, audit_action)
    audit_log(audit_action, 'XML Parsing Completed')

if __name__ == '__main__':
    main()
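# get_XML_data() above is not shown. A hedged sketch of what it appears to do:
# pull the text of each tag in vTagList out of one element subtree, and skip
# the row when the key column is requested but empty. This is a reconstruction
# built only on how the function is called, not the original implementation:
def get_XML_data_sketch(element, key_col, tag_list):
    row = []
    for tag in tag_list:
        node = element.find(".//" + tag)
        row.append(node.text.strip() if node is not None and node.text else "")
    # Drop the row when a key column is named but its value is missing.
    if key_col in tag_list and not row[tag_list.index(key_col)]:
        return None
    return row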
# Standard-library imports implied by the body; timer is assumed to be
# timeit.default_timer. Project helpers (utilities, mySQLHandler,
# mySQLMetadata, jobCalendarCheck, LocalValues, LOGDIR, ...) are assumed
# to come from the surrounding package.
import logging
import optparse
import os
from datetime import datetime, timedelta
from platform import system
from timeit import default_timer as timer

def main():
    start_time = timer()
    parser = optparse.OptionParser(usage="usage: %prog [options values]",
                                   version="%prog 3.6.3")
    parser.add_option('-d', '--dbname',
                      help='Database of Table to be fork lifted into Hive',
                      dest='db_name')
    parser.add_option('-t', '--table',
                      help='Table name to be fork lifted into Hive',
                      dest='table_name')
    parser.add_option('-f', '--fromDate',
                      help='Optional: Date used to filter rows from source and also in name of extract file',
                      dest='fromDate', default=None)
    parser.add_option('-e', '--endDate',
                      help='Optional: End date used to filter rows from source, for a range. '
                           'Passing this fetches rows between start and end dates, inclusive of the dates passed',
                      dest='endDate', default=None)
    parser.add_option('-o', '--operator',
                      help='Optional: Operator to be used to filter rows. Possible values: >, >=, <, <=, =, <>',
                      dest='operator', default='=')
    parser.add_option('-r', '--runSwitch',
                      help='Optional: Use this to run the script with limited functionality. '
                           'Possible values: G to Generate scripts, '
                           'E to execute Data Extraction scripts, '
                           'L to execute Load to Target scripts, '
                           'F to take a complex query from SOURCE_SQL_TXT metadata, '
                           'Q to execute Data Quality Check scripts; '
                           'GELQ does all of the above (default: GEL)',
                      dest='runSwitch', default='GEL')
    parser.add_option('-p', '--persistExtract',
                      help='Optional: Used only for DEV or debugging. '
                           'Set to N by default; set to Y during dev calls so extracts generated are not lost',
                      dest='persistExtract', default=None)
    parser.add_option('-s', action='store_true',
                      help='Optional: Override the ops_jobstat status check',
                      dest='statOverride', default=None)
    parser.add_option('-c', action='store_true',
                      help='Optional: Override for using HQLs from staging/deploy',
                      dest='useStage', default=None)
    parser.add_option('-x', '--extract',
                      help='Custom extract file to override the generated-extract feature. Requires a path from '
                           'the command line or from metadata column SOURCE_SQL_TXT',
                      dest='extract_path', default='')
    parser.add_option('-m', '--mysqlprop',
                      help='MySQL properties file override for switching to another set of meta tables',
                      dest='mysql_prop', default='')
    parser.add_option('-u', '--usepostprocessor',
                      help="Post Processor attachment; accepts the switch 'Y' for default processing, "
                           "or a dictionary of post-processor switches and their values",
                      dest='postprocess_opts', default='')
    parser.add_option('-l', '--landingpath',
                      help='Custom extract landing path with filename to override the generated extract '
                           'landing path feature. Requires a path from a command line argument',
                      dest='landing_path', default='')

    current_os = system()
    if current_os != "Windows":
        dir_sep = '/'
    else:
        dir_sep = '\\'

    (opts, args) = parser.parse_args()
    if opts.db_name is None or opts.table_name is None:
        parser.print_help()
        exit(-1)

    base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + dir_sep
    base_fid_path = os.path.expanduser('~') + dir_sep
    log_dir = LOGDIR
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if opts.fromDate is not None:
        LocalValues.log_filename = (log_dir + opts.db_name + '.' + opts.table_name + '.' +
                                    opts.fromDate + '.' +
                                    datetime.now().strftime("%Y%m%d%H%M%S") + ".log")
    else:
        LocalValues.log_filename = (log_dir + opts.db_name + '.' + opts.table_name + '.' +
                                    datetime.now().strftime("%Y%m%d") + '.' +
                                    datetime.now().strftime("%Y%m%d%H%M%S") + ".log")
    logging.basicConfig(filename=LocalValues.log_filename, filemode='w', level=logging.INFO)
    logging.info("Logging Started")
    utilities.print_info("framework version: " + LocalValues.framework_version)
    utilities.print_info("Log file from the current run: " + LocalValues.log_filename)
    utilities.print_info("Base Directory is " + base_dir)
    utilities.print_info("Current OS: " + current_os)

    if opts.db_name is None:
        utilities.abort_with_msg("Please provide Source Database Name")
    if opts.table_name is None:
        utilities.abort_with_msg("Please provide Source Table/View Name")
    if opts.fromDate is None:
        utilities.print_info("No Date Provided. Job Calendar will be used")
    operators_to_check = [">", ">=", "<", "<=", "=", "<>"]
    if opts.operator != '':
        if not any(oper in opts.operator for oper in operators_to_check):
            parser.print_help()
            utilities.abort_with_msg("Not a valid operator to build a condition")
    legal_switch = ["G", "GE", "GEL", "GL", "GQ", "Q", "GLQ", "GELQ", "GF", "GEF",
                    "GELF", "GLF", "GQF", "GELQF", "GFE", "GEFL", "GEFLQ", "GEQLF",
                    "GFEL", "P"]
    if opts.runSwitch.upper() not in legal_switch:
        parser.print_help()
        utilities.abort_with_msg("Not a valid runSwitch. Valid switch combinations: " + str(legal_switch))
    if "G" not in opts.runSwitch and opts.runSwitch is not None and "jds_user" not in opts.db_name.lower():
        parser.print_help()
        utilities.abort_with_msg("Cannot execute Extraction, Load or Quality check "
                                 "without script generation. Include G in switch")

    LocalValues.db_nm = opts.db_name.strip().upper()
    LocalValues.tbl_nm = opts.table_name.strip().upper()
    if opts.mysql_prop == '':
        LocalValues.mysql_prop_file = base_dir + 'common' + dir_sep + 'ENV.mySQL.properties'
    else:
        LocalValues.mysql_prop_file = opts.mysql_prop
    LocalValues.process_start = datetime.now()
    utilities.mySQLhdr = mySQLHandler(LocalValues.mysql_prop_file)
    utilities.GlobalValues.mySQLHandler_instance = mySQLHandler(LocalValues.mysql_prop_file)
    utilities.GlobalValues.epvaim_file = base_dir + 'common' + dir_sep + 'ENV.epvaim.properties'

    if "jds_user" not in LocalValues.db_nm.lower():
        mysql_meta = mySQLMetadata(LocalValues.mysql_prop_file)
        tbl_meta = mysql_meta.read_table_metadata(sourceDB=LocalValues.db_nm,
                                                  sourceTable=LocalValues.tbl_nm)
        col_meta = mysql_meta.read_column_metadata(sourceDB=LocalValues.db_nm,
                                                   sourceTable=LocalValues.tbl_nm)

    if "P" in opts.runSwitch:
        LocalValues.date_to_process = opts.fromDate
        mysql_meta = mySQLMetadata(LocalValues.mysql_prop_file)
        tbl_meta = mysql_meta.read_table_metadata(sourceDB=LocalValues.db_nm,
                                                  sourceTable=LocalValues.tbl_nm)
        src_folder_path = normalize_path(tbl_meta['src_tbl']).lower()
        tgt_folder_path = normalize_path(tbl_meta['extract_landing_dir'])
        processed_folder_list = get_folder_list(src_folder_path)
        move_raw_folder(processed_folder_list, tgt_folder_path)
        tgt_folder_list = get_tgt_folder_info(tgt_folder_path, processed_folder_list)
        tgt_final_folder_list = move_csv_files_sub_folder(tgt_folder_list)
        opts.runSwitch = "Q"
        gen_ext_hql(tgt_final_folder_list, base_dir, mysql_meta, tbl_meta, dir_sep, LocalValues, opts)
        retention(tgt_final_folder_list, tgt_folder_path)
        exit(-1)

    temp_path = ''
    if "F" not in opts.runSwitch:
        if opts.extract_path.strip().lower() == 'meta' and len(tbl_meta['source_sql_txt'].strip()) < 5:
            utilities.abort_with_msg("Custom Source Path missing in meta. "
                                     "It is required if the -x switch is used")
        elif opts.extract_path.strip().lower() != 'meta':
            if base_fid_path in opts.extract_path.strip():
                temp_path = opts.extract_path.strip()
            else:
                temp_path = base_fid_path + opts.extract_path.strip()
            if not os.path.exists(temp_path):
                utilities.print_warn("Custom Extract path passed from argument does not exist. "
                                     "Will check in meta path")
                if not os.path.exists(tbl_meta['source_sql_txt'].strip()):
                    utilities.abort_with_msg("Custom Extract file does not exist either in argument or meta")
                else:
                    temp_path = tbl_meta['source_sql_txt'].strip()
            tbl_meta['source_sql_txt'] = temp_path.strip()
        elif opts.extract_path.strip().lower() == 'meta' and len(tbl_meta['source_sql_txt'].strip()) > 5:
            if base_fid_path in tbl_meta['source_sql_txt'].strip():
                temp_path = tbl_meta['source_sql_txt'].strip()
            else:
                temp_path = base_fid_path + tbl_meta['source_sql_txt'].strip()
            if not os.path.exists(temp_path):
                utilities.abort_with_msg("Custom Extract path from meta does not exist")
            tbl_meta['source_sql_txt'] = temp_path.strip()
        if temp_path != base_fid_path and temp_path != '':
            LocalValues.custom_extract = 'Y'
            utilities.print_info("Custom Extract is in use")

    if opts.fromDate is None:
        if opts.endDate is not None:
            utilities.abort_with_msg("End Date not accepted without from date")
        end_date = datetime.today().strftime("%Y%m%d")
        job_cal_check = jobCalendarCheck(end_date, LocalValues.db_nm, LocalValues.tbl_nm,
                                         LocalValues.mysql_prop_file)
        run_switch, eff_end_date = job_cal_check.job_calendar_check()
        last_good_date = mysql_meta.read_opts_jobstat_max(feed=LocalValues.tbl_nm)
        if last_good_date is None:
            utilities.print_warn("Last run date has returned None, so the date from the "
                                 "Calendar will be used: " + eff_end_date)
            last_good_date = eff_end_date
            new_from_date = datetime.strptime(last_good_date, '%Y%m%d').strftime('%Y%m%d')
        else:
            new_from_date = (datetime.strptime(last_good_date, '%Y%m%d') +
                             timedelta(days=1)).strftime('%Y%m%d')
        LocalValues.date_to_process = new_from_date
        LocalValues.date_till_process = eff_end_date
        if eff_end_date == new_from_date:
            if run_switch == 'N':
                utilities.print_warn("Job will not run for effective date: " + str(new_from_date))
                audit_action = set_audit_action()
                audit_log(audit_action, "JOB NOT SCHEDULED TO RUN FOR " + eff_end_date)
                exit(-1)
        if new_from_date > eff_end_date:
            utilities.print_warn("Job will not run when from_date " + new_from_date +
                                 " is greater than end date " + eff_end_date)
            audit_action = set_audit_action()
            audit_log(audit_action, "JOB ABORTED BECAUSE FROM DATE " + new_from_date +
                      ' GREATER THAN END DATE ' + eff_end_date)
            exit(-1)
        opts.fromDate = new_from_date
        opts.endDate = eff_end_date
        utilities.print_info("Run Switch: " + run_switch + " Effective From Date: " + str(opts.fromDate))
        utilities.print_info("Run Switch: " + run_switch + " Effective End Date: " + str(opts.endDate))

    LocalValues.date_to_process = opts.fromDate
    LocalValues.date_till_process = opts.endDate
    if opts.statOverride is None:
        ops_jobstat = mysql_meta.read_opts_jobstat(feed=LocalValues.tbl_nm,
                                                   from_dt=LocalValues.date_to_process,
                                                   end_dt=LocalValues.date_till_process)
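# The calendar-driven branch above derives the next processing window from the
# last successful run. A hedged, self-contained sketch of that date arithmetic;
# the helper name is invented here, only the logic mirrors the branch above:
from datetime import datetime, timedelta

def next_window_sketch(last_good_date, eff_end_date):
    # First run: start at the calendar's effective date itself. Otherwise,
    # resume the day after the last good run.
    if last_good_date is None:
        return eff_end_date
    nxt = datetime.strptime(last_good_date, '%Y%m%d') + timedelta(days=1)
    return nxt.strftime('%Y%m%d')

# e.g. next_window_sketch('20170201', '20170205') -> '20170202'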