def main():
    env, env_ver, app, user_id, pwd, key_store, jdbc_ref = arg_handle()
    envvars.populate(env, env_ver, "bdh", "data_ingest")
    #keystore_root = get_keystore_root(key_store, app,
    #                                  envvars.list['hdfs_common_keystore_root'],
    #                                  envvars.list['hdfs_service_keystore_root'])
    keystore_root = envvars.list['hdfs_' + key_store + '_keystore_root']
    print "keystore root = ", keystore_root
    alias_name = get_alias_name(key_store, jdbc_ref, user_id)
    key_provider = "jceks://hdfs/" + keystore_root + "/" + user_id.lower() + ".jceks"
    # Command to delete the alias (in case it already exists)
    hadoop_cmd = "hadoop credential delete " + alias_name + " -f -provider " + key_provider
    print "delete manually (if needed) alias using command ==> "
    print "    " + hadoop_cmd
    rc, status = commands.getstatusoutput(hadoop_cmd)
    print(status)
    # Command to create the alias
    hadoop_cmd = "hadoop credential create " + alias_name + " -provider " + key_provider
    print "Generating alias using command ==> "
    print "    " + hadoop_cmd + " -v " + pwd.replace("\$", "\\\$")
    hadoop_cmd = hadoop_cmd + " -v " + pwd.replace("$", "\$")
    rc, status = commands.getstatusoutput(hadoop_cmd)
    print(status)
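# --- Hedged illustration (not part of the original script) -----------------------
# A minimal sketch of how a downstream Sqoop call could consume the credential
# alias created above. "--password-alias" and the
# "hadoop.security.credential.provider.path" property are standard Sqoop/Hadoop
# options; the jdbc_url, user, alias and provider values below are hypothetical
# placeholders, not taken from this repository. Assumes `commands` is imported as
# in the surrounding scripts.
def example_sqoop_with_alias():
    jdbc_url = "jdbc:oracle:thin:@//dbhost:1521/SVC"       # placeholder
    user_id = "svc_user"                                   # placeholder
    alias_name = "jdbc.svc_user.password.alias"            # placeholder
    key_provider = "jceks://hdfs/keystore/svc_user.jceks"  # placeholder
    sqoop_cmd = " ".join([
        "sqoop import",
        "-Dhadoop.security.credential.provider.path=" + key_provider,
        "--connect", jdbc_url,
        "--username", user_id,
        "--password-alias", alias_name,
        "--table", "SOME_TABLE",
    ])
    rc, out = commands.getstatusoutput(sqoop_cmd)
    print out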
def main():
    options, args = arg_handle()
    envvars.populate(options.env, options.env_ver, options.app, options.sub_app)
    config_file_path = envvars.list['lfs_app_config'] + "/ingest/" + options.config_file
    if not os.path.isfile(config_file_path):
        print "run_gen.py -> ERROR: config file " + config_file_path + " does not exist ***"
        sys.exit(1)
    print "**************************************************************************************************"
    args = " ".join([envvars.list['lfs_global_scripts'] + "/getmetadata.py ",
                     "-e " + options.env.strip(),
                     "-a " + options.app,
                     "-u " + options.sub_app,
                     " -v " + options.env_ver,
                     " -k " + options.key_store.strip(),
                     " -s " + options.config_file])
    getmetadata_script = "python " + args
    if (options.step == "all") or (options.step == "1"):
        print("run_gen.py -> STEP-1 : ************************************************************************")
        print("run_gen.py -> Invoked : " + getmetadata_script)
        call = subprocess.Popen(getmetadata_script.split(' '),
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        while True:
            line = call.stdout.readline()
            if not line:
                break
            print line.strip()
            sys.stdout.flush()
        call.communicate()
        rc = call.returncode
        if rc != 0:
            print "run_gen.py -> getting metadata using " + args + " was not successful."
            sys.exit(1)
        else:
            print "run_gen.py -> getting metadata command was successful."
    if (options.step == '1'):
        sys.exit(0)
    print "**************************************************************************************************"
    args = " ".join([envvars.list['lfs_global_scripts'] + "/generate.py -s",
                     options.config_file,
                     "-m", envvars.list['lfs_app_config'] + "/ingest/" + options.config_file + ".meta",
                     "-w", envvars.list['lfs_app_workflows'] + "/wf_db_ingest",
                     "-k", options.key_store,
                     "-e " + options.env.strip(),
                     "-a " + options.app,
                     "-u " + options.sub_app,
                     "-v " + options.env_ver])
    generate_script = "python " + args
    if (options.step == "all") or (options.step == "2"):
        print("run_gen.py -> STEP-2 : ************************************************************************")
        print("run_gen.py -> Invoked : " + generate_script)
        call = subprocess.Popen(generate_script.split(' '),
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        while True:
            line = call.stdout.readline()
            if not line:
                break
            print line.strip()
            sys.stdout.flush()
        call.communicate()
        rc = call.returncode
        if rc != 0:
            print "run_gen.py -> Generating create scripts and properties file was not successful."
            sys.exit(1)
        else:
            print "run_gen.py -> generating create scripts and properties files was successful."
    if (options.step == '2'):
        sys.exit(0)
    print "**************************************************************************************************"
    if (options.step == "all") or (options.step == "3"):
        print("run_gen.py -> STEP-3 : ************************************************************************")
        args = " ".join([envvars.list['lfs_global_scripts'] + "/run_hive_create.py",
                         "--app " + options.app,
                         "--subapp " + options.sub_app,
                         "--env " + options.env,
                         "--op0 " + envvars.list['lfs_app_config'] + "/ingest/" + options.config_file + ".list",
                         "--op1 " + envvars.list['lfs_global_config'] + "/oozie_global.properties",
                         "--env_ver " + options.env_ver,
                         "--op2 " + envvars.list['lfs_app_workflows'] + "/wf_db_ingest",
                         "--op3 " + envvars.list['lfs_app_src'] + "/hive"])
        hivecreate_script = "python " + args
        print("run_gen.py -> Invoked : " + hivecreate_script)
        call = subprocess.Popen(hivecreate_script.split(' '),
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        while True:
            line = call.stdout.readline()
            if not line:
                break
            print line.strip()
            sys.stdout.flush()
        call.communicate()
        rc = call.returncode
        #os.system(hivecreate_script)
        if rc != 0:
            print "run_gen.py -> Creating hive tables was not successful."
            print rc
            sys.exit(1)
        else:
            print "run_gen.py -> Completed executing create table scripts."
    print "**************************************************************************************************"
def main():
    global return_code, group, start_line
    sys.stdout.flush()
    common_properties, app, sub_app, env, env_ver, workflow_name, custom_date, file_name = arg_handle()
    envvars.populate(env, env_ver, app, sub_app)
    log_time = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
    log_file = envvars.list['lfs_app_logs'] + "/run_job-" + workflow_name + '_' + log_time + '.log'
    print("LogFile: " + log_file)
    print("To Kill: kill " + str(os.getpid()))
    sys.stdout = open(log_file, 'a', 0)
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("run_criss_indx_generatr_wf.py -> Started : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print envvars.list['lfs_app_wrk']
    final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_' + workflow_name + '.properties'
    removefile(final_properties)
    properties_file = open(final_properties, 'wb')
    # Concatenate global properties file and table properties file
    shutil.copyfileobj(open(common_properties, 'rb'), properties_file)
    appSpecificProperties = envvars.list['lfs_app_workflows'] + '/' + workflow_name + '/job.properties'
    # appSpecificFile = Path(appSpecificProperties)
    #db = "hv_db_" + sub_app
    #dbName = envvars.list[db]
    todayDate = ""
    if custom_date == None:
        todayDate = date.today()
    else:
        todayDate = datetime.datetime.strptime(custom_date, '%Y-%m-%d')
    yesterdayDate = todayDate - timedelta(1)
    curDate_yyyy = todayDate.strftime('%Y')
    curDate_mm = todayDate.strftime('%m')
    curDate_yyyy_mm_dd = todayDate.strftime('%Y-%m-%d')
    yesterday_yyyy_mm_dd = yesterdayDate.strftime('%Y-%m-%d')
    yesterday_yyyymmdd = yesterdayDate.strftime('%Y%m%d')
    user = getpass.getuser()
    hadoop_user = user.upper()
    cmd = "rm " + hadoop_user + ".keytab"
    rcc, status_txt = commands.getstatusoutput(cmd)
    print "removing old keytabs if any...status=", status_txt
    cmd = "hdfs dfs -get /user/" + hadoop_user.lower() + "/" + hadoop_user + ".keytab"
    rcc, status_txt = commands.getstatusoutput(cmd)
    print "Getting keytab and status = ", status_txt
    rc, rec_cnt = lastPreviousWorkingDay(curDate_yyyy_mm_dd, env, hadoop_user)
    lastPreviousBusinessday_yyyy_mm_dd = rec_cnt.split('\t')[0]
    print "lastPreviousBusinessday_yyyy_mm_dd" + lastPreviousBusinessday_yyyy_mm_dd
    curDate_yyyy_mm_dd = lastPreviousBusinessday_yyyy_mm_dd
    emailList = ""
    rawDb = ""
    dbName = ""
    sourceFileName = ""
    targetTable = ""
    lookUpTable = ""
    mappingTable = ""
    if os.path.exists(appSpecificProperties):
        # load app specific job.properties
        envvars.load_file(appSpecificProperties)
        try:
            with open(appSpecificProperties) as fin:
                for line in fin:
                    args = line.split('=')
                    if args[0].strip() == "sourceFileName":
                        sourceFileName = args[1].strip()
                    if args[0].strip() == "mappingTable":
                        mappingTable = args[1].strip()
                    if args[0].strip() == "targetTable":
                        targetTable = args[1].strip()
                    if args[0].strip() == "lookupTable":
                        lookUpTable = args[1].strip()
                    if args[0].strip() == "email_list":
                        emailList = args[1].strip()
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise IOError("exception file reading error")
            else:
                print("No joblist file found")
        shutil.copyfileobj(open(appSpecificProperties, 'rb'), properties_file)
    jobProperties = ""
    if emailList == "":
        #emailList = envvars.list['email_list']
        emailList = ['*****@*****.**']
    dbName = envvars.list['hv_db_efgifi']
    rawDb = envvars.list['hv_db_efgifi_stage']
    home = '/data/'
    path = os.path.dirname(os.path.realpath(__file__))
    print "path" + path
    root = path.split('efgifi/')[0]
    print root
    targethdfsFilePath = ""
    print "file_name :" + str(file_name)
    if file_name == None:
        print "File Name not specified"
    else:
        sourceFilePath = root + "/landingzone/efgifi/" + str(file_name)
        targetLocalFilePath = root + "/efgifi/wrk/" + str(file_name)
        targethdfsFilePath = '/bdp' + env + '/bdh/' + env_ver + '/str/raw/' + rawDb + "/" + "criss_ifind_delim_stg/"
        with open(sourceFilePath, "rb") as ebcdic:
            ascii_txt = codecs.decode(ebcdic.read(), "cp500")
        print "targetLocalFilePath :" + str(targetLocalFilePath)
        with io.open(targetLocalFilePath, mode='w', encoding='utf-8') as target:
            target.write(ascii_txt.replace("DTL:", "\nDTL:")
                                  .replace("HDR:", "\nHDR:")
                                  .replace("!", "|")
                                  .replace(" ", "")
                                  .replace(" \n", "\n"))
        print "Before running hdfs put command ...."
        hdfs_put_cmd = "hdfs dfs -put -f " + targetLocalFilePath + " " + targethdfsFilePath
        print "--- running hdfs_put command --> " + hdfs_put_cmd
        rc, status = commands.getstatusoutput(hdfs_put_cmd)
        if (rc > 0):
            print status
        else:
            print "source criss index file copied to hdfs "
    basePath = '/bdp' + env + '/bdh/' + env_ver + '/str/pub/'
    jobProperties = '\n'.join([
        'app=' + app,
        'basePath=' + basePath,
        'sourceFilePath=' + targethdfsFilePath + "/" + str(file_name),
        'mappingFilePath=' + basePath + dbName + "/" + mappingTable,
        'targetFilePath=' + basePath + dbName + "/" + targetTable + "/load_date=" + yesterday_yyyy_mm_dd,
        'lookupFilePath=' + basePath + dbName + "/" + lookUpTable + "/load_date=" + yesterday_yyyy_mm_dd,
        'scriptLocation=/data/bdp' + env + '/bdh/' + env_ver + '/' + sub_app + '/code/scripts/',
        'curDate_yyyy_mm_dd=' + curDate_yyyy_mm_dd,
        'yesterday_yyyy_mm_dd=' + yesterday_yyyy_mm_dd,
        'yesterday_yyyymmdd=' + yesterday_yyyymmdd,
        'lastPreviousDayBusinessday_yyyy_mm_dd=' + lastPreviousBusinessday_yyyy_mm_dd,
        'happ=' + app,
        'oozieLib=' + envvars.list['oozie.libpath']
    ])
    properties_file.write(jobProperties)
    user = getpass.getuser()
    hadoop_user = user.upper()
    cmd = "rm " + hadoop_user + ".keytab"
    rcc, status_txt = commands.getstatusoutput(cmd)
    print "removing old keytabs if any...status=", status_txt
    cmd = "hdfs dfs -get /user/" + hadoop_user.lower() + "/" + hadoop_user + ".keytab"
    rcc, status_txt = commands.getstatusoutput(cmd)
    print "Getting keytab and status = ", status_txt
    properties_file.close()
    print("run_efgifi_workflow.py -> FinalPrpty : " + final_properties)
    workflow = envvars.list['hdfs_app_workflows'] + '/' + workflow_name
    print workflow
    oozie_wf_cmd = "oozie job -oozie " + envvars.list['oozieNode'] + " -config "
    oozie_wf_cmd = oozie_wf_cmd + final_properties
    oozie_wf_cmd = oozie_wf_cmd + ' -Doozie.wf.application.path='
    oozie_wf_cmd = oozie_wf_cmd + workflow
    oozie_wf_cmd = oozie_wf_cmd + ' -debug -run'
    print("run_efgifi_workflow.py -> Invoked : " + oozie_wf_cmd)
    rc, jobid_str = commands.getstatusoutput(oozie_wf_cmd)
    if rc == 0:
        jobid_str = jobid_str.split('job: ')
        jobid = jobid_str[1].strip()
    else:
        print("run_efgifi_workflow.py -> Failed : " + jobid_str)
        return_code = 8
        sys.exit(return_code)
    print(jobid + "-> Started : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    getStatus(jobid, envvars.list['oozieNode'], curDate_yyyy_mm_dd, hadoop_user,
              dbName, targetTable, emailList)
print("getmetadata.py -> MetadataLocation : " + outputFilePath) if __name__ == "__main__": parser = optparse.OptionParser() options = validateOptions() env = options.env env_ver = options.env_ver env_root = checkenv(options.env) common_root_path= env_root + "/bdh/01/global" sys.path.append( os.path.expanduser(common_root_path + "/code/scripts/") ) import envvars envvars.populate(env,env_ver,options.app,options.sub_app) sqoopParamFilePath = envvars.list['lfs_app_config']+"/ingest/"+options.sqoopparams tableName = options.tableName print "getmetadata.py -> sqoopParamFilePath = " + sqoopParamFilePath config = ConfigParser.ConfigParser() config.readfp(open(sqoopParamFilePath)) appName = config.get("DataSourceInfo","app") app = appName sub_app = config.get("DataSourceInfo","sub_app") if appName.find("/") != -1: app = appName.split("/")[0] sub_app = appName.split("/")[1]
def main():
    global return_code
    return_code = 0
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("run_solr_index.py -> Started : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    options = arg_handle()
    env = options.env.strip()
    env_ver = options.env_ver.strip()
    app = options.app.strip()
    sub_app = options.sub_app.strip()
    group = options.group.strip()
    # Get envvars from oozie_common_properties file
    envvars.populate(env, env_ver, app, sub_app)
    collection_name = options.collection_name.strip()
    actions = options.actions.strip()
    try:
        script_name = options.script_name.strip()
    except AttributeError as e:
        script_name = ""
    curr_date = datetime.datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d")
    prev_load_date = (datetime.datetime.fromtimestamp(time.time()) +
                      datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
    try:
        lower_bound = options.min_bound.strip()
    except AttributeError as e:
        lower_bound = prev_load_date
    try:
        upper_bound = options.max_bound.strip()
    except AttributeError as e:
        upper_bound = prev_load_date
    if collection_name is None or collection_name == "":
        print("run_solr_index.py -> Error : Collection name is required ")
        sys.exit(8)
    if actions is None or actions == "":
        print("run_solr_index.py -> Error : Actions are required ex: insert,delete,create ")
        sys.exit(8)
    collection_folder = envvars.list['lfs_solr'] + '/' + collection_name
    if not os.path.isdir(collection_folder):
        print("run_solr_index.py -> Error : Collection folder " + collection_folder + " does not exist..")
        sys.exit(8)
    if not (os.path.exists(collection_folder + "/conf") and
            os.path.exists(collection_folder + "/conf/schema.xml")):
        print("run_solr_index.py -> Error : Configuration folder is corrupt..")
        sys.exit(8)
    title_trans = ''.join(chr(c) if (chr(c).isupper() or chr(c).islower() or str(c).isdigit()) and chr(c) != '-'
                          else '_' for c in range(256))
    solr_table_name = 'solr_' + collection_name + "_" + lower_bound
    solr_table_name = solr_table_name.translate(title_trans)
    if "delete" in actions.lower():
        solr_cmd = " ".join(["solrctl --solr", envvars.list['solr_server'],
                             "--zk", envvars.list['zookeeper_ensemble'],
                             "collection --delete ", collection_name])
        rc, out = commands.getstatusoutput(solr_cmd)
        if rc != 0:
            print("run_solr_index.py -> Failed : " + solr_cmd + ";RC : " + str(rc))
            print out
            #sys.exit(10)
        print("run_solr_index.py -> Command : " + solr_cmd + ";RC : " + str(rc))
        solr_cmd = " ".join(["solrctl --solr", envvars.list['solr_server'],
                             "--zk", envvars.list['zookeeper_ensemble'],
                             "instancedir --delete ", collection_name])
        rc, out = commands.getstatusoutput(solr_cmd)
        if rc != 0:
            print out
            print("run_solr_index.py -> Failed : " + solr_cmd + ";RC : " + str(rc))
            #sys.exit(10)
        print("run_solr_index.py -> Command : " + solr_cmd + ";RC : " + str(rc))
    if "create" in actions.lower():
        solr_cmd = " ".join(["solrctl --solr", envvars.list['solr_server'],
                             "--zk", envvars.list['zookeeper_ensemble'],
                             "instancedir --create ", collection_name, collection_folder])
        rc, out = commands.getstatusoutput(solr_cmd)
        if rc != 0:
            print out
            print("run_solr_index.py -> Failed : " + solr_cmd + ";RC : " + str(rc))
            sys.exit(10)
        print("run_solr_index.py -> Command : " + solr_cmd + ";RC : " + str(rc))
        solr_cmd = " ".join(["solrctl --solr", envvars.list['solr_server'],
                             "--zk", envvars.list['zookeeper_ensemble'],
                             "collection --create ", collection_name, "-s 1 -r 1 -m 1"])
        rc, out = commands.getstatusoutput(solr_cmd)
        if rc != 0:
            print out
            print("run_solr_index.py -> Failed : " + solr_cmd + ";RC : " + str(rc))
            sys.exit(10)
        print("run_solr_index.py -> Command : " + solr_cmd + ";RC : " + str(rc))
    final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_' + app.replace("/", "_") + '_' + solr_table_name + '.properties'
    # Remove if the file exists
    silentremove(final_properties)
    if "query" in actions.lower():
        # open the final properties file in write append mode
        properties_file = open(final_properties, 'wb')
        shutil.copyfileobj(open('/cloudera_nfs1/config/oozie_global.properties', 'rb'), properties_file)
        dynamic_properties = ""
        if script_name == "":
            print("run_solr_index.py -> Error : Script name required for insert option")
            sys.exit(8)
        else:
            script_name = options.script_name.strip()
        script_file = collection_folder + "/" + script_name
        hdfs_cmd = "hdfs dfs -put -f " + script_file + " " + envvars.list['hdfs_app_workflows'] + '/wf_hive_query/'
        rc, out = commands.getstatusoutput(hdfs_cmd)
        if rc != 0:
            print("run_solr_index.py -> Failed : " + hdfs_cmd + ";RC : " + str(rc))
            print out
            #sys.exit(10)
        dynamic_properties = '\n'.join([
            '\nenv=' + env,
            'app=' + app,
            'sub_app=' + sub_app,
            'group=' + group,
            'happ=' + envvars.list['happ'],
            'hv_db=' + envvars.list['hv_db_' + app + '_' + sub_app + '_stage'],
            'hv_db_stage=' + envvars.list['hv_db_' + app + '_' + sub_app + '_stage'],
            'hv_table=' + solr_table_name,
            'table=' + solr_table_name,
            'stage_table=' + solr_table_name,
            'hdfs_location=' + envvars.list['hdfs_str_raw'] + "/solr/" + solr_table_name,
            'hdfs_tmp_dir=' + envvars.list['hdfs_str_raw'] + "/tmp_" + solr_table_name,
            'prev_load_date=' + prev_load_date,
            'hive_query=' + script_name,
            'curr_date=' + curr_date,
            'min_bound=' + lower_bound,
            'max_bound=' + upper_bound
        ])
        properties_file.write(dynamic_properties)
        properties_file.close()
        print("run_solr_index.py -> DynmcPrpty : " + dynamic_properties.replace("\n", ", "))
        print("run_solr_index.py -> FinalPrpty : " + final_properties)
        sys.stdout.flush()
        abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + ",run_solr_index.py"
        rc = runoozieworkflow(final_properties, abc_parameter)
        if rc > 0:
            sys.exit(rc)
    if "insert" in actions.lower():
        morphline_file_name = collection_name + ".conf"
        morphline_file_path = collection_folder + "/" + collection_name + ".conf"
        if not (os.path.exists(morphline_file_path)):
            print("run_solr_index.py -> Error : Morphline conf file does not exist.." + morphline_file_path)
            sys.exit(8)
        solr_mr_cmd = "|".join([
            "hadoop|jar",
            envvars.list['solr_home'] + "/contrib/mr/search-mr-1.0.0-cdh5.5.4-job.jar|org.apache.solr.hadoop.MapReduceIndexerTool",
            "-D|'mapred.child.java.opts=-Xmx4G'",
            "-D|'mapreduce.reduce.memory.mb=8192'",
            "--morphline-file", morphline_file_path,
            "--output-dir", envvars.list['hdfs_str_raw'] + "/tmp_" + solr_table_name,
            "--verbose|--go-live|--zk-host", envvars.list['zookeeper_ensemble'],
            "--collection", collection_name,
            envvars.list['hdfs_str_raw'] + "/solr/" + solr_table_name
        ])
        env_var_cmd = ";".join([
            "export myDriverJarDir=/opt/cloudera/parcels/CDH/lib/solr/contrib/crunch",
            "export myDependencyJarDir=/opt/cloudera/parcels/CDH/lib/search/lib/search-crunch",
            "export myDriverJar=$(find $myDriverJarDir -maxdepth 1 -name 'search-crunch-*.jar' ! -name '*-job.jar' ! -name '*-sources.jar')",
            "export myDependencyJarFiles=$(find $myDependencyJarDir -name '*.jar' | sort | tr '\n' ',' | head -c -1)",
            "export myDependencyJarFiles=$myDependencyJarFiles,$(find /opt/cloudera/parcels/CDH/jars -name 'snappy-java-*.jar')",
            "export myDependencyJarPaths=$(find $myDependencyJarDir -name '*.jar' | sort | tr '\n' ':' | head -c -1)",
            'export myJVMOptions="-DmaxConnectionsPerHost=10000 -DmaxConnections=10000 -Dspark.yarn.maxAppAttempts=1"',
            "export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark",
            "export SPARK_SUBMIT_CLASSPATH=/opt/cloudera/parcels/CDH/lib/search/lib/search-crunch/commons-codec-*.jar:$SPARK_HOME/assembly/lib/*:/opt/cloudera/parcels/CDH/lib/search/lib/search-crunch/*"
        ])
        #rc, export = commands.getstatusoutput(env_var_cmd)
        #if rc != 0:
        #    print("run_solr_index.py -> Failed : "+env_var_cmd+";RC : "+str(rc))
        #    print out
        #    sys.exit(10)
        rc, dependency_jars = commands.getstatusoutput(env_var_cmd + ';echo "$myDependencyJarFiles"')
        if rc != 0:
            print("run_solr_index.py -> Failed : " + 'echo "$myDependencyJarFiles"' + ";RC : " + str(rc))
            print dependency_jars
            sys.exit(10)
        print("run_solr_index.py -> Dependency Jars : " + dependency_jars)
        rc, driver_jar = commands.getstatusoutput(env_var_cmd + ';echo "$myDriverJar"')
        if rc != 0:
            print("run_solr_index.py -> Failed : " + 'echo "$myDriverJar"' + ";RC : " + str(rc))
            print driver_jar
            sys.exit(10)
        print("run_solr_index.py -> driver_jar : " + driver_jar)
        rc, jvm_options = commands.getstatusoutput(env_var_cmd + ';echo "$myJVMOptions"')
        if rc != 0:
            print("run_solr_index.py -> Failed : " + 'echo "$myJVMOptions"' + ";RC : " + str(rc))
            print jvm_options
            sys.exit(10)
        print("run_solr_index.py -> jvm_options : " + jvm_options)
        rc, user_name = commands.getstatusoutput("echo $USER")
        token_file_name = env + '_' + app.replace("/", "_") + '_solr_' + user_name.lower() + '_' + options.group.lower() + '_' + collection_name + '.token'
        token_file_path = envvars.list['lfs_app_wrk'] + '/' + token_file_name
        tokenCmd = " curl --negotiate -u: '" + envvars.list['solr_server'] + "/?op=GETDELEGATIONTOKEN' > " + token_file_path
        rc, token_txt = commands.getstatusoutput(tokenCmd)
        if rc != 0:
            print("run_solr_index.py -> Failed : " + tokenCmd + ";RC : " + str(rc))
            print token_txt
            sys.exit(10)
        log4jCmd = "echo $(ls /opt/cloudera/parcels/CDH/share/doc/search-*/search-crunch/log4j.properties)"
        rc, log4j_path = commands.getstatusoutput(log4jCmd)
        if rc != 0:
            print("run_solr_index.py -> Failed : " + log4jCmd + ";RC : " + str(rc))
            print token_txt
            sys.exit(10)
        solr_spark_cmd = "|".join([
            "spark-submit", "--master", "yarn", "--deploy-mode", "cluster",
            "--jars", dependency_jars,
            '--executor-memory', '6G',
            '--num-executors', '1',
            '--conf', '"spark.executor.extraJavaOptions=' + jvm_options + '"',
            '--conf', '"spark.driver.extraJavaOptions=' + jvm_options + '"',
            '--class', 'org.apache.solr.crunch.CrunchIndexerTool',
            "--files", token_file_path + "," + morphline_file_path + "," + log4j_path,
            driver_jar,
            "-Dhadoop.tmp.dir=/tmp",
            "-Dspark.yarn.maxAppAttempts=1",
            "-DmorphlineVariable.ZK_HOST=" + envvars.list['zookeeper_ensemble'],
            "-DtokenFile=" + token_file_name,
            "--morphline-file", morphline_file_name,
            "--log4j|log4j.properties",
            "--pipeline-type|spark|--chatty",
            envvars.list['hdfs_str_raw'] + "/solr/" + solr_table_name
        ])
        solr_cmd = solr_spark_cmd
        call = subprocess.Popen(solr_cmd.split('|'),
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        prev_line = ""
        line_count = 0
        while True:
            line = call.stdout.readline()
            if not line:
                break
            if prev_line[20:] != line.strip()[20:] or line_count > 1000:
                print line.strip()
                line_count = 0
                prev_line = line.strip()
            else:
                print prev_line
                line_count = line_count + 1
            sys.stdout.flush()
        call.communicate()
        if call.returncode != 0:
            print "run_solr_index.py -> Failed : " + solr_cmd + ";RC : " + str(call.returncode)
            sys.exit(10)
        print("run_solr_index.py -> Command : " + solr_cmd + ";RC : " + str(call.returncode))
    sys.stdout.flush()
    print("run_solr_index.py -> Ended : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print start_line
    print "Return-Code:" + str(return_code)
    sys.exit(return_code)
def main():
    global return_code
    return_code = 0
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("put_file.py -> Started : " + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    table, field, field_type, field_rdbms_format, field_hadoop_format, file_reg, upper_bound, \
        common_properties, app, sub_app, env, env_ver, group, ingest_type = arg_handle()
    print "field_type=", field_type
    # Get envvars from oozie_common_properties file
    envvars.populate(env, env_ver, app, sub_app)
    # Get final properties file name and path from variables
    final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_' + app.replace("/", "_") + '_' + table + '.properties'
    # Remove if the file exists
    silentremove(final_properties)
    # open the final properties file in write append mode
    properties_file = open(final_properties, 'wb')
    if 'uzip' in ingest_type:
        unzip_cmd = 'unzip "' + envvars.list['lfs_app_data'] + '/' + file_reg + '" -d "' + envvars.list['lfs_app_data'] + '/' + file_reg.rsplit('/', 1)[0] + '/"'
        print("put_file.py -> Command : " + unzip_cmd)
        sys.stdout.flush()
        rc, out = commands.getstatusoutput(unzip_cmd)
        if rc == 0:
            print("put_file.py -> UNZIP command successful ")
            rm_cmd = 'rm ' + envvars.list['lfs_app_data'] + '/' + file_reg
            rc, out = commands.getstatusoutput(rm_cmd)
            if rc != 0:
                print("put_file.py -> File remove command failed" + str(out))
                sys.exit(1)
            else:
                print("put_file.py -> Removed Files :" + envvars.list['lfs_app_data'] + '/' + file_reg)
        elif rc == 2304:
            print("put_file.py -> No Files to be processed RC:" + str(rc))
            print(str(out))
        else:
            print("put_file.py -> " + unzip_cmd + " command failed RC:" + str(rc))
            print(str(out))
            sys.exit(1)
    if 'rmfl' in ingest_type:
        rmfl_cmd = 'rm ' + envvars.list['lfs_app_data'] + '/' + file_reg
        print("put_file.py -> Command : " + rmfl_cmd)
        sys.stdout.flush()
        rc, out = commands.getstatusoutput(rmfl_cmd)
        if rc == 0:
            print("put_file.py -> Remove command successful ")
        elif rc == 256:
            print("put_file.py -> No Files to be processed RC:" + str(rc))
            print(str(out))
        else:
            print("put_file.py -> " + rmfl_cmd + " command failed RC:" + str(rc))
            print(str(out))
            sys.exit(1)
    sys.stdout.flush()
    if 'gzip' in ingest_type:
        gz_cmd = "gzip " + envvars.list['lfs_app_data'] + '/' + file_reg
        rc, out = commands.getstatusoutput(gz_cmd)
        if rc == 0:
            print("put_file.py -> GZIP command successful ")
            # Removing txt is not necessary as GZIPping will remove them
            #rm_cmd = 'rm '+envvars.list['lfs_app_data'] + '/'+file_reg
            #rc, out = commands.getstatusoutput(rm_cmd)
            #if rc != 0:
            #    print("put_file.py -> File remove command failed" + str(out))
            #    sys.exit(1)
            #else:
            #    print("put_file.py -> Removed Files :"+envvars.list['lfs_app_data'] + '/'+file_reg)
        elif rc == 256:
            print("put_file.py -> No Files to be processed RC:" + str(rc))
            print(str(out))
        else:
            print("put_file.py -> " + gz_cmd + " command failed RC:" + str(rc))
            print(str(out))
            sys.exit(1)
    if 'bzip' in ingest_type:
        bz_cmd = "bzip2 " + envvars.list['lfs_app_data'] + '/' + file_reg
        rc, out = commands.getstatusoutput(bz_cmd)
        if rc == 0:
            print("put_file.py -> BZIP2 command successful ")
            # Removing txt is not necessary as compression will remove them
        elif rc == 256:
            print("put_file.py -> No Files to be processed RC:" + str(rc))
            print(str(out))
        else:
            print("put_file.py -> " + bz_cmd + " command failed RC:" + str(rc))
            print(str(out))
            sys.exit(1)
    sys.stdout.flush()
    if 'rplc' in ingest_type:
        hdfs_loc = envvars.list['hdfs_str_raw'] + '/' + envvars.list['hv_db_' + app + '_' + sub_app + '_stage'] + '/' + table + '/'
        put_cmd = 'hdfs dfs -put -f ' + envvars.list['lfs_app_data'] + '/' + file_reg + ' ' + hdfs_loc
        rc, out = commands.getstatusoutput(put_cmd)
        if rc == 0:
            print("put_file.py -> Command Successful: " + put_cmd)
            rm_cmd = 'rm ' + envvars.list['lfs_app_data'] + '/' + file_reg
            rc, out = commands.getstatusoutput(rm_cmd)
            if rc != 0:
                print("put_file.py -> File remove command failed" + str(out))
                sys.exit(1)
            else:
                print("put_file.py -> Removed File :" + envvars.list['lfs_app_data'] + '/' + file_reg)
        elif rc == 256:
            print("put_file.py -> No Files to be processed RC:" + str(rc))
            print(str(out))
        else:
            print("put_file.py -> HDFS PUT command failed RC:" + str(rc))
            print(str(out))
            sys.exit(1)
    sys.stdout.flush()
    if 'extp' in ingest_type:
        try:
            quarter_offset = file_reg.replace('{mm}', 'mm').replace('{YYYY}', 'YYYY').index('{q}')
        except ValueError:
            quarter_offset = -1
        try:
            year_offset = file_reg.replace('{mm}', 'mm').replace('{q}', 'q').index('{YYYY}')
        except ValueError:
            year_offset = -1
        try:
            month_offset = file_reg.replace('{YYYY}', 'YYYY').replace('{q}', 'q').index('{mm}')
        except ValueError:
            month_offset = -1
        file_reg = file_reg.replace('.', '\.')
        file_reg = file_reg.replace('{q}', '[1-4]{1}')
        file_reg = file_reg.replace('{YYYY}', '[0-9]{4}')
        file_reg = file_reg.replace('{mm}', '[0-9]{2}')
        prog = re.compile(file_reg)
        lfs_data = envvars.list['lfs_app_data'] + '/' + table + '/'
        hdfs_data = envvars.list['hdfs_str_raw'] + '/' + table + '/'
        year = ""
        month = ""
        quarter = ""
        for f in listdir(lfs_data):
            if isfile(join(lfs_data, f)):
                if prog.match(f):
                    hdfs_path = hdfs_data
                    if year_offset != -1:
                        year = f[year_offset:year_offset + 4]
                        hdfs_path = hdfs_path + "year=" + str(f)[year_offset:year_offset + 4] + "/"
                    if month_offset != -1:
                        month = f[month_offset:month_offset + 2]
                        hdfs_path = hdfs_path + "month=" + str(f)[month_offset:month_offset + 2] + "/"
                    if quarter_offset != -1:
                        quarter = f[quarter_offset:quarter_offset + 1]
                        hdfs_path = hdfs_path + "quarter=" + str(f)[quarter_offset:quarter_offset + 1] + "/"
                    put_command = "hdfs dfs -put -f " + join(lfs_data, f) + " " + hdfs_path
                    print(put_command)
                    rc, out = commands.getstatusoutput(put_command)
                    if rc == 0:
                        rm_command = "rm " + join(lfs_data, f)
                        #rc, out = commands.getstatusoutput(rm_command)
                    else:
                        print("hdfs command failed" + str(out))
                        sys.exit(1)
    sys.stdout.flush()
    print("put_file.py -> Ended : " + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print "put_file.py -> Return-Code:" + str(return_code)
    print start_line
    sys.exit(return_code)
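# --- Hedged illustration (not part of the original script) -----------------------
# How the 'extp' placeholders above map to a regex and an HDFS partition path,
# using a hypothetical file pattern; the pattern and file name are examples only.
def example_extp_pattern():
    pattern = "acct_{YYYY}{mm}.txt"  # hypothetical pattern from jobnames.list
    year_offset = pattern.replace('{mm}', 'mm').replace('{q}', 'q').index('{YYYY}')     # 5
    month_offset = pattern.replace('{YYYY}', 'YYYY').replace('{q}', 'q').index('{mm}')  # 9
    regex = pattern.replace('.', '\.').replace('{YYYY}', '[0-9]{4}').replace('{mm}', '[0-9]{2}')
    # regex is now acct_[0-9]{4}[0-9]{2}\.txt; a file named acct_201703.txt matches
    # and would land under .../year=2017/month=03/ per the offsets computed above.
    f = "acct_201703.txt"
    print regex
    print f[year_offset:year_offset + 4], f[month_offset:month_offset + 2]  # 2017 03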
def main():
    global return_code, group, start_line
    return_code = 0
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("run_oozie_workflow.py -> Started : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    sys.stdout.flush()
    abc_parameter, workflow, options_file = arg_handle()
    abc_parameters = abc_parameter.split(',')
    env = abc_parameters[0]
    env_ver = abc_parameters[1]
    app = abc_parameters[2]
    sub_app = abc_parameters[3]
    group = abc_parameters[4]
    parent_script = abc_parameters[5]
    # config path for ABC logging
    # Get envvars from oozie_common_properties file
    envvars.populate(env, env_ver, app, sub_app)
    print("ABC Parameter" + abc_parameter)
    oozie_wf_cmd = "oozie job -oozie " + envvars.list['oozieNode'] + " -config "
    oozie_wf_cmd = oozie_wf_cmd + options_file
    oozie_wf_cmd = oozie_wf_cmd + ' -Doozie.wf.application.path='
    oozie_wf_cmd = oozie_wf_cmd + workflow
    oozie_wf_cmd = oozie_wf_cmd + ' -debug -run'
    print("run_oozie_workflow.py -> Invoked : " + oozie_wf_cmd)
    rc, jobid_str = commands.getstatusoutput(oozie_wf_cmd)
    if rc == 0:
        jobid_str = jobid_str.split('job: ')
        jobid = jobid_str[1].strip()
        abc_line = "|".join([group, jobid, "oozie", "run_oozie_workflow.py", "", "",
                             "STARTED", getpass.getuser(), "oozie workflow started",
                             str(datetime.datetime.today())])
        print("**ABC_log**->" + abc_line)
        sys.stdout.flush()
    else:
        print("run_oozie_workflow.py -> Failed : " + jobid_str)
        return_code = 8
        sys.exit(return_code)
    print(jobid + "-> Started : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    sys.stdout.flush()
    # ABC logging
    #status = "RUNNING"
    #cnt = 0
    get_status(jobid, envvars.list['oozieNode'], "Main")
    abc_line = "|".join([group, jobid, "oozie", "run_oozie_workflow.py", "", "",
                         "ENDED", getpass.getuser(), "oozie workflow Ended",
                         str(datetime.datetime.today())])
    print("**ABC_log**->" + abc_line)
    sys.stdout.flush()
    print(jobid + "-> Ended : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    abc_line = "|".join([group, "run_oozie_workflow.py", "python", parent_script, "", "",
                         "ENDED", getpass.getuser(), "return-code:" + str(return_code),
                         str(datetime.datetime.today())])
    print("**ABC_log**->" + abc_line)
    sys.stdout.flush()
    print("run_oozie_workflow.py -> Ended : " +
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
def main(): """main() is the driver function for entire parquet table load. From raw table will process table by table""" global return_code return_code = 0 start_line = "".join('*' for i in range(100)) print(start_line) print("file_ingest_parquet.py -> Started : " + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) table, field, field_type, field_rdbms_format, field_hadoop_format, common_properties, app, sub_app, env, env_ver, group, wf_name, hive_script, partition_column, partition_frequency, scd_type, natural_keys, custom_date = arg_handle() print ("Workflow_Directory= " + wf_name + "Oozie Hive Ation .hql Name= " + str(hive_script)) # Get envvars from oozie_common_properties file envvars.populate(env, env_ver, app, sub_app) # Get Final Properties final name and path from variables final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_' + app.replace("/", "_") + '_' + table + '.properties' # Remove if the file exists silentremove(final_properties) # open the final properties file in write append mode properties_file = open(final_properties, 'wb') # Build the table properties file name and path from variables - file_ingest_parquet.py calls workflow based on the wf_name mentioned in jobnames.list (5th Parameter) table_properties = envvars.list['lfs_app_workflows'] + '/' + wf_name + '/' + table + '.properties' # load evironment variables for app specific envvars.load_file(table_properties) # Concatenate global properties file and table properties file shutil.copyfileobj(open(common_properties, 'rb'), properties_file) shutil.copyfileobj(open(table_properties, 'rb'), properties_file) # Get Databese name from environment variables db = envvars.list['hv_db'] db_stage = envvars.list['hv_db_stage'] table = envvars.list['hv_table'] # Raw Table's HDFS Directory hdfs_raw_dir = envvars.list['hdfs_str_raw_fileingest'] + "/" + db_stage # get time stamp to load the table hdfs_load_ts = "'" + str(datetime.datetime.now()) + "'" partitionColAlias = "" sys.stdout.flush() todayDate = "" if custom_date == None: todayDate = date.today() else: todayDate = datetime.datetime.strptime(custom_date , '%Y-%m-%d') todayDate = todayDate + timedelta(1) yesterdayDate=todayDate - timedelta(1) curDate_yyyy=todayDate.strftime('%Y') curDate_mm=todayDate.strftime('%m') curDate_yyyy_mm_dd=todayDate.strftime('%Y-%m-%d') curDate_yyyymmdd=todayDate.strftime('%Y%m%d') user=getpass.getuser() hadoop_user=user.upper() cmd="rm " + hadoop_user +".keytab" rcc,status_txt=commands.getstatusoutput(cmd) print "removing old keytabs if any...status=", status_txt cmd="hdfs dfs -get /user/" + hadoop_user.lower() + "/" + hadoop_user + ".keytab" rcc,status_txt=commands.getstatusoutput(cmd) print "Getting keytab and status = ", status_txt # For Handling SCD Type Tables if(scd_type == "scd1" or scd_type == "merge"): partitionColAlias = 'B.' keys = natural_keys.split(",") numKeys = len(keys) for idx in range(0, numKeys): if(idx == 0): onClause = "on_clause= A." + keys[idx] + "=B." + keys[idx] nullClauseParquet = "null_clause_parquet= A." + keys[idx] + " IS NULL " nullClauseRaw = "null_clause_raw= B." + keys[idx] + " IS NULL " if(idx > 0): onClause = onClause + " and " + "A." + keys[idx] + "=B." + keys[idx] nullClauseParquet = nullClauseParquet + "and" + " A." + keys[idx] + " IS NULL " nullClauseRaw = nullClauseRaw + "and" + " B." 
+ keys[idx] + " IS NULL " # For Partitioning # File Ingest date_string = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d') month_string = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m') if (partition_column): # Retrieval of Date and Month String START mnthDate_cat_cmd = "hdfs dfs -cat " + hdfs_raw_dir + "/" + "load_month_date" + "/" + table + "_mnthDate" rc, cmd_out = commands.getstatusoutput(mnthDate_cat_cmd) if (rc == 0): print("file_ingest_rawdb.py -> Retrieval of Date and Month is successful.") print ("Command Executed is : " + mnthDate_cat_cmd) else: print("file_ingest_rawdb.py -> Retrieval of Date and Month is NOT successful.") print cmd_out print ("Command Executed is : " + mnthDate_cat_cmd) sys.exit(1) # Retrieval of Date and Month String END date_string = cmd_out.split('|')[2] month_string = cmd_out.split('|')[1] if sub_app == 'efgifi' and (table=='criss_stdfld_delim' or table=='criss_ifind_delim'): rc,rec_cnt=lastPreviousDay(curDate_yyyy_mm_dd, env, hadoop_user) date_string=rec_cnt.split('\t')[0] print "Inside efgifi lastPreviousDay loop date_string ="+date_string elif sub_app == 'efgifi': rc,rec_cnt=lastPreviousWorkingDay(curDate_yyyy_mm_dd, env, hadoop_user) date_string=rec_cnt.split('\t')[0] print "Inside efgifi lastPreviousWorkingDay loop date_string ="+date_string print "date_string ="+date_string partitionColumn = "partition_column=" + partition_column partition_clause = "partition_clause=partition ( " + partition_column + " )" if (partition_frequency == "daily"): partition_column_select = "partition_column_select=,'" + date_string + "'" elif (partition_frequency == "monthly"): partition_column_select = "partition_column_select=,'" + month_string + "'" elif (partition_frequency == "monthly,daily"): partition_column_select = "partition_column_select=,'" + month_string + "','" + date_string + "'" elif (partition_frequency): partition_column_select = "partition_column_select=,'" + partition_frequency + "'" else: parCols = partition_column.split(",") numParCols = len(parCols) for idx in range(0, numParCols): if(idx == 0): partition_column_with_qualifier = partitionColAlias + parCols[idx] if(idx > 0): partition_column_with_qualifier += "," + partitionColAlias + parCols[idx] partition_column_select = "partition_column_select=," + partition_column_with_qualifier print("file_ingest_parquet.py -> DownloadTyp: Full Download of table ") dynamic_properties = '\n'.join(['\n', 'env=' + env, 'app=' + app, 'sub_app=' + sub_app, 'group=' + group, 'happ=' + envvars.list['happ'] , 'min_bound=' + "''", 'max_bound=' + "''", 'min_bound_hadoop=' + "''", 'max_bound_hadoop=' + "''", 'hdfs_load_ts=' + hdfs_load_ts]) if(hive_script): dynamic_properties = dynamic_properties + '\n' + 'hive_script=' + hive_script if (partition_column): dynamic_properties = dynamic_properties + '\n ' + partitionColumn + '\n ' + partition_clause + '\n ' + partition_column_select if(scd_type == "scd1" or scd_type == "merge"): dynamic_properties = dynamic_properties + '\n ' + onClause + '\n ' + nullClauseParquet + '\n ' + nullClauseRaw # DATES and MONTHS daysCol = "" for dy in range(0, 7): daysCol += "day_" + str(dy) + "=" + str((datetime.datetime.fromtimestamp(time.time()) - datetime.timedelta(dy)).strftime('%Y-%m-%d')) + "\n" dynamic_properties = dynamic_properties + '\n ' + daysCol dynamic_properties = dynamic_properties + '\n ' + "where=1=1" dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=1=1" abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + 
"," + table + "," # ABC logging parameter for oozie # print "env"+ env # abc_parameter = env+','+env_ver+','+app+','+sub_app+','+group+","+table+','+field+ lower_bound_hadoop +"to"+upper_bound_hadoop properties_file.write(dynamic_properties) properties_file.close() print("file_ingest_parquet.py -> CommnPrpty : " + common_properties) print("file_ingest_parquet.py -> TablePrpty : " + table_properties) print("file_ingest_parquet.py -> DynmcPrpty : " + dynamic_properties.replace("\n", ", ")) print("file_ingest_parquet.py -> FinalPrpty : " + final_properties) sys.stdout.flush() # ABC Logging Pending rc = runoozieworkflow(final_properties, abc_parameter, wf_name) print "Return-Code:" + str(rc) if rc > return_code: return_code = rc abc_line = "|".join([group, "file_ingest_parquet.py", "python", "run_job.py", str(table), "File_Ingest", "ENDED", getpass.getuser(), "return-code:" + str(return_code), str(datetime.datetime.today())]) print("**ABC_log**->" + abc_line) sys.stdout.flush() if (return_code == 0): impala_cmd = envvars.list['impalaConnect'] + ' "REFRESH ' + db + '.' + table + ';"' print impala_cmd rc, output = commands.getstatusoutput(impala_cmd) if (rc != 0): print("file_ingest_parquet.py -> Ooozie Successful But Impala REFRESH FAILED : " + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) print start_line print "Return-Code:" + str(return_code) sys.exit(rc) print("file_ingest_parquet.py -> Ended : " + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) print start_line print "Return-Code:" + str(return_code) sys.exit(return_code)
def main():
    global return_code
    return_code = 0
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("run_ingest.py -> Started : " + datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    table, field, field_type, field_rdbms_format, field_hadoop_format, lower_bound, upper_bound, \
        common_properties, app, sub_app, env, env_ver, group, ingest_type, common_date = arg_handle()
    print "field_type=", field_type
    # Get envvars from oozie_common_properties file
    envvars.populate(env, env_ver, app, sub_app)
    # Get final properties file name and path from variables
    final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_' + app.replace("/", "_") + '_' + table + '.properties'
    # Remove if the file exists
    silentremove(final_properties)
    # open the final properties file in write append mode
    properties_file = open(final_properties, 'wb')
    # Build the table properties file name and path from variables; run_ingest only calls the wf_db_ingest workflow
    table_properties = envvars.list['lfs_app_workflows'] + '/wf_db_ingest/' + table + '.properties'
    rm_ctlM = "sed -i -e 's/\r$//' " + table_properties
    rc, status = commands.getstatusoutput(rm_ctlM)
    print("run_ingest.py -> removing ^M characters in file: " + rm_ctlM + " Status:" + str(rc))
    # get time stamp to load the table
    hdfs_load_ts = "'" + str(common_date).replace("_", " ") + "'"
    common_date_tfmt = datetime.strptime(common_date, '%Y-%m-%d_%H:%M:%S.%f')
    log_time = common_date_tfmt.strftime('%Y-%m-%d_%H-%M-%S')
    log_date = common_date_tfmt.strftime('%Y-%m-%d')
    log_folder = envvars.list['lfs_app_logs'] + "/" + log_date
    log_file = log_folder + "/run_job-" + group + '_' + log_time + '.log'
    envvars.list['hdfs_load_ts'] = hdfs_load_ts
    # load environment variables for app specific settings
    envvars.load_file(table_properties)
    # Concatenate global properties file and table properties file
    shutil.copyfileobj(open(common_properties, 'rb'), properties_file)
    shutil.copyfileobj(open(table_properties, 'rb'), properties_file)
    # Get database name from environment variables
    db = envvars.list['hv_db']
    table = envvars.list['hv_table']
    sys.stdout.flush()
    if ingest_type == 'sync':
        sourceStats = get_stats_sqoop(table, envvars.list['where_column'])
        targetStats = get_stats_impala(db, table, envvars.list['where_column'])
        #print("Source Result:"+str(sourceStats))
        #print("Target Result:"+str(targetStats))
        whereClause = ""
        whereHClause = ""
        for key in sourceStats:
            if key in targetStats:
                if sourceStats[key] != targetStats[key]:
                    if whereClause == "":
                        whereClause = whereClause + envvars.list['where_column'] + "=to_timestamp('" + key + "', 'yyyy-mm-dd hh24:mi:ss.FF')"
                    else:
                        whereClause = whereClause + " or " + envvars.list['where_column'] + "=to_timestamp('" + key + "', 'yyyy-mm-dd hh24:mi:ss.FF')"
            else:
                if whereClause == "":
                    whereClause = whereClause + envvars.list['where_column'] + "=to_timestamp('" + key + "', 'yyyy-mm-dd hh24:mi:ss.FF')"
                else:
                    whereClause = whereClause + " or " + envvars.list['where_column'] + "=to_timestamp('" + key + "', 'yyyy-mm-dd hh24:mi:ss.FF')"
        dynamic_properties = '\n'.join([
            '\nenv=' + env,
            'app=' + app,
            'sub_app=' + sub_app,
            'group=' + group,
            'happ=' + envvars.list['happ'],
            'where=' + whereClause,
            'log_file=' + log_file,
            'hdfs_load_ts=' + hdfs_load_ts
        ])
    elif ingest_type == 'incr':
        if field is None:
            print("run_ingest.py -> ERROR: Incremental SQOOP cannot be performed without a where column ")
            return_code = 2
            sys.exit(return_code)
        print("run_ingest.py -> DownloadTyp: Partial Download based on where condition ")
        # Check if the lower_date range is passed from the jobnames.list file
        if lower_bound is None or lower_bound == "":
            # lower_date range is not found; check for presence of an exclusions file
            lower_bound, upper_bound = get_exception_args(envvars.list['lfs_app_config'], table)
            # lower_date is still none; get the lower date from the impala table
            if lower_bound is None and field is not None and db is not None:
                lower_bound = get_min_bound_impala(db, table, field, field_type)
                if lower_bound is None or lower_bound == "":
                    print("run_ingest.py -> LowerBound: Cannot be determined. Use Sync option")
                    return_code = 2
                    sys.exit(return_code)
                else:
                    print("run_ingest.py -> LowerBound: Min date " + lower_bound + " is determined from Impala table")
            elif lower_bound is None and field is None:
                print("run_ingest.py -> Arguments error: lower_bound or field or entry in exception file is expected")
                return_code = 2
                sys.exit(return_code)
            else:
                print("run_ingest.py -> LowerBound : Min date is determined from exclusions file")
        else:
            print("run_ingest.py -> LowerBound : Min date is determined from jobnames.list file")
        if upper_bound is None or upper_bound == "":
            curr_dt = str(datetime.now().date())
            if field.strip().lower() == "msrmnt_prd_id":
                print "run_ingest.py -> Upper_bound : BDW table date used " + str(curr_dt)
                upper_bound = get_bdw_date_from_id(db, curr_dt)
            elif field_type.lower() == "timestamp":
                upper_bound = str(datetime.strptime(str(datetime.now()), "%Y-%m-%d %H:%M:%S.%f"))
            elif field_type.lower() == "int":
                upper_bound = '99999999'
                print("run_ingest.py -> UpperBound : is 99999999")
            else:
                upper_bound = curr_dt
                print("run_ingest.py -> UpperBound : Max Date is current date")
        else:
            print("run_ingest.py -> UpperBound : Max Date source is same as Min date")
        if field_type.strip().lower() == "timestamp" or field_type.lower() == "":
            ingest_special_args = get_ingest_special_args(envvars.list['lfs_app_config'], table)
            if "lower_bound_modifier_days" in ingest_special_args:
                try:
                    val = int(ingest_special_args["lower_bound_modifier_days"].strip())
                    print("run_ingest.py -> LowerBound Modifier:" + str(val))
                    lower_bound = datetime.strptime(lower_bound, "%Y-%m-%d %H:%M:%S.%f") + timedelta(days=val)
                    lower_bound = str(lower_bound)
                    print("run_ingest.py -> LowerBound : updated to " + lower_bound + " from ingest_special.properties file")
                except ValueError:
                    print("lower_bound_modifier is not an int! " + str(ingest_special_args["lower_bound_modifier_days"]) + "!")
        if field_type.lower() == "timestamp" and envvars.list['datasource'] == "oracle":
            lower_bound_f = "to_timestamp('" + lower_bound + "','YYYY-MM-DD HH24:MI:SS.FF')"
            upper_bound_f = "to_timestamp('" + upper_bound + "','YYYY-MM-DD HH24:MI:SS.FF')"
        else:
            lower_bound_f = lower_bound
            upper_bound_f = upper_bound
        dynamic_properties = '\n'.join([
            '\nenv=' + env,
            'app=' + app,
            'sub_app=' + sub_app,
            'group=' + group,
            'log_file=' + log_file,
            'happ=' + envvars.list['happ'],
            'min_bound=' + lower_bound_f,
            'max_bound=' + upper_bound_f,
            'hdfs_load_ts=' + hdfs_load_ts
        ])
        if field_type.lower() == "int":
            dynamic_properties = dynamic_properties + '\n ' + "where=${where_column} between ${min_bound} and ${max_bound}"
            dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=${where_column} between ${min_bound} and ${max_bound}"
            abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ',' + field + lower_bound + "to" + upper_bound
        elif field_type == None or field_type == "" or field_type.lower() == "date" or field_type.lower() == "timestamp":
            field_rdbms_format = determine_default_field_format(field_rdbms_format)
            field_hadoop_format = determine_default_field_format(field_hadoop_format)
            if field_type.lower() == "timestamp":
                field_rdbms_format = '%Y-%m-%d %H:%M:%S.%f'
                field_hadoop_format = '%Y-%m-%d %H:%M:%S.%f'
            lower_bound_validated = validate_date_format(lower_bound, field_rdbms_format)
            upper_bound_validated = validate_date_format(upper_bound, field_rdbms_format)
            lower_bound_hadoop = lower_bound_validated.strftime(field_hadoop_format)
            upper_bound_hadoop = upper_bound_validated.strftime(field_hadoop_format)
            dynamic_properties = '\n'.join([
                dynamic_properties,
                'min_bound_hadoop=' + lower_bound_hadoop,
                'max_bound_hadoop=' + upper_bound_hadoop
            ])
            if field_type.lower() == "timestamp" and envvars.list['datasource'] == "oracle":
                dynamic_properties = dynamic_properties + '\n ' + "where=${where_column} between ${min_bound} and ${max_bound}"
                dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=${where_column} between '${min_bound_hadoop}' and '${max_bound_hadoop}'"
            else:
                dynamic_properties = dynamic_properties + '\n ' + "where=${where_column} between '${min_bound}' and '${max_bound}'"
                dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=${where_column} between '${min_bound_hadoop}' and '${max_bound_hadoop}'"
            abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ',' + field + lower_bound_hadoop + "to" + upper_bound_hadoop
        else:
            dynamic_properties = dynamic_properties + '\n ' + "where=${where_column} between ${min_bound} and ${max_bound}"
            dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=${where_column} between ${min_bound} and ${max_bound}"
            abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ',' + field + lower_bound + "to" + upper_bound
    else:
        print("run_ingest.py -> DownloadTyp: Full Download of table ")
        dynamic_properties = '\n'.join([
            '\nenv=' + env,
            'app=' + app,
            'sub_app=' + sub_app,
            'group=' + group,
            'log_file=' + log_file,
            'happ=' + envvars.list['happ'],
            'min_bound=' + "''",
            'max_bound=' + "''",
            'min_bound_hadoop=' + "''",
            'max_bound_hadoop=' + "''",
            'hdfs_load_ts=' + hdfs_load_ts
        ])
        dynamic_properties = dynamic_properties + '\n ' + "where=1=1"
        if envvars.list['hive_query'].strip().lower() == 'hv_ins_stg_fnl_audit.hql':
            dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=as_of_date=" + hdfs_load_ts
        else:
            dynamic_properties = dynamic_properties + '\n ' + "where_hadoop=1=1"
        abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ","
        # ABC logging parameter for oozie
        #print "env"+ env
        #abc_parameter = env+','+env_ver+','+app+','+sub_app+','+group+","+table+','+field+ lower_bound_hadoop +"to"+upper_bound_hadoop
    properties_file.write(dynamic_properties)
    properties_file.close()
    print("run_ingest.py -> CommnPrpty : " + common_properties)
    print("run_ingest.py -> TablePrpty : " + table_properties)
    print("run_ingest.py -> DynmcPrpty : " + dynamic_properties.replace("\n", ", "))
    print("run_ingest.py -> FinalPrpty : " + final_properties)
    sys.stdout.flush()
    # ABC Logging Started
    parameter_string = ""
    if lower_bound is not None and lower_bound != "":
        parameter_string = field + " " + lower_bound + " " + upper_bound
    comments = "Properties file name :" + final_properties
    abc_line = "|".join([group, "run_ingest.py", "python", "run_job.py", str(table),
                         parameter_string, "RUNNING", getpass.getuser(), comments,
                         str(datetime.today())])
    print("**ABC_log**->" + abc_line)
    abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + ",run_ingest.py"
    sys.stdout.flush()
    rc = runoozieworkflow(final_properties, abc_parameter)
    print "Return-Code:" + str(rc)
    if rc > return_code:
        return_code = rc
    abc_line = "|".join([group, "run_ingest.py", "python", "run_job.py", str(table),
                         parameter_string, "ENDED", getpass.getuser(),
                         "return-code:" + str(return_code), str(datetime.today())])
    print("**ABC_log**->" + abc_line)
    sys.stdout.flush()
    print("run_ingest.py -> Ended : " + datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print start_line
    print "Return-Code:" + str(return_code)
    sys.exit(return_code)
def main():
    options = arg_handle()
    #BEELINE_URL='jdbc:hive2://lbdp164a.uat.pncint.net:10000/default;principal=hive/[email protected]'
    envvars.populate(options.env, options.env_ver, options.app, options.sub_app)
    BEELINE_URL = "beeline -u '" + envvars.list['hive2JDBC'] + "principal=" + envvars.list['hive2Principal'] + "'"
    envvars.list['hdfs_load_ts'] = str(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S.%f'))
    print "run_hive_create.py -> Beeline connect command - " + BEELINE_URL
    # Read the list of table names from the generated file.
    try:
        with open(options.inputFile) as fin:
            for line in fin:
                print "run_hive_create.py -> ***************table name = " + line.strip() + "*********************"
                sys.stdout.flush()
                tableName = line.strip()
                envvars.clearList()
                envvars.populate(options.env, options.env_ver, options.app, options.sub_app)
                envvars.load_file(options.table_properties_path + "/" + tableName + ".properties")
                #stageTargetDir=envvars.list['stg_target_dir']
                #targetDir=envvars.list['target_dir']
                print "run_hive_create.py -> creating stage table - " + envvars.list['hv_db_stage'] + "." + envvars.list['stage_table']
                print "run_hive_create.py -> stage target_dir = " + envvars.list['stg_target_dir']
                # comment/uncomment the print statement below to show/hide the exact command issued for creating the hive table
                beeline_cmd = " ".join([BEELINE_URL,
                                        "-hiveconf hv_db_stage=" + envvars.list['hv_db_stage'],
                                        " -hiveconf hv_db=" + envvars.list['hv_db'],
                                        " -hiveconf stage_table=" + tableName.strip(),
                                        " -hiveconf stg_target_dir=" + envvars.list['stg_target_dir'],
                                        "-f", options.table_create_path + "/" + tableName + "_create_stg.hql"])
                print "run_hive_create.py -> " + beeline_cmd
                #beeline -u ${BEELINE_URL} -hiveconf hv_db_stage="${hv_db_stage}" -hiveconf hv_db="${hv_db}" -hiveconf stage_table="$tableName" -hiveconf stg_target_dir="${stageTargetDir}" --silent=true -f "${table_create_path}"/${tableName}_create_stg.hql
                #hive -hiveconf hv_db_stage="${hv_db_stage}" -hiveconf hv_db="${hv_db}" -hiveconf stage_table="$tableName" -hiveconf stg_target_dir="${stageTargetDir}" -f "${table_create_path}"/${tableName}_create_stg.hql
                sys.stdout.flush()
                rc = os.system(beeline_cmd)
                sys.stdout.flush()
                #1>/tmp/${USER}.log
                if (rc != 0):
                    print "run_hive_create.py -> Create stage table script failed. Please validate, fix and continue"
                    sys.exit(1)
                #hdfs dfs -chmod -R 777 ${stageTargetDir} 2>&1
                print "run_hive_create.py -> creating final table - " + envvars.list['hv_db'] + "." + envvars.list['hv_table']
                #print "run_hive_create.py -> final target_dir = " + envvars.list['target_dir']
                # comment/uncomment the print statement below to show/hide the exact hive command used
                beeline_cmd = " ".join([BEELINE_URL,
                                        "-hiveconf hv_db_stage=" + envvars.list['hv_db_stage'],
                                        " -hiveconf hv_db=" + envvars.list['hv_db'],
                                        " -hiveconf table=" + tableName.strip(),
                                        #" -hiveconf target_dir="+envvars.list['target_dir'],
                                        "-f", options.table_create_path + "/" + tableName + "_create_parquet.hql"])
                print "run_hive_create.py -> " + beeline_cmd
                #beeline -u ${BEELINE_URL} -hiveconf hv_db_stage="${hv_db_stage}" -hiveconf hv_db="${hv_db}" -hiveconf table="$table" -hiveconf target_dir="${targetDir}" --silent=true -f "${table_create_path}"/${tableName}_create_parquet.hql
                #hive -hiveconf hv_db_stage="${hv_db_stage}" -hiveconf hv_db="${hv_db}" -hiveconf table="$table" -hiveconf target_dir="${targetDir}" -f "${table_create_path}"/${tableName}_create_parquet.hql
                #1>/tmp/${USER}.log
                sys.stdout.flush()
                rc = os.system(beeline_cmd)
                sys.stdout.flush()
                if (rc != 0):
                    print "run_hive_create.py -> Create table script failed. Please validate, fix and continue"
                    sys.exit(1)
                print "run_hive_create.py -> Invalidate metadata - " + envvars.list['hv_db'] + "." + envvars.list['hv_table']
                impalaCmd = envvars.list['impalaConnect'] + "' invalidate metadata " + envvars.list['hv_db'] + "." + envvars.list['hv_table'] + ";invalidate metadata " + envvars.list['hv_db_stage'] + "." + envvars.list['stage_table'] + "; '"
                print "run_hive_create.py -> " + impalaCmd
                sys.stdout.flush()
                rc = os.system(impalaCmd)
                sys.stdout.flush()
                if (rc != 0):
                    print "run_hive_create.py -> Invalidate metadata failed"
                    sys.exit(1)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise IOError("exception file reading error")
        else:
            print("No Tablelist file found")
def main():
    global return_code
    return_code = 0
    start_line = "".join('*' for i in range(100))
    print(start_line)
    print("run_hive_query.py -> Started     : " +
          datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    options = arg_handle()
    table = options.table.strip().lower()
    app = options.app.lower()
    env = options.env.lower()
    env_ver = options.env_ver.lower()
    sub_app = options.sub_app.lower()
    query = ""
    query_type = options.query_type.strip().lower()
    if query_type == "cstm":
        query = options.query.strip()
    group = options.group
    common_properties = '/cloudera_nfs1/config/oozie_global.properties'
    # Get envvars from the oozie_common_properties file
    envvars.populate(env, env_ver, app, sub_app)
    hdfs_load_ts = "'" + str(options.common_date).replace("_", " ") + "'"
    common_date_tfmt = datetime.strptime(options.common_date, '%Y-%m-%d_%H:%M:%S.%f')
    log_time = common_date_tfmt.strftime('%Y-%m-%d_%H-%M-%S')
    log_date = common_date_tfmt.strftime('%Y-%m-%d')
    log_folder = envvars.list['lfs_app_logs'] + "/" + log_date
    log_file = log_folder + "/run_job-" + group + '_' + log_time + '.log'
    # Build the final properties file name and path from variables
    final_properties = envvars.list['lfs_app_wrk'] + '/' + env + '_hive_query_' + \
                       app.replace("/", "_") + '_' + table + '.properties'
    # Remove the file if it already exists
    silentremove(final_properties)
    # Open the final properties file for writing
    properties_file = open(final_properties, 'wb')
    # Build the table properties file name and path from variables;
    # run_ingest only calls the wf_db_ingest workflow
    table_properties = envvars.list['lfs_app_workflows'] + '/wf_db_ingest/' + table + '.properties'
    # Get the timestamp used to load the table (overrides the value derived from common_date above)
    hdfs_load_ts = "'" + str(datetime.now()) + "'"
    envvars.list['hdfs_load_ts'] = hdfs_load_ts
    # Load app-specific environment variables
    if os.path.isfile(table_properties):
        envvars.load_file(table_properties)
    # Concatenate the global properties file and the table properties file
    shutil.copyfileobj(open(common_properties, 'rb'), properties_file)
    if os.path.isfile(table_properties):
        shutil.copyfileobj(open(table_properties, 'rb'), properties_file)
        # Get the database name from the environment variables
        db = envvars.list['hv_db']
        table = envvars.list['hv_table']
    else:
        db = envvars.list['hv_db_' + app + '_' + sub_app]
        table = table_properties
    sys.stdout.flush()
    curr_date = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d")
    prev_load_date = (datetime.fromtimestamp(time.time()) + timedelta(days=-1)).strftime("%Y-%m-%d")
    dynamic_properties = '\n'.join(['\nenv=' + env,
                                    'app=' + app,
                                    'sub_app=' + sub_app,
                                    'group=' + group,
                                    'prev_load_date=' + prev_load_date,
                                    'curr_date=' + curr_date,
                                    'log_file=' + log_file,
                                    'happ=' + envvars.list['happ']])
    if query_type == 'mgfl':
        print("run_hive_query.py -> HiveQueryTyp: Merge small files ")
        if envvars.list['partition_column'] != '':
            if not (options.ldate is None or options.ldate.strip() == ""):
                partition_clause = "PARTITION (" + envvars.list['partition_column'] + "='" + options.ldate.strip() + "')"
                where_clause = " where " + envvars.list['partition_column'] + "='" + options.ldate.strip() + "'"
            else:
                partition_clause = "PARTITION (" + envvars.list['partition_column'] + "='" + prev_load_date + "')"
                where_clause = " where " + envvars.list['partition_column'] + "='" + prev_load_date + "'"
            dynamic_properties = '\n'.join([dynamic_properties,
                                            'hive_query=merge_smfl_table.hql',
                                            'python_script=invalidate_metadata.py',
                                            'partition_clause=' + partition_clause,
                                            'where_clause=' + where_clause])
            abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ","
        else:
            print("run_hive_query.py -> Partition column must be present to merge small files ")
            sys.exit()
    elif query_type == 'cstm':
        print("run_hive_query.py -> HiveQueryTyp: custom query ")
        queries = query
        query = queries.split(",")[0]
        # Strip carriage returns (^M) from the query file before pushing it to HDFS
        rm_ctlM = "sed -i -e 's/\r$//' " + envvars.list['lfs_app_workflows'] + '/wf_hive_query/' + query
        rc, status = commands.getstatusoutput(rm_ctlM)
        print("run_hive_query.py -> removing ^M characters in file: " + rm_ctlM + " Status:" + str(rc))
        hdfs_put = "hdfs dfs -put -f " + envvars.list['lfs_app_workflows'] + '/wf_hive_query/' + query + \
                   " " + envvars.list['hdfs_app_workflows'] + '/wf_hive_query/'
        rc, status = commands.getstatusoutput(hdfs_put)
        print("run_hive_query.py -> copying file: " + hdfs_put + " Status:" + str(rc))
        if len(queries.split(",")) == 2:
            py_query = queries.split(",")[1]
            hdfs_put = "hdfs dfs -put -f " + envvars.list['lfs_app_workflows'] + '/wf_hive_query/' + py_query + \
                       " " + envvars.list['hdfs_app_workflows'] + '/wf_hive_query/'
            rc, status = commands.getstatusoutput(hdfs_put)
            print("run_hive_query.py -> copying file: " + hdfs_put + " Status:" + str(rc))
        else:
            py_query = 'invalidate_metadata.py'
        dynamic_properties = '\n'.join([dynamic_properties,
                                        'hive_query=' + query,
                                        'python_script=' + py_query])
        abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + "," + table + ","
    properties_file.write(dynamic_properties)
    properties_file.close()
    print("run_hive_query.py -> CommnPrpty  : " + common_properties)
    print("run_hive_query.py -> TablePrpty  : " + table_properties)
    print("run_hive_query.py -> DynmcPrpty  : " + dynamic_properties.replace("\n", ", "))
    print("run_hive_query.py -> FinalPrpty  : " + final_properties)
    sys.stdout.flush()
    # ABC logging started
    parameter_string = ""
    comments = "Properties file name :" + final_properties
    abc_line = "|".join([group, "run_hive_query.py", "python", "run_job.py", str(table),
                         parameter_string, "RUNNING", getpass.getuser(), comments,
                         str(datetime.today())])
    print("**ABC_log**->" + abc_line)
    abc_parameter = env + ',' + env_ver + ',' + app + ',' + sub_app + ',' + group + ",run_hive_query.py"
    sys.stdout.flush()
    rc = runoozieworkflow(final_properties, abc_parameter)
    print "Return-Code:" + str(rc)
    if rc > return_code:
        return_code = rc
    abc_line = "|".join([group, "run_hive_query.py", "python", "run_job.py", str(table),
                         parameter_string, "ENDED", getpass.getuser(),
                         "return-code:" + str(return_code), str(datetime.today())])
    print("**ABC_log**->" + abc_line)
    sys.stdout.flush()
    print("run_hive_query.py -> Ended       : " +
          datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print start_line
    print "Return-Code:" + str(return_code)
    sys.exit(return_code)
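# silentremove() is called above (and again in run_job.py below) but is not defined in
# this section. The following is a minimal sketch of what such a helper presumably does;
# it is an assumption for illustration, not the project's actual implementation.
import os
import errno

def silentremove(filename):
    # Remove a file, ignoring the error raised when it does not already exist.
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # re-raise anything other than "file not found"
            raise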
def main():
    global return_code, msck_count, msck_command
    return_code = 0
    msck_count = 0
    home = '/data/'
    path = os.path.dirname(os.path.realpath(__file__))
    root = path.split('src/scripts')[0]
    #env = path.split('/')[2].split('bdp')[1]
    #env_ver = path.split('/')[4]
    env = 'p'
    env_ver = '01'
    usage = "usage: run_job.py grp_name app sub_app jobnames.list"
    parser = OptionParser(usage)
    (options, args) = parser.parse_args()
    if len(args) < 3:
        parser.error("Arguments - group_job_name and app name are required.")
    global app, sub_app
    grp_name = args[0]
    app = args[1]
    sub_app = args[2]
    jobnames = "jobnames.list"
    common_date = ""
    if len(args) == 4:
        common_date = args[3].strip()
    else:
        common_date = str(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S.%f'))
    common_date_tfmt = datetime.strptime(common_date, '%Y-%m-%d_%H:%M:%S.%f')
    asofdate = common_date_tfmt.strftime('%Y-%m-%d')
    log_time = common_date_tfmt.strftime('%Y-%m-%d_%H-%M-%S')
    rerunjobnames = "jobnames_" + asofdate + ".list"
    rerun = "N"
    # rerunjobnames = jobnames
    # rerun = "Y"
    envvars.populate(env, env_ver, app, sub_app)
    log_date = common_date_tfmt.strftime('%Y-%m-%d')
    log_folder = envvars.list['lfs_app_logs'] + "/" + log_date
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)
        chmod_log = "chmod 777 " + log_folder
        rc, status = commands.getstatusoutput(chmod_log)
    log_file = log_folder + "/run_job-" + grp_name + '_' + log_time + '.log'
    global abc_log_file, stdout_file
    abc_log_file = envvars.list['lfs_app_logs'] + "/" + grp_name + ".tmp"
    failed_group_name = "@@" + grp_name + '_' + log_time
    print("LogFile: " + log_file)
    print("To Kill: kill " + str(os.getpid()))
    # Create the log file if needed and redirect stdout to it, unbuffered
    f = open(log_file, "a", 0)
    f.close()
    stdout_file = open(log_file, "r+", 0)
    sys.stdout = stdout_file
    global kerb, user_name
    rc, user_name = commands.getstatusoutput("echo $USER")
    service_user_name = envvars.list['srvc_acct_login_' + app + '_' + sub_app]
    if service_user_name is not None and service_user_name != "":
        user_name = service_user_name
    # Authenticate via Kerberos when a keytab exists for the user; otherwise fall back
    keytab_file = envvars.list['lfs_keystore'] + "/" + user_name.lower() + ".keytab"
    if os.path.isfile(keytab_file):
        kerb = "kinit -k -t " + keytab_file + " " + user_name.lower() + envvars.list['domainName']
        rc, out = commands.getstatusoutput(kerb)
        print("run_job.py -> Authenticated : " + kerb + " RC:" + str(rc))
    else:
        print("run_job.py -> Keytab file missing, not able to authenticate. Using user default authentication")
    start_line = "".join('*' for i in range(100))
    print start_line
    print("run_job.py -> Started      : " +
          datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    global abc_hdfs_put
    #hdfs_abc_log_file = envvars.list['hdfs_meta_raw'] + "/" + envvars.list['hv_db_meta_stage'] + "/abc_hadoop/load_date=" + str(asofdate) + "/00000.log"
    #abc_hdfs_put = " ".join(["hdfs", "dfs", "-appendToFile", abc_log_file, hdfs_abc_log_file])
    #hdfs_chmod = "hdfs dfs -chmod -R 777 " + hdfs_abc_log_file
    #rc, out = commands.getstatusoutput(hdfs_chmod)
    #print("---Output of chmod command of abc_log_file-->" + hdfs_chmod)
    #print("run_job.py -> Invoked      : " + hdfs_chmod)
    #print out
    #msck_command = "beeline -u '" + envvars.list['hive2JDBC'] + ";principal=" + envvars.list['hive2Principal'] + "' -e "
    #msck_command = "hive -e "
    #msck_command = msck_command + "'use " + envvars.list['hv_db_meta_stage'] + "; msck repair table abc_hadoop;'"
    comments = ""
    # Determine the joblist file path
    job_list_file = envvars.list['lfs_app_config'] + '/' + jobnames
    rerun_job_list_file = envvars.list['lfs_app_config'] + '/' + grp_name + "_rerun.list"
    print("run_job.py -> JobList      : " + job_list_file)
    if os.path.isfile(rerun_job_list_file):
        job_list_file = rerun_job_list_file
        print("run_job.py -> JobList      : Rerun file found, updating joblist lookup file. Please re-run if the original entries have to run.")
        print("run_job.py -> JobList      : " + job_list_file)
        comments = comments + "Rerun file found " + job_list_file
    else:
        comments = comments + "joblist file " + job_list_file
    abc_line = "|".join([grp_name, "run_job.py", "python", "CA-7 Job", "", str(args), "STARTED",
                         user_name, comments.replace(os.linesep, "---"), str(datetime.today()) + "\n"])
    writeabc(abc_line)
    input_scripts_count = 0
    failed_scripts_count = 0
    failed_scripts = ""
    try:
        with open(job_list_file) as fin:
            for line in fin:
                args = line.split('|')
                if args[0].strip().lower() == grp_name.lower() or grp_name.lower() == '*all':
                    options = ' --env ' + env + ' --app ' + app + ' --env_ver ' + env_ver + ' --group ' + grp_name
                    options = options + ' --subapp ' + sub_app + ' --cmmn_dt ' + common_date
                    if len(args) < 3:
                        print("Error: Table name and script name not defined in config file")
                        return None, None, None, None, None, None, None
                    if len(args) >= 4:
                        job = args[2].strip()
                        if args[1].strip().lower() == 'g':
                            path = envvars.list['lfs_global_scripts']
                        else:
                            path = envvars.list['lfs_app_scripts']
                        options = options + ' --op0 ' + args[3].strip()
                    if len(args) >= 5 and args[4].strip() != "":
                        options = options + ' --op1 ' + args[4].strip()
                    if len(args) >= 6 and args[5].strip() != "":
                        options = options + ' --op2 ' + args[5].strip()
                    if len(args) >= 7 and args[6].strip() != "":
                        options = options + ' --op3 ' + args[6].strip()
                    if len(args) >= 8 and args[7].strip() != "":
                        options = options + ' --op4 ' + args[7].strip()
                    if len(args) >= 9 and args[8].strip() != "":
                        options = options + ' --op5 ' + args[8].strip()
                    if len(args) >= 10 and args[9].strip() != "":
                        options = options + ' --op6 ' + args[9].strip()
                    if len(args) >= 11 and args[10].strip() != "":
                        options = options + ' --op7 ' + args[10].strip()
                    if len(args) >= 12 and args[11].strip() != "":
                        options = options + ' --op8 ' + args[11].strip()
                    if len(args) >= 13 and args[12].strip() != "":
                        options = options + ' --op9 ' + args[12].strip()
                    input_scripts_count = input_scripts_count + 1
                    rc = call_script(path, job, options)
                    if rc != 0:
                        failed_scripts_count = failed_scripts_count + 1
                        fs = line.split('|')
                        fs[0] = failed_group_name
                        failed_scripts = failed_scripts + line
                    if rc > return_code:
                        return_code = rc
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise IOError("exception file reading error")
        else:
            print("No joblist file found")
    if return_code > 0:
        #if input_scripts_count != failed_scripts_count:
        with open(rerun_job_list_file, 'w') as myfile:
            myfile.write(failed_scripts)
        chmod_log = "chmod 777 " + rerun_job_list_file
        rc, status = commands.getstatusoutput(chmod_log)
        print "run_job.py -> Failed Script: Some scripts failed.. Please use the command below to rerun.."
        print "run_job.py -> Re-run Cmd   : " + " ".join(["python", path + "/run_job.py", grp_name, app, sub_app])
        abc_line = "|".join([grp_name, "run_job.py", "python", "CA-7 Job", "", str(args), "FAILED",
                             user_name, "run_job failed, Some scripts failed.." + str(return_code),
                             str(datetime.today()) + "\n"])
        writeabc(abc_line)
        #else:
        #    print "run_job.py -> Failed Script: All scripts failed.. Please use the command below to rerun.."
        #    print "run_job.py -> Re-run Cmd   : " + " ".join(["python", path + "/run_job.py", grp_name, app, sub_app, jobnames])
        #    abc_line = "|".join([grp_name, "run_job.py", "python", "CA-7 Job", "", str(args), "FAILED",
        #                         user_name, "run_job failed, all scripts failed.." + str(return_code),
        #                         str(datetime.today()) + "\n"])
        #    writeabc(abc_line)
    elif os.path.isfile(rerun_job_list_file):
        print "run_job.py -> Deleting..." + str(rerun_job_list_file)
        os.remove(rerun_job_list_file)
    abc_line = "|".join([grp_name, "run_job.py", "python", "CA-7 Job", "", str(args), "ENDED",
                         user_name, "run_job ended, Return-Code:" + str(return_code),
                         str(datetime.today()) + "\n"])
    writeabc(abc_line)
    print("run_job.py -> Ended        : Return-Code:" + str(return_code) + " " +
          datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print start_line
    silentremove(abc_log_file)
    sys.exit(return_code)
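# For reference, a hypothetical jobnames.list entry and how the parsing loop in run_job.py
# above would interpret it. The field values below are illustrative only; the field meanings
# are inferred from the split('|') handling in main(), not from project documentation.
sample_line = "daily_grp|a|run_hive_query.py|op0_value|op1_value"
fields = sample_line.split('|')
# fields[0] -> group name, matched against grp_name (passing '*all' runs every group)
# fields[1] -> 'g' selects lfs_global_scripts; anything else selects lfs_app_scripts
# fields[2] -> script name handed to call_script()
# fields[3] onward -> forwarded to the script as --op0 .. --op9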