def main(): """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """ ts = calendar.timegm(time.gmtime()) print "###################" print "# Performing the hourly mrqos_region summary" print "# starting processing time is " + str(ts) print "###################" ts_last_hour = ts-3600 datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour))) hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour))) hour_list = [str("%02d" % x) for x in range(24)] region_summary_retrial_max = 10 # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." % (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')): f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " **** perform beeline for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: beeline.bln_e(strcmd_s) try: beeline.bln_e_output(strcmd_g, query_result_file) except: print " **** copy to local failed!" break except: # delete the folder if summarization failed. print " **** summarization failed upto #retrials="+str(count_retrial) hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # check if the summary has been performed since the beginning of the day, last check on day X is X+1/0:30:00 for hour in hour_list: if hour < hourstamp: print " **** checking day = %s, hour = %s." 
% (datestamp, hour), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hour), '000000_0.deflate')): f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hour, datestamp, hour, datestamp, hour) f.close() print " **** perform beeline for hourly summary for day = %s, hour = %s." %(datestamp, hour) try: beeline.bln_e(strcmd_s) except: # delete the folder if summarization failed. print " **** summarization failed, removed hdfs folder." hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hour), r=True) else: print " file exists."
def main(): """ get the date for the past day (yesterday). """ timenow = int(time.time()) datenow = str(datetime.date.today()-datetime.timedelta(1)) datenow = datenow[0:4]+datenow[5:7]+datenow[8:10] print "###################" print "# Start processing the data back in " + datenow + " (yesterday)" print "# starting processing time is " + str(timenow) print "###################" ts = calendar.timegm(time.gmtime()) ts_last_hour = ts-3600 datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour))) hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour))) # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s." % (datestamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_day % (datestamp), '000000_0.deflate')): f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_day.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, datestamp, datestamp) f.close() print " **** perform beeline for hourly summary for day = %s, hour = %s." %(datestamp, hourstamp) try: beeline.bln_e(strcmd_s) except: # delete the folder if summarization failed. print " **** summarization failed, removed hdfs folder." hdfsutil.rm(config.hdfs_qos_rg_day % (datestamp), r=True) else: print " file exists."
def main():
    """Build the daily mapmon summary partitions for yesterday.

    For every mpd_uuid partition found under mapper/mapmon for yesterday,
    run the mapmon_summarize Hive query unless its output file already
    exists; on failure the partial HDFS folder is removed.
    """
    # initialize the logger
    logging.basicConfig(
        filename=os.path.join('/home/testgrp/logs/', 'mapmon_summarize.log'),
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # yesterday as YYYYMMDD
    datenow = str(datetime.date.today() - datetime.timedelta(1))
    date_idx = datenow[0:4] + datenow[5:7] + datenow[8:10]

    # get the latest barebone day_idx
    bb_day_idx = beeline.get_last_partitions('mapper.barebones').split('=')[1]
    logger.info("barebone index: day={}".format(bb_day_idx))

    # get the latest mpd uuids for yesterday
    uuid_list = [
        x.split('=')[-1] for x in hdfsutil.ls(
            os.path.join(os.path.dirname(config.hdfs_table), 'mapper',
                         'mapmon', 'day={}'.format(date_idx)))
    ]

    for uuid_idx in uuid_list:
        logger.info("dealing with day={}, uuid={}".format(date_idx, uuid_idx))
        file_location = os.path.join(config.hdfs_table, 'mapmon_sum',
                                     'day={}'.format(date_idx),
                                     'mpd_uuid={}'.format(uuid_idx))
        if hdfsutil.test_dic(file_location):
            logger.info('creating folder: {}'.format(file_location))
            hdfsutil.mkdir(file_location)
        if hdfsutil.test_file(os.path.join(file_location, '000000_0.deflate')):
            # context manager guarantees the template is closed even if read() fails
            with open(os.path.join(config.mrqos_hive_query,
                                   'mapmon_summarize.hive'), 'r') as f:
                strcmd = f.read()
            strcmd_s = strcmd % (date_idx, uuid_idx, bb_day_idx,
                                 date_idx, uuid_idx,
                                 date_idx, uuid_idx)
            try:
                beeline.bln_e(strcmd_s)
            except Exception:  # narrowed from bare except
                # delete the folder if summarization failed.
                # logger.warn is deprecated -> logger.warning
                logger.warning("summarization failed, removing hdfs folder.")
                hdfsutil.rm(file_location, r=True)
        else:
            logger.info(" file exists.")
def main():
    """Build the daily mapmon summary partitions for yesterday.

    Mirrors mapmon_summarize: for each mpd_uuid partition of yesterday's
    mapper/mapmon data, run the summarization Hive query when the output
    deflate file is missing; remove the partial folder on failure.
    """
    # initialize the logger
    logging.basicConfig(filename=os.path.join('/home/testgrp/logs/', 'mapmon_summarize.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # yesterday, formatted YYYYMMDD
    datenow = str(datetime.date.today() - datetime.timedelta(1))
    date_idx = datenow[0:4] + datenow[5:7] + datenow[8:10]

    # get the latest barebone day_idx
    bb_day_idx = beeline.get_last_partitions('mapper.barebones').split('=')[1]
    logger.info("barebone index: day={}".format(bb_day_idx))

    # get the latest mpd uuids for yesterday
    uuid_list = [x.split('=')[-1]
                 for x in hdfsutil.ls(os.path.join(os.path.dirname(config.hdfs_table),
                                                   'mapper', 'mapmon',
                                                   'day={}'.format(date_idx)))]

    for uuid_idx in uuid_list:
        logger.info("dealing with day={}, uuid={}".format(date_idx, uuid_idx))
        file_location = os.path.join(config.hdfs_table, 'mapmon_sum',
                                     'day={}'.format(date_idx),
                                     'mpd_uuid={}'.format(uuid_idx))
        if hdfsutil.test_dic(file_location):
            logger.info('creating folder: {}'.format(file_location))
            hdfsutil.mkdir(file_location)
        if hdfsutil.test_file(os.path.join(file_location, '000000_0.deflate')):
            # with-statement closes the template file even if read() raises
            with open(os.path.join(config.mrqos_hive_query, 'mapmon_summarize.hive'), 'r') as f:
                strcmd = f.read()
            strcmd_s = strcmd % (date_idx, uuid_idx, bb_day_idx,
                                 date_idx, uuid_idx,
                                 date_idx, uuid_idx)
            try:
                beeline.bln_e(strcmd_s)
            except Exception:  # narrowed from bare except
                # delete the folder if summarization failed.
                # logger.warn is deprecated -> logger.warning
                logger.warning("summarization failed, removing hdfs folder.")
                hdfsutil.rm(file_location, r=True)
        else:
            logger.info(" file exists.")
def main(): """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """ ts = calendar.timegm(time.gmtime()) logging.basicConfig(filename=os.path.join(config.mrqos_logging, 'cron_region_summary_hour.log'), level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %H:%M:%S') logger = logging.getLogger(__name__) # start the logging logger.info("###################") logger.info("# Performing the hourly mrqos_region summary") logger.info("# starting time: " + str(ts) + " = " + time.strftime('GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts))) logger.info("###################") # parameter: backfilter length bf_length = config.region_summary_back_filling ts_last_couple_hour_list = [ts-(1+x)*3600 for x in range(bf_length)] for ts_last_hour in ts_last_couple_hour_list: datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour))) hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour))) region_summary_retrial_max = 10 # ############################### # # The SUMMARY HOUR hive procedure # # ############################### # #logger.info(" **** summary hour tour: checking day = %s, hour = %s." % (datestamp, hourstamp)) # check if the summary has been performed on this particular hour (last hour) if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')): logger.info("** region summary hour: checking day = %s, hour = %s, and file does not exist." % (datestamp, hourstamp)) f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() count_retrial = 0 while count_retrial < region_summary_retrial_max: tic = time.time() try: beeline.bln_e(strcmd_s) logger.info("BLN region summary hour success @ cost = %s sec." 
% str(time.time()-tic)) break except sp.CalledProcessError as e: # delete the folder if summarization failed. logger.info("BLN region summary hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic), str(count_retrial))) logger.exception("message") hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: logger.info("** region summary hour: checking day = %s, hour = %s, and file exists." % (datestamp, hourstamp)) # ############################ # # The CASE VIEW hive procedure # # ############################ # #print " **** case view tour:" # check if the summary has been performed on this particular hour (last hour) if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')): logger.info("** case view hour: checking day = %s, hour = %s, and file does not exist." % (datestamp, hourstamp)) f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp)) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) logger.info("BLN case view hour success @ cost = %s sec." % str(time.time()-tic)) try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: logger.warning("copy to local failed, retrying...") print e.message try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: logger.error("copy to local failed again, abort.") logger.exception("message") break except sp.CalledProcessError as e: # delete the folder if summarization failed. 
logger.info("BLN case view hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic), str(count_retrial))) logger.exception("message") hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: logger.info("** case view hour: checking day = %s, hour = %s, and file exists." % (datestamp, hourstamp)) # ############################## # # The REGION VIEW hive procedure # # ############################## # # check if the summary has been performed on this particular hour (last hour) if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')): logger.info("** region view hour: checking day = %s, hour = %s, and file does not exist." % (datestamp, hourstamp)) f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp)) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) logger.info("BLN region view hour success @ cost = %s sec." % str(time.time()-tic)) try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: logger.warning("copy to local failed, retrying...") print e.message try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: logger.error("copy to local failed again, abort.") logger.exception("message") break except sp.CalledProcessError as e: # delete the folder if summarization failed. 
logger.info("BLN region view hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic), str(count_retrial))) logger.exception("message") hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: logger.info("** region view hour: checking day = %s, hour = %s, and file exists." % (datestamp, hourstamp))
def main():
    """Join the latest mrqos_region partition with maprule_info to build the
    hourly in/out-ratio (mrqos_ioratio) table.

    Picks the newest mrqos_region partition, pairs it with the newest
    maprule_info partition that is not newer than it, and runs the
    mrqos_ioratio Hive query with up to 10 retrials when the joined output
    file is missing.
    """
    # logging set-up
    logging.basicConfig(filename=os.path.join(config.mrqos_logging, 'io_ratio_join.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # parameter setting
    ts = int(time.time())
    logger.info('########### ts=%s ###########' % str(ts))

    # IO-Ratio Join: locate the newest mrqos_region partition
    last_mrqos_region_partition = beeline.get_last_partitions('mrqos.mrqos_region')
    [datestamp, hourstamp, ts_region] = [x.split('=')[1] for x in last_mrqos_region_partition.split('/')]
    logger.info('MRQOS mrqos_region partition: datestamp=%s, hour=%s, ts_region=%s' % (datestamp,
                                                                                      hourstamp,
                                                                                      ts_region))

    # newest maprule_info partition that is not newer than the region partition
    mapruleinfo_partitions = [x for x in sorted(beeline.show_partitions('mrqos.maprule_info').split('\n'), reverse=True) if '=' in x]
    mapruleinfo_partitions = [x for x in mapruleinfo_partitions if x < 'ts=%s' % ts_region]
    ts_mapruleinfo = mapruleinfo_partitions[0].split('=')[1]
    logger.info('MRQOS maprule_info partition: ts_mapruleinfo=%s' % ts_mapruleinfo)

    region_summary_retrial_max = 10

    # ############################### #
    # The In-Out Ratio hive procedure #
    # ############################### #
    # single source of truth for the target partition directory (was built
    # twice inline, risking divergence between check and cleanup)
    ioratio_dir = os.path.join(config.hdfs_table, 'mrqos_ioratio',
                               'datestamp=%s' % datestamp,
                               'hour=%s' % hourstamp,
                               'ts=%s' % ts_region)
    if hdfsutil.test_file(os.path.join(ioratio_dir, '000000_0.deflate')):
        logger.info(' Joined file not exist.')
        # with-statement closes the template file even if read() raises
        with open(os.path.join(config.mrqos_hive_query, 'mrqos_ioratio.hive'), 'r') as f:
            strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, ts_region,
                             datestamp, hourstamp, ts_region,
                             ts_mapruleinfo)
        # was bare `print` statements: route status/debug through the logger so
        # it lands in the configured log file instead of cron stdout
        logger.debug(strcmd_s)
        logger.info(" BLN for hourly summary: day = %s, hour = %s. " % (datestamp, hourstamp))
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            tic = time.time()
            try:
                beeline.bln_e(strcmd_s)
                logger.info(' ****** success with time cost = %s.' % str(time.time() - tic))
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                logger.error(' ****** failed with time cost = %s upto # retrials=%s' % (str(time.time() - tic), str(count_retrial)))
                logger.error('error %s' % e)  # was e.message (deprecated)
                hdfsutil.rm(ioratio_dir, r=True)
                count_retrial += 1
    else:
        logger.info(' Joined file exists.')
def main(): """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """ ts = calendar.timegm(time.gmtime()) print "###################" print "# Performing the hourly mrqos_region summary" print "# starting processing time is " + str(ts) + " = " + time.strftime( 'GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts)) print "###################" ts_last_hour = ts - 3600 datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour))) hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour))) #hour_list = [str("%02d" % x) for x in range(24)] region_summary_retrial_max = 10 # ############################### # # The SUMMARY HOUR hive procedure # # ############################### # print " **** summary hour tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." % (datestamp, hourstamp), if hdfsutil.test_file( os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open( os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % ( datestamp, hourstamp) query_result_file = os.path.join( config.mrqos_query_result, 'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary: day = %s, hour = %s. 
" % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: tic = time.time() try: beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str( time.time() - tic) #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except sp.CalledProcessError as e: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto # retrials=%s" % ( str(time.time() - tic), str(count_retrial)) print e.message hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################ # # The CASE VIEW hive procedure # # ############################ # print " **** case view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." % (datestamp, hourstamp), if hdfsutil.test_file( os.path.join( config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open( os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % ( datestamp, hourstamp) query_result_file = os.path.join( config.mrqos_query_result, 'case_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str( time.time() - tic) try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: print " **** copy to local failed, retry!" 
print e.message beeline.bln_e_output(strcmd_g, query_result_file) break except sp.CalledProcessError as e: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto #retrials=%s" % ( str(time.time() - tic), str(count_retrial)) print e.message hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################## # # The REGION VIEW hive procedure # # ############################## # print " **** region view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." % (datestamp, hourstamp), if hdfsutil.test_file( os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open( os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % ( datestamp, hourstamp) query_result_file = os.path.join( config.mrqos_query_result, 'region_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str( time.time() - tic) try: beeline.bln_e_output(strcmd_g, query_result_file) except sp.CalledProcessError as e: print " **** copy to local failed, retry!" print e.message beeline.bln_e_output(strcmd_g, query_result_file) break except sp.CalledProcessError as e: # delete the folder if summarization failed. 
print " ****** failed with time cost = %s upto #retrials=%s" % ( str(time.time() - tic), str(count_retrial)) print e.message hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists."
def main(argv): """ get the date and hour for the specified day and hour. Clean(drop) and rebuild the table partition. """ try: opts, args = getopt.getopt(argv,"qd:h:",["datestamp=","hour="]) except getopt.GetoptError: print 'region_summary_hour_repair.py -d <datestamp> -h <hour>' sys.exit(2) hour ='' datestamp = '' for opt, arg in opts: if opt == '-q': print 'region_summary_hour_repair.py -d <datestamp> -h <hour>' sys.exit() elif opt in ("-d", "--datestamp"): datestamp = arg elif opt in ("-h", "--hour"): hour = arg ts = calendar.timegm(time.gmtime()) print "###################" print "# Performing the repair of the mrqos_region summary" print "# starting processing time is " + str(ts) + " = " + time.strftime('GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts)) print "###################" if (not datestamp and not hour): print 'region_summary_hour_repair.py -d <datestamp> -h <hour>' sys.exit(2) print 'Fixing datestamp = %s' % datestamp if not hour: hour_list = [str("%02d" % x) for x in range(24)] print 'Fixing hour = %s' % hour_list else: print 'Fixing hour = %s' % hour #ts_last_hour = ts-3600 #datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour))) #hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour))) #hour_list = [str("%02d" % x) for x in range(24)] region_summary_retrial_max = 10 print " #**** first perform table cleanups: " if not hour: for hourstamp in hour_list: cleanup_mrqos_region_related_tables(datestamp, hourstamp) else: hourstamp = hour cleanup_mrqos_region_related_tables(datestamp, hourstamp) print " #**** rebuild the db / table: " if not hour: for hourstamp in hour_list: # ############################### # # The SUMMARY HOUR hive procedure # # ############################### # print " **** summary hour tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary: day = %s, hour = %s. " %(datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: tic = time.time() try: beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto # retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################ # # The CASE VIEW hive procedure # # ############################ # print " **** case view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) # repair don't care about moving the result to SQLite DB #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################## # # The REGION VIEW hive procedure # # ############################## # print " **** region view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) # repair don't care about moving the result to SQLite DB #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." else: # ############################### # # The SUMMARY HOUR hive procedure # # ############################### # print " **** summary hour tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary: day = %s, hour = %s. " %(datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: tic = time.time() try: beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto # retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################ # # The CASE VIEW hive procedure # # ############################ # print " **** case view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) # repair don't care about moving the result to SQLite DB #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists." # ############################## # # The REGION VIEW hive procedure # # ############################## # print " **** region view tour:" # check if the summary has been performed on this particular hour (last hour) print " **** checking day = %s, hour = %s." 
% (datestamp, hourstamp), if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')): print " file not exits,", f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r') strcmd = f.read() strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp) f.close() strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp) query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp)) print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp) count_retrial = 0 while count_retrial < region_summary_retrial_max: try: tic = time.time() beeline.bln_e(strcmd_s) print " ****** success with time cost = %s." % str(time.time()-tic) # repair don't care about moving the result to SQLite DB #try: # beeline.bln_e_output(strcmd_g, query_result_file) #except: # print " **** copy to local failed, retry!" # beeline.bln_e_output(strcmd_g, query_result_file) break except: # delete the folder if summarization failed. print " ****** failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial)) hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True) count_retrial += 1 else: print " file exists."