コード例 #1
0
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts)
    print "###################"
    ts_last_hour = ts-3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    hour_list = [str("%02d" % x) for x in range(24)]
    region_summary_retrial_max = 10

    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
        f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
        query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print "    ****  perform beeline for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                beeline.bln_e(strcmd_s)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except:
                    print "    ****  copy to local failed!"
                break
            except:
                # delete the folder if summarization failed.
                print "    ****  summarization failed upto #retrials="+str(count_retrial)
                hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                count_retrial += 1

    else:
        print " file exists."

    # check if the summary has been performed since the beginning of the day, last check on day X is X+1/0:30:00
    for hour in hour_list:
        if hour < hourstamp:
            print "    ****  checking day = %s, hour = %s." % (datestamp, hour),
            if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hour), '000000_0.deflate')):
                f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
                strcmd = f.read()
                strcmd_s = strcmd % (datestamp, hour, datestamp, hour, datestamp, hour)
                f.close()
                print "    ****  perform beeline for hourly summary for day = %s, hour = %s." %(datestamp, hour)
                try:
                    beeline.bln_e(strcmd_s)
                except:
                    # delete the folder if summarization failed.
                    print "    ****  summarization failed, removed hdfs folder."
                    hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hour), r=True)
            else:
                print " file exists."
コード例 #2
0
def main():
    """ get the date for the past day (yesterday). """
    timenow = int(time.time())
    datenow = str(datetime.date.today()-datetime.timedelta(1))
    datenow = datenow[0:4]+datenow[5:7]+datenow[8:10]

    print "###################"
    print "# Start processing the data back in " + datenow + " (yesterday)"
    print "# starting processing time is " + str(timenow)
    print "###################"

    ts = calendar.timegm(time.gmtime())
    ts_last_hour = ts-3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))

    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s." % (datestamp),
    if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_day % (datestamp), '000000_0.deflate')):
        f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_day.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, datestamp, datestamp)
        f.close()
        print "    ****  perform beeline for hourly summary for day = %s, hour = %s." %(datestamp, hourstamp)
        try:
            beeline.bln_e(strcmd_s)
        except:
            # delete the folder if summarization failed.
            print "    ****  summarization failed, removed hdfs folder."
            hdfsutil.rm(config.hdfs_qos_rg_day % (datestamp), r=True)
    else:
        print " file exists."
コード例 #3
0
def main():
    """Summarize yesterday's mapmon data, one hive run per mpd_uuid found.

    For every entry listed under ``mapper/mapmon/day=<yesterday>`` the target
    folder ``mapmon_sum/day=<yesterday>/mpd_uuid=<uuid>`` is created when
    ``hdfsutil.test_dic`` reports it (truthy result, presumably "missing" --
    TODO confirm), and the ``mapmon_summarize.hive`` template is executed via
    beeline when ``hdfsutil.test_file`` returns a truthy value for the
    ``000000_0.deflate`` output (treated here as "file missing"). A failed run
    removes the target folder so a later run can retry it.
    """
    # initialize the logger
    logging.basicConfig(
        filename=os.path.join('/home/testgrp/logs/', 'mapmon_summarize.log'),
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # yesterday (local date) formatted as YYYYMMDD
    date_idx = (datetime.date.today() - datetime.timedelta(1)).strftime('%Y%m%d')

    # get the latest barebone day_idx
    bb_day_idx = beeline.get_last_partitions('mapper.barebones').split('=')[1]
    logger.info("barebone index: day={}".format(bb_day_idx))

    # get the latest mpd yesterday: keep the text after the last '=' of each entry
    mapmon_day_dir = os.path.join(os.path.dirname(config.hdfs_table), 'mapper',
                                  'mapmon', 'day={}'.format(date_idx))
    uuid_list = [x.split('=')[-1] for x in hdfsutil.ls(mapmon_day_dir)]

    hive_query_file = os.path.join(config.mrqos_hive_query, 'mapmon_summarize.hive')
    for uuid_idx in uuid_list:
        logger.info("dealing with day={}, uuid={}".format(date_idx, uuid_idx))
        file_location = os.path.join(config.hdfs_table, 'mapmon_sum',
                                     'day={}'.format(date_idx),
                                     'mpd_uuid={}'.format(uuid_idx))
        if hdfsutil.test_dic(file_location):
            logger.info('creating folder: {}'.format(file_location))
            hdfsutil.mkdir(file_location)

        if not hdfsutil.test_file(os.path.join(file_location, '000000_0.deflate')):
            logger.info(" file exists.")
            continue

        with open(hive_query_file, 'r') as f:
            strcmd = f.read()
        strcmd_s = strcmd % (date_idx, uuid_idx, bb_day_idx, date_idx,
                             uuid_idx, date_idx, uuid_idx)
        try:
            beeline.bln_e(strcmd_s)
        except Exception:
            # delete the folder if summarization failed; keep the traceback in
            # the log so the cause is not lost.
            logger.warning("summarization failed, removing hdfs folder.",
                           exc_info=True)
            hdfsutil.rm(file_location, r=True)
コード例 #4
0
def main():
    """Summarize yesterday's mapmon data, one hive run per mpd_uuid found.

    Each entry listed under ``mapper/mapmon/day=<yesterday>`` maps to a target
    folder ``mapmon_sum/day=<yesterday>/mpd_uuid=<uuid>``. The folder is
    created when ``hdfsutil.test_dic`` returns a truthy value (presumably
    meaning "missing" -- TODO confirm). The ``mapmon_summarize.hive`` template
    is run through beeline when ``hdfsutil.test_file`` returns a truthy value
    for ``000000_0.deflate`` (treated here as "file missing"); if that run
    fails the target folder is removed again.
    """
    # initialize the logger
    logging.basicConfig(filename=os.path.join('/home/testgrp/logs/', 'mapmon_summarize.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # yesterday (local date) formatted as YYYYMMDD
    yesterday = datetime.date.today()-datetime.timedelta(1)
    date_idx = yesterday.strftime('%Y%m%d')

    # get the latest barebone day_idx
    bb_day_idx = beeline.get_last_partitions('mapper.barebones').split('=')[1]
    logger.info("barebone index: day={}".format(bb_day_idx))

    # get the latest mpd yesterday: keep the text after the last '=' of each entry
    source_dir = os.path.join(os.path.dirname(config.hdfs_table),
                              'mapper', 'mapmon', 'day={}'.format(date_idx))
    uuid_list = [x.split('=')[-1] for x in hdfsutil.ls(source_dir)]

    for uuid_idx in uuid_list:
        logger.info("dealing with day={}, uuid={}".format(date_idx, uuid_idx))
        file_location = os.path.join(config.hdfs_table,
                                     'mapmon_sum',
                                     'day={}'.format(date_idx),
                                     'mpd_uuid={}'.format(uuid_idx))
        if hdfsutil.test_dic(file_location):
            logger.info('creating folder: {}'.format(file_location))
            hdfsutil.mkdir(file_location)

        if hdfsutil.test_file(os.path.join(file_location, '000000_0.deflate')):
            with open(os.path.join(config.mrqos_hive_query, 'mapmon_summarize.hive'), 'r') as f:
                strcmd = f.read()
            strcmd_s = strcmd % (date_idx, uuid_idx, bb_day_idx,
                                 date_idx, uuid_idx,
                                 date_idx, uuid_idx)
            try:
                beeline.bln_e(strcmd_s)
            except Exception:
                # delete the folder if summarization failed; exc_info keeps the
                # traceback in the log instead of silently dropping the cause.
                logger.warning("summarization failed, removing hdfs folder.", exc_info=True)
                hdfsutil.rm(file_location, r=True)
        else:
            logger.info(" file exists.")
コード例 #5
0
def _read_hive_query(filename):
    """Return the text of the hive template *filename* under config.mrqos_hive_query."""
    with open(os.path.join(config.mrqos_hive_query, filename), 'r') as f:
        return f.read()


def _export_query_result(logger, strcmd_g, query_result_file):
    """Export a query result via beeline.bln_e_output, retrying once on CalledProcessError."""
    try:
        beeline.bln_e_output(strcmd_g, query_result_file)
    except sp.CalledProcessError as e:
        logger.warning("copy to local failed, retrying...")
        # log the error itself; the deprecated ``e.message`` is not used
        logger.warning("error: %s", e)
        try:
            beeline.bln_e_output(strcmd_g, query_result_file)
        except sp.CalledProcessError:
            logger.error("copy to local failed again, abort.")
            logger.exception("message")


def _run_hourly_procedure(logger, label, hdfs_folder, strcmd_s, max_retrials, export=None):
    """Run one hive procedure with retries, removing *hdfs_folder* after each failed attempt.

    *export*, when given, is a ``(query, local_file)`` pair exported after a
    successful run.
    """
    count_retrial = 0
    while count_retrial < max_retrials:
        tic = time.time()
        try:
            beeline.bln_e(strcmd_s)
            logger.info("BLN %s success @ cost = %s sec." % (label, str(time.time()-tic)))
            if export is not None:
                _export_query_result(logger, export[0], export[1])
            break
        except sp.CalledProcessError:
            # delete the folder if summarization failed.
            logger.info("BLN %s failed @ cost = %s sec in retrial #%s" % (label,
                                                                        str(time.time()-tic),
                                                                        str(count_retrial)))
            logger.exception("message")
            hdfsutil.rm(hdfs_folder, r=True)
            count_retrial += 1


def main():
    """Run the hourly mrqos hive procedures for the last few hours, filling in missing ones.

    For each of the last ``config.region_summary_back_filling`` hours (UTC) the
    region summary, case view and region view procedures are executed in that
    order whenever ``hdfsutil.test_file`` returns a truthy value for the
    partition's ``000000_0.deflate`` file (logged as "file does not exist").
    Each procedure is attempted up to 10 times; the two view procedures also
    export their partition to a local CSV file.
    """
    ts = calendar.timegm(time.gmtime())
    logging.basicConfig(filename=os.path.join(config.mrqos_logging, 'cron_region_summary_hour.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # start the logging
    logger.info("###################")
    logger.info("# Performing the hourly mrqos_region summary")
    logger.info("# starting time: " + str(ts) + " = " + time.strftime('GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts)))
    logger.info("###################")

    # parameter: backfilter length
    bf_length = config.region_summary_back_filling
    region_summary_retrial_max = 10

    # (log label, hdfs folder template, hive template file,
    #  number of (datestamp, hour) pairs the template expects,
    #  table exported to a local csv or None)
    procedures = [
        ("region summary hour", config.hdfs_qos_rg_hour,
         'mrqos_region_summarize_hour.hive', 3, None),
        ("case view hour", config.hdfs_qos_case_view_hour,
         'mrqos_case_view_hour.hive', 2, 'case_view_hour'),
        ("region view hour", config.hdfs_qos_rg_view_hour,
         'mrqos_region_view_hour.hive', 3, 'region_view_hour'),
    ]

    for ts_last_hour in [ts-(1+x)*3600 for x in range(bf_length)]:
        hour_struct = time.gmtime(float(ts_last_hour))
        datestamp = time.strftime('%Y%m%d', hour_struct)
        hourstamp = time.strftime('%H', hour_struct)

        for label, folder_tmpl, hive_file, n_pairs, export_table in procedures:
            hdfs_folder = folder_tmpl % (datestamp, hourstamp)
            # check if the procedure has been performed on this particular hour
            if not hdfsutil.test_file(os.path.join(hdfs_folder, '000000_0.deflate')):
                logger.info("** %s: checking day = %s, hour = %s, and file exists." % (label,
                                                                                     datestamp,
                                                                                     hourstamp))
                continue

            logger.info("** %s: checking day = %s, hour = %s, and file does not exist." % (label,
                                                                                         datestamp,
                                                                                         hourstamp))
            strcmd_s = _read_hive_query(hive_file) % ((datestamp, hourstamp) * n_pairs)

            export = None
            if export_table is not None:
                strcmd_g = "select * from mrqos.%s where datestamp=%s and hour=%s;" % (export_table,
                                                                                       datestamp,
                                                                                       hourstamp)
                query_result_file = os.path.join(config.mrqos_query_result,
                                                 '%s.%s.%s.csv' % (export_table, datestamp, hourstamp))
                export = (strcmd_g, query_result_file)

            _run_hourly_procedure(logger, label, hdfs_folder, strcmd_s,
                                  region_summary_retrial_max, export)
コード例 #6
0
def main():
    # logging set-up
    logging.basicConfig(filename=os.path.join(config.mrqos_logging, 'io_ratio_join.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # ##############################
    # start the script
    # parameter setting

    ts = int(time.time())
    logger.info('########### ts=%s ###########' % str(ts))
    #datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts)))
    #hourstamp = time.strftime('%H', time.gmtime(float(ts)))

    # IO-Ratio Join:
    last_mrqos_region_partition = beeline.get_last_partitions('mrqos.mrqos_region')
    [datestamp, hourstamp, ts_region] = [x.split('=')[1] for x in last_mrqos_region_partition.split('/')]
    logger.info('MRQOS mrqos_region partition: datestamp=%s, hour=%s, ts_region=%s' % (datestamp,
                                                                                 hourstamp,
                                                                                 ts_region))

    mapruleinfo_partitions = [x for x in sorted(beeline.show_partitions('mrqos.maprule_info').split('\n'),reverse=True) if '=' in x]
    mapruleinfo_partitions = [x for x in mapruleinfo_partitions if x < 'ts=%s' % ts_region]
    ts_mapruleinfo = mapruleinfo_partitions[0].split('=')[1]
    logger.info('MRQOS maprule_info partition: ts_mapruleinfo=%s' % ts_mapruleinfo)

    region_summary_retrial_max = 10

    # ############################### #
    # The In-Out Ratio hive procedure #
    # ############################### #
    # check if the summary has been performed on this particular hour (last hour)
    # print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(os.path.join(config.hdfs_table,
                                       'mrqos_ioratio',
                                       'datestamp=%s' % datestamp,
                                       'hour=%s' % hourstamp,
                                       'ts=%s' % ts_region,
                                       '000000_0.deflate')):
        logger.info(' Joined file not exist.')
        f = open(os.path.join(config.mrqos_hive_query, 'mrqos_ioratio.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, ts_region,
                             datestamp, hourstamp, ts_region,
                             ts_mapruleinfo)
        print strcmd_s
        f.close()
        # strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (datestamp, hourstamp)
        # query_result_file = os.path.join(config.mrqos_query_result,'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp))

        print " BLN for hourly summary: day = %s, hour = %s. " %(datestamp, hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            tic = time.time()
            try:
                beeline.bln_e(strcmd_s)
                logger.info('    ******  success with time cost = %s.' % str(time.time()-tic))
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                logger.error('    ******  failed with time cost = %s upto # retrials=%s' % (str(time.time()-tic), str(count_retrial)))
                logger.error('error %s' % e.message)
                hdfsutil.rm(os.path.join(config.hdfs_table,
                                         'mrqos_ioratio',
                                         'datestamp=%s' % datestamp,
                                         'hour=%s' % hourstamp,
                                         'ts=%s' % ts_region), r=True)
                count_retrial += 1
    else:
        logger.info(' Joined file exists.')
コード例 #7
0
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts) + " = " + time.strftime(
        'GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts))
    print "###################"
    ts_last_hour = ts - 3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    #hour_list = [str("%02d" % x) for x in range(24)]
    region_summary_retrial_max = 10

    # ############################### #
    # The SUMMARY HOUR hive procedure #
    # ############################### #
    print "    ****  summary hour tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp),
                         '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query,
                         'mrqos_region_summarize_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp,
                             datestamp, hourstamp)
        f.close()
        strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp))

        print " BLN for hourly summary: day = %s, hour = %s. " % (datestamp,
                                                                  hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            tic = time.time()
            try:
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                #try:
                #    beeline.bln_e_output(strcmd_g, query_result_file)
                #except:
                #    print "    ****  copy to local failed, retry!"
                #    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto # retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp),
                            r=True)
                count_retrial += 1
    else:
        print " file exists."

    # ############################ #
    # The CASE VIEW hive procedure #
    # ############################ #
    print "    ****  case view tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(
                config.hdfs_qos_case_view_hour % (datestamp, hourstamp),
                '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'),
            'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'case_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print " BLN for hourly summary for day = %s, hour = %s." % (datestamp,
                                                                    hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                tic = time.time()
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError as e:
                    print "    ****  copy to local failed, retry!"
                    print e.message
                    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto #retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_case_view_hour %
                            (datestamp, hourstamp),
                            r=True)
                count_retrial += 1

    else:
        print " file exists."

    # ############################## #
    # The REGION VIEW hive procedure #
    # ############################## #
    print "    ****  region view tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp),
                         '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query,
                         'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp,
                             datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print " BLN for hourly summary for day = %s, hour = %s." % (datestamp,
                                                                    hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                tic = time.time()
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError as e:
                    print "    ****  copy to local failed, retry!"
                    print e.message
                    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto #retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_view_hour %
                            (datestamp, hourstamp),
                            r=True)
                count_retrial += 1

    else:
        print " file exists."
コード例 #8
0
def main(argv):
    """ get the date and hour for the specified day and hour. Clean(drop) and rebuild the table partition. """
    try:
        opts, args = getopt.getopt(argv,"qd:h:",["datestamp=","hour="])
    except getopt.GetoptError:
        print 'region_summary_hour_repair.py -d <datestamp> -h <hour>'
        sys.exit(2)

    hour =''
    datestamp = ''

    for opt, arg in opts:
        if opt == '-q':
            print 'region_summary_hour_repair.py -d <datestamp> -h <hour>'
            sys.exit()
        elif opt in ("-d", "--datestamp"):
            datestamp = arg
        elif opt in ("-h", "--hour"):
            hour = arg

    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the repair of the  mrqos_region summary"
    print "# starting processing time is " + str(ts) + " = " + time.strftime('GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts))
    print "###################"

    if (not datestamp and not hour):
        print 'region_summary_hour_repair.py -d <datestamp> -h <hour>'
        sys.exit(2)

    print 'Fixing datestamp = %s' % datestamp

    if not hour:
        hour_list = [str("%02d" % x) for x in range(24)]
        print 'Fixing hour = %s' % hour_list
    else:
        print 'Fixing hour = %s' % hour

    #ts_last_hour = ts-3600
    #datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    #hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    #hour_list = [str("%02d" % x) for x in range(24)]
    region_summary_retrial_max = 10


    print "    #**** first perform table cleanups: "
    if not hour:
        for hourstamp in hour_list:
            cleanup_mrqos_region_related_tables(datestamp, hourstamp)
    else:
        hourstamp = hour
        cleanup_mrqos_region_related_tables(datestamp, hourstamp)

    print "    #**** rebuild the db / table: "
    if not hour:
        for hourstamp in hour_list:
            # ############################### #
            # The SUMMARY HOUR hive procedure #
            # ############################### #
            print "    ****  summary hour tour:"
            # check if the summary has been performed on this particular hour (last hour)
            print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
            if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')):
                print " file not exits,",
                f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r')
                strcmd = f.read()
                strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
                f.close()
                strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (datestamp, hourstamp)
                query_result_file = os.path.join(config.mrqos_query_result,'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp))

                print " BLN for hourly summary: day = %s, hour = %s. " %(datestamp, hourstamp)
                count_retrial = 0
                while count_retrial < region_summary_retrial_max:
                    tic = time.time()
                    try:
                        beeline.bln_e(strcmd_s)
                        print "    ******  success with time cost = %s." % str(time.time()-tic)
                        #try:
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        #except:
                        #    print "    ****  copy to local failed, retry!"
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        break
                    except:
                        # delete the folder if summarization failed.
                        print "    ******  failed with time cost = %s upto # retrials=%s" % (str(time.time()-tic), str(count_retrial))
                        hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True)
                        count_retrial += 1
            else:
                print " file exists."


            # ############################ #
            # The CASE VIEW hive procedure #
            # ############################ #
            print "    ****  case view tour:"
            # check if the summary has been performed on this particular hour (last hour)
            print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
            if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
                print " file not exits,",
                f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r')
                strcmd = f.read()
                strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp)
                f.close()
                strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
                query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp))
                print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
                count_retrial = 0
                while count_retrial < region_summary_retrial_max:
                    try:
                        tic = time.time()
                        beeline.bln_e(strcmd_s)
                        print "    ******  success with time cost = %s." % str(time.time()-tic)
                        # repair don't care about moving the result to SQLite DB
                        #try:
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        #except:
                        #    print "    ****  copy to local failed, retry!"
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        break
                    except:
                        # delete the folder if summarization failed.
                        print "    ******  failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial))
                        hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True)
                        count_retrial += 1

            else:
                print " file exists."


            # ############################## #
            # The REGION VIEW hive procedure #
            # ############################## #
            print "    ****  region view tour:"
            # check if the summary has been performed on this particular hour (last hour)
            print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
            if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
                print " file not exits,",
                f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
                strcmd = f.read()
                strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
                f.close()
                strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
                query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
                print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
                count_retrial = 0
                while count_retrial < region_summary_retrial_max:
                    try:
                        tic = time.time()
                        beeline.bln_e(strcmd_s)
                        print "    ******  success with time cost = %s." % str(time.time()-tic)
                        # repair don't care about moving the result to SQLite DB
                        #try:
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        #except:
                        #    print "    ****  copy to local failed, retry!"
                        #    beeline.bln_e_output(strcmd_g, query_result_file)
                        break
                    except:
                        # delete the folder if summarization failed.
                        print "    ******  failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial))
                        hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                        count_retrial += 1
            else:
                print " file exists."

    else:
        # ############################### #
        # The SUMMARY HOUR hive procedure #
        # ############################### #
        print "    ****  summary hour tour:"
        # check if the summary has been performed on this particular hour (last hour)
        print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')):
            print " file not exits,",
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
            f.close()
            strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (datestamp, hourstamp)
            query_result_file = os.path.join(config.mrqos_query_result,'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp))

            print " BLN for hourly summary: day = %s, hour = %s. " %(datestamp, hourstamp)
            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                tic = time.time()
                try:
                    beeline.bln_e(strcmd_s)
                    print "    ******  success with time cost = %s." % str(time.time()-tic)
                    #try:
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    #except:
                    #    print "    ****  copy to local failed, retry!"
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    break
                except:
                    # delete the folder if summarization failed.
                    print "    ******  failed with time cost = %s upto # retrials=%s" % (str(time.time()-tic), str(count_retrial))
                    hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1
        else:
            print " file exists."


        # ############################ #
        # The CASE VIEW hive procedure #
        # ############################ #
        print "    ****  case view tour:"
        # check if the summary has been performed on this particular hour (last hour)
        print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
            print " file not exits,",
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp)
            f.close()
            strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
            query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp))
            print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                try:
                    tic = time.time()
                    beeline.bln_e(strcmd_s)
                    print "    ******  success with time cost = %s." % str(time.time()-tic)
                    # repair don't care about moving the result to SQLite DB
                    #try:
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    #except:
                    #    print "    ****  copy to local failed, retry!"
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    break
                except:
                    # delete the folder if summarization failed.
                    print "    ******  failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial))
                    hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1

        else:
            print " file exists."


        # ############################## #
        # The REGION VIEW hive procedure #
        # ############################## #
        print "    ****  region view tour:"
        # check if the summary has been performed on this particular hour (last hour)
        print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
            print " file not exits,",
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
            f.close()
            strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
            query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
            print " BLN for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                try:
                    tic = time.time()
                    beeline.bln_e(strcmd_s)
                    print "    ******  success with time cost = %s." % str(time.time()-tic)
                    # repair don't care about moving the result to SQLite DB
                    #try:
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    #except:
                    #    print "    ****  copy to local failed, retry!"
                    #    beeline.bln_e_output(strcmd_g, query_result_file)
                    break
                except:
                    # delete the folder if summarization failed.
                    print "    ******  failed with time cost = %s upto #retrials=%s" % (str(time.time()-tic), str(count_retrial))
                    hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1
        else:
            print " file exists."