Code Example #1
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts)
    print "###################"
    ts_last_hour = ts-3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    hour_list = [str("%02d" % x) for x in range(24)]
    region_summary_retrial_max = 10

    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
        f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
        query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print "    ****  perform beeline for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                beeline.bln_e(strcmd_s)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError:
                    print "    ****  copy to local failed!"
                break
            except sp.CalledProcessError:
                # delete the folder if summarization failed.
                print "    ****  summarization failed upto #retrials="+str(count_retrial)
                hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                count_retrial += 1

    else:
        print " file exists."

    # check if the summary has been performed since the beginning of the day, last check on day X is X+1/0:30:00
    for hour in hour_list:
        if hour < hourstamp:
            print "    ****  checking day = %s, hour = %s." % (datestamp, hour),
            if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hour), '000000_0.deflate')):
                f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
                strcmd = f.read()
                strcmd_s = strcmd % (datestamp, hour, datestamp, hour, datestamp, hour)
                f.close()
                print "    ****  perform beeline for hourly summary for day = %s, hour = %s." %(datestamp, hour)
                try:
                    beeline.bln_e(strcmd_s)
                except sp.CalledProcessError:
                    # delete the folder if summarization failed.
                    print "    ****  summarization failed, removed hdfs folder."
                    hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hour), r=True)
            else:
                print " file exists."
Code Example #2
def fqry(qry, t1, t2, file_output):
    """
    Run a comparison query over the two timestamps and write the result to a file.
    :param qry: query template doing the comparison
    :param t1: timestamp #1
    :param t2: timestamp #2
    :param file_output: name of the output file, written under config.mrqos_data
    :return: none
    """
    beeline.bln_e_output(qry % (t1, t2), os.path.join(config.mrqos_data, file_output))
Code Example #3
def fqry(qry, t1, t2, file_output):
    """
    Run a comparison query over the two timestamps and write the result to a file.
    :param qry: query template doing the comparison
    :param t1: timestamp #1
    :param t2: timestamp #2
    :param file_output: name of the output file, written under config.mrqos_data
    :return: none
    """
    beeline.bln_e_output(qry % (t1, t2),
                         os.path.join(config.mrqos_data, file_output))
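
For context, a hedged usage sketch of fqry; the query template, timestamps, and file name below are placeholders, and beeline.bln_e_output is assumed to execute the filled-in query and write its rows to the given local file:

# Hypothetical invocation; the table, timestamps, and file name are placeholders.
qry = "select * from mrqos.mrqos_sum where ts >= %s and ts < %s;"
fqry(qry, 1400000000, 1400086400, 'mrqos_sum_compare.csv')
# the result lands in os.path.join(config.mrqos_data, 'mrqos_sum_compare.csv')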
Code Example #4
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts)
    print "###################"
    ts_last_hour = ts-3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    hour_list = [str("%02d" % x) for x in range(24)]
    hour_list = [x for x in hour_list if x <= hourstamp]
    region_summary_retrial_max = 10

    # check if the summary has been performed on this particular hour (last hour)
    folders_day = '/'.join(str(config.hdfs_qos_rg_view_hour % (datestamp, '00')).split('/')[0:-1])

    # create the summary folder for "this day" (datestamp) if it does not exist yet
    # (hdfsutil.test_dic is assumed to return non-zero, i.e. truthy, when the path is missing)
    if hdfsutil.test_dic(folders_day):
        hdfsutil.mkdir(folders_day)

    folders_in = [folders_day+'/hour=%s' % x for x in hour_list]
    folders_out = hdfsutil.ls(folders_day)

    folders_missing = [x for x in folders_in if x not in folders_out]
    folders_missing.sort(reverse=True)

    for item in folders_missing:
        hourstamp = item[-2:]
        print "    ****  missing data for day = %s, hour = %s." % (datestamp, hourstamp),
        f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
        query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print "    ****  perform beeline for hourly summary for day = %s, hour = %s." % (datestamp, hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                beeline.bln_e(strcmd_s)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError:
                    print "    ****  copy to local failed!"
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ****  summarization failed upto #retrials="+str(count_retrial)
                print "    ****  ",
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                count_retrial += 1
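
The missing-partition detection above is the core of this variant and is easy to factor out. A minimal sketch, assuming hdfsutil.ls returns the full paths of the hour=XX partition folders that already exist; the helper name below is illustrative, not part of the original module:

def find_missing_hours(folders_day, existing_folders, hourstamp):
    """Return the hour=XX partition paths expected up to and including
    `hourstamp` but absent from `existing_folders`, newest first, which
    matches the backfill order used in main() above."""
    expected = [folders_day + '/hour=%02d' % h for h in range(int(hourstamp) + 1)]
    missing = [p for p in expected if p not in existing_folders]
    missing.sort(reverse=True)
    return missing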
Code Example #5
def main():
    """  this function will compute the statistics of MRQOS tables within the
    past two weeks (X-1 day : X-15 days) """
    timenow = int(time.time())
    datenow = str(datetime.date.today()-datetime.timedelta(1))
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(timenow-86400)))
    datenow = datenow[0:4]+datenow[5:7]+datenow[8:10]
    print "###################"
    print "# Start processing the data back in " + datenow + " for two-week window"
    print "# starting processing time is " + str(timenow)
    print "###################"
    max_retrial = 10

    # update the ts table for later summarize usage. file uploaded to HDFS
    print "    ****  create new mrqos_ts table."
    sp.call(config.create_ts_table, shell=True)

    # summarize script old version
    # open the file for writing the results
    print "    ****  running hive summarizing script."
    retrial = 0
    while retrial < max_retrial:
        try:
            tic = time.time()
            # a with-block ensures the output file is closed on success and on failure
            with open('/home/testgrp/MRQOS/mrqos_data/summarized_table.tmp','w') as f:
                sp.check_call(['hive','-f','/home/testgrp/MRQOS/MRQOS_table_summarize.hive'],stdout=f)
            print "    # success with time cost = %s" % str(time.time()-tic)
            break
        except sp.CalledProcessError:
            retrial += 1
            print "    # failed retrial #%s with time cost = %s" % (str(retrial), str(time.time()-tic))


    # process the file, take country only
    cmd = """cat /home/testgrp/MRQOS/mrqos_data/summarized_table.tmp | sed s:NULL:0:g | sed 's/\t/,/g' | awk -F',' '{x=length($4); if(x==2){print $0;}}' | awk -F',' '{if($3>0){$1=""; $2=""; print $0;}}' | sed 's/^\s\+//g' > /home/testgrp/MRQOS/mrqos_data/summarized_processed.tmp""";
    sp.check_call( cmd, shell=True )

    # upload the summarized table in hive
    print "    ****  upload the summarized table to HDFS."
    listname = os.path.join(config.mrqos_data, 'summarized_processed.tmp')
    hdfs_d = os.path.join(config.hdfs_table,'mrqos_sum','ts=%s' % str(datenow))
    upload_to_hive(listname, hdfs_d, str(datenow), 'mrqos_sum')


    # summarize script new version
    # open the file for writing the results
    print "    ****  running hive summarizing script (new)."
    retrial = 0
    while retrial < max_retrial:
        try:
            tic = time.time()
            # a with-block ensures the output file is closed on success and on failure
            with open('/home/testgrp/MRQOS/mrqos_data/summarized_table2.tmp','w') as f:
                sp.check_call(['hive','-f','/home/testgrp/MRQOS/mrqos_hive_query/MRQOS_table_summarize2.hive'],stdout=f)
            print "    # success with time cost = %s" % str(time.time()-tic)
            break
        except sp.CalledProcessError:
            retrial += 1
            print "    # failed retrial #%s with time cost = %s" % (str(retrial), str(time.time()-tic))


    # process the file, take country only
    cmd = """cat /home/testgrp/MRQOS/mrqos_data/summarized_table2.tmp | sed s:NULL:0:g | sed 's/\t/,/g' | awk -F',' '{x=length($4); if(x==2){print $0;}}' | awk -F',' '{if($3>0){$1=""; $2=""; print $0;}}' | sed 's/^\s\+//g' > /home/testgrp/MRQOS/mrqos_data/summarized_processed2.tmp""";
    sp.check_call( cmd, shell=True )

    # upload the summarized table in hive
    print "    ****  upload the summarized table to HDFS."
    listname = os.path.join(config.mrqos_data, 'summarized_processed2.tmp')
    hdfs_d = os.path.join(config.hdfs_table,'mrqos_sum2','ts=%s' % str(datenow))
    upload_to_hive(listname, hdfs_d, str(datenow), 'mrqos_sum2')


    # compute COMPOUND-ERROR-METRIC
    print "    ****  running hive script for compound error metrics."
    retrial = 0
    while retrial < max_retrial:
        try:
            tic = time.time()
            with open(os.path.join(config.mrqos_hive_query, 'MRQOS_table_levels.hive'), 'r') as f:
                strcmd = f.read()
            strcmd_s = strcmd % (datestamp, datestamp, datestamp, datestamp, datestamp,
                                 datestamp, datestamp, datestamp, datestamp, datestamp,
                                 datestamp)

            # beeline writes the query output file directly; no local file handle is needed
            beeline.bln_e_output(strcmd_s, '/home/testgrp/MRQOS/mrqos_data/compound_metric_full.tmp')
            print "    # success with time cost = %s" % str(time.time()-tic)
            # replaced code:
            #sp.check_call(['hive','-f','/home/testgrp/MRQOS/MRQOS_table_levels.hive'],stdout=f)
            f1 = '/home/testgrp/MRQOS/mrqos_data/compound_metric_full.tmp'
            f2 = '/home/testgrp/MRQOS/mrqos_data/compound_metric.tmp'
            cmd_str = ''' cat %s | awk '{print $1, $2, $3, $4, $5, $6, $14, $15, $16, $17, $18, $26, $27, $28, $29, $30, $38, $39, $40, $41, $42, $50, $51}' > %s ''' % (f1, f2)
            sp.check_call(cmd_str, shell=True)
            break
        except sp.CalledProcessError:
            retrial += 1
            print "    # failed retrial #%s with time cost = %s" % (str(retrial), str(time.time()-tic))

    # obtain the summarized statistics that spanned [-28d, -14d]
    print "    ****  running hive queries for 2w comparisons."
    cmdstr = '''/usr/bin/python /home/testgrp/MRQOS/mrqos_python_script/mrqos_sum_comparison.py'''
    sp.check_call(cmdstr, shell=True)
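
The three retry loops in this example share one shape: run a job, report the elapsed time, retry up to a cap. A minimal generic sketch of that pattern follows; the helper is an illustration, not part of the original module:

import time

def run_with_retries(job, max_retrial=10):
    """Call job() until it succeeds or max_retrial attempts have failed;
    return True on success, False otherwise."""
    for retrial in range(max_retrial):
        tic = time.time()
        try:
            job()
            print "    # success with time cost = %s" % str(time.time() - tic)
            return True
        except Exception:
            print "    # failed retrial #%s with time cost = %s" % (str(retrial + 1), str(time.time() - tic))
    return False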
Code Example #6
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    logging.basicConfig(filename=os.path.join(config.mrqos_logging, 'cron_region_summary_hour.log'),
                        level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # start the logging
    logger.info("###################")
    logger.info("# Performing the hourly mrqos_region summary")
    logger.info("# starting time: " + str(ts) + " = " + time.strftime('GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts)))
    logger.info("###################")

    # parameter: backfilter length
    bf_length = config.region_summary_back_filling
    ts_last_couple_hour_list = [ts-(1+x)*3600 for x in range(bf_length)]

    for ts_last_hour in ts_last_couple_hour_list:
        datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
        hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
        region_summary_retrial_max = 10

        # ############################### #
        # The SUMMARY HOUR hive procedure #
        # ############################### #
        #logger.info("    ****  summary hour tour: checking day = %s, hour = %s." % (datestamp, hourstamp))
        # check if the summary has been performed on this particular hour (last hour)
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp), '000000_0.deflate')):
            logger.info("** region summary hour: checking day = %s, hour = %s, and file does not exist." % (datestamp,
                                                                                                            hourstamp))
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_summarize_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
            f.close()

            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                tic = time.time()
                try:
                    beeline.bln_e(strcmd_s)
                    logger.info("BLN region summary hour success @ cost = %s sec." % str(time.time()-tic))
                    break
                except sp.CalledProcessError as e:
                    # delete the folder if summarization failed.
                    logger.info("BLN region summary hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic),
                                                                                                   str(count_retrial)))
                    logger.exception("message")
                    hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1
        else:
            logger.info("** region summary hour: checking day = %s, hour = %s, and file exists." % (datestamp,
                                                                                                    hourstamp))


        # ############################ #
        # The CASE VIEW hive procedure #
        # ############################ #
        #print "    ****  case view tour:"
        # check if the summary has been performed on this particular hour (last hour)
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
            logger.info("** case view hour: checking day = %s, hour = %s, and file does not exist." % (datestamp,
                                                                                                       hourstamp))
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp)
            f.close()
            strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
            query_result_file = os.path.join(config.mrqos_query_result,'case_view_hour.%s.%s.csv' % (datestamp, hourstamp))

            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                try:
                    tic = time.time()
                    beeline.bln_e(strcmd_s)
                    logger.info("BLN case view hour success @ cost = %s sec." % str(time.time()-tic))
                    try:
                        beeline.bln_e_output(strcmd_g, query_result_file)
                    except sp.CalledProcessError as e:
                        logger.warning("copy to local failed, retrying...")
                        logger.warning(str(e))
                        try:
                            beeline.bln_e_output(strcmd_g, query_result_file)
                        except sp.CalledProcessError as e:
                            logger.error("copy to local failed again, abort.")
                            logger.exception("message")
                    break
                except sp.CalledProcessError as e:
                    # delete the folder if summarization failed.
                    logger.info("BLN case view hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic),
                                                                                              str(count_retrial)))
                    logger.exception("message")
                    hdfsutil.rm(config.hdfs_qos_case_view_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1

        else:
            logger.info("** case view hour: checking day = %s, hour = %s, and file exists." % (datestamp,
                                                                                               hourstamp))



        # ############################## #
        # The REGION VIEW hive procedure #
        # ############################## #
        # check if the summary has been performed on this particular hour (last hour)
        if hdfsutil.test_file(os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), '000000_0.deflate')):
            logger.info("** region view hour: checking day = %s, hour = %s, and file does not exist." % (datestamp,
                                                                                                         hourstamp))
            f = open(os.path.join(config.mrqos_hive_query, 'mrqos_region_view_hour.hive'), 'r')
            strcmd = f.read()
            strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp, datestamp, hourstamp)
            f.close()
            strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (datestamp, hourstamp)
            query_result_file = os.path.join(config.mrqos_query_result,'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))

            count_retrial = 0
            while count_retrial < region_summary_retrial_max:
                try:
                    tic = time.time()
                    beeline.bln_e(strcmd_s)
                    logger.info("BLN region view hour success @ cost = %s sec." % str(time.time()-tic))
                    try:
                        beeline.bln_e_output(strcmd_g, query_result_file)
                    except sp.CalledProcessError as e:
                        logger.warning("copy to local failed, retrying...")
                        logger.warning(str(e))
                        try:
                            beeline.bln_e_output(strcmd_g, query_result_file)
                        except sp.CalledProcessError as e:
                            logger.error("copy to local failed again, abort.")
                            logger.exception("message")
                    break
                except sp.CalledProcessError as e:
                    # delete the folder if summarization failed.
                    logger.info("BLN region view hour failed @ cost = %s sec in retrial #%s" % (str(time.time()-tic),
                                                                                                str(count_retrial)))
                    logger.exception("message")
                    hdfsutil.rm(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp), r=True)
                    count_retrial += 1

        else:
            logger.info("** region view hour: checking day = %s, hour = %s, and file exists." % (datestamp,
                                                                                                 hourstamp))
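
The three hive procedures above differ only in the partition path, the Hive template file, and the substitution parameters; everything else is the same check-fill-retry loop. A hedged sketch of a shared helper (names and signature are illustrative, assuming the same truthy-means-missing convention for hdfsutil.test_file, and omitting the copy-to-local step of the two view procedures for brevity):

def run_hourly_procedure(logger, label, hdfs_dir, hive_file, params,
                         max_retrial=10):
    """Skip if the partition file already exists; otherwise fill the Hive
    template, run it through beeline, and on failure remove the partial
    HDFS folder and retry."""
    if not hdfsutil.test_file(os.path.join(hdfs_dir, '000000_0.deflate')):
        logger.info("** %s: file exists, skipping." % label)
        return
    with open(os.path.join(config.mrqos_hive_query, hive_file), 'r') as f:
        strcmd_s = f.read() % params
    for count_retrial in range(max_retrial):
        tic = time.time()
        try:
            beeline.bln_e(strcmd_s)
            logger.info("BLN %s success @ cost = %s sec." % (label, str(time.time() - tic)))
            return
        except sp.CalledProcessError:
            logger.exception("BLN %s failed @ cost = %s sec in retrial #%s"
                             % (label, str(time.time() - tic), str(count_retrial)))
            hdfsutil.rm(hdfs_dir, r=True)

Each block above would then collapse to a single call, e.g. run_hourly_procedure(logger, 'region summary hour', config.hdfs_qos_rg_hour % (datestamp, hourstamp), 'mrqos_region_summarize_hour.hive', (datestamp, hourstamp) * 3) for the first procedure, and (datestamp, hourstamp) * 2 for the case view template, which has four substitution slots.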
Code Example #7
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts) + " = " + time.strftime(
        'GMT %Y-%m-%d %H:%M:%S', time.gmtime(ts))
    print "###################"
    ts_last_hour = ts - 3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    #hour_list = [str("%02d" % x) for x in range(24)]
    region_summary_retrial_max = 10

    # ############################### #
    # The SUMMARY HOUR hive procedure #
    # ############################### #
    print "    ****  summary hour tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(config.hdfs_qos_rg_hour % (datestamp, hourstamp),
                         '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query,
                         'mrqos_region_summarize_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp,
                             datestamp, hourstamp)
        f.close()
        strcmd_g = "SELECT maprule, geoname, netname, region, avg_region_score, score_target, hourly_region_nsd_demand, hourly_region_eu_demand, hourly_region_ra_load, case_ra_load, case_nsd_demand, case_eu_demand, case_uniq_region, name, ecor, continent, country, city, latitude, longitude, provider, region_capacity, ecor_capacity, prp, numghosts, datestamp, hour FROM mrqos.mrqos_region_hour WHERE datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'region_summary_hour.%s.%s.csv' % (datestamp, hourstamp))

        print " BLN for hourly summary: day = %s, hour = %s. " % (datestamp,
                                                                  hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            tic = time.time()
            try:
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                #try:
                #    beeline.bln_e_output(strcmd_g, query_result_file)
                #except:
                #    print "    ****  copy to local failed, retry!"
                #    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto # retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_hour % (datestamp, hourstamp),
                            r=True)
                count_retrial += 1
    else:
        print " file exists."

    # ############################ #
    # The CASE VIEW hive procedure #
    # ############################ #
    print "    ****  case view tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(
                config.hdfs_qos_case_view_hour % (datestamp, hourstamp),
                '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query, 'mrqos_case_view_hour.hive'),
            'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.case_view_hour where datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'case_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print " BLN for hourly summary for day = %s, hour = %s." % (datestamp,
                                                                    hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                tic = time.time()
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError as e:
                    print "    ****  copy to local failed, retry!"
                    print str(e)
                    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto #retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_case_view_hour %
                            (datestamp, hourstamp),
                            r=True)
                count_retrial += 1

    else:
        print " file exists."

    # ############################## #
    # The REGION VIEW hive procedure #
    # ############################## #
    print "    ****  region view tour:"
    # check if the summary has been performed on this particular hour (last hour)
    print "    ****  checking day = %s, hour = %s." % (datestamp, hourstamp),
    if hdfsutil.test_file(
            os.path.join(config.hdfs_qos_rg_view_hour % (datestamp, hourstamp),
                         '000000_0.deflate')):
        print " file not exits,",
        f = open(
            os.path.join(config.mrqos_hive_query,
                         'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp,
                             datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print " BLN for hourly summary for day = %s, hour = %s." % (datestamp,
                                                                    hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                tic = time.time()
                beeline.bln_e(strcmd_s)
                print "    ******  success with time cost = %s." % str(
                    time.time() - tic)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError as e:
                    print "    ****  copy to local failed, retry!"
                    print str(e)
                    beeline.bln_e_output(strcmd_g, query_result_file)
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ******  failed with time cost = %s upto #retrials=%s" % (
                    str(time.time() - tic), str(count_retrial))
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_view_hour %
                            (datestamp, hourstamp),
                            r=True)
                count_retrial += 1

    else:
        print " file exists."
Code Example #8
def main():
    """ get the date and hour for the previous hour. Will check from the beginning of the day, insert when missing. """
    ts = calendar.timegm(time.gmtime())
    print "###################"
    print "# Performing the hourly mrqos_region summary"
    print "# starting processing time is " + str(ts)
    print "###################"
    ts_last_hour = ts - 3600
    datestamp = time.strftime('%Y%m%d', time.gmtime(float(ts_last_hour)))
    hourstamp = time.strftime('%H', time.gmtime(float(ts_last_hour)))
    hour_list = [str("%02d" % x) for x in range(24)]
    hour_list = [x for x in hour_list if x <= hourstamp]
    region_summary_retrial_max = 10

    # check if the summary has been performed on this particular hour (last hour)
    folders_day = '/'.join(
        str(config.hdfs_qos_rg_view_hour % (datestamp, '00')).split('/')[0:-1])

    # create the summary folder for "this day" (datestamp) if it does not exist yet
    # (hdfsutil.test_dic is assumed to return non-zero, i.e. truthy, when the path is missing)
    if hdfsutil.test_dic(folders_day):
        hdfsutil.mkdir(folders_day)

    folders_in = [folders_day + '/hour=%s' % x for x in hour_list]
    folders_out = hdfsutil.ls(folders_day)

    folders_missing = [x for x in folders_in if x not in folders_out]
    folders_missing.sort(reverse=True)

    for item in folders_missing:
        hourstamp = item[-2:]
        print "    ****  missing data for day = %s, hour = %s." % (datestamp,
                                                                   hourstamp),
        f = open(
            os.path.join(config.mrqos_hive_query,
                         'mrqos_region_view_hour.hive'), 'r')
        strcmd = f.read()
        strcmd_s = strcmd % (datestamp, hourstamp, datestamp, hourstamp,
                             datestamp, hourstamp)
        f.close()
        strcmd_g = "select * from mrqos.region_view_hour where datestamp=%s and hour=%s;" % (
            datestamp, hourstamp)
        query_result_file = os.path.join(
            config.mrqos_query_result,
            'region_view_hour.%s.%s.csv' % (datestamp, hourstamp))
        print "    ****  perform beeline for hourly summary for day = %s, hour = %s." % (
            datestamp, hourstamp)
        count_retrial = 0
        while count_retrial < region_summary_retrial_max:
            try:
                beeline.bln_e(strcmd_s)
                try:
                    beeline.bln_e_output(strcmd_g, query_result_file)
                except sp.CalledProcessError:
                    print "    ****  copy to local failed!"
                break
            except sp.CalledProcessError as e:
                # delete the folder if summarization failed.
                print "    ****  summarization failed upto #retrials=" + str(
                    count_retrial)
                print "    ****  ",
                print e.message
                hdfsutil.rm(config.hdfs_qos_rg_view_hour %
                            (datestamp, hourstamp),
                            r=True)
                count_retrial += 1
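
Examples #4 and #8 also rely on directory-level helpers, hdfsutil.test_dic and hdfsutil.mkdir. Hedged sketches of what those thin wrappers are assumed to look like, following the same exit-code convention as test_file; these are illustrations only, and the "-dic" spelling is kept from the original identifier:

import subprocess as sp

def test_dic(path):
    """Assumed: return the exit code of `hdfs dfs -test -d`, so 0 (falsy)
    means the directory exists and non-zero (truthy) means it is missing."""
    return sp.call(['hdfs', 'dfs', '-test', '-d', path])

def mkdir(path):
    """Assumed: create the directory (and any missing parents) on HDFS."""
    return sp.call(['hdfs', 'dfs', '-mkdir', '-p', path])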