Example 1
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()
    fout = opts.fout
    date = opts.date
    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst
    # Validate and canonicalize the DBS instance name
    if inst in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)
    # Comma-separated pattern lists arrive as single strings
    patterns = opts.patterns.split(',') if opts.patterns else []
    antipatterns = opts.antipatterns.split(',') if opts.antipatterns else []
    run(fout, date, yarn, verbose, patterns, antipatterns, inst)
    print('Start time  : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time    : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Elapsed time: %s' % elapsed_time(time0))

    with open("spark_exec_time_campaigns.txt", "w") as text_file:
        text_file.write(elapsed_time(time0))
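
The examples rely on helpers imported from the surrounding module. In
particular, elapsed_time(time0) must return a string, since Examples 1 and 3
write its result straight to a file. A minimal sketch, assuming only the
standard library (the real helper may format the value differently):

import time

def elapsed_time(time0):
    "Return the time elapsed since time0 as an HH:MM:SS string (sketch)."
    # Hypothetical implementation; the source module may instead return
    # raw seconds or another format.
    return time.strftime('%H:%M:%S', time.gmtime(time.time() - time0))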
Example 2
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()

    print("Input arguments: %s" % opts)

    start_time = time.time()
    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst
    date = opts.date
    fout = opts.fout

    # Validate and canonicalize the DBS instance name
    if inst.lower() in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    # Create spark context
    ctx = spark_context('cms', yarn, verbose)

    # Create SQL context to be used for SQL queries
    sql_context = HiveContext(ctx)

    # Initialize DBS tables (will be used with AAA, CMSSW)
    dbs_tables(sql_context, inst=inst, verbose=verbose)

    aaa_start_time = time.time()

    run_aaa(date, fout, ctx, sql_context, verbose)

    aaa_elapsed_time = elapsed_time(aaa_start_time)
    cmssw_start_time = time.time()

    run_cmssw(date, fout, ctx, sql_context, verbose)

    cmssw_elapsed_time = elapsed_time(cmssw_start_time)
    eos_start_time = time.time()

    run_eos(date, fout, ctx, sql_context, verbose)

    eos_elapsed_time = elapsed_time(eos_start_time)
    jm_start_time = time.time()

    run_jm(date, fout, ctx, sql_context, verbose)

    jm_elapsed_time = elapsed_time(jm_start_time)

    ctx.stop()

    print('Start time         : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(start_time)))
    print('End time           : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Total elapsed time : %s' % elapsed_time(start_time))

    print('AAA elapsed time   : %s' % aaa_elapsed_time)
    print('CMSSW elapsed time : %s' % cmssw_elapsed_time)
    print('EOS elapsed time   : %s' % eos_elapsed_time)
    print('JM elapsed time    : %s' % jm_elapsed_time)
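
Example 2 assumes a spark_context helper that builds the SparkContext handed
to HiveContext. A minimal sketch, assuming pyspark is on the path; the real
helper presumably also configures YARN and executor settings, which are
omitted here:

from pyspark import SparkConf, SparkContext

def spark_context(appname='cms', yarn=None, verbose=False):
    "Create a SparkContext for the given application name (sketch)."
    # The yarn flag is accepted for signature compatibility only;
    # cluster-mode configuration is left out of this sketch.
    conf = SparkConf().setAppName(appname)
    ctx = SparkContext(conf=conf)
    if not verbose:
        ctx.setLogLevel('ERROR')  # keep driver output quiet
    return ctx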
Example 3
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()
    fout = opts.fout
    date = opts.date
    verbose = opts.verbose
    yarn = opts.yarn
    run(date, fout, yarn, verbose)
    print('Start time  : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time    : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Elapsed time: %s sec' % elapsed_time(time0))

    with open('phedex_time_data.txt', 'w') as text_file:
        text_file.write(elapsed_time(time0))
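
Every example starts from an OptionParser wrapper whose parser attribute
exposes the parsed options. A minimal sketch inferred from the attributes the
examples read (fout, date, inst, yarn, verbose); the real class defines its
own flags, defaults and help strings:

import argparse

class OptionParser(object):
    "Wrapper exposing an argparse parser as self.parser (sketch)."
    def __init__(self):
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument('--fout', default='',
                                 help='output file or directory')
        self.parser.add_argument('--date', default='',
                                 help='date to process, e.g. YYYYMMDD')
        self.parser.add_argument('--inst', default='global',
                                 help='DBS instance')
        self.parser.add_argument('--yarn', action='store_true',
                                 default=False, help='run on YARN')
        self.parser.add_argument('--verbose', action='store_true',
                                 default=False, help='verbose output')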
Example 4
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()
    run(opts.fout, opts.hdir, opts.date, opts.yarn, opts.verbose)
    print('Start time  : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time    : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Elapsed time: %s sec' % elapsed_time(time0))
Example 5
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()
    inst = opts.inst
    # Validate and canonicalize the DBS instance name
    if inst in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)
    run(opts.date, opts.fout, opts.yarn, opts.verbose, inst)
    print('Start time  : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time    : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Elapsed time: %s sec' % elapsed_time(time0))
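
The DBS-instance validation block recurs verbatim in Examples 1, 2, 5 and 6.
A helper like the following (not present in the source module) would factor
it out, following Example 2 in accepting mixed-case input:

def validate_dbs_instance(inst):
    "Return the canonical upper-case DBS instance name or raise (sketch)."
    if inst.lower() in ['global', 'phys01', 'phys02', 'phys03']:
        return inst.upper()
    raise Exception('Unsupported DBS instance "%s"' % inst)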
Example 6
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()

    print("Input arguments: %s" % opts)

    start_time = time.time()
    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst
    date = opts.date
    fout = opts.fout
    aaa_hdir = opts.aaa_hdir

    # Validate and canonicalize the DBS instance name
    if inst.lower() in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    # Create spark context
    ctx = spark_context('cms', yarn, verbose)

    quiet_logs(ctx)

    # Create SQL context to be used for SQL queries
    sql_context = SQLContext(ctx)

    # Initialize DBS tables
    dbs_tables(sql_context,
               inst=inst,
               verbose=verbose,
               tables=['fdf', 'bdf', 'ddf'])

    # Initialize PhEDEx table to be used in file_block_site table
    phedex_tables(sql_context, verbose=verbose)

    # Register clean_site_name to be used with SQL queries
    sql_context.udf.register("clean_site_name", clean_site_name)

    # Register tier_from_site_name to be used with SQL queries
    sql_context.udf.register("tier_from_site_name", tier_from_site_name)

    # Register dn2uuid to be used with SQL queries
    sql_context.udf.register("dn2uuid", dn2uuid)

    # Register parse_app to be used with SQL queries
    sql_context.udf.register("parse_app", parse_app)

    # Register stream4app to be used with SQL queries
    sql_context.udf.register("stream4app", stream4app)

    # Register parse_dn to be used with SQL queries
    sql_context.udf.register("parse_dn", parse_dn)

    f_b_s_start_time = time.time()
    # Create temp table with file name, block name, site name and site from PhEDEx
    create_file_block_site_table(ctx, sql_context, verbose)
    f_b_s_elapsed_time = elapsed_time(f_b_s_start_time)

    cmssw_start_time = time.time()
    aggregated_cmssw_df = run_agg_cmssw(date, ctx, sql_context, verbose)
    cmssw_elapsed_time = elapsed_time(cmssw_start_time)

    aaa_start_time = time.time()
    if aaa_hdir:
        aggregated_aaa_df = run_agg_aaa(date, ctx, sql_context, aaa_hdir,
                                        verbose)
    else:
        aggregated_aaa_df = run_agg_aaa(date,
                                        ctx,
                                        sql_context,
                                        verbose=verbose)

    aaa_elapsed_time = elapsed_time(aaa_start_time)

    eos_start_time = time.time()
    aggregated_eos_df = run_agg_eos(date, ctx, sql_context, verbose)
    eos_elapsed_time = elapsed_time(eos_start_time)

    jm_start_time = time.time()
    aggregated_jm_df = run_agg_jm(date, ctx, sql_context, verbose)
    jm_elapsed_time = elapsed_time(jm_start_time)

    if verbose:
        print('Will union outputs from all streams to a single dataframe')
    # Schema for output is:
    # site name, dataset name, number of accesses, distinct users, stream
    all_df = aggregated_cmssw_df.unionAll(aggregated_aaa_df)
    all_df = all_df.unionAll(aggregated_eos_df)
    all_df = all_df.unionAll(aggregated_jm_df)
    all_df = all_df.sort(desc("nacc"))

    if verbose:
        print('Done joining all outputs to a single dataframe')

    fout = fout + "/" + short_date_string(date)

    # output_dataframe(fout + "/Aggregated/CMSSW/" + short_date_string(date), aggregated_cmssw_df, verbose)
    # output_dataframe(fout + "/Aggregated/AAA/" + short_date_string(date), aggregated_aaa_df, verbose)
    # output_dataframe(fout + "/Aggregated/EOS/" + short_date_string(date), aggregated_eos_df, verbose)
    # output_dataframe(fout + "/Aggregated/JobMonitoring/" + short_date_string(date), aggregated_jm_df, verbose)

    output_dataframe(fout, all_df, verbose)

    if verbose:
        cmssw_df_size = aggregated_cmssw_df.count()
        aaa_df_size = aggregated_aaa_df.count()
        eos_df_size = aggregated_eos_df.count()
        jm_df_size = aggregated_jm_df.count()
        all_df_size = all_df.count()

        print('CMSSW:')
        aggregated_cmssw_df.show(10)
        aggregated_cmssw_df.printSchema()

        print('AAA:')
        aggregated_aaa_df.show(10)
        aggregated_aaa_df.printSchema()

        print('EOS:')
        aggregated_eos_df.show(10)
        aggregated_eos_df.printSchema()

        print('JobMonitoring:')
        aggregated_jm_df.show(10)
        aggregated_jm_df.printSchema()

        print('Aggregated all:')
        all_df.show(10)
        all_df.printSchema()

        print('Output record count:')
        print('Output record count CMSSW         : %s' % cmssw_df_size)
        print('Output record count AAA           : %s' % aaa_df_size)
        print('Output record count EOS           : %s' % eos_df_size)
        print('Output record count JobMonitoring : %s' % jm_df_size)
        print('Output record count Total         : %s' % all_df_size)

    ctx.stop()

    print('Start time         : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(start_time)))
    print('End time           : %s' %
          time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Total elapsed time : %s' % elapsed_time(start_time))

    print('FileBlockSite elapsed time : %s' % f_b_s_elapsed_time)
    print('AAA elapsed time           : %s' % aaa_elapsed_time)
    print('CMSSW elapsed time         : %s' % cmssw_elapsed_time)
    print('EOS elapsed time           : %s' % eos_elapsed_time)
    print('JobMonitoring elapsed time : %s' % jm_elapsed_time)
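
Example 6 registers plain Python functions as SQL UDFs through
sql_context.udf.register, which makes them callable inside SQL strings. A
hypothetical illustration of the pattern, reusing the sql_context from
Example 6 (the real clean_site_name, parse_dn, etc. live in the source
module, and the body below is only a plausible guess):

def tier_from_site_name(site):
    "Return the tier prefix of a CMS site name, e.g. 'T2' for 'T2_CH_CERN'."
    return site.split('_')[0]

sql_context.udf.register('tier_from_site_name', tier_from_site_name)
# 'f_b_s' is a hypothetical table name standing in for the temp table
# created by create_file_block_site_table above.
tiers = sql_context.sql('SELECT tier_from_site_name(site) AS tier FROM f_b_s')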