Example 1
def save_current_figure(filename, dir="./", close=True, format=None):
    if filename is not None:
        fu.ensure_dir(dir)
        full_filename = os.path.join(dir, filename)
        pylab.savefig(full_filename, format=format)
        if close:
            pylab.close()
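This snippet relies on a pylab import and an fu helper module that are not shown here. A minimal self-contained sketch of how it might be exercised, with a small stand-in class playing the role of fu.ensure_dir and a throwaway demo figure (both assumptions, not part of the original module), could look like this:

import os

import matplotlib
matplotlib.use('Agg')  # headless backend so the sketch runs without a display
import matplotlib.pylab as pylab


class fu(object):
    # stand-in for the file-utility module the snippet imports as `fu`
    @staticmethod
    def ensure_dir(dir):
        if not os.path.isdir(dir):
            os.makedirs(dir)


def save_current_figure(filename, dir='./', close=True, format=None):
    if filename is not None:
        fu.ensure_dir(dir)
        full_filename = os.path.join(dir, filename)
        pylab.savefig(full_filename, format=format)
        if close:
            pylab.close()


if __name__ == '__main__':
    pylab.plot([0, 1, 2], [0, 1, 4])
    pylab.title('demo')
    # writes figures/demo.png and closes the current figure
    save_current_figure('demo.png', dir='figures', format='png')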
Example 2
def send_hadoop_command(hdfs_uri, hdfs_dir, jobtracker_uri,
      which_engine_binary, which_hadoop_binary, which_hadoop_jar,
      input_filename, table_data_filename, command_dict_filename, output_path,
      n_tasks=1, one_map_task_per_line=True,
      task_timeout=60000000, DEBUG=False):
  # make sure output_path doesn't exist
  rm_hdfs(hdfs_uri, output_path, hdfs_base_dir=hdfs_dir)
  # send up input
  put_hdfs(hdfs_uri, input_filename, hdfs_base_dir=hdfs_dir)
  # actually send
  hadoop_cmd_str = create_hadoop_cmd_str(hdfs_uri, hdfs_dir, jobtracker_uri,
      which_engine_binary, which_hadoop_binary, which_hadoop_jar,
      input_filename, table_data_filename, command_dict_filename, output_path,
      n_tasks, one_map_task_per_line,
      task_timeout)
  was_successful = None
  if DEBUG:
    print(hadoop_cmd_str)
    return hadoop_cmd_str
  else:
    fu.ensure_dir(output_path)
    output_path_dotdot = os.path.split(output_path)[0]
    out_filename = os.path.join(output_path_dotdot, 'out')
    err_filename = os.path.join(output_path_dotdot, 'err')
    redirect_str = '>>%s 2>>%s'
    redirect_str %= (out_filename, err_filename)
    # could I nohup and check hdfs for presence of _SUCCESS every N seconds?
    # cmd_str = ' '.join(['nohup', hadoop_cmd_str, redirect_str, '&'])
    cmd_str = ' '.join([hadoop_cmd_str, redirect_str])
    os.system(cmd_str)
  return
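The part of this example that stands on its own is how the Hadoop invocation is glued to a shell redirect string and handed to os.system. A small self-contained sketch of that pattern, with a harmless echo command standing in for the real hadoop_cmd_str (which depends on create_hadoop_cmd_str and other helpers not shown here), might look like:

import os

output_path = 'output/run_1'
if not os.path.isdir(output_path):
    os.makedirs(output_path)

# stdout/stderr files land next to the output directory, as in the snippet above
output_path_dotdot = os.path.split(output_path)[0]
out_filename = os.path.join(output_path_dotdot, 'out')
err_filename = os.path.join(output_path_dotdot, 'err')
redirect_str = '>>%s 2>>%s' % (out_filename, err_filename)

# harmless stand-in for the real hadoop_cmd_str
hadoop_cmd_str = 'echo hello from the hadoop stand-in'
cmd_str = ' '.join([hadoop_cmd_str, redirect_str])
os.system(cmd_str)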
Example 3
def save_current_figure(filename, dir='./', close=True, format=None):
    if filename is not None:
        fu.ensure_dir(dir)
        full_filename = os.path.join(dir, filename)
        pylab.savefig(full_filename, format=format)
        if close:
            pylab.close()
Example 4
def savefig_legend_outside(filename, ax=None, bbox_inches="tight", dir="./"):
    if ax is None:
        ax = pylab.gca()
    lgd = ax.get_legend()
    fu.ensure_dir(dir)
    full_filename = os.path.join(dir, filename)
    pylab.savefig(full_filename, bbox_extra_artists=(lgd,), bbox_inches=bbox_inches)
    return
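As with Example 1, this function depends on pylab and fu imports that are not shown. A minimal self-contained sketch of calling it with a legend placed outside the axes, where os.makedirs stands in for fu.ensure_dir and the demo plot is an assumption, could look like:

import os

import matplotlib
matplotlib.use('Agg')  # headless backend so the sketch runs without a display
import matplotlib.pylab as pylab


def savefig_legend_outside(filename, ax=None, bbox_inches='tight', dir='./'):
    if ax is None:
        ax = pylab.gca()
    lgd = ax.get_legend()
    if not os.path.isdir(dir):  # os.makedirs standing in for fu.ensure_dir
        os.makedirs(dir)
    full_filename = os.path.join(dir, filename)
    pylab.savefig(full_filename, bbox_extra_artists=(lgd,), bbox_inches=bbox_inches)


if __name__ == '__main__':
    pylab.plot([0, 1, 2], [0, 1, 4], label='series A')
    # put the legend outside the axes; bbox_extra_artists plus
    # bbox_inches='tight' keep it from being clipped in the saved file
    pylab.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))
    savefig_legend_outside('legend_outside.png', dir='figures')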
Example 5
def savefig_legend_outside(filename, ax=None, bbox_inches='tight', dir='./'):
    if ax is None:
        ax = pylab.gca()
    lgd = ax.get_legend()
    fu.ensure_dir(dir)
    full_filename = os.path.join(dir, filename)
    pylab.savefig(
        full_filename,
        bbox_extra_artists=(lgd, ),
        bbox_inches=bbox_inches,
    )
    return
Example 6
def send_hadoop_command(hdfs_uri,
                        hdfs_dir,
                        jobtracker_uri,
                        which_engine_binary,
                        which_hadoop_binary,
                        which_hadoop_jar,
                        input_filename,
                        table_data_filename,
                        command_dict_filename,
                        output_path,
                        n_tasks=1,
                        one_map_task_per_line=True,
                        task_timeout=60000000,
                        DEBUG=False):
    # make sure output_path doesn't exist
    rm_hdfs(hdfs_uri, output_path, hdfs_base_dir=hdfs_dir)
    # send up input
    put_hdfs(hdfs_uri, input_filename, hdfs_base_dir=hdfs_dir)
    # actually send
    hadoop_cmd_str = create_hadoop_cmd_str(
        hdfs_uri, hdfs_dir, jobtracker_uri, which_engine_binary,
        which_hadoop_binary, which_hadoop_jar, input_filename,
        table_data_filename, command_dict_filename, output_path, n_tasks,
        one_map_task_per_line, task_timeout)
    was_successful = None
    if DEBUG:
        print(hadoop_cmd_str)
        return hadoop_cmd_str
    else:
        fu.ensure_dir(output_path)
        output_path_dotdot = os.path.split(output_path)[0]
        out_filename = os.path.join(output_path_dotdot, 'out')
        err_filename = os.path.join(output_path_dotdot, 'err')
        redirect_str = '>>%s 2>>%s'
        redirect_str %= (out_filename, err_filename)
        # could I nohup and check hdfs for presence of _SUCCESS every N seconds?
        # cmd_str = ' '.join(['nohup', hadoop_cmd_str, redirect_str, '&'])
        cmd_str = ' '.join([hadoop_cmd_str, redirect_str])
        os.system(cmd_str)
    return
Example 7
    num_clusters_list = args.num_clusters_list
    num_splits_list = args.num_splits_list
    which_engine_binary = args.which_engine_binary
    #
    print('using num_rows_list: %s' % num_rows_list)
    print('using num_cols_list: %s' % num_cols_list)
    print('using num_clusters_list: %s' % num_clusters_list)
    print('using num_splits_list: %s' % num_splits_list)
    print('using engine_binary: %s' % which_engine_binary)
    time.sleep(2)


    script_filename = 'hadoop_line_processor.py'
    # some hadoop processing related settings
    dirname = 'runtime_analysis'
    fu.ensure_dir(dirname)
    temp_dir = tempfile.mkdtemp(prefix='runtime_analysis_',
                                dir=dirname)
    print('using dir: %s' % temp_dir)
    #
    table_data_filename = os.path.join(temp_dir, 'table_data.pkl.gz')
    input_filename = os.path.join(temp_dir, 'hadoop_input')
    output_filename = os.path.join(temp_dir, 'hadoop_output')
    output_path = os.path.join(temp_dir, 'output')
    parsed_out_file = os.path.join(temp_dir, 'parsed_output.csv')

    # Hard code the parameter values for now

    parameter_list = [num_rows_list, num_cols_list, num_clusters_list, num_splits_list]

    # Iterate over the parameter values and write each run as a line in the hadoop_input file
Example 8
    num_cols_list = args.num_cols_list
    num_clusters_list = args.num_clusters_list
    num_splits_list = args.num_splits_list
    which_engine_binary = args.which_engine_binary
    #
    print('using num_rows_list: %s' % num_rows_list)
    print('using num_cols_list: %s' % num_cols_list)
    print('using num_clusters_list: %s' % num_clusters_list)
    print('using num_splits_list: %s' % num_splits_list)
    print('using engine_binary: %s' % which_engine_binary)
    time.sleep(2)

    script_filename = 'hadoop_line_processor.py'
    # some hadoop processing related settings
    dirname = 'runtime_analysis'
    fu.ensure_dir(dirname)
    temp_dir = tempfile.mkdtemp(prefix='runtime_analysis_', dir=dirname)
    print('using dir: %s' % temp_dir)
    #
    table_data_filename = os.path.join(temp_dir, 'table_data.pkl.gz')
    input_filename = os.path.join(temp_dir, 'hadoop_input')
    output_filename = os.path.join(temp_dir, 'hadoop_output')
    output_path = os.path.join(temp_dir, 'output')
    parsed_out_file = os.path.join(temp_dir, 'parsed_output.csv')

    # Hard code the parameter values for now

    parameter_list = [
        num_rows_list, num_cols_list, num_clusters_list, num_splits_list
    ]
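Both fragments stop right where the comment in Example 7 says each run of the parameter grid should be written as a line in the hadoop_input file. A hedged sketch of what that loop might look like, using itertools.product over the four lists, illustrative parameter values, and a made-up line layout (the actual format consumed by hadoop_line_processor.py is not shown here), is:

import itertools
import os

# illustrative parameter grids; in the script these come from parsed arguments
num_rows_list = [100, 200]
num_cols_list = [4, 8]
num_clusters_list = [2, 4]
num_splits_list = [1, 2]
parameter_list = [num_rows_list, num_cols_list, num_clusters_list, num_splits_list]

temp_dir = 'runtime_analysis'
if not os.path.isdir(temp_dir):
    os.makedirs(temp_dir)
input_filename = os.path.join(temp_dir, 'hadoop_input')

with open(input_filename, 'w') as fh:
    # one line per (num_rows, num_cols, num_clusters, num_splits) combination;
    # the field layout is illustrative, not the project's actual input format
    for key, (num_rows, num_cols, num_clusters, num_splits) in \
            enumerate(itertools.product(*parameter_list)):
        fh.write('%d %d %d %d %d\n' % (key, num_rows, num_cols, num_clusters, num_splits))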