Esempio n. 1
0
def main(argv):
    """Run the pipes job on the requested input and write the sorted counts.

    The command line in ``argv`` is parsed with the parser returned by
    ``make_parser()``.  The MapReduce script read from ``LOCAL_MR_SCRIPT``
    is run through a ``hadut.PipesRunner``, its output is parsed into a
    mapping with integer values, and the entries are written to
    ``opt.output`` as ``"<key>\\t<count>"`` lines, highest count first.
    When ``opt.n_top`` is truthy only that many leading entries are written.

    Args:
        argv: argument list handed to ``parser.parse_args``.
    """
    parser = make_parser()
    opt, _ = parser.parse_args(argv)
    # Anything other than sys.stdout is treated as a path that we open
    # ourselves, so we are also responsible for closing it.
    owns_output = opt.output is not sys.stdout
    if owns_output:
        opt.output = open(opt.output, 'w')
    try:
        logger = logging.getLogger("main")
        logger.setLevel(logging.DEBUG)
        runner = hadut.PipesRunner(prefix=PREFIX, logger=logger)
        with open(LOCAL_MR_SCRIPT) as f:
            pipes_code = pts.add_sys_path(f.read())
        runner.set_input(opt.input, put=True)
        runner.set_exe(pipes_code)
        mr_options = get_mr_options(opt, runner.wd)
        runner.run(properties=mr_options,
                   hadoop_conf_dir=HADOOP_CONF_DIR,
                   logger=logger)
        mr_output = runner.collect_output()
        runner.clean()
        d = pts.parse_mr_output(mr_output, vtype=int)
        # items() is available on both Python 2 and 3 (iteritems() is not).
        ip_list = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
        if opt.n_top:
            ip_list = ip_list[:opt.n_top]
        for ip, count in ip_list:
            opt.output.write("%s\t%d\n" % (ip, count))
    finally:
        # Close the file even when the job or the parsing step fails.
        if owns_output:
            opt.output.close()
Esempio n. 2
0
def main(argv):
  """Run the pipes job on the requested input and write the sorted counts.

  The command line in ``argv`` is parsed with the parser returned by
  ``make_parser()``.  The MapReduce script read from ``LOCAL_MR_SCRIPT``
  is run through a ``hadut.PipesRunner``, its output is parsed into a
  mapping with integer values, and the entries are written to
  ``opt.output`` as ``"<key>\\t<count>"`` lines, highest count first.
  When ``opt.n_top`` is truthy only that many leading entries are written.

  Args:
    argv: argument list handed to ``parser.parse_args``.
  """
  parser = make_parser()
  opt, _ = parser.parse_args(argv)
  # Anything other than sys.stdout is treated as a path that we open
  # ourselves, so we are also responsible for closing it.
  owns_output = opt.output is not sys.stdout
  if owns_output:
    opt.output = open(opt.output, 'w')
  try:
    logger = logging.getLogger("main")
    logger.setLevel(logging.DEBUG)
    runner = hadut.PipesRunner(prefix=PREFIX, logger=logger)
    with open(LOCAL_MR_SCRIPT) as f:
      pipes_code = pts.add_sys_path(f.read())
    runner.set_input(opt.input, put=True)
    runner.set_exe(pipes_code)
    mr_options = get_mr_options(opt, runner.wd)
    runner.run(
      properties=mr_options, hadoop_conf_dir=HADOOP_CONF_DIR, logger=logger
      )
    mr_output = runner.collect_output()
    runner.clean()
    d = pts.parse_mr_output(mr_output, vtype=int)
    # items() is available on both Python 2 and 3 (iteritems() is not).
    ip_list = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
    if opt.n_top:
      ip_list = ip_list[:opt.n_top]
    for ip, count in ip_list:
      opt.output.write("%s\t%d\n" % (ip, count))
  finally:
    # Close the file even when the job or the parsing step fails.
    if owns_output:
      opt.output.close()
Esempio n. 3
0
def get_res(output_dir):
    """Read every ``part-*`` file under ``output_dir`` and parse the result.

    The directory is listed through a fresh ``hdfs()`` handle; each entry
    whose file name starts with ``part-`` is opened in ``'rt'`` mode and
    read in full.  The contents are concatenated in listing order and
    passed to ``pts.parse_mr_output`` with ``vtype=int``.
    """
    fs = hdfs()
    chunks = []
    for entry in fs.list_directory(output_dir):
        path = entry['path']
        # Only the job's part files carry results; skip everything else.
        if not os.path.basename(path).startswith('part-'):
            continue
        with fs.open_file(path, 'rt') as part:
            chunks.append(part.read())
    return pts.parse_mr_output(''.join(chunks), vtype=int)
Esempio n. 4
0
def get_res(output_dir):
    """Collect the ``part-*`` files in ``output_dir`` and parse their content.

    Entries returned by ``fs.list_directory`` are filtered on the last
    path component; matching files are opened with the handle's default
    mode, read completely, joined in listing order and handed to
    ``pts.parse_mr_output`` with ``vtype=int``.
    """
    fs = hdfs()
    pieces = []
    for info in fs.list_directory(output_dir):
        part_path = info['path']
        _, file_name = os.path.split(part_path)
        if file_name.startswith('part-'):
            with fs.open_file(part_path) as stream:
                content = stream.read()
            pieces.append(content)
    joined = ''.join(pieces)
    return pts.parse_mr_output(joined, vtype=int)
Esempio n. 5
0
def check(res, expected_res):
  """Compare parsed MapReduce output against the expected counts.

  ``res`` is parsed with ``pts.parse_mr_output`` (integer values) and
  compared to ``expected_res`` via ``pts.compare_counts``.  A truthy
  comparison result is reported as ``"ERROR: <result>"``; otherwise
  ``"OK."`` is returned.
  """
  parsed = pts.parse_mr_output(res, vtype=int)
  diff = pts.compare_counts(parsed, expected_res)
  return "ERROR: %s" % diff if diff else "OK."
Esempio n. 6
0
def get_res(output_dir):
    """Return the parsed job output found under ``output_dir``.

    The raw output is gathered with ``hadut.collect_output`` and parsed by
    ``pts.parse_mr_output`` with ``vtype=int``.
    """
    raw_output = hadut.collect_output(output_dir)
    parsed = pts.parse_mr_output(raw_output, vtype=int)
    return parsed
Esempio n. 7
0
def check(res, expected_res):
    """Report whether the MapReduce output matches the expected counts.

    ``res`` is parsed with ``pts.parse_mr_output`` (integer values) and the
    result is compared to ``expected_res`` through ``pts.compare_counts``.
    Returns ``"OK."`` when the comparison result is falsy, otherwise an
    ``"ERROR: ..."`` string that embeds that result.
    """
    counts = pts.parse_mr_output(res, vtype=int)
    mismatch = pts.compare_counts(counts, expected_res)
    if not mismatch:
        return "OK."
    return "ERROR: %s" % mismatch