def main(args=None):
    """
    Fetch the output produced by recab_table and write it as a single csv table.

    The header line (``Header``) is written first, followed by the
    concatenated contents of every ``part-r-*`` file found under the input
    HDFS directory, as printed by ``hadoop dfs -cat``.

    :param args: list of command line arguments (INPUT_DIR and an optional
        OUTPUT path).  When None, argparse falls back to ``sys.argv[1:]``.
    :return: 0 on success.
    :raises SystemExit: through ``parser.error`` when the input path does not
        exist on HDFS or the output file cannot be opened, and through
        ``sys.exit`` (with hadoop's return code) when the cat command fails.
    """
    parser = argparse.ArgumentParser(description='Fetch output produced by recab_table and format into a single csv table.')
    parser.add_argument('input_dir', metavar='INPUT_DIR', help="Input directory (recab_table output path)")
    parser.add_argument('output', nargs='?', metavar="OUTPUT", default=DefaultOutput,
                        help="Desired output file. If not specified output will be written to stdout.")
    args = parser.parse_args(args)

    if not hadut.hdfs_path_exists(args.input_dir):
        parser.error("Can't find specified input HDFS path %s" % args.input_dir)

    # Remember whether we own the stream: sys.stdout must not be closed by us.
    write_to_stdout = args.output == DefaultOutput
    if write_to_stdout:
        output = sys.stdout
    else:
        try:
            output = open(args.output, 'w')
        except (IOError, OSError) as e:
            # parser.error() requires a message; it prints the usage plus this
            # text to stderr and exits with status 2.
            parser.error("Error opening output file %s\nMessage: %s" % (args.output, e))

    try:
        # write the header
        output.write(Header)
        output.write("\n")
        # Flush before spawning the child process: it writes straight to the
        # underlying file descriptor, so anything still sitting in our buffer
        # would otherwise end up after (or in the middle of) its output.
        output.flush()
        retcode = subprocess.call([hadut.hadoop, "dfs", "-cat", args.input_dir + "/part-r-*"], stdout=output)
        if retcode != 0:
            sys.stderr.write("Error writing output file\n")
            sys.exit(retcode)
    finally:
        if write_to_stdout:
            # Leave the interpreter's stdout open for whoever called us.
            output.flush()
        else:
            output.close()
    return 0
def setup(self):
    """
    Prepare the HDFS side of this test.

    * Runs ``dfsadmin -safemode wait`` before touching HDFS
    * Refuses to continue (RuntimeError) if the hdfs test directory
      (self.make_hdfs_test_path()) is already there
    * Creates that directory and uploads the local 'input' directory into
      the hdfs input path (self.make_hdfs_input_path())
    """
    safemode_args = ["-safemode", "wait"]
    hadut.run_hadoop_cmd_e("dfsadmin", args_list=safemode_args)
    self.logger.debug("hdfs out of safe mode")

    path_is_taken = hadut.hdfs_path_exists(self.make_hdfs_test_path())
    if path_is_taken:
        # Never reuse or clobber a leftover directory: report it and bail out.
        message = "hdfs test path '%s' already exists. Please remove it" % self.make_hdfs_test_path()
        self.logger.fatal(message)
        raise RuntimeError(message)

    hadut.dfs("-mkdir", self.make_hdfs_test_path())

    hdfs_destination = self.make_hdfs_input_path()
    local_source = self.make_local_input_path()
    hadut.dfs("-put", local_source, hdfs_destination)
def setup(self):
    """
    Set up the hdfs working area used by this test.

    Steps, in order:

    * issue ``dfsadmin -safemode wait``
    * fail with RuntimeError when the hdfs directory named after this test
      (self.make_hdfs_test_path()) already exists
    * create that hdfs directory
    * upload the local 'input' directory to self.make_hdfs_input_path()
    """
    hadut.run_hadoop_cmd_e("dfsadmin", args_list=["-safemode", "wait"])
    self.logger.debug("hdfs out of safe mode")

    if not hadut.hdfs_path_exists(self.make_hdfs_test_path()):
        hadut.dfs("-mkdir", self.make_hdfs_test_path())
        upload_target = self.make_hdfs_input_path()
        upload_source = self.make_local_input_path()
        hadut.dfs("-put", upload_source, upload_target)
        return

    # The directory is a leftover from somewhere else; the user must remove it.
    problem = "hdfs test path '%s' already exists. Please remove it" % self.make_hdfs_test_path()
    self.logger.fatal(problem)
    raise RuntimeError(problem)