def create_SUCCESS_file(path):
    """Create an empty ``_SUCCESS`` marker file under ``path``.

    Dispatches on the URI scheme of ``path``:
      * ``s3:``/``s3n:``/``s3a:`` -- write an empty object via S3Client
      * ``hdfs:``                 -- copy ``/dev/null`` into HDFS
      * ``file:``                 -- touch the file on the local filesystem
    Paths with any other scheme are silently ignored.

    Args:
        path: destination directory (URI string); the marker is created
            at ``<path>/_SUCCESS``.
    """
    # str.startswith accepts a tuple of prefixes -- one call covers all
    # three S3 scheme variants instead of a chained `or`.
    if path.startswith(("s3:", "s3n:", "s3a:")):
        s3_client = S3Client(
            eggo_config.get("aws", "aws_access_key_id"),
            eggo_config.get("aws", "aws_secret_access_key"))
        s3_client.put_string("", os.path.join(path, "_SUCCESS"))
    elif path.startswith("hdfs:"):
        hdfs_client = HdfsClient()
        hdfs_client.put("/dev/null", os.path.join(path, "_SUCCESS"))
    elif path.startswith("file:"):
        # NOTE(review): the "file:" scheme prefix is NOT stripped before
        # open(), so the literal "file:..." string is used as the OS path --
        # confirm callers pass values open() can resolve as-is.
        # Open-for-append then close == portable "touch".
        open(os.path.join(path, "_SUCCESS"), "a").close()
def run(self):
    """Serialize each configured source as one JSON line into a local
    scratch file, then upload that file to ``self.hdfs_path`` on the
    Hadoop filesystem.

    The local scratch directory is always removed, even if the upload
    fails.
    """
    scratch_dir = mkdtemp(prefix="tmp_eggo_",
                          dir=eggo_config.get("worker_env", "work_path"))
    try:
        # One JSON document per line, one line per configured source.
        local_path = "{0}/command_file".format(scratch_dir)
        sources = ToastConfig().config["sources"]
        with open(local_path, "w") as fh:
            fh.writelines("{0}\n".format(json.dumps(src)) for src in sources)
        # Make sure the destination directory exists, then upload the
        # command file to the Hadoop filesystem.
        client = HdfsClient()
        client.mkdir(os.path.dirname(self.hdfs_path), True)
        client.put(local_path, self.hdfs_path)
    finally:
        rmtree(scratch_dir)