Esempio n. 1
0
def create_SUCCESS_file(path):
    """Create an empty ``_SUCCESS`` marker file under *path*.

    The storage backend is selected from the scheme prefix of *path*:

    * ``s3:``, ``s3n:``, ``s3a:`` -- an empty string is stored through
      ``S3Client.put_string``, with credentials read from the ``aws``
      section of ``eggo_config``.
    * ``hdfs:`` -- ``/dev/null`` is uploaded through ``HdfsClient.put``.
    * ``file:`` -- the marker is created on the local filesystem (an
      existing marker is left as it is, since the file is opened in
      append mode).

    A *path* with any other prefix is ignored: nothing is written and no
    error is raised.
    """
    if path.startswith(("s3:", "s3n:", "s3a:")):
        s3_client = S3Client(
            eggo_config.get("aws", "aws_access_key_id"),
            eggo_config.get("aws", "aws_secret_access_key"),
        )
        s3_client.put_string("", os.path.join(path, "_SUCCESS"))
    elif path.startswith("hdfs:"):
        hdfs_client = HdfsClient()
        hdfs_client.put("/dev/null", os.path.join(path, "_SUCCESS"))
    elif path.startswith("file:"):
        # open() works on filesystem paths, not URLs: handing it
        # "file:///dir/_SUCCESS" would look for a relative directory that is
        # literally named "file:".  Strip the scheme first.
        local_dir = path[len("file:"):]
        if local_dir.startswith("//"):
            # "file://<authority>/dir" -> "/dir" (the authority is usually
            # empty, as in "file:///dir").
            local_dir = "/" + local_dir[2:].partition("/")[2]
        # Append mode creates the file when missing without truncating an
        # existing one; the context manager closes the handle right away.
        with open(os.path.join(local_dir, "_SUCCESS"), "a"):
            pass
Esempio n. 2
0
    def run(self):
        """Write the configured sources to a command file and upload it.

        Each entry of ``ToastConfig().config["sources"]`` is serialized with
        ``json.dumps`` and written on its own line to a ``command_file``
        inside a scratch directory created under the ``work_path`` setting
        of the ``worker_env`` section of ``eggo_config``.  The file is then
        uploaded to ``self.hdfs_path`` through ``HdfsClient``.  The scratch
        directory is removed afterwards, whether or not the upload succeeded.
        """
        scratch_dir = mkdtemp(
            prefix="tmp_eggo_",
            dir=eggo_config.get("worker_env", "work_path"),
        )
        try:
            # One JSON document per line, one line per source.
            local_command_path = "{0}/command_file".format(scratch_dir)
            with open(local_command_path, "w") as out:
                out.writelines(
                    "{0}\n".format(json.dumps(entry))
                    for entry in ToastConfig().config["sources"]
                )

            # Make sure the destination directory exists, then upload.
            # NOTE(review): the second positional argument to mkdir()
            # presumably asks for parent directories to be created -- confirm
            # against the HdfsClient in use.
            remote_path = self.hdfs_path
            hdfs = HdfsClient()
            hdfs.mkdir(os.path.dirname(remote_path), True)
            hdfs.put(local_command_path, self.hdfs_path)
        finally:
            rmtree(scratch_dir)