コード例 #1
0
    def execute(self, context):
        """Move a batch of remote files into ``self.dest_path`` over SFTP.

        The files to move come from ``self.source_files`` (either a sequence
        of remote paths or a string holding a Python literal of one); when
        ``self.source_path`` is set, the listing of that remote directory is
        used instead. ``self.file_limit``, when truthy, caps how many files
        are processed.

        For each file, any existing file at the destination path is removed
        first, then the source is renamed to the destination. Failures other
        than ``IOError`` are retried up to five times with exponential
        backoff on a fresh connection.

        :param context: Airflow task context (unused).
        :return: list of destination paths that were renamed successfully.
        :raises Exception: the last non-``IOError`` error once all attempts
            for a file are exhausted.
        """
        self.log.info("Going to start Bulk Rename sftp operator")
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        sftp_hook.no_host_key_check = True

        # Always bind the list: previously a non-string ``source_files`` (or
        # no source at all) left it undefined and raised UnboundLocalError.
        source_files_list = []
        if self.source_files:
            if isinstance(self.source_files, str):
                # literal_eval only evaluates Python literals, never code.
                source_files_list = ast.literal_eval(self.source_files)
            else:
                source_files_list = self.source_files

        # A configured source directory takes precedence over explicit files.
        if self.source_path:
            source_files_list = [
                os.path.join(self.source_path, name)
                for name in sftp_hook.list_directory(self.source_path)
            ]

        if self.file_limit:
            source_files_list = source_files_list[: self.file_limit]

        max_attempts = 5
        file_path_list = []
        for key in source_files_list:
            file_path = os.path.join(self.dest_path, key.split("/")[-1])
            self.log.info(f"Renaming {key} to {file_path}")

            conn = sftp_hook.get_conn()
            for attempt in range(max_attempts):
                try:
                    # Best effort: clear a leftover file at the destination so
                    # the rename does not collide; a missing file is expected.
                    try:
                        conn.remove(file_path)
                        self.log.info("Deleted duplicated file")
                    except IOError:
                        pass

                    conn.rename(key, file_path)
                    file_path_list.append(file_path)
                    break
                except IOError:
                    # NOTE(review): IOError is an alias of OSError on Python 3,
                    # so this branch also swallows socket-level errors, not
                    # only "no such file" -- confirm that is intended.
                    self.log.info("File not found, skipping")
                    break
                except Exception:
                    self.log.info(
                        f"Got no response from server, waiting for next try number {(attempt + 1)}"
                    )
                    if attempt >= max_attempts - 1:
                        raise
                    # Exponential backoff with jitter, then reconnect.
                    time.sleep(2 ** attempt + random.random())
                    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
                    sftp_hook.no_host_key_check = True
                    conn = sftp_hook.get_conn()

        self.log.info("Finished executing Bulk Rename sftp operator")
        return file_path_list
コード例 #2
0
 def execute(self, context):
     """Delete the remote file at ``self.file_path`` over SFTP.

     :param context: Airflow task context (unused).
     :return: ``True`` once the delete call has returned.
     """
     self.log.info("Going to start delete file sftp operator")

     # Host key verification is switched off on the hook before use.
     hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
     hook.no_host_key_check = True

     hook.delete_file(self.file_path)

     self.log.info("Finished executing delete file sftp operator")
     return True
コード例 #3
0
 def execute(self, context):
     """Rename ``self.source_file`` to ``self.dest_file`` on the SFTP server.

     An ``IOError`` from the rename is logged and ignored, so the task
     still completes when the rename cannot be performed.

     :param context: Airflow task context (unused).
     """
     self.log.info("Going to start Rename SFTP Operator")

     hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
     hook.no_host_key_check = True
     client = hook.get_conn()

     try:
         client.rename(self.source_file, self.dest_file)
     except IOError:
         # Treated as "nothing to rename" rather than a task failure.
         self.log.info("File not found, skipping")

     self.log.info("Finished executing RenameSFTPOperator")
コード例 #4
0
    def execute(self, context):
        """Copy every file in ``self.sftp_path`` to S3 under ``self.dest_path``.

        Each remote file is downloaded into its own temporary file (up to
        five attempts per file, with exponential backoff on a fresh SFTP
        session), then all downloads are uploaded to ``self.dest_bucket``
        using a pool of ``self.workers`` workers.

        The temporary files and the SFTP session are released when the method
        exits, whether it succeeds or raises.

        :param context: Airflow task context (unused).
        :return: ``False`` when the remote directory is empty, otherwise the
            list of remote source paths that were processed.
        :raises Exception: the last download error once all attempts for a
            file are exhausted.
        """
        self.log.info("Going to start Bulk sftp to s3 operator")
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        sftp_hook.no_host_key_check = True
        list_dir = sftp_hook.list_directory(self.sftp_path)

        if not list_dir:
            self.log.info("Got no files to process. Skipping")
            return False

        self.log.info(f"Got {len(list_dir)} files to move")
        max_attempts = 5
        temp_files = []
        file_path_list = []
        # Every temp file ever created, so that files whose download failed
        # are cleaned up as well as the successful ones.
        opened_temp_files = []
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        sftp_client = ssh_hook.get_conn().open_sftp()
        s3_hook = S3Hook(self.aws_conn_id)
        try:
            for file_name in list_dir:
                file_path = os.path.join(self.sftp_path, file_name)
                file_path_list.append(file_path)
                s3_key = str(os.path.join(self.dest_path, file_name))
                temp_file = NamedTemporaryFile("w")
                opened_temp_files.append(temp_file)
                file_metadata = {"ftp": temp_file, "s3_key": s3_key}
                for attempt in range(max_attempts):
                    try:
                        self.log.info(f"Downloading {file_path}")
                        sftp_client.get(file_path, file_metadata["ftp"].name)
                        file_metadata["ftp"].flush()
                        temp_files.append(file_metadata)
                        break
                    except Exception:
                        self.log.info(
                            f"Got no response from server, waiting for next try number {(attempt + 1)}"
                        )
                        if attempt >= max_attempts - 1:
                            raise
                        # Exponential backoff with jitter, then reconnect.
                        time.sleep(2 ** attempt + random.random())
                        sftp_client = (
                            SSHHook(ssh_conn_id=self.sftp_conn_id)
                            .get_conn()
                            .open_sftp()
                        )

            self.log.info(f"Uploading to S3 with {self.workers} workers")
            # The temp files must stay open until the uploads finish: closing
            # a NamedTemporaryFile deletes it from disk.
            # Positional args are presumably (filename, key, bucket_name,
            # replace, encrypt) -- verify against the S3Hook version in use.
            with Pool(self.workers) as pool:
                pool.starmap(
                    s3_hook.load_file,
                    [
                        (x["ftp"].name, x["s3_key"], self.dest_bucket, True, False)
                        for x in temp_files
                    ],
                )
        finally:
            for temp_file in opened_temp_files:
                temp_file.close()
            sftp_client.close()

        self.log.info("Finished executing Bulk sftp to s3 operator")
        return file_path_list
コード例 #5
0
    def delete_file(self, file_path):
        """Delete one remote file over SFTP, retrying on failure.

        A new hook is created for every attempt. Up to five attempts are
        made, sleeping 2, 4, 8 and 16 seconds between them; the error from
        the final attempt is re-raised immediately, without a trailing sleep.

        :param file_path: remote path of the file to delete.
        :return: ``True`` once the file has been deleted.
        :raises Exception: the error from the last attempt when every
            attempt fails.
        """
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                print(f"Deleting {file_path}")
                sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
                sftp_hook.no_host_key_check = True
                sftp_hook.delete_file(file_path)
                sftp_hook.close_conn()
                break
            except Exception:
                # Give up before sleeping so a permanent failure surfaces
                # without an extra backoff delay.
                if attempt >= max_attempts - 1:
                    raise
                time.sleep(2 ** (attempt + 1))

        return True
コード例 #6
0
    def execute(self, context):
        """Delete every entry listed in ``self.source_path`` over SFTP.

        The directory is listed once, each entry is joined onto
        ``self.source_path``, and the resulting paths are handed to
        ``self.delete_file`` through a pool of ``self.workers`` workers.

        :param context: Airflow task context (unused).
        :return: ``True`` once the pool has processed every path.
        """
        self.log.info("Going to start bulk delete file sftp operator")

        hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        hook.no_host_key_check = True

        # Full remote path for every entry in the source directory.
        source_files_list = [
            os.path.join(self.source_path, entry)
            for entry in hook.list_directory(self.source_path)
        ]

        self.log.info(
            f"Going to delete {len(source_files_list)} with {self.workers} workers"
        )
        with Pool(self.workers) as workers:
            workers.map(self.delete_file, source_files_list)

        self.log.info("Finished executing bulk delete file sftp operator")
        return True