Example 1
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_download.md"
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        client = Gcs.get_gcs_client(key_filepath)
        bucket = client.bucket(self._bucket)
        dl_files = []
        for blob in client.list_blobs(
            bucket, prefix=self._prefix, delimiter=self._delimiter
        ):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name))
            )

        ObjectStore.put(self._step, dl_files)
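
Downstream steps read this cache back with ObjectStore.get, as Examples 13 and 18 do when they delete the downloaded blobs. A minimal consumer sketch, assuming the step name was configured as "gcs_download" and that ObjectStore is importable from cliboa.util.cache (both are assumptions here):

import os

from cliboa.util.cache import ObjectStore  # assumed import path

dl_files = ObjectStore.get("gcs_download")  # same key as ObjectStore.put(self._step, dl_files)
if dl_files:
    for name in dl_files:
        # each entry is the blob name; the local copy was written to dest_dir
        print(os.path.join("/tmp/dest_dir", os.path.basename(name)))  # illustrative path
else:
    print("No files were downloaded by the previous step.")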
Example 2
    def _save_to_cache(self):
        self._logger.info("Save data to memory")
        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/bigquery_read.md"
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        df = pandas.read_gbq(
            query=(
                "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
                if self._query is None
                else self._query
            ),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=ServiceAccount.auth(key_filepath),
        )
        ObjectStore.put(self._key, df)
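
A later step can fetch the cached DataFrame with the same key. A minimal sketch, assuming the key was configured as "bq_cache" and the same assumed import path as above:

from cliboa.util.cache import ObjectStore  # assumed import path

df = ObjectStore.get("bq_cache")  # same key as ObjectStore.put(self._key, df)
if df is not None:
    print(df.shape)   # the cached object is the pandas DataFrame returned by read_gbq
    print(df.head())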
Example 3
    def execute(self, *args):
        input_valid = IOInput(self._io)
        input_valid()

        files = glob(self._src_path)
        if len(files) > 1:
            raise CliboaException("Only one input file is allowed.")

        if len(files) == 0:
            raise FileNotFound("The specified csv file was not found.")

        with open(files[0], "r", encoding=self._encoding) as f:

            # save only the specified columns of each row
            if self._columns:
                reader = csv.DictReader(f, delimiter=",")
                for row in reader:
                    # extract only the specified columns
                    row_dict = {}
                    for c in self._columns:
                        if not row.get(c):
                            continue
                        row_dict[c] = row.get(c)
                    self._s.save(row_dict)
            else:
                reader = csv.reader(f)
                header = next(reader, None)
                for row in reader:
                    row_dict = dict(zip(header, row))
                    self._s.save(row_dict)

        # cache the processed file names
        ObjectStore.put(self._step, files)
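
Example 3 hands each row to an injected store via self._s.save(row_dict). The store interface is not shown in the excerpt; a minimal, purely hypothetical sketch of one that just collects rows in memory:

class ListStore:
    """Hypothetical row store: collects one dict per CSV row."""

    def __init__(self):
        self._rows = []

    def save(self, row_dict):
        # called once per row with a {column: value} mapping
        self._rows.append(row_dict)

    def all(self):
        return list(self._rows)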
Example 4
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))

        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(self._src_dir, self._dest_dir,
                                re.compile(self._src_pattern))

        if self.__quit is True and len(files) == 0:
            self._logger.info(
                "No file was found. Subsequent processes will not be executed.")
            return 0

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Example 5
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(
            self._src_dir, self._dest_dir, re.compile(self._src_pattern)
        )

        if self._quit is True and len(files) == 0:
            self._logger.info("No file was found. After process will not be processed")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % files)

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Example 6
    def _save_to_cache(self):
        self._logger.info("Save data to memory")
        df = pandas.read_gbq(
            query=(
                "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
                if self._query is None
                else self._query
            ),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=ServiceAccount.auth(self._credentials),
        )
        ObjectStore.put(self._key, df)
Example 7
    def execute(self, *args):
        super().execute()
        valid = EssentialParameters(self.__class__.__name__, [self._key])
        valid()

        df = pandas.read_gbq(
            query=self._get_query(),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=self._auth(),
        )
        ObjectStore.put(self._key, df)
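
Example 7 is the tidied-up form of Examples 2 and 12: query construction and authentication are delegated to helpers. A sketch of what those helpers might look like, inferred from the inline logic in those examples (only the names match the call sites; the bodies below are assumptions):

    def _get_query(self):
        # fall back to a full-table scan when no explicit query is configured
        if self._query is None:
            return "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
        return self._query

    def _auth(self):
        # accept either a legacy path string or the newer dictionary form
        if isinstance(self._credentials, str):
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        return ServiceAccount.auth(key_filepath)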
Example 8
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        if isinstance(self._key, str):
            self._logger.warning((
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download.md"
            ))
            key_filepath = self._key
        else:
            key_filepath = self._source_path_reader(self._key)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            key_filepath,
            self._passphrase,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(
            self._src_dir,
            self._dest_dir,
            re.compile(self._src_pattern),
            self._endfile_suffix,
            self._ignore_empty_file,
        )
        if self._quit is True and len(files) == 0:
            self._logger.info(
                "No file was found. Subsequent processes will not be executed.")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % files)

        # cache downloaded file names
        ObjectStore.put(self._step, files)
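
The isinstance(..., str) branch around key here (and around credentials in Examples 1, 2 and 12) is repeated in every step. A hedged refactoring sketch that centralizes it; the helper name _resolve_path is made up and is not part of cliboa:

    def _resolve_path(self, value, doc_url):
        """Return a key/credential file path from either a legacy string or a dict."""
        if isinstance(value, str):
            self._logger.warning(
                "DeprecationWarning: "
                "In the near future, this parameter will accept only dictionary types. "
                "Please see more information %s" % doc_url
            )
            return value
        return self._source_path_reader(value)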
Example 9
    def test_execute_with_key(self):
        try:
            os.makedirs(self._data_dir)
            dummy_key = os.path.join(self._data_dir, "id_rsa")
            with open(dummy_key, "w") as f:
                f.write("test")

            instance = SftpDownload()
            Helper.set_property(instance, "logger",
                                LisboaLog.get_logger(__name__))
            Helper.set_property(instance, "host", "dummy.host")
            Helper.set_property(instance, "user", "dummy_user")
            Helper.set_property(instance, "key", dummy_pass)
            Helper.set_property(instance, "src_dir", "/")
            Helper.set_property(instance, "src_pattern", ".*.txt")
            Helper.set_property(instance, "dest_dir", self._data_dir)
            Helper.set_property(instance, "step", "sftp_class")

            with ExitStack() as stack:
                mock_sftp = stack.enter_context(
                    patch("cliboa.util.sftp.Sftp.list_files"))
                mock_sftp.return_value = ["test.txt"]

                instance.execute()

                assert mock_sftp.called
                assert ObjectStore.get("sftp_class") == ["test.txt"]
        finally:
            shutil.rmtree(self._data_dir)
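
The same ExitStack/patch arrangement also covers the early-exit path of Examples 5 and 8, where quit is set and no remote file matches. A hedged sketch (the quit property and the location of StepStatus are assumptions based on those examples):

    def test_execute_quit_when_no_files(self):
        instance = SftpDownload()
        Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
        Helper.set_property(instance, "host", "dummy.host")
        Helper.set_property(instance, "user", "dummy_user")
        Helper.set_property(instance, "password", "dummy_pass")
        Helper.set_property(instance, "src_dir", "/")
        Helper.set_property(instance, "src_pattern", ".*.txt")
        Helper.set_property(instance, "dest_dir", self._data_dir)
        Helper.set_property(instance, "quit", True)  # assumed property name

        with ExitStack() as stack:
            mock_sftp = stack.enter_context(
                patch("cliboa.util.sftp.Sftp.list_files"))
            mock_sftp.return_value = []  # nothing matched on the remote side

            # the step should stop early and report successful termination
            assert instance.execute() == StepStatus.SUCCESSFUL_TERMINATION

        shutil.rmtree(self._data_dir, ignore_errors=True)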
Example 10
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        client = self._gcs_client()
        bucket = client.get_bucket(self._bucket)
        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Example 11
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        c = storage.Client(self._project_id,
                           credentials=ServiceAccount.auth(self._credentials))
        bucket = c.get_bucket(self._bucket)
        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Example 12
    def execute(self, *args):
        super().execute()
        valid = EssentialParameters(self.__class__.__name__, [self._key])
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        df = pandas.read_gbq(
            query=self._get_query(),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=ServiceAccount.auth(key_filepath),
        )
        ObjectStore.put(self._key, df)
Example 13
    def execute(self, *args):
        dl_files = ObjectStore.get(self._symbol)

        if dl_files is not None and len(dl_files) > 0:
            self._logger.info("Delete files %s" % dl_files)
            client = self._gcs_client()
            bucket = client.get_bucket(super().get_step_argument("bucket"))
            for blob in bucket.list_blobs(
                    prefix=super().get_step_argument("prefix"),
                    delimiter=super().get_step_argument("delimiter"),
            ):
                for dl_f in dl_files:
                    if dl_f == blob.name:
                        blob.delete()
                        break
        else:
            self._logger.info("No files to delete.")
Example 14
    def execute(self, *args):
        files = ObjectStore.get(self._symbol)

        if files is not None and len(files) > 0:
            self._logger.info("Delete files %s" % files)

            if isinstance(super().get_step_argument("key"), str):
                self._logger.warning((
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `key` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download_file_delete.md"  # noqa
                ))
                key_filepath = super().get_step_argument("key")
            else:
                key_filepath = self._source_path_reader(
                    super().get_step_argument("key"))

            sftp = Sftp(
                super().get_step_argument("host"),
                super().get_step_argument("user"),
                super().get_step_argument("password"),
                key_filepath,
                super().get_step_argument("timeout"),
                super().get_step_argument("retry_count"),
                super().get_step_argument("port"),
            )

            endfile_suffix = super().get_step_argument("endfile_suffix")
            for file in files:
                sftp.remove_specific_file(super().get_step_argument("src_dir"),
                                          file)
                self._logger.info("%s is successfully deleted." % file)

                if endfile_suffix:
                    sftp.remove_specific_file(
                        super().get_step_argument("src_dir"),
                        file + endfile_suffix)
                    self._logger.info("%s is successfully deleted." %
                                      (file + endfile_suffix))
        else:
            self._logger.info("No files to delete.")
Example 15
    def execute(self, *args):
        files = ObjectStore.get(self._symbol)

        if files is not None and len(files) > 0:
            self._logger.info("Delete files %s" % files)
            sftp = Sftp(
                super().get_step_argument("host"),
                super().get_step_argument("user"),
                super().get_step_argument("password"),
                super().get_step_argument("key"),
                super().get_step_argument("timeout"),
                super().get_step_argument("retry_count"),
                super().get_step_argument("port"),
            )
            for file in files:
                sftp.remove_specific_file(super().get_step_argument("src_dir"), file)
                self._logger.info("%s is successfully deleted." % file)
        else:
            self._logger.info("No files to delete.")
Example 16
    def execute(self, *args):
        files = ObjectStore.get(self._symbol)

        if files is not None and len(files) > 0:
            self._logger.info("Delete files %s" % files)

            ftp_util = FtpUtil(
                super().get_step_argument("host"),
                super().get_step_argument("user"),
                super().get_step_argument("password"),
                super().get_step_argument("timeout"),
                super().get_step_argument("retry_count"),
                super().get_step_argument("port"),
                super().get_step_argument("tls"),
            )
            for file in files:
                ftp_util.remove_specific_file(
                    super().get_step_argument("src_dir"), file)
        else:
            self._logger.info("No files to delete.")
Example 17
    def test_execute_with_files(self):
        instance = SftpDownload()
        Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
        Helper.set_property(instance, "host", "dummy.host")
        Helper.set_property(instance, "user", "dummy_user")
        Helper.set_property(instance, "password", "dummy_pass")
        Helper.set_property(instance, "src_dir", "/")
        Helper.set_property(instance, "src_pattern", ".*.txt")
        Helper.set_property(instance, "dest_dir", self._data_dir)
        Helper.set_property(instance, "step", "sftp_class")

        with ExitStack() as stack:
            mock_sftp = stack.enter_context(
                patch("cliboa.util.sftp.Sftp.list_files"))
            mock_sftp.return_value = ["test.txt"]

            instance.execute()

            assert ObjectStore.get("sftp_class") == ["test.txt"]
            shutil.rmtree(self._data_dir)
Example 18
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))
        dl_files = ObjectStore.get(self._symbol)

        if dl_files is not None and len(dl_files) > 0:
            self._logger.info("Delete files %s" % dl_files)
            c = storage.Client(
                super().get_step_argument("project_id"),
                credentials=ServiceAccount.auth(
                    super().get_step_argument("credentials")),
            )
            bucket = c.get_bucket(super().get_step_argument("bucket"))
            for blob in bucket.list_blobs(
                    prefix=super().get_step_argument("prefix"),
                    delimiter=super().get_step_argument("delimiter"),
            ):
                for dl_f in dl_files:
                    if dl_f == blob.name:
                        blob.delete()
                        break
        else:
            self._logger.info("No files to delete.")