def execute(self, *args):
    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_download.md"
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    client = Gcs.get_gcs_client(key_filepath)
    bucket = client.bucket(self._bucket)
    dl_files = []
    for blob in client.list_blobs(bucket, prefix=self._prefix, delimiter=self._delimiter):
        r = re.compile(self._src_pattern)
        if not r.fullmatch(blob.name):
            continue
        dl_files.append(blob.name)
        blob.download_to_filename(
            os.path.join(self._dest_dir, os.path.basename(blob.name))
        )

    ObjectStore.put(self._step, dl_files)
def _save_to_cache(self): self._logger.info("Save data to on memory") if isinstance(self._credentials, str): self._logger.warning( ( "DeprecationWarning: " "In the near future, " "the `credentials` will be changed to accept only dictionary types. " "Please see more information " "https://github.com/BrainPad/cliboa/blob/master/docs/modules/bigquery_read.md" ) ) key_filepath = self._credentials else: key_filepath = self._source_path_reader(self._credentials) df = pandas.read_gbq( query="SELECT * FROM %s.%s" % (self._dataset, self._tblname) if self._query is None else self._query, dialect="standard", location=self._location, project_id=self._project_id, credentials=ServiceAccount.auth(key_filepath), ) ObjectStore.put(self._key, df)
def execute(self, *args):
    input_valid = IOInput(self._io)
    input_valid()

    files = glob(self._src_path)
    if len(files) > 1:
        raise CliboaException("Input file must be only one.")
    if len(files) == 0:
        raise FileNotFound("The specified csv file not found.")

    with open(files[0], "r", encoding=self._encoding) as f:
        if self._columns:
            # save only the specified columns
            reader = csv.DictReader(f, delimiter=",")
            for row in reader:
                # extract only the specified columns
                row_dict = {}
                for c in self._columns:
                    if not row.get(c):
                        continue
                    row_dict[c] = row.get(c)
                self._s.save(row_dict)
        else:
            reader = csv.reader(f)
            header = next(reader, None)
            for row in reader:
                row_dict = dict(zip(header, row))
                self._s.save(row_dict)

    # cache the file names that were read
    ObjectStore.put(self._step, files)
def execute(self, *args): for k, v in self.__dict__.items(): self._logger.info("%s : %s" % (k, v)) # essential parameters check valid = EssentialParameters( self.__class__.__name__, [self._host, self._user, self._src_dir, self._src_pattern], ) valid() os.makedirs(self._dest_dir, exist_ok=True) # fetch src sftp = Sftp( self._host, self._user, self._password, self._key, self._timeout, self._retry_count, self._port, ) files = sftp.list_files(self._src_dir, self._dest_dir, re.compile(self._src_pattern)) if self.__quit is True and len(files) == 0: self._logger.info( "No file was found. After process will not be processed") return 0 # cache downloaded file names ObjectStore.put(self._step, files)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern],
    )
    valid()

    os.makedirs(self._dest_dir, exist_ok=True)

    # fetch src
    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        self._key,
        self._timeout,
        self._retry_count,
        self._port,
    )
    files = sftp.list_files(
        self._src_dir, self._dest_dir, re.compile(self._src_pattern)
    )

    if self._quit is True and len(files) == 0:
        self._logger.info("No file was found. After process will not be processed")
        return StepStatus.SUCCESSFUL_TERMINATION

    self._logger.info("Files downloaded %s" % files)

    # cache downloaded file names
    ObjectStore.put(self._step, files)
def _save_to_cache(self): self._logger.info("Save data to on memory") df = pandas.read_gbq( query="SELECT * FROM %s.%s" % (self._dataset, self._tblname) if self._query is None else self._query, dialect="standard", location=self._location, project_id=self._project_id, credentials=ServiceAccount.auth(self._credentials), ) ObjectStore.put(self._key, df)
def execute(self, *args): super().execute() valid = EssentialParameters(self.__class__.__name__, [self._key]) valid() df = pandas.read_gbq( query=self._get_query(), dialect="standard", location=self._location, project_id=self._project_id, credentials=self._auth(), ) ObjectStore.put(self._key, df)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern],
    )
    valid()

    os.makedirs(self._dest_dir, exist_ok=True)

    if isinstance(self._key, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download.md"
            )
        )
        key_filepath = self._key
    else:
        key_filepath = self._source_path_reader(self._key)

    # fetch src
    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        key_filepath,
        self._passphrase,
        self._timeout,
        self._retry_count,
        self._port,
    )
    files = sftp.list_files(
        self._src_dir,
        self._dest_dir,
        re.compile(self._src_pattern),
        self._endfile_suffix,
        self._ignore_empty_file,
    )

    if self._quit is True and len(files) == 0:
        self._logger.info("No file was found. After process will not be processed")
        return StepStatus.SUCCESSFUL_TERMINATION

    self._logger.info("Files downloaded %s" % files)

    # cache downloaded file names
    ObjectStore.put(self._step, files)
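# The file names cached via ObjectStore.put(self._step, files) above are what the
# deletion steps further below read back with ObjectStore.get(self._symbol).
# A minimal sketch of that round trip, assuming ObjectStore is importable from
# cliboa.util.cache and using a hypothetical step name "sftp_download":
from cliboa.util.cache import ObjectStore

ObjectStore.put("sftp_download", ["foo_1.txt", "foo_2.txt"])
assert ObjectStore.get("sftp_download") == ["foo_1.txt", "foo_2.txt"]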
def test_execute_with_key(self):
    try:
        os.makedirs(self._data_dir)
        dummy_pass = os.path.join(self._data_dir, "id_rsa")
        with open(dummy_pass, "w") as f:
            f.write("test")
        instance = SftpDownload()
        Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
        Helper.set_property(instance, "host", "dummy.host")
        Helper.set_property(instance, "user", "dummy_user")
        Helper.set_property(instance, "key", dummy_pass)
        Helper.set_property(instance, "src_dir", "/")
        Helper.set_property(instance, "src_pattern", ".*.txt")
        Helper.set_property(instance, "dest_dir", self._data_dir)
        Helper.set_property(instance, "step", "sftp_class")
        with ExitStack() as stack:
            mock_sftp = stack.enter_context(patch("cliboa.util.sftp.Sftp.list_files"))
            mock_sftp.return_value = ["test.txt"]
            instance.execute()
        assert mock_sftp.called
        assert ObjectStore.get("sftp_class") == ["test.txt"]
    finally:
        shutil.rmtree(self._data_dir)
def execute(self, *args):
    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
    valid()

    client = self._gcs_client()
    bucket = client.get_bucket(self._bucket)
    dl_files = []
    for blob in bucket.list_blobs(prefix=self._prefix, delimiter=self._delimiter):
        r = re.compile(self._src_pattern)
        if not r.fullmatch(blob.name):
            continue
        dl_files.append(blob.name)
        blob.download_to_filename(
            os.path.join(self._dest_dir, os.path.basename(blob.name))
        )
    ObjectStore.put(self._step, dl_files)
def execute(self, *args): for k, v in self.__dict__.items(): self._logger.info("%s : %s" % (k, v)) super().execute() valid = EssentialParameters(self.__class__.__name__, [self._src_pattern]) valid() c = storage.Client(self._project_id, credentials=ServiceAccount.auth(self._credentials)) bucket = c.get_bucket(self._bucket) dl_files = [] for blob in bucket.list_blobs(prefix=self._prefix, delimiter=self._delimiter): r = re.compile(self._src_pattern) if not r.fullmatch(blob.name): continue dl_files.append(blob.name) blob.download_to_filename( os.path.join(self._dest_dir, os.path.basename(blob.name))) ObjectStore.put(self._step, dl_files)
def execute(self, *args):
    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._key])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    df = pandas.read_gbq(
        query=self._get_query(),
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(key_filepath),
    )
    ObjectStore.put(self._key, df)
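# _get_query() is called above but its body is not shown here. A plausible sketch,
# assuming it mirrors the inline fallback used in the _save_to_cache variants above
# (full-table SELECT when no explicit query is configured); the body is an assumption,
# only the method name and the _query/_dataset/_tblname attributes appear in the source.
def _get_query(self):
    if self._query is None:
        return "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
    return self._query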
def execute(self, *args):
    dl_files = ObjectStore.get(self._symbol)
    if len(dl_files) > 0:
        self._logger.info("Delete files %s" % dl_files)
        client = self._gcs_client()
        bucket = client.get_bucket(super().get_step_argument("bucket"))
        for blob in bucket.list_blobs(
            prefix=super().get_step_argument("prefix"),
            delimiter=super().get_step_argument("delimiter"),
        ):
            for dl_f in dl_files:
                if dl_f == blob.name:
                    blob.delete()
                    break
    else:
        self._logger.info("No files to delete.")
def execute(self, *args): files = ObjectStore.get(self._symbol) if files is not None and len(files) > 0: self._logger.info("Delete files %s" % files) if isinstance(super().get_step_argument("key"), str): self._logger.warning(( "DeprecationWarning: " "In the near future, " "the `key` will be changed to accept only dictionary types. " "Please see more information " "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download_file_delete.md" # noqa )) key_filepath = super().get_step_argument("key") else: key_filepath = self._source_path_reader( super().get_step_argument("key")) sftp = Sftp( super().get_step_argument("host"), super().get_step_argument("user"), super().get_step_argument("password"), key_filepath, super().get_step_argument("timeout"), super().get_step_argument("retry_count"), super().get_step_argument("port"), ) endfile_suffix = super().get_step_argument("endfile_suffix") for file in files: sftp.remove_specific_file(super().get_step_argument("src_dir"), file) self._logger.info("%s is successfully deleted." % file) if endfile_suffix: sftp.remove_specific_file( super().get_step_argument("src_dir"), file + endfile_suffix) self._logger.info("%s is successfully deleted." % (file + endfile_suffix)) else: self._logger.info("No files to delete.")
def execute(self, *args): files = ObjectStore.get(self._symbol) if files is not None and len(files) > 0: self._logger.info("Delete files %s" % files) sftp = Sftp( super().get_step_argument("host"), super().get_step_argument("user"), super().get_step_argument("password"), super().get_step_argument("key"), super().get_step_argument("timeout"), super().get_step_argument("retry_count"), super().get_step_argument("port"), ) for file in files: sftp.remove_specific_file(super().get_step_argument("src_dir"), file) self._logger.info("%s is successfully deleted." % file) else: self._logger.info("No files to delete.")
def execute(self, *args): files = ObjectStore.get(self._symbol) if files is not None and len(files) > 0: self._logger.info("Delete files %s" % files) ftp_util = FtpUtil( super().get_step_argument("host"), super().get_step_argument("user"), super().get_step_argument("password"), super().get_step_argument("timeout"), super().get_step_argument("retry_count"), super().get_step_argument("port"), super().get_step_argument("tls"), ) for file in files: ftp_util.remove_specific_file( super().get_step_argument("src_dir"), file) else: self._logger.info("No files to delete.")
def test_execute_with_files(self):
    instance = SftpDownload()
    Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
    Helper.set_property(instance, "host", "dummy.host")
    Helper.set_property(instance, "user", "dummy_user")
    Helper.set_property(instance, "password", "dummy_pass")
    Helper.set_property(instance, "src_dir", "/")
    Helper.set_property(instance, "src_pattern", ".*.txt")
    Helper.set_property(instance, "dest_dir", self._data_dir)
    Helper.set_property(instance, "step", "sftp_class")
    with ExitStack() as stack:
        mock_sftp = stack.enter_context(patch("cliboa.util.sftp.Sftp.list_files"))
        mock_sftp.return_value = ["test.txt"]
        instance.execute()
    assert ObjectStore.get("sftp_class") == ["test.txt"]
    shutil.rmtree(self._data_dir)
def execute(self, *args): for k, v in self.__dict__.items(): self._logger.info("%s : %s" % (k, v)) dl_files = ObjectStore.get(self._symbol) if len(dl_files) > 0: self._logger.info("Delete files %s" % dl_files) c = storage.Client( super().get_step_argument("project_id"), credentials=ServiceAccount.auth( super().get_step_argument("credentials")), ) bucket = c.get_bucket(super().get_step_argument("bucket")) for blob in bucket.list_blobs( prefix=super().get_step_argument("prefix"), delimiter=super().get_step_argument("delimiter"), ): for dl_f in dl_files: if dl_f == blob.name: blob.delete() break else: self._logger.info("No files to delete.")