Example #1
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern, self._dest_dir])
        valid()

        service = BlobServiceAdapter().get_client(
            account_url=self._account_url,
            account_access_key=self._account_access_key,
            connection_string=self._connection_string,
        )
        container_client = service.get_container_client(self._container_name)
        blobs = container_client.list_blobs(name_starts_with=self._prefix)
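        # Download each blob whose name fully matches self._src_pattern into self._dest_dir.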
        for blob in blobs:
            filename = blob.name
            rec = re.compile(self._src_pattern)
            if not rec.fullmatch(filename):
                continue
            dest_path = os.path.join(self._dest_dir,
                                     os.path.basename(filename))
            blob_client = service.get_blob_client(
                container=self._container_name, blob=filename)

            with open(dest_path, "wb") as local_blob:
                blob_data = blob_client.download_blob()
                blob_data.readinto(local_blob)
Example #2
    def execute(self, *args):
        super().execute()

        input_valid = IOInput(self._io)
        input_valid()

        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))

        param_valid = EssentialParameters(self.__class__.__name__,
                                          [self._tblname])
        param_valid()

        tbl_valid = SqliteTableExistence(self._dbname, self._tblname)
        tbl_valid()

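        # sqlite3 row factory: map each fetched row to a
        # {column_name: value} dict using cursor.description.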
        def dict_factory(cursor, row):
            d = {}
            for i, col in enumerate(cursor.description):
                d[col[0]] = row[i]
            return d

        self._sqlite_adptr.connect(self._dbname)
        cur = self._sqlite_adptr.fetch(sql=self.__get_query(),
                                       row_factory=dict_factory)
        for r in cur:
            self._s.save(r)
Example #3
File: http.py, Project: 45deg/cliboa
    def execute(self, *args):
        if self.__form_auth is True:
            valid = EssentialParameters(
                self.__class__.__name__,
                [self.__form_url, self.__form_id, self.__form_password],
            )
            valid()
Example #4
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
        valid()

        adapter = (
            S3Adapter(self._access_key, self._secret_key)
            if self._access_key and self._secret_key
            else S3Adapter()
        )
        client = adapter.get_client()

        p = client.get_paginator("list_objects")
        for page in p.paginate(
            Bucket=self._bucket, Delimiter=self._delimiter, Prefix=self._prefix
        ):
            for c in page.get("Contents", []):
                path = c.get("Key")
                filename = os.path.basename(path)
                rec = re.compile(self._src_pattern)
                if not rec.fullmatch(filename):
                    continue
                dest_path = os.path.join(self._dest_dir, filename)
                client.download_file(self._bucket, path, dest_path)
Example #5
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_download.md"
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        client = Gcs.get_gcs_client(key_filepath)
        bucket = client.bucket(self._bucket)
        dl_files = []
        for blob in client.list_blobs(
            bucket, prefix=self._prefix, delimiter=self._delimiter
        ):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name))
            )

        ObjectStore.put(self._step, dl_files)
Example #6
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__, [self._src_dir, self._src_pattern]
        )
        valid()

        resource = self._s3_resource()
        bucket = resource.Bucket(self._bucket)
        files = super().get_target_files(self._src_dir, self._src_pattern)

        if len(files) > 0:
            for f in files:
                bucket.upload_file(
                    Key=os.path.join(self._key, os.path.basename(f)), Filename=f
                )
        else:
            self._logger.info(
                "Files to upload do not exist. File pattern: {}".format(
                    os.path.join(self._src_dir, self._src_pattern)
                )
            )
            if self._quit is True:
                return StepStatus.SUCCESSFUL_TERMINATION
Example #7
    def execute(self, *args):
        super().execute()
        if not self._key and not self._bucket:
            raise InvalidParameter("Specifying either 'key' or 'bucket' is essential.")
        if self._key and self._bucket:
            raise InvalidParameter("Cannot specify both 'key' and 'bucket'.")

        # fetch records and save to on-memory
        if self._key:
            valid = EssentialParameters(self.__class__.__name__, [self._tblname])
            valid()
            self._save_to_cache()
        elif self._bucket:
            valid = EssentialParameters(self.__class__.__name__, [self._dest_dir])
            valid()
            self._save_as_file_via_gcs()
Example #8
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__, [self._tblname, self._bucket, self._dest_dir]
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        gbq_client = BigQuery.get_bigquery_client(key_filepath)
        gbq_ref = gbq_client.dataset(self._dataset).table(self._tblname)

        gcs_client = Gcs.get_gcs_client(key_filepath)
        gcs_bucket = gcs_client.bucket(self._bucket)

        ymd_hms = datetime.now().strftime("%Y%m%d%H%M%S%f")
        path = "%s-%s" % ("".join(random.choices(string.ascii_letters, k=8)), ymd_hms)
        prefix = "%s/%s/%s" % (self._dataset, self._tblname, path)

        """
        gcs dir -> gs://{bucket_name}
                       /{dataset_name}/{table_name}
                       /{XXXXXXXX}-{yyyyMMddHHmmssSSS}/*.csv.gz
        """
        if self._filename:
            dest_gcs = "gs://%s/%s/%s*.csv.gz" % (self._bucket, prefix, self._filename)
        else:
            dest_gcs = "gs://%s/%s/*.csv.gz" % (self._bucket, prefix)

        # job config settings
        job_config = bigquery.ExtractJobConfig()
        job_config.compression = bigquery.Compression.GZIP
        job_config.destination_format = bigquery.DestinationFormat.CSV

        # Execute query.
        job = gbq_client.extract_table(
            gbq_ref, dest_gcs, job_config=job_config, location=self._location
        )
        job.result()

        # Download from gcs
        for blob in gcs_client.list_blobs(gcs_bucket, prefix=prefix):
            dest = os.path.join(self._dest_dir, os.path.basename(blob.name))
            blob.download_to_filename(dest)

        # Cleanup temporary files
        for blob in gcs_client.list_blobs(gcs_bucket, prefix=prefix):
            blob.delete()
Example #9
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        if isinstance(self._key, str):
            self._logger.warning((
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_delete.md"
            ))
            key_filepath = self._key
        else:
            key_filepath = self._source_path_reader(self._key)

        # remove src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            key_filepath,
            self._passphrase,
            self._timeout,
            self._retry_count,
            self._port,
        )
        sftp.clear_files(self._src_dir, re.compile(self._src_pattern))
Example #10
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_dir, self._src_pattern])
        valid()

        session = None
        if self._access_key and self._secret_key:
            session = Session(self._access_key, self._secret_key, self._region)

        s3 = session.resource("s3") if session else boto3.resource("s3")
        bucket = s3.Bucket(self._bucket)
        files = super().get_target_files(self._src_dir, self._src_pattern)
        if len(files) == 0:
            raise FileNotFound(
                "Files matching to the specified pattern %s is not found." %
                os.path.join(self._src_dir, self._src_pattern))
        else:
            for f in files:
                bucket.upload_file(Key=os.path.join(self._key,
                                                    os.path.basename(f)),
                                   Filename=f)
Example #11
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(
            self._src_dir, self._dest_dir, re.compile(self._src_pattern)
        )

        if self._quit is True and len(files) == 0:
            self._logger.info("No file was found. After process will not be processed")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % files)

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Example #12
    def execute(self, *args):
        valid = EssentialParameters(
            self.__class__.__name__, [self._user, self._password]
        )
        valid()

        super().execute()
Example #13
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))

        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(self._src_dir, self._dest_dir,
                                re.compile(self._src_pattern))

        if self.__quit is True and len(files) == 0:
            self._logger.info(
                "No file was found. After process will not be processed")
            return 0

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Example #14
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__, [self._collection, self._document, self._dest_dir]
        )
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/firestore_document_download.md"  # noqa
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        firestore_client = Firestore.get_firestore_client(key_filepath)
        ref = firestore_client.document(self._collection, self._document)
        doc = ref.get()

        with open(os.path.join(self._dest_dir, doc.id), mode="wt") as f:
            f.write(json.dumps(doc.to_dict()))
Example #15
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__,
            [self._src_dir, self._src_pattern, self._dest_dir])
        valid()

        service = BlobServiceAdapter().get_client(
            account_url=self._account_url,
            account_access_key=self._account_access_key,
            connection_string=self._connection_string,
        )
        files = super().get_target_files(self._src_dir, self._src_pattern)

        if len(files) > 0:
            for f in files:
                path = os.path.join(self._dest_dir, os.path.basename(f))
                blob_client = service.get_blob_client(
                    container=self._container_name, blob=path)
                with open(f, "rb") as data:
                    blob_client.upload_blob(data, overwrite=True)
        else:
            self._logger.info(
                "Files to upload do not exist. File pattern: {}".format(
                    os.path.join(self._src_dir, self._src_pattern)))
            if self._quit is True:
                return StepStatus.SUCCESSFUL_TERMINATION
Example #16
    def test_essential_parameters_ng(self):
        """
        EssentialParameters invalid case
        """
        with pytest.raises(CliboaException) as excinfo:
            valid = EssentialParameters("DummyClass", [""])
            valid()
        assert "is not specified" in str(excinfo.value)
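
Every example in this listing follows the same validation pattern: build an EssentialParameters instance from the step's class name and a list of required attribute values, then call the instance to run the check. Below is a minimal sketch of a compatible validator; the exception type and the exact message wording are assumptions taken from the test above, and the real cliboa implementation may differ.

class CliboaException(Exception):
    pass


class EssentialParameters(object):
    """Raise CliboaException when any essential parameter is empty.

    Minimal sketch only; cliboa's actual implementation may differ.
    """

    def __init__(self, cls_name, param_list):
        self._cls_name = cls_name
        self._param_list = param_list

    def __call__(self):
        for p in self._param_list:
            # An unset or empty value (None, "", []) is treated as missing.
            if not p:
                raise CliboaException(
                    "An essential parameter of %s is not specified." % self._cls_name
                )
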
Example #17
    def execute(self, *args):
        param_valid = EssentialParameters(self.__class__.__name__,
                                          [self._raw_query])
        param_valid()

        def func():
            self._sqlite_adptr.execute(self._raw_query)

        super().execute(func)
Example #18
    def execute(self, *args):
        super().execute()
        valid = EssentialParameters(self.__class__.__name__, [self._key])
        valid()

        df = pandas.read_gbq(
            query=self._get_query(),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=self._auth(),
        )
        ObjectStore.put(self._key, df)
Example #19
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__,
            [self._tblname, self._bucket, self._dest_dir])
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        gbq_client = bigquery.Client.from_service_account_json(
            self._credentials)
        gbq_ref = gbq_client.dataset(self._dataset).table(self._tblname)

        gcs_client = storage.Client.from_service_account_json(
            self._credentials)
        gcs_bucket = gcs_client.get_bucket(self._bucket)

        ymd_hms = datetime.now().strftime("%Y%m%d%H%M%S%f")
        path = "%s-%s" % ("".join(random.choices(string.ascii_letters,
                                                 k=8)), ymd_hms)
        prefix = "%s/%s/%s" % (self._dataset, self._tblname, path)

        # gcs dir -> gs://{bucket_name}/{dataset_name}/{table_name}/{XXXXXXXX}-{yyyyMMddHHmmssSSS}/*.csv.gz
        if self._filename:
            dest_gcs = "gs://%s/%s/%s*.csv.gz" % (self._bucket, prefix,
                                                  self._filename)
        else:
            dest_gcs = "gs://%s/%s/*.csv.gz" % (self._bucket, prefix)

        # job config settings
        job_config = bigquery.ExtractJobConfig()
        job_config.compression = bigquery.Compression.GZIP
        job_config.destination_format = bigquery.DestinationFormat.CSV

        # Execute query.
        job = gbq_client.extract_table(gbq_ref,
                                       dest_gcs,
                                       job_config=job_config,
                                       location=self._location)
        job.result()

        # Download from gcs
        for blob in gcs_bucket.list_blobs(prefix=prefix):
            dest = os.path.join(self._dest_dir, os.path.basename(blob.name))
            blob.download_to_filename(dest)

        # Cleanup temporary files
        for blob in gcs_bucket.list_blobs(prefix=prefix):
            blob.delete()
Example #20
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(
            self.__class__.__name__,
            [self._collection, self._document, self._dest_dir])
        valid()

        firestore_client = self._firestore_client()
        ref = firestore_client.document(self._collection, self._document)
        doc = ref.get()

        with open(os.path.join(self._dest_dir, doc.id), mode="wt") as f:
            f.write(json.dumps(doc.to_dict()))
Example #21
    def execute(self, *args):
        super().execute()

        param_valid = EssentialParameters(self.__class__.__name__,
                                          [self._tblname])
        param_valid()

        tbl_valid = SqliteTableExistence(self._dbname, self._tblname)
        tbl_valid()

        output_valid = IOOutput(self._io)
        output_valid()

        # get table column definition
        self._sqlite_adptr.connect(self._dbname)
        column_def = self.__get_column_def()

        if self._refresh is True:
            self.__refresh_table(column_def)

        # database transaction
        def insert():
            self._logger.info("Start to insert")
            insert_rows = []
            with open(self._s.cache_file, "r", encoding="utf-8") as f:
                for i, l_str in enumerate(f, 1):
                    l_dict = ast.literal_eval(l_str)
                    insert_rows.append(l_dict)

                    # Check only once
                    if i == 1:
                        self.__valid_column_def(column_def, l_dict)

                    # execute bulk insert
                    if i % self._insert_cnt == 0:
                        self._sqlite_adptr.execute_many_insert(
                            self._tblname, column_def, insert_rows,
                            self._replace_into)
                        insert_rows.clear()

                if len(insert_rows) > 0:
                    self._sqlite_adptr.execute_many_insert(
                        self._tblname, column_def, insert_rows,
                        self._replace_into)
                    insert_rows.clear()

            self._logger.info("Finish to insert")

        super().execute(insert)
        self._s.remove()
Example #22
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        if isinstance(self._key, str):
            self._logger.warning((
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download.md"
            ))
            key_filepath = self._key
        else:
            key_filepath = self._source_path_reader(self._key)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            key_filepath,
            self._passphrase,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(
            self._src_dir,
            self._dest_dir,
            re.compile(self._src_pattern),
            self._endfile_suffix,
            self._ignore_empty_file,
        )
        if self._quit is True and len(files) == 0:
            self._logger.info(
                "No file was found. After process will not be processed")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % files)

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Example #23
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__, [self._dest_path])
        valid()

        self._sqlite_adptr.connect(self._dbname)
        try:
            self._sqlite_adptr.export_table(
                self._tblname,
                self._dest_path,
                encoding=self._encoding,
                order=self._order,
                no_duplicate=self._no_duplicate,
            )
        finally:
            self._close_database()
Example #24
    def execute(self, *args):
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        # remove src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        sftp.clear_files(self._src_dir, re.compile(self._src_pattern))
Example #25
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        client = self._s3_client()
        p = client.get_paginator("list_objects")
        for page in p.paginate(Bucket=self._bucket,
                               Delimiter=self._delimiter,
                               Prefix=self._prefix):
            for c in page.get("Contents", []):
                filename = c.get("Key")
                rec = re.compile(self._src_pattern)
                if not rec.fullmatch(filename):
                    continue
                dest_path = os.path.join(self._dest_dir,
                                         os.path.basename(filename))
                client.download_file(self._bucket, filename, dest_path)
Example #26
    def execute(self, *args):
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        client = self._gcs_client()
        bucket = client.get_bucket(self._bucket)
        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Example #27
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))

        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        # remove src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        sftp.clear_files(self._src_dir, re.compile(self._src_pattern))
Example #28
    def execute(self, *args):
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        c = storage.Client(self._project_id,
                           credentials=ServiceAccount.auth(self._credentials))
        bucket = c.get_bucket(self._bucket)
        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            r = re.compile(self._src_pattern)
            if not r.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Example #29
    def execute(self, *args):
        super().execute()
        valid = EssentialParameters(self.__class__.__name__, [self._key])
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        df = pandas.read_gbq(
            query=self._get_query(),
            dialect="standard",
            location=self._location,
            project_id=self._project_id,
            credentials=ServiceAccount.auth(key_filepath),
        )
        ObjectStore.put(self._key, df)
Example #30
    def execute(self, *args):
        # essential parameters check
        param_valid = EssentialParameters(self.__class__.__name__,
                                          [self._dbname])
        param_valid()