def execute(self, *args):
    super().execute()
    param_valid = EssentialParameters(self.__class__.__name__, [self._table_schema])
    param_valid()

    cache_list = []
    inserts = False
    # initial if_exists
    if_exists = self.REPLACE if self._replace is True else self.APPEND
    with open(self._s.cache_file, "r", encoding="utf-8") as f:
        for i, l_str in enumerate(f):
            l_dict = ast.literal_eval(l_str)
            cache_list.append(l_dict)
            # bulk insert every BULK_LINE_CNT rows
            if len(cache_list) == self.BULK_LINE_CNT:
                df = pandas.DataFrame(self.__create_insert_data(cache_list))
                if inserts is True:
                    # if_exists after the first insert execution
                    if_exists = self.APPEND
                dest_tbl = self._dataset + "." + self._tblname
                self._logger.info(
                    "Start insert %s rows to %s" % (len(cache_list), dest_tbl)
                )
                df.to_gbq(
                    dest_tbl,
                    project_id=self._project_id,
                    if_exists=if_exists,
                    table_schema=self._table_schema,
                    location=self._location,
                    credentials=ServiceAccount.auth(self._credentials),
                )
                cache_list.clear()
                inserts = True
    # insert any remaining rows
    if len(cache_list) > 0:
        df = pandas.DataFrame(self.__create_insert_data(cache_list))
        if inserts is True:
            # if_exists after the first insert execution
            if_exists = self.APPEND
        dest_tbl = self._dataset + "." + self._tblname
        self._logger.info(
            "Start insert %s rows to %s" % (len(cache_list), dest_tbl)
        )
        df.to_gbq(
            dest_tbl,
            project_id=self._project_id,
            if_exists=if_exists,
            table_schema=self._table_schema,
            location=self._location,
            credentials=ServiceAccount.auth(self._credentials),
        )
    self._s.remove()
def __exec_insert(self, insert_rows, is_inserted, if_exists):
    """
    Execute insert into a BigQuery table

    Args:
        insert_rows: rows to insert
        is_inserted: if the data is already inserted or not
        if_exists: replace or append
    """
    df = pandas.DataFrame(self.__format_insert_data(insert_rows))
    if is_inserted is True:
        # if_exists after the first insert execution
        if_exists = self.APPEND
    dest_tbl = self._dataset + "." + self._tblname
    self._logger.info("Start insert %s rows to %s" % (len(insert_rows), dest_tbl))
    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)
    df.to_gbq(
        dest_tbl,
        project_id=self._project_id,
        if_exists=if_exists,
        table_schema=self._table_schema,
        location=self._location,
        credentials=ServiceAccount.auth(key_filepath),
    )
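# The execute() above repeats the same to_gbq block twice. A minimal sketch (an
# illustration, not the verbatim library code) of how that loop can delegate each
# chunk to __exec_insert instead, assuming it lives in the same class and uses the
# same attributes shown above (self._s, BULK_LINE_CNT, REPLACE/APPEND).
def execute(self, *args):
    super().execute()
    param_valid = EssentialParameters(self.__class__.__name__, [self._table_schema])
    param_valid()

    insert_rows = []
    is_inserted = False
    # initial if_exists; __exec_insert switches to APPEND after the first chunk
    if_exists = self.REPLACE if self._replace is True else self.APPEND
    with open(self._s.cache_file, "r", encoding="utf-8") as f:
        for l_str in f:
            insert_rows.append(ast.literal_eval(l_str))
            if len(insert_rows) == self.BULK_LINE_CNT:
                self.__exec_insert(insert_rows, is_inserted, if_exists)
                insert_rows.clear()
                is_inserted = True
    if len(insert_rows) > 0:
        self.__exec_insert(insert_rows, is_inserted, if_exists)
    self._s.remove()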
def _save_to_cache(self):
    self._logger.info("Save data to on memory")
    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/bigquery_read.md"
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)
    df = pandas.read_gbq(
        query="SELECT * FROM %s.%s" % (self._dataset, self._tblname)
        if self._query is None
        else self._query,
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(key_filepath),
    )
    ObjectStore.put(self._key, df)
def _save_to_cache(self):
    self._logger.info("Save data to on memory")
    df = pandas.read_gbq(
        query="SELECT * FROM %s.%s" % (self._dataset, self._tblname)
        if self._query is None
        else self._query,
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(self._credentials),
    )
    ObjectStore.put(self._key, df)
def execute(self, *args):
    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._key])
    valid()

    df = pandas.read_gbq(
        query=self._get_query(),
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(self._credentials),
    )
    ObjectStore.put(self._key, df)
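# _get_query() is called in the execute() above but not shown here. A sketch of a
# hypothetical helper that mirrors the inline fallback used by the _save_to_cache
# variants: use the configured query if given, otherwise select everything from
# dataset.table. The real implementation may differ.
def _get_query(self):
    if self._query is None:
        return "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
    return self._query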
def __exec_insert(self, insert_rows, is_inserted, if_exists):
    """
    Execute insert into a BigQuery table

    Args:
        insert_rows: rows to insert
        is_inserted: if the data is already inserted or not
        if_exists: replace or append
    """
    df = pandas.DataFrame(self.__format_insert_data(insert_rows))
    if is_inserted is True:
        # if_exists after the first insert execution
        if_exists = self.APPEND
    dest_tbl = self._dataset + "." + self._tblname
    self._logger.info("Start insert %s rows to %s" % (len(insert_rows), dest_tbl))
    df.to_gbq(
        dest_tbl,
        project_id=self._project_id,
        if_exists=if_exists,
        table_schema=self._table_schema,
        location=self._location,
        credentials=ServiceAccount.auth(self._credentials),
    )
def execute(self, *args):
    for k, v in self.__dict__.items():
        self._logger.info("%s : %s" % (k, v))

    dl_files = ObjectStore.get(self._symbol)
    if len(dl_files) > 0:
        self._logger.info("Delete files %s" % dl_files)
        c = storage.Client(
            super().get_step_argument("project_id"),
            credentials=ServiceAccount.auth(super().get_step_argument("credentials")),
        )
        bucket = c.get_bucket(super().get_step_argument("bucket"))
        for blob in bucket.list_blobs(
            prefix=super().get_step_argument("prefix"),
            delimiter=super().get_step_argument("delimiter"),
        ):
            for dl_f in dl_files:
                if dl_f == blob.name:
                    blob.delete()
                    break
    else:
        self._logger.info("No files to delete.")
def execute(self, *args):
    for k, v in self.__dict__.items():
        self._logger.info("%s : %s" % (k, v))

    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
    valid()

    c = storage.Client(
        self._project_id, credentials=ServiceAccount.auth(self._credentials)
    )
    bucket = c.get_bucket(self._bucket)
    dl_files = []
    for blob in bucket.list_blobs(prefix=self._prefix, delimiter=self._delimiter):
        r = re.compile(self._src_pattern)
        if not r.fullmatch(blob.name):
            continue
        dl_files.append(blob.name)
        blob.download_to_filename(
            os.path.join(self._dest_dir, os.path.basename(blob.name))
        )
    ObjectStore.put(self._step, dl_files)
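# The download step above hands its result to a later step (the delete step earlier)
# through ObjectStore.put / ObjectStore.get. A minimal sketch of such a store,
# assuming it is just a process-wide dict keyed by step name or symbol; the actual
# cliboa implementation may differ.
class ObjectStore:
    _store = {}

    @classmethod
    def put(cls, key, value):
        # keep the value in memory so a later step in the same run can read it
        cls._store[key] = value

    @classmethod
    def get(cls, key):
        # returns None when nothing was stored under the key
        return cls._store.get(key)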
def execute(self, *args):
    super().execute()
    valid = EssentialParameters(self.__class__.__name__, [self._key])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)
    df = pandas.read_gbq(
        query=self._get_query(),
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(key_filepath),
    )
    ObjectStore.put(self._key, df)
def execute(self, *args):
    super().execute()
    param_valid = EssentialParameters(self.__class__.__name__, [self._table_schema])
    param_valid()

    cache_list = []
    inserts = False
    # initial if_exists
    if_exists = self.REPLACE if self._replace is True else self.APPEND
    # resolve the service account key once, before the insert loop
    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)
    with open(self._s.cache_file, "r", encoding="utf-8") as f:
        for i, l_str in enumerate(f):
            l_dict = ast.literal_eval(l_str)
            cache_list.append(l_dict)
            # bulk insert every BULK_LINE_CNT rows
            if len(cache_list) == self.BULK_LINE_CNT:
                df = pandas.DataFrame(self.__create_insert_data(cache_list))
                if inserts is True:
                    # if_exists after the first insert execution
                    if_exists = self.APPEND
                dest_tbl = self._dataset + "." + self._tblname
                self._logger.info(
                    "Start insert %s rows to %s" % (len(cache_list), dest_tbl)
                )
                df.to_gbq(
                    dest_tbl,
                    project_id=self._project_id,
                    if_exists=if_exists,
                    table_schema=self._table_schema,
                    location=self._location,
                    credentials=ServiceAccount.auth(key_filepath),
                )
                cache_list.clear()
                inserts = True
    # insert any remaining rows
    if len(cache_list) > 0:
        df = pandas.DataFrame(self.__create_insert_data(cache_list))
        if inserts is True:
            # if_exists after the first insert execution
            if_exists = self.APPEND
        dest_tbl = self._dataset + "." + self._tblname
        self._logger.info(
            "Start insert %s rows to %s" % (len(cache_list), dest_tbl)
        )
        df.to_gbq(
            dest_tbl,
            project_id=self._project_id,
            if_exists=if_exists,
            table_schema=self._table_schema,
            location=self._location,
            credentials=ServiceAccount.auth(key_filepath),
        )
    self._s.remove()
def test_auth_no_credentials(self):
    assert ServiceAccount.auth(None) is None
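# A sketch, consistent with the test above, of what ServiceAccount.auth is assumed
# to do: return None when no credentials are given (so the Google clients fall back
# to default auth), otherwise build Credentials from a service account key file.
# The actual cliboa implementation may differ.
from google.oauth2 import service_account


class ServiceAccount:
    @staticmethod
    def auth(credentials):
        if credentials is None:
            return None
        return service_account.Credentials.from_service_account_file(credentials)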