def _write_table(self):
    import elasticsearch as es

    if not isinstance(self.stream, es.Elasticsearch):
        raise ValueError("stream must be an elasticsearch.Elasticsearch instance")

    self._verify_value_matrix()
    self._preprocess()

    mappings = self._get_mappings()

    try:
        result = self.stream.indices.create(index=self.index_name, body=mappings)
        self._logger.logger.debug(result)
    except es.TransportError as e:
        if e.error == "index_already_exists_exception":
            # ignore already existing index
            self._logger.logger.debug(msgfy.to_error_message(e))
        else:
            raise

    for body in self._get_body():
        try:
            self.stream.index(index=self.index_name, body=body, doc_type=self.document_type)
        except es.exceptions.RequestError as e:
            self._logger.logger.error("{}, body={}".format(msgfy.to_error_message(e), body))

def create_url_loader(logger, source_url, format_name, encoding, proxies):
    try:
        return ptr.TableUrlLoader(source_url, format_name, encoding=encoding, proxies=proxies)
    except (ptr.HTTPError, ptr.UrlError) as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.ECONNABORTED)

def convert(self, credentials, title: str) -> None:
    logger = self._logger
    result_counter = self._result_counter
    source_id = self._fetch_next_source_id()

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_mgr.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for table_data in loader.load():
            logger.debug("loaded table_data: {}".format(str(table_data)))

            sqlite_tabledata = self.normalize_table(table_data)
            source_info = SourceInfo(
                base_name=title,
                dst_table=sqlite_tabledata.table_name,
                format_name="google sheets",
                source_id=source_id,
            )

            try:
                self._table_creator.create(
                    sqlite_tabledata, self._index_list, source_info=source_info
                )
                SourceInfo.insert(source_info)
            except (ptr.ValidationError, ptr.DataError):
                result_counter.inc_fail()
    except ptr.OpenError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.DataError) as e:
        logger.error(
            "invalid credentials data: path={}, message={}".format(credentials, str(e))
        )
        result_counter.inc_fail()
    except ptr.APIError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()
    except ImportError as e:
        logger.error(
            "{}: try to install dependencies with 'pip install sqlitebiter[gs]'".format(
                msgfy.to_error_message(e)
            )
        )
    except OverflowError as e:
        logger.error("{}: {}".format(title, e))
        result_counter.inc_fail()

def star_repository(github_client, starred_info_set, cache_mgr_map, options):
    github_user = github_client.get_user()
    starred_count = 0

    for starred_info in sorted(starred_info_set):
        if starred_info.star_status == StarStatus.STARRED:
            logger.info("skip already starred: {}".format(starred_info.github_repo_id))
            continue

        if starred_info.is_owned and not options.include_owner_repo:
            logger.info("skip owned repository: {}".format(starred_info.github_repo_id))
            continue

        if starred_info.star_status == StarStatus.NOT_FOUND:
            logger.info("skip GitHub repository not found: {}".format(starred_info.pypi_pkg_name))
            continue

        if starred_info.star_status == StarStatus.NOT_AVAILABLE:
            logger.info(
                "skip repository that could not get info: {}".format(starred_info.pypi_pkg_name)
            )
            continue

        logger.info("star to {}".format(starred_info.github_repo_id))

        if options.dry_run:
            continue

        try:
            repo_obj = github_client.get_repo(starred_info.github_repo_id)
        except UnknownObjectException as e:
            logger.error(msgfy.to_error_message(e))
            continue

        try:
            github_user.add_to_starred(repo_obj)
            starred_count += 1
            cache_mgr_map[CacheType.PYPI].remove_pkg_cache(
                starred_info.pypi_pkg_name, "starred_info"
            )
        except UnknownObjectException as e:
            logger.error(
                dedent(
                    """\
                    failed to star a repository.
                    the personal access token may not have the public_repo scope.
                    msg: {}
                    """.format(msgfy.to_error_message(e))
                )
            )
            continue

    if starred_count:
        cache_mgr_map[CacheType.GITHUB].remove_misc_cache(github_user.login, "starred")

def create_url_loader(logger, source_url, format_name, encoding, type_hint_rules, proxies):
    try:
        return ptr.TableUrlLoader(
            source_url,
            format_name,
            encoding=encoding,
            type_hint_rules=type_hint_rules,
            proxies=proxies,
        )
    except (ptr.HTTPError, ptr.UrlError) as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.ECONNABORTED)

def _preprocess_table_dp(self):
    if self._is_complete_table_dp_preprocess:
        return

    self._logger.logger.debug("_preprocess_table_dp")

    if typepy.is_empty_sequence(self.header_list) and self._use_default_header:
        self.header_list = [
            convert_idx_to_alphabet(col_idx)
            for col_idx in range(len(self.__value_matrix_org[0]))
        ]

    try:
        self._table_value_dp_matrix = self._dp_extractor.to_dp_matrix(
            to_value_matrix(self.header_list, self.__value_matrix_org)
        )
    except TypeError as e:
        self._logger.logger.debug(msgfy.to_error_message(e))
        self._table_value_dp_matrix = []

    self._column_dp_list = self._dp_extractor.to_column_dp_list(
        self._table_value_dp_matrix, self._column_dp_list
    )

    self._is_complete_table_dp_preprocess = True

def load_ipynb_text(text: str):
    try:
        return nbformat.reads(text, as_version=4)
    except AttributeError as e:
        raise nbformat.reader.NotJSONError(msgfy.to_error_message(e))
    except OSError as e:
        _schema_not_found_error_handler(e)
        raise

def load_ipynb_file(file_path: str, encoding: str):
    with open(file_path, encoding=encoding) as f:
        try:
            return nbformat.read(f, as_version=4)
        except AttributeError as e:
            raise nbformat.reader.NotJSONError(msgfy.to_error_message(e))
        except OSError as e:
            _schema_not_found_error_handler(e)
            raise

def load_ipynb_file(file_path, encoding):
    with io.open(file_path, encoding=encoding) as f:
        try:
            return nbformat.read(f, as_version=4)
        except AttributeError as e:
            raise nbformat.reader.NotJSONError(msgfy.to_error_message(e))
        except IOError as e:
            _schema_not_found_error_handler(e)
            raise

def load_json(self, cache_file_path):
    with cache_file_path.open() as f:
        try:
            return json.load(f)
        except json.JSONDecodeError as e:
            logger.error(
                "failed to load cache file '{}': {}".format(
                    cache_file_path, msgfy.to_error_message(e)
                )
            )
            return None

def gs(ctx, credentials, title, output_path):
    """
    Convert a spreadsheet in Google Sheets to a SQLite database file.

    CREDENTIALS: OAuth2 Google credentials file.
    TITLE: Title of the Google Sheets to convert.
    """

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} gs".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_manager.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for table_data in loader.load():
            logger.debug(u"loaded table_data: {}".format(six.text_type(table_data)))

            sqlite_tabledata = SQLiteTableDataSanitizer(table_data).normalize()

            try:
                table_creator.create(sqlite_tabledata, ctx.obj.get(Context.INDEX_LIST))
            except (ptr.ValidationError, ptr.DataError):
                result_counter.inc_fail()

            logger.info(
                get_success_message(
                    verbosity_level,
                    "google sheets",
                    schema_extractor.get_table_schema_text(sqlite_tabledata.table_name),
                )
            )
    except ptr.OpenError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()
    except AttributeError:
        logger.error(u"invalid credentials data: path={}".format(credentials))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.DataError) as e:
        logger.error(
            u"invalid credentials data: path={}, message={}".format(credentials, str(e))
        )
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)
    sys.exit(result_counter.get_return_code())

def __search_github_repo(self, repo_id, search_value, category_name):
    cache_filepath = self.__github_cache_mgr.get_misc_cache_filepath(
        "/".join([repo_id, category_name]), sanitize_filename(search_value)
    )
    msg_template = "source {result} include {category}: repo={repo} path={path}"

    if self.__github_cache_mgr.is_cache_available(cache_filepath):
        with cache_filepath.open() as f:
            try:
                if int(f.read()):
                    logger.debug(
                        msg_template.format(
                            result="found",
                            category=category_name,
                            repo=repo_id,
                            path=cache_filepath,
                        )
                    )
                    return True
                else:
                    logger.debug(
                        msg_template.format(
                            result="not found",
                            category=category_name,
                            repo=repo_id,
                            path=cache_filepath,
                        )
                    )
                    return False
            except ValueError as e:
                logger.warn(msgfy.to_error_message(e))

    query = "{} in:file language:python repo:{}".format(search_value, repo_id)
    logger.debug("search {}: {}".format(category_name, query))
    results = self.__github_client.search_code(query)
    search_regexp = re.compile(search_value, re.MULTILINE)

    with cache_filepath.open(mode="w") as f:
        for content_file in results.get_page(0):
            decoded_content = MultiByteStrDecoder(content_file.decoded_content).unicode_str

            if not search_regexp.search(decoded_content):
                continue

            logger.debug(
                msg_template.format(
                    result="found",
                    category=category_name,
                    repo=repo_id,
                    path=content_file.path,
                )
            )
            f.write("1")

            return True

        f.write("0")

    return False

def __init__(self, options):
    self._options = options
    self._dclient = None

    if self._options.use_docker:
        try:
            self._dclient = DockerClient(options.tc_command_output)
        except DockerException as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(1)

def close(self):
    if self.workbook is None:
        return

    try:
        self.workbook.save(self._file_path)
    except IndexError as e:
        logger.debug(msgfy.to_error_message(e))

    self._clear()

def convert(self, credentials, title):
    logger = self._logger
    result_counter = self._result_counter
    source_id = self._fetch_next_source_id()

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_mgr.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for table_data in loader.load():
            logger.debug("loaded table_data: {}".format(six.text_type(table_data)))

            sqlite_tabledata = self.normalize_table(table_data)
            source_info = SourceInfo(
                base_name=title,
                dst_table=sqlite_tabledata.table_name,
                format_name="google sheets",
                source_id=source_id,
            )

            try:
                self._table_creator.create(
                    sqlite_tabledata, self._index_list, source_info=source_info
                )
                SourceInfo.insert(source_info)
            except (ptr.ValidationError, ptr.DataError):
                result_counter.inc_fail()
    except ptr.OpenError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.DataError) as e:
        logger.error(
            "invalid credentials data: path={}, message={}".format(credentials, str(e))
        )
        result_counter.inc_fail()
    except ptr.APIError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()

def normalize_tc_value(tc_obj):
    import ipaddress

    try:
        tc_obj.sanitize()
    except ipaddress.AddressValueError as e:
        logger.error(IPV6_OPTION_ERROR_MSG_FORMAT.format(e))
        sys.exit(errno.EINVAL)
    except ValueError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.EINVAL)

def verify_container(self, container, exit_on_exception=False):
    if not is_execute_tc_command(self.__tc_command_output):
        return

    try:
        self.__verify_container(container)
    except ContainerNotFoundError as e:
        if exit_on_exception:
            logger.error(msgfy.to_error_message(e))
            sys.exit(errno.EPERM)

        raise

def create_url_loader(
    logger,
    source_url: str,
    format_name: str,
    encoding: str,
    type_hint_rules: Optional[TypeHintRules],
    proxies: Optional[Dict],
) -> AbstractTableReader:
    try:
        return ptr.TableUrlLoader(
            source_url,
            format_name,
            encoding=encoding,
            type_hint_rules=type_hint_rules,
            proxies=proxies,
        )
    except (ptr.HTTPError, ptr.UrlError) as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.ECONNABORTED)

def __check_tc(self, tc):
    try:
        tc.validate()
    except (NetworkInterfaceNotFoundError, ContainerNotFoundError) as e:
        logger.error(e)
        return errno.EINVAL
    except ipaddress.AddressValueError as e:
        logger.error(IPV6_OPTION_ERROR_MSG_FORMAT.format(e))
        return errno.EINVAL
    except ParameterError as e:
        logger.error(msgfy.to_error_message(e))
        return errno.EINVAL

    return 0

def _preprocess_table_dp(self) -> None:
    if self._is_complete_table_dp_preprocess:
        return

    self._logger.logger.debug("_preprocess_table_dp")

    try:
        self._table_value_dp_matrix = self._dp_extractor.to_dp_matrix(
            to_value_matrix(self.headers, self.value_matrix)
        )
    except TypeError as e:
        self._logger.logger.debug(msgfy.to_error_message(e))
        self._table_value_dp_matrix = []

    self._is_complete_table_dp_preprocess = True

def __create_loader(self, url):
    logger = self._logger
    proxies = self.__get_proxies()

    try:
        return create_url_loader(logger, url, self._format_name, self._encoding, proxies)
    except ptr.LoaderNotFoundError as e:
        logger.debug(e)

        # fall back to the HTML loader when no loader matches the specified format
        try:
            return create_url_loader(logger, url, "html", self._encoding, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

def extract_starred_info(self, pypi_pkg_name):
    cache_filepath = self.__pypi_cache_mgr.get_pkg_cache_filepath(pypi_pkg_name, "starred_info")

    if self.__github_cache_mgr.is_cache_available(cache_filepath):
        cache_data = self.__github_cache_mgr.load_json(cache_filepath)
        if cache_data:
            try:
                info = GitHubStarredInfo(**cache_data)
                info.validate()
                return info
            except (TypeError, ValueError) as e:
                logger.debug("failed to load cache: {}".format(msgfy.to_debug_message(e)))

    pip_show = PipShow.execute(pypi_pkg_name)
    github_repo_info = self.__find_github_repo_info_from_text(pip_show.content)
    if github_repo_info:
        return self.__register_starred_status(pypi_pkg_name, github_repo_info, depth=0)

    try:
        starred_info = self.__traverse_github_repo(pip_show, pypi_pkg_name, depth=0)
        if starred_info:
            return starred_info

        return GitHubStarredInfo(
            pypi_pkg_name=pypi_pkg_name,
            github_repo_id="[Repository not found]",
            star_status=StarStatus.NOT_FOUND,
            is_owned=None,
            url=None,
        )
    except RateLimitExceededException as e:
        logger.error(msgfy.to_error_message(e))

        return GitHubStarredInfo(
            pypi_pkg_name=pypi_pkg_name,
            github_repo_id="Exceed API rate limit",
            star_status=StarStatus.NOT_AVAILABLE,
            is_owned=None,
            url=None,
        )

def set_tc_from_file(logger, config_file_path, is_overwrite):
    return_code = 0

    loader = TcConfigLoader(logger)
    loader.is_overwrite = is_overwrite

    try:
        loader.load_tcconfig(config_file_path)
    except IOError as e:
        logger.error(msgfy.to_error_message(e))
        return errno.EIO

    for tcconfig_command in loader.get_tcconfig_command_list():
        return_code |= spr.SubprocessRunner(tcconfig_command).run()

    return return_code

def __create_loader(self, url):
    logger = self._logger
    type_hint_rules = self.__extract_type_hint_rules(url)
    proxies = self.__get_proxies()

    try:
        return create_url_loader(
            logger, url, self._format_name, self._encoding, type_hint_rules, proxies
        )
    except ptr.LoaderNotFoundError as e:
        logger.debug(e)

        try:
            return create_url_loader(logger, url, "html", self._encoding, type_hint_rules, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

def create_text_loader(
    logger,
    text: str,
    format_name: str,
    encoding: str,
    type_hint_rules: Optional[TypeHintRules],
) -> AbstractTableReader:
    try:
        return ptr.TableTextLoader(
            text,
            format_name,
            encoding=encoding,
            type_hint_rules=type_hint_rules,
        )
    except ptr.LoaderNotFoundError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

def __create_loader(self, url):
    logger = self._logger
    type_hint_rules = TYPE_HINT_FROM_HEADER_RULES if self._is_type_hint_header else None
    proxies = self.__get_proxies()

    try:
        return create_url_loader(
            logger, url, self._format_name, self._encoding, type_hint_rules, proxies
        )
    except ptr.LoaderNotFoundError as e:
        logger.debug(e)

        try:
            return create_url_loader(logger, url, "html", self._encoding, type_hint_rules, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

def __create_loader(self, url: str) -> AbstractTableReader:
    logger = self._logger
    type_hint_rules = self.__extract_type_hint_rules(url)
    proxies = self.__get_proxies()

    try:
        return create_url_loader(
            logger, url, self._format_name, self._encoding, type_hint_rules, proxies
        )
    except ptr.LoaderNotFoundError as e:
        logger.debug(e)

        try:
            return create_url_loader(logger, url, "html", self._encoding, type_hint_rules, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

def extract_tc_params(options):
    dclient = None
    if options.use_docker:
        try:
            dclient = DockerClient(options.tc_command_output)
        except DockerException as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(1)

    tc_params = {}
    for device in options.device:
        try:
            if options.use_docker and dclient.exist_container(container=device):
                container = device
                dclient.verify_container(container)
                dclient.create_veth_table(container)

                container_info = dclient.extract_container_info(container)
                for veth in dclient.fetch_veth_list(container_info.name):
                    tc_params.update(
                        TcShapingRuleParser(
                            veth, options.ip_version, options.tc_command_output, logger
                        ).get_tc_parameter()
                    )
                    key = "{veth} (container_id={id}, image={image})".format(
                        veth=veth, id=container_info.id[:12], image=container_info.image
                    )
                    tc_params[key] = tc_params.pop(veth)
            else:
                verify_network_interface(device, options.tc_command_output)
                tc_params.update(
                    TcShapingRuleParser(
                        device, options.ip_version, options.tc_command_output, logger
                    ).get_tc_parameter()
                )
        except TargetNotFoundError as e:
            logger.warn(e)
            continue

    return tc_params

def set_tc_from_file(
    logger, config_file_path: str, is_overwrite: bool, tc_command_output: Optional[str]
) -> int:
    return_code = 0

    loader = TcConfigLoader(logger)
    loader.is_overwrite = is_overwrite
    loader.tc_command_output = tc_command_output

    try:
        loader.load_tcconfig(config_file_path)
    except OSError as e:
        logger.error(msgfy.to_error_message(e))
        return errno.EIO

    for tcconfig_command in loader.get_tcconfig_commands():
        runner = spr.SubprocessRunner(tcconfig_command)
        return_code |= runner.run()

        if return_code != 0:
            logger.error(runner.stderr)
        elif tc_command_output == TcCommandOutput.STDOUT:
            print(runner.stdout.strip())

    return return_code

def __create_ifindex_table(self, container_name):
    netns_path = self.__get_netns_path(container_name)

    try:
        netns_path.stat()
    except PermissionError as e:
        logger.error(e)
        return errno.EPERM

    IfIndex.create()

    proc = SubprocessRunner(
        "ip netns exec {ns} ip link show type veth".format(ns=container_name), dry_run=False
    )
    if proc.run() != 0:
        logger.error(proc.stderr)
        return proc.returncode

    veth_groups_regexp = re.compile("([0-9]+): ([a-z0-9]+)@([a-z0-9]+): ")
    peer_ifindex_prefix_regexp = re.compile("^if")

    try:
        for i, line in enumerate(proc.stdout.splitlines()):
            match = veth_groups_regexp.search(line)
            if not match:
                continue

            logger.debug("parse veth @{} [{:02d}] {}".format(container_name, i, line))

            ifindex, ifname, peer_ifindex = match.groups()
            IfIndex.insert(
                IfIndex(
                    host=container_name,
                    ifindex=int(ifindex),
                    ifname=ifname,
                    peer_ifindex=int(peer_ifindex_prefix_regexp.sub("", peer_ifindex)),
                )
            )

        proc = SubprocessRunner("ip link show type veth", dry_run=False)
        if proc.run() != 0:
            logger.error(proc.stderr)
            return proc.returncode

        for i, line in enumerate(proc.stdout.splitlines()):
            logger.debug("parse veth @docker-host [{:02d}] {}".format(i, line))

            match = veth_groups_regexp.search(line)
            if not match:
                continue

            ifindex, ifname, peer_ifindex = match.groups()

            try:
                IfIndex.insert(
                    IfIndex(
                        host=self.__host_name,
                        ifindex=int(ifindex),
                        ifname=ifname,
                        peer_ifindex=int(peer_ifindex_prefix_regexp.sub("", peer_ifindex)),
                    )
                )
            except OperationalError as e:
                logger.error(msgfy.to_error_message(e))
    finally:
        IfIndex.commit()

    try:
        netns_path.remove_p()
    except PermissionError as e:
        logger.error(msgfy.to_error_message(e))
        return errno.EPERM

    return 0

def test_exception_format_str(self, format_str, expected):
    with pytest.raises(expected):
        assert msgfy.to_error_message(ValueError("test"), format_str)

def test_exception_e_object(self, exception_obj, expected):
    with pytest.raises(expected):
        assert msgfy.to_error_message(exception_obj)

def test_normal_format_str(self, format_str):
    assert msgfy.to_error_message(ValueError("test"), format_str) != format_str

def test_normal_smoke(self):
    assert msgfy.to_error_message(ValueError("test"))

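Every snippet above follows the same pattern: catch a concrete exception and pass it to msgfy.to_error_message() (or msgfy.to_debug_message(), as in extract_starred_info) so the log line carries caller context rather than just str(e). Below is a minimal self-contained sketch of that pattern; parse_int is a hypothetical helper, and the exact layout of the returned string depends on the installed msgfy version's default message format.

import msgfy


def parse_int(value):
    # hypothetical caller, for illustration only
    try:
        return int(value)
    except ValueError as e:
        # Unlike str(e), the returned message embeds context such as the
        # function name and line number where the exception was caught.
        print(msgfy.to_error_message(e))
        print(msgfy.to_debug_message(e))
        return None


parse_int("not a number")

As test_normal_format_str above implies, a custom format string passed as the second argument is interpolated into the result rather than returned verbatim.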