class EnvConfig:
    """Declarative application settings plus derived node specs.

    Every class attribute below is a ``T`` schema expression giving the type,
    constraints, default and (sometimes) a description of one setting.
    ``__post_init__`` cross-checks dependent settings and computes
    ``registery_node_spec`` / ``current_node_spec``.

    NOTE(review): ``scheduler_network``, ``scheduler_url`` and
    ``scheduler_extra_networks`` are read by the methods below but are not
    declared in this view, and the ``root_url`` / ``admin_email`` defaults
    look like masked placeholders -- confirm against the real configuration.
    """

    debug = T.bool.default(True).desc('debug')
    log_level = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url = T.url.relaxed.default('http://*****:*****@url,name@url')
    secret_key = T.str.default('8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address = T.bool.default(False)
    check_feed_minutes = T.int.min(1).default(30)
    feed_story_retention = T.int.min(1).default(5000).desc('max storys to keep per feed')

    # actor
    actor_storage_path = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta = T.int.min(1).default(5000)
    actor_queue_max_complete_size = T.int.min(0).default(500)
    actor_max_retry_time = T.int.min(1).default(600)
    actor_max_retry_count = T.int.min(0).default(1)
    actor_token = T.str.optional

    # postgres database
    pg_host = T.str.default('localhost').desc('postgres host')
    pg_port = T.int.default(5432).desc('postgres port')
    pg_db = T.str.default('rssant').desc('postgres database')
    pg_user = T.str.default('rssant').desc('postgres user')
    pg_password = T.str.default('rssant').desc('postgres password')

    # github login
    github_client_id = T.str.optional
    github_secret = T.str.optional

    # sentry
    sentry_enable = T.bool.default(False)
    sentry_dsn = T.str.optional

    # email smtp
    admin_email = T.email.default('*****@*****.**')
    smtp_enable = T.bool.default(False)
    smtp_host = T.str.optional
    smtp_port = T.int.min(0).optional
    smtp_username = T.str.optional
    smtp_password = T.str.optional
    smtp_use_ssl = T.bool.default(False)

    def _parse_scheduler_extra_networks(self):
        """Turn the ``name@url,name@url`` setting into a list of network dicts.

        Returns an empty list when the setting is falsy. Each comma-separated
        entry is split on its first ``@``; an entry without ``@`` raises
        ``Invalid``. The collected dicts are passed through
        ``validate_extra_networks`` before being returned as a list.
        """
        raw = self.scheduler_extra_networks
        if not raw:
            return []
        collected = []
        for entry in raw.strip().split(','):
            name, separator, url = entry.partition('@')
            if not separator:
                raise Invalid('invalid scheduler_extra_networks')
            collected.append({'name': name, 'url': url})
        return list(validate_extra_networks(collected))

    def __post_init__(self):
        """Validate dependent settings and build the node specs.

        Raises ``Invalid`` when sentry is enabled without a DSN, or when SMTP
        is enabled without a host or a (truthy) port.
        """
        if self.sentry_enable and not self.sentry_dsn:
            raise Invalid('sentry_dsn is required when sentry_enable=True')
        if self.smtp_enable and not self.smtp_host:
            raise Invalid('smtp_host is required when smtp_enable=True')
        if self.smtp_enable and not self.smtp_port:
            raise Invalid('smtp_port is required when smtp_enable=True')

        extra_networks = self._parse_scheduler_extra_networks()
        scheduler_primary = {
            'name': self.scheduler_network,
            'url': self.scheduler_url,
        }
        # Spec of the scheduler node: its primary network first, then extras.
        self.registery_node_spec = {
            'name': 'scheduler',
            'modules': ['scheduler'],
            'networks': [scheduler_primary, *extra_networks],
        }
        # Spec of this process: named after the local node and the PID; it
        # carries no modules and its single network entry has no URL.
        local_name = '{}@{}'.format(LOCAL_NODE_NAME, os.getpid())
        local_network = {'name': self.scheduler_network, 'url': None}
        self.current_node_spec = {
            'name': local_name,
            'modules': [],
            'networks': [local_network],
        }
dict( url=story.link, use_proxy=feed.use_proxy, feed_id=story.feed_id, offset=story.offset, num_sub_sentences=num_sub_sentences, ))


def _is_fulltext_story(story):
    """Return whether ``story`` already counts as a full-text story.

    A story with any of ``iframe_url``, ``audio_url`` or ``image_url`` set is
    treated as full text without inspecting its content. Otherwise the
    decision is delegated to ``is_fulltext_content`` on a
    ``StoryContentInfo`` built from ``story.content``.
    """
    if story.iframe_url or story.audio_url or story.image_url:
        return True
    return is_fulltext_content(StoryContentInfo(story.content))


# Enum schema whose allowed values are the member names of
# FulltextAcceptStrategy, joined with commas.
T_ACCEPT = T.enum(','.join(FulltextAcceptStrategy.__members__))
# Exception types grouped as "timeout" errors (socket-level, builtin and
# requests). NOTE(review): the code using this tuple is outside this excerpt.
_TIMEOUT_ERRORS = (socket.timeout, TimeoutError, requests.exceptions.Timeout)


# NOTE(review): the definition below continues past the end of this excerpt.
@actor('harbor_rss.sync_story_fulltext')
def do_sync_story_fulltext( ctx: ActorContext, feed_id: T.int, offset: T.int, ) -> T.dict( feed_id=T.int, offset=T.int.min(0), use_proxy=T.bool, url=T.url, response_status=T.int, accept=T_ACCEPT,
class EnvConfig(ConfigModel):
    """Declarative application settings plus values derived from them.

    Every annotated class attribute is a ``T`` schema expression giving the
    type, constraints, default and (sometimes) a description of one setting.
    ``__post_init__`` cross-checks dependent settings and computes
    ``registery_node_spec``, ``current_node_spec`` and
    ``pg_story_volumes_parsed``.

    NOTE(review): ``scheduler_network``, ``scheduler_url`` and
    ``scheduler_extra_networks`` are read by the methods below but are not
    declared in this view, and the ``root_url`` / ``admin_email`` defaults
    look like masked placeholders -- confirm against the real configuration.
    """

    debug: bool = T.bool.default(False).desc('debug')
    profiler_enable: bool = T.bool.default(False).desc('enable profiler or not')
    debug_toolbar_enable: bool = T.bool.default(False).desc('enable debug toolbar or not')
    log_level: str = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url: str = T.url.relaxed.default('http://*****:*****@url,name@url')
    secret_key: str = T.str.default('8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address: bool = T.bool.default(False)
    check_feed_minutes: int = T.int.min(1).default(30)
    feed_story_retention: int = T.int.min(1).default(5000).desc('max storys to keep per feed')
    # Comma-separated volume specs; see _parse_story_volumes for the format.
    pg_story_volumes: str = T.str.optional

    # actor
    actor_storage_path: str = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta: int = T.int.min(1).default(5000)
    actor_queue_max_complete_size: int = T.int.min(0).default(500)
    actor_max_retry_time: int = T.int.min(1).default(600)
    actor_max_retry_count: int = T.int.min(0).default(1)
    actor_token: str = T.str.optional

    # postgres database
    pg_host: str = T.str.default('localhost').desc('postgres host')
    pg_port: int = T.int.default(5432).desc('postgres port')
    pg_db: str = T.str.default('rssant').desc('postgres database')
    pg_user: str = T.str.default('rssant').desc('postgres user')
    pg_password: str = T.str.default('rssant').desc('postgres password')

    # github login
    github_client_id: str = T.str.optional
    github_secret: str = T.str.optional

    # sentry
    sentry_enable: bool = T.bool.default(False)
    sentry_dsn: str = T.str.optional

    # email smtp
    admin_email: str = T.email.default('*****@*****.**')
    smtp_enable: bool = T.bool.default(False)
    smtp_host: str = T.str.optional
    smtp_port: int = T.int.min(0).optional
    smtp_username: str = T.str.optional
    smtp_password: str = T.str.optional
    smtp_use_ssl: bool = T.bool.default(False)

    # rss proxy
    rss_proxy_url: str = T.url.optional
    rss_proxy_token: str = T.str.optional
    rss_proxy_enable: bool = T.bool.default(False)

    # analytics
    analytics_matomo_enable: bool = T.bool.default(False)
    analytics_matomo_url: str = T.str.optional
    analytics_matomo_site_id: str = T.str.optional
    analytics_google_enable: bool = T.bool.default(False)
    analytics_google_tracking_id: str = T.str.optional

    def _parse_scheduler_extra_networks(self):
        """Turn the ``name@url,name@url`` setting into a list of network dicts.

        Returns an empty list when the setting is falsy. Each comma-separated
        entry is split on its first ``@``; an entry without ``@`` raises
        ``Invalid``. The collected dicts are passed through
        ``validate_extra_networks`` before being returned as a list.
        """
        if not self.scheduler_extra_networks:
            return []
        networks = []
        for part in self.scheduler_extra_networks.strip().split(','):
            part = part.split('@', maxsplit=1)
            if len(part) != 2:
                raise Invalid('invalid scheduler_extra_networks')
            name, url = part
            networks.append(dict(name=name, url=url))
        networks = validate_extra_networks(networks)
        return list(networks)

    @classmethod
    def _parse_story_volumes(cls, text: str):
        """Parse a comma-separated list of story volume specs.

        Format: {volume}:{user}:{password}@{host}:{port}/{db}/{table}

        Returns a dict keyed by the integer volume number; each value holds
        ``user``, ``password``, ``host``, ``port`` (int), ``db`` and
        ``table``. A later entry with the same volume number replaces an
        earlier one. Raises ``Invalid`` for any entry that does not match
        the format.

        >>> volumes = EnvConfig._parse_story_volumes('0:user:password@host:5432/db/table')
        >>> expect = {0: dict(
        ...     user='user', password='password',
        ...     host='host', port=5432, db='db', table='table'
        ... )}
        >>> volumes == expect
        True
        """
        # No field may contain ':', '@' or '/', since those are the separators.
        re_volume = re.compile(
            r'^(\d+)\:([^:@/]+)\:([^:@/]+)\@([^:@/]+)\:(\d+)\/([^:@/]+)\/([^:@/]+)$')
        volumes = {}
        for part in text.split(','):
            match = re_volume.match(part)
            if not match:
                raise Invalid(f'invalid story volume {part!r}')
            volume = int(match.group(1))
            volumes[volume] = dict(
                user=match.group(2),
                password=match.group(3),
                host=match.group(4),
                port=int(match.group(5)),
                db=match.group(6),
                table=match.group(7),
            )
        return volumes

    def __post_init__(self):
        """Validate dependent settings and compute the derived attributes.

        Raises ``Invalid`` when sentry is enabled without a DSN, or when SMTP
        is enabled without a host or a (truthy) port.
        """
        if self.sentry_enable and not self.sentry_dsn:
            raise Invalid('sentry_dsn is required when sentry_enable=True')
        if self.smtp_enable:
            if not self.smtp_host:
                raise Invalid('smtp_host is required when smtp_enable=True')
            if not self.smtp_port:
                raise Invalid('smtp_port is required when smtp_enable=True')
        scheduler_extra_networks = self._parse_scheduler_extra_networks()
        # Spec of the scheduler node: its primary network first, then extras.
        self.registery_node_spec = {
            'name': 'scheduler',
            'modules': ['scheduler'],
            'networks': [{
                'name': self.scheduler_network,
                'url': self.scheduler_url,
            }] + scheduler_extra_networks
        }
        # Spec of this process: named after the local node and the PID; it
        # carries no modules and its single network entry has no URL.
        self.current_node_spec = {
            'name': '{}@{}'.format(LOCAL_NODE_NAME, os.getpid()),
            'modules': [],
            'networks': [{
                'name': self.scheduler_network,
                'url': None,
            }]
        }
        if self.pg_story_volumes:
            volumes = self._parse_story_volumes(self.pg_story_volumes)
        else:
            # Without explicit volumes, use a single volume 0 that lives in
            # the main postgres database.
            volumes = {0: dict(
                user=self.pg_user,
                password=self.pg_password,
                host=self.pg_host,
                port=self.pg_port,
                db=self.pg_db,
                table='story_volume_0',
            )}
        self.pg_story_volumes_parsed = volumes
class EnvConfig(ConfigModel):
    """Declarative application settings plus values derived from them.

    Every annotated class attribute is a ``T`` schema expression giving the
    type, constraints, default and (sometimes) a description of one setting.
    ``__post_init__`` cross-checks dependent settings and computes
    ``registery_node_spec``, ``current_node_spec``,
    ``pg_story_volumes_parsed`` and ``github_standby_configs_parsed``.

    NOTE(review): ``scheduler_network``, ``scheduler_url``,
    ``scheduler_extra_networks`` and ``standby_domains`` are read below but
    are not declared in this view, and the ``root_url`` / ``admin_email``
    defaults look like masked placeholders -- confirm against the real
    configuration.
    """

    debug: bool = T.bool.default(False).desc('debug')
    profiler_enable: bool = T.bool.default(False).desc(
        'enable profiler or not')
    debug_toolbar_enable: bool = T.bool.default(False).desc(
        'enable debug toolbar or not')
    log_level: str = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url: str = T.url.default('http://*****:*****@url,name@url')
    secret_key: str = T.str.default(
        '8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address: bool = T.bool.default(False)
    check_feed_minutes: int = T.int.min(1).default(30)
    feed_story_retention: int = T.int.min(1).default(5000).desc(
        'max storys to keep per feed')
    # Comma-separated volume specs; see _parse_story_volumes for the format.
    pg_story_volumes: str = T.str.optional
    feed_reader_request_timeout: int = T.int.default(90).desc(
        'feed reader request timeout')

    # actor
    actor_storage_path: str = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta: int = T.int.min(1).default(5000)
    actor_queue_max_complete_size: int = T.int.min(0).default(500)
    actor_max_retry_time: int = T.int.min(1).default(600)
    actor_max_retry_count: int = T.int.min(0).default(1)
    actor_token: str = T.str.optional

    # postgres database
    pg_host: str = T.str.default('localhost').desc('postgres host')
    pg_port: int = T.int.default(5432).desc('postgres port')
    pg_db: str = T.str.default('rssant').desc('postgres database')
    pg_user: str = T.str.default('rssant').desc('postgres user')
    pg_password: str = T.str.default('rssant').desc('postgres password')

    # github login
    github_client_id: str = T.str.optional
    github_secret: str = T.str.optional
    github_standby_configs: str = T.str.optional.desc(
        'domain,client_id,secret;')

    # sentry
    sentry_enable: bool = T.bool.default(False)
    sentry_dsn: str = T.str.optional

    # email smtp
    admin_email: str = T.email.default('*****@*****.**')
    smtp_enable: bool = T.bool.default(False)
    smtp_host: str = T.str.optional
    smtp_port: int = T.int.min(0).optional
    smtp_username: str = T.str.optional
    smtp_password: str = T.str.optional
    smtp_use_ssl: bool = T.bool.default(False)

    # rss proxy
    rss_proxy_url: str = T.url.optional
    rss_proxy_token: str = T.str.optional
    rss_proxy_enable: bool = T.bool.default(False)

    # http proxy or socks proxy
    proxy_url: str = T.url.scheme('http https socks5').optional
    proxy_enable: bool = T.bool.default(False)

    # analytics matomo
    analytics_matomo_enable: bool = T.bool.default(False)
    analytics_matomo_url: str = T.str.optional
    analytics_matomo_site_id: str = T.str.optional

    # analytics google
    analytics_google_enable: bool = T.bool.default(False)
    analytics_google_tracking_id: str = T.str.optional

    # analytics plausible
    # Annotation corrected to bool: the schema is T.bool.
    analytics_plausible_enable: bool = T.bool.default(False)
    analytics_plausible_url: str = T.str.optional
    analytics_plausible_domain: str = T.str.optional

    # shopant
    shopant_enable: bool = T.bool.default(False)
    shopant_product_id: int = T.int.optional
    shopant_product_secret: str = T.str.optional
    shopant_url: str = T.url.optional

    # image proxy
    image_proxy_enable: bool = T.bool.default(True)
    # Annotation corrected to str: the schema is T.str. The description text
    # means "comma-separated list of URLs".
    image_proxy_urls: str = T.str.default('origin').desc('逗号分隔的URL列表')
    image_token_secret: str = T.str.default('rssant')
    image_token_expires: float = T.timedelta.min('1s').default('30m')
    detect_story_image_enable: bool = T.bool.default(False)

    # hashid salt
    hashid_salt: str = T.str.default('rssant')

    def _parse_scheduler_extra_networks(self):
        """Turn the ``name@url,name@url`` setting into a list of network dicts.

        Returns an empty list when the setting is falsy. Each comma-separated
        entry is split on its first ``@``; an entry without ``@`` raises
        ``Invalid``. The collected dicts are passed through
        ``validate_extra_networks`` before being returned as a list.
        """
        if not self.scheduler_extra_networks:
            return []
        networks = []
        for part in self.scheduler_extra_networks.strip().split(','):
            part = part.split('@', maxsplit=1)
            if len(part) != 2:
                raise Invalid('invalid scheduler_extra_networks')
            name, url = part
            networks.append(dict(name=name, url=url))
        networks = validate_extra_networks(networks)
        return list(networks)

    @classmethod
    def _parse_story_volumes(cls, text: str):
        """Parse a comma-separated list of story volume specs.

        Format: {volume}:{user}:{password}@{host}:{port}/{db}/{table}

        Returns a dict keyed by the integer volume number; each value holds
        ``user``, ``password``, ``host``, ``port`` (int), ``db`` and
        ``table``. A later entry with the same volume number replaces an
        earlier one. Raises ``Invalid`` for any entry that does not match
        the format.

        >>> volumes = EnvConfig._parse_story_volumes('0:user:password@host:5432/db/table')
        >>> expect = {0: dict(
        ...     user='user', password='password',
        ...     host='host', port=5432, db='db', table='table'
        ... )}
        >>> volumes == expect
        True
        """
        # No field may contain ':', '@' or '/', since those are the separators.
        re_volume = re.compile(
            r'^(\d+)\:([^:@/]+)\:([^:@/]+)\@([^:@/]+)\:(\d+)\/([^:@/]+)\/([^:@/]+)$'
        )
        volumes = {}
        for part in text.split(','):
            match = re_volume.match(part)
            if not match:
                raise Invalid(f'invalid story volume {part!r}')
            volume = int(match.group(1))
            volumes[volume] = dict(
                user=match.group(2),
                password=match.group(3),
                host=match.group(4),
                port=int(match.group(5)),
                db=match.group(6),
                table=match.group(7),
            )
        return volumes

    def _parse_github_standby_configs(self):
        """Parse ``domain,client_id,secret;...`` into a dict keyed by domain.

        Empty items (for example from a trailing ``;``) are skipped. Each
        remaining item must have exactly three comma-separated parts,
        otherwise ``Invalid`` is raised. Values are ``GitHubConfigModel``
        instances.
        """
        configs = {}
        items = (self.github_standby_configs or '').strip().split(';')
        for item in filter(None, items):
            parts = item.split(',')
            if len(parts) != 3:
                raise Invalid('invalid github standby configs')
            domain, client_id, secret = parts
            configs[domain] = GitHubConfigModel(
                domain=domain, client_id=client_id, secret=secret)
        return configs

    def __post_init__(self):
        """Validate dependent settings and compute the derived attributes.

        Raises ``Invalid`` when sentry is enabled without a DSN, or when SMTP
        is enabled without a host or a (truthy) port.
        """
        if self.sentry_enable and not self.sentry_dsn:
            raise Invalid('sentry_dsn is required when sentry_enable=True')
        if self.smtp_enable:
            if not self.smtp_host:
                raise Invalid('smtp_host is required when smtp_enable=True')
            if not self.smtp_port:
                raise Invalid('smtp_port is required when smtp_enable=True')
        scheduler_extra_networks = self._parse_scheduler_extra_networks()
        # Spec of the scheduler node: its primary network first, then extras.
        self.registery_node_spec = {
            'name': 'scheduler',
            'modules': ['scheduler'],
            'networks': [{
                'name': self.scheduler_network,
                'url': self.scheduler_url,
            }] + scheduler_extra_networks
        }
        # Spec of this process: named after the local node and the PID; it
        # carries no modules and its single network entry has no URL.
        self.current_node_spec = {
            'name': '{}@{}'.format(LOCAL_NODE_NAME, os.getpid()),
            'modules': [],
            'networks': [{
                'name': self.scheduler_network,
                'url': None,
            }]
        }
        if self.pg_story_volumes:
            volumes = self._parse_story_volumes(self.pg_story_volumes)
        else:
            # Without explicit volumes, use a single volume 0 that lives in
            # the main postgres database.
            volumes = {
                0: dict(
                    user=self.pg_user,
                    password=self.pg_password,
                    host=self.pg_host,
                    port=self.pg_port,
                    db=self.pg_db,
                    table='story_volume_0',
                )
            }
        self.pg_story_volumes_parsed = volumes
        self.github_standby_configs_parsed = self._parse_github_standby_configs()

    @cached_property
    def root_domain(self) -> str:
        """Hostname component of ``root_url`` as parsed by ``urlparse``."""
        return urlparse(self.root_url).hostname

    @cached_property
    def standby_domain_set(self) -> set:
        """Set of domains listed in the comma-separated ``standby_domains``.

        Blank entries are dropped and surrounding whitespace is stripped, so
        an unset or empty setting yields an empty set. Previously
        ``''.split(',')`` put a bogus empty-string "domain" in the set.
        """
        raw = (self.standby_domains or '').strip()
        stripped = (domain.strip() for domain in raw.split(','))
        return {domain for domain in stripped if domain}

    @cached_property
    def image_proxy_url_list(self) -> list:
        """Sorted, de-duplicated entries of the comma-separated ``image_proxy_urls``.

        Entries are used as written. NOTE(review): an empty setting yields
        ``['']``, exactly as before this change -- confirm whether callers
        rely on the list being non-empty before filtering blanks here too.
        """
        url_s = (self.image_proxy_urls or '').strip().split(',')
        return sorted(set(url_s))
# Maps a textual schema expression to the equivalent schema built through the
# ``T`` builder. NOTE(review): presumably fixtures for a schema-string parser
# test -- the consumer is not visible in this excerpt.
elements = {
    # bare type and single modifiers
    'int': T.int,
    'int.optional': T.int.optional,
    'int.min(0).max(10)': T.int.min(0).max(10),
    'int.default(5)': T.int.default(5),
    'int.desc("a number")': T.int.desc('a number'),
    # all modifiers chained together
    'int.min(0).max(10).optional.default(5).desc("a number")': (
        T.int.min(0).max(10).optional.default(5).desc('a number')
    ),
    # the same enum written with double and with single quotes
    'enum("A B C")': T.enum('A B C'),
    "enum('A B C')": T.enum('A B C'),
}

# Inputs listed as invalid: a non-string, an empty string, a dangling dot, a
# doubled call, and calls with multiple or list arguments.
invalid_elements = [
    None,
    '',
    'int.',
    'int.min()()',
    'int.range(0,10)',
    'int.range([0,10])',
    'enum([1,2,3])',
]