async def do_unregister(
    ctx: ActorContext,
    node_name: T.str,
) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Remove ``node_name`` from the registry and return the remaining specs.

    After the removal a ``scheduler.save_registery`` message is sent, then
    the current registry spec is returned under the ``nodes`` key.
    """
    LOG.info(f'unregister node {node_name}')
    ctx.registery.remove(node_name)
    await ctx.tell('scheduler.save_registery')
    remaining_specs = ctx.registery.to_spec()
    return {'nodes': remaining_specs}
def do_update_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    images: T.list(T.dict(
        url=T.url,
        status=T.int,
    ))
):
    """Store one ImageInfo row per image URL root, then refresh the story.

    For every URL root only the largest ``(status, url)`` tuple is kept as
    the sample. The rows are bulk-created inside a transaction and
    ``_replace_story_images`` is called afterwards for the story.
    """
    # save image info: pick the winning (status, url) sample per url root
    best_by_root = {}
    for image in images:
        root = ImageInfo.extract_url_root(image['url'])
        candidate = (image['status'], image['url'])
        previous = best_by_root.get(root)
        if previous is None:
            best_by_root[root] = candidate
        else:
            best_by_root[root] = max(candidate, previous)
    with transaction.atomic():
        image_info_objects = [
            ImageInfo(
                url_root=root,
                sample_url=sample_url,
                referer=story_url,
                status_code=status_code,
            )
            for root, (status_code, sample_url) in best_by_root.items()
        ]
        LOG.info(f'bulk create {len(image_info_objects)} ImageInfo objects')
        ImageInfo.objects.bulk_create(image_info_objects)
    _replace_story_images(feed_id, offset)
async def do_proxy_tell(
    ctx: ActorContext,
    tasks: T.list(
        T.dict(
            dst=T.str,
            content=T.dict.optional,
        )),
):
    """Forward each task as a ``tell`` message with priority 10.

    Tasks are sent one after another, in the order given.
    """
    for task in tasks:
        destination = task['dst']
        payload = task['content']
        await ctx.tell(dst=destination, content=payload, priority=10)
async def __call__(
    self,
    ctx: ActorContext,
    dst_info: T.list(T.dict(dst=T.str)),
    dst_node_info: T.list(T.dict(dst=T.str, dst_node=T.str)),
):
    """Notify nodes that messages for the given destinations are available.

    ``dst_info`` entries are expanded to nodes through the registry, while
    ``dst_node_info`` entries name their node explicitly. Destinations on
    this node are notified directly on the local queue; every other node
    gets one ``safe_notify`` call, and all of them are awaited together.
    """
    dst_names = [item['dst'] for item in dst_info]
    explicit_pairs = [(item['dst'], item['dst_node']) for item in dst_node_info]

    # node name -> set of destinations that live on that node
    dst_by_node = defaultdict(set)
    for dst in dst_names:
        for node in ctx.registery.find_dst_nodes(dst):
            dst_by_node[node].add(dst)
    for dst, node in explicit_pairs:
        dst_by_node[node].add(dst)

    # destinations hosted on this node skip the network round trip
    local_dsts = dst_by_node.pop(self.app.name, None)
    for dst in local_dsts or ():
        self.app.queue.op_notify(src_node=self.app.name, dst=dst, available=True)

    pending = [
        self.safe_notify(ctx, dict(dst_list=list(dsts)), dst_node=node)
        for node, dsts in dst_by_node.items()
    ]
    await asyncio.gather(*pending)
def test_repr():
    """Check the ``repr`` output of several schemas."""
    dict_schema = T.dict(key=T.list(T.int).unique).optional.desc('a dict')
    assert repr(dict_schema) == "T.dict({key}).optional.desc('a dict')"

    list_schema = T.list(T.int.min(0)).unique
    assert repr(list_schema) == 'T.list(int).unique'

    str_schema = T.str.minlen(10).optional(False)
    assert repr(str_schema) == 'T.str.minlen(10)'

    empty_schema = Schema()
    assert repr(empty_schema) == 'Schema<>'
def test_list():
    """Isomorph list syntax: optional list spec first, then the item schema."""
    with_params = isomorph_schema(['list.unique.maxlen(10).desc("a list")', 'int'])
    expected_with_params = T.list(T.int).unique.maxlen(10).desc('a list')
    assert with_params == expected_with_params

    item_only = isomorph_schema(['int'])
    assert item_only == T.list(T.int)

    # three elements is not a valid list description
    with pytest.raises(SchemaError):
        isomorph_schema(['list', 'int', 'str'])
async def do_register(
    ctx: ActorContext,
    node: NodeSpecSchema,
) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Register ``node`` and return the spec of every registered node.

    When a node with the same name and an identical spec is already known,
    only a log line is written. Otherwise the node is added, the registry
    is logged and a ``scheduler.save_registery`` message is sent.
    """
    LOG.info(f'register node:\n{pretty_format_json(node)}')
    existed = ctx.registery.get(node['name'])
    unchanged = existed and existed.to_spec() == node
    if not unchanged:
        ctx.registery.add(node)
        LOG.info('current registery info:\n' + pretty_format_json(ctx.registery.to_spec()))
        await ctx.tell('scheduler.save_registery')
    else:
        LOG.info(f'register node {node["name"]} already existed and no changes')
    return {'nodes': ctx.registery.to_spec()}
def test_slice():
    """Indexing ``User`` by field name(s) yields the matching schema(s)."""
    id_schema = T.int.min(100).default(100)

    # a single string key gives the field schema itself
    assert User["id"] == id_schema
    assert T(User.id) == id_schema

    # a tuple of keys gives a dict schema holding just those fields
    assert User["id", ] == T.dict(id=id_schema)
    assert User["id", "name"] == T.dict(id=id_schema, name=T.str)

    for bad_key in ("unknown", ("id", "unknown")):
        with pytest.raises(KeyError):
            User[bad_key]
def test_slice():
    """Slicing a dict schema keeps the chosen keys and the dict's own params."""
    full = T.dict(
        id=T.int,
        age=T.int.min(0),
        name=T.str,
    ).optional

    only_id = T.dict(id=T.int).optional
    assert full['id'] == only_id

    age_and_name = T.dict(age=T.int.min(0), name=T.str).optional
    assert full['age', 'name'] == age_and_name
def test_str_copy_and_to_primitive():
    """``str``, ``copy`` and ``to_primitive`` must all agree with EXPECT."""
    builder = T.dict(
        key=T.list(T.int.min(0).max(9)).unique.optional(False),
        tag=T.str.desc('a tag'),
    ).optional.desc('a dict')
    schema = builder.__schema__

    assert schema.to_primitive() == EXPECT
    assert json.loads(str(schema)) == EXPECT

    duplicate = schema.copy()
    assert duplicate.to_primitive() == EXPECT

    # verify copy is deep copy: mutating the source must not leak into it
    schema.items['key'].items = T.int
    assert duplicate.to_primitive() == EXPECT
def test_dict():
    """Isomorph dict syntax: ``$self`` describes the dict, other keys its items."""
    described = isomorph_schema({
        '$self': 'dict.optional.desc("a dict")',
        'key': 'str',
    })
    assert described == T.dict(key=T.str).optional.desc('a dict')

    plain = isomorph_schema({'key': 'str'})
    assert plain == T.dict(key=T.str)

    # an empty ``$self`` description is rejected
    with pytest.raises(SchemaError):
        isomorph_schema({'$self': ''})
class Model:
    """Schema declarations: a user dict, a tag list, a style dict and a string."""

    # dict with a single ``userid`` integer limited to 0..9
    user = T.dict(
        userid=T.int.min(0).max(9).desc("UserID"),
    )

    # list of non-negative integers
    tags = T.list(T.int.min(0))

    # each style entry carries its own name as description
    style = T.dict(
        width=T.int.desc("width"),
        height=T.int.desc("height"),
        border_width=T.int.desc("border_width"),
        border_style=T.str.desc("border_style"),
        border_color=T.str.desc("border_color"),
        color=T.str.desc("color"),
    )

    # string that may be omitted
    optional = T.str.optional.desc("unknown value")
def test_validr(benchmark):
    """Benchmark a compiled validr schema; the validated output must equal ``data``."""
    from validr import T, Compiler

    location = T.dict(
        lat=T.float.min(-90).max(90),
        lng=T.float.min(-180).max(180),
    )
    population = T.dict(
        city=T.int.min(0),
        metro=T.int.min(0),
    )
    place = T.dict(
        location=location,
        name=T.str,
        alt_names=T.list(T.str),
        population=population,
    )
    validate = Compiler().compile(place)
    assert benchmark(validate, data) == data
async def do_pong(
    ctx: ActorContext,
    message: T.str,
) -> T.dict(message=T.str):
    """Log the incoming message, query the registry and echo ``message``.

    Raises:
        ValueError: when ``message`` is the literal string ``'error'``.
    """
    LOG.info(ctx.message)
    reply = await ctx.ask('registery.query')
    LOG.info(reply)
    if message == 'error':
        raise ValueError(message)
    return {'message': message}
def get_manual_fields(self, path, method):
    """Build the coreapi fields for the params schema of ``method``.

    Fields already present in the path, and fields named ``id`` or ``pk``,
    are skipped. A field is required unless it is marked optional or has a
    default other than ``None`` or the empty string. GET and DELETE fields
    are located in the query, all others in the form.
    """
    _, _, params, _ = self._method_meta[method]
    if params is None:
        return []
    field_schemas = T(params).__schema__.items
    skipped_names = {field.name for field in self.get_path_fields(path, method)}
    skipped_names.update(('id', 'pk'))
    location = 'query' if method in ['GET', 'DELETE'] else 'form'

    fields = []
    for name, item in field_schemas.items():
        if name in skipped_names:
            continue
        is_optional = item.params.get('optional', False)
        default = item.params.get('default')
        has_default = not (default is None or default == '')
        fields.append(coreapi.Field(
            name=name,
            required=not is_optional and not has_default,
            location=location,
            schema=coreschema_from_validr(item),
        ))
    return fields
def generate_fields_schema(layout_type):
    """Compile a dict schema covering every field of ``layout_type``.

    Each key of ``layout_type.fields`` maps to a sub-dict with one entry per
    key of ``Contact.SUBFIELD_VALIDATE``. A ``"str"`` validator gets a
    ``minlen`` of 1 when the field's ``requirement`` is truthy and 0
    otherwise; other validators are used as they are.
    """
    validators = Contact.SUBFIELD_VALIDATE
    instance_fields = layout_type.fields

    dict_schema = {"$self": "dict"}
    for key in instance_fields.keys():
        sub_schema = {}
        for subkey in validators.keys():
            rule = validators[subkey]
            if rule == "str":
                # 'requirement' is only looked up for string sub-fields
                minlen = 1 if instance_fields[key]['requirement'] else 0
                rule = f"{rule}.minlen({minlen})"
            sub_schema[subkey] = rule
        dict_schema[key] = sub_schema

    schema = T(dict_schema)
    return Compiler().compile(schema)
def do_ping(
    ctx: ActorContext,
    message: T.str,
) -> T.dict(message=T.str):
    """Log the message, query the registry, forward to ``worker.pong`` and echo.

    Raises:
        ValueError: when ``message`` is the literal string ``'error'``;
            this is raised after the ``worker.pong`` message has been sent.
    """
    LOG.info(ctx.message)
    reply = ctx.ask('registery.query')
    LOG.info(reply)
    ctx.tell('worker.pong', {'message': message})
    if message == 'error':
        raise ValueError(message)
    return {'message': message}
def test_custom_validator():
    """A custom string-like validator survives ``T(...)`` copying and compiles."""

    @validator(string=True)
    def choice_validator(compiler, items):
        allowed = set(items.split())

        def validate(value):
            if value not in allowed:
                raise Invalid('invalid choice')
            return value

        return validate

    compiler = Compiler(validators={'choice': choice_validator})
    schema = T.list(T.choice('A B C D').default('A'))
    assert T(schema) == schema  # test copy custom validator

    check = compiler.compile(schema)
    given = ['A', 'B', 'C', 'D', None]
    assert check(given) == ['A', 'B', 'C', 'D', 'A']
def do_save_feed_creation_result(
    ctx: ActorContext,
    feed_creation_id: T.int,
    messages: T.list(T.str),
    feed: FeedSchema.optional,
):
    """Record the outcome of a feed creation and subscribe the user.

    Returns early when the FeedCreation row is missing or already READY.
    Without feed data the creation is marked ERROR and its URL is mapped to
    ``FeedUrlMap.NOT_FOUND``. Otherwise the Feed is looked up by URL (and
    created if absent), the creation is marked READY, a UserFeed is created
    unless one exists, URL mappings are saved, and a
    ``harbor_rss.update_feed`` message is sent with the validated feed data.
    """
    # ``feed`` is rebound to the Feed model below; keep the raw payload.
    feed_dict = feed
    with transaction.atomic():
        try:
            feed_creation = FeedCreation.get_by_pk(feed_creation_id)
        except FeedCreation.DoesNotExist:
            LOG.warning(f'feed creation {feed_creation_id} not exists')
            return
        if feed_creation.status == FeedStatus.READY:
            LOG.info(f'feed creation {feed_creation_id} is ready')
            return

        feed_creation.message = '\n\n'.join(messages)
        feed_creation.dt_updated = timezone.now()

        if not feed_dict:
            feed_creation.status = FeedStatus.ERROR
            feed_creation.save()
            not_found_map = FeedUrlMap(
                source=feed_creation.url, target=FeedUrlMap.NOT_FOUND)
            not_found_map.save()
            return

        url = feed_dict['url']
        feed = Feed.get_first_by_url(url)
        if not feed:
            created_at = timezone.now()
            feed = Feed(
                url=url,
                status=FeedStatus.READY,
                reverse_url=reverse_url(url),
                dt_updated=created_at,
                dt_checked=created_at,
                dt_synced=created_at,
            )
            feed.save()

        feed_creation.status = FeedStatus.READY
        feed_creation.feed_id = feed.id
        feed_creation.save()

        user_feed = UserFeed.objects.filter(
            user_id=feed_creation.user_id, feed_id=feed.id).first()
        if not user_feed:
            user_feed = UserFeed(
                user_id=feed_creation.user_id,
                feed_id=feed.id,
                is_from_bookmark=feed_creation.is_from_bookmark,
            )
            user_feed.save()
        else:
            LOG.info('UserFeed#{} user_id={} feed_id={} already exists'.format(
                user_feed.id, feed_creation.user_id, feed.id))

        FeedUrlMap(source=feed_creation.url, target=feed.url).save()
        if feed.url != feed_creation.url:
            # the feed URL also gets a mapping onto itself
            FeedUrlMap(source=feed.url, target=feed.url).save()

    update_payload = dict(
        feed_id=feed.id,
        feed=validate_feed_output(feed_dict),
    )
    ctx.hope('harbor_rss.update_feed', update_payload)
def _get_params(self, f): sig = inspect.signature(f) params_schema = {} for name, p in sig.parameters.items(): if p.annotation is not inspect.Parameter.empty: params_schema[name] = p.annotation if params_schema: return T.dict(params_schema).__schema__ return None
def do_sync_story_fulltext(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
) -> T.dict(
    feed_id=T.int,
    offset=T.int.min(0),
    use_proxy=T.bool,
    url=T.url,
    response_status=T.int,
    accept=T_ACCEPT,
):
    """Fetch the full text of one story and store it when acceptable.

    The story is loaded, ``worker_rss.fetch_story`` is asked to fetch its
    link, and a non-empty result is handed to ``_update_story``.

    Returns:
        A dict with ``feed_id``, ``offset``, ``url``, ``use_proxy``,
        ``response_status`` and ``accept``. ``accept`` stays at the REJECT
        value unless ``_update_story`` ran. On an ask timeout
        ``response_status`` is the CONNECTION_TIMEOUT value.

    Raises:
        AssertionError: when no story exists at ``(feed_id, offset)``.
    """
    with log_django_context_metric('harbor_rss.sync_story_fulltext:read'):
        feed = Feed.get_by_pk(feed_id, detail='+use_proxy')
        story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
        assert story, f'story#{feed_id},{offset} not found'
    story_content_info = StoryContentInfo(story.content)
    num_sub_sentences = len(split_sentences(story_content_info.text))
    ret = dict(
        feed_id=feed_id,
        offset=offset,
        url=story.link,
        use_proxy=feed.use_proxy,
        accept=FulltextAcceptStrategy.REJECT.value,
    )
    try:
        result = ctx.ask(
            'worker_rss.fetch_story',
            dict(
                url=story.link,
                use_proxy=feed.use_proxy,
                feed_id=feed_id,
                offset=offset,
                num_sub_sentences=num_sub_sentences,
            ))
    except _TIMEOUT_ERRORS as ex:
        LOG.error(f'Ask worker_rss.fetch_story timeout: {ex}')
        # The return schema declares response_status as an int, so store the
        # enum's value rather than the enum member itself.
        ret.update(response_status=FeedResponseStatus.CONNECTION_TIMEOUT.value)
        return ret
    else:
        ret.update(
            response_status=result['response_status'],
            use_proxy=result['use_proxy'],
        )
        if not result['content']:
            # nothing was fetched: keep the REJECT default
            return ret
    with log_django_context_metric('harbor_rss.sync_story_fulltext:write'):
        accept = _update_story(
            story=story,
            story_content_info=story_content_info,
            content=result['content'],
            summary=None,  # not need update summary
            url=result['url'],
            sentence_count=result['sentence_count'],
        )
        ret.update(accept=accept.value)
    return ret
def test_optional():
    """Optional schemas accept ``None``; required ones reject it."""
    # optional non-string schemas map None to None
    for schema in (
        T.int.optional,
        T.list(T.int).optional,
        T.dict(key=T.int).optional,
    ):
        assert _(schema)(None) is None

    # optional strings normalise both None and '' to ''
    for value in (None, ''):
        assert _(T.str.optional)(value) == ''

    rejected = [
        # an empty string is not a substitute for None on non-string schemas
        (T.int.optional, ''),
        (T.dict(key=T.int).optional, ''),
        # required schemas refuse None
        (T.int, None),
        (T.str, None),
        (T.dict(key=T.int), None),
        (T.list(T.int), None),
    ]
    for schema, value in rejected:
        with pytest.raises(Invalid):
            assert _(schema)(value)
async def method_d( self, name: T.str.maxlen(10).optional.desc('姓名'), language: T.str.default('CN').desc('语言'), ) -> T.dict( name=T.str.maxlen(10).optional.desc('姓名'), email=T.email.optional.desc('邮箱'), sex=T.bool.desc('性别'), message=T.str.maxlen(100).desc('欢迎消息'), ): """A Simple Hello
def get_params(f):
    """Return the dict schema built from the parameters of ``f`` after the first.

    The first parameter is skipped. Every remaining parameter must carry an
    annotation and must not have a default value. ``None`` is returned when
    there are no remaining parameters.

    Raises:
        ValueError: when a parameter has a default or lacks an annotation.
    """
    empty = inspect.Parameter.empty
    params_schema = {}
    for param in list(inspect.signature(f).parameters.values())[1:]:
        if param.default is not empty:
            raise ValueError('You should not set default in schema annotation!')
        if param.annotation is empty:
            raise ValueError(f'Missing annotation in parameter {param.name}!')
        params_schema[param.name] = param.annotation
    if not params_schema:
        return None
    return T.dict(params_schema).__schema__
async def do_detect_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    image_urls: T.list(T.url).unique,
):
    """Probe every image URL of a story and report the statuses.

    Each URL is read concurrently; URLs matched by ``is_referer_deny_url``
    are not requested and get the REFERER_DENY status instead. The collected
    ``url``/``status`` pairs are sent to ``harbor_rss.update_story_images``.

    If the gather fails with a timeout, only the reads that completed
    successfully are reported and the unfinished ones are cancelled.
    """
    LOG.info(
        f'detect story images story={feed_id},{offset} num_images={len(image_urls)} begin'
    )
    options = dict(
        allow_non_webpage=True,
        dns_service=DNS_SERVICE,
    )
    async with AsyncFeedReader(**options) as reader:
        async def _read(url):
            if is_referer_deny_url(url):
                return url, FeedResponseStatus.REFERER_DENY.value
            response = await reader.read(
                url,
                referer="https://rss.anyant.com/",
                ignore_content=True)
            return url, response.status

        futs = [asyncio.ensure_future(_read(url)) for url in image_urls]
        t_begin = time.time()
        try:
            results = await asyncio.gather(*futs)
        except (TimeoutError, asyncio.TimeoutError, concurrent.futures.TimeoutError):
            results = []
            for fut in futs:
                if not fut.done():
                    # The reader is closed when this block exits, so an
                    # unfinished read can no longer complete usefully.
                    fut.cancel()
                elif not fut.cancelled() and fut.exception() is None:
                    # Skip the future(s) that failed: calling result() on
                    # them would re-raise the error being handled here.
                    results.append(fut.result())
        cost_ms = (time.time() - t_begin) * 1000
    num_ok = num_error = 0
    images = []
    for url, status in results:
        if status == 200:
            num_ok += 1
        else:
            num_error += 1
        images.append(dict(url=url, status=status))
    LOG.info(f'detect story images story={feed_id},{offset} '
             f'num_images={len(image_urls)} finished, '
             f'ok={num_ok} error={num_error} cost={cost_ms:.0f}ms')
    await ctx.hope(
        'harbor_rss.update_story_images',
        dict(
            feed_id=feed_id,
            offset=offset,
            story_url=story_url,
            images=images,
        ))
async def __call__(
    self,
    ctx: ActorContext,
    dst_list: T.list(T.str),
):
    """Mark each destination as available from the sender's node.

    The source node is taken from the incoming message. An
    ``ActorStateError`` for one destination is logged as a warning and does
    not stop the remaining ones.
    """
    sender_node = ctx.message.src_node
    local_queue = self.app.queue
    for destination in dst_list:
        try:
            local_queue.op_notify(src_node=sender_node, dst=destination, available=True)
        except ActorStateError as ex:
            LOG.warning(ex)
    return {'message': 'OK'}
async def method_login(
    self,
    username: T.str.maxlen(16),
    password: T.str.maxlen(16),
) -> T.dict(user_id=T.int, message=T.str):
    """Check the credentials and issue an auth token.

    On success the generated token is placed in the ``auth_token`` response
    header and the user's id is returned with an ``'OK'`` message.

    Raises:
        LoginFailed: when the user is unknown or the password differs; the
            message is the same in both cases.
    """
    user = await self.db.query(username)
    if user is None or password != user.password:
        raise LoginFailed('incorrect username or password')
    self.response.headers['auth_token'] = self.auth.generate_token(user)
    return {
        'user_id': user.user_id,
        'message': 'OK',
    }
async def __call__(
    self,
    ctx: ActorContext,
    actor_name: T.str,
    upstream_list: T.list(T.str).minlen(1),
    maxsize: T.int.min(1),
):
    """Fetch messages for ``actor_name`` from every upstream node.

    ``maxsize`` is split evenly between the upstream nodes, clamped to the
    range 1..100 per node. This node is fetched from locally, the others
    through ``safe_fetch``. A node that returns fewer messages than asked
    for is marked as no longer available. Messages for the acker actor are
    acknowledged on the queue; all others are completed by the registry and
    put into the inbox. ``ActorStateError`` from a queue operation is logged
    as a warning.
    """
    LOG.info(
        f'fetch dst={actor_name} maxsize={maxsize} from {upstream_list}')
    size = min(100, max(1, maxsize // len(upstream_list)))
    content = dict(dst=actor_name, maxsize=size)
    tasks = [
        self.local_fetch(dst=actor_name, maxsize=size)
        if src_node == self.app.name
        else self.safe_fetch(ctx, content, src_node)
        for src_node in upstream_list
    ]
    queue = self.app.queue
    messages_list = await asyncio.gather(*tasks)
    is_acker = actor_name == ACTOR_MESSAGE_ACKER
    for src_node, messages in zip(upstream_list, messages_list):
        if len(messages) < size:
            # a short batch means this upstream has nothing more for now
            queue.op_notify(src_node=src_node, dst=actor_name, available=False)
        for msg in messages:
            if is_acker:
                status = msg.content['status']
                try:
                    queue.op_acked(outbox_message_id=msg.id, status=status)
                except ActorStateError as ex:
                    LOG.warning(ex)
            else:
                completed = self.app.registery.complete_message(msg)
                try:
                    queue.op_inbox(completed)
                except ActorStateError as ex:
                    LOG.warning(ex)
from rssant_common.dns_service import DNS_SERVICE LOG = logging.getLogger(__name__) _MAX_STORY_HTML_LENGTH = 5 * 1000 * 1024 _MAX_STORY_CONTENT_LENGTH = 1000 * 1024 _MAX_STORY_SUMMARY_LENGTH = 300 StorySchema = T.dict( unique_id=T.str, title=T.str, content_hash_base64=T.str, author=T.str.optional, link=T.url.optional, image_url=T.url.optional, iframe_url=T.url.optional, audio_url=T.url.optional, has_mathjax=T.bool.optional, dt_published=T.datetime.optional, dt_updated=T.datetime.optional, summary=T.str.optional, content=T.str.optional, sentence_count=T.int.min(0).optional, ) FeedSchema = T.dict( url=T.url, use_proxy=T.bool.default(False), title=T.str, content_length=T.int, content_hash_base64=T.str, link=T.url.optional,
def do_dns_service_update(
    ctx,
    records: T.dict.key(T.str).value(T.list(T.str)),
):
    """Log the incoming records and pass them to ``DNS_SERVICE.update``.

    ``records`` maps string keys to lists of strings.
    """
    LOG.info('dns_service_update %r', records)
    DNS_SERVICE.update(records)