Esempio n. 1
0
async def do_unregister(
    ctx: ActorContext,
    node_name: T.str,
) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Remove ``node_name`` from the registery and return the remaining specs.

    After the removal a ``scheduler.save_registery`` message is sent, then the
    current registery content is returned under the ``nodes`` key.
    """
    LOG.info(f'unregister node {node_name}')
    ctx.registery.remove(node_name)
    await ctx.tell('scheduler.save_registery')
    remaining_specs = ctx.registery.to_spec()
    return {'nodes': remaining_specs}
Esempio n. 2
0
def do_update_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    images: T.list(T.dict(
        url=T.url,
        status=T.int,
    ))
):
    """Store one ImageInfo per image URL root, then call _replace_story_images.

    When several images share the same URL root, only the entry with the
    greatest ``(status, url)`` tuple is kept for that root.
    """
    best_by_root = {}
    for image in images:
        root = ImageInfo.extract_url_root(image['url'])
        candidate = (image['status'], image['url'])
        known = best_by_root.get(root)
        # Stored values are tuples, so None reliably means "root not seen yet".
        best_by_root[root] = candidate if known is None else max(candidate, known)
    with transaction.atomic():
        rows = [
            ImageInfo(
                url_root=root,
                sample_url=sample_url,
                referer=story_url,
                status_code=status_code,
            )
            for root, (status_code, sample_url) in best_by_root.items()
        ]
        LOG.info(f'bulk create {len(rows)} ImageInfo objects')
        ImageInfo.objects.bulk_create(rows)
    _replace_story_images(feed_id, offset)
Esempio n. 3
0
async def do_proxy_tell(ctx: ActorContext, tasks: T.list(
    T.dict(
        dst=T.str,
        content=T.dict.optional,
    ))):
    """Send every task to its ``dst`` through ``ctx.tell`` with priority 10.

    Tasks are sent one after another, in the order given.
    """
    for task in tasks:
        destination, payload = task['dst'], task['content']
        await ctx.tell(dst=destination, content=payload, priority=10)
Esempio n. 4
0
 async def __call__(
         self,
         ctx: ActorContext,
         dst_info: T.list(T.dict(dst=T.str)),
         dst_node_info: T.list(T.dict(dst=T.str, dst_node=T.str)),
 ):
     """Notify nodes that the given destinations are available.

     ``dst_info`` entries are expanded to nodes via the registery, while
     ``dst_node_info`` entries name their node explicitly. Destinations that
     belong to this node go straight to the local queue; every other node is
     notified through ``safe_notify``, all of them concurrently.
     """
     dsts_by_node = defaultdict(set)
     plain_dsts = [item['dst'] for item in dst_info]
     for dst in plain_dsts:
         for node in ctx.registery.find_dst_nodes(dst):
             dsts_by_node[node].add(dst)
     explicit_pairs = [(item['dst'], item['dst_node']) for item in dst_node_info]
     for dst, node in explicit_pairs:
         dsts_by_node[node].add(dst)
     # Take this node's own entry out so only other nodes remain in the mapping.
     local_dsts = dsts_by_node.pop(self.app.name, None)
     for dst in local_dsts or ():
         self.app.queue.op_notify(src_node=self.app.name,
                                  dst=dst,
                                  available=True)
     remote_calls = [
         self.safe_notify(ctx, dict(dst_list=list(dsts)), dst_node=node)
         for node, dsts in dsts_by_node.items()
     ]
     await asyncio.gather(*remote_calls)
Esempio n. 5
0
def test_repr():
    """repr() of schema builders and of an empty Schema has the expected text."""
    expectations = (
        (
            T.dict(key=T.list(T.int).unique, ).optional.desc('a dict'),
            "T.dict({key}).optional.desc('a dict')",
        ),
        (T.list(T.int.min(0)).unique, 'T.list(int).unique'),
        (T.str.minlen(10).optional(False), 'T.str.minlen(10)'),
        (Schema(), 'Schema<>'),
    )
    for schema, expected in expectations:
        assert repr(schema) == expected
Esempio n. 6
0
def test_list():
    """isomorph_schema converts list literals into T.list schemas."""
    # First element carries the list validator itself, second the item schema.
    described = isomorph_schema(['list.unique.maxlen(10).desc("a list")', 'int'])
    expected_described = T.list(T.int).unique.maxlen(10).desc('a list')
    assert described == expected_described

    # A single element is taken as the item schema.
    assert isomorph_schema(['int']) == T.list(T.int)

    # Three elements are rejected.
    with pytest.raises(SchemaError):
        isomorph_schema(['list', 'int', 'str'])
Esempio n. 7
0
async def do_register(ctx: ActorContext, node: NodeSpecSchema) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Add ``node`` to the registery unless an identical spec is already there.

    When the registery changes, a ``scheduler.save_registery`` message is
    sent. The full registery spec is returned under ``nodes`` either way.
    """
    LOG.info(f'register node:\n{pretty_format_json(node)}')
    current = ctx.registery.get(node['name'])
    unchanged = current and current.to_spec() == node
    if not unchanged:
        ctx.registery.add(node)
        LOG.info('current registery info:\n' + pretty_format_json(ctx.registery.to_spec()))
        await ctx.tell('scheduler.save_registery')
    else:
        LOG.info(f'register node {node["name"]} already existed and no changes')
    return {'nodes': ctx.registery.to_spec()}
Esempio n. 8
0
def test_slice():
    """Indexing User by field name(s) yields the field schema or a sub-dict schema."""
    id_schema = T.int.min(100).default(100)

    # A plain string key gives the field schema itself.
    assert User["id"] == id_schema
    assert T(User.id) == id_schema

    # A tuple key gives a dict schema limited to the listed fields.
    assert User[("id",)] == T.dict(id=id_schema)
    assert User[("id", "name")] == T.dict(id=id_schema, name=T.str)

    for bad_key in ("unknown", ("id", "unknown")):
        with pytest.raises(KeyError):
            User[bad_key]
Esempio n. 9
0
def test_slice():
    """Slicing an optional dict schema keeps ``optional`` on the result."""
    full = T.dict(id=T.int, age=T.int.min(0), name=T.str).optional

    only_id = T.dict(id=T.int).optional
    assert full['id'] == only_id

    age_and_name = T.dict(age=T.int.min(0), name=T.str).optional
    assert full['age', 'name'] == age_and_name
Esempio n. 10
0
def test_str_copy_and_to_primitive():
    """to_primitive(), str() and copy() of a Schema all agree with EXPECT."""
    builder = T.dict(
        key=T.list(T.int.min(0).max(9)).unique.optional(False),
        tag=T.str.desc('a tag'),
    ).optional.desc('a dict')
    original = builder.__schema__

    assert original.to_primitive() == EXPECT
    # str(schema) is parsed as JSON and must describe the same structure.
    assert json.loads(str(original)) == EXPECT

    duplicate = original.copy()
    assert duplicate.to_primitive() == EXPECT

    # Mutating the original must not leak into the copy (deep copy check).
    original.items['key'].items = T.int
    assert duplicate.to_primitive() == EXPECT
Esempio n. 11
0
def test_dict():
    """isomorph_schema converts dict literals into T.dict schemas."""
    # '$self' describes the dict itself; other keys describe its fields.
    with_self = {
        '$self': 'dict.optional.desc("a dict")',
        'key': 'str',
    }
    assert isomorph_schema(with_self) == T.dict(key=T.str).optional.desc('a dict')

    # Without '$self' a plain dict schema is produced.
    assert isomorph_schema({'key': 'str'}) == T.dict(key=T.str)

    # An empty '$self' is rejected.
    with pytest.raises(SchemaError):
        isomorph_schema({'$self': ''})
Esempio n. 12
0
class Model:
    """Container of sample T schemas declared as class attributes."""

    # Dict with a single int field "userid" (min 0, max 9).
    user = T.dict(userid=T.int.min(0).max(9).desc("UserID"))
    # List of ints, each with min 0.
    tags = T.list(T.int.min(0))
    # Dict of three int fields and three str fields; each description
    # repeats the field name.
    style = T.dict(
        width=T.int.desc("width"),
        height=T.int.desc("height"),
        border_width=T.int.desc("border_width"),
        border_style=T.str.desc("border_style"),
        border_color=T.str.desc("border_color"),
        color=T.str.desc("color"),
    )
    # Optional string field.
    optional = T.str.optional.desc("unknown value")
Esempio n. 13
0
def test_validr(benchmark):
    """Benchmark a compiled validr schema; validating ``data`` must return it unchanged."""
    from validr import T, Compiler

    coordinates = T.dict(
        lat=T.float.min(-90).max(90),
        lng=T.float.min(-180).max(180),
    )
    population = T.dict(city=T.int.min(0), metro=T.int.min(0))
    place = T.dict(
        location=coordinates,
        name=T.str,
        alt_names=T.list(T.str),
        population=population,
    )
    validate = Compiler().compile(place)
    assert benchmark(validate, data) == data
Esempio n. 14
0
async def do_pong(ctx: ActorContext, message: T.str) -> T.dict(message=T.str):
    """Log the incoming message and a registery query, then echo ``message``.

    Raises ValueError when ``message`` is the literal string ``'error'``.
    """
    LOG.info(ctx.message)
    registery_reply = await ctx.ask('registery.query')
    LOG.info(registery_reply)
    if message == 'error':
        raise ValueError(message)
    return {'message': message}
Esempio n. 15
0
 def get_manual_fields(self, path, method):
     """Build coreapi fields for the params schema registered for ``method``.

     Returns an empty list when the method has no params schema. Fields that
     are already path fields, or are named ``id`` / ``pk``, are skipped.
     A field is required unless it is optional or has a non-empty default.
     GET and DELETE fields are located in the query, all others in the form.
     """
     _, _, params, _ = self._method_meta[method]
     if params is None:
         return []
     schema_items = T(params).__schema__.items
     path_names = {field.name for field in self.get_path_fields(path, method)}
     # The location depends only on the HTTP method, so compute it once.
     location = 'query' if method in ['GET', 'DELETE'] else 'form'
     manual_fields = []
     for name, item in schema_items.items():
         if name in path_names or name in ['id', 'pk']:
             continue
         default = item.params.get('default')
         has_default = not (default is None or default == '')
         is_optional = item.params.get('optional', False)
         manual_fields.append(coreapi.Field(
             name=name,
             required=not is_optional and not has_default,
             location=location,
             schema=coreschema_from_validr(item),
         ))
     return manual_fields
Esempio n. 16
0
def generate_fields_schema(layout_type):
    """Build and compile the dict schema for the fields of ``layout_type``.

    Every key of ``layout_type.fields`` becomes a nested dict whose subkeys
    come from ``Contact.SUBFIELD_VALIDATE``. A subkey validated as ``"str"``
    gets ``minlen(1)`` when the field's ``requirement`` is truthy and
    ``minlen(0)`` otherwise; other subkeys keep their validator unchanged.

    Returns the schema compiled by ``Compiler().compile``.
    """
    subfield_rules = Contact.SUBFIELD_VALIDATE
    instance_fields = layout_type.fields

    def subfield_schema(field_key, subkey):
        rule = subfield_rules[subkey]
        if rule != "str":
            return rule
        # 'requirement' is only consulted for string subfields.
        min_length = 1 if instance_fields[field_key]['requirement'] else 0
        return f"{rule}.minlen({min_length})"

    schema_spec = {"$self": "dict"}
    for field_key in instance_fields.keys():
        schema_spec[field_key] = {
            subkey: subfield_schema(field_key, subkey)
            for subkey in subfield_rules.keys()
        }

    return Compiler().compile(T(schema_spec))
Esempio n. 17
0
def do_ping(ctx: ActorContext, message: T.str) -> T.dict(message=T.str):
    """Log the incoming message and a registery query, forward to ``worker.pong`` and echo.

    The message is forwarded before the error check, so ``worker.pong`` is
    told even when ``message`` is ``'error'`` and ValueError is raised.
    """
    LOG.info(ctx.message)
    registery_reply = ctx.ask('registery.query')
    LOG.info(registery_reply)
    ctx.tell('worker.pong', {'message': message})
    if message == 'error':
        raise ValueError(message)
    return {'message': message}
Esempio n. 18
0
def test_custom_validator():
    """A user-defined string validator can be registered, copied and compiled."""

    @validator(string=True)
    def choice_validator(compiler, items):
        # ``items`` is a whitespace-separated list of the accepted values.
        allowed = set(items.split())

        def validate(value):
            if value not in allowed:
                raise Invalid('invalid choice')
            return value

        return validate

    compiler = Compiler(validators={'choice': choice_validator})
    schema = T.list(T.choice('A B C D').default('A'))
    assert T(schema) == schema  # test copy custom validator
    check = compiler.compile(schema)
    # None is replaced by the default 'A'.
    assert check(['A', 'B', 'C', 'D', None]) == ['A', 'B', 'C', 'D', 'A']
Esempio n. 19
0
def do_save_feed_creation_result(
        ctx: ActorContext,
        feed_creation_id: T.int,
        messages: T.list(T.str),
        feed: FeedSchema.optional,
):
    """Persist the outcome of a feed creation and trigger a feed update.

    Inside one transaction:
    - return early if the FeedCreation is missing or already READY;
    - without feed data, mark the creation as ERROR and map its url to
      ``FeedUrlMap.NOT_FOUND``;
    - otherwise reuse the Feed found by url or create a new one, mark the
      creation READY, make sure the user has a UserFeed, and record url maps.

    After the transaction, ``harbor_rss.update_feed`` is sent via ``ctx.hope``.
    """
    feed_data = feed
    with transaction.atomic():
        try:
            creation = FeedCreation.get_by_pk(feed_creation_id)
        except FeedCreation.DoesNotExist:
            LOG.warning(f'feed creation {feed_creation_id} not exists')
            return
        if creation.status == FeedStatus.READY:
            LOG.info(f'feed creation {feed_creation_id} is ready')
            return
        creation.message = '\n\n'.join(messages)
        creation.dt_updated = timezone.now()

        if not feed_data:
            creation.status = FeedStatus.ERROR
            creation.save()
            not_found_map = FeedUrlMap(source=creation.url,
                                       target=FeedUrlMap.NOT_FOUND)
            not_found_map.save()
            return

        feed_url = feed_data['url']
        feed_row = Feed.get_first_by_url(feed_url)
        if not feed_row:
            created_at = timezone.now()
            feed_row = Feed(url=feed_url,
                            status=FeedStatus.READY,
                            reverse_url=reverse_url(feed_url),
                            dt_updated=created_at,
                            dt_checked=created_at,
                            dt_synced=created_at)
            feed_row.save()

        creation.status = FeedStatus.READY
        creation.feed_id = feed_row.id
        creation.save()

        subscription = UserFeed.objects.filter(
            user_id=creation.user_id, feed_id=feed_row.id).first()
        if not subscription:
            subscription = UserFeed(
                user_id=creation.user_id,
                feed_id=feed_row.id,
                is_from_bookmark=creation.is_from_bookmark,
            )
            subscription.save()
        else:
            LOG.info('UserFeed#{} user_id={} feed_id={} already exists'.format(
                subscription.id, creation.user_id, feed_row.id))

        FeedUrlMap(source=creation.url, target=feed_row.url).save()
        if feed_row.url != creation.url:
            # The feed's own url is also recorded, mapped to itself.
            FeedUrlMap(source=feed_row.url, target=feed_row.url).save()

    update_request = dict(
        feed_id=feed_row.id,
        feed=validate_feed_output(feed_data),
    )
    ctx.hope('harbor_rss.update_feed', update_request)
Esempio n. 20
0
 def _get_params(self, f):
     sig = inspect.signature(f)
     params_schema = {}
     for name, p in sig.parameters.items():
         if p.annotation is not inspect.Parameter.empty:
             params_schema[name] = p.annotation
     if params_schema:
         return T.dict(params_schema).__schema__
     return None
Esempio n. 21
0
def do_sync_story_fulltext(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
) -> T.dict(
        feed_id=T.int,
        offset=T.int.min(0),
        use_proxy=T.bool,
        url=T.url,
        response_status=T.int,
        accept=T_ACCEPT,
):
    """Fetch a story's full text via ``worker_rss.fetch_story`` and store it.

    The returned dict starts with ``accept`` set to REJECT. It is returned
    as-is (plus ``response_status``) when the fetch times out or yields no
    content; otherwise ``accept`` becomes the outcome of ``_update_story``.

    Raises AssertionError when the story is not found.
    """
    with log_django_context_metric('harbor_rss.sync_story_fulltext:read'):
        feed = Feed.get_by_pk(feed_id, detail='+use_proxy')
        story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    assert story, f'story#{feed_id},{offset} not found'
    content_info = StoryContentInfo(story.content)
    num_sub_sentences = len(split_sentences(content_info.text))
    outcome = {
        'feed_id': feed_id,
        'offset': offset,
        'url': story.link,
        'use_proxy': feed.use_proxy,
        'accept': FulltextAcceptStrategy.REJECT.value,
    }
    try:
        fetched = ctx.ask(
            'worker_rss.fetch_story',
            dict(
                url=story.link,
                use_proxy=feed.use_proxy,
                feed_id=feed_id,
                offset=offset,
                num_sub_sentences=num_sub_sentences,
            ))
    except _TIMEOUT_ERRORS as ex:
        LOG.error(f'Ask worker_rss.fetch_story timeout: {ex}')
        outcome.update(response_status=FeedResponseStatus.CONNECTION_TIMEOUT)
        return outcome
    outcome.update(
        response_status=fetched['response_status'],
        use_proxy=fetched['use_proxy'],
    )
    if not fetched['content']:
        return outcome
    with log_django_context_metric('harbor_rss.sync_story_fulltext:write'):
        accept = _update_story(
            story=story,
            story_content_info=content_info,
            content=fetched['content'],
            summary=None,  # not need update summary
            url=fetched['url'],
            sentence_count=fetched['sentence_count'],
        )
        outcome.update(accept=accept.value)
    return outcome
Esempio n. 22
0
def test_optional():
    """Optional schemas accept None; non-optional ones reject it."""
    # Optional int / list / dict pass None through.
    for schema in (T.int.optional, T.list(T.int).optional,
                   T.dict(key=T.int).optional):
        assert _(schema)(None) is None

    # Optional str yields '' for both None and ''.
    for empty in (None, ''):
        assert _(T.str.optional)(empty) == ''

    # An empty string is rejected by optional int and optional dict.
    for schema in (T.int.optional, T.dict(key=T.int).optional):
        with pytest.raises(Invalid):
            assert _(schema)('')

    # Without optional, None is rejected.
    for schema in (T.int, T.str, T.dict(key=T.int), T.list(T.int)):
        with pytest.raises(Invalid):
            assert _(schema)(None)
Esempio n. 23
0
 async def method_d(
     self,
     name: T.str.maxlen(10).optional.desc('姓名'),
     language: T.str.default('CN').desc('语言'),
 ) -> T.dict(
         name=T.str.maxlen(10).optional.desc('姓名'),
         email=T.email.optional.desc('邮箱'),
         sex=T.bool.desc('性别'),
         message=T.str.maxlen(100).desc('欢迎消息'),
 ):
     """A Simple Hello
Esempio n. 24
0
def get_params(f):
    """Return the dict schema built from the annotations of ``f``'s parameters.

    The first parameter is skipped. Every other parameter must carry an
    annotation and must not declare a default value.

    Returns None when there is no parameter besides the first one.

    Raises:
        ValueError: a parameter has a default value or lacks an annotation.
    """
    params_schema = {}
    parameters = inspect.signature(f).parameters
    for position, (name, param) in enumerate(parameters.items()):
        if position == 0:
            # The leading parameter is not part of the schema.
            continue
        if param.default is not inspect.Parameter.empty:
            raise ValueError('You should not set default in schema annotation!')
        if param.annotation is inspect.Parameter.empty:
            raise ValueError(f'Missing annotation in parameter {name}!')
        params_schema[name] = param.annotation
    return T.dict(params_schema).__schema__ if params_schema else None
Esempio n. 25
0
async def do_detect_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    image_urls: T.list(T.url).unique,
):
    """Probe every image URL of a story and report the response statuses.

    All URLs are read concurrently through one ``AsyncFeedReader``. URLs for
    which ``is_referer_deny_url`` is true are not requested and get the
    ``REFERER_DENY`` status. If a read times out, the results of the reads
    that already completed successfully are kept and the rest are dropped.

    The collected ``(url, status)`` pairs are sent to
    ``harbor_rss.update_story_images`` via ``ctx.hope``.
    """
    LOG.info(
        f'detect story images story={feed_id},{offset} num_images={len(image_urls)} begin'
    )
    options = dict(
        allow_non_webpage=True,
        dns_service=DNS_SERVICE,
    )
    async with AsyncFeedReader(**options) as reader:

        async def _read(url):
            if is_referer_deny_url(url):
                return url, FeedResponseStatus.REFERER_DENY.value
            response = await reader.read(url,
                                         referer="https://rss.anyant.com/",
                                         ignore_content=True)
            return url, response.status

        futs = [asyncio.ensure_future(_read(url)) for url in image_urls]
        t_begin = time.time()
        try:
            results = await asyncio.gather(*futs)
        except (TimeoutError, concurrent.futures.TimeoutError):
            # gather() propagates the first failure, so at least one future
            # here is done *with an exception*; calling result() on it (or on
            # a cancelled one) would raise again and defeat this handler.
            # Keep only the futures that completed successfully.
            results = []
            for fut in futs:
                if not fut.done():
                    # Its result is discarded and the reader is about to be
                    # closed, so do not leave the read running.
                    fut.cancel()
                elif not fut.cancelled() and fut.exception() is None:
                    results.append(fut.result())
        cost_ms = (time.time() - t_begin) * 1000
    num_ok = num_error = 0
    images = []
    for url, status in results:
        if status == 200:
            num_ok += 1
        else:
            num_error += 1
        images.append(dict(url=url, status=status))
    LOG.info(f'detect story images story={feed_id},{offset} '
             f'num_images={len(image_urls)} finished, '
             f'ok={num_ok} error={num_error} cost={cost_ms:.0f}ms')
    await ctx.hope(
        'harbor_rss.update_story_images',
        dict(
            feed_id=feed_id,
            offset=offset,
            story_url=story_url,
            images=images,
        ))
Esempio n. 26
0
 async def __call__(
     self,
     ctx: ActorContext,
     dst_list: T.list(T.str),
 ):
     """Mark every dst in ``dst_list`` as available from the sending node.

     An ActorStateError raised by the queue is logged as a warning and does
     not stop the remaining notifications. Always returns ``message='OK'``.
     """
     sender = ctx.message.src_node
     notify = self.app.queue.op_notify
     for dst in dst_list:
         try:
             notify(src_node=sender, dst=dst, available=True)
         except ActorStateError as ex:
             LOG.warning(ex)
     return {'message': 'OK'}
Esempio n. 27
0
 async def method_login(
         self,
         username: T.str.maxlen(16),
         password: T.str.maxlen(16),
 ) -> T.dict(user_id=T.int, message=T.str):
     """Check the credentials and put an auth token into the response headers.

     Raises LoginFailed with the same message for an unknown user and for a
     wrong password.
     """
     user = await self.db.query(username)
     if user is None or password != user.password:
         raise LoginFailed('incorrect username or password')
     self.response.headers['auth_token'] = self.auth.generate_token(user)
     return {
         'user_id': user.user_id,
         'message': 'OK',
     }
Esempio n. 28
0
 async def __call__(
         self,
         ctx: ActorContext,
         actor_name: T.str,
         upstream_list: T.list(T.str).minlen(1),
         maxsize: T.int.min(1),
 ):
     """Fetch messages for ``actor_name`` from every upstream node into the queue.

     Each node is asked for ``maxsize // len(upstream_list)`` messages,
     clamped to the range 1..100. This node is read through ``local_fetch``,
     other nodes through ``safe_fetch``; all fetches run concurrently.

     A node that returns fewer messages than requested is marked as not
     available for ``actor_name``. Messages for ACTOR_MESSAGE_ACKER are
     applied as acks; all others are completed by the registery and put
     into the inbox. ActorStateError from the queue is logged and skipped.
     """
     LOG.info(
         f'fetch dst={actor_name} maxsize={maxsize} from {upstream_list}')
     per_node = min(100, max(1, maxsize // len(upstream_list)))
     request = dict(dst=actor_name, maxsize=per_node)
     fetches = [
         self.local_fetch(dst=actor_name, maxsize=per_node)
         if node == self.app.name
         else self.safe_fetch(ctx, request, node)
         for node in upstream_list
     ]
     queue = self.app.queue
     batches = await asyncio.gather(*fetches)
     is_acker = actor_name == ACTOR_MESSAGE_ACKER
     for node, batch in zip(upstream_list, batches):
         if len(batch) < per_node:
             queue.op_notify(src_node=node,
                             dst=actor_name,
                             available=False)
         if is_acker:
             for ack in batch:
                 status = ack.content['status']
                 try:
                     queue.op_acked(outbox_message_id=ack.id, status=status)
                 except ActorStateError as ex:
                     LOG.warning(ex)
             continue
         for raw in batch:
             # Completion happens outside the try: only queue errors are tolerated.
             completed = self.app.registery.complete_message(raw)
             try:
                 queue.op_inbox(completed)
             except ActorStateError as ex:
                 LOG.warning(ex)
Esempio n. 29
0
from rssant_common.dns_service import DNS_SERVICE

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

# Length limits for story html, content and summary.
# NOTE(review): the unit (characters vs bytes) is not visible here — confirm
# at the places where these limits are applied.
_MAX_STORY_HTML_LENGTH = 5 * 1000 * 1024  # 5,120,000
_MAX_STORY_CONTENT_LENGTH = 1000 * 1024  # 1,024,000
_MAX_STORY_SUMMARY_LENGTH = 300

# Schema of a single story. unique_id, title and content_hash_base64 are
# required; every other field is declared optional.
StorySchema = T.dict(
    unique_id=T.str,
    title=T.str,
    content_hash_base64=T.str,
    author=T.str.optional,
    link=T.url.optional,
    image_url=T.url.optional,
    iframe_url=T.url.optional,
    audio_url=T.url.optional,
    has_mathjax=T.bool.optional,
    dt_published=T.datetime.optional,
    dt_updated=T.datetime.optional,
    summary=T.str.optional,
    content=T.str.optional,
    sentence_count=T.int.min(0).optional,
)

FeedSchema = T.dict(
    url=T.url,
    use_proxy=T.bool.default(False),
    title=T.str,
    content_length=T.int,
    content_hash_base64=T.str,
    link=T.url.optional,
Esempio n. 30
0
def do_dns_service_update(
    ctx,
    records: T.dict.key(T.str).value(T.list(T.str)),
):
    """Log the received records and pass them to ``DNS_SERVICE.update``.

    ``records`` maps a string key to a list of strings.
    """
    LOG.info('dns_service_update %r', records)
    DNS_SERVICE.update(records)