Beispiel #1
0
import yarl
import listparser
from validr import T, Invalid

from rssant_common.helper import coerce_url
from rssant_common.validator import compiler
from rssant_common.blacklist import compile_url_blacklist
from .schema import validate_opml, IMPORT_ITEMS_LIMIT
from .helper import RE_URL


# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

# Case-insensitive pattern matching any string that ends in ".opml" or ".xml".
_RE_OPML_FILENAME = re.compile(r'^.*\.(opml|xml)$', re.I)
# Validator compiled from the validr `T.url` schema.
_validate_url = compiler.compile(T.url)


_BLACKLIST_CONTENT = """
youtube.com
facebook.com
amazon.com
wikipedia.org
twitter.com
vk.com
instagram.com
live.com
tmall.com
baidu.com
taobao.com
jd.com
Beispiel #2
0
    unique_id=T.str,
    title=T.str,
    content_hash_base64=T.str.optional,
    author=T.str.optional,
    link=T.str.optional,
    image_url=T.url.optional,
    iframe_url=T.url.optional,
    audio_url=T.url.optional,
    has_mathjax=T.bool.optional,
    dt_published=T.datetime.object.optional.invalid_to_default,
    dt_updated=T.datetime.object.optional,
    summary=T.str.optional,
    content=T.str.optional,
)

# Compiled validator for StorySchema.
validate_story = compiler.compile(StorySchema)


@pytest.mark.dbtest
class StoryTestCase(TransactionTestCase):
    def setUp(self):
        print('setUp')
        storys = []
        updated_storys = []
        now = timezone.datetime(2020, 6, 1, 12, 12, 12, tzinfo=timezone.utc)
        for i in range(200):
            dt = now + timezone.timedelta(minutes=i)
            content = f'test story content {i}' * (i % 5)
            content_hash_base64 = compute_hash_base64(content)
            summary = content[:30]
            story = {
Beispiel #3
0
    link=T.url.optional,
    author=T.str.optional,
    icon=T.str.optional,
    description=T.str.optional,
    version=T.str.optional,
    dt_updated=T.datetime.optional,
    encoding=T.str.optional,
    etag=T.str.optional,
    last_modified=T.str.optional,
    response_status=T.int.optional,
    checksum_data=T.bytes.maxlen(4096).optional,
    warnings=T.str.optional,
    storys=T.list,
)

# Compiled validators for FeedSchema and StorySchema.
_validate_feed = compiler.compile(FeedSchema)
_validate_story = compiler.compile(StorySchema)


def validate_feed(feed):
    feed_info = feed.get('url') or feed.get('link') or feed.get('title')
    try:
        feed_data = _validate_feed(feed)
    except Invalid as ex:
        ex.args = (f'{ex.args[0]}, feed={feed_info}', *ex.args[1:])
        raise
    storys = []
    for story in feed_data['storys']:
        try:
            story = _validate_story(story)
        except Invalid as ex:
Beispiel #4
0
import os.path

from dotenv import load_dotenv
from validr import T, modelclass, fields, Invalid

from rssant_common.validator import compiler
from actorlib.network_helper import LOCAL_NODE_NAME


# Validator for a list of dicts, each holding a `name` string and a `url`
# checked with the relaxed URL schema.
validate_extra_networks = compiler.compile(T.list(T.dict(
    name=T.str,
    url=T.url.relaxed,
)))


@modelclass(compiler=compiler)
class EnvConfig:
    debug = T.bool.default(True).desc('debug')
    log_level = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url = T.url.relaxed.default('http://*****:*****@url,name@url')
    secret_key = T.str.default('8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address = T.bool.default(False)
    check_feed_minutes = T.int.min(1).default(30)
    feed_story_retention = T.int.min(1).default(5000).desc('max storys to keep per feed')
    # actor
    actor_storage_path = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta = T.int.min(1).default(5000)
    actor_queue_max_complete_size = T.int.min(0).default(500)
Beispiel #5
0
    ident=T.str,
    title=T.str,
    url=T.str.optional,
    content=T.str.maxlen(_MAX_CONTENT_LENGTH).optional,
    summary=T.str.maxlen(_MAX_SUMMARY_LENGTH).optional,
    image_url=T.str.optional,
    audio_url=T.str.optional,
    dt_published=T.datetime.object.optional,
    dt_updated=T.datetime.object.optional,
    author_name=T.str.optional,
    author_url=T.str.optional,
    author_avatar_url=T.str.optional,
)


# Compiled validators for RawFeedSchema and RawStorySchema.
validate_raw_feed = compiler.compile(RawFeedSchema)
validate_raw_story = compiler.compile(RawStorySchema)


class RawFeedResult:

    __slots__ = ('_feed', '_storys', '_warnings')

    def __init__(self, feed, storys, warnings=None):
        self._feed = feed
        self._storys = storys
        self._warnings = warnings

    def __repr__(self):
        return '<{} url={!r} version={!r} title={!r} has {} storys>'.format(
            type(self).__name__,
Beispiel #6
0
# Subset of FeedSchemaFields limited to the keys named in FeedInfoSchemaFieldNames.
FeedInfoSchemaFields = {
    k: FeedSchemaFields[k]
    for k in FeedInfoSchemaFieldNames
}
# Those fields plus a `status` string that defaults to FeedStatus.READY.
FeedInfoSchema = T.dict(
    **FeedInfoSchemaFields,
    status=T.str.default(FeedStatus.READY),
)

# Output schemas: a feed's output fields plus a `storys` list of story outputs.
StoryOutputSchema = T.dict(**StoryOutputSchemaFields)
FeedOutputSchema = T.dict(
    **FeedOutputSchemaFields,
    storys=T.list(StoryOutputSchema),
)

# Compiled validator for FeedOutputSchema.
validate_feed_output = compiler.compile(FeedOutputSchema)


@actor('harbor_rss.update_feed_creation_status')
@django_context
def do_update_feed_creation_status(
    ctx: ActorContext,
    feed_creation_id: T.int,
    status: T.str,
):
    """Set ``status`` on the FeedCreation row with primary key ``feed_creation_id``.

    The update is issued inside a database transaction. ``ctx`` is accepted
    but not used by the body.
    """
    # Building the queryset is lazy; the query itself runs in ``update``.
    target_rows = FeedCreation.objects.filter(pk=feed_creation_id)
    with transaction.atomic():
        target_rows.update(status=status)


@actor('harbor_rss.save_feed_creation_result')
@django_context
Beispiel #7
0
        link=T.str.optional.desc(
            "The URL which the feed image would point to"),
    ).optional.desc("Details about the feed image. A feed image "
                    "can be a logo, banner, or a picture of the author"),
    generator=T.str.optional.desc(
        "A human-readable name of the application used to generate the feed"),
    generator_detail=T.dict(
        name=T.str.optional.desc("Same as feed.generator"),
        href=T.str.optional.desc(
            "The URL of the application used to generate the feed"),
        version=T.str.optional.desc(
            "The version number of the application used to generate the feed"),
    ).optional.desc("Details about the feed generator"),
)

# A user's feed count limit should be less than this number.
IMPORT_ITEMS_LIMIT = 20000

# Schema for OPML data: an optional title and a list of at most
# IMPORT_ITEMS_LIMIT items, each with optional url/title/type/group fields.
OPMLSchema = T.dict(title=T.str.optional,
                    items=T.list(
                        T.dict(
                            url=T.url.optional.invalid_to_default,
                            title=T.str.maxlen(200).strip.truncated.optional,
                            type=T.str.maxlen(50).strip.truncated.optional,
                            group=T.str.maxlen(50).strip.truncated.optional,
                        )).maxlen(IMPORT_ITEMS_LIMIT))

# Compiled validators for the feed, story and OPML schemas.
validate_feed = compiler.compile(FeedSchema)
validate_story = compiler.compile(StorySchema)
validate_opml = compiler.compile(OPMLSchema)
Beispiel #8
0
    return text[:max(0, width - len(placeholder))] + placeholder


def unsafe_kill_thread(thread_id):
    """Asynchronously raise ``SystemExit`` in the thread named by ``thread_id``.

    Uses the CPython C API ``PyThreadState_SetAsyncExc``; see
    https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/

    Args:
        thread_id: Thread identifier (e.g. ``threading.Thread.ident``), or
            ``None``.

    Returns:
        ``False`` when ``thread_id`` is ``None`` or when the C API reports
        that at least one thread state was modified; ``True`` only when it
        reports that no thread state matched ``thread_id``.
    """
    if thread_id is None:
        return False
    # The C signature takes an ``unsigned long`` id. A bare Python int is
    # passed by ctypes as a C ``int`` and masked to fit, which truncates
    # 64-bit thread identifiers so that no thread ever matches.
    target = ctypes.c_ulong(thread_id)
    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
        target, ctypes.py_object(SystemExit))
    if res > 1:
        # More than one thread state was touched: clear the pending
        # exception again by passing a NULL ``exc`` (``None`` in ctypes).
        ctypes.pythonapi.PyThreadState_SetAsyncExc(target, None)
        LOG.error(f'kill thread#{thread_id} failed')
    return res <= 0


# Compiled validator for actor timer values: a validr interval schema with
# a minimum of '1s' and a maximum of '24h'.
parse_actor_timer = internal_schema_compiler.compile(
    T.interval.min('1s').max('24h'))


def _get_function_name(fn):
    """Return a dotted display name for ``fn``.

    The name is ``fn.__qualname__``, falling back to ``fn.__name__`` when the
    qualified name is missing or empty. It is prefixed with ``fn.__module__``
    and a dot when the module attribute is present and non-empty.
    """
    module = getattr(fn, '__module__', None)
    short_name = getattr(fn, '__qualname__', None) or getattr(fn, '__name__', None)
    return f'{module}.{short_name}' if module else short_name


def auto_restart_when_crash(fn):
    fn_name = _get_function_name(fn)
Beispiel #9
0
import json
import gzip
import datetime
import struct

from validr import T
import lz4.frame as lz4

from rssant_common.validator import compiler

# Validator compiled from the validr `T.datetime` schema; _json_default
# applies it to date/datetime values.
_dump_datetime = compiler.compile(T.datetime)


def _json_default(obj):
    """Convert date/datetime objects via ``_dump_datetime``.

    Raises:
        TypeError: if ``obj`` is neither a ``datetime.date`` nor a
            ``datetime.datetime``.
    """
    if not isinstance(obj, (datetime.date, datetime.datetime)):
        raise TypeError("Type %s not serializable" % type(obj))
    return _dump_datetime(obj)


class StoryData:
    """
    http://quixdb.github.io/squash-benchmark/
    """

    VERSION_GZIP = 1
    VERSION_LZ4 = 2
    VERSION_RAW = 3

    __slots__ = ('_value', '_version')

    def __init__(self, value: bytes, version: int = None):
Beispiel #10
0
>>> encoded = encode_image_url(url, referer)
>>> decoded = decode_image_url(encoded)
>>> decoded['url'] == url
True
>>> decoded['referer'] == referer
True
"""
import base64
import json
import brotli
from validr import T, Invalid
from rssant_common.validator import compiler

# Validator for a dict with a required `url` and an optional `referer`,
# both checked with the URL schema.
validate_image_url = compiler.compile(
    T.dict(
        url=T.url,
        referer=T.url.optional,
    ))


class ImageUrlEncodeError(Exception):
    """Exception type for errors while encoding an image URL."""


class ImageUrlDecodeError(Exception):
    """Exception type for errors while decoding an image URL."""


def encode_image_url(url, referer=None):
    try:
        text = json.dumps(validate_image_url(dict(url=url, referer=referer)))
Beispiel #11
0
import time
import base64
import json
import hmac
import brotli
from validr import T, Invalid
from rssant_common.validator import compiler

# Validator for a dict with a required integer `timestamp` and an optional
# `referrer` checked with the URL schema.
validate_image_token = compiler.compile(
    T.dict(
        timestamp=T.int,
        referrer=T.url.optional,
    ))


class ImageTokenEncodeError(Exception):
    """Exception type for errors while encoding an image token."""


class ImageTokenDecodeError(Exception):
    """Exception type for errors while decoding an image token."""


class ImageTokenExpiredError(ImageTokenDecodeError):
    """Subclass of ImageTokenDecodeError for an expired image token."""


class ImageToken:
    def __init__(self, *, referrer: str = None, timestamp: int = None):
        self.referrer = (referrer or '')[:255]
        self.timestamp = timestamp or int(time.time())
Beispiel #12
0
    return text[:max(0, width - len(placeholder))] + placeholder


def unsafe_kill_thread(thread_id):
    """Asynchronously raise ``SystemExit`` in the thread named by ``thread_id``.

    Uses the CPython C API ``PyThreadState_SetAsyncExc``; see
    https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/

    Args:
        thread_id: Thread identifier (e.g. ``threading.Thread.ident``), or
            ``None``.

    Returns:
        ``False`` when ``thread_id`` is ``None`` or when the C API reports
        that at least one thread state was modified; ``True`` only when it
        reports that no thread state matched ``thread_id``.
    """
    if thread_id is None:
        return False
    # The C signature takes an ``unsigned long`` id. A bare Python int is
    # passed by ctypes as a C ``int`` and masked to fit, which truncates
    # 64-bit thread identifiers so that no thread ever matches.
    target = ctypes.c_ulong(thread_id)
    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
        target, ctypes.py_object(SystemExit))
    if res > 1:
        # More than one thread state was touched: clear the pending
        # exception again by passing a NULL ``exc`` (``None`` in ctypes).
        ctypes.pythonapi.PyThreadState_SetAsyncExc(target, None)
        LOG.error(f'kill thread#{thread_id} failed')
    return res <= 0


# Compiled validator for actor timer values: a validr timedelta schema with
# a minimum of '1s', a maximum of '24h', and the `object` flag set.
parse_actor_timer = internal_schema_compiler.compile(
    T.timedelta.min('1s').max('24h').object)


def _get_function_name(fn):
    """Return a dotted display name for ``fn``.

    The name is ``fn.__qualname__``, falling back to ``fn.__name__`` when the
    qualified name is missing or empty. It is prefixed with ``fn.__module__``
    and a dot when the module attribute is present and non-empty.
    """
    module = getattr(fn, '__module__', None)
    short_name = getattr(fn, '__qualname__', None) or getattr(fn, '__name__', None)
    return f'{module}.{short_name}' if module else short_name


def auto_restart_when_crash(fn):
    fn_name = _get_function_name(fn)