import yarl
import listparser
from validr import T, Invalid
from rssant_common.helper import coerce_url
from rssant_common.validator import compiler
from rssant_common.blacklist import compile_url_blacklist
from .schema import validate_opml, IMPORT_ITEMS_LIMIT
from .helper import RE_URL

# Module-level logger named after this module.
# NOTE(review): `logging` and `re` are used below but their imports are not
# visible in this fragment -- presumably imported earlier; confirm.
LOG = logging.getLogger(__name__)

# Case-insensitive match for any name ending in ".opml" or ".xml".
_RE_OPML_FILENAME = re.compile(r'^.*\.(opml|xml)$', re.I)

# Compiled validr validator for a single URL value.
_validate_url = compiler.compile(T.url)

# Whitespace-separated domain names (the literal continues past this fragment).
_BLACKLIST_CONTENT = """ youtube.com facebook.com amazon.com wikipedia.org twitter.com vk.com instagram.com live.com tmall.com baidu.com taobao.com jd.com
unique_id=T.str, title=T.str, content_hash_base64=T.str.optional, author=T.str.optional, link=T.str.optional, image_url=T.url.optional, iframe_url=T.url.optional, audio_url=T.url.optional, has_mathjax=T.bool.optional, dt_published=T.datetime.object.optional.invalid_to_default, dt_updated=T.datetime.object.optional, summary=T.str.optional, content=T.str.optional, )

# Compiled validator for StorySchema (the schema definition starts above
# this fragment).
validate_story = compiler.compile(StorySchema)


@pytest.mark.dbtest
class StoryTestCase(TransactionTestCase):

    def setUp(self):
        print('setUp')
        storys = []
        updated_storys = []
        # Fixed, timezone-aware base timestamp so generated data is reproducible.
        now = timezone.datetime(2020, 6, 1, 12, 12, 12, tzinfo=timezone.utc)
        for i in range(200):
            # Each story is dated one minute after the previous one.
            dt = now + timezone.timedelta(minutes=i)
            # The text is repeated (i % 5) times, so every fifth story
            # (i % 5 == 0) has empty content.
            content = f'test story content {i}' * (i % 5)
            content_hash_base64 = compute_hash_base64(content)
            # Summary is the first 30 characters of the content.
            summary = content[:30]
            story = {
link=T.url.optional, author=T.str.optional, icon=T.str.optional, description=T.str.optional, version=T.str.optional, dt_updated=T.datetime.optional, encoding=T.str.optional, etag=T.str.optional, last_modified=T.str.optional, response_status=T.int.optional, checksum_data=T.bytes.maxlen(4096).optional, warnings=T.str.optional, storys=T.list, )

# Compiled validators for the feed and story schemas.
_validate_feed = compiler.compile(FeedSchema)
_validate_story = compiler.compile(StorySchema)


def validate_feed(feed):
    # First truthy value among url / link / title, used only to label
    # validation errors.
    feed_info = feed.get('url') or feed.get('link') or feed.get('title')
    try:
        feed_data = _validate_feed(feed)
    except Invalid as ex:
        # Append the feed label to the first exception argument, keep the
        # remaining arguments, and re-raise the same exception.
        ex.args = (f'{ex.args[0]}, feed={feed_info}', *ex.args[1:])
        raise
    storys = []
    # FeedSchema declares `storys` as a plain list, so each item is
    # validated individually here.
    for story in feed_data['storys']:
        try:
            story = _validate_story(story)
        except Invalid as ex:
import os.path
from dotenv import load_dotenv
from validr import T, modelclass, fields, Invalid
from rssant_common.validator import compiler
from actorlib.network_helper import LOCAL_NODE_NAME


# Validator for a list of {name, url} network entries.
validate_extra_networks = compiler.compile(T.list(T.dict(
    name=T.str,
    url=T.url.relaxed,
)))


@modelclass(compiler=compiler)
class EnvConfig:
    debug = T.bool.default(True).desc('debug')
    log_level = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    # NOTE(review): this default looks masked/garbled ('*****' and a
    # 'name@url' list inside a URL); confirm against the original file.
    root_url = T.url.relaxed.default('http://*****:*****@url,name@url')
    # NOTE(review): hard-coded default secret; presumably deployments are
    # expected to override it -- verify.
    secret_key = T.str.default('8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address = T.bool.default(False)
    check_feed_minutes = T.int.min(1).default(30)
    feed_story_retention = T.int.min(1).default(5000).desc('max storys to keep per feed')
    # actor
    actor_storage_path = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta = T.int.min(1).default(5000)
    actor_queue_max_complete_size = T.int.min(0).default(500)
ident=T.str, title=T.str, url=T.str.optional, content=T.str.maxlen(_MAX_CONTENT_LENGTH).optional, summary=T.str.maxlen(_MAX_SUMMARY_LENGTH).optional, image_url=T.str.optional, audio_url=T.str.optional, dt_published=T.datetime.object.optional, dt_updated=T.datetime.object.optional, author_name=T.str.optional, author_url=T.str.optional, author_avatar_url=T.str.optional, )

# Compiled validators for the raw feed and raw story schemas.
validate_raw_feed = compiler.compile(RawFeedSchema)
validate_raw_story = compiler.compile(RawStorySchema)


class RawFeedResult:

    # Instances carry exactly these three attributes.
    __slots__ = ('_feed', '_storys', '_warnings')

    def __init__(self, feed, storys, warnings=None):
        # Arguments are stored as given; no validation or copying happens here.
        self._feed = feed
        self._storys = storys
        self._warnings = warnings

    def __repr__(self):
        return '<{} url={!r} version={!r} title={!r} has {} storys>'.format(
            type(self).__name__,
FeedInfoSchemaFields = { k: FeedSchemaFields[k] for k in FeedInfoSchemaFieldNames } FeedInfoSchema = T.dict( **FeedInfoSchemaFields, status=T.str.default(FeedStatus.READY), ) StoryOutputSchema = T.dict(**StoryOutputSchemaFields) FeedOutputSchema = T.dict( **FeedOutputSchemaFields, storys=T.list(StoryOutputSchema), ) validate_feed_output = compiler.compile(FeedOutputSchema) @actor('harbor_rss.update_feed_creation_status') @django_context def do_update_feed_creation_status( ctx: ActorContext, feed_creation_id: T.int, status: T.str, ): with transaction.atomic(): FeedCreation.objects.filter(pk=feed_creation_id).update(status=status) @actor('harbor_rss.save_feed_creation_result') @django_context
link=T.str.optional.desc( "The URL which the feed image would point to"), ).optional.desc("Details about the feed image. A feed image " "can be a logo, banner, or a picture of the author"), generator=T.str.optional.desc( "A human-readable name of the application used to generate the feed"), generator_detail=T.dict( name=T.str.optional.desc("Same as feed.generator"), href=T.str.optional.desc( "The URL of the application used to generate the feed"), version=T.str.optional.desc( "The version number of the application used to generate the feed"), ).optional.desc("Details about the feed generator"), )

# The per-user feed count limit should be less than the number below.
IMPORT_ITEMS_LIMIT = 20000

# OPML document: an optional title plus at most IMPORT_ITEMS_LIMIT items.
# Item text fields are stripped and truncated to their maxlen; an invalid
# url falls back to the default instead of failing validation.
OPMLSchema = T.dict(title=T.str.optional, items=T.list(
    T.dict(
        url=T.url.optional.invalid_to_default,
        title=T.str.maxlen(200).strip.truncated.optional,
        type=T.str.maxlen(50).strip.truncated.optional,
        group=T.str.maxlen(50).strip.truncated.optional,
    )).maxlen(IMPORT_ITEMS_LIMIT))

# Compiled validators for the three schemas.
validate_feed = compiler.compile(FeedSchema)
validate_story = compiler.compile(StorySchema)
validate_opml = compiler.compile(OPMLSchema)
return text[:max(0, width - len(placeholder))] + placeholder def unsafe_kill_thread(thread_id): # https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/ if thread_id is None: return False res = ctypes.pythonapi.PyThreadState_SetAsyncExc( thread_id, ctypes.py_object(SystemExit)) if res > 1: ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, 0) LOG.error(f'kill thread#{thread_id} failed') return res <= 0 parse_actor_timer = internal_schema_compiler.compile( T.interval.min('1s').max('24h')) def _get_function_name(fn): mod_name = getattr(fn, '__module__', None) name = getattr(fn, '__qualname__', None) if not name: name = getattr(fn, '__name__', None) if mod_name: return f'{mod_name}.{name}' else: return name def auto_restart_when_crash(fn): fn_name = _get_function_name(fn)
import json import gzip import datetime import struct from validr import T import lz4.frame as lz4 from rssant_common.validator import compiler _dump_datetime = compiler.compile(T.datetime) def _json_default(obj): if isinstance(obj, (datetime.date, datetime.datetime)): return _dump_datetime(obj) raise TypeError("Type %s not serializable" % type(obj)) class StoryData: """ http://quixdb.github.io/squash-benchmark/ """ VERSION_GZIP = 1 VERSION_LZ4 = 2 VERSION_RAW = 3 __slots__ = ('_value', '_version') def __init__(self, value: bytes, version: int = None):
>>> encoded = encode_image_url(url, referer) >>> decoded = decode_image_url(encoded) >>> decoded['url'] == url True >>> decoded['referer'] == referer True """
import base64
import json
import brotli
from validr import T, Invalid
from rssant_common.validator import compiler


# Validator for the payload: a required `url` and an optional `referer`,
# both checked as URLs.
validate_image_url = compiler.compile(
    T.dict(
        url=T.url,
        referer=T.url.optional,
    ))


class ImageUrlEncodeError(Exception):
    """Image URL encode error.

    Presumably raised by ``encode_image_url``; the raise site is not
    visible in this fragment.
    """


class ImageUrlDecodeError(Exception):
    """Image URL decode error.

    Presumably raised by ``decode_image_url``; the raise site is not
    visible in this fragment.
    """


def encode_image_url(url, referer=None):
    try:
        # Validate the pair first, then serialize the validated dict to JSON.
        text = json.dumps(validate_image_url(dict(url=url, referer=referer)))
import time
import base64
import json
import hmac
import brotli
from validr import T, Invalid
from rssant_common.validator import compiler


# Validator for the token payload: an integer `timestamp` and an optional
# `referrer` checked as a URL.
validate_image_token = compiler.compile(
    T.dict(
        timestamp=T.int,
        referrer=T.url.optional,
    ))


class ImageTokenEncodeError(Exception):
    """Image token encode error (raise sites are not visible in this fragment)."""


class ImageTokenDecodeError(Exception):
    """Image token decode error (raise sites are not visible in this fragment)."""


class ImageTokenExpiredError(ImageTokenDecodeError):
    """Image token expired error; a subclass of ImageTokenDecodeError."""


class ImageToken:

    def __init__(self, *, referrer: str = None, timestamp: int = None):
        # A missing referrer becomes '', and the value is cut to 255 characters.
        self.referrer = (referrer or '')[:255]
        # Any falsy timestamp (None or 0) is replaced by the current Unix
        # time in whole seconds.
        self.timestamp = timestamp or int(time.time())
return text[:max(0, width - len(placeholder))] + placeholder def unsafe_kill_thread(thread_id): # https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/ if thread_id is None: return False res = ctypes.pythonapi.PyThreadState_SetAsyncExc( thread_id, ctypes.py_object(SystemExit)) if res > 1: ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, 0) LOG.error(f'kill thread#{thread_id} failed') return res <= 0 parse_actor_timer = internal_schema_compiler.compile( T.timedelta.min('1s').max('24h').object) def _get_function_name(fn): mod_name = getattr(fn, '__module__', None) name = getattr(fn, '__qualname__', None) if not name: name = getattr(fn, '__name__', None) if mod_name: return f'{mod_name}.{name}' else: return name def auto_restart_when_crash(fn): fn_name = _get_function_name(fn)