Example #1
0
def category_entries(category_id):
    """Build the JSON listing of entries for the category ``category_id``.

    A previously saved entry generator is reused when ``get_optional_args()``
    supplies a ``url_token`` that is still known.  Otherwise a
    ``CategoryEntryGenerator`` is assembled from one ``FeedEntryGenerator``
    per subscription reachable through the cursor's
    ``recursive_subscriptions``; subscriptions whose feed is missing from the
    stage are skipped.

    The generator is saved under the token, old generators are tidied up and
    one batch of entries is returned together with ``read_url`` and
    ``next_url``.  ``next_url`` is ``None`` when the batch is empty or holds
    fewer than ``PAGE_SIZE`` entries.
    """
    cursor = Cursor(category_id)
    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if not url_token:
        # No token supplied: mint one from the current time.
        url_token = text_type(now())
    else:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: fall through and rebuild the generator below.
            generator = None

    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        id_after = time_after = None
        if entry_after:
            id_after, time_after = decode_entry_after(entry_after)
        for subscription in subscriptions:
            try:
                with get_stage() as stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                # The subscribed feed is not on the stage; skip it.
                continue
            title = text_type(feed.title)
            entry_iter = iter(feed.entries)
            permalink = get_permalink(feed)
            generator.add(
                FeedEntryGenerator(category_id, subscription.feed_id, title,
                                   permalink, entry_iter, now(), read,
                                   starred)
            )
        generator.set_generators(id_after, time_after)

    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()

    next_url = None
    if entries and len(entries) >= app.config['PAGE_SIZE']:
        # A full page came back, so offer a link continuing after its
        # last entry.
        last_entry = entries[-1]
        position = encode_entry_after(last_entry['entry_id'],
                                      last_entry['updated'])
        next_url = make_next_url(category_id, url_token, position, read,
                                 starred)
    elif not entries:
        # Nothing was produced; the saved generator is no longer needed.
        remove_entry_generator(url_token)

    # FIXME: use Entry.updated_at instead of from json data.
    codec = Rfc3339()
    last_updated_at = ''
    if len(entries) and not entry_after:
        # Only computed when no ``entry_after`` was supplied.
        newest = max(codec.decode(entry['updated']) for entry in entries)
        last_updated_at = newest.isoformat()

    title = category_id.split('/')[-1][1:] or app.config['ALLFEED']
    read_url = url_for('read_all_entries',
                       category_id=category_id,
                       last_updated=last_updated_at,
                       _external=True)
    return jsonify(title=title,
                   entries=entries,
                   read_url=read_url,
                   next_url=next_url)
Example #2
0
File: app.py Project: klutzy/web
def feed_entries(category_id, feed_id):
    """Return one JSON page of entries for the feed ``feed_id``.

    Responds with a 404 JSON error when the feed is not on the stage.
    A saved ``FeedEntryGenerator`` is reused when the ``url_token`` returned
    by ``get_optional_args()`` is still known; otherwise a new one is created
    and positioned with ``set_iterator(entry_after)``.  If positioning raises
    ``StopIteration`` an empty listing is returned immediately.
    """
    stage = get_stage()
    # The instance is discarded; presumably it is constructed only so that a
    # bad category id raises here -- TODO confirm.
    Cursor(category_id)
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        not_found = jsonify(error='feed-not-found',
                            message='Given feed does not exist')
        not_found.status_code = 404
        return not_found

    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if not url_token:
        # No token supplied: mint one from the current time.
        url_token = str(now())
    else:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: rebuild the generator below.
            generator = None

    if not generator:
        entry_iter = iter(feed.entries)
        title = clean_html(str(feed.title))
        permalink = get_permalink(feed)
        generator = FeedEntryGenerator(category_id, feed_id, title, permalink,
                                       entry_iter, now(), read, starred)
        try:
            generator.set_iterator(entry_after)
        except StopIteration:
            # Nothing left at the requested position: return an empty page.
            return jsonify(title=generator.feed_title,
                           entries=[],
                           next_url=None)

    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()

    if len(entries) >= app.config['PAGE_SIZE']:
        # A full page came back, so link to the page after its last entry.
        next_url = make_next_url(category_id, url_token,
                                 entries[-1]['entry_id'], read, starred,
                                 feed_id)
    else:
        next_url = None
        if not entries:
            # Nothing was produced; drop the saved generator.
            remove_entry_generator(url_token)

    return jsonify(title=clean_html(str(feed.title)),
                   entries=entries,
                   next_url=next_url)
Example #3
0
def test_session_revise():
    """``Session.revise()`` stamps the document with an up-to-date revision."""
    document = TestMergeableDoc()
    lower_bound = now()
    session = Session()

    session.revise(document)
    revision = document.__revision__
    assert isinstance(revision, Revision)
    assert revision.session is session
    assert lower_bound <= revision.updated_at <= now()

    # Wait a little, then revise again: the timestamp must move forward.
    time.sleep(0.1)
    lower_bound = now()
    session.revise(document)
    assert lower_bound <= document.__revision__.updated_at <= now()
Example #4
0
def test_stage_write(fx_repo, fx_session, fx_stage):
    """Writing through the stage revises the document and stores its XML."""
    key = 'doc.{0}.xml'.format(fx_session.identifier)
    document = TestDoc()
    started_at = now()
    with fx_stage:
        written = fx_stage.write([key], document)

    revision = written.__revision__
    assert revision.session is fx_session
    assert started_at <= revision.updated_at <= now()

    # The repository must hold data that reads back into an equal revision.
    stored_xml = fx_repo.data[key]
    loaded = read(TestDoc, [stored_xml])
    assert isinstance(loaded, TestDoc)
    assert loaded.__revision__ == written.__revision__
Example #5
0
def test_session_revise():
    """Revising a document twice yields a revision no older than each call."""
    doc = TestMergeableDoc()

    def revised_since(moment):
        # True when the current revision timestamp lies in [moment, now()].
        return moment <= doc.__revision__.updated_at <= now()

    first_call_at = now()
    session = Session()
    session.revise(doc)
    assert isinstance(doc.__revision__, Revision)
    assert doc.__revision__.session is session
    assert revised_since(first_call_at)

    time.sleep(0.1)
    second_call_at = now()
    session.revise(doc)
    assert revised_since(second_call_at)
Example #6
0
File: app.py Project: klutzy/web
def category_entries(category_id):
    """Return a JSON page of entries gathered from a category's feeds.

    The ``url_token`` obtained from ``get_optional_args()`` selects a saved
    entry generator.  When there is no token, or the token is unknown, a
    ``CategoryEntryGenerator`` is built with one ``FeedEntryGenerator`` per
    subscription in the cursor's ``recursive_subscriptions``; feeds missing
    from the stage are skipped.  The response carries ``title``, ``entries``
    and ``next_url``; ``next_url`` is ``None`` unless a full page of
    ``PAGE_SIZE`` entries was produced.
    """
    generator = None
    cursor = Cursor(category_id)
    url_token, entry_after, read, starred = get_optional_args()

    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: a new generator is built below.
            generator = None
    else:
        # No token supplied: mint one from the current time.
        url_token = str(now())

    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        if entry_after:
            id_after, time_after = decode_entry_after(entry_after)
        else:
            id_after, time_after = None, None
        for subscription in subscriptions:
            try:
                with get_stage() as stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                continue
            hashed_id = get_hash(feed.id)
            title = clean_html(str(feed.title))
            entry_iter = iter(feed.entries)
            permalink = get_permalink(feed)
            generator.add(
                FeedEntryGenerator(category_id, hashed_id, title, permalink,
                                   entry_iter, now(), read, starred)
            )
        generator.set_generators(id_after, time_after)

    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()

    has_full_page = bool(entries) and \
        len(entries) >= app.config['PAGE_SIZE']
    if has_full_page:
        last_entry = entries[-1]
        marker = encode_entry_after(last_entry['entry_id'],
                                    last_entry['updated'])
        next_url = make_next_url(category_id, url_token, marker, read,
                                 starred)
    else:
        next_url = None
        if not entries:
            # Nothing was produced; the saved generator is not needed.
            remove_entry_generator(url_token)

    title = category_id.split('/')[-1][1:] or app.config['ALLFEED']
    return jsonify(title=title, entries=entries, next_url=next_url)
Example #7
0
def test_stage_write(fx_repo, fx_session, fx_stage):
    """``Stage.write()`` returns a revised document and persists it."""
    path_key = 'doc.{0}.xml'.format(fx_session.identifier)
    earliest = now()
    original = TestDoc()
    with fx_stage:
        stored = fx_stage.write([path_key], original)

    assert stored.__revision__.session is fx_session
    assert earliest <= stored.__revision__.updated_at <= now()

    # Read the document back from the repository data.
    reread = read(TestDoc, [fx_repo.data[path_key]])
    assert isinstance(reread, TestDoc)
    assert reread.__revision__ == stored.__revision__
Example #8
0
def test_revision():
    """``Revision`` compares equal to a ``(session, updated_at)`` pair."""
    session = Session()
    timestamp = now()
    revision = Revision(session, timestamp)
    assert revision == (session, timestamp)
    # Positional access and the named attributes expose the same values.
    assert revision[0] is revision.session is session
    assert revision[1] == revision.updated_at == timestamp
Example #9
0
def test_revision():
    """A ``Revision`` exposes its members by index and by attribute."""
    owner = Session()
    moment = now()
    pair = Revision(owner, moment)
    assert pair == (owner, moment)
    first, second = pair[0], pair[1]
    assert first is pair.session is owner
    assert second == pair.updated_at == moment
Example #10
0
def tidy_generators_up():
    """Prune the module-level ``entry_generators`` mapping.

    Entries saved more than 30 minutes ago are dropped.  Of the remaining
    entries only the 10 most recently saved are kept.  The global name is
    rebound to a new ``dict`` holding the survivors.
    """
    global entry_generators
    # Compute the cutoff once so that every entry is judged against the same
    # instant instead of calling ``now()`` again for each item.
    cutoff = now() - datetime.timedelta(minutes=30)
    fresh = [
        (key, (it, time_saved))
        for key, (it, time_saved) in entry_generators.items()
        if time_saved >= cutoff
    ]
    # Most recently saved first, so the slice keeps the newest ones.
    fresh.sort(key=lambda item: item[1][1], reverse=True)
    entry_generators = dict(fresh[:10])
Example #11
0
def tidy_generators_up():
    """Discard old saved entry generators and cap how many are kept.

    Only generators saved within the last 30 minutes survive, and at most
    the 10 most recently saved of those.  The module-level
    ``entry_generators`` name is rebound to the resulting ``dict``.
    """
    global entry_generators
    # The expiry threshold does not depend on the item being examined, so it
    # is evaluated a single time before the loop.
    oldest_allowed = now() - datetime.timedelta(minutes=30)
    survivors = []
    for key, (it, time_saved) in entry_generators.items():
        if time_saved >= oldest_allowed:
            survivors.append((key, (it, time_saved)))
    # Order by save time, newest first, before truncating.
    survivors = sorted(survivors, key=lambda pair: pair[1][1], reverse=True)
    entry_generators = dict(survivors[:10])
Example #12
0
def test_revision_set_contains(fx_revision_set):
    """Check ``contains()`` for revisions expected in and out of the set."""
    assert not fx_revision_set.contains(Revision(Session('key0'), now()))
    # (session identifier, datetime fields, expected membership)
    expectations = (
        ('key1', (2013, 9, 27, 16, 54, 50), False),
        ('key1', (2013, 9, 22, 16, 58, 57), True),
        ('key1', (2012, 9, 22, 16, 58, 57), True),
        ('key0', (2012, 9, 22, 16, 58, 57), False),
    )
    for identifier, fields, expected in expectations:
        revision = Revision(Session(identifier),
                            datetime.datetime(*fields, tzinfo=utc))
        if expected:
            assert fx_revision_set.contains(revision)
        else:
            assert not fx_revision_set.contains(revision)
Example #13
0
def test_revision_set_contains(fx_revision_set):
    """``contains()`` answers per session key and revision timestamp."""
    def has(session_key, updated_at):
        # Ask the fixture whether it contains a freshly built revision.
        return fx_revision_set.contains(
            Revision(Session(session_key), updated_at)
        )

    def at(year, month, day, hour, minute, second):
        return datetime.datetime(year, month, day, hour, minute, second,
                                 tzinfo=utc)

    assert not has('key0', now())
    assert not has('key1', at(2013, 9, 27, 16, 54, 50))
    assert has('key1', at(2013, 9, 22, 16, 58, 57))
    assert has('key1', at(2012, 9, 22, 16, 58, 57))
    assert not has('key0', at(2012, 9, 22, 16, 58, 57))
Example #14
0
def test_ensure_revision_pair():
    """``ensure_revision_pair()`` accepts valid pairs and rejects the rest."""
    session = Session()
    updated_at = now()
    expected = (session, updated_at)

    assert ensure_revision_pair((session, updated_at)) == expected
    cast = ensure_revision_pair((session, updated_at), force_cast=True)
    assert isinstance(cast, Revision)
    assert cast == expected

    # Wrong length, not a tuple at all, or wrongly typed members.
    invalid_values = [
        (),
        (session,),
        (session, updated_at, 1),
        session,
        (session, 1),
        (1, updated_at),
    ]
    for value in invalid_values:
        with raises(TypeError):
            ensure_revision_pair(value)
Example #15
0
def test_ensure_revision_pair():
    """Valid pairs pass through (or are cast); malformed input raises."""
    def rejects(value):
        # The call must raise ``TypeError`` for ``value``.
        with raises(TypeError):
            ensure_revision_pair(value)

    owner = Session()
    moment = now()

    plain = ensure_revision_pair((owner, moment))
    assert plain == (owner, moment)
    forced = ensure_revision_pair((owner, moment), force_cast=True)
    assert isinstance(forced, Revision)
    assert forced == (owner, moment)

    rejects(())
    rejects((owner, ))
    rejects((owner, moment, 1))
    rejects(owner)
    rejects((owner, 1))
    rejects((1, moment))
Example #16
0
def category_entries(category_id):
    """Return a JSON page of entries from every feed under a category.

    The ``url_token`` obtained from ``get_optional_args()`` selects a saved
    entry generator.  When there is no token, or the token is unknown, a
    ``CategoryEntryGenerator`` is built with one ``FeedEntryGenerator`` per
    subscription in the cursor's ``recursive_subscriptions``.  Subscriptions
    whose feed is missing from the stage, or whose generator raises
    ``StopIteration`` on construction, are skipped.

    :param category_id: the category path; its last ``/``-separated segment
                        (minus the first character) is used as the title
    :returns: a JSON response with ``title``, ``entries``, ``read_url``,
              ``crawl_url`` (``None`` unless the worker is running) and
              ``next_url`` (``None`` unless a full page of ``PAGE_SIZE``
              entries was produced)
    """
    cursor = Cursor(category_id)
    generator = None
    url_token, entry_after, read, starred = get_optional_args()
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: rebuild the generator below.
            pass
    else:
        url_token = text_type(now())
    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        if entry_after:
            # ``entry_after`` has the form ``<entry id>@<updated>``.
            id_after, time_after = entry_after.split('@')
        else:
            time_after = None
            id_after = None
        for subscription in subscriptions:
            try:
                with stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                continue
            feed_title = text_type(feed.title)
            it = iter(feed.entries)
            feed_permalink = get_permalink(feed)
            try:
                child = FeedEntryGenerator(category_id, subscription.feed_id,
                                           feed_title, feed_permalink, it,
                                           now(), read, starred)
            except StopIteration:
                continue
            generator.add(child)
        generator.set_generators(id_after, time_after)
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if not entries or len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        # Use a separate name here.  ``entry_after`` must keep the value
        # received from ``get_optional_args()`` because it is consulted below
        # to decide whether ``last_updated_at`` is computed; overwriting it
        # made that check fail whenever a next page existed.
        next_entry_after = (entries[-1]['entry_id'] + '@' +
                            entries[-1]['updated'])
        next_url = make_next_url(category_id, url_token, next_entry_after,
                                 read, starred)

    # FIXME: use Entry.updated_at instead of from json data.
    codec = Rfc3339()
    last_updated_at = ''
    if len(entries) and not entry_after:
        last_updated_at = max(codec.decode(x['updated'])
                              for x in entries).isoformat()

    if worker.is_running():
        # No trailing comma: it would turn the URL into a 1-tuple, which is
        # serialized as a JSON array instead of a string.
        crawl_url = url_for('update_entries', category_id=category_id)
    else:
        crawl_url = None
    return jsonify(
        title=category_id.split('/')[-1][1:] or app.config['ALLFEED'],
        entries=entries,
        read_url=url_for('read_all_entries', category_id=category_id,
                         last_updated=last_updated_at,
                         _external=True),
        crawl_url=crawl_url,
        next_url=next_url
    )
Example #17
0
def feed_entries(category_id, feed_id):
    """Return one JSON page of entries for the feed ``feed_id``.

    Responds with a 404 JSON error when ``Cursor(category_id)`` raises
    ``InvalidCategoryID`` or when the feed is not on the stage, and with an
    empty 304 when the request's ``If-Modified-Since`` is not older than the
    feed revision (compared at whole-second precision).

    Otherwise a saved ``FeedEntryGenerator`` is reused, or a new one is
    created, and one batch of entries is returned with ``next_url``,
    ``read_url`` and ``crawl_url``.  When the feed has a revision, the
    response's ``last_modified`` is set to its ``updated_at``.
    """
    def not_found(error, message):
        # Build the 404 JSON error response used by both lookup failures.
        failure = jsonify(error=error, message=message)
        failure.status_code = 404
        return failure

    try:
        Cursor(category_id)
    except InvalidCategoryID:
        return not_found('category-id-invalid',
                         'Given category does not exist')
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        return not_found('feed-not-found', 'Given feed does not exist')

    updated_at = None
    if feed.__revision__:
        updated_at = feed.__revision__.updated_at
        since = request.if_modified_since
        if since:
            since = since.replace(tzinfo=utc)
            # Drop microseconds before comparing with the header value.
            last_modified = updated_at.replace(microsecond=0)
            if since >= last_modified:
                return '', 304, {}  # Not Modified

    crawl_url = None
    if worker.is_running():
        crawl_url = url_for('update_entries',
                            category_id=category_id,
                            feed_id=feed_id)

    def read_url():
        # Falls back to the current time when the feed has no revision.
        stamp = (updated_at or now()).isoformat()
        return url_for('read_all_entries',
                       feed_id=feed_id,
                       last_updated=stamp,
                       _external=True)

    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if not url_token:
        url_token = text_type(now())
    else:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: rebuild the generator below.
            generator = None

    if not generator:
        entry_iter = iter(feed.entries)
        feed_title = text_type(feed.title)
        permalink = get_permalink(feed)
        try:
            generator = FeedEntryGenerator(category_id, feed_id, feed_title,
                                           permalink, entry_iter, now(),
                                           read, starred)
            generator.set_iterator(entry_after)
        except StopIteration:
            # Nothing to list from the requested position.
            return jsonify(title=feed_title,
                           entries=[],
                           next_url=None,
                           read_url=read_url(),
                           crawl_url=crawl_url)

    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()

    if len(entries) >= app.config['PAGE_SIZE']:
        next_url = make_next_url(category_id, url_token,
                                 entries[-1]['entry_id'], read, starred,
                                 feed_id)
    else:
        next_url = None
        if not entries:
            # Nothing was produced; drop the saved generator.
            remove_entry_generator(url_token)

    response = jsonify(title=text_type(feed.title),
                       entries=entries,
                       next_url=next_url,
                       read_url=read_url(),
                       crawl_url=crawl_url)
    if feed.__revision__:
        response.last_modified = updated_at
    return response
Example #18
0
def save_entry_generators(url_token, generator):
    """Store ``generator`` under ``url_token`` along with the current time."""
    saved_at = now()
    entry_generators[url_token] = (generator, saved_at)
Example #19
0
def save_entry_generators(url_token, generator):
    """Remember ``generator`` for ``url_token``, timestamped with ``now()``.

    The value stored in ``entry_generators`` is a
    ``(generator, time saved)`` pair.
    """
    record = (generator, now())
    entry_generators[url_token] = record
Example #20
0
def test_now():
    """``now()`` falls between two UTC clock readings taken around it."""
    def utc_clock():
        # Current UTC time tagged with the ``utc`` tzinfo.
        return datetime.datetime.utcnow().replace(tzinfo=utc)

    lower = utc_clock()
    value = now()
    upper = utc_clock()
    assert lower <= value <= upper
Example #21
0
def feed_entries(category_id, feed_id):
    """Return one JSON page of entries for the feed ``feed_id``.

    Responds with a 404 JSON error when the feed is not on the stage, and
    with an empty 304 when the request's ``If-Modified-Since`` is not older
    than the feed revision (compared at whole-second precision).

    Otherwise a saved ``FeedEntryGenerator`` is reused, or a new one is
    created, and one batch of entries is returned with ``next_url`` and
    ``read_url``.  When the feed has a revision, the response's
    ``last_modified`` is set to its ``updated_at``.
    """
    stage = get_stage()
    # The instance is discarded; presumably it is constructed only so that a
    # bad category id raises here -- TODO confirm.
    Cursor(category_id)
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        missing = jsonify(error='feed-not-found',
                          message='Given feed does not exist')
        missing.status_code = 404
        return missing

    updated_at = None
    if feed.__revision__:
        updated_at = feed.__revision__.updated_at
        since = request.if_modified_since
        if since:
            since = since.replace(tzinfo=utc)
            # Drop microseconds before comparing with the header value.
            last_modified = updated_at.replace(microsecond=0)
            if since >= last_modified:
                return '', 304, {}  # Not Modified

    def read_url():
        # Falls back to the current time when the feed has no revision.
        return url_for('read_all_entries',
                       feed_id=feed_id,
                       last_updated=(updated_at or now()).isoformat(),
                       _external=True)

    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if not url_token:
        url_token = text_type(now())
    else:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            # Unknown token: rebuild the generator below.
            generator = None

    if not generator:
        entry_iter = iter(feed.entries)
        title = text_type(feed.title)
        permalink = get_permalink(feed)
        generator = FeedEntryGenerator(category_id, feed_id, title, permalink,
                                       entry_iter, now(), read, starred)
        try:
            generator.set_iterator(entry_after)
        except StopIteration:
            # Nothing to list from the requested position.
            return jsonify(title=generator.feed_title,
                           entries=[],
                           next_url=None,
                           read_url=read_url())

    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()

    if len(entries) >= app.config['PAGE_SIZE']:
        last_entry_id = entries[-1]['entry_id']
        next_url = make_next_url(category_id, url_token, last_entry_id, read,
                                 starred, feed_id)
    else:
        next_url = None
        if not entries:
            # Nothing was produced; drop the saved generator.
            remove_entry_generator(url_token)

    response = jsonify(title=text_type(feed.title),
                       entries=entries,
                       next_url=next_url,
                       read_url=read_url())
    if feed.__revision__:
        response.last_modified = updated_at
    return response