def fx_stages():
    repo = MemoryRepository()
    session = Session('SESSID')
    stage = Stage(session, repo)
    other_session = Session('SESSID2')
    other_stage = Stage(other_session, repo)
    return stage, other_stage
def fx_revision_set():
    dt = datetime.datetime
    return RevisionSet([
        (Session('key1'), dt(2013, 9, 22, 16, 58, 57, tzinfo=utc)),
        (Session('key2'), dt(2013, 9, 22, 16, 59, 30, tzinfo=utc)),
        (Session('key3'), dt(2013, 9, 22, 17, 0, 30, tzinfo=utc)),
        (Session('key4'), dt(2013, 9, 22, 17, 10, 30, tzinfo=utc))
    ])
def fx_stages(tmpdir):
    if IRON_PYTHON:
        repo = MemoryRepository()
    else:
        repo = FileSystemRepository(str(tmpdir))
    session_a = Session(identifier='a')
    session_b = Session(identifier='b')
    stage_a = Stage(session_a, repo)
    stage_b = Stage(session_b, repo)
    return stage_a, stage_b
def test_session_revise():
    doc = TestMergeableDoc()
    min_updated_at = now()
    session = Session()
    session.revise(doc)
    assert isinstance(doc.__revision__, Revision)
    assert doc.__revision__.session is session
    assert min_updated_at <= doc.__revision__.updated_at <= now()
    time.sleep(0.1)
    min_updated_at = now()
    session.revise(doc)
    assert min_updated_at <= doc.__revision__.updated_at <= now()
def test_revision_set_contains(fx_revision_set):
    assert not fx_revision_set.contains(Revision(Session('key0'), now()))
    assert not fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2013, 9, 27, 16, 54, 50, tzinfo=utc)))
    assert fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2013, 9, 22, 16, 58, 57, tzinfo=utc)))
    assert fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2012, 9, 22, 16, 58, 57, tzinfo=utc)))
    assert not fx_revision_set.contains(
        Revision(Session('key0'),
                 datetime.datetime(2012, 9, 22, 16, 58, 57, tzinfo=utc)))
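# The assertions above pin down RevisionSet.contains(): a revision is
# contained when the set already records the same session at the same or
# a newer timestamp.  The function below is an inferred, non-authoritative
# sketch of that rule, not libearth's actual implementation.
def revision_set_contains(revision_set, revision):
    try:
        # Mapping lookup by session yields the recorded updated_at
        # (see test_revision_set below).
        return revision_set[revision.session] >= revision.updated_at
    except KeyError:
        # Unknown session: never contained.
        return False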
def test_session_pull(revised):
    s1 = Session('s1')
    s2 = Session('s2')
    a = TestMergeableDoc(multi_text=['a', 'b', 'c'])
    if revised:
        s1.revise(a)
    b = s2.pull(a)
    assert b is not a
    assert b.__revision__.session is s2
    if revised:
        assert b.__revision__.updated_at == a.__revision__.updated_at
    assert b.multi_text == ['a', 'b', 'c']
    assert a.multi_text is not b.multi_text
    if revised:
        assert a.__revision__.session is s1
def test_revision():
    session = Session()
    updated_at = now()
    rev = Revision(session, updated_at)
    assert rev == (session, updated_at)
    assert rev[0] is rev.session is session
    assert rev[1] == rev.updated_at == updated_at
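# The tuple equality and positional indexing asserted above are consistent
# with Revision behaving like a namedtuple.  Whether libearth defines it
# exactly this way is an assumption; the definition below only illustrates
# the observed shape.
import collections

RevisionPair = collections.namedtuple('RevisionPair',
                                      ['session', 'updated_at'])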
def get_stage():
    try:
        return app.config['STAGE']
    except KeyError:
        session_id = app.config['SESSION_ID']
        if request.environ['wsgi.multiprocess']:
            # Stage doesn't offer safe synchronization across multiple
            # processes.  Unique session identifiers are needed to
            # distinguish different "installations," which here technically
            # means "processes," so we append the pid to the session
            # identifier configured by the user to make them unique.
            # Note that this probably causes N times more disk usage
            # where N = the number of processes, so the docs should
            # discourage web servers of the prefork/worker model.
            session_id = '{0}.{1}'.format(session_id, os.getpid())
        session = Session(session_id)
        url = urlparse.urlparse(app.config['REPOSITORY'])
        if url.scheme == 'file':
            repository = FileSystemRepository(
                url.path,
                atomic=request.environ['wsgi.multithread']
            )
        else:
            repository = from_url(app.config['REPOSITORY'])
        stage = Stage(session, repository)
        app.config['STAGE'] = stage
        return stage
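# A minimal sketch of calling get_stage() from a request handler.  It
# assumes app is the Flask-style application object used above; the route
# path and view name are hypothetical, while stage.subscriptions is the
# same attribute the crawl command below reads.
@app.route('/subscriptions/')
def subscriptions():
    stage = get_stage()
    with stage:
        opml = stage.subscriptions
    return repr(opml)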
def test_intern():
    """Sessions of the same identifier cannot be multiple instances."""
    session = Session('id1')
    assert session is Session('id1')
    assert session is not Session('id2')
    assert session == Session('id1')
    assert session != Session('id2')
    assert hash(session) == hash(Session('id1'))
    assert hash(session) != hash(Session('id2'))
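# One common way to get the interning behavior this test relies on is to
# cache instances by identifier in __new__.  Whether libearth's Session
# uses exactly this mechanism is an assumption; the class below is
# illustrative only.
class InternedSession(object):
    _interned = {}

    def __new__(cls, identifier):
        try:
            # Reuse the existing instance for this identifier.
            return cls._interned[identifier]
        except KeyError:
            session = super(InternedSession, cls).__new__(cls)
            session.identifier = identifier
            cls._interned[identifier] = session
            return session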
def test_revision_codec():
    session = Session('test-identifier')
    updated_at = datetime.datetime(2013, 9, 22, 3, 43, 40, tzinfo=utc)
    rev = Revision(session, updated_at)
    codec = RevisionCodec()
    assert codec.encode(rev) == 'test-identifier 2013-09-22T03:43:40Z'
    assert codec.encode(tuple(rev)) == 'test-identifier 2013-09-22T03:43:40Z'
    decoded = codec.decode('test-identifier 2013-09-22T03:43:40Z')
    assert decoded == rev
    assert decoded.session is session
    assert decoded.updated_at == updated_at
def test_revision_set(fx_revision_set):
    assert isinstance(fx_revision_set, collections.Mapping)
    assert len(fx_revision_set) == 4
    assert set(fx_revision_set) == set([
        Session('key1'), Session('key2'), Session('key3'), Session('key4')
    ])
    assert (fx_revision_set[Session('key1')] ==
            datetime.datetime(2013, 9, 22, 16, 58, 57, tzinfo=utc))
    assert (fx_revision_set[Session('key2')] ==
            datetime.datetime(2013, 9, 22, 16, 59, 30, tzinfo=utc))
    for pair in fx_revision_set.items():
        assert isinstance(pair, Revision)
    assert fx_revision_set
    assert not RevisionSet()
def crawl_command(args):
    repo = from_url(args.repository)
    session = Session(args.session_id)
    stage = Stage(session, repo)
    with stage:
        opml = stage.subscriptions
    if not opml:
        print('OPML does not exist in the repository', file=sys.stderr)
        return
    feed_id = args.feed_id
    if feed_id:
        feed_map = dict((sub.feed_uri, sub.feed_id)
                        for sub in opml.recursive_subscriptions
                        if sub.feed_id == feed_id)
        if not feed_map:
            print('There is no such feed:', feed_id, file=sys.stderr)
            return
    else:
        feed_map = dict((sub.feed_uri, sub.feed_id)
                        for sub in opml.recursive_subscriptions)
        if not feed_map:
            print('No feeds to crawl', file=sys.stderr)
            return
    threads_count = args.threads if args.threads is not None else cpu_count()
    iterator = iter(crawl(feed_map.keys(), threads_count))
    while 1:
        try:
            feed_url, feed_data, crawler_hints = next(iterator)
            if args.verbose:
                print('{0.title} - {1} entries'.format(
                    feed_data, len(feed_data.entries)))
            with stage:
                feed_id = feed_map[feed_url]
                stage.feeds[feed_id] = feed_data
        except (CrawlError, SchemaError) as e:
            if isinstance(e, CrawlError):
                print('Something went wrong with', e.feed_uri,
                      file=sys.stderr)
                if args.verbose:
                    traceback.print_exc()
            else:
                print(e, file=sys.stderr)
        except StopIteration:
            break
def test_ensure_revision_pair():
    session = Session()
    updated_at = now()
    assert ensure_revision_pair((session, updated_at)) == (session,
                                                           updated_at)
    pair = ensure_revision_pair((session, updated_at), force_cast=True)
    assert isinstance(pair, Revision)
    assert pair == (session, updated_at)
    with raises(TypeError):
        ensure_revision_pair(())
    with raises(TypeError):
        ensure_revision_pair((session,))
    with raises(TypeError):
        ensure_revision_pair((session, updated_at, 1))
    with raises(TypeError):
        ensure_revision_pair(session)
    with raises(TypeError):
        ensure_revision_pair((session, 1))
    with raises(TypeError):
        ensure_revision_pair((1, updated_at))
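# Inferred behavior of ensure_revision_pair() from the test above: it
# validates a (session, updated_at) pair, raising TypeError for wrong
# shapes or member types, and casts to a Revision when force_cast is set.
# A sketch under those assumptions, not the actual libearth function.
def ensure_revision_pair_sketch(pair, force_cast=False):
    try:
        session, updated_at = pair
    except (TypeError, ValueError):
        # Not unpackable into exactly two items.
        raise TypeError('expected a (session, updated_at) pair')
    if not isinstance(session, Session):
        raise TypeError('the first item must be a Session')
    if not isinstance(updated_at, datetime.datetime):
        raise TypeError('the second item must be a datetime.datetime')
    return Revision(session, updated_at) if force_cast else pair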
def main():
    """Entrypoint for OS X."""
    root = tk.Tk()
    menubar = tk.Menu(root)
    filemenu = tk.Menu(menubar)
    filemenu.add_command(label="Open Browser",
                         command=lambda: open_webbrowser(port))
    menubar.add_cascade(label="File", menu=filemenu)
    root.config(menu=menubar)
    root.withdraw()
    directory = os.path.expanduser('~/.earthreader')
    repository = urllib.parse.urljoin('file://', directory)
    session_id = Session().identifier
    app.config.update(REPOSITORY=repository,
                      SESSION_ID=session_id,
                      USE_WORKER=True)
    server = create_server(app, port=0)
    port = server.effective_port
    proc = threading.Thread(target=server.run)
    proc.daemon = True
    proc.start()
    open_webbrowser(port)
    root.mainloop()
help="host to listen. [default: %(default)s]") server_parser.add_argument('-p', '--port', type=int, default=5000, help='port number to listen.' '[default: %(default)s]') server_parser.add_argument('-d', '--debug', default=False, action='store_true', help='debug mode. it makes the server possible to ' 'automatically restart when files touched.') server_parser.add_argument('-i', '--session-id', default=Session().identifier, help='session identifier. [default: %(default)s]') server_parser.add_argument('-P', '--profile', '--linesman', default=False, action='store_true', help="profile using linesman. it's available only " 'when linesman is installed') server_parser.add_argument('-w', '--no-worker', default=False, action='store_true', help='Disable worker thread that crawl feeds') server_parser.add_argument('repository', help='repository for Earth Reader')
def test_session_merge():
    #  s1      s2
    #  ----------
    # (1) a    b (2)
    #     |  / |
    # (3) c    b (4)
    #     | \  |
    #     |    d (5)
    #     |  /
    # (5) e
    s1 = Session('s1')
    a = TestMergeableDoc(
        attr='a',
        text='a',
        multi_text=['a', 'b', 'c'],
        unique_entities=[
            TestUniqueEntity(ident='a', value='s1-a'),
            TestUniqueEntity(ident='b', value='s1-b'),
            TestUniqueEntity(ident='c', value='s1-c')
        ],
        rev_entities=[
            TestRevisedEntity(ident='a', value='s1-a', rev=2),
            TestRevisedEntity(ident='b', value='s1-b', rev=2),
            TestRevisedEntity(ident='c', value='s1-c', rev=2)
        ],
        rev_entity=TestRevisedEntity(ident='a', value='s1', rev=1)
    )
    a_c = TestMergeableContentDoc(content='a')
    s1.revise(a)  # (1)
    s1.revise(a_c)
    wait()
    s2 = Session('s2')
    b = TestMergeableDoc(
        attr='b',
        text='b',
        multi_text=['d', 'e', 'f'],
        unique_entities=[
            TestUniqueEntity(ident='c', value='s2-c'),
            TestUniqueEntity(ident='d', value='s2-d'),
            TestUniqueEntity(ident='e', value='s2-e')
        ],
        rev_entities=[
            TestRevisedEntity(ident='b', value='s2-b', rev=1),
            TestRevisedEntity(ident='c', value='s2-c', rev=3),
            TestRevisedEntity(ident='d', value='s2-d', rev=2)
        ],
        rev_entity=TestRevisedEntity(ident='a', value='s2', rev=2)
    )
    b_c = TestMergeableContentDoc(content='b')
    s2.revise(b)  # (2)
    s2.revise(b_c)
    wait()
    c = s1.merge(b, a)  # (3)
    c_c = s1.merge(b_c, a_c)
    wait()
    assert c.__revision__.session is s1
    assert c.__revision__.updated_at > a.__revision__.updated_at
    assert c.__base_revisions__ == RevisionSet([a.__revision__,
                                                b.__revision__])
    print((c.attr, c.text, c_c.content))
    assert c.attr == c.text == c_c.content == 'b'
    assert list(c.multi_text) == ['a', 'b', 'c', 'd', 'e', 'f']
    assert ([entity.value for entity in c.unique_entities] ==
            ['s1-a', 's1-b', 's2-c', 's2-d', 's2-e'])
    assert ([(e.value, e.rev) for e in c.rev_entities] ==
            [('s1-a', 2), ('s1-b', 2), ('s2-c', 3), ('s2-d', 2)])
    assert c.rev_entity.rev == 2
    assert c.rev_entity.value == 's2'
    assert c.nullable is None
    c.nullable = TestUniqueEntity(ident='nullable', value='nullable')
    b.attr = b.text = b_c.content = 'd'
    b.multi_text.append('blah')
    b.unique_entities.append(TestUniqueEntity(ident='blah', value='s2-blah'))
    s2.revise(b)  # (4)
    s2.revise(b_c)
    wait()
    assert list(b.multi_text) == ['d', 'e', 'f', 'blah']
    assert ([entity.value for entity in b.unique_entities] ==
            ['s2-c', 's2-d', 's2-e', 's2-blah'])
    d = s2.merge(b, c)  # (5)
    d_c = s2.merge(b_c, c_c)
    wait()
    assert d.__revision__.session is s2
    assert d.__revision__.updated_at >= c.__revision__.updated_at
    assert d.__base_revisions__ == RevisionSet([b.__revision__,
                                                c.__revision__])
    assert d.attr == d.text == d_c.content == 'd'
    assert list(d.multi_text) == ['a', 'b', 'c', 'd', 'e', 'f', 'blah']
    assert ([entity.value for entity in d.unique_entities] ==
            ['s1-a', 's1-b', 's2-c', 's2-d', 's2-e', 's2-blah'])
    assert d.nullable is not None
    assert d.nullable.value == 'nullable'
    e = s1.merge(c, d)  # (5)
    e_c = s1.merge(c_c, d_c)
    wait()
    assert e.__revision__.session is s1
    assert e.__revision__.updated_at == d.__revision__.updated_at
    assert e.__base_revisions__ == d.__base_revisions__
    assert e.attr == e.text == e_c.content == 'd'
    assert list(e.multi_text) == ['a', 'b', 'c', 'd', 'e', 'f', 'blah']
    assert ([entity.value for entity in e.unique_entities] ==
            ['s1-a', 's1-b', 's2-c', 's2-d', 's2-e', 's2-blah'])
def test_session_pull_same_session():
    session = Session('s1')
    doc = TestMergeableDoc()
    session.revise(doc)
    assert session.pull(doc) is doc
def get_session():
    session_id = 'ergae-{0}'.format(get_application_id())
    return Session(session_id)
def fx_session():
    return Session(identifier='SESSID')
def fx_other_session():
    return Session(identifier='SESSID2')
@mark.parametrize(('iterable', 'rv'), [
    (['<doc ',
      'xmlns:s="', SESSION_XMLNS,
      '" s:revision="test 2013-09-22T03:43:40Z" ',
      's:bases="" ',
      '/>'],
     (Revision(Session('test'),
               datetime.datetime(2013, 9, 22, 3, 43, 40, tzinfo=utc)),
      RevisionSet())),
    (['<doc ',
      'xmlns:s="', SESSION_XMLNS,
      '" s:revision="test 2013-09-22T03:43:40Z" ',
      's:bases="">',
      '<a />',
      '</doc>'],
     (Revision(Session('test'),
               datetime.datetime(2013, 9, 22, 3, 43, 40, tzinfo=utc)),
      RevisionSet())),
    (['<doc ',
      'xmlns:s="', SESSION_XMLNS,
      '" s:revision="test 2013-09-22T03:43:40Z" ',
      's:bases=""><a /></doc>'],
     (Revision(Session('test'),
               datetime.datetime(2013, 9, 22, 3, 43, 40, tzinfo=utc)),
      RevisionSet())),
def test_invalid_identifier():
    with raises(ValueError):
        Session('i n v a l i d')
    with raises(ValueError):
        Session('i*n*v*a*l*i*d')
    with raises(ValueError):
        Session('i+n+v+a+l+i+d')
    with raises(ValueError):
        Session('i/n/v/a/l/i/d')
    with raises(ValueError):
        Session('i\nn\nv\na\nl\ni\nd')
    with raises(ValueError):
        Session('i\tn\tv\ta\tl\ti\td')
    with raises(ValueError):
        Session('(invalid)')
    Session('valid')
    Session('v-a-l-i-d')
    Session('v.a.l.i.d')
    Session('v_a_l_i_d')
    Session('v1a2l3i4d')
    Session('v-a.l_i4d')
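# The cases above are consistent with identifiers limited to letters,
# digits, '-', '.', and '_'.  The exact pattern libearth enforces is an
# assumption; this regex merely matches every accepted example in the
# test while rejecting every invalid one.
import re

VALID_IDENTIFIER_PATTERN = re.compile(r'\A[A-Za-z0-9._-]+\Z')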
def test_default_identifier():
    assert Session().identifier != ''
    assert Session().identifier is not None
    assert Session().identifier != Session.identifier
def fx_test_stage(tmpdir):
    session = Session()
    repo = FileSystemRepository(str(tmpdir))
    stage = Stage(session, repo)
    app.config['STAGE'] = stage
    return stage
def test_revision_set_merge(fx_revision_set):
    dt = datetime.datetime
    initial = fx_revision_set.copy()
    with raises(TypeError):
        fx_revision_set.merge()
    with raises(TypeError):
        fx_revision_set.merge(fx_revision_set, [])
    assert fx_revision_set.merge(fx_revision_set) == fx_revision_set
    assert fx_revision_set == initial
    session_a = Session()
    session_b = Session()
    merged = fx_revision_set.merge(
        RevisionSet([
            (Session('key1'), dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
            (Session('key2'), dt(2012, 9, 23, 18, 40, 48, tzinfo=utc)),
            (session_a, dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
            (session_b, dt(2013, 9, 23, 18, 41, 00, tzinfo=utc))
        ])
    )
    assert merged == RevisionSet([
        (Session('key1'), dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
        (Session('key2'), dt(2013, 9, 22, 16, 59, 30, tzinfo=utc)),
        (Session('key3'), dt(2013, 9, 22, 17, 0, 30, tzinfo=utc)),
        (Session('key4'), dt(2013, 9, 22, 17, 10, 30, tzinfo=utc)),
        (session_a, dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
        (session_b, dt(2013, 9, 23, 18, 41, 00, tzinfo=utc))
    ])
    assert fx_revision_set == initial
    merged = fx_revision_set.merge(
        RevisionSet([
            (Session('key1'), dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
            (Session('key2'), dt(2012, 9, 23, 18, 40, 48, tzinfo=utc)),
            (session_a, dt(2013, 9, 23, 18, 40, 48, tzinfo=utc))
        ]),
        RevisionSet([
            (Session('key3'), dt(2012, 9, 22, 17, 0, 30, tzinfo=utc)),
            (Session('key4'), dt(2013, 9, 23, 19, 10, 30, tzinfo=utc)),
            (session_a, dt(2013, 9, 23, 19, 8, 47, tzinfo=utc))
        ])
    )
    assert merged == RevisionSet([
        (Session('key1'), dt(2013, 9, 23, 18, 40, 48, tzinfo=utc)),
        (Session('key2'), dt(2013, 9, 22, 16, 59, 30, tzinfo=utc)),
        (Session('key3'), dt(2013, 9, 22, 17, 0, 30, tzinfo=utc)),
        (Session('key4'), dt(2013, 9, 23, 19, 10, 30, tzinfo=utc)),
        (session_a, dt(2013, 9, 23, 19, 8, 47, tzinfo=utc))
    ])
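# Inferred from the assertions above: RevisionSet.merge() is pure (the
# receiver is left untouched) and keeps, for each session appearing in
# any operand, the newest updated_at.  A non-authoritative sketch of that
# rule, not libearth's implementation:
def merge_revision_sets(*revision_sets):
    merged = {}
    for revision_set in revision_sets:
        # items() yields (session, updated_at) pairs, as test_revision_set
        # above shows.
        for session, updated_at in revision_set.items():
            if session not in merged or merged[session] < updated_at:
                merged[session] = updated_at
    return merged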
# The imports below the first two are assumed from the names this fragment
# uses (Python 2 style, matching the urlparse usage); app and spawn_worker
# come from elsewhere in the package.
import os
import threading
import urlparse
import webbrowser
import Tkinter as tk

from libearth.session import Session
from waitress.server import create_server


def serve():
    server.run()


def open_webbrowser(port):
    webbrowser.open('http://0.0.0.0:{}'.format(port))


if __name__ == "__main__":
    directory = os.path.expanduser('~/.earthreader')
    repository = urlparse.urljoin('file://', directory)
    session_id = Session().identifier
    app.config.update(REPOSITORY=repository, SESSION_ID=session_id)
    server = create_server(app, port=0)
    port = server.effective_port
    spawn_worker()
    proc = threading.Thread(target=serve)
    proc.daemon = True
    proc.start()
    open_webbrowser(port)
    root = tk.Tk()
    menubar = tk.Menu(root)
    filemenu = tk.Menu(menubar)
    filemenu.add_command(label="Open Browser",
                         command=lambda: open_webbrowser(port))
    menubar.add_cascade(label="File", menu=filemenu)
    root.config(menu=menubar)