def states(request):
    """Yield a states backend chosen by ``request.param``.

    Supported values are ``"memory"``, ``"sqlalchemy"`` and ``"hbase"``.
    After the consumer resumes the generator, the SQLAlchemy backend is
    stopped and its engine disposed, and the HBase backend is stopped; the
    memory backend has no teardown step.

    Raises:
        KeyError: if ``request.param`` is not one of the supported values.
    """
    backend = request.param
    if backend == "memory":
        yield MemoryStates(100)
    elif backend == "sqlalchemy":
        # In-memory SQLite database; the states table is created explicitly.
        engine = create_engine('sqlite:///:memory:', echo=False)
        session_factory = sessionmaker()
        session_factory.configure(bind=engine)
        StateModel.__table__.create(bind=engine)
        backend_states = SQLAlchemyStates(session_factory, StateModel, 100)
        yield backend_states
        backend_states.frontier_stop()
        engine.dispose()
    elif backend == "hbase":
        hbase_states = HBaseState(
            get_hbase_connection(),
            b'states',
            cache_size_limit=300000,
            write_log_size=5000,
            drop_all_tables=True,
        )
        yield hbase_states
        hbase_states.frontier_stop()
    else:
        raise KeyError("Unknown backend param")
def test_drop_all_tables_when_table_name_is_str(self):
    """Check that HBase components can drop tables given ``str`` table names.

    All existing tables are deleted, then the queue, metadata and states
    tables are created up front. ``HBaseQueue``, ``HBaseMetadata`` and
    ``HBaseState`` are then constructed with their drop flags enabled and
    ``str`` (not ``bytes``) table names. An ``AlreadyExists`` error from any
    of them is reported as a failure to drop the pre-existing tables.
    """
    connection = Connection(host='hbase-docker', port=9090)
    # Start from a clean slate.
    for table in connection.tables():
        connection.delete_table(table, True)

    hbase_queue_table = 'queue'
    hbase_metadata_table = 'metadata'
    hbase_states_table = 'states'
    connection.create_table(hbase_queue_table, {'f': {'max_versions': 1}})
    connection.create_table(hbase_metadata_table, {'f': {'max_versions': 1}})
    connection.create_table(hbase_states_table, {'f': {'max_versions': 1}})

    tables = connection.tables()
    # Failure of test itself
    assert set(tables) == {b'metadata', b'queue', b'states'}

    try:
        HBaseQueue(connection=connection, partitions=1,
                   table_name=hbase_queue_table, use_snappy=False, drop=True)
        HBaseMetadata(connection=connection, table_name=hbase_metadata_table,
                      drop_all_tables=True, use_snappy=False,
                      batch_size=300000, store_content=True)
        HBaseState(connection, hbase_states_table, cache_size_limit=100,
                   write_log_size=10, drop_all_tables=True)
    except AlreadyExists:
        # Raised explicitly rather than via ``assert False`` so the failure
        # is still reported when assertions are stripped (``python -O``).
        raise AssertionError("failed to drop hbase tables")
def test_state(self):
    """Exercise the HBaseState cache through set, update, flush and fetch.

    Uses the module-level requests ``r1``..``r4`` and inspects the private
    ``_state_cache`` mapping after each step.
    """
    conn = Connection(host='hbase-docker', port=9090)
    state = HBaseState(conn, b'metadata', 300000)
    trio = (r1, r2, r3)
    cache_keys = (b'10', b'11', b'12')

    # After set_states every request is expected to be tagged NOT_CRAWLED.
    state.set_states(list(trio))
    observed = [req.meta[b'state'] for req in trio]
    assert observed == [States.NOT_CRAWLED]*3

    state.update_cache(list(trio))
    assert state._state_cache == dict.fromkeys(cache_keys, States.NOT_CRAWLED)

    # Mark all three as crawled, push to the cache, then flush it out.
    for req in trio:
        req.meta[b'state'] = States.CRAWLED
    state.update_cache(list(trio))
    state.flush(True)
    assert state._state_cache == {}

    # Fetching repopulates the cache with the flushed states.
    state.fetch(list(cache_keys))
    assert state._state_cache == dict.fromkeys(cache_keys, States.CRAWLED)

    # set_states is expected to replace r4's ERROR state with CRAWLED.
    r4.meta[b'state'] = States.ERROR
    state.set_states([r1, r2, r4])
    assert r4.meta[b'state'] == States.CRAWLED

    state.flush(True)
    assert state._state_cache == {}
def test_state(self):
    """Verify HBaseState caching: set, update, flush and fetch round trip.

    Works on the module-level requests ``r1``..``r4`` and checks the private
    ``_state_cache`` mapping between operations.
    """
    hbase_conn = Connection(host='hbase-docker', port=9090)
    state = HBaseState(hbase_conn, b'metadata', 300000)
    requests = (r1, r2, r3)
    keys = (b'10', b'11', b'12')

    state.set_states(list(requests))
    initial_states = [request.meta[b'state'] for request in requests]
    assert initial_states == [States.NOT_CRAWLED] * 3

    state.update_cache(list(requests))
    expected_not_crawled = {key: States.NOT_CRAWLED for key in keys}
    assert state._state_cache == expected_not_crawled

    # Promote every request to CRAWLED before caching and flushing.
    for request in requests:
        request.meta[b'state'] = States.CRAWLED
    state.update_cache(list(requests))
    state.flush(True)
    assert state._state_cache == {}

    # The cache is empty after the flush; fetch is expected to refill it.
    state.fetch(list(keys))
    expected_crawled = {key: States.CRAWLED for key in keys}
    assert state._state_cache == expected_crawled

    # r4 starts as ERROR; set_states is expected to leave it CRAWLED.
    r4.meta[b'state'] = States.ERROR
    state.set_states([r1, r2, r4])
    assert r4.meta[b'state'] == States.CRAWLED

    state.flush(True)
    assert state._state_cache == {}
def test_state(self):
    """Check HBaseState caching and write-batch accounting.

    Works on the module-level requests ``r1``..``r4``. Besides the private
    ``_state_cache`` contents, it tracks ``_state_batch._mutation_count``
    across ``update_cache`` and ``flush`` calls.
    """
    conn = Connection(host='hbase-docker', port=9090)
    state = HBaseState(
        conn,
        b'states',
        cache_size_limit=300000,
        write_log_size=5000,
        drop_all_tables=True,
    )
    trio = (r1, r2, r3)
    cache_keys = (b'10', b'11', b'12')

    state.set_states(list(trio))
    seen = [req.meta[b'state'] for req in trio]
    assert seen == [States.NOT_CRAWLED] * 3

    # The first cache update is expected to queue one mutation per request.
    state.update_cache(list(trio))
    cached = dict(state._state_cache)
    assert cached == dict.fromkeys(cache_keys, States.NOT_CRAWLED)
    assert state._state_batch._mutation_count == 3

    # A second update of the same requests doubles the pending mutations.
    for req in trio:
        req.meta[b'state'] = States.CRAWLED
    state.update_cache(list(trio))
    assert state._state_batch._mutation_count == 6

    state.flush()
    assert state._state_batch._mutation_count == 0

    state.fetch(list(cache_keys))
    cached = dict(state._state_cache)
    assert cached == dict.fromkeys(cache_keys, States.CRAWLED)

    # set_states is expected to replace r4's ERROR state with CRAWLED.
    r4.meta[b'state'] = States.ERROR
    state.set_states([r1, r2, r4])
    assert r4.meta[b'state'] == States.CRAWLED

    state.flush()
    assert state._state_batch._mutation_count == 0