예제 #1
0
def states(request):
    """Yield a states backend selected by ``request.param``.

    Recognised values are ``"memory"``, ``"sqlalchemy"`` and ``"hbase"``.
    The statements after each ``yield`` run when the generator is resumed,
    which is where the SQLAlchemy and HBase backends are stopped.

    NOTE(review): this looks like a parametrized pytest fixture, but the
    decorator is not part of this snippet -- confirm.

    Raises:
        KeyError: if ``request.param`` is none of the recognised values.
    """
    backend = request.param

    if backend == "memory":
        yield MemoryStates(100)

    elif backend == "sqlalchemy":
        # SQLite in-memory database; the states table is created explicitly
        # before the backend is handed out.
        db_engine = create_engine('sqlite:///:memory:', echo=False)
        session_factory = sessionmaker()
        session_factory.configure(bind=db_engine)
        StateModel.__table__.create(bind=db_engine)
        sql_states = SQLAlchemyStates(session_factory, StateModel, 100)
        yield sql_states
        # Teardown: stop the backend first, then release the engine.
        sql_states.frontier_stop()
        db_engine.dispose()

    elif backend == "hbase":
        hbase_conn = get_hbase_connection()
        hbase_states = HBaseState(
            hbase_conn,
            b'states',
            cache_size_limit=300000,
            write_log_size=5000,
            drop_all_tables=True,
        )
        yield hbase_states
        hbase_states.frontier_stop()

    else:
        raise KeyError("Unknown backend param")
예제 #2
0
def states(request):
    """Build, yield and afterwards shut down the requested states backend.

    ``request.param`` picks the implementation: ``"memory"``,
    ``"sqlalchemy"`` or ``"hbase"``. Code placed after a ``yield`` is the
    cleanup for that backend and runs once the generator is resumed.

    NOTE(review): presumably decorated as a parametrized pytest fixture
    outside this snippet -- confirm.

    Raises:
        KeyError: for any other value of ``request.param``.
    """
    kind = request.param

    if kind == "memory":
        yield MemoryStates(100)

    elif kind == "sqlalchemy":
        # Throwaway in-memory SQLite database bound to a fresh session class.
        sqlite_engine = create_engine('sqlite:///:memory:', echo=False)
        make_session = sessionmaker()
        make_session.configure(bind=sqlite_engine)
        StateModel.__table__.create(bind=sqlite_engine)
        backend = SQLAlchemyStates(make_session, StateModel, 100)
        yield backend
        backend.frontier_stop()
        sqlite_engine.dispose()

    elif kind == "hbase":
        connection = get_hbase_connection()
        hbase_kwargs = dict(cache_size_limit=300000,
                            write_log_size=5000,
                            drop_all_tables=True)
        backend = HBaseState(connection, b'states', **hbase_kwargs)
        yield backend
        backend.frontier_stop()

    else:
        raise KeyError("Unknown backend param")
예제 #3
0
 def test_drop_all_tables_when_table_name_is_str(self):
     """Check that the HBase components can be built over existing tables.

     The table names are given as ``str`` while the listing assertion below
     expects ``bytes`` names. The test deletes every table, creates the
     three tables up front, then constructs ``HBaseQueue``,
     ``HBaseMetadata`` and ``HBaseState`` with their drop flags enabled.
     It fails if any constructor raises ``AlreadyExists``.
     """
     connection = Connection(host='hbase-docker', port=9090)
     # Start from a clean slate: remove every table currently listed.
     # NOTE(review): the positional ``True`` is presumably the "disable
     # before delete" flag of the connection API -- confirm.
     for table in connection.tables():
         connection.delete_table(table, True)
     hbase_queue_table = 'queue'
     hbase_metadata_table = 'metadata'
     hbase_states_table = 'states'
     connection.create_table(hbase_queue_table, {'f': {'max_versions': 1}})
     connection.create_table(hbase_metadata_table,
                             {'f': {'max_versions': 1}})
     connection.create_table(hbase_states_table, {'f': {'max_versions': 1}})
     tables = connection.tables()
     # Precondition only: if this fails, the setup above is broken, not the
     # code under test.
     assert set(tables) == {b'metadata', b'queue', b'states'}
     try:
         HBaseQueue(connection=connection,
                    partitions=1,
                    table_name=hbase_queue_table,
                    use_snappy=False,
                    drop=True)
         HBaseMetadata(connection=connection,
                       table_name=hbase_metadata_table,
                       drop_all_tables=True,
                       use_snappy=False,
                       batch_size=300000,
                       store_content=True)
         HBaseState(connection,
                    hbase_states_table,
                    cache_size_limit=100,
                    write_log_size=10,
                    drop_all_tables=True)
     except AlreadyExists:
         # Raise explicitly instead of ``assert False``: assert statements are
         # removed under ``python -O``, which would silently swallow the error.
         raise AssertionError("failed to drop hbase tables")
예제 #4
0
 def test_state(self):
     """Walk HBaseState through a set / update / flush / fetch cycle.

     ``r1``..``r4`` are defined outside this snippet.
     NOTE(review): b'10', b'11', b'12' are presumably the fingerprints of
     r1..r3 -- confirm against their definitions.
     """
     hbase = Connection(host='hbase-docker', port=9090)
     state = HBaseState(hbase, b'metadata', 300000)
     fingerprints = (b'10', b'11', b'12')

     # After set_states the three requests are expected to be NOT_CRAWLED.
     state.set_states([r1, r2, r3])
     observed = [req.meta[b'state'] for req in (r1, r2, r3)]
     assert observed == [States.NOT_CRAWLED] * 3
     state.update_cache([r1, r2, r3])
     assert state._state_cache == dict.fromkeys(fingerprints,
                                                States.NOT_CRAWLED)

     # Mark all three as crawled, push them to the cache and flush; the
     # in-memory cache is expected to be empty afterwards.
     for req in (r1, r2, r3):
         req.meta[b'state'] = States.CRAWLED
     state.update_cache([r1, r2, r3])
     state.flush(True)
     assert state._state_cache == {}

     # Fetching the same keys is expected to bring the CRAWLED values back.
     state.fetch(list(fingerprints))
     assert state._state_cache == dict.fromkeys(fingerprints, States.CRAWLED)

     # r4 is tagged ERROR locally, yet set_states is expected to overwrite it
     # with CRAWLED (presumably r4 shares a key with a cached entry).
     r4.meta[b'state'] = States.ERROR
     state.set_states([r1, r2, r4])
     assert r4.meta[b'state'] == States.CRAWLED
     state.flush(True)
     assert state._state_cache == {}
예제 #5
0
 def test_state(self):
     """Check the HBaseState cache contents across update, flush and fetch.

     Relies on ``r1``..``r4`` from outside this snippet.
     NOTE(review): the keys b'10', b'11', b'12' presumably correspond to
     r1..r3 -- confirm.
     """
     def cache_of(value):
         # Expected cache content when all three keys map to ``value``.
         return {key: value for key in (b'10', b'11', b'12')}

     conn = Connection(host='hbase-docker', port=9090)
     state = HBaseState(conn, b'metadata', 300000)

     state.set_states([r1, r2, r3])
     initial = [item.meta[b'state'] for item in [r1, r2, r3]]
     assert initial == [States.NOT_CRAWLED] * 3
     state.update_cache([r1, r2, r3])
     assert state._state_cache == cache_of(States.NOT_CRAWLED)

     # Promote every request to CRAWLED before caching them again.
     for item in [r1, r2, r3]:
         item.meta[b'state'] = States.CRAWLED
     state.update_cache([r1, r2, r3])
     state.flush(True)
     # flush(True) is expected to leave the in-memory cache empty.
     assert state._state_cache == {}

     state.fetch([b'10', b'11', b'12'])
     assert state._state_cache == cache_of(States.CRAWLED)

     # set_states is expected to replace r4's local ERROR with CRAWLED.
     r4.meta[b'state'] = States.ERROR
     state.set_states([r1, r2, r4])
     assert r4.meta[b'state'] == States.CRAWLED
     state.flush(True)
     assert state._state_cache == {}
예제 #6
0
 def test_state(self):
     """Check HBaseState cache contents and pending mutation counts.

     Relies on ``r1``..``r4`` from outside this snippet.
     NOTE(review): the keys b'10', b'11', b'12' presumably correspond to
     r1..r3 -- confirm.
     """
     keys = (b'10', b'11', b'12')
     conn = Connection(host='hbase-docker', port=9090)
     state = HBaseState(
         conn,
         b'states',
         cache_size_limit=300000,
         write_log_size=5000,
         drop_all_tables=True,
     )

     state.set_states([r1, r2, r3])
     initial = [req.meta[b'state'] for req in (r1, r2, r3)]
     assert initial == [States.NOT_CRAWLED] * 3

     # The first cache update is expected to queue one mutation per request.
     state.update_cache([r1, r2, r3])
     assert dict(state._state_cache) == dict.fromkeys(keys,
                                                      States.NOT_CRAWLED)
     assert state._state_batch._mutation_count == 3

     # A second update with new states is expected to add three more.
     for req in (r1, r2, r3):
         req.meta[b'state'] = States.CRAWLED
     state.update_cache([r1, r2, r3])
     assert state._state_batch._mutation_count == 6

     # flush() is expected to reset the pending mutation count to zero.
     state.flush()
     assert state._state_batch._mutation_count == 0

     state.fetch(list(keys))
     assert dict(state._state_cache) == dict.fromkeys(keys, States.CRAWLED)

     # set_states is expected to replace r4's local ERROR with CRAWLED.
     r4.meta[b'state'] = States.ERROR
     state.set_states([r1, r2, r4])
     assert r4.meta[b'state'] == States.CRAWLED
     state.flush()
     assert state._state_batch._mutation_count == 0