def test_drop_all_tables_when_table_name_is_str(self):
    """Build the HBase components with drop flags over pre-existing tables.

    The three tables are first created under ``str`` (not ``bytes``) names.
    Each component is then constructed with its drop option enabled; the
    test fails if any of those constructors raises ``AlreadyExists``.
    """
    conn = Connection(host='hbase-docker', port=9090)

    # Remove every table currently reported by the connection.
    for existing in conn.tables():
        conn.delete_table(existing, True)

    queue_name = 'queue'
    metadata_name = 'metadata'
    states_name = 'states'
    for name in (queue_name, metadata_name, states_name):
        conn.create_table(name, {'f': {'max_versions': 1}})

    # If this check fails, the test setup itself is at fault rather than
    # the components exercised below.
    assert set(conn.tables()) == {b'metadata', b'queue', b'states'}

    try:
        HBaseQueue(
            connection=conn,
            partitions=1,
            table_name=queue_name,
            use_snappy=False,
            drop=True,
        )
        HBaseMetadata(
            connection=conn,
            table_name=metadata_name,
            drop_all_tables=True,
            use_snappy=False,
            batch_size=300000,
            store_content=True,
        )
        HBaseState(
            conn,
            states_name,
            cache_size_limit=100,
            write_log_size=10,
            drop_all_tables=True,
        )
    except AlreadyExists:
        assert False, "failed to drop hbase tables"
def test_metadata(self):
    """Drive HBaseMetadata through its event methods and check stored URLs.

    After seeding, crawling, link extraction, an error report and a
    frontier stop, the ``m:url`` values found by scanning the 'metadata'
    table must be exactly the URLs of r1, r2 and r3.
    """
    conn = Connection(host='hbase-docker', port=9090)
    # NOTE(review): the trailing positional arguments presumably map to
    # drop_all_tables, use_snappy, batch_size, store_content -- confirm
    # against the HBaseMetadata signature.
    store = HBaseMetadata(conn, b'metadata', True, False, 300000, True)

    store.add_seeds([r1, r2, r3])
    response = Response('https://www.example.com', request=r1)
    store.page_crawled(response)
    store.links_extracted(response.request, [r2, r3])
    store.request_error(r4, 'error')
    store.frontier_stop()

    metadata_table = conn.table('metadata')
    stored_urls = {
        to_native_str(row[b'm:url'], 'utf-8')
        for _, row in metadata_table.scan()
    }
    expected_urls = {r1.url, r2.url, r3.url}
    assert stored_urls == expected_urls

    # Clean up the rows with these keys so later tests see no leftovers.
    self.delete_rows(metadata_table, [b'10', b'11', b'12'])