Example #1
0
 def test_drop_all_tables_when_table_name_is_str(self):
     """Check that the HBase components cope with pre-existing ``str``-named tables.

     The test wipes every table on the server, creates the ``queue``,
     ``metadata`` and ``states`` tables by hand, and then constructs
     ``HBaseQueue``, ``HBaseMetadata`` and ``HBaseState`` with their drop
     flags enabled and the table names passed as ``str``.  If any
     constructor raises ``AlreadyExists`` the test fails.
     """
     connection = Connection(host='hbase-docker', port=9090)
     # Start from a clean server so the table listing below is predictable.
     # NOTE(review): the second positional argument is presumably the
     # client's "disable before delete" flag -- confirm against the client API.
     for table in connection.tables():
         connection.delete_table(table, True)
     hbase_queue_table = 'queue'
     hbase_metadata_table = 'metadata'
     hbase_states_table = 'states'
     # Pre-create all three tables (same order as before) with one column
     # family ``f`` so the constructors below find them already present.
     for table_name in (hbase_queue_table, hbase_metadata_table,
                        hbase_states_table):
         connection.create_table(table_name, {'f': {'max_versions': 1}})
     tables = connection.tables()
     # Precondition check: a failure here is a failure of the test set-up
     # itself, not of the code under test.
     assert set(tables) == {b'metadata', b'queue', b'states'}
     try:
         HBaseQueue(connection=connection,
                    partitions=1,
                    table_name=hbase_queue_table,
                    use_snappy=False,
                    drop=True)
         HBaseMetadata(connection=connection,
                       table_name=hbase_metadata_table,
                       drop_all_tables=True,
                       use_snappy=False,
                       batch_size=300000,
                       store_content=True)
         HBaseState(connection,
                    hbase_states_table,
                    cache_size_limit=100,
                    write_log_size=10,
                    drop_all_tables=True)
     except AlreadyExists:
         # Raise explicitly instead of ``assert False``: assert statements
         # are removed under ``python -O``, which would make this test pass
         # silently when the tables were not dropped.
         raise AssertionError("failed to drop hbase tables")
Example #2
0
 def test_metadata(self):
     """Drive ``HBaseMetadata`` through its event methods and check stored URLs.

     Seeds ``r1``..``r3``, reports a crawled page, extracted links and a
     request error, stops the frontier, then scans the ``metadata`` table
     and asserts that the set of ``m:url`` values equals the URLs of
     ``r1``, ``r2`` and ``r3``.  The rows are removed afterwards whether
     or not the assertion holds.
     """
     connection = Connection(host='hbase-docker', port=9090)
     # NOTE(review): the positional arguments presumably map to table_name,
     # drop_all_tables, use_snappy, batch_size and store_content -- confirm
     # against the HBaseMetadata signature.
     metadata = HBaseMetadata(connection, b'metadata', True, False, 300000,
                              True)
     metadata.add_seeds([r1, r2, r3])
     resp = Response('https://www.example.com', request=r1)
     metadata.page_crawled(resp)
     metadata.links_extracted(resp.request, [r2, r3])
     metadata.request_error(r4, 'error')
     metadata.frontier_stop()
     table = connection.table('metadata')
     try:
         stored_urls = {to_native_str(data[b'm:url'], 'utf-8')
                        for _, data in table.scan()}
         assert stored_urls == {r1.url, r2.url, r3.url}
     finally:
         # Clean up even when the assertion fails, so leftover rows cannot
         # leak into other tests that use the same table.
         self.delete_rows(table, [b'10', b'11', b'12'])
Example #3
0
 def test_metadata(self):
     """Verify that ``HBaseMetadata`` stores the URLs of the seeded requests.

     The metadata component receives seeds ``r1``..``r3``, a crawled
     response for ``r1``, links ``r2`` and ``r3`` extracted from it, and
     an error for ``r4``.  After ``frontier_stop()`` the ``metadata``
     table is scanned and the ``m:url`` column values must equal the URLs
     of ``r1``, ``r2`` and ``r3``.  Row cleanup always runs.
     """
     connection = Connection(host='hbase-docker', port=9090)
     # NOTE(review): positional flags are presumably drop_all_tables=True,
     # use_snappy=False, batch_size=300000, store_content=True -- verify
     # against the HBaseMetadata constructor.
     metadata = HBaseMetadata(connection, b'metadata', True, False, 300000, True)
     metadata.add_seeds([r1, r2, r3])
     resp = Response('https://www.example.com', request=r1)
     metadata.page_crawled(resp)
     metadata.links_extracted(resp.request, [r2, r3])
     metadata.request_error(r4, 'error')
     metadata.frontier_stop()
     table = connection.table('metadata')
     try:
         urls_in_table = set()
         for _, data in table.scan():
             urls_in_table.add(to_native_str(data[b'm:url'], 'utf-8'))
         assert urls_in_table == {r1.url, r2.url, r3.url}
     finally:
         # Run the cleanup unconditionally: previously a failed assertion
         # skipped it and left these rows behind in the table.
         self.delete_rows(table, [b'10', b'11', b'12'])