예제 #1
0
def get_content_chunk():
    """
    Return metadata for the next batch of crawled chunks.

    Asks the database manager for the first ``number_of_chunks`` crawled
    chunks, records each of them in the index builder relation with task
    'building' for the requesting host (``request.remote_addr``), and
    returns their metadata.

    :return: JSON response holding a list of
        ``{'chunk_id': ..., 'host': ...}`` dicts, empty list if no content
        chunks
    """
    import logging

    # Get the first number_of_chunks crawled chunks
    results = db_manager.get_first_n_crawled_chunks(number_of_chunks)

    # Debug-level log instead of an unconditional print on every request
    logging.getLogger(__name__).debug('Crawled chunks fetched: %s', results)

    chunks = []
    # `or ()` makes a None/empty result fall through to an empty response
    for chunk in results or ():
        chunks.append({
            'chunk_id': chunk['chunk_id'],
            'host': chunk['c_host'],
        })

        # Insert to index builder relation and mark chunk as 'building'
        db_manager.operate_on_index_builder_relation(
            'INSERT',
            chunk['chunk_id'],
            host=request.remote_addr,
            task='building')

    # An empty list serializes the same way the old explicit branch did
    return jsonify(chunks)
예제 #2
0
def test_get_first_n_crawled_chunk_ids():
    """
    Check that get_first_n_crawled_chunks(1) returns exactly one row.

    Inserts one chunk, one host and one crawler row with task 'crawled',
    runs the query, prints a PASSED/FAILED line, and then deletes the rows
    it inserted. The cleanup runs even if the query under test raises, so a
    failing run does not leave the fixture rows behind.
    """
    # Setup test env
    db_manager = DatabaseManager()
    db_manager.operate_on_chunk_relation('INSERT', chunk_id='101c')
    db_manager.operate_on_host_relation('INSERT', host='101.101.101.101:101', type='Test Server')
    db_manager.operate_on_crawler_relation('INSERT', chunk_id='101c', host='101.101.101.101:101', task='crawled')

    try:
        # Test
        result = db_manager.get_first_n_crawled_chunks(1)
        if len(result) == 1:
            print('> PASSED | get_first_n_crawled_chunk_ids()')
        else:
            print('> FAILED | get_first_n_crawled_chunk_ids()')
    finally:
        # Clean up test env (same order as before), even when the test errors
        db_manager.operate_on_chunk_relation('DELETE', chunk_id='101c')
        db_manager.operate_on_host_relation('DELETE', host='101.101.101.101:101')
        db_manager.operate_on_crawler_relation('DELETE', chunk_id='101c')