Ejemplo n.º 1
0
def scan_parallel(slice_parameters):
    """Walk one slice of document text from Elasticsearch.

    Intended to run in its own process: it reports its PID and builds a
    private client rather than reusing one created before a fork.

    Args:
        slice_parameters: Indexable whose items 0, 1 and 2 are, in order,
            the slice id, the slice count and the slice size.

    Returns:
        None. Per-document processing is still a TODO, so every document
        yielded for the slice is iterated over and discarded.
    """
    slice_id, slice_count, slice_size = (
        slice_parameters[0],
        slice_parameters[1],
        slice_parameters[2],
    )

    # Announce which process is handling this slice.
    print('')
    worker_pid = os.getpid()
    print('Begin SCANNING. PID: {0}'.format(worker_pid))

    # The ES client does not handle fork well, so every call builds its own.
    # https://elasticsearch-py.readthedocs.io/en/master/
    # NOTE(review): `query` and `get_document_text_slice` are assumed to be
    # provided by the `Elasticsearch` class in scope here -- confirm.
    client = Elasticsearch(ES_SERVER_NAME)
    match_all_query = {"query": {"match_all": {}}}
    client.query = match_all_query

    # Placeholder for the features of each document in the chunk; nothing
    # fills it until the TODO below is implemented.
    features = []

    document_stream = client.get_document_text_slice(
        slice_count=slice_count, slice_size=slice_size, slice_id=slice_id)
    for document_id, text in document_stream:
        # TODO: Do Work
        continue