Exemple #1
0
def main(argv):
    """Parse command-line options, then run the producer/consumer pipeline.

    Option values are written into the module-level ``params`` mapping under
    the option's own name. Only long options are accepted (``--help``,
    ``--index``, ``--host``, ``--queue``, ``--limit``, ``--offset``,
    ``--workers``, ``--batchsize``, ``--engine``): ``getopt`` is called with
    an empty short-option string, so any short option is rejected as unknown.

    Args:
        argv: Argument list in the form ``getopt.getopt`` expects
            (i.e. without the program name).

    This function does not return normally; it always ends via ``sys.exit``:
        * status 2, after printing usage, when an option is not recognised
          or when ``params['index']`` or ``params['host']`` is empty;
        * status 0 after ``--help``;
        * status 0 once neither the producer nor any consumer has reported
          work for more than ten consecutive one-second checks.
    """
    # Long options that take a value. Each one maps 1:1 onto a ``params`` key,
    # so the option list and the storage step are driven from this one tuple.
    value_options = ("index", "host", "queue", "limit", "offset",
                     "workers", "batchsize", "engine")

    try:
        opts, _ = getopt.getopt(
            argv, "", ["help"] + [name + "=" for name in value_options])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == "--help":
            usage()
            sys.exit()
        # getopt reports long options as "--<full name>", even when the user
        # typed an unambiguous prefix, so stripping the dashes yields the key.
        params[opt[2:]] = arg

    # The consumer implementation is chosen at run time; anything other than
    # 'solr' falls back to the Elastic consumer.
    if params['engine'] == 'solr':
        from solr_consumer import SolrConsumer as Consumer
    else:
        from elastic_consumer import ElasticConsumer as Consumer

    if params['index'] == '' or params['host'] == '':
        usage()
        sys.exit(2)

    data_queue = queue.Queue(int(params['queue']))

    producer = CSVStreamProducer(
        sys.stdin,
        int(params['limit']),
        int(params['offset']),
        batch_size=int(params['batchsize']),
    )
    consumers = [
        Consumer(params['index'], host=params['host'])
        for _ in range(int(params['workers']))
    ]

    # Start the producer first, then every consumer, all sharing one queue.
    producer.launch(data_queue).start()
    for consumer in consumers:
        consumer.launch(data_queue).start()

    # Poll once per second; exit only after more than ten consecutive idle
    # checks so that a brief pause is not mistaken for completion.
    no_one_is_working = 0
    while True:
        if producer.is_working() or workers_working(consumers):
            no_one_is_working = 0
        else:
            no_one_is_working += 1

        if no_one_is_working > 10:
            sys.exit(0)
        time.sleep(1)
Exemple #2
0
import Queue as queue
from csv_producer import CSVStreamProducer
from elastic_consumer import ElasticConsumer
import threading, sys, time

# Bounded queue (capacity 1000) shared by the producer and the consumer.
data_queue = queue.Queue(1000)

producer = CSVStreamProducer(sys.stdin)

# When at least two command-line arguments are given, they are used as the
# index name and the host; otherwise the hard-coded 'xxx2' index is used and
# no host is passed to ElasticConsumer.
if len(sys.argv) >= 3:
    consumer = ElasticConsumer(sys.argv[1], host=sys.argv[2])
else:
    consumer = ElasticConsumer('xxx2')

# NOTE: index creation is currently disabled:
# consumer.create_index()

# Start the producer first, then the consumer, on the same queue.
producer.launch(data_queue).start()
consumer.launch(data_queue).start()

# Poll once per second and stop after more than ten consecutive checks in
# which neither side reports that it is working.
no_one_is_working = 0
while True:
    if producer.is_working() or consumer.is_working():
        no_one_is_working = 0
    else:
        no_one_is_working += 1

    if no_one_is_working > 10:
        break
    time.sleep(1)

sys.exit(0)