def test_live_queue():
    """Exercise a RedisQueue against a live Redis server.

    Covers: connect/connected flag, put/get round-trip, queue size tracking,
    duplicate rejection for unique tasks, and re-queuing a fetched task.
    """
    q = RedisQueue("test_queue", MockTask, namespace="pytest")
    assert q.connected is False

    first = MockTask()
    second = MockTask()
    first.uri = 'thisIsUnique'
    second.uri = 'thisIsUnique'

    assert q.connect(host=live_host, port=live_port, password=live_pass) is True
    assert q.connected is True

    # Basic put/qsize accounting on an empty queue.
    q.clear()
    assert q.qsize == 0
    q.put(first)
    assert q.qsize == 1
    q.put(second)
    assert q.qsize == 2

    # get() must deserialize back into a MockTask preserving identity fields.
    fetched = q.get()
    assert isinstance(fetched, MockTask)
    assert fetched.uid == first.uid
    assert fetched.uri == 'thisIsUnique'

    # With unique=True, two tasks sharing a unique_hash cannot coexist.
    q.clear()
    first.unique = True
    second.unique = True
    assert first.unique_hash() == second.unique_hash()
    q.put(first)
    with pytest.raises(TaskAlreadyInQueueException):
        q.put(second)
    assert q.qsize == 1

    # test getting and putting the same task into the queue
    q.clear()
    assert q.qsize == 0
    q.put(first)
    requeued = q.get()
    q.put(requeued)
    assert q.qsize == 1
if ex[0]==17: # directory already exists pass else: raise def makeprocessqentry(config, jobid, path, imgname, timestamp, size): return { "action": "process_file", "jobid": jobid, "path": path, "name": imgname, "changed": timestamp, "filesize": size } if __name__=='__main__': config= json.load(open("config.json")) makedatadir(config) downloadq= RedisQueue(host=config["redis-host"], namespace=config["redis-namespace"], name=config["redis-download-queue"]) processq= RedisQueue(host=config["redis-host"], namespace=config["redis-namespace"], name=config["redis-process-queue"]) session= requests.Session() processq.clear() while True: row= json.loads(downloadq.get()) print "%s => " % row['name'].encode('utf-8'), r= session.get(row['url']) print(r.status_code) if r.status_code!=200: raise RuntimeError("requests.get(%s) returned %s" % (row['url'], r.status_code)) outputpath= os.path.join(os.path.expanduser(config["download-dir"]), row["name"]) with open(outputpath, "w") as f: f.write(r.content) processq.put(json.dumps(makeprocessqentry(config, row["jobid"], outputpath, row["name"], row["changed"], row["filesize"])))
count+= 1 if limit and count>=limit: return def makeorderqentry(jobid, resume): return { "jobid": jobid, "resume": resume } def makedownloadqentry(config, jobid, name,url,timestamp,size): return { "jobid": jobid, "name": name, "url": url, "changed": timestamp, "filesize": size } if __name__=='__main__': config= json.load(open("config.json")) joborderq= RedisQueue(host=config["redis-host"], namespace=config["redis-namespace"], name=config["redis-job-order-queue"]) downloadq= RedisQueue(host=config["redis-host"], namespace=config["redis-namespace"], name=config["redis-download-queue"]) joborderq.clear() downloadq.clear() # xxxx remove stale files? jobid= 1 for row in commonsfiles(sortkey='img_sha1', limit=50): # wait for queue to shrink # xxx todo: it would be nicer to have a blocking version of this, instead of polling every second while joborderq.qsize()>=config["redis-max-queued-jobs"]: time.sleep(1) joborderq.put(json.dumps(makeorderqentry(jobid, row['resume']))) downloadq.put(json.dumps(makedownloadqentry(config, jobid, row["img_name"], row["url"], row["img_timestamp"], row["img_size"]))) print("pushed job %d (%s)..." % (jobid, row['img_name'])) jobid+= 1