# Walk every page of the collection registry API, queue a harvest on the
# 'normal-production' queue for each collection flagged ready for
# publication, and report how many collections were ready / had a harvest URL.
env = config()
c_prod = []     # collections whose 'ready_for_publication' flag is truthy
c_harvest = []  # collections whose 'url_harvest' is truthy
url_reg = "https://registry.cdlib.org"
url_reg_api = '{}{}'.format(url_reg, "/api/v1/collection/")
url = '{}{}'.format(url_reg_api, "?format=json&limit=1000")
while url:
    resp = requests.get(url)
    # Surface HTTP failures directly instead of a confusing JSON/KeyError
    # further down.
    resp.raise_for_status()
    api = resp.json()
    nextpage = api['meta']['next']
    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and stays valid on Python 3.
    print("NEXTPAGE:{}".format(nextpage))
    # Handle the page just fetched BEFORE deciding whether to continue.  The
    # previous loop was guarded on `nextpage`, so the final page (and a
    # single-page result) was never processed.
    for o in api['objects']:
        if o['ready_for_publication']:
            c_prod.append(o)
            url_api_collection = '{}{}/'.format(url_reg_api, o['id'])
            print(url_api_collection)
            queue_harvest('*****@*****.**',
                          url_api_collection,
                          redis_host=env['redis_host'],
                          redis_port=env['redis_port'],
                          redis_pswd=env['redis_password'],
                          rq_queue='normal-production')
        # NOTE(review): counted independently of the publication flag; the
        # original layout was ambiguous about this nesting -- confirm.
        if o['url_harvest']:
            c_harvest.append(o)
    # 'next' is a path relative to the registry host; empty on the last page.
    url = ''.join((url_reg, nextpage)) if nextpage else None
print("READY FOR PUB:{}".format(len(c_prod)))
print("READY FOR HARVEST:{}".format(len(c_harvest)))
# Page through the collection registry API and queue a 'normal-production'
# harvest for each collection that is ready for publication, then print the
# totals.
# NOTE(review): `env` is read below but not assigned in this script; it must
# already be bound (e.g. by an earlier `env = config()`) -- confirm.
c_prod = []     # collections with a truthy 'ready_for_publication'
c_harvest = []  # collections with a truthy 'url_harvest'
url_reg = "https://registry.cdlib.org"
url_reg_api = '{}{}'.format(url_reg, "/api/v1/collection/")
url = '{}{}'.format(url_reg_api, "?format=json&limit=1000")
while True:
    resp = requests.get(url)
    # Stop on an HTTP error rather than trying to parse an error body.
    resp.raise_for_status()
    api = resp.json()
    nextpage = api['meta']['next']
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3.
    print("NEXTPAGE:{}".format(nextpage))
    # Every fetched page is processed, including the last one.  The earlier
    # `while nextpage:` form skipped whichever page had no successor.
    for o in api['objects']:
        if o['ready_for_publication']:
            c_prod.append(o)
            url_api_collection = '{}{}/'.format(url_reg_api, o['id'])
            print(url_api_collection)
            queue_harvest('*****@*****.**',
                          url_api_collection,
                          redis_host=env['redis_host'],
                          redis_port=env['redis_port'],
                          redis_pswd=env['redis_password'],
                          rq_queue='normal-production')
        # NOTE(review): treated as a separate tally from the publication
        # check; the original nesting could not be determined -- confirm.
        if o['url_harvest']:
            c_harvest.append(o)
    if not nextpage:
        break
    # 'next' is a host-relative path, so prefix the registry base URL.
    url = ''.join((url_reg, nextpage))
print("READY FOR PUB:{}".format(len(c_prod)))
print("READY FOR HARVEST:{}".format(len(c_harvest)))
from harvester.collection_registry_client import ResourceIterator from harvester.collection_registry_client import url_base, api_path from harvester.config import config from harvester.scripts.queue_harvest import main as queue_harvest for c in ResourceIterator(url_base, api_path + 'collection', 'collection'): if c.harvest_type != 'X': print c.name, c.slug, c.harvest_type, c.url_harvest env = config() queue_harvest('*****@*****.**', url_base + c.resource_uri, redis_host=env['redis_host'], redis_port=env['redis_port'], redis_pswd=env['redis_password'], id_ec2_ingest=env['id_ec2_ingest'], id_ec2_solr=env['id_ec2_solr_build'], job_timeout=6000)
from harvester.collection_registry_client import ResourceIterator from harvester.collection_registry_client import url_base, api_path from harvester.config import config from harvester.scripts.queue_harvest import main as queue_harvest for c in ResourceIterator(url_base, api_path + "collection", "collection"): if c.harvest_type != "X": print c.name, c.slug, c.harvest_type, c.url_harvest env = config() queue_harvest( "*****@*****.**", url_base + c.resource_uri, redis_host=env["redis_host"], redis_port=env["redis_port"], redis_pswd=env["redis_password"], id_ec2_ingest=env["id_ec2_ingest"], id_ec2_solr=env["id_ec2_solr_build"], job_timeout=6000, )