# Walk every page of the collection registry API. For each collection with a
# truthy 'ready_for_publication', queue a harvest on the 'normal-production'
# rq queue; separately tally the collections with a truthy 'url_harvest'.
env = config()
c_prod = []     # collections flagged ready_for_publication
c_harvest = []  # collections that have a url_harvest value
url_reg = "https://registry.cdlib.org"
url_reg_api = '{}{}'.format(url_reg, "/api/v1/collection/")
url = '{}{}'.format(url_reg_api, "?format=json&limit=1000")
page_url = url
while page_url:
    resp = requests.get(page_url)
    api = resp.json()
    nextpage = api['meta']['next']
    # Single-argument print(...) behaves the same as the print statement.
    print("NEXTPAGE:{}".format(nextpage))
    # Handle this page's objects BEFORE deciding whether to continue. Testing
    # `nextpage` first would skip the final page (its 'next' is empty) and
    # would process nothing at all when the result fits on one page.
    for o in api['objects']:
        if o['ready_for_publication']:
            c_prod.append(o)
            url_api_collection = '{}{}/'.format(url_reg_api, o['id'])
            print(url_api_collection)
            queue_harvest('*****@*****.**', url_api_collection,
                          redis_host=env['redis_host'],
                          redis_port=env['redis_port'],
                          redis_pswd=env['redis_password'],
                          rq_queue='normal-production')
        if o['url_harvest']:
            c_harvest.append(o)
    # 'next' is appended to the registry host, so it is expected to be a
    # host-relative path; an empty value ends the walk.
    page_url = ''.join((url_reg, nextpage)) if nextpage else None

print("READY FOR PUB:{}".format(len(c_prod)))
print("READY FOR HARVEST:{}".format(len(c_harvest)))
Ejemplo n.º 2
0
# Walk every page of the collection registry API. For each collection with a
# truthy 'ready_for_publication', queue a harvest on the 'normal-production'
# rq queue; separately tally the collections with a truthy 'url_harvest'.
# NOTE(review): `env` is not assigned in this snippet; it must already be in
# scope (the redis settings are read from it below).
c_prod = []     # collections flagged ready_for_publication
c_harvest = []  # collections that have a url_harvest value
url_reg = "https://registry.cdlib.org"
url_reg_api = '{}{}'.format(url_reg, "/api/v1/collection/")
url = '{}{}'.format(url_reg_api, "?format=json&limit=1000")
page_url = url
while page_url:
    resp = requests.get(page_url)
    api = resp.json()
    nextpage = api['meta']['next']
    # Single-argument print(...) behaves the same as the print statement.
    print("NEXTPAGE:{}".format(nextpage))
    # Handle this page's objects BEFORE deciding whether to continue. Testing
    # `nextpage` first would skip the final page (its 'next' is empty) and
    # would process nothing at all when the result fits on one page.
    for o in api['objects']:
        if o['ready_for_publication']:
            c_prod.append(o)
            url_api_collection = '{}{}/'.format(url_reg_api, o['id'])
            print(url_api_collection)
            queue_harvest('*****@*****.**',
                          url_api_collection,
                          redis_host=env['redis_host'],
                          redis_port=env['redis_port'],
                          redis_pswd=env['redis_password'],
                          rq_queue='normal-production')
        if o['url_harvest']:
            c_harvest.append(o)
    # 'next' is appended to the registry host, so it is expected to be a
    # host-relative path; an empty value ends the walk.
    page_url = ''.join((url_reg, nextpage)) if nextpage else None

print("READY FOR PUB:{}".format(len(c_prod)))
print("READY FOR HARVEST:{}".format(len(c_harvest)))
Ejemplo n.º 3
0
from harvester.collection_registry_client import ResourceIterator
from harvester.collection_registry_client import url_base, api_path
from harvester.config import config
from harvester.scripts.queue_harvest import main as queue_harvest

for c in ResourceIterator(url_base, api_path + 'collection', 'collection'):
    if c.harvest_type != 'X':
        print c.name, c.slug, c.harvest_type, c.url_harvest
        env = config()
        queue_harvest('*****@*****.**',
                      url_base + c.resource_uri,
                      redis_host=env['redis_host'],
                      redis_port=env['redis_port'],
                      redis_pswd=env['redis_password'],
                      id_ec2_ingest=env['id_ec2_ingest'],
                      id_ec2_solr=env['id_ec2_solr_build'],
                      job_timeout=6000)
from harvester.collection_registry_client import ResourceIterator
from harvester.collection_registry_client import url_base, api_path
from harvester.config import config
from harvester.scripts.queue_harvest import main as queue_harvest

for c in ResourceIterator(url_base, api_path + "collection", "collection"):
    if c.harvest_type != "X":
        print c.name, c.slug, c.harvest_type, c.url_harvest
        env = config()
        queue_harvest(
            "*****@*****.**",
            url_base + c.resource_uri,
            redis_host=env["redis_host"],
            redis_port=env["redis_port"],
            redis_pswd=env["redis_password"],
            id_ec2_ingest=env["id_ec2_ingest"],
            id_ec2_solr=env["id_ec2_solr_build"],
            job_timeout=6000,
        )