Example #1
import csv
import sys

import client  # local project module, assumed to provide grouper, get, and BATCH_SIZE
# prep_request, which builds the XML payload for a batch, is defined
# elsewhere in the project and is not shown here.


def main():
    found = []
    journals = []
    with open(sys.argv[1]) as infile:
        for num, row in enumerate(csv.DictReader(infile)):
            print("Processing", row['ISSN'], file=sys.stderr)
            jid = row.get('ID', num)
            journals.append((jid, row['ISSN']))

    # Split the journal list into request-sized batches.
    lookup_groups = client.grouper(journals, client.BATCH_SIZE)
    for idx, batch in enumerate(lookup_groups):
        xml = prep_request(batch)
        print("Processing batch", idx, file=sys.stderr)
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)

    # Write the results to a csv file.
    with open(sys.argv[2], 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('number', 'ISSN', 'JCR'))
        for grp in found:
            for item in grp:
                writer.writerow([
                    item, grp[item].get('issn', 'na'),
                    grp[item].get('impactGraphURL', 'na')
                ])
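
Example #1 leans on client.grouper to split its input into request-sized batches before posting (the later examples reuse the same helper). The client module itself is not shown; the sketch below is one plausible implementation, assuming only what the call sites imply (that grouper yields lists of at most `size` items) rather than a confirmed API.

from itertools import islice

def grouper(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk
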
Example #2
import csv
import sys

import client  # local project module, assumed to provide grouper, get, and BATCH_SIZE
# prep_request, which builds the XML payload for a batch, is defined
# elsewhere in the project and is not shown here.


def main():
    try:
        infile = sys.argv[1]
        outfile = sys.argv[2]
    except IndexError:
        raise Exception("An input file and an output file are required.")
    found = []
    to_check = []
    with open(infile) as inf:
        for row in csv.DictReader(inf):
            d = {}
            for k, v in row.items():
                d[k.lower()] = v.strip()
            to_check.append(d)

    lookup_groups = client.grouper(to_check, client.BATCH_SIZE)
    for idx, batch in enumerate(lookup_groups):
        xml = prep_request(batch)
        print("Processing batch", idx, file=sys.stderr)
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)

    # Write the results to a csv file.
    with open(outfile, 'w', newline='') as of:
        writer = csv.writer(of)
        writer.writerow(('id', 'ut', 'doi', 'pmid', 'times cited', 'source'))
        for grp in found:
            for k, item in grp.items():
                ut = item.get('ut')
                if ut is not None:
                    ut = "WOS:" + ut
                writer.writerow([
                    k, ut,
                    item.get('doi', ""),
                    item.get('pmid', ""),
                    item.get('timesCited', '0'),
                    item.get('sourceURL', 'N/A')
                ])
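
Example #2 normalizes each CSV row before lookup: header names are lowercased and values are stripped of surrounding whitespace, so the rest of the script can rely on keys such as 'doi' or 'pmid' regardless of how the input file capitalizes its columns. The same step as a small reusable helper (normalize_row is a hypothetical name, not part of the original script):

def normalize_row(row):
    """Lowercase header names and strip surrounding whitespace from values."""
    return {k.lower(): v.strip() for k, v in row.items()}

# normalize_row({'DOI': ' 10.1000/xyz '}) == {'doi': '10.1000/xyz'}
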
Example #3
import csv
import sys
from datetime import datetime
from time import sleep

import client  # local project module, assumed to provide grouper, get, BATCH_SIZE, and THROTTLE_CAP
# prep_request, which builds the XML payload for a batch, is defined
# elsewhere in the project and is not shown here.


def main():
    try:
        infile = sys.argv[1]
        outfile = sys.argv[2]
    except IndexError:
        raise Exception("An input file and an output file are required.")
    found = []
    to_check = []
    with open(infile) as inf:
        for row in csv.DictReader(inf):
            d = {}
            for k, v in row.items():
                d[k.lower()] = v.strip()
            to_check.append(d)

    lookup_groups = client.grouper(to_check, client.BATCH_SIZE)
    start_time = datetime.now().timestamp()
    throttle_group = 1
    for idx, batch in enumerate(lookup_groups, 1):
        xml = prep_request(batch)

        # Respect throttling of records per minute
        time_elapsed = datetime.now().timestamp() - start_time
        if (client.BATCH_SIZE*idx) > (client.THROTTLE_CAP*throttle_group) \
           and time_elapsed < (60*throttle_group):
            sleep_length = 60 * throttle_group - time_elapsed + 1
            print("Rate throttling in effect, waiting {} seconds...".format(
                round(sleep_length)))
            sleep(sleep_length)
            print("Restarting requests...")
            throttle_group += 1

        print("Processing batch {}".format(idx))
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)

    # Write the results to a csv file.
    with open(outfile, 'w', newline='') as of:
        writer = csv.writer(of)
        writer.writerow(('id', 'ut', 'doi', 'pmid', 'times cited', 'source'))
        for grp in found:
            for k, item in grp.items():
                ut = item.get('ut')
                if ut is not None:
                    ut = "WOS:" + ut
                writer.writerow([
                    k, ut,
                    item.get('doi', ""),
                    item.get('pmid', ""),
                    item.get('timesCited', '0'),
                    item.get('sourceURL', 'N/A')
                ])
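
The throttling added in Example #3 caps how many records are submitted per minute: once the batches posted so far would exceed client.THROTTLE_CAP records for the current one-minute window, the loop sleeps out the remainder of that window (plus a second of slack) before continuing. The same arithmetic as a standalone function (seconds_to_wait is a hypothetical name; the numbers in the closing comment are illustrative):

def seconds_to_wait(batch_size, idx, throttle_cap, throttle_group, elapsed):
    """Seconds to sleep before posting batch `idx`, or 0 if there is headroom.

    Mirrors the check above: if `idx` batches of `batch_size` records
    exceed `throttle_cap * throttle_group` before the `throttle_group`-th
    minute has elapsed, wait out the rest of that minute plus one second.
    """
    if (batch_size * idx) > (throttle_cap * throttle_group) \
            and elapsed < (60 * throttle_group):
        return 60 * throttle_group - elapsed + 1
    return 0

# With batch_size=50 and throttle_cap=300, the 7th batch pushes the total
# to 350 records; if only 45 seconds have elapsed, the loop sleeps
# 60 - 45 + 1 = 16 seconds.
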
Example #4
import csv
import sys
from datetime import datetime
from time import sleep

import client  # local project module, assumed to provide grouper, get, BATCH_SIZE, and THROTTLE_CAP
# prep_request, which builds the XML payload for a batch, is defined
# elsewhere in the project and is not shown here.


def main():
    found = []
    journals = []
    with open(sys.argv[1]) as infile:
        for num, row in enumerate(csv.DictReader(infile)):
            print("Processing {}".format(row['ISSN']))
            jid = row.get('ID', num)
            journals.append((jid, row['ISSN']))

    lookup_groups = client.grouper(journals, client.BATCH_SIZE)
    start_time = datetime.now().timestamp()
    throttle_group = 1
    for idx, batch in enumerate(lookup_groups, 1):
        xml = prep_request(batch)

        # Respect throttling of records per minute
        time_elapsed = datetime.now().timestamp() - start_time
        if (client.BATCH_SIZE*idx) > (client.THROTTLE_CAP*throttle_group) \
           and time_elapsed < (60*throttle_group):
            sleep_length = 60 * throttle_group - time_elapsed + 1
            print("Rate throttling in effect, waiting {} seconds...".format(
                round(sleep_length)))
            sleep(sleep_length)
            print("Restarting requests...")
            throttle_group += 1

        print("Processing batch {}".format(idx))
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)

    # Write the results to a csv file.
    with open(sys.argv[2], 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('number', 'ISSN', 'JCR'))
        for grp in found:
            for item in grp:
                writer.writerow([
                    item, grp[item].get('issn', 'na'),
                    grp[item].get('impactGraphURL', 'na')
                ])
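
Examples #1 and #4 expect the input CSV to contain an ISSN column and, optionally, an ID column; rows without an ID fall back to their position in the file. A minimal input file (values illustrative) might look like:

ID,ISSN
j1,0028-0836
j2,1476-4687

All four scripts take the input and output paths as positional arguments, so a typical run (script name assumed) is: python lookup_issns.py journals.csv results.csv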