def main():
    """Look up journal ISSNs in batches and write JCR results to a CSV.

    Usage: script <input.csv> <output.csv>

    The input CSV must have an ``ISSN`` column; an optional ``ID`` column
    supplies the journal identifier (the row number is used otherwise).
    Output columns: number, ISSN, JCR (impact-graph URL).
    """
    found = []
    journals = []
    with open(sys.argv[1]) as infile:
        for num, row in enumerate(csv.DictReader(infile)):
            print("Processing", row['ISSN'], file=sys.stderr)
            # Fall back to the row number when the CSV has no ID column.
            jid = row.get('ID', num)
            journals.append((jid, row['ISSN']))
    lookup_groups = client.grouper(journals, client.BATCH_SIZE)
    for idx, batch in enumerate(lookup_groups):
        xml = prep_request(batch)
        print("Processing batch", idx, file=sys.stderr)
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(sys.argv[2], 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('number', 'ISSN', 'JCR'))
        for grp in found:
            for item in grp:
                writer.writerow([
                    item,
                    grp[item].get('issn', 'na'),
                    grp[item].get('impactGraphURL', 'na'),
                ])
def main():
    """Look up records in batches and write citation data to a CSV.

    Usage: script <input.csv> <output.csv>

    Input CSV headers are lowercased and values stripped before lookup.
    Output columns: id, ut (prefixed ``WOS:`` when present), doi, pmid,
    times cited, source URL.
    """
    try:
        infile = sys.argv[1]
        outfile = sys.argv[2]
    except IndexError:
        raise Exception("An input and output file is required.")
    found = []
    to_check = []
    with open(infile) as inf:
        for row in csv.DictReader(inf):
            # Normalize headers to lowercase and strip whitespace from values
            # so the lookup is tolerant of sloppy input files.
            d = {}
            for k, v in row.items():
                d[k.lower()] = v.strip()
            to_check.append(d)
    lookup_groups = client.grouper(to_check, client.BATCH_SIZE)
    for idx, batch in enumerate(lookup_groups):
        xml = prep_request(batch)
        print("Processing batch", idx, file=sys.stderr)
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)
    # Write the results to a csv file.
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(outfile, 'w', newline='') as of:
        writer = csv.writer(of)
        writer.writerow(('id', 'ut', 'doi', 'pmid', 'times cited', 'source'))
        for grp in found:
            for k, item in grp.items():
                ut = item.get('ut')
                if ut is not None:
                    ut = "WOS:" + ut
                writer.writerow([
                    k,
                    ut,
                    item.get('doi', ""),
                    item.get('pmid', ""),
                    item.get('timesCited', '0'),
                    item.get('sourceURL', 'N/A'),
                ])
def main():
    """Look up records in throttled batches and write citation data to a CSV.

    Usage: script <input.csv> <output.csv>

    Input CSV headers are lowercased and values stripped before lookup.
    Requests are rate-limited to ``client.THROTTLE_CAP`` records per minute.
    Output columns: id, ut (prefixed ``WOS:`` when present), doi, pmid,
    times cited, source URL.
    """
    try:
        infile = sys.argv[1]
        outfile = sys.argv[2]
    except IndexError:
        raise Exception("An input and outfile file are required.")
    found = []
    to_check = []
    with open(infile) as inf:
        for row in csv.DictReader(inf):
            # Normalize headers to lowercase and strip whitespace from values
            # so the lookup is tolerant of sloppy input files.
            d = {}
            for k, v in row.items():
                d[k.lower()] = v.strip()
            to_check.append(d)
    lookup_groups = client.grouper(to_check, client.BATCH_SIZE)
    start_time = datetime.now().timestamp()
    throttle_group = 1
    for idx, batch in enumerate(lookup_groups, 1):
        xml = prep_request(batch)
        # Respect throttling of records per minute: once this minute's record
        # allowance is exhausted, sleep until the next minute window opens.
        time_elapsed = datetime.now().timestamp() - start_time
        if (client.BATCH_SIZE*idx) > (client.THROTTLE_CAP*throttle_group) \
                and time_elapsed < (60*throttle_group):
            sleep_length = 60 * throttle_group - time_elapsed + 1
            print("Rate throttling in effect, waiting {} seconds...".format(
                round(sleep_length)))
            sleep(sleep_length)
            print("Restarting requests...")
            throttle_group += 1
        print("Processing batch {}".format(idx))
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)
    # Write the results to a csv file.
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(outfile, 'w', newline='') as of:
        writer = csv.writer(of)
        writer.writerow(('id', 'ut', 'doi', 'pmid', 'times cited', 'source'))
        for grp in found:
            for k, item in grp.items():
                ut = item.get('ut')
                if ut is not None:
                    ut = "WOS:" + ut
                writer.writerow([
                    k,
                    ut,
                    item.get('doi', ""),
                    item.get('pmid', ""),
                    item.get('timesCited', '0'),
                    item.get('sourceURL', 'N/A'),
                ])
def main():
    """Look up journal ISSNs in throttled batches and write JCR results to a CSV.

    Usage: script <input.csv> <output.csv>

    The input CSV must have an ``ISSN`` column; an optional ``ID`` column
    supplies the journal identifier (the row number is used otherwise).
    Requests are rate-limited to ``client.THROTTLE_CAP`` records per minute.
    Output columns: number, ISSN, JCR (impact-graph URL).
    """
    found = []
    journals = []
    with open(sys.argv[1]) as infile:
        for num, row in enumerate(csv.DictReader(infile)):
            print("Processing {}".format(row['ISSN']))
            # Fall back to the row number when the CSV has no ID column.
            jid = row.get('ID', num)
            journals.append((jid, row['ISSN']))
    lookup_groups = client.grouper(journals, client.BATCH_SIZE)
    start_time = datetime.now().timestamp()
    throttle_group = 1
    for idx, batch in enumerate(lookup_groups, 1):
        xml = prep_request(batch)
        # Respect throttling of records per minute: once this minute's record
        # allowance is exhausted, sleep until the next minute window opens.
        time_elapsed = datetime.now().timestamp() - start_time
        if (client.BATCH_SIZE*idx) > (client.THROTTLE_CAP*throttle_group) \
                and time_elapsed < (60*throttle_group):
            sleep_length = 60 * throttle_group - time_elapsed + 1
            print("Rate throttling in effect, waiting {} seconds...".format(
                round(sleep_length)))
            sleep(sleep_length)
            print("Restarting requests...")
            throttle_group += 1
        print("Processing batch {}".format(idx))
        # Post the batch
        rsp = client.get(xml)
        found.append(rsp)
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(sys.argv[2], 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('number', 'ISSN', 'JCR'))
        for grp in found:
            for item in grp:
                writer.writerow([
                    item,
                    grp[item].get('issn', 'na'),
                    grp[item].get('impactGraphURL', 'na'),
                ])