def simple_query_tool():
    """Look up a batch of DOIs and email the results as CSV and JSONL files.

    Reads ``dois`` and ``email`` from the JSON request body. Every stored
    ``response_jsonb`` whose ``Pub.id`` matches a cleaned DOI is included;
    every submitted DOI that was not matched gets a placeholder built with
    ``pub.build_new_pub(...).to_dict_v2()``. The combined rows are written to
    ``output.jsonl`` and ``output.csv`` in the working directory and emailed
    as attachments.

    Returns:
        The ``jsonify`` response carrying the recipient address under
        ``"got it"`` and, under ``"dois"``, the matched DOIs followed by the
        submitted DOIs that were not matched.

    Raises:
        KeyError: if the body has no ``"dois"`` or ``"email"`` key (assuming
            the body parses to a dict).
    """
    body = request.json

    # A set drops duplicate submissions; falsy entries (None, "") are skipped.
    dirty_dois = {d for d in body["dois"] if d}

    # Clean each DOI once and reuse the result for both the query and the
    # missing-DOI check (assumes clean_doi is deterministic for a given input).
    cleaned = {d: clean_doi(d, return_none_if_error=True) for d in dirty_dois}
    clean_dois = [c for c in cleaned.values() if c]

    rows = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois)).all()
    # Rows whose stored response is empty are ignored.
    pub_responses = [row[0] for row in rows if row[0]]
    pub_dois = [r["doi"] for r in pub_responses]

    # Set membership keeps this linear in the number of submitted DOIs.
    # DOIs that failed cleaning map to None and therefore count as missing.
    found = set(pub_dois)
    missing_dois = [d for d in dirty_dois if cleaned[d] not in found]
    placeholder_responses = [
        pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois
    ]
    responses = pub_responses + placeholder_responses

    # save jsonl
    # The file is opened in binary mode, so encode explicitly; json.dumps
    # emits ASCII by default, which makes this safe on Python 2 and 3 alike.
    with open("output.jsonl", "wb") as f:
        for response in responses:
            line = json.dumps(response, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [pub.csv_dict_from_response_dict(r) for r in responses]
    csv_dicts = [row for row in csv_dicts if row]

    # Use the union of keys over all rows: an empty result set then yields a
    # header-only file instead of an IndexError on csv_dicts[0], and a row
    # with an extra column cannot make DictWriter raise. "doi" stays first.
    other_fields = {name for row in csv_dicts for name in row} - {"doi"}
    fieldnames = ["doi"] + sorted(other_fields)

    with open("output.csv", "wb") as f:
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames,
                                       dialect="excel")
        writer.writeheader()
        for row in csv_dicts:
            writer.writerow(row)

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}},
                         ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})
def simple_query_tool():
    """Look up a batch of DOIs and email the matches as CSV and JSONL files.

    Reads ``dois`` and ``email`` from the JSON request body, cleans each DOI
    with ``clean_doi`` and fetches the stored ``response_jsonb`` of every
    ``Pub`` whose id is among the cleaned DOIs. The matches are written to
    ``output.jsonl`` and ``output.csv`` in the working directory and emailed
    as attachments. DOIs with no database row produce no output row.

    Returns:
        The ``jsonify`` response carrying the recipient address under
        ``"got it"`` and the cleaned DOIs under ``"dois"``.

    Raises:
        KeyError: if the body has no ``"dois"`` or ``"email"`` key (assuming
            the body parses to a dict).
    """
    body = request.json
    dirty_dois_list = body["dois"]

    # DOIs that clean_doi could not clean come back falsy and are dropped.
    cleaned = (
        clean_doi(dirty_doi, return_none_if_error=True)
        for dirty_doi in dirty_dois_list
    )
    clean_dois = [doi for doi in cleaned if doi]

    rows = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois)).all()
    # Skip rows with no stored response so they are neither written as a
    # literal "null" line nor handed to the CSV converter.
    pub_responses = [row[0] for row in rows if row[0]]

    # save jsonl
    # The file is opened in binary mode, so encode explicitly; json.dumps
    # emits ASCII by default, which makes this safe on Python 2 and 3 alike.
    with open("output.jsonl", "wb") as f:
        for response in pub_responses:
            line = json.dumps(response, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [pub.csv_dict_from_response_dict(r) for r in pub_responses]
    csv_dicts = [row for row in csv_dicts if row]

    # Use the union of keys over all rows: when none of the DOIs are in the
    # database this yields a header-only file instead of an IndexError on
    # csv_dicts[0], and a row with an extra column cannot make DictWriter
    # raise. "doi" stays first.
    other_fields = {name for row in csv_dicts for name in row} - {"doi"}
    fieldnames = ["doi"] + sorted(other_fields)

    with open("output.csv", "wb") as f:
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames,
                                       dialect="excel")
        writer.writeheader()
        for row in csv_dicts:
            writer.writerow(row)

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}},
                         ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    # @todo make sure in the return dict that there is a row for every doi
    # even those not in our db
    return jsonify({"got it": email_address, "dois": clean_dois})
def simple_query_tool():
    """Email CSV and JSONL result files for a submitted batch of DOIs.

    The JSON request body supplies ``dois`` and ``email``. Stored
    ``response_jsonb`` values are fetched for the cleaned DOIs; each
    submitted DOI without a match is represented by a placeholder from
    ``pub.build_new_pub(...).to_dict_v2()``. All rows are written to
    ``output.jsonl`` and ``output.csv`` in the working directory, which are
    then attached to the email.

    Returns:
        The ``jsonify`` response with the recipient address under
        ``"got it"`` and, under ``"dois"``, matched DOIs followed by the
        unmatched submitted DOIs.

    Raises:
        KeyError: if the body has no ``"dois"`` or ``"email"`` key (assuming
            the body parses to a dict).
    """
    body = request.json

    # De-duplicate the submission and ignore falsy entries (None, "").
    dirty_dois = {d for d in body["dois"] if d}

    # One clean_doi call per DOI, shared by the query and the missing check
    # (assumes clean_doi is deterministic for a given input).
    clean_by_dirty = {
        d: clean_doi(d, return_none_if_error=True) for d in dirty_dois
    }
    clean_dois = [c for c in clean_by_dirty.values() if c]

    rows = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois)).all()
    # A row can carry an empty response; without this filter the r["doi"]
    # lookup below raises TypeError on None.
    pub_responses = [row[0] for row in rows if row[0]]
    pub_dois = [r["doi"] for r in pub_responses]

    # Set lookup instead of scanning the pub_dois list for every input.
    # Inputs that failed cleaning map to None and so count as missing.
    matched = set(pub_dois)
    missing_dois = [d for d in dirty_dois if clean_by_dirty[d] not in matched]
    placeholder_responses = [
        pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois
    ]
    responses = pub_responses + placeholder_responses

    # save jsonl
    # Binary-mode file: encode each line so the write works on Python 3 too
    # (json.dumps output is ASCII by default, so Python 2 is unaffected).
    with open("output.jsonl", "wb") as f:
        for response in responses:
            line = json.dumps(response, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [pub.csv_dict_from_response_dict(r) for r in responses]
    csv_dicts = [row for row in csv_dicts if row]

    # Header is the union of all row keys with "doi" first. This avoids the
    # IndexError on csv_dicts[0] when there are no rows, and the DictWriter
    # error when a later row has a key the first row lacked.
    other_fields = {name for row in csv_dicts for name in row} - {"doi"}
    fieldnames = ["doi"] + sorted(other_fields)

    with open("output.csv", "wb") as f:
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames,
                                       dialect="excel")
        writer.writeheader()
        for row in csv_dicts:
            writer.writerow(row)

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}},
                         ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})
def simple_query_tool():
    """Email DOI lookup results in the file formats the caller asked for.

    The JSON request body supplies ``dois``, ``email`` and optionally
    ``formats`` (any of ``"jsonl"``, ``"csv"``, ``"xlsx"``; a missing or
    empty value selects jsonl and csv). Stored ``response_jsonb`` values are
    fetched for the cleaned DOIs; each submitted DOI without a match gets a
    placeholder from ``pub.build_new_pub(...).to_dict_v2()``. One
    ``output.<format>`` file per requested format is written to the working
    directory and attached to the email.

    Returns:
        The ``jsonify`` response with the recipient address under
        ``"got it"`` and, under ``"dois"``, matched DOIs followed by the
        unmatched submitted DOIs.

    Raises:
        KeyError: if the body has no ``"dois"`` or ``"email"`` key (assuming
            the body parses to a dict).
    """
    body = request.json

    # De-duplicate the submission and ignore falsy entries (None, "").
    dirty_dois = {d for d in body["dois"] if d}

    # One clean_doi call per DOI, shared by the query and the missing check
    # (assumes clean_doi is deterministic for a given input).
    cleaned = {d: clean_doi(d, return_none_if_error=True) for d in dirty_dois}
    clean_dois = [c for c in cleaned.values() if c]

    rows = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois)).all()
    pub_responses = [row[0] for row in rows if row[0]]
    pub_dois = [r["doi"] for r in pub_responses]

    # Set lookup instead of scanning the pub_dois list for every input.
    # Inputs that failed cleaning map to None and so count as missing.
    found = set(pub_dois)
    missing_dois = [d for d in dirty_dois if cleaned[d] not in found]
    placeholder_responses = [
        pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois
    ]
    responses = pub_responses + placeholder_responses

    formats = body.get("formats") or ["jsonl", "csv"]
    files = []

    if "jsonl" in formats:
        # save jsonl
        # Binary-mode file: encode each line so the write works on Python 3
        # too (json.dumps output is ASCII by default).
        with open("output.jsonl", "wb") as f:
            for response in responses:
                line = json.dumps(response, sort_keys=True) + "\n"
                f.write(line.encode("utf-8"))
        files.append("output.jsonl")

    # Tabular rows shared by the csv and xlsx writers.
    csv_dicts = [pub.csv_dict_from_response_dict(r) for r in responses]
    csv_dicts = [row for row in csv_dicts if row]

    # Header is the union of all row keys with "doi" first. Indexing
    # csv_dicts[0] raised IndexError on an empty result set, even when only
    # jsonl had been requested.
    other_fields = {name for row in csv_dicts for name in row} - {"doi"}
    fieldnames = ["doi"] + sorted(other_fields)

    if "csv" in formats:
        # save csv
        with open("output.csv", "wb") as f:
            writer = unicodecsv.DictWriter(f, fieldnames=fieldnames,
                                           dialect="excel")
            writer.writeheader()
            for row in csv_dicts:
                writer.writerow(row)
        files.append("output.csv")

    if "xlsx" in formats:
        book = Workbook()
        sheet = book.worksheets[0]
        sheet.title = "results"

        # Worksheet rows and columns are 1-based; row 1 holds the header.
        for col_idx, field_name in enumerate(fieldnames, start=1):
            sheet.cell(column=col_idx, row=1, value=field_name)

        for row_idx, row in enumerate(csv_dicts, start=2):
            for col_idx, field_name in enumerate(fieldnames, start=1):
                # .get: a row lacking a column leaves the cell empty rather
                # than raising KeyError.
                sheet.cell(column=col_idx, row=row_idx,
                           value=row.get(field_name))

        book.save(filename="output.xlsx")
        files.append("output.xlsx")

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}}, files)
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})