Beispiel #1
0
def simple_query_tool():
    """Look up the posted DOIs and email the results as JSONL and CSV files.

    Reads the JSON request body, which must contain ``dois`` (an iterable of
    DOI strings) and ``email``. Each non-empty DOI is normalized with
    ``clean_doi``; stored responses are fetched for the normalized DOIs and a
    placeholder response is built for every submitted DOI that was not found.
    The combined responses are written to ``output.jsonl`` and ``output.csv``
    in the current working directory, and both file names are handed to
    ``create_email`` before the email is sent.

    Returns:
        The ``jsonify`` response holding the recipient address under
        ``"got it"`` and, under ``"dois"``, the found DOIs followed by the
        missing ones.
    """
    body = request.json
    # De-duplicate the submitted DOIs and drop empty entries.
    dirty_dois_list = {d for d in body["dois"] if d}

    # Normalize every DOI exactly once; the mapping is reused below for the
    # missing-DOI check instead of calling clean_doi a second time.
    clean_by_dirty = {
        d: clean_doi(d, return_none_if_error=True) for d in dirty_dois_list
    }
    clean_dois = [
        clean_by_dirty[d] for d in dirty_dois_list if clean_by_dirty[d]
    ]

    q = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois))
    rows = q.all()

    # Skip rows whose stored response is empty.
    pub_responses = [row[0] for row in rows if row[0]]

    pub_dois = [r['doi'] for r in pub_responses]
    # Set membership keeps the missing-DOI check linear in the input size.
    found_dois = set(pub_dois)
    missing_dois = [
        d for d in dirty_dois_list if clean_by_dirty[d] not in found_dois
    ]
    placeholder_responses = [
        pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois
    ]

    responses = pub_responses + placeholder_responses

    # NOTE(review): the output file names are fixed, so concurrent requests
    # would presumably overwrite each other's files -- confirm how this view
    # is deployed.

    # save jsonl
    with open("output.jsonl", 'wb') as f:
        for response_jsonb in responses:
            # The file is binary, so encode explicitly; json.dumps emits
            # ASCII-only text by default, making this safe everywhere.
            line = json.dumps(response_jsonb, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [
        pub.csv_dict_from_response_dict(my_dict) for my_dict in responses
    ]
    csv_dicts = [my_dict for my_dict in csv_dicts if my_dict]
    # Columns come from the first row; with no rows at all fall back to a
    # header-only file instead of raising IndexError.
    other_fields = sorted(csv_dicts[0].keys()) if csv_dicts else []
    fieldnames = ["doi"] + [name for name in other_fields if name != "doi"]
    with open("output.csv", 'wb') as f:
        writer = unicodecsv.DictWriter(f,
                                       fieldnames=fieldnames,
                                       dialect='excel')
        writer.writeheader()
        for my_dict in csv_dicts:
            writer.writerow(my_dict)

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}},
                         ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})
Beispiel #2
0
def simple_query_tool():
    """Look up the posted DOIs and email the results as JSONL and CSV files.

    Reads the JSON request body, which must contain ``dois`` (an iterable of
    DOI strings) and ``email``. Each DOI is normalized with ``clean_doi`` and
    the stored responses for the normalized DOIs are written to
    ``output.jsonl`` and ``output.csv`` in the current working directory;
    both file names are handed to ``create_email`` before the email is sent.
    DOIs without a stored response do not appear in the files.

    Returns:
        The ``jsonify`` response holding the recipient address under
        ``"got it"`` and the normalized DOIs under ``"dois"``.
    """
    body = request.json
    dirty_dois_list = body["dois"]

    clean_dois = [
        clean_doi(dirty_doi, return_none_if_error=True)
        for dirty_doi in dirty_dois_list
    ]
    # clean_doi yields None for inputs it cannot normalize; drop those.
    clean_dois = [doi for doi in clean_dois if doi]

    q = db.session.query(pub.Pub.response_jsonb).filter(
        pub.Pub.id.in_(clean_dois))
    rows = q.all()
    # Skip rows whose stored response is empty so no "null" lines are written.
    pub_responses = [row[0] for row in rows if row[0]]

    # save jsonl
    with open("output.jsonl", 'wb') as f:
        for response_jsonb in pub_responses:
            # The file is binary, so encode explicitly; json.dumps emits
            # ASCII-only text by default, making this safe everywhere.
            line = json.dumps(response_jsonb, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [
        pub.csv_dict_from_response_dict(my_dict) for my_dict in pub_responses
    ]
    csv_dicts = [my_dict for my_dict in csv_dicts if my_dict]
    # Columns come from the first row; when nothing was found fall back to a
    # header-only file instead of raising IndexError.
    other_fields = sorted(csv_dicts[0].keys()) if csv_dicts else []
    fieldnames = ["doi"] + [name for name in other_fields if name != "doi"]
    with open("output.csv", 'wb') as f:
        writer = unicodecsv.DictWriter(f,
                                       fieldnames=fieldnames,
                                       dialect='excel')
        writer.writeheader()
        for my_dict in csv_dicts:
            writer.writerow(my_dict)

    # prep email
    email_address = body["email"]
    email = create_email(email_address, "Your Unpaywall results",
                         "simple_query_tool", {"profile": {}},
                         ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    # @todo make sure in the return dict that there is a row for every doi
    # even those not in our db
    return jsonify({"got it": email_address, "dois": clean_dois})
Beispiel #3
0
def simple_query_tool():
    """Look up the posted DOIs and email the results as JSONL and CSV files.

    Reads the JSON request body, which must contain ``dois`` (an iterable of
    DOI strings) and ``email``. Each non-empty DOI is normalized with
    ``clean_doi``; stored responses are fetched for the normalized DOIs and a
    placeholder response is built for every submitted DOI that was not found.
    The combined responses are written to ``output.jsonl`` and ``output.csv``
    in the current working directory, and both file names are handed to
    ``create_email`` before the email is sent.

    Returns:
        The ``jsonify`` response holding the recipient address under
        ``"got it"`` and, under ``"dois"``, the found DOIs followed by the
        missing ones.
    """
    body = request.json
    # De-duplicate the submitted DOIs and drop empty entries.
    dirty_dois_list = {d for d in body["dois"] if d}

    # Normalize every DOI exactly once; the mapping is reused below for the
    # missing-DOI check instead of calling clean_doi a second time.
    clean_by_dirty = {d: clean_doi(d, return_none_if_error=True) for d in dirty_dois_list}
    clean_dois = [clean_by_dirty[d] for d in dirty_dois_list if clean_by_dirty[d]]

    q = db.session.query(pub.Pub.response_jsonb).filter(pub.Pub.id.in_(clean_dois))
    rows = q.all()

    # Skip rows whose stored response is empty; indexing None with 'doi'
    # below would otherwise raise TypeError.
    pub_responses = [row[0] for row in rows if row[0]]

    pub_dois = [r['doi'] for r in pub_responses]
    # Set membership keeps the missing-DOI check linear in the input size.
    found_dois = set(pub_dois)
    missing_dois = [d for d in dirty_dois_list if clean_by_dirty[d] not in found_dois]
    placeholder_responses = [pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois]

    responses = pub_responses + placeholder_responses

    # save jsonl
    with open("output.jsonl", 'wb') as f:
        for response_jsonb in responses:
            # The file is binary, so encode explicitly; json.dumps emits
            # ASCII-only text by default, making this safe everywhere.
            line = json.dumps(response_jsonb, sort_keys=True) + "\n"
            f.write(line.encode("utf-8"))

    # save csv
    csv_dicts = [pub.csv_dict_from_response_dict(my_dict) for my_dict in responses]
    csv_dicts = [my_dict for my_dict in csv_dicts if my_dict]
    # Columns come from the first row; with no rows at all fall back to a
    # header-only file instead of raising IndexError.
    other_fields = sorted(csv_dicts[0].keys()) if csv_dicts else []
    fieldnames = ["doi"] + [name for name in other_fields if name != "doi"]
    with open("output.csv", 'wb') as f:
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, dialect='excel')
        writer.writeheader()
        for my_dict in csv_dicts:
            writer.writerow(my_dict)

    # prep email
    email_address = body["email"]
    email = create_email(email_address,
                 "Your Unpaywall results",
                 "simple_query_tool",
                 {"profile": {}},
                 ["output.csv", "output.jsonl"])
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})
Beispiel #4
0
def simple_query_tool():
    """Look up the posted DOIs and email the results in the requested formats.

    Reads the JSON request body, which must contain ``dois`` (an iterable of
    DOI strings) and ``email``, and may contain ``formats`` (any of
    ``"jsonl"``, ``"csv"``, ``"xlsx"``; a missing or empty value selects
    jsonl and csv). Each non-empty DOI is normalized with ``clean_doi``;
    stored responses are fetched for the normalized DOIs and a placeholder
    response is built for every submitted DOI that was not found. One
    ``output.<format>`` file per requested format is written to the current
    working directory, and the written file names are handed to
    ``create_email`` before the email is sent.

    Returns:
        The ``jsonify`` response holding the recipient address under
        ``"got it"`` and, under ``"dois"``, the found DOIs followed by the
        missing ones.
    """
    body = request.json
    # De-duplicate the submitted DOIs and drop empty entries.
    dirty_dois_list = {d for d in body["dois"] if d}

    # Normalize every DOI exactly once; the mapping is reused below for the
    # missing-DOI check instead of calling clean_doi a second time.
    clean_by_dirty = {d: clean_doi(d, return_none_if_error=True) for d in dirty_dois_list}
    clean_dois = [clean_by_dirty[d] for d in dirty_dois_list if clean_by_dirty[d]]

    q = db.session.query(pub.Pub.response_jsonb).filter(pub.Pub.id.in_(clean_dois))
    rows = q.all()

    # Skip rows whose stored response is empty.
    pub_responses = [row[0] for row in rows if row[0]]

    pub_dois = [r['doi'] for r in pub_responses]
    # Set membership keeps the missing-DOI check linear in the input size.
    found_dois = set(pub_dois)
    missing_dois = [d for d in dirty_dois_list if clean_by_dirty[d] not in found_dois]
    placeholder_responses = [pub.build_new_pub(d, None).to_dict_v2() for d in missing_dois]

    responses = pub_responses + placeholder_responses

    formats = body.get("formats", []) or ["jsonl", "csv"]
    files = []

    if "jsonl" in formats:
        # save jsonl
        with open("output.jsonl", 'wb') as f:
            for response_jsonb in responses:
                # The file is binary, so encode explicitly; json.dumps emits
                # ASCII-only text by default, making this safe everywhere.
                line = json.dumps(response_jsonb, sort_keys=True) + "\n"
                f.write(line.encode("utf-8"))
        files.append("output.jsonl")

    # The tabular rows and column order are shared by the csv and xlsx writers.
    csv_dicts = [pub.csv_dict_from_response_dict(my_dict) for my_dict in responses]
    csv_dicts = [my_dict for my_dict in csv_dicts if my_dict]
    # Columns come from the first row; with no rows at all fall back to a
    # header-only table instead of raising IndexError.
    other_fields = sorted(csv_dicts[0].keys()) if csv_dicts else []
    fieldnames = ["doi"] + [name for name in other_fields if name != "doi"]

    if "csv" in formats:
        # save csv
        with open("output.csv", 'wb') as f:
            writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, dialect='excel')
            writer.writeheader()
            for my_dict in csv_dicts:
                writer.writerow(my_dict)
        files.append("output.csv")

    if "xlsx" in formats:
        # save xlsx
        book = Workbook()
        sheet = book.worksheets[0]
        sheet.title = "results"

        # Header in row 1; cell coordinates are 1-based.
        for col_idx, field_name in enumerate(fieldnames, start=1):
            sheet.cell(column=col_idx, row=1, value=field_name)

        # Data starts in row 2, directly below the header.
        for row_idx, row in enumerate(csv_dicts, start=2):
            for col_idx, field_name in enumerate(fieldnames, start=1):
                # .get leaves the cell empty when a row lacks a column,
                # rather than raising KeyError.
                sheet.cell(column=col_idx, row=row_idx, value=row.get(field_name))

        book.save(filename="output.xlsx")
        files.append("output.xlsx")

    # prep email
    email_address = body["email"]
    email = create_email(email_address,
                 "Your Unpaywall results",
                 "simple_query_tool",
                 {"profile": {}},
                 files)
    send(email, for_real=True)

    return jsonify({"got it": email_address, "dois": pub_dois + missing_dois})