Beispiel #1
0
def check_issns(row_object):
    """Check the ISSN columns of one row and the consistency of its ISSN-L.

    Step 1: each of the columns "issn", "issn_print", "issn_electronic" and
    "issn_l" whose value is not "NA" must be well-formed and, if it is, must
    also be valid (check digit) according to the ``oat`` helpers.

    Step 2: unless the row's "issn_l" is "NA", every entry stored under this
    row's ISSN values in ``issn_dict``, ``issn_p_dict`` and ``issn_e_dict``
    must carry the same "issn_l" as this row.

    Any violation is reported through ``fail`` with a message prefixed by
    the row's file name and line number.
    """
    __tracebackhide__ = True
    record = row_object.row
    prefix = '{}, line {}: '.format(row_object.file_name, row_object.line_number)

    # All four columns are read up front, before any value is inspected.
    issn_values = (
        record["issn"],
        record["issn_print"],
        record["issn_electronic"],
        record["issn_l"],
    )
    for value in issn_values:
        if value == "NA":
            continue
        if not oat.is_wellformed_ISSN(value):
            message = prefix + 'value "' + value + '" is not a well-formed ISSN'
            fail(message)
        elif not oat.is_valid_ISSN(value):
            message = (prefix + 'value "' + value +
                       '" is no valid ISSN (check digit mismatch)')
            fail(message)

    linking_issn = record["issn_l"]
    if linking_issn == "NA":
        # Without an ISSN-L there is nothing to compare against.
        return

    template = prefix + "Two entries share a common {} ({}), but the issn_l differs ({} vs {})"
    # (label used in the message, column to read, lookup keyed by that ISSN)
    lookups = (
        ("issn", "issn", issn_dict),
        ("issn_p", "issn_print", issn_p_dict),
        ("issn_e", "issn_electronic", issn_e_dict),
    )
    for label, column, known_rows in lookups:
        value = record[column]
        if value == "NA":
            continue
        for other in known_rows[value]:
            if other["issn_l"] != linking_issn:
                fail(template.format(label, value, linking_issn, other["issn_l"]))
Beispiel #2
0
def check_issns(row_object):
    """Fail the running pytest test if a row contains a malformed or invalid ISSN.

    The columns "issn", "issn_print", "issn_electronic" and "issn_l" of
    ``row_object.row`` are inspected; the placeholder value "NA" is skipped.
    Failure messages start with the row's file name and line number.
    """
    # Keeps this helper's frame out of pytest's failure tracebacks.
    __tracebackhide__ = True
    record = row_object.row
    location = '{}, line {}: '.format(row_object.file_name, row_object.line_number)
    column_names = ("issn", "issn_print", "issn_electronic", "issn_l")
    for issn in [record[name] for name in column_names]:
        if issn == "NA":
            continue
        if not oat.is_wellformed_ISSN(issn):
            malformed_msg = location + 'value "' + issn + '" is not a well-formed ISSN'
            pytest.fail(malformed_msg)
        if not oat.is_valid_ISSN(issn):
            invalid_msg = (location + 'value "' + issn +
                           '" is no valid ISSN (check digit mismatch)')
            pytest.fail(invalid_msg)
Beispiel #3
0
def check_issns(row_object):
    """Validate every ISSN value found in a single row.

    Reads the "issn", "issn_print", "issn_electronic" and "issn_l" columns
    from ``row_object.row``. Values equal to "NA" are ignored; every other
    value is checked for well-formedness and for validity (check digit) via
    the ``oat`` helpers, and ``pytest.fail`` is called with a message naming
    the file and line number when a check does not pass.
    """
    # Keeps this helper's frame out of pytest's failure tracebacks.
    __tracebackhide__ = True
    fields = row_object.row
    where = '{}, line {}: '.format(row_object.file_name,
                                   row_object.line_number)
    values = [
        fields[name]
        for name in ("issn", "issn_print", "issn_electronic", "issn_l")
    ]
    for value in (v for v in values if v != "NA"):
        wellformed = oat.is_wellformed_ISSN(value)
        if not wellformed:
            pytest.fail(where + 'value "' + value +
                        '" is not a well-formed ISSN')
        valid = oat.is_valid_ISSN(value)
        if not valid:
            pytest.fail(where + 'value "' + value +
                        '" is no valid ISSN (check digit mismatch)')
def main():
    """Look up not-yet-analysed journals in JournalTOCs and store the results.

    Steps:
      1. Load the results of earlier runs from JOURNALTOC_RESULTS_FILE (if
         that file exists), keyed by journal title.
      2. Collect all journals from APC_DE_FILE that are not in those results,
         together with their well-formed ISSNs and their hybrid status. A
         journal whose "is_hybrid" value differs between rows is marked
         "FLIPPED".
      3. Process at most BATCH_SIZE of the remaining journals: query
         JournalTOCs with each ISSN until one is found and record the
         returned metadata and journal type.
      4. Rewrite JOURNALTOC_RESULTS_FILE with the old and new results.

    All CSV files are opened with ``newline=""`` as the csv module requires;
    otherwise quoted fields with embedded newlines are misread and extra
    carriage returns are written on platforms using CRLF line endings.
    """
    # Step 1: results of previous runs; the first row per title wins.
    analysed_journals = {}
    if os.path.isfile(JOURNALTOC_RESULTS_FILE):
        with open(JOURNALTOC_RESULTS_FILE, newline="") as results:
            for line in DictReader(results):
                title = line["journal_full_title"]
                if title not in analysed_journals:
                    analysed_journals[title] = line

    # Step 2: journals that still need a lookup, keyed by title.
    remaining_journals = {}
    with open(APC_DE_FILE, newline="") as apc_de:
        for line in DictReader(apc_de):
            title = line["journal_full_title"]
            if title in analysed_journals:
                continue
            journal = remaining_journals.get(title)
            if journal is None:
                journal = {
                    "journal_full_title": line["journal_full_title"],
                    "publisher": line["publisher"],
                    "is_hybrid": line["is_hybrid"],
                    "issns": [],
                }
                remaining_journals[title] = journal
            # Gather every distinct well-formed ISSN seen for this title.
            for issn_type in ISSN_TYPES:
                issn = line[issn_type]
                if issn not in journal["issns"] and oat.is_wellformed_ISSN(issn):
                    journal["issns"].append(issn)
            # Rows of the same journal disagreeing on the hybrid status are
            # flagged; once "FLIPPED", the marker stays in place.
            is_hybrid = line["is_hybrid"]
            if is_hybrid in ("TRUE", "FALSE") and is_hybrid != journal["is_hybrid"]:
                journal["is_hybrid"] = "FLIPPED"

    msg = "{} unique journals found in OpenAPC core data file, {} already analysed, {} remaining."
    oat.print_g(
        msg.format(
            len(remaining_journals) + len(analysed_journals),
            len(analysed_journals), len(remaining_journals)))

    # Step 3: analyse up to BATCH_SIZE journals in this run.
    for count, (title, fields) in enumerate(remaining_journals.items(), start=1):
        entry = dict.fromkeys(RESULTS_FILE_FIELDNAMES)
        entry["journal_full_title"] = title
        for key in ["publisher", "is_hybrid"]:
            entry[key] = fields[key]
        entry["issns"] = "|".join(fields["issns"])
        msg = 'Analysing journal "{}" ({}), OpenAPC hybrid status is {}...'
        msg = msg.format(entry["journal_full_title"], entry["issns"],
                         entry["is_hybrid"])
        oat.print_b(msg)
        for issn in fields["issns"]:
            oat.print_y("Looking up ISSN " + issn + "...")
            jtoc_metadata = get_jtoc_metadata(issn)
            if jtoc_metadata["jtoc_id"] is not None:
                entry["in_jtoc"] = "TRUE"
                for key in ["jtoc_publisher", "jtoc_title"]:
                    entry[key] = jtoc_metadata[key]
                journal_type = get_jtoc_journal_type(jtoc_metadata["jtoc_id"])
                entry["jtoc_type"] = journal_type
                msg = 'Journal found ("{}"), JournalTOCs type is {}'
                oat.print_g(msg.format(entry["jtoc_title"],
                                       entry["jtoc_type"]))
                # The first ISSN that yields a hit is sufficient.
                break
        else:
            # Reached only if no ISSN produced a hit (or there were none).
            oat.print_r("None of the associated ISSNS found in JTOCs!")
        analysed_journals[title] = entry
        if count >= BATCH_SIZE:
            break
        # Pause between journals, presumably to go easy on the JournalTOCs
        # service (TODO confirm the required delay).
        sleep(2)

    # Step 4: persist old and new results together.
    with open(JOURNALTOC_RESULTS_FILE, "w", newline="") as res_file:
        writer = DictWriter(res_file, fieldnames=RESULTS_FILE_FIELDNAMES)
        writer.writeheader()
        writer.writerows(analysed_journals.values())