Beispiel #1
0
def test_range():
    """Expands slash-separated year pairs into every year they span."""

    # Four-digit endpoints: YYYY/YYYY.
    four_digit = year_parser.integer_years(["1937/1939", "1942/1943"])
    assert four_digit == [*range(1937, 1940), *range(1942, 1944)]

    # Three-digit start year: YYY/YYYY.
    three_digit = year_parser.integer_years(["990/1000"])
    assert three_digit == list(range(990, 1001))
Beispiel #2
0
def test_duplicates():
    """Each year is listed once even when several inputs resolve to it."""

    overlapping_inputs = ["1934-06/1935-07", "1934-06-01", "1934"]
    parsed_years = year_parser.integer_years(overlapping_inputs)

    assert parsed_years == [1934, 1935]
Beispiel #3
0
def test_multiple_dates():
    """Several input values yield one combined list of years.

    The expected list is in ascending order, not in the order of the inputs.
    """

    mixed_inputs = ["1941-10-01", "1935", "1945"]
    parsed_years = year_parser.integer_years(mixed_inputs)

    assert parsed_years == [1935, 1941, 1945]
def test_range():
    """Expands each YYYY/YYYY value into every year from start to end."""

    year_spans = ["1937/1939", "1942/1943"]
    expected_years = [*range(1937, 1940), *range(1942, 1944)]

    assert year_parser.integer_years(year_spans) == expected_years
Beispiel #5
0
def map_record(row: DLCSRecord, config: typing.Dict) -> UrsusRecord:
    """Builds the Ursus Solr document for one row of CSV metadata.

    Args:
        row: A mapping representing the CSV record.
        config: Settings passed through to the field and thumbnail helpers.
            Its "collection_names" entry is looked up by the row's
            "Parent ARK" value to find the collection name.

    Returns:
        A mapping representing the record to submit to Solr.

    """
    # Start from the fields declared in the mapper, one value per field.
    record: UrsusRecord = {}
    for field_name in mapper.FIELD_MAPPING:
        record[field_name] = map_field_value(row, field_name, config=config)

    # Thumbnail: keep a mapped value if there is one, otherwise fall back to
    # the child-derived thumbnail and finally to the manifest-derived one.
    thumbnail = record.get("thumbnail_url_ss")
    if not thumbnail:
        thumbnail = thumbnail_from_child(record, config=config)
    if not thumbnail:
        thumbnail = thumbnail_from_manifest(record)
    record["thumbnail_url_ss"] = thumbnail

    # Collection name: only set when the row's parent ARK has a known name.
    if "Parent ARK" in row:
        parent_ark = row["Parent ARK"]
        names_by_ark = config["collection_names"]
        if parent_ark in names_by_ark:
            record["dlcs_collection_name_tesim"] = [names_by_ark[parent_ark]]

    # Facet fields: (facet field, field it is copied from). The collection
    # facet must come after the collection name has been set above.
    facet_sources = (
        ("features_sim", "features_tesim"),
        ("genre_sim", "genre_tesim"),
        ("human_readable_language_sim", "human_readable_language_tesim"),
        ("human_readable_resource_type_sim", "resource_type_tesim"),
        ("location_sim", "location_tesim"),
        ("member_of_collections_ssim", "dlcs_collection_name_tesim"),
        ("named_subject_sim", "named_subject_tesim"),
        ("place_of_origin_sim", "place_of_origin_tesim"),
        ("script_sim", "script_tesim"),
        ("subject_sim", "subject_tesim"),
        ("support_sim", "support_tesim"),
        ("writing_system_sim", "writing_system_tesim"),
    )
    for facet_field, source_field in facet_sources:
        record[facet_field] = record.get(source_field)

    normalized_dates = record.get("normalized_date_tesim")
    record["year_isim"] = year_parser.integer_years(normalized_dates)

    # Sort fields: first title and earliest year, when available.
    title_values = record.get("title_tesim")
    if isinstance(title_values, typing.Sequence) and len(title_values) > 0:
        record["sort_title_ssort"] = title_values[0]

    year_values = record.get("year_isim")
    if isinstance(year_values, typing.Sequence) and len(year_values) > 0:
        record["sort_year_isi"] = min(year_values)

    return record
Beispiel #6
0
def map_record(row: DLCSRecord, solr_client: Solr,
               config: typing.Dict) -> UrsusRecord:
    """Builds the Ursus Solr document for one row of CSV metadata.

    Args:
        row: A mapping representing the CSV record.
        solr_client: Solr client handed to solr_transformed_dates when the
            date field is built.
        config: Settings passed through to the field and thumbnail helpers.
            Its "collection_names" entry is looked up by the row's
            "Parent ARK" value to find the collection name.

    Returns:
        A mapping representing the record to submit to Solr.

    """
    # Start from the fields declared in the mapper, one value per field.
    record: UrsusRecord = {}
    for field_name in mapper.FIELD_MAPPING:
        record[field_name] = map_field_value(row, field_name, config=config)

    # THUMBNAIL: keep a mapped value if there is one, otherwise fall back to
    # the child-derived thumbnail and finally to the manifest-derived one.
    thumbnail = record.get("thumbnail_url_ss")
    if not thumbnail:
        thumbnail = thumbnail_from_child(record, config=config)
    if not thumbnail:
        thumbnail = thumbnail_from_manifest(record)
    record["thumbnail_url_ss"] = thumbnail

    # COLLECTION NAME: only set when the row's parent ARK has a known name.
    if "Parent ARK" in row:
        parent_ark = row["Parent ARK"]
        names_by_ark = config["collection_names"]
        if parent_ark in names_by_ark:
            record["dlcs_collection_name_tesim"] = [names_by_ark[parent_ark]]

    # FIELDS: each pair is (derived field, field it is copied from). The
    # copies are done in stages because the helper calls between the stages
    # receive the record as built so far.
    copies_before_names = (
        ("uniform_title_sim", "uniform_title_tesim"),
        ("architect_sim", "architect_tesim"),
        ("author_sim", "author_tesim"),
        ("illuminator_sim", "illuminator_tesim"),
        ("scribe_sim", "scribe_tesim"),
        ("rubricator_sim", "rubricator_tesim"),
        ("commentator_sim", "commentator_tesim"),
        ("translator_sim", "translator_tesim"),
        ("lyricist_sim", "lyricist_tesim"),
        ("composer_sim", "composer_tesim"),
        ("illustrator_sim", "illustrator_tesim"),
        ("editor_sim", "editor_tesim"),
        ("calligrapher_sim", "calligrapher_tesim"),
        ("engraver_sim", "engraver_tesim"),
        ("printmaker_sim", "printmaker_tesim"),
        ("human_readable_language_sim", "human_readable_language_tesim"),
    )
    for derived_field, source_field in copies_before_names:
        record[derived_field] = record.get(source_field)

    record["names_sim"] = name_fields(record)
    record["keywords_sim"] = keywords_fields(record)

    # explicit, incipit and inscription get no *_sim copy here.
    copies_before_dates = (
        ("features_sim", "features_tesim"),
        ("script_sim", "script_tesim"),
        ("writing_system_sim", "writing_system_tesim"),
    )
    for derived_field, source_field in copies_before_dates:
        record[derived_field] = record.get(source_field)

    normalized_dates = record.get("normalized_date_tesim")
    record["year_isim"] = year_parser.integer_years(normalized_dates)
    parsed_dates = date_parser.get_dates(normalized_dates)
    record["date_dtsim"] = solr_transformed_dates(solr_client, parsed_dates)

    # The collection facet relies on the collection name set above.
    copies_after_dates = (
        ("place_of_origin_sim", "place_of_origin_tesim"),
        ("associated_name_sim", "associated_name_tesim"),
        ("form_sim", "form_ssi"),
        ("support_sim", "support_tesim"),
        ("genre_sim", "genre_tesim"),
        ("subject_sim", "subject_tesim"),
        ("location_sim", "location_tesim"),
        ("named_subject_sim", "named_subject_tesim"),
        ("human_readable_resource_type_sim", "resource_type_tesim"),
        ("member_of_collections_ssim", "dlcs_collection_name_tesim"),
    )
    for derived_field, source_field in copies_after_dates:
        record[derived_field] = record.get(source_field)

    # SINAI INDEX
    record["header_index_tesim"] = header_fields(record)
    record["name_fields_index_tesim"] = name_fields_index(record)

    # SORT FIELDS: first title, earliest year and first date, when available.
    # Shelfmark sorting is not set here; a Solr copyField is used for it.
    title_values = record.get("title_tesim")
    if isinstance(title_values, typing.Sequence) and len(title_values) > 0:
        record["sort_title_ssort"] = title_values[0]

    year_values = record.get("year_isim")
    if isinstance(year_values, typing.Sequence) and len(year_values) > 0:
        record["sort_year_isi"] = min(year_values)

    date_values = record.get("date_dtsim")
    if isinstance(date_values, typing.Sequence) and len(date_values) > 0:
        record["date_dtsort"] = date_values[0]

    return record
Beispiel #7
0
def test_iso_8601():
    """A full ISO 8601 date (YYYY-MM-DD) yields just its year."""

    iso_date = "1941-10-01"
    parsed_years = year_parser.integer_years([iso_date])

    assert parsed_years == [1941]
Beispiel #8
0
def test_range_with_months():
    """Months can be included in range elements, but are ignored."""

    same_year_span = "1934-06/1934-07"
    parsed_years = year_parser.integer_years([same_year_span])

    assert parsed_years == [1934]
Beispiel #9
0
def test_unparseable():
    """Unparseable values contribute nothing; the other inputs still parse."""

    inputs = ["1953", "[between 1928-1939]"]
    parsed_years = year_parser.integer_years(inputs)

    assert parsed_years == [1953]
Beispiel #10
0
def test_empty():
    """An empty input list produces an empty list of years."""
    no_inputs = []
    parsed_years = year_parser.integer_years(no_inputs)
    assert parsed_years == []
Beispiel #11
0
def test_year_and_month():
    """A YYYY-MM value yields just its year."""

    year_month = "1953-10"
    parsed_years = year_parser.integer_years([year_month])

    assert parsed_years == [1953]
Beispiel #12
0
def test_just_year():
    """A bare YYYY value yields that year as an integer."""

    bare_year = "1953"
    parsed_years = year_parser.integer_years([bare_year])

    assert parsed_years == [1953]