Beispiel #1
0
def test_column_contents():
    """Spot-check individual cells for sensible types and values."""
    frame = Collection.from_dir("t/data/2019-12-04")._df

    # Les Chouans: the date columns expose .date(), the year is a number
    chouans = frame.loc[2366570]
    assert str(chouans.Added.date()) == "2016-04-18"
    assert str(chouans.Started.date()) == "2016-09-08"
    assert str(chouans.Read.date()) == "2016-11-06"
    # Published is compared as a plain year (pandas can't do very old dates)
    assert chouans.Published == 1829

    # La faute de l'abbé Mouret
    mouret = frame.loc[3071647]
    scheduled_on = str(mouret.Scheduled.date())
    assert scheduled_on == "2020-01-01", "Scheduled column is a timestamp"

    # The McCabe Reader: no publication year recorded
    mccabe = frame.loc[28595808]
    assert np.isnan(mccabe.Published)

    # With fixes switched off, these are the only category values present
    unfixed = Collection.from_dir("t/data/2019-12-04", fixes=False)
    expected_categories = {
        "articles",
        "novels",
        "short-stories",
        "non-fiction",
        "graphic",
        np.nan,
    }
    assert set(unfixed._df.Category) == expected_categories
Beispiel #2
0
def test_dedup():
    """Check the dedup flag: off by default, on when requested."""
    default_collection = Collection.from_dir("t/data/2019-12-04")
    assert default_collection.dedup is False, "No dedup by default"

    deduped = Collection.from_dir(
        "t/data/2019-12-04",
        merge=True,
        dedup=True,
    )
    assert deduped.merge is True, "Enabled merging"
    assert deduped.dedup is True, "Enabled dedup"
Beispiel #3
0
def test_merge():
    """Check the merge flag and that enabling it leaves ``_df`` untouched.

    A collection loaded without arguments must report ``merge`` as False;
    one loaded with ``merge=True`` must report it as True, and both must
    share an identical underlying ``_df`` dataframe.
    """
    c_un = Collection.from_dir("t/data/merging")
    # Check the merge flag itself: the original asserted on ``dedup`` here,
    # which left the merge default untested.
    assert c_un.merge is False, "No merging by default"

    c = Collection.from_dir("t/data/merging", merge=True)
    assert c.merge is True, "Enabled merging"

    assert_frame_equal(c._df, c_un._df)  # underlying dataframes are identical
Beispiel #4
0
def test_filter_borrowed():
    """Exercise borrowed() with no argument, with True and with False."""
    data_dir = "t/data/2019-12-04"

    # No argument: both borrowed and owned books are present
    everything = Collection.from_dir(data_dir)
    assert set(everything.borrowed().df.Borrowed) == {True, False}

    # True: borrowed books only
    only_borrowed = Collection.from_dir(data_dir)
    assert set(only_borrowed.borrowed(True).df.Borrowed) == {True}

    # False: non-borrowed books only
    only_owned = Collection.from_dir(data_dir)
    assert set(only_owned.borrowed(False).df.Borrowed) == {False}
Beispiel #5
0
def test_reset():
    """Check that reset() undoes a previously applied filter."""
    reference = Collection.from_dir("t/data/2019-12-04")
    filtered = Collection.from_dir("t/data/2019-12-04")

    # Two fresh loads of the same directory start out equal
    assert_frame_equal(reference.df, filtered.df)

    # Applying a shelf filter makes the views diverge
    filtered.shelves(["library"])
    assert not reference.df.equals(filtered.df), "Changed dataframe is different"

    # After reset() the filtered collection matches the reference again
    filtered.reset()
    assert_frame_equal(reference.df, filtered.df)
Beispiel #6
0
def test_collection():
    """Check construction, default flags and repr for empty and real data."""
    empty = Collection.from_dir("/does/not/exist")
    assert empty, "Created an empty collection"
    assert empty.merge is False, "No merge by default"
    assert empty.dedup is False, "No dedup by default"

    empty_repr = "Collection(_df=[0 books], merge=False, dedup=False)"
    assert repr(empty) == empty_repr, "Legible __repr__ for an empty collection"

    populated = Collection.from_dir("t/data/2019-12-04/")
    assert populated, "Created a collection from a directory"

    populated_repr = "Collection(_df=[157 books], merge=False, dedup=False)"
    assert (
        repr(populated) == populated_repr
    ), "Legible __repr__ for a collection with books"
Beispiel #7
0
def test_merged_added():
    """Check the Added date of a merged book."""
    merged = Collection.from_dir("t/data/merging/")._merged()

    added_on = str(merged.loc[21124].Added.date())
    assert added_on == "2018-01-04", "Added on the earlier date"
Beispiel #8
0
def test_save_df(tmp_path):
    """Check the CSV written by save_df() for a tidy and a scrambled frame.

    The same expected text is asserted for the original selection and for a
    copy whose columns are re-sorted and whose rows are randomly shuffled.
    """
    expected_csv = """\
BookId,Author,Title,Category,Language,Words,Added
38290,James Fenimore Cooper,The Pioneers,novels,,,2017-02-27
246245,James Fenimore Cooper,The Deerslayer,novels,en,,2016-11-08
347245,James Fenimore Cooper,The Pathfinder,novels,en,,2016-11-08
621017,James Fenimore Cooper,The Prairie,novels,,,2016-11-08
1041744,James Fenimore Cooper,The Last of the Mohicans,novels,,,2017-02-16
"""

    collection = Collection.from_dir(
        "t/data/2019-12-04",
        fixes=False,
        metadata=False,
    )
    everything = collection.df

    # Restrict to a single author so the expected output stays small
    cooper = everything[everything.AuthorId == 9121]

    tidy_path = tmp_path / "ebooks.csv"
    save_df("ebooks", cooper, tidy_path)
    assert tidy_path.read_text() == expected_csv, "Wrote a csv of only some columns"

    # Alphabetical column order plus a random row permutation
    scrambled = cooper[sorted(cooper.columns)].sample(frac=1)
    scrambled_path = tmp_path / "shuffled.csv"
    save_df("ebooks", scrambled, scrambled_path)
    assert (
        scrambled_path.read_text() == expected_csv
    ), "CSV is ordered even if the df isn't"
Beispiel #9
0
def test_from_author_name():
    """Check the defaults of a Chain built from an author name."""
    collection = Collection.from_dir("t/data/2019-12-04")

    chain = Chain.from_author_name(collection.all, "Murakami")

    assert chain, "Created a Chain from an author name"
    assert (
        chain.order == Order.Published
    ), "Authors are read in published order by default"
    assert (
        chain.missing == Missing.Ignore
    ), "Authors have no missing books to ignore"
Beispiel #10
0
def test_from_series_name():
    """Check the defaults of a Chain built from a series name."""
    collection = Collection.from_dir("t/data/2019-12-04")

    chain = Chain.from_series_name(collection.all, "Culture")

    assert chain, "Created a Chain from a series name"
    assert chain.order == Order.Series, "Series are read in order"
    assert (
        chain.missing == Missing.Ignore
    ), "Missing books are ignored by default"
Beispiel #11
0
def test_remaining():
    """Check which shelves show up in Chain.remaining."""
    collection = Collection.from_dir("t/data/2019-12-04")

    frame = collection.df
    shelves_present = set(frame.Shelf)

    # Sanity check: the fixture data covers every one of these shelves
    assert shelves_present == {
        "currently-reading",
        "ebooks",
        "elsewhere",
        "kindle",
        "library",
        "pending",
        "read",
        "to-read",
    }

    chain = Chain(df=frame, order=Order.Published)
    still_to_read = chain.remaining

    # These three shelves are expected to be absent from .remaining
    excluded_shelves = {
        "currently-reading",
        "read",
        "to-read",
    }
    assert (
        set(still_to_read.Shelf) == shelves_present - excluded_shelves
    ), "All but the read and unreadable shelves"
Beispiel #12
0
def test_currently_reading():
    """Check currently_reading for one truthy and one falsy author."""
    collection = Collection.from_dir("t/data/2019-12-04")

    vonnegut = Chain.from_author_name(collection.all, "Vonnegut")
    assert vonnegut.currently_reading, "currently reading Vonnegut"

    murakami = Chain.from_author_name(collection.all, "Murakami")
    assert not murakami.currently_reading, "not currently reading Murakami"
Beispiel #13
0
def test_merge_df():
    """Check that category filtering works on a merging collection."""
    merged = Collection.from_dir("t/data/merging", merge=True)

    assert merged.df is not None, "it didn't explode"

    # Before filtering the novel is visible
    assert 956320 in merged.df.index, "Novel is there"

    # After restricting to non-fiction only the non-fiction book remains
    merged.categories(["non-fiction"])
    assert 21124 in merged.df.index, "Non-fiction book is there"
    assert 956320 not in merged.df.index, "Novel is not"
Beispiel #14
0
def test_all():
    """Check that .all matches .df when unfiltered and ignores filters."""
    collection = Collection.from_dir("t/data/2019-12-04/")

    # Without any filter applied, .df and .all are the same frame
    assert_frame_equal(collection.df, collection.all)

    # Snapshot .all, apply a shelf filter, and make sure .all is unchanged
    snapshot = collection.all.copy()
    after_filter = collection.shelves(["read"]).all
    assert_frame_equal(snapshot, after_filter)
Beispiel #15
0
def test_merged_kindle():
    """Check the merged row for a multi-volume kindle book."""
    collection = Collection.from_dir("t/data/merging/")
    merged = collection._merged()

    combined = merged.loc["novels/pg13947.mobi"]

    # The separate rows in the unmerged frame that share the title
    raw = collection._df
    volumes = raw[raw.Title.str.contains("Le vicomte de Bragelonne")]

    assert (
        combined.Title == "Le vicomte de Bragelonne"
    ), "Combined title has no volume number"
    assert combined.Pages == sum(volumes.Pages), "Pages is the sum"
    assert combined["_Mask"], "Mask has been retained"
Beispiel #16
0
def test_read():
    """Check Chain.read for an author with read books and one without."""
    collection = Collection.from_dir("t/data/2019-12-04")

    murakami = Chain.from_author_name(collection.all, "Murakami")
    expected_titles = [
        "Hard-Boiled Wonderland and the End of the World",
        "Norwegian Wood",
    ]
    assert list(murakami.read.Title) == expected_titles, "Read Chain"

    leroux = Chain.from_author_name(collection.all, "Leroux")
    assert leroux.read.empty, "Unread Chain"
Beispiel #17
0
def test_last_read():
    """Check last_read for never-read, in-progress and finished authors."""
    collection = Collection.from_dir("t/data/2019-12-04")

    leroux = Chain.from_author_name(collection.all, "Gaston Leroux")
    assert leroux.last_read is None, "Never read"

    # An author on the go is expected to report today's date
    vonnegut = Chain.from_author_name(collection.all, "Vonnegut")
    today = pd.Timestamp("today").date()
    assert vonnegut.last_read.date() == today, "Currently reading"

    murakami = Chain.from_author_name(collection.all, "Murakami")
    finished_on = str(murakami.last_read.date())
    assert finished_on == "2019-08-26", "Previously read"
Beispiel #18
0
def test_merged():
    """Check basic invariants of the frame returned by _merged()."""
    collection = Collection.from_dir("t/data/merging/")

    before = collection._df.copy()
    merged = collection._merged()

    # Merging must not alter the collection's own _df
    assert_frame_equal(collection._df, before)

    assert len(merged) < len(before), "Merged dataframe is shorter"

    # The merged index is a strict subset of the original index
    merged_ids = set(merged.index)
    original_ids = set(before.index)
    assert merged_ids < original_ids, "Remaining index values are unchanged"
Beispiel #19
0
def test_kindle_books():
    """Check invariants of the rows on the kindle shelf."""
    collection = Collection.from_dir("t/data/2019-12-04/")
    everything = collection.all
    kindle = everything[everything.Shelf == "kindle"]

    assert set(kindle.Binding) == {"ebook"}, "ebook binding is always 'ebook'"
    assert set(kindle.Borrowed) == {False}, "ebooks are never borrowed"
    assert set(kindle.Shelf) == {"kindle"}, "ebook shelf is always 'kindle'"

    # A Tale of Terror
    tale = kindle.loc["non-fiction/pg14154.mobi"]
    assert str(tale.Added.date()) == "2013-02-06", "Added date is sensible"
Beispiel #20
0
def test_chaining():
    """Test that filters chain correctly.

    Each scenario applies several filters in a single chained call and then
    compares ``c.df`` against the same selection expressed as a boolean mask
    over ``c.all``.
    """
    # Scenario 1: include-style filters only
    c = Collection.from_dir("t/data/2019-12-04")
    c.shelves(["pending"]).borrowed(True).languages(["fr"])

    assert_frame_equal(
        c.df,
        c.all[(c.all.Shelf == "pending")
              & c.all.Borrowed
              & (c.all.Language == "fr")],
    )

    # Scenario 2: a mix of include and exclude filters
    c = Collection.from_dir("t/data/2019-12-04")
    c.shelves(["pending"]).categories(["graphic"]).languages(
        exclude=["fr"]).borrowed(False)

    # The chain *excludes* French, so the reference mask must do the same.
    # The original compared against ``Language == "fr"``, which contradicts
    # the filter and can only pass when both selections are empty.
    # NOTE(review): assumes languages(exclude=...) keeps rows whose Language
    # is missing, as ``!=`` does; confirm against the implementation.
    assert_frame_equal(
        c.df,
        c.all[(c.all.Shelf == "pending")
              & (c.all.Category == "graphic")
              & (c.all.Language != "fr")
              & ~c.all.Borrowed],
    )
Beispiel #21
0
def test_filter_shelves():
    """Exercise shelves() with no arguments, an include list and an exclude list."""
    data_dir = "t/data/2019-12-04"

    filtered = Collection.from_dir(data_dir)
    untouched = Collection.from_dir(data_dir)

    # Calling shelves() without arguments must not filter anything
    assert_frame_equal(filtered.shelves().df, untouched.df)

    library_only = set(filtered.shelves(["library"]).df.Shelf)
    assert library_only == {"library"}, "Only the selected shelf"

    without_library = Collection.from_dir(data_dir)
    without_library.shelves(exclude=["library"])

    assert "library" not in set(without_library.df.Shelf), "Not the excluded shelf"
    assert "kindle" in set(without_library.df.Shelf), "Does include others"

    # The excluded and included halves together must rebuild the full frame
    whole = Collection.from_dir(data_dir)
    excluded_half = Collection.from_dir(data_dir).shelves(exclude=["library"]).df
    included_half = Collection.from_dir(data_dir).shelves(include=["library"]).df
    recombined = pd.concat([excluded_half, included_half])

    # check_like=True: the concatenation does not preserve the row order
    assert_frame_equal(recombined, whole.df, check_like=True)
Beispiel #22
0
def test_filter_categories():
    """Exercise categories() with no arguments, an include list and an exclude list."""
    data_dir = "t/data/2019-12-04"

    filtered = Collection.from_dir(data_dir)
    untouched = Collection.from_dir(data_dir)

    # Calling categories() without arguments must not filter anything
    assert_frame_equal(filtered.categories().df, untouched.df)

    novels_only = set(filtered.categories(["novels"]).df.Category)
    assert novels_only == {"novels"}, "Only the selected category"

    without_novels = Collection.from_dir(data_dir)
    without_novels.categories(exclude=["novels"])

    assert (
        "novels" not in set(without_novels.df.Category)
    ), "Not the excluded category"
    assert "articles" in set(without_novels.df.Category), "Does include others"

    # The excluded and included halves together must rebuild the full frame
    whole = Collection.from_dir(data_dir)
    excluded_half = Collection.from_dir(data_dir).categories(exclude=["novels"]).df
    included_half = Collection.from_dir(data_dir).categories(include=["novels"]).df
    recombined = pd.concat([excluded_half, included_half])

    # check_like=True: the concatenation does not preserve the row order
    assert_frame_equal(recombined, whole.df, check_like=True)
Beispiel #23
0
def test_fixes(monkeypatch):
    """Compare a collection loaded with fixes against one loaded without."""
    with open("t/data/2019-12-04/config.yml") as handle:
        loaded = yaml.safe_load(handle)

    # Swap the fixture config in for the duration of this test
    monkeypatch.setattr(config, "_conf", loaded)
    assert config("fixes") == loaded["fixes"]

    fixed = Collection.from_dir(
        "t/data/2019-12-04",
        metadata=False,
        fixes=True,
    )
    unfixed = Collection.from_dir(
        "t/data/2019-12-04",
        metadata=False,
        fixes=False,
    )

    assert fixed.all.shape == unfixed.all.shape, "The shape hasn't changed"
    assert not fixed.all.equals(unfixed.all), "But they're not the same"

    # A Read date is replaced
    assert str(unfixed.all.loc[20636970].Read.date()) == "2018-03-14"
    assert str(fixed.all.loc[20636970].Read.date()) == "2018-02-09"

    # A missing page count is filled in
    assert math.isnan(unfixed.all.loc[3110594].Pages)
    assert fixed.all.loc[3110594].Pages == 341

    # A missing category is filled in
    assert math.isnan(unfixed.all.loc[7022275].Category)
    assert fixed.all.loc[7022275].Category == "novels"

    # A missing language is filled in
    assert math.isnan(unfixed.all.loc[816920].Language)
    assert fixed.all.loc[816920].Language == "fr"

    # Ebooks, indexed by path rather than by number, are fixed too
    ebook = "short-stories/Les_soirees_de_Medan.pdf"
    assert unfixed.all.loc[ebook].Language == "en"
    assert fixed.all.loc[ebook].Language == "fr"
Beispiel #24
0
def test_chain():
    """Check the defaults and repr of a Chain built directly from a frame."""
    collection = Collection.from_dir("t/data/2019-12-04")

    chain = Chain(df=collection.all)

    assert chain, "Created a Chain"
    assert chain.order == Order.Published, "Default is to use published order"
    assert chain.missing == Missing.Ignore, "Default is to ignore"

    expected_repr = (
        "Chain(_df=[157 books], order=Order.Published, missing=Missing.Ignore)"
    )
    assert repr(chain) == expected_repr, "Legible __repr__ for the Chain"
Beispiel #25
0
def test_metadata(monkeypatch):
    """Compare collections loaded with and without metadata, then with fixes."""
    data_dir = "t/data/2019-12-04"

    enriched = Collection.from_dir(data_dir, fixes=False, metadata=True)
    plain = Collection.from_dir(data_dir, fixes=False, metadata=False)

    assert enriched.all.shape == plain.all.shape, "The shape hasn't changed"
    assert not enriched.all.equals(plain.all), "But they're not the same"

    # Without metadata the author columns are entirely empty
    assert plain.all.Gender.isnull().all(), "Gender is unset without metadata"
    assert (
        plain.all.Nationality.isnull().all()
    ), "Nationality is unset without metadata"

    # With metadata at least some of them are populated
    assert enriched.all.Gender.notnull().any(), "At least one gender is set"
    assert (
        enriched.all.Nationality.notnull().any()
    ), "At least one nationality is set"

    # The author name differs once metadata is applied
    bronte = "novels/b869w.mobi"
    assert (
        plain.all.loc[bronte].Author == "Emily, Bronte,; Brontë, Emily, 1818-1848"
    )
    assert enriched.all.loc[bronte].Author == "Emily Brontë"

    # Install the fixture config only now, after the collections above exist
    with open("t/data/2019-12-04/config.yml") as handle:
        monkeypatch.setattr(config, "_conf", yaml.safe_load(handle))

    enriched_and_fixed = Collection.from_dir(data_dir, fixes=True, metadata=True)

    # When both supply a value, the fix wins over the metadata
    medan = "short-stories/Les_soirees_de_Medan.pdf"
    assert enriched.all.loc[medan].Pages == 290  # value from metadata
    assert enriched_and_fixed.all.loc[medan].Pages == 777  # value from fixes
Beispiel #26
0
def test_collection_columns():
    """Check the exact column list of _df, with and without metadata."""
    expected = [
        "Author",
        "AuthorId",
        "Title",
        "Work",
        "Shelf",
        "Category",
        "Scheduled",
        "Borrowed",
        "Series",
        "SeriesId",
        "Entry",
        "Binding",
        "Published",
        "Language",
        "Pages",
        "Added",
        "Started",
        "Read",
        "Rating",
        "AvgRating",
        "Words",
        "Gender",
        "Nationality",
        "_Mask",  # FIXME (marker carried over; the reason is not recorded here)
    ]

    with_metadata = Collection.from_dir("t/data/2019-12-04")
    assert list(with_metadata._df.columns) == expected, "All the columns are there"

    without_metadata = Collection.from_dir("t/data/2019-12-04", metadata=False)
    actual = list(without_metadata._df.columns)
    assert (
        actual == expected
    ), "All the columns are still there when metadata is off"
Beispiel #27
0
def test_merged_goodreads():
    """Check the merged row for a multi-volume goodreads book."""
    collection = Collection.from_dir("t/data/merging/")
    merged = collection._merged()

    combined = merged.loc[956320]

    # The separate rows in the unmerged frame that share the title
    raw = collection._df
    volumes = raw[raw.Title.str.contains("Monte-Cristo")]

    assert (
        combined.Title == "Le Comte de Monte-Cristo"
    ), "Combined title has no volume number"
    assert combined.Pages == sum(volumes.Pages), "Pages is the sum"
    assert combined["_Mask"], "Mask has been retained"

    # Dates and rating of the combined row
    assert str(combined.Added.date()) == "2016-07-11"
    assert str(combined.Started.date()) == "2017-02-19"
    assert str(combined.Read.date()) == "2017-06-02"
    assert combined.Rating == 4.5

    # Entry is expected to be falsy on the combined row
    assert not combined.Entry
Beispiel #28
0
def test_read():
    """Check the Collection.read property, with and without a filter applied."""
    collection = Collection.from_dir("t/data/2019-12-04/")

    read_books = collection.read

    expected_shelves = {"read", "currently-reading"}
    assert set(read_books.Shelf) == expected_shelves, "Only expected shelves"

    assert 10374 in read_books.index, "Read book is there"
    assert 38290 not in read_books.index, "Unread book is not"
    assert (
        38290 in collection.all.index
    ), "Unread book is still in the collection however"

    # Snapshot first, because the shelves() call below modifies the collection
    snapshot = collection.read.copy()

    # .read gives the same rows even after a shelf filter has been applied
    assert_frame_equal(collection.shelves(["library"]).read, snapshot)
Beispiel #29
0
def test_sort():
    """Check Chain.sort() under the Published, Series and Added orders."""
    collection = Collection.from_dir("t/data/2019-12-04")

    # Take the Culture series and order it by title so the input is not
    # already in any of the orders tested below
    in_series = collection.df.Series.str.contains("Culture", na=False)
    by_title = collection.df[in_series].sort_values("Title")

    chain = Chain(df=by_title, order=Order.Published)
    published = list(chain.sort()._df.Published)
    assert published == sorted(published), "Sorted by published date"

    chain.order = Order.Series
    series = list(chain.sort()._df.Series)
    assert series == sorted(series), "Sorted by entry"

    chain.order = Order.Added
    added = list(chain.sort()._df.Added)
    assert added == sorted(added), "Sorted by added date"
Beispiel #30
0
def test_read_authorids():
    """Check the set of author ids returned by read_authorids()."""
    collection = Collection.from_dir("t/data/2019-12-04")

    expected_ids = {
        1654,
        3354,
        4750,
        4785,
        7628,
        9343,
        228089,
        874602,
        2778055,
        5807106,
    }
    assert read_authorids(collection) == expected_ids

    # Called out separately: this author is on the currently-reading shelf
    assert (
        2778055 in read_authorids(collection)
    ), "Author in currently-reading is included"