Beispiel #1
0
def do_file_checks(datadir, bookdata, procdata):
    """Run the pre-flight path checks before processing book data.

    Calls ``file_checks.assert_location_exists`` on ``datadir`` and
    ``file_checks.assert_file_exists`` on ``bookdata``; whatever those
    helpers raise propagates to the caller. If
    ``file_checks.assert_file_not_exists`` raises ``AssertionError`` for
    ``procdata``, the user is asked whether to continue.

    Args:
        datadir: Location passed to ``assert_location_exists``.
        bookdata: Path passed to ``assert_file_exists``.
        procdata: Output path that is about to be written.

    Raises:
        SystemExit: If the user answers "n" to the overwrite prompt.
    """
    file_checks.assert_location_exists(datadir)
    file_checks.assert_file_exists(bookdata)
    try:
        file_checks.assert_file_not_exists(procdata)
    except AssertionError:
        # Report the path that was actually checked (the ``procdata``
        # argument) rather than an unrelated name from an outer scope.
        message = f"File {procdata} already exists. If you proceed it " \
            "will be overwritten. Continue anyways?"
        response = user_interaction.force_user_input(["Y", "n"], message)
        if response == "n":
            raise SystemExit()
Beispiel #2
0
def main():
    """Tally ratings per book and per user, then plot both tallies.

    Reads the ';'-delimited CSV named by ``source_data`` (like
    ``data_directory``, a name resolved from the enclosing scope), counts
    rows by their "ISBN" and "User-ID" columns, and hands each counter to
    ``plot_graph.plot_top_n``.
    """
    file_checks.assert_location_exists(data_directory)
    file_checks.assert_file_exists(source_data)

    count_book_ratings, count_user_ratings = Counter(), Counter()
    # newline='' leaves line-ending handling to the csv module, which is
    # what its documentation requires so that quoted fields containing
    # line breaks are parsed correctly.
    with open(source_data, 'r', newline='') as src:
        reader = csv.DictReader(src, delimiter=';')

        for row in reader:
            count_book_ratings[row["ISBN"]] += 1
            count_user_ratings[row["User-ID"]] += 1

    plot_graph.plot_top_n(count_book_ratings, "Top books with most ratings",
                          "ISBNS")
    plot_graph.plot_top_n(count_user_ratings, "Top users who rated", "User ID")
Beispiel #3
0
def do_file_checks(datadir, srcdata, procdata):
    """
    Run the path checks that must pass before processing starts.

    ``datadir`` is handed to file_checks.assert_location_exists and
    ``srcdata`` to file_checks.assert_file_exists; anything those
    helpers raise propagates.  When assert_file_not_exists raises
    AssertionError for ``procdata``, the user is asked whether to
    overwrite it, and answering "n" exits via SystemExit.
    """
    file_checks.assert_location_exists(datadir)
    file_checks.assert_file_exists(srcdata)

    try:
        file_checks.assert_file_not_exists(procdata)
    except AssertionError:
        # The target is already there: let the user decide.
        choices = ["Y", "n"]
        prompt = (
            f"File {procdata} already exists. If you proceed it "
            "will be overwritten. Continue anyways?"
        )
        answer = user_interaction.force_user_input(choices, prompt)
        if answer == "n":
            raise SystemExit()
Beispiel #4
0
def main():
    """Tally users per country and per age, then plot both tallies.

    Reads the ';'-delimited CSV named by ``source_data`` (like
    ``data_directory``, a name resolved from the enclosing scope). Each
    row's "Location" is reduced with ``pull_country_from_location`` and
    counted; "Age" is counted as an int unless it equals ``TOKEN``.
    """
    file_checks.assert_location_exists(data_directory)
    file_checks.assert_file_exists(source_data)

    locations, ages = Counter(), Counter()
    # newline='' leaves line-ending handling to the csv module, which is
    # what its documentation requires so that quoted fields containing
    # line breaks are parsed correctly.
    with open(source_data, "r", newline='') as src:
        reader = csv.DictReader(src, delimiter=';')

        for row in reader:
            country = pull_country_from_location(row["Location"])

            locations[country] += 1
            # Rows whose age equals TOKEN are skipped for the age tally.
            if row["Age"] != TOKEN:
                ages[int(row["Age"])] += 1

    plot_graph.plot_top_n(locations, "Nations with most users", "Counts")
    plot_graph.plot_top_n(ages, "User age distribution", "Age [Years]",
                          len(ages))
Beispiel #5
0
    def __init__(self, books, users, ratings):
        """Store the three input paths and check that each one exists.

        ``user_ids`` and ``book_isbns`` start out as empty sets. Each of
        ``books``, ``users`` and ``ratings`` is then passed, in that
        order, to ``file_checks.assert_file_exists``; whatever that
        helper raises propagates.
        """
        self.books = books
        self.users = users
        self.ratings = ratings

        self.user_ids = set()
        self.book_isbns = set()

        # Same order as the attributes were assigned above.
        for required_file in (self.books, self.users, self.ratings):
            file_checks.assert_file_exists(required_file)