コード例 #1
0
def prepareDBHeaderRow(beer):
    """Build the header row (list of column names) for a beer record.

    The header is made of every key of ``beer`` except "Review Text",
    followed by the beer-word category names, followed by "Total Count".

    The category types are walked in the fixed order "scaled", "binary",
    which is the order prepareDBRow uses when it emits the matching values.
    Iterating the mapping returned by getBeerWordCategories() directly would
    depend on its key order (arbitrary for a plain dict on Python 2) and could
    leave headers and row values misaligned.

    beer -- object exposing keys(); its keys become the leading column names.
    Returns a list of column names.
    """
    headers = [col for col in beer.keys() if col != "Review Text"]

    # Fetch the category mapping once rather than once per category type.
    beer_word_categories = getBeerWordCategories()
    for cat_type in ("scaled", "binary"):
        # Extending with the inner mapping appends its keys (category names).
        headers.extend(beer_word_categories[cat_type])

    headers.append("Total Count")
    return headers
コード例 #2
0
def getBeerWordColumns(csv_filename):
    """Map every beer-word category to its column index in a CSV file.

    Only the first (header) row of ``csv_filename`` is read.  For each
    category in the nested mapping returned by getBeerWordCategories(), the
    stored value is replaced by the position of that category's name within
    the header row.

    csv_filename -- path of the CSV file whose header row is inspected.
    Returns the nested mapping {category type: {category: column index}}.
    Raises ValueError (from list.index) when a category name does not appear
    in the header row.
    """
    columns_by_type = getBeerWordCategories()

    # The header row is all that is needed, so the file can be closed
    # before the lookups are done.
    with open(csv_filename, "rU") as csv_handle:
        column_names = next(csv.reader(csv_handle))

    for columns in columns_by_type.values():
        # Only values of existing keys are overwritten, so iterating the
        # inner mapping while assigning into it is safe.
        for name in columns:
            columns[name] = column_names.index(name)

    return columns_by_type
コード例 #3
0
def prepareDBRow(beer):
    """Build the data row for a single beer record.

    The row holds every value of ``beer`` except the one stored under
    "Review Text", then one beer-word count per category (category types
    taken in the order "scaled", "binary"), then the sum of those counts.

    beer -- object supporting keys() and item lookup; must contain a
            "Review Text" entry, which is passed to countBeerWordsInText().
    Returns the row as a list.
    """
    row = [beer[key] for key in beer.keys() if key != "Review Text"]

    counts_by_type = countBeerWordsInText(beer["Review Text"])

    # Gather the per-category counts first; their sum becomes the last column.
    word_counts = []
    for cat_type in ("scaled", "binary"):
        for category in getBeerWordCategories()[cat_type].keys():
            word_counts.append(counts_by_type[cat_type][category])

    row.extend(word_counts)
    row.append(sum(word_counts))
    return row
コード例 #4
0
def getCategoryAvgs(csv_filename):
    """Average, per beer-word category, the share of each beer's word count.

    For every data row of ``csv_filename`` the count found in each category
    column is divided by the row's total count (read from the last column)
    and the resulting ratios are summed per category.  Each sum is then
    divided by the number of data rows.  The per-category sums and the number
    of rows are printed as a side effect.

    Edge cases:
      * Blank rows (which csv.reader yields as empty lists) are ignored.
      * A row whose total count is zero adds nothing to any category instead
        of raising ZeroDivisionError, but is still counted as a beer.
      * When the file has no data rows, the totals are returned exactly as
        produced by getBeerWordCategories(), without dividing by zero.

    csv_filename -- path of a CSV file whose header row contains every
                    category name and whose last column is the total count.
    Returns the nested mapping {category type: {category: average ratio}}.
    """
    category_columns = getBeerWordColumns(csv_filename)
    category_totals = getBeerWordCategories()
    num_beers = 0

    # NOTE: "rU" is the Python 2 universal-newline mode; the "U" flag was
    # removed in Python 3.11, so this mode string needs revisiting if the
    # module is ever ported.
    with open(csv_filename, "rU") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # skip the header row
        for row in csvreader:
            if not row:
                continue
            num_beers += 1

            # Parse the total once per row rather than once per category.
            total_count = float(row[-1])
            if total_count == 0:
                # No beer words in this row: every ratio would be 0/0.
                continue

            for cat_type, columns in category_columns.items():
                totals = category_totals[cat_type]
                for category, column in columns.items():
                    totals[category] += float(row[column]) / total_count

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for cat_type in category_totals.keys():
        totals = category_totals[cat_type]
        for category in totals.keys():
            print(category + ": " + str(totals[category]))
            if num_beers:
                totals[category] = totals[category] / num_beers
    print("num beers: " + str(num_beers))
    return category_totals