def prepareDBHeaderRow(beer):
    """Build the header row matching the data rows made by prepareDBRow.

    The header consists of every key of ``beer`` except "Review Text",
    followed by one column per beer-word category, followed by a final
    "Total Count" column.

    Category columns are emitted for the "scaled" type first and the
    "binary" type second. prepareDBRow writes its counts in exactly that
    fixed order, so the header must not depend on the key order of the
    mapping returned by getBeerWordCategories(); otherwise the labels could
    end up above the wrong data columns.

    Args:
        beer: Mapping of column name to value for a single beer. Only its
            keys are used here.

    Returns:
        A list of column-name strings.

    Raises:
        KeyError: If getBeerWordCategories() has no "scaled" or "binary"
            entry.
    """
    headers = [col for col in beer.keys() if col != "Review Text"]

    # Look the categories up once instead of once per category type.
    categories_by_type = getBeerWordCategories()
    for cat_type in ("scaled", "binary"):
        # Iterating the per-type entry yields the category names
        # (presumably a dict keyed by category -- confirm against
        # getBeerWordCategories).
        headers.extend(categories_by_type[cat_type])

    headers.append("Total Count")
    return headers
def getBeerWordColumns(csv_filename):
    """Map every beer-word category to its column index in a CSV file.

    Only the first row of ``csv_filename`` is read. For each category listed
    under each category type returned by getBeerWordCategories(), the stored
    value is overwritten with the position of the identically named cell in
    that first row.

    Args:
        csv_filename: Path of the CSV file whose first row holds the column
            names.

    Returns:
        The object returned by getBeerWordCategories(), with every
        ``[cat_type][category]`` entry replaced by that category's column
        index.

    Raises:
        StopIteration: If the file contains no rows at all.
        ValueError: If a category name is missing from the header row
            (raised by ``list.index``).
    """
    columns_by_type = getBeerWordCategories()
    with open(csv_filename, "rU") as handle:
        column_names = next(csv.reader(handle))
        for cat_type in columns_by_type.keys():
            by_category = columns_by_type[cat_type]
            # Existing keys are reassigned in place; no key is added or
            # removed, so iterating the same mapping is safe.
            for name in by_category:
                by_category[name] = column_names.index(name)
    return columns_by_type
def prepareDBRow(beer):
    """Build one data row for a beer, including its beer-word counts.

    The row holds the value of every key of ``beer`` except "Review Text",
    then one count per category of the "scaled" type, then one count per
    category of the "binary" type, and finally the sum of all those counts.

    Args:
        beer: Mapping of column name to value for a single beer. Its
            "Review Text" entry is passed to countBeerWordsInText() and is
            not copied into the row.

    Returns:
        A list of cell values for the beer.
    """
    # Every field except the free-text review is copied through unchanged.
    db_row = [beer[field] for field in beer.keys() if field != "Review Text"]

    running_total = 0
    counts_by_type = countBeerWordsInText(beer["Review Text"])
    for cat_type in ["scaled", "binary"]:
        for category in getBeerWordCategories()[cat_type].keys():
            hits = counts_by_type[cat_type][category]
            db_row.append(hits)
            running_total += hits

    # The grand total always occupies the last cell of the row.
    db_row.append(running_total)
    return db_row
def getCategoryAvgs(csv_filename):
    """Average each category's share of the beer-word total over all beers.

    For every data row of ``csv_filename`` the count in each category column
    is divided by the value in the row's last cell (the total count), and
    those ratios are summed per category. Each sum is printed and then
    divided by the number of data rows.

    Edge cases handled explicitly:
      * Blank CSV lines are skipped and not counted as beers.
      * A row whose total count is 0 is counted as a beer but contributes
        nothing to any category, because its ratios are undefined.
      * With no data rows at all, the totals are returned as initialised by
        getBeerWordCategories() instead of being divided by zero.

    Args:
        csv_filename: Path of a CSV file with a header row naming every
            category column, and the total count in the last column.

    Returns:
        The object returned by getBeerWordCategories(), with every
        ``[cat_type][category]`` entry holding that category's average ratio.

    Raises:
        ValueError: If a count cell cannot be parsed as a float, or a
            category is missing from the header (via getBeerWordColumns).
    """
    category_columns = getBeerWordColumns(csv_filename)
    # NOTE(review): the accumulation below assumes every
    # [cat_type][category] entry starts at a numeric zero -- confirm against
    # getBeerWordCategories.
    category_totals = getBeerWordCategories()
    num_beers = 0

    with open(csv_filename, "rU") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # Skip the header row.
        for row in csvreader:
            if not row:
                # csv.reader yields [] for a blank line; it is not a beer.
                continue
            num_beers += 1

            # The total is the same for every category, so parse it once.
            total_count = float(row[-1])
            if total_count == 0:
                # No beer words were counted: every ratio would be 0/0.
                continue

            for cat_type in category_columns.keys():
                for category in category_columns[cat_type].keys():
                    cat_count = row[category_columns[cat_type][category]]
                    category_totals[cat_type][category] += (
                        float(cat_count) / total_count
                    )

    for cat_type in category_totals.keys():
        for category in category_totals[cat_type].keys():
            # The value printed is the summed ratio, before averaging.
            print(category + ": " + str(category_totals[cat_type][category]))
            if num_beers:
                category_totals[cat_type][category] = (
                    category_totals[cat_type][category] / num_beers
                )
    print("num beers: " + str(num_beers))
    return category_totals