Beispiel #1
0
def get_style_popular_hops(style: Style, use_filter: list) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()

    query = '''
        SELECT rh.recipe_id, rh.kind_id, rh.amount_percent
        FROM recipe_db_recipehop AS rh
        JOIN recipe_db_recipe AS r ON rh.recipe_id = r.uid
        WHERE r.style_id IN ({})
        '''.format(','.join('%s' for _ in style_ids))

    if len(use_filter):
        query += " AND rh.use IN ({})".format(','.join('%s' for _ in use_filter))

    df = pd.read_sql(query, connection, params=style_ids + use_filter)
    if len(df) == 0:
        return df

    # Aggregate amount per recipe
    hops = df.groupby(["recipe_id", "kind_id"]).agg({"amount_percent": "sum"}).reset_index()

    # Filter top hops
    top_hops_ids = hops["kind_id"].value_counts()[:10].index.values
    top_hops = hops[hops['kind_id'].isin(top_hops_ids)]  # Get only the values of the mostly used hops

    # Calculate boxplot values
    agg = [lowerfence, q1, 'median', 'mean', q3, upperfence, 'count']
    aggregated = top_hops.groupby('kind_id').agg({'amount_percent': agg})
    aggregated = aggregated.reset_index()
    aggregated = aggregated.sort_values(by=('amount_percent', 'count'), ascending=False)

    # Finally, add hop names
    aggregated['hop'] = aggregated['kind_id'].map(get_hop_names_dict())

    return aggregated
Beispiel #2
0
def get_style_metric_values(style: Style, metric: str) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()
    precision = METRIC_PRECISION[
        metric] if metric in METRIC_PRECISION else METRIC_PRECISION['default']

    query = '''
            SELECT round({}, {}) as {}
            FROM recipe_db_recipe
            WHERE {} IS NOT NULL AND style_id IN ({})
        '''.format(metric, precision, metric, metric,
                   ','.join('%s' for _ in style_ids))

    df = pd.read_sql(query, connection, params=style_ids)
    df = remove_outliers(df, metric, 0.02)

    bins = 16
    if metric in ['og', 'fg'] and len(df) > 0:
        abs = df[metric].max() - df[metric].min()
        bins = max([1, round(abs / 0.002)])
        if bins > 18:
            bins = round(bins / math.ceil(bins / 12))

    histogram = df.groupby([pd.cut(df[metric], bins, precision=precision)
                            ])[metric].agg(['count'])
    histogram = histogram.reset_index()
    histogram[metric] = histogram[metric].map(str)

    return histogram
Beispiel #3
0
def get_style_hop_pairings(style: Style) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()

    query = '''
        SELECT rh.recipe_id, rh.kind_id, rh.amount_percent
        FROM recipe_db_recipehop AS rh
        JOIN recipe_db_recipe AS r
            ON rh.recipe_id = r.uid
        WHERE rh.kind_id IS NOT NULL AND r.style_id IN ({})
    '''.format(','.join('%s' for _ in style_ids))

    hops = pd.read_sql(query, connection, params=style_ids)
    return get_hop_pairings(hops)
Beispiel #4
0
    def calculate_all_style_metrics(self, calculator: StyleMetricCalculator,
                                    style: Style) -> None:
        style.recipes_count = calculator.calc_recipes_count(style)

        for metric in calculator.available_metrics:
            self.stdout.write('Calculate {} for style {}'.format(
                metric.value, style.name))
            (min, mean, max) = calculator.calc_metric(style, metric)
            self.stdout.write(str((min, mean, max)))
            setattr(style, "recipes_%s_min" % metric.value,
                    None if math.isnan(min) else min)
            setattr(style, "recipes_%s_mean" % metric.value,
                    None if math.isnan(mean) else mean)
            setattr(style, "recipes_%s_max" % metric.value,
                    None if math.isnan(max) else max)
def get_style_popular_fermentables(style: Style, category_filter: list,
                                   type_filter: list) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()

    query = '''
        SELECT rf.recipe_id, f.name AS fermentable, rf.amount_percent
        FROM recipe_db_recipefermentable AS rf
        JOIN recipe_db_recipe AS r ON rf.recipe_id = r.uid
        JOIN recipe_db_fermentable AS f ON rf.kind_id = f.id
        WHERE r.style_id IN ({})
        '''.format(','.join('%s' for _ in style_ids))

    if len(category_filter):
        query += " AND f.category IN ({})".format(','.join(
            '%s' for _ in category_filter))
    if len(type_filter):
        query += " AND f.type IN ({})".format(','.join('%s'
                                                       for _ in type_filter))

    df = pd.read_sql(query,
                     connection,
                     params=style_ids + category_filter + type_filter)
    if len(df) == 0:
        return df

    # Aggregate amount per recipe
    fermentables = df.groupby(["recipe_id", "fermentable"]).agg({
        "amount_percent":
        "sum"
    }).reset_index()

    top_fermentables_ids = fermentables["fermentable"].value_counts(
    )[:10].index.values
    top_fermentables = fermentables[fermentables['fermentable'].isin(
        top_fermentables_ids
    )]  # Get only the values of the mostly used fermentable

    agg = [lowerfence, q1, 'median', 'mean', q3, upperfence, 'count']
    aggregated = top_fermentables.groupby('fermentable').agg(
        {'amount_percent': agg})
    aggregated = aggregated.reset_index()
    aggregated = aggregated.sort_values(by=('amount_percent', 'count'),
                                        ascending=False)

    return aggregated
Beispiel #6
0
def get_style_trending_hops(style: Style) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()
    recipes_per_month = get_style_num_recipes_per_month(style_ids)

    query = '''
            SELECT
                date(r.created, 'start of month') AS month,
                rh.kind_id,
                count(DISTINCT r.uid) AS recipes
            FROM recipe_db_recipe AS r
            JOIN recipe_db_recipehop AS rh
                ON r.uid = rh.recipe_id
            WHERE
                r.created IS NOT NULL
                AND r.created > %s
                AND rh.kind_id IS NOT NULL
                AND r.style_id IN ({})
            GROUP BY date(r.created, 'start of month'), rh.kind_id
        '''.format(','.join('%s' for _ in style_ids))

    per_month = pd.read_sql(query,
                            connection,
                            params=[POPULARITY_MIN_MONTH] + style_ids)
    per_month = per_month.merge(recipes_per_month, on="month")
    per_month['month'] = pd.to_datetime(per_month['month'])
    per_month['recipes_percent'] = per_month['recipes'] / per_month[
        'total_recipes'] * 100

    trending = filter_trending(per_month, 'kind_id', 'month',
                               'recipes_percent')
    trending = set_multiple_series_start(trending, 'kind_id', 'month',
                                         'recipes_percent')

    # Finally, add hop names
    trending['hop'] = trending['kind_id'].map(get_hop_names_dict())

    return trending
def load_styles():
    csv_file = load_csv('styles.csv')
    header = next(csv_file)

    styles = {}
    for row in csv_file:
        if len(row) == 1:
            continue  # Skip empty lines

        row = map(cast_values, row)
        data = dict(zip(header, row))

        style_id = make_style_id(data['id'])
        parent_style_id = make_style_id(data['parent_style_id'])
        parent_style = None

        if data['og_min'] is not None:
            data['og_min'] /= 1000
        if data['og_max'] is not None:
            data['og_max'] /= 1000
        if data['fg_min'] is not None:
            data['fg_min'] /= 1000
        if data['fg_max'] is not None:
            data['fg_max'] /= 1000

        if parent_style_id is not None:
            if parent_style_id in styles:
                parent_style = styles[parent_style_id]
            else:
                raise Exception(
                    "Could not find parent style {}".format(parent_style_id))

        try:
            style = Style.objects.get(pk=style_id)
        except Style.DoesNotExist:
            style = Style()
            pass

        for field in data:
            setattr(style, field, data[field])
        style.id = style_id
        style.parent_style = parent_style
        style.save()
        styles[style.id] = style
Beispiel #8
0
def get_style_popularity(style: Style) -> DataFrame:
    style_ids = style.get_ids_including_sub_styles()
    return calculate_styles_popularity(style_ids)