Example #1
    def get(self):
        args = self.reqparse.parse_args()
        regex_id = args['regex_id']
        # first_or_404() aborts with 404 on an unknown id instead of raising
        # TypeError when .first() returns None
        post = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id).add_columns(
                func.count(Rating.regex_id).label('views'),
                func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')).filter(
                    Regex.id == regex_id).group_by(Regex.id).order_by(
                        func.count(Rating.regex_id).desc(),
                        func.avg(func.coalesce(Rating.mark, 0)).desc()).first_or_404()
        return post[0].to_dict(views=post[1], avgmark=float(post[2])), 200
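Why the func.coalesce wrapper matters above: with the LEFT OUTER JOIN, a Regex row that has no ratings still yields one joined row whose Rating.mark is NULL, and SQL's AVG over only-NULL input returns NULL rather than 0. COALESCE(mark, 0) pins that case to 0, so float(post[2]) never sees None. A minimal, self-contained sketch of the difference; the Item/Score models are hypothetical stand-ins for Regex/Rating (SQLAlchemy 1.4+ import paths assumed):

# Hypothetical demo of AVG vs AVG(COALESCE(...)) over an outer join.
from sqlalchemy import Column, Float, ForeignKey, Integer, create_engine, func
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Item(Base):                      # stand-in for Regex
    __tablename__ = 'item'
    id = Column(Integer, primary_key=True)

class Score(Base):                     # stand-in for Rating
    __tablename__ = 'score'
    id = Column(Integer, primary_key=True)
    item_id = Column(Integer, ForeignKey('item.id'))
    mark = Column(Float)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(Item(id=1))                # an item with no scores at all
session.commit()

row = session.query(
    func.avg(Score.mark),                    # AVG of all-NULL input -> None
    func.avg(func.coalesce(Score.mark, 0)),  # NULLs pinned to 0 -> 0.0
).select_from(Item).outerjoin(Score, Item.id == Score.item_id).filter(
    Item.id == 1).group_by(Item.id).one()
print(row)  # (None, 0.0)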
Example #2
def get_average_test_runs(items=None, release=None):
    """Returns a list of `Test Runs` grouped by a `Release`."""
    avg_runs = db.session.query(
        TestRun.name,
        func.avg(TestRun.passed).label('passed'),
        func.avg(TestRun.failed).label('failed'),
        func.avg(TestRun.skipped).label('skipped'),
        func.avg(TestRun.error).label('error'),
        func.avg(TestRun.percent_passed).label('percent_passed'),
        func.avg(TestRun.percent_failed).label('percent_failed'),
        func.avg(TestRun.percent_executed).label('percent_executed'),
        func.avg(TestRun.percent_not_executed).label('percent_not_executed'),
    ).group_by(TestRun.release_id, TestRun.name).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
    )

    avg_runs = avg_runs.filter_by(waved=False)

    if release:
        avg_runs = avg_runs.filter_by(release=release)

    if items:
        avg_runs = avg_runs.limit(items)

    return avg_runs
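A detail worth noting in the query above: filter_by() still contributes to the WHERE clause even though it is called after group_by(); SQLAlchemy only emits HAVING through the dedicated having() method. Filtering on the aggregates themselves would therefore look like this sketch (reusing the columns from the example):

# WHERE: row-level predicate, applied before grouping
avg_runs = avg_runs.filter_by(waved=False)

# HAVING: group-level predicate, e.g. keep only groups averaging at least one failure
avg_runs = avg_runs.having(func.avg(TestRun.failed) >= 1)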
Example #3
def get_average_test_runs(items=None, release=None):
    """Returns a list of `Test Runs` grouped by a `Release`."""
    avg_runs = db.session.query(
        TestRun.name,
        func.avg(TestRun.passed).label('passed'),
        func.avg(TestRun.failed).label('failed'),
        func.avg(TestRun.skipped).label('skipped'),
        func.avg(TestRun.error).label('error'),
        func.avg(TestRun.percent_passed).label('percent_passed'),
        func.avg(TestRun.percent_failed).label('percent_failed'),
        func.avg(TestRun.percent_executed).label('percent_executed'),
        func.avg(TestRun.percent_not_executed).label('percent_not_executed'),
    ).group_by(
        TestRun.release_id,
        TestRun.name
    ).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
    )

    avg_runs = avg_runs.filter_by(waved=False)

    if release:
        avg_runs = avg_runs.filter_by(release=release)

    if items:
        avg_runs = avg_runs.limit(items)

    return avg_runs
Example #4
    def get(self):
        args = self.reqparse.parse_args()
        regex_id = args['regex_id']
        post = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id).add_columns(
                func.count(Rating.regex_id).label('views'),
                func.avg(Rating.mark).label('avgmark')).filter(
                    Regex.id == regex_id).group_by(Regex.id).order_by(
                        func.count(Rating.regex_id).desc(),
                        func.avg(Rating.mark).desc()).first()
        post, views, avg_mark = post[0], post[1], post[2]
        user = User.query.get_or_404(post.author_id)
        return post.to_dict(author=user.username,
                            views=views,
                            avg_mark=float(avg_mark) if avg_mark else 0), 200
Example #5
    def post(self):
        args = self.reqparse.parse_args()
        regex = args['regex']
        posts = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id
        ).add_columns(
            func.count(Rating.regex_id).label('views'),
            func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
        ).filter(
            Regex.expression.like(f'{regex}%')
        ).group_by(
            Regex.id
        ).order_by(
            func.count(Rating.regex_id).desc(),
            func.avg(func.coalesce(Rating.mark, 0)).desc()
        ).all()

        return [post.to_dict(views=views, avg_mark=float(avgmark)) for post, views, avgmark in posts], 200
Example #6
    def get(self):
        args = self.reqparse.parse_args()
        user_id = args['user_id']
        views = func.count(Rating.regex_id).label('views')
        avgmark = func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
        posts = db.session.query(Regex, views, avgmark).outerjoin(
            Rating, Regex.id == Rating.regex_id).group_by(Regex.id).subquery()

        user_posts = db.session.query(Rating.mark, posts).join(
            posts, posts.c.id == Rating.regex_id).filter(
                Rating.user_id == user_id).order_by(desc(
                    posts.c.views), desc(posts.c.avgmark)).all()

        return [{
            'id': regex_id,
            'expression': regex_expression,
            'explanation': regex_explanation,
            'date': regex_date,
            'author_id': regex_author_id,
            'views': regex_views,
            'avg_mark': regex_avgmark,
            'user_mark': user_mark
        } for user_mark, regex_id, regex_expression, regex_explanation,
                regex_date, regex_author_id, regex_views, regex_avgmark in
                user_posts]
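The two-step shape above is the standard way to sort by aggregates computed in another query: .subquery() turns the first query into a selectable whose labeled columns (views, avgmark) and flattened model columns (id, expression, ...) are all reachable through the .c namespace, so the outer query can join and order on them like ordinary columns:

# After .subquery(), labels become plain columns on the .c accessor:
posts.c.id       # Regex.id, flattened into the subquery
posts.c.views    # the func.count(...).label('views') column
posts.c.avgmark  # the func.avg(...).label('avgmark') column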
Example #7
    def selectables(cls, bag, agg_spec):
        """ Create a list of statements from spec

        :type bag: mongosql.bag.ModelPropertyBags
        :rtype: list[sqlalchemy.sql.elements.ColumnElement]
        """
        # TODO: calculation expressions for selection: http://docs.mongodb.org/manual/meta/aggregation-quick-reference/
        selectables = []
        for comp_field, comp_expression in agg_spec.items():
            # Column reference
            if isinstance(comp_expression, basestring):
                selectables.append(bag.columns[comp_expression].label(comp_field))
                continue

            # Computed expression
            assert isinstance(comp_expression, dict), 'Aggregate: Expression should be either a column name, or an object'
            assert len(comp_expression) == 1, 'Aggregate: expression can only contain a single operator'
            operator, expression = comp_expression.popitem()

            # Expression statement
            if isinstance(expression, int) and operator == '$sum':
                # Special case for count
                expression_stmt = expression
            elif isinstance(expression, basestring):
                # Column name
                expression_stmt = bag.columns[expression]
                # Json column?
                if bag.columns.is_column_json(expression):
                    # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
                    expression_stmt = cast(expression_stmt, Float)
            elif isinstance(expression, dict):
                # Boolean expression
                expression_stmt = MongoCriteria.statement(bag, expression)
                # Need to cast it to int
                expression_stmt = cast(expression_stmt, Integer)
            else:
                raise AssertionError('Aggregate: expression should be either a column name, or an object')

            # Operator
            if operator == '$max':
                comp_stmt = func.max(expression_stmt)
            elif operator == '$min':
                comp_stmt = func.min(expression_stmt)
            elif operator == '$avg':
                comp_stmt = func.avg(expression_stmt)
            elif operator == '$sum':
                if isinstance(expression_stmt, int):
                    # Special case for count
                    comp_stmt = func.count()
                    if expression_stmt != 1:
                        comp_stmt *= expression_stmt
                else:
                    comp_stmt = func.sum(expression_stmt)
            else:
                raise AssertionError('Aggregate: unsupported operator "{}"'.format(operator))

            # Append
            selectables.append(comp_stmt.label(comp_field))

        return selectables
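For context, selectables maps a MongoDB-style aggregation spec onto SQLAlchemy column expressions. A hypothetical invocation (the spec keys, the age column, and the MongoAggregate class name are illustrative assumptions, not from the source) could look like:

agg_spec = {
    'oldest':   {'$max': 'age'},                  # -> func.max(bag.columns['age'])
    'avg_age':  {'$avg': 'age'},                  # -> func.avg(...)
    'n_adults': {'$sum': {'age': {'$gte': 18}}},  # -> func.sum(CAST(criteria AS INT))
    'n_rows':   {'$sum': 1},                      # special case -> func.count()
}
stmts = MongoAggregate.selectables(bag, agg_spec)  # class name assumed
query = query.with_entities(*stmts)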
Example #8
    def get(self):
        args = self.reqparse.parse_args()
        limit_by, offset = args['limit_by'], args['offset']
        posts = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id).add_columns(
                func.count(Rating.regex_id).label('views'),
                func.avg(Rating.mark).label('avgmark')).group_by(
                    Regex.id).order_by(
                        func.count(Rating.regex_id).desc(),
                        func.avg(Rating.mark).desc()).all()
        # .limit(limit_by).offset(0 + limit_by * offset)

        return [
            post.to_dict(views=views, avg_mark=float(avgmark))
            if avgmark else post.to_dict(views=views, avg_mark=0)
            for post, views, avgmark in posts
        ], 200
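The commented-out tail in the example hints at page-based fetching; swapping .all() for limit/offset would make the handler honor its limit_by and offset arguments, roughly like this sketch (which assumes offset counts pages rather than rows):

posts = Regex.query.outerjoin(
    Rating, Regex.id == Rating.regex_id).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(Rating.mark).label('avgmark')).group_by(
            Regex.id).order_by(
                func.count(Rating.regex_id).desc(),
                func.avg(Rating.mark).desc()).limit(
                    limit_by).offset(limit_by * offset).all()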
Example #9
    def get(self):
        args = self.reqparse.parse_args()
        limit_by, offset, author_id = args['limit_by'], args['offset'], args['author_id']
        u = User.query.get_or_404(author_id)

        posts = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id
        ).add_columns(
            func.count(Rating.regex_id).label('views'),
            func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
        ).filter(
            Regex.author_id == u.id
        ).group_by(
            Regex.id
        ).order_by(
            func.count(Rating.regex_id).desc(),
            func.avg(func.coalesce(Rating.mark, 0)).desc()
        ).all()

        return [post.to_dict(views=views, avg_mark=float(avgmark)) for post, views, avgmark in posts], 200
Example #10
    def query_aggregate(self, ctx, drills, cuts, limit=5000):
        mappings = self.sql_mappings(ctx)
        joins = self.sql_joins(ctx, None)
        pk = self.pk(ctx)

        connection = self.sqltable.connection.connection()
        engine = self.sqltable.connection._engine

        # Build query
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()
        q = session.query()

        #q = q.add_columns(self.sqltable.sa_table.columns['is_bot_id'].label("x"))
        #q = q.add_entity(self.sqltable.sa_table)

        # Include measures
        for measure in [m for m in mappings if isinstance(m.field, Measure)]:
            sa_column = self.sqltable.sa_table.columns[measure.sqlcolumn.name]
            q = q.add_columns(func.avg(sa_column).label(measure.field.name + "_avg"))
            q = q.add_columns(func.sum(sa_column).label(measure.field.name + "_sum"))

        q = q.add_columns(func.count(self.sqltable.sa_table).label("record_count"))

        # Drills
        for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
            # We should check the dimension-path here, with drills, and use key/lookup for drill
            if dimension.field.name in drills:
                sa_column = None
                try:
                    sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
                except KeyError as e:
                    raise ETLException("Unknown column in backend SQL table (table=%s, column=%s). Columns: %s" % (self.sqltable.sa_table, dimension.sqlcolumn.name, [c.name for c in self.sqltable.sa_table.columns]))
                q = q.add_columns(sa_column)
                q = q.group_by(sa_column)

        # Cuts
        # TODO: Filtering on any dimension attribute, not only the key
        #       (ie filter cities with type A or icon B), but then again
        #       that could be a different (nested) dimension.
        for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
            # We should check the dimension-path here, with drills
            if dimension.field.name in cuts.keys():
                sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
                cut_value = cuts[dimension.field.name]
                q = q.filter(sa_column==cut_value)

        # Limit (use the caller-supplied value rather than a hardcoded 5000)
        q = q.limit(limit)

        statement = q.statement
        logger.debug("Statement: %s", str(statement).replace("\n", " "))
        rows = connection.execute(statement).fetchall()

        return rows
Example #11
    def average_distance_travelled(self, start_date, end_date):
        submission_subclass = CompostSalesRegister
        query = (
            DBSession.query(
                func.avg(submission_subclass.json_data[
                    submission_subclass.DISTANCE_TRAVELLED].cast(Float)))
            .join(MunicipalitySubmission,
                  MunicipalitySubmission.submission_id == submission_subclass.id)
            .filter(MunicipalitySubmission.municipality == self)
            .filter(submission_subclass.status == Submission.APPROVED)
            .filter(and_(Submission.date >= start_date,
                         Submission.date <= end_date)))

        return query.first()[0]
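The .cast(Float) on the JSON lookup is the load-bearing detail here: PostgreSQL returns JSON members as text, which AVG cannot consume. The same idiom in isolation, with a hypothetical Reading model holding a JSON payload column:

from sqlalchemy import Float, func

# payload['speed'] arrives as json/text; cast it before aggregating
avg_speed = DBSession.query(
    func.avg(Reading.payload['speed'].cast(Float))).scalar()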
Example #12
    def get(self):
        args = self.reqparse.parse_args()
        token, limit_by, offset, author_id = args['token'], args[
            'limit_by'], args['offset'], args['author_id']
        if int(r[token]) != author_id:
            abort(403)
        u = User.query.get_or_404(author_id)

        posts = Regex.query.outerjoin(
            Rating, Regex.id == Rating.regex_id).add_columns(
                func.count(Rating.regex_id).label('views'),
                func.avg(Rating.mark).label('avgmark')).filter(
                    Regex.author_id == u.id).group_by(Regex.id).order_by(
                        func.count(Rating.regex_id).desc(),
                        func.avg(Rating.mark).desc()).all()

        return [
            post.to_dict(views=views,
                         avg_mark=float(avgmark),
                         author=u.username) if avgmark else post.to_dict(
                             views=views, avg_mark=0, author=u.username)
            for post, views, avgmark in posts
        ], 200
Example #13
def index():
    # How many items?
    items = request.args.get('items', 10, type=int)
    test_runs = TestRun.query.filter_by(waved=False).join(
        OperatingSystem).order_by(
            TestRun.timestamp.desc(),
            TestRun.name.desc(),
            OperatingSystem.major_version.desc()
        ).limit(items)
    rows = []
    for row in test_runs:
        rows.append([row.name, row.passed, row.failed, row.skipped])

    rows.reverse()

    # Average numbers
    avg_rows = db.session.query(
        TestRun.name,
        func.avg(TestRun.passed).label('passed'),
        func.avg(TestRun.failed).label('failed'),
        func.avg(TestRun.skipped).label('skipped'),
    ).group_by(
        TestRun.name
    ).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
    ).limit(items)
    avg_data = []
    for row in avg_rows:
        avg_data.append([row.name, row.passed, row.failed, row.skipped])
    avg_data.reverse()

    return render_template(
        'index.html',
        avg_data=json.dumps(avg_data),
        data=json.dumps(rows),
        test_runs=test_runs,
    )
Example #14
def api_property_search(context, request):
    street_number = request.GET.get("street_number")
    street_name = request.GET.get("street_name")
    city = request.GET.get("city")
    state = request.GET.get("state")
    zip_code = request.GET.get("zip")  # renamed to avoid shadowing the zip() builtin

    q = DBSession.query(Property, func.avg(Review.rating_average))

    if street_number is not None:
        q = q.filter(Property.street_number == int(street_number))

    if street_name is not None:
        q = q.filter(func.lower(Property.street_name).like("%%%s%%" %
                                                           street_name.lower()))

    if city is not None:
        q = q.filter(func.lower(Property.city).like("%%%s%%" % city.lower()))

    if state is not None:
        q = q.filter(func.lower(Property.state).like("%%%s%%" % state.lower()))

    if zip_code is not None:
        q = q.filter(Property.zip == zip_code)

    q = q.outerjoin(Property.reviews)
    q = q.group_by(Property.id)

    q = q.limit(7)

    results = []

    for property, avg in q.all():
        if avg is not None:
            property.overall = int(avg)
        else:
            property.overall = None
        results.append(property)

    return results
Example #15
    def compile(self):
        # Json column?
        if self.is_column_json:
            # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
            column = cast(self.column, Float)
        else:
            # Non-JSON column: use it as-is
            column = self.column

        # Now, handle the operator, and apply it to the expression
        if self.operator == '$max':
            stmt = func.max(column)
        elif self.operator == '$min':
            stmt = func.min(column)
        elif self.operator == '$avg':
            stmt = func.avg(column)
        elif self.operator == '$sum':
            stmt = func.sum(column)
        else:
            raise InvalidQueryError(
                'Aggregate: unsupported operator "{}"'.format(self.operator))
        return self.labeled_expression(stmt)
Example #16
    def get(self):
        args = self.reqparse.parse_args()
        token = args['token']
        user_id = int(r[token])
        views = func.count(Rating.regex_id).label('views')
        avgmark = func.avg(Rating.mark).label('avgmark')
        posts = db.session.query(Regex, views, avgmark).outerjoin(
            Rating, Regex.id == Rating.regex_id).group_by(Regex.id).subquery()

        user_posts = db.session.query(Rating.mark, posts).join(
            posts, posts.c.id == Rating.regex_id).filter(
                Rating.user_id == user_id).order_by(desc(
                    posts.c.views), desc(posts.c.avgmark)).all()

        return [{
            'id': regex_id,
            'expression': regex_expression,
            'explanation': regex_explanation,
            'date': str(regex_date),
            'views': regex_views,
            'avg_mark': float(regex_avgmark) if regex_avgmark else 0,
            'user_mark': user_mark
        } for user_mark, regex_id, regex_expression, regex_explanation,
                regex_date, _, regex_views, regex_avgmark in user_posts], 200
Example #17
    def density_of_rejects_from_sieving(self, start_date, end_date):
        query = self.get_report_query(
            MonthlyRejectsDensity, start_date, end_date,
            func.avg(Report.report_json['density'].cast(Float)))
        return query.first()[0]
Example #18
    def conversion_factor_mature_to_sieved(self, start_date, end_date):
        query = self.get_report_query(
            MonthlyRejectsComposition, start_date, end_date,
            func.avg(Report.report_json['conversion_factor'].cast(Float)))
        return query.first()[0]
Example #19
    def density_of_mature_compost(self, start_date, end_date):
        query = self.get_report_query(
            MonthlyRejectsComposition, start_date, end_date,
            func.avg(
                Report.report_json['density_of_mature_compost'].cast(Float)))
        return query.first()[0]
Example #20
    def density_of_msw(self, start_date, end_date):
        query = self.get_report_query(
            DailyWaste, start_date, end_date,
            func.avg(Report.report_json['density'].cast(Float)))
        return query.first()[0]
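Examples #17 through #20 all follow one pattern: build the func.avg(...) expression over a casted JSON member, then hand it to a shared get_report_query helper. That helper isn't shown in this collection; purely as an illustration of the shape such a helper could take (every identifier below is an assumption, not from the source):

def get_report_query(self, report_class, start_date, end_date, aggregate_expr):
    # Illustrative sketch only: apply a caller-supplied aggregate expression
    # to the reports of one type within a date window.
    return DBSession.query(aggregate_expr).filter(
        Report.report_type == report_class.__name__,
        Report.date >= start_date,
        Report.date <= end_date)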
Example #21
    def selectables(cls, bag, agg_spec):
        """ Create a list of statements from spec

        :type bag: mongosql.bag.ModelPropertyBags
        :rtype: list[sqlalchemy.sql.elements.ColumnElement]
        """
        # TODO: calculation expressions for selection: http://docs.mongodb.org/manual/meta/aggregation-quick-reference/
        selectables = []
        for comp_field, comp_expression in agg_spec.items():
            # Column reference
            if isinstance(comp_expression, string_types):
                selectables.append(
                    bag.columns[comp_expression].label(comp_field))
                continue

            # Computed expression
            assert isinstance(
                comp_expression, dict
            ), 'Aggregate: Expression should be either a column name, or an object'
            assert len(
                comp_expression
            ) == 1, 'Aggregate: expression can only contain a single operator'
            operator, expression = comp_expression.popitem()

            # Expression statement
            if isinstance(expression, int) and operator == '$sum':
                # Special case for count
                expression_stmt = expression
            elif isinstance(expression, string_types):
                # Column name
                expression_stmt = bag.columns[expression]
                # Json column?
                if bag.columns.is_column_json(expression):
                    # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
                    expression_stmt = cast(expression_stmt, Float)
            elif isinstance(expression, dict):
                # Boolean expression
                expression_stmt = MongoCriteria.statement(bag, expression)
                # Need to cast it to int
                expression_stmt = cast(expression_stmt, Integer)
            else:
                raise AssertionError(
                    'Aggregate: expression should be either a column name, or an object'
                )

            # Operator
            if operator == '$max':
                comp_stmt = func.max(expression_stmt)
            elif operator == '$min':
                comp_stmt = func.min(expression_stmt)
            elif operator == '$avg':
                comp_stmt = func.avg(expression_stmt)
            elif operator == '$sum':
                if isinstance(expression_stmt, int):
                    # Special case for count
                    comp_stmt = func.count()
                    if expression_stmt != 1:
                        comp_stmt *= expression_stmt
                else:
                    comp_stmt = func.sum(expression_stmt)
            else:
                raise AssertionError(
                    'Aggregate: unsupported operator "{}"'.format(operator))

            # Append
            selectables.append(comp_stmt.label(comp_field))

        return selectables
Example #22
def init_list_analysis(list_id, list_name, count,
	open_rate, api_key, data_center, user_email):

	# Try to pull the list stats from database
	existing_list = (ListStats.query.
		filter_by(list_id=list_id).first())

	# Placeholder for list stats
	stats = None

	if existing_list is None:

		stats = import_analyze_store_list(list_id,
			count, open_rate, api_key, data_center, user_email)

	else:

		# Get list stats from database results
		# Deserialize the histogram data
		stats = {'subscribers': existing_list.subscribers,
			'open_rate': existing_list.open_rate,
			'hist_bin_counts': json.loads(existing_list.hist_bin_counts),
			'subscribed_pct': existing_list.subscribed_pct,
			'unsubscribed_pct': existing_list.unsubscribed_pct,
			'cleaned_pct': existing_list.cleaned_pct,
			'pending_pct': existing_list.pending_pct,
			'high_open_rt_pct': existing_list.high_open_rt_pct,
			'cur_yr_inactive_pct': existing_list.cur_yr_inactive_pct}

	# Log that the request occurred
	current_user = AppUser(user_email=user_email,
		list_id=list_id)
	db.session.add(current_user)
	db.session.commit()

	# Generate averages
	avg_stats = db.session.query(func.avg(ListStats.subscribers),
		func.avg(ListStats.open_rate),
		func.avg(ListStats.subscribed_pct),
		func.avg(ListStats.unsubscribed_pct),
		func.avg(ListStats.cleaned_pct),
		func.avg(ListStats.pending_pct),
		func.avg(ListStats.high_open_rt_pct),
		func.avg(ListStats.cur_yr_inactive_pct)).first()
	
	# Generate charts
	# Using OrderedDict (for now) as Pygal occasionally seems to break with
	# the Python 3.5 dictionary standard which preserves order by default
	# Export them as pngs to /charts
	list_size_chart = BarChart('Chart A: List Size vs. '
		'Database Average (Mean)',
		OrderedDict([
			('Your List', [stats['subscribers']]),
			('Average (Mean)', [avg_stats[0]])]),
		percentage=False)
	list_size_chart.render_png(list_id + '_size')

	list_breakdown_chart = BarChart('Chart B: List Composition vs. '
		'Database Average (Mean)',
		OrderedDict([
			('Subscribed %', [stats['subscribed_pct'], avg_stats[2]]),
			('Unsubscribed %', [stats['unsubscribed_pct'], avg_stats[3]]),
			('Cleaned %', [stats['cleaned_pct'], avg_stats[4]]),
			('Pending %', [stats['pending_pct'], avg_stats[5]])]),
		x_labels=('Your List', 'Average (Mean)'))
	list_breakdown_chart.render_png(list_id + '_breakdown')

	open_rate_chart = BarChart('Chart C: List Open Rate vs. '
		'Database Average (Mean)',
		OrderedDict([
			('Your List', [stats['open_rate']]),
			('Average (Mean)', [avg_stats[1]])]))
	open_rate_chart.render_png(list_id + '_open_rate')

	open_rate_hist_chart = Histogram('Chart D: Distribution of '
		'User Unique Open Rates',
		OrderedDict([
			('Your List', stats['hist_bin_counts'])]))
	open_rate_hist_chart.render_png(list_id + '_open_rate_histogram')

	high_open_rt_pct_chart = BarChart('Chart E: Percentage of '
		'Subscribers with User Unique Open Rate >80% vs. '
		'Database Average (Mean)',
		OrderedDict([
			('Your List', [stats['high_open_rt_pct']]),
			('Average (Mean)', [avg_stats[6]])]))
	high_open_rt_pct_chart.render_png(list_id + '_high_open_rt')

	cur_yr_member_pct_chart = BarChart('Chart F: Percentage of Subscribers '
		'who did not Open in last 365 Days vs. Database Average (Mean)',
		OrderedDict([
			('Your List', [stats['cur_yr_inactive_pct']]),
			('Average (Mean)', [avg_stats[7]])]))
	cur_yr_member_pct_chart.render_png(list_id + '_cur_yr_inactive_pct')

	# Send charts as an email report
	# Due to the way Flask-Mail works, reimport app_context first
	with app.app_context():
		msg = Message('Your Email Benchmarking Report is Ready!',
			sender='*****@*****.**',
			recipients=[user_email],
			html=render_template('report_email.html',
				title='We\'ve analyzed the {} List!'.format(list_name), 
				list_id=list_id))
		mail.send(msg)
Example #23
def send_report(stats, list_id, list_name, user_email_or_emails):
    """Generates charts using Plotly and emails them to the user.

    Args:
        stats: a dictionary containing analysis results for a list.
        list_id: the list's unique MailChimp id.
        list_name: the list's name.
        user_email_or_emails: a list of emails to send the report to.
    """

    # This subquery generates the most recent stats
    # For each unique list_id in the database
    # Where store_aggregates is True
    subquery = ListStats.query.filter(
        ListStats.list.has(store_aggregates=True)).order_by(
            'list_id',
            desc('analysis_timestamp')).distinct(ListStats.list_id).subquery()

    # Generate aggregates within the subquery
    agg_stats = db.session.query(
        func.avg(subquery.columns.subscribers),
        func.avg(subquery.columns.subscribed_pct),
        func.avg(subquery.columns.unsubscribed_pct),
        func.avg(subquery.columns.cleaned_pct),
        func.avg(subquery.columns.pending_pct),
        func.avg(subquery.columns.open_rate),
        func.avg(subquery.columns.high_open_rt_pct),
        func.avg(subquery.columns.cur_yr_inactive_pct)).first()

    # Make sure we have no 'None' values
    agg_stats = [agg if agg else 0 for agg in agg_stats]

    # Convert subscribers average to an integer
    agg_stats[0] = int(agg_stats[0])

    # Generate epoch time (to get around image caching in webmail)
    epoch_time = str(int(time.time()))

    # Generate charts
    draw_bar(['Your List', 'Dataset Average'],
             [stats['subscribers'], agg_stats[0]], 'Chart A: List Size',
             list_id + '_size_' + epoch_time)

    draw_stacked_horizontal_bar(
        ['Dataset Average', 'Your List'],
        [('Subscribed %', [agg_stats[1], stats['subscribed_pct']]),
         ('Unsubscribed %', [agg_stats[2], stats['unsubscribed_pct']]),
         ('Cleaned %', [agg_stats[3], stats['cleaned_pct']]),
         ('Pending %', [agg_stats[4], stats['pending_pct']])],
        'Chart B: List Composition', list_id + '_breakdown_' + epoch_time)

    draw_bar(['Your List', 'Dataset Average'],
             [stats['open_rate'], agg_stats[5]],
             'Chart C: List Open Rate',
             list_id + '_open_rate_' + epoch_time,
             percentage_values=True)

    histogram_legend_uri = ('https://s3-us-west-2.amazonaws.com/email-'
                            'benchmarking-imgs/open_rate_histogram_legend.png')

    draw_histogram(
        {
            'title': 'Open Rate by Decile',
            'vals': np.linspace(.05, .95, num=10)
        }, {
            'title': 'Subscribers',
            'vals': stats['hist_bin_counts']
        }, 'Chart D: Distribution of Subscribers by Open Rate',
        histogram_legend_uri, list_id + '_open_rate_histogram_' + epoch_time)

    draw_donuts(['Open Rate >80%', 'Open Rate <=80%'], [
        ('Your List',
         [stats['high_open_rt_pct'], 1 - stats['high_open_rt_pct']]),
        ('Dataset Average', [agg_stats[6], 1 - agg_stats[6]])
    ], 'Chart E: Percentage of Subscribers with User Unique Open Rate >80%',
                list_id + '_high_open_rt_pct_' + epoch_time)

    draw_donuts(
        ['Inactive in Past 365 Days', 'Active in Past 365 Days'],
        [('Your List',
          [stats['cur_yr_inactive_pct'], 1 - stats['cur_yr_inactive_pct']]),
         ('Dataset Average', [agg_stats[7], 1 - agg_stats[7]])],
        'Chart F: Percentage of Subscribers who did not Open '
        'in last 365 Days', list_id + '_cur_yr_inactive_pct_' + epoch_time)

    # Send charts as an email report
    send_email('Your Email Benchmarking Report is Ready!',
               user_email_or_emails,
               'report-email.html', {
                   'title': 'We\'ve analyzed the {} list!'.format(list_name),
                   'list_id': list_id,
                   'epoch_time': epoch_time
               },
               configuration_set_name=(os.environ.get('SES_CONFIGURATION_SET')
                                       or None))
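The subquery at the top of send_report leans on PostgreSQL's DISTINCT ON: ordering by list_id, analysis_timestamp DESC and then calling .distinct(ListStats.list_id) keeps exactly one row, the newest, per list:

# DISTINCT ON is PostgreSQL-specific; the subquery renders roughly as
# (illustrative, table/column names assumed from the model):
#   SELECT DISTINCT ON (list_id) * FROM list_stats
#   ORDER BY list_id, analysis_timestamp DESC
# i.e. one row per list_id, the one with the newest analysis_timestamp.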
Example #24
    def query_aggregate(self, ctx, drills, cuts, limit=5000):
        mappings = self.sql_mappings(ctx)
        joins = self.sql_joins(ctx, None)
        pk = self.pk(ctx)

        connection = self.sqltable.connection.connection()
        engine = self.sqltable.connection._engine

        # Build query
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()
        q = session.query()

        #q = q.add_columns(self.sqltable.sa_table.columns['is_bot_id'].label("x"))
        #q = q.add_entity(self.sqltable.sa_table)

        # Include measures
        for measure in [m for m in mappings if isinstance(m.field, Measure)]:
            sa_column = self.sqltable.sa_table.columns[measure.sqlcolumn.name]
            q = q.add_columns(
                func.avg(sa_column).label(measure.field.name + "_avg"))
            q = q.add_columns(
                func.sum(sa_column).label(measure.field.name + "_sum"))

        q = q.add_columns(
            func.count(self.sqltable.sa_table).label("record_count"))

        # Drills
        for dimension in [
                m for m in mappings if isinstance(m.field, Dimension)
        ]:
            # We should check the dimension-path here, with drills, and use key/lookup for drill
            if dimension.field.name in drills:
                sa_column = None
                try:
                    sa_column = self.sqltable.sa_table.columns[
                        dimension.sqlcolumn.name]
                except KeyError as e:
                    raise ETLException(
                        "Unknown column in backend SQL table (table=%s, column=%s). Columns: %s"
                        % (self.sqltable.sa_table, dimension.sqlcolumn.name,
                           [c.name for c in self.sqltable.sa_table.columns]))
                q = q.add_columns(sa_column)
                q = q.group_by(sa_column)

        # Cuts
        # TODO: Filtering on any dimension attribute, not only the key
        #       (ie filter cities with type A or icon B), but then again
        #       that could be a different (nested) dimension.
        for dimension in [
                m for m in mappings if isinstance(m.field, Dimension)
        ]:
            # We should check the dimension-path here, with drills
            if dimension.field.name in cuts.keys():
                sa_column = self.sqltable.sa_table.columns[
                    dimension.sqlcolumn.name]
                cut_value = cuts[dimension.field.name]
                q = q.filter(sa_column == cut_value)

        # Limit (use the caller-supplied value rather than a hardcoded 5000)
        q = q.limit(limit)

        statement = q.statement
        logger.debug("Statement: %s", str(statement).replace("\n", " "))
        rows = connection.execute(statement).fetchall()

        return rows