def clicked_urls_query():
    """Build the (search id, clicked url, search term) select for high-volume queries."""
    return select([
        search_table.c.id,
        func.unnest(search_table.c.clicked_urls).label('result'),
        query_table.c.search_term_lowercase,
    ]).select_from(search_table.join(query_table)).where(
        query_table.c.high_volume == True)
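# A minimal sketch of the table metadata the query above assumes (the real
# definitions live elsewhere in the project; every name here other than the
# columns referenced above is hypothetical). clicked_urls is a Postgres ARRAY,
# and func.unnest() expands it so the select yields one (id, url, term) row per click.
from sqlalchemy import (MetaData, Table, Column, Integer, String, Boolean,
                        ForeignKey, select, func)
from sqlalchemy.dialects.postgresql import ARRAY

metadata = MetaData()
query_table = Table(
    'queries', metadata,
    Column('id', Integer, primary_key=True),
    Column('search_term_lowercase', String),
    Column('high_volume', Boolean),
)
search_table = Table(
    'searches', metadata,
    Column('id', Integer, primary_key=True),
    Column('query_id', Integer, ForeignKey('queries.id')),
    Column('clicked_urls', ARRAY(String)),
    Column('passed_over_urls', ARRAY(String)),
)

# Printing the statement shows the generated UNNEST/JOIN SQL without a database.
print(clicked_urls_query())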
def suggest_sequence(attribute):
    """Suggest values from an array attribute of an entity"""
    entity = Sequence
    query = (db.session.query(func.unnest(getattr(entity, attribute)).label('label'))
             .distinct()
             .filter(entity.user_login == session['user']))
    if request.values:
        query = query.join(TopicDomainClass)
        for key, value in request.values.items():
            query = query.filter(getattr(TopicDomainClass, key) == value)
    return storify(query.all(), 'label', 'label')
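# A hypothetical sketch of the Sequence model that suggest_sequence() assumes:
# whichever attribute name is passed in must be a Postgres ARRAY column, since
# func.unnest() expands it into one row per element. Column names other than
# user_login are invented for illustration, as is the Flask-SQLAlchemy setup.
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.dialects.postgresql import ARRAY

db = SQLAlchemy()

class Sequence(db.Model):
    __tablename__ = 'sequence'
    id = db.Column(db.Integer, primary_key=True)
    user_login = db.Column(db.String)
    keywords = db.Column(ARRAY(db.String))   # e.g. suggest_sequence('keywords')
    topics = db.Column(ARRAY(db.String))     # e.g. suggest_sequence('topics')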
def make_comment(post_type):
    """Generate synthetic comments from existing comment text, attributed to random users."""
    Session = sessionmaker(bind=engine)
    db = Session()

    # Train one text model on dead comments and another on live ones.
    dead_comments = db.query(Comment.text) \
        .filter(Comment.dead == True, Comment.text != None) \
        .order_by(func.random()).limit(30000)
    dead_comment_sim = train_from_query(dead_comments, CommentSim)

    comment_query = db.query(Comment.text).filter(Comment.dead == False)
    if post_type != "normal":
        # Restrict to comments attached to stories matching the requested post type.
        comment_query = comment_query.filter(Comment.id.in_(
            db.query(func.unnest(Story.all_kids)).filter_by(**queries[post_type]["query"])
        ))
    comment_query = comment_query.order_by(func.random()).limit(60000)
    random_user_query = db.query(Comment.by).order_by(func.random())
    comment_sim = train_from_query(comment_query, CommentSim)
    user_names = (by[0] for by in random_user_query.limit(random.randint(0, 50)))

    comments = []
    for user_name in user_names:
        # Roughly 3% of generated comments are marked dead and use the dead-comment model.
        is_dead = random.randint(2, 100) < 5
        sim = comment_sim if not is_dead else dead_comment_sim
        comment_length, comment = random.randint(0, 200), ""
        while len(comment) < comment_length:
            if (comment_length - len(comment)) < 25:
                break
            elif (comment_length - len(comment)) < 50:
                comment += sim.make_short_sentence(
                    50, tries=10000, max_overlap_total=10, max_overlap_ratio=0.5) + "\n"
            else:
                comment += sim.make_short_sentence(
                    100, tries=10000, max_overlap_total=10, max_overlap_ratio=0.5) + "\n"
        comment = comment.replace(".", ". ")
        comment_data = {"text": comment, "by": user_name, "dead": is_dead}
        comments.append(comment_data)
    return comments
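# A guess at the training helper used above: the make_short_sentence() keyword
# arguments (tries, max_overlap_total, max_overlap_ratio) match markovify's API,
# so CommentSim is presumably a markovify.Text subclass and train_from_query()
# builds one from the text column of each row. The body below is an assumption,
# not the project's actual implementation.
import markovify

class CommentSim(markovify.Text):
    pass

def train_from_query(query, model_cls):
    # Join the non-empty comment bodies into one corpus and fit the chain on it.
    corpus = "\n".join(row[0] for row in query if row[0])
    return model_cls(corpus)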
def get_skipped_urls(conn):
    """
    Get every query/result pair where the result was skipped

    This is done by unnesting everything so we have (session, result, query)
    tuples, and then excluding any clicked tuples from the passed over tuples.
    It probably makes sense(?)
    """
    passed_over = select([
        search_table.c.id,
        func.unnest(search_table.c.passed_over_urls).label('result'),
        query_table.c.search_term_lowercase,
    ]).select_from(search_table.join(query_table)).where(
        query_table.c.high_volume == True)
    clicked = clicked_urls_query()
    stmt = except_(passed_over, clicked)
    return pd.read_sql(stmt, conn, index_col='id')
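# Example of calling the function above; the engine URL is hypothetical. The
# resulting frame is indexed by search id, with one row per passed-over URL
# that never shows up among the clicked pairs for the same search/query.
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('postgresql:///searchlogs')  # hypothetical DSN
with engine.connect() as conn:
    skipped = get_skipped_urls(conn)
    print(skipped.head())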
def get(self):
    args = self.parse_arg()
    releases = db.session.query(
        func.substring(cast(Release.date, db.String), 0, 8).label('month'),
        func.unnest(Release.activities).label('activity'),
        func.count(Release.value).label('count'),
        func.sum(Release.value).label('total_value'))
    releases = self.filter_request(releases, args)
    releases = releases.group_by('month', 'activity')
    releases = self.sort_request(releases, args)
    release_count = releases.count()
    (releases, offset, limit) = self.offset_limit(releases, args)

    # Generate output structure
    output = dict()
    output["meta"] = {
        "count": release_count,
        "pagination": {"offset": offset, "limit": limit}
    }
    months_dict = {}
    for release in releases:
        r = release._asdict()
        current_month = r["month"]
        del r["month"]
        if current_month not in months_dict:
            months_dict[current_month] = []
        months_dict[current_month].append(r)
    output["releases"] = [{"month": month, "activities": activities}
                          for month, activities in months_dict.items()]

    # Ugly hack to remove activities that were not requested but still returned,
    # because some services are in 2 different activities
    if 'activity' in args and args['activity'] is not None:
        activity_list = args['activity'].split(';')
        for j, m in reversed(list(enumerate(output["releases"]))):
            for i, a in reversed(list(enumerate(m["activities"]))):
                if a["activity"] not in activity_list:
                    del m["activities"][i]
            if len(output["releases"][j]["activities"]) == 0:
                del output["releases"][j]

    # Another hack to aggregate activities not in the top N under "Autres"
    if 'aggregate' in args and (args['aggregate'] == "value" or args['aggregate'] == "count"):
        top = []
        activities = db.session.query(
            Release.activities[1].label('activity'),
            func.sum(Release.value).label('total_value'),
            func.count(Release.value).label('count'))
        activities = self.filter_request(activities, args)
        activities = activities.filter(Release.activities[1] != "Autres")
        activities = activities.group_by('activity')
        if args['aggregate'] == "value":
            activities = activities.order_by("total_value desc")
        else:
            activities = activities.order_by("count desc")
        activities = activities[0:app.config["AGG_ACTIVITIES"]]
        top = [a._asdict()['activity'] for a in activities]
        for m in output["releases"]:
            count = 0
            total_value = 0
            for i, a in reversed(list(enumerate(m["activities"]))):
                if a["activity"] not in top:
                    count += a["count"]
                    total_value += a["total_value"]
                    del m["activities"][i]
            if count > 0 and total_value > 0:
                autres = {"activity": "Autres", "count": count, "total_value": total_value}
                m["activities"].append(autres)
    return output
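# A sketch of the response shape the get() method above assembles; the numbers
# and activity names are made up, and the "YYYY-MM" month format follows from
# substring(cast(date, String), 0, 8).
example_response = {
    "meta": {"count": 42, "pagination": {"offset": 0, "limit": 10}},
    "releases": [
        {"month": "2015-03",
         "activities": [
             {"activity": "Transport", "count": 7, "total_value": 12000.0},
             {"activity": "Autres", "count": 3, "total_value": 1500.0},
         ]},
    ],
}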