def top(group):
    """Render the top-k keywords page for *group* over the requested period."""
    k = request.args.get("k", 10, type=int)
    period, start, end, cache_time = get_period(request, "day")
    query = {
        "start": start.strftime(time_format),
        "end": end.strftime(time_format),
        "group": group,
    }
    data = cache(process_top, group, k, query,
                 cache_time=cache_time, path=get_req_path(request))
    if isinstance(data, Response):
        return data
    # Never advertise more entries than the backend actually returned.
    k = min(k, len(data))
    disp = display_group(group)
    return render_template(
        "top.html",
        data=data,
        group=group,
        disp_group=disp,
        max_amount=str(k),
        period=period,
        start=display_datetime(start),
        end=display_datetime(end),
        title=make_title("Top {} {}".format(k, disp)),
    )
def view_anomalies(group):
    """Show detected anomalies (peaks) restricted to the keywords of *group*."""
    keywords = cache(tweety.get_group, group, cache_time=60 * 60,
                     path=get_req_path(request))
    if isinstance(keywords, Response):
        return keywords
    # Set of lemmas for O(1) membership tests while filtering the peaks.
    lemmas = {k["lemma"] for k in keywords}
    peaks = [a for a in json.loads(redis.get("anomalies")) if a[0] in lemmas]
    start = datetime.strptime(json.loads(redis.get("anomalies_start")),
                              time_format)
    end = datetime.strptime(json.loads(redis.get("anomalies_end")),
                            time_format)
    return render_template(
        "anomaly.html",
        title=make_title("Piekdetectie"),
        peaks=peaks,
        start=display_datetime(start),
        end=display_datetime(end),
        num_peaks=len(peaks),
    )
def view_token_co_occurrences(keyword):
    """List tokens that co-occur with *keyword* in the selected period."""
    period, start, end, cache_time = get_period(request, "week")
    params = {"start": start.strftime(time_format),
              "end": end.strftime(time_format)}
    keyword_data = cache(process_tokens, keyword, params,
                         cache_time=cache_time, path=get_req_path(request))
    if isinstance(keyword_data, Response):
        return keyword_data
    occurrences = []
    for occ in keyword_data["occurrences"]:
        # The keyword itself is not a co-occurrence; skip it.
        if occ["text"] == keyword:
            continue
        # Normalize the POS tag: drop anything from "(" onwards, then parse.
        occ["pos"] = parse_pos(occ["pos"].split("(")[0])
        occurrences.append(occ)
    return render_template(
        "occurrences.html",
        title=make_title(keyword),
        keyword=keyword,
        period=period,
        start=display_datetime(start),
        end=display_datetime(end),
        occurrences=zip(range(1, len(occurrences) + 1), occurrences),
    )
def view_groups():
    """Overview page listing all keyword groups."""
    groups = cache(tweety.get_groups)
    if isinstance(groups, Response):
        return groups
    return render_template("groups.html", title=make_title("Groepen"),
                           groups=groups)
def view_group(group):
    """Show every keyword in *group* with its tweet count and share.

    Fix: the per-keyword percentage divided by the summed count without a
    guard, so a period in which no keyword had any tweets (total == 0)
    raised ZeroDivisionError.  The share now falls back to 0 in that case.
    """
    period, start, end, cache_time = get_period(request, "day")
    params = {
        "start": start.strftime(time_format),
        "end": end.strftime(time_format),
        "group": group,
    }
    keywords = cache(tweety.get_keywords, cache_time=cache_time,
                     path=get_req_path(request), **params)
    if isinstance(keywords, Response):
        return keywords
    total = sum(entry["count"] for entry in keywords)
    for keyword in keywords:
        # Guard against division by zero when no keyword had any tweets.
        share = keyword["count"] / total * 100 if total else 0
        keyword["percentage"] = "{:.2f}".format(share)
        keyword["count"] = display_number(keyword["count"])
    nums = range(1, len(keywords) + 1)
    template_data = {
        "nums_keywords": zip(nums, keywords),
        "group": group,
        "disp_group": display_group(group),
        "nums": nums,
        "total": display_number(total),
        "period": period,
        "start": display_datetime(start),
        "end": display_datetime(end),
    }
    return render_template("group.html",
                           title=make_title(template_data["disp_group"]),
                           **template_data)
def get_search_results(query):
    """
    Accept query text (string).
    Returns <OrderedDict> with parsed search results data.
    """
    url = cnst.URL_SV.AUTOCOMPLETE + '?query=%s' % query
    response_json = JSON.ObjectFromString(
        get_request(url, cache=MAIN_PAGE_CACHE_TIME).content)
    result = OrderedDict()
    for index, item in enumerate(response_json['data']):
        # Only entries whose data field marks them as a serial are usable.
        if 'serial-' not in item:
            continue
        season_id = response_json['id'][index]
        suggestion = response_json['suggestions'][index]
        # Try the primary name pattern first, then the alternate one;
        # fall back to the raw suggestion text when neither matches.
        name_match = (Re.SEARCH_SUGGESTION_NAME.search(suggestion)
                      or Re.SEARCH_SUGGESTION_NAME_ALT.search(suggestion))
        name = name_match.group(1) if name_match else suggestion
        number_match = Re.SEASON_TITLE_NUMBER.search(suggestion)
        season_number = number_match.group(1) if number_match else 0
        result[season_id] = {
            'season_id': season_id,
            'name': name,
            'title': make_title(season_number, name),
            'thumb': make_thumb_url(season_id),
        }
    return result
def view_tweets_about_keyword(keyword):
    """Show the (re)tweets mentioning *keyword* in the selected period."""
    period, start, end, cache_time = get_period(request, "week")
    params = {"start": start.strftime(time_format),
              "end": end.strftime(time_format)}
    keyword_data = cache(process_details, keyword, params,
                         cache_time=cache_time, path=get_req_path(request))
    if isinstance(keyword_data, Response):
        return keyword_data
    tweets = keyword_data["tweets"]
    return render_template(
        "tweets.html",
        title=make_title(keyword),
        keyword=keyword,
        num_tweets=keyword_data["num_tweets"],
        num_unique_tweets=len(tweets),
        tweets=tweets,
        retweets=keyword_data["retweets"],
        period=period,
        start=display_datetime(start),
        end=display_datetime(end),
    )
def format_api_season_data(data):
    """Convert one season record from the JSON API into the internal form.

    Episodes are grouped into ``result['playlist']`` keyed by translation
    name (``perevod``); episodes without a ``perevod`` field are collected
    under ``'TRANSLATE_DEFAULT'``.  Translations listed in
    ``cnst.UNSUPPORTED_TRANSLATES`` are dropped.

    Improvement: the four manual "create list or append" branches are
    replaced by a single ``dict.setdefault(...).append(...)``, and the
    ``'playlist' in data`` check by ``data.get('playlist', [])``.
    """
    result = {
        'season_id': data['id'],
        'name': data['name'],
        'summary': data['description'],
        'season_number': data['season_number'],  # NB: can be 0
        'title': make_title(data.get('season_number'), data['name']),
        'thumb_small': data['poster_small'],
        'thumb': data['poster'],
        'rating': average_rating(data.get('rating')),
        'playlist': {}
    }
    if 'other_season' in data:
        result['other_season'] = data['other_season']
    for episode in data.get('playlist', []):
        # Derive a short episode id from the name; fall back to the name.
        episode_id = Regex(EPISODE_ID_PATTERN).search(episode['name'])
        episode['episode_id'] = (episode_id.group(1) if episode_id
                                 else episode['name'])
        translate = episode.get('perevod', 'TRANSLATE_DEFAULT')
        if ('perevod' in episode
                and episode['perevod'] in cnst.UNSUPPORTED_TRANSLATES):
            continue  # drop unsupported translations entirely
        result['playlist'].setdefault(translate, []).append(episode)
    return result
def view_news(keyword):
    """Show news items mentioning *keyword* for the selected period."""
    period, start, end, cache_time = get_period(request, "week")
    news_data = cache(process_news, keyword, start, end,
                      cache_time=cache_time, path=get_req_path(request))
    if isinstance(news_data, Response):
        return news_data
    news = []
    for item in news_data:
        item["pubdate"] = display_datetime(item["pubdate"])
        # The internal news id is not needed by the template.
        del item["nid"]
        news.append(item)
    return render_template(
        "news.html",
        title=make_title(keyword),
        keyword=keyword,
        start=display_datetime(start),
        end=display_datetime(end),
        period=period,
        period_name={"day": "dag", "week": "week",
                     "month": "maand"}.get(period, "dag"),
        news=news,
    )
def loading(loading_id):
    """GET: render the loading page; POST: report the task status as JSON."""
    if request.method == "GET":
        return render_template("loading.html", title=make_title("Laden"))
    elif request.method == "POST":
        state = redis.get("loading:" + loading_id)
        # A missing key means the task finished and its marker expired.
        done = state in (b"done", None)
        return jsonify({"status": "done" if done else "loading"})
def format_api_search_data(data):
    """Turn API search results into an OrderedDict keyed by season id."""
    result = OrderedDict()
    for item in data:
        season_id = item['id']
        if season_id in cnst.BLACKLISTED_SEASONS:
            continue
        result[season_id] = {
            'season_id': season_id,
            'name': item['name'],
            'thumb': item['poster'],
            'title': make_title(item.get('season'), item['name']),
        }
    return result
def get_season_data_by_link(url):
    """
    Accept url or uri (string).
    Returns dict with parsed season data.
    """
    result = {}
    page_html = HTML.ElementFromString(
        get_request(url, cache=SEASON_PAGE_CACHE_TIME).content)
    # The season id lives in a data attribute on the page's main block.
    season_id = unicode(
        page_html.find('.' + cnst.XP_SeasonP.MAIN).get('data-id-season'))
    if season_id not in cnst.BLACKLISTED_SEASONS:
        title_element = page_html.find('.' + cnst.XP_SeasonP.TITLE)
        title_text = title_element.text_content().strip()
        # The season number is optional in the title; default to 0.
        get_season_number = Re.SEASON_TITLE_NUMBER.search(title_text)
        if get_season_number:
            season_number = get_season_number.group(1)
        else:
            season_number = 0
        get_name = Re.SEASON_TITLE_NAME.search(title_text)
        if get_name:
            name = get_name.group(1)
        else:
            # NOTE(review): assumes the ALT pattern always matches when the
            # primary one does not; a title matching neither would raise
            # AttributeError here -- confirm against real page titles.
            name = Re.SEASON_TITLE_NAME_ALT.search(title_text).group(1)
        ratings_block = page_html.findall('.' + cnst.XP_SeasonP.RATINGS
                                          + cnst.XP.SPAN)
        summary = page_html.find('.' + cnst.XP_SeasonP.INFO
                                 + cnst.XP.TEXT).text_content().strip()
        result = {
            'season_id': season_id,
            'name': name,
            'summary': summary,
            'season_number': season_number,
            'title': make_title(season_number, name),
            'thumb': make_thumb_url(season_id),
            'rating': average_rating(ratings_block),
            'playlist': get_season_playlist(page_html)
        }
        # Collect links to the show's other seasons, if more than one exists.
        season_list = page_html.findall('.' + cnst.XP.H2 + cnst.XP.LINK)
        if len(season_list) > 1:
            other_season = OrderedDict()
            for item in season_list:
                item_title_text = item.text_content().strip()
                item_season_id = Re.SEASON_URL_ID.search(
                    item.get('href')).group(1)
                item_season_number = Re.SEASON_TITLE_NUMBER.search(
                    item_title_text).group(1)
                # Skip the season this page itself describes.
                if item_season_id != season_id:
                    other_season[item_season_number] = item_season_id
            result['other_season'] = other_season
    return result
def format_api_updates_data(data):
    """Group update messages from the API by season id.

    Returns an OrderedDict keyed by season id; each value carries the
    season metadata plus an ``update_messages`` list of every message seen
    for that season.

    Fix: the original condition ``id not in result and item['id']`` sent a
    falsy season id that was not yet in ``result`` into the append branch,
    raising KeyError.  Records without a usable id are now skipped up
    front, which also makes the remaining branching straightforward.
    """
    result = OrderedDict()
    for item in data:
        season_id = item['id']
        # Skip blacklisted seasons and records without a usable id.
        if not season_id or season_id in cnst.BLACKLISTED_SEASONS:
            continue
        if season_id not in result:
            result[season_id] = {
                'season_id': season_id,
                'name': item['name'],
                'thumb': item['poster'],
                'title': make_title(item.get('season'), item['name']),
                'update_messages': [item['message']]
            }
        else:
            result[season_id]['update_messages'].append(item['message'])
    return result
def profile():
    """Render the profile page for the currently logged-in user."""
    roles = get_roles(current_user)
    # Group memberships are encoded as roles with a "g:" prefix.
    groups = [name[2:] for name in roles if name.startswith("g:")]
    return render_template(
        "profile.html",
        title=make_title("Profiel"),
        groups=[(g, display_group(g)) for g in groups],
        has_group=len(groups) > 0,
        has_confirmed_email=current_user.has_confirmed_email(),
        is_admin="admin" in roles,
    )
def view_keywords_in_group(group):
    """Show a list of all the keywords in the group."""
    keywords = cache(tweety.get_group, group, cache_time=60 * 60,
                     path=get_req_path(request))
    if isinstance(keywords, Response):
        return keywords
    if keywords:
        keywords.sort(key=lambda entry: entry["lemma"])
        for entry in keywords:
            entry["pos"] = display_pos(entry["pos"])
    disp = display_group(group)
    return render_template(
        "group_keywords.html",
        disp_group=disp,
        title=make_title("Trefwoorden in {}".format(disp)),
        keywords=keywords,
        total=len(keywords),
    )
def home():
    """Render the landing page with the top keywords for both main groups."""
    sync_time = redis.get("sync_time")
    if sync_time:
        sync_time = sync_time.decode("utf-8")
    max_amount = request.args.get("k", 10, type=int)
    period, start, end, cache_time = get_period(request, "day")
    params = {
        "start": start.strftime(time_format),
        "end": end.strftime(time_format),
        "group": "bloemen",
    }
    bloemen = cache(process_top, "bloemen", max_amount, params,
                    cache_time=cache_time, path=get_req_path(request))
    params["group"] = "groente_en_fruit"
    groente_en_fruit = cache(process_top, "groente_en_fruit", max_amount,
                             params, cache_time=cache_time,
                             path=get_req_path(request))
    # Either cache call may short-circuit into an HTTP response.
    for data in (bloemen, groente_en_fruit):
        if isinstance(data, Response):
            return data
    return render_template(
        "home.html",
        title=make_title("BigTU research project"),
        bloemen=bloemen,
        groente_en_fruit=groente_en_fruit,
        sync_time=sync_time,
        start=display_datetime(start),
        end=display_datetime(end),
        period=period,
    )
def get_update_list(day_count=1):
    """
    Accept numbers (int|str).
    Returns <OrderedDict> with parsed updates data.
    """
    day_count = int(day_count)
    # Cap the number of requested days at the configured limit.
    if day_count > GET_UPDATE_LIST_DAYS_LIMIT:
        day_count = GET_UPDATE_LIST_DAYS_LIMIT
    result = OrderedDict()
    page_html = HTML.ElementFromString(
        get_request('/', cache=MAIN_PAGE_CACHE_TIME).content)
    day_blocks = page_html.findall('.' + cnst.XP_MainP.DAY)
    # NOTE(review): takes day_count + 1 blocks -- presumably the first block
    # is "today" and does not count toward day_count; confirm against the
    # actual page layout.
    for d_block in day_blocks[:1 + day_count]:
        season_blocks = d_block.findall('.' + cnst.XP.LINK)
        for s_block in season_blocks:
            season_id = unicode(s_block.get('data-id'))
            title_element = s_block.find('.' + cnst.XP_MainP.SEASON_TITLE)
            update_message = s_block.find(
                '.' + cnst.XP_MainP.SEASON_UPDATE_MSG).text_content().strip()
            # The season number follows the title element as tail text; an
            # empty tail means there is no explicit season number.
            number_text = title_element.tail.strip()
            name = title_element.text_content().strip()
            season_number = 0 if number_text == '' else Re.SEASON_TITLE_NUMBER.search(
                number_text).group(1)
            if season_id not in cnst.BLACKLISTED_SEASONS:
                if season_id not in result:
                    result[season_id] = {
                        'season_id': season_id,
                        'name': name,
                        'update_messages': [update_message],
                        'title': make_title(season_number, name),
                        # NB: otherwise need to request every season page and parse it
                        'thumb': cnst.URL_SV_CDN.MAIN + cnst.URL_SV_CDN.THUMB
                        + '/%s.jpg' % season_id
                    }
                else:
                    # Same season updated again: just collect the message.
                    result[season_id]['update_messages'].append(update_message)
    return result
def edit_group(group):
    """GET: render the edit page for *group*; POST: apply add/delete actions.

    Only users holding the ``g:<group>`` role or ``admin`` may edit.
    POST bodies are JSON: ``{"action": "add"|"delete", "keywords": [...]}``.
    """
    roles = get_roles(current_user)
    # Authorization: require either the group-specific role or admin.
    if ("g:" + group) not in roles and "admin" not in roles:
        flash(
            "U heeft geen rechten om de groep \"{}\" aan te passen.".format(
                display_group(group)), "error")
        return redirect(url_for("horti.home"))
    if request.method == "GET":
        keywords = cache(tweety.get_group, group, cache_time=60 * 60,
                         path=get_req_path(request))
        if isinstance(keywords, Response):
            return keywords
        if keywords:
            keywords.sort(key=lambda x: x["lemma"])
            for k in keywords:
                k["pos"] = display_pos(k["pos"])
        template_data = {
            "keywords": keywords,
            "group": group,
            "disp_group": display_group(group),
            "title": make_title("{} aanpassen".format(group))
        }
        return render_template("edit_group.html", **template_data)
    elif request.method == "POST":
        data = json.loads(request.data)
        keywords = data["keywords"]
        if data["action"] == "delete":
            for k in keywords:
                k["pos"] = parse_pos(k["pos"])
            keywords = [(k["lemma"], k["pos"]) for k in keywords]
            current_keywords = json.loads(tweety.get_group(group))
            current_keywords = [(k["lemma"], k["pos"])
                                for k in current_keywords]
            # Keep everything currently in the group except the selection.
            new_keywords = set(current_keywords) - set(keywords)
            new_keywords = [{"lemma": k[0], "pos": k[1]}
                            for k in new_keywords]
            tweety.put_group(group, data=json.dumps(new_keywords))
            # Refresh the cached copy so the next GET shows the new state.
            cache(tweety.get_group, group, cache_time=60 * 60,
                  force_refresh=True)
            return jsonify({"status": "ok"})
        elif data["action"] == "add":
            keywords = [(k["lemma"], k["pos"]) for k in keywords]
            user_lemmas = [k[0] for k in keywords]
            # Hand the lemmas to the background lemmatizer under a random
            # key and poll Redis for the result.
            key = "".join([choice(ascii_letters) for _ in range(11)])
            lemmatize.apply_async((key, user_lemmas), queue="workers")
            current_keywords = json.loads(tweety.get_group(group))
            current_keywords = [(k["lemma"], k["pos"])
                                for k in current_keywords]
            processed_lemmas = None
            # NOTE(review): this poll loop has no timeout -- if the worker
            # never writes the key, the request blocks indefinitely.
            while processed_lemmas is None:
                processed_lemmas = redis.get(key)
                sleep(0.1)
            processed_lemmas = json.loads(processed_lemmas)
            # Lemmas the lemmatizer normalized differently from the user's
            # input are reported back so the client can confirm them.
            diff = {}
            for (p, u) in zip(processed_lemmas, user_lemmas):
                if u != p:
                    diff[u] = p
            if diff:
                return jsonify({"status": "diff", "diff": diff})
            new_keywords = set(current_keywords) | set(keywords)
            new_keywords = [{"lemma": k[0], "pos": k[1]}
                            for k in new_keywords]
            tweety.put_group(group, data=json.dumps(new_keywords))
            cache(tweety.get_group, group, cache_time=60 * 60,
                  force_refresh=True)
            return jsonify({"status": "ok"})
def admin():
    """Admin page: manage user roles and keyword groups."""
    role_form = RoleForm()
    users = User.query.all()
    usernames = [u.username for u in users]
    # Only accept usernames that actually exist.
    role_form.username.validators.append(
        AnyOf(usernames, message="Username not found."))
    if role_form.validate_on_submit():
        form = role_form
        user = User.query.filter(User.username == form.username.data).one()
        try:
            role = Role.query.filter(Role.name == form.role.data).one()
        except NoResultFound:
            # First time this role name is used: create it on the fly.
            role = Role(name=form.role.data)
            db.session.add(role)
        if form.action.data == "add":
            if role not in user.roles:
                user.roles.append(role)
                db.session.add(user)
        elif form.action.data == "remove":
            if role in user.roles:
                user.roles.remove(role)
                db.session.add(user)
        db.session.commit()
        # Redirect-after-POST to avoid form resubmission.
        return redirect(url_for("horti.admin"))
    group_form = GroupForm()
    if group_form.validate_on_submit():
        form = group_form
        name = form.name.data
        if form.action.data == "add":
            tweety.post_groups(name=name)
        elif form.action.data == "remove":
            tweety.delete_group(name)
        # Force-refresh the cached group list after the mutation.
        groups = cache(tweety.get_groups, force_refresh=True)
        return redirect(url_for("horti.admin"))
    # display groups
    # NOTE(review): polls until the cache returns real data instead of a
    # Response; there is no timeout, so an unavailable backend blocks this
    # request forever. The sleep also runs after a successful fetch --
    # confirm whether that extra 0.2 s delay is intended.
    have_groups = False
    while not have_groups:
        groups = cache(tweety.get_groups)
        if not isinstance(groups, Response):
            have_groups = True
            groups.sort()
        sleep(0.2)
    # display roles
    roles = {}
    for user in users:
        roles[user.username] = ", ".join(sorted([r.name for r in user.roles]))
    template_data = {
        "role_form": role_form,
        "users": users,
        "roles": roles,
        "groups": groups,
        "group_form": group_form
    }
    return render_template("admin.html", title=make_title("Admin"),
                           **template_data)
def make_pull_plot_21(misIDRatios, catRatios, name = "gen", mydir = "pull_plots_21", y_range = None, excluded = []):
    """Draw a 21-bin pull plot comparing each composite category's ratio
    (from catRatios) against the sum of its two component-category ratios
    (from misIDRatios), save it as PDF and PNG under *mydir*, and return a
    dict of per-category pull values.

    NOTE(review): ``excluded = []`` is a mutable default argument; it is
    only read here, never mutated, so it is harmless in practice.
    """
    pull_plots = []
    sum_plots = []
    sum_plots_2 = []
    chi2s = {}
    sum_plot = TH1D("sum_plot", "", 21, 0, 21 )
    gen_plot = TH1D("gen_plot", "", 21, 0, 21 )
    c = TCanvas("Plot", "Plot", 1920,1080)
    ROOT.gStyle.SetOptStat(0)
    # Single-bin scratch histograms used to compare one category at a time.
    test1 = TH1D("test1", "test1", 1, 0, 1 )
    test2 = TH1D("test2", "test2", 1, 0, 1 )
    # One histogram per composite category; bin labels on the summary plots.
    for b in range(1, len(bin_names_composite_nice)+1):
        pull_plots.append(TH1D("cats%d"%b, "", 21, 0, 21 ))
        sum_plots.append(TH1D("sums%d"%b, "sums%d"%b, 21, 0, 21 ))
        sum_plots_2.append(TH1D("sums2_%d"%b, "sums2_%d"%b, 21, 0, 21 ))
        gen_plot.GetXaxis().SetBinLabel(b,bin_names_composite_nice[b-1])
        sum_plot.GetXaxis().SetBinLabel(b,bin_names_composite_nice[b-1])
    for b in range(1, len(bin_names_composite_nice)+1):
        for i in range(1, len(bin_names_composite_nice)+1):
            pull_plots[b-1].GetXaxis().SetBinLabel(i,bin_names_composite_nice[i-1])
            sum_plots[b-1].GetXaxis().SetBinLabel(i,bin_names_composite_nice[i-1])
            sum_plots_2[b-1].GetXaxis().SetBinLabel(i,bin_names_composite_nice[i-1])
        # Skip categories the caller explicitly excluded.
        if bin_names_composite_nice[b-1] in excluded:
            continue
        # Composite-category value vs. the sum of its two components.
        (cat1, cat2) = get_component_cats(bin_names_composite_nice[b-1])
        (value_gen, err, err_plus) = catRatios[bin_names_composite_nice[b-1]]
        pull_plots[b-1].SetBinContent(b, value_gen)
        pull_plots[b-1].SetBinError(b, err)
        (value, err, err_plus) = misIDRatios[cat1]
        sum_plots[b-1].SetBinContent(b, value)
        sum_plots[b-1].SetBinError(b, err)
        (value, err, err_plus) = misIDRatios[cat2]
        sum_plots_2[b-1].SetBinContent(b, value)
        sum_plots_2[b-1].SetBinError(b, err)
        sum_plots[b-1].Add(sum_plots_2[b-1])
        test1.SetBinContent(1, pull_plots[b-1].GetBinContent(b))
        test1.SetBinError(1, pull_plots[b-1].GetBinError(b))
        test2.SetBinContent(1, sum_plots[b-1].GetBinContent(b))
        test2.SetBinError(1, sum_plots[b-1].GetBinError(b))
        #chi2s.append(test1.Chi2Test(test2, "WW"))
        #Chi2 method from histogram doesn't give expected results, will calculate manually
        chi2s[bin_names_composite_nice[b-1]] = abs(test1.GetBinContent(1) - test2.GetBinContent(1)) / (test1.GetBinError(1) + test2.GetBinError(1))
        #print b, test1.GetBinContent(1), test2.GetBinContent(1)
        gen_plot.Add(pull_plots[b-1])
        sum_plot.Add(sum_plots[b-1])
    if y_range:
        gen_plot.SetAxisRange(y_range[0], y_range[1],"Y")
    gen_plot.SetLineColor(ROOT.kRed)
    gen_plot.SetLineWidth(3)
    sum_plot.SetLineWidth(2)
    title = make_title(name, excluded)
    gen_plot.SetNameTitle(title, title)
    gen_plot.Draw("e1")
    sum_plot.Draw("e1 same")
    leg = ROOT.TLegend(0.5,0.75,0.9,0.85)
    leg.SetBorderSize(0)
    leg.SetLineStyle(0)
    leg.SetTextSize(0.04)
    leg.SetFillColor(0)
    leg.AddEntry(sum_plot,"Sum of component categories","l")
    leg.AddEntry(gen_plot,"Category for 2 electrons","l")
    leg.Draw()
    # Make sure the output directory exists before saving.
    mkdir_p(mydir)
    c.SaveAs("%s/pulls_%s.pdf" % (mydir, name))
    c.SaveAs("%s/pulls_%s.png" % (mydir, name))
    return chi2s
def api():
    """Render the API documentation page."""
    return render_template("api.html",
                           title=make_title("API documentatie"),
                           docs=docs)
def view_stories(group):
    """Render the storify page for *group*: active stories first, then
    closed ones, each paired with a timeline entry.

    Improvement: the two near-identical collection loops (active/closed)
    are folded into one local helper, ``_collect``; behavior is unchanged.
    """
    period, start, end, cache_time = get_period(request, "week")
    params = {
        "start": start.strftime(time_format),
        "end": end.strftime(time_format)
    }
    story_data = cache(process_stories, group, params,
                       cache_time=cache_time, path=get_req_path(request))
    if isinstance(story_data, Response):
        return story_data
    active_stories, closed_stories = story_data
    storify_data = []
    timeline_data = []
    # Timeline bounds in milliseconds since the epoch (UTC).
    timeline_start = timegm(start.timetuple()) * 1000
    timeline_end = timegm(end.timetuple()) * 1000
    display_tweets = 11
    display_active_stories = 10
    display_closed_stories = 5

    def _collect(stories, limit):
        # Append stories until `limit` total entries have been collected.
        for story in stories:
            if len(storify_data) >= limit:
                break
            story = filter_story(story, display_tweets)
            # Label is the story's index; recorded before appending.
            timeline_data.append({
                "label": len(storify_data),
                "times": story["cluster_details"]
            })
            del story["cluster_details"]
            storify_data.append(story)

    _collect(active_stories, display_active_stories)
    _collect(closed_stories, display_active_stories + display_closed_stories)
    template_data = {
        "group": display_group(group),
        "storify_data": json.dumps(storify_data),
        "timeline_data": json.dumps(timeline_data),
        "timeline_start_ts": timeline_start,
        "timeline_end_ts": timeline_end,
        "display_tweets": display_tweets,
        "num_stories": min(display_active_stories + display_closed_stories,
                           len(storify_data)),
        "start": display_datetime(start),
        "end": display_datetime(end),
        "period": period
    }
    return render_template("storify.html", title=make_title(group),
                           **template_data)
def view_keyword(keyword):
    """Render the detail page for one keyword: tweets, URLs, photos,
    sentiment, tag cloud, Google-Trends settings and related news.
    """
    deluxe = is_deluxe(
        current_user
    )  # users in the "deluxe" group can specify their own time period
    period, start, end, cache_time = get_period(request, "week")
    if period == "custom":
        # Custom periods are deluxe-only and limited to at most a month.
        if not deluxe:
            flash(
                "Deze functionaliteit is alleen beschikbaar voor goedgekeurde gebruikers.",
                "error")
            return redirect(url_for("horti.home"))
        if (end - start).days > 31:
            flash("Periode langer dan een maand is niet toegestaan", "error")
            return redirect(url_for("horti.home"))
        if start > end:
            flash("De einddatum moet na de begindatum zijn.", "error")
            return redirect(url_for("horti.home"))
    params = {
        "start": start.strftime(time_format),
        "end": end.strftime(time_format)
    }
    keyword_data = cache(process_details, keyword, params,
                         cache_time=cache_time, path=get_req_path(request))
    if isinstance(keyword_data, Response):
        return keyword_data
    # Only the first 16 URLs are shown; shorten each for display.
    urls = keyword_data["URLs"][:16]
    for url in urls:
        url["display_url"] = shorten(url["link"], 80)
    del keyword_data["URLs"]
    keyword_data["tagCloud"] = keyword_data["tagCloud"][:200]
    photos = enumerate(keyword_data["photos"]
                       )  # number of photo's is limited in processing.py
    del keyword_data["photos"]
    display_tweets = 11
    max_tweets = 200
    keyword_data["tweets"] = keyword_data["tweets"][:max_tweets]
    keyword_data["retweets"] = keyword_data["retweets"][:display_tweets]
    keyword_data["interaction_tweets"] = keyword_data[
        "interaction_tweets"][:max_tweets]
    # Values rendered outside the JSON blob are popped out of keyword_data
    # so they are not serialized twice below.
    num_tweets = keyword_data["num_tweets"]
    del keyword_data["num_tweets"]
    graph = keyword_data["graph"]
    del keyword_data["graph"]
    polarity = keyword_data["polarity"]
    del keyword_data["polarity"]
    polarity_face = display_polarity(polarity)
    # Map the period onto Google-Trends syntax and a Dutch display name.
    gtrends_period = {
        "day": "now 1-d",
        "week": "now 7-d",
        "month": "today 1-m"
    }.get(period, "now 1-d")
    period_name = {
        "day": "dag",
        "week": "week",
        "month": "maand"
    }.get(period, "dag")
    news = []
    for item in keyword_data["news"]:
        item["pubdate"] = display_datetime(item["pubdate"])
        del item["nid"]
        news.append(item)
    del keyword_data["news"]
    template_data = {
        "keyword": keyword,
        "keyword_data": json.dumps(keyword_data),
        "deluxe": deluxe,
        "num_tweets": display_number(num_tweets),
        "urls": urls,
        "graph": json.dumps(graph),
        "photos": photos,
        "display_tweets": display_tweets,
        "start": display_datetime(start),
        "end": display_datetime(end),
        "period": period,
        "period_name": period_name,
        "polarity": polarity,
        "polarity_face": polarity_face,
        "gtrends_period": gtrends_period,
        "news": news
    }
    return render_template("keyword.html", title=make_title(keyword),
                           **template_data)