def get_random_streams(fr=0, sz=30):
    """Search for documents whose status is "live", in random order.

    Args:
        fr: Offset of the first hit (sent as the search body's "from").
        sz: Maximum number of hits (sent as the search body's "size").

    Returns:
        Whatever ``es_search`` returns for the built request body.
    """
    # The seed is the current local time truncated to whole seconds, so the
    # random ordering changes from one second to the next.
    seed = str(int(time.mktime(datetime.now().timetuple())))

    live_only = {"bool": {"filter": [{"match_phrase": {"status": "live"}}]}}
    scoring = {
        "query": live_only,
        "random_score": {"seed": seed, "field": "_seq_no"},
        "boost": "5",
        # "replace": the random score is used instead of the query score.
        "boost_mode": "replace",
    }
    request_body = {
        "from": fr,
        "size": sz,
        "query": {"function_score": scoring},
    }
    return es_search(body=request_body)
def get_top_viewers():
    """Return up to 48 "live" documents as a JSON response.

    Hits are ordered by ``popular_rate`` descending, then by ``published``
    descending. When ``es_search`` returns a falsy value the request is
    aborted with status 400.
    """
    live_filter = [{"match_phrase": {"status": "live"}}]
    ordering = [
        {"popular_rate": {"order": "desc"}},
        {"published": {"order": "desc"}},
    ]
    request_body = {
        "size": 48,
        "query": {"bool": {"filter": live_filter}},
        "sort": ordering,
    }

    response = es_search(body=request_body)
    if not response:
        abort(400)
    return jsonify(response)
def get_channel_data(channel):
    """Search for the most recent "live" document of one channel.

    Args:
        channel: Value matched (as a phrase) against the ``channel`` field.

    Returns:
        Whatever ``es_search`` returns; the request asks for at most one hit,
        sorted by ``timestamp`` and then ``published``, both descending.
    """
    must_clauses = [
        {"match_phrase": {"channel": channel}},
        {"match_phrase": {"status": "live"}},
    ]
    newest_first = [
        {"timestamp": {"order": "desc"}},
        {"published": {"order": "desc"}},
    ]
    request_body = {
        "size": 1,
        "query": {"bool": {"must": must_clauses}},
        "sort": newest_first,
    }
    return es_search(request_body)
def get_platform_data(platform, fr=0, sz=30, language="", exclude_language=None):
    """Search for "live" documents of a platform, in random order.

    Args:
        platform: Platform to filter on; the special value "all" disables the
            platform filter.
        fr: Offset of the first hit (sent as the search body's "from").
        sz: Maximum number of hits (sent as the search body's "size").
        language: When non-empty, only documents with this language match.
        exclude_language: Iterable of languages to exclude; ``None`` or an
            empty value excludes nothing.

    Returns:
        Whatever ``es_search`` returns for the built request body.
    """
    bool_query = {
        "must": [
            {"match_phrase": {"status": "live"}},
        ],
    }

    # Collect filters first and attach them only when there are any, so a
    # language filter also works when platform == "all" (previously this
    # combination raised KeyError because no "filter" list existed yet).
    filters = []
    if platform != "all":
        filters.append({"match_phrase": {"platform": platform}})
    if language:
        filters.append({"match_phrase": {"language": language}})
    if filters:
        bool_query["filter"] = filters

    if exclude_language:
        bool_query["must_not"] = [
            {"match_phrase": {"language": excluded}}
            for excluded in exclude_language
        ]

    # The seed is the current local time in whole seconds, so the random
    # ordering changes from one second to the next.
    seed = str(int(time.mktime(datetime.now().timetuple())))
    body = {
        "from": fr,
        "size": sz,
        "query": {
            "function_score": {
                "query": {"bool": bool_query},
                "random_score": {"seed": seed, "field": "_seq_no"},
                "boost": "5",
                "boost_mode": "replace",
            }
        },
    }
    return es_search(body)
def update_videos_click_through():
    """Increment the ``click_through`` counter of the live document for a URL.

    The video URL is read from the ``videourl`` parameter of the current
    request (via ``get_parameters_from_url``; presumably a mapping of
    parameter name to a list of values -- verify against that helper).

    Returns:
        ``'ok'`` on success, ``"Can't find corresponding data"`` when no live
        document matches, or ``False`` when the search or the update call
        returns a falsy value. Aborts with 400 when ``videourl`` is missing.
    """
    qs = get_parameters_from_url(request)
    try:
        video_url = qs["videourl"][0]
    except (KeyError, IndexError):
        # Missing parameter, or a parameter with an empty value list.
        abort(400)

    res = es_search(
        body={
            "query": {
                "bool": {
                    "must": [
                        {"match_phrase": {"videourl": video_url}},
                    ],
                    "filter": [
                        {"match_phrase": {"status": "live"}},
                    ],
                }
            },
            "_source": "_id",
        })
    if not res:
        return False

    hits = res['hits']['hits']
    if not hits:
        return "Can't find corresponding data"

    # Fix: the increment branch used to write to ``ctx._source.s`` instead of
    # ``ctx._source.click_through``, so an existing counter never increased.
    script_source = (
        "if(ctx._source.containsKey(\"click_through\"))"
        "{ctx._source.click_through+=params.count} "
        "else{ctx._source.click_through=1}"
    )
    res = es_update(
        _id=hits[0]["_id"],
        body={
            "script": {
                "source": script_source,
                "lang": "painless",
                "params": {"count": 1},
            }
        })
    if not res:
        return False
    return 'ok'
def run(self):
    """Periodically mark stale documents as invalid.

    Every pass searches for up to 1000 documents whose ``timestamp`` is older
    than now minus ``TOLERANT__TIMESTAMP_TIMEDELTA`` and whose status is not
    already "invalid", then sets each hit's status to "invalid" through
    ``es_update``. The loop ends on KeyboardInterrupt, or on any other
    exception (which is passed to ``logfunc``).
    """
    print(self.name, " starts!!")
    try:
        while True:
            # presumably TOLERANT__TIMESTAMP_TIMEDELTA is a timedelta -- confirm
            cutoff = datetime.datetime.now() - TOLERANT__TIMESTAMP_TIMEDELTA
            body = {
                "size": 1000,
                "query": {
                    "bool": {
                        "must": [{
                            "range": {"timestamp": {"lt": cutoff}}
                        }],
                        "must_not": [
                            {"match_phrase": {"status": "invalid"}},
                        ],
                    }
                },
            }
            results = es_search(body=body)
            if results:
                hits = results['hits']['hits']
                for hit in hits:
                    es_update(
                        hit['_id'],
                        {"script": {
                            "source": "ctx._source.status='invalid'"
                        }})
                logfunc(self.name,
                        "Mark {} data as invalid".format(len(hits)))
            # Sleep on every pass, including after a failed search; the old
            # `continue` skipped the sleep and retried in a tight busy loop.
            time.sleep(5)
    except KeyboardInterrupt:
        print("Forced Stop.")
    except Exception as e:
        logfunc(e)
def home_page():
    """Build the home page payload and return it as a JSON response.

    One random batch from ``get_random_streams()`` is sliced into the
    "subscriptions", "upcoming", "recommended", "today" and
    "within_72_hours" sections. "most_viewed" and "hot" each come from a
    separate search for the top four "live" documents sorted by ``viewers``
    and ``popular_rate`` respectively.

    Aborts with status 500 when any of the searches returns a falsy value
    (previously this surfaced as a TypeError while indexing the result).
    """
    results = get_random_streams()
    if not results:
        abort(500)

    pool = results["hits"]["hits"]
    response = {
        "subscriptions": pool[0:4],
        "upcoming": pool[4:5],
        "recommended": pool[5:9],
        "today": pool[9:13],
        "within_72_hours": pool[13:17],
    }

    # Both remaining sections use the same query and differ only in the
    # primary sort field.
    ranked_sections = (
        ("most_viewed", "viewers"),
        ("hot", "popular_rate"),
    )
    for section, sort_field in ranked_sections:
        body = {
            "size": 4,
            "query": {
                "bool": {
                    "filter": [
                        {"match_phrase": {"status": "live"}},
                    ],
                }
            },
            "sort": [
                {sort_field: {"order": "desc"}},
                {"published": {"order": "desc"}},
            ],
        }
        results = es_search(body=body)
        if not results:
            abort(500)
        response[section] = results["hits"]["hits"][0:4]

    return jsonify(response)
def query_elastic(q, fr=0, sz=50):
    """Full-text search over title, description, tags, host and platform.

    Documents whose status is "invalid" are always excluded. When the text
    search yields no hits, a fallback search returns twitch/youtube documents
    sorted by viewers instead.

    Args:
        q: The search text.
        fr: Offset of the first hit (sent as the search body's "from").
        sz: Maximum number of hits (sent as the search body's "size").

    Returns:
        The ``es_search`` result with an extra ``"found"`` key: True when the
        text search had hits, False when the fallback result is returned.
        Returns False when ``es_search`` returns a falsy value.
    """
    # (field, boost or None for "no boost key", minimum_should_match)
    match_specs = (
        ("title", 3, "90%"),
        ("description", None, "70%"),
        ("tags", 4, "80%"),
        ("host", 2, "80%"),
        ("platform", 1, "80%"),
    )
    should_clauses = []
    for field, boost, min_match in match_specs:
        options = {"query": q}
        if boost is not None:
            options["boost"] = boost
        options["minimum_should_match"] = min_match
        should_clauses.append({"match": {field: options}})

    body = {
        "size": sz,
        "from": fr,
        "query": {
            "bool": {
                "must_not": [
                    {"match_phrase": {"status": "invalid"}},
                ],
                "should": should_clauses,
                "minimum_should_match": 1,
            }
        },
    }

    # For non-English (non-ASCII) queries, additionally add match_phrase
    # clauses whose slop grows with the query length.
    if not is_ascii(q):
        # (field, boost or None for "no boost key", slop factor)
        phrase_specs = (
            ("title", 4, 0.4),
            ("description", None, 0.6),
            ("tags", 3, 0.2),
            ("host", 2, 0.2),
            ("platform", 1, 0.4),
        )
        phrase_clauses = []
        for field, boost, factor in phrase_specs:
            options = {"query": q}
            if boost is not None:
                options["boost"] = boost
            options["slop"] = int(len(q) * factor) + 1
            phrase_clauses.append({"match_phrase": {field: options}})
        body["query"]["bool"]["should"].extend(phrase_clauses)

    body["sort"] = [
        "_score",
        {"published": {"order": "desc"}},
        {"timestamp": {"order": "desc"}},
    ]

    res = es_search(body)
    if not res:
        return False

    if res['hits']['hits']:
        res["found"] = True
        return res

    # No results: fall back to the most-viewed twitch/youtube documents.
    fallback_body = {
        "size": sz,
        "from": fr,
        "query": {
            "bool": {
                "must_not": [
                    {"match_phrase": {"status": "invalid"}},
                ],
                "should": [
                    {"match_phrase": {"platform": "twitch"}},
                    {"match_phrase": {"platform": "youtube"}},
                ],
            }
        },
        "sort": [
            {"viewers": {"order": "desc"}},
            {"timestamp": {"order": "desc"}},
        ],
    }
    res = es_search(fallback_body)
    if not res:
        return False
    res["found"] = False
    return res
def create_or_update_doc():
    """Create a livestream document, or refresh the existing one.

    The payload comes from the current request (JSON body or form data) with
    its keys normalised by ``trans_to_smallcase_key``. ``videourl`` together
    with ``host`` and ``platform`` identifies an existing, non-invalid
    document: if one is found its timestamp, description, status and
    popular_rate are updated; otherwise a new document is indexed into
    "livestreams" (detecting the language when none is supplied).

    Returns:
        ``'ok'`` on success, or ``False`` when the lookup search returns a
        falsy value. Aborts with 400 for missing/malformed input or when
        indexing/updating raises, and with 500 when ``es_update`` returns a
        falsy value.
    """
    if request.is_json:
        form = request.get_json()
    else:
        form = request.form.copy()
    form = trans_to_smallcase_key(form)

    # "videourl" is required too: it is the lookup key below, and a missing
    # value used to escape as an unhandled KeyError.
    required = ("host", "platform", "title", "published", "videourl")
    try:
        if not all(form[key] for key in required):
            return abort(400)
    except KeyError:
        return abort(400)

    # Use videourl as the unique ID.
    res = es_search(
        body={
            "query": {
                "bool": {
                    "must": {
                        "match_phrase": {"videourl": form["videourl"]}
                    },
                    "filter": [
                        {"match_phrase": {"host": form["host"]}},
                        {"match_phrase": {"platform": form["platform"]}},
                    ],
                    "must_not": [
                        {"match_phrase": {"status": "invalid"}},
                    ],
                },
            },
            "_source": "_id",
            "sort": {"timestamp": {"order": "desc"}},
        })
    if not res:
        return False

    form["timestamp"] = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
    form["click_through"] = 0
    if not form.get("status"):
        form["status"] = "live"

    # Accept both supported timestamp spellings; anything else is a bad
    # request rather than an unhandled exception.
    published_time = None
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S+0000"):
        try:
            published_time = time.strptime(form["published"], fmt)
            break
        except (TypeError, ValueError):
            continue
    if published_time is None:
        return abort(400)

    try:
        viewers = int(form["viewers"]) if form.get("viewers") else 0
    except (TypeError, ValueError):
        return abort(400)

    # popular_rate is viewers per elapsed second, scaled by 1e10. Guard the
    # zero case (published within the current second) against
    # ZeroDivisionError.
    elapsed = (time.mktime(datetime.now().timetuple()) -
               time.mktime(published_time))
    form["popular_rate"] = int(viewers * 10000000000 / (elapsed or 1))

    try:
        if len(res['hits']['hits']) == 0:
            # Classify the video's language when the caller did not.
            if not form.get("language"):
                test_string = (form["title"] + " " + form["description"] +
                               " " + form["host"])
                form["language"] = detect_language(test_string)
            # Create data
            es.index(index="livestreams", body=form)
        else:
            res = es_update(
                _id=res["hits"]["hits"][0]["_id"],
                body={
                    "doc": {
                        "timestamp": form["timestamp"],
                        "description": form["description"],
                        "status": form["status"],
                        "popular_rate": form["popular_rate"],
                    }
                })
    except Exception as e:
        logfunc("'Wrong Request'")
        print(form)
        print(e)
        abort(400)

    # Checked outside the try block: abort() raises an exception, so the
    # previous in-try abort(500) was caught above and turned into a 400.
    # In the create branch ``res`` is still the (truthy) search result.
    if not res:
        return abort(500)
    return 'ok'
def run(self):
    """Continuously sample live documents of ``self.platform`` and process them.

    Each pass fetches up to 100 randomly ordered "live" documents for the
    platform and calls ``self.process(hit)`` for every hit that has a
    non-empty ``thumbnails`` value. Per-hit errors are logged and do not stop
    the loop. On KeyboardInterrupt the file at ``self.compared_img_name`` is
    removed if it exists.
    """
    print(self.name, " starts!!")
    try:
        while True:
            # Seed = current local time in seconds plus a random offset
            # scaled by self.name_no.
            seed = str(
                int(time.mktime(datetime.datetime.now().timetuple())) +
                int(random.random() * 100000000) * self.name_no)
            body = {
                "size": 100,
                "query": {
                    "function_score": {
                        "query": {
                            "bool": {
                                "filter": [
                                    {"match_phrase": {"status": "live"}},
                                    {"match_phrase": {
                                        "platform": self.platform
                                    }},
                                ]
                            }
                        },
                        "random_score": {"seed": seed, "field": "_seq_no"},
                        "boost": "5",
                        "boost_mode": "replace",
                    }
                },
                "_source": ["_id", "thumbnails", "host"],
            }
            results = es_search(body)
            if not results:
                # Back off before retrying a failed search.
                time.sleep(10)
                continue

            for hit in results["hits"]["hits"]:
                try:
                    if not hit['_source']["thumbnails"]:
                        continue
                    self.process(hit)
                except KeyError:
                    logfunc(self.name, "No Key 'thumbnails'")
                except RequestsHTTPError as e:
                    logfunc(self.name, e)
                except Exception as e:
                    logfunc(self.name, e)
            # Brief pause between batches.
            time.sleep(1)
    except KeyboardInterrupt:
        # Fix: this was spelled "KeyBoardInterrupt", an undefined name, so
        # the handler raised NameError instead of cleaning up.
        if os.path.isfile(self.compared_img_name):
            os.remove(self.compared_img_name)