Example #1
0
def get_random_streams(fr=0, sz=30):
    """Fetch a randomly ordered page of documents whose status is "live".

    Args:
        fr: Offset of the first hit (sent as the "from" field of the query).
        sz: Maximum number of hits (sent as the "size" field of the query).

    Returns:
        Whatever ``es_search`` returns for the constructed query body.
    """
    live_only = {"bool": {"filter": [{"match_phrase": {"status": "live"}}]}}

    # The seed is the current local time truncated to whole seconds, so two
    # calls made within the same second send the same seed.
    seed = str(int(time.mktime(datetime.now().timetuple())))

    scoring = {
        "query": live_only,
        "random_score": {"seed": seed, "field": "_seq_no"},
        "boost": "5",
        # "replace" makes the random score the only score that counts.
        "boost_mode": "replace",
    }
    request_body = {
        "from": fr,
        "size": sz,
        "query": {"function_score": scoring},
    }
    return es_search(body=request_body)
Example #2
0
def get_top_viewers():
    """Return up to 48 live documents, ordered by popularity, as JSON.

    Hits are sorted by ``popular_rate`` descending, then by ``published``
    descending. ``abort(400)`` is called when ``es_search`` gives back a
    falsy result.
    """
    live_filter = [{"match_phrase": {"status": "live"}}]
    ordering = [
        {"popular_rate": {"order": "desc"}},
        {"published": {"order": "desc"}},
    ]
    request_body = {
        "size": 48,
        "query": {"bool": {"filter": live_filter}},
        "sort": ordering,
    }

    response = es_search(body=request_body)
    if not response:
        abort(400)
    return jsonify(response)
Example #3
0
def get_channel_data(channel):
    """Look up the most recent live document for one channel.

    Args:
        channel: Value matched (as a phrase) against the ``channel`` field.

    Returns:
        Whatever ``es_search`` returns; the query asks for at most one hit,
        ordered by ``timestamp`` and then ``published``, both descending.
    """
    conditions = [
        {"match_phrase": {"channel": channel}},
        {"match_phrase": {"status": "live"}},
    ]
    newest_first = [
        {"timestamp": {"order": "desc"}},
        {"published": {"order": "desc"}},
    ]
    request_body = {
        "size": 1,
        "query": {"bool": {"must": conditions}},
        "sort": newest_first,
    }
    return es_search(request_body)
Example #4
0
def get_platform_data(platform, fr=0, sz=30, language="", exclude_language=None):
    """Fetch a randomly ordered page of live documents for a platform.

    Args:
        platform: Platform name to filter on, or ``"all"`` for no platform
            filter.
        fr: Offset of the first hit (sent as "from").
        sz: Maximum number of hits (sent as "size").
        language: When non-empty, only documents with this ``language`` match.
        exclude_language: Optional iterable of languages to exclude. ``None``
            (the default) or an empty iterable excludes nothing.

    Returns:
        Whatever ``es_search`` returns for the constructed query body.
    """
    # Collect filter clauses first so that a language filter also works when
    # platform == "all" (there is no platform clause to append to then).
    filters = []
    if platform != "all":
        filters.append({"match_phrase": {"platform": platform}})
    if language:
        filters.append({"match_phrase": {"language": language}})

    bool_query = {"must": [{"match_phrase": {"status": "live"}}]}
    if filters:
        bool_query["filter"] = filters
    if exclude_language:
        bool_query["must_not"] = [
            {"match_phrase": {"language": lang}} for lang in exclude_language
        ]

    body = {
        "from": fr,
        "size": sz,
        "query": {
            "function_score": {
                "query": {"bool": bool_query},
                "random_score": {
                    # Current local time in whole seconds: calls within the
                    # same second send the same seed.
                    "seed": str(int(time.mktime(datetime.now().timetuple()))),
                    "field": "_seq_no"
                },
                "boost": "5",
                # "replace" makes the random score the only score that counts.
                "boost_mode": "replace"
            }
        },
    }
    return es_search(body)
Example #5
0
def update_videos_click_through():
    """Increment the ``click_through`` counter of the live document for a URL.

    The video URL is read from the ``videourl`` parameter of the current
    request. ``abort(400)`` is called when that parameter is missing or has
    no value.

    Returns:
        ``'ok'`` on success, ``"Can't find corresponding data"`` when no live
        document matches the URL, or ``False`` when ``es_search`` or
        ``es_update`` gives back a falsy result.
    """
    qs = get_parameters_from_url(request)
    try:
        video_url = qs["videourl"][0]
    except (KeyError, IndexError):
        # IndexError covers a present key with an empty value list.
        abort(400)

    res = es_search(
        body={
            "query": {
                "bool": {
                    "must": [
                        {
                            "match_phrase": {
                                "videourl": video_url
                            }
                        },
                    ],
                    "filter": [
                        {
                            "match_phrase": {
                                "status": "live"
                            }
                        },
                    ]
                }
            },
            "_source": "_id",
        })
    if not res:
        return False

    hits = res['hits']['hits']
    if not hits:
        return "Can't find corresponding data"

    # The script must increment the same field it tests for; the earlier
    # version added to an unrelated field ``s`` when ``click_through`` already
    # existed, so the counter never grew past its initial value.
    res = es_update(
        _id=hits[0]["_id"],
        body={
            "script": {
                "source":
                "if(ctx._source.containsKey(\"click_through\")){ctx._source.click_through+=params.count} else{ctx._source.click_through=1}",
                "lang": "painless",
                "params": {
                    "count": 1
                }
            }
        })
    if not res:
        return False
    return 'ok'
Example #6
0
    def run(self):
        """Loop forever, marking stale documents as invalid.

        Each pass searches for up to 1000 documents whose ``timestamp`` is
        older than ``now - TOLERANT__TIMESTAMP_TIMEDELTA`` and whose status
        is not already "invalid", sets ``status='invalid'`` on each through
        ``es_update``, logs the count, and sleeps 5 seconds.

        The loop ends on ``KeyboardInterrupt`` or on any other exception,
        which is logged rather than re-raised.
        """
        print(self.name, " starts!!")
        try:
            while True:
                cutoff = (datetime.datetime.now() -
                          TOLERANT__TIMESTAMP_TIMEDELTA)
                body = {
                    "size": 1000,
                    "query": {
                        "bool": {
                            "must": [{
                                "range": {
                                    "timestamp": {
                                        "lt": cutoff
                                    }
                                }
                            }],
                            "must_not": [
                                {
                                    "match_phrase": {
                                        "status": "invalid"
                                    }
                                },
                            ]
                        }
                    },
                }
                results = es_search(body=body)
                if not results:
                    # Wait before retrying; retrying immediately would spin
                    # in a tight loop for as long as the search keeps failing.
                    time.sleep(5)
                    continue

                hits = results['hits']['hits']
                for hit in hits:
                    es_update(
                        hit['_id'],
                        {"script": {
                            "source": "ctx._source.status='invalid'"
                        }})
                logfunc(
                    self.name,
                    "Mark {} data as invalid".format(len(hits)))
                time.sleep(5)

        except KeyboardInterrupt:
            print("Forced Stop.")

        except Exception as e:
            # Include the worker name, as the other log call in this method does.
            logfunc(self.name, e)
Example #7
0
def home_page():
    """Assemble the home-page sections and return them as JSON.

    The first five sections are consecutive slices of one random-stream
    query; "most_viewed" and "hot" each come from their own query for the
    top four live documents ordered by ``viewers`` and ``popular_rate``
    respectively (ties broken by ``published``, descending).
    """

    def top_four_live(sort_field):
        # Run one "top 4 live documents by <sort_field>" query.
        request_body = {
            "size": 4,
            "query": {
                "bool": {"filter": [{"match_phrase": {"status": "live"}}]},
            },
            "sort": [
                {sort_field: {"order": "desc"}},
                {"published": {"order": "desc"}},
            ],
        }
        found = es_search(body=request_body)
        return found["hits"]["hits"][0:4]

    random_hits = get_random_streams()["hits"]["hits"]
    response = {
        "subscriptions": random_hits[0:4],
        "upcoming": random_hits[4:5],
        "recommended": random_hits[5:9],
        "today": random_hits[9:13],
        "within_72_hours": random_hits[13:17],
    }
    response["most_viewed"] = top_four_live("viewers")
    response["hot"] = top_four_live("popular_rate")

    return jsonify(response)
Example #8
0
def query_elastic(q, fr=0, sz=50):
    """Run a full-text search for ``q`` with a fallback listing.

    The main query matches ``q`` against title, description, tags, host and
    platform (at least one must match) while excluding documents whose status
    is "invalid". When ``q`` is not ASCII, phrase clauses with a
    length-dependent slop are added as well. If the main query has no hits,
    a fallback query lists twitch/youtube documents ordered by viewers.

    Args:
        q: The search text.
        fr: Offset of the first hit (sent as "from").
        sz: Maximum number of hits (sent as "size").

    Returns:
        ``False`` when ``es_search`` gives back a falsy result; otherwise the
        search result with an added ``"found"`` key that is ``True`` for main
        query hits and ``False`` for fallback results.
    """
    # (field, boost or None when no boost is sent, minimum_should_match)
    match_specs = (
        ("title", 3, "90%"),
        ("description", None, "70%"),
        ("tags", 4, "80%"),
        ("host", 2, "80%"),
        ("platform", 1, "80%"),
    )
    # (field, boost or None when no boost is sent, slop as a fraction of len(q))
    phrase_specs = (
        ("title", 4, 0.4),
        ("description", None, 0.6),
        ("tags", 3, 0.2),
        ("host", 2, 0.2),
        ("platform", 1, 0.4),
    )

    should_clauses = []
    for field, boost, min_match in match_specs:
        options = {"query": q}
        if boost is not None:
            options["boost"] = boost
        options["minimum_should_match"] = min_match
        should_clauses.append({"match": {field: options}})

    # For non-English (non-ASCII) input, add match_phrase clauses as well.
    if not is_ascii(q):
        for field, boost, slop_ratio in phrase_specs:
            options = {"query": q}
            if boost is not None:
                options["boost"] = boost
            options["slop"] = int(len(q) * slop_ratio) + 1
            should_clauses.append({"match_phrase": {field: options}})

    main_body = {
        "size": sz,
        "from": fr,
        "query": {
            "bool": {
                "must_not": [{"match_phrase": {"status": "invalid"}}],
                "should": should_clauses,
                "minimum_should_match": 1,
            }
        },
        "sort": [
            "_score",
            {"published": {"order": "desc"}},
            {"timestamp": {"order": "desc"}},
        ],
    }

    res = es_search(main_body)
    if not res:
        return False

    if res['hits']['hits']:
        res["found"] = True
        return res

    # No results: fall back to a generic listing of the two platforms.
    fallback_body = {
        "size": sz,
        "from": fr,
        "query": {
            "bool": {
                "must_not": [{"match_phrase": {"status": "invalid"}}],
                "should": [
                    {"match_phrase": {"platform": "twitch"}},
                    {"match_phrase": {"platform": "youtube"}},
                ],
            }
        },
        "sort": [
            {"viewers": {"order": "desc"}},
            {"timestamp": {"order": "desc"}},
        ],
    }
    res = es_search(fallback_body)
    if not res:
        return False
    res["found"] = False
    return res
Example #9
0
def create_or_update_doc():
    """Create a livestream document, or refresh the existing one.

    The submitted form (JSON body or form data, keys normalised by
    ``trans_to_smallcase_key``) must carry non-empty ``host``, ``platform``,
    ``title`` and ``published`` values and a ``videourl`` key. The document is
    identified by ``videourl`` together with ``host`` and ``platform``; if a
    non-invalid match exists its timestamp, description, status and
    popular_rate are updated, otherwise a new document is indexed into
    ``livestreams``.

    Returns:
        ``'ok'`` on success. Aborts with 400 for malformed input or when
        building/sending the write fails, and with 500 when the search or
        the update reports failure.
    """
    if request.is_json:
        form = request.get_json()
    else:
        form = request.form.copy()
    form = trans_to_smallcase_key(form)

    try:
        if not form["host"] or not form["platform"] or not form[
                "title"] or not form["published"]:
            return abort(400)
        # videourl is used below as the lookup key, so a missing key must be
        # rejected here instead of surfacing later as an unhandled KeyError.
        video_url = form["videourl"]
    except KeyError:
        return abort(400)

    # Use videourl as the unique ID.
    res = es_search(
        body={
            "query": {
                "bool": {
                    "must": {
                        "match_phrase": {
                            "videourl": video_url
                        }
                    },
                    "filter": [{
                        "match_phrase": {
                            "host": form["host"]
                        }
                    }, {
                        "match_phrase": {
                            "platform": form["platform"]
                        }
                    }],
                    "must_not": [
                        {
                            "match_phrase": {
                                "status": "invalid"
                            }
                        },
                    ]
                },
            },
            "_source": "_id",
            "sort": {
                "timestamp": {
                    "order": "desc"
                }
            },
        })
    if not res:
        # A bare ``return False`` is not a valid response from a view.
        return abort(500)

    # NOTE(review): datetime.now() is naive local time but is formatted with
    # a "Z" suffix, and time.mktime() below interprets the parsed "published"
    # value as local time although its suffix says UTC. Left unchanged because
    # stored data may depend on it -- confirm the intended time zone.
    form["timestamp"] = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
    form["click_through"] = 0
    if not "status" in form or not form["status"]:
        form["status"] = "live"

    published_time = None
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S+0000"):
        try:
            published_time = time.strptime(form["published"], fmt)
            break
        except (TypeError, ValueError):
            continue
    if published_time is None:
        # Neither accepted format matched: the client sent a bad date.
        return abort(400)

    try:
        viewers = int(form.get("viewers", None) or 0)
    except (TypeError, ValueError):
        return abort(400)

    elapsed = (time.mktime(datetime.now().timetuple()) -
               time.mktime(published_time))
    if elapsed == 0:
        # Published in the current second: avoid dividing by zero.
        elapsed = 1
    form["popular_rate"] = int(viewers * 10000000000 / elapsed)

    try:
        hits = res['hits']['hits']
        if not hits:
            # Classify the video's language when the client did not send one.
            if not "language" in form or not form["language"]:
                test_string = form["title"] + " " + form[
                    "description"] + " " + form["host"]
                form["language"] = detect_language(test_string)
            # Create data
            es.index(index="livestreams", body=form)
            updated = True
        else:
            updated = es_update(_id=hits[0]["_id"],
                                body={
                                    "doc": {
                                        "timestamp": form["timestamp"],
                                        "description": form["description"],
                                        "status": form["status"],
                                        "popular_rate": form["popular_rate"]
                                    }
                                })
    except Exception as e:
        logfunc("'Wrong Request'")
        print(form)
        print(e)
        return abort(400)

    # Checked outside the try block: inside it, the exception raised by
    # abort(500) would be caught by ``except Exception`` and turned into 400.
    if not updated:
        return abort(500)
    return 'ok'
Example #10
0
    def run(self):
        """Loop forever, processing a random sample of live documents.

        Each pass asks for up to 100 randomly ordered live documents of
        ``self.platform`` (only ``_id``, ``thumbnails`` and ``host`` are
        requested as source fields) and passes every hit that has a non-empty
        ``thumbnails`` value to ``self.process``. Per-hit errors are logged
        and do not stop the loop. A failed search is retried after 10 seconds;
        otherwise the loop pauses 1 second between passes.

        On ``KeyboardInterrupt`` the file ``self.compared_img_name`` is removed
        if it exists.
        """
        print(self.name, " starts!!")
        try:
            while True:
                # The seed mixes the current time (whole seconds) with a
                # random offset scaled by this worker's number.
                seed = str(
                    int(time.mktime(datetime.datetime.now().timetuple())) +
                    int(random.random() * 100000000) * self.name_no)
                body = {
                    "size": 100,
                    "query": {
                        "function_score": {
                            "query": {
                                "bool": {
                                    "filter": [
                                        {
                                            "match_phrase": {
                                                "status": "live"
                                            }
                                        },
                                        {
                                            "match_phrase": {
                                                "platform": self.platform
                                            }
                                        },
                                    ]
                                }
                            },
                            "random_score": {
                                "seed": seed,
                                "field": "_seq_no"
                            },
                            "boost": "5",
                            "boost_mode": "replace"
                        }
                    },
                    "_source": ["_id", "thumbnails", "host"],
                }

                results = es_search(body)
                if not results:
                    time.sleep(10)
                    continue

                for hit in results["hits"]["hits"]:
                    try:
                        if not hit['_source']["thumbnails"]:
                            continue
                        self.process(hit)
                    except KeyError:
                        logfunc(self.name, "No Key 'thumbnails'")
                    except RequestsHTTPError as e:
                        logfunc(self.name, e)
                    except Exception as e:
                        logfunc(self.name, e)
                time.sleep(1)

        # The builtin is spelled KeyboardInterrupt; the earlier misspelling
        # raised NameError as soon as any exception reached this clause, so
        # the cleanup below never ran.
        except KeyboardInterrupt:
            if os.path.isfile(self.compared_img_name):
                os.remove(self.compared_img_name)