Exemple #1
0
 def __init__(self, videoid, msghandler, seektime=0):
     """Remember the video id and handler, then create the chat listener.

     ``videoid`` is logged and passed to ``LiveChatAsync`` as ``video_id``;
     ``msghandler`` is only stored; ``seektime`` is forwarded unchanged.
     ``self.chatProcessor`` is registered as the listener callback.
     """
     self.videoid = videoid
     self.msghandler = msghandler
     start_banner = 'videoid:' + videoid + ' seektime:' + str(seektime)
     logging.info(start_banner)
     listener_options = {
         'video_id': videoid,
         'callback': self.chatProcessor,
         'seektime': seektime,
     }
     self.livechat = LiveChatAsync(**listener_options)
Exemple #2
0
async def main():
    """Run a LiveChatAsync listener until it stops, then report why."""
    chat = LiveChatAsync(settings.VIDEO_ID, callback=func)
    while True:
        if not chat.is_alive():
            break
        # Other background work can run here between polls.
        await asyncio.sleep(1)

    # raise_for_status() is how the reason for the termination is checked.
    try:
        chat.raise_for_status()
    except pytchat.ChatDataFinished:
        print("Chat data finished.")
    except Exception as err:
        print(type(err), str(err))
Exemple #3
0
async def main_async():
    """Create the global chat listener and keep the websocket listener running.

    While the chat object reports being alive, ``websocket_setup_listen()``
    is awaited over and over; afterwards the termination reason is printed.
    """
    global pytchatObjAsync
    pytchatObjAsync = LiveChatAsync(video_id=broadcastId, callback=pytchat_check)
    while True:
        if not pytchatObjAsync.is_alive():
            break
        await websocket_setup_listen()

    # raise_for_status() is how the reason for the termination is checked.
    try:
        pytchatObjAsync.raise_for_status()
    except pytchat.ChatDataFinished:
        print("Chat data finished.")
    except Exception as err:
        print(type(err), str(err))
Exemple #4
0
class LiveChatProcessor:
    """Relay chat messages written by moderators or the owner of one video.

    A ``LiveChatAsync`` listener is created with ``chatProcessor`` as its
    callback; matching messages are converted to plain dicts and handed to
    ``msghandler.send_message`` together with the video id.
    """

    def __init__(self, videoid, msghandler, seektime=0):
        """Store the video id and handler and create the chat listener."""
        self.videoid = videoid
        self.msghandler = msghandler
        start_banner = 'videoid:' + videoid + ' seektime:' + str(seektime)
        logging.info(start_banner)
        self.livechat = LiveChatAsync(
            video_id=videoid,
            callback=self.chatProcessor,
            seektime=seektime,
        )

    async def chatProcessor(self, chatdata):
        """Listener callback: forward moderator/owner messages of one batch.

        ``chatdata.tick_async()`` is awaited once after every item.
        """
        logging.debug(f'[{self.videoid}]chatProcessor')
        for chat in chatdata.items:
            privileged = chat.author.isChatModerator or chat.author.isChatOwner
            if privileged:
                logging.info('Chat:' + chat.author.name + '\n' + chat.message)
                payload = self.chatRendererToJson(chat)
                self.sendChat(payload)
            await chatdata.tick_async()

    def chatRendererToJson(self, c):
        """Return the fields of chat item ``c`` as a nested plain dict."""
        author = c.author
        author_fields = {
            'name': author.name,
            'channelId': author.channelId,
            'imageUrl': author.imageUrl,
            'isChatOwner': author.isChatOwner,
            'isChatModerator': author.isChatModerator,
        }
        return {
            'type': c.type,
            'message': c.message,
            'timestamp': c.timestamp,
            'datetime': c.datetime,
            'author': author_fields,
        }

    def sendChat(self, msg):
        """Hand ``msg`` and this processor's video id to the message handler."""
        self.msghandler.send_message(msg, self.videoid)

    def terminate(self):
        """Terminate the chat listener unless it is ``None``."""
        if self.livechat is not None:
            self.livechat.terminate()
Exemple #5
0
async def workload(vid_id):
    """Collect every chat item of stream ``vid_id`` and dump it to JSON.

    Nothing is recorded when ``ROOT/<vid_id>.json`` already exists.
    Otherwise a ``LiveChatAsync`` listener is polled every 5 seconds until it
    stops, and the collected chats are written as an index -> chat mapping.
    Any termination reason other than ``ChatDataFinished`` propagates.
    """
    vid_path = ROOT.joinpath(vid_id + ".json")
    if vid_path.exists():
        logger.critical("File already exists for stream {}", vid_id)
        return

    collected = []
    logger.debug("task {} started", vid_id)

    async def callback(chat_data: Chatdata):
        # Called by the listener with each batch of chat data.
        logger.debug("Processing Data")
        async for chat in chat_data.async_items():
            json_data: dict = json.loads(chat.json())
            logger.debug(f"S:[{json_data['author']['name']}][{json_data['timestamp']}][{json_data['message']}]")
            collected.append(json_data)

    # Errors raised while creating the listener propagate to the caller.
    live_chat = LiveChatAsync(vid_id, callback=callback, force_replay=True, direct_mode=True)
    while True:
        if not live_chat.is_alive():
            break
        await asyncio.sleep(5)

    try:
        live_chat.raise_for_status()
    except ChatDataFinished:
        logger.info("Chat data finished.")

    indexed_chats = dict(enumerate(collected))
    vid_path.write_text(json.dumps(indexed_chats, indent=2), encoding="utf8")

    logger.info("Written {} chats for {}", len(collected), vid_id)
 async def monitor(self):
     """Start a chat listener for ``self.video_id`` and wait until it stops.

     ``self.fetchcallback`` is registered as the listener callback; the
     listener is polled every 3 seconds.
     """
     chat = LiveChatAsync(self.video_id, callback=self.fetchcallback)
     while True:
         if not chat.is_alive():
             return
         await asyncio.sleep(3)
 async def main(self):
     """Record the super chats of ``self.videoid`` and persist the results.

     Steps, in order:

     1. Read the PostgreSQL credentials from ``postgres-config.json`` and
        open the connection stored in ``self.conn``.
     2. Merge the row already stored for this video (if any) into
        ``self.videoinfo``.
     3. Prepare the insert statements and upsert the channel row.
     4. Return early when ``already_done`` reports a finished recording.
     5. Run ``LiveChatAsync`` repeatedly until enough attempts yielded
        messages, the retry budget is used up, the run was cancelled or the
        video is no longer live/upcoming.
     6. Write the collected messages, statistics and donors to
        ``self.sc_file``, ``self.stats_file`` and ``self.donor_file``
        (renamed with a ``.cancelled`` suffix when the run was cancelled)
        and optionally generate a word cloud.

     The database connection is closed on every return path.
     """
     if not self.loop:
         self.loop = asyncio.get_running_loop()
     await self.log_output(self.videoinfo,10)
     # Context manager so the credentials file handle is always released.
     with open("postgres-config.json") as pgsql_config_file:
         pgsql_creds = json.load(pgsql_config_file)
     self.conn = await asyncpg.connect(user = pgsql_creds["username"], password = pgsql_creds["password"], host = pgsql_creds["host"], database = pgsql_creds["database"])
     old_meta_row = await self.conn.fetchrow('SELECT c.name, channel_id, title, caught_while, live, old_title, length, createdDateTime, publishDateTime, startedLogAt, endedLogAt, scheduledStartTime, actualStartTime, actualEndTime, retries_of_rerecording, retries_of_rerecording_had_scs FROM video INNER JOIN channel c on channel_id = c.id WHERE video_id = $1', self.videoid)
     old_meta = dict(old_meta_row) if old_meta_row else None
     if old_meta:
         # The stored row is read with lower-case keys; missing times become 0.
         old_time_meta = {"scheduledStartTime": old_meta["scheduledstarttime"].timestamp() if old_meta["scheduledstarttime"] else 0,
                          "actualStartTime": old_meta["actualstarttime"].timestamp() if old_meta["actualstarttime"] else 0,
                          "actualEndTime": old_meta["actualendtime"].timestamp() if old_meta["actualendtime"] else 0}
         if self.videoinfo:
             # Fill times missing in the database from the current metadata.
             for time_key in old_time_meta.keys():
                 if "liveStreamingDetails" in self.videoinfo.keys():
                     if time_key in self.videoinfo["liveStreamingDetails"].keys():
                         if not old_time_meta[time_key] and self.videoinfo["liveStreamingDetails"][time_key]:
                             old_time_meta[time_key] = self.videoinfo["liveStreamingDetails"][time_key]
         # Drop the times that are still unknown (0).
         time_meta_keys = list(old_time_meta.keys())
         for timekey in time_meta_keys:
             if not old_time_meta[timekey]:
                 old_time_meta.pop(timekey)
         old_meta["liveStreamingDetails"] = old_time_meta
         if not self.videoinfo:
             self.videoinfo = copy.deepcopy(self.skeleton_dict)
         await self.log_output(self.videoinfo,10)
         if self.videoinfo["title"] != old_meta["title"] and self.videoinfo["title"]:
             old_meta["old_title"] = old_meta["title"]
             old_meta["title"] = self.videoinfo["title"]
         # Map lower-cased key -> actual key so skeleton names can be matched
         # case-insensitively against the stored row.
         old_meta_keys_l = [k.lower() for k in old_meta.keys()]
         old_meta_keys_n = [k for k in old_meta.keys()]
         old_meta_keys = dict(zip(old_meta_keys_l, old_meta_keys_n))
         for info in self.skeleton_dict.keys():
             if info.lower() in old_meta_keys_l:
                 stored_value = old_meta[old_meta_keys[info.lower()]]
                 if type(stored_value) is datetime:
                     self.videoinfo[info] = stored_value.timestamp()
                 elif stored_value:
                     self.videoinfo[info] = stored_value
                 elif stored_value is None and "time" in info.lower():
                     if info in self.videoinfo.keys():
                         await self.log_output((info,"key found", self.videoinfo[info],self.videoinfo.keys()))
                         self.videoinfo[info] = self.videoinfo[info] if self.videoinfo[info] else 0
                     else:
                         # The key is known to be absent here, so it must not be
                         # indexed directly (that raised KeyError before).
                         await self.log_output((info,"key not found",self.videoinfo.get(info),self.videoinfo.keys()))
                         self.videoinfo[info] = 0
                 else:
                     await self.log_output("else case",10)
         self.channel_id = old_meta["channel_id"]
         self.videoinfo["channel"] = old_meta["name"]
         self.videoinfo["channelId"] = self.channel_id
         self.videoinfo["id"] = self.videoid
         self.videoPostedAt = self.videoinfo['publishDateTime']
         self.metadata_list.append(self.videoinfo)
         self.ended_at = old_meta["endedlogat"] if old_meta["endedlogat"] else None
         self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
         if self.metadata:
             self.videoinfo["live"] = self.metadata["live"]
     await self.log_output(self.videoinfo)
     if not self.videoinfo:
         await self.conn.close()
         return
     self.insert_channels = await self.conn.prepare("INSERT INTO channel(id, name, tracked) VALUES ($1,$2,$3) "
                                                    "ON CONFLICT DO NOTHING")
     self.channel_name_history = await self.conn.prepare("INSERT INTO chan_names(id, name, time_discovered, time_used) "
                                                         "VALUES ($1,$2,$3,$4) ON CONFLICT (id,name) DO UPDATE SET time_used = $4")
     self.insert_messages = await self.conn.prepare("INSERT INTO messages(video_id, chat_id, user_id, message_txt, "
                                                    "time_sent, currency, value, color) "
                                                    "VALUES ($1,$2,$3,$4,$5,$6,$7,$8) ON CONFLICT DO NOTHING")
     async with self.conn.transaction():
         if self.channel_id and self.videoinfo["channel"]:
             await self.conn.execute("INSERT INTO channel VALUES($1,$2,$3) ON CONFLICT (id) DO UPDATE SET tracked = $3",
                                    self.channel_id, self.videoinfo["channel"], True)
             await self.conn.execute("INSERT INTO chan_names VALUES($1,$2,$3) ON CONFLICT DO NOTHING",
                                     self.channel_id, self.videoinfo["channel"],
                                     datetime.now(tz=pytz.timezone('Europe/Berlin')))
     self.chat_err = True
     log_exist_test, filesize, db_retries_had_scs, repeats = await self.already_done(self.conn)
     self.videoinfo["retries_of_rerecording_had_scs"] = db_retries_had_scs
     self.videoinfo["retries_of_rerecording"] = repeats
     if log_exist_test:
         await self.log_output(self.videoinfo["channel"] + " - " + self.videoinfo[
                 "title"] + " already analyzed, skipping. Existing file size: " + str(
                 filesize) + " bytes")
         # Close the connection on this early exit as well.
         await self.conn.close()
         return
     had_scs = db_retries_had_scs if db_retries_had_scs else 0
     self.msg_counter = 0
     islive = True
     while (repeats < self.max_retry_attempts and had_scs < self.min_successful_attempts and not self.cancelled and islive):
         self.msg_counter = 0
         self.chat_err = True
         if self.metadata:
             islive = self.metadata["live"] in ["upcoming","live"]
         # Inner loop: restart the chat listener while it ended prematurely.
         while self.chat_err and not self.cancelled:
             if "liveStreamingDetails" in self.videoinfo.keys() or self.videoinfo["live"] != "none" or repeats >= 1:
                 self.stats.clear()
                 self.chat_err = False
                 self.started_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                 publishtime = datetime.fromtimestamp(self.videoPostedAt,timezone.utc)
                 async with self.conn.transaction():
                     await self.conn.execute(
                         "INSERT INTO video (video_id,channel_id,title,startedlogat,createddatetime) "
                         "VALUES($1,$2,$3,$4,$5) ON CONFLICT DO NOTHING",
                         self.videoid, self.videoinfo["channelId"], self.videoinfo["title"], self.started_at, publishtime)
                 await self.update_psql_metadata()
                 await self.log_output("Started Analysis #"+str(repeats+1)+" at: "+self.started_at.isoformat())
                 await self.log_output("of video " + publishtime.isoformat() + " " +self.videoinfo["channel"]+" - " + self.videoinfo["title"] + " ["+self.videoid+"]")
                 if repeats >= 1:
                     await self.log_output("Recording the YouTube-archived chat after livestream finished")
                 self.httpclient = httpx.AsyncClient(http2=True)
                 self.running_chat = LiveChatAsync(self.videoid, callback = self.display, processor = (SuperChatLogProcessor(), SuperchatCalculator()),logger=self.logger, client = self.httpclient, exception_handler = self.exception_handling)
                 while self.running_chat.is_alive() and not self.cancelled:
                     await asyncio.sleep(3)
                 if type(self.running_chat.exception) is exceptions.InvalidVideoIdException or type(self.running_chat.exception) is exceptions.ChatParseException:
                     #Video ID invalid: Private or Membership vid or deleted. Treat as cancelled
                     #ChatParseException: No chat found
                     self.cancelled = True
                 if repeats == 0 and not self.chat_err and not self.cancelled and islive:
                     self.ended_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                     self.videoinfo["endedLogAt"] = self.ended_at.timestamp()
                 await self.httpclient.aclose()
                 newmetadata = await self.async_get_video_info(self.videoid) #when livestream chat parsing ends, get some more metadata
                 if newmetadata is not None:
                     if newmetadata["live"] in ["upcoming","live"]: #in case the livestream has not ended yet!
                         await self.log_output(("Error! Chat monitor ended prematurely!",self.running_chat.is_alive()))
                         self.chat_err = True
                 else:
                     islive = False
                 if self.videoinfo["caught_while"] in ["upcoming","live"]:
                     #use newer metadata while rescuing certain fields from the old metadata
                     createdDateTime = self.videoPostedAt
                     caught_while = self.videoinfo["caught_while"]
                     old_title = self.videoinfo["title"]
                     retries_w_scs = self.videoinfo["retries_of_rerecording_had_scs"]
                     retries_total = self.videoinfo["retries_of_rerecording"]
                     if newmetadata is not None:
                         self.videoinfo = newmetadata
                         self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
                         self.videoinfo["retries_of_rerecording_had_scs"] = retries_w_scs
                         self.videoinfo["retries_of_rerecording"] = retries_total
                         self.videoinfo["createdDateTime"] = createdDateTime
                         self.videoinfo["caught_while"] = caught_while
                         if self.videoinfo["title"] != old_title:
                             self.videoinfo["old_title"] = old_title
                     else:
                         await self.log_output(("couldn't retrieve new metadata for",self.videoid,old_title))
                 else:
                     islive = False
                 if self.msg_counter > 0 and not self.chat_err:
                     had_scs += 1
                     self.videoinfo["retries_of_rerecording_had_scs"] = had_scs
                     self.total_counted_msgs = 0
                     self.total_member_msgs = 0
                     self.total_new_members = 0
                 self.videoinfo["startedLogAt"] = self.started_at.timestamp()
                 self.videoinfo["retries_of_rerecording"] = repeats
                 await self.update_psql_metadata()
                 self.metadata_list.append(self.videoinfo)
             else:
                 await self.log_output(self.videoinfo["title"]+" is not a broadcast recording or premiere")
                 # Close the connection on this early exit as well.
                 await self.conn.close()
                 return
         repeats += 1
         await self.log_output((repeats,self.cancelled,had_scs,self.videoinfo["live"]))
         # NOTE(review): the hard-coded 2 mirrors the default of
         # min_successful_attempts used in the loop condition - confirm
         # whether it should follow that attribute.
         if repeats >= 1 and not self.cancelled and had_scs < 2 and islive:
             await self.log_output("Waiting "+str(self.minutes_wait)+" minutes before re-recording sc-logs")
             await asyncio.sleep(self.minutes_wait*60)
     self.running = False
     await self.log_output("writing to files")
     proper_sc_list = []
     count_scs = 0
     for msg in self.sc_msgs:
         msg_loaded = json.loads(msg)
         if msg_loaded["type"] not in ["newSponsor", "sponsorMessage"]:
             count_scs += 1
             # donations is the [count, total value] list stored per currency.
             donations = self.donors[msg_loaded["userid"]]["donations"].setdefault(msg_loaded["currency"],[0,0])
             donations[0] += 1 #amount of donations
             donations[1] += msg_loaded["value"] #total amount of money donated
             self.unique_donors.setdefault(msg_loaded["currency"], set()).add(msg_loaded["userid"])
         proper_sc_list.append(msg_loaded)
     unique_currency_donors = {currency: len(donor_ids)
                               for currency, donor_ids in self.unique_donors.items()}
     # Context managers guarantee the handles are closed even if a write or
     # the statistics computation raises.
     with open(self.sc_file, "w") as sc_out:
         sc_out.write(json.dumps(proper_sc_list))
     await self.log_output((len(proper_sc_list), "unique messages written",count_scs,"are superchats"))
     self.stats.append(await self.loop.run_in_executor(self.t_pool, recount_money, proper_sc_list))
     with open(self.stats_file, "w") as stats_out:
         stats_out.write(json.dumps([self.metadata_list[-1], self.stats[-1], unique_currency_donors]))
     with open(self.donor_file, "w") as donors_out:
         donors_out.write(json.dumps(self.donors))
     await self.conn.close()
     if self.cancelled:
         os.rename(self.sc_file, self.sc_file + ".cancelled")
         os.rename(self.stats_file, self.stats_file + ".cancelled")
         os.rename(self.donor_file, self.donor_file + ".cancelled")
     if not self.chat_err and self.gen_wc and len(self.sc_msgs) > 0 and repeats >= 1 and not self.cancelled:
         await self.loop.run_in_executor(self.t_pool, self.generate_wordcloud, proper_sc_list)
class SuperchatArchiver:
    def __init__(self,vid_id, api_key, gen_WC = False, loop = None, file_suffix = ".standalone.txt", minutes_wait = 30, retry_attempts = 72, min_successful_attempts = 2, logger = None):
        """Fetch the video metadata and prepare output paths and logging.

        Parameters:
            vid_id: video id, stored as ``self.videoid``.
            api_key: key handed to ``YouTubeDataAPI``.
            gen_WC: stored as ``self.gen_wc``; ``main`` checks it before
                generating a word cloud.
            loop: event loop, stored as ``self.loop``.
            file_suffix: appended to every output file name.
            minutes_wait: stored as ``self.minutes_wait``.
            retry_attempts: stored as ``self.max_retry_attempts``.
            min_successful_attempts: stored under the same name.
            logger: logger to use; when falsy, a logger writing to
                ``./<channel_id>/<video id>.applog`` and to the console is
                configured instead.

        Side effects: calls the YouTube API for the metadata and creates the
        ``<channel_id>/vid_stats/donors`` and ``<channel_id>/sc_logs``
        directories.
        """
        self.total_counted_msgs = 0
        self.total_new_members = 0
        self.max_retry_attempts = retry_attempts
        self.min_successful_attempts = min_successful_attempts
        self.file_suffix = file_suffix
        self.minutes_wait = minutes_wait
        self.started_at = None
        self.ended_at = None
        self.cancelled = False
        self.loop = loop
        self.t_pool = concurrent.futures.ThreadPoolExecutor(max_workers=100)
        self.api_points_used = 1.0
        self.api = YouTubeDataAPI(api_key) #uses 1p to check key
        self.videoid = vid_id
        self.channel_id = ""
        self.metadata = {}
        self.videoinfo = {}
        self.donors = {}
        self.stats = []
        self.sc_msgs = set()
        self.sc_logs_list = []
        self.metadata_list = []
        self.gen_wc = gen_WC
        self.unique_donors = {}
        # Currency symbol -> currency code.
        self.clean_currency = {"¥": "JPY",
                               "NT$": "TWD",
                               "$": "USD",
                               "CA$": "CAD",
                               "MX$": "MXN",
                               "HK$": "HKD",
                               "A$": "AUD",
                               "£": "GBP",
                               "€": "EUR",
                               "R$": "BRL",
                               "₹": "INR",
                               "\u20b1": "PHP",
                               "\u20aa": "ILS"}

        # get_video_info returns None when the lookup fails.
        self.metadata = self.get_video_info(self.videoid)
        self.api_points_used += 1.0
        self.total_member_msgs = 0
        self.running = True
        self.running_chat = None
        if self.metadata is not None:
            # videoinfo is the same dict object as metadata, not a copy.
            self.videoinfo = self.metadata
            self.videoinfo["retries_of_rerecording_had_scs"] = 0
            self.videoinfo["retries_of_rerecording"] = 0
            self.videoPostedAt = copy.deepcopy(self.videoinfo["publishDateTime"])
            self.channel_id = self.metadata["channelId"]
        else:
            self.videoPostedAt = 0
            self.channel_id = "privatted-deleted-memebershipped"
        # Template with every metadata key set to None; main() deep-copies
        # it when no metadata could be fetched.
        self.skeleton_dict = {"channel": None,
                              "channelId": None,
                              "id": None,
                              "title": None,
                              "live": None,
                              "caught_while": None,
                              "publishDateTime": None,
                              "length": None,
                              "endedLogAt": None,
                              "retries_of_rerecording": None,
                              "retries_of_rerecording_had_scs": None,
                              "createdDateTime": None,
                              "liveStreamingDetails":{"scheduledStartTime": None,
                                                      "actualStartTime": None,
                                                      "actualEndTime": None}
                             }
        self.sc_file = self.channel_id + "/sc_logs/" + self.videoid + ".txt"+self.file_suffix
        self.donor_file = self.channel_id + "/vid_stats/donors/" + self.videoid + ".txt"+self.file_suffix
        self.stats_file = self.channel_id + "/vid_stats/" + self.videoid + "_stats.txt"+self.file_suffix
        pathlib.Path('./' + self.channel_id + '/vid_stats/donors').mkdir(parents=True, exist_ok=True)
        pathlib.Path('./' + self.channel_id + '/sc_logs').mkdir(parents=True, exist_ok=True)
        self.placeholders = 0
        if logger:
            self.logger = logger
        else:
            self.logger = logging.getLogger(__name__)
            self.logger.setLevel(logging.DEBUG)
            # Use the instance's own video id rather than the module-global
            # CLI ``args``, which does not exist when this class is
            # instantiated from another module.
            fh = logging.FileHandler('./' + self.channel_id +"/"+self.videoid+'.applog')
            fh.setLevel(logging.DEBUG)
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            dbg_formatter = config.mylogger.MyFormatter()
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh.setFormatter(dbg_formatter)
            ch.setFormatter(formatter)
            self.logger.addHandler(fh)
            self.logger.addHandler(ch)

    def __str__(self):
        return "["+self.videoid+"] " + self.videoinfo["channel"] + " - " + self.videoinfo["title"] + " - Running: "+str(self.running) + " Live: " + self.videoinfo["live"]
    
    def __repr__(self):
        return "["+self.videoid+"] " + self.videoinfo["channel"] + " - " + self.videoinfo["title"] + " - Running: "+str(self.running) + " Live: " + self.videoinfo["live"]

    def get_video_info(self,video_ID:str):
        response = None
        try:
            response = self.api.get_video_metadata(video_id=video_ID, parser=None,
                                                   part=["liveStreamingDetails", "contentDetails", "snippet"])
            api_metadata = {"channel": response["snippet"]["channelTitle"],
                            "channelId": response["snippet"]["channelId"],
                            "id": video_ID,
                            "title": response["snippet"]["title"],
                            "live": response["snippet"]["liveBroadcastContent"],
                            "caught_while": response["snippet"]["liveBroadcastContent"],
                            "publishDateTime": datetime.strptime(response["snippet"]["publishedAt"] + " +0000",
                                                                 "%Y-%m-%dT%H:%M:%SZ %z").timestamp()}
            delta = isodate.parse_duration(response["contentDetails"]["duration"])
            api_metadata["length"] = delta.total_seconds()
            if 'liveStreamingDetails' in response.keys():
                api_metadata["liveStreamingDetails"] = {}
                for d in response["liveStreamingDetails"].keys():
                    if "Time" in d or "time" in d:
                        api_metadata["liveStreamingDetails"][d] = datetime.strptime(
                            response["liveStreamingDetails"][d] + " +0000", "%Y-%m-%dT%H:%M:%SZ %z").timestamp()
            return api_metadata

        except Exception as e:
            print(self.videoid)
            print(e)
            print(response)
            return None

    async def async_get_video_info(self,video_ID:str):
        self.api_points_used += 1.0
        api_metadata = await self.loop.run_in_executor(self.t_pool,self.get_video_info,video_ID)
        return api_metadata

    def cancel(self):
        self.cancelled = True
        if self.running_chat:
            self.running_chat.terminate()
        

    async def update_psql_metadata(self):
        async with self.conn.transaction():
            await self.conn.execute(
                "UPDATE video SET caught_while = $2, live = $3, title = $4,"
                "retries_of_rerecording = $5, retries_of_rerecording_had_scs = $6 WHERE video_id = $1",
                self.videoid, self.videoinfo["caught_while"], self.videoinfo["live"],
                self.videoinfo["title"], self.videoinfo["retries_of_rerecording"],
                self.videoinfo["retries_of_rerecording_had_scs"])
            if "scheduledStartTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET scheduledstarttime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["scheduledStartTime"], timezone.utc))
            if "actualStartTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET actualstarttime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["actualStartTime"], timezone.utc))
            if "actualEndTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET actualendtime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["actualEndTime"], timezone.utc))
            if "old_title" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET old_title = $2 WHERE  video_id = $1", self.videoid,
                                        self.videoinfo["old_title"])
            if "length" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET length = $2 WHERE  video_id = $1", self.videoid,
                                        self.videoinfo["length"])
            if "publishDateTime" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET publishDateTime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(self.videoinfo["publishDateTime"],
                                                                             timezone.utc))
            if "endedLogAt" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET endedLogAt = $2 WHERE video_id = $1",
                                        self.videoid, self.ended_at)
                
    async def already_done(self,conn):
        row = await conn.fetchrow('SELECT retries_of_rerecording_had_scs, retries_of_rerecording FROM video WHERE video_id = $1', self.videoid)
        successful_sc_recordings = 0
        repeats = 0
        if row:
            successful_sc_recordings = row["retries_of_rerecording_had_scs"] if row["retries_of_rerecording_had_scs"] else 0
            repeats = row["retries_of_rerecording"] if row["retries_of_rerecording"] else 0
        test_file = pathlib.Path(self.sc_file)
        file_has_content = False
        if test_file.is_file():
            if test_file.stat().st_size > 2:
                file_has_content = True
        if successful_sc_recordings >= 2 and file_has_content:
            return True, test_file.stat().st_size, successful_sc_recordings, repeats
        else:
            return False, 0, successful_sc_recordings, repeats

    async def main(self):
        if not self.loop:
            self.loop = asyncio.get_running_loop()
        await self.log_output(self.videoinfo,10)
        pgsql_config_file = open("postgres-config.json")
        pgsql_creds = json.load(pgsql_config_file)
        self.conn = await asyncpg.connect(user = pgsql_creds["username"], password = pgsql_creds["password"], host = pgsql_creds["host"], database = pgsql_creds["database"])
        old_meta_row = await self.conn.fetchrow('SELECT c.name, channel_id, title, caught_while, live, old_title, length, createdDateTime, publishDateTime, startedLogAt, endedLogAt, scheduledStartTime, actualStartTime, actualEndTime, retries_of_rerecording, retries_of_rerecording_had_scs FROM video INNER JOIN channel c on channel_id = c.id WHERE video_id = $1', self.videoid)
        old_meta = dict(old_meta_row) if old_meta_row else None
        if old_meta:
            old_time_meta = {"scheduledStartTime": old_meta["scheduledstarttime"].timestamp() if old_meta["scheduledstarttime"] else 0,
                             "actualStartTime": old_meta["actualstarttime"].timestamp() if old_meta["actualstarttime"] else 0,
                             "actualEndTime": old_meta["actualendtime"].timestamp() if old_meta["actualendtime"] else 0}
            if self.videoinfo:
                for time in old_time_meta.keys():
                    if "liveStreamingDetails" in self.videoinfo.keys():
                        if time in self.videoinfo["liveStreamingDetails"].keys():
                            if not old_time_meta[time] and self.videoinfo["liveStreamingDetails"][time]:
                                old_time_meta[time] = self.videoinfo["liveStreamingDetails"][time]
            time_meta_keys = list(old_time_meta.keys())
            for timekey in time_meta_keys:
                if not old_time_meta[timekey]:
                    old_time_meta.pop(timekey)
            old_meta["liveStreamingDetails"] = old_time_meta
            if not self.videoinfo:
                self.videoinfo = copy.deepcopy(self.skeleton_dict)
            await self.log_output(self.videoinfo,10)
            if self.videoinfo["title"] != old_meta["title"] and self.videoinfo["title"]:
                old_meta["old_title"] = old_meta["title"]
                old_meta["title"] = self.videoinfo["title"]
            old_meta_keys_l = [k.lower() for k in old_meta.keys()]
            old_meta_keys_n = [k for k in old_meta.keys()]
            old_meta_keys = dict(zip(old_meta_keys_l, old_meta_keys_n))
            #await self.log_output(old_meta_keys,10)
            for info in self.skeleton_dict.keys():
                if info.lower() in old_meta_keys_l:
                    if type(old_meta[old_meta_keys[info.lower()]]) is datetime:
                        self.videoinfo[info] = old_meta[old_meta_keys[info.lower()]].timestamp()
                    elif old_meta[old_meta_keys[info.lower()]]:
                        self.videoinfo[info] = old_meta[old_meta_keys[info.lower()]]
                    elif old_meta[old_meta_keys[info.lower()]] is None and "time" in info.lower():
                        if info in self.videoinfo.keys():
                            await self.log_output((info,"key found", self.videoinfo[info],self.videoinfo.keys()))
                            self.videoinfo[info] = self.videoinfo[info] if self.videoinfo[info] else 0
                        else:
                            await self.log_output((info,"key not found",self.videoinfo[info],self.videoinfo.keys()))
                            self.videoinfo[info] = 0
                    else:
                        await self.log_output("else case",10)
            self.channel_id = old_meta["channel_id"]
            self.videoinfo["channel"] = old_meta["name"]
            self.videoinfo["channelId"] = self.channel_id
            self.videoinfo["id"] = self.videoid
            self.videoPostedAt = self.videoinfo['publishDateTime']
            self.metadata_list.append(self.videoinfo)
            self.ended_at = old_meta["endedlogat"] if old_meta["endedlogat"] else None
            self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
            if self.metadata:
                self.videoinfo["live"] = self.metadata["live"]
        await self.log_output(self.videoinfo)
        if not self.videoinfo:
            await self.conn.close()
            return
        self.insert_channels = await self.conn.prepare("INSERT INTO channel(id, name, tracked) VALUES ($1,$2,$3) "
                                                       "ON CONFLICT DO NOTHING")
        self.channel_name_history = await self.conn.prepare("INSERT INTO chan_names(id, name, time_discovered, time_used) "
                                                            "VALUES ($1,$2,$3,$4) ON CONFLICT (id,name) DO UPDATE SET time_used = $4")
        self.insert_messages = await self.conn.prepare("INSERT INTO messages(video_id, chat_id, user_id, message_txt, "
                                                       "time_sent, currency, value, color) "
                                                       "VALUES ($1,$2,$3,$4,$5,$6,$7,$8) ON CONFLICT DO NOTHING")
        async with self.conn.transaction():
            if self.channel_id and self.videoinfo["channel"]:
                await self.conn.execute("INSERT INTO channel VALUES($1,$2,$3) ON CONFLICT (id) DO UPDATE SET tracked = $3",
                                       self.channel_id, self.videoinfo["channel"], True)
                await self.conn.execute("INSERT INTO chan_names VALUES($1,$2,$3) ON CONFLICT DO NOTHING",
                                        self.channel_id, self.videoinfo["channel"],
                                        datetime.now(tz=pytz.timezone('Europe/Berlin')))
        self.chat_err = True
        repeats = 0
        log_exist_test, filesize, db_retries_had_scs, repeats = await self.already_done(self.conn)
        self.videoinfo["retries_of_rerecording_had_scs"] = db_retries_had_scs
        self.videoinfo["retries_of_rerecording"] = repeats
        if log_exist_test:
            await self.log_output(self.videoinfo["channel"] + " - " + self.videoinfo[
                    "title"] + " already analyzed, skipping. Existing file size: " + str(
                    filesize) + " bytes")
            return
        had_scs = db_retries_had_scs if db_retries_had_scs else 0
        self.msg_counter = 0
        islive = True
        while (repeats < self.max_retry_attempts and had_scs < self.min_successful_attempts and not self.cancelled and islive):
            self.msg_counter = 0
            self.chat_err = True
            if self.metadata:
                islive = self.metadata["live"] in ["upcoming","live"]
            while self.chat_err and not self.cancelled:
                if "liveStreamingDetails" in self.videoinfo.keys() or self.videoinfo["live"] != "none" or repeats >= 1:
                    self.stats.clear()
                    self.chat_err = False
                    self.started_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                    publishtime = datetime.fromtimestamp(self.videoPostedAt,timezone.utc)
                    async with self.conn.transaction():
                        await self.conn.execute(
                            "INSERT INTO video (video_id,channel_id,title,startedlogat,createddatetime) "
                            "VALUES($1,$2,$3,$4,$5) ON CONFLICT DO NOTHING",
                            self.videoid, self.videoinfo["channelId"], self.videoinfo["title"], self.started_at, publishtime)
                    await self.update_psql_metadata()
                    await self.log_output("Started Analysis #"+str(repeats+1)+" at: "+self.started_at.isoformat())
                    await self.log_output("of video " + publishtime.isoformat() + " " +self.videoinfo["channel"]+" - " + self.videoinfo["title"] + " ["+self.videoid+"]")
                    if repeats >= 1:
                        await self.log_output("Recording the YouTube-archived chat after livestream finished")
                    self.httpclient = httpx.AsyncClient(http2=True)
                    self.running_chat = LiveChatAsync(self.videoid, callback = self.display, processor = (SuperChatLogProcessor(), SuperchatCalculator()),logger=self.logger, client = self.httpclient, exception_handler = self.exception_handling)
                    while self.running_chat.is_alive() and not self.cancelled:
                        await asyncio.sleep(3)
                    if type(self.running_chat.exception) is exceptions.InvalidVideoIdException or type(self.running_chat.exception) is exceptions.ChatParseException:
                        #Video ID invalid: Private or Membership vid or deleted. Treat as cancelled
                        #ChatParseException: No chat found
                        self.cancelled = True
                    if repeats == 0 and not self.chat_err and not self.cancelled and islive:
                        self.ended_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                        self.videoinfo["endedLogAt"] = self.ended_at.timestamp()
                    await self.httpclient.aclose()
                    newmetadata = await self.async_get_video_info(self.videoid) #when livestream chat parsing ends, get some more metadata
                    if newmetadata is not None:
                        if newmetadata["live"] in ["upcoming","live"]: #in case the livestream has not ended yet!
                            await self.log_output(("Error! Chat monitor ended prematurely!",self.running_chat.is_alive()))
                            self.chat_err = True
                    else:
                        islive = False
                    if self.videoinfo["caught_while"] in ["upcoming","live"]:
                        #use newer metadata while rescuing certain fields from the old metadata
                        createdDateTime = self.videoPostedAt
                        caught_while = self.videoinfo["caught_while"]
                        old_title = self.videoinfo["title"]
                        retries_w_scs = self.videoinfo["retries_of_rerecording_had_scs"]
                        retries_total = self.videoinfo["retries_of_rerecording"]
                        if newmetadata is not None:
                            self.videoinfo = newmetadata
                            self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
                            self.videoinfo["retries_of_rerecording_had_scs"] = retries_w_scs
                            self.videoinfo["retries_of_rerecording"] = retries_total
                            self.videoinfo["createdDateTime"] = createdDateTime
                            self.videoinfo["caught_while"] = caught_while
                            if self.videoinfo["title"] != old_title:
                                self.videoinfo["old_title"] = old_title
                        else:
                            await self.log_output(("couldn't retrieve new metadata for",self.videoid,old_title))
                    else:
                        islive = False
                    if self.msg_counter > 0 and not self.chat_err:
                        had_scs += 1
                        self.videoinfo["retries_of_rerecording_had_scs"] = had_scs
                        self.total_counted_msgs = 0
                        self.total_member_msgs = 0
                        self.total_new_members = 0
                    self.videoinfo["startedLogAt"] = self.started_at.timestamp()
                    self.videoinfo["retries_of_rerecording"] = repeats
                    await self.update_psql_metadata()
                    self.metadata_list.append(self.videoinfo)
                else:
                    await self.log_output(self.videoinfo["title"]+" is not a broadcast recording or premiere")
                    return
            repeats += 1
            await self.log_output((repeats,self.cancelled,had_scs,self.videoinfo["live"]))
            if repeats >= 1 and not self.cancelled and had_scs < 2 and islive:
                await self.log_output("Waiting "+str(self.minutes_wait)+" minutes before re-recording sc-logs")
                await asyncio.sleep(self.minutes_wait*60)
        self.running = False
        await self.log_output("writing to files")
        proper_sc_list = []
        unique_currency_donors={}
        count_scs = 0
        for msg in self.sc_msgs:
            msg_loaded = json.loads(msg)
            if msg_loaded["type"] not in ["newSponsor", "sponsorMessage"]:
                count_scs += 1
                donations = self.donors[msg_loaded["userid"]]["donations"].setdefault(msg_loaded["currency"],[0,0])
                self.donors[msg_loaded["userid"]]["donations"][msg_loaded["currency"]][0] = donations[0] + 1 #amount of donations
                self.donors[msg_loaded["userid"]]["donations"][msg_loaded["currency"]][1] = donations[1] + msg_loaded["value"] #total amount of money donated
                self.unique_donors.setdefault(msg_loaded["currency"], set())
                self.unique_donors[msg_loaded["currency"]].add(msg_loaded["userid"])
            proper_sc_list.append(msg_loaded)
        for currency in self.unique_donors.keys():
            unique_currency_donors[currency] = len(self.unique_donors[currency])
        f = open(self.sc_file, "w")
        f_stats = open(self.stats_file, "w")
        f.write(json.dumps(proper_sc_list))
        await self.log_output((len(proper_sc_list), "unique messages written",count_scs,"are superchats"))
        f.close()
        self.stats.append(await self.loop.run_in_executor(self.t_pool, recount_money, proper_sc_list))
        f_stats.write(json.dumps([self.metadata_list[-1], self.stats[-1], unique_currency_donors]))
        f_stats.close()
        f_donors = open(self.donor_file,"w")
        f_donors.write(json.dumps(self.donors))
        f_donors.close()
        await self.conn.close()
        if self.cancelled:
            os.rename(f.name, f.name+".cancelled")
            os.rename(f_stats.name, f_stats.name + ".cancelled")
            os.rename(f_donors.name, f_donors.name + ".cancelled")
        if not self.chat_err and self.gen_wc and len(self.sc_msgs) > 0 and repeats >= 1 and not self.cancelled:
            await self.loop.run_in_executor(self.t_pool, self.generate_wordcloud, proper_sc_list)

    async def display(self,data,amount):
        """Handle one batch of chat data: collect paid/member messages and persist them.

        Does nothing when ``data.items`` is empty. Otherwise every item is
        inspected by its ``type``:

        * ``"placeholder"`` only bumps ``self.placeholders``.
        * ``"newSponsor"`` bumps ``self.total_new_members`` and stores a JSON
          record in ``self.sc_msgs``.
        * ``"superChat"``, ``"superSticker"`` and ``"sponsorMessage"`` update
          ``self.donors``, the counters, ``self.stats`` and ``self.sc_msgs``
          and queue rows for the three prepared statements.

        Afterwards ``self.msg_counter`` is set from ``amount["amount_sc"]``,
        the queued rows are written inside one transaction and a summary line
        is logged.

        :param data: chat batch exposing an ``items`` sequence.
        :param amount: mapping read via ``amount["amount_sc"]``; the same
            object is appended to ``self.stats`` once per paid/member message.
        """
        items = data.items
        if len(items) == 0:
            return

        batch_started = datetime.now(timezone.utc)
        name_rows = []
        channel_rows = []
        message_rows = []
        paid_types = ("superChat", "superSticker", "sponsorMessage")

        for chat in items:
            kind = chat.type
            if kind == "placeholder":
                self.placeholders += 1
            elif kind == "newSponsor":
                # timestamps are divided by 1000 before conversion (milliseconds -> seconds)
                sent_at = datetime.fromtimestamp(chat.timestamp / 1000.0, timezone.utc)
                sponsor_info = {
                    "type": kind,
                    "id": chat.id,
                    "time": chat.timestamp,
                    "userid": chat.author.channelId,
                    "member_level": chat.member_level,
                    "debugtime": sent_at.isoformat(),
                }
                self.total_new_members += 1
                self.sc_msgs.add(json.dumps(sponsor_info))
            elif kind in paid_types:
                # normalise the currency on the chat object itself when a mapping exists
                if chat.currency in self.clean_currency:
                    chat.currency = self.clean_currency[chat.currency]
                sent_at = datetime.fromtimestamp(chat.timestamp / 1000.0, timezone.utc)
                # when videoinfo["live"] is "none" the name is stamped with the batch time
                if self.videoinfo["live"] == "none":
                    name_seen_at = batch_started
                else:
                    name_seen_at = sent_at
                author_name = chat.author.name
                author_id = chat.author.channelId
                message_id = chat.id
                name_rows.append((author_id, author_name, sent_at, name_seen_at))
                channel_rows.append((author_id, author_name, False))

                # remember every display name a donor has used
                donor = self.donors.setdefault(author_id, {"names": [], "donations": {}})
                if author_name not in donor["names"]:
                    donor["names"].append(author_name)

                text = chat.message
                colour = chat.bgColor
                currency = chat.currency.replace(u'\xa0', '')
                # key order matters: the JSON string is what gets de-duplicated in sc_msgs
                info = {
                    "type": kind,
                    "id": message_id,
                    "time": chat.timestamp,
                    "currency": currency,
                    "value": chat.amountValue,
                    "weekday": sent_at.weekday(),
                    "hour": sent_at.hour,
                    "minute": sent_at.minute,
                    "userid": author_id,
                    "message": text,
                    "color": colour,
                    "debugtime": sent_at.isoformat(),
                }
                if kind == "sponsorMessage":
                    self.total_member_msgs += 1
                    info["member_level"] = chat.member_level
                else:
                    self.total_counted_msgs += 1
                message_rows.append((self.videoid, message_id, author_id, text, sent_at,
                                     currency, Decimal(chat.amountValue), colour))
                self.stats.append(amount)
                self.sc_msgs.add(json.dumps(info))

        self.msg_counter = amount["amount_sc"]
        async with self.conn.transaction():
            await self.insert_channels.executemany(channel_rows)
            await self.channel_name_history.executemany(name_rows)
            await self.insert_messages.executemany(message_rows)

        elapsed_ms = (datetime.now(timezone.utc) - batch_started).total_seconds() * 1000
        counters = (f"{self.msg_counter}/{self.total_counted_msgs} superchats, "
                    f"{self.total_new_members} new members, "
                    f"{self.total_member_msgs} member anniversary scs took {elapsed_ms}"
                    f" ms, placeholders: {self.placeholders}")
        await self.log_output(
            self.videoinfo["channel"] + " " + self.videoinfo["title"] + " "
            + items[-1].elapsedTime + " " + counters)

    def generate_wordcloud(self,log):
        """Create a ``superchat_wordcloud`` for *log* and call its ``generate()``.

        The word cloud object is constructed with ``logname`` set to this
        instance's ``videoid``. Nothing is returned.

        :param log: passed through unchanged as the first argument of
            ``superchat_wordcloud``.
        """
        superchat_wordcloud(log, logname=self.videoid).generate()

    async def log_output(self, logmsg, level=20):
        """Format *logmsg* as text and log it through ``self.logger``.

        A tuple is rendered as its parts converted with ``str`` and separated
        by single spaces; a ``str`` is logged unchanged; anything else
        (dict, number, ``None``, ...) is logged as ``str(logmsg)``.

        The ``self.logger.log`` call is handed to
        ``self.loop.run_in_executor`` with ``self.t_pool`` as the executor.

        :param logmsg: message to log; any object is accepted.
        :param level: numeric logging level, default 20 (``logging.INFO``).
        """
        if isinstance(logmsg, tuple):
            # join never needs len(), so unsized non-tuple values can no
            # longer fail before reaching the str() fallback below
            msg_string = " ".join(str(msg_part) for msg_part in logmsg)
        elif isinstance(logmsg, str):
            msg_string = logmsg
        else:
            msg_string = str(logmsg)
        await self.loop.run_in_executor(self.t_pool, self.logger.log, level, msg_string)
        
    def exception_handling(self,loop,context):
        """Log an exception context at level 40 (``logging.ERROR``).

        Writes a fixed "Exception caught" line followed by *context* itself
        to ``self.logger``.

        :param loop: accepted but not used by this handler.
        :param context: object logged as-is; presumably the context dict an
            asyncio loop passes to its exception handler - verify against the
            caller.
        """
        self.logger.log(40,"Exception caught")
        self.logger.log(40,context)
Exemple #9
0
 async def run(self):
     live_chat = LiveChatAsync(self.video_id, callback=self.on_message)
     while live_chat.is_alive():
         await asyncio.sleep(3)