コード例 #1
0
    def get_check(self):
        ''' Detect newly listed videos, then run the comment check on every video.

        Returns the list of ``[aid, comment]`` pairs that were checked, or
        ``None`` when the run stops early (current time inside the ignore
        window, or ``self.assign_up_mid`` equal to -1).
        '''
        self.load_av_lists()
        # [aid, comment] pairs for every video whose aid does not match ignore_list.
        av_id_list = [[ii['aid'], ii['comment']]
                      for ii in self.av_id_map.values()
                      if not re.findall(self.ignore_list, str(ii['aid']))]
        av_map = {ii['aid']: ii for ii in self.av_id_map.values()}
        self.comment_next = {ii: True for (ii, _) in av_id_list}
        # Only look for new videos when a previous list exists and its length
        # differs from the freshly loaded one.
        if self.av_id_list and len(
                self.av_id_list) and len(self.av_id_list) != len(av_id_list):
            new_av_id = [
                ii for (ii, _) in av_id_list
                if not ii in self.av_id_list and not ii in self.del_map
            ]
            # Give every new video an empty rank entry.
            self.rank_map = {**self.rank_map, **{ii: [] for ii in new_av_id}}
            echo(1, new_av_id)
            for ii in new_av_id:
                # `{}` is filled with the aid here; `%d` is left in place and
                # substituted with 1 and 2 by the two os.system calls below.
                shell_str = 'nohup ipython3 bilibili/bsocket.py {} %d >> log.txt 2>&1 &'.format(
                    ii)
                echo(0, shell_str)
                os.system(shell_str % 1)
                os.system(shell_str % 2)
                # email_str is passed as the second argument of send_email,
                # email_str2 as the first.
                email_str = '{} av:{} was releasing at {}!!! Please check the auto pipeline.'.format(
                    av_map[ii]['title'], ii, time_str(av_map[ii]['created']))
                email_str2 = '{} {} is release at {}.\nPlease check the online & common program.\n\nBest wish for you\n--------\nSend from script by gunjianpan.'.format(
                    av_map[ii]['title'], time_str(av_map[ii]['created']),
                    self.BASIC_AV_URL % ii)
                send_email(email_str2, email_str)
                self.update_ini(ii)
                self.public[ii] = [av_map[ii]['created'], av_map[ii]['mid']]

        self.av_id_list = [ii for (ii, _) in av_id_list]
        # Current time of day as fractional hours, compared with the ignore window.
        now_hour = int(time_str(time_format='%H'))
        now_min = int(time_str(time_format='%M'))
        now_time = now_hour + now_min / 60
        if now_time > self.ignore_start and now_time < self.ignore_end:
            return
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe while comment.pkl is written exclusively by this program.
        if os.path.exists('{}comment.pkl'.format(comment_dir)):
            with codecs.open('{}comment.pkl'.format(comment_dir), 'rb') as f:
                self.comment = pickle.load(f)
        if self.assign_up_mid == -1:
            return

        # One thread per video: build them all, start them all, then join them all.
        threading_list = []
        for (ii, jj) in av_id_list:
            if ii not in self.comment:
                self.comment[ii] = {}
            work = threading.Thread(target=self.comment_check_schedule,
                                    args=(
                                        ii,
                                        jj,
                                    ))
            threading_list.append(work)
        for work in threading_list:
            work.start()
        for work in threading_list:
            work.join()
        # Persist self.comment after all worker threads have finished.
        with codecs.open('{}comment.pkl'.format(comment_dir), 'wb') as f:
            pickle.dump(self.comment, f)
        return av_id_list
コード例 #2
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
 def check_rank_rose(self, bv_id: str, rank_info: dict):
     """Record a ranking entry for ``bv_id`` and email a notification.

     Nothing happens when ``self.check_rank_info`` rejects the entry.
     Otherwise the rank position is stored in ``self.rank`` and an email is
     sent whose subject carries the elapsed time since publication and whose
     body carries the published/now timestamps.

     Args:
         bv_id: video id the ranking entry belongs to.
         rank_info: mapping that provides at least ``id``, ``pts``, ``day``
             and ``type``.
     """
     if not self.check_rank_info(bv_id, rank_info):
         return
     idx = rank_info["id"]
     b_id = bv_id + str(rank_info["day"]) + str(rank_info["type"])
     self.rank["T"].setdefault(b_id, []).append(idx // 10)
     self.rank["L"][b_id] = idx
     hot_tag = "[热门]" if self.is_hot(bv_id) else ""
     if bv_id in self.bv_ids:
         info = self.bv_ids[bv_id]
         title = info["title"].split("|", 1)[0]
         created = info["created"]
         ts = get_time_str((time.time() - created) / 60)
         ts_str = time_str(created) + "-" + time_str()
     else:
         title, ts, ts_str = "", "", ""
     # Both messages are built with str.format only. The previous version
     # formatted first and then applied `%` to the result, which raised as
     # soon as the video title itself contained a `%` character.
     template = "热榜{}({}){}|{}Day List, Rank: {}, Score: {}"
     tail = (title, rank_info["day"], idx, rank_info["pts"])
     rank_context = template.format(hot_tag, ts_str, *tail)
     rank_str = template.format(hot_tag, ts, *tail)
     send_email(rank_context, rank_str)
コード例 #3
0
    def check_rank(self, av_id: int, times=0):
        """Fetch the stats of ``av_id``, append them to its CSV and trigger history ranking.

        The request is retried recursively when ``self.have_error`` or
        ``self.check_view`` reports a falsy result. After a successful write
        the video is moved to ``self.del_map`` once it is considered expired.

        Args:
            av_id: video id to check.
            times: number of attempts already made (used for the retry limit).
        """
        rank_list = self.rank_map[av_id] if av_id in self.rank_map else []
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})

        json_req = proxy_req(self.ARCHIVE_STAT_URL % av_id, 1)

        if not self.have_error(json_req):
            # Videos already in av_id_list get 6 attempts, others 3.
            max_retry = 6 if av_id in self.av_id_list else 3
            if times < max_retry:
                self.check_rank(av_id, times + 1)
            return
        stat = json_req['data']
        need = [
            'view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku'
        ]
        data = [stat[index] for index in need]
        if not self.check_view(av_id, data[0]):
            if times < 3:
                self.check_rank(av_id, times + 1)
            return
        if len(rank_list):
            data = [time_str(), *data, *rank_list[:2], *rank_list[-2:]]
        else:
            data = [time_str(), *data]

        with codecs.open('%s%d.csv' % (history_dir, av_id),
                         'a',
                         encoding='utf-8') as f:
            f.write(','.join([str(index) for index in data]) + '\n')

        now = int(time.time())
        if av_id in self.last_check:
            expired = now - self.last_check[av_id] > one_day
        else:
            expired = now > one_day + self.begin_timestamp
        if expired:
            self.del_map[av_id] = 1
            # pop() instead of del: the id may already be gone from rank_map
            # (rank_list above falls back to [] for exactly that case).
            self.rank_map.pop(av_id, None)
            if av_id == self.basic_av_id:
                clean_csv(av_id)
        self.last_view[av_id] = data[1]
        now_time = time.time()
        if av_id not in self.public:
            # The debug line used to index self.public[av_id] unconditionally
            # and raised KeyError for videos without a publish record.
            echo(0, av_id, av_id == self.basic_av_id, False)
            return
        public_info = self.public[av_id]
        # NOTE(review): if one_day is a number of seconds, this window is
        # 3.1 * 60 days rather than 3.1 days - confirm the intended unit.
        in_window = (now_time - public_info[0]) < 3.1 * one_day * 60
        echo(0, av_id, av_id == self.basic_av_id, True, in_window,
             public_info)
        if av_id != self.basic_av_id or not in_window:
            return
        time_gap = (now_time - public_info[0]) / 60
        slot = round(time_gap / 10)
        echo(3, 'Time Gap:', slot)
        if (slot in self.history_check_list
                and slot not in self.history_check_finish):
            self.history_rank(time_gap, data, av_id)
コード例 #4
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def have_bad_comment(self,
                         req_list: list,
                         av_id: int,
                         pn: int,
                         parent_rpid=None):
        """Send a warning email when a comment matches ``self.keyword``.

        Args:
            req_list: 11 comment fields in the order rpid, ctime, like, plat,
                current_level, uname, sex, content, sign, idx, sort.
            av_id: id of the video the comment belongs to.
            pn: page number the comment was found on.
            parent_rpid: id of the parent comment, appended to the reply id
                when truthy.

        Returns:
            ``True`` when no email is sent (no keyword match, ignored reply
            id, or email limit reached); ``None`` after an email was sent.
        """
        (rpid, ctime, like, plat, current_level, uname, sex, content, sign,
         idx, sort) = req_list
        ctimes = time_str(ctime, time_format=self.T_FORMAT)
        ctime = time_str(ctime)

        if not regex.findall(self.keyword, content):
            return True
        if parent_rpid:
            rpid = "{}-{}".format(rpid, parent_rpid)
        else:
            rpid = "{}".format(rpid)

        url = self.BASIC_AV_URL % av_id
        rpid_str = "{}-{}".format(av_id, rpid)
        if any(rpid == kk for ii in self.ignore_rpid.values() for kk in ii):
            return True
        sent = self.email_send_time.get(rpid_str, 0)
        if self.email_limit < 1 or (rpid_str in self.email_send_time
                                    and sent >= self.email_limit):
            return True
        self.email_send_time[rpid_str] = sent + 1
        # Fall back to an empty title instead of raising IndexError when no
        # known video carries this aid.
        rank_info = next(
            (info for info in self.bv_ids.values() if info["aid"] == av_id),
            None)
        title = rank_info["title"].split("|", 1)[0] if rank_info else ""
        sort = "热门" if sort else "时间"

        # Argument order follows the template: Date, Url, Title, ...
        # (url and title used to be swapped, so each landed under the
        # other's label).
        email_content = "Date: {}\nUrl: {}\nTitle: {},\nPage: {} #{}@{},\nUser: {},\nSex: {},\nsign: {}\nlike: {}\nplat: {}\nlevel:{}\nconetnt: {},\n".format(
            ctime,
            url,
            title,
            pn,
            idx,
            rpid,
            uname,
            sex,
            sign,
            like,
            plat,
            current_level,
            content,
        )
        email_subject = "评论({}){}{}{}#{}".format(ctimes, title, sort, pn, idx)
        echo("4|warning", email_content, email_subject)
        send_email(email_content, email_subject, assign_rec=self.assign_rec)
コード例 #5
0
 def history_rank(self, time_gap: int, now_info: list, av_id: int):
     """Email how ``av_id`` ranks by views against the stored history.

     ``time_gap`` is rounded to a multiple of 10 and used as the key into
     ``self.history_map``. ``now_info`` is the current stats row (index 1
     is compared as the view count). The handled slot is appended to
     ``self.history_check_finish``.
     """
     echo(0, 'send history rank')
     time_gap = round(time_gap / 10) * 10
     # Keep only history rows whose second field is truthy.
     candidates = {}
     for key, row in self.history_map[time_gap].items():
         if row[1]:
             candidates[key] = row
     views = [int(row[1]) for row in candidates.values()]
     self_pos = len(views)  # position of the current video once appended
     views.append(now_info[1])
     order = np.argsort(-np.array(views))  # indices by descending views
     candidate_ids = list(candidates.keys())
     now_sorted = list(order).index(self_pos) + 1
     top_others = []
     for rank, pos in enumerate(order[:4], start=1):
         if pos != self_pos:
             top_others.append((rank, candidate_ids[pos]))
     time_tt = self.get_time_str(time_gap)
     email_title = 'av{}发布{}, 本年度排名No.{}/{}, 播放量: {}, 点赞: {}, 硬币: {}, 收藏: {}, 弹幕: {}'.format(
         av_id, time_tt, now_sorted, len(views), now_info[1],
         now_info[2], now_info[3], now_info[4], now_info[7])
     email_title += self.get_history_rank(now_info)
     pieces = ['{}\n\n'.format(email_title)]
     for no, av in top_others[:3]:
         row = candidates[av]
         meta = self.av_id_map[av]
         pieces.append(
             '{}, av{}, 本年度No.{}, 播放量: {}, 点赞: {}, 硬币: {}, 收藏: {}, 弹幕: {}{}, 发布时间: {}\n'.format(
                 meta['title'].split('|', 1)[0], av, no, row[1], row[2],
                 row[3], row[4], row[7], self.get_history_rank(row),
                 time_str(self.av_id_map[av]['created'])))
     pieces.append('\nBest wish for you\n--------\nSend from script by gunjianpan.')
     send_email(''.join(pieces), email_title)
     self.history_check_finish.append(round(time_gap / 10))
コード例 #6
0
    def public_monitor(self, av_id: int, times: int):
        ''' Record follower count and stats now and again one day after publication.

        Both snapshots are appended as one row to ``public.csv``. Returns
        early, without writing, when the one-day mark has already passed.
        ``times`` is accepted but not used.
        '''
        self.public_list.append(av_id)
        data_time, mid = self.public[av_id]

        def snapshot(empty_default):
            # Refresh follower count and stats, wait, then read what was stored.
            self.get_star_num(mid, 0)
            self.check_rank_v2(av_id, 0)
            time.sleep(5)
            return self.star.get(mid, 0), self.data_v2.get(av_id, empty_default)

        follower, origin_data = snapshot([])
        sleep_time = data_time + one_day - int(time.time())
        if sleep_time < 0:
            return
        print('Monitor Begin %d' % (av_id))
        time.sleep(sleep_time)
        follower_2, one_day_data = snapshot([])

        row = [time_str(data_time), av_id, follower, follower_2]
        row.extend(origin_data)
        row.extend(one_day_data)
        with codecs.open(data_dir + 'public.csv', 'a', encoding='utf-8') as f:
            f.write(','.join(map(str, row)) + '\n')
コード例 #7
0
 def get_star_num(self, mid: int, times: int, load_disk=False):
     ''' Fetch the follower count of ``mid`` and store it in ``self.star``.

     The request is retried recursively (while ``times`` < 3) when the
     response is missing, not HTTP 200, or does not look like a wrapped JSON
     payload. With ``load_disk`` set and ``self.check_star`` accepting the
     value, it is also stored in ``self.last_star`` and appended to star.csv.
     '''
     url = self.RELATION_STAT_URL % mid
     header = {
         **headers,
         **{
             'Origin': self.BILIBILI_URL,
             'Referer': self.AV_URL
         }
     }
     header.pop('Host', None)
     req = proxy_req(url, 2, header=header)
     if (req is None or req.status_code != 200 or len(req.text) < 8
             or '{' not in req.text):
         if times < 3:
             self.get_star_num(mid, times + 1, load_disk)
         return
     try:
         # The first 7 characters and the last one are stripped before
         # parsing (presumably a JSONP-style wrapper - confirm).
         json_req = json.loads(req.text[7:-1])
         self.star[mid] = json_req['data']['follower']
         if load_disk and self.check_star(mid, self.star[mid]):
             self.last_star[mid] = self.star[mid]
             with open('{}star.csv'.format(data_dir), 'a') as f:
                 f.write('%s,%d\n' % (time_str(), self.star[mid]))
     except (ValueError, KeyError, TypeError, OSError):
         # Still best-effort: a malformed payload or a failed write is
         # skipped. The previous bare `except:` also swallowed
         # KeyboardInterrupt and SystemExit.
         return
コード例 #8
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def public_monitor(self, bv_id: str):
        """Record follower count and stats now and again one day after publication.

        Both snapshots are appended as one row to ``public.csv``. Returns
        early, without writing, when the one-day mark has already passed.

        Args:
            bv_id: video id; must be present in ``self.public["T"]``.
        """
        self.public["L"].append(bv_id)
        created, mid = self.public["T"][bv_id]
        self.get_star_num(mid)
        self.check_rank_v2(bv_id)
        time.sleep(5)
        follower = self.star["T"][mid] if mid in self.star["T"] else 0
        data1 = self.data_v2[bv_id] if bv_id in self.data_v2 else {}
        sleep_time = created + one_day - int(time_stamp())
        if sleep_time < 0:
            return
        echo("4|debug", "Monitor Begin %s" % (bv_id))
        time.sleep(sleep_time)
        self.get_star_num(mid)
        self.check_rank_v2(bv_id)
        time.sleep(5)
        follower_2 = self.star["T"][mid] if mid in self.star["T"] else 0
        # Fallback must be a dict: `.values()` is called on it below.
        data2 = self.data_v2[bv_id] if bv_id in self.data_v2 else {}

        # The first snapshot is `data1`. The row used to read `data.values()`
        # while `data` itself was still being defined (UnboundLocalError).
        data = [
            time_str(created),
            bv_id,
            follower,
            follower_2,
            *list(data1.values()),
            *list(data2.values()),
        ]
        with codecs.open(data_dir + "public.csv", "a", encoding="utf-8") as f:
            f.write(",".join([str(ii) for ii in data]) + "\n")
コード例 #9
0
ファイル: hotelDetail.py プロジェクト: zhujuanzhu/spider
    def prepare_req(self,
                    hotel_id: int = 4889292,
                    city_id: int = 2,
                    startDate: str = None,
                    depDate: str = None):
        """Set the Referer for ``hotel_id`` and request the promotion endpoint.

        Args:
            hotel_id: hotel id substituted into ``HOTEL_DETAIL_URL``.
            city_id: sent as the ``city`` field.
            startDate: check-in date string; defaults to
                ``time_str(-1, '%Y-%m-%d')`` evaluated at call time.
            depDate: check-out date string; defaults to one ``one_day`` after
                the current time, evaluated at call time.

        Returns:
            Whatever ``basic_req`` returns for the request.
        """
        # Defaults are resolved per call. As signature defaults they were
        # computed once at definition time and went stale in a long-running
        # process.
        if startDate is None:
            startDate = time_str(-1, '%Y-%m-%d')
        if depDate is None:
            depDate = time_str(int(time.time() + one_day), '%Y-%m-%d')
        referer_url = HOTEL_DETAIL_URL % hotel_id

        changeHeaders({'Referer': referer_url})
        # NOTE(review): 'cjeckout' and 'defalutVal' look misspelled; they are
        # kept as-is because the endpoint's expected field names are not
        # visible here - confirm against the API.
        data = {
            'city': city_id,
            'checkin': startDate,
            'cjeckout': depDate,
            'defalutVal': None
        }
        return basic_req(AJAX_PROMOTION_URL, 11, data=data)
コード例 #10
0
ファイル: hotelDetail.py プロジェクト: zhujuanzhu/spider
 def generate_other_params(self,
                           hotel_id: int = 4889292,
                           city_id: int = 2,
                           startDate: str = None,
                           depDate: str = None):
     ''' Build the query-parameter dict for a hotel detail request.

     Args:
         hotel_id: used for both ``MasterHotelID`` and ``hotel``.
         city_id: used for ``city``.
         startDate: start date string; defaults to
             ``time_str(-1, '%Y-%m-%d')`` evaluated at call time.
         depDate: departure date string; defaults to one ``one_day`` after
             the current time, evaluated at call time.

     Returns:
         dict with the fixed parameter set plus the four values above.
     '''
     # Defaults are resolved per call. As signature defaults they were
     # computed once at definition time and went stale in a long-running
     # process.
     if startDate is None:
         startDate = time_str(-1, '%Y-%m-%d')
     if depDate is None:
         depDate = time_str(int(time.time() + one_day), '%Y-%m-%d')
     params = {
         'psid': None,
         'MasterHotelID': hotel_id,
         'hotel': hotel_id,
         'EDM': 'F',
         'roomId': None,
         'IncludeRoom': None,
         'city': city_id,
         'showspothotel': 'T',
         'supplier': None,
         'IsDecoupleSpotHotelAndGroup': 'F',
         'contrast': 0,
         'brand': 776,
         'startDate': startDate,
         'depDate': depDate,
         'IsFlash': 'F',
         'RequestTravelMoney': 'F',
         'hsids': None,
         'IsJustConfirm': None,
         'contyped': 0,
         'priceInfo': -1,
         'equip': None,
         'filter': None,
         'productcode': None,
         'couponList': None,
         'abForHuaZhu': None,
         'defaultLoad': 'T',
         'esfiltertag': None,
         'estagid': None,
         'Currency': None,
         'Exchange': None,
         'minRoomId': 0,
         'maskDiscount': 0,
         'TmFromList': 'F',
         'th': 119,
         'RoomGuestCount': '1,1,0',
         'promotionf': None,
         'allpoint': None,
     }
     return params
コード例 #11
0
 def email_update_result(self, article_id: str, r_log: list, r_num: int):
     """Email a summary of an article update.

     The subject carries a timestamp, ``r_num`` out of ``len(r_log)`` and
     the article name; the body repeats them, followed by every entry of
     ``r_log`` on its own line.
     """
     share_url = self.share2article[article_id][-2]
     article_info = self.list_recent[share_url.split("/")[-1]]
     # NOTE(review): computed but not used anywhere below.
     name = article_info["name"].replace(".note", "")
     total = len(r_log)
     subject = "更新({}){}/{}条[{}]".format(
         time_str(time_format=self.T_FORMAT), r_num, total, article_info["name"]
     )
     lines = [
         "Title: {}".format(article_info["name"]),
         "Time: {}".format(time_str()),
         "Update Num: {}/{}条".format(r_num, total),
         "",
     ]
     lines.extend(r_log)
     send_email("\n".join(lines), subject, assign_rec=self.assign_rec)
コード例 #12
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
 def check_rank_v2(self, bv_id: str):
     """Store the current stats of ``bv_id`` in ``self.data_v2``.

     The stored dict is the ``stat`` mapping from ``get_view_detail``, plus a
     ``time`` entry, overlaid with the video's ``rank_map`` entry if any.
     Nothing is stored when the detail lookup fails or has no ``stat``.
     """
     rank_info = self.rank_map.get(bv_id, {})
     detail = self.get_view_detail(bv_id)
     if detail is None or "stat" not in detail:
         return
     snapshot = dict(detail["stat"])
     snapshot["time"] = time_str()
     snapshot.update(rank_info)
     self.data_v2[bv_id] = snapshot
コード例 #13
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def check_rank(self, bv_id: str):
        """Append the current stats of ``bv_id`` to its CSV and trigger history ranking.

        After the write, the video is moved to ``self.del_map`` once it is
        considered expired. ``self.history_rank`` is called for videos that
        are in ``self.public["T"]`` and ``self.assign_ids`` when the current
        10-minute slot is in ``self.history_check_list`` and not handled yet.
        """
        rank_info = self.rank_map[bv_id] if bv_id in self.rank_map else {}
        stat = self.get_view_detail(bv_id)
        if stat is None or "stat" not in stat:
            return
        data = {**stat["stat"], "time": time_str(), **rank_info}
        need = [
            "time",
            "view",
            "like",
            "coin",
            "favorite",
            "reply",
            "share",
            "danmaku",
            "id",
            "pts",
            "type",
            "day",
        ]
        # Known columns first, in fixed order, then any remaining fields.
        output = [str(data[ii]) for ii in need if ii in data]
        output += [str(v) for k, v in data.items() if k not in need]
        with codecs.open("{}{}.csv".format(history_dir, bv_id),
                         "a",
                         encoding="utf-8") as f:
            f.write(",".join(output) + "\n")

        now = int(time_stamp())
        if bv_id in self.last_check:
            expired = now - self.last_check[bv_id] > one_day / 2
        else:
            expired = now > one_day + self.begin_timestamp
        if expired:
            self.del_map[bv_id] = 1
            # pop() instead of del: the id may already be gone from rank_map
            # (rank_info above falls back to {} for exactly that case).
            self.rank_map.pop(bv_id, None)
            if bv_id == self.basic_bv_id:
                clean_csv(bv_id)
        self.last_view[bv_id] = data["view"]
        now_time = time_stamp()
        if bv_id not in self.public["T"] or bv_id not in self.assign_ids:
            return
        time_gap = (now_time - self.public["T"][bv_id][0]) / 60
        gap_limit = 4.5 * one_day / 60
        echo("0|debug", bv_id, time_gap < gap_limit, self.public["T"][bv_id])
        if time_gap >= gap_limit:
            return
        done = self.check_done.setdefault(bv_id, [])
        slot = int(time_gap / 10)
        echo("3|info", "Time Gap:", slot)
        if slot in self.history_check_list and slot not in done:
            self.history_rank(time_gap, data, bv_id)
コード例 #14
0
    def check_rank_v2(self, av_id: int, times=0):
        """Fetch the stats of ``av_id`` and store one row in ``self.data_v2``.

        The row is a timestamp followed by view, like, coin, favorite, reply,
        share and danmaku, plus the first two and last two items of the
        video's ``rank_map`` entry when it has one. The request is retried
        recursively while ``times`` < 3 when ``self.have_error`` is falsy.
        """
        rank_list = self.rank_map.get(av_id, [])
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})

        json_req = proxy_req(self.ARCHIVE_STAT_URL % av_id, 1)

        if not self.have_error(json_req):
            if times < 3:
                self.check_rank_v2(av_id, times + 1)
            return
        payload = json_req['data']
        stats = [
            payload[key]
            for key in ('view', 'like', 'coin', 'favorite', 'reply', 'share',
                        'danmaku')
        ]
        row = [time_str()]
        row.extend(stats)
        if len(rank_list):
            row.extend(rank_list[:2])
            row.extend(rank_list[-2:])
        self.data_v2[av_id] = row
コード例 #15
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
 def get_star_num(self, mid: int, load_disk: bool = False):
     """Fetch the follower count of ``mid`` and store it in ``self.star["T"]``.

     With ``load_disk`` set and ``self.check_star`` accepting the value, it
     is also stored in ``self.star["L"]`` and appended to star.csv.
     """
     star_json = self.get_api_req(self.RELATION_STAT_URL % mid,
                                  self.basic_bv_id, 1)
     if star_json is None:
         return
     self.star["T"][mid] = star_json["follower"]
     if load_disk and self.check_star(mid, star_json["follower"]):
         self.star["L"][mid] = star_json["follower"]
         line = "{},{}\n".format(time_str(), star_json["follower"])
         with open("{}star.csv".format(data_dir), "a") as f:
             f.write(line)
コード例 #16
0
 def decoder_tpwd_once(self, article_id: str, tpwd: str, mode: int = 0):
     """Decode ``tpwd`` and cache the result under ``self.tpwd_map[article_id]``.

     Only the keys listed in ``self.NEED_KEY`` are kept. ``validDate`` is
     rewritten to a ``time_str`` string and ``url`` is stripped. When
     ``mode`` is falsy, ``decoder_tpwd_url`` is called afterwards.
     """
     req = self.decoder_tpwd(tpwd)
     if req is None or not len(req):
         return
     record = {}
     for key in self.NEED_KEY:
         record[key] = req[key]
     raw_valid = record["validDate"]
     if raw_valid == self.ZERO_STAMP or "-" in raw_valid:
         valid_stamp = 1500000000
     else:
         # Parse the remaining-time text, subtract BASIC_STAMP and add the
         # current timestamp.
         remaining = time_stamp(time_format="%d天%H小时%M分%S秒",
                                time_str=req["validDate"])
         valid_stamp = remaining - self.BASIC_STAMP + time_stamp()
     record["validDate"] = time_str(valid_stamp)
     record["url"] = record["url"].strip()
     self.tpwd_map.setdefault(article_id, {})[tpwd] = record
     if mode:
         return
     self.decoder_tpwd_url(article_id, tpwd)
コード例 #17
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def load_rank(self):
        """Reload the rankings, dump rank_map to disk and warn about dropped videos.

        A warning email is sent twice for every id listed in
        ``self.have_assign["T"]`` that is absent from the combined
        ``self.have_assign[1]`` and ``self.have_assign[3]`` entries.
        """
        self.load_rank_index(1, 3)
        self.load_rank_index(1, 1)
        assign_1, assign_2 = self.have_assign[1], self.have_assign[3]
        have_assign = assign_1 + assign_2
        echo("4|debug", assign_1, assign_2, have_assign)
        # Ids listed under "T" that are in neither of the two entries above.
        not_ranks = [
            ii for ii in self.have_assign["T"] if not ii in have_assign
        ]
        # NOTE(review): have_assign is indexed by 1, 3 and "T" above and is
        # rebound here to the combined value; presumably load_rank_index
        # rebuilds the keyed structure before the next call - confirm.
        self.have_assign = have_assign

        echo(
            "4|debug",
            "Rank_map_len:",
            len(self.rank_map.keys()),
            "Empty:",
            len([1 for ii in self.rank_map.values() if not len(ii)]),
        )
        # One comma-separated line per rank_map entry: the key, then its values.
        youshan = [
            ",".join([str(kk) for kk in [ii, *list(jj.values())]])
            for ii, jj in self.rank_map.items()
        ]
        with codecs.open(data_dir + "youshang", "w", encoding="utf-8") as f:
            f.write("\n".join(youshan))

        if not len(not_ranks):
            return
        for bv_id in not_ranks:
            title = (self.bv_ids[bv_id]["title"].split("|", 1)[0]
                     if bv_id in self.bv_ids else "")
            no_rank_warning = "下榜({}){},{}".format(
                time_str(time_format=self.T_FORMAT), title,
                self.NO_RANK_CONSTANT)
            # The same warning is sent twice, pi**2 (about 9.87) seconds apart.
            send_email(no_rank_warning, no_rank_warning,
                       self.special_info_email)
            time.sleep(pow(np.pi, 2))
            send_email(no_rank_warning, no_rank_warning,
                       self.special_info_email)
            echo("4|error", no_rank_warning)
コード例 #18
0
 def get_comment_detail(self,
                        comment: dict,
                        av_id: int,
                        pn: int,
                        parent_rpid=None) -> List:
     ''' Flatten one comment into a 10-field list and run the bad-comment check.

     Field order: rpid, ctime, like, plat, current_level, uname, sex,
     content, sign, idx. Commas and newlines in content and sign are
     replaced by spaces after ``have_bad_comment`` has seen the raw values.
     '''
     ctime = time_str(comment['ctime'])
     rpid = comment['rpid']
     member = comment['member']
     body = comment['content']
     like = comment['like']
     idx = comment['idx']
     uname, sex, sign = member['uname'], member['sex'], member['sign']
     current_level = member['level_info']['current_level']
     message, plat = body['message'], body['plat']

     req_list = [
         rpid, ctime, like, plat, current_level, uname, sex, message, sign,
         idx
     ]
     self.have_bad_comment(req_list, av_id, pn, parent_rpid)

     def one_line(text):
         # Keep each field on a single CSV cell.
         return text.replace(',', ' ').replace('\n', ' ')

     req_list[-2] = one_line(req_list[-2])
     req_list[-3] = one_line(req_list[-3])
     return req_list
コード例 #19
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def get_danmaku(self, av_id: int):
        """Download the danmaku of every page of ``av_id`` and write them to a CSV.

        One ``get_danmaku_once`` task is submitted per page cid. The
        collected entries are stored in ``self.dm_map[av_id]``, sorted by
        their first numeric field and written to ``<dm_dir><av_id>_dm.csv``.
        Returns early when ``get_view_detail`` yields ``None``.
        """
        self.dm_map = {}
        # NOTE(review): the executor is kept on self and never shut down here;
        # whether other methods submit to it is not visible - confirm before
        # adding a shutdown().
        self.dm_exec = ThreadPoolExecutor(max_workers=100)
        mkdir(dm_dir)
        output_path = "{}{}_dm.csv".format(dm_dir, av_id)

        view_data = self.get_view_detail(av_id)
        if view_data is None:
            return

        cid_list = [ii["cid"] for ii in view_data["pages"]]
        # self.dm_map was reset above, so this lookup currently always
        # yields {}.
        dm_map = self.dm_map[av_id] if av_id in self.dm_map else {}
        cid_list = [
            ii for ii in cid_list if ii not in dm_map or len(dm_map[ii]) == 0
        ]
        dm_thread = [
            self.dm_exec.submit(self.get_danmaku_once, ii) for ii in cid_list
        ]
        need_dm = view_data["stat"]["danmaku"]
        need_p = len(view_data["pages"])
        echo(2, "Begin {} p thread, need {} dm".format(need_p, need_dm))

        # Drain the futures once (they used to be drained twice, with the
        # first result discarded).
        dm_list = [ii.result() for ii in as_completed(dm_thread)]
        dm_list = [ii for ii in dm_list if ii is not None]
        # Each result is unpacked as (entries, cid).
        dm_map = {**dm_map, **{jj: ii for ii, jj in dm_list}}
        dm_num = sum(len(ii) for ii in dm_map.values())
        p_num = len(dm_map)
        self.dm_map[av_id] = dm_map

        title = "{} {} Total {} p {} dm, Actual {} p {} dm".format(
            view_data["title"],
            self.BASIC_AV_URL % av_id,
            need_p,
            need_dm,
            p_num,
            dm_num,
        )
        result = [title, ""]
        for cid in view_data["pages"]:
            if cid["cid"] not in dm_map:
                continue
            dm = dm_map[cid["cid"]]
            # Each entry is (comma-separated attribute string, text); field 0
            # is used as the sort key and field 4 as a timestamp.
            dm = [[
                float(ii.split(",")[0]),
                time_str(time_stamp=int(ii.split(",")[4])),
                jj,
            ] for ii, jj in dm]
            dm = sorted(dm, key=lambda i: i[0])
            dm = [",".join([get_min_s(str(ii)), jj, kk]) for ii, jj, kk in dm]
            p_title = "p{} {} Total {} dm".format(cid["page"], cid["part"],
                                                  len(dm))
            result.extend([p_title, *dm, ""])

        # Explicit UTF-8: the content is not ASCII and the platform default
        # encoding may not be able to represent it.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(result))
        # Arguments follow the template order: pages, dm count, path, then
        # the expected pages / dm count (the path used to be passed first).
        print_str = "Load {} p {} dm to {}, except {} p {} m".format(
            len(dm_list), dm_num, output_path, need_p, need_dm)
        if need_dm == dm_num:
            echo(1, print_str, "success")
        else:
            echo(0, print_str, "error")
コード例 #20
0
ファイル: bsocket.py プロジェクト: zhujuanzhu/spider
 def get_data(self, origin_data: list) -> str:
     ''' Return one CSV line: the current time_str() followed by origin_data. '''
     fields = [time_str()]
     fields.extend(origin_data)
     return ','.join(map(str, fields)) + '\n'
コード例 #21
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
 def history_rank(self, time_gap: int, now_info: dict, bv_id: int):
     """Email how ``bv_id`` ranks by views against the stored history.

     ``time_gap`` is truncated to a multiple of 10 and used as the key into
     ``self.history_map``. ``now_info`` is the current stats dict (its
     ``view`` entry is compared). The handled slot is appended to
     ``self.check_done[bv_id]``.
     """
     echo("0|info", "send history rank")
     time_gap = int(time_gap / 10) * 10
     # Keep only history rows whose second field is truthy.
     candidates = {}
     for key, row in self.history_map[time_gap].items():
         if row[1]:
             candidates[key] = row
     if len(candidates) < 5:
         self.load_history_data()
     views = [int(row[1]) for row in candidates.values()]
     self_pos = len(views)  # position of the current video once appended
     views.append(now_info["view"])
     order = np.argsort(-np.array(views))  # indices by descending views
     candidate_ids = list(candidates.keys())
     now_sorted = list(order).index(self_pos) + 1
     top_others = []
     for rank, pos in enumerate(order[:4], start=1):
         if pos != self_pos:
             top_others.append((rank, candidate_ids[pos]))
     time_tt = get_time_str(time_gap)
     title = self.bv_ids[bv_id]["title"].split("|", 1)[0]
     # Values shared by the subject line and the first line of the body.
     stats = (
         now_sorted,
         len(views),
         now_info["view"],
         now_info["like"],
         now_info["coin"],
         now_info["favorite"],
         now_info["danmaku"],
     )
     title_email = "排名(发布{}){}本年度排名No.{}/{}, 播放量: {}, 点赞: {}, 硬币: {}, 收藏: {}, 弹幕: {}".format(
         time_tt, title, *stats)
     email_title = "bv{}发布{}, 本年度排名No.{}/{}, 播放量: {}, 点赞: {}, 硬币: {}, 收藏: {}, 弹幕: {}".format(
         bv_id, time_tt, *stats)
     email_title += self.get_history_rank(now_info)
     pieces = ["{}\n\n".format(email_title)]
     for no, bv in top_others[:3]:
         row = candidates[bv]
         pieces.append(
             "{}, bv{}, 本年度No.{}, 播放量: {}, 点赞: {}, 硬币: {}, 收藏: {}, 弹幕: {}, 累计播放: {}{}, 发布时间: {}\n".format(
                 self.bv_ids[bv]["title"].split("|", 1)[0],
                 bv,
                 no,
                 row[1],
                 row[2],
                 row[3],
                 row[4],
                 row[7],
                 self.bv_ids[bv]["play"],
                 self.get_history_rank(row),
                 time_str(self.bv_ids[bv]["created"]),
             ))
     send_email("".join(pieces), title_email)
     self.check_done[bv_id].append(round(time_gap / 10))
コード例 #22
0
ファイル: upBilibili.py プロジェクト: zhujuanzhu/spider
    def get_check(self):
        """Detect newly listed videos, then run the comment check on every video.

        Returns the list of ``[bvid, aid, comment]`` triples that were
        checked, or ``None`` when the run stops early (current time inside
        the ignore window, or ``self.assign_mid`` equal to -1).
        """
        self.delay_load_history_data()
        # [bvid, aid, comment] for every video whose aid does not match ignore_list.
        bv_list = [[ii["bvid"], ii["aid"], ii["comment"]]
                   for ii in self.bv_ids.values()
                   if not regex.findall(self.ignore_list, str(ii["aid"]))]
        bv_map = {ii["bvid"]: ii for ii in self.bv_ids.values()}
        # Only look for new videos when a previous list exists and its length
        # differs from the freshly built one.
        if self.bv_list and len(
                self.bv_list) and len(self.bv_list) != len(bv_list):
            new_bv_list = [(ii, jj) for ii, jj, _ in bv_list
                           if not ii in self.bv_list and not ii in self.del_map
                           ]
            # Give every new video an empty rank entry.
            self.rank_map = {
                **self.rank_map,
                **{ii: {}
                   for ii, _ in new_bv_list}
            }
            echo("1|error", "New Bv av ids:", new_bv_list)
            for bv_id, av_id in new_bv_list:
                rank_info = bv_map[bv_id]
                # `{}` is filled with the aid here; `%d` is left in place and
                # substituted with 1 and 2 by the two os.system calls below.
                shell_str = "nohup python3 bilibili/bsocket.py {} %d >> log.txt 2>&1 &".format(
                    av_id)
                echo("0|error", "Shell str:", shell_str)
                os.system(shell_str % 1)
                os.system(shell_str % 2)
                # email_str is passed as the second argument of send_email,
                # email_str2 as the first.
                email_str = "发布({}){}#{} {}".format(
                    time_str(rank_info["created"], time_format=self.T_FORMAT),
                    rank_info["title"],
                    bv_id,
                    av_id,
                )
                email_str2 = "{} {} is release at {}.\nPlease check the online & common program.".format(
                    rank_info["title"],
                    time_str(rank_info["created"]),
                    self.BASIC_BV_URL % bv_id,
                )
                send_email(email_str2, email_str, self.special_info_email)
                self.update_ini(bv_id, av_id)
                self.public["T"][bv_id] = [
                    rank_info["created"], rank_info["mid"]
                ]
                self.last_check[bv_id] = int(time_stamp())

        self.bv_list = [ii for (ii, _, _) in bv_list]
        # Current time of day as fractional hours, compared with the ignore window.
        now_hour = int(time_str(time_format="%H"))
        now_min = int(time_str(time_format="%M"))
        now_time = now_hour + now_min / 60
        if now_time > self.ignore_start and now_time < self.ignore_end:
            return
        if self.assign_mid == -1:
            return

        # threads = [self.pool.submit(self.check_type_req, bv_id) for bv_id in rank_map.keys()]
        # list(as_completed(threads))
        # One thread per video, called with (aid, comment): build them all,
        # start them all, then join them all.
        threading_list = []
        for (_, ii, jj) in bv_list:
            work = threading.Thread(target=self.comment_check_schedule,
                                    args=(ii, jj))
            threading_list.append(work)
        for work in threading_list:
            work.start()
        for work in threading_list:
            work.join()
        return bv_list
コード例 #23
0
ファイル: getproxy.py プロジェクト: Marin111/spider-1
 def log_write(self, url):
     """
     Append one line to proxy.log: the current time_str() followed
     directly by url.
     """
     with codecs.open("proxy.log", 'a', encoding='utf-8') as log_file:
         entry = time_str() + url
         log_file.write(entry + '\n')
コード例 #24
0
ファイル: bsocket.py プロジェクト: onlyoneprogram/spider
 def get_data(self, origin_data: list) -> str:
     """ Return one CSV line: the current time_str() followed by origin_data. """
     row = [str(time_str())]
     for item in origin_data:
         row.append(str(item))
     return ",".join(row) + "\n"