Ejemplo n.º 1
0
    def control(vDelta=0.0, hDelta=0.0):
        """Nudge the vertical and/or horizontal servo by a duty-cycle delta.

        The current duty cycle of each axis is persisted in the cache; a
        move that would leave the valid 2.5-12.5 duty-cycle range is
        silently ignored.
        """
        Servo.initialize()

        def _move(pin, cache_key, delta):
            # Shared per-axis logic: read cached position, apply delta,
            # range-check, persist, then pulse the PWM pin.
            duty_cycle = float(Cache().get(cache_key)) + delta
            if duty_cycle < 2.5 or duty_cycle > 12.5:
                return False
            print(duty_cycle)
            Cache().set(cache_key, duty_cycle)
            pin.ChangeDutyCycle(duty_cycle)
            time.sleep(0.02)
            # Dropping the duty cycle to 0 stops the pulse so the servo
            # does not jitter while holding position.
            pin.ChangeDutyCycle(0)
            time.sleep(0.2)
            return True

        if vDelta != 0:
            # An out-of-range vertical move aborts the whole call, matching
            # the original early-return behavior.
            if not _move(Servo.vPin, Servo.V_DB_KEY, vDelta):
                return

        if hDelta != 0:
            _move(Servo.hPin, Servo.H_DB_KEY, hDelta)
Ejemplo n.º 2
0
    def stat(self, unit="GB", update_cache=True):
        """Fetch and print the account's stats (magic points, traffic).

        Parameters
        ----------
        unit : str
            Unit used to normalize upload/download numbers (default "GB").
        update_cache : bool
            When True, persist the fresh up/down values so the next call
            can report deltas against them.

        Returns
        -------
        tuple
            (mp, up, down) — magic points and normalized traffic totals.
        """
        self.login_if_not()

        soup_obj = HttpUtils.get(self.site.stat_page,
                                 headers=self.site.login_headers)
        assert soup_obj is not None

        div_list = soup_obj.select(
            "table.mainouter tr td table tr td div[align='center']")
        assert len(div_list) == 1

        content = div_list[0].contents[0]
        # BUG FIX: the dot in the original pattern was unescaped, so it
        # matched any single character; escape it to match the literal
        # decimal point only (raw string per regex convention).
        m = re.search(r"获取(\d+\.\d+)个魔力", content)
        assert m
        mp = float(m.group(1))

        span_list = soup_obj.select("#usermsglink span")
        up = HttpUtils.pretty_format(span_list[1].contents[2], unit)
        down = HttpUtils.pretty_format(span_list[1].contents[4], unit)

        # Cached values come back as bytes; missing means "start at 0".
        prev_up = Cache().get(self.get_site_name() + "_up")
        prev_down = Cache().get(self.get_site_name() + "_down")

        if prev_up is None:
            prev_up = 0
        else:
            prev_up = float(prev_up.decode())

        if prev_down is None:
            prev_down = 0
        else:
            prev_down = float(prev_down.decode())

        delta_up = round(up - prev_up, 2)
        delta_down = round(down - prev_down, 2)
        if delta_down == 0:
            # Avoid division by zero when nothing new was downloaded.
            delta_ratio = "Inf"
        else:
            delta_ratio = round(delta_up / delta_down, 2)

        current_upload = round(up - down, 2)
        print(
            "%s, mp=%s, up=%s, down=%s, current=%s, delta_up=%s, delta_down=%s, delta_ratio=%s"
            % (str(time.strftime("%Y-%m-%d %H:%M:%S")), mp, up, down,
               current_upload, delta_up, delta_down, delta_ratio))

        if update_cache:
            Cache().set(self.get_site_name() + "_up", up)
            Cache().set(self.get_site_name() + "_down", down)

        return mp, up, down
Ejemplo n.º 3
0
    def ignore(self, seed_id=None):
        """Mark seeds as ignored by caching them with this alert's TTL.

        With a *seed_id*, only that seed is ignored; with no argument every
        seed currently on the home page is ignored.
        """
        if seed_id is not None:
            print("Ignore seed ID: " + str(seed_id))
            Cache().set_with_expire(seed_id, "", self.ttl)
            return

        # No id given: crawl the home page, keeping only seeds that are
        # not already cached.
        fresh = [s for s in self.crawl() if Cache().get(s.id) is None]
        for seed in fresh:
            print("Ignore seed: " + str(seed))
            Cache().set_with_expire(seed.id, str(seed), self.ttl)
Ejemplo n.º 4
0
    def check_and_notify(animation_id):
        """Compare the latest chapter number against the cached one and
        send an email notification when a newer chapter exists."""
        bucket_name = ShuHuiWatchDog.BUCKET_NAME_PREFIX + str(animation_id)
        cached = Cache().get(bucket_name)
        # -1 means "never seen", so any valid chapter counts as new.
        previous_chapter_num = -1 if cached is None else int(cached)

        comic_name, current_chapter_num = ShuHuiWatchDog.get_max_chapter_num(
            animation_id)

        if current_chapter_num == ShuHuiWatchDog.INVALID_CHAPTER_NUM:
            # Crawl failed: report the error instead of a chapter update.
            EmailSender.send("错误:鼠绘-" + comic_name, "无法抓取最新章节号")
        elif current_chapter_num > previous_chapter_num:
            EmailSender.send("鼠绘-{0}更新啦".format(comic_name),
                             "最新章节号是" + str(current_chapter_num))
            Cache().set(bucket_name, current_chapter_num)
Ejemplo n.º 5
0
    def monitor(self):
        """Sample current data, append a timestamped record to the cache
        bucket, and run the alert check on the sample."""
        sample = self.generate_data()
        stamp = datetime.datetime.now().strftime('%y%m%d-%H:%M:%S')
        Cache().append(self.get_bucket(),
                       stamp + self.DELIMITER + str(sample))

        # Alert when the sample is below/above the configured threshold.
        self.alert(sample)
Ejemplo n.º 6
0
    def filter(self, data):
        """Filter crawled seeds down to the ones worth acting on.

        Common strategy:
          1. hasn't been found before (not in cache)
          2. doesn't exceed the configured max size
        A configured site strategy then picks candidates, and any seed
        whose title matches the white list is force-included.
        """
        max_size = Config.get("seed_max_size_mb")
        data = list(
            filter(lambda x: x.size < max_size and Cache().get(x.id) is None,
                   data))

        # customized strategy
        final_seeds = []
        if Config.get("mteam_strategy") == "easy":
            final_seeds = self.easy_strategy(data)
        elif Config.get("mteam_strategy") == "medium":
            final_seeds = self.medium_strategy(data)
        elif Config.get("mteam_strategy") == "hard":
            final_seeds = self.hard_strategy(data)

        # white list
        # BUG FIX: the config entry may be absent (None), in which case
        # calling .split() raised AttributeError; guard like the sibling
        # filter() implementation does.
        white_lists_str = Config.get("putao_white_list")
        if white_lists_str is not None:
            white_lists = white_lists_str.split("|")
            for seed in data:
                for white_list in white_lists:
                    if re.search(white_list, seed.title):
                        final_seeds.append(seed)
                        break

        for seed in final_seeds:
            print("Find valuable seed: " + str(seed))

        return final_seeds
Ejemplo n.º 7
0
    def query(self, account_id, limit=21):
        """Return recent activity records for *account_id*.

        Reads the account's list of activity keys from redis, fetches each
        activity hash through a pipeline, and enriches every record with
        the owning user's nick/email/name (user lookups memoized per call).

        NOTE(review): redis lrange(0, limit) is inclusive on both ends, so
        up to limit + 1 keys are fetched — with the default limit=21 that
        is 22 entries, although historically described as "last 20".
        """

        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])
        list_key = 'activity_stream::%s' % str(account_id)
        activity_keys = c.lrange(str(list_key), 0, limit)
        # Separate reader connection used for the bulk hash fetches.
        r = redis.Redis(host=settings.REDIS_READER_HOST,
                        port=settings.REDIS_PORT,
                        db=settings.CACHE_DATABASES['activity_resources'])
        pipeline = r.pipeline()

        # Queue all hash fetches, then execute them in one round trip.
        for key in activity_keys:
            pipeline.hgetall(key)
        activities = []
        users = {}  # avoid duplicated sql queries
        for h in pipeline.execute():
            # Records without a user_id are silently dropped.
            user_id = h.get('user_id', None)
            if user_id:
                if not users.get(user_id, None):
                    user = User.objects.get(pk=user_id)
                    users[user_id] = user
                else:
                    user = users[user_id]
                h['user_nick'] = user.nick
                h['user_email'] = user.email
                h['user_name'] = user.name

                activities.append(h)

        # if settings.DEBUG: logger.info('Returned activities %s' % str(activities))
        return activities
Ejemplo n.º 8
0
 def get_total_datastreams(self):
     """Return this user's datastream count, caching positive results."""
     cache = Cache(db=0)
     key = 'my_total_datastreams_' + str(self.id)
     total_datastreams = cache.get(key)
     if not total_datastreams:
         total_datastreams = DataStream.objects.filter(user=self.id).count()
         # Only cache non-zero counts so an empty result is re-checked.
         if total_datastreams > 0:
             cache.set(key, total_datastreams, settings.REDIS_STATS_TTL)
     return total_datastreams
Ejemplo n.º 9
0
 def latest(self):
     """Return the newest cached record as {'title': ..., 'data': ...};
     an empty dict when the bucket has no entries."""
     # Range 0..0 fetches only the most recent entry.
     records = Cache().get_by_range(self.get_bucket(), start=0, end=0)
     entry = next(iter(records), None)
     if entry is None:
         return {}
     fields = entry.split(self.DELIMITER)
     return {'title': fields[0], 'data': float(fields[1])}
Ejemplo n.º 10
0
 def get_total_visualizations(self):
     """Return this user's visualization count, caching positive results."""
     cache = Cache(db=0)
     key = 'my_total_visualizations_' + str(self.id)
     total_visualizations = cache.get(key)
     if not total_visualizations:
         total_visualizations = Visualization.objects.filter(user=self.id).count()
         # Only cache non-zero counts so an empty result is re-checked.
         if total_visualizations > 0:
             cache.set(key, total_visualizations, settings.REDIS_STATS_TTL)
     return total_visualizations
Ejemplo n.º 11
0
    def create(self, account_id, user_id, revision_id, resource_type,
               resource_id, action_id, resource_title, resource_category):
        """Create a redis hash for one activity event and push its key onto
        the account's activity-stream list.

        Returns (list_key, activity_key, activity_value).
        """
        if settings.DEBUG:
            logger.info('Create ActivityStreamDAO %d %s' %
                        (action_id, resource_title))
        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])

        # Localized display timestamp, e.g. "on 2020-01-01, at 12:00".
        timeformat = "%s %s %s %s" % (ugettext('APP-ON-TEXT'), "%Y-%m-%d,",
                                      ugettext('APP-AT-TEXT'), "%H:%M")
        now = datetime.datetime.now()
        time = now.strftime(timeformat)
        l_permalink = ""

        # TODO: check and fix all urls.
        # Deleted resources get no permalink; otherwise resolve the
        # workspace URL by resource type (plugins may supply their own).
        if int(action_id) != int(choices.ActionStreams.DELETE):
            if resource_type == settings.TYPE_DATASTREAM:
                l_permalink = reverse('manageDataviews.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_VISUALIZATION:
                l_permalink = reverse('manageVisualizations.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_DATASET:
                l_permalink = reverse('manageDatasets.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            else:
                for plugin in DatalPluginPoint.get_plugins():
                    if (plugin.is_active() and hasattr(plugin, 'doc_type')
                            and plugin.doc_type == resource_type
                            and hasattr(plugin, 'workspace_permalink')):
                        l_permalink = plugin.workspace_permalink(revision_id)

        list_key = 'activity_stream::%s' % str(account_id)
        n = c.incr(
            "%s_counter" % list_key
        )  # count any use of the list indexing hash and never repeat an ID
        activity_key = 'activity.stream_%s:%s' % (str(account_id), str(n))
        activity_value = {
            "user_id": user_id,
            "revision_id": revision_id,
            "type": resource_type,
            "resource_id": resource_id,
            "action_id": action_id,
            "title": resource_title,
            "time": time,
            "resource_link": l_permalink,
            "category": resource_category
        }

        # Store the hash first, then publish its key on the account list.
        r1 = c.hmset(activity_key, activity_value)
        r2 = c.lpush(str(list_key), activity_key)
        if settings.DEBUG:
            logger.info('Saved ActivityStreamDAO {} {} {} {} {}'.format(
                str(r1), str(r2), list_key, activity_key, activity_value))
        return list_key, activity_key, activity_value
Ejemplo n.º 12
0
    def add_seed(self, seed_id):
        """Download and register a single seed by its ID, then cache it."""
        self.login_if_not()

        self.download_seed_file(seed_id)
        matches = [s for s in self.crawl(False) if s.id == seed_id]
        assert len(matches) == 1

        SeedManager.try_add_seeds(matches)
        # Remember the seed (~50 days) so it is not processed again.
        Cache().set_with_expire(matches[0].id, str(matches[0]), 5 * 864000)
Ejemplo n.º 13
0
 def __init__(self, visualization):
     """Keep the visualization, resolve its id, and set up index, logger
     and cache access.

     *visualization* may be either a dict or a model-like object.
     """
     self.visualization = visualization
     if isinstance(visualization, dict):
         self.visualization_id = visualization['visualization_id']
     else:
         self.visualization_id = visualization.visualization_id
     self.search_index = ElasticsearchIndex()
     self.logger = logging.getLogger(__name__)
     self.cache = Cache()
Ejemplo n.º 14
0
 def __init__(self, datastream):
     """Keep the datastream, resolve its id, and set up index and cache.

     *datastream* may be either a dict or a model-like object.
     """
     self.datastream = datastream
     if isinstance(datastream, dict):
         self.datastream_id = datastream['datastream_id']
     else:
         self.datastream_id = datastream.id
     self.search_index = ElasticsearchIndex()
     self.cache = Cache()
Ejemplo n.º 15
0
    def action(self, data):
        """Email each new seed, cache it for ten days, then try to add all."""
        if not data:
            return

        for seed in data:
            # Notify first, then remember the seed so it is skipped later.
            EmailSender.send(u"种子", str(seed))
            Cache().set_with_expire(seed.id, str(seed), 864000)

        SeedManager.try_add_seeds(data)
Ejemplo n.º 16
0
    def initialize():
        """Set up both PWM pins once and seed default duty cycles in cache.

        Safe to call repeatedly: does nothing after the pins are created.
        """
        if Servo.hPin is None and Servo.vPin is None:
            # Seed both axes with the 7.5 duty-cycle default on first run.
            if Cache().get(Servo.H_DB_KEY) is None:
                Cache().set(Servo.H_DB_KEY, 7.5)
            if Cache().get(Servo.V_DB_KEY) is None:
                Cache().set(Servo.V_DB_KEY, 7.5)

            try:
                # horizontal servo
                GPIO.setmode(GPIO.BCM)
                GPIO.setup(17, GPIO.OUT, initial=False)
                Servo.hPin = GPIO.PWM(17, 50)  # 50HZ
                Servo.hPin.start(0)
            except Exception as e:
                print(e)

            try:
                # vertical servo
                # BUG FIX: this setup was previously outside any try/except,
                # so a GPIO failure here crashed initialize() even though the
                # same failure on the horizontal servo was only logged.
                GPIO.setup(19, GPIO.OUT, initial=False)
                Servo.vPin = GPIO.PWM(19, 50)  # 50HZ
                Servo.vPin.start(0)
            except Exception as e:
                print(e)
Ejemplo n.º 17
0
 def history(self):
     """Return up to LIMIT cached records as {'data': [...], 'title': [...]},
     ordered oldest-first."""
     titles = list()
     values = list()
     for raw in Cache().get_by_range(self.get_bucket(), start=0,
                                     end=self.LIMIT - 1):
         fields = raw.decode("utf-8").split(self.DELIMITER)
         titles.append(fields[0])
         values.append(float(fields[1]))
     # The cache appears to return newest-first (index 0 is the latest
     # entry); reverse into chronological order for callers.
     titles.reverse()
     values.reverse()
     return {'data': values, 'title': titles}
Ejemplo n.º 18
0
 def get_total_datasets(self):
     """Return the dataset count across this account's users, with caching."""
     cache = Cache(db=0)
     key = 'account_total_datasets_' + str(self.id)
     total_datasets = cache.get(key)
     if not total_datasets:
         account_users = User.objects.filter(account=self)
         total_datasets = Dataset.objects.filter(user__in=account_users).count()
         # Zero counts are not cached so they get re-checked next time.
         if total_datasets > 0:
             cache.set(key, total_datasets, settings.REDIS_STATS_TTL)
     return total_datasets
Ejemplo n.º 19
0
    def action(self, candidate_seeds):
        """Download candidate seeds, try to add them, and cache outcomes.

        Successes are cached long-term so they are never retried; failures
        get a short cool-down before becoming eligible again.
        """
        if not candidate_seeds:
            return

        for seed in candidate_seeds:
            self.download_seed_file(seed.id)

        success_seeds, fail_seeds = SeedManager.try_add_seeds(candidate_seeds)

        # Remember successes for ~50 days.
        for seed in success_seeds:
            Cache().set_with_expire(seed.id, str(seed), 5 * 864000)

        # Failed seeds cool down before retry; more attractive seeds
        # (free, sticky, heavily discounted) come back sooner.
        for seed in fail_seeds:
            if seed.free or seed.sticky:
                cool_down_time = 300  # 5 minutes
            elif seed.discount <= 50:
                cool_down_time = 1800  # 30 minutes
            else:
                cool_down_time = 3600  # 1 hour

            Cache().set_with_expire(seed.id, str(seed), cool_down_time)
Ejemplo n.º 20
0
    def filter(self, data):
        """Select worthwhile seeds: drop seen/oversized ones, run the
        configured site strategy, then force-include white-listed titles."""
        # Common strategy: unseen (not cached) and under the size cap.
        max_size = Config.get("seed_max_size_mb")
        candidates = [
            seed for seed in data
            if seed.size < max_size and Cache().get(seed.id) is None
        ]

        # Dispatch to the site-specific strategy by configuration.
        strategy_map = {
            "easy": self.easy_strategy,
            "medium": self.medium_strategy,
            "hard": self.hard_strategy,
            "hell": self.hell_strategy
        }
        strategy = strategy_map[Config.get(self.get_site_name() + "_strategy")]
        assert strategy is not None

        final_seeds = strategy(candidates)

        # White list: any candidate whose title matches gets included.
        white_lists_str = Config.get(self.get_site_name() + "_white_list")
        if white_lists_str is not None:
            patterns = white_lists_str.split("|")
            for seed in candidates:
                for pattern in patterns:
                    if re.search(pattern, seed.title):
                        final_seeds.append(seed)
                        break

        for seed in final_seeds:
            print("Find valuable seed: " + str(seed))

        return final_seeds
Ejemplo n.º 21
0
    def filter(self, data):
        """Pick sticky seeds plus white-listed titles from the unseen,
        size-limited candidates.

        Returns a set of seeds worth adding.
        """
        # common strategy
        # 1. hasn't been found before
        # 2. not exceed max size
        max_size = Config.get("seed_max_size_mb")
        data = list(
            filter(lambda x: x.size < max_size and Cache().get(x.id) is None,
                   data))

        # sticky
        filtered_seeds = set(filter(lambda x: x.sticky, data))

        # white list
        # BUG FIX: Config.get may return None when the key is absent, and
        # calling .split() on None raised AttributeError.
        white_lists_str = Config.get("putao_white_list")
        if white_lists_str is not None:
            white_lists = white_lists_str.split("|")
            for seed in data:
                for white_list in white_lists:
                    if re.search(white_list, seed.title):
                        filtered_seeds.add(seed)
                        break

        for seed in filtered_seeds:
            print("Add seed: " + str(seed))

        return filtered_seeds
Ejemplo n.º 22
0
    def zz(self):
        """Repost image jokes from bh.sb to the MIUI forum.

        Crawls article pages, collects up to ``max_cnt`` (title, image)
        pairs that have not been posted before (tracked in the cache), then
        posts each to the forum, rotating through a fixed list of topic
        type ids with a random 45-50 minute pause between posts.
        """
        source_url_template = "https://bh.sb/post/category/main/page/{0}/"
        post_url = "http://www.miui.com/forum.php?mod=post&action=newthread&fid=5&extra=&topicsubmit=yes"

        self.check_in()

        max_cnt = 10
        cnt = 0
        page_num = 1
        articles = list()
        stop_flag = False
        while not stop_flag:
            # get article of bhsb
            soup = HttpUtils.get(source_url_template.format(page_num))
            article_urls = HttpUtils.get_attrs(soup, "h2 a", "href")
            page_num += 1

            for article_index in range(len(article_urls)):
                article_url = article_urls[article_index]
                # Skip articles already fully processed in a previous run.
                if Cache().get(article_url) is not None:
                    continue

                article_soup = HttpUtils.get(article_url)
                titles = HttpUtils.get_contents(article_soup,
                                                ".article-content p")

                # Paragraphs alternate title/image, so each item spans two.
                title_cnt = int(len(titles) / 2)

                for title_index in range(0, title_cnt):
                    try:
                        title = titles[title_index * 2].split("】")[1]
                        image = titles[title_index * 2 + 1]

                        if type(image) != Tag:
                            continue

                        src = image.attrs["src"]
                        # NOTE(review): static jpg images are skipped —
                        # presumably only animated images are wanted;
                        # confirm intent.
                        if src.endswith("jpg"):
                            continue

                        message = "好玩您就点个赞,不好笑请期待下一贴~\n"
                        message += "[img]{0}[/img]".format(src)

                        # De-duplicate by title as well as by article URL.
                        if Cache().get(title) is not None:
                            continue
                        Cache().set(title, message)

                        articles.append((title, message))

                        cnt += 1

                        if cnt >= max_cnt:
                            stop_flag = True
                            break
                    except:
                        # Best-effort parsing: malformed items are ignored.
                        pass

                if stop_flag:
                    break

                # only if all articles are included, then mark this url
                Cache().set(article_url, article_url)

        type_id_list = ["1629", "1631", "1633", "4481", "1641"]
        type_index = 0
        for (title, message) in articles:
            print((title, message))

            post_data = dict()
            post_data["posttime"] = str(int(time.time()))
            post_data["formhash"] = self.form_hash_mirror
            post_data["wysiwyg"] = "1"
            post_data["typeid"] = type_id_list[type_index]
            post_data["allownoticeauthor"] = "1"
            post_data["addfeed"] = "1"
            post_data["usesig"] = "1"
            post_data["save"] = ""
            post_data["uploadalbum"] = "-2"
            post_data["newalbum"] = "请输入相册名称"
            post_data["subject"] = title
            post_data["message"] = message

            post_result = HttpUtils.post(post_url,
                                         headers=self.site.login_headers,
                                         data=post_data,
                                         returnRaw=False)
            assert post_result is not None
            type_index = (type_index + 1) % len(type_id_list)
            # Random 45-50 minute pause between posts.
            time.sleep(int(random() * 300) + 2700)
Ejemplo n.º 23
0
 def _delete_cache(self, cache_key, cache_db=0):
     """Remove a single entry from the cache.

     cache_db=0 is the main cache (CACHE_DATABASES), used to refresh
     resources that need a fast update after being modified.
     """
     Cache(db=cache_db).delete(cache_key)
Ejemplo n.º 24
0
 def getBrightness():
     """Return the cached brightness; seed and return the default (60)
     when nothing has been stored yet."""
     cached = Cache().get("brightness")
     if cached is not None:
         return cached
     Cache().set("brightness", 60)
     return 60
Ejemplo n.º 25
0
 def setBrightness(delta):
     """Shift the cached brightness by *delta*, clamped to [0, 100]."""
     adjusted = int(Camera.getBrightness()) + int(delta)
     Cache().set("brightness", min(100, max(0, adjusted)))
Ejemplo n.º 26
0
 def is_enable_face_detect():
     """Return True when the cached face-detect flag equals the string "1"."""
     flag = Cache().get("face_detect")
     return flag == "1"
Ejemplo n.º 27
0
 def disable_face_detect():
     """Turn the face-detect feature flag off in the cache."""
     Cache().set("face_detect", "0")
Ejemplo n.º 28
0
class UploadCheck(AdultAlert):
    """Track the account's upload/download totals and alert on completion.

    Parses the page's info block for upload/download amounts, prints the
    delta against the previously cached values, and emails notifications
    once the configured upload target has been reached.
    """

    # Shared cache client for the persisted up/down counters.
    cache = Cache()

    # When True, freshly crawled values overwrite the cached counters.
    is_store = True

    def parse(self, soup_obj):
        """Extract (upload, download) in GB from the page's info block."""
        assert soup_obj is not None

        info_block = soup_obj.select(
            "#info_block table tr td:nth-of-type(1) span")[0]

        # Labels and values are sibling nodes, so track the previous node
        # to know what the current one represents.
        prev_info = ""
        upload = 0
        download = 0
        for info in info_block.contents:
            if "上傳量" in prev_info:
                upload = HttpUtils.pretty_format(info, "GB")
            elif "下載量" in prev_info:
                download = HttpUtils.pretty_format(info, "GB")
                break
            prev_info = str(info)

        return upload, download

    def filter(self, data):
        # No filtering: every parsed record is acted upon.
        return data

    def action(self, data):
        """Print traffic deltas and email once the upload target is met.

        *data* is the (upload, download) tuple produced by parse().
        """
        prev_up = self.cache.get("mt_up")
        prev_down = self.cache.get("mt_down")

        # Cached values come back as bytes; missing means "start at 0".
        if prev_up is None:
            prev_up = 0
        else:
            prev_up = float(prev_up.decode())
        if prev_down is None:
            prev_down = 0
        else:
            prev_down = float(prev_down.decode())

        delta_up = round(data[0] - prev_up, 2)
        delta_down = round(data[1] - prev_down, 2)
        if delta_down == 0:
            # Avoid division by zero when nothing new was downloaded.
            delta_ratio = "Inf"
        else:
            delta_ratio = round(delta_up / delta_down, 2)

        upload_target = Config.get("mteam_upload_target")
        current_upload = round(data[0] - data[1], 2)
        print(
            "%s, upload=%s, download=%s, current=%s, delta_up=%s, delta_down=%s, delta_ratio=%s, target=%s"
            %
            (str(time.strftime("%Y-%m-%d %H:%M:%S")), data[0], data[1],
             current_upload, delta_up, delta_down, delta_ratio, upload_target))

        if self.is_store:
            self.cache.set("mt_up", data[0])
            self.cache.set("mt_down", data[1])

        if upload_target < current_upload:
            # Send the notification 5 times, spaced ~2.8 hours apart.
            for i in range(5):
                EmailSender.send(u"完成上传", Config.get("mteam_username"))
                time.sleep(10000)

    def init(self):
        # Reset the persisted counters to zero.
        self.cache.set("mt_up", 0)
        self.cache.set("mt_down", 0)

    def check_not_store(self):
        """Run check() once without persisting the crawled values."""
        # backup current configuration
        is_store = self.is_store
        self.is_store = False
        self.check()
        self.is_store = is_store
Ejemplo n.º 29
0
class UserCrawl(NormalAlert):
    """Crawl M-Team user profiles, cache them, and message low-ratio users.

    User records are stored in redis hashes keyed both by id and by name.
    Crawling runs in parallel batches over an id range; failed ids are
    retried once per batch.
    """

    buffer = []  # parsed User objects waiting to be flushed to cache
    errors = []  # user ids whose page failed to parse (retried once)

    id_bucket_name = "mteam_user_id"
    name_bucket_name = "mteam_user_name"
    msg_bucket_name = "mteam_msg"

    min_id = 1
    max_id = 200000
    scan_batch_size = 500
    skip_if_exist = False  # ignore cache and re-crawl all user

    cache = Cache()

    msg_subject = "分享率过低的账户会被警告并封禁,请注意(%s)"
    msg_body = "如需快速增加上传,消除警告,请微信联系 helloword1984(用户名是薛定谔的小仓鼠)\n\n注1:本人非网站工作人员\n注2:如果打扰到了您,表示抱歉,请pm回复'谢谢勿扰'"

    msg_urgent_subject = "注意!注意!注意!分享率低于0.3的账户可能随时会被封号!"
    msg_urgent_body = "如需快速增加上传,消除警告,请微信联系 helloword1984(用户名是薛定谔的小仓鼠)\n\n注1:本人非网站工作人员\n注2:如果打扰到了您,表示抱歉,请pm回复'谢谢勿扰'"

    def generate_site(self):
        """Point the site at the user-details page template."""
        self.site.home_page = "https://kp.m-team.cc/userdetails.php?id=%s"
        return self.site

    def crawl_single(self, user_id):
        """Fetch and parse one user profile page into self.buffer.

        Failures are recorded in self.errors for a later retry.
        """
        if self.skip_if_exist and self.cache.hash_get(self.id_bucket_name,
                                                      user_id) is not None:
            print("Skip " + str(user_id))
            return

        try:
            url = self.site.home_page % str(user_id)
            soup_obj = HttpUtils.get(url,
                                     headers=self.site.login_headers,
                                     return_raw=False)
            assert soup_obj is not None

            user = User()
            user.id = user_id
            user.name = HttpUtils.get_content(soup_obj, "#outer h1 span b")

            # No name means the id does not exist; nothing to record.
            if user.name is None:
                return

            user.is_warn = len(
                soup_obj.select("#outer h1 span img[alt='Leechwarned']")) > 0
            user.is_ban = len(
                soup_obj.select("#outer h1 span img[alt='Disabled']")) > 0
            if user.is_warn:
                user.warn_time = str(time.strftime("%Y-%m-%d %H:%M:%S"))

            try:
                # Profiles with very few rows are hidden ("secret") users.
                if len(soup_obj.select("#outer table tr")) <= 5:
                    user.is_secret = True
                else:
                    tr_list = soup_obj.select("#outer table tr")
                    for tr in tr_list:
                        td_name = HttpUtils.get_content(
                            tr, "td:nth-of-type(1)")
                        if td_name == "加入日期":
                            user.create_time = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)").replace(" (", "")
                        elif td_name == "最近動向":
                            user.last_time = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)").replace(" (", "")
                        elif td_name == "傳送":
                            user.ratio = HttpUtils.get_content(
                                tr, "td:nth-of-type(2) table tr td font")
                            if user.ratio is None:
                                # seems that no download is made and ratio is infinite
                                user.ratio = -1
                                user.up = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(1) td:nth-of-type(1)",
                                        1))
                                user.down = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(1) td:nth-of-type(2)",
                                        2))
                            else:
                                user.ratio = user.ratio.replace(",", "")
                                user.up = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(2) td:nth-of-type(1)",
                                        1))
                                user.down = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(2) td:nth-of-type(2)",
                                        2))
                        elif td_name == "魔力值":
                            user.mp = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)")

                    # parse rank
                    user.rank = "secret"
                    imgs = soup_obj.select(
                        "table.main table tr > td > img[title!='']")
                    for img in imgs:
                        if not img.has_attr("class"):
                            user.rank = img["title"]

                            # A demoted ("Peasant") rank counts as a warning.
                            if "Peasant" in user.rank:
                                user.warn_time = str(
                                    time.strftime("%Y-%m-%d %H:%M:%S"))
            except Exception as e:
                print(str(user_id) + "\n" + str(e) + "\n")

            self.buffer.append(user)
        except Exception as e:
            print(">>>>> fail to parse " + str(user_id))
            self.errors.append(user_id)

    def parse_size_in_gb(self, size_str):
        """Normalize a size label like ': 1.5 TB' into a GB float."""
        assert size_str is not None
        return HttpUtils.pretty_format(size_str.replace(": ", ""), "GB")

    def store_cache(self, data):
        """Flush parsed users into the id- and name-keyed hash buckets."""
        if data is None or len(data) == 0:
            return

        for user in data:
            res = self.cache.hash_get(self.id_bucket_name, user.id)
            if res is not None:
                exist_user = User.parse(res.decode())
                # warned before, do not update warn time
                if user.is_warn and exist_user.is_warn:
                    user.warn_time = exist_user.warn_time

            self.cache.hash_set(self.id_bucket_name, user.id, str(user))
            self.cache.hash_set(self.name_bucket_name, user.name, str(user))

        print("########### finish storing cache ###########")

    def crawl(self, ids=None):
        """Crawl the given ids (or scan the full id range) in batches."""
        self.login_if_not()

        if ids is None:
            ids = range(self.min_id, self.max_id)
            self.skip_if_exist = True

        start = 0
        end = len(ids)
        step = self.scan_batch_size

        current = start
        while current < end:
            print(">>>>>>>>>>>> crawl {0} -> {1} >>>>>>>>>>>>>>>>".format(
                ids[current], ids[min(current + step, end - 1)]))
            ParallelTemplate(500).run(
                func=self.crawl_single,
                inputs=ids[current:min(current + step, end)])
            # BUG FIX: advancing by `step + 1` skipped one id per batch,
            # because the slice end above is already exclusive.
            current += step

            if len(self.errors) > 0:
                print(">>>>>>>>>>>>>>>>> retry >>>>>>>>>>>>>>>>>>>>>>")
                ParallelTemplate(100).run(func=self.crawl_single,
                                          inputs=self.errors)
                self.errors.clear()
                print(
                    ">>>>>>>>>>>>>>>>> retry finished >>>>>>>>>>>>>>>>>>>>>>")

            # Flush periodically so a crash loses at most ~300 records.
            if len(self.buffer) > 300:
                self.store_cache(self.buffer)
                self.buffer.clear()

        # write all others left
        self.store_cache(self.buffer)
        self.buffer.clear()

    def refresh(self):
        """Re-crawl all known users, then look for newly registered ones."""
        ids = list(
            sorted(
                map(lambda x: int(x.decode()),
                    self.cache.hash_get_all_key(self.id_bucket_name))))
        print("max ID=" + str(ids[-1]))
        # New-user scan starts just past the highest id seen so far.
        self.min_id = ids[-1] + 1
        self.max_id = self.min_id + 1000

        print("\n############## refresh user ##############\n")
        # refresh existing user
        self.crawl(ids)

        print("\n############## crawl new user ##############\n")
        # find new user
        self.crawl()

    def warn(self):
        """Message users whose ratio is dangerously low.

        Skips banned/VIP/staff users, a configured white list, and users
        registered less than 2 days ago (unless urgently at risk).
        """
        self.refresh()
        warn_white_list = str(Config.get("mt_warn_white_list")).split("|")

        user_ids = self.cache.hash_get_all_key(self.id_bucket_name)
        now = datetime.now()
        for user_id in user_ids:
            user_str = self.cache.hash_get(self.id_bucket_name,
                                           user_id).decode()
            user = User.parse(user_str)
            if user.is_ban or not user.is_warn or "VIP" in user.rank or "職人" in user.rank:
                continue
            if user.is_secret or (0.5 > user.ratio > -1 or
                                  (0.9 > user.ratio
                                   and user.down - user.up > 50)):
                if user.create_time == "":
                    # Unknown registration date: treat as a very old account.
                    create_since = 999999
                else:
                    create_time = datetime.strptime(user.create_time,
                                                    "%Y-%m-%d %H:%M:%S")
                    create_since = (now - create_time).days
                warn_time = datetime.strptime(user.warn_time,
                                              "%Y-%m-%d %H:%M:%S")
                warn_since = (now - warn_time).days
                print("{0}|{1}|{2}".format(str(user), str(create_since),
                                           str(warn_since)))

                # new user and ratio lower than 0.3 will be baned any time
                if create_since < 30 and user.ratio < 0.3 and warn_since in [
                        0, 1
                ]:
                    self.send_msg(user.id, self.msg_urgent_subject,
                                  self.msg_urgent_body)
                    continue

                # skip user who has registered for less than 2 days
                if create_since < 2:
                    continue

                if user.name in warn_white_list:
                    continue

                # Remind on days 0, 3 and 5 of the warning period.
                if warn_since in [0, 3, 5]:
                    self.send_msg(user.id, self.msg_subject % (7 - warn_since),
                                  self.msg_body)

    def order(self, limit=250):
        """Print the *limit* cached users with the lowest share ratios."""
        user_ids = self.cache.hash_get_all_key(self.id_bucket_name)
        users = []
        for user_id in user_ids:
            user_str = self.cache.hash_get(self.id_bucket_name,
                                           user_id).decode()
            user = User.parse(user_str)
            if not user.is_secret and not user.is_ban and user.ratio >= 0 and user.down >= 10 and "VIP" not in user.rank and "職人" not in user.rank:
                users.append(user)

        users.sort(key=lambda x: x.ratio)
        for i in range(0, int(limit)):
            print(users[i])

    def load_by_id(self, user_id):
        """Print the cached record for *user_id*, if any."""
        res = self.cache.hash_get(self.id_bucket_name, user_id)
        if res is not None:
            print(res.decode())
        else:
            print("Cannot find user by ID: " + user_id)

    def load_by_name(self, user_name):
        """Print the cached record for *user_name*, if any."""
        res = self.cache.hash_get(self.name_bucket_name, user_name)
        if res is not None:
            print(res.decode())
        else:
            print("Cannot find user by name: " + user_name)

    def send_msg(self, user_id, subject, body):
        """Send a site PM to *user_id*, at most once per day per user."""
        if self.cache.get(self.msg_bucket_name + str(user_id)) is not None:
            print("Skip sending msg, user in cache: " + str(user_id))
            return

        self.login_if_not()

        url = "https://kp.m-team.cc/takemessage.php"
        data = {
            "receiver": user_id,
            "subject": subject,
            "body": body,
            "save": "yes"
        }

        HttpUtils.post(url=url, data=data, headers=self.site.login_headers)
        print(">>>>>>>>> Send msg to {0}, subject={1}, body={2} >>>>>>>>>>".
              format(user_id, subject, body))

        # Remember the recipient for a day to avoid repeat messages.
        self.cache.set_with_expire(self.msg_bucket_name + str(user_id), "",
                                   86400)

        # sleep 30 ~ 120 seconds before sending next message
        time.sleep(round(30 + random.random() * 90))
Ejemplo n.º 30
0
 def enable_face_detect():
     """Turn the face-detect feature flag on in the cache."""
     Cache().set("face_detect", "1")