Ejemplo n.º 1
0
    def handle(self, *args, **options):
        """Crawl and store profile images for members.

        Options read:
            group: 1, 2 or None. None processes members of every group;
                any other value is rejected with a console message.
            force: also refresh non-graduated members that already have
                an image.
            fforce: refresh every member, graduates included.

        Members without an image are always processed.
        """
        group_id = options['group']
        if group_id not in (1, 2, None):
            otapick.print_console(
                'groupID {} is not supported.'.format(group_id))
            # ``quit()`` is an interactive-shell helper injected by ``site``
            # and is not meant for programs; simply end the command instead.
            return

        member_image_crawler = otapick.MemberImageCrawler()
        member_image_crawler_ex = otapick.MemberImageCrawlerEx()
        member_image_downloader = otapick.MemberImageDownloader()
        member_image_downloader_ex = otapick.MemberImageDownloaderEx()

        members = Member.objects.filter(temporary=False, is_other=False)
        if group_id is not None:
            members = members.filter(belonging_group__group_id=group_id)

        for member in members:
            needs_update = (not member.image
                            or (options['force'] and not member.graduate)
                            or options['fforce'])
            if not needs_update:
                continue

            if not member.graduate:
                image_url = member_image_crawler.crawl(
                    group_key=member.belonging_group.key, ct=member.ct)
                media = member_image_downloader.download(
                    image_url, member.belonging_group.group_id, member.ct)
            else:
                # Graduates are looked up by name through the "Ex"
                # crawler/downloader pair instead of by group key and ct.
                image_url = member_image_crawler_ex.crawl(member.full_kanji)
                media = member_image_downloader_ex.download(
                    image_url, member.belonging_group.group_id, member.ct)

            if media is None:
                continue
            member.image = media
            member.save()
            otapick.print_console("{}'s image is saved!!".format(
                member.full_kanji))
Ejemplo n.º 2
0
def get_article_tag(group_key, soup):
    """Return the ``div`` holding the article body for ``group_key``.

    'keyaki' and 'sakura' pages are searched for the ``box-article`` class,
    'hinata' pages for ``c-blog-article__text``. Any other key returns
    ``None`` without touching ``soup``. When the lookup finds nothing a
    message is printed and ``None`` is returned.
    """
    if group_key in ('keyaki', 'sakura'):
        css_class = 'box-article'
    elif group_key == 'hinata':
        css_class = 'c-blog-article__text'
    else:
        return None

    found = soup.find('div', class_=css_class)
    if found is None:
        otapick.print_console('article_tag not found')
    return found
Ejemplo n.º 3
0
def get_blog_tag(group_key, soup):
    """Return the first element wrapping a blog entry for ``group_key``.

    The CSS selector depends on the group ('keyaki', 'hinata' or 'sakura').
    Any other key returns ``None`` without touching ``soup``. When nothing
    matches, a message is printed and ``None`` is returned.
    """
    selector = {
        'keyaki': 'article',
        'hinata': 'div.p-blog-article',
        'sakura': 'article.post',
    }.get(group_key)
    if selector is None:
        return None

    found = soup.select_one(selector)
    if found is None:
        otapick.print_console('blog_tag not found')
    return found
Ejemplo n.º 4
0
def get_blog_title_tag(group_key, blog_tag):
    """Return the element holding the blog title inside ``blog_tag``.

    The CSS selector depends on the group ('keyaki', 'hinata' or 'sakura').
    Any other key returns ``None`` without touching ``blog_tag``. When
    nothing matches, a message is printed and ``None`` is returned.
    """
    selector = {
        'keyaki': '.box-ttl a',
        'hinata': 'div.c-blog-article__title',
        'sakura': 'h1.title',
    }.get(group_key)
    if selector is None:
        return None

    found = blog_tag.select_one(selector)
    if found is None:
        otapick.print_console('title_tag not found')
    return found
Ejemplo n.º 5
0
def get_blog_tags(group_key, soup):
    """Return every blog-entry element found in ``soup`` for ``group_key``.

    Args:
        group_key: 'keyaki', 'hinata' or 'sakura'.
        soup: parsed page offering a ``select(css_selector)`` method.

    Returns:
        Whatever ``soup.select`` returns for the group's selector, or
        ``None`` when ``group_key`` is not one of the supported keys.
    """
    if group_key == 'keyaki':
        blog_tags = soup.select('article')
    elif group_key == 'hinata':
        blog_tags = soup.select('div.p-blog-article')
    elif group_key == 'sakura':
        blog_tags = soup.select('ul.com-blog-part > li.box')
    else:
        return None

    # ``select`` yields an empty list, not None, when nothing matches, so
    # an ``is None`` test would never report the miss.
    if not blog_tags:
        otapick.print_console('blog_tags not found')
    return blog_tags
Ejemplo n.º 6
0
def get_blog_writer_name(group_key, blog_tag):
    """Return the writer name text found inside ``blog_tag``.

    Args:
        group_key: 'keyaki', 'hinata' or 'sakura'.
        blog_tag: element offering a ``select_one(css_selector)`` method.

    Returns:
        The ``text`` of the matching element, or ``None`` when the group key
        is unsupported or no element matches (the latter is also logged).
    """
    if group_key == 'keyaki':
        selector = 'div.box-ttl > p.name'
    elif group_key == 'hinata':
        selector = 'div.p-blog-article__info > div.c-blog-article__name'
    elif group_key == 'sakura':
        selector = 'div.blog-foot p.name'
    else:
        return None

    # Check the tag itself before reading ``.text``: dereferencing a missing
    # tag would raise AttributeError before any "not found" handling runs.
    name_tag = blog_tag.select_one(selector)
    if name_tag is None:
        otapick.print_console('writer_name not found')
        return None
    return name_tag.text
Ejemplo n.º 7
0
 def handle(self, *args, **options):
     """Run ``otapick.shift_score`` over every blog and every image.

     Without the ``reverse`` option both calls use ``order=True`` and the
     elapsed time of each is printed. With ``reverse`` set they use
     ``order=False`` and nothing is printed.
     """
     if not options['reverse']:
         began = time.time()
         otapick.shift_score(blogs=Blog.objects.all(), order=True)
         elapsed = round(time.time() - began, 2)
         otapick.print_console(
             'finished shift_per_day blogs!!: {}s'.format(elapsed))

         began = time.time()
         otapick.shift_score(images=Image.objects.all(), order=True)
         elapsed = round(time.time() - began, 2)
         otapick.print_console(
             'finished shift_per_day images!!: {}s'.format(elapsed))
         return

     otapick.shift_score(blogs=Blog.objects.all(), order=False)
     otapick.shift_score(images=Image.objects.all(), order=False)
Ejemplo n.º 8
0
    def handle(self, *args, **options):
        """Recalculate scores for blogs and images, timing each step.

        With the ``recommend`` option the recommend scores are computed from
        the data prepared by ``otapick.init_calc_recommend_score``. Otherwise
        the regular scores are computed and, if the ``tweet`` option is set,
        ``otapick.PopularityBot`` tweets once per active group.
        """
        if options['recommend']:
            (high_score_members, divided_blogs,
             divided_images) = otapick.init_calc_recommend_score()

            began = time.time()
            otapick.calc_recommend_score(high_score_members,
                                         divided_blogs=divided_blogs)
            elapsed = round(time.time() - began, 2)
            otapick.print_console(
                'finished calc_recommend_score blogs!!: {}s'.format(elapsed))

            began = time.time()
            otapick.calc_recommend_score(high_score_members,
                                         divided_images=divided_images)
            elapsed = round(time.time() - began, 2)
            otapick.print_console(
                'finished calc_recommend_score images!!: {}s'.format(elapsed))
            return

        began = time.time()
        otapick.calc_score(blogs=Blog.objects.all())
        elapsed = round(time.time() - began, 2)
        otapick.print_console(
            'finished calc_score blogs!!: {}s'.format(elapsed))

        began = time.time()
        otapick.calc_score(images=Image.objects.all())
        elapsed = round(time.time() - began, 2)
        otapick.print_console(
            'finished calc_score images!!: {}s'.format(elapsed))

        # Tweeting only happens after the regular score calculation.
        if not options['tweet']:
            return
        for group in Group.objects.filter(is_active=True):
            otapick.PopularityBot().tweet(group_id=group.group_id)
Ejemplo n.º 9
0
def get_blog_url(group_key, blog_tag):
    """Return the ``href`` of the link to the blog detail page.

    Where the anchor is looked up depends on the group:
    'keyaki' uses the second ``li`` of ``div.box-bottom ul``, 'hinata' uses
    ``div.p-button__blog_detail`` and 'sakura' uses the first anchor of
    ``blog_tag``. Any other key returns ``None``. A missing ``href`` is
    reported on the console and ``None`` is returned.
    """
    if group_key == 'keyaki':
        list_items = blog_tag.select_one('div.box-bottom ul').select('li')
        anchor = list_items[1].find('a')
    elif group_key == 'hinata':
        anchor = blog_tag.select_one('div.p-button__blog_detail').find('a')
    elif group_key == 'sakura':
        anchor = blog_tag.find('a')
    else:
        return None

    href = anchor.get('href')
    if href is None:
        otapick.print_console('blog_url not found')
    return href
Ejemplo n.º 10
0
def get_blog_postdate_tag(group_key, blog_tag):
    """Return the element that carries the post date inside ``blog_tag``.

    'keyaki' uses the first ``li`` of ``div.box-bottom ul``; 'hinata' and
    'sakura' each use a dedicated CSS selector. Any other key returns
    ``None``. When the lookup yields ``None`` a message is printed.
    """
    if group_key == 'keyaki':
        found = blog_tag.select_one('div.box-bottom ul').select('li')[0]
    elif group_key == 'hinata':
        found = blog_tag.select_one(
            'div.p-blog-article__info > div.c-blog-article__date')
    elif group_key == 'sakura':
        found = blog_tag.select_one('div.blog-foot p.date.wf-a')
    else:
        return None

    if found is None:
        otapick.print_console('postdate_tag not found')
    return found
Ejemplo n.º 11
0
def get_member_image_tag(group_key, soup):
    """Return the ``img`` element of a member's profile picture.

    Args:
        group_key: 'keyaki', 'hinata' or 'sakura'.
        soup: parsed profile page offering ``find(name, class_=...)``.

    Returns:
        The ``img`` element inside the group's picture wrapper, or ``None``
        when the group key is unsupported, the wrapper is missing, or the
        wrapper contains no ``img``. Both misses are logged.
    """
    if group_key == 'keyaki':
        wrapper_tag = soup.find('div', class_='box-profile_img')
    elif group_key == 'hinata':
        wrapper_tag = soup.find('div', class_='c-member__thumb')
    elif group_key == 'sakura':
        wrapper_tag = soup.find('p', class_='ph')
    else:
        return None

    if wrapper_tag is None:
        otapick.print_console(
            'image_tag_wrapper not found. 指定したctのメンバーのプロフィールページが存在しない場合がございます。'
        )
        # Stop here: calling ``.find`` on a missing wrapper would raise
        # AttributeError right after the message above.
        return None

    image_tag = wrapper_tag.find('img')
    if image_tag is None:
        otapick.print_console('image_tag not found')
    return image_tag
Ejemplo n.º 12
0
def exe_unregistration(blog, group_id, group_key):
    """Re-check one stored blog against the official site and act on it.

    The blog detail page is crawled. A ``None`` result ends the function
    immediately. A ``404`` result deletes ``blog``. Any other result keeps
    the blog and triggers ``otapick.register_blogs`` for the first 5 pages.
    Both of the latter cases finish with a one second pause.
    """
    crawler = otapick.BlogDetailCrawler()
    blog_info = crawler.crawl(group_key=group_key, blog_ct=blog.blog_ct)
    if blog_info is None:
        return

    blog_label = str(blog.blog_ct) + "/" + str(group_id)
    if blog_info == 404:
        otapick.print_console(
            blog_label + ' blog is not found in official blog. unregister this.')
        blog.delete()
    else:
        otapick.print_console(
            blog_label + ' blog is found in official blog. leave this. and execute get_blogs -p 5 -a -t')
        # If an unacquired blog sits in the middle of a page this branch
        # would otherwise be reached again and again, so fetch it now.
        otapick.register_blogs(
            group_id=group_id, up_limit=5, all_check=True, tweet=True)

    pause_seconds = 1
    time.sleep(pause_seconds)
Ejemplo n.º 13
0
def unregister(correct_cts_list, group, unregister_num):
    """Look for stored blogs that no longer appear on the official list.

    For each of the first ``unregister_num`` pages, the stored blogs of
    non-graduated writers on that page (ordered by ``-post_date``,
    ``order_for_simul``) are compared with the blog_cts crawled from the
    same official page. Every stored blog missing from the official page is
    passed to ``exe_unregistration`` for a detailed check.

    Args:
        correct_cts_list (list): one list of official blog_cts per page
            already crawled. Missing pages are crawled here and appended,
            so the caller's list is mutated.
        group: group object providing ``group_id`` and
            ``blog_list_paginate_by``.
        unregister_num (int): number of pages to check.
    """
    group_id = group.group_id
    paginate_by = group.blog_list_paginate_by
    blog_crawler = otapick.BlogListCrawler()
    groups = Group.objects.filter(group_id=group_id)
    if not groups.exists():
        return
    group_key = groups.first().key

    for page in range(unregister_num):
        # Happens e.g. when unregister_num is 2 or more but registration
        # already finished on the first page. ``page < unregister_num``
        # always holds here, so no extra length check is needed.
        if len(correct_cts_list) <= page:
            blogs_data = blog_crawler.crawl(group_key, page)
            if blogs_data is None:
                # Without the official list nothing can be compared; stop
                # rather than crash while iterating over None.
                otapick.print_console(
                    'failed to crawl the official blog list. stop unregistration.')
                return
            correct_cts_list.append([blog_info['blog_ct']
                                     for blog_info in blogs_data])

        saved_blogs = Blog.objects.filter(
            publishing_group__group_id=group_id,
            writer__graduate=False,
        ).order_by('-post_date', 'order_for_simul')
        for blog in saved_blogs[paginate_by * page:paginate_by * (page + 1)]:
            if blog.blog_ct not in correct_cts_list[page]:
                otapick.print_console(str(blog.blog_ct) + "/" + str(group_id) +
                                      ' blog is not found in official blog on page ' + str(page) + '.')
                otapick.print_console('Investigate in detail...')
                exe_unregistration(blog, group_id, group_key)
0
    def handle(self, *args, **options):
        """Add random fake view/download counts to recent blogs and images.

        Requires the ``date`` option in ``YYYY/MM/DD`` form. Every blog posted
        between that date and now, and every image published by such a blog,
        is visited. A counter is only incremented when it does not currently
        exceed ``threshold``. A console line is printed for every visited
        item, whether or not a counter was incremented.
        """
        threshold = 10  # only counters that do not exceed this are updated
        blog_num_of_views_range = (10, 50)
        image_num_of_views_range = (10, 50)
        image_num_of_downloads_range = (10, 30)

        if not options['date']:
            otapick.print_console('date option is required.')
            return

        try:
            from_date = datetime.strptime(options['date'], '%Y/%m/%d')
        except (TypeError, ValueError):
            # strptime raises ValueError for a malformed string and TypeError
            # for a non-string; a bare ``except`` would also have swallowed
            # KeyboardInterrupt and SystemExit.
            otapick.print_console('date format is wrong.')
            return

        today = timezone.now()

        blogs = Blog.objects.filter(post_date__range=(from_date, today))
        images = Image.objects.filter(
            publisher__post_date__range=(from_date, today))

        for blog in blogs:
            if not blog.num_of_views > threshold:
                # num_of_views
                fake_blog_num_of_views = random.randint(
                    *blog_num_of_views_range)
                otapick.increment_num_of_views(blog=blog,
                                               num=fake_blog_num_of_views)

            otapick.print_console(f'add fake score「{blog.title}」!!')

        for image in images:
            if not image.num_of_views > threshold:
                # num_of_views
                fake_image_num_of_views = random.randint(
                    *image_num_of_views_range)
                otapick.increment_num_of_views(image=image,
                                               num=fake_image_num_of_views)

            if not image.num_of_downloads > threshold:
                # num_of_downloads
                fake_image_num_of_downloads = random.randint(
                    *image_num_of_downloads_range)
                otapick.increment_num_of_downloads(
                    image, image.publisher, num=fake_image_num_of_downloads)

            otapick.print_console(
                f'add fake score「{image.publisher}」({image.order}) !!')
Ejemplo n.º 15
0
 def handle(self, *args, **options):
     """Reassign blogs written by temporary members to their real writers.

     For every ``(group_id, ct)`` entry of ``self.independent_temporary_cts``
     that matches a stored member, each blog of that member is crawled again
     and its writer replaced by the crawled ``'member'`` value. Once no blog
     references the temporary member any more, the member is deleted.
     """
     # 1
     blog_detail_crawler = otapick.BlogDetailCrawler()
     for temporary_member_info in self.independent_temporary_cts:
         lookup = dict(belonging_group__group_id=temporary_member_info[0],
                       ct=temporary_member_info[1])
         if not Member.objects.filter(**lookup):
             continue
         temporary_member = Member.objects.get(**lookup)

         for blog in Blog.objects.filter(writer=temporary_member):
             blog_info = blog_detail_crawler.crawl(
                 group_key=temporary_member.belonging_group.key,
                 blog_ct=blog.blog_ct)
             # NOTE(review): other callers of this crawler also compare the
             # result with 404, so treat that value as a failed crawl too
             # instead of subscripting an int.
             if blog_info is None or blog_info == 404:
                 otapick.print_console('blog crawl failed...')
                 continue
             blog.writer = blog_info['member']
             blog.save()
             otapick.print_console('「{}」 changed writer {}'.format(
                 blog.title, blog_info['member']))

         # This always runs after the loop (it was a for/else with no
         # ``break``); the member is only removed when every blog has moved.
         if not Blog.objects.filter(writer=temporary_member).exists():
             otapick.print_console('success writer({}) migration work!!'.format(
                 temporary_member.full_kanji))
             otapick.print_console('delete {}!!'.format(
                 temporary_member.full_kanji))
             temporary_member.delete()
Ejemplo n.º 16
0
def exe_registration(blog_info_list, post_date, group_id, all_check, tweet, console):
    """
    Register a batch of blogs that share one post_date.

    New blogs and their images are first collected in memory and only saved
    after the whole batch has been walked through.

    Args:
        blog_info_list (list): blog info dicts; every blog in the list is
            expected to have the same post_date. The keys read here are
            'blog_ct', 'title', 'member' and 'image_urls'.
        post_date (date): the post_date shared by the batch
        group_id (int): group ID
        all_check (bool): keep going even when an already saved blog is found
        tweet (bool): whether to tweet an update notification for new blogs
        console (bool): whether to log each newly registered blog
    Returns:
        True (registration should stop), False (registration should continue)
    """
    download_count = 0  # number of blogs in this batch that were not stored yet
    blog_objects = []  # unsaved Blog instances, saved after the loop
    image_objects = []  # unsaved Image instances, saved after the loop

    for i, blog_info in enumerate(blog_info_list):
        # new blog: not stored yet for this group, build it but do not save it
        if not Blog.objects.filter(blog_ct=blog_info['blog_ct'], publishing_group__group_id=group_id).exists():
            blog = Blog(
                blog_ct=blog_info['blog_ct'],
                title=blog_info['title'],
                post_date=post_date,
                order_for_simul=i,
                writer=blog_info['member'],
                publishing_group=Group.objects.filter(
                    group_id=group_id).first(),
            )
            blog_objects.append(blog)
            download_count += 1
        # already saved: reuse the stored row
        else:
            blog = Blog.objects.get(
                blog_ct=blog_info['blog_ct'], publishing_group__group_id=group_id)

        if len(blog_info['image_urls']) > 0:
            order = 0
            for image_url in blog_info['image_urls']:
                # Only download when the blog has no stored image at all.
                if not Image.objects.filter(publisher=blog).exists():
                    media = otapick.BlogImageDownloader().download(
                        image_url, group_id, blog.blog_ct, blog.writer.ct)
                    if media == 'not_image':  # exclude gif
                        pass
                    elif media is not None:
                        image = Image(
                            order=order,
                            picture=media,
                            publisher=blog,
                        )

                        # set width & height
                        w, h = otapick.get_image_w_h(image)
                        image.width = w
                        image.height = h

                        image_objects.append(image)
                        order += 1
                    else:
                        # NOTE(review): this is not inside an ``except``
                        # block, so there is presumably no active exception
                        # for print_exc to report - confirm the intent.
                        import traceback
                        traceback.print_exc()

        # change the order_for_simul of already saved blog with the same post_date
        # NOTE(review): this sits inside the per-blog loop, so stored blogs
        # are shifted once per batch entry by the running download_count
        # rather than once by the final count - confirm this is intended.
        if Blog.objects.filter(post_date=post_date).exists():
            for saved_simultime_blog in Blog.objects.filter(post_date=post_date):
                saved_simultime_blog.order_for_simul += download_count
                saved_simultime_blog.save()

    # save new blog
    for blog_object in blog_objects:
        blog_object.save()
        if console:
            otapick.print_console(
                'register 「' + blog_object.title + '」 written by ' + blog_object.writer.full_kanji)

    # save new image, then compress it
    for image_object in image_objects:
        image_object.save()
        otapick.compress_blog_image(image_object)

    # tweet update info (new blogs only)
    if tweet:
        updateBot = otapick.UpdateBot()
        for blog_object in blog_objects:
            updateBot.tweet(
                group_id=blog_object.publishing_group.group_id, blog_ct=blog_object.blog_ct)

    # When there is at least one already saved blog in blog_list and all_check is False
    if download_count != len(blog_info_list) and not all_check:
        return True

    # When all blog in blog_list are new or when all_check is True
    else:
        return False
0
def register_blogs(group_id, up_limit=100, all_check=False, unregister_num=1, tweet=False):
    """
    Register the blogs of one group, page by page, newest first.

    Flow: blogs are checked from the newest onwards; the run ends when an
    already saved blog is hit (unless ``all_check``) or when the last page
    has been processed. Consecutive blogs with the same post_date are
    collected in ``simultime_blogs``; as soon as a blog with a different
    post_date shows up, the collected batch is handed to
    ``exe_registration`` and a new batch is started. According to the
    original notes, registration writes each blog's index in the batch to
    ``order_for_simul`` and, when stored blogs with the same post_date
    exist, additionally raises their ``order_for_simul``.

    Args:
        group_id (int): group ID
        up_limit (int): maximum number of pages to crawl (default: 100)
        all_check (bool): keep going even when an already saved blog is
            found (default: False)
        unregister_num (int): number of pages covered by the unregistration
            check (default: 1)
        tweet (bool): whether to tweet an update notification
    Returns:
        True (success), None (failed: unknown group or a page crawl failed)
    """
    sleep_time_pagetransition = 3
    simultime_blogs = []
    simultime_post_date = ""
    correct_cts_list = []  # [[234, 3422, ...], [214, 423, ...]]

    blog_crawler = otapick.BlogListCrawler()
    group = Group.objects.filter(group_id=group_id).first()
    if group is None:
        # ``first()`` yields None for an unknown group; report it as a
        # failure instead of crashing on ``group.key``.
        otapick.print_console(
            'group (groupID: {}) is not found.'.format(group_id))
        return None
    group_key = group.key

    for page, is_last in otapick.lastone(range(up_limit)):
        blogs_data = blog_crawler.crawl(group_key, page)
        if blogs_data is None:
            return None
        elif len(blogs_data) == 0:
            # Ran past the last page: flush whatever is still pending.
            otapick.print_console("register unacquired blog...")
            exe_registration(simultime_blogs, simultime_post_date,
                             group_id, all_check, tweet, console=True)
            otapick.print_console("finished!!")
            break

        # Remember the official blog_cts of the first pages for the
        # unregistration check.
        if len(correct_cts_list) < unregister_num:
            correct_cts = [blog_info['blog_ct'] for blog_info in blogs_data]
            correct_cts_list.append(correct_cts)

        for blog_info in blogs_data:
            # first time
            if not simultime_blogs and not simultime_post_date:
                simultime_blogs.append(blog_info)
                simultime_post_date = blog_info['post_date']
            # When the post_date is same time as previous one,
            elif simultime_post_date == blog_info['post_date']:
                simultime_blogs.append(blog_info)
            # When the post_date isn't same time as previous one,
            else:
                finished = exe_registration(
                    simultime_blogs, simultime_post_date, group_id, all_check, tweet, console=True)

                if finished:
                    unregister(correct_cts_list, group, unregister_num)
                    break
                simultime_blogs = [blog_info]
                simultime_post_date = blog_info['post_date']
        else:
            # The page was consumed without hitting a saved blog.
            if is_last:
                exe_registration(simultime_blogs, simultime_post_date,
                                 group_id, all_check, tweet, console=True)
                break
            time.sleep(sleep_time_pagetransition)
            otapick.print_console('go next page.')
            continue
        # Reached only when the inner loop was left through ``break``.
        break
    return True
Ejemplo n.º 18
0
def get_member_image_tag_ex(soup):
    """Return the ``img`` element inside the ``td.infobox-image-wrapper`` cell.

    Args:
        soup: parsed page offering ``find(name, class_=...)``.

    Returns:
        The ``img`` element, or ``None`` when either the wrapper cell or the
        image inside it is missing. Both misses are logged.
    """
    wrapper_tag = soup.find('td', class_='infobox-image-wrapper')
    if wrapper_tag is None:
        # Calling ``.find`` on a missing wrapper would raise AttributeError,
        # so report the miss and stop here.
        otapick.print_console('image_tag_wrapper not found')
        return None

    image_tag = wrapper_tag.find('img')
    if image_tag is None:
        otapick.print_console('image_tag not found')
    return image_tag