Example #1
def get_once():
    # Fetch an arbitrary topic page and scrape the hidden "once" token from it.
    topic_url = base_topic_url + str(362683)
    resp = session.get(topic_url, headers=headers)
    soup = BS(resp.content, 'html.parser')
    once = soup.find('input', attrs={"name": "once"}).get("value")
    # Store once in the shared global state.
    globlevalue.once = once
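These V2EX snippets share state through a globlevalue module (the once token, the user's registration timestamp, and the username). That module is not shown; a minimal sketch of what the examples assume, with hypothetical defaults, might be:

# globlevalue.py -- hypothetical sketch of the shared-state module used above
# and below; the real project may define it differently.
once = None      # anti-CSRF "once" token scraped from a page (Example #1)
time = None      # logged-in user's registration timestamp (Example #3)
username = None  # current username (Example #7)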
Example #2
 def focus(self, author):
     # Follow an author; the follow endpoint requires the "once" token.
     focus_url = home_page_url + "/follow/" + str(author.id) + "?once=" + str(globlevalue.once)
     response = session.get(focus_url, headers=headers)
     if response.status_code == 200:
         print(termcolor.colored(u"Followed {name}.".format(name=author.name), "green"))
     else:
         print(termcolor.colored(u"Failed to follow.", "red"))
Example #3
 def block(self, author):
     # The "t" parameter is the logged-in user's registration time as a float timestamp.
     block_url = home_page_url + "/block/" + str(author.id) + "?t=" + str(globlevalue.time)
     response = session.get(block_url, headers=headers)
     if response.status_code == 200:
         print(termcolor.colored(u"Blocked {name}.".format(name=author.name), "green"))
     else:
         print(termcolor.colored(u"Failed to block.", "red"))
Example #4
 def collect(self, topic):
     # Favorite a topic: load its page, find the "favorite" link, then request it.
     topic_url = base_topic_url + str(topic.id)
     resp = session.get(topic_url, headers=headers)
     soup = BS(resp.content, 'html.parser')
     div = soup.find('div', class_="topic_buttons")
     # The favorite link is the second child (index 1) of the button bar;
     # contents[0] is usually a whitespace text node.
     collect_url_a = div.contents[1]
     tmp_url = collect_url_a['href']
     collect_url = home_page_url + tmp_url
     # # Get the once value
     # global once
     # once = soup.find('input', attrs={"name": "once"}).get("value")
     # # Store once in the shared global state
     # globlevalue.once = once
     resp = session.get(collect_url, headers=headers)
     if resp.status_code == 200:
         print(termcolor.colored(u"Topic favorited successfully.", "green"))
     else:
         print(termcolor.colored(u"Failed to favorite topic.", "red"))
Example #5
def _crawl_pages(*, start_page: int, end_page: int, url: str):
    for page in tqdm(range(start_page, end_page + 1)):
        try:
            r = session.get(f"{url}&page={page}",
                            headers={'user-agent': ua.random})
            r.raise_for_status()
            yield _get_article_hits_from_query_response(r.json())
        except HTTPError:
            print(f"Exception at {page} of page {end_page}")

        sleep(REASONABLE_WAITING_TIME)
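Examples #5, #9, and #13 come from a separate article crawler and rely on module-level objects that are not shown: session, ua, REASONABLE_WAITING_TIME, and the HTTPError import. A plausible setup, assuming requests_html (since Example #13 reads r.html) and fake_useragent, could look like this; the exact libraries and values are assumptions, not taken from the original project.

from time import sleep
from requests.exceptions import HTTPError
from requests_html import HTMLSession  # responses expose r.html (see Example #13)
from fake_useragent import UserAgent
from tqdm import tqdm

session = HTMLSession()
ua = UserAgent()                 # ua.random yields a random User-Agent string
REASONABLE_WAITING_TIME = 1      # seconds to pause between page requests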
Example #6
def mission():
    content = session.get(mission_url, headers=headers).content
    soup = BS(content, "html.parser")
    # Get the link for claiming the daily coins.
    short_url = soup.find('input', attrs={
        'class': 'super normal button'
    }).get('onclick')
    # The relative redeem URL sits between the first pair of single quotes
    # in the button's onclick handler.
    start = short_url.find("'")
    end = short_url.find("'", start + 1)
    final_url = home_page_url + short_url[start + 1:end]
    page = session.get(final_url, headers=headers).content
    soup = BS(page, "html.parser")
    successful = soup.find('li', attrs={'class': 'fa fa-ok-sign'})
    if successful:
        Logging.success(u'Coins claimed successfully!')
        # print(termcolor.colored('Coins claimed successfully!', 'green'))
    else:
        Logging.error(u'Failed to claim coins!')
        # print(termcolor.colored('Failed to claim coins!', 'red'))


# if __name__ == '__main__':
#     flag, username = is_login()
#     if flag:
#         Logging.debug(u"You are already logged in")
#     else:
#         resp = login()
#         if resp.status_code == 200:
#             Logging.success(u'Login successful, claiming coins...')
#             session.cookies.save(ignore_discard=True)
#             page = session.get(mission_url, headers=headers).content
#             soup = BS(page, "html.parser")
#             is_attain = soup.find('li', attrs={'class': 'fa fa-ok-sign'})
#             if is_attain:
#                 Logging.success(u"Today's coins have already been claimed!")
#             else:
#                 mission()
#         else:
#             Logging.error(u'Login failed!')
Example #7
def main():
    txt = get_logo()
    print(termcolor.colored(txt, "cyan"))
    if not os.path.exists("cookies"):
        flag, username = is_login()
        # Store the username in the shared global state.
        globlevalue.username = username
        if flag:
            Logging.debug(u"You are already logged in")
        else:
            resp = login()
            if resp.status_code == 200:
                Logging.success(u'Login successful, claiming coins...')
                session.cookies.save()
                get_once()
                page = session.get(mission_url, headers=headers).content
                soup = BS(page, "html.parser")
                is_attain = soup.find('li', attrs={'class': 'fa fa-ok-sign'})
                if is_attain:
                    Logging.success(u"Today's coins have already been claimed!")
                    work()
                else:
                    mission()
                    work()
            else:
                Logging.error(u'Login failed!')
    else:
        get_once()
        flag, username = is_login()
        # Store the username in the shared global state.
        globlevalue.username = username
        page = session.get(mission_url, headers=headers).content
        soup = BS(page, "html.parser")
        is_attain = soup.find('li', attrs={'class': 'fa fa-ok-sign'})
        if is_attain:
            Logging.success(u"Today's coins have already been claimed!")
        else:
            mission()
        work()
Example #8
def is_login():
    try:
        index = session.get(home_page_url, headers=headers).content
    except:
        print(termcolor.colored(u"Network error, please check your network settings", "yellow"))
        sys.exit()
    soup = BS(index, 'html.parser')
    # The workspace link only appears for logged-in users.
    login_flag = soup.find('a', href='https://workspace.v2ex.com/')
    if login_flag:
        # print(termcolor.colored("Already logged in", "magenta"))
        user = re.findall(r'<a href="/member/.*?">(.*?)</a>', index)[0]
        return True, user
    else:
        print(termcolor.colored(u"Please log in...", "magenta"))
        return False, None
Example #9
def crawl_paginated_search_hits(url: str):
    r = session.get(url, headers={'user-agent': ua.random})
    r.raise_for_status()

    data = r.json()

    start_page, total_pages = _get_start_end_pages(data)

    paginated_query_hits = list(
        _crawl_pages(start_page=start_page, end_page=total_pages, url=url))
    flattened_query_hits = [
        items for page in paginated_query_hits for items in page
    ]

    _store_query_data(flattened_query_hits)
Example #10
 def author_info(self, topic):
     author_url = home_page_url + "/api/members/show.json?username=" + topic.author
     response = session.get(author_url)
     data = json.loads(response.content)
     user = User()
     user.id = data.get('id')
     user.name = data.get('username')
     user.website = data.get('website')
     user.twitter = data.get('twitter')
     user.github = data.get('github')
     user.location = data.get('location')
     user.tagline = data.get('tagline')
     user.bio = data.get('bio')
     user.time = format_time(data.get('created'))
     return user
Example #11
 def answer(self, topic):
     answer_list = []
     url = "https://www.v2ex.com/api/replies/show.json?topic_id=" + str(
         topic.id)
     response = session.get(url)
     json_data = json.loads(response.content)
     for data in json_data:
         answer = Answer()
         answer.id = data.get('id')
         answer.thanks = data.get('thanks')
         answer.content = filter_emoji(data.get('content'))
         answer.author = data.get('member').get('username')
         answer.time = format_time(data.get('created'))
         answer_list.append(answer)
     return answer_list, topic.replies
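Examples #10 and #11 fill plain User and Answer objects field by field; those classes are not shown here. A minimal sketch consistent with the attributes assigned above (hypothetical, not the project's actual definitions) could be:

class User(object):
    """Hypothetical container; only the attributes set in author_info."""
    id = name = website = twitter = github = None
    location = tagline = bio = time = None


class Answer(object):
    """Hypothetical container; only the attributes set in answer."""
    id = thanks = content = author = time = None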
Example #12
def get_login_data():
    content = session.get(login_url, headers=headers).content
    soup = BS(content, "html.parser")
    once_value = soup.find('input', attrs={"name": "once"}).get('value')
    psw_param = soup.find('input', attrs={"type": "password"}).get('name')
    user_param = soup.find('input', attrs={
        "autofocus": "autofocus"
    }).get('name')
    hidden_param = '/'
    data = {
        user_param: username,
        psw_param: password,
        "next": hidden_param,
        "once": once_value
    }
    return data
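get_login_data() only builds the form payload; the login() function referenced in Example #7 is not included. Assuming the same requests session, headers, and login_url, it presumably POSTs this payload back to the login page, roughly like the sketch below.

def login():
    # Hypothetical sketch: submit the scraped form fields back to the login page.
    data = get_login_data()
    return session.post(login_url, data=data, headers=headers)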
Example #13
def _download_article(article: ArticlePreScraping):
    url = article.full_url
    r = session.get(url, headers={'user-agent': ua.random})

    try:
        r.raise_for_status()
    except HTTPError:
        return

    html = r.html

    data_path = BASE_ARTICLE_DATA_PATH / Path(article.id)

    if not data_path.exists():
        os.makedirs(data_path)

    with open(data_path / "source.html", "w+") as out_file:
        out_file.write(html.raw_html.decode("utf-8"))
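_download_article expects an ArticlePreScraping object exposing at least id and full_url; that type (and BASE_ARTICLE_DATA_PATH) is defined elsewhere in the project. Under that assumption, a minimal stand-in for experimenting with the function could be:

from dataclasses import dataclass

@dataclass
class ArticlePreScraping:
    # Hypothetical stand-in: only the fields read by _download_article.
    id: str
    full_url: str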