Esempio n. 1
0
def page_source_get(url):
    """Fetch ``url`` over HTTP and return its body.

    Returns the value parsed by ``json.loads`` when the text after the last
    ``.`` in ``url`` is ``json``; otherwise returns the response text.

    If ``requests`` raises a ``RequestException`` (including a timeout), an
    error is logged and the process exits with status 1.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server
        # and the error handler below would never get a chance to run.
        rep_data = requests.get(url, timeout=10).text
    except requests.exceptions.RequestException:
        helper.logger_getter().error('Network connection error')
        exit(1)

    # Parsing is kept out of the network try-block: a malformed JSON body is
    # not a connection error and should not be reported as one.
    if url.split('.')[-1] == 'json':
        return json.loads(rep_data)
    return rep_data
Esempio n. 2
0
 def source_get_by_phantomjs():
     """Load ``url`` in PhantomJS and return the resulting page source.

     ``url`` and ``headers`` are free variables taken from the surrounding
     scope. Image loading is disabled and the User-Agent from ``headers`` is
     applied to the PhantomJS page settings.

     On any ``Exception`` the message is logged and the process exits with
     status 1. The driver is always quit, whichever way the function leaves.
     """
     dcap = dict(DesiredCapabilities.PHANTOMJS)
     dcap["phantomjs.page.settings.userAgent"] = headers['User-Agent']
     dcap["phantomjs.page.settings.loadImages"] = False
     driver = webdriver.PhantomJS(executable_path=helper.CURR_PATH +
                                  '/core/phantomjs-2.1.1',
                                  desired_capabilities=dcap)
     helper.logger_getter().debug('The phantomjs is running...')
     try:
         driver.implicitly_wait(5)
         driver.set_page_load_timeout(10)
         # Set a 10-second script timeout.
         driver.set_script_timeout(10)
         driver.get(url)
         return driver.page_source
     except Exception as e:
         helper.logger_getter().error(str(e))
         exit(1)
     finally:
         # A single cleanup point: also runs for SystemExit/KeyboardInterrupt,
         # so the PhantomJS process is not left behind.
         driver.quit()
Esempio n. 3
0
def check_new():
    """Compare the current hot posts with the stored ids and mail new ones.

    On the first run (no ``v2ex_id_data.txt`` under ``helper.TEMP_DIR``) the
    ids are stored via ``id_persistence()`` and the process exits with
    status 0.

    Otherwise the stored ids are read, the hot list is fetched from
    ``hot_url``, and every post whose id is not stored is reported as
    ``title: url`` in a single mail. The stored ids are refreshed before the
    mail is sent.
    """
    id_file = helper.TEMP_DIR + '/v2ex_id_data.txt'

    # if no txt file existing, first init
    if not os.path.isfile(id_file):
        id_persistence()
        helper.logger_getter().info('First init to store id data,exit!')
        exit(0)

    # read previous v2ex_id_data.txt and compare
    with open(id_file) as f:
        old_ids = {line.rstrip() for line in f}

    # Fetch the hot list exactly once. Re-fetching it for every new id costs
    # one request per id and may return a different list than the one the
    # new ids were computed from.
    hot_posts = html.page_source_get(hot_url)
    new_posts = [post for post in hot_posts if str(post['id']) not in old_ids]

    if not new_posts:
        helper.logger_getter().info('V2ex has no new hot post.')
        return

    id_persistence()
    mail_body = [post['title'] + ': ' + post['url'] for post in new_posts]
    helper.mail_send(helper.date_getter() + '  V2exHot Update!',
                     '\n\n'.join(mail_body))
    helper.logger_getter().info('V2ex has new hot posts.')
Esempio n. 4
0
def check_new(option):
    """Mail every blog link that is not yet recorded in ``temp_file``.

    ``option`` is accepted but not used by this function.

    When ``temp_file`` does not exist, the current data is stored through
    ``data_persistence()`` and the process exits with status 0. Otherwise
    each tag in ``aTags_list`` is keyed as ``href|text`` and compared with
    the recorded lines; one mail is sent per unrecorded tag and the record
    is refreshed afterwards.
    """
    if not os.path.isfile(temp_file):
        helper.logger_getter().info(
            "First init to store the url of all posts!")
        data_persistence()
        exit(0)

    with open(temp_file) as record:
        # Keep only the part of each line before its newline.
        recorded_keys = {line.partition('\n')[0] for line in record}

    # The collected items are still tag objects, not strings.
    unseen_tags = []
    for tag in aTags_list:
        key = tag.get('href') + '|' + tag.get_text()
        if key not in recorded_keys:
            unseen_tags.append(tag)

    if not unseen_tags:
        helper.logger_getter().info('Yin did not publish any blog yet!')
        return

    announcement = 'Yin published a new blog'
    for tag in unseen_tags:
        link = yinwang_blog + tag.get('href')
        title = tag.get_text()
        helper.logger_getter().info(announcement)
        subject = helper.date_getter() + '  ' + announcement + ':' + title
        helper.mail_send(subject, link)
        # Screenshot backup of the post is currently disabled here.
    data_persistence()
Esempio n. 5
0
def check_new(option):
    """Detect a change of the first blog link and announce it.

    On the first run (no ``yinBlog_1stURL.txt`` under ``helper.TEMP_DIR``)
    the current first URL is stored via ``firstURL_persistence()`` and the
    process exits with status 0.

    Otherwise the recorded href is compared with ``first_aTag``'s href. When
    they differ, the record is renewed, a mail with the post title and URL
    is sent, and a screenshot is written under ``yinBlogBak``.

    :param option: when truthy, run ``git add``/``commit``/``push`` after the
        screenshot has been made.
    """
    record_path = helper.TEMP_DIR + '/yinBlog_1stURL.txt'

    if not os.path.isfile(record_path):
        firstURL_persistence()
        helper.logger_getter().info(
            "First init to store the url of the first post!")
        exit(0)

    # Read the record and close the file before anything rewrites it.
    # readline() keeps a trailing newline, so strip it to let a
    # newline-terminated record still compare equal.
    with open(record_path) as f:
        recorded_href = f.readline().rstrip('\r\n')

    if first_aTag.get('href') == recorded_href:
        # logger_getter must be called to obtain the logger; accessing
        # .info on the function object itself raises AttributeError.
        helper.logger_getter().info('Yin did not publish any blog yet!')
        return

    # if new first url doesn't equal to the record one, upgrade it first!
    helper.logger_getter().info('Yinwang published a new blog!')
    helper.logger_getter().info('Renew the first url in the file')
    firstURL_persistence()
    blog_url = 'http://www.yinwang.org' + first_aTag.get('href')
    blog_title = first_aTag.get_text().strip()
    helper.mail_send('垠神发表了新Blog: ' + blog_title, blog_url)

    # begin making screenshot
    helper.dir_check('yinBlogBak')
    html.make_screenshot(
        blog_url,
        helper.CURR_PATH + '/yinBlogBak/' + blog_title + '.png')

    # decide whether push the screenshot to github repo or not
    if option:
        os.system('git add .')
        os.system("git commit -m 'backup yinwang blog'")
        os.system('git push origin master')
Esempio n. 6
0
 def main(self):
     """Mail the most recent comment when its id differs from the stored one.

     Takes the first result of ``self.get_query_obj.find()`` and reads its
     ``comment`` and ``objectId`` fields. If ``tmp/obj_id`` is missing, the
     id is only persisted. Otherwise the stored id is compared with the
     current one; on a mismatch the comment text is mailed and the new id
     is persisted.
     """
     misc = Misc()
     logger = helper.logger_getter()
     first_recent_comment = self.get_query_obj.find()[0]
     comment_content = first_recent_comment.get('comment')
     comment_id = first_recent_comment.get('objectId')

     if not helper.file_check('tmp/obj_id'):
         misc.data_persistence(comment_id)
         logger.debug('First run and persistent the object id...')
         return

     # Read the stored id and close the file before it is persisted again.
     # The previous extra open() without a close leaked a file handle.
     with open('tmp/obj_id') as f:
         last_comment_text = f.read().strip()

     if last_comment_text == comment_id:
         logger.debug('There has no new comment for now.')
         return

     helper.mail_send(subject='你的博客有了一个新评论!',
                      mail_body=comment_content)
     # A new comment arrived, so persist the new id right away.
     misc.data_persistence(comment_id)
     logger.debug('Successfully sent the e-mail.')
Esempio n. 7
0
def page_source_get(url, pagetype=None):
    """Return the content of ``url``.

    :param url: address to fetch.
    :param pagetype: when not ``None``, the page is loaded through PhantomJS
        and its page source is returned; when ``None``, a plain HTTP GET is
        used with up to three attempts.
    :return: for the HTTP path, the value parsed by ``json.loads`` when the
        text after the last ``.`` in ``url`` is ``json``, else the response
        text; ``None`` when all three attempts failed.

    The PhantomJS path logs the error and exits with status 1 on any
    ``Exception``.
    """
    headers = {'User-Agent': generate_user_agent(os='win')}

    def source_get_by_phantomjs():
        """Load ``url`` in PhantomJS and return the page source."""
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = headers['User-Agent']
        dcap["phantomjs.page.settings.loadImages"] = False
        driver = webdriver.PhantomJS(executable_path=helper.CURR_PATH +
                                     '/core/phantomjs-2.1.1',
                                     desired_capabilities=dcap)
        helper.logger_getter().debug('The phantomjs is running...')
        try:
            driver.implicitly_wait(5)
            driver.set_page_load_timeout(10)
            # Set a 10-second script timeout.
            driver.set_script_timeout(10)
            driver.get(url)
            return driver.page_source
        except Exception as e:
            helper.logger_getter().error(str(e))
            exit(1)
        finally:
            # Single cleanup point; also runs for SystemExit and
            # KeyboardInterrupt so no PhantomJS process is left behind.
            driver.quit()

    if pagetype is not None:
        return source_get_by_phantomjs()

    max_attempts = 3
    for _ in range(max_attempts):
        try:
            # Without a timeout requests can block forever on a stalled
            # server, and the retry logic below would never run.
            rep_data = requests.get(url, headers=headers, timeout=10).text
        except requests.exceptions.RequestException as e:
            # Random pause between 3.5s and 8.1s before the next attempt.
            seconds = random.choice([i / 10 for i in range(35, 82)])
            helper.logger_getter().debug('Exception: ' + str(e))
            helper.logger_getter().debug(url)
            helper.logger_getter().debug('Sleep for ' + str(seconds) +
                                         's after timeout...')
            time.sleep(seconds)
        else:
            if url.split('.')[-1] == 'json':
                return json.loads(rep_data)
            return rep_data

    # Every attempt failed: make that visible instead of silently falling
    # off the end of the function.
    helper.logger_getter().error('Failed to fetch ' + url + ' after ' +
                                 str(max_attempts) + ' attempts')
    return None
Esempio n. 8
0
def check_new():
    """Mail a notice when the feed has timestamps not yet recorded.

    Reads the recorded lines from ``temp_data``, collects the text of every
    item in ``timestamps_list`` that is not recorded, and discards those
    whose matching entry in ``entries_list`` has an empty title. If anything
    remains, a mail is sent and the record is renewed through
    ``data_persistence()``.
    """
    with open(temp_data) as f:
        previous_data_format = {line.split('\n')[0] for line in f}

    def _has_empty_title(stamp):
        # True when an entry published at ``stamp`` carries an empty title.
        return any(
            stamp == entry.find('published').text
            and entry.find('title').text == ''
            for entry in entries_list)

    # Build the filtered list in one pass. Removing items from a list while
    # iterating over it skips elements, and a second matching entry would
    # make list.remove() raise ValueError.
    new_notification = [
        i.text for i in timestamps_list
        if i.text not in previous_data_format
        and not _has_empty_title(i.text)
    ]

    if not new_notification:
        helper.logger_getter().info('V2ex has no notification 4 you!')
        return

    current_time = time.strftime("%m-%d|%H:%M", time.localtime())
    msg_content = 'V2ex has a new notification for you.'
    helper.mail_send(current_time + '  ' + msg_content, msg_content)
    helper.logger_getter().info(msg_content)
    data_persistence()
    helper.logger_getter().info('Renew the data file')
Esempio n. 9
0
        previous_data_format = [i.split('\n')[0] for i in f.readlines()]

    new_notification = [
        i.text for i in timestamps_list if i.text not in previous_data_format
    ]

    for i in new_notification:
        for j in entries_list:
            if i == j.find('published').text and j.find('title').text == '':
                new_notification.remove(i)

    if len(new_notification) == 0:
        helper.logger_getter().info('V2ex has no notification 4 you!')

    else:
        current_time = time.strftime("%m-%d|%H:%M", time.localtime())
        msg_content = 'V2ex has a new notification for you.'
        helper.mail_send(current_time + '  ' + msg_content, msg_content)
        helper.logger_getter().info(msg_content)
        data_persistence()
        helper.logger_getter().info('Renew the data file')


if __name__ == '__main__':
    # Entry point: compare against the record when it exists; otherwise
    # create the record, log the first-run message and exit with status 0.
    if os.path.isfile(temp_data):
        check_new()
    else:
        data_persistence()
        first_run_message = "First init to store some temp data from v2ex!"
        helper.logger_getter().info(first_run_message)
        exit(0)