Ejemplo n.º 1
0
def main(keyword_list: str):
    """Search wusuobuneng.com and save the articles that match the keywords.

    Every attempt opens a fresh Chrome session, loads the site and clicks the
    button located by ``button_xpath`` (presumably a "load more" control --
    TODO confirm) until the last listed article is rejected by
    ``share.compare_date``.  The listed articles are then passed to
    ``process_articles``, ``get_all_body`` is run on the driver, and a
    non-empty ``result_list`` is written out with ``share.write_file``.

    An attempt that raises is retried from scratch with a new browser; the
    loop ends after the first attempt that completes without an error.

    Args:
        keyword_list: Keywords forwarded unchanged to ``process_articles``.
            NOTE(review): annotated ``str`` although the name suggests a
            list -- confirm against the callers.
    """
    while True:
        print('Searching Wusuobuneng......')
        driver = webdriver.Chrome(share.FILE_PATH + '/chromedriver')
        driver.get('http://www.wusuobuneng.com/')
        timer.sleep(7)
        try:
            button_xpath = '//*[@id="root"]/div/div[3]/div/div[3]/ul/li[1]/div[2]/button'
            while True:
                driver.find_element_by_xpath(button_xpath).click()
                timer.sleep(5)
                articles = driver.find_elements_by_class_name('article-item')
                last_time = articles[-1].find_element_by_class_name(
                    'article-item-time-icon').text
                m, d = get_date(last_time)
                # Stop clicking once the last listed article is rejected by
                # the date check.
                if not share.compare_date(None, m, d):
                    break
            timer.sleep(2)
            process_articles(articles, keyword_list)
            get_all_body(driver)
            if result_list:
                share.write_file(result_list)
            print('Finished')
        except Exception as exc:
            # Retry on ordinary failures only.  A bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit and make this loop
            # impossible to stop from the keyboard.
            print('Wusuobuneng attempt failed, retrying:', exc)
        else:
            break
        finally:
            driver.close()
def add_to_result(block, title, year, month, day, driver, keyword_list):
    """Record the article in *block* when it passes every filter.

    The article is kept only if ``share.double_check`` accepts the title,
    ``share.compare_date`` accepts the date and ``check_exist`` reports the
    title as not yet present.  The link is read from the first ``<a>`` inside
    the element with class ``article-wrapper``; the body is stored as
    ``None``.  ``driver`` is accepted but not used here.
    """
    accepted = (share.double_check(keyword_list, title)
                and share.compare_date(year, month, day)
                and not check_exist(title))
    if not accepted:
        return
    wrapper = block.find_element_by_class_name('article-wrapper')
    href = wrapper.find_element_by_tag_name('a').get_attribute('href')
    result_list.append(share.Page_info(href, title, None))
def list_article(block, keyword_list, driver):
    """Record the article in *block* when its title and date pass the filters.

    The title comes from the block's ``<h2>`` and the date from its
    ``<time>`` text, which is split on ``.`` and read as month, day, year
    (positions 0, 1, 2).  A matching article is appended to ``result_list``
    with its first ``<a>`` href and a ``None`` body.  ``driver`` is accepted
    but not used here.
    """
    title = block.find_element_by_tag_name('h2').text
    pieces = block.find_element_by_tag_name('time').text.split('.')
    year = pieces[2]
    month = pieces[0]
    day = pieces[1]
    accepted = (share.double_check(keyword_list, title)
                and share.compare_date(year, month, day)
                and not check_exist(title))
    if not accepted:
        return
    href = block.find_element_by_tag_name('a').get_attribute('href')
    result_list.append(share.Page_info(href, title, None))
Ejemplo n.º 4
0
def compare_date(text: str) -> bool:
    """Tell whether the date at the end of *text* is recent enough.

    Only the last whitespace-separated token of *text* is inspected.  If it
    contains one of the relative-time markers below the result is ``True``;
    otherwise the token is split on ``-`` into year, month and day and the
    decision is delegated to ``share.compare_date`` (the original docstring
    describes the cut-off as 14 days).
    """
    relative_markers = ('昨天', '前天', '分钟', '刚刚', '小时', '今天')
    stamp = text.split()[-1]
    if any(marker in stamp for marker in relative_markers):
        return True
    fields = stamp.split('-')
    return share.compare_date(fields[0], fields[1], fields[2])
def same_h(block, keyword_list, driver):
    """Record the article in *block* when its title and date pass the filters.

    The ``<time>`` text is parsed in one of two layouts:

    * no ``.`` present: split on whitespace, with the last character of the
      second token dropped (presumably a trailing comma, as in
      "Jan 5, 2020" -- TODO confirm);
    * otherwise: split on ``.``.

    In both layouts positions 0, 1, 2 are read as month, day, year.  A
    matching article is appended to ``result_list`` with the href of the
    ``<a>`` inside the block's ``<h2>`` and a ``None`` body.  ``driver`` is
    accepted but not used here.
    """
    title = block.find_element_by_tag_name('h2').text
    stamp = block.find_element_by_tag_name('time').text
    if '.' not in stamp:
        tokens = stamp.split()
        year, month, day = tokens[2], tokens[0], tokens[1][:-1]
    else:
        tokens = stamp.split('.')
        year, month, day = tokens[2], tokens[0], tokens[1]
    accepted = (share.double_check(keyword_list, title)
                and share.compare_date(year, month, day)
                and not check_exist(title))
    if not accepted:
        return
    heading = block.find_element_by_tag_name('h2')
    href = heading.find_element_by_tag_name('a').get_attribute('href')
    result_list.append(share.Page_info(href, title, None))
def squared_label(block, keyword_list, driver):
    """Record the article in *block*, including its first body paragraph.

    The block's visible text is split into non-empty, stripped lines.  The
    second line is taken as the title; the date is read from the part of the
    last line after its final ``|``, split on whitespace as month, day, year
    with the day's last character dropped (presumably a trailing comma --
    TODO confirm).

    When the article passes the filters, the driver navigates to its link,
    reads the paragraph located by ``xpath`` as the body, appends the result
    to ``result_list`` and then goes back one page in the browser history.
    """
    lines = [
        stripped
        for stripped in (raw.strip() for raw in block.text.split('\n'))
        if stripped != ''
    ]
    title = lines[1]
    tokens = lines[-1].split('|')[-1].strip().split()
    year, month, day = tokens[2], tokens[0], tokens[1][:-1]
    accepted = (share.double_check(keyword_list, title)
                and share.compare_date(year, month, day)
                and not check_exist(title))
    if not accepted:
        return
    wrapper = block.find_element_by_class_name('article-wrapper')
    href = wrapper.find_element_by_tag_name('a').get_attribute('href')
    xpath = '//*[@id="wrapper"]/div[1]/div/div/section/div[2]/div/p'
    # Leave the listing page to read the article body, then return to it.
    driver.get(href)
    paragraph_text = driver.find_element_by_xpath(xpath).text
    result_list.append(share.Page_info(href, title, paragraph_text))
    driver.back()
def main(keyword_list: list):
    """Search the Cleantechnica listing pages and save the matching articles.

    For every URL in ``link_list`` a fresh Chrome session loads the page and
    scans each ``omc-blog-one`` element.  Articles whose title passes
    ``share.double_check`` and whose date passes ``share.compare_date`` are
    collected.  A second session then visits each collected link to fill in
    the body text, and a non-empty result is written with
    ``share.write_file``.

    Args:
        keyword_list: Keywords forwarded to ``share.double_check``.
    """
    print('Searching Cleantechnica......')
    result_list = []
    # Bound before the ``try`` so the cleanup below never hits an unbound
    # name (and masks the real error) when no browser could be started.
    driver = None
    try:
        for url in link_list:
            driver = webdriver.Chrome(share.FILE_PATH + '/chromedriver')
            driver.get(url)
            time.sleep(.1)
            driver.refresh()
            for article in driver.find_elements_by_class_name('omc-blog-one'):
                # Keep the text before the first '|', drop its first 13
                # characters and its last one, then split into tokens
                # (presumably a fixed label precedes the date -- TODO confirm).
                date = article.find_element_by_class_name(
                    'omc-date-time-one').text.split('|')[0][13:-1].split()
                # The day token loses its last three characters (presumably
                # an ordinal suffix plus comma -- TODO confirm).
                y, m, d = date[-1], date[0], date[1][:-3]
                title = article.find_element_by_tag_name('h2').text
                link = article.find_elements_by_tag_name('a')[1].get_attribute(
                    'href')
                if share.double_check(keyword_list,
                                      title) and share.compare_date(y, m, d):
                    result_list.append(share.Page_info(link, title, None))
            driver.close()
        driver = webdriver.Chrome(share.FILE_PATH + '/chromedriver')
        for page in result_list:
            driver.get(page.link)
            time.sleep(1)
            # Use the 2nd paragraph as the body; fall through to the 3rd and
            # then the 4th while the text is 40 characters or shorter.
            for index in (2, 3, 4):
                page.body = driver.find_element_by_xpath(
                    '//*[@id="omc-full-article"]/p[%d]' % index).text
                if len(page.body) > 40:
                    break
        if result_list:
            share.write_file(result_list)
        print('Finished')
    finally:
        if driver is not None:
            try:
                driver.close()
            except selenium.common.exceptions.WebDriverException:
                # The session may already have been closed inside the loop.
                pass
Ejemplo n.º 8
0
def main(keyword_list: list):
    """Search the Azocleantech news index and save the matching articles.

    The ``row`` elements inside the latest-news container are read as
    consecutive pairs: the first row of a pair carries the title on its
    first line, the second row carries the date on its last line as
    "day month year" tokens.  Entries that pass ``share.compare_date`` and
    ``share.double_check`` are resolved with ``get_link`` and appended to
    ``result_list``, which is written with ``share.write_file`` when
    non-empty.

    Args:
        keyword_list: Keywords forwarded to ``share.double_check``.
    """
    print("Searching Azocleantech......")
    driver = webdriver.Chrome(share.FILE_PATH + '/chromedriver')
    try:
        # Navigation happens inside the ``try`` so the browser is still
        # closed when the page fails to load.
        driver.get('https://www.azocleantech.com/news-index.aspx')
        main_part = driver.find_element_by_xpath(
            '//*[@id="ctl00_cphBody_latestNewsItems_posts"]')
        rows = [
            row.text.strip()
            for row in main_part.find_elements_by_class_name('row')
        ]
        # ``i`` is kept because get_link() addresses the entry by position.
        for i in range(len(rows) // 2):
            title = rows[2 * i].split('\n')[0].strip()
            date = rows[2 * i + 1].split('\n')[-1].strip().split()
            y, m, d = date[2], date[1], date[0]
            if share.compare_date(y, m, d) and share.double_check(
                    keyword_list, title):
                link, body = get_link(i, driver)
                result_list.append(share.Page_info(link, title, body))
        if result_list:
            share.write_file(result_list)
        print("Finished")
    finally:
        driver.close()
Ejemplo n.º 9
0
def process(date: str) -> bool:
    """Parse the leading "Month D, YYYY" part of *date* and check its age.

    The string is cut right after the first four-digit year starting with
    ``20``.  The remaining tokens are read as month (first), day (second,
    with its last character dropped) and year (last), and the decision is
    delegated to ``share.compare_date``.

    Anchoring on ``20`` followed by two digits matters for dates such as
    "March 20, 2019": a plain search for ``'20'`` would stop at the day and
    cut the year off.

    Args:
        date: Text starting with the date; anything after the year is
            ignored.

    Returns:
        Whatever ``share.compare_date`` returns for the parsed fields.
    """
    import re  # local import keeps this block self-contained

    year_match = re.search(r'20\d{2}', date)
    if year_match is not None:
        end = year_match.end()
    else:
        # No 20xx year present: keep the legacy cut so such inputs behave
        # exactly as they did before.
        end = date.find('20') + 4
    real_date = date[:end].split()
    y, m, d = real_date[-1], real_date[0], real_date[1][:-1]
    return share.compare_date(y, m, d)