コード例 #1
0
def _title_already_stored(collection, title):
    """Return True if a document in *collection* already has this title.

    Streams every document in the collection and compares its ``title``
    field with *title*.  Documents with no data or no ``title`` field are
    treated as non-matching instead of raising ``KeyError``/``TypeError``.
    """
    for doc in collection.stream():
        # Random pause between document checks, kept from the original
        # implementation.  NOTE(review): presumably throttling; with a large
        # collection this dominates the run time -- confirm it is needed.
        time.sleep(random.uniform(1, 3))
        stored = doc.to_dict() or {}
        if stored.get(u'title') == title:
            return True
    return False


def crawl(coin):
    """Fetch one page of news for *coin* and store new articles in ``db``.

    ``coin`` must be an entry of the module-level ``search_keyword`` list;
    its position there selects the matching entries of ``news_pages``,
    ``coin_index``, ``coin_list`` and ``coin_eng``.

    For every result whose title, link and description are all non-empty
    and whose title contains ``coin_list[pos]`` or ``coin_eng[pos]``:

    * if ``coin_index[pos]`` is 0, the article is added to the collection
      named after ``coin_eng[pos]`` without any duplicate check;
    * otherwise it is added only when no stored document has the same title.

    Each stored article increments ``coin_index[pos]``.  After the page has
    been processed, ``news_pages[pos]`` is incremented so the next call
    requests the following page.

    Raises:
        ValueError: if *coin* is not present in ``search_keyword``.
    """
    # Resolve the position once; the original repeated this linear search
    # for every use, including inside the per-article loop.
    pos = search_keyword.index(coin)
    local_name = coin_list[pos]
    english_name = coin_eng[pos]

    news = GoogleNews(lang='ko', encode='utf-8')
    news.search(coin)
    # NOTE(review): fixed 30 s pause after the search, presumably to avoid
    # being rate limited by the news source -- confirm.
    time.sleep(30)
    news.getpage(news_pages[pos])

    for t, u, d in zip(news.get_texts(), news.get_links(), news.get_desc()):
        # Skip incomplete results.
        if t == "" or u == "" or d == "":
            continue
        # Only keep articles whose title mentions the coin by either name.
        if local_name not in t and english_name not in t:
            continue

        dic = {
            u"title": u'{}'.format(t),
            u"desc": u'{}'.format(d),
            u"link": u'{}'.format(u)
        }
        collection = db.collection(u'{}'.format(english_name))

        # coin_index[pos] changes inside this loop, so it is re-read for
        # every article instead of being hoisted.
        if coin_index[pos] == 0:
            # Nothing stored yet by this process: add without a scan.
            collection.add(dic)
            time.sleep(random.uniform(2, 4))
            coin_index[pos] += 1
        elif not _title_already_stored(collection, dic[u'title']):
            print('[{}] ///// {} '.format(coin, dic))
            collection.add(dic)
            time.sleep(random.uniform(1, 5))
            coin_index[pos] += 1

    news_pages[pos] += 1