def crawl(coin):
    """Crawl Google News (Korean) for *coin* and store new articles in Firestore.

    Fetches the current results page for this coin, then adds every
    non-empty article whose title mentions the coin (by its Korean or
    English name) to the coin's Firestore collection, skipping titles
    that are already stored.  Finally advances the per-coin page counter
    so the next call fetches the following page.

    Relies on module-level state: the parallel lists ``news_pages``,
    ``search_keyword``, ``coin_index``, ``coin_list``, ``coin_eng``
    (all indexed by the coin's position in ``search_keyword``) and the
    Firestore client ``db``.
    """
    # The coin's position — and everything derived from it — is
    # loop-invariant, so look it up once instead of on every use
    # (each .index() call is a linear scan).
    pos = search_keyword.index(coin)
    collection_name = u'{}'.format(coin_eng[pos])

    news = GoogleNews(lang='ko', encode='utf-8')
    news.search(coin)
    time.sleep(30)  # throttle before paging to avoid Google rate limiting
    news.getpage(news_pages[pos])

    titles = news.get_texts()
    urls = news.get_links()
    descs = news.get_desc()

    for title, url, desc in zip(titles, urls, descs):
        # Skip incomplete results.
        if title == "" or url == "" or desc == "":
            continue
        dic = {
            u"title": u'{}'.format(title),
            u"desc": u'{}'.format(desc),
            u"link": u'{}'.format(url),
        }
        # Only keep articles whose title actually mentions the coin,
        # by either its Korean or its English name.
        if coin_list[pos] not in title and coin_eng[pos] not in title:
            continue

        if coin_index[pos] == 0:
            # First article ever stored for this coin: nothing to
            # deduplicate against, add it directly.
            db.collection(collection_name).add(dic)
            time.sleep(random.uniform(2, 4))
            coin_index[pos] += 1
        else:
            # Duplicate check: linear scan over every stored document.
            # NOTE(review): this streams the whole collection for each
            # new article (O(n) Firestore reads per article) — fine for
            # small collections; consider a title-keyed query later.
            is_new = True
            for doc in db.collection(collection_name).stream():
                time.sleep(random.uniform(1, 3))
                if dic['title'] == doc.to_dict()['title']:
                    is_new = False
                    break
            if is_new:
                print('[{}] ///// {} '.format(coin, dic))
                db.collection(collection_name).add(dic)
                time.sleep(random.uniform(1, 5))
                coin_index[pos] += 1

    # Move to the next results page for the next crawl of this coin.
    news_pages[pos] += 1