# ---------------------------------------------------------------------------
# Persist one newly crawled post: enrich it with like count, comments and
# keywords, store it in MongoDB, then mirror it into the pgdb tables.
# ---------------------------------------------------------------------------
post_one_data['like_count'] = get_like_count(post_id)
# post_one_data['likes'] = get_like_list(post_id)
# post_one_data['shared'] = get_shared_list(post_id)  # list of sharers
post_one_data['comments'] = get_comment_list(post_id)

# Extract keywords from the post text.
keywords = function.keyword_extract(post_one_data['message'])
post_one_data['keywords'] = keywords

# Store in MongoDB.
collect.insert_one(post_one_data)

# Store in pgdb.
function.keywords_insert_pgdb(keywords)
function.kw_relation_insert_pgdb(keywords)
function.doc_insert_pgdb(post_one_data, 18, 3)  # doc, source, big_source
function.doc_join_kw_insert_pgdb(keywords, post_one_data['href'])
function.daily_kw_insert_pgdb(keywords, post_one_data['date'], 18)  # keywords, date, source_fk
# Associate this fan-page post with the fan page it belongs to.
function.fb_doc_relation_keyword(post_one_data['href'], page['id'])

# ---------------------------------------------------------------------------
# Update posts: re-query the Graph API for every id in crawled_post_ids and
# recompute href, share_count, like_count and comments for it.
# ---------------------------------------------------------------------------
for post_id in crawled_post_ids:
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        res = requests.get(
            'https://graph.facebook.com/v2.3/%s?access_token=%s' % (post_id, token),
            timeout=30)
        post = json.loads(res.text)
    except (requests.exceptions.RequestException, ValueError) as e:
        # Network failure, timeout, or a body that is not valid JSON:
        # report it and move on to the next post instead of aborting the loop.
        # Single-argument print(...) parses the same under Python 2 and 3.
        print(e)
        continue

    href = 'https://www.facebook.com/' + post_id  # href

    try:
        share_count = post['shares']['count']  # share count
    except (KeyError, TypeError):
        # The response carries no usable 'shares' entry: record zero shares.
        # NOTE(review): any payload lacking 'shares' (an API error payload
        # would too, presumably) is also recorded as 0 -- confirm intended.
        share_count = 0
        print('沒有分享數' + ' ' + str(post_id))

    like_count = get_like_count(post_id)
    comments = get_comment_list(post_id)
    # update mongodb
'date': date, 'title': title, 'content': content, 'href': href, 'share_count': share_count, 'like_count': like_count, 'comments': comments, 'keywords': keywords } collect.insert_one(doc) #存進 pgdb function.keywords_insert_pgdb(keywords) function.kw_relation_insert_pgdb(keywords) function.doc_insert_pgdb(doc, 68, 2) #doc,source,big_source function.doc_join_kw_insert_pgdb(keywords, href) function.daily_kw_insert_pgdb(keywords, date, 68) #keywords,date,source_fk print '%d/%d' % (ind1, t1) ind1 += 1 ind2 = 1 t2 = len(crawled_new_links) for link in crawled_new_links: try: res = requests.get(link) except Exception as e: print e continue soup = BeautifulSoup(res.text) href = link #更新已抓過的文章的按讚數 res = requests.get(