'author': '壹電視新聞', 'date': date, 'title': title, 'content': content, 'href': href, 'share_count': share_count, 'like_count': like_count, 'comments': comments, 'keywords': keywords } collect.insert_one(doc) #存進 pgdb function.keywords_insert_pgdb(keywords) function.kw_relation_insert_pgdb(keywords) function.doc_insert_pgdb(doc, 68, 2) #doc,source,big_source function.doc_join_kw_insert_pgdb(keywords, href) function.daily_kw_insert_pgdb(keywords, date, 68) #keywords,date,source_fk print '%d/%d' % (ind1, t1) ind1 += 1 ind2 = 1 t2 = len(crawled_new_links) for link in crawled_new_links: try: res = requests.get(link) except Exception as e: print e continue soup = BeautifulSoup(res.text) href = link #更新已抓過的文章的按讚數
print '沒有分享數',post_id post_one_data['like_count'] = get_like_count(post_id) #post_one_data['likes'] = get_like_list(post_id) #post_one_data['shared'] = get_shared_list(post_id) #分享名單 post_one_data['comments'] = get_comment_list(post_id) #提取文本關鍵字 keywords = function.keyword_extract(post_one_data['message']) post_one_data['keywords'] = keywords #存進 mongodb collect.insert_one(post_one_data) #存進 pgdb function.keywords_insert_pgdb(keywords) function.kw_relation_insert_pgdb(keywords) function.doc_insert_pgdb(post_one_data,18,3) #doc,source,big_source function.doc_join_kw_insert_pgdb(keywords,post_one_data['href']) function.daily_kw_insert_pgdb(keywords,post_one_data['date'],18) #keywords,date,source_fk function.fb_doc_relation_keyword(post_one_data['href'],page['id']) #某粉絲團貼文與該粉絲團關聯一起 #更新貼文 for post_id in crawled_post_ids: res = requests.get('https://graph.facebook.com/v2.3/%s?access_token=%s'%(post_id,token)) post = json.loads(res.text) href = 'https://www.facebook.com/'+post_id #href try: share_count = post['shares']['count'] #分享數 except Exception as e: share_count = 0 print '沒有分享數',post_id like_count = get_like_count(post_id) comments = get_comment_list(post_id)
if i['message'] == '': res = requests.get('https://graph.facebook.com/v2.5/%s?fields=attachment&access_token=%s'%(i['id'],token)) try: i['message'] = json.loads(res.text)['attachment']['url'] except: print 'no attachment:',i['id'] cur.execute("""INSERT INTO article_fb_comment (comment_id_pk,message,like_count,recomment_count,recomment_like_count,post_id_fk) VALUES(%s,%s,%s,%s,%s,%s)""", (i['id'],i['message'],i['like_count'],i['recomment_count'],i['recomment_like_count'],post_id)) conn.commit() #insert comments #insert article_document href = 'https://www.facebook.com/'+post_id title = message.split('\n')[0] doc = {'href':href,'author':page['name'],'title':title,'date':date,'like_count':like_count,'share_count':share_count,'comment_count':comment_count} function.keywords_insert_pgdb(post_hashgtags) function.doc_insert_pgdb(doc,18,3) function.doc_join_kw_insert_pgdb(post_hashgtags,href) if isover: print 'insert %d posts'%t break try: posts_url = posts['paging']['next'] except Exception as e: break cur.close() conn.close() print 'success'