print 'error5', link #抓新聞按讚數 res = requests.get( 'http://api.facebook.com/restserver.php?method=links.getstats&format=json&urls=%s' % link) data = json.loads(res.text)[0] like_count = data['like_count'] share_count = data['share_count'] #抓新聞的 comments try: comments = function.get_fb_comments(data['comments_fbid']) except Exception as e: comments = [] print '抓取fb_comments錯誤', e #提取文本關鍵字 keywords = function.keyword_extract(content) #存進 mongodb doc = { 'author': '壹電視新聞', 'date': date, 'title': title, 'content': content, 'href': href, 'share_count': share_count, 'like_count': like_count, 'comments': comments, 'keywords': keywords } collect.insert_one(doc) #存進 pgdb function.keywords_insert_pgdb(keywords)
date = post['created_time'].replace('T',' ') date = date.split('+')[0] post_one_data['date'] = date #創建時間 (UTC+00) print post_one_data['date'] try: post_one_data['share_count'] = post['shares']['count'] #分享數 except Exception as e: post_one_data['share_count'] = 0 print '沒有分享數',post_id post_one_data['like_count'] = get_like_count(post_id) #post_one_data['likes'] = get_like_list(post_id) #post_one_data['shared'] = get_shared_list(post_id) #分享名單 post_one_data['comments'] = get_comment_list(post_id) #提取文本關鍵字 keywords = function.keyword_extract(post_one_data['message']) post_one_data['keywords'] = keywords #存進 mongodb collect.insert_one(post_one_data) #存進 pgdb function.keywords_insert_pgdb(keywords) function.kw_relation_insert_pgdb(keywords) function.doc_insert_pgdb(post_one_data,18,3) #doc,source,big_source function.doc_join_kw_insert_pgdb(keywords,post_one_data['href']) function.daily_kw_insert_pgdb(keywords,post_one_data['date'],18) #keywords,date,source_fk function.fb_doc_relation_keyword(post_one_data['href'],page['id']) #某粉絲團貼文與該粉絲團關聯一起 #更新貼文 for post_id in crawled_post_ids: res = requests.get('https://graph.facebook.com/v2.3/%s?access_token=%s'%(post_id,token)) post = json.loads(res.text)
print resu[0][0], resu[0][1] print '-------------------------------------------------' ''' for o in combi(a): print o print len(combi(a)) print type(combi(a)) ''' # In[ ]: import requests import json import function for k in function.keyword_extract('蔡英文明訪日 日本李登輝之友會協辦晚宴'): print k res = requests.get( 'https://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=的') a = json.loads(res.text) print a['responseData']['cursor']['estimatedResultCount'] # In[1]: #兩文本相似度 #!/usr/bin/env python # -*- coding: utf-8 -*- from copy import deepcopy # 作業系統 import os