def build_IpTable_with_IpList(request):
    """Build the IP table from an IP list named in the query string.

    Query parameters:
        file: forwarded as the first argument of
            ``pttJson.build_IpTable_with_IpList``.
        apiKey: forwarded as the second argument of the same call.

    Returns:
        HttpResponse: a confirmation message echoing both values, or a
        400 response when either parameter is absent.
    """
    ip_list_file = request.GET.get('file')
    apiKey = request.GET.get('apiKey')
    # Reject incomplete requests explicitly; previously a request without
    # these parameters ended in a server error instead of a client error.
    if ip_list_file is None or apiKey is None:
        return HttpResponse(
            "Missing required query parameters: file, apiKey", status=400)

    p = pttJson()
    p.build_IpTable_with_IpList(ip_list_file, apiKey)
    return HttpResponse(
        "Build IpTable with List {} and key {}!!!".format(ip_list_file, apiKey))
def locations(request, datetime):
    """Return the per-location sentiment map of an issue as JSON.

    When ``pttJson.hasFile`` reports a stored ``locations`` result for the
    issue and date, that result is loaded and returned.  Otherwise the
    ``ip`` API of ``PTT_KCM_API`` is queried, the ``(ip, score)`` pairs of
    its ``attendee`` and ``author`` entries are handed to ``build_map``,
    and the outcome is persisted through ``pttJson.save2DB``.

    Args:
        request: Django request; ``request.GET['issue']`` is the topic to
            query (a missing key raises).
        datetime: object whose ``.date()`` is sent to the API as ``date``.
            Note that this name hides any module-level ``datetime`` inside
            the function.

    Returns:
        JsonResponse with the keys ``issue`` and ``map``, e.g.::

            {
                "issue": "<issue>",
                "map": {
                    "Taiwan": {
                        "Taipei City": {
                            "positive": 2.75,
                            "attendee": 3,
                            "negative": 0
                        }
                    }
                }
            }
    """
    def scored_ips(entries):
        # Entries whose ip is None or the literal string "None" carry no
        # usable address and are skipped.
        return {
            (entry['ip'], entry['score'])
            for entry in entries
            if entry['ip'] is not None and entry['ip'] != "None"
        }

    issue = request.GET['issue']
    p = pttJson()

    if not p.hasFile(issue, "locations", datetime):
        jsonText = getJsonFromApi(
            request, 'http', 'PTT_KCM_API', 'ip',
            (('issue', issue), ("date", datetime.date())))
        result = {'issue': issue, 'map': {}}
        ipList = scored_ips(jsonText['attendee']).union(
            scored_ips(jsonText['author']))
        build_map(ipList, result)
        p.save2DB(issue, 'locations', result, datetime)
    else:
        result = p.loadFile(p.getIssueFilePath(issue, 'locations', datetime))

    return JsonResponse(result, safe=False)
def articles(request, date):
    """Return the articles matching an issue as JSON.

    A stored ``articles`` file for the issue and date is loaded when
    ``pttJson.hasFile`` reports one.  Otherwise the articles are produced
    by ``pttJson.filter_with_issue``, the issue folder is created if it
    did not exist yet, and the list is written with ``pttJson.saveFile``.

    Args:
        request: Django request; ``request.GET['issue']`` is required and
            a missing key raises.
        date: date forwarded to the ``pttJson`` file and filter calls.

    Returns:
        JsonResponse wrapping ``pttJson.get_articles()``.
    """
    p = pttJson()
    issue = request.GET['issue']

    if p.hasFile(issue, 'articles', date):
        p.articleLists = p.loadFile(p.getIssueFilePath(issue, 'articles', date))
        return JsonResponse(p.get_articles(), safe=False)

    # Folder existence is sampled before filtering, as in the original flow.
    folder_missing = not os.path.exists(p.getIssueFolderPath(issue))
    p.filter_with_issue(issue, date, 'articles')
    if folder_missing:
        os.makedirs(p.getIssueFolderPath(issue))
    p.saveFile(issue, 'articles', p.articleLists, date)
    return JsonResponse(p.get_articles(), safe=False)
def putIntoDB(request):
    """Import a JSON file named in the query string via ``pttJson.putIntoDB``.

    Query parameters:
        file: forwarded unchanged to ``pttJson.putIntoDB``.

    Returns:
        HttpResponse: a completion message naming the file, or a 400
        response when the ``file`` parameter is absent.
    """
    jsonfile = request.GET.get('file')
    # Reject incomplete requests explicitly; previously a request without
    # the parameter ended in a server error instead of a client error.
    if jsonfile is None:
        return HttpResponse("Missing required query parameter: file", status=400)

    p = pttJson()
    p.putIntoDB(jsonfile)
    return HttpResponse("putIntoDB {} finish!!".format(jsonfile))
def build_IpTable(request):
    """Run ``pttJson.build_IpTable`` and acknowledge with a plain response.

    Args:
        request: Django request (not inspected).

    Returns:
        HttpResponse with a fixed confirmation message.
    """
    pttJson().build_IpTable()
    return HttpResponse("Build IpTable!!!")
def ip(request, date):
    """Return the authors and commenters of an issue, with scores, as JSON.

    A stored ``ip`` file for the issue and date is loaded when
    ``pttJson.hasFile`` reports one.  Otherwise the ``articles`` API of
    ``PTT_KCM_API`` is queried, one ``author`` entry is built per article
    and one ``attendee`` entry per message of every article, and the
    result is written with ``pttJson.saveFile``.

    Args:
        request: Django request; ``request.GET['issue']`` is the topic to
            query (a missing key raises).
        date: object whose ``.date()`` is sent to the API as ``date``.

    Returns:
        JsonResponse shaped like::

            {
                "issue": "<issue>",
                "attendee": [      # one entry per push message
                    {
                        "ip": "140.120.4.13",
                        "push_ipdatetime": "10/25 11:34",
                        "push_userid": "McCain",
                        "score": "3"
                    }
                ],
                "author": [        # one entry per article
                    {
                        "author": "<author>",
                        "ip": "140.120.4.13",
                        "date": "<date>",
                        "score": "3"
                    }
                ]
            }
    """
    issue = request.GET['issue']
    p = pttJson()

    if not p.hasFile(issue, "ip", date):
        jsonText = getJsonFromApi(
            request, 'http', 'PTT_KCM_API', 'articles',
            (('issue', issue), ("date", date.date())))

        # Authors are scored first, then the push messages, matching the
        # order in which the scoring helper was originally invoked.
        authors = [
            {
                'author': article['author'],
                'ip': article['ip'],
                'date': article['date'],
                'score': get_score(article, article['article_title']),
            }
            for article in jsonText
        ]
        attendees = [
            {
                'ip': get_IpofUser("", push['push_userid']),
                'push_ipdatetime': push['push_ipdatetime'],
                'push_userid': push['push_userid'],
                'score': get_score(push, push['push_tag']),
            }
            for article in jsonText
            for push in article['messages']
        ]

        result = {'issue': issue, 'attendee': attendees, 'author': authors}
        p.saveFile(issue, 'ip', result, date)
    else:
        result = p.loadFile(p.getIssueFilePath(issue, 'ip', date))

    return JsonResponse(result, safe=False)
def _top_tags_for_article(article, top_k=10):
    """Return the ``top_k`` highest-weighted keywords of one article.

    Keywords come from ``jieba.analyse.extract_tags`` applied to the
    article content and to every push message.  A keyword found again in
    a push message has its weight replaced by the mean of the current
    weight and the push weight; a new keyword is added with the push
    weight.

    Args:
        article: mapping with the keys ``content`` and ``messages``; each
            message is a mapping with the key ``push_content``.
        top_k: number of keywords extracted per text and kept in the end.

    Returns:
        OrderedDict mapping keyword to weight, highest weight first.
    """
    tags = dict(
        jieba.analyse.extract_tags(article['content'], topK=top_k, withWeight=True))
    for push in article['messages']:
        pushtags = dict(
            jieba.analyse.extract_tags(push['push_content'], topK=top_k, withWeight=True))
        for word, weight in pushtags.items():
            if word in tags:
                tags[word] = (tags[word] + weight) / 2
            else:
                tags[word] = weight
    ranked = sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_k]
    return OrderedDict(ranked)


def tfidf(request, date):
    """Return the top keyword weights of every article of an issue as JSON.

    A stored ``tfidf`` file for the issue and date is loaded when
    ``pttJson.hasFile`` reports one.  Otherwise the ``articles`` API of
    ``PTT_KCM_API`` is queried, jieba is configured with the project
    dictionaries, the keywords of each article are computed, and the
    result is written with ``pttJson.saveFile``.

    Args:
        request: Django request; ``request.GET['issue']`` is the topic to
            query (a missing key raises).
        date: object whose ``.date()`` is sent to the API as ``date``.

    Returns:
        JsonResponse shaped like::

            {
                "issue": "<issue>",
                "totalDocs": 150000,
                "df": {},
                "articleList": [
                    {
                        "articleID": "id1",
                        "tfidf": {"<keyword>": 1.0, "<keyword>": 0.5}
                    }
                ]
            }

        ``df`` is always emitted empty by this function.
    """
    issue = request.GET['issue']
    p = pttJson()

    if p.hasFile(issue, "tfidf", date):
        result = p.loadFile(p.getIssueFilePath(issue, 'tfidf', date))
    else:
        # The query arguments must be a tuple of (key, value) pairs like
        # the other getJsonFromApi calls in this file.  The former
        # ``(('issue', issue))`` lacked a trailing comma and therefore was
        # a single flat pair.  The date is passed as ip() does for the same
        # endpoint, because the result is cached per date.
        # NOTE(review): confirm against getJsonFromApi's handling of args.
        jsonText = getJsonFromApi(
            request, 'http', 'PTT_KCM_API', 'articles',
            (('issue', issue), ('date', date.date())))

        jieba.analyse.set_stop_words(
            "PTT_KCM_API/api/dictionary/stop_words.txt")
        jieba.analyse.set_idf_path("PTT_KCM_API/api/dictionary/idf.txt.big")
        jieba.load_userdict('PTT_KCM_API/api/dictionary/dict.txt.big.txt')
        jieba.load_userdict('PTT_KCM_API/api/dictionary/jieba_expandDict.txt')

        result = dict(
            issue=issue,
            totalDocs=p.length,
            df={},
            articleList=[
                dict(articleID=article['article_id'],
                     tfidf=_top_tags_for_article(article))
                for article in jsonText
            ],
        )
        p.saveFile(issue, 'tfidf', result, date)

    return JsonResponse(result, safe=False)