コード例 #1
0
ファイル: views.py プロジェクト: UDICatNCHU/PTT_KCM_API
def build_IpTable_with_IpList(request):
	"""Build the IP table from a list file named in the query string.

	Reads the ``file`` and ``apiKey`` query-string parameters and passes
	them to ``pttJson.build_IpTable_with_IpList``.

	Args:
		request: incoming HTTP request; only ``request.GET`` is read.

	Returns:
		HttpResponse: a confirmation message naming the file and key, or a
		400 response when either required parameter is absent.
	"""
	# Use .get() so a missing parameter is reported to the client instead of
	# surfacing as an unbound local or a key error further down.
	list_file = request.GET.get('file')
	apiKey = request.GET.get('apiKey')
	if list_file is None or apiKey is None:
		return HttpResponse(
			"Missing required query parameter(s): file, apiKey",
			status=400)

	p = pttJson()
	p.build_IpTable_with_IpList(list_file, apiKey)
	return HttpResponse(
		"Build IpTable with List {} and key {}!!!".format(list_file, apiKey))
コード例 #2
0
ファイル: locations.py プロジェクト: UDICatNCHU/PTT_KCM_API
def locations(request, datetime):
    """Return a JSON response with scores grouped by location for an issue.

    If ``pttJson.hasFile`` reports a stored "locations" result for this
    issue and date, that stored result is loaded and returned. Otherwise
    the 'ip' API is queried, the distinct (ip, score) pairs of its
    ``attendee`` and ``author`` entries are collected, ``build_map`` fills
    ``result`` from them, and the result is stored with ``save2DB``.

    Args:
        request: HTTP request; ``request.GET['issue']`` is the topic queried.
        datetime: date selector for the query; ``.date()`` is called on it,
            so it is presumably a ``datetime.datetime`` (confirm with the
            URL converter).

    Returns:
        JsonResponse: the result object. A freshly built result starts as
        ``{"issue": <issue>, "map": {}}`` before ``build_map`` populates it.
        Example from the project's documentation (the layout inside
        ``map`` is produced by ``build_map``, which is not shown here):

            {
              "issue": "大巨蛋",
              "map": {
                "Taiwan": {
                  "Taipei City": {
                    "positive": 2.75,
                    "attendee": 3,
                    "negative": 0
                  }
                }
              }
            }
    """
    issue = request.GET['issue']
    store = pttJson()

    # Serve the stored result when one exists for this issue and date.
    if store.hasFile(issue, "locations", datetime):
        stored_path = store.getIssueFilePath(issue, 'locations', datetime)
        return JsonResponse(store.loadFile(stored_path), safe=False)

    query = (('issue', issue), ("date", datetime.date()))
    jsonText = getJsonFromApi(request, 'http', 'PTT_KCM_API', 'ip', query)

    result = dict(issue=issue, map={})

    # Entries whose ip is missing (None or the literal string "None") are
    # skipped; the remaining (ip, score) pairs are de-duplicated via sets.
    attendee_pairs = {(entry['ip'], entry['score'])
                      for entry in jsonText['attendee']
                      if entry['ip'] not in (None, "None")}
    author_pairs = {(entry['ip'], entry['score'])
                    for entry in jsonText['author']
                    if entry['ip'] not in (None, "None")}
    ip_scores = attendee_pairs.union(author_pairs)

    build_map(ip_scores, result)
    store.save2DB(issue, 'locations', result, datetime)
    return JsonResponse(result, safe=False)
コード例 #3
0
ファイル: articles.py プロジェクト: UDICatNCHU/PTT_KCM_API
def articles(request, date):
    """Return the articles for an issue and date as a JSON response.

    When ``pttJson.hasFile`` reports a stored 'articles' file for this
    issue and date, it is loaded into ``p.articleLists``. Otherwise the
    articles are produced by ``filter_with_issue``, the issue folder is
    created if needed, and the list is saved with ``saveFile``.

    Args:
        request: HTTP request; ``request.GET['issue']`` is the topic queried.
        date: date selector passed through to the ``pttJson`` helpers.

    Returns:
        JsonResponse: the value of ``p.get_articles()``.

    Raises:
        An exception propagates from ``request.GET['issue']`` when the
        ``issue`` query-string parameter is missing.
    """
    p = pttJson()
    issue = request.GET['issue']

    if p.hasFile(issue, 'articles', date):
        p.articleLists = p.loadFile(
            p.getIssueFilePath(issue, 'articles', date))
    else:
        p.filter_with_issue(issue, date, 'articles')
        # exist_ok=True replaces the separate os.path.exists() check, so two
        # concurrent requests for a new issue cannot race on folder creation.
        os.makedirs(p.getIssueFolderPath(issue), exist_ok=True)
        p.saveFile(issue, 'articles', p.articleLists, date)
    return JsonResponse(p.get_articles(), safe=False)
コード例 #4
0
ファイル: views.py プロジェクト: UDICatNCHU/PTT_KCM_API
def putIntoDB(request):
	"""Load the JSON file named in the query string into the database.

	Reads the ``file`` query-string parameter and passes it to
	``pttJson.putIntoDB``.

	Args:
		request: incoming HTTP request; only ``request.GET`` is read.

	Returns:
		HttpResponse: a completion message naming the file, or a 400
		response when the ``file`` parameter is absent.
	"""
	# Use .get() so a request without ``file`` gets a clear 400 instead of
	# failing on an unbound local when the message is formatted.
	jsonfile = request.GET.get('file')
	if jsonfile is None:
		return HttpResponse(
			"Missing required query parameter: file", status=400)

	p = pttJson()
	p.putIntoDB(jsonfile)
	return HttpResponse("putIntoDB {} finish!!".format(jsonfile))
コード例 #5
0
ファイル: views.py プロジェクト: UDICatNCHU/PTT_KCM_API
def build_IpTable(request):
	"""Run ``pttJson.build_IpTable`` and acknowledge with a fixed message.

	Args:
		request: incoming HTTP request; not inspected by this view.

	Returns:
		HttpResponse: the text "Build IpTable!!!".
	"""
	pttJson().build_IpTable()
	return HttpResponse("Build IpTable!!!")
コード例 #6
0
ファイル: ip.py プロジェクト: UDICatNCHU/PTT_KCM_API
def ip(request, date):
	"""Return a JSON response with the keys issue, attendee and author.

	If ``pttJson.hasFile`` reports a stored "ip" result for this issue and
	date, it is loaded and returned. Otherwise the 'articles' API is
	queried and the result is assembled from its articles, saved with
	``saveFile`` and returned.

	Args:
		request: HTTP request; ``request.GET['issue']`` is the topic queried.
		date: date selector; ``.date()`` is called on it, so it is presumably
			a ``datetime.datetime`` (confirm with the URL converter).

	Returns:
		JsonResponse shaped as follows when freshly built:

		{
		  "issue": <issue>,
		  "attendee": [   # one entry per message (push) of every article
			{
			  "ip": <get_IpofUser("", push_userid)>,
			  "push_ipdatetime": <message push_ipdatetime>,
			  "push_userid": <message push_userid>,
			  "score": <get_score(message, push_tag)>
			}
		  ],
		  "author": [     # one entry per article
			{
			  "author": <article author>,
			  "ip": <article ip>,
			  "date": <article date>,
			  "score": <get_score(article, article_title)>
			}
		  ]
		}
	"""
	issue = request.GET['issue']
	store = pttJson()

	# Serve the stored result when one exists for this issue and date.
	if store.hasFile(issue, "ip", date):
		stored = store.loadFile(store.getIssueFilePath(issue, 'ip', date))
		return JsonResponse(stored, safe=False)

	query = (('issue', issue), ("date", date.date()))
	jsonText = getJsonFromApi(request, 'http', 'PTT_KCM_API', 'articles', query)

	# Authors are built before attendees, matching the order in which the
	# scoring and lookup helpers have always been invoked.
	authors = [
		dict(
			author=article['author'],
			ip=article['ip'],
			date=article['date'],
			score=get_score(article, article['article_title']))
		for article in jsonText
	]
	attendees = [
		dict(
			ip=get_IpofUser("", push['push_userid']),
			push_ipdatetime=push['push_ipdatetime'],
			push_userid=push['push_userid'],
			score=get_score(push, push['push_tag']))
		for article in jsonText
		for push in article['messages']
	]

	result = dict(issue=issue, attendee=attendees, author=authors)
	store.saveFile(issue, 'ip', result, date)
	return JsonResponse(result, safe=False)
コード例 #7
0
ファイル: tfidf.py プロジェクト: UDICatNCHU/PTT_KCM_API
def tfidf(request, date):
    """Return a JSON response with the top weighted keywords per article.

    If ``pttJson.hasFile`` reports a stored "tfidf" result for this issue
    and date, it is loaded and returned. Otherwise the 'articles' API is
    queried, keywords are extracted with ``jieba.analyse.extract_tags`` from
    each article's content and from each of its messages, the weights are
    merged, and the ten highest-weighted terms per article are kept. The
    result is saved with ``saveFile``.

    Weight merging: a term found in both the accumulated tags and a
    message's tags gets the average of the two weights; a term found only
    in the message is added with the message's weight.

    Args:
        request: HTTP request; ``request.GET['issue']`` is the topic queried.
        date: date selector; ``.date()`` is called on it, so it is presumably
            a ``datetime.datetime`` (confirm with the URL converter).

    Returns:
        JsonResponse shaped as follows when freshly built:

        {
          "issue": <issue>,
          "totalDocs": <pttJson().length>,
          "df": {},                      # created empty, never filled here
          "articleList": [
            {
              "articleID": <article_id>,
              "tfidf": {<term>: <weight>, ...}   # at most 10, descending
            }
          ]
        }
    """
    issue = request.GET['issue']
    p = pttJson()
    if p.hasFile(issue, "tfidf", date):
        result = p.loadFile(p.getIssueFilePath(issue, 'tfidf', date))
    else:
        # The query must be a tuple of (key, value) pairs, as in the other
        # views that call this API. The date pair is included because the
        # stored result is keyed by date.
        jsonText = getJsonFromApi(request, 'http', 'PTT_KCM_API', 'articles',
                                  (('issue', issue),
                                   ("date", date.date())))
        jieba.analyse.set_stop_words(
            "PTT_KCM_API/api/dictionary/stop_words.txt")
        jieba.analyse.set_idf_path("PTT_KCM_API/api/dictionary/idf.txt.big")
        jieba.load_userdict('PTT_KCM_API/api/dictionary/dict.txt.big.txt')
        jieba.load_userdict('PTT_KCM_API/api/dictionary/jieba_expandDict.txt')

        result = dict(issue=issue, totalDocs=p.length, df={}, articleList=[])
        for article in jsonText:
            tags = dict(
                jieba.analyse.extract_tags(article['content'],
                                           topK=10,
                                           withWeight=True))
            for push in article['messages']:
                pushtags = dict(
                    jieba.analyse.extract_tags(push['push_content'],
                                               topK=10,
                                               withWeight=True))
                for term, weight in pushtags.items():
                    if term in tags:
                        tags[term] = (tags[term] + weight) / 2
                    else:
                        tags[term] = weight

            # Keep only the ten highest-weighted terms, highest first.
            tags = OrderedDict(
                sorted(tags.items(), key=lambda x: x[1], reverse=True)[:10])

            result['articleList'].append(
                dict(articleID=article['article_id'], tfidf=tags))

        p.saveFile(issue, 'tfidf', result, date)
    return JsonResponse(result, safe=False)