コード例 #1
0
             
    tp = sum([combin(l,2) for g in gw.values() for l in g.values() if l > 1])
    fn = 0
    for c in cs:
        l = []
        for g in gw.values():
            l.append(g[c])
        combines = itertools.combinations(l,2)  
        for x,y in combines:
            fn += x*y
            
    tn =  tp_tn_fp_fn - tp_tn - fn
    return float((tp + tn))/tp_tn_fp_fn
            
if __name__ == '__main__':
    inverse_index('bing','bing_index.json')
    f = open('bing_index.json')
    inverse_index = json.loads(f.read())
    
    f1 = open('test_idf','w')
    f2 = open('test_idf2.json','w')
    fj = open('tf_doc.json','r')
    tf_doc = json.loads(fj.read())
        
    word_map = {}
    doc_map = {}
    index_doc_map = {}
    index_word_map = {}
    i = 0
    j = 0
    for word in inverse_index:
コード例 #2
0
'''
Created on Jun 12, 2013

@author: Administrator
'''
import json, os
from part_1 import inverse_index

TWEET_PATH = 'C:/Users/Administrator/workspace/670_hw_1/mars_tweets_medium.json'
TWEET_TEXT_PATH = 'C:/Users/Administrator/workspace/670_hw_1/tweet/'


def get_tweet_text():
    """Split the tweet dump at TWEET_PATH into one text file per tweet.

    Each line of TWEET_PATH is parsed as a JSON object. Its 'text' value is
    UTF-8 encoded, lower-cased and written to a file inside TWEET_TEXT_PATH
    whose name is the zero-based line number. TWEET_TEXT_PATH is created
    first when it does not exist.

    NOTE: the str.decode() call below only exists on Python 2 byte strings.
    """
    if not os.path.exists(TWEET_TEXT_PATH):
        os.mkdir(TWEET_TEXT_PATH)
    # Context managers guarantee the input file and every output file are
    # closed, even if a malformed line makes json.loads raise part-way.
    with open(TWEET_PATH, 'r') as wf:
        for i, line in enumerate(wf):
            encoded_string = line.strip().decode('utf-8')
            tweet = json.loads(encoded_string)
            tweet_text = tweet['text'].encode('utf-8').lower()
            # os.path.join avoids the doubled separator produced by
            # concatenating '/' onto a path that already ends with one.
            with open(os.path.join(TWEET_TEXT_PATH, str(i)), 'w') as f:
                f.write(tweet_text)


if __name__ == '__main__':
    # The get_tweet_text() step is intentionally left disabled here; only
    # the index-building call runs when this module is executed as a script.
    source_name = 'tweet'
    output_name = 'tweet_index.json'
    inverse_index(source_name, output_name)
コード例 #3
0
    tp = sum([combin(l, 2) for g in gw.values() for l in g.values() if l > 1])
    fn = 0
    for c in cs:
        l = []
        for g in gw.values():
            l.append(g[c])
        combines = itertools.combinations(l, 2)
        for x, y in combines:
            fn += x * y

    tn = tp_tn_fp_fn - tp_tn - fn
    return float((tp + tn)) / tp_tn_fp_fn


if __name__ == '__main__':
    inverse_index('bing', 'bing_index.json')
    f = open('bing_index.json')
    inverse_index = json.loads(f.read())

    f1 = open('test_idf', 'w')
    f2 = open('test_idf2.json', 'w')
    fj = open('tf_doc.json', 'r')
    tf_doc = json.loads(fj.read())

    word_map = {}
    doc_map = {}
    index_doc_map = {}
    index_word_map = {}
    i = 0
    j = 0
    for word in inverse_index:
コード例 #4
0
'''
Created on Jun 12, 2013

@author: Administrator
'''
import json,os
from part_1 import inverse_index

TWEET_PATH = 'C:/Users/Administrator/workspace/670_hw_1/mars_tweets_medium.json'
TWEET_TEXT_PATH = 'C:/Users/Administrator/workspace/670_hw_1/tweet/'

  
def get_tweet_text():
    """Write the text of every tweet in TWEET_PATH to its own file.

    TWEET_PATH is read line by line, each line being decoded as UTF-8 and
    parsed as JSON. The tweet's 'text' value is re-encoded as UTF-8,
    lower-cased and stored in TWEET_TEXT_PATH under the zero-based line
    number. The output directory is created if it is missing.

    NOTE: the str.decode() call below only exists on Python 2 byte strings.
    """
    if not os.path.exists(TWEET_TEXT_PATH):
        os.mkdir(TWEET_TEXT_PATH)
    # `with` closes the source file and each per-tweet file deterministically,
    # including when parsing or writing raises in the middle of the loop.
    with open(TWEET_PATH, 'r') as wf:
        for i, line in enumerate(wf):
            encoded_string = line.strip().decode('utf-8')
            tweet = json.loads(encoded_string)
            tweet_text = tweet['text'].encode('utf-8').lower()
            # TWEET_TEXT_PATH already ends with '/', so join the name with
            # os.path.join rather than adding a second separator by hand.
            with open(os.path.join(TWEET_TEXT_PATH, str(i)), 'w') as f:
                f.write(tweet_text)

if __name__ == '__main__':
    # get_tweet_text() stays commented out in this entry point, so running
    # the script performs the inverse_index call only.
    index_args = ('tweet', 'tweet_index.json')
    inverse_index(*index_args)