コード例 #1
0
def get_features(tweet):
    """Build the text-only feature vector for a tweet.

    ``tweet`` is converted with ``str()`` and that text is handed to each
    helper in turn.

    Returns a list holding, in order:
      * the number of characters in the text,
      * the number of pieces produced by splitting on a single space,
      * the number of distinct characters,
      * the results of ``happy``, ``sad``, ``swear_number``, ``pos_emo``
        and ``neg_emo``,
      * every item returned by ``feature_getter`` followed by every item
        returned by ``pos_getter``,
      * the results of ``num_stop_words`` and ``num_punc``.
    """
    text = str(tweet)

    # Scalar features that precede the variable-length helper output.
    leading = [
        len(text),
        len(text.split(" ")),
        len(set(text)),
        happy(text),
        sad(text),
        swear_number(text),
        pos_emo(text),
        neg_emo(text),
    ]

    # Both getters return lists that are spliced into the vector in place.
    combined = leading + feature_getter(text) + pos_getter(text)

    combined.append(num_stop_words(text))
    combined.append(num_punc(text))
    return combined
def get_features(tweet):
    """Build the full feature vector for one tweet dict, including its label.

    ``tweet`` is indexed like a Twitter API status: ``created_at``,
    ``source``, ``coordinates``, ``text``, ``entities`` (``hashtags``,
    ``symbols``, ``urls``, ``user_mentions``), ``user``
    (``followers_count``, ``friends_count``, ``created_at``),
    ``retweet_count``, ``in_reply_to_status_id_str`` and ``retweeted``.

    The function is interactive: it prompts on stdin for the class label
    and keeps asking until the answer contains ``1``, ``2`` or ``3``, so
    the returned list always has the same layout.

    Returns a list holding, in order:
      1. ``timediff`` of the tweet's ``created_at``
      2. source code: 2 if ``'web'`` is in ``source``, 1 if ``'mobile'``
         is, otherwise 0
      3. the two coordinate values (``0, 0`` when there are none)
      4. text length, space-separated piece count, hashtag count,
         distinct character count, symbol count, URL count
      5. ``happy`` and ``sad`` results
      6. 1 if the text contains ``:`` or ``;``, else 0
      7. ``swear_number``, ``pos_emo`` and ``neg_emo`` results
      8. the items from ``feature_getter`` then ``pos_getter``
      9. followers count, friends count, ``timediff`` of the account's
         ``created_at``, retweet count, user-mention count
      10. reply flag, retweeted flag
      11. mean ``get_wotscore`` over the tweet's URLs (0.0 without URLs)
      12. the label entered by the operator (1, 2 or 3)
      13. ``num_stop_words`` and ``num_punc`` results

    Raises:
        KeyError: if ``tweet`` lacks one of the keys listed above.
        EOFError: if stdin is exhausted while waiting for a label.
    """
    # Encode once and reuse. ``raw_input`` below implies Python 2, where
    # ``str()`` of the encoded byte string is the byte string itself.
    text = str(tweet['text'].encode('utf-8'))
    entities = tweet['entities']
    urls = entities['urls']

    features = [timediff(tweet['created_at'])]

    source = tweet['source']
    if 'web' in source:
        features.append(2)
    elif 'mobile' in source:
        features.append(1)
    else:
        features.append(0)

    coords = tweet['coordinates']
    if isinstance(coords, dict) and 'coordinates' in coords:
        # The Twitter API delivers a GeoJSON object whose 'coordinates'
        # member holds the numeric pair. Unpacking the dict itself would
        # put its key names into the feature vector.
        coords = coords['coordinates']
    if coords is None:
        features.append(0)
        features.append(0)
    else:
        first, second = coords
        features.append(first)
        features.append(second)

    features.append(len(text))
    features.append(len(text.split(" ")))
    features.append(len(entities['hashtags']))
    features.append(len(set(text)))
    features.append(len(entities['symbols']))
    features.append(len(urls))
    features.append(happy(text))
    features.append(sad(text))
    # Cheap emoticon hint: any colon or semicolon counts.
    features.append(1 if (":" in text or ";" in text) else 0)
    features.append(swear_number(text))
    features.append(pos_emo(text))
    features.append(neg_emo(text))
    features = features + feature_getter(text)
    features = features + pos_getter(text)

    user = tweet['user']
    features.append(user['followers_count'])
    features.append(user['friends_count'])
    features.append(timediff(user['created_at']))
    features.append(tweet['retweet_count'])
    features.append(len(entities['user_mentions']))
    features.append(1 if tweet['in_reply_to_status_id_str'] is not None else 0)
    features.append(1 if tweet['retweeted'] else 0)

    # NOTE(review): each entry of ``urls`` is stringified as a whole before
    # being scored; if entries are dicts this passes their repr rather than
    # the URL itself. Confirm what get_wotscore expects.
    wot_total = 0.0
    for url in urls:
        wot_total += get_wotscore(str(url))
    # Average over the URLs; with no URLs the total is already 0.0.
    features.append(wot_total / len(urls) if urls else wot_total)

    # Ask until a label is recognised. Previously an unrecognised answer
    # appended nothing, which shortened the vector and shifted the two
    # trailing features into the wrong positions.
    # NOTE(review): the prompt mentions pos/neg/neu but only the digits
    # 1, 2 and 3 are recognised; the mapping is not visible here.
    label = None
    while label is None:
        answer = raw_input("enter pos or neg or neu:")
        # Checked in this order so '1' wins over '2', and '2' over '3'.
        for digit in ('1', '2', '3'):
            if digit in answer:
                label = int(digit)
                break
    features.append(label)

    features.append(num_stop_words(text))
    features.append(num_punc(text))
    return features