Python getFeatures 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: features.features_polarity

메소드/함수: getFeatures

hotexamples.com에서의 예제들: 2

Python getFeatures - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python features.features_polarity.getFeatures의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

파일: polarity.py 프로젝트: adna9/twitter-sentiment-analysis

def classify(
    messages_train,
    labels_train,
    messages_test,
    process_messages_train,
    process_messages_test,
    tokens_train,
    tokens_test,
    process_tokens_train,
    process_tokens_test,
    pos_tags_train,
    pos_tags_test,
    negationList,
    clusters,
    slangDictionary,
    lexicons,
    mpqa_lexicons,
):
    """Train a linear-kernel SVM on the training messages and label the test messages.

    Training labels are binarised first: the string "negative" becomes 0 and
    every other label becomes 1. All POS-tag, POS-bigram, POS-trigram and MPQA
    score tables are built from the training data only and are reused when
    extracting features for the test messages, because the test labels are
    unknown at prediction time.

    Returns whatever ``SVM.predict`` produces for the test feature set.
    """
    # Binary targets: 0 for "negative", 1 for anything else.
    labels_train = [0 if label == "negative" else 1 for label in labels_train]

    # POS-tag n-grams for every message (train first, then test).
    tag_sources = (pos_tags_train, pos_tags_test)
    bigrams_train, bigrams_test = [getBigrams(tags) for tags in tag_sources]
    trigrams_train, trigrams_test = [getTrigrams(tags) for tags in tag_sources]

    # Distinct POS tags, bigrams and trigrams observed in the training set.
    tag_vocab = getPosTagsSet(pos_tags_train)
    bigram_vocab = getBigramsSet(bigrams_train)
    trigram_vocab = getTrigramsSet(trigrams_train)

    # Score tables per category (0 = negative, 1 = positive), computed from
    # the training set and shared by both feature-extraction passes below.
    categories = (0, 1)
    tag_neg, tag_pos = [
        posTagsScore(tag_vocab, category, pos_tags_train, labels_train)
        for category in categories
    ]
    bigram_neg, bigram_pos = [
        posBigramsScore(bigram_vocab, category, bigrams_train, labels_train)
        for category in categories
    ]
    trigram_neg, trigram_pos = [
        posTrigramsScore(trigram_vocab, category, trigrams_train, labels_train)
        for category in categories
    ]

    # Scores for the words of the MPQA lexicons, derived from the training messages.
    mpqaScores = getScores(mpqa_lexicons, process_messages_train, labels_train)

    def build_feature_set(messages, process_messages, tokens, process_tokens, pos_tags, bigrams, trigrams):
        # Extract the features for one data split, then pass them through
        # regularization.regularize; only the split-specific inputs vary.
        extracted = features.getFeatures(
            messages, process_messages, tokens, process_tokens, pos_tags,
            slangDictionary, lexicons, mpqa_lexicons,
            bigrams, trigrams,
            bigram_neg, bigram_pos,
            trigram_neg, trigram_pos,
            tag_neg, tag_pos,
            mpqaScores, negationList, clusters,
        )
        return regularization.regularize(extracted)

    features_train = build_feature_set(
        messages_train, process_messages_train, tokens_train,
        process_tokens_train, pos_tags_train, bigrams_train, trigrams_train,
    )
    features_test = build_feature_set(
        messages_test, process_messages_test, tokens_test,
        process_tokens_test, pos_tags_test, bigrams_test, trigrams_test,
    )

    # C parameter handed to the SVM (equals 2**-9).
    penalty = 0.001953125

    # Fit on the training split, then predict the test split.
    model = SVM.train(features_train, labels_train, c=penalty, k="linear")
    return SVM.predict(features_test, model)

예제 #2

파일 보기

파일: SVMOpt.py 프로젝트: adna9/twitter-sentiment-analysis

    #calculate pos bigrams score for all categories
    #both dictionaries will be used for training and testing (cannot create new for testing because we don't know the labels of the new messages)
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams,0,pos_bigrams_train,labels_train)
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams,1,pos_bigrams_train,labels_train)

    #calculate pos bigrams score for all categories
    #both dictionaries will be used for training and testing (cannot create new for testing because we don't know the labels of the new messages)
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams,0,pos_trigrams_train,labels_train)
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams,1,pos_trigrams_train,labels_train)

    #assign a precision and F1 score to each word of to all mpqa lexicons
    mpqaScores = getScores(mpqa_lexicons,process_messages_train,labels_train)

    #get features from train messages
    features_train = features_polarity.getFeatures(messages_train,process_messages_train,tokens_train,process_tokens_train,pos_tags_train,slangDictionary,lexicons,mpqa_lexicons,pos_bigrams_train,pos_trigrams_train,pos_bigrams_scores_negative,pos_bigrams_scores_positive,pos_trigrams_scores_negative,pos_trigrams_scores_positive,pos_tags_scores_negative,pos_tags_scores_positive,mpqaScores,negationList,clusters)

    #regularize train features
    features_train=regularization.regularize(features_train)

    #get features from test messages 
    features_test = features_polarity.getFeatures(messages_test,process_messages_test,tokens_test,process_tokens_test,pos_tags_test,slangDictionary,lexicons,mpqa_lexicons,pos_bigrams_test,pos_trigrams_test,pos_bigrams_scores_negative,pos_bigrams_scores_positive,pos_trigrams_scores_negative,pos_trigrams_scores_positive,pos_tags_scores_negative,pos_tags_scores_positive,mpqaScores,negationList,clusters)

    #regularize test features
    features_test=regularization.regularize(features_test)



# Capture a start timestamp; presumably used later to report elapsed time
# (the excerpt ends before any use of t1 -- TODO confirm).
t1 = time.time()

# Candidate values named C; given the file name (SVMOpt.py) these look like
# SVM C parameters to try, but the code consuming them is not shown -- verify.
C=[1,2,3,4,5]