コード例 #1
0
def run_experiment_with_rake():
    """Run the RAKE keyword-extraction experiment and print its metrics.

    Operates in place on the module-level ``tweets_rake`` DataFrame: adds the
    ``keyword``, ``selected_keyword``, ``inferred_aspect`` and ``gold_aspect``
    columns, writes the frame to ``dump/result_rake.csv`` and prints the
    confusion matrix, accuracy, average precision and average recall
    reported by ``Evaluation``.
    """
    # Single-argument print(...) behaves exactly like the print statement on
    # Python 2 and is also valid on Python 3.
    print("\nBegin experiment using RAKE algorithm...")

    # RAKE: predict the keyword with RAKE, taking the words with the highest
    # RAKE score.
    rake = RakeKeywordExtractor()
    tweets_rake['keyword'] = tweets_rake.apply(
        lambda t: rake.extract_keyword(
            rake.extract_candidates(t['text'], incl_scores=True)),
        axis=1)

    # RAKE: infer the aspect through the aspect mapping, taking the highest
    # similarity. The lookup is done once per tweet; element [1] of its result
    # is stored as the selected keyword and element [0] as the inferred aspect.
    nearest = [asp.find_nearest_inferred_aspect(keyword, emb)
               for keyword in tweets_rake['keyword']]
    tweets_rake['selected_keyword'] = [match[1] for match in nearest]
    tweets_rake['inferred_aspect'] = [match[0] for match in nearest]
    tweets_rake['gold_aspect'] = [
        asp.INVERTED_ASPECTS[aspect]
        for aspect in tweets_rake['inferred_aspect']]

    tweets_rake.to_csv('dump/result_rake.csv', encoding='utf-8', index=False)

    # RAKE: evaluate with accuracy, average precision and average recall.
    eva_rake = Evaluation(tweets_rake)
    conf_matrix = eva_rake.build_confusion_matrix(tweets_rake)
    print("Confusion matrix:")
    print(conf_matrix)
    print("Accuracy using RAKE algorithm: {}".format(eva_rake.accuracy()))
    print("Average Precision using RAKE algorithm: {}".format(
        eva_rake.average_precision()))
    print("Average Recall using RAKE algorithm: {}".format(
        eva_rake.average_recall()))
コード例 #2
0
def evaluate_accuracy(contingency_table: np.ndarray,
                      evaluation: Evaluation,
                      is_sampled_graph: bool = False) -> np.ndarray:
    """Evaluates the accuracy of partitioning.

    The accuracy is the sum of the main diagonal of the normalized contingency
    table. It is printed and stored on `evaluation`.

    Parameters
    ---------
    contingency_table : np.ndarray (int)
        the contingency table (confusion matrix) comparing the true block assignment to the algorithmically
        determined block assignment. Any array-like accepted by np.asarray (e.g. a nested list) also works.
    evaluation : Evaluation
        stores evaluation results; `sampled_graph_accuracy` is set when `is_sampled_graph` is True,
        otherwise `accuracy` is set
    is_sampled_graph : bool
        True if evaluation is for a sampled graph. Default = False

    Returns
    -------
    joint_prob : np.ndarray (float)
        the normalized contingency table. An all-zero table yields NaN entries (NumPy division semantics).
    """
    # np.asarray lets array-likes through and is a no-op for an ndarray
    table = np.asarray(contingency_table)
    # joint probability of the two partitions is just the normalized contingency table
    joint_prob = table / table.sum()
    accuracy = joint_prob.diagonal().sum()
    print('Accuracy (with optimal partition matching): {}'.format(accuracy))
    print()
    if is_sampled_graph:
        evaluation.sampled_graph_accuracy = accuracy
    else:
        evaluation.accuracy = accuracy
    return joint_prob
コード例 #3
0
def run_experiment_with_tfidf(tweets_tfidf):
    """Run the TF-IDF keyword-extraction experiment and print its metrics.

    Args:
        tweets_tfidf: tweet table handed to ``TfidfKeywordExtractor.fit_transform``
            and then merged with the extracted keywords. The caller's object
            is not rebound; the enriched table lives in a local name.

    Side effects:
        Writes ``tfidf_keyword.csv``, ``tweets_tfidf.csv``,
        ``tweets_tfidf_after_merge.csv`` and ``dump/result_tfidf.csv``, and
        prints the confusion matrix, accuracy, average precision and average
        recall reported by ``Evaluation``.
    """
    # Single-argument print(...) behaves exactly like the print statement on
    # Python 2 and is also valid on Python 3.
    print("\nBegin experiment using TF-IDF weighting algorithm...")

    # TF-IDF: find the keyword with TF-IDF, keeping only the single word with
    # the highest weight.
    tfidf = TfidfKeywordExtractor()
    tfidf_weight = tfidf.fit_transform(tweets_tfidf)
    tfidf_weight['keyword'] = tfidf_weight.idxmax(axis=1)

    # MUST BE after extracting keyword
    # OTHERWISE, the keyword will be "tweet_no" for all tweets
    tfidf_weight = tfidf_weight.reset_index().rename(
        columns={'index': 'tweet_no'})
    tfidf_weight['tweet_no'] = tfidf_weight['tweet_no'] + 1
    tfidf_weight = tfidf_weight[['tweet_no', 'keyword']]
    tfidf_weight.to_csv('tfidf_keyword.csv', encoding='utf-8', index=False)

    # Give the tweets the same 1-based 'tweet_no' key so both tables can be
    # joined on it.
    tweets_tfidf = tweets_tfidf.reset_index().rename(
        columns={'index': 'tweet_no'})
    tweets_tfidf['tweet_no'] = tweets_tfidf['tweet_no'] + 1
    tweets_tfidf.to_csv('tweets_tfidf.csv', encoding='utf-8', index=False)

    tweets_tfidf = pd.merge(tweets_tfidf,
                            tfidf_weight,
                            how='left',
                            on='tweet_no')
    tweets_tfidf.to_csv('tweets_tfidf_after_merge.csv',
                        encoding='utf-8',
                        index=False)

    # TF-IDF: infer the aspect through the aspect mapping, taking the highest
    # similarity. The lookup is done once per tweet; element [1] of its result
    # is stored as the selected keyword and element [0] as the inferred aspect.
    nearest = [asp.find_nearest_inferred_aspect(keyword, emb)
               for keyword in tweets_tfidf['keyword']]
    tweets_tfidf['selected_keyword'] = [match[1] for match in nearest]
    tweets_tfidf['inferred_aspect'] = [match[0] for match in nearest]
    tweets_tfidf['gold_aspect'] = [
        asp.INVERTED_ASPECTS[aspect]
        for aspect in tweets_tfidf['inferred_aspect']]

    tweets_tfidf.to_csv('dump/result_tfidf.csv', encoding='utf-8', index=False)

    # TF-IDF: evaluate with accuracy, average precision and average recall.
    eva_tfidf = Evaluation(tweets_tfidf)
    conf_matrix = eva_tfidf.build_confusion_matrix(tweets_tfidf)
    print("Confusion matrix:")
    print(conf_matrix)
    print("Accuracy using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.accuracy()))
    print("Average Precision using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.average_precision()))
    print("Average Recall using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.average_recall()))
コード例 #4
0
ファイル: optim_params.py プロジェクト: ykrmm/REDS
    # Excerpt from a larger function: dataset, Xtrain, ytrain, Xtest and ytest
    # are defined outside this view.
    weight_train,weight_test = dataset.get_weight_train_test()
    eval_ = Evaluation()
    all_result_df = pd.DataFrame(columns=['models','params','AMS','Accuracy','Precision','Recall']) # Table of all results
    optimal_parameters = pd.DataFrame(columns=['models','params','AMS']) # Table of the optimal results
    
    # Random forest: exhaustive grid search over bootstrap, max_depth and
    # n_estimators; the combination with the highest AMS on the test set wins.
    score_opt_ams = 0
    for bootstrap in [True,False]:
        for max_depth in range(3,10):
            for n_estimators in [10, 30, 50, 100]:

                rf = RandomForestClassifier(n_estimators=n_estimators,bootstrap=bootstrap,max_depth=max_depth)
                rf.fit(Xtrain,ytrain)
                ypred = rf.predict(Xtest)
                # Only the AMS score receives the test weights.
                score_ams = eval_.AMS(ytest,ypred,weights=weight_test)
                score_accuracy = eval_.accuracy(ytest,ypred)
                score_precision = eval_.precision(ytest,ypred)
                # The result of eval_.rappel is stored under the 'Recall' column.
                score_recall = eval_.rappel(ytest,ypred)
                l = {'models':'Random Forest','params':str(rf.get_params()),\
                    'AMS':score_ams,'Accuracy':score_accuracy,'Precision':score_precision,'Recall':score_recall}
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # pd.concat is the documented replacement - confirm the
                # pandas version this project pins.
                all_result_df=all_result_df.append(l,ignore_index=True)
                # Keep the parameters of the best AMS seen so far.
                if score_ams > score_opt_ams:
                    optim_param = str(rf.get_params())
                    score_opt_ams = score_ams

    # NOTE(review): within this excerpt optim_param is only bound when some
    # score_ams exceeds 0; unless it is initialised before this view, the
    # next line raises NameError when no configuration scores above 0.
    l = {'models':'Random Forest','params':optim_param,'AMS':score_opt_ams}
    optimal_parameters=optimal_parameters.append(l,ignore_index = True)  
    optimal_parameters.to_csv("optimal_parameters.csv",index=False)
    all_result_df.to_csv("all_result.csv",index=False)

    """
コード例 #5
0
def task_predict(input_files, input_model, isDynamic):
    """
    Predict the speaker of each file matching input_files and report the accuracy

    Every matching file is read, normalised, passed through VAD_process and
    scored by the model. A prediction counts as recognised when its score is
    strictly above the threshold. Each result is recorded in an Evaluation
    object, which is saved after the accuracy has been printed. A file that
    raises while being read or predicted is reported and skipped, so it never
    contributes stale results from a previous file.

    Args:
        input_files (string): full path or glob pattern of the speaker file(s); a leading ~ is expanded
        input_model (string): model trained to give the solution, loaded with ModelInterface.load
        isDynamic (bool): if true, the threshold is looked up per predicted label in the
            model's dynamic thresholds; otherwise the static value 1 / n_label is used
    """
    # Loads the model object and retrieves the number of speakers #

    m = ModelInterface.load(input_model)
    n_label = m.get_n_label()

    # Computes the threshold (dynamic or static) #

    if isDynamic:
        dyn_thrsh = m.get_dyn_threshold()
    else:
        threshold = 1 / n_label

    # Creates an Evaluation object to save the results #

    ev = Evaluation()

    # Starts the prediction process #

    print(input_files)
    # With a "*/*/*.wav" pattern the speaker directory sits one level higher
    nested_pattern = input_files.endswith("*/*/*.wav")
    root = None  # stays None until one file has been processed successfully
    for f in glob.glob(os.path.expanduser(input_files)):
        try:
            # NOTE(review): start_time is reset for every file, so the duration
            # saved at the end covers only the last file attempted - confirm
            # this is intended.
            start_time = time.time()
            fs, signal = read_wav(f)
            signal = signal / max(abs(signal))

            # Extracts the features and predicts the label using the highest score among all possible speakers #

            label, score = m.predict(fs, VAD_process(signal))

        except Exception as e:
            print(f + ' error %s' % (e))
            # Without a fresh label/score there is nothing valid to evaluate
            continue

        # Retrieves the expected label from the directory (evaluation only, not real time) #

        root = os.path.split(f)
        if nested_pattern:
            root = os.path.split(root[0])
        speaker = os.path.basename(root[0])

        # Recognition process : if the given score is higher than the threshold, the label is accepted #
        # Else the speaker is not recognized #

        if isDynamic:
            threshold = dyn_thrsh[label]
        recog = (score > threshold)

        if not recog:
            print(speaker, ' not recognize. ->', label, 'Score->', score)

        else:
            print(speaker, '->', label, ', score->', score)

        # Adds the speaker and its results to the evaluation object #

        ev.new(speaker, label, recog)

    # Nothing matched or every file failed: there is no path to derive the
    # database name from and nothing to evaluate #

    if root is None:
        print('No file could be processed for ' + input_files)
        return

    # Retrieves the Database label used and prints the accuracy #

    path = os.path.split(root[0])[0]
    DB_name = os.path.split(path)[0]
    DB_name = os.path.basename(os.path.split(DB_name)[1])
    print('Accuracy : ', ev.accuracy(), '\n')
    ev.save(os.path.basename(path), n_label, DB_name,
            (time.time() - start_time))
コード例 #6
0
        # Excerpt from a larger routine: tmp, count, buffer, sampling_rate, ev
        # and input_model are defined outside this view.
        m = ModelInterface.load(input_model)
        speaker = input(
            "Write the name of the speaker (for evaluation purposes) :")

        start_time = time.time()
        # tmp counts the predictions made so far; the loop stops after 5.
        while tmp < 5:
            count += 1

            buffer.record(chunk_size=sampling_rate)  # 1 second of record
            data = buffer.get_data()
            data = np.frombuffer(data, 'int16')

            # Predicts once every 3 iterations #
            # Recording at 16000 Hz as sampling rate, (1 * 3) sec as buffer size and converting data in int16 type #

            if count >= 3:
                predict(data, m, ev, speaker)

                # save_RT(speaker, data, width =2, rate=sampling_rate)

                count = 0
                tmp += 1

        # NOTE(review): the constant 15 presumably stands for the recording
        # time (5 predictions x 3 one-second records), leaving only the
        # processing overhead - confirm, and that tmp and count start at 0.
        print("Ok, ", time.time() - start_time - 15, " seconds")

        print('Accuracy : ', ev.accuracy(), '\n')
        ev.save("Real-Time_Speaker_Recognition", tmp, "RTSP/RTSP_" + speaker,
                (time.time() - start_time - 15))

        # Stops the recording #
        buffer.stop_record()
コード例 #7
0
                              pair_wise=[3, 1],
                              train_or_test=0,
                              smoothness=0.1,
                              contour=False)
""" Instantiate an object of Evaluation class to calculate various model metrics. """

# NOTE(review): the name `eval` shadows Python's builtin eval() for the rest
# of this module; renaming it would be safer if nothing else depends on it.
# bayes_classifier and data_preprocess are defined outside this view.
eval = Evaluation(bayes_case=bayes_classifier,
                  data_prep=data_preprocess,
                  test_size=0.30)
class_id = 1  # The class_id for the required class

# Returns confusion matrix for a given Bayesian Classifier Case
cm = eval.confusion_matrix()

# Returns the accuracy of classification for a given Bayesian Classifier Case
acc = eval.accuracy()

# Returns the precision for a given class for a given Bayesian Classifier Case
prec = eval.precision(class_id)

# Returns the recall for a given class for a given Bayesian Classifier Case
rec = eval.recall(class_id)

# Returns the F-score for a given class for a given Bayesian Classifier Case
f_score = eval.f_score(class_id)

# Returns the mean precision of classification for a given Bayesian Classifier Case
mean_prec = eval.mean_precision()

# Returns the mean recall of classification for a given Bayesian Classifier Case
mean_rec = eval.mean_recall()