Beispiel #1
0
    def get_dict(features, labels, percentage_words=0.1, iterations=50):
        """Choose a word dictionary by random search over scored words.

        A candidate pool is built from the first and the last ``per_label``
        entries of every per-label list returned by
        ``USES_MULTI.feature_score``.  Then, ``iterations`` times, a sample
        drawn from that pool with ``USES_MULTI.random_items`` is used to
        filter ``features`` and is scored with ``USES_MULTI.classifier``.
        The sample with the highest score is returned.

        Args:
            features: Input handed to ``USES_MULTI.words_label`` and
                ``NLP.filter_features``.
            labels: Labels handed to the helpers.  Must be non-empty: the
                per-label quota divides by the number of distinct labels.
            percentage_words: Fraction of ``len(words_label)`` used as the
                sample size (rounded to the nearest integer).
            iterations: Number of random samples to evaluate.

        Returns:
            The value returned by ``USES_MULTI.random_items`` for the
            best-scoring sample.  When several samples share the best score,
            the most recently evaluated one is returned.

        Raises:
            IndexError: If no sample was evaluated (``iterations`` < 1).
        """
        import logging

        logger = logging.getLogger(__name__)

        words_label = USES_MULTI.words_label(features, labels)

        sample_size = int(round(len(words_label) * percentage_words))
        per_label = int(sample_size / len(set(labels)))

        features_score = USES_MULTI.feature_score(words_label, labels)

        candidates = {}
        # A zero quota must contribute nothing.  Without this guard the tail
        # slice would be ``ranked[-0:]``, which is the *whole* list.
        if per_label > 0:
            for label in features_score:
                ranked = features_score[label]
                candidates.update(ranked[:per_label])
                candidates.update(ranked[-per_label:])

        evaluated = False
        best_score = None
        best_words = None

        for _ in range(iterations):
            sample = USES_MULTI.random_items(candidates, sample_size)
            filtered_features = NLP.filter_features(features, sample)
            try:
                score = USES_MULTI.classifier(filtered_features, labels)
            except Exception:
                # Best effort: a sample the classifier cannot handle is
                # scored 0 instead of aborting the search.  The failure is
                # logged at debug level so it can still be diagnosed.
                logger.debug("classifier failed; scoring sample as 0",
                             exc_info=True)
                score = 0

            # ``>=`` lets a later sample replace an earlier one on a tie.
            if not evaluated or score >= best_score:
                best_score, best_words = score, sample
                evaluated = True

        if not evaluated:
            raise IndexError(
                "get_dict: no sample was evaluated (iterations < 1)")

        return best_words
    def get_dict(features, labels, max_words=5):
        """Choose a word dictionary by trying sizes 1..``max_words``.

        For every size from 1 up to ``max_words`` (capped at
        ``len(words_label)``), a dictionary is built with
        ``ALTER_USES.build_dict``, ``features`` is filtered with it, and the
        result is scored with ``USES_MULTI.classifier``.  The dictionary with
        the highest score is returned.

        Args:
            features: Input handed to ``USES_MULTI.words_label``,
                ``ALTER_USES.build_dict`` and ``NLP.filter_features``.
            labels: Labels handed to the helpers.
            max_words: Largest size passed to ``ALTER_USES.build_dict``.

        Returns:
            The value returned by ``ALTER_USES.build_dict`` for the
            best-scoring size.  When several sizes share the best score, the
            largest of them is returned.

        Raises:
            IndexError: If no dictionary was evaluated, i.e. ``words_label``
                is empty or ``max_words`` < 1.
        """
        import logging

        logger = logging.getLogger(__name__)

        # NOTE(review): words_label and classifier come from USES_MULTI while
        # scoring and dictionary building come from ALTER_USES -- presumably
        # deliberate reuse; confirm.
        words_label = USES_MULTI.words_label(features, labels)
        max_words = min(max_words, len(words_label))

        features_score = ALTER_USES.feature_score(words_label, labels)

        evaluated = False
        best_score = None
        best_words = None

        for size in range(1, max_words + 1):
            dict_words = ALTER_USES.build_dict(features, labels,
                                               features_score, size)
            filtered_features = NLP.filter_features(features, dict_words)
            try:
                score = USES_MULTI.classifier(filtered_features, labels)
            except Exception:
                # Best effort: a dictionary the classifier cannot handle is
                # scored 0 instead of aborting the search.  The failure is
                # logged at debug level so it can still be diagnosed.
                logger.debug("classifier failed for %d word(s); scoring as 0",
                             size, exc_info=True)
                score = 0

            # ``>=`` lets a later (larger) dictionary replace an earlier one
            # on a tie.
            if not evaluated or score >= best_score:
                best_score, best_words = score, dict_words
                evaluated = True

        if not evaluated:
            raise IndexError(
                "get_dict: no dictionary was evaluated "
                "(empty vocabulary or max_words < 1)")

        return best_words
Beispiel #3
0
# Collapse the input lines into a single lower-cased, newline-free string.
defect = ' '.join(lines).lower().strip().replace("\n", "")

# Tokenise the single document, then apply the cleaning steps in order.
features = NLP.tokenizer([defect])
features = NLP.remove_numbers(features)
features = NLP.remove_small_words(features)
features = NLP.remove_stop_words(features, 'portuguese')
features = NLP.lemmatizer(features, 'portuguese')
features = NLP.remove_punctuation(features)

# Only the first entry of the stored dictionary file is used.
dict_words = load('files/dict.joblib')[0]

# Keep only dictionary words, then convert the text to numeric form.
features = NLP.filter_features(features, dict_words)
features = NLP.text_to_numeric(features, [dict_words])

model = load('files/model.joblib')
labels = model.predict(features)

# The label '1' is reported as high severity; anything else as low.
print('High Severity' if labels[0] == '1' else 'Low Severity')

# End timestamp; the elapsed-time report (fim - ini) is currently disabled.
fim = time.time()