def predict(parser_params, metric: int) -> List[int]:
    '''
        Parses the reviews file and predicts the quality of every review found.

        Every parsed review is turned into a feature vector of three values
        (author reputation, number of pattern tuples, correctness). The whole
        batch is then handed to annManager.AnnPredict together with the metric.

        :param parser_params: forwarded unchanged to _parse_file.
        :param metric: which trained model to use (1 or 2).
        :Return List[int]: the quality predicted for each review (0 - insufficient; 1 - sufficient;
        2 - good; 3 - excellent). An empty list is returned when no review could
        be parsed or when the prediction is reported as failed.
    '''

    # runs the parser
    parsed_data = _parse_file(parser_params)

    if not parsed_data:
        # Stop here: without reviews there is nothing to iterate over (a None
        # result would raise in the loop below) and nothing to feed the model.
        print('Failed to parse reviews')
        return []

    input_prediction = []

    for entry in parsed_data:

        # finding patterns
        # NOTE(review): the tokens are not used afterwards; the call is kept
        # only because its side effects, if any, are not visible from here.
        tokens = tokensManager.GetTokens(entry, 0)
        tagsTokens = taggerManager.TaggerComment(entry)
        tags = taggerManager.TagsDict(tagsTokens)
        patt1, patt3, patt4, patt5 = patternsManager.GetPatternsDict(tags)

        # NOTE(review): patt3 is counted three times while patt4 and patt5 are
        # never read. This looks like a copy-paste slip, but it is left as is so
        # the features stay identical to the ones the model was trained on.
        number_tuples = len(patt1[1]) + len(patt3[1]) + len(patt3[1]) + len(
            patt3[1])

        # correctness
        correctness = correctnessManager.Correctness(entry)

        # enter features: the author's reputation is a fixed 1
        input_prediction.append([1, number_tuples, correctness])

    predictions = annManager.AnnPredict(input_prediction, metric)

    # NOTE(review): presumably a numpy-like array; .any() is also False when
    # every predicted class is 0, so such a batch is reported as a failure.
    # Confirm what AnnPredict returns on failure before tightening this check.
    if not predictions.any():
        print(
            "Prediction failed. Make sure the requested model was trained and the given metric is valid(1 or 2)."
        )
        return []
    return predictions
# Example #2
0
def Correctness(comment):
    '''
        Returns the percentage of the comment's tokens found in the user dictionary.

        The tokens come from tokensManager.GetTokens(comment, 1) and the
        dictionary is read from 'UserDictionary_pt.txt' (one entry per line,
        UTF-8) on every call.

        :param comment: the text to evaluate.
        :Return float: a value between 0 and 100; 0.0 when no tokens are found.
    '''
    tokens = tokensManager.GetTokens(comment, 1)
    if not tokens:  # no tokens found
        return 0.0

    with open('UserDictionary_pt.txt', encoding="utf8") as dictionary_file:
        # A set makes each membership test O(1) instead of scanning the whole
        # word list once per token.
        known_words = set(dictionary_file.read().split("\n"))

    matches = sum(1 for word in tokens if word in known_words)
    return (matches / len(tokens)) * 100
# Example #3
0
def train_model(parsed_data: List, metric: int):
    '''
        Builds one feature vector per parsed entry and trains the model with it.

        Each entry is indexed as entry[0] (the review text) and entry[1] (its
        expected classification). The feature vector holds the author
        reputation (fixed at 1), the number of pattern tuples and the
        correctness score.

        :param parsed_data: the entries to train on.
        :param metric: the metric to train (1 or 2), or 0 to train both.
    '''

    feature_rows = []
    expected_labels = []

    for sample in parsed_data:
        review = sample[0]

        # finding patterns (the tokens themselves are not used afterwards)
        tokensManager.GetTokens(review, 0)
        tagged = taggerManager.TaggerComment(review)
        tag_dict = taggerManager.TagsDict(tagged)
        patt1, patt3, _patt4, _patt5 = patternsManager.GetPatternsDict(tag_dict)

        # NOTE(review): patt3 is counted three times and patt4/patt5 are never
        # read; kept exactly as the existing feature definition.
        tuple_count = len(patt1[1]) + 3 * len(patt3[1])

        # correctness
        score = correctnessManager.Correctness(review)

        # features: author's reputation, number of tuples, correctness
        feature_rows.append([1, tuple_count, score])

        # classification
        expected_labels.append(sample[1])

    print("The features vector (input) is", feature_rows,
          "and the expected value (output) is", expected_labels, "\n")
    annManager.AnnTraining(feature_rows, expected_labels, metric)
# Example #4
0
output_text = []

# From here on the texts must be loaded so their features can be extracted.
# They may come from an SQL file, txt files, csv... any source works, as long
# as plain text is obtained.
# The file is opened in a with-block so it is closed once every line is read.
with open("examples.txt", encoding="utf8") as file_examples:
    for text in file_examples:

        # list with the text features.
        text_features = []

        # author reputation
        author = 1
        print("Author Reputation:", author)
        text_features.append(author)

        # number of tuples
        # NOTE(review): tokens is not used afterwards in this fragment.
        tokens = tokensManager.GetTokens(text, 0)
        tagsTokens = taggerManager.TaggerComment(text)
        tags = taggerManager.TagsDict(tagsTokens)
        patt1, patt3, patt4, patt5 = patternsManager.GetPatternsDict(tags)

        # NOTE(review): patt3 is counted three times while patt4 and patt5 are
        # never read; confirm whether that is intended.
        number_tuples = len(patt1[1]) + len(patt3[1]) + len(patt3[1]) + len(
            patt3[1])
        print("Number of Tuples:", number_tuples)
        text_features.append(number_tuples)

        # correctness
        correctness = correctnessManager.Correctness(text)
        print("Correctness: ", correctness, "\n-----\n")
        text_features.append(correctness)

        # In the end, this list is meant to be added to the input text list for
        # training purposes.
        # NOTE(review): that append is not present in this fragment.
def TaggerComment(comment):
    '''
        Tokenizes the comment and returns the tags produced by the offline tagger.

        :param comment: the text to tag.
        :Return: whatever TaggerOffline returns for the comment's tokens.
    '''
    # The original author left a disabled call to TaggerOnline(tokens) here as
    # an alternative; only the offline tagger is used.
    return TaggerOffline(tokensManager.GetTokens(comment, 0))