def test(self):
    """Classify every tweet in the test file, write one trace line per tweet,
    and produce the evaluation file.

    Trace line format: "<id> <predicted_lang> <score> <actual_lang> <correct|wrong>".
    """
    # Context managers guarantee both files are closed even if scoring raises.
    with open(self.test_file, 'r', encoding="utf-8") as input_file, \
         open(self.trace_file, 'w', encoding="utf-8") as output_file:
        for line in input_file:
            # Skip empty lines.
            # FIX: the original used `line is "\n"` — an identity comparison
            # against a string literal, which is implementation-dependent and
            # effectively never matches; `==` compares content.
            if line == "\n":
                continue
            # Fields: id, <unused>, actual language, tweet text.
            partitioned_line = line.split(maxsplit=3)
            tweet_id = partitioned_line[0]   # renamed from `id` (shadowed builtin)
            actual_language = partitioned_line[2]
            tweet = partitioned_line[3]
            # Case-insensitive vocabularies score on lowercased text.
            if isinstance(self.vocabulary, CaseInsensitiveAlphabetChars):
                tweet = tweet.lower()
            # Pick the language with the highest model score.
            highest_score = None
            language_with_highest_score = None
            for language in self.training_model.language_data:
                score = self.training_model.get_language_score_of_tweet(language, tweet)
                if highest_score is None or highest_score < score:
                    highest_score = score
                    language_with_highest_score = language
            languages_match = 'correct' if language_with_highest_score == actual_language else 'wrong'
            output_file.write(' '.join([tweet_id,
                                        language_with_highest_score,
                                        str(highest_score),
                                        actual_language,
                                        languages_match]) + '\n')
    # Renamed from `eval` (shadowed builtin).
    evaluation = Eval(self.trace_file, self.eval_file)
    evaluation.write_to_file()
def result(arrayExpected, arrayTest):
    """Compare expected vs. predicted yes/no labels pairwise and hand the
    confusion-matrix counts to Eval.

    :param arrayExpected: gold labels ('yes'/'no', or None for missing)
    :param arrayTest: predicted labels in the same encoding
    """
    match_yes = 0  # expected 'yes', predicted 'yes'
    match_no = 0   # expected 'no',  predicted 'no'
    fail_yes = 0   # expected 'yes', predicted 'no'  (false negatives)
    fail_no = 0    # expected 'no',  predicted 'yes' (false positives)
    # FIX: iterate pairs with zip instead of range(len(...)) indexing, and
    # test for None with `is not None` instead of `!= None`.
    for expected, predicted in zip(arrayExpected, arrayTest):
        if expected is None or predicted is None:
            # Missing labels are ignored, matching the original behavior.
            continue
        if expected == predicted:
            if expected == 'yes':
                match_yes += 1
            else:
                match_no += 1
        elif expected == 'yes':
            fail_yes += 1
        else:
            fail_no += 1
    Eval(match_yes, match_no, fail_yes, fail_no)
def main():
    """Interactively segment a test corpus with MaxMatch and score the
    result against the gold standard."""
    print('Insert file name of train corpus')
    train = input()  # str() wrapper around input() was redundant
    print('Insert file name of test corpus')
    test = input()
    # Renamed from `dict` to avoid shadowing the builtin.
    dictionary = Dict.make_dict(train)
    gold_standart = Gold_standart.extract_seg_sent(test)
    test_sent = Test_sent.extact(test)

    def maxmatch(sentence, used_dict, parsed_sent):
        """Greedy longest-prefix segmentation; appends tokens to parsed_sent.

        FIX: the original kept state in an uninitialized module-level global
        and, when no prefix matched, appended the whole remainder *before*
        the first character and stopped segmenting.  The standard MaxMatch
        fallback emits the single first character and keeps going.
        """
        if not sentence:
            return
        # Try the longest dictionary prefix first.
        for i in range(len(sentence), 0, -1):
            firstword = sentence[0:i]
            if firstword in used_dict:
                parsed_sent.append(firstword)
                maxmatch(sentence[i:], used_dict, parsed_sent)
                return
        # No prefix matched: emit one character and continue on the rest.
        parsed_sent.append(sentence[0])
        maxmatch(sentence[1:], used_dict, parsed_sent)

    def parser(used_dict, sentences):
        """Segment every sentence; return a list of space-joined tokens."""
        res = []
        for sent in sentences:
            parsed_sent = []  # fresh accumulator per sentence (no global state)
            maxmatch(sent, used_dict, parsed_sent)
            res.append(' '.join(parsed_sent))
        print('All sentences were parsed')
        return res

    result = parser(dictionary, test_sent)
    Eval.score(gold_standart, result)
def eval_test(method):
    """Run the retrieval-evaluation suite for *method*: print MAP, MRR and
    the average P@5, then dump a precision/recall table at depth 20."""
    evaluator = Eval(method, table_fname='precison_recall.txt', max_result=10)
    # Collect the three summary metrics, then report them in order.
    metrics = {
        'map': evaluator.MAP(),
        'mrr': evaluator.MRR(),
        'avg_pak': evaluator.avg_PatK(5),
    }
    for label, value in metrics.items():
        print(label + ':', value)
    evaluator.precision_recall(20)
def naiveBayes(test, train, structFile):
    """Classify each row of *test* with a naive-Bayes model built from
    *train*, persist the model, and hand confusion counts to Eval.

    :param test: DataFrame of test rows (must contain a 'class' column)
    :param train: DataFrame of training rows
    :param structFile: structure file (unused here; kept for interface parity)
    """
    thisDict = allArraysOfFetures(train, 'class')
    rows = test.shape[0]
    match_yes = 0
    match_no = 0
    fail_no = 0
    fail_yes = 0
    # Persist the trained model to disk.
    joblib.dump(thisDict, 'naiveBayes_model.sav')
    column = getColumnTitles(test)[:-1]  # drop trailing 'class' column
    for row_idx in range(rows):  # renamed from `_`, which was actually used
        noPar = 1
        yesPar = 1
        for col in column:
            # FIX: the original used a bare `except: continue`, which also
            # silenced real programming errors (even KeyboardInterrupt).
            # Narrow to the lookup failures a value unseen in training causes.
            try:
                index = valuesType(train, col).index(test.iloc[row_idx][col])
                yesPar *= thisDict[(col, 'yes')][index]
                noPar *= thisDict[(col, 'no')][index]
            except (ValueError, KeyError, IndexError):
                continue  # feature value unseen during training — skip it
        if yesPar > noPar:
            if test.iloc[row_idx]['class'] == 'yes':
                match_yes += 1
            else:
                fail_yes += 1
        else:
            if test.iloc[row_idx]['class'] == 'no':
                match_no += 1
            else:
                fail_no += 1
    Eval(match_yes, match_no, fail_yes, fail_no)
def K_MeansClass(test, train, struct):
    """Cluster every numeric column with k-means (k=5), label each cluster
    centre yes/no by majority vote over the training rows, then classify the
    test rows by their nearest centres and hand confusion counts to Eval.

    @param train: csv DataFrame for training the model
    @param test: csv DataFrame for testing the model
    @param struct: text file describing the csv structure
    @return: None (results go to Eval; the centre dict is saved to disk)
    """
    # FIX: removed `numOfCluster = (int)` — it bound the *type object* `int`
    # and was immediately overwritten by the real value below.
    numOfCluster = 5
    column = numericCol(train, struct)  # numeric column names
    train = train.reset_index(drop=True)
    numOfRow = len(train)
    numericColList = getColList(train, column)  # per-column numeric values
    # One list of k-means centres per numeric column.
    kMeanDict = {}
    for i in range(len(column)):
        kMeanDict[column[i]] = single_kMean(numericColList[i], numOfCluster)
    yesNoDict = makeColDict(column, kMeanDict)  # init yes/no counters
    # Tally yes/no training rows against each row's closest centre.
    for i in range(numOfRow):
        for col in column:
            closest = takeClosest(train[col][i], kMeanDict[col])
            if train['class'][i] == 'yes':
                incYes(yesNoDict, col, closest)
            else:
                incNo(yesNoDict, col, closest)
    # Majority label per centre.
    # FIX: the original created `tmpDict` once and reused it for every
    # column, so all classDict entries aliased the same dict and accumulated
    # centres across columns; build a fresh dict per column.
    classDict = {}
    for col in column:
        tmpDict = {}
        for center in kMeanDict[col]:
            if yesNoDict[col][center]['yes'] > yesNoDict[col][center]['no']:
                tmpDict[center] = 'yes'
            else:
                tmpDict[center] = 'no'
        classDict[col] = tmpDict
    # Score the test set.
    test = test.reset_index(drop=True)
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for i in range(len(test)):
        row = test.loc[i, :]
        if getClass(classDict, row, column, kMeanDict) == 'yes':
            if test['class'][i] == 'yes':
                tp += 1
            else:
                fp += 1
        else:
            if test['class'][i] == 'yes':
                fn += 1
            else:
                tn += 1
    Eval(tp, tn, fp, fn)
    joblib.dump(kMeanDict, 'K-means_model.sav')
def __init__(self, name, extra=None, caller=None):
    # Pre-built evaluators for the two range endpoints; presumably these
    # compute/compare the minimal and maximal indexed values — TODO confirm
    # against Eval('MinimalValue') / Eval('MaximalValue') semantics.
    self._minEvaluator = Eval('MinimalValue')
    self._maxEvaluator = Eval('MaximalValue')
    # Classic-class "super" call via inheritedAttribute —
    # NOTE(review): looks like Zope/ExtensionClass-era code; delegates the
    # rest of initialization to RangeIndex's parent __init__.
    RangeIndex.inheritedAttribute('__init__')(self, name, extra, caller)