def create_df_all_sentences():
    """create the df with pos tags given by each libraries, for each sentence"""
    input_csv = os.path.join(THIS_FOLDER, 'source/utils/sentences_to_GT_POS_corrected.csv')
    df_pos = pd.read_csv(input_csv)
    # For every sentence, store each library's universal-tag output as a
    # stringified list in its own '<lib>_pos' column.
    for row_idx, sentence in enumerate(df_pos['sentence'].tolist()):
        for package in LIST_PACKAGES:
            mapped = map_results_to_universal_tags(
                _pos_tag_sentence(package, sentence), package)
            df_pos.loc[row_idx, package + '_pos'] = str(mapped)
    output_csv = os.path.join(THIS_FOLDER, 'source/utils/sentences_to_GT_POS_libraries.csv')
    df_pos.to_csv(output_csv)
def test_nb_votes(documents: list):
    """we check that we have the right number of votes for a given token """
    expected_votes = len(LIST_PACKAGES)
    for doc in documents:
        # One tagged sequence per library, then walk them in lockstep so
        # each tuple holds all libraries' (token, tag) votes for one token.
        per_library = [
            map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib)
            for lib in LIST_PACKAGES
        ]
        for token_votes in zip(*per_library):
            tags = [tag for _, tag in token_votes]
            assert len(tags) == expected_votes
            # Every library must have voted on the very same token.
            distinct_tokens = {token for token, _ in token_votes}
            assert len(distinct_tokens) == 1
def test_wether_majority_token_equals_gt(documents: list):
    """we check whether the comparison between majority and GT is correct

    Fix: the original bound the helper's result to a local named ``bool``,
    shadowing the builtin inside this function.
    """
    for doc in documents:
        iterables = [
            map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib)
            for lib in LIST_PACKAGES
        ]
        for list_token_tags in zip(*iterables):
            majority_token = return_majority_token(list_token_tags)
            matches_gt = return_wether_majority_token_equals_gt(list_token_tags)
            # The helper must return an actual boolean answer.
            assert matches_gt in [True, False]
            # NOTE(review): the last entry of the zipped tuple is treated as
            # ground truth — presumably the last tagger in LIST_PACKAGES;
            # confirm against the package-list ordering.
            assert matches_gt == (majority_token == list_token_tags[-1])
def test_each_token_has_a_mapped_correct_tag(documents: list):
    """ Check that each mapped tag is a valid value

    Fix: dropped the redundant ``== True`` comparison on ``all(...)`` and
    replaced the materialized inner list with a generator expression.
    """
    mappings = _read_tag_map()
    # Union of all valid universal tags across the three mapping tables.
    valid_tags = (list(mappings['UNIV'].values())
                  + list(mappings['PTB-UNIV'].values())
                  + list(mappings['ARTICLE-UNIV'].values()))
    for doc in documents:
        for lib in LIST_PACKAGES:
            mapped = map_results_to_universal_tags(
                _pos_tag_sentence(lib, doc), lib)
            assert all(tag in valid_tags for _, tag in mapped)
def test_token_majority(documents: list):
    """we check that we have the token with the majority votes is indeed the one having most votes, and we check how many votes he gets """
    for doc in documents:
        tagged_per_lib = [
            map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib)
            for lib in LIST_PACKAGES
        ]
        for votes in zip(*tagged_per_lib):
            majority_count = return_number_votes_majority_token(votes)
            # The winner cannot collect more votes than there are voters...
            assert majority_count <= len(votes)
            # ...and always collects at least its own vote.
            assert majority_count >= 1
            # It must match the actual highest multiplicity in the tuple.
            top_count = max(votes.count(vote) for vote in votes)
            assert majority_count == top_count
def test_unique_tokens_voted(documents: list):
    """we check that the number of unique tokens voted is the right number """
    for doc in documents:
        tagged_per_lib = [
            map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib)
            for lib in LIST_PACKAGES
        ]
        for votes in zip(*tagged_per_lib):
            unique_count = return_unique_tokens(votes)
            majority_count = return_number_votes_majority_token(votes)
            # Bounded by the number of voters, and never empty.
            assert unique_count <= len(votes)
            assert unique_count >= 1
            # If the majority candidate holds `majority_count` votes, the
            # remaining voters can contribute at most one distinct
            # candidate each.
            assert unique_count <= len(votes) - majority_count + 1
def evaluation(file):
    """Build the POS evaluation set and report per-library error profiles.

    Fixes over the original:
    - the flatten comprehensions reused the name ``nltk`` as loop variable
      even for the spacy/stanza/GT columns (and shadowed the nltk module if
      imported at file level);
    - the tag / length-filter / confusion-matrix logic was copy-pasted three
      times — it is now factored into private helpers;
    - ``list(set(flat_list_gt))`` was recomputed on every call site; the
      label list is now computed once so every matrix/report uses the same
      ordering.

    :param file: path to a CSV with a 'sentence' column to tag and compare.
    Side effects: writes 'test_set_pos_tagging.csv', shows three heatmaps,
    prints three classification reports.
    """
    import pandas as pd

    def _universal_tags(lib, sentence):
        # Tag one sentence with `lib` and keep only the universal tags.
        return [tag for _, tag in
                map_results_to_universal_tags(_pos_tag_sentence(lib, sentence), lib)]

    df = pd.read_csv(file)
    # Ground truth comes from the 'article' tagger.
    df['GT'] = df['sentence'].apply(lambda s: _universal_tags('article', s))
    print(df['GT'])

    # Tag with each library in turn; after each one, drop sentences whose
    # token count no longer matches GT (things could be removed when
    # manually reviewing), so later libraries only tag surviving rows.
    for lib in ('nltk', 'stanza', 'spacy'):
        df[lib] = df['sentence'].apply(lambda s, lib=lib: _universal_tags(lib, s))
        print(lib)
        df['same'] = df[['GT', lib]].apply(
            lambda x: 1 if len(x[0]) == len(x[1]) else 0, axis=1)
        df = df[df.same == 1]

    df.to_csv('test_set_pos_tagging.csv')

    # Take only Basel's corrected pos
    df = pd.read_csv('test_set_pos_tagging.csv')[500:]
    for col in ('nltk', 'spacy', 'stanza', 'GT'):
        df[col] = df[col].apply(lambda x: ast.literal_eval(x))

    # we check whenever the 3 libraries agree to get the confusion matrices
    # + classification reports only for the tokens where there are
    # disagreements
    df['agree'] = df[['nltk', 'spacy', 'stanza']].apply(
        lambda x: [1 if a == b == c else 0 for a, b, c in zip(x[0], x[1], x[2])],
        axis=1)

    def _disagreement_tags(col):
        # Flatten `col`'s tags, keeping only positions the libraries
        # disagreed on.
        per_row = df[[col, 'agree']].apply(
            lambda x: [tag for tag, agree in zip(x[0], x[1]) if agree == 0],
            axis=1).tolist()
        return [tag for row in per_row for tag in row]

    flat_gt = _disagreement_tags('GT')
    # Single label ordering shared by all matrices and reports.
    labels = list(set(flat_gt))

    def _report(lib_name, predictions):
        # Heatmap of the confusion matrix plus a classification report for
        # one library, measured against GT on disagreement tokens only.
        matrix = confusion_matrix(flat_gt, predictions, labels=labels)
        df_cm = pd.DataFrame(matrix, index=labels, columns=labels)
        plt.figure(figsize=(10, 7))
        sn.heatmap(df_cm, annot=True, fmt='g')
        # Title carries the total token count under comparison.
        plt.title(lib_name + ' confusion matrix_' + str(np.sum(matrix)))
        plt.xlabel("predicted")
        plt.ylabel("actual")
        plt.show()
        print(lib_name.upper() + ":")
        print(classification_report(flat_gt, predictions, labels=labels))

    for lib in ('nltk', 'spacy', 'stanza'):
        _report(lib, _disagreement_tags(lib))