return word_frequency def get_part_of_speech_tags(self): """ Returns the part of speech tags as a list of tuples. The first part of each tuple is the term, the second one the part of speech tag. Note: the same word can have a different part of speech tag. In the example below, see "refuse" and "permit" >>> from gender_novels.novel import Novel >>> summary = "They refuse to permit us to obtain the refuse permit." >>> novel_metadata = {'author': 'Hawthorne, Nathaniel', 'title': 'Scarlet Letter', ... 'corpus_name': 'sample_novels', 'date': '1900', ... 'filename': None, 'text': summary} >>> novel = Novel(novel_metadata) >>> novel.get_part_of_speech_tags()[:4] [('They', 'PRP'), ('refuse', 'VBP'), ('to', 'TO'), ('permit', 'VB')] >>> novel.get_part_of_speech_tags()[-4:] [('the', 'DT'), ('refuse', 'NN'), ('permit', 'NN'), ('.', '.')] :rtype: list """ text = nltk.word_tokenize(self.text) pos_tags = nltk.pos_tag(text) return pos_tags if __name__ == '__main__': from dh_testers.testRunner import main_test main_test()
encoding=target_encoding) as target_file: target_file.write(text) def load_graph_settings(show_grid_lines=True): ''' This function sets the seaborn graph settings to the defaults for our project. Defaults to displaying gridlines. To remove gridlines, call with False. :return: ''' show_grid_lines_string = str(show_grid_lines) palette = "colorblind" style_name = "white" background_color = (252 / 255, 245 / 255, 233 / 255, 0.4) style_list = { 'axes.edgecolor': '.6', 'grid.color': '.9', 'axes.grid': show_grid_lines_string, 'font.family': 'serif', 'axes.facecolor': background_color, 'figure.facecolor': background_color } sns.set_color_codes(palette) sns.set_style(style_name, style_list) if __name__ == '__main__': from dh_testers.testRunner import main_test main_test(import_plus_relative=True ) # this allows for relative calls in the import.