예제 #1
0
        return word_frequency

    def get_part_of_speech_tags(self):
        """
        Tokenize the novel's text and label every token with a part of speech tag.

        Each element of the result is a ``(token, tag)`` tuple. Tags are assigned per
        occurrence, so one spelling can receive different tags at different positions in
        the text -- compare "refuse" and "permit" in the example below.

        >>> from gender_novels.novel import Novel
        >>> summary = "They refuse to permit us to obtain the refuse permit."
        >>> novel_metadata = {'author': 'Hawthorne, Nathaniel', 'title': 'Scarlet Letter',
        ...                   'corpus_name': 'sample_novels', 'date': '1900',
        ...                   'filename': None, 'text': summary}
        >>> novel = Novel(novel_metadata)
        >>> novel.get_part_of_speech_tags()[:4]
        [('They', 'PRP'), ('refuse', 'VBP'), ('to', 'TO'), ('permit', 'VB')]
        >>> novel.get_part_of_speech_tags()[-4:]
        [('the', 'DT'), ('refuse', 'NN'), ('permit', 'NN'), ('.', '.')]

        :return: (token, part of speech tag) tuples, one per token of ``self.text``
        :rtype: list
        """
        # Tokenize first, then feed the token list straight into the tagger.
        return nltk.pos_tag(nltk.word_tokenize(self.text))


if __name__ == '__main__':
    # Only reached when this file is executed directly as a script: hand control
    # to the dh_testers runner (presumably runs this module's tests -- confirm
    # against the dh_testers documentation).
    from dh_testers.testRunner import main_test as run_module_tests

    run_module_tests()
예제 #2
0
                     encoding=target_encoding) as target_file:
        target_file.write(text)


def load_graph_settings(show_grid_lines=True):
    """
    Apply the project's default seaborn graph settings.

    Registers the "colorblind" color codes and activates the "white" style with the
    project's overrides (edge/grid colors, serif font, shared background color).

    :param show_grid_lines: whether gridlines are displayed; defaults to True, pass
        False to remove them
    :return: None
    """
    # RGBA tuple (alpha 0.4) used for both the axes and the figure background.
    shared_background = (252 / 255, 245 / 255, 233 / 255, 0.4)

    overrides = {
        'axes.edgecolor': '.6',
        'grid.color': '.9',
        # The flag is handed to the style as its string form ('True' / 'False').
        'axes.grid': str(show_grid_lines),
        'font.family': 'serif',
    }
    for facecolor_key in ('axes.facecolor', 'figure.facecolor'):
        overrides[facecolor_key] = shared_background

    sns.set_color_codes("colorblind")
    sns.set_style("white", overrides)


if __name__ == '__main__':
    # Only reached when this file is executed directly as a script.
    from dh_testers.testRunner import main_test as run_module_tests

    # import_plus_relative=True allows for relative calls in the import.
    run_module_tests(import_plus_relative=True)