Esempio n. 1
0
    def predict(self, name):
        '''Recognize the entities contained in the given filename.

        The filename is prepared twice: ``preprocessing.prepare_input`` builds
        the text handed to the model, while ``postprocessing.prepare_output``
        builds the text from which the reported words are taken.

        Args:
            name (string): The name of the file to recognize entities in.

        Returns:
            list: ``(label, value)`` tuples, one per distinct label, in order
            of first appearance. Words sharing a label are joined with
            ``SEP``. Season and episode ids are returned as ints when they
            parse as integers and as strings otherwise.

        Raises:
            IndexError: If an entity's start index lies beyond the words of
                the prepared output text.
        '''

        model_input = preprocessing.prepare_input(name)
        # Split once up front: the word list is the same for every entity.
        words = postprocessing.prepare_output(name).split()

        # Group the words by label. Only the word at each entity's ``start``
        # index is looked up.
        # NOTE(review): assumes the model's token indices line up with the
        # whitespace-separated words of the output text -- confirm.
        words_by_label = {}
        for entity in self.model(model_input).ents:
            word = words[entity.start]
            words_by_label.setdefault(entity.label_, []).append(word)

        merged = {
            label: SEP.join(parts)
            for label, parts in words_by_label.items()
        }

        # Season and episode ids: drop the leading s/e marker and, when what
        # remains is numeric, report it as an int.
        for label, prefix in ((SID, 'sS'), (EID, 'eE')):
            if label in merged:
                stripped = merged[label].lstrip(prefix)
                try:
                    merged[label] = int(stripped)
                except ValueError:
                    merged[label] = stripped

        # Title case title and episode names
        for label in (TITLE, EPNAME):
            if label in merged:
                merged[label] = titlecase(merged[label])

        return list(merged.items())
def test_prepare_output_removes_commas(name, expected):
    """Check that prepare_output maps ``name`` to ``expected``.

    NOTE(review): the (name, expected) pairs are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_tv():
    """A dotted TV filename comes back space separated with S01/E01 split."""
    actual = postprocessing.prepare_output('Some.TV.Show.S01E01.mp4')
    assert actual == 'Some TV Show S01 E01 mp4'
def test_prepare_output_movie():
    """A movie filename loses its dots, parentheses and square brackets."""
    filename = 'Some.Movie.II (2007).1080p[WEB].mkv'
    actual = postprocessing.prepare_output(filename)
    assert actual == 'Some Movie II 2007 1080p WEB mkv'
def test_prepare_output_converts_ampersand_to_and(name, expected):
    """Check that prepare_output maps ``name`` to ``expected``.

    NOTE(review): the (name, expected) pairs are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_removes_extraneous_spaces(name, expected):
    """Check that prepare_output maps ``name`` to ``expected``.

    NOTE(review): the (name, expected) pairs are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_splits_season_episode():
    """A fused season/episode tag is separated by a single space."""
    actual = postprocessing.prepare_output('s01e01')
    assert actual == 's01 e01'
def test_prepare_output_retains_punctuation():
    """The characters ' ! @ $ % ? pass through prepare_output unchanged."""
    punctuation = '\'!@$%?'
    actual = postprocessing.prepare_output(punctuation)
    assert actual == punctuation
def test_prepare_output_removes_non_word_characters():
    """An input made only of the listed symbols yields an empty string."""
    symbols = '\"`~#^*()-_+=[]|;:<>,./{}'
    actual = postprocessing.prepare_output(symbols)
    assert actual == ''
def test_prepare_output_normalizes_word_separators():
    """Each of . _ - [ ] + between letters becomes a single space."""
    actual = postprocessing.prepare_output('a.b_c-d[e]f+g')
    assert actual == 'a b c d e f g'
def test_prepare_output_outputs_retains_case():
    """Mixed-case input keeps its original letter casing."""
    mixed_case = 'AbCd'
    actual = postprocessing.prepare_output(mixed_case)
    assert actual == mixed_case