def predict(self, name):
    '''Recognizes entities contained within the specified filename.

    The name is prepared twice: once by ``preprocessing.prepare_input`` to
    feed the model, and once by ``postprocessing.prepare_output`` to obtain
    the words that end up in the result. Each recognized entity contributes
    the output word found at its ``start`` index; words sharing a label are
    joined with ``SEP`` in the order the model reports them.

    Args:
        name (string): The name of the file to recognize entities.

    Returns:
        list: The list of ``(label, value)`` tuples, ordered by the first
            appearance of each label. Values are strings, except the
            ``SID`` and ``EID`` values, which are converted to ``int``
            when the text left after stripping the prefix letters parses
            as one.

    Raises:
        IndexError: If an entity's ``start`` index is beyond the number of
            whitespace-separated words in the prepared output text.
    '''
    x = preprocessing.prepare_input(name)
    # Tokenize the output text once rather than once per entity.
    # NOTE(review): assumes the model's token indices line up with the
    # whitespace-separated words of the prepared output - confirm.
    words = postprocessing.prepare_output(name).split()

    # NOTE(review): `.ents`, `.label_` and `.start` look like the spaCy
    # Doc/Span API - confirm against how self.model is constructed.
    doc = self.model(x)

    # Merge entities: concatenate the words of every entity with the same label
    y_merged = {}
    for ent in doc.ents:
        label = ent.label_
        word = words[ent.start]
        if label in y_merged:
            y_merged[label] = y_merged[label] + SEP + word
        else:
            y_merged[label] = word

    # Remove leading s and e from season and episode numbers, keeping the
    # stripped text as-is when it is not a plain integer.
    for key, prefix_chars in ((SID, 'sS'), (EID, 'eE')):
        if key in y_merged:
            stripped = y_merged[key].lstrip(prefix_chars)
            try:
                y_merged[key] = int(stripped)
            except ValueError:
                y_merged[key] = stripped

    # Title case title and episode names
    for key in (TITLE, EPNAME):
        if key in y_merged:
            y_merged[key] = titlecase(y_merged[key])

    return list(y_merged.items())
def test_prepare_output_removes_commas(name, expected):
    '''Check that prepare_output turns ``name`` into ``expected``.

    The (name, expected) pairs are supplied from outside this function;
    going by the test name they presumably cover comma removal.
    '''
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_tv():
    '''Check the prepared output for a dot-separated TV episode filename.'''
    filename = 'Some.TV.Show.S01E01.mp4'
    cleaned = postprocessing.prepare_output(filename)
    assert cleaned == 'Some TV Show S01 E01 mp4'
def test_prepare_output_movie():
    '''Check the prepared output for a movie filename with brackets.'''
    filename = 'Some.Movie.II (2007).1080p[WEB].mkv'
    cleaned = postprocessing.prepare_output(filename)
    assert cleaned == 'Some Movie II 2007 1080p WEB mkv'
def test_prepare_output_converts_ampersand_to_and(name, expected):
    '''Check that prepare_output turns ``name`` into ``expected``.

    The (name, expected) pairs are supplied from outside this function;
    going by the test name they presumably cover '&' becoming 'and'.
    '''
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_removes_extraneous_spaces(name, expected):
    '''Check that prepare_output turns ``name`` into ``expected``.

    The (name, expected) pairs are supplied from outside this function;
    going by the test name they presumably cover redundant whitespace.
    '''
    actual = postprocessing.prepare_output(name)
    assert actual == expected
def test_prepare_output_splits_season_episode():
    '''Check that a fused season/episode tag is split into two words.'''
    cleaned = postprocessing.prepare_output('s01e01')
    assert cleaned == 's01 e01'
def test_prepare_output_retains_punctuation():
    '''Check that the punctuation in the sample string comes back unchanged.'''
    punctuation = '\'!@$%?'
    cleaned = postprocessing.prepare_output(punctuation)
    assert cleaned == punctuation
def test_prepare_output_removes_non_word_characters():
    '''Check that a string made only of the sampled symbols becomes empty.'''
    symbols = '\"`~#^*()-_+=[]|;:<>,./{}'
    cleaned = postprocessing.prepare_output(symbols)
    assert cleaned == ''
def test_prepare_output_normalizes_word_separators():
    '''Check that mixed separator characters all become single spaces.'''
    raw = 'a.b_c-d[e]f+g'
    cleaned = postprocessing.prepare_output(raw)
    assert cleaned == 'a b c d e f g'
def test_prepare_output_outputs_retains_case():
    '''Check that the letter case of the input is left untouched.'''
    mixed_case = 'AbCd'
    cleaned = postprocessing.prepare_output(mixed_case)
    assert cleaned == mixed_case