def test_search_chars(self) -> None:
    """ Check that search_chars reports the positions found for each requested character. """
    text = 'Text, with, commas (and), parenthesis, everywhere (!)'
    chars = ['(', ',', 'a']

    # One list of positions per searched character, in the same order as chars
    expected_result = [
        [19, 50],
        [4, 10, 24, 37],
        [16, 20, 27],
    ]

    self.assertEqual(expected_result, auxiliary.search_chars(text, chars))
def process_title(title: str) -> [str, bool, bool]:
    """
    Process the title, removing special markers and reformatting the title.

    The parenthesised markers that are recognised (and removed) are:
    - (VP) - for portuguese audio;
    - (VO) - for original audio;
    - (extended cut) or (versão alargada) - for extended cut;
    - any parenthesis whose content has four consecutive digits (e.g. a year).

    Any other parenthesised text is kept in the title. At the end the title goes through
    TVCine.fix_title_order, because some of the words come unordered.

    When the number of opening and closing parenthesis differs, no marker is processed and the
    title is returned only with the quotation marks normalised and the outer whitespace removed.

    :param title: the title as is in the file.
    :return: a tuple with the clean title, whether or not it is a session with the portuguese voice and whether
    or not it is a session with the extended cut.
    """

    # Replace all quotation marks for the same quotation mark
    title = re.sub('[´`]', '\'', title)

    search_result = auxiliary.search_chars(title, ['(', ')'])
    opening_positions = search_result[0]
    closing_positions = search_result[1]

    vp = False
    extended_cut = False

    # If the number of opening parenthesis is not the same as the closing ones, the markers can not be
    # paired; always return the three values, because the callers unpack three
    if len(opening_positions) != len(closing_positions):
        return title.strip(), vp, extended_cut

    # Go from the last parenthesis to the first, so that removing one does not shift the
    # positions of the ones that are still to be processed
    for start_pos, end_pos in zip(reversed(opening_positions), reversed(closing_positions)):
        text = title[start_pos + 1:end_pos]

        if text == 'VP':
            vp = True
        elif text == 'VO':
            vp = False
        elif text in ('versão alargada', 'extended cut'):
            extended_cut = True
        elif re.search(r'[0-9]{4}', text.strip()):
            # Nothing to register, the parenthesis just needs to be removed
            pass
        else:
            # Unknown content, keep it in the title
            continue

        # Remove the parenthesis and its content
        title = title[:start_pos] + title[end_pos + 1:]

    return TVCine.fix_title_order(title).strip(), vp, extended_cut
def process_title(title: str, title_format: str, is_movie: bool) -> str:
    """
    Process the title, removing the season marker (for entries that are not movies) and the year.

    The processing depends on the markers present in title_format:
    - 'S_season_at_the_end' - everything from the first 'S<digits>' onwards is removed;
    - 'season_at_the_end' - a trailing ' <digits>' is removed;
    - 'has_year' - every parenthesis whose content has four consecutive digits is removed.

    :param title: the title as is in the file.
    :param title_format: the format of the title.
    :param is_movie: whether or not this entry is a movie.
    :return: the title, without outer whitespace and with the quotation marks normalised.
    """

    # Strip before anything else, so that the positions found by the regular expressions
    # refer to the same string that is sliced afterwards
    title = title.strip()

    if not is_movie:
        if 'S_season_at_the_end' in title_format:
            series = re.search(r'S[0-9]+', title)

            if series is not None:
                title = title[:series.start()]
        elif 'season_at_the_end' in title_format:
            series = re.search(r'^(.*) [0-9]+$', title)

            if series is not None:
                title = series.group(1)

    if 'has_year' in title_format:
        search_result = auxiliary.search_chars(title, ['(', ')'])
        opening_positions = search_result[0]
        closing_positions = search_result[1]

        # An opening parenthesis without a closing one can not be paired, in that
        # case the title is left as it is instead of failing with an IndexError
        if len(opening_positions) <= len(closing_positions):
            # From the last position of the parenthesis, so that the remaining positions stay valid
            for start_pos, end_pos in reversed(list(zip(opening_positions, closing_positions))):
                text = title[start_pos + 1:end_pos]

                # Only the parenthesis that have an year are removed
                if not re.search(r'[0-9]{4}', text.strip()):
                    continue

                # Remove the parenthesis and its content
                title = title[:start_pos] + title[end_pos + 1:]

    # Replace all quotation marks for the same quotation mark
    return re.sub('[´`]', '\'', title.strip())
def process_title(title: str) -> [str, bool, int]:
    """
    Process the title, removing special markers:
    - VP at the end of the title - for portuguese audio;
    - everything after the first '/' - when the entry contains multiple titles;
    - 'S<digits>' - the season, which is removed together with everything that comes after it.

    :param title: the title as is in the file.
    :return: a tuple with the clean title, whether or not it is a session with the portuguese voice and the
    season, when applicable (None otherwise).
    """

    # Replace all quotation marks for the same quotation mark
    title = re.sub('[´`]', '\'', title)

    # Search for VP in the end of the title
    vp = title.endswith('VP')

    if vp:
        # Remove the VP and the character before it
        # NOTE(review): assumes the marker is preceded by a single separator character - confirm
        title = title[:-3]

    # Sometimes the title contains multiple titles, get the first one.
    # Strip it here so that the position of the season matches the string being sliced
    title = title.partition('/')[0].strip()

    # Check if it is a series
    series = re.search(r'S[0-9]+', title)

    if series is None:
        season = None
    else:
        season = int(series.group(0)[1:])
        title = title[:series.start()].strip()

    return title, vp, season
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Read the workbook in the file and add its sessions to the DB.

    Each row, after the header, is expected to carry (by column): channel, date, time, original title, year,
    age classification, genre, duration, languages, countries, synopsis, directors, cast and localized title.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel (not used, it is replaced by the channel of each row).
    :return: the InsertionResult, or None when an entry could not be processed or the file had no sessions.
    """

    # Titles of series look like "<title> T<season>, <episode>"
    series_pattern = '(.+) T([0-9]+),[ ]+([0-9]+)'

    workbook = openpyxl.load_workbook(filename)

    insertion_result = get_file_data.InsertionResult()

    first_event_datetime = None
    date_time = None

    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Row 1 has the headers, so start at row 2
    for row in workbook.active.iter_rows(min_row=2, max_col=15):
        values = [cell.value for cell in row]

        # Rows without a channel only contain the date, and rows whose
        # year is not a number are header rows: both are skipped
        if values[0] is None or not isinstance(values[4], int):
            continue

        # Get the data
        channel_name = values[0]
        date = values[1]
        time = values[2]
        original_title = str(values[3])
        year = int(values[4])
        age_classification = values[5]
        genre = values[6]
        duration = int(values[7])
        languages = values[8]
        countries = values[9]
        synopsis = values[10]
        directors = values[11]
        cast = values[12]
        localized_title = str(values[13])

        # The show is not yet defined
        if 'Programa a Designar' in original_title:
            continue

        # Combine the date with the time
        date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the timezone info, convert it to UTC and then drop the timezone info again
        date_time_with_tz = auxiliary.get_datetime_with_tz_offset(date_time)
        date_time = auxiliary.convert_datetime_to_utc(date_time_with_tz).replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Keep the datetime of the first event
        if first_event_datetime is None:
            first_event_datetime = date_time

        # A localized title that matches the pattern means that this entry is a series
        localized_match = re.search(series_pattern, localized_title.strip())
        is_movie = not localized_match

        if is_movie:
            season = None
            episode = None
        else:
            # Extract the title, the season and the episode
            localized_title = localized_match.group(1)
            season = int(localized_match.group(2))
            episode = int(localized_match.group(3))

            # The synopsis of a series entry is the synopsis of the episode
            synopsis = None

            # Also get the original title without the season and episode
            original_match = re.search(series_pattern, original_title.strip())

            if original_match:
                original_title = original_match.group(1)

        # Process the titles
        localized_title, vp, extended_cut = TVCine.process_title(localized_title)
        audio_language = 'pt' if vp else None

        original_title, _, _ = TVCine.process_title(original_title)

        # Sometimes the cast is switched with the director: the field
        # with more commas is taken as being the cast
        if cast is not None and directors is not None:
            nb_cast_commas = len(auxiliary.search_chars(cast, [','])[0])
            nb_director_commas = len(auxiliary.search_chars(directors, [','])[0])

            if nb_cast_commas < nb_director_commas:
                cast, directors = directors, cast

        # Process the directors
        if directors is not None:
            directors = directors.split(',')

        # Genre is movie, series, documentary, news...
        if genre is not None and 'Document' in genre:
            genre = 'Documentary'
            subgenre = None
        else:
            # The subgenre is in portuguese
            subgenre = genre
            genre = 'Movie' if is_movie else 'Series'

        # The channel in the DB is 'TVCine ' followed by the second word of the channel in the file
        channel_name = 'TVCine ' + channel_name.strip().split()[1]
        channel_id = db_calls.get_channel_name(db_session, channel_name).id

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time,
                                                            channel_id, year, directors, subgenre, synopsis,
                                                            season, episode, cast=cast, duration=duration,
                                                            countries=countries,
                                                            age_classification=age_classification,
                                                            audio_languages=languages,
                                                            session_audio_language=audio_language,
                                                            extended_cut=extended_cut)

        if insertion_result is None:
            return None

    # Nothing to store when the file did not have any session
    if insertion_result.total_nb_sessions_in_file == 0:
        return None

    db_calls.commit(db_session)

    # Delete old sessions for the same time period
    file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
    file_end_datetime = date_time + datetime.timedelta(minutes=5)

    nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                            TVCine.channels)

    # Set the remaining information
    insertion_result.nb_deleted_sessions = nb_deleted_sessions
    insertion_result.start_datetime = file_start_datetime
    insertion_result.end_datetime = file_end_datetime

    return insertion_result