def test_get_datetime_with_tz_offset_02(self) -> None:
    """Verify get_datetime_with_tz_offset on a date where the timezone does not matter."""

    # On 2020-01-01 Lisbon is at UTC+00:00, so the attached offset should be zero
    naive_date_time = datetime.datetime(2020, 1, 1, 9)

    result = auxiliary.get_datetime_with_tz_offset(naive_date_time)

    # The string form should carry the +00:00 offset and an unchanged wall time
    self.assertEqual('2020-01-01 09:00:00+00:00', str(result))
def test_convert_datetime_to_utc_02(self) -> None:
    """Verify convert_datetime_to_utc on a date where the timezone does not matter."""

    # Lisbon is at UTC+00:00 on this date, so conversion should not shift the time
    expected = datetime.datetime(2020, 1, 1, 9, tzinfo=datetime.timezone.utc)

    # Attach the local offset first, then convert the result to UTC
    localized = auxiliary.get_datetime_with_tz_offset(datetime.datetime(2020, 1, 1, 9))
    actual = auxiliary.convert_datetime_to_utc(localized)

    self.assertEqual(expected, actual)
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    Parses a TVCine xlsx schedule file: each data row is read, its date and
    time are combined and normalized to naive UTC, series information is
    extracted from the title, and each entry is handed to
    get_file_data.process_file_entry. On success, old sessions in the file's
    time window are deleted and the changes are committed.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel (invalid in this case — the
        real channel name is read from each row of the file).
    :return: the InsertionResult, or None when nothing was inserted or an
        entry failed to process.
    """
    wb = openpyxl.load_workbook(filename)

    insertion_result = get_file_data.InsertionResult()

    # Track the first valid session's datetime and the last processed one,
    # used below to bound the deletion window
    first_event_datetime = None
    date_time = None

    # Midnight today (UTC), the reference point for discarding old sessions
    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Skip row 1, with the headers
    for row in wb.active.iter_rows(min_row=2, max_col=15):
        # Skip rows that contain only the date
        if row[0].value is None:
            continue

        # Skip the rows in which the year is not a number (header rows)
        if not isinstance(row[4].value, int):
            continue

        # Get the data
        channel_name = row[0].value
        date = row[1].value
        time = row[2].value
        original_title = str(row[3].value)
        year = int(row[4].value)
        age_classification = row[5].value
        genre = row[6].value
        duration = int(row[7].value)
        languages = row[8].value
        countries = row[9].value
        synopsis = row[10].value
        directors = row[11].value
        cast = row[12].value
        localized_title = str(row[13].value)
        # episode_title = row[14].value

        # If the show is not yet defined ("Programa a Designar" is a
        # placeholder title), skip it
        if 'Programa a Designar' in original_title:
            continue

        # Combine the date with the time
        # NOTE(review): assumes date and time cells are datetime objects — confirm against the file format
        date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Check if it matches the regex of a series: "<title> T<season>, <episode>"
        series = re.search('(.+) T([0-9]+),[ ]+([0-9]+)', localized_title.strip())

        # If it is a series, extract it's season and episode
        if series:
            localized_title = series.group(1)
            is_movie = False
            season = int(series.group(2))
            episode = int(series.group(3))

            # The synopsis of a series row describes the episode, so drop it
            # episode_synopsis = synopsis
            synopsis = None

            # Also get the original title without the season and episode
            series = re.search('(.+) T([0-9]+),[ ]+([0-9]+)', original_title.strip())

            if series:
                original_title = series.group(1)
        else:
            season = None
            episode = None
            is_movie = True

        # Process the titles
        localized_title, vp, extended_cut = TVCine.process_title(localized_title)
        # "VP" marks a Portuguese-dubbed session
        audio_language = 'pt' if vp else None

        original_title, _, _ = TVCine.process_title(original_title)

        # Sometimes the cast is switched with the director
        if cast is not None and directors is not None:
            cast_commas = auxiliary.search_chars(cast, [','])[0]
            director_commas = auxiliary.search_chars(directors, [','])[0]

            # When that happens, switch them: the cast list normally has
            # more commas (more names) than the directors list
            if len(cast_commas) < len(director_commas):
                aux = cast
                cast = directors
                directors = aux

        # Process the directors
        if directors is not None:
            directors = directors.split(',')

        # Genre is movie, series, documentary, news...
        if genre is None or 'Document' not in genre:
            subgenre = genre  # Subgenre is in portuguese
            genre = 'Movie' if is_movie else 'Series'
        else:
            genre = 'Documentary'
            subgenre = None

        # Normalize the channel name read from the row, e.g. "TVCine Top"
        channel_name = 'TVCine ' + channel_name.strip().split()[1]
        channel_id = db_calls.get_channel_name(db_session, channel_name).id

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification,
                                                            audio_languages=languages,
                                                            session_audio_language=audio_language,
                                                            extended_cut=extended_cut)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period (with a 5 minute margin
        # on each side)
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                TVCine.channels)

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    Parses a Cinemundo xlsx schedule file: each data row is read, its date
    and time are combined and normalized to naive UTC, and each entry is
    handed to get_file_data.process_file_entry. On success, old sessions in
    the file's time window are deleted and the changes are committed.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult, or None when nothing was inserted or an
        entry failed to process.
    """
    wb = openpyxl.load_workbook(filename)

    # Track the first valid session's datetime and the last processed one,
    # used below to bound the deletion window
    first_event_datetime = None
    date_time = None

    insertion_result = get_file_data.InsertionResult()

    # Midnight today (UTC), the reference point for discarding old sessions
    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Skip row 1, with the headers
    for row in wb.active.iter_rows(min_row=3, max_col=12):
        # Skip rows that do not contain a date
        if row[0].value is None:
            continue

        # Get the data
        date = datetime.datetime.strptime(str(row[0].value), '%Y%m%d')
        time = row[1].value
        original_title = str(row[2].value)
        localized_title = str(row[3].value)
        synopsis = row[4].value
        year = int(row[5].value)
        age_classification = row[6].value
        directors = row[7].value
        cast = row[8].value
        subgenre = row[9].value  # Obtained in portuguese

        # Combine the date with the time
        # NOTE(review): assumes the time cell is a time-like object with hour/minute — confirm
        date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Process the titles
        localized_title, vp, _ = Cinemundo.process_title(localized_title)
        # "VP" marks a Portuguese-dubbed session
        audio_language = 'pt' if vp else None

        original_title, _, season = Cinemundo.process_title(original_title)

        # A season extracted from the title means this entry is a series
        if season is not None:
            is_movie = False
            genre = 'Series'
        else:
            is_movie = True
            genre = 'Movie'

        # Process the directors (separated by commas or the word " e " — "and")
        if directors is not None:
            directors = re.split(',| e ', directors)

        # Get the channel's id
        channel_id = db_calls.get_channel_name(db_session, 'Cinemundo').id

        # Process an entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, None,
                                                            cast=cast, age_classification=age_classification,
                                                            audio_languages=audio_language)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period (with a 5 minute margin
        # on each side)
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                Cinemundo.channels)

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    Parses an Odisseia XML schedule file: every "Event" element is read, its
    begin time is normalized to naive UTC, its production metadata (genre,
    titles, synopsis, ExtendedInfo fields) is extracted, and each entry is
    handed to get_file_data.process_file_entry. On success, old sessions in
    the file's time window are deleted and the changes are committed.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult, or None when there are no (valid) events or
        an entry failed to process.
    """
    dom_tree = xml.dom.minidom.parse(filename)
    collection = dom_tree.documentElement

    # Get all events
    events = collection.getElementsByTagName('Event')

    # If there are no events
    if len(events) == 0:
        return None

    # Track the first valid session's datetime and the last processed one,
    # used below to bound the deletion window
    first_event_datetime = None
    date_time = None

    insertion_result = get_file_data.InsertionResult()

    # Midnight today (UTC), the reference point for discarding old sessions
    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Process each event
    for event in events:
        # --- START DATA GATHERING ---
        # Get the date and time
        begin_time = event.getAttribute('beginTime')
        date_time = datetime.datetime.strptime(begin_time, '%Y%m%d%H%M%S')

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Get the event's duration in minutes (the attribute is in seconds)
        duration = int(int(event.getAttribute('duration')) / 60)

        # Inside the Event -> EpgProduction
        epg_production = event.getElementsByTagName('EpgProduction')[0]

        # Get the genre
        genre_list = epg_production.getElementsByTagName('Genere')

        # Check if it is the genre that we are assuming it always is
        # (everything on this channel is treated as a documentary; log when
        # that assumption does not hold)
        if len(genre_list) > 0 and 'Document' not in genre_list[0].firstChild.nodeValue:
            get_file_data.print_message('not a documentary', True, str(event.getAttribute('beginTime')))

        # Subgenre is in portuguese
        subgenre = epg_production.getElementsByTagName('Subgenere')[0].firstChild.nodeValue

        # Age classification
        age_classification = epg_production.getElementsByTagName('ParentalRating')[0].firstChild.nodeValue

        # Inside the Event -> EpgProduction -> EpgText
        epg_text = epg_production.getElementsByTagName('EpgText')[0]

        # Get the localized title, in this case the portuguese one
        localized_title = epg_text.getElementsByTagName('Name')[0].firstChild.nodeValue

        # Get the localized synopsis, in this case the portuguese one
        short_description = epg_text.getElementsByTagName('ShortDescription')

        if short_description is not None and short_description[0].firstChild is not None:
            synopsis = short_description[0].firstChild.nodeValue
        else:
            synopsis = None

        # Iterate over the ExtendedInfo elements, collecting the optional
        # fields; each stays None when absent or empty
        extended_info_elements = epg_text.getElementsByTagName('ExtendedInfo')

        original_title = None
        directors = None
        season = None
        episode = None
        year = None
        countries = None
        cast = None

        for extended_info in extended_info_elements:
            attribute = extended_info.getAttribute('name')

            if attribute == 'OriginalEventName' and extended_info.firstChild is not None:
                original_title = extended_info.firstChild.nodeValue
            elif attribute == 'Year' and extended_info.firstChild is not None:
                year = int(extended_info.firstChild.nodeValue)

                # Sometimes the year is 0, which is a placeholder for unknown
                if year == 0:
                    year = None
            elif attribute == 'Director' and extended_info.firstChild is not None:
                directors = extended_info.firstChild.nodeValue
            elif attribute == 'Casting' and extended_info.firstChild is not None:
                cast = extended_info.firstChild.nodeValue
            elif attribute == 'Nationality' and extended_info.firstChild is not None:
                countries = extended_info.firstChild.nodeValue
            elif attribute == 'Cycle' and extended_info.firstChild is not None:
                season = int(extended_info.firstChild.nodeValue)
            elif attribute == 'EpisodeNumber' and extended_info.firstChild is not None:
                episode = int(extended_info.firstChild.nodeValue)

        # Get the channel's id
        channel_id = db_calls.get_channel_name(db_session, 'Odisseia').id

        # Process titles
        original_title = Odisseia.process_title(original_title)
        localized_title = Odisseia.process_title(localized_title)

        # Process the directors
        if directors is not None:
            directors = directors.split(',')

        # Without a season the entry is treated as a movie
        is_movie = season is None
        genre = 'Documentary'

        # --- END DATA GATHERING ---

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification)

        if insertion_result is None:
            return None

    # If there only invalid sessions
    if first_event_datetime is None:
        return None

    db_calls.commit(db_session)

    # Delete old sessions for the same time period (with a 5 minute margin on
    # each side)
    file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
    file_end_datetime = date_time + datetime.timedelta(minutes=5)

    nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                            Odisseia.channels)

    # Set the remaining information
    insertion_result.nb_deleted_sessions = nb_deleted_sessions
    insertion_result.start_datetime = file_start_datetime
    insertion_result.end_datetime = file_end_datetime

    return insertion_result
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    Generic spreadsheet parser: the per-channel configuration (obtained from
    GenericXlsx.process_configuration) maps field names to column positions
    and formats, and selects between the .xls (xlrd) and .xlsx (openpyxl)
    reading paths. Each data row is normalized to naive UTC and handed to
    get_file_data.process_file_entry. On success, old sessions in the file's
    time window are deleted and the changes are committed.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult, or None when the configuration is invalid,
        nothing was inserted, or an entry failed to process.
    """
    # Get the position and format of the fields for this channel
    fields = GenericXlsx.process_configuration(channel_name)
    channel_name = GenericXlsx.channels_file[channel_name][0]

    # If it is invalid
    if fields is None:
        return None

    # The "_file_format" pseudo-field overrides the default spreadsheet format
    if '_file_format' in fields:
        file_format = fields['_file_format'].field_format
    else:
        file_format = '.xlsx'

    # Open the workbook with the library that matches the format
    if file_format == '.xls':
        book = xlrd.open_workbook(filename)
        sheet = book.sheets()[0]
        rows = sheet.nrows
    else:
        book = openpyxl.load_workbook(filename)
        sheet = book.active
        rows = sheet.max_row

    insertion_result = get_file_data.InsertionResult()

    # Track the first valid session's datetime and the last processed one,
    # used below to bound the deletion window
    first_event_datetime = None
    date_time = None

    # Midnight today (UTC), the reference point for discarding old sessions
    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    row_skipped = False

    for rx in range(rows):
        # xlrd rows are 0-based, openpyxl rows are 1-based
        if file_format == '.xls':
            row = sheet.row(rx)
        else:
            row = sheet[rx + 1]

        # Skip row 1, with the headers
        if not row_skipped:
            row_skipped = True
            continue

        # Skip the rows in which the year is not a number (header rows)
        if row[fields['year'].position].value is None:
            continue

        try:
            year = int(row[fields['year'].position].value)
        except ValueError:
            continue

        # Get the date_time: either a single combined column, or separate
        # date and time columns
        if 'date_time' in fields:
            date_time = datetime.datetime.strptime(row[fields['date_time'].position].value,
                                                   fields['date_time'].field_format)
        else:
            if file_format == '.xls':
                date = xlrd.xldate_as_datetime(row[fields['date'].position].value, book.datemode)

                try:
                    # If the time comes in the time format
                    time = xlrd.xldate_as_datetime(row[fields['time'].position].value, book.datemode)
                except TypeError:
                    # If the time comes as text
                    time = datetime.datetime.strptime(row[fields['time'].position].value,
                                                      fields['time'].field_format)
            else:
                try:
                    date = datetime.datetime.strptime(row[fields['date'].position].value,
                                                      fields['date'].field_format)
                    time = datetime.datetime.strptime(row[fields['time'].position].value,
                                                      fields['time'].field_format)
                except TypeError:
                    continue

            # Combine the date with the time
            date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        original_title = str(row[fields['original_title'].position].value)
        localized_title = str(row[fields['localized_title'].position].value)

        # If it is a placeholder show or temporary program
        if '_temporary_program' in fields:
            if str(fields['_temporary_program'].field_format) in original_title:
                continue

        if 'localized_synopsis' in fields:
            synopsis = str(row[fields['localized_synopsis'].position].value).strip()
        else:
            synopsis = None

        if 'cast' in fields:
            cast = row[fields['cast'].position].value.strip()

            if len(cast) == 0:
                cast = None
        else:
            cast = None

        if 'directors' in fields:
            directors = row[fields['directors'].position].value

            # Process the directors: blank strings become None, otherwise
            # split the comma-separated list
            if directors is not None:
                if re.match('^ *$', directors):
                    directors = None
                else:
                    directors = directors.split(',')

            # If the name of the directors is actually a placeholder
            if '_ignore_directors' in fields and directors:
                if directors[0].strip() == fields['_ignore_directors'].field_format:
                    directors = None
        else:
            directors = None

        if 'creators' in fields:
            creators = row[fields['creators'].position].value

            # Process the creators: blank strings become None, otherwise
            # split the comma-separated list
            if creators is not None:
                if re.match('^ *$', creators):
                    creators = None
                else:
                    creators = creators.split(',')
        else:
            creators = None

        if 'countries' in fields:
            countries = row[fields['countries'].position].value.strip()
        else:
            countries = None

        # Duration: either raw seconds or a time-formatted cell, normalized
        # to minutes
        if 'duration' in fields:
            if fields['duration'].field_format == 'seconds':
                duration = int(int(row[fields['duration'].position].value) / 60)
            else:
                if file_format == '.xls':
                    duration = xlrd.xldate_as_datetime(row[fields['duration'].position].value, book.datemode)
                else:
                    duration = datetime.datetime.strptime(row[fields['duration'].position].value,
                                                          fields['duration'].field_format)

                duration = duration.hour * 60 + duration.minute
        else:
            duration = None

        if 'age_classification' in fields:
            age_classification = str(row[fields['age_classification'].position].value).strip()
        else:
            age_classification = None

        if 'subgenre' in fields:
            subgenre = row[fields['subgenre'].position].value.strip()
        else:
            subgenre = None

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        if 'season' not in fields or 'episode' not in fields:
            season = None
            episode = None
        else:
            try:
                season = int(row[fields['season'].position].value)
            except ValueError:
                try:
                    # There are entries with a season 2.5, which will be converted to 2
                    season = int(float(row[fields['season'].position].value))
                except ValueError:
                    season = None

            # Some files use 0 as a placeholder
            if season == 0:
                season = None

            episode = None

            if fields['episode'].field_format == 'int':
                episode = int(row[fields['episode'].position].value)
            elif 'title_with_Ep.' in fields['episode'].field_format:
                # The episode number is embedded in the title as "Ep. <n>"
                series = re.search(r'Ep\. [0-9]+', row[fields['episode'].position].value.strip())

                if series is not None:
                    episode = int(series.group(0)[4:])

        if season == 0:
            season = None

        # Determine whether or not it is a movie
        is_movie = season is None or episode is None

        # Make sure the season and episode are None for movies
        if is_movie:
            season = None
            episode = None

        # Genre is movie, series, documentary, news...
        genre = 'Movie' if is_movie else 'Series'

        # Process the title
        original_title = GenericXlsx.process_title(original_title, fields['original_title'].field_format,
                                                   is_movie)
        localized_title = GenericXlsx.process_title(localized_title, fields['localized_title'].field_format,
                                                    is_movie)

        channel_id = db_calls.get_channel_name(db_session, channel_name).id

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification,
                                                            creators=creators)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period (with a 5 minute margin
        # on each side)
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                [channel_name])

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None