def test_get_datetime_with_tz_offset_02(self) -> None:
        """
        Check get_datetime_with_tz_offset for the naive datetime 2020-01-01 09:00.

        The string form of the returned value is expected to end with a +00:00 offset.
        """

        naive_input = datetime.datetime(year=2020, month=1, day=1, hour=9)

        localized = auxiliary.get_datetime_with_tz_offset(naive_input)

        # Compare through str() so that the offset suffix is part of the check
        self.assertEqual('2020-01-01 09:00:00+00:00', str(localized))
    def test_convert_datetime_to_utc_02(self) -> None:
        """
        Check convert_datetime_to_utc for 2020-01-01 09:00, after it went through get_datetime_with_tz_offset.

        The converted value is expected to be equal to 09:00 UTC of the same day.
        """

        naive_input = datetime.datetime(year=2020, month=1, day=1, hour=9)
        utc_expected = naive_input.replace(tzinfo=datetime.timezone.utc)

        # First attach the offset, then convert the result to UTC
        localized = auxiliary.get_datetime_with_tz_offset(naive_input)
        converted = auxiliary.convert_datetime_to_utc(localized)

        self.assertEqual(utc_expected, converted)
Example #3
0
    def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
            -> Optional[get_file_data.InsertionResult]:
        """
        Read the sessions listed in a spreadsheet and add them to the DB.

        The channel of each session is read from the first column of its row,
        so the value received in channel_name is not used.

        :param db_session: the DB session.
        :param filename: the path to the spreadsheet.
        :param channel_name: not used; every row states its own channel.
        :return: the InsertionResult, or None if processing an entry yields None
                 or the result reports zero sessions in the file.
        """

        workbook = openpyxl.load_workbook(filename)

        result = get_file_data.InsertionResult()

        first_event_datetime = None
        date_time = None

        # Sessions that start before this instant are ignored
        midnight_utc = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
        oldest_accepted = midnight_utc - datetime.timedelta(days=configuration.show_sessions_validity_days)

        # Titles of episodes look like "<title> T<season>, <episode>"
        episode_pattern = re.compile('(.+) T([0-9]+),[ ]+([0-9]+)')

        # Row 1 holds the headers, so the iteration starts at row 2
        for cells in workbook.active.iter_rows(min_row=2, max_col=15):
            row_channel = cells[0].value

            # Rows without a value in the first column contain only the date
            if row_channel is None:
                continue

            # Header rows do not have an integer in the year column
            if not isinstance(cells[4].value, int):
                continue

            # Gather the columns that are used (the last one, the episode title, is not)
            session_date = cells[1].value
            session_time = cells[2].value
            original_title = str(cells[3].value)
            year = int(cells[4].value)
            age_classification = cells[5].value
            genre = cells[6].value
            duration = int(cells[7].value)
            languages = cells[8].value
            countries = cells[9].value
            synopsis = cells[10].value
            directors = cells[11].value
            cast = cells[12].value
            localized_title = str(cells[13].value)

            # Placeholder entries for shows that are not yet defined
            if 'Programa a Designar' in original_title:
                continue

            # Merge the date and the time, attach the timezone offset,
            # convert to UTC and finally drop the timezone info
            local_start = session_date.replace(hour=session_time.hour, minute=session_time.minute)
            aware_start = auxiliary.get_datetime_with_tz_offset(local_start)
            date_time = auxiliary.convert_datetime_to_utc(aware_start).replace(tzinfo=None)

            # Ignore old sessions
            if date_time < oldest_accepted:
                continue

            # Remember the datetime of the first accepted session
            if first_event_datetime is None:
                first_event_datetime = date_time

            # A localized title that matches the pattern belongs to an episode of a series
            series_match = episode_pattern.search(localized_title.strip())
            is_movie = series_match is None

            if is_movie:
                season = None
                episode = None
            else:
                localized_title = series_match.group(1)
                season = int(series_match.group(2))
                episode = int(series_match.group(3))

                # The synopsis of an episode is not passed on
                synopsis = None

                # Remove the season and episode from the original title as well
                original_match = episode_pattern.search(original_title.strip())

                if original_match is not None:
                    original_title = original_match.group(1)

            # Process the titles
            localized_title, vp, extended_cut = TVCine.process_title(localized_title)
            original_title, _, _ = TVCine.process_title(original_title)

            audio_language = 'pt' if vp else None

            # Sometimes the cast is switched with the directors:
            # when the cast has fewer commas than the directors, switch them back
            if cast is not None and directors is not None:
                commas_in_cast = auxiliary.search_chars(cast, [','])[0]
                commas_in_directors = auxiliary.search_chars(directors, [','])[0]

                if len(commas_in_cast) < len(commas_in_directors):
                    cast, directors = directors, cast

            # Turn the directors into a list
            if directors is not None:
                directors = directors.split(',')

            # The genre of the file (in portuguese) becomes the subgenre, unless it is a documentary
            if genre is not None and 'Document' in genre:
                genre = 'Documentary'
                subgenre = None
            else:
                subgenre = genre

                if is_movie:
                    genre = 'Movie'
                else:
                    genre = 'Series'

            # The channel in the DB is 'TVCine ' followed by the second word of the channel in the file
            channel_suffix = row_channel.strip().split()[1]
            channel_id = db_calls.get_channel_name(db_session, 'TVCine ' + channel_suffix).id

            # Process file entry
            result = get_file_data.process_file_entry(db_session, result, original_title, localized_title, is_movie,
                                                      genre, date_time, channel_id, year, directors, subgenre,
                                                      synopsis, season, episode, cast=cast, duration=duration,
                                                      countries=countries,
                                                      age_classification=age_classification,
                                                      audio_languages=languages,
                                                      session_audio_language=audio_language,
                                                      extended_cut=extended_cut)

            if result is None:
                return None

        # Nothing to commit when the file did not contribute any session
        if result.total_nb_sessions_in_file == 0:
            return None

        db_calls.commit(db_session)

        # Delete old sessions for the period of the file, with a margin of 5 minutes on each side.
        # date_time holds the datetime of the last row that got as far as the conversion to UTC.
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        # Set the remaining information
        result.nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime,
                                                                       file_end_datetime, TVCine.channels)
        result.start_datetime = file_start_datetime
        result.end_datetime = file_end_datetime

        return result
Example #4
0
    def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
            -> Optional[get_file_data.InsertionResult]:
        """
        Read the sessions listed in a spreadsheet and add them to the DB.

        All sessions are registered under the 'Cinemundo' channel.

        :param db_session: the DB session.
        :param filename: the path to the spreadsheet.
        :param channel_name: not used; the channel is always 'Cinemundo'.
        :return: the InsertionResult, or None if processing an entry yields None
                 or the result reports zero sessions in the file.
        """

        workbook = openpyxl.load_workbook(filename)

        result = get_file_data.InsertionResult()

        first_event_datetime = None
        date_time = None

        # Sessions that start before this instant are ignored
        midnight_utc = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
        oldest_accepted = midnight_utc - datetime.timedelta(days=configuration.show_sessions_validity_days)

        # The rows with data start at row 3
        for cells in workbook.active.iter_rows(min_row=3, max_col=12):
            raw_date = cells[0].value

            # Rows without a date are skipped
            if raw_date is None:
                continue

            # Gather the columns that are used
            session_date = datetime.datetime.strptime(str(raw_date), '%Y%m%d')
            session_time = cells[1].value
            original_title = str(cells[2].value)
            localized_title = str(cells[3].value)
            synopsis = cells[4].value
            year = int(cells[5].value)
            age_classification = cells[6].value
            directors = cells[7].value
            cast = cells[8].value
            subgenre = cells[9].value  # Comes in portuguese

            # Merge the date and the time, attach the timezone offset,
            # convert to UTC and finally drop the timezone info
            local_start = session_date.replace(hour=session_time.hour, minute=session_time.minute)
            aware_start = auxiliary.get_datetime_with_tz_offset(local_start)
            date_time = auxiliary.convert_datetime_to_utc(aware_start).replace(tzinfo=None)

            # Ignore old sessions
            if date_time < oldest_accepted:
                continue

            # Remember the datetime of the first accepted session
            if first_event_datetime is None:
                first_event_datetime = date_time

            # Process the titles: the localized one yields the vp flag, the original one yields the season
            localized_title, vp, _ = Cinemundo.process_title(localized_title)
            original_title, _, season = Cinemundo.process_title(original_title)

            audio_language = 'pt' if vp else None

            # Without a season it is considered a movie
            is_movie = season is None
            genre = 'Movie' if is_movie else 'Series'

            # The directors are separated by commas or by ' e '
            if directors is not None:
                directors = re.split(',| e ', directors)

            # Get the channel's id
            channel_id = db_calls.get_channel_name(db_session, 'Cinemundo').id

            # Process an entry (no episode number is available, so None is passed in its place)
            # NOTE(review): the per-session 'pt' flag is passed as audio_languages here;
            # confirm that it should not be session_audio_language instead.
            result = get_file_data.process_file_entry(db_session, result, original_title, localized_title, is_movie,
                                                      genre, date_time, channel_id, year, directors, subgenre,
                                                      synopsis, season, None,
                                                      cast=cast,
                                                      age_classification=age_classification,
                                                      audio_languages=audio_language)

            if result is None:
                return None

        # Nothing to commit when the file did not contribute any session
        if result.total_nb_sessions_in_file == 0:
            return None

        db_calls.commit(db_session)

        # Delete old sessions for the period of the file, with a margin of 5 minutes on each side.
        # date_time holds the datetime of the last row that got as far as the conversion to UTC.
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        # Set the remaining information
        result.nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime,
                                                                       file_end_datetime, Cinemundo.channels)
        result.start_datetime = file_start_datetime
        result.end_datetime = file_end_datetime

        return result
Example #5
0
    def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
            -> Optional[get_file_data.InsertionResult]:
        """
        Add the data, in the XML file, to the DB.

        Each 'Event' element of the document is processed as one session of the 'Odisseia' channel.

        :param db_session: the DB session.
        :param filename: the path to the XML file.
        :param channel_name: the name of the channel (not used, the channel is always 'Odisseia').
        :return: the InsertionResult, or None when the file has no events, when every event is older than
                 the validity period or when processing an entry yields None.
        """

        dom_tree = xml.dom.minidom.parse(filename)
        collection = dom_tree.documentElement

        # Get all events
        events = collection.getElementsByTagName('Event')

        # If there are no events
        if len(events) == 0:
            return None

        first_event_datetime = None
        date_time = None

        insertion_result = get_file_data.InsertionResult()

        today_00_00 = datetime.datetime.utcnow().replace(hour=0,
                                                         minute=0,
                                                         second=0,
                                                         microsecond=0)

        # Process each event
        for event in events:
            # --- START DATA GATHERING ---
            # Get the date and time
            begin_time = event.getAttribute('beginTime')
            date_time = datetime.datetime.strptime(begin_time, '%Y%m%d%H%M%S')

            # Add the timezone info, then convert it to UTC
            # and then remove the timezone info
            date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
                .replace(tzinfo=None)

            # Ignore old sessions
            if date_time < (today_00_00 - datetime.timedelta(
                    days=configuration.show_sessions_validity_days)):
                continue

            # Get the first event's datetime
            if first_event_datetime is None:
                first_event_datetime = date_time

            # Get the event's duration in minutes (the attribute is divided by 60)
            duration = int(int(event.getAttribute('duration')) / 60)

            # Inside the Event -> EpgProduction
            epg_production = event.getElementsByTagName('EpgProduction')[0]

            # Get the genre
            genre_list = epg_production.getElementsByTagName('Genere')

            # Check if it is the genre that we are assuming it always is
            # (an empty 'Genere' element has no text node to inspect, so it is not reported)
            if len(genre_list) > 0 and genre_list[0].firstChild is not None \
                    and 'Document' not in genre_list[0].firstChild.nodeValue:
                get_file_data.print_message(
                    'not a documentary', True,
                    str(event.getAttribute('beginTime')))

            # Subgenre is in portuguese
            subgenre = epg_production.getElementsByTagName(
                'Subgenere')[0].firstChild.nodeValue

            # Age classification
            age_classification = epg_production.getElementsByTagName(
                'ParentalRating')[0].firstChild.nodeValue

            # Inside the Event -> EpgProduction -> EpgText
            epg_text = epg_production.getElementsByTagName('EpgText')[0]

            # Get the localized title, in this case the portuguese one
            localized_title = epg_text.getElementsByTagName(
                'Name')[0].firstChild.nodeValue

            # Get the localized synopsis, in this case the portuguese one
            short_description = epg_text.getElementsByTagName(
                'ShortDescription')

            # getElementsByTagName returns a list that may be empty, never None,
            # so the emptiness has to be checked before accessing the first element
            if len(short_description) > 0 and short_description[0].firstChild is not None:
                synopsis = short_description[0].firstChild.nodeValue
            else:
                synopsis = None

            # Iterate over the ExtendedInfo elements
            extended_info_elements = epg_text.getElementsByTagName(
                'ExtendedInfo')

            original_title = None
            directors = None
            season = None
            episode = None
            year = None
            countries = None
            cast = None

            for extended_info in extended_info_elements:
                # Elements without content keep the default of None
                if extended_info.firstChild is None:
                    continue

                attribute = extended_info.getAttribute('name')
                value = extended_info.firstChild.nodeValue

                if attribute == 'OriginalEventName':
                    original_title = value
                elif attribute == 'Year':
                    year = int(value)

                    # Sometimes the year is 0
                    if year == 0:
                        year = None
                elif attribute == 'Director':
                    directors = value
                elif attribute == 'Casting':
                    cast = value
                elif attribute == 'Nationality':
                    countries = value
                elif attribute == 'Cycle':
                    season = int(value)
                elif attribute == 'EpisodeNumber':
                    episode = int(value)

            # Get the channel's id
            channel_id = db_calls.get_channel_name(db_session, 'Odisseia').id

            # Process titles
            original_title = Odisseia.process_title(original_title)
            localized_title = Odisseia.process_title(localized_title)

            # Process the directors
            if directors is not None:
                directors = directors.split(',')

            # Without a season it is considered a movie; the genre is always documentary
            is_movie = season is None
            genre = 'Documentary'

            # --- END DATA GATHERING ---

            # Process file entry
            insertion_result = get_file_data.process_file_entry(
                db_session,
                insertion_result,
                original_title,
                localized_title,
                is_movie,
                genre,
                date_time,
                channel_id,
                year,
                directors,
                subgenre,
                synopsis,
                season,
                episode,
                cast=cast,
                duration=duration,
                countries=countries,
                age_classification=age_classification)

            if insertion_result is None:
                return None

        # If there are only invalid sessions
        if first_event_datetime is None:
            return None

        db_calls.commit(db_session)

        # Delete old sessions for the same time period, with a margin of 5 minutes on each side
        # (date_time holds the datetime of the last event of the file)
        file_start_datetime = first_event_datetime - datetime.timedelta(
            minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(
            db_session, file_start_datetime, file_end_datetime,
            Odisseia.channels)

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
            -> Optional[get_file_data.InsertionResult]:
        """
        Add the data, in the file, to the DB.

        The position and format of each field come from the configuration of the channel.
        Fields whose name starts with an underscore ('_file_format', '_temporary_program',
        '_ignore_directors') carry settings in their field_format instead of describing a column.

        :param db_session: the DB session.
        :param filename: the path to the file ('.xls' is read with xlrd, anything else with openpyxl).
        :param channel_name: the name of the channel, used to get the configuration and
                             the entry of GenericXlsx.channels_file.
        :return: the InsertionResult, or None when there is no configuration for the channel, when processing
                 an entry yields None or when the result reports zero sessions in the file.
        """

        # Get the position and format of the fields for this channel
        fields = GenericXlsx.process_configuration(channel_name)

        # If it is invalid (checked before the channel is looked up in channels_file)
        if fields is None:
            return None

        channel_name = GenericXlsx.channels_file[channel_name][0]

        if '_file_format' in fields:
            file_format = fields['_file_format'].field_format
        else:
            file_format = '.xlsx'

        if file_format == '.xls':
            book = xlrd.open_workbook(filename)
            sheet = book.sheets()[0]
            rows = sheet.nrows
        else:
            book = openpyxl.load_workbook(filename)
            sheet = book.active
            rows = sheet.max_row

        insertion_result = get_file_data.InsertionResult()

        first_event_datetime = None
        date_time = None

        today_00_00 = datetime.datetime.utcnow().replace(hour=0,
                                                         minute=0,
                                                         second=0,
                                                         microsecond=0)

        row_skipped = False

        for rx in range(rows):
            # xlrd rows are indexed from 0, openpyxl rows from 1
            if file_format == '.xls':
                row = sheet.row(rx)
            else:
                row = sheet[rx + 1]

            # Skip row 1, with the headers
            if not row_skipped:
                row_skipped = True
                continue

            # Skip the rows in which the year is not a number (header rows)
            if row[fields['year'].position].value is None:
                continue

            try:
                year = int(row[fields['year'].position].value)
            except ValueError:
                continue

            # Get the date_time
            if 'date_time' in fields:
                date_time = datetime.datetime.strptime(
                    row[fields['date_time'].position].value,
                    fields['date_time'].field_format)
            else:
                if file_format == '.xls':
                    date = xlrd.xldate_as_datetime(
                        row[fields['date'].position].value, book.datemode)

                    try:
                        #  If the time comes in the time format
                        time = xlrd.xldate_as_datetime(
                            row[fields['time'].position].value, book.datemode)
                    except TypeError:
                        # If the time comes as text
                        time = datetime.datetime.strptime(
                            row[fields['time'].position].value,
                            fields['time'].field_format)
                else:
                    try:
                        date = datetime.datetime.strptime(
                            row[fields['date'].position].value,
                            fields['date'].field_format)
                        time = datetime.datetime.strptime(
                            row[fields['time'].position].value,
                            fields['time'].field_format)
                    except TypeError:
                        # The date or the time is not text, skip the row
                        continue

                # Combine the date with the time
                date_time = date.replace(hour=time.hour, minute=time.minute)

            # Add the timezone info, then convert it to UTC
            # and then remove the timezone info
            date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
                .replace(tzinfo=None)

            # Ignore old sessions
            if date_time < (today_00_00 - datetime.timedelta(
                    days=configuration.show_sessions_validity_days)):
                continue

            original_title = str(row[fields['original_title'].position].value)
            localized_title = str(
                row[fields['localized_title'].position].value)

            # If it is a placeholder show or temporary program
            if '_temporary_program' in fields:
                if str(fields['_temporary_program'].field_format
                       ) in original_title:
                    continue

            if 'localized_synopsis' in fields:
                synopsis = str(
                    row[fields['localized_synopsis'].position].value).strip()
            else:
                synopsis = None

            if 'cast' in fields:
                cast = row[fields['cast'].position].value

                # The value of an empty cell can be None, which has no strip()
                if cast is not None:
                    cast = cast.strip()

                # An empty cast is the same as no cast
                if not cast:
                    cast = None
            else:
                cast = None

            if 'directors' in fields:
                directors = row[fields['directors'].position].value

                # Process the directors
                if directors is not None:
                    if re.match('^ *$', directors):
                        directors = None
                    else:
                        directors = directors.split(',')

                # If the name of the directors is actually a placeholder
                if '_ignore_directors' in fields and directors:
                    if directors[0].strip(
                    ) == fields['_ignore_directors'].field_format:
                        directors = None
            else:
                directors = None

            if 'creators' in fields:
                creators = row[fields['creators'].position].value

                # Process the creators
                if creators is not None:
                    if re.match('^ *$', creators):
                        creators = None
                    else:
                        creators = creators.split(',')
            else:
                creators = None

            if 'countries' in fields:
                countries = row[fields['countries'].position].value

                # The value of an empty cell can be None, which has no strip()
                if countries is not None:
                    countries = countries.strip()
            else:
                countries = None

            # Duration, in minutes
            if 'duration' in fields:
                if fields['duration'].field_format == 'seconds':
                    duration = int(
                        int(row[fields['duration'].position].value) / 60)
                else:
                    if file_format == '.xls':
                        duration = xlrd.xldate_as_datetime(
                            row[fields['duration'].position].value,
                            book.datemode)
                    else:
                        duration = datetime.datetime.strptime(
                            row[fields['duration'].position].value,
                            fields['duration'].field_format)

                    duration = duration.hour * 60 + duration.minute
            else:
                duration = None

            if 'age_classification' in fields:
                age_classification = str(
                    row[fields['age_classification'].position].value).strip()
            else:
                age_classification = None

            if 'subgenre' in fields:
                subgenre = row[fields['subgenre'].position].value

                # The value of an empty cell can be None, which has no strip()
                if subgenre is not None:
                    subgenre = subgenre.strip()
            else:
                subgenre = None

            # Get the first event's datetime
            if first_event_datetime is None:
                first_event_datetime = date_time

            if 'season' not in fields or 'episode' not in fields:
                season = None
                episode = None
            else:
                raw_season = row[fields['season'].position].value

                # TypeError covers cells without a value, ValueError covers text that is not a number
                try:
                    season = int(raw_season)
                except (TypeError, ValueError):
                    try:
                        # There are entries with a season 2.5, which will be converted to 2
                        season = int(float(raw_season))
                    except (TypeError, ValueError):
                        season = None

                # Some files use 0 as a placeholder
                if season == 0:
                    season = None

                episode = None

                # Without a season the entry is a movie and the episode is discarded below,
                # so the episode is only parsed when there is a season
                if season is not None:
                    if fields['episode'].field_format == 'int':
                        episode = int(row[fields['episode'].position].value)
                    elif 'title_with_Ep.' in fields['episode'].field_format:
                        series = re.search(
                            r'Ep\. [0-9]+',
                            row[fields['episode'].position].value.strip())

                        if series is not None:
                            # Drop the 'Ep. ' prefix
                            episode = int(series.group(0)[4:])

            # Determine whether or not it is a movie
            is_movie = season is None or episode is None

            # Make sure the season and episode are None for movies
            if is_movie:
                season = None
                episode = None

            # Genre is movie, series, documentary, news...
            genre = 'Movie' if is_movie else 'Series'

            # Process the title
            original_title = GenericXlsx.process_title(
                original_title, fields['original_title'].field_format,
                is_movie)
            localized_title = GenericXlsx.process_title(
                localized_title, fields['localized_title'].field_format,
                is_movie)

            channel_id = db_calls.get_channel_name(db_session, channel_name).id

            # Process file entry
            insertion_result = get_file_data.process_file_entry(
                db_session,
                insertion_result,
                original_title,
                localized_title,
                is_movie,
                genre,
                date_time,
                channel_id,
                year,
                directors,
                subgenre,
                synopsis,
                season,
                episode,
                cast=cast,
                duration=duration,
                countries=countries,
                age_classification=age_classification,
                creators=creators)

            if insertion_result is None:
                return None

        if insertion_result.total_nb_sessions_in_file != 0:
            db_calls.commit(db_session)

            # Delete old sessions for the same time period, with a margin of 5 minutes on each side
            # (date_time holds the datetime of the last row that got as far as the conversion to UTC)
            file_start_datetime = first_event_datetime - datetime.timedelta(
                minutes=5)
            file_end_datetime = date_time + datetime.timedelta(minutes=5)

            nb_deleted_sessions = get_file_data.delete_old_sessions(
                db_session, file_start_datetime, file_end_datetime,
                [channel_name])

            # Set the remaining information
            insertion_result.nb_deleted_sessions = nb_deleted_sessions
            insertion_result.start_datetime = file_start_datetime
            insertion_result.end_datetime = file_end_datetime

            return insertion_result
        else:
            return None