Esempio n. 1
0
def update_channel(channel=None, link: str = None):
    """
    Refresh a Channel's video catalog from its host website.

    The catalog is fetched with Youtube-DL (metadata only, nothing is downloaded), stored on the Channel as
    `info_json`, and a Video record is inserted for every source id that is not yet known for the Channel.  The
    Channel's `next_download` is set to today plus its `download_frequency` (in seconds).

    It is expected that any missing videos will be downloaded later.

    :param channel: the Channel to update; when falsy the Channel is looked up by `link`
    :param link: link of the Channel, only used when `channel` is not provided
    :raises KeyError: when a catalog entry does not have an "id" key
    """
    with get_db_context() as (engine, session):
        if not channel:
            channel = session.query(Channel).filter_by(link=link).one()

    logger.info(f'Downloading video list for {channel.name} at {channel.url}  This may take several minutes.')
    info = YDL.extract_info(channel.url, download=False, process=False)
    if 'url' in info:
        # The response points at another URL, fetch the catalog from there instead.
        info = YDL.extract_info(info['url'], download=False, process=False)

    # Materialize the entries into a list so they can be traversed more than once.
    info['entries'] = list(info['entries'])
    entries = info['entries']

    # Youtube-DL may hand back a list of URLs rather than videos, use the "Uploads" URL if it is available.
    try:
        entries[0]['id']
    except Exception:
        uploads = next((entry for entry in entries if entry['title'] == 'Uploads'), None)
        if uploads is not None:
            logger.info('Youtube-DL gave back a list of URLs, found the "Uploads" URL and using it.')
            info = YDL.extract_info(uploads['url'], download=False, process=False)
            info['entries'] = list(info['entries'])
            entries = info['entries']

    # Every source id which the host currently lists.
    try:
        all_source_ids = {entry['id'] for entry in entries}
    except KeyError as e:
        logger.warning(f'No ids for entries!  Was the channel update successful?  Is the channel URL correct?')
        logger.warning(f'entries: {entries}')
        raise KeyError('No id key for entry!') from e

    with get_db_context(commit=True) as (engine, session):
        # The Channel must be fetched again, it belongs to the previous context.
        channel = session.query(Channel).filter_by(id=channel.id).one()

        channel.info_json = info
        channel.info_date = datetime.now()
        channel.next_download = today() + timedelta(seconds=channel.download_frequency)

        with get_db_curs() as curs:
            # The source ids this Channel already has Video records for.
            curs.execute(
                'SELECT source_id FROM video WHERE channel_id=%s AND source_id IS NOT NULL',
                (channel.id,),
            )
            known_source_ids = {row[0] for row in curs.fetchall()}

        new_source_ids = all_source_ids - known_source_ids

        logger.info(f'Got {len(new_source_ids)} new videos for channel {channel.name}')
        channel_id = channel.id
        for source_id in new_source_ids:
            session.add(Video(source_id=source_id, channel_id=channel_id))
Esempio n. 2
0
def download_all_missing_videos(reporter: ProgressReporter, link: str = None):
    """
    Download every video which is listed in a channel's info packet but has not been downloaded yet.

    Progress, and any failure, is sent through `reporter` (progress index 1).  A failed download never stops the
    loop; the remaining videos are still attempted.

    :param reporter: receives the progress messages and errors
    :param link: passed to `find_all_missing_videos` to select which videos are considered
    """
    missing_videos = list(find_all_missing_videos(link))
    missing_count = len(missing_videos)
    reporter.set_progress_total(1, missing_count)
    reporter.message(1, f'Found {missing_count} missing videos.')

    for position, (channel, id_, missing_video) in enumerate(missing_videos):
        reporter.send_progress(1, position, f'Downloading {channel["name"]}: {missing_video["title"]}')
        try:
            video_path = download_video(channel, missing_video)
        except Exception as e:
            logger.warning(f'Failed to download "{missing_video["title"]}" with exception: {e}')
            if _skip_download(e):
                # This failure will never be fixed by trying again, so the video is skipped forever.
                source_id = missing_video.get('id')
                logger.warning(f'Adding video "{source_id}" to skip list for this channel.  WROLPi will not '
                               f'attempt to download it again.')

                with get_db_context(commit=True) as (engine, session):
                    skipping_channel = db['channel'].get_one(id=channel['id'])
                    add_video_to_skip_list(skipping_channel, {'source_id': source_id})

            reporter.error(1, f'Failed to download "{missing_video["title"]}", see server logs...')
        else:
            # The download succeeded, record the new file.
            with get_db_context(commit=True) as (engine, session):
                upsert_video(db, video_path, channel, id_=id_)

    reporter.finish(1, 'All videos are downloaded')
Esempio n. 3
0
def test_update_channels(tempdir):
    """
    Channels are only updated if their "next_download" has expired, or if they have never been updated.

    `update_channel` is patched with a mock in every step, so only the selection of channels made by
    `update_channels` is checked.  The test expects exactly two channels to already exist in the DB (each query is
    unpacked into two names); presumably they are created by the fixture which provides `tempdir`.
    """
    q = Queue()
    reporter = ProgressReporter(q, 2)

    # Give both channels a URL.
    with get_db_context(commit=True) as (engine, session):
        channel1, channel2 = session.query(Channel).order_by(Channel.id).all()
        channel1.url = channel2.url = 'some url'

    # Step 1: no "next_download" has been set by this test yet.
    with get_db_context() as (engine, session):
        channel1, channel2 = session.query(Channel).order_by(Channel.id).all()
        with mock.patch(
                'api.videos.downloader.update_channel') as update_channel:
            update_channel: MagicMock
            update_channels(reporter)

            # Both channels would have been updated.
            assert update_channel.call_count == 2
            update_channel.assert_any_call(channel1)
            update_channel.assert_any_call(channel2)

    with get_db_context() as (engine, session):
        # Step 2: only channel1's next download is in the future.
        channel1, channel2 = session.query(Channel).order_by(Channel.id).all()
        channel1.next_download = today() + timedelta(days=1)
        session.commit()

        with mock.patch(
                'api.videos.downloader.update_channel') as update_channel:
            update_channel: MagicMock
            update_channels(reporter)

            # Channel1 is not ready for an update
            update_channel.assert_called_once()
            update_channel.assert_called_with(channel2)

        # Step 3: the next download of both channels is in the future.
        channel1.next_download = today() + timedelta(days=1)
        channel2.next_download = today() + timedelta(days=1)
        session.commit()

        with mock.patch(
                'api.videos.downloader.update_channel') as update_channel:
            update_channel: MagicMock
            update_channels(reporter)

            # No channels needed to be updated
            assert update_channel.call_count == 0
Esempio n. 4
0
    def prepare() -> None:
        """Call `init(force=True)`, then insert an Item into the DB for every entry of TEST_ITEMS."""
        init(force=True)

        with get_db_context(commit=True) as (engine, session):
            for item_kwargs in TEST_ITEMS:
                session.add(Item(**item_kwargs))
Esempio n. 5
0
    def test_inventories_file(self):
        """
        Inventories which were saved to a file can be imported again after the DB tables have been emptied.
        """
        self.prepare()

        with tempfile.NamedTemporaryFile() as temp_file:
            file_name = temp_file.name

            # An empty file can't be imported.
            with self.assertRaises(ValueError):
                import_inventories_file(file_name)

            save_inventories_file(file_name)

            # Empty the tables, otherwise the import would have nothing to do.
            with get_db_context(commit=True) as (engine, session):
                for model in (Item, Inventory):
                    session.query(model).delete()

            import_inventories_file(file_name)

        inventories = get_inventories()
        self.assertEqual(len(inventories), 1)
        imported = inventories[0]
        # The sequence was not reset when the rows were deleted, so the ID has increased.
        self.assertEqual(imported.id, 2)
        self.assertEqual(imported.name, 'Food Storage')

        # Apart from the "deleted" item (the last test item), the DB items match the test items.
        remaining_items = [item for item in imported.items if item.deleted_at is None]
        self.assertEqual(len(remaining_items), len(TEST_ITEMS) - 1)
        expected = {(item['name'], item['brand'], item['count']) for item in TEST_ITEMS[:-1]}
        actual = {(item.name, item.brand, item.count) for item in remaining_items}
        self.assertEqual(expected, actual)
Esempio n. 6
0
def import_inventories_file(path: str = None):
    """
    Import the inventories, and their items, from a YAML file into the DB.

    An inventory is only imported when no existing inventory (as returned by `get_inventories`) has the same name.
    Every imported item is attached to the newly created Inventory; any `inventory_id` stored in the file is
    ignored.

    :param path: path of the YAML file, DEFAULT_SAVE_PATH is used when this is empty
    :raises ValueError: when the file is empty or has no "inventories" key
    """
    path: Path = Path(path) if path else DEFAULT_SAVE_PATH

    with open(path, 'rt') as fh:
        # NOTE(review): yaml.Loader is able to construct arbitrary Python objects.  This is only safe while the
        # file is trusted (i.e. written by this application); consider yaml.SafeLoader if it may come from
        # somewhere else.
        contents = yaml.load(fh, Loader=yaml.Loader)

    if not contents or 'inventories' not in contents:
        raise ValueError(
            'Inventories file does not contain the expected "inventories" list.'
        )

    inventories_names = {i.name for i in get_inventories()}
    new_inventories = [
        i for i in contents['inventories']
        if i['name'] not in inventories_names
    ]
    with get_db_context(commit=True) as (engine, session):
        for inventory in new_inventories:
            items = inventory['items']
            inventory = Inventory(
                name=inventory['name'],
                created_at=inventory['created_at'],
                deleted_at=inventory['deleted_at'],
            )
            session.add(inventory)
            # Get the Inventory from the DB so we can use its ID.
            session.flush()
            session.refresh(inventory)

            for item in items:
                # The inventory_id in the file belongs to the old row.  Drop it without requiring that it is
                # present, and without modifying the loaded data.
                item_kwargs = {key: value for key, value in item.items() if key != 'inventory_id'}
                session.add(Item(inventory_id=inventory.id, **item_kwargs))
Esempio n. 7
0
def test_update_channel(tempdir):
    """
    Updating a channel sets its "next_download" to a time after today.

    Youtube-DL is patched to return a catalog without entries, so nothing is fetched from the network.
    """
    with get_db_context(commit=True) as (engine, session):
        channel = session.query(Channel).one()
        channel.download_frequency = DEFAULT_DOWNLOAD_FREQUENCY
        # The channel has never been updated.
        assert channel.next_download is None

    with mock.patch('api.videos.downloader.YDL') as mock_ydl:
        mock_ydl.extract_info.return_value = dict(entries=[])
        update_channel(channel)

        with get_db_context() as (engine, session):
            updated_channel = session.query(Channel).one()
            # After an update, the next_download should be incremented by the download_frequency.
            assert updated_channel.next_download > today()
Esempio n. 8
0
        def wrapped(*args, **kwargs):
            """
            Build the test directories and channels described by `structure`, then call the wrapped function.

            The temporary directory is handed to the wrapped function as its `tempdir` parameter.
            """
            # Nothing reads this queue, the reporter only needs somewhere to send its messages.
            reporter = ProgressReporter(Queue(), 2)

            # Flatten the channel/video structure into file paths, each channel followed by its own directory.
            file_structure = []
            for channel_name, video_paths in structure.items():
                file_structure.extend(f'{channel_name}/{video_path}' for video_path in video_paths)
                file_structure.append(f'{channel_name}/')

            with build_test_directories(file_structure) as tempdir:
                args, kwargs = insert_parameter(func, 'tempdir', tempdir, args, kwargs)

                with get_db_context(commit=True) as (engine, session):
                    for channel_name in structure:
                        new_channel = Channel(
                            directory=str(tempdir / channel_name),
                            name=channel_name,
                            link=channel_name,
                        )
                        session.add(new_channel)
                        session.flush()
                        session.refresh(new_channel)
                        refresh_channel_videos(new_channel, reporter)

                return func(*args, **kwargs)
Esempio n. 9
0
def create_channel(data):
    """
    Create a new Channel from `data`, and save the channels config.

    The Channel's link is derived from its name with `sanitize_link`.

    :param data: mapping which must contain `name` and `directory`; `url`, `match_regex` and `download_frequency`
        are optional
    :return: the new Channel as a dict
    :raises ValidationError: when the conflict check raises an APIError
    """
    with get_db_context() as (engine, session):
        # The URL/Name/Link/Directory must not be used by another channel.
        try:
            check_for_channel_conflicts(
                session,
                url=data.get('url'),
                name=data['name'],
                link=sanitize_link(data['name']),
                directory=str(data['directory']),
            )
        except APIError as e:
            raise ValidationError from e

        attributes = dict(
            name=data['name'],
            url=data.get('url'),
            match_regex=data.get('match_regex'),
            link=sanitize_link(data['name']),
            directory=str(data['directory']),
            download_frequency=data.get('download_frequency', DEFAULT_DOWNLOAD_FREQUENCY),
        )
        channel = Channel(**attributes)
        session.add(channel)
        session.commit()
        session.flush()
        session.refresh(channel)

        # The new channel is saved to the local.yaml as well.
        save_settings_config(get_channels_config(session))

        return channel.dict()
Esempio n. 10
0
def get_channel(link) -> dict:
    """
    Get the Channel with the provided link, as a dict.

    :raises UnknownChannel: when no Channel has this link
    """
    with get_db_context() as (engine, session):
        query = session.query(Channel).filter_by(link=link)
        try:
            found = query.one()
        except NoResultFound:
            raise UnknownChannel()
        else:
            return found.dict()
Esempio n. 11
0
    def test_delete_video(self, tempdir: pathlib.Path):
        """
        Deleting a video removes its file and adds it to its channel's skip list, but its DB record remains.

        Expects the channels "channel1" and "channel2" and exactly two videos to already exist; presumably they are
        created by the fixture which provides `tempdir`.
        """
        with get_db_context(commit=True) as (engine, session):
            channel1 = session.query(Channel).filter_by(name='channel1').one()
            channel2 = session.query(Channel).filter_by(name='channel2').one()
            # Ordered by path, so the names can be relied on (presumably vid1 is channel1's video -- see below).
            vid1, vid2 = session.query(Video).order_by(Video.video_path).all()

            # No videos have been deleted yet.
            self.assertIsNone(channel1.skip_download_videos)
            self.assertIsNone(channel2.skip_download_videos)
            self.assertTrue((tempdir / 'channel1/vid1.mp4').is_file())

            delete_video(vid1)

            # Fetch the channel again to see the result of the delete.
            channel1 = session.query(Channel).filter_by(name='channel1').one()
            # Video was added to skip list.
            self.assertEqual(len(channel1.skip_download_videos), 1)
            # Deleting a video leaves its entry in the DB, but its files are deleted.
            self.assertEqual(session.query(Video).count(), 2)
            self.assertFalse((tempdir / 'channel1/vid1.mp4').is_file())
            self.assertTrue((tempdir / 'channel2/vid2.mp4').is_file())

            delete_video(vid2)

            # Both files are gone now, both DB records are still there.
            self.assertEqual(session.query(Video).count(), 2)
            self.assertFalse((tempdir / 'channel1/vid1.mp4').is_file())
            self.assertFalse((tempdir / 'channel2/vid2.mp4').is_file())
Esempio n. 12
0
    def test_video_search(self):
        """
        Videos can be found by searching, and the results are ordered by their textsearch rank.
        """
        # (title, caption); letters are repeated in the captions so those videos rank higher.
        captions_by_title = [
            ('1', 'b b b b e d d'),
            ('2', '2 b b b d'),
            ('3', 'b b'),
            ('4', 'b e e'),
            ('5', ''),
        ]
        with get_db_context(commit=True) as (engine, session):
            for title, caption in captions_by_title:
                session.add(Video(title=title, caption=caption, video_path='foo'))

        def search(search_str, limit=20):
            body = json.dumps({'search_str': search_str, 'limit': limit})
            _, resp = api_app.test_client.post('/api/videos/search', data=body)
            return resp

        def assert_results(resp, expected_ids):
            assert resp.status_code == HTTPStatus.OK
            assert [video['id'] for video in resp.json['videos']] == expected_ids
            assert resp.json['totals']['videos'] == len(expected_ids)

        expectations = [
            # Only videos with a b are returned, ordered by the amount of b's.
            ('b', [1, 2, 3, 4]),
            # Repeating a search should return the same result.
            ('b', [1, 2, 3, 4]),
            # Only two captions have e
            ('e', [4, 1]),
            # Only two captions have d
            ('d', [1, 2]),
            # 5 can be gotten by its title
            ('5', [5]),
            # only video 1 has e and d
            ('e d', [1]),
            # video 1 and 4 have b and e, but 1 has more
            ('b e', [1, 4]),
        ]
        for search_str, expected_ids in expectations:
            assert_results(search(search_str), expected_ids)

        # The total counts every match, even when the results are limited.
        limited = search('b', 2)
        assert [video['id'] for video in limited.json['videos']] == [1, 2]
        assert limited.json['totals']['videos'] == 4
Esempio n. 13
0
    def test_get_channel_videos(self):
        """
        A search which is restricted to a channel only returns the videos of that channel.
        """
        def search(params):
            _, resp = api_app.test_client.post('/api/videos/search', data=json.dumps(params))
            return resp

        with get_db_context(commit=True) as (engine, session):
            channel1 = Channel(name='Foo', link='foo')
            channel2 = Channel(name='Bar', link='bar')
            for new_channel in (channel1, channel2):
                session.add(new_channel)
            session.flush()
            for new_channel in (channel1, channel2):
                session.refresh(new_channel)

        # No channel has a video yet.
        channel1_params = dict(channel_link=channel1.link)
        response = search(channel1_params)
        assert response.status_code == HTTPStatus.OK
        assert len(response.json['videos']) == 0

        with get_db_context(commit=True) as (engine, session):
            # Each video belongs to the "other" channel: vid1 -> channel2, vid2 -> channel1.
            vid1 = Video(title='vid1', channel_id=channel2.id, video_path='foo')
            vid2 = Video(title='vid2', channel_id=channel1.id, video_path='foo')
            for new_video in (vid1, vid2):
                session.add(new_video)

        # Each channel only gets its own video.
        response = search(channel1_params)
        assert response.status_code == HTTPStatus.OK
        assert len(response.json['videos']) == 1
        assert response.json['totals']['videos'] == 1
        self.assertDictContains(
            response.json['videos'][0],
            dict(id=2, title='vid2', channel_id=channel1.id),
        )

        response = search(dict(channel_link=channel2.link))
        assert response.status_code == HTTPStatus.OK
        assert len(response.json['videos']) == 1
        self.assertDictContains(
            response.json['videos'][0],
            dict(id=1, title='vid1', channel_id=channel2.id),
        )
Esempio n. 14
0
def find_all_missing_videos(link: str = None) -> Tuple[dict, dict]:
    """
    Find all videos that don't have a video file, but are found in the DB (taken from the channel's info_json).

    This is a generator.  It yields 3-tuples of: a Channel Dict object, our Video id, and the "entry" of the video
    from the channel's info_json['entries'].  (NOTE: the return annotation does not describe this; it is left
    unchanged because the typing names needed to correct it are imported outside of this block.)

    A video is not yielded when it is in its channel's skip list, when it (or its channel) has no entry in any
    info_json, or when the channel has a `match_regex` which the entry's title does not match.

    :param link: restrict the search to the channel with this link
    :raises UnknownChannel: no channel has the provided link
    :raises ChannelURLEmpty: the channel with the provided link has no URL
    """
    with get_db_context() as (engine, session):
        Channel = db['channel']

        if link:
            channel = Channel.get_one(link=link)
            if not channel:
                raise UnknownChannel(f'No channel with link: {link}')
            if not channel['url']:
                raise ChannelURLEmpty('No URL for this channel')
            channels = [channel, ]
        else:
            channels = Channel.get_where(Channel['info_json'].IsNotNull())

        # Get all channels while in the db context.
        channels = list(channels)

    channels = {i['id']: i for i in channels}

    match_regexen = {i: re.compile(j['match_regex']) for i, j in channels.items() if j['match_regex']}

    # Convert the channel video entries into a form that allows them to be quickly retrieved without searching through
    # the entire entries list.
    channels_entries = {}
    for id_, channel in channels.items():
        # A channel which was selected by link may never have been updated, then it has no info_json.
        info_json = channel['info_json'] or {}
        channels_entries[id_] = {i['id']: i for i in info_json.get('entries') or []}

    missing_videos = _find_all_missing_videos(link)

    for id_, source_id, channel_id in missing_videos:
        channel = channels.get(channel_id)
        if channel is None:
            # The missing videos are selected by channel URL, but the channels above by info_json; so a video may
            # belong to a channel which was not selected.  Nothing is known about such a video.
            logger.warning(f'Video {id_} / {source_id} is in channel {channel_id} which has no info_json')
            continue

        if channel['skip_download_videos'] and source_id in channel['skip_download_videos']:
            # This video has been marked to skip.
            continue

        try:
            missing_video = channels_entries[channel_id][source_id]
        except KeyError:
            logger.warning(f'Video {id_} / {source_id} is not in {channel["name"]} info_json')
            continue

        match_regex = match_regexen.get(channel_id)
        if not match_regex or (missing_video['title'] and match_regex.match(missing_video['title'])):
            # No title match regex, or the title matches the regex.
            yield channel, id_, missing_video
Esempio n. 15
0
def delete_channel(link):
    """
    Delete the Channel with the provided link along with all of its Video records, and save the channels config.

    :raises UnknownChannel: when no Channel has this link
    """
    with get_db_context(commit=True) as (engine, session):
        try:
            doomed_channel = session.query(Channel).filter_by(link=link).one()
        except NoResultFound:
            raise UnknownChannel()

        # The videos are deleted before the channel they belong to.
        channel_id = doomed_channel.id
        session.query(Video).filter_by(channel_id=channel_id).delete()
        session.query(Channel).filter_by(id=channel_id).delete()

        # The deletion is saved to the local.yaml as well.
        save_settings_config(get_channels_config(session))
Esempio n. 16
0
    def test_process_captions(self):
        """
        Processing a video's caption file fills in its `caption`, and makes the video searchable through the
        `textsearch` column by both its title and its caption.
        """
        with get_db_context(commit=True) as (engine, session):
            video1 = Video(title='scream', caption_path=str(self.vtt_path1))
            session.add(video1)
            # The lookup of the caption file is patched to return the test's VTT file directly.
            with mock.patch('api.videos.captions.get_absolute_video_caption', lambda *a: self.vtt_path1):
                captions.process_captions(video1)
            video2 = Video(title='bar', caption_path=str(self.vtt_path2))
            session.add(video2)
            with mock.patch('api.videos.captions.get_absolute_video_caption', lambda *a: self.vtt_path2):
                captions.process_captions(video2)

            session.flush()
            session.refresh(video1)
            session.refresh(video2)

            # Get the video from the DB
            video1 = session.query(Video).filter_by(id=video1.id).one()
            self.assertIsNotNone(video1.caption)
            video2 = session.query(Video).filter_by(id=video2.id).one()
            self.assertIsNotNone(video2.caption)

        # Search using the tsvector, "sessions" never actually appears in the text, but "session" does
        with get_db_curs() as curs:
            def select_textsearch(*args):
                # Run the query; the matching ids are read by the caller with curs.fetchall().
                curs.execute('SELECT id FROM video WHERE textsearch @@ to_tsquery(%s) ORDER BY id', args)

            # Only matches video1
            select_textsearch('sessions')
            self.assertEqual(curs.fetchall(), [[1, ]])
            # Matches video1.title and video2.caption
            select_textsearch('scream')
            self.assertEqual(curs.fetchall(), [[1, ], [2, ]])
            # Either term is enough, so both videos match
            select_textsearch('scream | sessions')
            self.assertEqual(curs.fetchall(), [[1, ], [2, ]])
            # Only video1 matches both terms
            select_textsearch('scream & sessions')
            self.assertEqual(curs.fetchall(), [[1, ]])
            # Matches neither
            select_textsearch('scream & sess')
            self.assertEqual(curs.fetchall(), [])
            # Matches video2.caption
            select_textsearch('yawn | sess')
            self.assertEqual(curs.fetchall(), [[2, ]])
            # Matches video2.caption
            select_textsearch('yawn')
            self.assertEqual(curs.fetchall(), [[2, ]])
            # Matches video2.title
            select_textsearch('bar')
            self.assertEqual(curs.fetchall(), [[2, ]])
Esempio n. 17
0
def update_channel(data, link):
    """
    Apply the changes in `data` to the Channel identified by `link`, and save the channels config.

    `data` is normalized in place before it is applied: `directory` is made relative to the media directory (only
    when the Channel has no directory yet) and converted to a string, `download_frequency` is converted to an
    integer, and a `match_regex` of 'None' or 'null' becomes None.

    :param data: mapping of Channel attribute names to their new values.  The optional `mkdir` key allows a
        directory which does not exist to be created.
    :param link: link of the Channel to update
    :return: the updated Channel
    :raises UnknownChannel: when no Channel has this link
    :raises UnknownDirectory: when the directory can't be resolved and `mkdir` was not requested
    :raises APIError: when `download_frequency` can't be converted to an integer
    """
    with get_db_context(commit=True) as (engine, session):
        try:
            channel = session.query(Channel).filter_by(link=link).one()
        except NoResultFound:
            raise UnknownChannel()

        # Only resolve the directory if the channel doesn't have one.
        # NOTE(review): a directory in `data` is still applied below even when the channel already has one --
        # confirm this is intended.
        if data.get('directory') and not channel.directory:
            try:
                data['directory'] = get_relative_to_media_directory(data['directory'])
            except UnknownDirectory:
                # `mkdir` is optional; without it the UnknownDirectory must reach the caller, not a KeyError.
                if data.get('mkdir'):
                    make_media_directory(data['directory'])
                    data['directory'] = get_relative_to_media_directory(data['directory'])
                else:
                    raise

        if 'directory' in data:
            data['directory'] = str(data['directory'])

        if 'download_frequency' in data:
            try:
                data['download_frequency'] = int(data['download_frequency'])
            except (TypeError, ValueError) as e:
                # int() raises TypeError for values such as None, that is just as invalid as a bad string.
                raise APIError('Invalid download frequency') from e

        if data.get('match_regex') in ('None', 'null'):
            data['match_regex'] = None

        # Verify that the URL/Name/Link aren't taken
        check_for_channel_conflicts(
            session,
            id=channel.id,
            url=data.get('url'),
            name=data.get('name'),
            link=data.get('link'),
            directory=data.get('directory'),
        )

        # Apply the changes now that we've OK'd them
        for key, value in data.items():
            setattr(channel, key, value)

        # Save these changes to the local.yaml as well
        channels = get_channels_config(session)
        save_settings_config(channels)

    return channel
Esempio n. 18
0
    def test_func(tempdir):
        """Check that every expected file exists, and that each channel matches its part of the structure."""
        assert isinstance(tempdir, Path)
        for relative_path in paths:
            absolute_path = tempdir / relative_path
            assert absolute_path.exists()
            assert absolute_path.is_file()

        with get_db_context() as (engine, session):
            for channel_name in _structure:
                channel = session.query(Channel).filter_by(name=channel_name).one()
                channel_directory = tempdir / channel_name
                assert channel_directory.is_dir()
                assert channel
                assert channel.directory == str(channel_directory)
                # Only the mp4 files of the structure are expected to be videos.
                expected_video_count = sum(1 for name in _structure[channel_name] if name.endswith('mp4'))
                assert len(channel.videos) == expected_video_count
Esempio n. 19
0
def update_channels(reporter: ProgressReporter, link: str = None):
    """
    Update the video list of each channel which is due for an update.  (No downloads performed)

    Without a `link`, the channels which have a URL and whose `next_download` is empty or not later than today
    are updated, in random order.  With a `link`, only that channel is updated, whether it is due or not.

    A channel which fails to update is logged and does not stop the remaining channels.

    :param reporter: receives the progress messages (progress index 0)
    :param link: only update the channel with this link
    :raises UnknownChannel: when there are no channels at all, or no channel has the provided link
    """
    with get_db_context() as (engine, session):
        if not session.query(Channel).count():
            raise UnknownChannel('No channels exist yet')

        if not link:
            channels = session.query(Channel).filter(
                Channel.url != None,  # noqa
                Channel.url != '',
                or_(
                    Channel.next_download == None,  # noqa
                    Channel.next_download <= today(),
                )
            ).all()
        else:
            try:
                requested_channel = session.query(Channel).filter_by(link=link).one()
            except NoResultFound:
                raise UnknownChannel(f'No channel with link: {link}')
            channels = [requested_channel]

        if not channels:
            logger.warning(f'All channels are up to date')

    channel_count = len(channels)
    reporter.set_progress_total(0, channel_count)
    reporter.send_progress(0, 0, f'{channel_count} channels scheduled for update')

    # The channels are updated in a random order.
    shuffle(channels)

    logger.debug(f'Getting info for {channel_count} channels')
    for position, channel in enumerate(channels):
        reporter.send_progress(0, position, f'Getting video list for {channel.name}')
        try:
            update_channel(channel)
        except Exception:
            # Carry on with the next channel.
            logger.critical('Unable to fetch channel videos', exc_info=True)

    if not channels:
        reporter.finish(0, 'Done downloading video lists')
    else:
        reporter.send_progress(0, channel_count, 'Done downloading video lists')
Esempio n. 20
0
    def test_get_video(self):
        """
        The information of a video can be fetched, including the video which comes after it.  A video which does
        not exist gives a 404.
        """
        def raise_unknown_file(_):
            raise UnknownFile()

        info_json_patch = mock.patch('api.videos.common.get_absolute_video_info_json', raise_unknown_file)
        with get_db_context(commit=True) as (engine, session), info_json_patch:
            channel = Channel(name='Foo', link='foo')
            session.add(channel)
            session.flush()
            session.refresh(channel)
            now = datetime.utcnow()
            # vid2 was uploaded one second after vid1.
            first_video = Video(title='vid1', channel_id=channel.id, upload_date=now)
            session.add(first_video)
            second_video = Video(title='vid2', channel_id=channel.id, upload_date=now + timedelta(seconds=1))
            session.add(second_video)

        # A video which doesn't exist gives a 404.
        _, response = api_app.test_client.get('/api/videos/video/10')
        assert response.status_code == HTTPStatus.NOT_FOUND, response.json
        expected_error = {
            'code': 1,
            'api_error': 'The video could not be found.',
            'message': '',
        }
        assert response.json == expected_error

        # The first video which was inserted can be fetched.
        _, response = api_app.test_client.get('/api/videos/video/1')
        assert response.status_code == HTTPStatus.OK, response.json
        body = response.json
        self.assertDictContains(body['video'], {'title': 'vid1'})

        # There is no previous video, but the next video is included.
        self.assertIsNone(body['prev'])
        self.assertDictContains(body['next'], {'title': 'vid2'})
Esempio n. 21
0
    def test_get_channel_videos_pagination(self):
        """
        The videos of a channel are returned in pages, which are selected with `offset`.
        """
        with get_db_context(commit=True) as (engine, session):
            channel1 = Channel(name='Foo', link='foo')
            session.add(channel1)
            session.flush()
            session.refresh(channel1)

            # The channel which is paged through has 50 videos.
            for number in range(50):
                video = Video(title=f'Foo.Video{number}', channel_id=channel1.id, video_path='foo')
                session.add(video)

            # A second channel with one video, which must never be part of the pages.
            channel2 = Channel(name='Bar', link='bar')
            session.add(channel2)
            session.flush()
            session.refresh(channel2)
            session.add(Video(title='vid2', channel_id=channel2.id, video_path='foo'))

        # The first, second, third, and the empty page: (offset, expected video count)
        pages = (
            (0, 20),
            (20, 20),
            (40, 10),
            (50, 0),
        )
        last_ids = []
        for offset, video_count in pages:
            params = {'channel_link': channel1.link, 'order_by': 'id', 'offset': offset}
            _, response = api_app.test_client.post('/api/videos/search', data=json.dumps(params))
            assert response.status_code == HTTPStatus.OK
            page_videos = response.json['videos']
            assert len(page_videos) == video_count
            # Every page must differ from the page before it.
            current_ids = [page_video['id'] for page_video in page_videos]
            assert current_ids != last_ids, f'IDs are unchanged current_ids={current_ids}'
            last_ids = current_ids
Esempio n. 22
0
def _refresh_videos(q: Queue, channel_links: list = None):
    """
    Refresh the videos of each channel from its directory, then fill in any missing video meta data.

    The refresh of a single channel is done by `refresh_channel_videos`.  Status updates are sent through a
    ProgressReporter which is built on `q`, so they can be passed on to the UI.

    :param q: the queue which receives the status updates
    :param channel_links: only refresh the channels with these links, all channels are refreshed when empty
    :raises Exception: when no channel matches the links, or when there are no channels at all
    """
    logger.info('Refreshing video files')
    with get_db_context() as (engine, session):
        reporter = ProgressReporter(q, 2)
        reporter.code('refresh-started')
        reporter.set_progress_total(0, session.query(Channel).count())

        channel_query = session.query(Channel)
        if channel_links:
            channel_query = channel_query.filter(Channel.link.in_(channel_links))

        # The channels are fetched while the session is still available.
        channels = list(channel_query)

    if not channels:
        if channel_links:
            raise Exception(f'No channels match links(s): {channel_links}')
        raise Exception(f'No channels in DB.  Have you created any?')

    for position, channel in enumerate(channels):
        reporter.send_progress(0, position, f'Checking {channel.name} directory for new videos')
        refresh_channel_videos(channel, reporter)

    # Fill in any missing data for all videos.
    process_video_meta_data()

    reporter.send_progress(0, len(channels), 'All videos refreshed.')
    reporter.code('refresh-complete')
Esempio n. 23
0
def _find_all_missing_videos(link: str = None) -> List[Tuple]:
    """
    Get all Video entries which don't have the required media files (i.e. hasn't been downloaded).  Restrict to a
    single channel if "link" is provided.

    A video is considered missing when its video_path or its poster_path is NULL or empty.  Only videos which have a
    source_id and belong to a channel with a URL are returned.

    :param link: Optional Channel link used to restrict the search to that channel.
    :return: A list of (video.id, video.source_id, video.channel_id) rows.
    :raises IndexError: When `link` is provided but no channel has that link.
    """
    # Get all channels by default.
    where = ''
    params = ()

    # Use the cursor helper (as refresh_channel_videos() does) rather than an undefined `db_conn`.
    with get_db_curs() as curs:
        if link:
            # Restrict by channel when link is provided.
            query = 'SELECT id FROM channel WHERE link = %s'
            curs.execute(query, (link,))
            channel_id = curs.fetchall()[0][0]
            where = 'AND channel_id = %s'
            params = (channel_id,)

        # Only the constant `where` fragment is interpolated; all values are passed as query parameters.
        query = f'''
            SELECT
                video.id, video.source_id, video.channel_id
            FROM
                video
                LEFT JOIN channel ON channel.id = video.channel_id
            WHERE
                channel.url IS NOT NULL
                AND channel.url != ''
                AND source_id IS NOT NULL
                {where}
                AND channel_id IS NOT NULL
                AND (video_path IS NULL OR video_path = '' OR poster_path IS NULL OR poster_path = '')
        '''
        curs.execute(query, params)
        missing_videos = list(curs.fetchall())
        return missing_videos
Esempio n. 24
0
def video_delete(request: Request, video_id: int):
    """
    Delete the video identified by `video_id`.

    The video is looked up inside a committing DB context, then handed to delete_video() once that context has
    exited.  Replies with an empty body and 204 No Content.
    """
    with get_db_context(commit=True) as (_engine, db_session):
        doomed_video = get_video(db_session, video_id)

    delete_video(doomed_video)

    return response.raw('', HTTPStatus.NO_CONTENT)
Esempio n. 25
0
def test_bulk_replace_invalid_posters(tempdir: Path):
    """
    A poster which is not a JPEG must be converted to a JPEG when posters are validated.

    The `tempdir` fixture is expected to provide two channel directories: the first holding a jpg poster and an mp4,
    the second holding an flv and a webp poster.
    """
    channel1_dir, channel2_dir = sorted(tempdir.iterdir())
    jpg, mp4 = sorted(channel1_dir.iterdir())
    flv, webp = sorted(channel2_dir.iterdir())

    # Where the webp poster should end up once it has been converted.
    converted_jpg = tempdir / 'channel2/vid2.jpg'

    # Write an identical black 25x25 image into both posters, each in the format of its file extension.
    for poster in (jpg, webp):
        Image.new('RGB', (25, 25)).save(poster)

    with open(jpg, 'rb') as jpg_fh, open(webp, 'rb') as webp_fh:
        # Same picture, but different file formats.
        jpg_bytes = jpg_fh.read()
        webp_bytes = webp_fh.read()
        assert jpg_bytes != webp_bytes
        assert Image.open(jpg_fh).format == 'JPEG'
        assert Image.open(webp_fh).format == 'WEBP'

    # Neither poster has been validated yet.
    with get_db_context() as (engine, session):
        vid1 = session.query(Video).filter_by(poster_path='vid1.jpg').one()
        assert vid1.validated_poster is False

        vid2 = session.query(Video).filter_by(poster_path='vid2.webp').one()
        assert vid2.validated_poster is False

    mocked_convert_image = Mock(wraps=convert_image)

    def validate_both_posters():
        # Validate the posters of both videos while spying on convert_image().
        with mock.patch('api.videos.common.convert_image', mocked_convert_image):
            bulk_validate_posters([vid1.id, vid2.id])

    # Only the WEBP needs converting, so convert_image() should be called exactly once.
    validate_both_posters()
    mocked_convert_image.assert_called_once_with(webp, converted_jpg)

    with get_db_context() as (engine, session):
        # Fetch by ID, the poster path of vid2 has changed to the JPEG.
        vid2 = session.query(Video).filter_by(id=vid2.id).one()
        assert vid2.poster_path == 'vid2.jpg'
        assert all('webp' not in i.poster_path
                   for i in session.query(Video).all())
        assert vid2.validated_poster is True

        # The poster of vid1 was validated, but did not need a conversion.
        vid1 = session.query(Video).filter_by(id=vid1.id).one()
        assert vid1.poster_path == 'vid1.jpg'
        assert vid1.validated_poster is True

    # The old webp is gone, the new JPEG took its place.
    assert not webp.is_file()
    assert converted_jpg.is_file()
    # Regular file with mode 644.
    assert converted_jpg.stat().st_mode == 0o100644
    with open(converted_jpg, 'rb') as converted_fh:
        # Both JPEGs are black 25x25 pixel images, so their contents must match.
        assert jpg_bytes == converted_fh.read()
        assert Image.open(converted_fh).format == 'JPEG'

    # A second validation has nothing left to convert.
    mocked_convert_image.reset_mock()
    validate_both_posters()
    mocked_convert_image.assert_not_called()
Esempio n. 26
0
def channel_conflict(_: Request, data: dict):
    """
    Check whether the "url", "name" or "directory" in `data` conflict with an existing channel.

    Replies with an empty 204 No Content when check_for_channel_conflicts() returns normally.
    NOTE(review): a conflict is presumably reported by check_for_channel_conflicts() raising -- confirm.

    :param _: The request, unused.
    :param data: Request body; keys which are absent are passed on as None.
    """
    with get_db_context() as (engine, session):
        # Pass the session opened by this context; the previous `db` argument was not defined here.
        check_for_channel_conflicts(session, url=data.get('url'), name=data.get('name'),
                                    directory=data.get('directory'))
    return response.raw('', HTTPStatus.NO_CONTENT)
Esempio n. 27
0
def refresh_channel_videos(channel: Channel, reporter: ProgressReporter):
    """
    Synchronize the video records of a channel with the video files in the channel's directory.

    Videos whose file is found are marked with a fresh idempotency key, files without a record are upserted, and
    every record of the channel that was not marked is deleted.  Progress is reported in six steps on progress
    index 1 of `reporter`.
    """

    def report(step, *message):
        # Forward the message only when one was given, so a bare step stays a bare step.
        reporter.send_progress(1, step, *message)

    def relative(path):
        # Paths are stored in the DB relative to the channel's directory.
        return str(path.relative_to(directory))

    # The amount of work is hard to predict, so progress is simply reported in chunks.
    reporter.set_progress_total(1, 6)
    report(0, 'Preparing channel.')

    # Clear every idempotency key of this channel; whatever is still NULL at the end was not found on disk.
    with get_db_curs(commit=True) as curs:
        curs.execute('UPDATE video SET idempotency=NULL WHERE channel_id=%s',
                     (channel.id, ))

    report(1, 'Finding all videos, checking for duplicates.')

    idempotency = str(uuid1())
    directory = get_absolute_media_path(channel.directory)

    # Absolute paths of the video files in the file system, without duplicates.
    possible_new_paths = remove_duplicate_video_paths(
        generate_video_paths(directory))

    report(2, 'Matching all videos to the database.')

    # Mark every record whose video file was found.
    relative_new_paths = [relative(path) for path in possible_new_paths]
    mark_query = 'UPDATE video SET idempotency = %s WHERE channel_id = %s AND video_path = ANY(%s) RETURNING video_path'
    with get_db_curs(commit=True) as curs:
        curs.execute(mark_query, (idempotency, channel.id, relative_new_paths))
        existing_paths = {found for (found, ) in curs.fetchall()}

    report(3)

    # Files which have no record yet; these stay absolute because upsert_video() is given an absolute path.
    new_videos = {
        path
        for path in possible_new_paths
        if relative(path) not in existing_paths
    }

    report(4, f'Inserting {len(new_videos)} new videos.')

    # Each new video is inserted in its own committed context.
    for video_path in new_videos:
        with get_db_context(commit=True) as (engine, session):
            upsert_video(session,
                         pathlib.Path(video_path),
                         channel,
                         idempotency=idempotency)
            logger.debug(f'{channel.name}: Added {video_path}')

    report(5, 'Deleting unnecessary video entries.')

    # Records that were never marked have no file, remove them.
    with get_db_curs(commit=True) as curs:
        curs.execute(
            'DELETE FROM video WHERE channel_id=%s AND idempotency IS NULL RETURNING id',
            (channel.id, ))
        deleted_count = len(curs.fetchall())

    if deleted_count:
        logger.info(
            f'Deleted {deleted_count} video records from channel {channel.name}'
        )

    logger.info(
        f'{channel.name}: {len(new_videos)} new videos, {len(existing_paths)} already existed. '
    )

    report(6, f'Processed all videos for {channel.name}')
Esempio n. 28
0
    def test_inventory1(self):
        """
        Inventory names must be unique among inventories which are not deleted; a deleted inventory frees its name
        and its items are removed once the name is reused.
        """
        self.prepare()

        # Insert a new Inventory; the "viewed_at" value should be ignored.
        inventory = dict(name='New Inventory', viewed_at='asdf')
        save_inventory(inventory)

        with get_db_context() as (engine, session):
            i1, i2 = session.query(Inventory).order_by(Inventory.name).all()
            expected = {'name': 'New Inventory', 'viewed_at': None}
            self.assertDictContains(i2, expected)

        # Inventories cannot share a name.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            save_inventory(inventory)

        # Insert a second inventory.
        inventory['name'] = 'Super Inventory'
        save_inventory(inventory)

        # Cannot update the name to a name that is already used.
        with get_db_context() as (engine, session):
            i = session.query(Inventory).filter_by(
                name='Super Inventory').one()
            inventory['name'] = 'New Inventory'
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                update_inventory(i.id, inventory)

        # Add four identical items to "New Inventory"
        with get_db_context(commit=True) as (engine, session):
            before_item_count = session.query(Item).count()
            for _ in range(4):
                session.add(
                    Item(inventory_id=2,
                         brand='a',
                         name='b',
                         item_size=45,
                         unit='pounds',
                         count=1))

        # You can rename a inventory to a conflicting name, if the other inventory is marked as deleted.
        with get_db_context(commit=True) as (engine, session):
            delete_inventory(2)
            # Check that the items from the deleted Inventory were not deleted, YET.
            self.assertEqual(before_item_count + 4,
                             session.query(Item).count())

            i = session.query(Inventory).filter_by(
                name='Super Inventory').one()
            inventory['name'] = 'New Inventory'
            update_inventory(i.id, inventory)

            # Check that the items from the deleted Inventory were really deleted.
            self.assertEqual(before_item_count, session.query(Item).count())
Esempio n. 29
0
    def test_refresh_videos(self):
        """
        Test that POSTing to /api/videos:refresh synchronizes a channel's video records with its directory.

        The refresh is expected to remove the record of a video whose file does not exist, add a video file which
        has no record (along with its description file), leave a duplicate video file (same name, different
        extension) and an orphan poster out of the channel's videos, and push messages onto the refresh queue.
        """
        # There should be no messages until a refresh is called.
        pytest.raises(Empty, refresh_queue.get_nowait)

        # Setup a fake channel directory.
        with get_db_context() as (engine, session), \
                tempfile.TemporaryDirectory() as channel_dir:
            channel_path = pathlib.Path(channel_dir)

            # Files in subdirectories should be found and handled properly.
            subdir = channel_path / 'subdir'
            subdir.mkdir()

            # These are the types of files that will be found first.
            vid1 = pathlib.Path(subdir /
                                'channel name_20000101_abcdefghijk_title.mp4')
            vid1.touch()
            vid2 = pathlib.Path(channel_path /
                                'channel name_20000102_bcdefghijkl_title.webm')
            vid2.touch()

            # This video is named the same as vid1, except for the file extension.  It's possible that this was
            # downloaded later, or maybe the old video format has fallen out of favor.  WROLPi should ignore this
            # duplicate file.
            vid1_alt = pathlib.Path(
                subdir / 'channel name_20000101_abcdefghijk_title.webm')
            vid1_alt.touch()

            # These files are associated with the video files above, and should be found "near" them.
            poster1 = pathlib.Path(
                subdir / 'channel name_20000101_abcdefghijk_title.jpg')
            poster1.touch()
            poster2 = pathlib.Path(
                channel_path / 'channel name_20000102_bcdefghijkl_title.jpg')
            poster2.touch()

            # Create a channel, associate videos with it.
            channel = Channel(directory=channel_dir, link='foo', name='foo')
            session.add(channel)
            session.flush()
            session.refresh(channel)
            video1 = upsert_video(session, vid1, channel)
            video2 = upsert_video(session, vid2, channel)
            session.commit()
            self.assertEqual({i.video_path
                              for i in channel.videos},
                             {'subdir/' + vid1.name, vid2.name})

            # Poster files were found.
            self.assertEqual(video1.poster_path, 'subdir/' + poster1.name)
            self.assertEqual(video2.poster_path, poster2.name)

            # Add a bogus video record (no such file exists), this should be removed during the refresh
            self.assertNotIn('foo', {i.video_path for i in channel.videos})
            session.add(Video(video_path='foo', channel_id=channel.id))
            session.flush()
            session.refresh(channel)
            self.assertIn('foo', {i.video_path for i in channel.videos})
            self.assertEqual(len(channel.videos), 3)

            # Add a video file that isn't in the DB, it should be found along with any meta files associated with it
            vid3 = pathlib.Path(channel_path /
                                'channel name_20000103_cdefghijklm_title.flv')
            vid3.touch()
            description3 = pathlib.Path(
                channel_path /
                'channel name_20000103_cdefghijklm_title.description')
            description3.touch()

            # An orphan meta-file should be ignored.  This shouldn't show up anywhere.  But, it shouldn't be deleted.
            poster3 = pathlib.Path(
                channel_path / 'channel name_20000104_defghijklmn_title.jpg')
            poster3.touch()

            # Finally, call the refresh.  Again, it should remove the "foo" video, then discover this 3rd video
            # file and its description.
            api_app.test_client.post('/api/videos:refresh')

            # Bogus file was removed
            self.assertNotIn('foo', {i.video_path for i in channel.videos})

            # Final channel video list we built: (video_path, poster_path, description_path)
            expected = {
                ('subdir/' + vid1.name, 'subdir/' + poster1.name,
                 None),  # in a subdirectory, no description
                (vid2.name, poster2.name, None),  # no description
                (vid3.name, None, description3.name),  # no poster
            }
            self.assertEqual(
                {(i.video_path, i.poster_path, i.description_path)
                 for i in channel.videos}, expected)

            assert poster3.is_file(), 'Orphan jpg file was deleted!'

        # During the refresh process, messages are pushed to a queue, make sure there are messages there
        messages = get_all_messages_in_queue(refresh_queue)
        assert 'refresh-started' in [i.get('code') for i in messages]
Esempio n. 30
0
    def test_get_video_prev_next(self):
        """
        The videos surrounding a video (previous and next) are found within the video's channel, ordered by
        upload_date.  A video without an upload_date has no known neighbors.
        """
        with get_db_context(commit=True) as (engine, session):
            for _ in range(4):
                session.add(Channel(link=str(uuid4())))
            channel1, channel2, channel3, channel4 = session.query(
                Channel).all()

            now = datetime.utcnow()
            second = timedelta(seconds=1)

            # (title, channel, seconds after `now` of the upload_date).  The upload_date decides the order of the
            # prev/next videos; None means the video is created without an upload_date.  The insertion order
            # matters because the tests below reference the videos by ID.
            fixtures = [
                ('vid1', channel1, 0),
                ('vid2', channel1, 1),
                ('vid3', channel2, 4),
                ('vid4', channel1, 3),
                ('vid5', channel2, 2),
                ('vid6', channel2, 5),
                ('vid7', channel1, None),
                ('vid8', channel2, 7),
                ('vid9', channel3, 8),
                ('vid10', channel4, None),
                ('vid11', channel4, None),
            ]
            for title, channel, offset in fixtures:
                video_kwargs = dict(title=title, channel_id=channel.id)
                if offset is not None:
                    video_kwargs['upload_date'] = now + (second * offset)
                session.add(Video(**video_kwargs))

            session.commit()

            # (video id, (expected previous title, expected next title))
            tests = [
                # Channel 1's videos were inserted in upload_date order.
                (1, (None, 'vid2')),
                (2, ('vid1', 'vid4')),
                # 7 has no upload_date, so it doesn't come after 4.
                (4, ('vid2', None)),
                # 7 has no upload_date, so we don't know the order of it.
                (7, (None, None)),
                # Channel 3 has only one video.
                (9, (None, None)),
                # Channel 2 was inserted out of order.
                (5, (None, 'vid3')),
                (3, ('vid5', 'vid6')),
                (8, ('vid6', None)),
                # Channel 4's videos have no upload date, so we don't know what is previous/next.
                (10, (None, None)),
            ]

            for id_, (prev_title, next_title) in tests:
                video = session.query(Video).filter_by(id=id_).one()
                prev_video, next_video = get_surrounding_videos(
                    session, id_, video.channel_id)

                # Check the previous video first, then the next one.
                neighbors = ((prev_video, prev_title), (next_video, next_title))
                for neighbor, expected_title in neighbors:
                    if expected_title is None:
                        self.assertIsNone(neighbor)
                    else:
                        self.assertDictContains(neighbor,
                                                {'title': expected_title})