def test_process_captions(self):
        """
        Process the captions of two videos, then check that the videos can be found through the `textsearch`
        column of the `video` table.
        """
        fixtures = (
            ('scream', self.vtt_path1),
            ('bar', self.vtt_path2),
        )
        with get_db_context(commit=True) as (engine, session):
            videos = []
            for title, vtt_path in fixtures:
                video = Video(title=title, caption_path=str(vtt_path))
                session.add(video)
                # Make the caption lookup return this video's fixture file directly.
                with mock.patch('api.videos.captions.get_absolute_video_caption', lambda *a: vtt_path):
                    captions.process_captions(video)
                videos.append(video)

            session.flush()
            for video in videos:
                session.refresh(video)

            # Fetch each video from the DB again, a caption is expected on both of them.
            for video in videos:
                fetched = session.query(Video).filter_by(id=video.id).one()
                self.assertIsNotNone(fetched.caption)

        # Each entry is (tsquery, expected rows).  Rows are ordered by video id.
        searches = [
            # "sessions" never actually appears in the text, but "session" does
            ('sessions', [[1, ]]),
            # Matches video1.title and video2.caption
            ('scream', [[1, ], [2, ]]),
            # Either term is enough, so both videos are expected
            ('scream | sessions', [[1, ], [2, ]]),
            # Only video1 is expected to match both terms
            ('scream & sessions', [[1, ]]),
            # Matches neither
            ('scream & sess', []),
            # Matches video2.caption
            ('yawn | sess', [[2, ]]),
            # Matches video2.caption
            ('yawn', [[2, ]]),
            # Matches video2.title
            ('bar', [[2, ]]),
        ]
        with get_db_curs() as curs:
            for query, expected_rows in searches:
                curs.execute('SELECT id FROM video WHERE textsearch @@ to_tsquery(%s) ORDER BY id', (query,))
                self.assertEqual(curs.fetchall(), expected_rows)
Beispiel #2
0
    def test_video_search(self):
        """
        Videos can be found with a search string, and the results are expected in textsearch rank order.
        """
        # (title, caption) pairs.  Letters are repeated in the captions so those videos rank higher.
        title_captions = [
            ('1', 'b b b b e d d'),
            ('2', '2 b b b d'),
            ('3', 'b b'),
            ('4', 'b e e'),
            ('5', ''),
        ]
        with get_db_context(commit=True) as (engine, session):
            for video_title, video_caption in title_captions:
                new_video = Video(title=video_title, caption=video_caption, video_path='foo')
                session.add(new_video)

        def post_search(search_str, limit=20):
            """POST a search request, return only the response."""
            payload = json.dumps({'search_str': search_str, 'limit': limit})
            _request, search_response = api_app.test_client.post('/api/videos/search', data=payload)
            return search_response

        def assert_search(search_str, expected_ids):
            """Search, then check the status, the order of the video ids, and the reported total."""
            search_response = post_search(search_str)
            assert search_response.status_code == HTTPStatus.OK
            found_ids = [video['id'] for video in search_response.json['videos']]
            assert found_ids == expected_ids
            assert search_response.json['totals']['videos'] == len(expected_ids)

        cases = [
            # Only videos with a b are returned, ordered by the amount of b's.  The search is run twice because
            # repeated runs should return the same result.
            ('b', [1, 2, 3, 4]),
            ('b', [1, 2, 3, 4]),
            # Only two captions have e
            ('e', [4, 1]),
            # Only two captions have d
            ('d', [1, 2]),
            # 5 can be gotten by its title
            ('5', [5]),
            # Only video 1 has e and d
            ('e d', [1]),
            # Videos 1 and 4 have b and e, but 1 has more
            ('b e', [1, 4]),
        ]
        for search_str, expected_ids in cases:
            assert_search(search_str, expected_ids)

        # The total counts every match, even when a limit reduces the returned videos
        limited = post_search('b', 2)
        limited_ids = [video['id'] for video in limited.json['videos']]
        assert limited_ids == [1, 2]
        assert limited.json['totals']['videos'] == 4
Beispiel #3
0
    def test_get_channel_videos(self):
        """
        Searching with a `channel_link` is expected to return only the videos of that channel.
        """
        search_url = '/api/videos/search'

        with get_db_context(commit=True) as (engine, session):
            channel1 = Channel(name='Foo', link='foo')
            channel2 = Channel(name='Bar', link='bar')
            for new_channel in (channel1, channel2):
                session.add(new_channel)
            session.flush()
            for new_channel in (channel1, channel2):
                session.refresh(new_channel)

        # No videos have been created, so the channel search is expected to be empty
        channel1_payload = json.dumps({'channel_link': channel1.link})
        request, response = api_app.test_client.post(search_url, data=channel1_payload)
        assert response.status_code == HTTPStatus.OK
        assert len(response.json['videos']) == 0

        # Note the crossover: the first video inserted belongs to channel2, the second to channel1.
        with get_db_context(commit=True) as (engine, session):
            session.add(Video(title='vid1', channel_id=channel2.id, video_path='foo'))
            session.add(Video(title='vid2', channel_id=channel1.id, video_path='foo'))

        # Each channel should only yield its own video.  Start with channel1.
        request, response = api_app.test_client.post(search_url, data=channel1_payload)
        assert response.status_code == HTTPStatus.OK
        found = response.json['videos']
        assert len(found) == 1
        assert response.json['totals']['videos'] == 1
        self.assertDictContains(found[0], {'id': 2, 'title': 'vid2', 'channel_id': channel1.id})

        # Then channel2.
        channel2_payload = json.dumps({'channel_link': channel2.link})
        request, response = api_app.test_client.post(search_url, data=channel2_payload)
        assert response.status_code == HTTPStatus.OK
        found = response.json['videos']
        assert len(found) == 1
        self.assertDictContains(found[0], {'id': 1, 'title': 'vid1', 'channel_id': channel2.id})
Beispiel #4
0
    def test_get_channel_videos_pagination(self):
        """
        A channel's videos can be fetched page by page using `offset`; each page must differ from the one
        before it.
        """
        with get_db_context(commit=True) as (engine, session):
            # The channel that will be paginated, with 50 videos.
            channel1 = Channel(name='Foo', link='foo')
            session.add(channel1)
            session.flush()
            session.refresh(channel1)

            for number in range(50):
                paged_video = Video(title=f'Foo.Video{number}', channel_id=channel1.id, video_path='foo')
                session.add(paged_video)

            # A second channel with a single video, it is never requested below.
            channel2 = Channel(name='Bar', link='bar')
            session.add(channel2)
            session.flush()
            session.refresh(channel2)
            other_video = Video(title='vid2', channel_id=channel2.id, video_path='foo')
            session.add(other_video)

        # (offset, expected video count) for the first, second, third, and empty pages.
        pages = (
            (0, 20),
            (20, 20),
            (40, 10),
            (50, 0),
        )
        previous_ids = []
        for offset, expected_count in pages:
            payload = json.dumps({'channel_link': channel1.link, 'order_by': 'id', 'offset': offset})
            _, response = api_app.test_client.post('/api/videos/search', data=payload)
            assert response.status_code == HTTPStatus.OK

            page_videos = response.json['videos']
            assert len(page_videos) == expected_count

            page_ids = [video['id'] for video in page_videos]
            assert page_ids != previous_ids, f'IDs are unchanged current_ids={page_ids}'
            previous_ids = page_ids
Beispiel #5
0
    def test_get_video(self):
        """
        A video's information can be fetched by its id, along with its previous/next videos.  An unknown id
        is answered with a 404.
        """
        def raise_unknown_file(_):
            # Stand-in for the info json lookup, it always reports that the file is unknown.
            raise UnknownFile()

        with get_db_context(commit=True) as (engine, session):
            with mock.patch('api.videos.common.get_absolute_video_info_json', raise_unknown_file):
                channel = Channel(name='Foo', link='foo')
                session.add(channel)
                session.flush()
                session.refresh(channel)

                # Two videos in the same channel, uploaded one second apart.
                now = datetime.utcnow()
                for seconds_later, title in enumerate(('vid1', 'vid2')):
                    upload_date = now + timedelta(seconds=seconds_later)
                    session.add(Video(title=title, channel_id=channel.id, upload_date=upload_date))

        # Only two videos were inserted, so id 10 should not exist
        _, response = api_app.test_client.get('/api/videos/video/10')
        assert response.status_code == HTTPStatus.NOT_FOUND, response.json
        expected_error = {
            'code': 1,
            'api_error': 'The video could not be found.',
            'message': '',
        }
        assert response.json == expected_error

        # Fetch the first video that was inserted
        _, response = api_app.test_client.get('/api/videos/video/1')
        assert response.status_code == HTTPStatus.OK, response.json
        body = response.json
        self.assertDictContains(body['video'], {'title': 'vid1'})

        # No previous video is expected, only the next one.
        self.assertIsNone(body['prev'])
        self.assertDictContains(body['next'], {'title': 'vid2'})
Beispiel #6
0
    def test_get_video_prev_next(self):
        """
        `get_surrounding_videos` returns the previous and next videos of a video within its channel.
        """

        def assert_neighbor(neighbor, expected_title):
            """A `None` title means that no neighbor is expected."""
            if expected_title is None:
                self.assertIsNone(neighbor)
            else:
                self.assertDictContains(neighbor, {'title': expected_title})

        with get_db_context(commit=True) as (engine, session):
            for _ in range(4):
                session.add(Channel(link=str(uuid4())))
            channel1, channel2, channel3, channel4 = session.query(Channel).all()

            now = datetime.utcnow()
            second = timedelta(seconds=1)

            # (title, channel, seconds after `now` of the upload_date).  The upload_date decides the order of the
            # prev/next videos.  `None` means the video is created without any upload_date.
            video_specs = [
                ('vid1', channel1, 0),
                ('vid2', channel1, 1),
                ('vid3', channel2, 4),
                ('vid4', channel1, 3),
                ('vid5', channel2, 2),
                ('vid6', channel2, 5),
                ('vid7', channel1, None),
                ('vid8', channel2, 7),
                ('vid9', channel3, 8),
                ('vid10', channel4, None),
                ('vid11', channel4, None),
            ]
            for title, owner, seconds_after in video_specs:
                video_kwargs = {'title': title, 'channel_id': owner.id}
                if seconds_after is not None:
                    video_kwargs['upload_date'] = now + (second * seconds_after)
                session.add(Video(**video_kwargs))

            session.commit()

            # (video id, (expected previous title, expected next title))
            expectations = [
                # Channel 1's videos were inserted in upload_date order.
                (1, (None, 'vid2')),
                (2, ('vid1', 'vid4')),
                # 7 has no upload_date, so it doesn't come after 4.
                (4, ('vid2', None)),
                # 7 has no upload_date, so we don't know the order of it.
                (7, (None, None)),
                # Channel 3 has only one video.
                (9, (None, None)),
                # Channel 2 was inserted out of order.
                (5, (None, 'vid3')),
                (3, ('vid5', 'vid6')),
                (8, ('vid6', None)),
                # Channel 4's videos have no upload date, so we don't know what is previous/next.
                (10, (None, None)),
            ]

            for video_id, (expected_prev, expected_next) in expectations:
                current = session.query(Video).filter_by(id=video_id).one()
                neighbors = get_surrounding_videos(session, video_id, current.channel_id)
                before, after = neighbors

                assert_neighbor(before, expected_prev)
                assert_neighbor(after, expected_next)
Beispiel #7
0
    def test_refresh_videos(self):
        """
        A refresh is expected to remove DB videos that have no file, and to discover video files (and the meta
        files near them) which are not in the DB yet.
        """
        def create_file(directory, name):
            """Create an empty file called `name` in `directory`, return its path."""
            new_file = directory / name
            new_file.touch()
            return new_file

        def channel_video_paths(of_channel):
            """The set of `video_path` of every video of the channel."""
            return {video.video_path for video in of_channel.videos}

        # Nothing should be in the queue before a refresh is requested.
        pytest.raises(Empty, refresh_queue.get_nowait)

        # Build a fake channel directory.
        with get_db_context() as (engine, session), \
                tempfile.TemporaryDirectory() as channel_dir:
            root = pathlib.Path(channel_dir)

            # Files in subdirectories should be found and handled properly.
            subdir = root / 'subdir'
            subdir.mkdir()

            # The video files that are upserted into the DB below.
            vid1 = create_file(subdir, 'channel name_20000101_abcdefghijk_title.mp4')
            vid2 = create_file(root, 'channel name_20000102_bcdefghijkl_title.webm')

            # Same name as vid1 apart from the file extension.  This file is a duplicate, it is not expected to
            # show up as a video of its own.
            vid1_alt = create_file(subdir, 'channel name_20000101_abcdefghijk_title.webm')

            # Posters which share their name with the videos above, they should be found "near" them.
            poster1 = create_file(subdir, 'channel name_20000101_abcdefghijk_title.jpg')
            poster2 = create_file(root, 'channel name_20000102_bcdefghijkl_title.jpg')

            # The channel lives in the temporary directory, both videos belong to it.
            channel = Channel(directory=channel_dir, link='foo', name='foo')
            session.add(channel)
            session.flush()
            session.refresh(channel)
            video1 = upsert_video(session, vid1, channel)
            video2 = upsert_video(session, vid2, channel)
            session.commit()
            self.assertEqual(channel_video_paths(channel),
                             {'subdir/' + vid1.name, vid2.name})

            # Poster files were found.
            self.assertEqual(video1.poster_path, 'subdir/' + poster1.name)
            self.assertEqual(video2.poster_path, poster2.name)

            # Insert a video whose file does not exist, the refresh should remove it again.
            self.assertNotIn('foo', channel_video_paths(channel))
            session.add(Video(video_path='foo', channel_id=channel.id))
            session.flush()
            session.refresh(channel)
            self.assertIn('foo', channel_video_paths(channel))
            self.assertEqual(len(channel.videos), 3)

            # A video file which is not in the DB, the refresh should find it and its description.
            vid3 = create_file(root, 'channel name_20000103_cdefghijklm_title.flv')
            description3 = create_file(root, 'channel name_20000103_cdefghijklm_title.description')

            # A poster without any video.  It should not show up anywhere, but it must not be deleted either.
            poster3 = create_file(root, 'channel name_20000104_defghijklmn_title.jpg')

            # Request the refresh.
            api_app.test_client.post('/api/videos:refresh')

            # The video without a file is gone.
            self.assertNotIn('foo', channel_video_paths(channel))

            # (video_path, poster_path, description_path) of every video that should remain.
            expected = {
                # in a subdirectory, no description
                ('subdir/' + vid1.name, 'subdir/' + poster1.name, None),
                # no description
                (vid2.name, poster2.name, None),
                # no poster
                (vid3.name, None, description3.name),
            }
            actual = {(video.video_path, video.poster_path, video.description_path)
                      for video in channel.videos}
            self.assertEqual(actual, expected)

            assert poster3.is_file(), 'Orphan jpg file was deleted!'

        # The refresh pushes messages to the queue, the "started" message must be among them.
        messages = get_all_messages_in_queue(refresh_queue)
        message_codes = [message.get('code') for message in messages]
        assert 'refresh-started' in message_codes