def test_process_captions(self):
    """
    Test that processing captions fills in `Video.caption`, and that the videos can then be found with a
    full-text search against the `textsearch` column.
    """
    caption_getter = 'api.videos.captions.get_absolute_video_caption'
    fixtures = (
        # (title, VTT caption file)
        ('scream', self.vtt_path1),
        ('bar', self.vtt_path2),
    )

    with get_db_context(commit=True) as (engine, session):
        videos = []
        for title, vtt_path in fixtures:
            video = Video(title=title, caption_path=str(vtt_path))
            session.add(video)
            # The caption lookup is patched so it returns this video's VTT file.
            with mock.patch(caption_getter, lambda *a: vtt_path):
                captions.process_captions(video)
            videos.append(video)

        session.flush()
        for video in videos:
            session.refresh(video)

        # Get the videos from the DB, each must have a caption now.
        for video in videos:
            fetched = session.query(Video).filter_by(id=video.id).one()
            self.assertIsNotNone(fetched.caption)

    searches = [
        # (tsquery, expected rows)
        # "sessions" never actually appears in the text, but "session" does
        ('sessions', [[1, ]]),
        # Matches video1.title and video2.caption
        ('scream', [[1, ], [2, ]]),
        # Matches video1.title and video2.caption
        ('scream | sessions', [[1, ], [2, ]]),
        # Only video1 matches both terms
        ('scream & sessions', [[1, ]]),
        # Matches neither
        ('scream & sess', []),
        # Matches video2.caption
        ('yawn | sess', [[2, ]]),
        # Matches video2.caption
        ('yawn', [[2, ]]),
        # Matches video2.title
        ('bar', [[2, ]]),
    ]

    # Search using the tsvector.
    with get_db_curs() as curs:
        for tsquery, expected_rows in searches:
            curs.execute('SELECT id FROM video WHERE textsearch @@ to_tsquery(%s) ORDER BY id', (tsquery,))
            self.assertEqual(curs.fetchall(), expected_rows)
def test_video_search(self):
    """
    Test that videos can be searched through the API, and that the results come back in the expected
    textsearch rank order.
    """
    # (title, caption).  These captions have repeated letters so they will be higher in the ranking.
    fixtures = [
        ('1', 'b b b b e d d'),
        ('2', '2 b b b d'),
        ('3', 'b b'),
        ('4', 'b e e'),
        ('5', ''),
    ]

    with get_db_context(commit=True) as (engine, session):
        for title, caption in fixtures:
            session.add(Video(title=title, caption=caption, video_path='foo'))

    def post_search(search_str, limit=20):
        # POST the search and hand back the raw response.
        body = json.dumps(dict(search_str=search_str, limit=limit))
        _, resp = api_app.test_client.post('/api/videos/search', data=body)
        return resp

    def assert_search(resp, expected_ids):
        # The response must be OK, with exactly the expected IDs in order, and a matching total.
        assert resp.status_code == HTTPStatus.OK
        found_ids = [video['id'] for video in resp.json['videos']]
        assert found_ids == expected_ids
        assert resp.json['totals']['videos'] == len(expected_ids)

    # Repeated runs should return the same result
    for _ in range(2):
        # Only videos with a b are returned, ordered by the amount of b's
        assert_search(post_search('b'), [1, 2, 3, 4])

    cases = [
        # Only two captions have e
        ('e', [4, 1]),
        # Only two captions have d
        ('d', [1, 2]),
        # Video 5 has an empty caption, it can be gotten by its title
        ('5', [5]),
        # Only video 1 has e and d
        ('e d', [1]),
        # Video 1 and 4 have b and e, but 1 has more
        ('b e', [1, 4]),
    ]
    for search_str, expected_ids in cases:
        assert_search(post_search(search_str), expected_ids)

    # Check totals are correct even with a limit
    limited = post_search('b', 2)
    assert [video['id'] for video in limited.json['videos']] == [1, 2]
    assert limited.json['totals']['videos'] == 4
def test_get_channel_videos(self):
    """
    Test that searching with a `channel_link` only returns the videos of that channel.
    """
    search_url = '/api/videos/search'

    def search(params):
        # POST the search, require an OK response, and return the decoded JSON body.
        _, resp = api_app.test_client.post(search_url, data=json.dumps(params))
        assert resp.status_code == HTTPStatus.OK
        return resp.json

    with get_db_context(commit=True) as (engine, session):
        channel1 = Channel(name='Foo', link='foo')
        channel2 = Channel(name='Bar', link='bar')
        for channel in (channel1, channel2):
            session.add(channel)
        session.flush()
        for channel in (channel1, channel2):
            session.refresh(channel)

    # Channels don't have videos yet
    channel1_params = dict(channel_link=channel1.link)
    body = search(channel1_params)
    assert len(body['videos']) == 0

    # Note the cross-over: the first video belongs to channel2, the second to channel1.
    with get_db_context(commit=True) as (engine, session):
        session.add(Video(title='vid1', channel_id=channel2.id, video_path='foo'))
        session.add(Video(title='vid2', channel_id=channel1.id, video_path='foo'))

    # Videos are gotten by their respective channels
    body = search(channel1_params)
    assert len(body['videos']) == 1
    assert body['totals']['videos'] == 1
    self.assertDictContains(body['videos'][0], dict(id=2, title='vid2', channel_id=channel1.id))

    body = search(dict(channel_link=channel2.link))
    assert len(body['videos']) == 1
    self.assertDictContains(body['videos'][0], dict(id=1, title='vid1', channel_id=channel2.id))
def test_get_channel_videos_pagination(self):
    """
    Test that a channel's videos are returned in pages when searching with an `offset`.
    """
    with get_db_context(commit=True) as (engine, session):

        def new_channel(name, link):
            # Insert the channel and reload it so its generated columns (like the id) are available.
            channel = Channel(name=name, link=link)
            session.add(channel)
            session.flush()
            session.refresh(channel)
            return channel

        channel1 = new_channel('Foo', 'foo')
        for i in range(50):
            session.add(Video(title=f'Foo.Video{i}', channel_id=channel1.id, video_path='foo'))

        # A video in a second channel, it is never requested by the searches below.
        channel2 = new_channel('Bar', 'bar')
        session.add(Video(title='vid2', channel_id=channel2.id, video_path='foo'))

    # Get first, second, third, and empty pages of videos.
    pages = (
        # (offset, video_count)
        (0, 20),
        (20, 20),
        (40, 10),
        (50, 0),
    )

    previous_ids = []
    for offset, video_count in pages:
        params = dict(channel_link=channel1.link, order_by='id', offset=offset)
        _, response = api_app.test_client.post('/api/videos/search', data=json.dumps(params))
        assert response.status_code == HTTPStatus.OK
        assert len(response.json['videos']) == video_count

        # Every page must differ from the one before it.
        current_ids = [video['id'] for video in response.json['videos']]
        assert current_ids != previous_ids, f'IDs are unchanged current_ids={current_ids}'
        previous_ids = current_ids
def test_get_video(self):
    """
    Test that information about a video can be fetched, that an unknown video gives a 404, and that the
    previous/next videos are part of the response.
    """

    def raise_unknown_file(_):
        raise UnknownFile()

    # NOTE(review): the patch is only active inside this `with` block -- confirm whether the requests below
    # were meant to run under it.
    with get_db_context(commit=True) as (engine, session):
        with mock.patch('api.videos.common.get_absolute_video_info_json', raise_unknown_file):
            channel = Channel(name='Foo', link='foo')
            session.add(channel)
            session.flush()
            session.refresh(channel)

            # Two videos in the same channel, uploaded one second apart.
            first_upload = datetime.utcnow()
            second_upload = first_upload + timedelta(seconds=1)
            session.add(Video(title='vid1', channel_id=channel.id, upload_date=first_upload))
            session.add(Video(title='vid2', channel_id=channel.id, upload_date=second_upload))

    # Test that a 404 is returned when no video exists
    _, response = api_app.test_client.get('/api/videos/video/10')
    assert response.status_code == HTTPStatus.NOT_FOUND, response.json
    expected_error = dict(code=1, api_error='The video could not be found.', message='')
    assert response.json == expected_error

    # Get the video info we inserted
    _, response = api_app.test_client.get('/api/videos/video/1')
    assert response.status_code == HTTPStatus.OK, response.json
    self.assertDictContains(response.json['video'], {'title': 'vid1'})

    # There is no previous video, but the next video is included.
    self.assertIsNone(response.json['prev'])
    self.assertDictContains(response.json['next'], {'title': 'vid2'})
def test_get_video_prev_next(self):
    """
    Test that the previous and next videos will be retrieved when fetching a video.

    The expectations below treat a video's neighbours as the videos of the same channel that are closest by
    `upload_date`; videos without an `upload_date` are expected to have no neighbours.
    """
    with get_db_context(commit=True) as (engine, session):
        for _ in range(4):
            session.add(Channel(link=str(uuid4())))
        # Order explicitly: without an ORDER BY the rows may come back in any order, which would make the
        # channel1..channel4 names arbitrary.
        channel1, channel2, channel3, channel4 = session.query(Channel).order_by(Channel.id).all()

        now = datetime.utcnow()
        second = timedelta(seconds=1)

        def add_video(title, channel, offset=None):
            # Only pass an upload_date when the video has one, so the model is built exactly as if the
            # argument had been left out.
            kwargs = dict(title=title, channel_id=channel.id)
            if offset is not None:
                kwargs['upload_date'] = now + (second * offset)
            session.add(Video(**kwargs))

        # The upload_date decides the order of the prev/next videos.  The insertion order decides the IDs
        # used in `tests` below.
        videos = [
            # (title, channel, upload offset in seconds, or None for no upload_date)
            ('vid1', channel1, 0),
            ('vid2', channel1, 1),
            ('vid3', channel2, 4),
            ('vid4', channel1, 3),
            ('vid5', channel2, 2),
            ('vid6', channel2, 5),
            ('vid7', channel1, None),
            ('vid8', channel2, 7),
            ('vid9', channel3, 8),
            ('vid10', channel4, None),
            ('vid11', channel4, None),
        ]
        for title, channel, offset in videos:
            add_video(title, channel, offset)

        session.commit()

        tests = [
            # (video id, (previous title, next title))
            # Channel 1's videos were inserted in upload_date order.
            (1, (None, 'vid2')),
            (2, ('vid1', 'vid4')),
            (4, ('vid2', None)),  # 7 has no upload_date, so it doesn't come after 4.
            (7, (None, None)),  # 7 has no upload_date, so we don't know the order of it.
            # Channel 3 has only one video.
            (9, (None, None)),
            # Channel 2 was inserted out of order.
            (5, (None, 'vid3')),
            (3, ('vid5', 'vid6')),
            (8, ('vid6', None)),
            # Channel 4's videos have no upload date, so we don't know what is previous/next.
            (10, (None, None)),
        ]

        for id_, (prev_title, next_title) in tests:
            video = session.query(Video).filter_by(id=id_).one()
            prev_video, next_video = get_surrounding_videos(session, id_, video.channel_id)

            if prev_title is None:
                self.assertIsNone(prev_video)
            else:
                self.assertDictContains(prev_video, {'title': prev_title})

            if next_title is None:
                self.assertIsNone(next_video)
            else:
                self.assertDictContains(next_video, {'title': next_title})
def test_refresh_videos(self):
    """
    Test the refresh endpoint against a temporary channel directory: a video record without a file is
    expected to be removed, a new video file is expected to be discovered along with its meta files, and a
    "refresh-started" message is expected in the refresh queue.
    """
    # There should be no messages until a refresh is called.
    pytest.raises(Empty, refresh_queue.get_nowait)

    def touch(path):
        # Create the (empty) file and hand the path back.
        path.touch()
        return path

    # Setup a fake channel directory.
    with get_db_context() as (engine, session), \
            tempfile.TemporaryDirectory() as channel_dir:
        channel_path = pathlib.Path(channel_dir)

        # Files in subdirectories should be found and handled properly.
        subdir = channel_path / 'subdir'
        subdir.mkdir()

        # These are the types of files that will be found first.
        vid1 = touch(subdir / 'channel name_20000101_abcdefghijk_title.mp4')
        vid2 = touch(channel_path / 'channel name_20000102_bcdefghijkl_title.webm')

        # This video is named the same as vid1, except for the file extension.  It's possible that this was
        # downloaded later, or maybe the old video format has fallen out of favor.  WROLPi should ignore this
        # duplicate file.
        touch(subdir / 'channel name_20000101_abcdefghijk_title.webm')

        # These files are associated with the video files above, and should be found "near" them.
        poster1 = touch(subdir / 'channel name_20000101_abcdefghijk_title.jpg')
        poster2 = touch(channel_path / 'channel name_20000102_bcdefghijkl_title.jpg')

        # Create a channel, associate videos with it.
        channel = Channel(directory=channel_dir, link='foo', name='foo')
        session.add(channel)
        session.flush()
        session.refresh(channel)
        video1 = upsert_video(session, vid1, channel)
        video2 = upsert_video(session, vid2, channel)
        session.commit()

        def channel_video_paths():
            # The video paths currently attached to the channel.
            return {video.video_path for video in channel.videos}

        # Paths are compared relative to the channel directory.
        vid1_relative = f'subdir/{vid1.name}'
        poster1_relative = f'subdir/{poster1.name}'
        self.assertEqual(channel_video_paths(), {vid1_relative, vid2.name})

        # Poster files were found.
        self.assertEqual(video1.poster_path, poster1_relative)
        self.assertEqual(video2.poster_path, poster2.name)

        # Add a bogus file, this should be removed during the refresh
        self.assertNotIn('foo', channel_video_paths())
        session.add(Video(video_path='foo', channel_id=channel.id))
        session.flush()
        session.refresh(channel)
        self.assertIn('foo', channel_video_paths())
        self.assertEqual(len(channel.videos), 3)

        # Add a video that isn't in the DB, it should be found and any meta files associated with it
        vid3 = touch(channel_path / 'channel name_20000103_cdefghijklm_title.flv')
        description3 = touch(channel_path / 'channel name_20000103_cdefghijklm_title.description')

        # An orphan meta-file should be ignored.  This shouldn't show up anywhere.  But, it shouldn't be deleted.
        poster3 = touch(channel_path / 'channel name_20000104_defghijklmn_title.jpg')

        # Finally, call the refresh.  Again, it should remove the "foo" video, then discover this 3rd video
        # file and its description.
        api_app.test_client.post('/api/videos:refresh')

        # Bogus file was removed
        self.assertNotIn('foo', channel_video_paths())

        # Final channel video list we built
        expected = {
            (vid1_relative, poster1_relative, None),  # in a subdirectory, no description
            (vid2.name, poster2.name, None),  # no description
            (vid3.name, None, description3.name),  # no poster
        }
        found = {(video.video_path, video.poster_path, video.description_path) for video in channel.videos}
        self.assertEqual(found, expected)

        assert poster3.is_file(), 'Orphan jpg file was deleted!'

        # During the refresh process, messages are pushed to a queue, make sure there are messages there
        messages = get_all_messages_in_queue(refresh_queue)
        assert 'refresh-started' in [message.get('code') for message in messages]