def calculate_genres_with_films():
    """Recompute the list of genre names that have at least one visible film
    and persist it (ordered by film count, descending) in configuration.

    Fix: the original issued two queries per genre — a page_size=1 probe to
    test existence, then a second full search to count. A single unpaginated
    search answers both questions at once.
    """
    genres = Genre.search(sort_by='name')
    genres_with_films = []
    for genre in genres:
        records = AudiovisualRecord.search({
            'deleted': False,
            'has_downloads': True,
            'general_information_fetched': True,
            'genres__name': genre.name,
        })
        if len(records) > 0:
            # Stash the count on the genre object only for sorting below.
            genre.number = len(records)
            genres_with_films.append(genre)
    genres_with_films.sort(key=lambda g: g.number, reverse=True)
    configuration = _get_or_create_configuration(CONFIG_KEY_GENRES_WITH_FILMS)
    configuration.data = [g.name for g in genres_with_films]
    configuration.save()
def _worker_collect_download_links_for_the_first_time(source_class, logger):
    """
    This search for download links for has_downloads or not has_downloads
    audiovisual_records
    """
    source_name = source_class.source_name
    logger(f'Begin to retrieve audiovisual records for {source_name}')
    records = AudiovisualRecord.search(
        {
            'deleted': False,
            'general_information_fetched': True,
            f'metadata__downloads_fetch__{source_name}__exists': False,
            'scores__votes__exists': True,
            'global_score__gte': 0.1
        },
        paginate=True, page_size=50, page=1,
        sort_by='-global_score',
    ).get('results')
    logger(f'Read {len(records)} records')
    with ThreadPoolExecutor(max_workers=2) as executor:
        pending = []
        for record in records:
            # Deliberate throttle: pause before each submission so the
            # download source is not hammered.
            logger(f'Sleeping for 30 seconds for {source_class.source_name}')
            time.sleep(30)
            pending.append(
                executor.submit(_worker_get_download_links, source_class, record, logger))
        # Surface any worker exception by re-raising it here.
        for finished in concurrent.futures.as_completed(pending):
            finished.result()
def _update(audiovisual_record, general_information_klass):
    """Populate a record from the given general-information source.

    Deletes the record when renaming would collide with an existing record,
    or when the source raises GeneralInformationException.
    """
    general_information = general_information_klass(audiovisual_record)
    try:
        audiovisual_record.summary = general_information.summary
        audiovisual_record.images = [general_information.main_image]
        # Keep an already-known year; only fill it in when missing/falsy.
        if not bool(audiovisual_record.year):
            audiovisual_record.year = general_information.year
        writers, directors, stars = general_information.writers_directors_stars
        audiovisual_record.writers = writers
        audiovisual_record.directors = directors
        audiovisual_record.stars = stars
        audiovisual_record.genres = general_information.genres
        audiovisual_record.is_a_film = general_information.is_a_film
        if audiovisual_record.name != general_information.name:
            # The source reports a different canonical name. If another
            # record already owns it, this one is a duplicate: remove it.
            duplicates = AudiovisualRecord.search({'name': general_information.name})
            if len(duplicates) > 0:
                audiovisual_record.delete()
                return
            audiovisual_record.name = general_information.name
            audiovisual_record.slug = None  # force slug regeneration
        audiovisual_record.general_information_fetched = True
        audiovisual_record.save()
    except GeneralInformationException as e:
        log_message(str(e), only_file=True)
        audiovisual_record.delete()
def genre_epoch_view(request, genre=None, epoch=None):
    """Paginated listing of a genre's films, optionally limited to a decade."""
    try:
        referer_uri = request.META['HTTP_REFERER']
        # get_params = {p.split('=')[0]: p.split('=')[1] for p in referer_uri.split('?')[1].split('&')}
    except (IndexError, KeyError):
        pass
        # get_params = {}
    page, raw_uri = _check_if_erroneous_page_and_get_page_and_right_uri(request)
    search = {
        'deleted': False,
        'has_downloads': True,
        'general_information_fetched': True,
        'genres__name': genre,
        'global_score__gte': 0.1,
    }
    # Decade filters expressed as inclusive string year bounds.
    decade_bounds = {
        '70s': ('1970', '1979'),
        '80s': ('1980', '1989'),
        '90s': ('1990', '1999'),
        '2000s': ('2000', '2009'),
        '2010s': ('2010', '2019'),
    }
    if epoch in decade_bounds:
        lower, upper = decade_bounds[epoch]
        search.update({'year__gte': lower, 'year__lte': upper})
    else:
        epoch = 'All-Times'
    paginator = AudiovisualRecord.search(
        search, paginate=True, page_size=20, page=page, sort_by=['-global_score'])
    paginator['results'] = AudiovisualRecordSerializer(
        paginator.get('results', []), many=True).data
    _add_previous_and_next_navigation_uris_to_search(raw_uri, paginator)
    context = {
        # 'filter_params': get_params,
        'context_class': 'genre_view',
        'is_landing': True,
        'current_genre': genre,
        'genres_names': _get_genres(),
        'epoch': epoch,
        'qualities': VideoQualityInStringDetector.our_qualities,
        'search': paginator,
        'year_range': range(1970, int(datetime.utcnow().strftime('%Y')) + 1)
    }
    return render(request, 'web/genre_epoch.html', context=context)
def handle(self, *args, **options):
    """Walk every non-deleted audiovisual record, page by page, and
    recalculate its `has_downloads` flag."""
    page_size = 500
    page = 1
    while True:
        paginator = AudiovisualRecord.search(
            {'deleted': False}, paginate=True, page_size=page_size, page=page)
        total_pages = paginator.get('total_pages')
        print(f'Checking audiovisual records: {(page - 1) * page_size}/{page * page_size} / Page: {page}/{total_pages}')
        for record in paginator.get('results'):
            record.calculate_has_downloads()
        if not paginator.get('next_page', False):
            break
        page += 1
def save_audiovisual_record(self, record: AudiovisualRecord):
    """Persist an audiovisual record, first normalising nested genres and
    people (directors/writers/stars) into their own stored documents.

    Returns the Mongo-converted record; `_id` is set on first insert.
    """
    record = MongoAudiovisualRecord.convert(record)
    for n, genre in enumerate(record.genres):
        record.genres[n] = self._save_if_not_exist_genre(genre)
    for n, director in enumerate(record.directors):
        record.directors[n] = self._save_if_not_exists_person(director)
    for n, writer in enumerate(record.writers):
        record.writers[n] = self._save_if_not_exists_person(writer)
    for n, star in enumerate(record.stars):
        record.stars[n] = self._save_if_not_exists_person(star)
    dict_obj = dict(record)
    collection = self._get_collection(MongoAudiovisualRecord)
    _id = dict_obj.get('_id', None)
    _check_audiovisual_slug(dict_obj, collection)
    # _id must not be part of the document body on either insert or replace.
    dict_obj.pop('_id')
    if not _id:
        record._id = collection.insert_one(dict_obj).inserted_id
    else:
        # Collection.update() was deprecated and removed in PyMongo 4;
        # replace_one is the equivalent whole-document update.
        collection.replace_one({'_id': _id}, dict_obj)
    return record
def remove_film(request, object_id):
    """Superuser-only: delete the audiovisual record with the given id, then
    redirect back to the referring page (or to the root when absent).

    Fix: a missing 'HTTP_REFERER' raises KeyError (dict access), but the
    original caught IndexError, so the fallback redirect never ran.
    """
    if not request.user.is_superuser:
        return HttpResponse(status=403)
    _id = ObjectId(object_id)
    try:
        audiovisual_record = AudiovisualRecord.search({'_id': _id})[0]
        audiovisual_record.delete()
    except IndexError:
        # No record matched the id: nothing to delete.
        pass
    finally:
        try:
            referer = request.META['HTTP_REFERER']
            return redirect(referer)
        except KeyError:
            return redirect('/')
def autocomplete_general_information_for_empty_audiovisual_records():
    """Fetch general information for records that still lack it, trying every
    configured source for each record (up to 100 records per run)."""
    pending_records = AudiovisualRecord.search({
        'deleted': False,
        'general_information_fetched': False,
    }, paginate=True, page_size=100, page=1).get('results')
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for record in pending_records:
            for info_klass in get_all_general_information_sources():
                fut = executor.submit(_update, record, info_klass)
                # Attach the log line to the future so it can be emitted
                # when the future completes, in completion order.
                fut.log_msg = f'Check {record.name} with {info_klass.source_name}'
                futures.append(fut)
        for fut in concurrent.futures.as_completed(futures):
            autocomplete_general_information_for_empty_audiovisual_records.log(fut.log_msg)
            fut.result(timeout=600)
def _group_by_genres():
    """Return, per genre name, up to ten of the best records created in the
    last 180 days, as plain dicts ready for serialisation."""
    six_month_ago = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(days=180)
    groups = {}
    for genre in Genre.search():
        records = AudiovisualRecord.search({
            'deleted': False,
            'general_information_fetched': True,
            'has_downloads': True,
            'global_score__gte': 0.5,
            'created_date__gt': six_month_ago,
            'genres__name': genre.name
        })
        for record in records:
            values = [float(s['value']) for s in record.scores]
            # Mean of all source scores; 0 when the record has no scores.
            setattr(record, 'ordering_score', sum(values) / len(values) if len(values) > 0 else 0)
        # Best first: ordering_score is the primary sort key, year breaks ties.
        best = sorted(
            records,
            key=operator.attrgetter('ordering_score', 'year'),
            reverse=True)[:10]
        groups[genre.name] = [{
            'name': r.name,
            'slug': r.slug,
            'year': r.year,
            'images': r.images,
            'directors': [p['name'] for p in r.directors],
            'writers': [p['name'] for p in r.writers],
            'stars': [p['name'] for p in r.stars],
            'ordering_score': r.ordering_score,
        } for r in best]
    return groups
def recent_films_search_again_for_download_links():
    """Clear the downloads-fetch metadata of recent, decently-scored films so
    download links are searched again for them."""
    cutoff = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(days=180)
    records = AudiovisualRecord.search({
        'deleted': False,
        'general_information_fetched': True,
        'global_score__gte': 0.1,
        'metadata__downloads_fetch__exists': True,
        # Years are stored as strings; compare against the cutoff's year.
        'year__gte': cutoff.strftime('%Y')
    })
    for record in records:
        record.refresh()
        record.metadata['downloads_fetch'] = {}
        record.save()
def get(self, from_date: datetime, to_date: datetime) -> List[AudiovisualRecord]:
    """Scrape this source's search page for the given date window and return
    bare records carrying only a name plus a detail-page link in metadata."""
    headers = {'Accept-Language': 'en,es;q=0.9,pt;q=0.8'}
    session = PhantomBrowsingSession(referer=self.base_url + '/', headers=headers)
    session.get(self.get_search_url(from_date, to_date), timeout=30)
    response = session.last_response
    if response is None or not self.results_found(response.content):
        return []
    names = self.extract_all_names(response.content)
    links = self.extract_all_detail_pages(response.content)
    records = []
    for idx, name in enumerate(names):
        record = AudiovisualRecord(name=name)
        self._prepare_metadata_dict(record)
        # Names and detail links are extracted in the same page order.
        record.metadata['detailed_page'][self.source_name] = links[idx]
        records.append(record)
    return records
def compile_trailers_for_audiovisual_records_in_youtube():
    """Pick the highest-scored record not yet searched for a YouTube trailer,
    look one up, and store the embed URL in its metadata."""
    logger = compile_trailers_for_audiovisual_records_in_youtube.log
    results = AudiovisualRecord.search(
        {
            'deleted': False,
            'general_information_fetched': True,
            'has_downloads': True,
            'metadata__searched_trailers__youtube__exists': False,
            'global_score__gte': 0.1,
        },
        paginate=True, page_size=1, page=1,
        sort_by='-global_score',
    ).get('results')
    if len(results) == 0:
        return
    record = results[0]
    logger(f'Searching: {record.name}')
    search_string = f'{record.name.lower()} {record.year} trailer'
    video_id = _search(record.name.lower(), record.year, search_string, logger,
                       original_audiovisual_record=record)
    # Mark as searched regardless of outcome so it is not retried forever.
    _mark_as_searched(record, 'youtube')
    if video_id is None:
        return
    record.refresh()
    trailers = record.metadata.setdefault('trailers', {})
    trailers['youtube'] = f'https://www.youtube.com/embed/{video_id}'
    record.save()
def complete_correct_summaries():
    # This is a fix for bad summaries compiled.
    """Re-fetch summaries for records whose summary was compiled badly and
    mark each as fixed (`metadata['summary_fix']`) once its futures finish.

    Fix: the completion loop previously read the stale submission-loop
    variable, so only the LAST submitted record was marked (repeatedly);
    now it uses the record attached to each finished future.
    """
    records_to_fix = AudiovisualRecord.search(
        {'deleted': False, 'metadata__summary_fix__exists': False,
         'summary__neq': '', 'global_score__gte': 0.1},
        paginate=True, page_size=10, page=1, sort_by='-global_score'
    ).get('results')
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for audiovisual_record in records_to_fix:
            for general_information_klass in get_all_general_information_sources():
                future = executor.submit(
                    _update_only_summary, audiovisual_record, general_information_klass)
                # Attach the record and its log line to the future so the
                # completion loop can act on the right record.
                future.audiovisual_record = audiovisual_record
                future.log_msg = f'Fix summary of {audiovisual_record.name} with ' \
                                 f'{general_information_klass.source_name}'
                futures.append(future)
        for future in concurrent.futures.as_completed(futures):
            future.result(timeout=600)
            fixed_record = future.audiovisual_record
            fixed_record.refresh()
            fixed_record.metadata['summary_fix'] = True
            fixed_record.save()
            complete_correct_summaries.log(future.log_msg)
def save_audiovisual_images_locally():
    """Download cover images for ten top records that still lack a local copy,
    storing them under the media directory (paths cached in task data)."""
    task = save_audiovisual_images_locally
    local_root_path = task.data.get('local_root_path', None)
    web_server_root_path = task.data.get('web_server_root_path', None)
    if local_root_path is None:
        # Default to <project>/media/ai, derived from this file's location.
        current_directory = os.path.dirname(os.path.abspath(__file__))
        core_directory = os.path.dirname(current_directory)
        media_directory = os.path.dirname(core_directory) + '/media/'
        local_root_path = os.path.join(media_directory, 'ai')
        task.data.set('local_root_path', local_root_path)
    if web_server_root_path is None:
        web_server_root_path = '/media/ai/'
        task.data.set('web_server_root_path', web_server_root_path)
    records = AudiovisualRecord.search(
        {
            'deleted': False,
            'general_information_fetched': True,
            'has_downloads': True,
            'metadata__local_image__exists': False,
            'global_score__gte': 0.1,
        },
        paginate=True, page_size=10, page=1, sort_by='-global_score'
    ).get('results')
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for record in records:
            fut = executor.submit(
                _save_audiovisual_image_locally, record,
                local_root_path, web_server_root_path
            )
            fut.log_msg = f'Saving image to local for {record.name} ({record.year})'
            futures.append(fut)
        for fut in concurrent.futures.as_completed(futures):
            task.log(fut.log_msg)
            fut.result(timeout=600)
def search_for_new_additions():
    """Incrementally import new films/series from the first configured
    new-additions source.

    A date cursor is persisted in a Configuration object under the key
    ``search_for_new_additions_<source>``: ``from_dt``/``to_dt`` bound the
    window (must be set by an operator, YYYY-MM-DD), ``current_dt`` is the
    next day to process, and ``dts_done`` lists days already imported.
    Processing stops 30 days short of today (``time_ago``).
    """
    from core.fetchers.services import get_all_new_additions_sources
    try:
        klass = get_all_new_additions_sources()[0]
    except IndexError:
        log_message('There is no addition source to get new films/series')
        return
    today = datetime.utcnow()
    # Only process days at least 30 days old.
    time_ago = today - timedelta(days=30)
    # Configuration
    config_key = f'search_for_new_additions_{klass.source_name}'
    configuration = _get_configuration(key=config_key)
    from_dt = configuration.data.get('from_dt', '')
    to_dt = configuration.data.get('to_dt', '')
    current_dt = configuration.data.get('current_dt', '')
    dts_done = configuration.data.get('dts_done', [])
    if from_dt == '' or to_dt == '':
        # Window bounds are operator-provided; nothing to do without them.
        log_message(
            f'Need to provide from_dt and to_dt in the format YYYY-MM-DD for configuration {config_key}'
        )
        return
    # parse dates to native objects
    from_native_dt = datetime.strptime(from_dt, '%Y-%m-%d')
    to_native_dt = datetime.strptime(to_dt, '%Y-%m-%d')
    try:
        current_native_dt = datetime.strptime(current_dt, '%Y-%m-%d')
    except ValueError:
        # Missing/malformed cursor: restart from the window's beginning.
        current_native_dt = from_native_dt
    # Clamp the cursor back inside [from_dt, to_dt] if it drifted out.
    if not (from_native_dt <= current_native_dt <= to_native_dt):
        if current_native_dt > to_native_dt:
            current_native_dt = to_native_dt
        else:
            current_native_dt = from_native_dt
    # main loop
    new_additions = klass()
    while from_native_dt <= current_native_dt <= to_native_dt and current_native_dt <= time_ago:
        if current_native_dt.strftime('%Y-%m-%d') in dts_done:
            # Day already imported on a previous run: skip it.
            current_native_dt += timedelta(days=1)
            continue
        from_str = current_native_dt.strftime('%Y-%m-%d')
        # Fetch one day's worth of additions: [current, current + 1 day).
        audiovisual_records_new = new_additions.get(
            current_native_dt, current_native_dt + timedelta(days=1))
        for audiovisual_record in audiovisual_records_new:
            results = AudiovisualRecord.search(
                {'name': audiovisual_record.name})
            if len(results) == 0:
                search_for_new_additions.log(
                    f'Adding new {audiovisual_record.name}')
                audiovisual_record.save()
        # Mark the day done only after every record was considered.
        dts_done.append(from_str)
        current_native_dt += timedelta(days=1)
    # Persist the advanced cursor so the next run resumes where this stopped.
    configuration.refresh()
    configuration.data['dts_done'] = dts_done
    configuration.data['current_dt'] = current_native_dt.strftime('%Y-%m-%d')
    configuration.save()
def add_audiovisual_record_by_name(name, **additional_attributes):
    """Create a new audiovisual record with the given name (and any extra
    attributes) and persist it through the configured DAO."""
    new_record = AudiovisualRecord(name=name, **additional_attributes)
    dao_implementation.save_audiovisual_record(new_record)
def refresh_audiovisual_record(self, record: AudiovisualRecord):
    """Reload `record`'s fields from its stored Mongo document.

    No-op when the record was never persisted (no `_id`) or when the stored
    document no longer exists.
    """
    collection = self._get_collection(MongoAudiovisualRecord)
    record_id = getattr(record, '_id', None)
    if record_id is None:
        return
    document = collection.find_one({'_id': record_id})
    if document is None:
        return
    stored = MongoAudiovisualRecord(**document)
    # Copy every refreshable field from the stored document onto the
    # in-memory record.
    for field in (
        'name', 'genres', 'year', 'summary', 'directors', 'writers',
        'stars', 'images', 'deleted', 'downloads_disabled', 'scores',
        'global_score', 'general_information_fetched', 'metadata',
        'is_a_film', 'has_downloads',
    ):
        setattr(record, field, getattr(stored, field))
def landing(request):
    """Landing page: newest downloadable records plus a user-filtered,
    paginated search driven by the GET parameters."""
    # Flatten multi-valued GET params to their first value.
    get_params = {key: values[0] for key, values in dict(request.GET).items()}
    page, raw_uri = _check_if_erroneous_page_and_get_page_and_right_uri(request)
    last_records = AudiovisualRecord.search(
        {
            'deleted': False,
            'has_downloads': True,
            'general_information_fetched': True,
            'global_score__gte': 0.1
        },
        sort_by=['-year', '-created_date'],
        page_size=30, page=1, paginate=True).get('results')
    # filtering by users
    try:
        # 'ordering' is consumed for sort_by but restored afterwards so the
        # template still sees it among the filter params.
        ordering = get_params.pop('ordering', None)
        filter_dict = _process_get_params_and_get_filter_dict(get_params)
        get_params['ordering'] = ordering
        filter_dict['deleted'] = False
        filter_dict['has_downloads'] = True
        paginator = AudiovisualRecord.search(
            filter_dict, paginate=True, page_size=20, page=page, sort_by=ordering)
        paginator['results'] = AudiovisualRecordSerializer(
            paginator.get('results', []), many=True).data
    except Condition.InvalidOperator:
        # Malformed user filter: fall back to an empty result page.
        paginator = {'current_page': 1, 'total_pages': 1, 'results': []}
    # here we translate next page number and previous page number into urls
    _add_previous_and_next_navigation_uris_to_search(raw_uri, paginator)
    context = {
        # 'genres': genres,
        'context_class': 'landing',
        'is_landing': True,
        'last_records': last_records,
        'search': paginator,
        'filter_params': get_params,
        'genres_names': _get_genres(),
        'qualities': VideoQualityInStringDetector.our_qualities,
        'year_range': [
            str(y)
            for y in range(1970, int(datetime.utcnow().strftime('%Y')) + 1)
        ]
    }
    return render(request, 'web/landing.html', context=context)
def details(request, slug=None):
    """Detail page for one audiovisual record, looked up by slug.

    Builds: external score links, per-person search URLs, related records
    (sharing stars), and download links grouped and capped per language.
    Renders a 404 page when no visible record matches the slug.
    """
    try:
        # Carry the referer's query-string filters into the template so
        # navigation back to the listing keeps the user's search state.
        referer_uri = request.META['HTTP_REFERER']
        referer_uri = urllib.parse.unquote(referer_uri)
        get_params = {
            p.split('=')[0]: p.split('=')[1]
            for p in referer_uri.split('?')[1].split('&')
        }
    except (IndexError, KeyError):
        # No referer or no query string: no filters to carry over.
        get_params = {}
    audiovisual_records = AudiovisualRecord.search({
        'deleted': False,
        'has_downloads': True,
        'general_information_fetched': True,
        'slug': slug
    })
    if len(audiovisual_records) == 0:
        context = {'genres_names': _get_genres()}
        return render(request, 'web/404.html', status=404, context=context)
    audiovisual_record = audiovisual_records[0]
    # Attach, to each score, a link to its page on the originating source.
    for score in audiovisual_record.scores:
        source = get_general_information_source_by_name(
            score.get('source_name'))
        score['external_url'] = source.base_url + audiovisual_record.metadata[
            'detailed_page'][source.source_name]
    # Add to each person the search url to be used later in the template
    for person in audiovisual_record.directors + audiovisual_record.writers + audiovisual_record.stars:
        person.search_url = f'/s/?ft=b&s="{person.name}"'.replace(' ', '+')
    # related audiovisual records
    # TODO: this is a bit of a pain
    related_records = AudiovisualRecord.search(
        {
            'deleted': False,
            'has_downloads': True,
            'general_information_fetched': True,
            'name__neq': audiovisual_record.name,
            'stars__name__in': [person.name for person in audiovisual_record.stars],
        },
        page_size=10, page=1, paginate=True,
        sort_by=['-global_score']).get('results')
    # more = AudiovisualRecord.search(
    #     {
    #         'deleted': False, 'has_downloads': True, 'general_information_fetched': True,
    #         'name__neq': audiovisual_record.name,
    #         'name__simil': audiovisual_record.name,
    #         '_id__nin': [r.id for r in related_records]
    #     },
    #     page_size=10, page=1, paginate=True, sort_by=['-global_score']
    # ).get('results')
    related_records = related_records  # + more
    # downloads
    # TODO: this is quite a pain
    downloads = DownloadSourceResult.search(
        {
            'audiovisual_record': audiovisual_record,
            'deleted': False
        },
        sort_by='quality')
    # ISO-639 language code -> human-readable name for the template.
    lang_translations = {
        'eng': 'English', 'rus': 'Russian', 'spa': 'Spanish',
        'hin': 'Hindi', 'deu': 'German', 'ita': 'Italian',
        'jpn': 'Japanese', 'fra': 'French', 'kor': 'Korean',
        'gre': 'Greek', 'pol': 'Polish',
    }
    # Group downloads by language in a fixed display order, de-duplicating
    # by download name across languages and capping each group at 10.
    names_used = []
    lang_downloads = []
    for lang in [
        'eng', 'rus', 'spa', 'deu', 'fra', 'ita', 'gre', 'pol',
        'hin', 'jpn', 'kor'
    ]:
        ds = []
        for d in downloads:
            if d.lang == lang and d.name not in names_used:
                names_used.append(d.name)
                ds.append(d)
        ds = ds[:10]
        if len(ds) > 0:
            lang_downloads.append((lang, ds, lang_translations[lang]))
    context = {
        'context_class': 'details',
        'is_landing': True,
        'audiovisual_record': audiovisual_record,
        'downloads': downloads,
        'lang_downloads': lang_downloads,
        'filter_params': get_params,
        'genres_names': _get_genres(),
        'qualities': VideoQualityInStringDetector.our_qualities,
        'related_records': related_records,
        'year_range': [
            str(y)
            for y in range(1970, int(datetime.utcnow().strftime('%Y')) + 1)
        ]
    }
    return render(request, 'web/details.html', context=context)