Esempio n. 1
0
def calculate_genres_with_films():
    """Recompute which genres have at least one downloadable film and
    persist the genre-name list in the configuration store.

    Genres are sorted by number of matching films (descending) before
    being saved under CONFIG_KEY_GENRES_WITH_FILMS.
    """
    genres = Genre.search(sort_by='name')
    genres_with_films = []
    for genre in genres:
        # Single query per genre: the original issued a page_size=1 probe
        # and then, when non-empty, re-ran the exact same (unpaginated)
        # query just to count it.  One search covers both needs.
        records = AudiovisualRecord.search({
            'deleted': False,
            'has_downloads': True,
            'general_information_fetched': True,
            'genres__name': genre.name
        })
        count = len(records)
        if count > 0:
            genre.number = count
            genres_with_films.append(genre)

    genres_with_films.sort(key=lambda g: g.number, reverse=True)
    configuration = _get_or_create_configuration(CONFIG_KEY_GENRES_WITH_FILMS)
    configuration.data = [g.name for g in genres_with_films]
    configuration.save()
Esempio n. 2
0
def _worker_collect_download_links_for_the_first_time(source_class, logger):
    """Fetch download links from *source_class* for records never checked
    against that source before.

    Pulls up to 50 of the best-scored records whose metadata lacks a
    ``downloads_fetch`` entry for this source, then fans the link
    retrieval out over a small thread pool, sleeping 30 s between
    submissions to throttle requests against the source site.
    """
    source_name = source_class.source_name
    logger(f'Begin to retrieve audiovisual records for {source_name}')
    audiovisual_records = AudiovisualRecord.search(
        {
            'deleted': False,
            'general_information_fetched': True,
            # only records this source has never been queried for
            f'metadata__downloads_fetch__{source_name}__exists': False,
            'scores__votes__exists': True,
            'global_score__gte': 0.1
        },
        paginate=True,
        page_size=50,
        page=1,
        sort_by='-global_score').get('results')
    logger(f'Read {len(audiovisual_records)} records')
    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = []
        for audiovisual_record in audiovisual_records:
            # throttle: deliberate pause before handing each record to the pool
            logger(f'Sleeping for 30 seconds for {source_class.source_name}')
            time.sleep(30)
            future = executor.submit(_worker_get_download_links, source_class,
                                     audiovisual_record, logger)
            futures.append(future)

        # propagate any exception raised inside the worker threads
        for future in concurrent.futures.as_completed(futures):
            future.result()
def _update(audiovisual_record, general_information_klass):
    """Fill *audiovisual_record* with data scraped by *general_information_klass*.

    The record is deleted when scraping fails (GeneralInformationException)
    or when the canonical name reported by the source collides with an
    already-existing record.
    """
    general_information = general_information_klass(audiovisual_record)
    try:
        audiovisual_record.summary = general_information.summary
        audiovisual_record.images = [general_information.main_image]
        # keep an already-known year; only fill it in when missing
        if not bool(audiovisual_record.year):
            audiovisual_record.year = general_information.year
        (
            audiovisual_record.writers,
            audiovisual_record.directors,
            audiovisual_record.stars
        ) = general_information.writers_directors_stars
        audiovisual_record.genres = general_information.genres
        audiovisual_record.is_a_film = general_information.is_a_film
        if audiovisual_record.name != general_information.name:
            # the source reports a different canonical name
            exists = len(AudiovisualRecord.search({'name': general_information.name})) > 0
            if not exists:
                audiovisual_record.name = general_information.name
                audiovisual_record.slug = None  # force slug regeneration on save
            else:
                # duplicate of an existing record -> discard this one
                audiovisual_record.delete()
                return
        audiovisual_record.general_information_fetched = True
        audiovisual_record.save()

    except GeneralInformationException as e:
        log_message(str(e), only_file=True)
        audiovisual_record.delete()
Esempio n. 4
0
def genre_epoch_view(request, genre=None, epoch=None):
    """List downloadable films of *genre*, optionally restricted to an epoch.

    *epoch* may be one of '70s', '80s', '90s', '2000s', '2010s'; any other
    value (including None) applies no year restriction and is displayed as
    'All-Times'.
    """
    # The original began with a try/except that read HTTP_REFERER and then
    # discarded it (body commented out) -- dead code, removed.
    page, raw_uri = _check_if_erroneous_page_and_get_page_and_right_uri(
        request)
    search = {
        'deleted': False,
        'has_downloads': True,
        'general_information_fetched': True,
        'genres__name': genre,
        'global_score__gte': 0.1,
    }
    # Inclusive year bounds for each supported epoch label; a data-driven
    # mapping replaces the if/elif chain.
    epoch_year_bounds = {
        '70s': ('1970', '1979'),
        '80s': ('1980', '1989'),
        '90s': ('1990', '1999'),
        '2000s': ('2000', '2009'),
        '2010s': ('2010', '2019'),
    }
    if epoch in epoch_year_bounds:
        year_gte, year_lte = epoch_year_bounds[epoch]
        search.update({'year__gte': year_gte, 'year__lte': year_lte})
    else:
        epoch = 'All-Times'

    paginator = AudiovisualRecord.search(search,
                                         paginate=True,
                                         page_size=20,
                                         page=page,
                                         sort_by=['-global_score'])

    serializer = AudiovisualRecordSerializer(paginator.get('results', []),
                                             many=True)
    paginator['results'] = serializer.data
    _add_previous_and_next_navigation_uris_to_search(raw_uri, paginator)

    context = {
        'context_class': 'genre_view',
        'is_landing': True,
        'current_genre': genre,
        'genres_names': _get_genres(),
        'epoch': epoch,
        'qualities': VideoQualityInStringDetector.our_qualities,
        'search': paginator,
        'year_range': range(1970,
                            int(datetime.utcnow().strftime('%Y')) + 1)
    }

    return render(request, 'web/genre_epoch.html', context=context)
Esempio n. 5
0
 def handle(self, *args, **options):
     """Walk every non-deleted audiovisual record, page by page, and
     recompute its ``has_downloads`` flag."""
     page_size = 500
     page = 1
     while True:
         paginator = AudiovisualRecord.search({'deleted': False}, paginate=True, page_size=page_size, page=page)
         total_pages = paginator.get('total_pages')
         print(f'Checking audiovisual records: {(page - 1) * page_size}/{page * page_size} / Page: {page}/{total_pages}')
         for ar in paginator.get('results'):
             ar.calculate_has_downloads()
         # stop after the last page; otherwise move on to the next one
         if not paginator.get('next_page', False):
             break
         page += 1
Esempio n. 6
0
    def save_audiovisual_record(self, record: AudiovisualRecord):
        """Persist *record* to MongoDB, inserting or replacing as needed.

        Referenced genres and people are saved first and swapped for their
        persisted counterparts.  Returns the (possibly id-assigned) record.
        """
        record = MongoAudiovisualRecord.convert(record)
        # ensure every referenced sub-document exists in its own collection
        for n, genre in enumerate(record.genres):
            record.genres[n] = self._save_if_not_exist_genre(genre)
        for n, director in enumerate(record.directors):
            record.directors[n] = self._save_if_not_exists_person(director)
        for n, writer in enumerate(record.writers):
            record.writers[n] = self._save_if_not_exists_person(writer)
        for n, star in enumerate(record.stars):
            record.stars[n] = self._save_if_not_exists_person(star)
        dict_obj = dict(record)
        collection = self._get_collection(MongoAudiovisualRecord)

        _id = dict_obj.get('_id', None)
        _check_audiovisual_slug(dict_obj, collection)
        dict_obj.pop('_id')
        if not _id:
            record._id = collection.insert_one(dict_obj).inserted_id
        else:
            # Collection.update was deprecated in PyMongo 3.0 and removed in
            # 4.0; replace_one is the equivalent full-document replacement
            # and matches the modern insert_one API already used above.
            collection.replace_one({'_id': _id}, dict_obj)
        return record
Esempio n. 7
0
def remove_film(request, object_id):
    """Delete the audiovisual record with *object_id* (superusers only),
    then redirect back to the referer, or to '/' when there is none.

    Returns HTTP 403 for non-superusers.
    """
    if not request.user.is_superuser:
        return HttpResponse(status=403)
    _id = ObjectId(object_id)
    try:
        audiovisual_record = AudiovisualRecord.search({'_id': _id})[0]
        audiovisual_record.delete()
    except IndexError:
        # no record with this id -> nothing to delete
        pass
    finally:
        try:
            referer = request.META['HTTP_REFERER']
            return redirect(referer)
        except KeyError:
            # BUG FIX: request.META is a dict, so a missing Referer header
            # raises KeyError; the original caught IndexError and would
            # crash on refererless requests.
            return redirect('/')
def autocomplete_general_information_for_empty_audiovisual_records():
    """Fetch general information for records that have none yet.

    Takes up to 100 unprocessed records and runs ``_update`` against every
    registered information source in a small thread pool.
    """
    audiovisual_records = AudiovisualRecord.search({
        'deleted': False, 'general_information_fetched': False,
    }, paginate=True, page_size=100, page=1).get('results')

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for audiovisual_record in audiovisual_records:
            for general_information_klass in get_all_general_information_sources():
                future = executor.submit(_update, audiovisual_record, general_information_klass)
                # stash the message on the future so it can be logged on completion
                future.log_msg = f'Check {audiovisual_record.name} with {general_information_klass.source_name}'
                futures.append(future)
        for future in concurrent.futures.as_completed(futures):
            autocomplete_general_information_for_empty_audiovisual_records.log(future.log_msg)
            # re-raises worker exceptions; 600 s cap per task
            future.result(timeout=600)
Esempio n. 9
0
def _group_by_genres():
    """Build, per genre, the top-10 recent well-scored records as plain dicts.

    Only records created in the last ~6 months with a global score of at
    least 0.5 are considered.  Returns ``{genre_name: [record_dict, ...]}``.
    """
    six_month_ago = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(
        days=180)
    all_genres = Genre.search()
    groups = {}
    for genre in all_genres:

        audiovisual_records = AudiovisualRecord.search({
            'deleted':
            False,
            'general_information_fetched':
            True,
            'has_downloads':
            True,
            'global_score__gte':
            0.5,
            'created_date__gt':
            six_month_ago,
            'genres__name':
            genre.name
        })

        # ordering_score = average of the per-source scores (0 when none)
        for ar in audiovisual_records:
            scores = [float(s['value']) for s in ar.scores]
            score = sum(scores) / len(scores) if len(scores) > 0 else 0
            setattr(ar, 'ordering_score', score)

        # sorting: primarily by ordering_score, then by year as tiebreaker
        # (attrgetter compares its first key first), both descending
        audiovisual_records = sorted(audiovisual_records,
                                     key=operator.attrgetter(
                                         'ordering_score', 'year'),
                                     reverse=True)[:10]

        groups[genre.name] = [{
            'name': ar.name,
            'slug': ar.slug,
            'year': ar.year,
            'images': ar.images,
            'directors': [p['name'] for p in ar.directors],
            'writers': [p['name'] for p in ar.writers],
            'stars': [p['name'] for p in ar.stars],
            'ordering_score': ar.ordering_score,
        } for ar in audiovisual_records]
    return groups
Esempio n. 10
0
def recent_films_search_again_for_download_links(days=180):
    """Force a fresh download-link search for recent films.

    Clears the ``downloads_fetch`` metadata of every film whose release
    year is at least the year of *days* ago, so the link-fetching workers
    pick them up again.

    :param days: look-back window in days used to compute the cut-off year.
        New parameter generalizing the previously hard-coded 180 (~6 months);
        the default keeps existing callers unchanged.
    """
    n_days_ago = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(
        days=days)
    audiovisual_records = AudiovisualRecord.search({
        'deleted': False,
        'general_information_fetched': True,
        'global_score__gte': 0.1,
        'metadata__downloads_fetch__exists': True,
        # note: comparison is done on the 4-digit year string
        'year__gte': n_days_ago.strftime('%Y')
    })
    for audiovisual_record in audiovisual_records:
        audiovisual_record.refresh()
        # wiping the per-source fetch markers makes workers re-fetch links
        audiovisual_record.metadata['downloads_fetch'] = {}
        audiovisual_record.save()
Esempio n. 11
0
    def get(self, from_date: datetime,
            to_date: datetime) -> List[AudiovisualRecord]:
        """Scrape the source's search results between *from_date* and
        *to_date* and return one bare AudiovisualRecord per result."""
        session = PhantomBrowsingSession(
            referer=self.base_url + '/',
            headers={'Accept-Language': 'en,es;q=0.9,pt;q=0.8'})
        session.get(self.get_search_url(from_date, to_date), timeout=30)
        response = session.last_response

        # bail out when the page failed to load or reports no results
        if response is None or not self.results_found(response.content):
            return []

        names = self.extract_all_names(response.content)
        detail_links = self.extract_all_detail_pages(response.content)
        records = []
        for index, record_name in enumerate(names):
            record = AudiovisualRecord(name=record_name)
            self._prepare_metadata_dict(record)
            # remember where this record's detail page lives on this source
            record.metadata['detailed_page'][self.source_name] = detail_links[index]
            records.append(record)
        return records
Esempio n. 12
0
def compile_trailers_for_audiovisual_records_in_youtube():
    """Find and store a YouTube trailer URL for one audiovisual record.

    Processes a single record per call: the highest-scored downloadable
    record whose metadata does not yet mark a YouTube trailer search.
    The record is always marked as searched, even when no video is found.
    """
    logger = compile_trailers_for_audiovisual_records_in_youtube.log

    candidates = AudiovisualRecord.search(
        {
            'deleted': False,
            'general_information_fetched': True,
            'has_downloads': True,
            'metadata__searched_trailers__youtube__exists': False,
            'global_score__gte': 0.1,
        },
        paginate=True,
        page_size=1,
        page=1,
        sort_by='-global_score',
    ).get('results')

    if not candidates:
        return
    record = candidates[0]

    logger(f'Searching: {record.name}')
    search_string = f'{record.name.lower()} {record.year} trailer'
    video_id = _search(record.name.lower(), record.year, search_string,
                       logger, original_audiovisual_record=record)

    # mark first so a failed search is not retried forever
    _mark_as_searched(record, 'youtube')
    if video_id is None:
        return

    record.refresh()
    if 'trailers' not in record.metadata:
        record.metadata['trailers'] = {}
    record.metadata['trailers']['youtube'] = f'https://www.youtube.com/embed/{video_id}'
    record.save()
def complete_correct_summaries():
    """Re-fetch summaries for records whose summary was compiled badly.

    Runs ``_update_only_summary`` for each candidate record against every
    registered information source, then marks the record with
    ``metadata['summary_fix']`` so it is not processed again.
    """
    # This is a fix for bad summaries compiled.
    audiovisual_records_without_summary_key = AudiovisualRecord.search(
        {'deleted': False, 'metadata__summary_fix__exists': False, 'summary__neq': '', 'global_score__gte': 0.1},
        paginate=True, page_size=10, page=1, sort_by='-global_score'
    ).get('results')

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for audiovisual_record in audiovisual_records_without_summary_key:
            for general_information_klass in get_all_general_information_sources():
                future = executor.submit(_update_only_summary, audiovisual_record, general_information_klass)
                # attach the record so the completion loop knows which one to mark
                future.audiovisual_record = audiovisual_record
                future.log_msg = f'Fix summary of {audiovisual_record.name} with ' \
                                 f'{general_information_klass.source_name}'
                futures.append(future)
        for future in concurrent.futures.as_completed(futures):
            future.result(timeout=600)
            # BUG FIX: the original used the stale loop variable
            # `audiovisual_record` here, so only the record from the last
            # submit iteration was ever refreshed and marked.  Use the
            # record attached to the completed future instead.
            record = future.audiovisual_record
            record.refresh()
            record.metadata['summary_fix'] = True
            record.save()
            complete_correct_summaries.log(future.log_msg)
def save_audiovisual_images_locally():
    """Download cover images for downloadable records into the local media dir.

    Source/target paths are cached in the task's ``data`` store; defaults
    are computed relative to this file on first run.  Up to 10 records per
    call, best global score first.
    """
    local_root_path = save_audiovisual_images_locally.data.get('local_root_path', None)
    web_server_root_path = save_audiovisual_images_locally.data.get('web_server_root_path', None)
    if local_root_path is None:
        # default: <project>/media/ai, derived from this file's location
        current_directory = os.path.dirname(os.path.abspath(__file__))
        core_directory = os.path.dirname(current_directory)
        media_directory = os.path.dirname(core_directory) + '/media/'
        local_root_path = os.path.join(media_directory, 'ai')
        save_audiovisual_images_locally.data.set('local_root_path', local_root_path)
    if web_server_root_path is None:
        web_server_root_path = '/media/ai/'
        save_audiovisual_images_locally.data.set('web_server_root_path', web_server_root_path)

    # best-scored records whose image has not been mirrored locally yet
    audiovisual_records = AudiovisualRecord.search(
        {
            'deleted': False, 'general_information_fetched': True,
            'has_downloads': True, 'metadata__local_image__exists': False,
            'global_score__gte': 0.1,
        },
        paginate=True, page_size=10, page=1, sort_by='-global_score'
    ).get('results')

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = []
        for audiovisual_record in audiovisual_records:
            future = executor.submit(
                _save_audiovisual_image_locally,
                audiovisual_record,
                local_root_path,
                web_server_root_path
            )
            future.log_msg = f'Saving image to local for {audiovisual_record.name} ({audiovisual_record.year})'
            futures.append(future)
        for future in concurrent.futures.as_completed(futures):
            save_audiovisual_images_locally.log(future.log_msg)
            # re-raises worker exceptions; 600 s cap per task
            future.result(timeout=600)
Esempio n. 15
0
def search_for_new_additions():
    """Crawl day by day through a configured date window, adding any
    film/series from the first registered additions source that is not yet
    in the database.

    Progress (current day and days already done) is checkpointed in the
    configuration document so the task resumes across runs.  Days within
    the last 30 days are not processed yet.
    """
    from core.fetchers.services import get_all_new_additions_sources
    try:
        klass = get_all_new_additions_sources()[0]
    except IndexError:
        log_message('There is no addition source to get new films/series')
        return

    today = datetime.utcnow()
    time_ago = today - timedelta(days=30)

    # Configuration
    config_key = f'search_for_new_additions_{klass.source_name}'
    configuration = _get_configuration(key=config_key)
    from_dt = configuration.data.get('from_dt', '')
    to_dt = configuration.data.get('to_dt', '')
    current_dt = configuration.data.get('current_dt', '')
    dts_done = configuration.data.get('dts_done', [])

    if from_dt == '' or to_dt == '':
        log_message(
            f'Need to provide from_dt and to_dt in the format YYYY-MM-DD for configuration {config_key}'
        )
        return

    # parse dates to native objects
    from_native_dt = datetime.strptime(from_dt, '%Y-%m-%d')
    to_native_dt = datetime.strptime(to_dt, '%Y-%m-%d')

    try:
        current_native_dt = datetime.strptime(current_dt, '%Y-%m-%d')
    except ValueError:
        # missing/invalid checkpoint: restart from the window start
        current_native_dt = from_native_dt

    # clamp the checkpoint back inside [from_dt, to_dt]
    if not (from_native_dt <= current_native_dt <= to_native_dt):
        if current_native_dt > to_native_dt:
            current_native_dt = to_native_dt
        else:
            current_native_dt = from_native_dt

    # main loop
    new_additions = klass()
    while from_native_dt <= current_native_dt <= to_native_dt and current_native_dt <= time_ago:
        if current_native_dt.strftime('%Y-%m-%d') in dts_done:
            # this day was already processed in a previous run
            current_native_dt += timedelta(days=1)
            continue

        from_str = current_native_dt.strftime('%Y-%m-%d')
        # fetch one day's worth of additions from the source
        audiovisual_records_new = new_additions.get(
            current_native_dt, current_native_dt + timedelta(days=1))
        for audiovisual_record in audiovisual_records_new:
            results = AudiovisualRecord.search(
                {'name': audiovisual_record.name})
            if len(results) == 0:
                search_for_new_additions.log(
                    f'Adding new {audiovisual_record.name}')
                audiovisual_record.save()

        # checkpoint progress after each completed day
        dts_done.append(from_str)
        current_native_dt += timedelta(days=1)
        configuration.refresh()
        configuration.data['dts_done'] = dts_done
        configuration.data['current_dt'] = current_native_dt.strftime(
            '%Y-%m-%d')
        configuration.save()
Esempio n. 16
0
def add_audiovisual_record_by_name(name, **additional_attributes):
    """Create an AudiovisualRecord called *name* and persist it.

    Extra keyword arguments are forwarded to the record constructor.
    Returns the created record -- the function previously returned None,
    so adding the return value is backward-compatible and lets callers
    use the persisted record (e.g. its assigned id).
    """
    record = AudiovisualRecord(name=name, **additional_attributes)
    dao_implementation.save_audiovisual_record(record)
    return record
Esempio n. 17
0
 def refresh_audiovisual_record(self, record: AudiovisualRecord):
     """Reload *record*'s fields in place from its MongoDB document.

     No-op when the record has no ``_id`` (never persisted) or when the
     document no longer exists in the collection.
     """
     collection = self._get_collection(MongoAudiovisualRecord)
     record_id = getattr(record, '_id', None)
     if record_id is None:
         return
     current = collection.find_one({'_id': record_id})
     if current is None:
         return
     current = MongoAudiovisualRecord(**current)
     # Copy every persisted field in one loop.  A single field list
     # replaces the original's sixteen hand-written assignments, which
     # were prone to drifting out of sync with the model.
     for field in ('name', 'genres', 'year', 'summary', 'directors',
                   'writers', 'stars', 'images', 'deleted',
                   'downloads_disabled', 'scores', 'global_score',
                   'general_information_fetched', 'metadata',
                   'is_a_film', 'has_downloads'):
         setattr(record, field, getattr(current, field))
Esempio n. 18
0
def landing(request):
    """Landing page: newest downloadable records plus a user-filtered,
    paginated search driven by the request's GET parameters."""
    get_params = {key: values[0] for key, values in dict(request.GET).items()}

    page, raw_uri = _check_if_erroneous_page_and_get_page_and_right_uri(
        request)
    last_records = AudiovisualRecord.search(
        {
            'deleted': False,
            'has_downloads': True,
            'general_information_fetched': True,
            'global_score__gte': 0.1
        },
        sort_by=['-year', '-created_date'],
        page_size=30,
        page=1,
        paginate=True).get('results')

    # filtering by users
    try:
        # `ordering` is not a filter field: pull it out before building the
        # filter dict, then restore it so it shows up in filter_params
        ordering = get_params.pop('ordering', None)
        filter_dict = _process_get_params_and_get_filter_dict(get_params)
        get_params['ordering'] = ordering

        filter_dict['deleted'] = False
        filter_dict['has_downloads'] = True
        paginator = AudiovisualRecord.search(
            filter_dict, paginate=True, page_size=20, page=page,
            sort_by=ordering)
        paginator['results'] = AudiovisualRecordSerializer(
            paginator.get('results', []), many=True).data

    except Condition.InvalidOperator:
        # bad filter from the user: show an empty result set
        paginator = {'current_page': 1, 'total_pages': 1, 'results': []}

    # here we translate next page number and previous page number into urls
    _add_previous_and_next_navigation_uris_to_search(raw_uri, paginator)

    context = {
        'context_class': 'landing',
        'is_landing': True,
        'last_records': last_records,
        'search': paginator,
        'filter_params': get_params,
        'genres_names': _get_genres(),
        'qualities': VideoQualityInStringDetector.our_qualities,
        'year_range': [
            str(y)
            for y in range(1970, int(datetime.utcnow().strftime('%Y')) + 1)
        ],
    }
    return render(request, 'web/landing.html', context=context)
Esempio n. 19
0
def details(request, slug=None):
    """Render the details page for the audiovisual record matching *slug*.

    Builds the external score links, per-person search urls, the
    related-records strip (records sharing stars) and the download lists
    grouped by language.  Renders a 404 page when no visible record
    matches the slug.
    """
    # recover the previous search's GET params from the referer so the
    # template can keep the filter state
    try:
        referer_uri = request.META['HTTP_REFERER']
        referer_uri = urllib.parse.unquote(referer_uri)
        get_params = {
            p.split('=')[0]: p.split('=')[1]
            for p in referer_uri.split('?')[1].split('&')
        }
    except (IndexError, KeyError):
        get_params = {}

    audiovisual_records = AudiovisualRecord.search({
        'deleted':
        False,
        'has_downloads':
        True,
        'general_information_fetched':
        True,
        'slug':
        slug
    })
    if len(audiovisual_records) == 0:
        context = {'genres_names': _get_genres()}
        return render(request, 'web/404.html', status=404, context=context)

    audiovisual_record = audiovisual_records[0]

    # attach to each score the url of the source page it came from
    for score in audiovisual_record.scores:
        source = get_general_information_source_by_name(
            score.get('source_name'))
        score['external_url'] = source.base_url + audiovisual_record.metadata[
            'detailed_page'][source.source_name]

    # Add to each person the search url to be used later in the template
    for person in audiovisual_record.directors + audiovisual_record.writers + audiovisual_record.stars:
        person.search_url = f'/s/?ft=b&s="{person.name}"'.replace(' ', '+')

    # related audiovisual records
    # TODO: this is a bit of a pain
    related_records = AudiovisualRecord.search(
        {
            'deleted':
            False,
            'has_downloads':
            True,
            'general_information_fetched':
            True,
            'name__neq':
            audiovisual_record.name,
            'stars__name__in':
            [person.name for person in audiovisual_record.stars],
        },
        page_size=10,
        page=1,
        paginate=True,
        sort_by=['-global_score']).get('results')
    # more = AudiovisualRecord.search(
    #     {
    #         'deleted': False, 'has_downloads': True, 'general_information_fetched': True,
    #         'name__neq': audiovisual_record.name,
    #         'name__simil': audiovisual_record.name,
    #         '_id__nin': [r.id for r in related_records]
    #     },
    #     page_size=10, page=1, paginate=True, sort_by=['-global_score']
    # ).get('results')

    related_records = related_records  # + more

    # downloads
    # TODO: this is a real pain
    downloads = DownloadSourceResult.search(
        {
            'audiovisual_record': audiovisual_record,
            'deleted': False
        },
        sort_by='quality')

    lang_translations = {
        'eng': 'English',
        'rus': 'Russian',
        'spa': 'Spanish',
        'hin': 'Hindi',
        'deu': 'German',
        'ita': 'Italian',
        'jpn': 'Japanese',
        'fra': 'French',
        'kor': 'Korean',
        'gre': 'Greek',
        'pol': 'Polish',
    }
    # group downloads by language (max 10 each), never repeating a release
    # name already listed under an earlier language
    names_used = []
    lang_downloads = []
    for lang in [
            'eng', 'rus', 'spa', 'deu', 'fra', 'ita', 'gre', 'pol', 'hin',
            'jpn', 'kor'
    ]:
        ds = []
        for d in downloads:
            if d.lang == lang and d.name not in names_used:
                names_used.append(d.name)
                ds.append(d)
        ds = ds[:10]
        if len(ds) > 0:
            lang_downloads.append((lang, ds, lang_translations[lang]))

    context = {
        'context_class':
        'details',
        'is_landing':
        True,
        'audiovisual_record':
        audiovisual_record,
        'downloads':
        downloads,
        'lang_downloads':
        lang_downloads,
        'filter_params':
        get_params,
        'genres_names':
        _get_genres(),
        'qualities':
        VideoQualityInStringDetector.our_qualities,
        'related_records':
        related_records,
        'year_range': [
            str(y) for y in range(1970,
                                  int(datetime.utcnow().strftime('%Y')) + 1)
        ]
    }
    return render(request, 'web/details.html', context=context)