def automatch_productions(releaser):
    """
    Match this releaser's unmatched Demozoo and Janeway productions by title.

    Productions whose (cleaned title, supertype) pair occurs exactly once on
    each side are linked via a KestraBitworldRelease ProductionLink. If that
    leaves zero unmatched Demozoo prods, the remaining Janeway prods are
    imported outright. Finally the releaser's AuthorMatchInfo counts are
    refreshed.
    """
    unmatched_demozoo_prods, unmatched_janeway_prods, matched_prods = get_production_match_data(
        releaser)
    matched_production_count = len(matched_prods)
    unmatched_demozoo_production_count = len(unmatched_demozoo_prods)
    unmatched_janeway_production_count = len(unmatched_janeway_prods)
    # mapping of (cleaned prod title, supertype) to a pair of lists of demozoo IDs
    # and janeway IDs of prods with that name
    prods_by_name_and_supertype = defaultdict(lambda: ([], []))
    for id, title, url, supertype in unmatched_demozoo_prods:
        prods_by_name_and_supertype[(generate_search_title(title), supertype)][0].append(id)
    for id, title, url, supertype in unmatched_janeway_prods:
        if supertype == 'music':
            # strip music file extensions so the titles compare cleanly
            title = strip_music_extensions(title)
        prods_by_name_and_supertype[(generate_search_title(title), supertype)][1].append(id)
    just_matched_janeway_ids = set()
    for (title, supertype), (demozoo_ids, janeway_ids) in prods_by_name_and_supertype.items():
        # only link when the title is unambiguous on both sides
        if len(demozoo_ids) == 1 and len(janeway_ids) == 1:
            ProductionLink.objects.create(
                production_id=demozoo_ids[0],
                link_class='KestraBitworldRelease',
                parameter=janeway_ids[0],
                is_download_link=False,
                source='janeway-automatch',
            )
            just_matched_janeway_ids.add(janeway_ids[0])
            matched_production_count += 1
            unmatched_demozoo_production_count -= 1
            unmatched_janeway_production_count -= 1
    if unmatched_demozoo_production_count == 0:
        # all matchable prods are accounted for, so let's go on and import the
        # remaining ones from janeway
        for id, title, url, supertype in unmatched_janeway_prods:
            if id in just_matched_janeway_ids:
                continue
            import_release(JanewayRelease.objects.get(janeway_id=id))
            matched_production_count += 1
            unmatched_janeway_production_count -= 1
    AuthorMatchInfo.objects.update_or_create(
        releaser_id=releaser.id, defaults={
            'matched_production_count': matched_production_count,
            'unmatched_demozoo_production_count': unmatched_demozoo_production_count,
            'unmatched_janeway_production_count': unmatched_janeway_production_count,
        })
def get_dz_releaser_ids_matching_by_name_and_type(janeway_author):
    """
    Return a list of Demozoo releaser IDs which match this Janeway author by
    at least one name, and are the same type (scener or group)
    """
    # clean every name the Janeway author is known by
    search_titles = [
        generate_search_title(author_name.name)
        for author_name in janeway_author.names.all()
    ]
    matches = Releaser.objects.filter(
        is_group=janeway_author.is_group,
        nicks__variants__search_title__in=search_titles,
    ).distinct()
    return list(matches.values_list('id', flat=True))
def get_member_clean_names(self):
    """
    return a list of cleaned versions of all names used by members of this
    group (excluding ones of <=3 letters)
    """
    member_names = Name.objects.filter(author__group_memberships__group=self)
    cleaned = (generate_search_title(member_name.name) for member_name in member_names)
    # drop short names (abbreviations) which would produce false matches
    return [title for title in cleaned if len(title) > 3]
def get_group_clean_names(self):
    """
    return a list of cleaned versions of all names of groups this scener is a
    member of (excluding ones of <=3 letters)
    """
    group_names = Name.objects.filter(
        author__is_group=True, author__member_memberships__member=self)
    cleaned = (generate_search_title(group_name.name) for group_name in group_names)
    # drop short names (abbreviations) which would produce false matches
    return [title for title in cleaned if len(title) > 3]
def save(self, *args, **kwargs):
    """
    Save the BBS, keeping the denormalised search_title and the associated
    Name records in sync with the current name.
    """
    self.search_title = generate_search_title(self.name)
    super().save(*args, **kwargs)
    # if name has changed, remove the old Name record
    if self.name != self._original_name:
        self.names.filter(name=self._original_name).delete()
        self._original_name = self.name
    # ensure that a Name with matching name exists for this BBS;
    # get_or_create is called purely for its side effect, so the previously
    # unused `name, created` unpacking has been dropped
    Name.objects.get_or_create(bbs=self, name=self.name)
def save(self, *args, **kwargs):
    """Save the party, syncing search_title and the cached share_image_file_url."""
    # derive the normalised search title from the name before writing
    if self.name:
        self.search_title = generate_search_title(self.name)
    super(Party, self).save(*args, **kwargs)
    if not self.share_image_file:
        if self.share_image_file_url:
            # the file has gone away; blank out the stale cached URL
            Party.objects.filter(pk=self.pk).update(share_image_file_url='')
            self.share_image_file_url = ''
    else:
        file_url = self.share_image_file.url
        if file_url != self.share_image_file_url:
            # refresh the cached URL to point at the current file
            Party.objects.filter(pk=self.pk).update(share_image_file_url=file_url)
            self.share_image_file_url = file_url
def save(self, *args, **kwargs):
    """Save the party, keeping search_title and share_image_file_url up to date."""
    # regenerate the normalised search title whenever a name is present
    if self.name:
        self.search_title = generate_search_title(self.name)
    super(Party, self).save(*args, **kwargs)
    if not self.share_image_file:
        if self.share_image_file_url:
            # file removed: clear the stale cached URL
            Party.objects.filter(pk=self.pk).update(share_image_file_url='')
            self.share_image_file_url = ''
    else:
        current_url = self.share_image_file.url
        if current_url != self.share_image_file_url:
            # cache the file's URL on the denormalised column
            Party.objects.filter(pk=self.pk).update(share_image_file_url=current_url)
            self.share_image_file_url = current_url
def save(self, *args, **kwargs):
    """Persist the production, deriving supertype and search fields first."""
    # backfill supertype from the inferred value when an existing record lacks one
    if self.id and not self.supertype:
        self.supertype = self.inferred_supertype
    if self.title:
        stripped_title = self.title.strip()
        self.title = stripped_title
        self.search_title = generate_search_title(stripped_title)
        self.sortable_title = generate_sort_key(stripped_title)
    # updated_at is non-null at the db level, so seed it on first save
    if self.updated_at is None:
        self.updated_at = datetime.datetime.now()
    return super(Production, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Save the production after normalising supertype, title and search keys."""
    # an existing record without a supertype picks up the inferred one
    if self.id and not self.supertype:
        self.supertype = self.inferred_supertype
    if self.title:
        title = self.title.strip()
        self.title = title
        self.search_title = generate_search_title(title)
        self.sortable_title = generate_sort_key(title)
    # auto-populate updated_at on creation; the db column is non-null
    if self.updated_at is None:
        self.updated_at = datetime.datetime.now()
    return super(Production, self).save(*args, **kwargs)
def live_search(request):
    """
    Autocomplete endpoint: return up to 10 prefix matches for ?q= as JSON.

    Builds a union of per-model querysets (productions, releasers, parties,
    BBSes) filtered by the cleaned query prefix, optionally restricted by
    ?category=, then fetches the real model instances and serialises a small
    dict per hit (type, url, display value, thumbnail for productions).
    """
    query = request.GET.get('q')
    category = request.GET.get('category')
    if query:
        clean_query = generate_search_title(query)
        # start with an empty queryset
        qs = Production.objects.annotate(type=models.Value(
            'empty', output_field=models.CharField()),
        ).values('pk', 'type').none()
        if (not category) or category in ('production', 'graphics', 'music'):
            prod_qs = Production.objects.annotate(
                type=models.Value('production', output_field=models.CharField()),
                name=models.Value('', output_field=models.CharField()),
            ).order_by().filter(search_title__startswith=clean_query).values(
                'pk', 'type')
            if category in ('production', 'graphics', 'music'):
                # a specific prod supertype was requested; narrow to it
                prod_qs = prod_qs.filter(supertype=category)
            qs = qs.union(prod_qs)
        if (not category) or category in ('scener', 'group'):
            releaser_qs = Releaser.objects.annotate(
                type=models.Value('releaser', output_field=models.CharField()),
            ).order_by('pk').filter(
                nicks__variants__search_title__startswith=clean_query).values(
                'pk', 'type').distinct()
            if category in ('scener', 'group'):
                releaser_qs = releaser_qs.filter(
                    is_group=(category == 'group'))
            qs = qs.union(releaser_qs)
        if (not category) or category == 'party':
            qs = qs.union(
                Party.objects.annotate(type=models.Value(
                    'party', output_field=models.CharField()),
                ).order_by().filter(
                    search_title__startswith=clean_query).values(
                    'pk', 'type'))
        if (not category) or category == 'bbs':
            qs = qs.union(
                BBS.objects.annotate(type=models.Value(
                    'bbs', output_field=models.CharField()),
                ).order_by().filter(
                    search_title__startswith=clean_query).values(
                    'pk', 'type'))
        # cap the union at 10 hits before touching the real tables
        search_result_data = list(qs[:10])
        # Assemble the results into a plan for fetching the actual models -
        # form a dict that maps model/type to a set of PKs
        to_fetch = {}
        for d in search_result_data:
            to_fetch.setdefault(d['type'], set()).add(d['pk'])
        # now do the fetches, and store the results as a mapping of (type, pk) tuple to object
        fetched = {}
        if 'production' in to_fetch:
            production_ids = to_fetch['production']
            productions = Production.objects.filter(
                pk__in=production_ids).prefetch_related(
                'author_nicks__releaser', 'author_affiliation_nicks__releaser')
            screenshots = Screenshot.select_for_production_ids(production_ids)
            for prod in productions:
                prod.selected_screenshot = screenshots.get(prod.pk)
                fetched[('production', prod.pk)] = prod
        if 'releaser' in to_fetch:
            releasers = Releaser.objects.filter(
                pk__in=to_fetch['releaser']).prefetch_related(
                'group_memberships__group__nicks', 'nicks')
            for releaser in releasers:
                fetched[('releaser', releaser.pk)] = releaser
        if 'party' in to_fetch:
            parties = Party.objects.filter(pk__in=to_fetch['party'])
            for party in parties:
                fetched[('party', party.pk)] = party
        if 'bbs' in to_fetch:
            bbses = BBS.objects.filter(pk__in=to_fetch['bbs'])
            for bbs in bbses:
                fetched[('bbs', bbs.pk)] = bbs
        # Build final list in same order as returned by the original results query
        results = []
        for d in search_result_data:
            item = fetched.get((d['type'], d['pk'])) or None
            if item:
                if d['type'] == 'production':
                    if item.selected_screenshot:
                        screenshot = item.selected_screenshot
                        width, height = screenshot.thumb_dimensions_to_fit(
                            48, 36)
                        thumbnail = {
                            'url': screenshot.thumbnail_url,
                            'width': width,
                            'height': height,
                            'natural_width': screenshot.thumbnail_width,
                            'natural_height': screenshot.thumbnail_height,
                        }
                    else:
                        thumbnail = None
                    results.append({
                        'type': item.supertype,
                        'url': item.get_absolute_url(),
                        'value': item.title_with_byline,
                        'thumbnail': thumbnail
                    })
                elif d['type'] == 'releaser':
                    primary_nick = item.primary_nick
                    if primary_nick.differentiator:
                        differentiator = " (%s)" % primary_nick.differentiator
                    else:
                        differentiator = ""
                    results.append({
                        'type': 'group' if item.is_group else 'scener',
                        'url': item.get_absolute_url(),
                        'value': item.name_with_affiliations() + differentiator,
                    })
                elif d['type'] == 'party':
                    results.append({
                        'type': 'party',
                        'url': item.get_absolute_url(),
                        'value': item.name,
                    })
                elif d['type'] == 'bbs':
                    results.append({
                        'type': 'bbs',
                        'url': item.get_absolute_url(),
                        'value': item.name,
                    })
    else:
        # no query supplied: return an empty result set
        results = []
    return JsonResponse(results, safe=False)
def save(self, *args, **kwargs):
    """Save the nick variant, refreshing search_title whenever a name is set."""
    current_name = self.name
    if current_name:
        # keep the denormalised search_title in step with the name
        self.search_title = generate_search_title(current_name)
    return super(NickVariant, self).save(*args, **kwargs)
def search(self, page_number=1, count=50):
    """
    Run the search described by this form's cleaned data.

    Parses filter expressions (platform:, by:, of:, group:, year:, type:,
    [tags], ...) out of the query string, builds a union of per-model
    subqueries (productions, releasers, parties, BBSes) ranked by postgres
    full-text rank and title exactness, then fetches the requested page of
    real model instances.

    Returns a (results, page) tuple: results is a list of model instances
    (each annotated with .search_info) in ranked order, page is the
    Paginator page object.
    """
    query = self.cleaned_data['q']
    # Look for filter expressions within query
    filter_expressions = collections.defaultdict(set)
    tag_names = set()

    def apply_filter(match):
        key, val = match.groups()
        if key in RECOGNISED_FILTER_KEYS:
            filter_expressions[key].add(val)
            return ''
        else:
            # the filter has not been recognised;
            # leave the original string intact to be handled as a search term
            return match.group(0)

    for filter_re in (FILTER_RE_ONEWORD, FILTER_RE_SINGLEQUOTE, FILTER_RE_DOUBLEQUOTE):
        query = filter_re.sub(apply_filter, query)

    def apply_tag(match):
        tag_names.add(match.group(1))
        return ''

    query = TAG_RE.sub(apply_tag, query)
    # after stripping filters/tags, is there any free-text search term left?
    asciified_query = unidecode(query).strip()
    has_search_term = bool(asciified_query)
    if has_search_term:
        psql_query = SearchQuery(unidecode(query))
        clean_query = generate_search_title(query)
        production_filter_q = Q(search_document=psql_query)
        releaser_filter_q = Q(search_document=psql_query)
        party_filter_q = Q(search_document=psql_query)
        bbs_filter_q = Q(search_document=psql_query)
    else:
        # filters only; start from match-everything Q objects
        production_filter_q = Q()
        releaser_filter_q = Q()
        party_filter_q = Q()
        bbs_filter_q = Q()

    # each filter type narrows the set of models worth querying at all
    subqueries_to_perform = set(['production', 'releaser', 'party', 'bbs'])
    if 'platform' in filter_expressions or 'on' in filter_expressions:
        subqueries_to_perform &= set(['production'])
        platforms = filter_expressions['platform'] | filter_expressions[
            'on']
        platform_ids = Platform.objects.none().values_list('id', flat=True)
        for platform_name in platforms:
            platform_ids |= (Platform.objects.filter(
                Q(name__iexact=platform_name) |
                Q(aliases__name__iexact=platform_name)).values_list(
                'id', flat=True))
        production_filter_q &= Q(platforms__id__in=list(platform_ids))
    if 'screenshot' in filter_expressions or 'screenshots' in filter_expressions:
        subqueries_to_perform &= set(['production'])
        for flag in filter_expressions['screenshot'] | filter_expressions[
                'screenshots']:
            if flag in ('yes', 'true'):
                production_filter_q &= Q(has_screenshot=True)
            elif flag in ('no', 'false'):
                production_filter_q &= Q(has_screenshot=False)
    if 'by' in filter_expressions or 'author' in filter_expressions:
        subqueries_to_perform &= set(['production'])
        for name in filter_expressions['by'] | filter_expressions['author']:
            clean_name = generate_search_title(name)
            production_filter_q &= (
                # join back through releaser so that we match any nick variant ever used by the author,
                # not just the nick used on the prod. Better to err on the side of being too liberal
                Q(author_nicks__releaser__nicks__variants__search_title=clean_name) |
                Q(author_affiliation_nicks__releaser__nicks__variants__search_title=clean_name))
    if 'of' in filter_expressions:
        subqueries_to_perform &= set(['releaser'])
        for name in filter_expressions['of']:
            clean_name = generate_search_title(name)
            releaser_filter_q &= Q(
                is_group=False,
                group_memberships__group__nicks__variants__search_title=clean_name)
    if 'group' in filter_expressions:
        subqueries_to_perform &= set(['production', 'releaser'])
        for name in filter_expressions['group']:
            clean_name = generate_search_title(name)
            releaser_filter_q &= Q(
                is_group=False,
                group_memberships__group__nicks__variants__search_title=clean_name)
            production_filter_q &= (
                # join back through releaser so that we match any nick variant ever used by the author,
                # not just the nick used on the prod. Better to err on the side of being too liberal
                Q(author_nicks__releaser__is_group=True,
                  author_nicks__releaser__nicks__variants__search_title=clean_name) |
                Q(author_affiliation_nicks__releaser__nicks__variants__search_title=clean_name))
    if tag_names or ('tagged' in filter_expressions):
        subqueries_to_perform &= set(['production'])
        for tag_name in filter_expressions['tagged'] | tag_names:
            production_filter_q &= Q(tags__name=tag_name)
    if 'year' in filter_expressions or 'date' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for date_str in filter_expressions['year'] | filter_expressions[
                'date']:
            try:
                date_expr = FuzzyDate.parse(date_str)
            except ValueError:
                # unparseable date filters are silently ignored
                continue
            production_filter_q &= Q(
                release_date_date__gte=date_expr.date_range_start(),
                release_date_date__lte=date_expr.date_range_end())
            party_filter_q &= Q(
                end_date_date__gte=date_expr.date_range_start(),
                start_date_date__lte=date_expr.date_range_end())
    if 'before' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for date_str in filter_expressions['before']:
            try:
                date_expr = FuzzyDate.parse(date_str)
            except ValueError:
                continue
            production_filter_q &= Q(
                release_date_date__lt=date_expr.date_range_start())
            party_filter_q &= Q(
                start_date_date__lt=date_expr.date_range_start())
    if 'until' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for date_str in filter_expressions['until']:
            try:
                date_expr = FuzzyDate.parse(date_str)
            except ValueError:
                continue
            production_filter_q &= Q(
                release_date_date__lte=date_expr.date_range_end())
            party_filter_q &= Q(
                start_date_date__lte=date_expr.date_range_end())
    if 'after' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for date_str in filter_expressions['after']:
            try:
                date_expr = FuzzyDate.parse(date_str)
            except ValueError:
                continue
            production_filter_q &= Q(
                release_date_date__gt=date_expr.date_range_end())
            party_filter_q &= Q(
                end_date_date__gt=date_expr.date_range_end())
    if 'since' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for date_str in filter_expressions['since']:
            try:
                date_expr = FuzzyDate.parse(date_str)
            except ValueError:
                continue
            production_filter_q &= Q(
                release_date_date__gte=date_expr.date_range_start())
            party_filter_q &= Q(
                end_date_date__gte=date_expr.date_range_start())
    if 'type' in filter_expressions:
        requested_types = filter_expressions['type']
        subqueries_from_type = set()
        filter_by_prod_supertype = False
        production_supertypes = []
        for supertype in ('production', 'graphics', 'music'):
            if supertype in requested_types:
                filter_by_prod_supertype = True
                production_supertypes.append(supertype)
        if filter_by_prod_supertype:
            subqueries_from_type.add('production')
            production_filter_q &= Q(supertype__in=production_supertypes)
        if 'releaser' in requested_types or 'scener' in requested_types or 'group' in requested_types:
            subqueries_from_type.add('releaser')
            # only narrow by is_group when exactly one of scener/group was asked for
            if 'scener' in requested_types and not (
                    'releaser' in requested_types or 'group' in requested_types):
                releaser_filter_q &= Q(is_group=False)
            if 'group' in requested_types and not (
                    'releaser' in requested_types or 'scener' in requested_types):
                releaser_filter_q &= Q(is_group=True)
        if 'party' in requested_types:
            subqueries_from_type.add('party')
        if 'bbs' in requested_types:
            subqueries_from_type.add('bbs')
        # assume that any otherwise-unrecognised 'type' values indicate a production type
        production_types = set()
        for val in requested_types:
            if val not in ('production', 'graphics', 'music', 'scener',
                           'group', 'releaser', 'party', 'bbs'):
                production_types.add(val)
        if production_types:
            prod_type_names_q = Q()
            for name in production_types:
                prod_type_names_q |= Q(name__iexact=name)
            prod_type_ids = ProductionType.objects.filter(
                prod_type_names_q).values_list('id', flat=True)
            subqueries_from_type.add('production')
            production_filter_q &= Q(types__in=prod_type_ids)
        subqueries_to_perform &= subqueries_from_type

    # Construct the master search query as a union of subqueries that search
    # one model each. Each subquery yields a queryset of dicts with the following fields:
    # 'type': 'production', 'releaser' or 'party'
    # 'pk': primary key of the relevant object
    # 'exactness': magic number used to prioritise exact/prefix title matches in the ordering:
    #     2 = (the cleaned version of) the title exactly matches (the cleaned verson of) the search query
    #     1 = (the cleaned version of) the title starts with (the cleaned version of) the search query
    #     0 = neither of the above
    # 'rank': search ranking as calculated by postgres search

    # start with an empty queryset
    if has_search_term:
        rank_annotation = SearchRank(F('search_document'), psql_query)
    else:
        rank_annotation = models.Value('', output_field=models.CharField())
    qs = Production.objects.annotate(
        type=models.Value('empty', output_field=models.CharField()),
        exactness=models.Value(0, output_field=models.IntegerField()),
        rank=rank_annotation).values('pk', 'type', 'exactness', 'rank').none()
    if 'production' in subqueries_to_perform:
        # Search for productions
        if has_search_term:
            rank_annotation = SearchRank(F('search_document'), psql_query)
            exactness_annotation = models.Case(
                models.When(search_title=clean_query, then=models.Value(2)),
                models.When(search_title__startswith=clean_query,
                            then=models.Value(1)),
                default=models.Value(0, output_field=models.IntegerField()),
                output_field=models.IntegerField())
        else:
            # no text query: fall back to alphabetical ordering via rank
            rank_annotation = F('sortable_title')
            exactness_annotation = models.Value(
                0, output_field=models.IntegerField())
        qs = qs.union(
            Production.objects.annotate(
                rank=rank_annotation,
                type=models.Value('production',
                                  output_field=models.CharField()),
                exactness=exactness_annotation).filter(production_filter_q).order_by(
                # empty order_by to cancel the Production model's native ordering
            ).distinct().values('pk', 'type', 'exactness', 'rank'))
    if 'releaser' in subqueries_to_perform:
        # Search for releasers
        if has_search_term:
            rank_annotation = SearchRank(F('search_document'), psql_query)
            # Exactness test will be applied to each of the releaser's nick variants;
            # take the highest result
            exactness_annotation = models.Max(
                models.Case(
                    models.When(nicks__variants__search_title=clean_query,
                                then=models.Value(2)),
                    models.When(nicks__variants__search_title__startswith=clean_query,
                                then=models.Value(1)),
                    default=models.Value(
                        0, output_field=models.IntegerField()),
                    output_field=models.IntegerField()))
        else:
            rank_annotation = F('name')
            exactness_annotation = models.Value(
                0, output_field=models.IntegerField())
        qs = qs.union(
            Releaser.objects.annotate(
                rank=rank_annotation,
                type=models.Value('releaser',
                                  output_field=models.CharField()),
                exactness=exactness_annotation).filter(releaser_filter_q).distinct().order_by(
                # empty order_by to cancel the Releaser model's native ordering
            ).values('pk', 'type', 'exactness', 'rank'))
    if 'party' in subqueries_to_perform:
        # Search for parties
        if has_search_term:
            rank_annotation = SearchRank(F('search_document'), psql_query)
            exactness_annotation = models.Case(
                models.When(search_title=clean_query, then=models.Value(2)),
                models.When(search_title__startswith=clean_query,
                            then=models.Value(1)),
                default=models.Value(0, output_field=models.IntegerField()),
                output_field=models.IntegerField())
        else:
            rank_annotation = F('name')
            exactness_annotation = models.Value(
                0, output_field=models.IntegerField())
        qs = qs.union(
            Party.objects.annotate(
                rank=rank_annotation,
                type=models.Value('party', output_field=models.CharField()),
                exactness=exactness_annotation,
            ).filter(party_filter_q).order_by(
                # empty order_by to cancel the Party model's native ordering
            ).values('pk', 'type', 'exactness', 'rank'),
        )
    if 'bbs' in subqueries_to_perform:
        # Search for BBSes
        if has_search_term:
            rank_annotation = SearchRank(F('search_document'), psql_query)
            exactness_annotation = models.Case(
                models.When(search_title=clean_query, then=models.Value(2)),
                models.When(search_title__startswith=clean_query,
                            then=models.Value(1)),
                default=models.Value(0, output_field=models.IntegerField()),
                output_field=models.IntegerField())
        else:
            rank_annotation = F('name')
            exactness_annotation = models.Value(
                0, output_field=models.IntegerField())
        qs = qs.union(
            BBS.objects.annotate(
                rank=rank_annotation,
                type=models.Value('bbs', output_field=models.CharField()),
                exactness=exactness_annotation,
            ).filter(bbs_filter_q).order_by(
                # empty order_by to cancel any model-level native ordering
            ).values('pk', 'type', 'exactness', 'rank'),
        )
    if has_search_term:
        qs = qs.order_by('-exactness', '-rank', 'pk')
    else:
        # rank holds a name/title here, so sort it ascending
        qs = qs.order_by('-exactness', 'rank', 'pk')

    # Apply pagination to the query before performing the (expensive) real data fetches.
    paginator = Paginator(qs, count)
    # If page request (9999) is out of range, deliver last page of results.
    try:
        page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        page = paginator.page(paginator.num_pages)

    # Assemble the results into a plan for fetching the actual models -
    # form a dict that maps model/type to a set of PKs
    to_fetch = {}
    for d in page.object_list:
        to_fetch.setdefault(d['type'], set()).add(d['pk'])

    # now do the fetches, and store the results as a mapping of (type, pk) tuple to object
    fetched = {}
    if 'production' in to_fetch:
        production_ids = to_fetch['production']
        productions = Production.objects.filter(
            pk__in=production_ids).prefetch_related(
            'author_nicks__releaser', 'author_affiliation_nicks__releaser')
        if has_search_term:
            productions = productions.annotate(
                search_snippet=TSHeadline('notes', psql_query))
        screenshots = Screenshot.select_for_production_ids(production_ids)
        for prod in productions:
            prod.selected_screenshot = screenshots.get(prod.pk)
            # Ignore any search snippets that don't actually contain a highlighted term
            prod.has_search_snippet = has_search_term and '<b>' in prod.search_snippet
            fetched[('production', prod.pk)] = prod
    if 'releaser' in to_fetch:
        releasers = Releaser.objects.filter(
            pk__in=to_fetch['releaser']).prefetch_related(
            'group_memberships__group__nicks', 'nicks')
        if has_search_term:
            releasers = releasers.annotate(
                search_snippet=TSHeadline('notes', psql_query))
        for releaser in releasers:
            releaser.has_search_snippet = has_search_term and '<b>' in releaser.search_snippet
            fetched[('releaser', releaser.pk)] = releaser
    if 'party' in to_fetch:
        parties = Party.objects.filter(pk__in=to_fetch['party'])
        if has_search_term:
            parties = parties.annotate(
                search_snippet=TSHeadline('notes', psql_query))
        for party in parties:
            party.has_search_snippet = has_search_term and '<b>' in party.search_snippet
            fetched[('party', party.pk)] = party
    if 'bbs' in to_fetch:
        bbses = BBS.objects.filter(pk__in=to_fetch['bbs'])
        if has_search_term:
            bbses = bbses.annotate(
                search_snippet=TSHeadline('notes', psql_query))
        for bbs in bbses:
            bbs.has_search_snippet = has_search_term and '<b>' in bbs.search_snippet
            fetched[('bbs', bbs.pk)] = bbs

    # Build final list in same order as returned by the original results query
    results = []
    for d in page.object_list:
        item = fetched.get((d['type'], d['pk'])) or None
        if item:
            item.search_info = d
            results.append(item)

    return (results, page)
def populate_search_title(apps, schema_editor):
    """Data migration: backfill search_title for every existing BBS row."""
    bbs_model = apps.get_model("bbs", "BBS")
    for record in bbs_model.objects.all():
        record.search_title = generate_search_title(record.name)
        # write only the changed column
        record.save(update_fields=['search_title'])
def save(self, *args, **kwargs):
    """Save the record, regenerating search_title from name first."""
    # keep the denormalised search column in step with the name
    self.search_title = generate_search_title(self.name)
    return super().save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Save the record; search_title is re-derived from name when one is set."""
    current_name = self.name
    if current_name:
        # refresh the denormalised search column before hitting the database
        self.search_title = generate_search_title(current_name)
    return super().save(*args, **kwargs)
def handle(self, *args, **kwargs):
    """
    Cross-link Janeway authors to Demozoo releasers.

    For each Janeway author, find Demozoo releasers sharing at least one
    cleaned name (and the same scener/group type), then confirm the match via
    memberships already linked through KestraBitworldAuthor IDs. Confirmed
    matches get a ReleaserExternalLink; ambiguous matches (more than one
    candidate) are additionally reported to janeway_dupes.csv for manual
    review.
    """
    creation_count = 0
    # open the dupes report with a context manager so the file is closed even
    # if the matching loop raises; newline='' is the csv-module recommended mode
    with open('janeway_dupes.csv', mode='w', newline='') as dupes_file:
        dupes_csv = csv.writer(dupes_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        dupes_csv.writerow(['Scener', 'Janeway URL', 'Demozoo URLs'])
        for (index, author) in enumerate(Author.objects.all()):
            if (index % 100 == 0):
                print("processed %d authors" % index)
            # find releasers in the Demozoo data that match any of this author's names
            # (excluding abbreviations) and are of the right type (scener vs group)
            candidate_releaser_ids = list(Releaser.objects.filter(
                is_group=author.is_group,
                nicks__variants__search_title__in=[generate_search_title(name.name) for name in author.names.all()]
            ).distinct().values_list('id', flat=True))
            if not candidate_releaser_ids:
                continue
            if author.is_group:
                # get the janeway IDs of this group's members
                member_ids = Author.objects.filter(group_memberships__group=author).values_list('janeway_id', flat=True)
                member_demozoo_ids = list(ReleaserExternalLink.objects.filter(
                    link_class='KestraBitworldAuthor',
                    parameter__in=[str(id) for id in member_ids]
                ).values_list('releaser_id', flat=True))
                # see if any candidate releasers have one or more matching members by ID.
                # (matching members by *name* was tried and removed due to false positives,
                # e.g. JP/Mayhem - janeway author 29340 vs demozoo scener 40395)
                matching_releaser_ids = set(DZMembership.objects.filter(
                    group_id__in=candidate_releaser_ids,
                    member_id__in=member_demozoo_ids
                ).distinct().values_list('group_id', flat=True))
            else:
                # get the janeway IDs of the groups this scener belongs to
                group_ids = Author.objects.filter(
                    is_group=True, member_memberships__member=author
                ).values_list('janeway_id', flat=True)
                group_demozoo_ids = list(ReleaserExternalLink.objects.filter(
                    link_class='KestraBitworldAuthor',
                    parameter__in=[str(id) for id in group_ids]
                ).values_list('releaser_id', flat=True))
                # see if any candidate releasers have one or more matching groups by ID
                # (name-based matching removed for the same false-positive reason as above)
                matching_releaser_ids = set(DZMembership.objects.filter(
                    member_id__in=candidate_releaser_ids,
                    group_id__in=group_demozoo_ids
                ).distinct().values_list('member_id', flat=True))
            if len(matching_releaser_ids) > 1:
                # ambiguous: log every candidate for manual review.
                # NOTE: the original wrote author.name.encode('utf-8'), which under
                # Python 3 emits the bytes repr (b'...') into the CSV; write the str
                dupes_csv.writerow(
                    [author.name, 'http://janeway.exotica.org.uk/author.php?id=%d' % author.janeway_id] + [
                        'https://demozoo.org/%s/%d/' % ('groups' if author.is_group else 'sceners', id)
                        for id in list(matching_releaser_ids)
                    ]
                )
            # don't re-create links that already exist for this janeway author
            already_linked_releasers = list(ReleaserExternalLink.objects.filter(
                link_class='KestraBitworldAuthor', parameter=author.janeway_id
            ).values_list('releaser_id', flat=True))
            for releaser_id in matching_releaser_ids:
                if releaser_id not in already_linked_releasers:
                    ReleaserExternalLink.objects.create(
                        releaser_id=releaser_id,
                        link_class='KestraBitworldAuthor',
                        parameter=author.janeway_id,
                        source='janeway-automatch',
                    )
                    creation_count += 1
    print("%d cross-links created" % creation_count)
def search(self, with_real_names: bool = False, page_number: int = 1, count: int = 50):
    """Run a full-text search across productions, releasers and parties.

    Parses filter expressions (e.g. ``platform:``, ``by:``, ``year:``, ``type:``,
    ``[tag]`` syntax) out of the raw query string, builds a per-model filter for
    each of the three searchable models, unions the matching subqueries into one
    ranked result set, paginates it, then bulk-fetches the real model objects.

    :param with_real_names: if True, search releasers against their
        admin_search_document (which includes real names) instead of the
        public search_document
    :param page_number: 1-based page of results to return
    :param count: results per page
    :returns: a ``(results, page)`` tuple — ``results`` is a list of
        Production / Releaser / Party instances (each given ``search_info``
        and ``has_search_snippet`` attributes), ``page`` is the Paginator page
    """
    query = self.cleaned_data['q']

    # Look for filter expressions within query
    filter_expressions = collections.defaultdict(set)
    tag_names = set()

    def apply_filter(match):
        # Pull a key:value pair out of the query string; recognised keys are
        # collected into filter_expressions and removed from the free-text query.
        key, val = match.groups()
        if key in RECOGNISED_FILTER_KEYS:
            filter_expressions[key].add(val)
            return ''
        else:
            # the filter has not been recognised;
            # leave the original string intact to be handled as a search term
            return match.group(0)

    # Three regexes cover the bare-word, single-quoted and double-quoted forms.
    for filter_re in (FILTER_RE_ONEWORD, FILTER_RE_SINGLEQUOTE, FILTER_RE_DOUBLEQUOTE):
        query = filter_re.sub(apply_filter, query)

    def apply_tag(match):
        # Collect [tag]-style tokens and strip them from the free-text query.
        tag_names.add(match.group(1))
        return ''

    query = TAG_RE.sub(apply_tag, query)

    # unidecode folds accented characters so the postgres query matches
    # the ASCII-folded search documents.
    psql_query = SearchQuery(unidecode(query))
    clean_query = generate_search_title(query)
    rank_annotation = SearchRank(F('search_document'), psql_query)

    # Which of the three model subqueries to run; filters below narrow this
    # (e.g. a platform: filter can only ever match productions).
    # NOTE(review): set([...]) could be a set literal — left as-is.
    subqueries_to_perform = set(['production', 'releaser', 'party'])

    production_filter_q = Q(search_document=psql_query)

    if with_real_names:
        # admin search document additionally indexes sceners' real names
        releaser_filter_q = Q(admin_search_document=psql_query)
        releaser_rank_annotation = SearchRank(F('admin_search_document'), psql_query)
    else:
        releaser_filter_q = Q(search_document=psql_query)
        releaser_rank_annotation = rank_annotation

    party_filter_q = Q(search_document=psql_query)

    if 'platform' in filter_expressions or 'on' in filter_expressions:
        # 'on' is an alias for 'platform'; only productions have platforms
        subqueries_to_perform &= set(['production'])
        platforms = filter_expressions['platform'] | filter_expressions['on']

        # Accumulate platform IDs matching any requested name or alias
        platform_ids = Platform.objects.none().values_list('id', flat=True)
        for platform_name in platforms:
            platform_ids |= Platform.objects.filter(
                Q(name__iexact=platform_name) | Q(aliases__name__iexact=platform_name)
            ).values_list('id', flat=True)

        production_filter_q &= Q(platforms__id__in=list(platform_ids))

    if 'by' in filter_expressions or 'author' in filter_expressions:
        subqueries_to_perform &= set(['production'])
        for name in filter_expressions['by'] | filter_expressions['author']:
            clean_name = generate_search_title(name)
            production_filter_q &= (
                # join back through releaser so that we match any nick variant ever used by the author,
                # not just the nick used on the prod. Better to err on the side of being too liberal
                Q(author_nicks__releaser__nicks__variants__search_title=clean_name) |
                Q(author_affiliation_nicks__releaser__nicks__variants__search_title=clean_name)
            )

    if 'of' in filter_expressions:
        # 'of:' restricts to sceners who are members of the named group
        subqueries_to_perform &= set(['releaser'])
        for name in filter_expressions['of']:
            clean_name = generate_search_title(name)
            releaser_filter_q &= Q(
                is_group=False,
                group_memberships__group__nicks__variants__search_title=clean_name
            )

    if 'group' in filter_expressions:
        # 'group:' matches both members of the group (releaser subquery) and
        # prods authored by / affiliated with the group (production subquery)
        subqueries_to_perform &= set(['production', 'releaser'])
        for name in filter_expressions['group']:
            clean_name = generate_search_title(name)
            releaser_filter_q &= Q(
                is_group=False,
                group_memberships__group__nicks__variants__search_title=clean_name
            )
            production_filter_q &= (
                # join back through releaser so that we match any nick variant ever used by the author,
                # not just the nick used on the prod. Better to err on the side of being too liberal
                Q(
                    author_nicks__releaser__is_group=True,
                    author_nicks__releaser__nicks__variants__search_title=clean_name
                ) | Q(
                    author_affiliation_nicks__releaser__nicks__variants__search_title=clean_name
                )
            )

    if tag_names or ('tagged' in filter_expressions):
        # 'tagged:' filter and bare [tag] tokens are treated identically
        subqueries_to_perform &= set(['production'])
        for tag_name in filter_expressions['tagged'] | tag_names:
            production_filter_q &= Q(tags__name=tag_name)

    if 'year' in filter_expressions:
        subqueries_to_perform &= set(['production', 'party'])
        for year_str in filter_expressions['year']:
            try:
                year = int(year_str)
            except ValueError:
                # silently ignore non-numeric year filters
                continue
            production_filter_q &= Q(release_date_date__year=year)
            # a party matches if it either starts or ends in the given year
            party_filter_q &= (Q(start_date_date__year=year) | Q(end_date_date__year=year))

    if 'type' in filter_expressions:
        requested_types = filter_expressions['type']
        subqueries_from_type = set()
        filter_by_prod_supertype = False
        production_supertypes = []

        # 'production' / 'graphics' / 'music' select production supertypes
        for supertype in ('production', 'graphics', 'music'):
            if supertype in requested_types:
                filter_by_prod_supertype = True
                production_supertypes.append(supertype)

        if filter_by_prod_supertype:
            subqueries_from_type.add('production')
            production_filter_q &= Q(supertype__in=production_supertypes)

        if 'releaser' in requested_types or 'scener' in requested_types or 'group' in requested_types:
            subqueries_from_type.add('releaser')

            # narrow to sceners-only / groups-only when exactly one was asked for
            if 'scener' in requested_types and not ('releaser' in requested_types or 'group' in requested_types):
                releaser_filter_q &= Q(is_group=False)

            if 'group' in requested_types and not ('releaser' in requested_types or 'scener' in requested_types):
                releaser_filter_q &= Q(is_group=True)

        if 'party' in requested_types:
            subqueries_from_type.add('party')

        # assume that any otherwise-unrecognised 'type' values indicate a production type
        production_types = set()
        for val in requested_types:
            if val not in ('production', 'graphics', 'music', 'scener', 'group', 'releaser', 'party'):
                production_types.add(val)

        if production_types:
            subqueries_from_type.add('production')
            production_filter_q &= Q(types__name__in=production_types)

        subqueries_to_perform &= subqueries_from_type

    # Construct the master search query as a union of subqueries that search
    # one model each. Each subquery yields a queryset of dicts with the following fields:
    # 'type': 'production', 'releaser' or 'party'
    # 'pk': primary key of the relevant object
    # 'exactness': magic number used to prioritise exact/prefix title matches in the ordering:
    #     2 = (the cleaned version of) the title exactly matches (the cleaned verson of) the search query
    #     1 = (the cleaned version of) the title starts with (the cleaned version of) the search query
    #     0 = neither of the above
    # 'rank': search ranking as calculated by postgres search

    # start with an empty queryset
    qs = Production.objects.annotate(
        type=models.Value('empty', output_field=models.CharField()),
        exactness=models.Value(0, output_field=models.IntegerField()),
        rank=rank_annotation
    ).values('pk', 'type', 'exactness', 'rank').none()

    if 'production' in subqueries_to_perform:
        # Search for productions
        qs = qs.union(
            Production.objects.annotate(
                rank=rank_annotation,
                type=models.Value('production', output_field=models.CharField()),
                exactness=models.Case(
                    models.When(search_title=clean_query, then=models.Value(2)),
                    models.When(search_title__startswith=clean_query, then=models.Value(1)),
                    default=models.Value(0, output_field=models.IntegerField()),
                    output_field=models.IntegerField()
                )
            ).filter(
                production_filter_q
            ).order_by(
                # empty order_by to cancel the Production model's native ordering
            ).distinct().values('pk', 'type', 'exactness', 'rank')
        )

    if 'releaser' in subqueries_to_perform:
        # Search for releasers
        qs = qs.union(
            Releaser.objects.annotate(
                rank=releaser_rank_annotation,
                type=models.Value('releaser', output_field=models.CharField()),
                # Exactness test will be applied to each of the releaser's nick variants;
                # take the highest result
                exactness=models.Max(models.Case(
                    models.When(nicks__variants__search_title=clean_query, then=models.Value(2)),
                    models.When(nicks__variants__search_title__startswith=clean_query, then=models.Value(1)),
                    default=models.Value(0, output_field=models.IntegerField()),
                    output_field=models.IntegerField()
                ))
            ).filter(
                releaser_filter_q
            ).order_by(
                # empty order_by to cancel the Releaser model's native ordering
            ).values('pk', 'type', 'exactness', 'rank')
        )

    if 'party' in subqueries_to_perform:
        # Search for parties
        qs = qs.union(
            Party.objects.annotate(
                rank=rank_annotation,
                type=models.Value('party', output_field=models.CharField()),
                exactness=models.Case(
                    models.When(search_title=clean_query, then=models.Value(2)),
                    models.When(search_title__startswith=clean_query, then=models.Value(1)),
                    default=models.Value(0, output_field=models.IntegerField()),
                    output_field=models.IntegerField()
                )
            ).filter(
                party_filter_q
            ).order_by(
                # empty order_by to cancel the Party model's native ordering
            ).values('pk', 'type', 'exactness', 'rank'),
        )

    # exact/prefix title matches first, then postgres rank, with pk as tiebreaker
    qs = qs.order_by('-exactness', '-rank', 'pk')

    # Apply pagination to the query before performing the (expensive) real data fetches.
    paginator = Paginator(qs, count)

    # If page request (9999) is out of range, deliver last page of results.
    try:
        page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        page = paginator.page(paginator.num_pages)

    # Assemble the results into a plan for fetching the actual models -
    # form a dict that maps model/type to a set of PKs
    to_fetch = {}
    for d in page.object_list:
        to_fetch.setdefault(d['type'], set()).add(d['pk'])

    # now do the fetches, and store the results as a mapping of (type, pk) tuple to object
    fetched = {}

    if 'production' in to_fetch:
        production_ids = to_fetch['production']
        productions = Production.objects.filter(pk__in=production_ids).prefetch_related(
            'author_nicks__releaser', 'author_affiliation_nicks__releaser'
        ).annotate(
            search_snippet=TSHeadline('notes', psql_query)
        )
        screenshots = Screenshot.select_for_production_ids(production_ids)
        for prod in productions:
            prod.selected_screenshot = screenshots.get(prod.pk)
            # Ignore any search snippets that don't actually contain a highlighted term
            prod.has_search_snippet = '<b>' in prod.search_snippet
            fetched[('production', prod.pk)] = prod

    if 'releaser' in to_fetch:
        releasers = Releaser.objects.filter(pk__in=to_fetch['releaser']).prefetch_related(
            'group_memberships__group__nicks', 'nicks'
        ).annotate(
            search_snippet=TSHeadline('notes', psql_query)
        )
        for releaser in releasers:
            releaser.has_search_snippet = '<b>' in releaser.search_snippet
            fetched[('releaser', releaser.pk)] = releaser

    if 'party' in to_fetch:
        parties = Party.objects.filter(pk__in=to_fetch['party']).annotate(
            search_snippet=TSHeadline('notes', psql_query)
        )
        for party in parties:
            party.has_search_snippet = '<b>' in party.search_snippet
            fetched[('party', party.pk)] = party

    # Build final list in same order as returned by the original results query
    results = []
    for d in page.object_list:
        # NOTE(review): `or None` is redundant (get already returns None) — left as-is
        item = fetched.get((d['type'], d['pk'])) or None
        if item:
            item.search_info = d
            results.append(item)

    return (results, page)