def _recommend_rating(self, user, recommender, params, include=None, exclude=None):
    """Produce rating-based recommendations for a single user.

    Raises NotFound when the (lower-cased) user is unknown to the
    recommender. Candidate games are the union of explicitly included IDs
    and the filtered queryset, restricted to games the recommender has
    ratings for; excluded games are computed via self._excluded_games().
    Returns an empty tuple when no candidate games remain.
    """
    user = user.lower()
    if user not in recommender.known_users:
        raise NotFound(f"user <{user}> could not be found")

    params = params or {}
    if include is None:
        include = frozenset(_parse_ints(params.get("include")))

    # we should only need this if params are set, but see #90
    queryset_ids = frozenset(
        self.filter_queryset(self.get_queryset())
        .order_by()
        .values_list("bgg_id", flat=True)
    )
    candidates = (include | queryset_ids) & recommender.rated_games

    if not candidates:
        return ()

    exclude = self._excluded_games(user, params, include, exclude)
    use_similarity = take_first(params.get("model")) == "similarity"

    return recommender.recommend(
        users=(user,),
        games=candidates,
        similarity_model=use_similarity,
        exclude=_exclude(user, ids=exclude),
        exclude_known=parse_bool(take_first(params.get("exclude_known"))),
        exclude_clusters=parse_bool(take_first(params.get("exclude_clusters"))),
        star_percentiles=getattr(settings, "STAR_PERCENTILES", None),
    )
Beispiel #2
0
def merge_config(spider,
                 item="GameItem",
                 in_paths=None,
                 out_path=None,
                 full=False,
                 **kwargs):
    """Returns arguments for merging a given spider.

    Builds the kwargs dict for a merge run: input glob, key/latest columns,
    and an output path that depends on whether a "full" (timestamped) or
    trimmed (fixed-name) merge is requested. Caller-supplied kwargs take
    precedence over the defaults applied here.
    """

    curr_date = now()
    timestamp = curr_date.strftime("%Y-%m-%dT%H-%M-%S")

    # in_paths is always overridden; everything else only fills gaps
    kwargs["in_paths"] = in_paths or FEEDS_DIR / spider / item / "*"

    defaults = {
        "keys": f"{spider}_id",
        "key_types": "int" if spider in ("bgg", "luding") else "str",
        "latest": "scraped_at",
        "latest_types": "date",
        # only consider items scraped in the last 90 days
        "latest_min": curr_date - timedelta(days=90),
        "concat_output": True,
    }
    for key, value in defaults.items():
        kwargs.setdefault(key, value)

    if parse_bool(full):
        # full merge: keep every field, write a timestamped file
        kwargs["out_path"] = (
            out_path or FEEDS_DIR / spider / item / f"{timestamp}-merged.jl"
        )
    else:
        # trimmed merge: stable file name, drop bookkeeping fields, sort keys
        kwargs["out_path"] = (
            out_path or DATA_DIR / "scraped" / f"{spider}_{item}.jl"
        )
        kwargs.setdefault(
            "fieldnames_exclude",
            ("published_at", "updated_at", "scraped_at"),
        )
        kwargs.setdefault("sort_keys", True)

    return kwargs
    def _excluded_games(self, user, params, include=None, exclude=None):
        """Collect the game IDs to exclude from *user*'s recommendations.

        Combines explicitly excluded IDs (argument and params) with IDs
        derived from the user's collection (rated, flagged fields, wishlist
        priority, play count), then removes any explicitly included IDs.
        Collection lookups are best effort: any failure is swallowed.
        """
        params = params or {}
        # NOTE: deliberately mutates the caller's params dict to default
        # exclude_known to True
        params.setdefault("exclude_known", True)

        excluded = frozenset(arg_to_iter(exclude)) | frozenset(
            _parse_ints(params.get("exclude"))
        )

        def _bool_param(key):
            return parse_bool(take_first(params.get(key)))

        flagged_fields = [
            field
            for field in self.collection_fields
            if _bool_param(f"exclude_{field}")
        ]
        exclude_known = _bool_param("exclude_known")
        exclude_clusters = _bool_param("exclude_clusters")
        exclude_wishlist = parse_int(take_first(params.get("exclude_wishlist")))
        exclude_play_count = parse_int(take_first(params.get("exclude_play_count")))

        try:
            conditions = [Q(**{field: True}) for field in flagged_fields]
            if exclude_known and exclude_clusters:
                conditions.append(Q(rating__isnull=False))
            if exclude_wishlist:
                conditions.append(Q(wishlist__lte=exclude_wishlist))
            if exclude_play_count:
                conditions.append(Q(play_count__gte=exclude_play_count))
            if conditions:
                combined = reduce(or_, conditions)
                excluded |= frozenset(
                    User.objects.get(name=user)
                    .collection_set.order_by()
                    .filter(combined)
                    .values_list("game_id", flat=True)
                )

        except Exception:  # best effort: unknown user / DB errors add nothing
            pass

        return tuple(excluded - include) if include else tuple(excluded)
    def games(self, request):
        """Similar to self.list(), but with full game details.

        With the "fat" query parameter set, serializes via
        RankingFatSerializer; otherwise uses the default serializer and
        attaches game details via _add_games(). Honors pagination when the
        paginator yields a page.
        """
        fat = parse_bool(next(_extract_params(request, "fat"), None))

        queryset = self.filter_queryset(self.get_queryset())
        page = self.paginate_queryset(queryset)
        paginated = page is not None
        items = page if paginated else queryset

        if fat:
            data = RankingFatSerializer(items, many=True).data
        else:
            serialized = self.get_serializer(items, many=True).data
            game_ids = (
                (ranking.game_id for ranking in page)
                if paginated
                else queryset.values_list("game", flat=True)
            )
            data = _add_games(serialized, game_ids)

        return self.get_paginated_response(data) if paginated else Response(data)
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
# BASE_DIR is two directory levels above this settings file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(BASE_DIR, "data")
MODELS_DIR = os.path.join(BASE_DIR, "models")

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The hard-coded fallback is for development only; production must supply
# SECRET_KEY via the environment.
SECRET_KEY = os.getenv(
    "SECRET_KEY", "+*6x!0^!j^&h4+l-w7h!)pk=1m7gie&@&0cjq7)19%d6v2xu=y"
)

# SECURITY WARNING: don't run with debug turned on in production!
# DEBUG is off unless the DEBUG env var parses as truthy.
DEBUG = parse_bool(os.getenv("DEBUG"))
# ENVIRONMENT defaults to "development" in debug mode, else "production".
ENVIRONMENT = os.getenv("ENVIRONMENT", "development" if DEBUG else "production")
# NOTE(review): presumably gates write operations in production — confirm usage.
READ_ONLY = ENVIRONMENT == "production"

# Leading-dot entries are Django subdomain wildcards (match any subdomain).
ALLOWED_HOSTS = [
    "0.0.0.0",
    "127.0.0.1",
    "[::1]",
    "localhost",
    ".recommend.games",
    ".recommended.games",
]

# When deployed on Google Cloud, also allow the project's appspot.com domain.
if os.getenv("GC_PROJECT"):
    ALLOWED_HOSTS += [f".{os.getenv('GC_PROJECT')}.appspot.com"]
Beispiel #6
0
    from dotenv import find_dotenv, load_dotenv

    DOTENV_PATH = os.getenv("DOTENV_PATH") or find_dotenv(usecwd=True)
    load_dotenv(dotenv_path=DOTENV_PATH, verbose=True)

except ImportError:
    pass

BOT_NAME = "board-games"

SPIDER_MODULES = ["board_game_scraper.spiders"]
NEWSPIDER_MODULE = "board_game_scraper.spiders"

LOG_LEVEL = os.getenv("LOG_LEVEL") or "INFO"
LOG_FORMATTER = "scrapy_extensions.QuietLogFormatter"
LOG_SCRAPED_ITEMS = parse_bool(os.getenv("LOG_SCRAPED_ITEMS"))

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

FEED_EXPORT_FIELDS = (
    "name",
    "alt_name",
    "year",
    "game_type",
    "description",
    "designer",
    "artist",
    "publisher",
    "url",
    "official_url",
    "image_url",
Beispiel #7
0
def merge_configs(spider, full=False):
    """Yields configs for all items in a given spider.

    Args:
        spider: name of the spider whose feeds should be merged, e.g.
            "bga", "bgg", "bgg_hotness" or "bgg_rankings". Unknown spiders
            fall through to a single default GameItem config.
        full: when truthy (parsed via parse_bool), produce full timestamped
            merges; otherwise trimmed, stable-named output files.

    Yields:
        dict: keyword arguments for one merge run per item type.
    """

    full = parse_bool(full)

    if spider == "bga":
        yield merge_config(spider="bga", item="GameItem", full=full)
        # NOTE(review): the excluded column is called "bgg_user_play_count"
        # even though this is the BGA spider — confirm it is not a typo for
        # "bga_user_play_count".
        yield merge_config(
            spider="bga",
            item="RatingItem",
            full=full,
            keys=("bga_user_id", "bga_id"),
            # `full` was already normalized above — no need to re-parse it
            fieldnames_exclude=("bgg_user_play_count",)
            if full
            else ("bgg_user_play_count", "published_at", "updated_at", "scraped_at"),
        )
        return

    if spider == "bgg":
        yield merge_config(spider="bgg", item="GameItem", full=full)
        yield merge_config(
            spider="bgg",
            item="UserItem",
            full=full,
            keys="bgg_user_name",
            key_types="istr",
            fieldnames_exclude=None if full else ("published_at", "scraped_at"),
        )
        yield merge_config(
            spider="bgg",
            item="RatingItem",
            full=full,
            keys=("bgg_user_name", "bgg_id"),
            key_types=("istr", "int"),
            fieldnames_exclude=None if full else ("published_at", "scraped_at"),
        )
        return

    if spider == "bgg_hotness":
        yield merge_config(
            spider="bgg_hotness",
            item="GameItem",
            full=full,
            keys=("published_at", "bgg_id"),
            key_types=("date", "int"),
            latest_min=None,  # keep the entire history of hotness snapshots
            fieldnames=None
            if full
            else (
                "published_at",
                "rank",
                "add_rank",
                "bgg_id",
                "name",
                "year",
                "image_url",
            ),
            fieldnames_exclude=None,
            sort_keys=False,
            sort_fields=("published_at", "rank"),
        )
        return

    if spider == "bgg_rankings":
        yield merge_config(
            spider="bgg_rankings",
            item="GameItem",
            full=full,
            keys=("published_at", "bgg_id"),
            key_types=("date", "int"),
            latest_min=now() - timedelta(days=7),  # only recent snapshots
            fieldnames=None
            if full
            else (
                "published_at",
                "bgg_id",
                "rank",
                "add_rank",
                "name",
                "year",
                "num_votes",
                "bayes_rating",
                "avg_rating",
            ),
            fieldnames_exclude=None,
            sort_keys=False,
            sort_fields=("published_at", "rank"),
        )
        return

    # TODO news merge config

    # Default: a single GameItem merge with the standard settings.
    yield merge_config(spider=spider, item="GameItem", full=full)