def _recommend_rating(self, user, recommender, params, include=None, exclude=None):
    user = user.lower()

    if user not in recommender.known_users:
        raise NotFound(f"user <{user}> could not be found")

    params = params or {}

    include = (
        frozenset(_parse_ints(params.get("include"))) if include is None else include
    )

    # we should only need this if params are set, but see #90
    games = include | frozenset(
        self.filter_queryset(self.get_queryset())
        .order_by()
        .values_list("bgg_id", flat=True)
    )
    games &= recommender.rated_games

    if not games:
        return ()

    exclude = self._excluded_games(user, params, include, exclude)

    similarity_model = take_first(params.get("model")) == "similarity"

    return recommender.recommend(
        users=(user,),
        games=games,
        similarity_model=similarity_model,
        exclude=_exclude(user, ids=exclude),
        exclude_known=parse_bool(take_first(params.get("exclude_known"))),
        exclude_clusters=parse_bool(take_first(params.get("exclude_clusters"))),
        star_percentiles=getattr(settings, "STAR_PERCENTILES", None),
    )
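
# Example (minimal sketch, not part of the view): the candidate-set algebra
# above in isolation. All IDs here are made up for illustration.
_include = frozenset({13, 822})  # explicitly requested games
_queryset_ids = frozenset({822, 2651, 9209})  # games matching the query filters
_rated_games = frozenset({13, 822, 9209})  # games the recommender can score

_games = _include | _queryset_ids  # requested games always join the pool...
_games &= _rated_games  # ...but unrated ones (2651 here) drop out again
assert _games == frozenset({13, 822, 9209})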
def merge_config(
    spider, item="GameItem", in_paths=None, out_path=None, full=False, **kwargs
):
    """Returns arguments for merging a given spider."""

    curr_date = now()
    curr_date_str = curr_date.strftime("%Y-%m-%dT%H-%M-%S")

    kwargs["in_paths"] = in_paths or FEEDS_DIR / spider / item / "*"
    kwargs.setdefault("keys", f"{spider}_id")
    kwargs.setdefault("key_types", "int" if spider in ("bgg", "luding") else "str")
    kwargs.setdefault("latest", "scraped_at")
    kwargs.setdefault("latest_types", "date")
    kwargs.setdefault("latest_min", curr_date - timedelta(days=90))
    kwargs.setdefault("concat_output", True)

    if parse_bool(full):
        kwargs["out_path"] = (
            out_path or FEEDS_DIR / spider / item / f"{curr_date_str}-merged.jl"
        )
    else:
        kwargs["out_path"] = out_path or DATA_DIR / "scraped" / f"{spider}_{item}.jl"

    kwargs.setdefault(
        "fieldnames_exclude",
        ("published_at", "updated_at", "scraped_at"),
    )
    kwargs.setdefault("sort_keys", True)

    return kwargs
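
# Example (sketch): assuming FEEDS_DIR = Path("feeds") and DATA_DIR = Path("data"),
# merge_config("bgg", "GameItem") would return roughly:
#
#     {
#         "in_paths": Path("feeds/bgg/GameItem/*"),
#         "keys": "bgg_id",
#         "key_types": "int",
#         "latest": "scraped_at",
#         "latest_types": "date",
#         "latest_min": <now minus 90 days>,
#         "concat_output": True,
#         "out_path": Path("data/scraped/bgg_GameItem.jl"),
#         "fieldnames_exclude": ("published_at", "updated_at", "scraped_at"),
#         "sort_keys": True,
#     }
#
# Any keyword passed by the caller wins over these defaults via setdefault().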
def _excluded_games(self, user, params, include=None, exclude=None):
    params = params or {}
    params.setdefault("exclude_known", True)

    exclude = frozenset(arg_to_iter(exclude)) | frozenset(
        _parse_ints(params.get("exclude"))
    )

    exclude_known = parse_bool(take_first(params.get("exclude_known")))
    exclude_fields = [
        field
        for field in self.collection_fields
        if parse_bool(take_first(params.get(f"exclude_{field}")))
    ]
    exclude_wishlist = parse_int(take_first(params.get("exclude_wishlist")))
    exclude_play_count = parse_int(take_first(params.get("exclude_play_count")))
    exclude_clusters = parse_bool(take_first(params.get("exclude_clusters")))

    try:
        queries = [Q(**{field: True}) for field in exclude_fields]
        if exclude_known and exclude_clusters:
            queries.append(Q(rating__isnull=False))
        if exclude_wishlist:
            queries.append(Q(wishlist__lte=exclude_wishlist))
        if exclude_play_count:
            queries.append(Q(play_count__gte=exclude_play_count))

        if queries:
            query = reduce(or_, queries)
            exclude |= frozenset(
                User.objects.get(name=user)
                .collection_set.order_by()
                .filter(query)
                .values_list("game_id", flat=True)
            )
    except Exception:
        # the user may not exist or the collection lookup may fail;
        # fall back to the explicitly requested excludes in that case
        pass

    return tuple(exclude) if not include else tuple(exclude - include)
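
# Example (sketch): how the OR-reduction above composes a single filter.
# The field names are illustrative; any collection row matching at least
# one clause ends up in the excluded set.
from functools import reduce
from operator import or_

from django.db.models import Q

_queries = [Q(owned=True), Q(rating__isnull=False), Q(play_count__gte=10)]
_query = reduce(or_, _queries)  # Q(owned=True) | Q(rating__isnull=False) | ...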
def games(self, request):
    """Similar to self.list(), but with full game details."""

    fat = parse_bool(next(_extract_params(request, "fat"), None))
    query_set = self.filter_queryset(self.get_queryset())

    page = self.paginate_queryset(query_set)
    if page is not None:
        if fat:
            serializer = RankingFatSerializer(page, many=True)
            return self.get_paginated_response(serializer.data)
        serializer = self.get_serializer(page, many=True)
        data = _add_games(serializer.data, (r.game_id for r in page))
        return self.get_paginated_response(data)

    if fat:
        serializer = RankingFatSerializer(query_set, many=True)
        return Response(serializer.data)

    serializer = self.get_serializer(query_set, many=True)
    data = _add_games(serializer.data, query_set.values_list("game", flat=True))
    return Response(data)
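
# Example (sketch): a hypothetical client call against this action. The host
# and route are assumptions for illustration; "fat" switches to the full-game
# serializer, and "results" assumes DRF's default paginated response shape.
import requests

_resp = requests.get(
    "https://recommend.games/api/rankings/games/",
    params={"fat": "true", "page": 1},
    timeout=30,
)
_resp.raise_for_status()
print(_resp.json()["results"][:1])  # first ranking entry, with game details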
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(BASE_DIR, "data")
MODELS_DIR = os.path.join(BASE_DIR, "models")

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv(
    "SECRET_KEY", "+*6x!0^!j^&h4+l-w7h!)pk=1m7gie&@&0cjq7)19%d6v2xu=y"
)

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = parse_bool(os.getenv("DEBUG"))

ENVIRONMENT = os.getenv("ENVIRONMENT", "development" if DEBUG else "production")
READ_ONLY = ENVIRONMENT == "production"

ALLOWED_HOSTS = [
    "0.0.0.0",
    "127.0.0.1",
    "[::1]",
    "localhost",
    ".recommend.games",
    ".recommended.games",
]

if os.getenv("GC_PROJECT"):
    ALLOWED_HOSTS += [f".{os.getenv('GC_PROJECT')}.appspot.com"]
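
# Example (sketch): the environment cascade above, with hypothetical values.
# parse_bool is the project's helper; an unset or falsy DEBUG implies production.
#
#   DEBUG unset      -> DEBUG=False, ENVIRONMENT="production",  READ_ONLY=True
#   DEBUG="true"     -> DEBUG=True,  ENVIRONMENT="development", READ_ONLY=False
#   ENVIRONMENT="development" in the environment overrides the DEBUG-derived
#   default either way.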
try:
    from dotenv import find_dotenv, load_dotenv

    DOTENV_PATH = os.getenv("DOTENV_PATH") or find_dotenv(usecwd=True)
    load_dotenv(dotenv_path=DOTENV_PATH, verbose=True)
except ImportError:
    pass

BOT_NAME = "board-games"

SPIDER_MODULES = ["board_game_scraper.spiders"]
NEWSPIDER_MODULE = "board_game_scraper.spiders"

LOG_LEVEL = os.getenv("LOG_LEVEL") or "INFO"
LOG_FORMATTER = "scrapy_extensions.QuietLogFormatter"
LOG_SCRAPED_ITEMS = parse_bool(os.getenv("LOG_SCRAPED_ITEMS"))

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

FEED_EXPORT_FIELDS = (
    "name",
    "alt_name",
    "year",
    "game_type",
    "description",
    "designer",
    "artist",
    "publisher",
    "url",
    "official_url",
    "image_url",
)
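
# Example (hypothetical .env file picked up by load_dotenv above); the
# variable names match the os.getenv() calls in this settings module:
#
#   DOTENV_PATH=/path/to/.env
#   LOG_LEVEL=DEBUG
#   LOG_SCRAPED_ITEMS=1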
def merge_configs(spider, full=False):
    """Yields configs for all items of a given spider."""

    full = parse_bool(full)

    if spider == "bga":
        yield merge_config(spider="bga", item="GameItem", full=full)
        yield merge_config(
            spider="bga",
            item="RatingItem",
            full=full,
            keys=("bga_user_id", "bga_id"),
            fieldnames_exclude=("bgg_user_play_count",)
            if full
            else ("bgg_user_play_count", "published_at", "updated_at", "scraped_at"),
        )
        return

    if spider == "bgg":
        yield merge_config(spider="bgg", item="GameItem", full=full)
        yield merge_config(
            spider="bgg",
            item="UserItem",
            full=full,
            keys="bgg_user_name",
            key_types="istr",
            fieldnames_exclude=None if full else ("published_at", "scraped_at"),
        )
        yield merge_config(
            spider="bgg",
            item="RatingItem",
            full=full,
            keys=("bgg_user_name", "bgg_id"),
            key_types=("istr", "int"),
            fieldnames_exclude=None if full else ("published_at", "scraped_at"),
        )
        return

    if spider == "bgg_hotness":
        yield merge_config(
            spider="bgg_hotness",
            item="GameItem",
            full=full,
            keys=("published_at", "bgg_id"),
            key_types=("date", "int"),
            latest_min=None,
            fieldnames=None
            if full
            else (
                "published_at",
                "rank",
                "add_rank",
                "bgg_id",
                "name",
                "year",
                "image_url",
            ),
            fieldnames_exclude=None,
            sort_keys=False,
            sort_fields=("published_at", "rank"),
        )
        return

    if spider == "bgg_rankings":
        yield merge_config(
            spider="bgg_rankings",
            item="GameItem",
            full=full,
            keys=("published_at", "bgg_id"),
            key_types=("date", "int"),
            latest_min=now() - timedelta(days=7),
            fieldnames=None
            if full
            else (
                "published_at",
                "bgg_id",
                "rank",
                "add_rank",
                "name",
                "year",
                "num_votes",
                "bayes_rating",
                "avg_rating",
            ),
            fieldnames_exclude=None,
            sort_keys=False,
            sort_fields=("published_at", "rank"),
        )
        return

    # TODO news merge config

    yield merge_config(spider=spider, item="GameItem", full=full)
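
# Example (sketch): consuming the generated configs. `merge` here is a
# hypothetical stand-in for whatever performs the actual feed merge; for
# spider="bgg" this loop would run once each for GameItem, UserItem and
# RatingItem.
def merge(in_paths, out_path, **kwargs):
    print(f"would merge {in_paths} into {out_path}")


for config in merge_configs(spider="bgg", full=False):
    merge(**config)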