Example #1
def _get_stats_from_cache():
    """Get submission statistics from cache"""
    stats = cache.get(STATS_CACHE_KEY, namespace=STATS_CACHE_NAMESPACE)
    last_collected = cache.get(STATS_CACHE_LAST_UPDATE_KEY,
                                  namespace=STATS_CACHE_NAMESPACE)

    return last_collected, stats
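The write side of this pattern is not shown here; a minimal sketch of what it could look like, reusing the same constants as above (the helper name and the 10-minute expiry are assumptions):

def _put_stats_in_cache(stats, last_collected):
    """Hypothetical counterpart that stores the values read by _get_stats_from_cache."""
    cache.set(STATS_CACHE_KEY, stats, time=600,  # assumed 10-minute expiry
              namespace=STATS_CACHE_NAMESPACE)
    cache.set(STATS_CACHE_LAST_UPDATE_KEY, last_collected, time=600,
              namespace=STATS_CACHE_NAMESPACE)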
Example #2
    def test_delete_with_namespace(self):
        key = "testing"
        namespace = "spaaaaaaace"
        self.assertTrue(cache.set(key, u"Пример", namespace=namespace))
        self.assertEqual(cache.get(key, namespace=namespace), u"Пример")
        self.assertEqual(cache.delete(key, namespace=namespace), 1)
        self.assertIsNone(cache.get(key, namespace=namespace))
Example #3
    def test_listen_counts_in_cache(self):
        count = self._create_test_data(self.testuser_name)
        self.assertEqual(count, self.logstore.get_listen_count_for_user(self.testuser_name, need_exact=True))
        user_key = '{}{}'.format(REDIS_INFLUX_USER_LISTEN_COUNT, self.testuser_name)
        self.assertEqual(count, int(cache.get(user_key, decode=False)))

        batch = generate_data(self.testuser_id, self.testuser_name, int(time.time()), 1)
        self.logstore.insert(batch)
        self.assertEqual(count + 1, int(cache.get(user_key, decode=False)))
Example #4
    def test_datetime(self):
        self.assertTrue(cache.set('some_time', datetime.datetime.now()))
        self.assertEqual(type(cache.get('some_time')), datetime.datetime)

        dictionary = {
            "id": 1,
            "created": datetime.datetime.now(),
        }
        self.assertTrue(cache.set('some_other_time', dictionary))
        self.assertEqual(cache.get('some_other_time'), dictionary)
Example #5
    def test_single_dict_fancy(self):
        dictionary = {
            "fancy": u"Да",
            "тест": 11,
        }
        cache.set('some_dict', dictionary)
        self.assertEqual(cache.get('some_dict'), dictionary)
Example #6
    def test_single_dict(self):
        dictionary = {
            "fancy": "yeah",
            "wow": 11,
        }
        self.assertTrue(cache.set('some_dict', dictionary))
        self.assertEqual(cache.get('some_dict'), dictionary)
Example #7
def mappings(mbid=None):
    """Get mappings to Spotify for a specified MusicBrainz ID.

    Returns:
        List containing Spotify URIs that are mapped to specified MBID.
    """
    if _base_url is None:
        flash.warn(lazy_gettext(_UNAVAILABLE_MSG))
        return []

    data = cache.get(mbid, _CACHE_NAMESPACE)
    if not data:
        try:
            session = requests.Session()
            session.mount(_base_url, HTTPAdapter(max_retries=2))
            resp = session.post(
                url=_base_url + 'mapping',
                headers={'Content-Type': 'application/json'},
                data=json.dumps({'mbid': mbid}),
            )
            resp.raise_for_status()
            data = resp.json().get('mappings')
        except RequestException:
            flash.warn(lazy_gettext("Spotify mapping server is unavailable. You will not see an embedded player."))
            return []
        cache.set(key=mbid, namespace=_CACHE_NAMESPACE, val=data)
    return data
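If a mapping changes upstream, the cached entry must be dropped before mappings() will query the server again. A minimal sketch using the cache.delete signature shown in Example #2 (the helper name is an assumption):

def invalidate_mapping(mbid):
    # Hypothetical helper: forget the cached Spotify mapping for this MBID so
    # the next mappings() call posts to the mapping server again.
    cache.delete(mbid, namespace=_CACHE_NAMESPACE)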
Example #8
def get_last_submitted_recordings():
    """Get list of last submitted recordings.

    Returns:
        List of dictionaries with basic info about last submitted recordings:
        mbid (MusicBrainz ID), artist (name), and title.
    """
    cache_key = "last-submitted-recordings"
    last_submissions = cache.get(cache_key)
    if not last_submissions:
        with db.engine.connect() as connection:
            # We are getting results with an offset of 10 rows because we'd
            # prefer to show recordings for which we already calculated
            # high-level data. This might not be the best way to do that.
            result = connection.execute("""SELECT ll.gid,
                                     llj.data->'metadata'->'tags'->'artist'->>0,
                                     llj.data->'metadata'->'tags'->'title'->>0
                                FROM lowlevel ll
                                JOIN lowlevel_json llj
                                  ON ll.id = llj.id
                            ORDER BY ll.id DESC
                               LIMIT 5
                              OFFSET 10""")
            last_submissions = result.fetchall()
            last_submissions = [
                {
                    "mbid": str(r[0]),
                    "artist": r[1],
                    "title": r[2],
                } for r in last_submissions if r[1] and r[2]
            ]
        cache.set(cache_key, last_submissions, time=LAST_MBIDS_CACHE_TIMEOUT)

    return last_submissions
Example #9
def browse_release_groups(*, artist_id, release_types=None, limit=None, offset=None):
    """Get all release groups linked to an artist.

    Args:
        artist_id (uuid): MBID of the artist.
        release_types (list): List of types of release groups to be fetched.
        limit (int): Max number of release groups to return.
        offset (int): Offset that can be used in conjunction with the limit.

    Returns:
        Tuple containing the list of dictionaries of release groups ordered by release year
        and the total count of the release groups.
    """
    artist_id = str(artist_id)
    includes_data = defaultdict(dict)
    if release_types is None:
        release_types = []
    release_types = [release_type.capitalize() for release_type in release_types]
    key = cache.gen_key(artist_id, limit, offset, *release_types)
    release_groups = cache.get(key)
    if not release_groups:
        with mb_session() as db:
            release_groups_query = _browse_release_groups_query(db, artist_id, release_types)
            count = release_groups_query.count()
            release_groups = release_groups_query.order_by(
                case([(models.ReleaseGroupMeta.first_release_date_year.is_(None), 1)], else_=0),
                models.ReleaseGroupMeta.first_release_date_year.desc()
            ).limit(limit).offset(offset).all()
        for release_group in release_groups:
            includes_data[release_group.id]['meta'] = release_group.meta
        release_groups = ([to_dict_release_groups(release_group, includes_data[release_group.id])
                           for release_group in release_groups], count)
        cache.set(key=key, val=release_groups, time=DEFAULT_CACHE_EXPIRATION)
    return release_groups
Example #10
def get_release_group_by_id(mbid):
    """Get release group using the MusicBrainz ID."""
    key = cache.gen_key(mbid)
    release_group = cache.get(key)
    if not release_group:
        release_group = _get_release_group_by_id(mbid)
        cache.set(key=key, val=release_group, time=DEFAULT_CACHE_EXPIRATION)
    return release_group_rel.process(release_group)
Example #11
def get_recording_by_id(mbid):
    mbid = str(mbid)
    recording = cache.get(mbid)
    if not recording:
        try:
            recording = musicbrainzngs.get_recording_by_id(mbid, includes=['artists', 'releases', 'media'])['recording']
        except ResponseError as e:
            raise DataUnavailable(e)
        cache.set(mbid, recording, time=CACHE_TIMEOUT)
    return recording
Example #12
    def get_total_listen_count(self, cache_value=True):
        """ Returns the total number of listens stored in the ListenStore.
            First checks the brainzutils cache for the value, if not present there
            makes a query to the db and caches it in brainzutils cache.
        """

        if cache_value:
            count = cache.get(InfluxListenStore.REDIS_INFLUX_TOTAL_LISTEN_COUNT, decode=False)
            if count:
                return int(count)

        try:
            result = self.influx.query("""SELECT %s
                                            FROM "%s"
                                        ORDER BY time DESC
                                           LIMIT 1""" % (COUNT_MEASUREMENT_NAME, TIMELINE_COUNT_MEASUREMENT))
        except (InfluxDBServerError, InfluxDBClientError) as err:
            self.log.error("Cannot query influx: %s" % str(err), exc_info=True)
            raise

        try:
            item = result.get_points(measurement=TIMELINE_COUNT_MEASUREMENT).__next__()
            count = int(item[COUNT_MEASUREMENT_NAME])
            timestamp = convert_to_unix_timestamp(item['time'])
        except (KeyError, ValueError, StopIteration):
            timestamp = 0
            count = 0

        # Now sum counts that have been added in the interval we're interested in
        try:
            result = self.influx.query("""SELECT sum(%s) as total
                                            FROM "%s"
                                           WHERE time > %s""" % (COUNT_MEASUREMENT_NAME, TEMP_COUNT_MEASUREMENT, get_influx_query_timestamp(timestamp)))
        except (InfluxDBServerError, InfluxDBClientError) as err:
            self.log.error("Cannot query influx: %s" % str(err), exc_info=True)
            raise

        try:
            data = result.get_points(measurement=TEMP_COUNT_MEASUREMENT).__next__()
            count += int(data['total'])
        except StopIteration:
            pass

        if cache_value:
            cache.set(
                InfluxListenStore.REDIS_INFLUX_TOTAL_LISTEN_COUNT,
                int(count),
                InfluxListenStore.TOTAL_LISTEN_COUNT_CACHE_TIME,
                encode=False,
            )
        return count
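Several listen-count examples in this listing store counts with encode=False and read them back with decode=False so that cache.increment() can work on the raw integer value. A minimal sketch of that interaction (the key name and expiry are assumptions; older brainzutils versions take time= for the expiry, newer ones take expirein=):

user_key = "listen_count.example_user"             # hypothetical key
cache.set(user_key, 10, time=300, encode=False)    # stored as the raw bytes b"10"
cache.increment(user_key)                          # bumps the stored integer to b"11"
count = int(cache.get(user_key, decode=False))     # decode=False returns bytes, so cast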
Example #13
def get_place_by_id(mbid):
    """Get place with the MusicBrainz ID.

    Args:
        mbid (uuid): MBID(gid) of the place.
    Returns:
        Dictionary containing the place information.
    """
    key = cache.gen_key(mbid)
    place = cache.get(key)
    if not place:
        place = _get_place_by_id(mbid)
        cache.set(key=key, val=place, time=DEFAULT_CACHE_EXPIRATION)
    return place_rel.process(place)
Example #14
def get_event_by_id(mbid):
    """Get event with the MusicBrainz ID.

    Args:
        mbid (uuid): MBID(gid) of the event.
    Returns:
        Dictionary containing the event information.
    """
    key = cache.gen_key(mbid)
    event = cache.get(key)
    if not event:
        event = _get_event_by_id(mbid)
        cache.set(key=key, val=event, time=DEFAULT_CACHE_EXPIRATION)
    return event
Example #15
def get_release_by_id(mbid):
    """Get release with the MusicBrainz ID.

    Args:
        mbid (uuid): MBID(gid) of the release.
    Returns:
        Dictionary containing the release information.
    """
    key = cache.gen_key(mbid)
    release = cache.get(key)
    if not release:
        release = _get_release_by_id(mbid)
        cache.set(key=key, val=release, time=DEFAULT_CACHE_EXPIRATION)
    return release
Example #16
    def create_record(cls, access_token, ip_address):
        """Creates new access log record with a current timestamp.

        It also checks if `DIFFERENT_IP_LIMIT` is exceeded within current time
        and `CLEANUP_RANGE_MINUTES`, alerts admins if that's the case.

        Args:
            access_token: Access token used to access the API.
            ip_address: IP address used to access the API.

        Returns:
            New access log record.
        """
        new_record = cls(
            token=access_token,
            ip_address=ip_address,
        )
        db.session.add(new_record)
        db.session.commit()

        # Checking if DIFFERENT_IP_LIMIT is exceeded
        count = cls.query \
            .distinct(cls.ip_address) \
            .filter(cls.timestamp > datetime.now(pytz.utc) - timedelta(minutes=CLEANUP_RANGE_MINUTES),
                    cls.token == access_token) \
            .count()
        if count > DIFFERENT_IP_LIMIT:
            msg = ("Hourly access threshold exceeded for token %s\n\n"
                   "This token has been used from %s different IP "
                   "addresses during the last %s minutes.") % \
                  (access_token, count, CLEANUP_RANGE_MINUTES)
            logging.info(msg)
            # Checking if notification for admins about this token abuse has
            # been sent in the last hour. This info is kept in cache.
            key = "alert_sent_%s" % access_token
            if not cache.get(key):
                send_mail(
                    subject="[MetaBrainz] Hourly access threshold exceeded",
                    recipients=current_app.config['NOTIFICATION_RECIPIENTS'],
                    text=msg,
                )
                cache.set(key, True, 3600)  # 1 hour

        return new_record
Example #17
    def get_timestamps_for_user(self, user_name):
        """ Return the max_ts and min_ts for a given user and cache the result in brainzutils cache
        """

        tss = cache.get(REDIS_USER_TIMESTAMPS % user_name)
        if tss:
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
        else:
            query = 'SELECT first(artist_msid) FROM ' + get_escaped_measurement_name(user_name)
            min_ts = self._select_single_timestamp(query, get_measurement_name(user_name))

            query = 'SELECT last(artist_msid) FROM ' + get_escaped_measurement_name(user_name)
            max_ts = self._select_single_timestamp(query, get_measurement_name(user_name))

            cache.set(REDIS_USER_TIMESTAMPS % user_name, "%d,%d" % (min_ts, max_ts), USER_CACHE_TIME)

        return min_ts, max_ts
Example #18
    def insert(self, listens):
        """ Insert a batch of listens.
        """

        submit = []
        user_names = {}
        for listen in listens:
            user_names[listen.user_name] = 1
            submit.append(listen.to_influx(quote(listen.user_name)))

        if not self.influx.write_points(submit, time_precision='s'):
            self.log.error("Cannot write data to influx. (write_points returned False), data=%s", json.dumps(submit, indent=3))

        # If we reach this point, we were able to write the listens to the InfluxListenStore.
        # So update the listen counts of the users cached in brainzutils cache.
        for data in submit:
            user_key = "{}{}".format(REDIS_INFLUX_USER_LISTEN_COUNT, data['fields']['user_name'])

            cached_count = cache.get(user_key, decode=False)
            if cached_count:
                cache.increment(user_key)

        # Invalidate cached data for user
        for user_name in user_names.keys():
            cache.delete(REDIS_USER_TIMESTAMPS % user_name)

        if len(listens):
            # Enter a measurement to count items inserted
            submit = [{
                'measurement': TEMP_COUNT_MEASUREMENT,
                'tags': {
                    COUNT_MEASUREMENT_NAME: len(listens)
                },
                'fields': {
                    COUNT_MEASUREMENT_NAME: len(listens)
                }
            }]
            try:
                if not self.influx.write_points(submit):
                    self.log.error("Cannot write listen cound to influx. (write_points returned False)")
            except (InfluxDBServerError, InfluxDBClientError, ValueError) as err:
                self.log.error("Cannot write data to influx: %s, data: %s", str(err), json.dumps(submit, indent=3), exc_info=True)
                raise
Example #19
    def get_listen_count_for_user(self, user_name, need_exact=False):
        """Get the total number of listens for a user. The number of listens comes from
           brainzutils cache unless an exact number is asked for.

        Args:
            user_name: the user to get listens for
            need_exact: if True, get an exact number of listens directly from the ListenStore
        """

        if not need_exact:
            # check if the user's listen count is already in cache
            # if already present return it directly instead of calculating it again
            # decode is set to False as we have not encoded the value when we set it
            # in brainzutils cache as we need to call increment operation which requires
            # an integer value
            user_key = '{}{}'.format(REDIS_INFLUX_USER_LISTEN_COUNT, user_name)
            count = cache.get(user_key, decode=False)
            if count:
                return int(count)

        try:
            results = self.influx.query(
                'SELECT count(*) FROM ' +
                get_escaped_measurement_name(user_name))
        except (InfluxDBServerError, InfluxDBClientError) as e:
            self.log.error("Cannot query influx: %s" % str(e), exc_info=True)
            raise

        # get the number of listens from the json
        try:
            count = results.get_points(measurement=get_measurement_name(
                user_name)).__next__()['count_recording_msid']
        except (KeyError, StopIteration):
            count = 0

        # put this value into brainzutils cache with an expiry time
        user_key = "{}{}".format(REDIS_INFLUX_USER_LISTEN_COUNT, user_name)
        cache.set(user_key,
                  int(count),
                  InfluxListenStore.USER_LISTEN_COUNT_CACHE_TIME,
                  encode=False)
        return int(count)
Example #20
    def insert(self, listens):
        """
            Insert a batch of listens. Returns a list of (listened_at, track_name, user_name) that indicates
            which rows were inserted into the DB. If the row is not listed in the return values, it was a duplicate.
        """

        submit = []
        user_names = {}
        for listen in listens:
            user_names[listen.user_name] = 1
            submit.append(listen.to_timescale())

        query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                        VALUES %s
                   ON CONFLICT (listened_at, track_name, user_name)
                    DO NOTHING
                     RETURNING listened_at, track_name, user_name"""

        inserted_rows = []
        conn = timescale.engine.raw_connection()
        with conn.cursor() as curs:
            execute_values(curs, query, submit, template=None)
            while True:
                result = curs.fetchone()
                if not result:
                    break
                inserted_rows.append((result[0], result[1], result[2]))

        conn.commit()

        # So update the listen counts of the users cached in brainzutils cache.
        for _, _, user_name in inserted_rows:
            user_key = "{}{}".format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, user_name)
            cached_count = cache.get(user_key, decode=False)
            if cached_count:
                cache.increment(user_key)

        # Invalidate cached data for user
        for user_name in user_names:
            cache.delete(self.ns + REDIS_USER_TIMESTAMPS % user_name)

        return inserted_rows
Example #21
def get_place_by_id(id):
    """Get event with the MusicBrainz ID.

    Returns:
        Event object with the following includes: artist-rels, place-rels, series-rels, url-rels.
    """
    key = cache.gen_key(id)
    place = cache.get(key)
    if not place:
        try:
            place = musicbrainzngs.get_place_by_id(id, [
                'artist-rels', 'place-rels', 'release-group-rels', 'url-rels'
            ]).get('place')
        except ResponseError as e:
            if e.cause.code == 404:
                return None
            else:
                raise InternalServerError(e.cause.msg)
        cache.set(key=key, val=place, time=DEFAULT_CACHE_EXPIRATION)
    return place
Example #22
def get_place_by_id(mbid):
    """Get place with the MusicBrainz ID.

    Args:
        mbid (uuid): MBID(gid) of the place.
    Returns:
        Dictionary containing the place information.
    """
    key = cache.gen_key('place', mbid)
    place = cache.get(key)
    if not place:
        place = db.get_place_by_id(
            mbid,
            includes=[
                'artist-rels', 'place-rels', 'release-group-rels', 'url-rels'
            ],
            unknown_entities_for_missing=True,
        )
        cache.set(key=key, val=place, time=DEFAULT_CACHE_EXPIRATION)
    return place_rel.process(place)
Example #23
def get_event_by_id(mbid):
    """Get event with the MusicBrainz ID.

    Args:
        mbid (uuid): MBID(gid) of the event.
    Returns:
        Dictionary containing the event information.
    """
    key = cache.gen_key('event', mbid)
    event = cache.get(key)
    if not event:
        event = db.fetch_multiple_events(
            [mbid],
            includes=[
                'artist-rels', 'place-rels', 'series-rels', 'url-rels',
                'release-group-rels'
            ],
        ).get(mbid)
        cache.set(key=key, val=event, time=DEFAULT_CACHE_EXPIRATION)
    return event
Example #24
    def get_timestamps_for_user(self, user_name):
        """ Return the max_ts and min_ts for a given user and cache the result in brainzutils cache
        """

        tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
        if tss:
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
        else:
            t0 = time.monotonic()
            min_ts = self._select_single_timestamp(True, user_name)
            max_ts = self._select_single_timestamp(False, user_name)
            cache.set(REDIS_USER_TIMESTAMPS + user_name,
                      "%d,%d" % (min_ts, max_ts),
                      expirein=0)
            # intended for production monitoring
            self.log.info("timestamps %s %.2fs" %
                          (user_name, time.monotonic() - t0))

        return min_ts, max_ts
Example #25
def _fetch_access_token(refresh=False) -> str:
    """Get an access token from the OAuth credentials.

    https://developer.spotify.com/web-api/authorization-guide/#client-credentials-flow
    """
    key = cache.gen_key("spotify_oauth_access_token")
    access_token = cache.get(key)
    if refresh or not access_token:
        client_id = app.config.get("SPOTIFY_CLIENT_ID")
        client_secret = app.config.get("SPOTIFY_CLIENT_SECRET")
        auth_value = b64encode(bytes(f"{client_id}:{client_secret}", "utf-8")).decode("utf-8")
        response = requests.post(
            "https://accounts.spotify.com/api/token",
            data={"grant_type": "client_credentials"},
            headers={"Authorization": f"Basic {auth_value}"},
        ).json()
        access_token = response.get("access_token")
        if not access_token:
            raise SpotifyException("Could not fetch access token for Spotify API")
        # Making the token stored in cache expire at the same time as the actual token
        cache.set(key=key, val=access_token, time=response.get("expires_in", 10))
    return access_token
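A caller can pass refresh=True to bypass the cached token when Spotify rejects it; a hypothetical usage sketch (the search request below is not part of the example above):

token = _fetch_access_token()
resp = requests.get(
    "https://api.spotify.com/v1/search",
    params={"q": "nirvana", "type": "artist"},
    headers={"Authorization": f"Bearer {token}"},
)
if resp.status_code == 401:  # cached token expired early or was revoked
    token = _fetch_access_token(refresh=True)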
Example #26
    def lookup_ips(users):
        """ Try to lookup and cache as many reverse DNS as possible in a window of time """

        data = []
        timeout = time.monotonic() + StatsView.IP_ADDR_TIMEOUT
        for user in users:
            row = list(user)

            reverse = cache.get(user[0])
            if not reverse: 
                if time.monotonic() < timeout:
                    reverse = StatsView.dns_lookup(user[0])
                else:
                    reverse = None

            if reverse:
                cache.set(user[0], reverse, 3600)
                row[0] = reverse

            data.append(row)

        return data
Example #27
def get_top_users_overall():
    """ Gets top contributors since the beginning

    Returns:
        List of dictionaries where each dictionary has the following structure:
        {
            "id": (str),
            "display_name": (str),
            "review_count": (int),
            "comment_count": (int),
            "vote_count": (int),
            "score": (int),
        }
    """
    key = cache.gen_key("top_users_overall", _CACHE_NAMESPACE)
    top_users = cache.get(key, _CACHE_NAMESPACE)

    # if could not fetch results from cache, or fetched results have to be updated
    if not top_users:

        try:
            results = get_top_users(
                review_weight=5,
                comment_weight=2,
                vote_weight=1,
            )

            top_users = {
                "users": results,
            }

            cache.set(key=key,
                      val=top_users,
                      namespace=_CACHE_NAMESPACE,
                      time=_DEFAULT_CACHE_EXPIRATION)
        except db_exceptions.NoDataFoundException:
            return None
    return top_users["users"]
Example #28
    def lookup_ips(users):
        """ Try to lookup and cache as many reverse DNS as possible in a window of time """

        data = []
        timeout = time.monotonic() + StatsView.IP_ADDR_TIMEOUT
        for user in users:
            row = list(user)

            reverse = cache.get(user[0])
            if not reverse:
                if time.monotonic() < timeout:
                    reverse = StatsView.dns_lookup(user[0])
                else:
                    reverse = None

            if reverse:
                cache.set(user[0], reverse, 3600)
                row[0] = reverse

            data.append(row)

        return data
Example #29
def get_release_group_by_id(id):
    """Get release group with the MusicBrainz ID.

    Returns:
        Release group object with the following includes: artists, releases,
        release-group-rels, url-rels, work-rels.
    """
    key = cache.gen_key(id)
    release_group = cache.get(key)
    if not release_group:
        try:
            release_group = musicbrainzngs.get_release_group_by_id(
                id, [
                    'artists', 'releases', 'release-group-rels', 'url-rels',
                    'work-rels', 'tags'
                ]).get('release-group')
        except ResponseError as e:
            if e.cause.code == 404:
                return None
            else:
                raise InternalServerError(e.cause.msg)
        cache.set(key=key, val=release_group, time=DEFAULT_CACHE_EXPIRATION)
    return release_group_rel.process(release_group)
Example #30
    def get_listen_count_for_user(self, user_name, need_exact=False):
        """Get the total number of listens for a user. The number of listens comes from
           brainzutils cache unless an exact number is asked for.

        Args:
            user_name: the user to get listens for
            need_exact: if True, get an exact number of listens directly from the ListenStore
        """

        if not need_exact:
            # check if the user's listen count is already in cache
            # if already present return it directly instead of calculating it again
            # decode is set to False as we have not encoded the value when we set it
            # in brainzutils cache as we need to call increment operation which requires
            # an integer value
            user_key = '{}{}'.format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, user_name)
            count = cache.get(user_key, decode=False)
            if count:
                return int(count)

        query = "SELECT SUM(count) FROM listen_count WHERE user_name = :user_name"

        try:
            with timescale.engine.connect() as connection:
                result = connection.execute(sqlalchemy.text(query), {
                    "user_name": user_name,
                })
                count = int(result.fetchone()[0] or 0)

        except psycopg2.OperationalError as e:
            self.log.error("Cannot query timescale listen_count: %s" % str(e), exc_info=True)
            raise

        # put this value into brainzutils cache with an expiry time
        user_key = "{}{}".format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, user_name)
        cache.set(user_key, count, TimescaleListenStore.USER_LISTEN_COUNT_CACHE_TIME, encode=False)
        return count
Example #31
    def get_timestamps_for_user(self, user_name):
        """ Return the max_ts and min_ts for a given user and cache the result in brainzutils cache
        """

        tss = cache.get(REDIS_USER_TIMESTAMPS % user_name)
        if tss:
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
        else:
            query = 'SELECT first(artist_msid) FROM ' + get_escaped_measurement_name(
                user_name)
            min_ts = self._select_single_timestamp(
                query, get_measurement_name(user_name))

            query = 'SELECT last(artist_msid) FROM ' + get_escaped_measurement_name(
                user_name)
            max_ts = self._select_single_timestamp(
                query, get_measurement_name(user_name))

            cache.set(REDIS_USER_TIMESTAMPS % user_name,
                      "%d,%d" % (min_ts, max_ts), USER_CACHE_TIME)

        return min_ts, max_ts
Example #32
    def get_listen_count_for_user(self, user_name, need_exact=False):
        """Get the total number of listens for a user. The number of listens comes from
           brainzutils cache unless an exact number is asked for.

        Args:
            user_name: the user to get listens for
            need_exact: if True, get an exact number of listens directly from the ListenStore
        """

        if not need_exact:
            # check if the user's listen count is already in cache
            # if already present return it directly instead of calculating it again
            # decode is set to False as we have not encoded the value when we set it
            # in brainzutils cache as we need to call increment operation which requires
            # an integer value
            user_key = '{}{}'.format(REDIS_INFLUX_USER_LISTEN_COUNT, user_name)
            count = cache.get(user_key, decode=False)
            if count:
                return int(count)

        try:
            results = self.influx.query('SELECT count(*) FROM ' + get_escaped_measurement_name(user_name))
        except (InfluxDBServerError, InfluxDBClientError) as e:
            self.log.error("Cannot query influx: %s" % str(e), exc_info=True)
            raise

        # get the number of listens from the json
        try:
            count = results.get_points(measurement = get_measurement_name(user_name)).__next__()['count_recording_msid']
        except (KeyError, StopIteration):
            count = 0

        # put this value into brainzutils cache with an expiry time
        user_key = "{}{}".format(REDIS_INFLUX_USER_LISTEN_COUNT, user_name)
        cache.set(user_key, int(count), InfluxListenStore.USER_LISTEN_COUNT_CACHE_TIME, encode=False)
        return int(count)
Example #33
    def get_total_listen_count(self):
        """ Returns the total number of listens stored in the ListenStore.
            First checks the brainzutils cache for the value, if not present there
            makes a query to the db and caches it in brainzutils cache.
        """
        count = cache.get(REDIS_TOTAL_LISTEN_COUNT)
        if count:
            return count

        query = "SELECT SUM(count) AS value FROM listen_user_metadata"
        try:
            with timescale.engine.connect() as connection:
                result = connection.execute(sqlalchemy.text(query))
                # psycopg2 returns the `value` as a DECIMAL type which is not recognized
                # by msgpack/redis. so cast to python int first.
                count = int(result.fetchone()["value"] or 0)
        except psycopg2.OperationalError:
            self.log.error("Cannot query listen counts:", exc_info=True)
            raise

        cache.set(REDIS_TOTAL_LISTEN_COUNT,
                  count,
                  expirein=REDIS_USER_LISTEN_COUNT_EXPIRY)
        return count
Example #34
    def test_single_no_encode(self):
        self.assertTrue(cache.set("no encode", 1, encode=False))
        self.assertEqual(cache.get("no encode", decode=False), b"1")
Example #35
    def get_popular(cls, limit=None):
        """Get list of popular reviews.

        Popularity is determined by rating of a particular review. Rating is a
        difference between positive votes and negative. In this case only votes
        from the last month are used to calculate rating.

        Results are cached for 1 hour.

        Args:
            limit: Maximum number of reviews to return.

        Returns:
            Randomized list of popular reviews which are converted into
            dictionaries using to_dict method.
        """
        cache_key = cache.gen_key('popular_reviews', limit)
        reviews = cache.get(cache_key, Review.CACHE_NAMESPACE)

        if not reviews:
            # Selecting reviews for distinct release groups
            # TODO(roman): There is a problem with selecting popular reviews like
            # this: if there are multiple reviews for a release group we don't
            # choose the most popular.
            distinct_subquery = db.session.query(Review) \
                .filter(Review.is_draft == False) \
                .distinct(Review.entity_id).subquery()

            # Randomizing results to get some variety
            rand_subquery = db.session.query(aliased(Review, distinct_subquery)) \
                .order_by(func.random()).subquery()

            # Sorting reviews by rating
            query = db.session.query(aliased(Review, rand_subquery))

            # Preparing base query for getting votes
            vote_query_base = db.session.query(
                Vote.revision_id, Vote.vote, func.count().label('c')) \
                .group_by(Vote.revision_id, Vote.vote) \
                .filter(Vote.rated_at > datetime.now() - timedelta(weeks=4))

            # Getting positive votes
            votes_pos = vote_query_base.subquery('votes_pos')
            query = query.outerjoin(Revision).outerjoin(
                votes_pos,
                and_(votes_pos.c.revision_id == Revision.id,
                     votes_pos.c.vote == True))

            # Getting negative votes
            votes_neg = vote_query_base.subquery('votes_neg')
            query = query.outerjoin(Revision).outerjoin(
                votes_neg,
                and_(votes_neg.c.revision_id == Revision.id,
                     votes_neg.c.vote == False))

            query = query.order_by(
                desc(
                    func.coalesce(votes_pos.c.c, 0) -
                    func.coalesce(votes_neg.c.c, 0)))

            if limit is not None:
                # Selecting more reviews than needed so we'll have something
                # different to show (shuffling is done below).
                query = query.limit(limit * 4)

            reviews = query.all()
            reviews = [review.to_dict(confidential=True) for review in reviews]
            cache.set(cache_key, reviews, 1 * 60 * 60,
                      Review.CACHE_NAMESPACE)  # 1 hour

        shuffle(reviews)  # a bit more variety
        return reviews[:limit]
Example #36
    def add_legacy_listens_to_queue(self):
        """Fetch more legacy listens from the listens table by doing an left join
           on the matched listens, finding the next chunk of legacy listens to look up.
           Listens are added to the queue with a low priority."""

        # Check to see where we need to pick up from, or start new
        if not self.legacy_listens_index_date:
            dt = cache.get(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY, decode=False) or b""
            try:
                self.legacy_listens_index_date = int(
                    datetime.datetime.strptime(str(dt, "utf-8"), "%Y-%m-%d").timestamp())
                self.app.logger.info("Loaded date index from cache: %d %s" % (
                    self.legacy_listens_index_date, str(dt)))
            except ValueError:
                self.legacy_listens_index_date = int(
                    datetime.datetime.now().timestamp())
                self.app.logger.info("Use date index now()")

        # Check to see if we're done
        if self.legacy_listens_index_date < DATA_START_YEAR_IN_SECONDS - LEGACY_LISTENS_LOAD_WINDOW:
            self.app.logger.info(
                "Finished looking up all legacy listens! Wooo!")
            self.legacy_next_run = monotonic() + UNMATCHED_LISTENS_COMPLETED_TIMEOUT
            self.legacy_listens_index_date = int(datetime.datetime.now().timestamp())
            self.num_legacy_listens_loaded = 0
            dt = datetime.datetime.fromtimestamp(self.legacy_listens_index_date)
            cache.set(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY, dt.strftime("%Y-%m-%d"), expirein=0, encode=False)
            return

        # Load listens
        self.app.logger.info("Load more legacy listens for %s" % datetime.datetime.fromtimestamp(
            self.legacy_listens_index_date).strftime("%Y-%m-%d"))
        query = """SELECT data->'track_metadata'->'additional_info'->>'recording_msid'::TEXT AS recording_msid,
                          track_name,
                          data->'track_metadata'->'artist_name' AS artist_name
                     FROM listen
                LEFT JOIN listen_join_listen_mbid_mapping lj
                       ON data->'track_metadata'->'additional_info'->>'recording_msid' = lj.recording_msid::text
                 WHERE lj.recording_msid IS NULL
                      AND listened_at <= :max_ts
                      AND listened_at > :min_ts"""

        count = 0
        with timescale.engine.connect() as connection:
            curs = connection.execute(sqlalchemy.text(query),
                                      max_ts=self.legacy_listens_index_date,
                                      min_ts=self.legacy_listens_index_date - LEGACY_LISTENS_LOAD_WINDOW)
            while True:
                result = curs.fetchone()
                if not result:
                    break

                self.queue.put(JobItem(LEGACY_LISTEN, [{"data": {"artist_name": result[2],
                                                                 "track_name": result[1]},
                                                        "recording_msid": result[0],
                                                        "legacy": True}]))
                count += 1

        # update cache entry and count
        self.legacy_listens_index_date -= LEGACY_LISTENS_LOAD_WINDOW
        dt = datetime.datetime.fromtimestamp(self.legacy_listens_index_date)
        cache.set(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY, dt.strftime("%Y-%m-%d"), expirein=0, encode=False)
        self.num_legacy_listens_loaded = count
Example #37
    def test_single_with_namespace(self):
        self.assertTrue(cache.set("test", 42, namespace="testing"))
        self.assertEqual(cache.get("test", namespace="testing"), 42)
Example #38
def get_popular(limit=None):
    """Get a list of popular reviews.

    Popularity is determined by the popularity score of a particular review, which is
    the difference between its positive and negative votes. Only votes from the last
    month are used to calculate popularity, to make the results more varied.

    Args:
        limit (int): Maximum number of reviews to return.

    Returns:
        Randomized list of popular reviews which are converted into
        dictionaries using to_dict method.
    """
    cache_key = cache.gen_key("popular_reviews", limit)
    reviews = cache.get(cache_key, REVIEW_CACHE_NAMESPACE)
    defined_limit = 4 * limit if limit else None

    if not reviews:
        with db.engine.connect() as connection:
            results = connection.execute(sqlalchemy.text("""
                SELECT review.id,
                       review.entity_id,
                       review.entity_type,
                       review.user_id,
                       review.edits,
                       review.is_draft,
                       review.is_hidden,
                       review.license_id,
                       review.language,
                       review.source,
                       review.source_url,
                       SUM(
                           CASE WHEN vote = 't' THEN 1
                                WHEN vote = 'f' THEN -1
                                WHEN vote IS NULL THEN 0 END
                       ) AS popularity,
                       latest_revision.id AS latest_revision_id,
                       latest_revision.timestamp AS latest_revision_timestamp,
                       latest_revision.text AS text,
                       latest_revision.rating AS rating
                  FROM review
                  JOIN revision ON revision.review_id = review.id
             LEFT JOIN (
                        SELECT revision_id, vote
                          FROM vote
                         WHERE rated_at > :last_month
                       ) AS votes_last_month
                    ON votes_last_month.revision_id = revision.id
                  JOIN (
                        revision JOIN (
                          SELECT review.id AS review_uuid,
                                 MAX(timestamp) AS latest_timestamp
                            FROM review
                            JOIN revision ON review.id = review_id
                        GROUP BY review.id
                        ) AS latest ON latest.review_uuid = revision.review_id
                        AND latest.latest_timestamp = revision.timestamp
                       ) AS latest_revision
                    ON review.id = latest_revision.review_id
                 WHERE entity_id
                    IN (
                        SELECT DISTINCT entity_id
                          FROM (
                        SELECT entity_id
                          FROM review
                      ORDER BY RANDOM()
                      ) AS randomized_entity_ids
                    )
                   AND latest_revision.text IS NOT NULL
                   AND review.is_hidden = 'f'
                   AND review.is_draft = 'f'
              GROUP BY review.id, latest_revision.id
              ORDER BY popularity
                 LIMIT :limit
            """), {
                "limit": defined_limit,
                "last_month": datetime.now() - timedelta(weeks=4)
            })
            reviews = results.fetchall()

        reviews = [dict(review) for review in reviews]
        if reviews:
            for review in reviews:
                review["rating"] = RATING_SCALE_1_5.get(review["rating"])
                review["last_revision"] = {
                    "id": review.pop("latest_revision_id"),
                    "timestamp": review.pop("latest_revision_timestamp"),
                    "text": review["text"],
                    "rating": review["rating"],
                    "review_id": review["id"],
                }
            reviews = [to_dict(review, confidential=True) for review in reviews]
        cache.set(cache_key, reviews, 1 * 60 * 60, REVIEW_CACHE_NAMESPACE)  # 1 hour
    shuffle(reviews)
    return reviews[:limit]
Example #39
    def test_expire(self):
        cache.set("a", 1, time=100)
        self.assertEqual(cache.expire("a", 1), True)
        sleep(1.1)
        self.assertEqual(cache.get("a"), None)
Example #40
    def add_legacy_listens_to_queue(self):
        """Fetch more legacy listens from the listens table by doing an left join
           on the matched listens, finding the next chunk of legacy listens to look up.
           Listens are added to the queue with a low priority."""

        # Find listens that have no entry in the mapping yet.
        legacy_query = """SELECT data->'track_metadata'->'additional_info'->>'recording_msid'::TEXT AS recording_msid
                            FROM listen
                       LEFT JOIN mbid_mapping m
                              ON data->'track_metadata'->'additional_info'->>'recording_msid' = m.recording_msid::text
                           WHERE m.recording_msid IS NULL
                             AND listened_at <= :max_ts
                             AND listened_at > :min_ts"""

        # Find mapping rows that need to be rechecked
        recheck_query = """SELECT recording_msid
                             FROM mbid_mapping
                            WHERE last_updated = '1970-01-01'
                            LIMIT %d""" % RECHECK_BATCH_SIZE

        # Check to see where we need to pick up from, or start new
        if not self.legacy_listens_index_date:
            dt = cache.get(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY,
                           decode=False) or b""
            try:
                self.legacy_listens_index_date = int(
                    datetime.datetime.strptime(str(dt, "utf-8"),
                                               "%Y-%m-%d").timestamp())
                self.app.logger.info("Loaded date index from cache: %d %s" %
                                     (self.legacy_listens_index_date, str(dt)))
            except ValueError:
                self.legacy_listens_index_date = int(
                    datetime.datetime.now().timestamp())
                self.app.logger.info("Use date index now()")

        # Check to see if we're done
        if self.legacy_listens_index_date < DATA_START_YEAR_IN_SECONDS - LEGACY_LISTENS_LOAD_WINDOW:
            self.app.logger.info(
                "Finished looking up all legacy listens! Wooo!")
            self.legacy_next_run = monotonic(
            ) + UNMATCHED_LISTENS_COMPLETED_TIMEOUT
            self.legacy_listens_index_date = int(
                datetime.datetime.now().timestamp())
            self.num_legacy_listens_loaded = 0
            dt = datetime.datetime.fromtimestamp(
                self.legacy_listens_index_date)
            cache.set(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY,
                      dt.strftime("%Y-%m-%d"),
                      expirein=0,
                      encode=False)

            return

        # Check to see if any listens have been marked for re-check
        count = self.fetch_and_queue_listens(recheck_query, {})
        if count > 0:
            self.app.logger.info("Loaded %d listens to be rechecked." % count)
            return
        else:
            # If none, check for old legacy listens
            count = self.fetch_and_queue_listens(
                legacy_query, {
                    "max_ts":
                    self.legacy_listens_index_date,
                    "min_ts":
                    self.legacy_listens_index_date - LEGACY_LISTENS_LOAD_WINDOW
                })
            self.app.logger.info(
                "Loaded %s more legacy listens for %s" %
                (count,
                 datetime.datetime.fromtimestamp(
                     self.legacy_listens_index_date).strftime("%Y-%m-%d")))

        # update cache entry and count
        self.legacy_listens_index_date -= LEGACY_LISTENS_LOAD_WINDOW
        dt = datetime.datetime.fromtimestamp(self.legacy_listens_index_date)
        cache.set(LEGACY_LISTENS_INDEX_DATE_CACHE_KEY,
                  dt.strftime("%Y-%m-%d"),
                  expirein=0,
                  encode=False)
        self.num_legacy_listens_loaded = count
Example #41
    def test_single_with_namespace(self):
        self.assertTrue(cache.set("test", 42, namespace="testing"))
        self.assertEqual(cache.get("test", namespace="testing"), 42)
Example #42
    def test_no_init(self):
        cache._r = None
        with self.assertRaises(RuntimeError):
            cache.set("test", "testing")
        with self.assertRaises(RuntimeError):
            cache.get("test")
Example #43
def recalculate_all_user_data():

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST,
               port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        date=datetime.now() -
                                        timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        logger.error("Cannot query ts to fetch latest listen." % str(e),
                     exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." %
                len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name,
                  0,
                  expirein=0,
                  encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", expirein=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1

    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        try:
            cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                            amount=listen_counts[user_name])
        except KeyError:
            pass

        try:
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(
                REDIS_USER_TIMESTAMPS + user_name,
                "%d,%d" %
                (user_timestamps[user_name][0], user_timestamps[user_name][1]),
                expirein=0)
        except KeyError:
            pass
Example #44
    def test_delete(self):
        key = "testing"
        self.assertTrue(cache.set(key, u"Пример"))
        self.assertEqual(cache.get(key), u"Пример")
        self.assertEqual(cache.delete(key), 1)
        self.assertIsNone(cache.get(key))
Example #45
def review_list_handler():
    """Get list of reviews.

    **Request Example:**

    .. code-block:: bash

        $ curl "https://critiquebrainz.org/ws/1/review/?limit=1&offset=50" \\
                -X GET

    **Response Example:**

    .. code-block:: json

        {
          "count": 9197,
          "limit": 1,
          "offset": 50,
          "reviews": [
            {
              "created": "Fri, 16 May 2008 00:00:00 GMT",
              "edits": 0,
              "entity_id": "09259937-6477-3959-8b10-af1cbaea8e6e",
              "entity_type": "release_group",
              "id": "c807d0b4-0dd0-43fe-a7c4-d29bb61f389e",
              "language": "en",
              "last_updated": "Fri, 16 May 2008 00:00:00 GMT",
              "license": {
                "full_name": "Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported",
                "id": "CC BY-NC-SA 3.0",
                "info_url": "https://creativecommons.org/licenses/by-nc-sa/3.0/"
              },
              "popularity": 0,
              "source": "BBC",
              "source_url": "http://www.bbc.co.uk/music/reviews/vh54",
              "text": "TEXT CONTENT OF REVIEW",
              "rating": 5,
              "user": {
                "created": "Wed, 07 May 2014 16:20:47 GMT",
                "display_name": "Jenny Nelson",
                "id": "3bf3fe0c-6db2-4746-bcf1-f39912113852",
                "karma": 0,
                "user_type": "Noob"
              },
              "votes": {
                "positive": 0,
                "negative": 0
              }
            }
          ]
        }

    :json uuid entity_id: UUID of the release group that is being reviewed
    :json string entity_type: One of the supported reviewable entities. 'release_group' or 'event' etc. **(optional)**
    :query user_id: user's UUID **(optional)**
    :query sort: ``popularity`` or ``published_on`` **(optional)**
    :query limit: results limit, min is 0, max is 50, default is 50 **(optional)**
    :query offset: result offset, default is 0 **(optional)**
    :query language: language code (ISO 639-1) **(optional)**

    :resheader Content-Type: *application/json*
    """
    # TODO: This checking is added to keep old clients working and needs to be removed.
    release_group = Parser.uuid('uri', 'release_group', optional=True)
    if release_group:
        entity_id = release_group
        entity_type = 'release_group'
    else:
        entity_id = Parser.uuid('uri', 'entity_id', optional=True)
        entity_type = Parser.string('uri', 'entity_type', valid_values=ENTITY_TYPES, optional=True)

    user_id = Parser.uuid('uri', 'user_id', optional=True)
    # TODO: "rating" sort value is deprecated and needs to be removed.
    sort = Parser.string('uri', 'sort', valid_values=['popularity', 'published_on', 'rating'], optional=True)
    if sort == 'rating':
        sort = 'popularity'
    limit = Parser.int('uri', 'limit', min=1, max=50, optional=True) or 50
    offset = Parser.int('uri', 'offset', optional=True) or 0
    language = Parser.string('uri', 'language', min=2, max=3, optional=True)
    if language and language not in supported_languages:
        raise InvalidRequest(desc='Unsupported language')

    # TODO(roman): Ideally caching logic should live inside the model. Otherwise it
    # becomes hard to track all this stuff.
    cache_key = cache.gen_key('list', entity_id, user_id, sort, limit, offset, language)
    cached_result = cache.get(cache_key, REVIEW_CACHE_NAMESPACE)
    if cached_result:
        reviews = cached_result['reviews']
        count = cached_result['count']

    else:
        reviews, count = db_review.list_reviews(
            entity_id=entity_id,
            entity_type=entity_type,
            user_id=user_id,
            sort=sort,
            limit=limit,
            offset=offset,
            language=language,
        )
        reviews = [db_review.to_dict(p) for p in reviews]
        cache.set(cache_key, {
            'reviews': reviews,
            'count': count,
        }, namespace=REVIEW_CACHE_NAMESPACE)

    return jsonify(limit=limit, offset=offset, count=count, reviews=reviews)
Example #46
    def test_single(self):
        self.assertTrue(cache.set("test2", "Hello!"))
        self.assertEqual(cache.get("test2"), "Hello!")
Example #47
    def test_single_fancy(self):
        self.assertTrue(cache.set("test3", u"Привет!"))
        self.assertEqual(cache.get("test3"), u"Привет!")
Example #48
def review_list_handler():
    """Get list of reviews.

    **Request Example:**

    .. code-block:: bash

        $ curl "https://critiquebrainz.org/ws/1/review/?limit=1&offset=50" \\
                -X GET

    **Response Example:**

    .. code-block:: json

        {
          "count": 9197,
          "limit": 1,
          "offset": 50,
          "reviews": [
            {
              "created": "Fri, 16 May 2008 00:00:00 GMT",
              "edits": 0,
              "entity_id": "09259937-6477-3959-8b10-af1cbaea8e6e",
              "entity_type": "release_group",
              "id": "c807d0b4-0dd0-43fe-a7c4-d29bb61f389e",
              "language": "en",
              "last_updated": "Fri, 16 May 2008 00:00:00 GMT",
              "license": {
                "full_name": "Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported",
                "id": "CC BY-NC-SA 3.0",
                "info_url": "https://creativecommons.org/licenses/by-nc-sa/3.0/"
              },
              "popularity": 0,
              "source": "BBC",
              "source_url": "http://www.bbc.co.uk/music/reviews/vh54",
              "text": "TEXT CONTENT OF REVIEW",
              "rating": 5,
              "user": {
                "created": "Wed, 07 May 2014 16:20:47 GMT",
                "display_name": "Jenny Nelson",
                "id": "3bf3fe0c-6db2-4746-bcf1-f39912113852",
                "karma": 0,
                "user_type": "Noob"
              },
              "votes": {
                "positive": 0,
                "negative": 0
              }
            }
          ]
        }

    :json uuid entity_id: UUID of the entity that is being reviewed
    :json string entity_type: type of the entity being reviewed, one of the supported reviewable entities ('release_group', 'event', etc.) **(optional)**
    :query user_id: user's UUID **(optional)**
    :query sort: ``popularity`` or ``published_on`` **(optional)**
    :query limit: results limit, min is 1, max is 50, default is 50 **(optional)**
    :query offset: result offset, default is 0 **(optional)**
    :query language: language code (ISO 639-1) **(optional)**

    :resheader Content-Type: *application/json*
    """
    # TODO: This checking is added to keep old clients working and needs to be removed.
    release_group = Parser.uuid('uri', 'release_group', optional=True)
    if release_group:
        entity_id = release_group
        entity_type = 'release_group'
    else:
        entity_id = Parser.uuid('uri', 'entity_id', optional=True)
        entity_type = Parser.string('uri',
                                    'entity_type',
                                    valid_values=ENTITY_TYPES,
                                    optional=True)

    user_id = Parser.uuid('uri', 'user_id', optional=True)
    sort = Parser.string(
        'uri',
        'sort',
        valid_values=['popularity', 'published_on', 'rating', 'created'],
        optional=True)

    # "rating" and "created" sort values are deprecated and but allowed here for backward compatibility
    if sort == 'created':
        sort = 'published_on'
    if sort == 'rating':
        sort = 'popularity'

    limit = Parser.int('uri', 'limit', min=1, max=50, optional=True) or 50
    offset = Parser.int('uri', 'offset', optional=True) or 0
    language = Parser.string('uri', 'language', min=2, max=3, optional=True)
    if language and language not in supported_languages:
        raise InvalidRequest(desc='Unsupported language')

    # TODO(roman): Ideally caching logic should live inside the model. Otherwise it
    # becomes hard to track all this stuff.
    cache_key = cache.gen_key('list', entity_id, user_id, sort, limit, offset,
                              language)
    cached_result = cache.get(cache_key, REVIEW_CACHE_NAMESPACE)
    if cached_result:
        reviews = cached_result['reviews']
        count = cached_result['count']

    else:
        reviews, count = db_review.list_reviews(
            entity_id=entity_id,
            entity_type=entity_type,
            user_id=user_id,
            sort=sort,
            limit=limit,
            offset=offset,
            language=language,
        )
        reviews = [db_review.to_dict(p) for p in reviews]
        cache.set(cache_key, {
            'reviews': reviews,
            'count': count,
        },
                  namespace=REVIEW_CACHE_NAMESPACE)

    return jsonify(limit=limit, offset=offset, count=count, reviews=reviews)
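
The handler reads entity_id, entity_type, user_id, sort, limit, offset and language from the query string. A minimal client-side sketch of how those parameters might be supplied (hypothetical script; the endpoint URL and example MBID are taken from the curl request and JSON response shown in the docstring):

import requests

# Hypothetical call against the public API; parameter names mirror the ones parsed above.
response = requests.get(
    "https://critiquebrainz.org/ws/1/review/",
    params={
        "entity_id": "09259937-6477-3959-8b10-af1cbaea8e6e",
        "entity_type": "release_group",
        "sort": "popularity",
        "limit": 5,
        "offset": 0,
        "language": "en",
    },
)
response.raise_for_status()
payload = response.json()
print(payload["count"], len(payload["reviews"]))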
Ejemplo n.º 50
0
 def test_no_init(self):
     cache._r = None
     with self.assertRaises(RuntimeError):
         cache.set("test", "testing")
     with self.assertRaises(RuntimeError):
         cache.get("test")
Ejemplo n.º 51
0
 def test_expireat(self):
     cache.set("a", 1, time=100)
     self.assertEqual(cache.expireat("a", int(time() + 1)), True)
     sleep(1.1)
     self.assertEqual(cache.get("a"), None)
Ejemplo n.º 53
0
 def test_delete(self):
     key = "testing"
     self.assertTrue(cache.set(key, u"Пример"))
     self.assertEqual(cache.get(key), u"Пример")
     self.assertEqual(cache.delete(key), 1)
     self.assertIsNone(cache.get(key))
Ejemplo n.º 54
0
def get_popular(limit=None):
    """Get a list of popular reviews.

    Popularity of a review is the difference between its positive and
    negative votes. Only votes from the last month are counted, which keeps
    the results varied.

    Args:
        limit (int): Maximum number of reviews to return.

    Returns:
        Randomized list of popular reviews, each converted into a dictionary
        with the to_dict method.
    """
    cache_key = cache.gen_key("popular_reviews", limit)
    reviews = cache.get(cache_key, REVIEW_CACHE_NAMESPACE)
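    # Over-fetch 4x the requested limit so the shuffle at the end can return a varied slice.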
    defined_limit = 4 * limit if limit else None

    if not reviews:
        with db.engine.connect() as connection:
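            # Latest revision of each review that has text, scored by votes (positive minus negative) cast in the last month.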
            results = connection.execute(
                sqlalchemy.text("""
                SELECT review.id,
                       review.entity_id,
                       review.entity_type,
                       review.user_id,
                       review.edits,
                       review.is_draft,
                       review.is_hidden,
                       review.license_id,
                       review.language,
                       review.source,
                       review.source_url,
                       SUM(
                           CASE WHEN vote = 't' THEN 1
                                WHEN vote = 'f' THEN -1
                                WHEN vote IS NULL THEN 0 END
                       ) AS popularity,
                       latest_revision.id AS latest_revision_id,
                       latest_revision.timestamp AS latest_revision_timestamp,
                       latest_revision.text AS text,
                       latest_revision.rating AS rating
                  FROM review
                  JOIN revision ON revision.review_id = review.id
             LEFT JOIN (
                        SELECT revision_id, vote
                          FROM vote
                         WHERE rated_at > :last_month
                       ) AS votes_last_month
                    ON votes_last_month.revision_id = revision.id
                  JOIN (
                        revision JOIN (
                          SELECT review.id AS review_uuid,
                                 MAX(timestamp) AS latest_timestamp
                            FROM review
                            JOIN revision ON review.id = review_id
                        GROUP BY review.id
                        ) AS latest ON latest.review_uuid = revision.review_id
                        AND latest.latest_timestamp = revision.timestamp
                       ) AS latest_revision
                    ON review.id = latest_revision.review_id
                 WHERE entity_id
                    IN (
                        SELECT DISTINCT entity_id
                          FROM (
                        SELECT entity_id
                          FROM review
                      ORDER BY RANDOM()
                      ) AS randomized_entity_ids
                    )
                   AND latest_revision.text IS NOT NULL
              GROUP BY review.id, latest_revision.id
              ORDER BY popularity
                 LIMIT :limit
            """), {
                    "limit": defined_limit,
                    "last_month": datetime.now() - timedelta(weeks=4)
                })
            reviews = results.fetchall()

        reviews = [dict(review) for review in reviews]
        if reviews:
            for review in reviews:
                review["rating"] = RATING_SCALE_1_5.get(review["rating"])
                review["last_revision"] = {
                    "id": review.pop("latest_revision_id"),
                    "timestamp": review.pop("latest_revision_timestamp"),
                    "text": review["text"],
                    "rating": review["rating"],
                    "review_id": review["id"],
                }
            reviews = [
                to_dict(review, confidential=True) for review in reviews
            ]
        cache.set(cache_key, reviews, 1 * 60 * 60,
                  REVIEW_CACHE_NAMESPACE)  # 1 hour
    shuffle(reviews)
    return reviews[:limit]
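
get_popular() caches the over-fetched pool for an hour and shuffles it on every call before slicing, so repeated requests return different reviews without hitting the database again. A toy illustration of that oversample-and-shuffle idea (function and variable names are hypothetical):

from random import shuffle

def pick_varied(pool, limit, oversample=4):
    """Return up to `limit` items from an oversampled, shuffled pool.

    Mirrors get_popular(): the pool is assumed to hold up to
    oversample * limit pre-ranked items; shuffling before slicing keeps
    repeated calls from returning the same subset.
    """
    candidates = list(pool)[:oversample * limit]
    shuffle(candidates)
    return candidates[:limit]

# Example: a pool of 20 cached review ids, 5 returned per call.
print(pick_varied(range(20), 5))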
Ejemplo n.º 55
0
 def test_single_no_encode(self):
     self.assertTrue(cache.set("no encode", 1, encode=False))
     self.assertEqual(cache.get("no encode", decode=False), b"1")