Code Example #1
def test_get_latest_play(db_mock):
    """Tests that the latest play is returned"""
    date1 = datetime(2020, 10, 4, 10, 35, 0)
    date2 = datetime(2020, 10, 1, 10, 10, 0)
    date3 = datetime(2020, 9, 20, 8, 1, 0)

    with db_mock.scoped_session() as session:
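        # Create the plays table on the mock engine before inserting rows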
        Play.__table__.create(db_mock._engine)
        session.add(Play(user_id=1, play_item_id=1, created_at=date1))
        session.add(Play(user_id=2, play_item_id=1, created_at=date2))
        session.add(Play(user_id=3, play_item_id=2, created_at=date3))

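    # date1 is the latest created_at of the seeded plays, so it should be returned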
    latest_play = get_latest_play()
    assert latest_play == date1
Code Example #2
def create_play(offset: int) -> Play:
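    # Reuse the offset as both the row id and a day offset so each
    # generated play is unique and strictly ordered in time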
    return Play(
        id=offset,
        user_id=1,
        source=None,
        play_item_id=1,
        slot=1,
        signature=None,
        updated_at=datetime.now() + timedelta(days=offset),
        created_at=datetime.now() + timedelta(days=offset),
    )
Code Example #3
def populate_mock_db(db, date1, date2):
    """Helper function to populate the mock DB with plays"""
    test_plays = [
        {"item_id": 1, "created_at": date1},
        {"item_id": 1, "created_at": date1},
        {"item_id": 1, "created_at": date2},
        {"item_id": 2, "created_at": date1},
        {"item_id": 2, "created_at": date2},
        {"item_id": 3, "created_at": date2},
        {"item_id": 3, "created_at": date2},
        {"item_id": 3, "created_at": date2},
    ]

    with db.scoped_session() as session:
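        # Use the enumeration index as the row id; created_at falls back to now() when unset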
        for i, play_meta in enumerate(test_plays):
            play = Play(id=i,
                        play_item_id=play_meta.get("item_id"),
                        created_at=play_meta.get("created_at", datetime.now()))
            session.add(play)
Code Example #4
def setup_trending(db):
    # pylint: disable=W0621
    with db.scoped_session() as session:
        # seed tracks + blocks
        for i, track_meta in enumerate(test_tracks):
            blockhash = hex(i)
            block = Block(
                blockhash=blockhash,
                number=i,
                parenthash='0x01',
                is_current=True,
            )

            track = Track(blockhash=blockhash,
                          blocknumber=i,
                          track_id=i,
                          is_current=track_meta.get("is_current", True),
                          is_delete=track_meta.get("is_delete", False),
                          owner_id=300,
                          route_id='',
                          track_segments=[],
                          genre=track_meta.get("genre", ""),
                          updated_at=track_meta.get("updated_at",
                                                    datetime.now()),
                          created_at=track_meta.get("created_at",
                                                    datetime.now()),
                          is_unlisted=track_meta.get("is_unlisted", False))

            # add the block and flush before adding the track,
            # because track.blocknumber is a foreign key
            # referencing block
            session.add(block)
            session.flush()
            session.add(track)

        # seed plays
        for i, play_meta in enumerate(test_plays):
            play = Play(id=i,
                        play_item_id=play_meta.get("item_id"),
                        created_at=play_meta.get("created_at", datetime.now()))
            session.add(play)
Code Example #5
def parse_sol_play_transaction(session, solana_client, tx_sig):
    try:
        tx_info = get_sol_tx_info(solana_client, tx_sig)
        logger.info(
            f"index_solana_plays.py | Got transaction: {tx_sig} | {tx_info}")
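        # Valid listen transactions must reference the SECP program in their account keys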
        if SECP_PROGRAM in tx_info["result"]["transaction"]["message"][
                "accountKeys"]:
            audius_program_index = tx_info["result"]["transaction"]["message"][
                "accountKeys"].index(TRACK_LISTEN_PROGRAM)
            for instruction in tx_info["result"]["transaction"]["message"][
                    "instructions"]:
                if instruction["programIdIndex"] == audius_program_index:
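                    # Pull the slot and decode user, track, source, and
                    # timestamp from the instruction data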
                    tx_slot = tx_info["result"]["slot"]
                    user_id, track_id, source, timestamp = parse_instruction_data(
                        instruction["data"])
                    created_at = datetime.datetime.utcfromtimestamp(timestamp)

                    logger.info("index_solana_plays.py | "
                                f"user_id: {user_id} "
                                f"track_id: {track_id} "
                                f"source: {source} "
                                f"created_at: {created_at} "
                                f"slot: {tx_slot} "
                                f"sig: {tx_sig}")

                    session.add(
                        Play(user_id=user_id,
                             play_item_id=track_id,
                             created_at=created_at,
                             source=source,
                             slot=tx_slot,
                             signature=tx_sig))
        else:
            logger.info(
                f"index_solana_plays.py | tx={tx_sig} Failed to find SECP_PROGRAM"
            )
    except Exception as e:
        logger.error(f"index_solana_plays.py | Error processing {tx_sig}, {e}",
                     exc_info=True)
Code Example #6
def setup_trending(db, date):
    # Test data

    # test tracks
    # when creating tracks, track_id == index
    test_tracks = [
        {"genre": "Electronic"},
        {"genre": "Pop"},
        {"genre": "Electronic"},
        # Tracks we don't want to count
        {"genre": "Electronic", "is_unlisted": True},
        {"genre": "Electronic", "is_delete": True},
    ]

    test_plays = [
        # Current Plays
        {"item_id": 0},
        {"item_id": 0},
        {"item_id": 1},
        {"item_id": 1},
        {"item_id": 2},
        {"item_id": 3},
        # > 1 wk plays
        {"item_id": 2, "created_at": date - timedelta(weeks=2)},
        {"item_id": 2, "created_at": date - timedelta(weeks=2)},
        {"item_id": 3, "created_at": date - timedelta(weeks=2)},
        # We don't want to count these (deleted/unlisted tracks)
        {"item_id": 3},
        {"item_id": 3},
        {"item_id": 4},
        {"item_id": 4},
    ]

    # pylint: disable=W0621
    with db.scoped_session() as session:
        # seed tracks + blocks
        for i, track_meta in enumerate(test_tracks):
            blockhash = hex(i)
            block = Block(
                blockhash=blockhash,
                number=i,
                parenthash="0x01",
                is_current=True,
            )

            track = Track(
                blockhash=blockhash,
                blocknumber=i,
                track_id=i,
                is_current=track_meta.get("is_current", True),
                is_delete=track_meta.get("is_delete", False),
                owner_id=300,
                route_id="",
                track_segments=[],
                genre=track_meta.get("genre", ""),
                updated_at=track_meta.get("updated_at", date),
                created_at=track_meta.get("created_at", date),
                is_unlisted=track_meta.get("is_unlisted", False),
            )

            # add the block and flush before adding the track,
            # because track.blocknumber is a foreign key
            # referencing block
            session.add(block)
            session.flush()
            session.add(track)

        # seed plays
        aggregate_plays = {}
        for i, play_meta in enumerate(test_plays):
            item_id = play_meta.get("item_id")
            if item_id in aggregate_plays:
                aggregate_plays[item_id] += 1
            else:
                aggregate_plays[item_id] = 1

            play = Play(
                id=i, play_item_id=item_id, created_at=play_meta.get("created_at", date)
            )
            session.add(play)
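        # Persist the per-track totals so queries can read precomputed play counts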
        for i, count in aggregate_plays.items():
            session.add(AggregatePlays(play_item_id=i, count=count))
Code Example #7
def get_track_plays(self, db, lock):
    start_time = time.time()
    job_extra_info = {'job': JOB}
    with db.scoped_session() as session:
        # Get the most recent play date in the db to use as an offset for
        # fetching more play counts from identity
        most_recent_play_date = session.query(Play.updated_at).order_by(
            desc(Play.updated_at), desc(Play.id)).first()
        if most_recent_play_date is None:
            # Make the date way back in the past to get the first play count onwards
            most_recent_play_date = datetime.datetime(2000, 1, 1, 0,
                                                      0).timestamp()
        else:
            most_recent_play_date = most_recent_play_date[0].timestamp()

        job_extra_info['most_recent_play_date'] = get_time_diff(start_time)

        # Create and query identity service endpoint for track play counts
        identity_url = update_play_count.shared_config['discprov'][
            'identity_service_url']
        params = {
            'startTime': most_recent_play_date,
            'limit': REQUEST_LISTENS_LIMIT
        }
        identity_tracks_endpoint = urljoin(identity_url, 'listens/bulk')

        track_listens = {}
        try:
            identity_response_time = time.time()
            resp = requests.get(identity_tracks_endpoint, params=params)
            track_listens = resp.json()
            job_extra_info['identity_response_time'] = get_time_diff(
                identity_response_time)
        except Exception as e:
            logger.error(
                f'Error retrieving track play counts - {identity_tracks_endpoint}, {e}'
            )

        plays = []
        user_track_listens = []
        track_hours = []
        # pylint: disable=W0105
        """
        Insert a row for each new count instance in the plays table
        1.) Loop through the listens to build a list of user_id to track_id pairs
        and track_id to current_hour for querying
        2.) Query the plays table for counts for both user-tracks pairs and anonymous
        listens of track by hour and build a dictionary for each mapping to counts
        3.) Loop through the listens again and only insert the difference between
        the identity play count and the existing plays in the db
        """
        if 'listens' in track_listens:
            # 1.) Get the user_id to track_id pairs and track_id to current hr pairs
            listens_query_building_time = time.time()
            for listen in track_listens['listens']:
                if 'userId' in listen and listen['userId'] is not None:
                    # Add the user_id to track_id mapping
                    user_track_listens.append(
                        and_(Play.play_item_id == listen['trackId'],
                             Play.user_id == listen['userId']))
                else:
                    # Since anonymous plays are stored by hour,
                    # find all plays in the last hour for this track
                    current_hour = dateutil.parser.parse(
                        listen['createdAt']).replace(microsecond=0,
                                                     second=0,
                                                     minute=0)
                    track_hours.append(
                        and_(Play.user_id == None,
                             Play.play_item_id == listen['trackId'],
                             Play.created_at == current_hour))

            job_extra_info['listens_query_building_time'] = get_time_diff(
                listens_query_building_time)
            # 2.) Query the plays and build a dict
            listens_query_time = time.time()

            # Query the plays for existing user-track listens & build
            # a dict of { '{user_id}-{track_id}' : listen_count }
            user_track_plays_dict = {}
            if user_track_listens:
                user_track_play_counts = session.query(
                    Play.play_item_id, Play.user_id,
                    func.count(Play.play_item_id)).filter(
                        or_(*user_track_listens)).group_by(
                            Play.play_item_id, Play.user_id).all()

                user_track_plays_dict = {
                    f'{play[0]}-{play[1]}': play[2]
                    for play in user_track_play_counts
                }

            # Query the plays for existing anon-tracks by hour & build
            # a dict of { '{track_id}-{timestamp}' : listen_count }
            anon_track_plays_dict = {}
            if track_hours:
                track_play_counts = session.query(
                    Play.play_item_id, func.min(Play.created_at),
                    func.count(Play.play_item_id)).filter(
                        or_(*track_hours)).group_by(Play.play_item_id).all()
                anon_track_plays_dict = {
                    # pylint: disable=C0301
                    f'{play[0]}-{play[1].replace(microsecond=0, second=0, minute=0)}':
                    play[2]
                    for play in track_play_counts
                }
            job_extra_info['listens_query_time'] = get_time_diff(
                listens_query_time)
            build_insert_query_time = time.time()

            # 3.) Insert new listens - identity listen counts minus the existing listens in the db
            for listen in track_listens['listens']:
                if 'userId' in listen and listen['userId'] is not None:
                    track_id = listen['trackId']
                    user_id = listen['userId']
                    user_track_key = f'{track_id}-{user_id}'
                    # Get the existing listens for the track_id-user_id
                    user_track_play_count = user_track_plays_dict.get(
                        user_track_key, 0)
                    new_play_count = listen['count'] - user_track_play_count
                    if new_play_count > 0:
                        plays.extend([
                            Play(
                                user_id=listen['userId'],
                                play_item_id=listen['trackId'],
                                updated_at=listen['updatedAt'],
                                created_at=listen['createdAt'],
                            ) for _ in range(new_play_count)
                        ])
                else:
                    # For anon track plays, check the current hour play counts
                    # and only insert new plays for the difference
                    current_hour = dateutil.parser.parse(
                        listen['createdAt']).replace(microsecond=0,
                                                     second=0,
                                                     minute=0,
                                                     tzinfo=None)
                    track_id = listen['trackId']
                    track_hr_key = f'{track_id}-{current_hour}'
                    # Get the existing listens for the track_id-current_hour key
                    anon_hr_track_play_count = anon_track_plays_dict.get(
                        track_hr_key, 0)
                    new_play_count = listen['count'] - anon_hr_track_play_count
                    if new_play_count > 0:
                        plays.extend([
                            Play(play_item_id=listen['trackId'],
                                 updated_at=listen['updatedAt'],
                                 created_at=listen['createdAt'])
                            for _ in range(new_play_count)
                        ])
            job_extra_info['build_insert_query_time'] = get_time_diff(
                build_insert_query_time)

        insert_refresh_time = time.time()
        has_lock = lock.owned()
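        # Only insert and refresh the materialized view while this worker holds the lock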
        if plays and has_lock:
            session.bulk_save_objects(plays)
            session.execute(
                "REFRESH MATERIALIZED VIEW CONCURRENTLY aggregate_plays")

        job_extra_info['has_lock'] = has_lock
        job_extra_info['number_rows_insert'] = len(plays)
        job_extra_info['insert_refresh_time'] = get_time_diff(
            insert_refresh_time)
        job_extra_info['total_time'] = get_time_diff(start_time)
        logger.info("index_plays.py | update_play_count complete",
                    extra=job_extra_info)