Python load_sim_users 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: load_similar_users

메소드/함수: load_sim_users

hotexamples.com에서의 예제들: 2

Python load_sim_users - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python load_similar_users.load_sim_users의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: cppr_computation.py 프로젝트: tdubourg/collaborative-personalized-pagerank-public

def CCPPR(u, q):
    """
        Build the CCPPR vector of user ``u`` for query ``q``.

        The vector is a weighted sum of the CPPR vectors of the users similar
        to ``u``: every (similarity, similar_user) pair returned by
        ``load_sim_users`` contributes ``similarity * CPPR(similar_user, q)``.

        Returns a dict keyed by the graph's ``original_ids`` vertex property
        (the URL ids), whose values are the accumulated scores. The dict is
        empty when ``u`` has no similar users.

        NOTE: the original author described this two-level collaborative PPR
        as not yet tested.
    """
    neighbours = load_sim_users(mdb().host, [u])[u]
    url_id_of = g.vertex_properties['original_ids']
    combined = {}  # urlid -> accumulated cppr score
    for weight, neighbour in neighbours:
        # One CPPR vector per similar user...
        neighbour_ppr = CPPR(neighbour, q)
        # ...folded, scaled by its similarity, into the running total of
        # every vertex of the graph.
        for vertex in g.vertices():
            url_id = url_id_of[vertex]
            combined[url_id] = weight * neighbour_ppr[vertex] + combined.get(url_id, 0)
    # Conceptually: sum(sim * CPPR(u_s, q) for sim, u_s in neighbours)
    return combined

예제 #2

파일 보기

파일: compute_usage_scoring.py 프로젝트: tdubourg/collaborative-personalized-pagerank-public

def main(argv, no_compute=False, store_results=True):
    import sys
    # from sys import argv
    from time import time

    argc = len(argv)
    if argc <= len(CLI_ARGS):
        print "Usage: %s"  % argv[0], ' '.join(CLI_ARGS)
        print "Currently missing parameters arguments:", ' '.join(CLI_ARGS[len(argv)-1:])
        exit()

    pairs_path  = argv[1].strip()
    mdb_host    = argv[2].strip()

    t_init = time()

    with univ_open(pairs_path, 'r') as f:
        pairs = [tuple([int(_) for _ in l.strip().split(" ")]) for l in f]

    targeted_users = [p[1] for p in pairs]

    sim_users = load_sim_users(mdb_host, targeted_users)
    all_allowed_users = load_set_of_similar_users(mdb_host, targeted_users) | set(targeted_users)

    # This will load everything needed to compute sim() function in-ram
    similar_users(argv[1:], True, preset_set_of_users=all_allowed_users)

    print "We loaded", len(us.users_clicks_list_indexed_by_uid), "users' clicks information"

    # Init the sim_users array of the user_similiraty module
    us.init_sim_users(sim_users)

    if no_compute:
        return

    # We're ready to compute!

    for q, u in pairs:
        # Note: The collaborative score is not computed on the user's own clicked pages
        # but using the clicks of the users similar to him
        # so we have to compute scores for all pages clicked BY ITS SIMILAR USERS
        for sim, u_sim in sim_users[u]:
            try:
                pages = us.users_clicks_list_indexed_by_uid[u_sim][q].keys()
            except KeyError as err:
                key = int(err.message)
                if key == q and key != u_sim:
                    # Well, this similar user just does not have queried this query...
                    continue
                print "For u_sim, q", u_sim, q
                print "KeyError with value=", err
                continue
            if pages is None:
                print "?!?! The user", u_sim, "user similar to", u, "has no pages information."
            else:
                print "User", u_sim, "user similar to", u, "has", len(pages), "pages informations for query", q
                for page in pages:
                    # try:
                    sys.stdout.write("score(u=%s, q=%s, p=%s)=" % (u, q, page))
                    sys.stdout.flush()
                    sys.stdout.write("%.5e\n" % us.score(q, page, u))
                    # except KeyError as err:
                    #     print "The KeyError is:", err
                    #     print "For some reason, the user", u, "probably is not in the sim_users cache"
                    #     print "Dump of the sim_users cache:"
                    #     print sim_users
                    #     raise err
                    # except Exception as err:
                    #     print "Errors happen, this time it is:", err

    if store_results:
        print "Storing the computed scores in the DB"
        print "We are going to use the cache of the us module"
        print "The cache currently contains", len(us.scores), "entries"
        t0 = time()
        mdb = MongoClient(host=mdb_host)
        scores_vectors = {}
        for (q, p, u), score in us.scores.items():
            if score == 0.0:
                # Nil score is the same as no score
                continue
            try:
                scores_vectors[(u, q)].append((p, score))
            except KeyError:
                scores_vectors[(u, q)]= [(p, score)]
        print "Precomputation took", time()-t0
        print "We have", len(scores_vectors), "'(u, q) -> score' entries to commit to the DB"
        
        print "Committing new one..."
        t0 = time()
        scores_to_commit = []
        for (user, q), scores in scores_vectors.items():
            print "Dropping previous information..."
            mdb.users.urls_perso_scores.remove({'uid': user, 'qid': q})
            scores_to_commit.append(
                {
                    'uid': user,
                    'qid': q,
                    'vector': scores
                } \
            )
        mdb.users.urls_perso_scores.insert(scores_to_commit)
        print "Done, committing took", time()-t0
        print "Stats:"
        for (user, q), scores in scores_vectors.items():
            print "We have", len(scores), "scores for (q, u)=", (q, user)

    print "Script execution took", time()-t_init