Example 1
        description="Import sample data for recommendation engine")
    parser.add_argument('--engine_id', default='test_resource')
    parser.add_argument('--url', default="http://localhost:9090")
    parser.add_argument('--events_file', default="./cb_events.csv")
    parser.add_argument('--queries_file', default="./cb_queries.csv")
    parser.add_argument('--user_id', default=None)
    parser.add_argument('--user_id_2', default=None)
    parser.add_argument('--secret', default=None)
    parser.add_argument('--secret_2', default=None)

    args = parser.parse_args()
    print(args)

    events_client = harness.EventsClient(engine_id=args.engine_id,
                                         url=args.url,
                                         threads=5,
                                         qsize=500,
                                         user_id=args.user_id,
                                         user_secret=args.secret)
    print(events_client.host)

    import_events(events_client, args.events_file)

    query_client = harness.QueriesClient(engine_id=args.engine_id,
                                         url=args.url,
                                         threads=5,
                                         qsize=500,
                                         user_id=args.user_id,
                                         user_secret=args.secret)

    execute_queries(query_client, args.queries_file)
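
The helper functions import_events and execute_queries are not shown in this
excerpt. As a reference point, here is a minimal sketch of execute_queries,
assuming the queries file lists one user id per line; it relies only on the
send_query call and the json_body attribute that the queries client is shown
to expose in run_map_test below.

def execute_queries(query_client, queries_file):
    # Read one user id per line and request recommendations for each.
    with open(queries_file) as f:
        for line in f:
            user = line.strip().split(",")[0]
            if not user:
                continue
            result = query_client.send_query({"user": user})
            print(user, result.json_body)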


def run_map_test(data, eventNames, users=None,
                 primaryEvent=cfg.testing.primary_event,
                 consider_non_zero_scores=cfg.testing.consider_non_zero_scores_only,
                 num=200, K=cfg.testing.map_k,
                 test=False, harness_url="http://localhost:9090"):

    N_TEST = 2000          # number of hold-out users evaluated when test=True
    user_information = {}  # user -> items seen via the primary event (ground truth)
    res_data = {}          # user -> recommended items and scores returned by Harness

    # Create a Harness events client and import the training events.
    engine_client = harness.EventsClient(
        engine_id=cfg.engine_id,
        url=harness_url,
        threads=5,
        qsize=500)

    import_events(engine_client, data)
    logging.info(engine_client.host)
    engine_client.close()

    # Create a Harness queries client.
    logging.info("Queries for %s", cfg.engine_id)

    query_client = harness.QueriesClient(
        engine_id=cfg.engine_id,
        url=harness_url,
        threads=5,
        qsize=500)

    # Build the ground truth: the items each user touched via the primary event.
    for rec in data:
        if rec.event == primaryEvent:
            user = rec.entityId
            item = rec.targetEntityId
            if not users or user in users:
                user_information.setdefault(user, []).append(item)

    # In test mode, evaluate only the first N_TEST users.
    if test:
        holdoutUsers = [*user_information.keys()][:N_TEST]
    else:
        holdoutUsers = [*user_information.keys()]

    prediction = []
    ground_truth = []
    user_items_cnt = 0.0
    users_cnt = 0

    for user in tqdm(holdoutUsers):
        query = {
            "user": user,
            "eventNames": eventNames,
            "num": num,
        }

        try:
            res = query_client.send_query(query)
            # Sort recommendations by score, highest first.
            tuples = sorted([(r["score"], r["item"]) for r in res.json_body['result']], reverse=True)
            scores = [score for score, item in tuples]
            items = [item for score, item in tuples]
            res_data[user] = {
                "items": items,
                "scores": scores,
            }
            # Optionally keep only users whose top recommendation has a non-zero score.
            if consider_non_zero_scores:
                if len(scores) > 0 and scores[0] != 0.0:
                    prediction.append(items)
                    ground_truth.append(user_information.get(user, []))
                    user_items_cnt += len(user_information.get(user, []))
                    users_cnt += 1
            else:
                prediction.append(items)
                ground_truth.append(user_information.get(user, []))
                user_items_cnt += len(user_information.get(user, []))
                users_cnt += 1
        except harness.NotFoundError:
            logging.warning("Query failed for user %s (NotFoundError)", user)

    # Return MAP@1..K, the per-user results, and the average number of
    # ground-truth items per evaluated user (epsilon avoids division by zero).
    return ([metrics.mapk(ground_truth, prediction, k) for k in range(1, K + 1)],
            res_data, user_items_cnt / (users_cnt + 0.00001))
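
As a usage illustration, run_map_test could be driven as follows. This is a
minimal sketch under stated assumptions: the events CSV is assumed to have
event, entityId and targetEntityId columns (matching the attributes the
function reads from each record), the event names are placeholders to be
replaced with the engine's real ones, a Harness instance is assumed to be
reachable at the default URL, and run_map_test and cfg are assumed to be
importable as defined above.

import csv
from collections import namedtuple

# Hypothetical record type exposing the attributes run_map_test reads.
Event = namedtuple("Event", ["event", "entityId", "targetEntityId"])

def load_events(path):
    # Assumption: the CSV has event, entityId and targetEntityId columns.
    with open(path) as f:
        return [Event(row["event"], row["entityId"], row["targetEntityId"])
                for row in csv.DictReader(f)]

data = load_events("./cb_events.csv")
maps, per_user_results, avg_items = run_map_test(
    data,
    eventNames=["purchase", "view"],  # placeholder event names
    test=True)
for k, value in enumerate(maps, start=1):
    print("MAP@%d = %.4f" % (k, value))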