description="Import sample data for recommendation engine") parser.add_argument('--engine_id', default='test_resource') parser.add_argument('--url', default="http://localhost:9090") parser.add_argument('--events_file', default="./cb_events.csv") parser.add_argument('--queries_file', default="./cb_queries.csv") parser.add_argument('--user_id', default=None) parser.add_argument('--user_id_2', default=None) parser.add_argument('--secret', default=None) parser.add_argument('--secret_2', default=None) args = parser.parse_args() print(args) events_client = harness.EventsClient(engine_id=args.engine_id, url=args.url, threads=5, qsize=500, user_id=args.user_id, user_secret=args.secret) print(events_client.host) import_events(events_client, args.events_file) query_client = harness.QueriesClient(engine_id=args.engine_id, url=args.url, threads=5, qsize=500, user_id=args.user_id, user_secret=args.secret) execute_queries(query_client, args.queries_file)
def run_map_test(data, eventNames, users=None,
                 primaryEvent=cfg.testing.primary_event,
                 consider_non_zero_scores=cfg.testing.consider_non_zero_scores_only,
                 num=200, K=cfg.testing.map_k, test=False,
                 harness_url="http://localhost:9090"):
    """Import events into harness, query it per user and compute MAP@k.

    The events in ``data`` are first sent to the engine ``cfg.engine_id``.
    Then, for every user that has at least one ``primaryEvent`` record, the
    engine is queried and the returned ranking is compared against the items
    that user touched with the primary event.

    Args:
        data: Iterable of event records exposing ``event``, ``entityId`` and
            ``targetEntityId`` attributes. It is handed to ``import_events``
            and then iterated again here, so it must support more than one
            pass (a one-shot generator would yield no hold-out users).
        eventNames: Value forwarded as ``"eventNames"`` in every query.
        users: Optional collection of user ids; when given (and non-empty),
            only those users are evaluated.
        primaryEvent: Event name whose records define the ground truth.
        consider_non_zero_scores: When true, a user only contributes to the
            metric if the top-ranked result has a non-zero score.
        num: Value forwarded as ``"num"`` in every query.
        K: MAP is computed for every cut-off ``k`` in ``1..K``.
        test: When true, only a slice of the users is evaluated.
        harness_url: Base URL of the harness server.

    Returns:
        A tuple ``(map_scores, res_data, avg_items)`` where ``map_scores`` is
        the list of ``metrics.mapk`` values for ``k = 1..K``, ``res_data``
        maps each successfully queried user to ``{"items": [...],
        "scores": [...]}``, and ``avg_items`` is the number of ground-truth
        items divided by the number of evaluated users (plus a small epsilon
        that avoids division by zero).
    """
    N_TEST = 2000
    user_information = {}
    res_data = {}

    # Create harness engine for events...
    engine_client = harness.EventsClient(
        engine_id=cfg.engine_id,
        url=harness_url,
        threads=5,
        qsize=500)
    try:
        import_events(engine_client, data)
        logging.info(engine_client.host)
    finally:
        # Release the connection even when the import fails.
        engine_client.close()

    # Create query client in harness...
    logging.info("Queries for " + cfg.engine_id)
    query_client = harness.QueriesClient(
        engine_id=cfg.engine_id,
        url=harness_url,
        threads=5,
        qsize=500)

    prediction = []
    ground_truth = []
    user_items_cnt = 0.0
    users_cnt = 0

    try:
        # Ground truth: items each (selected) user reached via the primary event.
        for rec in data:
            if rec.event != primaryEvent:
                continue
            user = rec.entityId
            if not users or user in users:
                user_information.setdefault(user, []).append(rec.targetEntityId)

        holdoutUsers = list(user_information)
        if test:
            # The slice starts at index 1, so the first user is skipped and at
            # most N_TEST - 1 users are kept. Preserved as-is.
            # NOTE(review): confirm skipping the first user is intentional.
            holdoutUsers = holdoutUsers[1:N_TEST]

        for user in tqdm(holdoutUsers):
            query = {
                "user": user,
                "eventNames": eventNames,
                "num": num,
            }
            try:
                res = query_client.send_query(query)
                results = res.json_body['result']
            except harness.NotFoundError:
                print("Error with user: %s" % user)
                continue

            # Sort by score then by item name (both descending).
            ranked = sorted(((r["score"], r["item"]) for r in results),
                            reverse=True)
            scores = [score for score, _ in ranked]
            items = [item for _, item in ranked]
            # The raw ranking is recorded for every answered query, even when
            # the user is excluded from the metric below.
            res_data[user] = {
                "items": items,
                "scores": scores,
            }

            # Consider only non-zero scores
            has_non_zero_top = bool(scores) and scores[0] != 0.0
            if consider_non_zero_scores and not has_non_zero_top:
                continue

            actual = user_information.get(user, [])
            prediction.append(items)
            ground_truth.append(actual)
            user_items_cnt += len(actual)
            users_cnt += 1
    finally:
        # The original never closed the query client; close it like the
        # events client so its connection is always released.
        query_client.close()

    return ([metrics.mapk(ground_truth, prediction, k)
             for k in range(1, K + 1)],
            res_data,
            user_items_cnt / (users_cnt + 0.00001))