def producer(stats, test_duration):
    """Producer Worker

    Posts messages continuously until the test duration expires. The
    elapsed time of each successful post is accumulated so the parent
    process can derive throughput and latency.
    """
    cli = client.Client(URL)
    queue = cli.queue(QUEUE_PREFIX + '1')

    total_requests = 0
    total_elapsed = 0
    deadline = time.time() + test_duration

    while time.time() < deadline:
        marktime.start('post message')

        # TODO(TheSriram): Track/report errors
        try:
            queue.post(choose_message())
        except TransportError as ex:
            print("Could not post a message : {0}".format(ex))
        else:
            # Only successful posts count toward the stats.
            total_elapsed += marktime.stop('post message').seconds
            total_requests += 1

    stats.put({
        'total_requests': total_requests,
        'total_elapsed': total_elapsed
    })
def producer(queues, message_pool, stats, test_duration):
    """Producer Worker

    Posts messages continuously until the test duration expires,
    recording per-request elapsed time for throughput and latency
    calculations. Failures are logged to stderr and counted in
    total_requests but not in successful_requests.
    """
    total_requests = 0
    successful_requests = 0
    total_elapsed = 0
    deadline = time.time() + test_duration

    while time.time() < deadline:
        # Spread the load across all queues.
        target = random.choice(queues)

        try:
            marktime.start('post_message')
            target.post(choose_message(message_pool))
            total_elapsed += marktime.stop('post_message').seconds
            successful_requests += 1
        except errors.TransportError as ex:
            sys.stderr.write("Could not post a message : {0}\n".format(ex))

        total_requests += 1

    stats.put({
        'successful_requests': successful_requests,
        'total_requests': total_requests,
        'total_elapsed': total_elapsed
    })
def test_remove_from_labels(self):
    """stop() keeps the label by default; remove_from_labels drops it."""
    label = 'test run'
    marktime.start(label)

    # A plain stop leaves the label registered.
    marktime.stop(label)
    self.assertIn(label, marktime.labels)

    # Stopping with remove_from_labels=True unregisters it.
    marktime.stop(label, remove_from_labels=True)
    self.assertNotIn(label, marktime.labels)
def test_duration_data_in_dumps(self, mock_time):
    """stop() should record the measured duration in marktime.labels.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    mock_time.return_value = 123
    marktime.start('test run')
    time_diff = marktime.stop('test run', at=124,
                              remove_from_labels=False).seconds
    self.assertEqual(marktime.labels['test run']['duration'], time_diff)
def calibrate_n_estimators(data, params):
    """Re-run the n_estimators search against the current parameters.

    Builds a classifier with a very large n_estimators ceiling and lets
    early stopping find the effective count. Returns
    (n_estimators, score).
    """
    marktime.start('calibrate')
    log.info("Calibrate n_estimators to new options")

    estimator = make_xgb(params, extra={'n_estimators': 100000})
    n_estimators, score = find_n_estimators(
        estimator, data, early_stopping_rounds=20)

    log.info("N_estimators calibrated to %d in %s",
             n_estimators, task_done("calibrate"))
    return n_estimators, score
def find_gamma(data, params):
    """Grid-search the optimal gamma value.

    Scans gamma in 0.1 steps; whenever the best value lands on the right
    boundary of the current range, the range is shifted upward and the
    search repeats. Returns (best_gamma, best_score).

    Fixes typos in log messages ("Fist"/"Frist" -> "First").
    """
    param_test = {
        'gamma': [i / 10.0 for i in range(0, 8)]
    }
    iteration = 0
    best = 0.0

    while True:
        marktime.start('first_step')
        cls = make_xgb(params)
        log.info("First step, iteration=%d, search in %s",
                 iteration, param_test)
        gsearch = GridSearchCV(estimator=cls, param_grid=param_test,
                               scoring='roc_auc', n_jobs=1, iid=False, cv=5)
        gsearch.fit(data['features'], data['labels'])
        best = gsearch.best_params_['gamma']
        score = gsearch.best_score_
        show_grid_scores(gsearch.grid_scores_)
        log.info("First step found params %s with score %s in %s",
                 gsearch.best_params_, score, task_done("first_step"))

        # handle boundary value: if the optimum sits on the right edge,
        # shift the window up and search again
        if is_on_right_bound(best, param_test['gamma']):
            end = int(param_test['gamma'][-1] * 10)
            param_test['gamma'] = [
                i / 10.0 for i in range(end, end + len(param_test['gamma']))]
            log.info("Optimal value is on boundary (%s), shift range and "
                     "iterate again", best)
        else:
            log.info("Found optimal value for gamma=%s", best)
            break
        iteration += 1

    return best, score
def test_stop_twice(self, mock_time):
    """A repeated stop() returns the original diff unless stop_once=False.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    mock_time.return_value = 123
    marktime.start('test run')
    # First stop records 124 - 123 == 1 second.
    self.assertEqual(marktime.stop('test run', at=124).seconds, 1)
    # By default stop() is idempotent: the later 'at' is ignored.
    self.assertEqual(marktime.stop('test run', at=125).seconds, 1)
    # stop_once=False forces a recompute: 183 - 123 == 60s == 1 minute.
    self.assertEqual(
        marktime.stop('test run', at=183, stop_once=False).minutes, 1)
def test_duration(self):
    """duration() reports the elapsed seconds between start and stop.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    start_time = 1370451294
    diff_time = round(random.random() * 100)
    stop_time = start_time + diff_time

    marktime.start('test run', at=start_time)
    marktime.stop('test run', at=stop_time)

    self.assertEqual(marktime.duration('test run').seconds, diff_time)
def claim_delete(queues, stats, test_duration, ttl, grace, limit):
    """Consumer Worker

    Continuously claims and deletes messages until the test duration
    expires. Claim and delete latencies are accumulated separately so
    the parent process can compute per-operation throughput/latency.
    """
    deadline = time.time() + test_duration

    claim_total_elapsed = 0
    delete_total_elapsed = 0
    total_failed_requests = 0
    claim_total_requests = 0
    delete_total_requests = 0

    while time.time() < deadline:
        # NOTE(kgriffs): Distribute requests across all queues evenly.
        target = random.choice(queues)

        try:
            marktime.start('claim_message')
            claimed = target.claim(ttl=ttl, grace=grace, limit=limit)
            claim_total_elapsed += marktime.stop('claim_message').seconds
            claim_total_requests += 1
        except errors.TransportError as ex:
            sys.stderr.write("Could not claim messages : {0}\n".format(ex))
            total_failed_requests += 1
        else:
            # Delete each claimed message, timing every delete request.
            for message in claimed:
                try:
                    marktime.start('delete_message')
                    message.delete()
                    delete_total_elapsed += (
                        marktime.stop('delete_message').seconds)
                    delete_total_requests += 1
                except errors.TransportError as ex:
                    sys.stderr.write(
                        "Could not delete messages: {0}\n".format(ex))
                    total_failed_requests += 1

    total_requests = (claim_total_requests +
                      delete_total_requests +
                      total_failed_requests)

    stats.put({
        'total_requests': total_requests,
        'claim_total_requests': claim_total_requests,
        'delete_total_requests': delete_total_requests,
        'claim_total_elapsed': claim_total_elapsed,
        'delete_total_elapsed': delete_total_elapsed,
    })
def claim_delete(queues, stats, test_duration, ttl, grace, limit):
    """Consumer Worker

    Claims and deletes messages in a loop for the given duration,
    timing each claim and each delete so throughput and latency can be
    derived from the reported stats.
    """
    deadline = time.time() + test_duration

    claim_elapsed = 0
    delete_elapsed = 0
    failed = 0
    claims = 0
    deletes = 0

    while time.time() < deadline:
        # NOTE(kgriffs): Distribute requests across all queues evenly.
        queue = random.choice(queues)

        try:
            marktime.start("claim_message")
            claimed = queue.claim(ttl=ttl, grace=grace, limit=limit)
            claim_elapsed += marktime.stop("claim_message").seconds
        except errors.TransportError as ex:
            sys.stderr.write("Could not claim messages : {0}\n".format(ex))
            failed += 1
            continue

        claims += 1

        for item in claimed:
            try:
                marktime.start("delete_message")
                item.delete()
                delete_elapsed += marktime.stop("delete_message").seconds
            except errors.TransportError as ex:
                sys.stderr.write(
                    "Could not delete messages: {0}\n".format(ex))
                failed += 1
            else:
                deletes += 1

    stats.put(
        {
            "total_requests": claims + deletes + failed,
            "claim_total_requests": claims,
            "delete_total_requests": deletes,
            "claim_total_elapsed": claim_elapsed,
            "delete_total_elapsed": delete_elapsed,
        }
    )
def find_maxdepth_minchildweight(data, params):
    """Grid-search max_depth and min_child_weight.

    A coarse search (step 2) is repeated, shifting any range whose
    optimum lands on its right boundary, then a fine search probes
    +/- 1 around the coarse optimum. Returns
    (max_depth, min_child_weight, score).

    Fixes: "Fist step" log typo; iterate over a snapshot of the keys so
    deleting from param_test during iteration is safe on Python 3; use
    items() instead of the Python-2-only iteritems().
    """
    param_test = {
        'max_depth': range(2, 16, 2),
        'min_child_weight': range(1, 6, 2)
    }
    centers = {}
    iteration = 0

    while True:
        marktime.start('first_step')
        cls = make_xgb(params)
        log.info("First step, iteration=%d, search in %s",
                 iteration, param_test)
        gsearch = GridSearchCV(estimator=cls, param_grid=param_test,
                               scoring='roc_auc', n_jobs=1, iid=False, cv=5)
        gsearch.fit(data['features'], data['labels'])
        best = gsearch.best_params_
        score = gsearch.best_score_
        show_grid_scores(gsearch.grid_scores_)
        log.info("First step found params %s with score %s in %s",
                 best, score, task_done("first_step"))

        # handle boundary value: snapshot the keys because param_test is
        # mutated inside the loop (required for Python 3 dicts)
        boundary = False
        for key in list(param_test.keys()):
            if is_on_right_bound(best[key], param_test[key]):
                end = param_test[key][-1]
                param_test[key] = range(end, end + len(param_test[key]) + 2, 2)
                log.info("Optimal value for %s is on boundary (%s), shift "
                         "range and iterate again", key, best[key])
                boundary = True

        if not boundary:
            for key in list(param_test.keys()):
                centers[key] = best[key]
                del param_test[key]
                log.info("Found optimal value for %s=%s", key, centers[key])
            break

        iteration += 1

    # do fine-tuning around the coarse optimum
    param_test = {
        key: [val - 1, val, val + 1]
        for key, val in centers.items()
    }
    marktime.start("second_step")
    cls = make_xgb(params)
    log.info("Second step, search in %s", param_test)
    gsearch = GridSearchCV(estimator=cls, param_grid=param_test,
                           scoring='roc_auc', n_jobs=1, iid=False, cv=5)
    gsearch.fit(data['features'], data['labels'])
    best = gsearch.best_params_
    score = gsearch.best_score_
    show_grid_scores(gsearch.grid_scores_)
    log.info("Second step found %s with score %s in %s",
             best, score, task_done('second_step'))

    return best['max_depth'], best['min_child_weight'], score
def test_duration_with_stop(self):
    """duration(stop_it=True) stops the timer and reports the elapsed diff.

    Fixes: '.sec' does not match the attribute used by every sibling
    test ('.seconds'); uses assertEqual instead of the deprecated
    assertEquals alias (removed in Python 3.12).
    """
    start_time = 1370451294
    diff_time = round(random.random() * 100)
    stop_time = start_time + diff_time

    marktime.start('test run', at=start_time)

    self.assertEqual(
        marktime.duration('test run', stop_it=True, stop_at=stop_time).seconds,
        diff_time)
def test_real_times(self, mock_time):
    """Fractional wall-clock diffs survive a start/stop round trip.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    start_time = 1370451294.106749
    diff_time = random.random() * 100
    stop_time = start_time + diff_time

    mock_time.return_value = start_time
    marktime.start('test run')
    mock_time.return_value = stop_time

    # Compare rounded values to sidestep float representation noise.
    self.assertEqual(round(marktime.stop('test run').seconds, 4),
                     round(diff_time, 4))
def test_severals_markers(self, mock_time):
    """Multiple concurrently running labels are tracked independently.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    start_time = 123
    markers_count = 10
    mock_time.return_value = start_time

    # NOTE: range(1, markers_count) intentionally starts markers 1..9.
    for i in range(1, markers_count):
        marktime.start('test run %d' % i)

    # Each marker is stopped i seconds after the shared start time.
    for i in range(1, markers_count):
        time_diff = marktime.stop('test run %d' % i,
                                  at=(start_time + i)).seconds
        self.assertEqual(time_diff, i)
def find_n_estimators(cls, data, cv_folds=5, early_stopping_rounds=50):
    """Pick the effective n_estimators via xgboost cross-validation.

    Runs xgb.cv with early stopping and returns the boosting-round
    count it settled on together with that round's score.
    """
    marktime.start("find_n_estimators")

    booster_params = cls.get_xgb_params()
    train_matrix = xgb.DMatrix(data['features'], label=data['labels'])
    cv_result = xgb.cv(booster_params, train_matrix,
                       num_boost_round=cls.get_params()['n_estimators'],
                       nfold=cv_folds,
                       metrics=args.metric,
                       early_stopping_rounds=early_stopping_rounds,
                       show_progress=False)

    # The CV frame holds one row per completed boosting round.
    n_estimators = cv_result.shape[0] - 1
    score = cv_result.iloc[n_estimators, 0]
    log.info("N_estimators search done in %s, result=%d",
             task_done("find_n_estimators"), n_estimators)
    return n_estimators, score
def claim_delete(stats, test_duration, ttl, grace, limit):
    """Consumer Worker

    Continuously claims and deletes messages for the specified duration.
    The time taken for each claim and delete is recorded for calculating
    throughput and latency.
    """
    cli = client.Client(conf.server_url)
    queue = cli.queue(conf.queue_prefix + '1')
    end = time.time() + test_duration

    # Aggregate counters reported to the parent process via `stats`.
    total_elapsed = 0
    total_requests = 0
    claim_total_requests = 0
    delete_total_requests = 0

    while time.time() < end:
        marktime.start('claim_message')
        try:
            claim = queue.claim(ttl=ttl, grace=grace, limit=limit)
        except TransportError as ex:
            sys.stderr.write("Could not claim messages : {0}\n".format(ex))
        else:
            total_elapsed += marktime.stop('claim_message').seconds
            claim_total_requests += 1
            try:
                # NOTE(review): the elapsed time below appears to cover
                # deleting the whole claimed batch, and
                # delete_total_requests counts one per batch (unlike the
                # per-message accounting elsewhere) — confirm intended.
                marktime.start('delete_message')
                for msg in claim:
                    # TODO(TheSriram): Simulate actual work before deletion
                    msg.delete()
                total_elapsed += marktime.stop('delete_message').seconds
                delete_total_requests += 1
            except TransportError as ex:
                sys.stderr.write("Could not delete messages: {0}\n".format(ex))
            finally:
                # Count the delete attempt whether or not it succeeded.
                total_requests += 1
        finally:
            # Count the claim attempt; a fully successful iteration thus
            # increments total_requests twice (claim + delete).
            total_requests += 1

    stats.put({'total_requests': total_requests,
               'claim_total_requests': claim_total_requests,
               'delete_total_requests': delete_total_requests,
               'total_elapsed': total_elapsed})
def claim_delete(stats, test_duration, ttl, grace, limit):
    """Consumer Worker

    Continuously claims and deletes messages for the specified duration.
    The time taken for each claim and delete is recorded for calculating
    throughput and latency.
    """
    cli = client.Client(URL)
    queue = cli.queue(QUEUE_PREFIX + '1')
    end = time.time() + test_duration

    # Aggregate counters reported via `stats`.
    total_elapsed = 0
    total_requests = 0
    claim_total_requests = 0
    delete_total_requests = 0

    while time.time() < end:
        marktime.start('claim_message')
        try:
            claim = queue.claim(ttl=ttl, grace=grace, limit=limit)
        except TransportError as ex:
            print("Could not claim messages : {0}".format(ex))
        else:
            total_elapsed += marktime.stop('claim_message').seconds
            total_requests += 1
            claim_total_requests += 1
            try:
                marktime.start('delete_message')
                for msg in claim:
                    # TODO(TheSriram): Simulate actual work before deletion
                    msg.delete()
                total_elapsed += marktime.stop('delete_message').seconds
                delete_total_requests += 1
                total_requests += 1

                # NOTE(review): stats snapshots are pushed on every
                # successful iteration, from inside the try block — so no
                # stats are reported if every iteration fails. Confirm the
                # consumer of `stats` expects incremental snapshots.
                stats.put({
                    'total_requests': total_requests,
                    'claim_total_requests': claim_total_requests,
                    'delete_total_requests': delete_total_requests,
                    'total_elapsed': total_elapsed
                })
            except TransportError as ex:
                print("Could not claim and delete : {0}".format(ex))
def observer(queues, stats, test_duration, limit):
    """Observer Worker

    The observer lists messages without claiming them.
    """
    deadline = time.time() + test_duration

    total_elapsed = 0
    total_succeeded = 0
    total_failed = 0

    # Pair each queue with its paging marker ('m') so listing resumes
    # where the previous request for that queue left off.
    tracked = [{'q': q, 'm': None} for q in queues]

    while time.time() < deadline:
        # NOTE(kgriffs): Distribute requests across all queues evenly.
        entry = random.choice(tracked)

        try:
            marktime.start('list_messages')
            cursor = entry['q'].messages(limit=limit, marker=entry['m'],
                                         include_claimed=True)
            total_elapsed += marktime.stop('list_messages').seconds
            total_succeeded += 1

            messages = list(cursor)
            if messages:
                # TODO(kgriffs): Figure out a less hacky way to do this
                # while preserving the ability to measure elapsed time
                # per request.
                entry['m'] = _extract_marker(cursor._links)

        except errors.TransportError as ex:
            sys.stderr.write("Could not list messages : {0}\n".format(ex))
            total_failed += 1

    stats.put({
        'total_requests': total_succeeded + total_failed,
        'total_succeeded': total_succeeded,
        'total_elapsed': total_elapsed,
    })
def find_alpha_lambda(data, params, reg_steps):
    """Two-pass grid search for the L1/L2 regularisation weights.

    The first pass scans log-spaced values over 10^-5..10^4; the second
    refines linearly around the coarse optimum. Returns
    (reg_alpha, reg_lambda, score).
    """
    param_test = {
        'reg_alpha': np.power(10.0, np.linspace(-5.0, 4.0, num=reg_steps)),
        'reg_lambda': np.power(10.0, np.linspace(-5.0, 4.0, num=reg_steps))
    }

    marktime.start("first_step")
    cls = make_xgb(params)
    log.info("First step, search in %s", param_test)
    coarse_search = GridSearchCV(estimator=cls, param_grid=param_test,
                                 scoring='roc_auc', n_jobs=1, iid=False, cv=5)
    coarse_search.fit(data['features'], data['labels'])
    best = coarse_search.best_params_
    score = coarse_search.best_score_
    log.info("First step found params %s with score %s in %s",
             best, score, task_done("first_step"))

    # Refine linearly around the coarse optimum (1/5x .. 5x).
    reg_alpha, reg_lambda = best['reg_alpha'], best['reg_lambda']
    param_test = {
        'reg_alpha': np.linspace(reg_alpha / 5.0, reg_alpha * 5.0,
                                 num=reg_steps),
        'reg_lambda': np.linspace(reg_lambda / 5.0, reg_lambda * 5.0,
                                  num=reg_steps)
    }

    marktime.start("second_step")
    log.info("Second step, search in %s", param_test)
    fine_search = GridSearchCV(estimator=cls, param_grid=param_test,
                               scoring='roc_auc', n_jobs=1, iid=False, cv=5)
    fine_search.fit(data['features'], data['labels'])
    best = fine_search.best_params_
    score = fine_search.best_score_
    show_grid_scores(fine_search.grid_scores_)
    log.info("Second step found params %s with score %s in %s",
             best, score, task_done("second_step"))

    return best['reg_alpha'], best['reg_lambda'], score
# Command-line interface for the XGBoost tuning script.
parser = argparse.ArgumentParser()
parser.add_argument("--features", required=True, help="Input features file to use in numpy binary format")
parser.add_argument("--labels", required=True, help="File with labels in numpy binary format")
parser.add_argument("--log", required=False, help="Send log file to file instead of stdout")
parser.add_argument("--state", help="If specified, state will be saved to or read from this file")
parser.add_argument("--seed", type=int, default=42, help="Random seed value to use, default=42")
parser.add_argument("--cores", type=int, default=None, help="Limit amount of cores to use, default=None")
parser.add_argument("--objective", default="binary:logistic", help="Tree objective to use, default=binary:logistic")
parser.add_argument("--metric", default="auc", help="Metric to use, default=auc")
parser.add_argument("--reg-steps", default=10, type=int, help="How many steps to use in regularisation search")
args = parser.parse_args()

# Overall wall-clock marker for the whole run.
marktime.start("start")
setup_logging(args.log)
log.info("XGB_tune started.")
log.info("Input features: %s", args.features)
log.info("Input labels: %s", args.labels)

# load data (numpy binary format, timed separately)
marktime.start("data_load")
log.info("Loading data")
features = np.load(args.features)
labels = np.load(args.labels)
log.info("Data loaded in %s", task_done("data_load"))
log.info("Features shape: %s, labels shape: %s", features.shape, labels.shape)

# Features and labels must describe the same number of samples.
if features.shape[0] != labels.shape[0]:
    log.error("Shape of features and labels don't match!")
    sys.exit(1)
def test_duration_None(self):
    """duration() on a still-running label returns None when not stopping."""
    marktime.start('test run')
    running = marktime.duration('test run', stop_it=False)
    self.assertIsNone(running)
use multiprocessing.pool.ThreadPool instead
'''
import marktime  # stopwatch
from multiprocessing.pool import ThreadPool


def foo(bar, baz):
    # Demo worker: print a greeting and return a combined string.
    print('hello {0}'.format(bar))
    return 'foo' + baz


def foo2(bar, baz):
    # Second demo worker with a slightly different output format.
    print('hello2 ' + bar)
    return 'foo2 ' + baz


def main():
    # Submit both workers asynchronously on a single-threaded pool;
    # apply_async returns immediately with an AsyncResult handle.
    pool = ThreadPool(processes=1)
    async_result = pool.apply_async(foo, ('world', 'foo'))  # tuple of args for foo
    async_result2 = pool.apply_async(foo2, ('world', 'foo2'))

    # do some other stuff in the main process

    # .get() blocks until the corresponding worker has finished.
    return_val = async_result.get()  # get the return value from your function.
    return_val2 = async_result2.get()
    print(return_val)
    print(return_val2)


if __name__ == "__main__":
    # Time the whole demo with marktime and report it in milliseconds.
    marktime.start('task')
    main()
    marktime.stop('task')
    print(marktime.duration('task').msecs)
print "\nLoaded action %s from %s\n" % (action, file_path)

# Build one feed-forward policy and one CMA-ES solver per parameter group.
for i, nparams in enumerate(param_lengths):
    policies.append(FFPolicy(nparams, action_lengths[i]))
    solvers.append(CMAES(policies[-1].total_params,
                         popsize=DEFAULT_SOLVER_POPSIZE,
                         weight_decay=0.0,
                         sigma_init=0.5
                         ))

# Run simulation
policy_reward_history = []
runs = sim_params["runs"]
for i in range(runs):
    mt.start("run")  # time each run with marktime
    solns = []
    policy_rewards = [None] * len(policies)

    # Ask every solver for its next population of candidate solutions.
    for j, solver in enumerate(solvers):
        solns.append(solver.ask())

    # One fitness array per solver, one slot per population member.
    all_fitnesses = []
    for solver in solvers:
        all_fitnesses.append(np.zeros(DEFAULT_SOLVER_POPSIZE))

    # NOTE(review): this chunk ends mid-loop; the evaluation of each
    # candidate continues past the visible region.
    for k in range(DEFAULT_SOLVER_POPSIZE):
        # Re-instantiate env for a clean run for each solution
        env = getattr(env_module, env_registry_entry["classname"])(*sim_params["environment"]["args"])
        agents = []
def test_float_diffs(self, mock_time):
    """stop() preserves fractional seconds.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    mock_time.return_value = 123
    marktime.start('test run')
    self.assertEqual(marktime.stop('test run', at=124.5).seconds, 1.5)
def test_start_stop_at(self):
    """Explicit 'at' timestamps drive the measured duration (in msecs).

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    marktime.start('test run', at=123)
    self.assertEqual(marktime.stop('test run', at=124).msecs, 1000)
def test_start_stop(self, mock_time):
    """Basic start/stop pair measures one second.

    Uses assertEqual instead of the deprecated assertEquals alias,
    which was removed in Python 3.12.
    """
    mock_time.return_value = 123
    marktime.start('test run')
    self.assertEqual(marktime.stop('test run', at=124).seconds, 1)