def test_public_ip_existing(requests_mock):
    # Set-up
    utilities.public_ip = '1.2.3.4'

    # run
    ip = utilities.get_public_ip()

    # asserts
    assert ip == utilities.public_ip
    requests_mock.get.assert_not_called()
def test_public_ip_fail(mock_get):
    # Set-up
    utilities.public_ip = None

    # run
    ip = utilities.get_public_ip()

    # asserts
    assert ip == utilities.public_ip
    assert ip == 'localhost'
    mock_get.assert_called_once_with(utilities.PUBLIC_IP_URL)
def test_public_ip_fail(requests_mock):
    # Set-up
    utilities.public_ip = None
    requests_mock.get.side_effect = Exception  # Force fail

    # run
    ip = utilities.get_public_ip()

    # asserts
    assert ip == utilities.public_ip
    assert ip == 'localhost'
    requests_mock.get.assert_called_once_with(utilities.PUBLIC_IP_URL)
def test_public_ip_success():
    # Set-up
    utilities.public_ip = None

    # run
    ip = utilities.get_public_ip()

    # asserts
    assert ip == utilities.public_ip
    try:
        socket.inet_aton(ip)
    except socket.error:
        pytest.fail("Invalid IP address")
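# Illustrative reference (not part of the test suite): the tests above assume
# a get_public_ip() helper that caches its result in a module-level
# `public_ip` variable, fetches the address from PUBLIC_IP_URL via
# requests.get, and falls back to 'localhost' when the request fails. The real
# helper lives in the utilities module; the sketch below only shows behaviour
# implied by the tests, and the URL value is an assumption.
import requests

PUBLIC_IP_URL = 'http://ip.42.pl/raw'  # assumed plain-text "what is my IP" service

public_ip = None


def get_public_ip():
    """Return this machine's public IP address, caching it at module level."""
    global public_ip
    if public_ip is None:
        try:
            response = requests.get(PUBLIC_IP_URL)
            public_ip = response.text.strip()
        except Exception:
            # network failure: fall back to localhost, as the tests expect
            public_ip = 'localhost'
    return public_ip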
def work(db, datarun_ids=None, save_files=False, choose_randomly=True,
         cloud_mode=False, aws_config=None, log_config=None, total_time=None,
         wait=True):
    """
    Check the ModelHub database for unfinished dataruns, and spawn workers to
    work on them as they are added. This process will continue to run until it
    exceeds total_time or is broken with ctrl-C.

    db: Database instance with which we can make queries to ModelHub
    datarun_ids (optional): list of IDs of dataruns to compute on. If None,
        this will work on all unfinished dataruns in the database.
    choose_randomly: if True, work on all highest-priority dataruns in random
        order. If False, work on them in sequential order (by ID)
    cloud_mode: if True, save processed datasets to AWS. If this option is
        set, aws_config must be supplied.
    aws_config (optional): if cloud_mode is set, this must be an AWSConfig
        object with connection details for an S3 bucket.
    total_time (optional): if set to an integer, this worker will only work
        for total_time seconds. Otherwise, it will continue working until all
        dataruns are complete (or indefinitely).
    wait: if True, once all dataruns in the database are complete, keep
        spinning and wait for new runs to be added. If False, exit once all
        dataruns are complete.
    """
    start_time = datetime.datetime.now()
    public_ip = get_public_ip()

    # main loop
    while True:
        # get all pending and running dataruns, or all pending/running
        # dataruns from the list we were given
        dataruns = db.get_dataruns(include_ids=datarun_ids, ignore_complete=True)
        if not dataruns:
            if wait:
                logger.warning('No dataruns found. Sleeping %d seconds and trying again.'
                               % LOOP_WAIT)
                time.sleep(LOOP_WAIT)
                continue
            else:
                logger.warning('No dataruns found. Exiting.')
                break

        max_priority = max([r.priority for r in dataruns])
        priority_runs = [r for r in dataruns if r.priority == max_priority]

        # either choose a run randomly, or take the run with the lowest ID
        if choose_randomly:
            run = random.choice(priority_runs)
        else:
            run = sorted(priority_runs, key=attrgetter('id'))[0]

        # say we've started working on this datarun, if we haven't already
        db.mark_datarun_running(run.id)

        logger.info('Computing on datarun %d' % run.id)
        # actual work happens here
        worker = Worker(db, run, save_files=save_files,
                        cloud_mode=cloud_mode, aws_config=aws_config,
                        log_config=log_config, public_ip=public_ip)
        try:
            worker.run_classifier()
        except ClassifierError:
            # the exception has already been handled; just wait a sec so we
            # don't go out of control reporting errors
            logger.warning('Something went wrong. Sleeping %d seconds.' % LOOP_WAIT)
            time.sleep(LOOP_WAIT)

        elapsed_time = (datetime.datetime.now() - start_time).total_seconds()
        if total_time is not None and elapsed_time >= total_time:
            logger.warning('Total run time for worker exceeded; exiting.')
            break
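# Illustrative invocation of work(); it assumes `db` is an already-constructed
# Database instance (see the `db` parameter above), and the datarun IDs and
# time limit are made-up values.
work(db,
     datarun_ids=[1, 2, 3],   # only compute on these dataruns
     choose_randomly=True,    # pick among the highest-priority runs at random
     total_time=3600,         # stop after roughly one hour of work
     wait=False)              # exit once the listed dataruns are complete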
def get_datarun_steps_info(datarun_id, classifier_start=None,
                           classifier_end=None, nice=False):
    """
    Get the scores of the hyperpartitions/methods at each step.
    :param datarun_id: the id of the datarun
    :param classifier_start: only return scores from the `classifier_start`-th
        classifier onward
    :param classifier_end: only return scores before the `classifier_end`-th
        classifier.
        Note that `classifier_start` and `classifier_end` are not ids; they
        start from 1. (This is because the caller may not know the classifier
        ids of the datarun.)
    :param nice: if True, return the results in a nicer format, grouped by method
    :return:
        if nice is False,
            [
                {"1": 0.2, "2": 0.3, ...},
                ...
            ]
        if nice is True,
            [
                {
                    "knn": [0.2, 0.3],
                    "logreg": [0.1],
                    ...
                },
                ...
            ]
    """
    if classifier_start is None:
        classifier_start = -np.inf
    if classifier_end is None:
        classifier_end = np.inf

    db = get_db()
    datarun = db.get_datarun(datarun_id=datarun_id)
    hyperpartitions = db.get_hyperpartitions(datarun_id=datarun_id)

    # load classifiers and build scores lists
    # make sure all hyperpartitions are present in the dict, even ones that
    # don't have any classifiers. That way the selector can choose
    # hyperpartitions that haven't been scored yet.
    hyperpartition_scores = {fs.id: [] for fs in hyperpartitions}
    classifiers = db.get_classifiers(datarun_id=datarun_id,
                                     status=ClassifierStatus.COMPLETE)
    selected_classifiers = [c for c in classifiers
                            if c.hyperpartition_id in hyperpartition_scores]

    # Create a temporary worker
    worker = Worker(db, datarun, public_ip=get_public_ip())

    bandit_scores_of_steps = []
    for i, c in enumerate(selected_classifiers):
        if i >= classifier_end:
            break
        # the cast to float is necessary because the score is a Decimal;
        # doing Decimal-float arithmetic throws errors later on.
        score = float(getattr(c, datarun.score_target) or 0)
        hyperpartition_scores[c.hyperpartition_id].append(score)
        bandit_scores = selector_bandit_scores(worker.selector, hyperpartition_scores)
        bandit_scores = {key: float("%.5f" % val)
                         for key, val in bandit_scores.items()}
        if i < classifier_start:
            continue
        bandit_scores_of_steps.append(bandit_scores)

    # For a nicer formatted output
    if nice:
        results = []
        hp_id2method = {fs.id: fs.method for fs in hyperpartitions}
        for bandit_scores in bandit_scores_of_steps:
            res = defaultdict(list)
            for hp_id, score in bandit_scores.items():
                res[hp_id2method[hp_id]].append(score)
            results.append(res)
        return results

    return bandit_scores_of_steps
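# Illustrative call of get_datarun_steps_info(); the datarun id and the
# classifier window below are made-up values.
steps = get_datarun_steps_info(datarun_id=7, classifier_start=10,
                               classifier_end=20, nice=True)

for scores_by_method in steps:
    # each step maps a method name (e.g. 'knn', 'logreg') to the bandit
    # scores of that method's hyperpartitions after one more classifier
    print(dict(scores_by_method))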