def test_compute_fixed_hash():
    """Check that compute_fixed_hash follows in-place changes to a nested object.

    The hash of a nested list/dict/array/tuple structure must match a known
    value, change when one array element is modified, and return to the
    original value once that element is restored.
    """
    pristine_digest = '6c98fabc301361863f321f6149a8a12a'
    altered_digest = '8aee1f739fc9a612ed72e14682026627'

    array_part = np.arange(10)
    complex_obj = [1, 'd', {'a': 4, 'b': array_part}, (7, range(10))]

    original_code = compute_fixed_hash(complex_obj)
    # Hashing the untouched object a second time must be repeatable.
    assert compute_fixed_hash(complex_obj) == original_code
    assert original_code == pristine_digest

    # array_part is the very object stored under complex_obj[2]['b'].
    array_part[6] = 0
    assert compute_fixed_hash(complex_obj) == altered_digest
    assert altered_digest != original_code

    array_part[6] = 6  # Revert to old value
    assert compute_fixed_hash(complex_obj) == original_code
def test_compute_fixed_hash():
    """Check compute_fixed_hash against fixed digests before, during and after a mutation."""
    reference_digest = '285f0ff77c0cb6a5fc529ae55775f9dd'
    mutated_digest = '3124854bda236cd50f0cc04bd6f84935'

    array_part = np.arange(10)
    complex_obj = [1, 'd', {'a': 4, 'b': array_part}, (7, range(10))]
    assert compute_fixed_hash(complex_obj) == reference_digest

    # array_part is the same object as complex_obj[2]['b'], so this edits the nested structure.
    array_part[6] = 0
    assert compute_fixed_hash(complex_obj) == mutated_digest

    array_part[6] = 6  # Revert to old value
    assert compute_fixed_hash(complex_obj) == reference_digest
def is_valid(self, last_run_args=None, current_args=None):
    """
    Compare the arguments of the last run against the experiment's current arguments.

    :param last_run_args: Arguments of the last run.  If None, they are read from the ARGS field of
        this record's info and cast to a dict.
    :param current_args: Current arguments.  If None, they are taken from the experiment that is
        loaded via this record's identifier, and cast to a dict.
    :return: True if the fixed hashes of both argument sets are equal (i.e. the arguments have not
        changed), otherwise False.
    """
    if last_run_args is None:
        stored_args = self.info.get_field(ExpInfoFields.ARGS)
        last_run_args = dict(stored_args)
    if current_args is None:
        experiment_id = record_id_to_experiment_id(self.get_identifier())
        experiment = load_experiment(experiment_id)
        current_args = dict(experiment.get_args())
    previous_hash = compute_fixed_hash(last_run_args)
    present_hash = compute_fixed_hash(current_args)
    return previous_hash == present_hash
def test_compute_fixed_hash_terminates():
    """Check that compute_fixed_hash returns on self-referencing lists, with a stable digest."""
    expected_digest = 'cffaee424a62cd1893825a5811c34b8d'

    def _build_cycle():
        # outer contains inner, and inner contains outer again.
        outer = []
        inner = [outer]
        outer.append(inner)
        return outer

    first_cycle = _build_cycle()
    assert compute_fixed_hash(first_cycle) == expected_digest

    # A second, independently built cycle (the first one is still alive) must hash identically.
    second_cycle = _build_cycle()
    assert compute_fixed_hash(second_cycle) == expected_digest
def test_compute_fixed_hash_terminates():
    """Hashing a list that (indirectly) contains itself must finish and give a known digest."""
    expected_digest = 'cffaee424a62cd1893825a5811c34b8d'

    first_outer = []
    first_outer.append([first_outer])  # first_outer -> [first_outer] -> first_outer -> ...
    first_code = compute_fixed_hash(first_outer)
    assert first_code == expected_digest

    # Build the same shape again from fresh objects; the digest must not depend on identity.
    second_outer = []
    second_outer.append([second_outer])
    second_code = compute_fixed_hash(second_outer)
    assert second_code == expected_digest
def args_valid(self, last_run_args=None, current_args=None):
    """
    Tell whether the arguments of the last run still match the experiment's current arguments.

    :param last_run_args: Arguments of the last run.  If None, they are read from the ARGS field of
        this record's info.
    :param current_args: Current arguments.  If None, they are taken from this record's experiment.
    :return: True if the experiment arguments have not changed
        False if they have changed
        None if it cannot be determined because arguments are not hashable objects.
    """
    if last_run_args is None:
        last_run_args = self.info.get_field(ExpInfoFields.ARGS)  # A list of 2-tuples
    if current_args is None:
        current_args = dict(self.get_experiment().get_args())

    # Both sides are cast to plain dicts because a different argument order shouldn't matter.
    try:
        previous_hash = compute_fixed_hash(dict(last_run_args), try_objects=True)
        present_hash = compute_fixed_hash(dict(current_args), try_objects=True)
    except NotImplementedError:  # Happens when we have unhashable arguments
        return None
    return previous_hash == present_hash
def test_compute_fixed_hash():
    """Check that compute_fixed_hash follows in-place changes to a nested object.

    The expected digests are selected by _IS_PYTHON_3.
    """
    # The fixed hash differs between Python 2 and Python 3 for this object.  The reason is not
    # established (maybe something to do with changes to strings).
    if _IS_PYTHON_3:
        expected_initial = 'c9b83dd2e1099c3bcbb05e3c69327c72'
        expected_modified = 'a783b1f098fca8ccaf977a3123be5ac4'
    else:
        expected_initial = '6c98fabc301361863f321f6149a8a12a'
        expected_modified = '8aee1f739fc9a612ed72e14682026627'

    array_part = np.arange(10)
    complex_obj = [1, 'd', {'a': 4, 'b': array_part}, (7, list(range(10)))]

    original_code = compute_fixed_hash(complex_obj)
    # Hashing the untouched object a second time must be repeatable.
    assert compute_fixed_hash(complex_obj) == original_code
    assert original_code == expected_initial

    # array_part is the very object stored under complex_obj[2]['b'].
    array_part[6] = 0
    assert compute_fixed_hash(complex_obj) == expected_modified
    assert expected_modified != original_code

    array_part[6] = 6  # Revert to old value
    assert compute_fixed_hash(complex_obj) == original_code
def test_compute_fixed_hash():
    """Hash a nested object, mutate it, restore it, and compare against interpreter-specific digests."""
    # The fixed hash differs between Python 2 and Python 3 here.  Why is not established
    # (maybe something to do with changes to strings).
    python3_digests = ('c9b83dd2e1099c3bcbb05e3c69327c72', 'a783b1f098fca8ccaf977a3123be5ac4')
    python2_digests = ('6c98fabc301361863f321f6149a8a12a', '8aee1f739fc9a612ed72e14682026627')
    expected_initial, expected_modified = python3_digests if _IS_PYTHON_3 else python2_digests

    nested_dict = {'a': 4, 'b': np.arange(10)}
    complex_obj = [1, 'd', nested_dict, (7, list(range(10)))]

    original_code = compute_fixed_hash(complex_obj)
    assert compute_fixed_hash(complex_obj) == original_code
    assert original_code == expected_initial

    # nested_dict is the same object as complex_obj[2], so this edits the hashed structure.
    nested_dict['b'][6] = 0
    assert compute_fixed_hash(complex_obj) == expected_modified
    assert expected_modified != original_code

    nested_dict['b'][6] = 6  # Revert to old value
    assert compute_fixed_hash(complex_obj) == original_code
def get_function_hash_filename(fcn, argname_argvalue_list, create_dir_if_not = False):
    """
    Build the path of the memo file for one call of fcn.

    :param fcn: The function; passed to get_memo_dir to find the containing directory.
    :param argname_argvalue_list: The call's arguments; their fixed hash becomes the file name.
    :param create_dir_if_not: If True, make_file_dir is called on the resulting path.
    :return: The path '<memo dir>/<hash of arguments>.pkl'
    """
    # TODO: Include function path in hash?  Or module path, which would allow memos to be shareable.
    args_code = compute_fixed_hash(argname_argvalue_list)
    memo_dir = get_memo_dir(fcn)
    memo_filename = '{}.pkl'.format(args_code)
    full_path = os.path.join(memo_dir, memo_filename)
    if create_dir_if_not:
        make_file_dir(full_path)
    return full_path
def _get_record_rows_cached(record_id, headers, raise_display_errors, truncate_to, ignore_valid_keys = ()):
    """
    Return the display rows for an experiment record, using an on-disk cache when possible.

    The cache file lives under '_ui_cache' and is keyed by a fixed hash of the record id, the header
    values, truncate_to and ignore_valid_keys.  Freshly computed rows are only written to the cache
    when the record's status is not STARTED, because a record that may still be running can change.

    :param record_id: Identifier of the experiment record.
    :param headers: List of header fields (each exposing ``.value``); one row is expected per header.
    :param raise_display_errors: Passed through to _get_record_rows.
    :param truncate_to: Passed through to _get_record_rows; also part of the cache key.
    :param ignore_valid_keys: Passed through to _get_record_rows; also part of the cache key.
    :return: The record rows, one per header.
    """
    cache_key = compute_fixed_hash((record_id, [h.value for h in headers], truncate_to, ignore_valid_keys))
    path = get_artemis_data_path(os.path.join('_ui_cache', cache_key), make_local_dir=True)
    if os.path.exists(path):
        try:
            with open(path, 'rb') as f:
                record_rows = pickle.load(f)
            if len(record_rows) != len(headers):
                os.remove(path)  # This should never happen.  But in case it somehow does, we just go ahead and compute again.
            else:
                return record_rows
        except Exception:
            # Best effort: an unreadable cache entry is reported and the rows are recomputed below.
            # Catching Exception (rather than everything) lets KeyboardInterrupt/SystemExit through.
            logging.warning('Failed to load cached record info: {}'.format(record_id))

    # The status is requested as one extra trailing field so it can decide whether to cache.
    info_plus_status = _get_record_rows(record_id=record_id, headers=headers+[ExpRecordDisplayFields.STATUS],
        raise_display_errors=raise_display_errors, truncate_to=truncate_to, ignore_valid_keys=ignore_valid_keys)
    record_rows, status = info_plus_status[:-1], info_plus_status[-1]
    if status == ExpStatusOptions.STARTED:  # In this case it's still running (maybe) and we don't want to cache because it'll change
        return record_rows
    with open(path, 'wb') as f:
        pickle.dump(record_rows, f)
    return record_rows
def _filter_experiments(user_range, exp_record_dict):
    """
    Decide, for every experiment in exp_record_dict, whether it is selected by user_range.

    user_range may be an experiment id, a number range understood by interpret_numbers, 'all',
    'has:<phrase>', 'hasnot:<phrase>', '1diff:<range>', or one of 'unfinished', 'invalid', 'corrupt'.

    :param user_range: The selection string.
    :param exp_record_dict: A collection keyed by experiment id; its iteration order fixes the output order.
    :return: An OrderedDict<experiment_id: bool> telling which experiments are selected.
    :raise Exception: If user_range matches none of the known forms.
    """
    def _label(flags):
        # Pair each experiment id with its flag, in the order of exp_record_dict.
        return OrderedDict((exp_id, flag) for exp_id, flag in izip_equal(exp_record_dict, flags))

    if user_range in exp_record_dict:
        return _label([exp_id == user_range for exp_id in exp_record_dict])

    number_range = interpret_numbers(user_range)
    if number_range is not None:
        return _label([index in number_range for index in xrange(len(exp_record_dict))])

    if user_range == 'all':
        return _label([True]*len(exp_record_dict))

    if user_range.startswith('has:'):
        phrase = user_range[len('has:'):]
        return _label([phrase in exp_id for exp_id in exp_record_dict])

    if user_range.startswith('1diff:'):
        # Select experiments whose arguments differ by at most one element from the selected experiments.
        base_range = user_range[len('1diff:'):]
        base_range_exps = select_experiments(base_range, exp_record_dict)  # list<experiment_id>
        all_exp_args_hashes = {}  # dict<experiment_id : set<arg_hashes>>
        for eid in exp_record_dict:
            arg_items = load_experiment(eid).get_args().items()
            all_exp_args_hashes[eid] = set(compute_fixed_hash(item) for item in arg_items)
        # Note: argument counts are not required to match, because the experiments may have
        # different root functions.
        flags = []
        for eid in exp_record_dict:
            own_hashes = all_exp_args_hashes[eid]
            flags.append(any(len(own_hashes.difference(all_exp_args_hashes[other_eid])) <= 1 for other_eid in base_range_exps))
        return _label(flags)

    if user_range.startswith('hasnot:'):
        phrase = user_range[len('hasnot:'):]
        return _label([phrase not in exp_id for exp_id in exp_record_dict])

    if user_range in ('unfinished', 'invalid', 'corrupt'):
        # Return all experiments where all records are unfinished/invalid/corrupt
        record_filters = _filter_records(user_range, exp_record_dict)
        return _label([all(record_filters[exp_id]) for exp_id in exp_record_dict])

    raise Exception("Don't know how to use input '{}' to select experiments".format(user_range))
def _get_record_rows_cached(record_id, headers, raise_display_errors, truncate_to, ignore_valid_keys = ()):
    """
    Return the display rows for an experiment record, using an on-disk cache when possible.

    The cache file lives under '_ui_cache' and is keyed by a fixed hash of the record id, the header
    values, truncate_to and ignore_valid_keys.  Freshly computed rows are only written to the cache
    when the record's status is not STARTED, because a record that may still be running can change.

    :param record_id: Identifier of the experiment record.
    :param headers: List of header fields (each exposing ``.value``); one row is expected per header.
    :param raise_display_errors: Passed through to _get_record_rows.
    :param truncate_to: Passed through to _get_record_rows; also part of the cache key.
    :param ignore_valid_keys: Passed through to _get_record_rows; also part of the cache key.
    :return: The record rows, one per header.
    """
    cache_key = compute_fixed_hash((record_id, [h.value for h in headers], truncate_to, ignore_valid_keys))
    path = get_artemis_data_path(os.path.join('_ui_cache', cache_key), make_local_dir=True)
    if os.path.exists(path):
        try:
            with open(path, 'rb') as f:
                record_rows = pickle.load(f)
            if len(record_rows) != len(headers):
                os.remove(path)  # This should never happen.  But in case it somehow does, we just go ahead and compute again.
            else:
                return record_rows
        except Exception:
            # Best effort: an unreadable cache entry is reported and the rows are recomputed below.
            # Catching Exception (rather than everything) lets KeyboardInterrupt/SystemExit through.
            logging.warning('Failed to load cached record info: {}'.format(record_id))

    # The status is requested as one extra trailing field so it can decide whether to cache.
    info_plus_status = _get_record_rows(record_id=record_id, headers=headers+[ExpRecordDisplayFields.STATUS],
        raise_display_errors=raise_display_errors, truncate_to=truncate_to, ignore_valid_keys=ignore_valid_keys)
    record_rows, status = info_plus_status[:-1], info_plus_status[-1]
    if status == ExpStatusOptions.STARTED:  # In this case it's still running (maybe) and we don't want to cache because it'll change
        return record_rows
    with open(path, 'wb') as f:
        pickle.dump(record_rows, f)
    return record_rows
def get_function_hash_filename(fcn, argname_argvalue_list, create_dir_if_not = False):
    """
    Return the memo file path for calling fcn with the given arguments.

    :param fcn: The function; get_memo_dir(fcn) gives the directory holding its memos.
    :param argname_argvalue_list: The call's arguments; the file is named after their fixed hash.
    :param create_dir_if_not: If True, make_file_dir is called on the path before returning.
    :return: The full path, ending in '<hash>.pkl'
    """
    # TODO: Include function path in hash?  Or module path, which would allow memos to be shareable.
    args_code = compute_fixed_hash(argname_argvalue_list)
    full_path = os.path.join(
        get_memo_dir(fcn),
        '{}.pkl'.format(args_code),
    )
    if not create_dir_if_not:
        return full_path
    make_file_dir(full_path)
    return full_path
def args_valid(self, last_run_args=None, current_args=None):
    """
    Tell whether the arguments of the last run still match the experiment's current arguments.

    :param Optional[OrderedDict] last_run_args: The arguments from the last run
    :param Optional[OrderedDict] current_args: The arguments from the current experiment in code
    :return: True if the experiment arguments have not changed
        False if they have changed
        None if it cannot be determined because arguments are not hashable objects.
    """
    if last_run_args is None:
        last_run_args = self.get_args()
        # Stored arguments that are UnPicklableArg placeholders cannot be compared.
        for _, arg_value in last_run_args.items():
            if isinstance(arg_value, UnPicklableArg):
                return None
    if current_args is None:
        current_args = dict(self.get_experiment().get_args())

    # Both sides are cast to plain dicts because a different argument order shouldn't matter.
    try:
        previous_hash = compute_fixed_hash(dict(last_run_args), try_objects=True)
        present_hash = compute_fixed_hash(dict(current_args), try_objects=True)
    except NotImplementedError:  # Happens when we have unhashable arguments
        return None
    return previous_hash == present_hash
def _filter_experiments(user_range, exp_record_dict, return_is_in = False):
    """
    Decide, for every experiment in exp_record_dict, whether it is selected by user_range.

    user_range may be an experiment id, a number range understood by interpret_numbers, 'all',
    'started', 'has:<phrase>', 'hasnot:<phrase>', '1diff:<range>', one of 'unfinished', 'invalid',
    'corrupt', or any of these prefixed with '~' to negate the selection.

    :param user_range: The selection string.
    :param exp_record_dict: A collection keyed by experiment id; its iteration order fixes the output order.
    :param return_is_in: If True, return the bare list of booleans instead of an OrderedDict.
    :return: A list<bool> if return_is_in, otherwise an OrderedDict<experiment_id: bool>
    :raise RecordSelectionError: If user_range matches none of the known forms.
    """
    if user_range.startswith('~'):
        # Negation: evaluate the rest of the string, then flip every flag.
        positive = _filter_experiments(user_range=user_range[1:], exp_record_dict=exp_record_dict, return_is_in=True)
        is_in = [not flag for flag in positive]
    elif user_range in exp_record_dict:
        is_in = [exp_id == user_range for exp_id in exp_record_dict]
    else:
        number_range = interpret_numbers(user_range)
        if number_range is not None:
            is_in = [index in number_range for index in xrange(len(exp_record_dict))]
        elif user_range == 'all':
            is_in = [True]*len(exp_record_dict)
        elif user_range.startswith('has:'):
            phrase = user_range[len('has:'):]
            is_in = [phrase in exp_id for exp_id in exp_record_dict]
        elif user_range.startswith('1diff:'):
            # Experiments whose argument hashes differ in at most one element from a base experiment.
            base_range = user_range[len('1diff:'):]
            base_range_exps = select_experiments(base_range, exp_record_dict)  # list<experiment_id>
            all_exp_args_hashes = {}  # dict<experiment_id : set<arg_hashes>>
            for eid in exp_record_dict:
                arg_items = load_experiment(eid).get_args().items()
                all_exp_args_hashes[eid] = set(compute_fixed_hash(item) for item in arg_items)
            is_in = []
            for eid in exp_record_dict:
                own_hashes = all_exp_args_hashes[eid]
                is_in.append(any(len(own_hashes.difference(all_exp_args_hashes[other_eid])) <= 1 for other_eid in base_range_exps))
        elif user_range.startswith('hasnot:'):
            phrase = user_range[len('hasnot:'):]
            is_in = [phrase not in exp_id for exp_id in exp_record_dict]
        elif user_range in ('unfinished', 'invalid', 'corrupt'):
            # Return all experiments where all records are unfinished/invalid/corrupt
            record_filters = _filter_records(user_range, exp_record_dict)
            is_in = [all(record_filters[exp_id]) for exp_id in exp_record_dict]
        elif user_range == 'started':
            is_in = [has_experiment_record(exp_id) for exp_id in exp_record_dict]
        else:
            raise RecordSelectionError("Don't know how to use input '{}' to select experiments".format(user_range))

    if return_is_in:
        return is_in
    return OrderedDict((exp_id, flag) for exp_id, flag in izip_equal(exp_record_dict, is_in))
def test_dictarraylist():
    """Build the same Duck structure in several ways and run identical checks on each variant."""
    # We build the same thing in several different ways (a-e below, plus the slices f and g)

    # a: element-by-element assignment
    a = Duck()
    a['a', 'aa1'] = 1
    a['a', 'aa2'] = 2
    a['b', 0, 'subfield1'] = 4
    a['b', 0, 'subfield2'] = 5
    a['b', 1, 'subfield1'] = 6
    a['b', 1, 'subfield2'] = 7
    a['b', 2, 'subfield2'] = 9  # Note: Out of order assignment here!
    a['b', 2, 'subfield1'] = 8

    # b: round trip through to_struct / from_struct
    b = Duck.from_struct(a.to_struct())
    assert a.to_struct() == b.to_struct()

    # c: assign whole dicts, using `next` as the index under 'b'
    c = Duck()
    c['a', :] = OrderedDict([('aa1', 1), ('aa2', 2)])
    c['b', next, :] = OrderedDict([('subfield1', 4), ('subfield2', 5)])
    c['b', next, :] = OrderedDict([('subfield1', 6), ('subfield2', 7)])
    c['b', next, :] = OrderedDict([('subfield2', 9), ('subfield1', 8)])

    # d: assign the whole list of dicts under 'b' through two slices
    d = Duck()
    d['a', :] = OrderedDict([('aa1', 1), ('aa2', 2)])
    d['b', :, :] = [
        OrderedDict([('subfield1', 4), ('subfield2', 5)]),
        OrderedDict([('subfield1', 6), ('subfield2', 7)]),
        OrderedDict([('subfield2', 9), ('subfield1', 8)])
    ]

    # e: the same assignments using Ellipsis
    e = Duck()
    e['a', ...] = OrderedDict([('aa1', 1), ('aa2', 2)])
    e['b', ...] = [
        OrderedDict([('subfield1', 4), ('subfield2', 5)]),
        OrderedDict([('subfield1', 6), ('subfield2', 7)]),
        OrderedDict([('subfield2', 9), ('subfield1', 8)])
    ]

    # f, g: full slices of a
    f = a[:]
    g = a[:, :]

    for i, same_struct in enumerate([a, b, c, d, e, f, g]):
        print('Test {}'.format(i))
        assert a.to_struct() == same_struct.to_struct()
        assert a == same_struct
        # NOTE(review): this compares a against b on every iteration rather than against
        # same_struct - confirm whether that is intended.
        assert compute_fixed_hash(a.to_struct()) == compute_fixed_hash(
            b.to_struct())

        # Reads by key, index and slice
        assert same_struct['a', 'aa2'] == 2
        assert list(same_struct['b', 1, :].values()) == [6, 7]
        assert list(same_struct['b', :, 'subfield1'].values()) == [4, 6, 8]
        assert same_struct['b', :, :].deepvalues() == [[4, 5], [6, 7], [
            9, 8
        ]]  # Note that the order-switching gets through.
        assert same_struct['b', :, ['subfield1', 'subfield2']].deepvalues() == [[4, 5], [6, 7], [8, 9]]
        assert same_struct['b', :, :].to_struct() == [
            OrderedDict([('subfield1', 4), ('subfield2', 5)]),
            OrderedDict([('subfield1', 6), ('subfield2', 7)]),
            OrderedDict([('subfield2', 9), ('subfield1', 8)])
        ]
        assert same_struct['b', 1:, :].to_struct() == [
            OrderedDict([('subfield1', 6), ('subfield2', 7)]),
            OrderedDict([('subfield2', 9), ('subfield1', 8)])
        ]

        # Invalid keys and indices must raise
        with pytest.raises(KeyError):
            x = same_struct['a', 'aa3']
        with pytest.raises(KeyError):
            x = same_struct['a', 1, 'subfield1']
        with pytest.raises(IndexError):
            x = same_struct['b', 4, 'subfield1']
        with pytest.raises(
                KeyError
        ):  # This should raise an error because key 'a' does not have subkeys 1, 'subfield1'
            x = same_struct[:, 1, 'subfield1']

        assert np.array_equal(
            same_struct['b'].to_array(),
            [[4, 5], [6, 7], [8, 9]])  # Note... the order is corrected.

        # NOTE(review): np.array_equal receives a single argument here; presumably to_array()
        # raises InvalidKeyError before array_equal is ever called - confirm.
        with pytest.raises(InvalidKeyError):
            assert np.array_equal(
                same_struct.to_array())  # Note... the order is corrected.

        new_struct = same_struct.arrayify_axis(axis=1, subkeys=('b', ))
        assert np.array_equal(new_struct['b', 'subfield1'], [4, 6, 8])
        assert np.array_equal(new_struct['b', 'subfield2'], [5, 7, 9])
def test_dictarraylist():
    """Build the same Duck structure in several ways and run identical checks on each variant."""
    # We build the same thing in several different ways (a-e below, plus the slices f and g)

    # a: element-by-element assignment
    a = Duck()
    a['a', 'aa1'] = 1
    a['a', 'aa2'] = 2
    a['b', 0, 'subfield1'] = 4
    a['b', 0, 'subfield2'] = 5
    a['b', 1, 'subfield1'] = 6
    a['b', 1, 'subfield2'] = 7
    a['b', 2, 'subfield2'] = 9  # Note: Out of order assignment here!
    a['b', 2, 'subfield1'] = 8

    # b: round trip through to_struct / from_struct
    b = Duck.from_struct(a.to_struct())
    assert a.to_struct() == b.to_struct()

    # c: assign whole dicts, using `next` as the index under 'b'
    c = Duck()
    c['a', :] = OrderedDict([('aa1', 1), ('aa2', 2)])
    c['b', next, :] = OrderedDict([('subfield1', 4), ('subfield2', 5)])
    c['b', next, :] = OrderedDict([('subfield1', 6), ('subfield2', 7)])
    c['b', next, :] = OrderedDict([('subfield2', 9), ('subfield1', 8)])

    # d: assign the whole list of dicts under 'b' through two slices
    d = Duck()
    d['a', :] = OrderedDict([('aa1', 1), ('aa2', 2)])
    d['b', :, :] = [OrderedDict([('subfield1', 4), ('subfield2', 5)]), OrderedDict([('subfield1', 6), ('subfield2', 7)]), OrderedDict([('subfield2', 9), ('subfield1', 8)])]

    # e: the same assignments using Ellipsis
    e = Duck()
    e['a', ...] = OrderedDict([('aa1', 1), ('aa2', 2)])
    e['b', ...] = [OrderedDict([('subfield1', 4), ('subfield2', 5)]), OrderedDict([('subfield1', 6), ('subfield2', 7)]), OrderedDict([('subfield2', 9), ('subfield1', 8)])]

    # f, g: full slices of a
    f = a[:]
    g = a[:, :]

    for i, same_struct in enumerate([a, b, c, d, e, f, g]):

        print ('Test {}'.format(i))
        assert a.to_struct()==same_struct.to_struct()
        assert a==same_struct
        # NOTE(review): this compares a against b on every iteration rather than against
        # same_struct - confirm whether that is intended.
        assert compute_fixed_hash(a.to_struct()) == compute_fixed_hash(b.to_struct())

        # Reads by key, index and slice
        assert same_struct['a', 'aa2'] == 2
        assert list(same_struct['b', 1, :].values()) == [6, 7]
        assert list(same_struct['b', :, 'subfield1'].values()) == [4, 6, 8]
        assert same_struct['b', :, :].deepvalues() == [[4, 5], [6, 7], [9, 8]]  # Note that the order-switching gets through.
        assert same_struct['b', :, ['subfield1', 'subfield2']].deepvalues() == [[4, 5], [6, 7], [8, 9]]
        assert same_struct['b', :, :].to_struct() == [OrderedDict([('subfield1', 4), ('subfield2', 5)]), OrderedDict([('subfield1', 6), ('subfield2', 7)]), OrderedDict([('subfield2', 9), ('subfield1', 8)])]
        assert same_struct['b', 1:, :].to_struct() == [OrderedDict([('subfield1', 6), ('subfield2', 7)]), OrderedDict([('subfield2', 9), ('subfield1', 8)])]

        # Invalid keys and indices must raise
        with pytest.raises(KeyError):
            x = same_struct['a', 'aa3']

        with pytest.raises(KeyError):
            x = same_struct['a', 1, 'subfield1']

        with pytest.raises(IndexError):
            x = same_struct['b', 4, 'subfield1']

        with pytest.raises(KeyError):  # This should raise an error because key 'a' does not have subkeys 1, 'subfield1'
            x = same_struct[:, 1, 'subfield1']

        assert np.array_equal(same_struct['b'].to_array(), [[4, 5], [6, 7], [8, 9]])  # Note... the order is corrected.

        # NOTE(review): np.array_equal receives a single argument here; presumably to_array()
        # raises InvalidKeyError before array_equal is ever called - confirm.
        with pytest.raises(InvalidKeyError):
            assert np.array_equal(same_struct.to_array())  # Note... the order is corrected.

        new_struct = same_struct.arrayify_axis(axis=1, subkeys=('b',))
        assert np.array_equal(new_struct['b', 'subfield1'], [4, 6, 8])
        assert np.array_equal(new_struct['b', 'subfield2'], [5, 7, 9])
def _filter_experiments(user_range, exp_record_dict, return_is_in=False):
    """
    Decide, for every experiment in exp_record_dict, whether it is selected by user_range.

    user_range may be an experiment id, a number range understood by interpret_numbers, 'all',
    'started', 'has:<phrase>', 'hasnot:<phrase>', 'tag:<tag>', '1diff:<range>', or one of
    'unfinished', 'invalid', 'corrupt'.  A leading '~' negates a selection.  Selections can be
    combined with '|' (any) and '&' (all); '|' is split first, so '&' applies within each '|' part.

    :param user_range: The selection string.
    :param exp_record_dict: A collection keyed by experiment id; its iteration order fixes the output order.
    :param return_is_in: If True, return the bare list of booleans instead of an OrderedDict.
    :return: A list<bool> if return_is_in, otherwise an OrderedDict<experiment_id: bool>
    :raise RecordSelectionError: If user_range matches none of the known forms.
    """
    if '|' in user_range:
        # Union: an experiment is in if any of the sub-selections includes it.
        per_subrange = [
            _filter_experiments(subrange, exp_record_dict, return_is_in=True)
            for subrange in user_range.split('|')
        ]
        is_in = [any(column) for column in zip(*per_subrange)]
    elif '&' in user_range:
        # Intersection: an experiment is in only if every sub-selection includes it.
        per_subrange = [
            _filter_experiments(subrange, exp_record_dict, return_is_in=True)
            for subrange in user_range.split('&')
        ]
        is_in = [all(column) for column in zip(*per_subrange)]
    elif user_range.startswith('~'):
        # Negation: evaluate the rest of the string, then flip every flag.
        positive = _filter_experiments(user_range=user_range[1:], exp_record_dict=exp_record_dict, return_is_in=True)
        is_in = [not flag for flag in positive]
    elif user_range in exp_record_dict:
        is_in = [exp_id == user_range for exp_id in exp_record_dict]
    else:
        number_range = interpret_numbers(user_range)
        if number_range is not None:
            is_in = [index in number_range for index in xrange(len(exp_record_dict))]
        elif user_range == 'all':
            is_in = [True] * len(exp_record_dict)
        elif user_range.startswith('has:'):
            phrase = user_range[len('has:'):]
            is_in = [phrase in exp_id for exp_id in exp_record_dict]
        elif user_range.startswith('tag:'):
            tag = user_range[len('tag:'):]
            is_in = [tag in load_experiment(exp_id).get_tags() for exp_id in exp_record_dict]
        elif user_range.startswith('1diff:'):
            # Experiments whose argument hashes differ in at most one element from a base experiment.
            base_range = user_range[len('1diff:'):]
            base_range_exps = select_experiments(base_range, exp_record_dict)  # list<experiment_id>
            all_exp_args_hashes = {}  # dict<experiment_id : set<arg_hashes>>
            for eid in exp_record_dict:
                arg_items = load_experiment(eid).get_args().items()
                all_exp_args_hashes[eid] = set(compute_fixed_hash(item) for item in arg_items)
            is_in = []
            for eid in exp_record_dict:
                own_hashes = all_exp_args_hashes[eid]
                is_in.append(any(len(own_hashes.difference(all_exp_args_hashes[other_eid])) <= 1 for other_eid in base_range_exps))
        elif user_range.startswith('hasnot:'):
            phrase = user_range[len('hasnot:'):]
            is_in = [phrase not in exp_id for exp_id in exp_record_dict]
        elif user_range in ('unfinished', 'invalid', 'corrupt'):
            # Return all experiments where all records are unfinished/invalid/corrupt
            record_filters = _filter_records(user_range, exp_record_dict)
            is_in = [all(record_filters[exp_id]) for exp_id in exp_record_dict]
        elif user_range == 'started':
            is_in = [has_experiment_record(exp_id) for exp_id in exp_record_dict]
        else:
            raise RecordSelectionError("Don't know how to use input '{}' to select experiments".format(user_range))

    if return_is_in:
        return is_in
    return OrderedDict((exp_id, flag) for exp_id, flag in izip_equal(exp_record_dict, is_in))
def get_function_hash_filename(fcn, argname_argvalue_list):
    """
    Return the memo file path for calling fcn with the given arguments.

    :param fcn: The function; its __name__ forms the first part of the file name.
    :param argname_argvalue_list: The call's arguments; their fixed hash forms the second part.
    :return: The path '<MEMO_DIR>/<function name>-<hash of arguments>.pkl'
    """
    # TODO: Include function path in hash?  Or module path, which would allow memos to be shareable.
    args_code = compute_fixed_hash(argname_argvalue_list)
    memo_filename = '%s-%s.pkl' % (fcn.__name__, args_code)
    return os.path.join(MEMO_DIR, memo_filename)