def model_props_sorted(tree, branch, data, depth):
    """Populate the leaf ``tree[branch[-1]]`` with a property dict.

    A leaf that is already non-empty is left alone.  Otherwise the leaf is
    built from the entries of the outer-scope ``data_props`` listed under
    ``data_props['data_properties']``, labelled with the branch values of the
    first ``depth`` sort parameters, and passed through
    ``Data_Process().format`` before being stored.

    Args:
        tree: Dict holding the leaves of the current tree level.
        branch: Sequence of parameter values; its last item is the leaf key.
        data: Not used by this function.
        depth: Slice bound applied to the list of sort parameters.

    Returns:
        The same ``tree`` object.
    """
    leaf = branch[-1]

    # Leaf already filled in: nothing to do
    if tree[leaf] != {}:
        return tree

    # Update data properties
    props = {
        name: copy.deepcopy(data_props.get(name, None))
        for name in data_props['data_properties']
    }

    # Record the value this branch takes for each sort parameter
    sort_params = data_props['analysis_params']['sort_params'][:depth]
    props.update({name: branch[i] for i, name in enumerate(sort_params)})

    # File-name pattern: original head, first sort parameter, empty tail
    name_head = props['data_name_format'][0]
    first_sort_param = props['analysis_params']['sort_params'][0]
    props['data_name_format'] = [name_head, first_sort_param, '']

    Data_Process().format(props, file_update=True)

    tree[leaf] = props
    return tree
def __init__(self, data_props=None):
    """Import the dataset described by ``data_props`` and keep it on self.

    Args:
        data_props: Import settings handed to ``Data_Process().importer``.
            When omitted, a fresh dict of default settings (``*.npz`` files
            in the ``dataset`` directory) is created for this instance.

    Attributes set:
        data: Element ``[2]`` of the importer result, or ``None`` when the
            importer returned ``None``.
        data_props: The settings dict that was used (the caller's object
            when one was passed).
    """
    # Build the defaults per call.  The dict is stored on the instance, so a
    # dict literal in the signature would be shared by every instance that
    # relies on the default.
    if data_props is None:
        data_props = {
            'data_files': '*.npz',
            'data_types': ['sites', 'observables', 'model_props'],
            'data_typed': 'dict_split',
            'data_format': 'npz',
            'data_dir': 'dataset',
        }

    display(print_it=True, time_it=False, m='Model Analysis...')

    # Import Data
    data = Data_Process().importer(data_props, upconvert=False, disp=False)

    # NOTE(review): these two prints look like leftover debugging output;
    # kept so the console output is unchanged.
    print(data_props.get('data_dir'))
    print(data)

    # Use Typed Data Format
    self.data = None if data is None else data[2]
    self.data_props = data_props
def plot(self, data, model_props, plot_args, new_plot=True):
    """Plot ``data`` with ``MonteCarloPlot`` and optionally save the figure.

    Args:
        data: Dict of data to plot; its keys select the entries of
            ``self.data_props['observe_props']`` given to the plot object
            (``[True, key]`` is used for keys without an entry).
        model_props: Dict of model properties.  ``'data_file'`` names the
            figure looked up when ``new_plot`` is false and ``'data_save'``
            (default ``True``) controls saving.
        plot_args: Dict of keyword arguments forwarded to both
            ``MonteCarloPlot`` and ``MC_plotter``.
        new_plot: When false, nothing is plotted if the importer finds an
            existing ``pdf`` for ``model_props['data_file']``.

    Returns:
        None.
    """
    # Check if Plot already exists.
    # The bare name ``data_props`` is not defined in this method; the
    # instance's own settings are used for the lookup.
    # NOTE(review): confirm ``self.data_props`` (rather than ``model_props``)
    # is the intended import configuration.
    if not new_plot:
        existing = Data_Process().importer(
            self.data_props,
            data_files=model_props.get('data_file', 'None'),
            format='pdf')
        if existing is not None:
            return

    observe_props = self.data_props['observe_props']
    plot_obj = MonteCarloPlot(
        {k: observe_props.get(k, [True, k]) for k in data.keys()},
        model_props, **plot_args)

    # Plot Data
    plot_obj.MC_plotter(data, **plot_args)

    # Save Figures
    if model_props.get('data_save', True):
        plot_obj.plot_save(model_props, read_write='ow', fig_size=(18, 12))
def reduce_sorted(tree, branch, data, root, model_props, rep):
    """Store the reduced representation ``rep`` of one branch in ``tree``.

    ``data`` and ``model_props`` are first walked down along ``branch``.  If
    the leaf ``tree[branch[-1]]`` already differs from ``root`` it is left
    untouched.  Otherwise a previously exported result is imported when one
    is found; if none is found, ``dim_reduce`` is run on the concatenated
    branch data and its result is exported.

    Relies on ``data_props`` and ``parameters`` from the enclosing scope.

    Args:
        tree: Dict holding the leaves of the current tree level.
        branch: Sequence of parameter values locating the leaf.
        data: Nested dict of sorted data, indexed by the values in ``branch``.
        root: Value marking a leaf that has not been filled yet.
        model_props: Nested dict of model properties; descended along
            ``branch`` wherever the key exists.
        rep: Name of the representation; indexes
            ``model_props['data_format']`` and is passed to ``dim_reduce``.

    Returns:
        The same ``tree`` object.  A newly filled leaf is the tuple
        ``(repeated keys, reduced data, per-entry integer means)``.
    """
    # Descend to the data (and matching properties) of this branch
    for b in branch:
        data = data[b]
        model_props = model_props.get(b, model_props)
    keys = sorted(data.keys())
    data_concat = [data[k] for k in keys]

    # Check if Data Exists
    if tree[b] != root:
        return tree

    dim_reduc_params = data_props['analysis_params'][
        'dim_reduc_params'].copy()

    # File name of the cached result for this branch
    temp_name = '_'.join([
        'rep', rep[0],
        '_'.join(p for p in parameters[1:len(branch)]) +
        ('_'.join(str(v) for v in branch[1:]))
    ]) + ('.' + model_props['data_format'][rep])
    cache_key = temp_name.split('.')[0]

    data_temp = Data_Process().importer(
        model_props,
        data_files=model_props['data_file'] + temp_name,
        disp=True)

    # Keep the leading fraction 'Ns' of every entry and stack the entries
    Ns = int(np.shape(data_concat)[1] * dim_reduc_params.pop('Ns'))
    data_concat = np.concatenate([d[:Ns] for d in data_concat], axis=0)

    if data_temp is None:
        print('New %s Data at ' % rep, parameters[:len(branch)], branch)
        data_reduc = dim_reduce(data_concat, rep=rep, **dim_reduc_params)
        # Export the reduced result: the cache-hit branch below reads this
        # entry back as ``data_reduc``, so the raw branch data must not be
        # written here.
        Data_Process().exporter({cache_key: data_reduc},
                                model_props,
                                read_write='a')
    else:
        data_reduc = data_temp[0][model_props['data_file'] + cache_key]
        print('Previous %s Data at ' % rep, parameters[:len(branch)],
              branch, np.shape(data_reduc))

    # One key label per row of the reduced data
    keys = np.repeat(keys, np.shape(data_reduc)[0] // len(keys))
    tree[b] = (keys, data_reduc, [int(np.mean(d)) for d in data_concat])
    return tree
def sort(self, data, parameters, data_props, data_types=None):
    """Sort each requested data type into a tree keyed by parameter values.

    Trees have the form ``{param0_i: {param1_j: [param2_k values]}}``,
    e.g. for the parameters q, L, T.

    Args:
        data: Dict of imported data by type.  It is modified in place:
            sorted trees are stored under ``'<type>_sorted'`` (or under the
            type name itself for ``'tsne'`` / ``'pca'``).
        parameters: Names of the model properties that define the successive
            levels of each tree.
        data_props: Dict of data properties; a deep copy is used internally.
        data_types: Types to sort.  Defaults to ``data_props['data_types']``
            without ``'sorted'``.

    Returns:
        The ``data`` dict.
    """

    def tree_sort(parameters,
                  data_props,
                  data,
                  branch_func,
                  root=[],
                  depth=None,
                  *args):
        # Create Branches
        branches = {
            k: b[:depth]
            for k, b in set_branch(data_props, parameters).items()
        }

        # Create Tree
        tree = get_tree(branches.values(), root)

        # Set Tree
        for k in branches.keys():
            set_tree(tree, branches[k], data.get(k, data), branch_func,
                     *args)

        return tree, branches

    def get_branch(tree, branch, depth=-1):
        for b in branch[:depth - 1]:
            tree = tree[b]
        return list(tree[branch[depth]].keys())

    def set_branch(data_props, parameters):
        # One branch (list of hashable parameter values) per dataset key
        branch = {}
        for k, d in data_props.items():
            branch[k] = [hashable(d[p]) for p in parameters]
        return branch

    def get_tree(branches, root=[]):
        # Recursively nest dicts by the first branch value until the
        # branches are exhausted, then place a copy of root at the leaf
        if max([len(b) for b in branches]) > 0:
            tree = {}
            b0_list = []
            for b in branches:
                b0_list.extend(np.atleast_1d(b[0]))
            for b0 in set(b0_list):
                tree[b0] = get_tree(
                    [b[1:] for b in branches if b0 in np.atleast_1d(b[0])],
                    root)
        else:
            if isinstance(root, np.ndarray):
                tree = np.copy(root)
            else:
                tree = root.copy()
        return tree

    def set_tree(tree,
                 branch,
                 data,
                 branch_func=lambda t, b, d, *a: d,
                 *args):
        # Walk to the last level, then let branch_func fill the leaf in place
        for b in branch[:-1]:
            tree = tree[b]
        tree = branch_func(tree, branch, data, *args)

    def sites_sorted(tree, branch, data, root):
        for i, b in enumerate(np.atleast_1d(branch[-1])):
            if tree[b] == root:
                tree[b] = data[i]
            else:
                tree[b] = np.append(tree[b], data[i], axis=0)
        return tree

    def observables_sorted(tree, branch, data, root, depth):
        for i, b in enumerate(np.atleast_1d(branch[-1])):
            for j in range(np.shape(data)[0]):
                for k in data[j].keys():
                    if k not in tree[b].keys():
                        tree[b][k] = data[j][k][i]
                    else:
                        tree[b][k] = np.append(
                            np.atleast_1d(tree[b][k]),
                            np.atleast_1d(dim_reduct(data[j][k][i])),
                            axis=-1)
        return tree

    def model_props_sorted(tree, branch, data, depth):
        if tree[branch[-1]] != {}:
            return tree

        # Update data properties
        props = {}
        for p in data_props['data_properties']:
            props[p] = copy.deepcopy(data_props.get(p, None))

        for i, p in enumerate(
                data_props['analysis_params']['sort_params'][:depth]):
            props[p] = branch[i]

        props['data_name_format'] = [props['data_name_format'][0]] + (
            [props['analysis_params']['sort_params'][0]] + [''])
        Data_Process().format(props, file_update=True)

        tree[branch[-1]] = props
        return tree

    def reduce_sorted(tree, branch, data, root, model_props, rep):
        # Descend to the data (and matching properties) of this branch
        for b in branch:
            data = data[b]
            model_props = model_props.get(b, model_props)
        keys = sorted(data.keys())
        data_concat = [data[k] for k in keys]

        # Check if Data Exists
        if tree[b] != root:
            return tree

        dim_reduc_params = data_props['analysis_params'][
            'dim_reduc_params'].copy()

        temp_name = '_'.join([
            'rep', rep[0],
            '_'.join(p for p in parameters[1:len(branch)]) +
            ('_'.join(str(v) for v in branch[1:]))
        ]) + ('.' + model_props['data_format'][rep])
        cache_key = temp_name.split('.')[0]

        data_temp = Data_Process().importer(
            model_props,
            data_files=model_props['data_file'] + temp_name,
            disp=True)

        # Keep the leading fraction 'Ns' of every entry and stack them
        Ns = int(np.shape(data_concat)[1] * dim_reduc_params.pop('Ns'))
        data_concat = np.concatenate([d[:Ns] for d in data_concat], axis=0)

        if data_temp is None:
            print('New %s Data at ' % rep, parameters[:len(branch)], branch)
            data_reduc = dim_reduce(data_concat,
                                    rep=rep,
                                    **dim_reduc_params)
            # Export the reduced result: the cache-hit branch below reads
            # this entry back as data_reduc, so the raw branch data must
            # not be written here.
            Data_Process().exporter({cache_key: data_reduc},
                                    model_props,
                                    read_write='a')
        else:
            data_reduc = data_temp[0][model_props['data_file'] + cache_key]
            print('Previous %s Data at ' % rep, parameters[:len(branch)],
                  branch, np.shape(data_reduc))

        keys = np.repeat(keys, np.shape(data_reduc)[0] // len(keys))
        tree[b] = (keys, data_reduc, [int(np.mean(d)) for d in data_concat])
        return tree

    # Work on a private copy: 'branches' is written into it below
    data_props = copy.deepcopy(data_props)

    if data_types is None:
        data_types = [
            s for s in data_props['data_types'] if s not in ['sorted']
        ]
    # tsne / pca are computed from 'sites_sorted', so sort the sites first
    if ('tsne' in data_types or
            'pca' in data_types) and ('sites_sorted' not in data.keys()):
        data_types = ['sites'] + [
            s for s in data_types if s not in ['sites', 'sorted']
        ]

    file_header = os.path.split(data_props['data_dir'])[0]

    # Check if sorted data exists: unpack it into data and stop
    if data.get('sorted'):
        for k, v in (data['sorted'].copy()).items():
            data[k.split(file_header)[1] + 'sorted'] = v.copy()
            data['sorted'].pop(k)
        data.pop('sorted')
        return data

    root = {}
    depth = {}
    args = {}
    branch_func = {}
    for s in data_types:
        s_key = s + '_sorted'
        s_data = data.get(s, {})

        # Defaults, overridden per type below
        root[s] = []
        depth[s] = None
        args[s] = (root[s], )
        # The nested '<type>_sorted' functions above are looked up by name
        branch_func[s] = locals().get(s_key)
        export = False

        if s == 'sites':
            root[s] = []
            depth[s] = None
        elif s == 'observables':
            root[s] = {}
            depth[s] = None
            args[s] = (root[s], depth[s])
        elif s == 'model_props':
            root[s] = {}
            depth[s] = -2
            args[s] = (depth[s], )
        elif s in ['tsne', 'pca']:
            root[s] = []
            depth[s] = -1
            args[s] += (data['model_props_sorted'], s)
            s_key = s
            s_data = data['sites_sorted']
            branch_func[s] = locals().get('reduce_sorted')
        else:
            root[s] = []
            depth[s] = None
            args[s] = (depth[s], )

        print('Sorting ', s)

        # NOTE(review): the nesting level of this 'continue' was
        # reconstructed from flattened source; it is placed so that any
        # existing entry skips re-sorting. Confirm against the original.
        if data.get(s_key) is not None:
            if file_header in data[s_key].keys():
                data[s_key] = data[s_key][file_header]
            continue

        (data[s_key], data_props['branches']) = tree_sort(
            parameters, data['model_props'], s_data, branch_func[s],
            root[s], depth[s], *args[s])

        # Save Sorted Data
        Data_Process().exporter({file_header + s_key: data[s_key]},
                                data_props,
                                read_write='ow',
                                export=export)

    return data
def observables(self, data, observables_functions, data_props):
    """Measure, export and (optionally) plot observables for every dataset.

    For each key of ``data['sites']`` the matching model properties are
    updated with ``data_props``.  Keys that already have an entry in
    ``data['observables']`` are skipped; for the others the observables are
    computed with ``self.measure`` and exported when the model properties'
    ``'data_save'`` flag (default ``True``) is set.

    Args:
        data: Dict with at least ``'sites'`` and ``'model_props'`` entries,
            each keyed by dataset; ``'observables'`` is created if missing
            or empty.  Modified in place.
        observables_functions: Passed to ``Model`` as ``observe``.
        data_props: Dict of data properties; its ``'observe_props'`` entry
            is updated in place.

    Returns:
        None.
    """

    def plot_observables(data, key):
        # Plot the observables of one dataset against T, labelled with the
        # algorithm of each entry in 'iter_props'
        self.plot(
            {
                #'observables':data['observables'][key],
                'observables_mean': data['observables'][key]
            },
            data['model_props'][key],
            {
                'arr_0': ['T', data['model_props'][key]['T']],
                'arr_1': [
                    'algorithm',
                    np.atleast_1d([
                        p['algorithm']
                        for p in data['model_props'][key]['iter_props']
                    ])
                ]
            })
        return

    def plot_sites(data, key):
        # Last row of each site array, keyed by ('sites', T)
        data_sites = {('sites', t): d[-1, :]
                      for d, t in zip(np.atleast_3d(data['sites'][key]),
                                      data['model_props'][key]['T'])}
        self.plot(
            {
                #'observables':data['observables'][key],
                'configurations': data_sites
            },
            data['model_props'][key],
            {
                'arr_0': ['T', data['model_props'][key]['T']],
                'arr_1': [
                    'algorithm',
                    np.atleast_1d([
                        p['algorithm']
                        for p in data['model_props'][key]['iter_props']
                    ])
                ]
            })
        return

    if not data.get('observables'):
        data['observables'] = {}

    # Update Plotting Properties
    # (the set is built before the loop, so adding keys inside is safe)
    for s in set(
            list(data_props['observe_props'].keys()) +
            data_props.get('data_types', [])):
        if s in ['sorted']:
            continue
        elif s in ['sites']:
            t = 'configurations'
            data_props['observe_props']['configurations'] = [True, s]
        elif s in ['tsne', 'pca']:
            t = s
            data_props['observe_props'][s] = [True, s]
        else:
            t = s + '_sorted'
        # Keep an existing entry for t, else fall back to the entry for s,
        # else to [True, s]
        data_props['observe_props'][t] = data_props['observe_props'].get(
            t, data_props['observe_props'].get(s, [True, s]))

    for k, sites in data['sites'].items():

        # Update data properties
        data['model_props'][k].update(data_props)

        # Check if Data exists
        if data.get('observables', {}).get(k) is not None:

            # Plot Data
            # NOTE(review): 'and False' disables this block. The nesting of
            # plot_sites() was reconstructed from flattened source and is
            # assumed to sit inside the disabled block -- confirm.
            if data_props.get('plot') and False:
                plot_observables(data, k)
                #if data_props.get('plot'):
                plot_sites(data, k)

            continue

        model_props = data['model_props'][k]

        # Measure Data
        m = Model(model=model_props, observe=observables_functions)
        data['observables'][k] = self.measure(
            sites, model_props['neighbour_sites'], model_props['T'],
            m.observables_functions)

        # Save Data
        if model_props.get('data_save', True):
            Data_Process().exporter(
                {'observables': data['observables'][k]},
                model_props,
                file=k,
                format=model_props['data_format']['observables'])

        # Plot Data
        # NOTE(review): same disabled block and same nesting assumption as
        # in the 'Data exists' branch above.
        if data_props.get('plot') and False:
            plot_observables(data, k)
            #if data_props.get('plot'):
            plot_sites(data, k)

    return
def process(
        X,
        data_params,
        data_rep,
        data_type,
        alg_params={
            'N0': None,
            'N': 2,
            'perp': 30.0,
            'pca': True,
            'data_dir': 'dataset/tsne/',
            'data_reps': ['tsne', 'pca'],
            'data_name_format': ['', 'pca', 'perp', 'N0', '']
        }):
    """Import the data, dimension-reduce each data type and plot the result.

    For every configuration data type and every representation in
    ``data_reps`` except ``'pca'``, a previously exported result is imported
    and plotted when one is found; otherwise ``dim_reduce`` is run per data
    key, and the result is exported and plotted.

    Reads ``data_types_config``, ``data_types_temps`` and ``data_reps`` from
    the enclosing (module) scope.

    Args:
        X: Not used by this function.
        data_params: Dict of import/analysis settings; a shallow copy is
            formatted and used.
        data_rep: Not used by this function.
        data_type: Not used by this function.
        alg_params: Not used by this function (never read or modified).

    Returns:
        None.
    """
    data_params = data_params.copy()
    Data_Process().format(data_params, initials=False)
    if data_params['file_dir'] == '':
        data_params['file_dir'] = data_params['data_dir']

    # Import Data
    data, data_sizes, _, _ = Data_Process().importer(
        data_params,
        data_typing='dict',
        data_lists=True,
        upconvert=True,
        directory=data_params['file_dir'])
    display(
        True, True, 'Data Imported... \n' + str(data_sizes) + '\n' +
        (data_params['data_file'] + '\n'))

    # Setup Data
    # Change keys structure for appropriate plot labels (i.e. L_XX size labels)
    ind_data = [-3, None]
    ind_type = [0, None]
    data_typed = dict_modify(data,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v.copy(),
                             i=ind_data,
                             j=ind_type)
    # NOTE(review): the result is not used afterwards; the call is kept in
    # case dict_modify has side effects.
    data_sizes = dict_modify(data_sizes,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v,
                             i=ind_data,
                             j=ind_type)
    data_keys = {t: sorted(list(d.keys())) for t, d in data_typed.items()}

    Y = {
        r: dict_modify(data,
                       data_types_config,
                       f=lambda k, v: [],
                       i=ind_data,
                       j=ind_type)
        for r in data_reps
    }

    # Bind the sliced type names to a NEW local name.  Assigning to
    # data_types_config itself would make it local to this function and turn
    # the reads above into an UnboundLocalError.
    config_types = [t[slice(*ind_type)] for t in data_types_config]

    # Setup Plotting
    plot_keys = {}
    plot_bool = {}
    for r in data_reps:
        for t in Y[r].keys():
            plot_keys[r + '_' + t] = data_keys[t]
            plot_bool[r + '_' + t] = True

    Data_Process().plot_close()
    Data_Proc = Data_Process(plot_keys, plot_bool)

    def comp(x, i):
        # Column i of every non-empty entry
        return {k: v[:, i] for k, v in x.items() if np.any(v)}

    # tSNE and PCA Analysis
    for t in sorted(config_types):
        for r in data_reps:
            if r == 'pca':
                continue

            # Check if Data Exists
            params = data_params.copy()
            file_header = r + '_' + t
            file_name = file_header + data_params['data_file']
            params['data_files'] = file_name + '.' + data_params['data_format']
            cached = Data_Proc.importer(params,
                                        data_obj_format='dict',
                                        format='npz')

            if cached is not None:
                print('Previous Data Found for', file_name)
                Y[r][t] = cached[0][file_name].item()
                Data_Proc.plotter(comp(Y[r][t], 1),
                                  comp(Y[r][t], 0),
                                  plot_props(Y[r][t].keys(), r, t[-5:]),
                                  data_key=r + '_' + t)
            else:
                print('New Data for', file_name)
                for k in data_keys[t]:
                    print(r, t, k)
                    Y[r][t][k] = dim_reduce(data=data_typed[t][k],
                                            N=data_params['N'],
                                            N0=data_params['N0'],
                                            perp=data_params['perp'],
                                            rep=r,
                                            pca=data_params['pca'])

                Data_Proc.exporter({file_header: Y[r][t]}, data_params)
                Data_Proc.plotter(comp(Y[r][t], 1),
                                  comp(Y[r][t], 0),
                                  plot_props(Y[r][t].keys(), r, t[-5:]),
                                  data_key=file_header)

    # NOTE(review): nesting reconstructed from flattened source; the figures
    # are assumed to be saved once, after all types and representations.
    Data_Proc.plot_save(data_params, read_write='a')
} data_reps = args.data_reps data_params = { 'data_files': data_files, 'data_types': data_types_configs + data_types_temps, 'data_format': 'npz', 'data_obj_format': data_obj_format, 'data_dir': 'dataset/tsne/', 'one_hot': [False], 'data_name_format': ['', 'pca', 'perp', 'N0', ''] } data_params['data_sets'] = configs_sets + temperatures_sets data_params.update(vars(args)) Data_Process().format(data_params, initials=False) if data_params['file_dir'] == '': data_params['file_dir'] = data_params['data_dir'] # Import Data data, data_sizes, _, _ = Data_Process().importer( data_params, data_typing='dict', data_lists=True, upconvert=True, directory=data_params['file_dir'], disp=True) display(