Ejemplo n.º 1
0
        def model_props_sorted(tree, branch, data, depth):
            """Fill the leaf ``tree[branch[-1]]`` with a per-branch properties dict.

            A leaf that is anything other than an empty dict is treated as
            already set and the tree is returned untouched. ``data`` is part
            of the branch-function signature but is not read here.
            ``data_props``, ``copy`` and ``Data_Process`` are taken from the
            enclosing scope.

            Returns:
                The same ``tree`` object that was passed in.
            """
            leaf = branch[-1]

            # Only empty-dict leaves still need to be populated
            if tree[leaf] != {}:
                return tree

            # Start from deep copies of every listed data property
            props = {
                name: copy.deepcopy(data_props.get(name, None))
                for name in data_props['data_properties']
            }

            # Overwrite the leading sort parameters with this branch's values
            sort_params = data_props['analysis_params']['sort_params']
            for level, name in enumerate(sort_params[:depth]):
                props[name] = branch[level]

            # File names keep the first format entry, followed by the first
            # sort parameter and an empty trailing entry
            name_head = props['data_name_format'][0]
            name_param = props['analysis_params']['sort_params'][0]
            props['data_name_format'] = [name_head, name_param, '']

            Data_Process().format(props, file_update=True)
            tree[leaf] = props
            return tree
Ejemplo n.º 2
0
    def __init__(self, data_props=None):
        """Import the dataset described by ``data_props`` and keep its typed part.

        Args:
            data_props: Import settings handed to
                ``Data_Process().importer``. When omitted, a fresh dict with
                the historical defaults is created for this instance.

        Attributes set:
            data: Element ``[2]`` of the importer result, or ``None`` when
                the importer returned ``None``.
            data_props: The settings dict that was used for the import.
        """
        # Build the defaults per call. The dict is stored on the instance, so
        # a dict literal in the signature would be one object shared by every
        # instance created without arguments, and any in-place change made
        # through one instance would leak into the others.
        if data_props is None:
            data_props = {
                'data_files': '*.npz',
                'data_types': ['sites', 'observables', 'model_props'],
                'data_typed': 'dict_split',
                'data_format': 'npz',
                'data_dir': 'dataset',
            }

        display(print_it=True, time_it=False, m='Model Analysis...')

        # Import Data
        data = Data_Process().importer(data_props, upconvert=False, disp=False)

        print(data_props.get('data_dir'))
        print(data)

        # Use the typed data format (third element of the importer result)
        self.data = data[2] if data is not None else data
        self.data_props = data_props
Ejemplo n.º 3
0
    def plot(self, data, model_props, plot_args, new_plot=True):
        """Plot ``data`` with ``MonteCarloPlot`` and optionally save the figure.

        Args:
            data: Mapping of plot key to data. Each key is looked up in
                ``self.data_props['observe_props']`` (falling back to
                ``[True, key]``) to configure the plot object.
            model_props: Model properties forwarded to the plot object; its
                ``'data_file'`` and ``'data_save'`` entries are read here.
            plot_args: Keyword arguments forwarded to both the
                ``MonteCarloPlot`` constructor and ``MC_plotter``.
            new_plot: When false, nothing is drawn if importing the figure
                file as ``'pdf'`` returns something other than ``None``.
        """
        # Check if Plot already exists.
        # The instance's own properties are used here: this method has no
        # local or parameter called ``data_props``, and the rest of the body
        # already reads ``self.data_props``.
        if not new_plot:
            existing = Data_Process().importer(
                self.data_props,
                data_files=model_props.get('data_file', 'None'),
                format='pdf')
            if existing is not None:
                return

        plot_obj = MonteCarloPlot(
            {
                k: self.data_props['observe_props'].get(k, [True, k])
                for k in data.keys()
            }, model_props, **plot_args)

        # Plot Data
        plot_obj.MC_plotter(data, **plot_args)

        # Save Figures (enabled unless 'data_save' is explicitly falsy)
        if model_props.get('data_save', True):
            plot_obj.plot_save(model_props, read_write='ow', fig_size=(18, 12))

        return
Ejemplo n.º 4
0
        def reduce_sorted(tree, branch, data, root, model_props, rep):
            """Store a dimension-reduced representation of one branch in ``tree``.

            The data under ``branch`` is concatenated, reduced with
            ``dim_reduce`` (or loaded from a previously exported file with
            the same name) and written to the leaf as
            ``(keys, data_reduc, means)``.

            Args:
                tree: Sub-tree whose leaf ``tree[branch[-1]]`` is filled.
                branch: Sequence of keys used to descend into ``data`` and,
                    where a key is present, into ``model_props``.
                data: Nested mapping indexed by the elements of ``branch``;
                    the innermost level must provide ``keys()``.
                root: Value marking a leaf that has not been filled yet.
                model_props: Properties mapping; ``'data_file'`` and
                    ``'data_format'[rep]`` are read from the innermost level
                    reached.
                rep: Representation name, passed to ``dim_reduce`` and used
                    to build the file name.

            Returns:
                The same ``tree`` object that was passed in.

            ``data_props`` and ``parameters`` are read from the enclosing
            scope.
            """
            keys_func = lambda k, d: np.repeat(k, np.shape(d)[0] // len(k))
            data_func = lambda data, N0, N: np.concatenate(
                tuple([d[N0:N0 + N] for d in data]), axis=0)

            # Descend to this branch; ``b`` is left holding the last key
            for b in branch:
                data = data[b]
                model_props = model_props.get(b, model_props)

            keys = sorted(data.keys())
            data_concat = [data[k] for k in keys]

            # Check if Data Exists: a leaf differing from root is already set
            if tree[b] != root:
                return tree

            # Copy so that popping 'Ns' below does not alter the shared dict
            dim_reduc_params = data_props['analysis_params'][
                'dim_reduc_params'].copy()
            temp_name = '_'.join([
                'rep', rep[0], '_'.join(p for p in parameters[1:len(branch)]) +
                ('_'.join(str(b) for b in branch[1:]))
            ]) + ('.' + model_props['data_format'][rep])
            cache_key = temp_name.split('.')[0]

            data_temp = Data_Process().importer(
                model_props,
                data_files=model_props['data_file'] + temp_name,
                disp=True)

            # Keep the first Ns entries of every array before concatenating;
            # 'Ns' scales the second dimension of the stacked data
            # (presumably a fraction -- TODO confirm).
            Ns = int(np.shape(data_concat)[1] * dim_reduc_params.pop('Ns'))
            N0 = 0
            data_concat = data_func(data_concat, N0, Ns)

            if data_temp is None:
                print('New %s Data at ' % rep, parameters[:len(branch)],
                      branch)
                data_reduc = dim_reduce(data_concat,
                                        rep=rep,
                                        **dim_reduc_params)

                # Persist the reduced result. The branch below reads this
                # same key back as ``data_reduc``, so exporting the
                # unreduced ``data`` would hand the wrong object to later
                # runs.
                Data_Process().exporter({cache_key: data_reduc},
                                        model_props,
                                        read_write='a')

            else:
                data_reduc = data_temp[0][model_props['data_file'] + cache_key]
                print('Previous %s Data at ' % rep, parameters[:len(branch)],
                      branch, np.shape(data_reduc))

            keys = keys_func(keys, data_reduc)
            tree[b] = (keys, data_reduc,
                       [int(np.mean(d)) for d in data_concat])
            return tree
Ejemplo n.º 5
0
    def sort(self, data, parameters, data_props, data_types=None):
        """Arrange imported data into nested dicts keyed by parameter values.

        For every requested data type ``s`` a tree is built with
        ``tree_sort`` and stored in ``data`` under ``s + '_sorted'`` (or
        under ``s`` itself for ``'tsne'`` / ``'pca'``).

        Args:
            data: Mapping of data type to imported data. It is updated in
                place; ``data['model_props']`` supplies the per-dataset
                parameter values used to build branches.
            parameters: Parameter names defining the tree levels, in order.
            data_props: Properties dict. It is deep-copied on entry, so the
                caller's object is not modified by this method.
            data_types: Data types to sort. Defaults to
                ``data_props['data_types']`` without ``'sorted'``.

        Returns:
            The (mutated) ``data`` mapping.
        """

        # # Sort by params as {param0_i: {param1_j: [param2_k values]}}
        # # i.e) q, L, T

        def tree_sort(parameters,
                      data_props,
                      data,
                      branch_func,
                      root=[],
                      depth=None,
                      *args):
            """Build a tree from the datasets' branches and fill its leaves.

            Returns ``(tree, branches)`` where ``branches`` maps each key of
            ``data_props`` to its (depth-truncated) list of parameter values.
            """
            # Create Branches
            branches = {
                k: b[:depth]
                for k, b in set_branch(data_props, parameters).items()
            }

            # Create Tree
            tree = get_tree(branches.values(), root)

            # Set Tree
            # data.get(k, data): use the per-dataset entry when there is one,
            # otherwise hand the whole mapping to the branch function.
            for k in branches.keys():
                set_tree(tree, branches[k], data.get(k, data), branch_func,
                         *args)
            return tree, branches

        def get_branch(tree, branch, depth=-1):
            """Return the keys of the sub-tree found at ``branch[depth]``."""
            # NOTE(review): not called anywhere in the visible part of this
            # method.
            for b in branch[:depth - 1]:
                tree = tree[b]
            return list(tree[branch[depth]].keys())

        def set_branch(data_props, parameters):
            """Map each dataset key to its hashable parameter values."""
            branch = {}
            for k, d in data_props.items():
                branch[k] = [hashable(d[p]) for p in parameters]
            return branch

        def get_tree(branches, root=[]):
            """Recursively build nested dicts whose leaves are copies of ``root``."""
            if max([len(b) for b in branches]) > 0:
                tree = {}
                # Collect every value seen at the first level; a branch entry
                # may itself hold several values (np.atleast_1d).
                b0_list = []
                for b in branches:
                    b0_list.extend(np.atleast_1d(b[0]))
                for b0 in set(b0_list):
                    tree[b0] = get_tree(
                        [b[1:] for b in branches if b0 in np.atleast_1d(b[0])],
                        root)
            else:
                # Leaf: copy so that leaves do not share one root object
                if isinstance(root, np.ndarray):
                    tree = np.copy(root)
                else:
                    tree = root.copy()
            return tree

        def set_tree(tree,
                     branch,
                     data,
                     branch_func=lambda t, b, d, *a: d,
                     *args):
            """Descend to the parent of the leaf and apply ``branch_func`` there."""
            for b in branch[:-1]:
                tree = tree[b]
            # The result is only bound to the local name; the caller's tree
            # changes through branch_func mutating the sub-tree in place.
            tree = branch_func(tree, branch, data, *args)

        def sites_sorted(tree, branch, data, root):
            """Store ``data[i]`` at each leaf key, appending to existing entries."""
            for i, b in enumerate(np.atleast_1d(branch[-1])):
                if tree[b] == root:
                    tree[b] = data[i]
                else:
                    tree[b] = np.append(tree[b], data[i], axis=0)
            return tree

        def observables_sorted(tree, branch, data, root, depth):
            """Collect each observable ``k`` of ``data[j]`` under the leaf keys."""
            # ``root`` and ``depth`` are accepted but not read in this body.
            for i, b in enumerate(np.atleast_1d(branch[-1])):
                for j in range(np.shape(data)[0]):
                    for k in data[j].keys():
                        if k not in tree[b].keys():
                            tree[b][k] = data[j][k][i]
                        else:
                            # NOTE(review): ``dim_reduct`` is not defined in
                            # the visible source -- confirm it exists and is
                            # not a misspelling of ``dim_reduce``.
                            tree[b][k] = np.append(
                                np.atleast_1d(tree[b][k]),
                                np.atleast_1d(dim_reduct(data[j][k][i])),
                                axis=-1)
            return tree

        def model_props_sorted(tree, branch, data, depth):
            """Fill an empty leaf with a properties dict for this branch."""
            # ``data`` is accepted but not read in this body.

            if tree[branch[-1]] != {}:
                return tree

            # Update data properties
            props = {}  #copy.deepcopy(data)

            for p in data_props['data_properties']:
                props[p] = copy.deepcopy(data_props.get(p, None))

            # Overwrite the leading sort parameters with this branch's values
            for i, p in enumerate(
                    data_props['analysis_params']['sort_params'][:depth]):
                props[p] = branch[i]

            props['data_name_format'] = [props['data_name_format'][0]] + (
                [props['analysis_params']['sort_params'][0]] + [''])

            Data_Process().format(props, file_update=True)
            tree[branch[-1]] = props
            return tree

        def reduce_sorted(tree, branch, data, root, model_props, rep):
            """Store ``(keys, reduced data, means)`` for one branch in ``tree``.

            The reduced data comes from ``dim_reduce`` or, when the importer
            finds a file with the generated name, from that file.
            """

            keys_func = lambda k, d: np.repeat(k, np.shape(d)[0] // len(k))
            data_func = lambda data, N0, N: np.concatenate(
                tuple([d[N0:N0 + N] for d in data]), axis=0)

            # Descend to this branch; ``b`` keeps the last key afterwards
            for b in branch:
                data = data[b]
                model_props = model_props.get(b, model_props)

            keys = sorted(data.keys())
            data_concat = [data[k] for k in keys]

            # Check if Data Exists
            if tree[b] != root:
                return tree

            # Copied so that popping 'Ns' below leaves data_props untouched
            dim_reduc_params = data_props['analysis_params'][
                'dim_reduc_params'].copy()
            temp_name = '_'.join([
                'rep', rep[0], '_'.join(p for p in parameters[1:len(branch)]) +
                ('_'.join(str(b) for b in branch[1:]))
            ]) + ('.' + model_props['data_format'][rep])
            data_temp = Data_Process().importer(
                model_props,
                data_files=model_props['data_file'] + temp_name,
                disp=True)

            # Keep only the first Ns entries of every array
            Ns = int(np.shape(data_concat)[1] * dim_reduc_params.pop('Ns'))
            N0 = 0
            data_concat = data_func(data_concat, N0, Ns)
            if data_temp is None:
                print('New %s Data at ' % rep, parameters[:len(branch)],
                      branch)
                #N0 = int(random.random()*np.shape(data)[1]*(1-Ns))
                # Ns = np.array(random.sample(,
                # int(np.shape(data)[1]*Ns)))

                #input('Prepare for data concatenation after random choice')
                if True:
                    data_reduc = dim_reduce(data_concat,
                                            rep=rep,
                                            **dim_reduc_params)

                # NOTE(review): this exports the unreduced ``data`` although
                # the else-branch reads the same key back as ``data_reduc``;
                # looks like ``data_reduc`` was intended -- confirm.
                Data_Process().exporter({temp_name.split('.')[0]: data},
                                        model_props,
                                        read_write='a')

            else:
                print(
                    'Previous %s Data at ' % rep, parameters[:len(branch)],
                    branch,
                    np.shape(data_temp[0][model_props['data_file'] +
                                          temp_name.split('.')[0]]))

                data_reduc = data_temp[0][model_props['data_file'] +
                                          temp_name.split('.')[0]]

            keys = keys_func(keys, data_reduc)
            tree[b] = (keys, data_reduc,
                       [int(np.mean(d)) for d in data_concat])
            return tree

        # Sort by params as {param0_i: {param1_j: [param2_k values]}}
        # i.e) q, L, T

        # Work on a private copy so the caller's data_props is not modified
        data_props = copy.deepcopy(data_props)

        if data_types is None:
            data_types = [
                s for s in data_props['data_types'] if s not in ['sorted']
            ]

        # The reductions read data['sites_sorted'], so sort 'sites' first
        # when that entry is missing.
        if ('tsne' in data_types or 'pca'
                in data_types) and ('sites_sorted' not in data.keys()):
            data_types = ['sites'] + [
                s for s in data_types if s not in ['sites', 'sorted']
            ]

        file_header = os.path.split(data_props['data_dir'])[0]
        file_format = lambda s: file_header + s

        # Check if sorted data exists: when present, move every entry of
        # data['sorted'] to a top-level key and return without sorting.
        if data.get('sorted'):
            for k, v in (data['sorted'].copy()).items():
                data[k.split(file_header)[1] + 'sorted'] = v.copy()
                data['sorted'].pop(k)
            data.pop('sorted')
            return data

        root = {}
        depth = {}
        args = {}
        data_sorted = {}
        branch_func = {}
        for s in data_types:

            s_key = s + '_sorted'
            s_data = data.get(s, {})
            root[s] = []
            depth[s] = None
            args[s] = (root[s], )
            # The branch function is looked up by name among the nested
            # functions above, so their names must stay '<type>_sorted'.
            branch_func[s] = locals().get(s_key)
            export = False
            # Each args tuple matches the extra parameters of the branch
            # function selected for this data type.
            if s == 'sites':
                root[s] = []
                depth[s] = None
            elif s == 'observables':
                root[s] = {}
                depth[s] = None
                args[s] = (root[s], depth[s])
            elif s == 'model_props':
                root[s] = {}
                depth[s] = -2
                args[s] = (depth[s], )
            elif s in ['tsne', 'pca']:
                root[s] = []
                depth[s] = -1
                args[s] += (data['model_props_sorted'], s)
                s_key = s
                s_data = data['sites_sorted']
                branch_func[s] = locals().get('reduce_sorted')
            else:
                root[s] = []
                depth[s] = None
                args[s] = (depth[s], )

            print('Sorting ', s)
            # Skip types that already have sorted data, unwrapping it when
            # it is keyed by file_header.
            if data.get(s_key) is not None:
                if file_header in data[s_key].keys():
                    data[s_key] = data[s_key][file_header]
                continue

            (data[s_key], data_props['branches']) = tree_sort(
                parameters, data['model_props'], s_data, branch_func[s],
                root[s], depth[s], *args[s])
            # Save Sorted Data
            # ``export`` is always False here, so the exporter is called with
            # export=False for every data type.
            Data_Process().exporter({file_format(s_key): data[s_key]},
                                    data_props,
                                    read_write='ow',
                                    export=export)

        return data
Ejemplo n.º 6
0
    def observables(self, data, observables_functions, data_props):
        """Measure observables for every dataset that does not have them yet.

        ``data_props['observe_props']`` is first extended with plotting
        entries for each known data type. Then, for each key in
        ``data['sites']``, the model properties are updated with
        ``data_props`` and, when no observables are stored for that key,
        they are measured with ``self.measure`` and optionally exported.
        Both ``data`` and ``data_props`` are modified in place.
        """

        def shared_plot_args(key):
            # Axis description common to both plot helpers
            props = data['model_props'][key]
            algorithms = np.atleast_1d(
                [p['algorithm'] for p in props['iter_props']])
            return {
                'arr_0': ['T', props['T']],
                'arr_1': ['algorithm', algorithms]
            }

        def plot_observables(key):
            self.plot({'observables_mean': data['observables'][key]},
                      data['model_props'][key], shared_plot_args(key))

        def plot_sites(key):
            # Last row of each site array, labelled by its temperature entry
            stacked = np.atleast_3d(data['sites'][key])
            temperatures = data['model_props'][key]['T']
            data_sites = {('sites', t): d[-1, :]
                          for d, t in zip(stacked, temperatures)}
            self.plot({'configurations': data_sites},
                      data['model_props'][key], shared_plot_args(key))

        def plot_all(key):
            # Plotting is switched off by the trailing ``and False``
            if data_props.get('plot') and False:
                plot_observables(key)
                plot_sites(key)

        if not data.get('observables'):
            data['observables'] = {}

        # Update Plotting Properties
        observe_props = data_props['observe_props']
        names = set(
            list(observe_props.keys()) + data_props.get('data_types', []))
        for name in names:
            if name in ('sorted', ):
                continue
            if name in ('sites', ):
                label = 'configurations'
                observe_props['configurations'] = [True, name]
            elif name in ('tsne', 'pca'):
                label = name
                observe_props[name] = [True, name]
            else:
                label = name + '_sorted'
            # Keep an existing entry, else inherit the unsorted one
            observe_props[label] = observe_props.get(
                label, observe_props.get(name, [True, name]))

        for key, sites in data['sites'].items():

            # Update data properties
            model_props = data['model_props'][key]
            model_props.update(data_props)

            # Measure and save only when nothing is stored for this key
            if data.get('observables', {}).get(key) is None:
                model = Model(model=model_props, observe=observables_functions)

                data['observables'][key] = self.measure(
                    sites, model_props['neighbour_sites'], model_props['T'],
                    model.observables_functions)

                if model_props.get('data_save', True):
                    Data_Process().exporter(
                        {'observables': data['observables'][key]},
                        model_props,
                        file=key,
                        format=model_props['data_format']['observables'])

            # Plot Data
            plot_all(key)
Ejemplo n.º 7
0
def process(
    X,
    data_params,
    data_rep,
    data_type,
    alg_params={
        'N0': None,
        'N': 2,
        'perp': 30.0,
        'pca': True,
        'data_dir': 'dataset/tsne/',
        'data_reps': ['tsne', 'pca'],
        'data_name_format': ['', 'pca', 'perp', 'N0', '']
    }):
    """Import data, compute or reload its reduced representations, and plot.

    For every data type in the module-level ``data_types_config`` and every
    representation in the module-level ``data_reps`` except ``'pca'``, a
    previously exported result is loaded when the importer finds one;
    otherwise ``dim_reduce`` is run per key and the result is exported.
    Each result is plotted and the figures are saved at the end.

    Args:
        X: Not read in this body.
        data_params: Import/analysis settings. A shallow copy is formatted
            and used; ``'file_dir'``, ``'data_dir'``, ``'data_file'``,
            ``'data_format'``, ``'N'``, ``'N0'``, ``'perp'`` and ``'pca'``
            are read.
        data_rep: Not read in this body.
        data_type: Not read in this body.
        alg_params: Not read in this body; kept for interface compatibility.
    """
    data_params = data_params.copy()

    Data_Process().format(data_params, initials=False)
    if data_params['file_dir'] == '':
        data_params['file_dir'] = data_params['data_dir']

    # Import Data

    data, data_sizes, _, _ = Data_Process().importer(
        data_params,
        data_typing='dict',
        data_lists=True,
        upconvert=True,
        directory=data_params['file_dir'])

    display(
        True, True, 'Data Imported... \n' + str(data_sizes) + '\n' +
        (data_params['data_file'] + '\n'))

    # Setup Data

    # Change keys structure for appropriate plot labels (i.e. L_XX size labels)
    ind_data = [-3, None]
    ind_type = [0, None]

    data_typed = dict_modify(data,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v.copy(),
                             i=ind_data,
                             j=ind_type)

    data_sizes = dict_modify(data_sizes,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v,
                             i=ind_data,
                             j=ind_type)

    data_keys = {t: sorted(list(d.keys())) for t, d in data_typed.items()}

    Y = {
        r: dict_modify(data,
                       data_types_config,
                       f=lambda k, v: [],
                       i=ind_data,
                       j=ind_type)
        for r in data_reps
    }

    # Bind the sliced type names to a new local. Assigning back to
    # ``data_types_config`` (as was done before) makes that name local to
    # the whole function, so every earlier read of it above would raise
    # UnboundLocalError instead of seeing the module-level list.
    types_config = [t[slice(*ind_type)] for t in data_types_config]

    # Setup Plotting
    plot_keys = {}
    plot_bool = {}
    for r in data_reps:
        for t in Y[r].keys():
            plot_keys[r + '_' + t] = data_keys[t]
            plot_bool[r + '_' + t] = True

    Data_Process().plot_close()
    Data_Proc = Data_Process(plot_keys, plot_bool)

    # Column ``i`` of every array that has at least one truthy element
    comp = lambda x, i: {k: v[:, i] for k, v in x.items() if np.any(v)}

    # tSNE and PCA Analysis

    for t in sorted(types_config):

        for r in data_reps:

            if r == 'pca':
                continue

            # Check if Data Exists
            params = data_params.copy()
            file_header = r + '_' + t
            file_name = file_header + data_params['data_file']
            params['data_files'] = file_name + '.' + data_params['data_format']
            previous = Data_Proc.importer(params,
                                          data_obj_format='dict',
                                          format='npz')
            if previous is not None:
                print('Previous Data Found for', file_name)
                Y[r][t] = previous[0][file_name].item()

                # NOTE(review): the same data is plotted again right after
                # this if/else -- confirm the double call is intended.
                Data_Proc.plotter(comp(Y[r][t], 1),
                                  comp(Y[r][t], 0),
                                  plot_props(Y[r][t].keys(), r, t[-5:]),
                                  data_key=r + '_' + t)

            else:
                print('New Data for', file_name)
                for k in data_keys[t]:
                    print(r, t, k)
                    Y[r][t][k] = dim_reduce(data=data_typed[t][k],
                                            N=data_params['N'],
                                            N0=data_params['N0'],
                                            perp=data_params['perp'],
                                            rep=r,
                                            pca=data_params['pca'])

                Data_Proc.exporter({file_header: Y[r][t]}, data_params)

            Data_Proc.plotter(comp(Y[r][t], 1),
                              comp(Y[r][t], 0),
                              plot_props(Y[r][t].keys(), r, t[-5:]),
                              data_key=file_header)
    Data_Proc.plot_save(data_params, read_write='a')
Ejemplo n.º 8
0
    }
    data_reps = args.data_reps

    data_params = {
        'data_files': data_files,
        'data_types': data_types_configs + data_types_temps,
        'data_format': 'npz',
        'data_obj_format': data_obj_format,
        'data_dir': 'dataset/tsne/',
        'one_hot': [False],
        'data_name_format': ['', 'pca', 'perp', 'N0', '']
    }

    data_params['data_sets'] = configs_sets + temperatures_sets
    data_params.update(vars(args))
    Data_Process().format(data_params, initials=False)

    if data_params['file_dir'] == '':
        data_params['file_dir'] = data_params['data_dir']

    # Import Data

    data, data_sizes, _, _ = Data_Process().importer(
        data_params,
        data_typing='dict',
        data_lists=True,
        upconvert=True,
        directory=data_params['file_dir'],
        disp=True)

    display(