Ejemplo n.º 1
0
def process(
    X,
    data_params,
    data_rep,
    data_type,
    alg_params={
        'N0': None,
        'N': 2,
        'perp': 30.0,
        'pca': True,
        'data_dir': 'dataset/tsne/',
        'data_reps': ['tsne', 'pca'],
        'data_name_format': ['', 'pca', 'perp', 'N0', '']
    }):
    """Import a dataset, compute or reload its reduced representations, plot.

    The function works on a copy of ``data_params``. It imports the dataset
    through ``Data_Process().importer``, re-keys the imported data and sizes
    with ``dict_modify``, and then, for every configuration type and every
    representation in the module-level ``data_reps`` other than ``'pca'``:

    * tries to import a previously exported result from the file
      ``<rep>_<type><data_file>.<data_format>``;
    * if the import returns ``None``, computes the representation with
      ``dim_reduce`` for every key of that type and exports it;
    * plots component 1 against component 0 of the result.

    Finally all plots are saved with ``Data_Proc.plot_save``.

    Args:
        X: Not used by this function.
        data_params: Mapping of settings. The keys read here are
            ``'file_dir'``, ``'data_dir'``, ``'data_file'``,
            ``'data_format'``, ``'N'``, ``'N0'``, ``'perp'`` and ``'pca'``.
            The caller's mapping is not modified (a shallow copy is used).
        data_rep: Not used by this function.
        data_type: Not used by this function.
        alg_params: Not used by this function. The default dict is never
            read or mutated here, so sharing it between calls is harmless.

    Returns:
        None.

    Note:
        Relies on the module-level names ``Data_Process``, ``display``,
        ``dict_modify``, ``dim_reduce``, ``plot_props``, ``data_reps``,
        ``data_types_config``, ``data_types_temps`` and ``np``.
    """

    def _component(embedding, axis):
        # Column `axis` of every array in `embedding` that has at least one
        # truthy entry (np.any); the remaining entries are dropped.
        return {
            key: values[:, axis]
            for key, values in embedding.items()
            if np.any(values)
        }

    data_params = data_params.copy()

    Data_Process().format(data_params, initials=False)
    if data_params['file_dir'] == '':
        data_params['file_dir'] = data_params['data_dir']

    # Import Data

    data, data_sizes, _, _ = Data_Process().importer(
        data_params,
        data_typing='dict',
        data_lists=True,
        upconvert=True,
        directory=data_params['file_dir'])

    display(
        True, True, 'Data Imported... \n' + str(data_sizes) + '\n' +
        (data_params['data_file'] + '\n'))

    # Setup Data

    # Change keys structure for appropriate plot labels (i.e. L_XX size labels)
    ind_data = [-3, None]
    ind_type = [0, None]

    # data_types_config / data_types_temps are only *read* in this function.
    # Assigning to either name anywhere in the body would make it a local
    # and turn these reads into an UnboundLocalError, so the sliced names
    # computed further down are bound to a different local (config_types).
    data_typed = dict_modify(data,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v.copy(),
                             i=ind_data,
                             j=ind_type)

    data_sizes = dict_modify(data_sizes,
                             data_types_config + data_types_temps,
                             f=lambda k, v: v,
                             i=ind_data,
                             j=ind_type)

    data_keys = {t: sorted(d.keys()) for t, d in data_typed.items()}

    # One container per representation, holding an empty list per data key.
    Y = {
        r: dict_modify(data,
                       data_types_config,
                       f=lambda k, v: [],
                       i=ind_data,
                       j=ind_type)
        for r in data_reps
    }

    # Configuration type names restricted to the ind_type slice.
    config_types = [t[slice(*ind_type)] for t in data_types_config]

    # Setup Plotting
    plot_keys = {}
    plot_bool = {}
    for r in data_reps:
        for t in Y[r].keys():
            plot_keys[r + '_' + t] = data_keys[t]
            plot_bool[r + '_' + t] = True

    Data_Process().plot_close()
    Data_Proc = Data_Process(plot_keys, plot_bool)

    # tSNE and PCA Analysis

    for t in sorted(config_types):

        for r in data_reps:

            if r == 'pca':
                continue

            # Check if Data Exists
            params = data_params.copy()
            file_header = r + '_' + t
            file_name = file_header + data_params['data_file']
            params['data_files'] = file_name + '.' + data_params['data_format']
            cached = Data_Proc.importer(params,
                                        data_obj_format='dict',
                                        format='npz')
            if cached is not None:
                print('Previous Data Found for', file_name)
                Y[r][t] = cached[0][file_name].item()

                # NOTE(review): the reloaded result is plotted here and again
                # after the if/else below, under the same data_key string
                # (file_header == r + '_' + t). Kept as in the original;
                # confirm whether the double call is intended.
                Data_Proc.plotter(_component(Y[r][t], 1),
                                  _component(Y[r][t], 0),
                                  plot_props(Y[r][t].keys(), r, t[-5:]),
                                  data_key=r + '_' + t)

            else:
                print('New Data for', file_name)
                for k in data_keys[t]:
                    print(r, t, k)
                    Y[r][t][k] = dim_reduce(data=data_typed[t][k],
                                            N=data_params['N'],
                                            N0=data_params['N0'],
                                            perp=data_params['perp'],
                                            rep=r,
                                            pca=data_params['pca'])

                Data_Proc.exporter({file_header: Y[r][t]}, data_params)

            Data_Proc.plotter(_component(Y[r][t], 1),
                              _component(Y[r][t], 0),
                              plot_props(Y[r][t].keys(), r, t[-5:]),
                              data_key=file_header)
    Data_Proc.plot_save(data_params, read_write='a')
Ejemplo n.º 2
0
            if (file_header in file_name) and args.import_files:
                data = Data_Proc.importer(params,
                                          data_obj_format='dict',
                                          format='npz',
                                          directory=data_params['data_dir'])
            else:
                data = None

            if data is not None:
                print('Previous Data Found for', file_name)
                Y[r][t] = list(data[0].values())[0]
                if args.plot:
                    Data_Proc.plotter(comp(Y[r][t], 1),
                                      comp(Y[r][t], 0),
                                      plot_props(Y[r][t].keys(), data_typed, r,
                                                 t[-5:], orientation),
                                      data_key=data_key)
                    break
            else:
                print('New Data for', file_name)

                if args.import_files:
                    Data_Proc.plot[data_key] = False
                    continue
                for k in data_keys[t][:1]:
                    print(r, t, k)
                    Y[r][t][k] = dim_reduce(data_typed[t][k],
                                            rep=r,
                                            **data_params)