def main(K=6):
    # set seed
    np.random.seed(1)

    # set numpy print options (pretty print matrix)
    np.set_printoptions(precision=4, suppress=True, floatmode='fixed')

    # load Data
    data = read_csv()

    # preprocess
    X = normalize_data([d[0] for d in data])
    Y = normalize_data([d[1] for d in data])
    data = np.array([X, Y]).transpose()

    model = VBGMM(data, K)

    # run mixGaussBayesFit
    posterior, loglikHist = model.fit()

    model.plot_posterior_alpha()

    # plot likelihood
    plt.plot(loglikHist, '-', marker='*', lw=3)
    plt.yticks(np.arange(-1100, -601, 50))
    plt.xlim([0, 100])
    plt.xlabel('iterations')
    plt.ylabel('lower bound on log marginal likelihood')
    plt.title('variational Bayes objective for GMM on old faithful data')
    plt.show()
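Every example on this page calls some project-specific `normalize_data` helper whose implementation is not shown and whose signature varies between projects. As a reference point only, a minimal min-max sketch (an assumption, not any of the actual `utils` implementations) could look like this:

import numpy as np

def normalize_data(x):
    # Hypothetical min-max scaling to [0, 1]; the helpers used in the examples
    # below differ (some accept a method string such as 'std' or 'min_max',
    # others take precomputed means/stddevs, an options object, or a bounding box).
    x = np.asarray(x, dtype=float)
    return (x - x.min()) / (x.max() - x.min())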
def test_digits(model, digits, labels, ensemble_size, reshape_fun):
    steps_results = {'c_error': {}, 'entropy': {}}

    dnum = 80

    for i in range(1, 101):
        dless, dmore = salt_and_pepper(digits, i * dnum)

        d = utils.normalize_data(reshape_fun(dmore))
        entropy = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='c_error')
        steps_results['entropy'][i] = entropy
        steps_results['c_error'][i] = c_error

        d = utils.normalize_data(reshape_fun(dless))
        entropy = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='c_error')
        steps_results['entropy'][-1 * i] = entropy
        steps_results['c_error'][-1 * i] = c_error

    return steps_results
def iris_dataset_classification():
    # Set the seed to make result reproducible
    np.random.seed(50)
    # Loads iris dataset
    iris = datasets.load_iris()
    train_data, test_data, train_labels, test_labels = split_and_shuffle_train_test(
        iris.data, iris.target)
    # Does one hot encoding and transposes labels for use in network
    encoding, train_labels = one_hot_encoding(train_labels)
    train_labels = train_labels.T
    # Normalizes data
    train_data = normalize_data(train_data).T
    # Creates a range in which we will test different hidden layer number of neurons
    hidden_layer_neurons = range(200, 600, 30)
    # Here we store the precision for each hidden layer composition
    total_precision = []
    for neuron in hidden_layer_neurons:
        network = TwoLayerNetwork(4, neuron, 3)
        network.train(train_data, train_labels, 10000, 0.001)
        network.plot_training_cost()
        predictions = network.predict(normalize_data(test_data).T)
        cm = ConfusionMatrix(predictions, test_labels)
        cm.matrix_summary()
        total_precision.append(cm.total_precision())
    plot_precision_vs_number_neurons(hidden_layer_neurons, total_precision)
Example #4
    def test(self):
        # load dataset
        file1_va = h5py.File('./data_da/data1750.h5', 'r')
        train_data = file1_va['data1750_x'][:]
        train_data = train_data.reshape(len(train_data), 1, 4096, 1)
        train_label = file1_va['data1750_y'][:]
        file2_va = h5py.File('./data_da/data1730.h5', 'r')
        test_data = file2_va['data1730_x'][:]
        test_data = test_data.reshape(len(test_data), 1, 4096, 1)
        test_label = file2_va['data1730_y'][:]

        train_data = utils.normalize_data(train_data, 'std')
        test_data = utils.normalize_data(test_data, 'std')

        ms = self.build_M()
        ms.load_weights('./net_weights/best_mt.hdf5')
        ms.compile(optimizer=keras.optimizers.Adam(), loss='mse')
        c = self.build_C()
        c.load_weights('./net_weights/best_c.hdf5')
        c.compile(optimizer=keras.optimizers.Adam(),
                  loss='categorical_crossentropy')

        train_fea = ms.predict(train_data)
        test_fea = ms.predict(test_data)
        test_pre = c.predict(test_fea)
        sio.savemat(
            'gan_fea_pca4test_BA_7.mat', {
                'train_fea': train_fea,
                'train_label': train_label,
                'test_fea': test_fea,
                'test_label': test_label,
                'test_pre': test_pre
            })
        # layer_name = 'conv1'
Example #5
    def extract_normalize_images(self):
        x_train_valid = self.data_df.iloc[:, 1:].values.reshape(
            -1, self.img_h, self.img_w, 1)  # (42000,28,28,1) array
        x_train_valid = x_train_valid.astype(
            np.float)  # convert from int64 to float
        x_train_valid = utils.normalize_data(x_train_valid)

        x_test = self.test_df.iloc[:, 0:].values.reshape(
            -1, self.img_h, self.img_w, 1)  # (28000,28,28,1) array
        x_test = x_test.astype(np.float)
        x_test = utils.normalize_data(x_test)

        image_size = 784

        # extract image labels
        y_train_valid_labels = self.data_df.iloc[:,
                                                 0].values  # (42000,1) array
        labels_count = np.unique(y_train_valid_labels).shape[0]
        # number of different labels = 10

        #plot some images and labels
        #plt.figure(figsize=(15,9))
        #for i in range(50):
        #    plt.subplot(5,10,1+i)
        #    plt.title(y_train_valid_labels[i])
        #    plt.imshow(x_train_valid[i].reshape(28,28), cmap=cm.binary)

        # labels in one hot representation
        y_train_valid = utils.dense_to_one_hot(y_train_valid_labels,
                                               labels_count).astype(np.uint8)
        return (x_train_valid, y_train_valid, x_test)
Example #6
def get_testing_batch():
    while True:
        for sequence in test_loader:
            sequence_0, sequence_match = sequence
            batch_0 = utils.normalize_data(opt, dtype, sequence_0)
            batch_match = []
            for i in range(5):
                batch_match.append(utils.normalize_data(opt, dtype, sequence_match[i]))
            yield batch_0, batch_match
Example #7
def get_batch_generator(data_loader):
    while True:
        for sequence in data_loader:
            if not opt.use_action:
                batch = utils.normalize_data(opt, dtype, sequence)
                yield batch
            else:
                images, actions = sequence
                images = utils.normalize_data(opt, dtype, images)
                actions = utils.sequence_input(actions.transpose_(0, 1), dtype)
                yield images, actions
Example #8
 def normalize(self, model=settings.MODEL):
     if model == "mlp" or model == "test":
         self.images_outer_flat = normalize_data(self.images_outer_flat)
         self.images_inner_flat = normalize_data(self.images_inner_flat)
     elif model == "conv_mlp":
         self.images_outer2d = normalize_data(self.images_outer2d)
         self.images_inner_flat = normalize_data(self.images_inner_flat)
     elif model == "conv_deconv" or model == "lasagne_conv_deconv":
         self.images_outer2d = normalize_data(self.images_outer2d)
         self.images_inner2d = normalize_data(self.images_inner2d)
     elif model == "dcgan" or model == "wgan" or model == "lsgan":
         self.images = normalize_data(self.images)
         self.images_inner2d = normalize_data(self.images_inner2d)
     elif model == "vgg16":
         self.images_outer2d = self.images_outer2d.astype('float32')
         self.images_inner2d = self.images_inner2d.astype('float32')
         for col_index, subtract in enumerate([103.939, 116.779, 123.68]):
             self.images_outer2d[:, col_index, :, :] -= subtract
             self.images_inner2d[:, col_index, :, :] -= subtract
         # swap RGB -> BGR; copy the red channel first so the swap is not
         # clobbered by numpy view aliasing
         r = self.images_outer2d[:, 0, :, :].copy()
         self.images_outer2d[:, 0, :, :] = self.images_outer2d[:, 2, :, :]
         self.images_outer2d[:, 2, :, :] = r
         r = self.images_inner2d[:, 0, :, :].copy()
         self.images_inner2d[:, 0, :, :] = self.images_inner2d[:, 2, :, :]
         self.images_inner2d[:, 2, :, :] = r
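The `vgg16` branch above subtracts the per-channel ImageNet means and then swaps the R and B channels (RGB to BGR). A more compact sketch of that preprocessing, assuming a float NCHW array, would be:

import numpy as np

def to_vgg16_bgr(images):
    # images is assumed to have shape [N, 3, H, W] in RGB order.
    images = images.astype('float32')
    means = np.array([103.939, 116.779, 123.68], dtype='float32')
    images -= means[None, :, None, None]      # subtract per-channel means
    return images[:, [2, 1, 0], :, :]         # reorder RGB -> BGR (returns a copy)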
Example #9
    def predict_feature_map(self):
        input_data = pd.read_csv(self.annotations_path)
        n_samples = input_data.shape[0]
        self.feature_map = np.empty((n_samples, ) +
                                    self.model.layers[-1].output.shape[1:])

        for i in range(0, n_samples // self.batch_size):
            # TODO: get dtype from model
            images = np.empty((self.batch_size, ) + self.get_input_shape(),
                              dtype=np.int)

            for j, image_path in enumerate(
                    input_data.iloc[i * self.batch_size:(i + 1) *
                                    self.batch_size]["image_path"]):
                image = PIL.Image.open(
                    os.path.join(self.image_path, image_path))

                if len(np.array(image).shape) != 3:
                    rgbimg = PIL.Image.new("RGB", image.size)
                    rgbimg.paste(image)
                    image = rgbimg

                image = image.resize(self.get_input_shape()[:-1])
                image = np.array(image)
                images[j] = image
            self.feature_map[i * self.batch_size:(i + 1) *
                             self.batch_size] = self.model(images)

        self.feature_map = normalize_data(self.feature_map)
Example #10
def get_testing_batch():
    while True:
        for image_seq, action_seq in test_loader:
            image_seq = utils.normalize_data(opt, dtype, image_seq)
            action_seq = utils.sequence_input(action_seq.transpose_(0, 1),
                                              dtype)
            yield image_seq, action_seq
Example #11
def main():
    # cleaning
    utils.remove_all_files_inside_folder('./results/')
    utils.remove_all_files_inside_folder('./training_checkpoints/')
    # prepare dataset
    (train_images, _), (_, _) = utils.get_fmnist_data()
    train_dataset = utils.normalize_data(train_images)
    # create models
    generator = utils.Generator()
    discriminator = utils.Discriminator()
    # Defun gives 10 secs/epoch performance boost
    generator.call = tf.contrib.eager.defun(generator.call)
    discriminator.call = tf.contrib.eager.defun(discriminator.call)
    # training helpers
    checkpoint = utils.setup_checkpoint(generator, discriminator)
    random_vector = utils.generate_constant_random_vector(
        NOISE_DIM, NUM_EXAMPLES_TO_GENERATE)
    # training
    history = utils.train(dataset=train_dataset, epochs=EPOCHS, noise_dim=NOISE_DIM, generator=generator,
                          discriminator=discriminator, checkpoint=checkpoint, random_vector=random_vector)
    # reporting
    generator.summary()
    discriminator.summary()
    utils.plot_loss(history)
    utils.create_gif()
def test_digits(model, digits, labels, ensemble_size, reshape_fun):
    steps_results = {'c_error': {}, 'entropy': {}}

    dnum = 200

    pb = ProgressBar(total=100,
                     prefix='Sim trial progress',
                     length=25,
                     fill='=',
                     zfill='_')
    for i in range(1, 101):
        dnoice = salt_and_pepper(digits, i * dnum)

        d = utils.normalize_data(reshape_fun(dnoice))
        entropy = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size,
                                 labels,
                                 metric='c_error')
        steps_results['entropy'][i] = entropy
        steps_results['c_error'][i] = c_error
        pb.print_progress_bar(i)

    return steps_results
Example #13
    def pre_process_data(self):
        in_features = self.data_config['in_features']
        out_features = self.data_config['out_features']

        data_obj = DATA(freq=self.data_config['freq'])
        all_data = data_obj.get_df()

        # copying required data
        df = all_data[in_features].copy()
        for out in out_features:
            df[out] = all_data[out].copy()
        if self.verbosity > 0:
            print('shape of whole dataset', df.shape)

        # assuming that pandas will add the 'datetime' column as the last column. This column will only be used to
        # keep track of the indices of the train and test data.
        df['datetime'] = list(
            map(int, np.array(df.index.strftime('%Y%m%d%H%M'))))

        # Columns containing target data may have nan values because missing values are represented by nans,
        # so convert those nans to 0s. This relies on the big assumption that the actual target data contains no 0s:
        # the zeros are used later (in the LSTM and elsewhere) to build masks such as values > 0.0, which would not
        # work if genuine target values were zero.
        dataset = nan_to_num(df.values,
                             len(out_features) + 1,
                             replace_with=0.0)

        if self.data_config['normalize']:
            dataset, self.scalers['all'] = normalize_data(
                dataset, df.columns, 1)

        return dataset  # , scalers
Example #14
 def patch(self, id):
     try:
         measurement = Measurement.objects(id=id).first()
         if measurement is not None:
             if get_formatted_date(
                     measurement.created) != get_formatted_date(
                         get_today_date()):
                 raise BadRequest(
                     f'Cannot update a measurement for {get_formatted_date(measurement.created)}'
                 )
             data = self.reqparse.parse_args()
             data = normalize_data(data)
             measurement.update(**data)
             measurement.reload()
             return measurement.to_dict(), 200
         abort(404, message=f'Measurement ID={id} was not found')
     except BadRequest as e:
         app.logger.error(e)
         raise e
     except NotFound as e:
         app.logger.error(e)
         raise e
     except Exception as e:
         app.logger.error(e)
         abort(500, message=str(e))
def train(dataset, model_name, timestep=20):
    """Train an LSTM model."""
    positions = []
    for i in range(len(dataset[0])):
        # model_period = f"{model_name}_period{i}.h5"

        x_train, y_train = generate_time_series_sample(
            normalize_data(dataset[0][i][0]), dataset[0][i][1].values,
            timestep)

        x_test, y_test = generate_time_series_sample(
            normalize_data(dataset[1][i][0]), dataset[1][i][1].values,
            timestep)

        x_train = x_train.transpose((0, 2, 1))
        x_train = np.reshape(x_train,
                             (x_train.shape[0] * x_train.shape[1], timestep))
        y_train = np.reshape(y_train, (y_train.shape[0] * y_train.shape[1]))

        x_test = x_test.transpose((0, 2, 1))
        x_test = np.reshape(x_test,
                            (x_test.shape[0] * x_test.shape[1], timestep))
        y_test = np.reshape(y_test, (y_test.shape[0] * y_test.shape[1]))
        print(f"x train shape: {x_train.shape}")
        print(f"y train shape: {y_train.shape}")
        print(f"x test shape: {x_test.shape}")
        print(f"y test shape: {y_test.shape}")

        clf = RandomForestClassifier(n_jobs=2, random_state=0, max_depth=5)
        clf.fit(x_train, y_train)
        predict = clf.predict(x_test)
        predict = predict.reshape(predict.shape[0] // 31, 31)[-250:]
        position = dataset[1][i][1].values[-250:, :]
        result = sum(sum(predict == position)) / predict.size

        predict1 = clf.predict(x_test)
        predict1 = predict1.reshape(predict1.shape[0] // 31, 31)[-300:-250]
        position1 = dataset[1][i][1].values[-300:-250, :]
        result1 = sum(sum(predict1 == position1)) / predict1.size

        positions.append(predict)
        print(result)
        print(result1)
    all_positions = np.concatenate(positions, axis=0)
    print(all_positions.shape)
Example #16
def pts_process(label_path, bbox, img_size):
    landmark_ori = np.genfromtxt(label_path, skip_header=3, skip_footer=1)
    landmark = np.multiply(
        np.clip(
            normalize_data(landmark_ori,
                           bbox,
                           occlu_include=False,
                           label_ext=".pts"), 0, 1), img_size)
    return landmark
Example #17
 def return_test_data(self):
     X_test = []
     for i in range(self.test_images.shape[0]):
         X = np.copy(self.test_images[i])
         center = (int(np.floor(X.shape[1] / 2.)),
                   int(np.floor(X.shape[2] / 2.)))
         X[:, center[0] - 16:center[0] + 16,
           center[1] - 16:center[1] + 16] = 0
         X_test.append(X)
     y_test = []
     for i in range(self.test_images.shape[0]):
         y = np.copy(self.test_images[i])
         center = (int(np.floor(y.shape[1] / 2.)),
                   int(np.floor(y.shape[2] / 2.)))
         y_test.append(y[:, center[0] - 16:center[0] + 16,
                         center[1] - 16:center[1] + 16])
     return normalize_data(np.array(X_test)), normalize_data(
         np.array(y_test))
Example #18
def train_deepnn(model_file, inputs, outputs, model, num_epochs):
    x_train, x_valid, y_train, y_valid = train_test_split(inputs,
                                                          outputs,
                                                          test_size=0.2,
                                                          random_state=36)

    means, std_dev = get_mean_stddev(x_train)

    filepath = '/'.join(model_file.split("/")[:-1])
    filename = model_file.split("_")[2] + "_" + str(x_train.shape[2])

    np.save(filepath + "/means_" + filename + ".npy", means)
    np.save(filepath + "/stddev_" + filename + ".npy", std_dev)

    x_train = normalize_data(x_train, means, std_dev)
    x_valid = normalize_data(x_valid, means, std_dev)

    y_train = labels_to_categorical(y_train)
    y_valid = labels_to_categorical(y_valid)

    for epoch in range(num_epochs):
        history_train = model.fit(x_train,
                                  y_train,
                                  batch_size=BATCH_SIZE,
                                  epochs=1,
                                  verbose=0)
        history_valid = model.evaluate(x_valid,
                                       y_valid,
                                       verbose=0,
                                       batch_size=BATCH_SIZE)

        key_list = list(history_train.history.keys())
        score_train = history_train.history["loss"][0]
        acc_train = history_train.history["acc"][0]

        print()
        print("Epoch {}/{}".format(epoch + 1, num_epochs))
        print(" - loss: {:.4f} - acc: {:.4f}".format(score_train, acc_train))
        print()
        print("logloss score: %.4f" % history_valid[0])
        print("Validation set Accuracy: %.4f" % history_valid[1])
        add_history(model_file, history_train.history, history_valid, key_list)

    return model
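In this example `normalize_data(x, means, std_dev)` is called with statistics computed from the training split only, which suggests z-score standardization; a minimal sketch consistent with that call signature (an assumption, not the project's actual helper):

def normalize_data(x, means, std_dev):
    # Hypothetical z-score standardization with precomputed training statistics.
    return (x - means) / std_dev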
Example #19
def get_data(opt, data, indices):
    if not isinstance(indices, tuple):
        indices = indices.split('_')
        indices = (indices[0], int(indices[1]), int(indices[2]),
                   int(indices[3]))
    x, flist = data.get_sequence_idx(*indices)
    x = tobatch(x)
    x = utils.normalize_data(opt, torch.cuda.FloatTensor, x)
    name = '_'.join(list(map(str, indices)))
    return x, name, flist
Example #20
    def get_batches_new(self, mode):

        print('\n', '*' * 14)
        print("creating data for {} mode".format(mode))
        print('*' * 14)

        in_features = self.data_config['in_features']
        out_features = self.data_config['out_features']

        data_obj = DATA(freq=self.data_config['freq'])
        all_data = data_obj.get_df_from_rf('opt_set.mat')  # INPUT

        # copying required data
        df = all_data[in_features].copy()
        for out in out_features:
            df[out] = all_data[out].copy()
        if self.verbosity > 0:
            print('shape of whole dataset', df.shape)

        # assuming that pandas will add the 'datetime' column as the last column. This column will only be used to
        # keep track of the indices of the train and test data.
        df['datetime'] = list(
            map(int, np.array(df.index.strftime('%Y%m%d%H%M'))))

        index = all_data[mode + '_index']
        ttk = index.dropna()

        self.args[mode + '_args']['no_of_samples'] = len(ttk)

        ttk_idx = list(map(int,
                           np.array(ttk.index.strftime('%Y%m%d%H%M'))))  # list

        df['to_keep'] = 0
        df['to_keep'][ttk.index] = ttk_idx

        dataset = nan_to_num(df.values,
                             len(out_features) + 2,
                             replace_with=0.0)

        if self.data_config['normalize']:
            dataset, self.scalers[mode] = normalize_data(
                dataset, df.columns, 2)

        self.batches[mode + '_x'],\
            self.batches[mode + '_y'], \
            self.nn_config[mode + '_no_of_batches'], \
            self.batches[mode + '_index'],\
            self.batches[mode + '_tk_index'] = generate_sample_based_batches(self.args[mode + '_args'],
                                                                             self.nn_config['batch_size'],
                                                                             dataset,
                                                                             self.intervals[mode + '_intervals']
                                                                             )
        return
Example #21
    def __init__(self, config, train=True):

        self.config = config
        self.train = train
        self.formatdata = FormatData(config)
        if train:
            subjects = os.listdir('{0}/{1}/{2}'.format(config.data_root,
                                                       'train',
                                                       config.filename))
        else:
            subjects = os.listdir('{0}/{1}/{2}'.format(config.data_root,
                                                       'test',
                                                       config.filename))

        set = []
        complete_train = []
        for sub in subjects:
            if train:
                folderdir = '{0}/{1}/{2}/{3}'.format(config.data_root, 'train',
                                                     config.filename, sub)
            else:
                folderdir = '{0}/{1}/{2}/{3}'.format(config.data_root, 'test',
                                                     config.filename, sub)
            for file in os.listdir(folderdir):
                filedir = '{0}/{1}'.format(folderdir, file)
                rawdata = np.load(filedir)['poses'][:, :66]
                rawdata = self.frame_filter(rawdata)
                # drop sequences with too few frames
                if rawdata.shape[0] > 150:
                    set.append(rawdata)
            if len(complete_train) == 0:
                complete_train = copy.deepcopy(
                    set[-1])  # take each subject's last motion sequence to compute the mean/std
            else:
                complete_train = np.append(complete_train, set[-1], axis=0)
        if train:
            print('video num for training:', len(set))
        else:
            print('video num for test:', len(set))
        if not train and config.data_mean is None:
            print('Load train dataset first!')
        if train:
            data_mean, data_std, dim_to_ignore, dim_to_use = utils.normalization_stats(
                complete_train)
            config.data_mean = data_mean
            config.data_std = data_std
            config.dim_to_ignore = dim_to_ignore
            config.dim_to_use = dim_to_use

        set = utils.normalize_data(set, config.data_mean, config.data_std,
                                   config.dim_to_use)
        # [S_num, frame_for_S, 60]
        self.data = set
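`utils.normalize_data(set, data_mean, data_std, dim_to_use)` presumably standardizes every sequence with the training statistics from `normalization_stats` and keeps only the used dimensions; a hedged sketch of that behavior (an assumption inferred from the surrounding code, not the actual utility):

def normalize_data(sequences, data_mean, data_std, dim_to_use):
    # Hypothetically: z-score each frame with the training mean/std, then drop
    # the ignored (near-constant) dimensions.
    return [((seq - data_mean) / data_std)[:, dim_to_use] for seq in sequences]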
Example #22
def test_out_digits(model, data, labels):
    #print("===== TESTING THE CURRENT DIGITS =====")
    rescaled = list(map(dutils.unpad_img, data))
    rescaled = list(
        map(
            lambda img: dutils.center_box_image(dutils.resize_image(img, 20),
                                                20, 4), rescaled))
    testing_data = np.array(rescaled)
    testing_data = utils.normalize_data(testing_data)
    testing_data_size = testing_data.shape[0]
    return ann.test_model(model,
                          testing_data.reshape(testing_data_size, 28, 28, 1),
                          labels)
def main():
    # loading the datasets.
    train_x = np.loadtxt("train_x")
    train_y = np.loadtxt("train_y")
    test_x = np.loadtxt("test_x")

    train_x, train_y = utils.shuffle(train_x, train_y)

    # data normalization
    utils.normalize_data(train_x, 'min_max')

    # Create neural network
    net = nn.NeuralNetwork(train_x.shape)

    # Training the model
    net.train(train_x, train_y)

    # Testing
    result = net.test(test_x)
    # Write the test results to a file.
    with open('test_y', 'w+') as file:
        for y in result:
            file.write("{}\n".format(y))
Example #24
def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """
  input: datapoints, boundary width, and the multiplier n for the wider boundary
  output: two lists of data (one per cluster), the boundary points, and the cut coefficients
  """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    coeff, oa, ob = _tcf(datapoints)

    c_left = []
    c_right = []

    r_bp = []
    l_bp = []

    r_nbp = []
    l_nbp = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        unit_len = sum(coeff[:-1] ** 2) ** 0.5
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len

        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)

        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)

        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)

    c_left = np.array(c_left, np.float)
    c_right = np.array(c_right, np.float)
    r_bp = np.array(r_bp, np.float)
    l_bp = np.array(l_bp, np.float)
    r_nbp = np.array(r_nbp, np.float)
    l_nbp = np.array(l_nbp, np.float)

    # left, right, in boundary point, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
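The `p2b_dist` expression above is the signed point-to-hyperplane distance dist(x) = (w · x + b) / ||w||, with `coeff` laid out as [w..., b]. A tiny numeric check of that formula (the values are illustrative only):

import numpy as np

coeff = np.array([1.0, 1.0, -1.0])   # hyperplane x + y - 1 = 0, i.e. w = (1, 1), b = -1
point = np.array([1.0, 1.0])
unit_len = np.sqrt(np.sum(coeff[:-1] ** 2))                     # ||w|| = sqrt(2)
p2b_dist = (np.sum(point * coeff[:-1]) + coeff[-1]) / unit_len  # ~= 0.707, positive side
print(p2b_dist)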
Example #25
def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """
  input: datapoints, boundary width, and the multiplier n for the wider boundary
  output: two lists of data (one per cluster), the boundary points, and the cut coefficients
  """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    coeff, oa, ob = _tcf(datapoints)

    c_left = []
    c_right = []

    r_bp = []
    l_bp = []

    r_nbp = []
    l_nbp = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        unit_len = sum(coeff[:-1]**2)**0.5
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len

        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)

        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)

        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)

    c_left = np.array(c_left, np.float)
    c_right = np.array(c_right, np.float)
    r_bp = np.array(r_bp, np.float)
    l_bp = np.array(l_bp, np.float)
    r_nbp = np.array(r_nbp, np.float)
    l_nbp = np.array(l_nbp, np.float)

    # left, right, in boundary point, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
Example #26
def rwm_cut(orig_datapoints, boundary_width=0.1, n=2):
  datapoints = deepcopy(orig_datapoints)
  datapoints = utl.centralize_data(datapoints)
  datapoints = utl.normalize_data(datapoints)

  in_boundary = 0
  size, dim = datapoints.shape
  c_left = []
  c_right = []

  coeff = _rwm(datapoints)

  r_bp = []
  l_bp = []

  r_nbp = []
  l_nbp = []

  for orig_point, copy_point in zip(orig_datapoints, datapoints):
    # calc distance from point to boundary
    unit_len = sum(coeff[:-1] ** 2) ** 0.5
    p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len

    if abs(p2b_dist) <= boundary_width * n:
      if p2b_dist >= 0:
        r_nbp.append(orig_point)
      else:
        l_nbp.append(orig_point)

    if abs(p2b_dist) <= boundary_width:
      if p2b_dist >= 0:
        r_bp.append(orig_point)
      else:
        l_bp.append(orig_point)

    if p2b_dist >= 0:
      c_right.append(orig_point)
    else:
      c_left.append(orig_point)

  c_left = np.array(c_left, np.float)
  c_right = np.array(c_right, np.float)
  r_bp = np.array(r_bp, np.float)
  l_bp = np.array(l_bp, np.float)
  r_nbp = np.array(r_nbp, np.float)
  l_nbp = np.array(l_nbp, np.float)

  # left, right, in boundary point, coeff
  return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
Example #27
def experiment(network_model, reshape_mode = 'mlp'):
    reshape_funs = {
        "conv" : lambda d : d.reshape(-1,28,28,1),
        "mlp" : lambda d : d.reshape(-1,784)
    }
    xtrain,ytrain,xtest,ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain,xtest = reshape_fun(xtrain),reshape_fun(xtest)
    digits_data = utils.load_processed_data('digits_og_and_optimal')
    digits = digits_data['optimal_lw']
    labels = utils.create_one_hot(digits_data['labels'].astype('uint'))

    ensemble_size = 20
    epochs = 50
    small_digits = reshape_fun(np.array(list(map(scale_down, digits))))
    small_digits = utils.normalize_data(small_digits)
    trials = 5

    for t in range(1,trials+1):
        gc.collect()

        l_xtrain = []
        l_xval = []
        l_ytrain = []
        l_yval = []
        for _ in range(ensemble_size):
            t_xtrain,t_ytrain,t_xval,t_yval = utils.create_validation(xtrain,ytrain,(1/6))
            l_xtrain.append(t_xtrain)
            l_xval.append(t_xval)
            l_ytrain.append(t_ytrain)
            l_yval.append(t_yval)

        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble([network_model], pop_per_type=ensemble_size, merge_type="Average")
        es = clb.EarlyStopping(monitor='val_loss',patience=2,restore_best_weights=True)
        
        train_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics = ['acc'])
        train_model.fit(x=l_xtrain,y=l_ytrain, verbose=1,batch_size=100, epochs = epochs,validation_data=(l_xval,l_yval),callbacks=[es])
        merge_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['acc'])

        results = test_digits(merge_model, digits, labels, ensemble_size, reshape_fun)

        #entropy = ann.test_model(merge_model, [small_digits]*ensemble_size, labels, metric = 'entropy')
        #c_error = ann.test_model(merge_model, [small_digits]*ensemble_size, labels, metric = 'c_error')

        #results['c_error'][0] = c_error
        #results['entropy'][0] = entropy

        filename = "saltpepper_norm_trial-%s" % t
        utils.save_processed_data(results, filename)
    def __init__(self, config, train=True):

        self.config = config
        self.train = train
        self.formatdata = FormatData(config)
        if config.datatype == 'smpl':
            train_path = config.data_root
        else:
            print('CMUDataset only support the smpl datatype')
            sys.exit(1)
        if config.filename != 'all':
            if train:
                subjects = config.subjects_train
            else:
                subjects = config.subjects_test
        else:
            print('Only support walking and dance action')
            sys.exit(1)

        set = []
        complete_train = []
        for sub in subjects:
            folderdir = '{0}/{1}'.format(train_path, sub)
            for file in os.listdir(folderdir):
                filedir = '{0}/{1}'.format(folderdir, file)
                rawdata = np.load(filedir)['poses'][:, :66]
                rawdata = self.frame_filter(rawdata)
                if rawdata.shape[0] > 70:
                    set.append(rawdata)
            if len(complete_train) == 0:
                complete_train = copy.deepcopy(
                    set[-1])  # take each subject's last motion sequence to compute the mean/std
            else:
                complete_train = np.append(complete_train, set[-1], axis=0)
        print('number of videos:', len(set))
        if not train and config.data_mean is None:
            print('Load train dataset first!')
        if train and config.datatype == 'smpl':
            data_mean, data_std, dim_to_ignore, dim_to_use = utils.normalization_stats(
                complete_train)
            config.data_mean = data_mean
            config.data_std = data_std
            config.dim_to_ignore = dim_to_ignore
            config.dim_to_use = dim_to_use

        set = utils.normalize_data(set, config.data_mean, config.data_std,
                                   config.dim_to_use)
        # [S_num, frame_for_S, 66]
        self.data = set
def single_file_reader(savename):
    f = open(savename)

    full_x = []
    full_y = []
    for line in f:
        (x,y) = line.split(' ')
        x = list(map(float, x.split(',')[:-1]))
        y = list(map(int, y.split(',')[:-1]))

        full_x.append(x)
        full_y.append(y)

    full_x = utils.normalize_data(full_x)
    return (full_x, full_y)
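A hypothetical call (the file name is an assumption); each line of the input file is expected to hold a space-separated feature field and label field, each a comma-separated list:

full_x, full_y = single_file_reader('training_data.txt')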
Example #30
def preprocess_and_save(batch_id):
    images, labels = load_cifar10_batch(batch_id)
    images = utils.normalize_data(images)
    labels = utils.one_hot_encode(labels, 10)
    train_images, train_labels, valid_images, valid_labels, test_images, test_labels =\
        utils.split_data(images, labels, train_size=0.8, valid_size=0.1, test_size=0.1)
    batch = {
        'train_images': train_images,
        'train_labels': train_labels,
        'valid_images': valid_images,
        'valid_labels': valid_labels,
        'test_images': test_images,
        'test_labels': test_labels
    }
    batch_path = os.path.join(folder_path, 'preprocess_batch_' + str(batch_id))
    np.save(batch_path, np.asarray(batch))
Example #31
def select_features_stepwise_forward(dataFrame, n_news, original_cols):
    """
		*stepwise selection* de varaibles, utiliza las importancias de un modelo de random forest para clasificar las mejores variables

		Parámetros:
		- dataFrame -- *DataFrame* de pandas, datos de todas las variables que van a ser seleccionadas
		- n_news -- Entero, máximo número de variables que van a ser seleccionadas
		- original_cols -- Lista, lista con los nombres de las columnas originales del problema para reconocer las variables seleccionadas

		Retorna:
		NADA
		(no retorna nada pero escribe las variables seleccioandas en el archivo 'data_selected.csv' en el directorio data)

	"""
    n_features = dataFrame.shape[1]
    dataFrame.columns = original_cols

    # params
    n_news -= 1
    features = set(dataFrame.columns)
    features.remove(list(dataFrame.columns)[0])
    missing = features.copy()
    inside = [list(dataFrame.columns)[0]]
    from sklearn.ensemble import RandomForestRegressor

    while (n_news):
        fts = list(inside)
        best = ''
        best_importance = 0
        for ft in missing:
            # evaluate only the already-selected features plus this candidate
            fts = list(inside) + [ft]
            scaled, scaler = utils.normalize_data(dataFrame[fts].values)
            x, y = series_to_supervised(scaled)
            model = RandomForestRegressor(n_estimators=100)
            model.fit(x, y)
            importances = model.feature_importances_
            if (importances[-1] > best_importance):
                best = fts[-1]
                best_importance = importances[-1]

        inside.append(best)
        missing.remove(best)

        n_news -= 1

    df = dataFrame[inside]
    df.to_csv('data/data_selected.csv')
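A hypothetical invocation (the input path and the choice of five variables are assumptions); the selected columns end up in 'data/data_selected.csv':

import pandas as pd

df = pd.read_csv('data/data.csv', index_col=0)  # hypothetical input file
select_features_stepwise_forward(df, n_news=5, original_cols=list(df.columns))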
Example #32
def cut_by_coeff(orig_datapoints, coeff):
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    c_left = []
    c_right = []
    unit_len = sum(coeff[:-1] ** 2) ** 0.5

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, np.float)
    c_right = np.array(c_right, np.float)

    return (c_left, c_right)
Example #33
def cut_by_coeff(orig_datapoints, coeff):
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    c_left = []
    c_right = []
    unit_len = sum(coeff[:-1]**2)**0.5

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, np.float)
    c_right = np.array(c_right, np.float)

    return (c_left, c_right)
Example #34
def plot_hilbert_spectra(time, frequency, amplitude, title, plotter=plt, fs=100):

    # Scale factor (to plot frequency with decimal precision)
    scale_freq = 10
    # Max scaled frequency
    max_freq = int(0.5*scale_freq*fs)

    # Creating time axis
    time_ax = np.linspace(0, len(time)-1, len(time))
    # Allocating memory for power and the rounded frequency
    power_array = np.zeros(np.shape(frequency))
    freq_rounded_array = np.zeros(np.shape(power_array), np.int)

    # Create GRID based on time axis and maximum frequency
    yi = np.linspace(0, max_freq, max_freq + 1)
    Z = np.ones((max_freq + 1, len(time_ax)))*-200
    X, Y = np.meshgrid(time_ax, yi)

    # Enter loop if more than one IMF exists
    if isinstance(frequency[0], np.ndarray):
        n_inst_frequencies = len(frequency)
        for i in range(n_inst_frequencies):
            # Normalize the amplitude ( 0<=a<=1)
            amplitude[i] = utils.normalize_data(amplitude[i])
            # Power equal to amplitude squared
            power_array[i] = np.multiply(amplitude[i], amplitude[i])
            # Round the frequency to the nearest (results in OK resolution if scale_freq > 1, eg scale_freq=10)
            freq_rounded_array[i] = np.ceil(frequency[i]*scale_freq)
            # Compute the logarithmic power, and add it to the previous if the same inst. frequency exists.
            for k in range(len(time_ax)):
                if power_array[i, k] == 0.0:
                    power_array[i, k] = 0.00000001

                current_amplitude = Z[int(freq_rounded_array[i, k]), int(time_ax[k])]

                if current_amplitude > -200:
                    Z[int(freq_rounded_array[i, k]), int(time_ax[k])] = current_amplitude + 20.0*np.log10(power_array[i, k])
                else:
                    Z[int(freq_rounded_array[i, k]), int(time_ax[k])] = 20.0*np.log10(power_array[i, k])
    else:
        # Normalize the amplitude ( 0<=a<=1)
        amplitude = utils.normalize_data(amplitude)
        # Power equal to amplitude squared
        power_array = np.multiply(amplitude, amplitude)
        # Round the frequency to the nearest (results in OK resolution if scale_freq > 1, eg scale_freq=10)
        freq_rounded_array = np.ceil(frequency*scale_freq)
        # Compute the logarithmic power, and add it to the previous if the same inst. frequency exists.
        for k in range(len(time_ax)):
            Z[int(freq_rounded_array[k]), int(time_ax[k])] = 20.0*np.log10(power_array[k])

    # Create figure and subplot.
    # Set titles and labels.
    fig = plotter.figure()
    suptitle = 'Hilbert Spectra - Channel: ' + title
    fig.suptitle(suptitle)
    ax = plotter.subplot(111)
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Frequency [Hz]')

    # Create contour plot and time, frequency and logarithmic power. Scale frequencies back to original values.
    n_levels = 200
    cax = ax.contourf(X, Y/scale_freq, Z, n_levels)
    # Assign color bar to the contour plot
    cb = fig.colorbar(cax)
    # Set label and draw plot
    cb.set_label('Amplitude [dB]')
    plotter.draw()
Example #35
  for cls in tmp_clusters:
    res_cls.append(cls)

  print("#cls {} -> {}".format(len(clusters), len(res_cls)))
  print(calc_num_point(res_cls))

  return res_cls


if __name__ == '__main__':
  doctest.testmod()

  points, label = utl.read_from_text('2d5c_noncycle')

  points = utl.centralize_data(points)
  points = utl.normalize_data(points)

  # points, label = utl.read_from_text('2d5c_cov')
  # points, label = utl.read_from_text('hand_write_digit_2d')
  # seleted = datasets.load_digits()                                                                                                   
  # points = seleted.data                                                       
  # label = seleted.target
  #
  ms_tree = ms2c(points)
  # paint_tree(ms_tree, ms_tree)

  final_nodes = ms_tree.merge()
  grounded_nodes = ms_tree.grounded_nodes
  grounded_cls = [x.datapoints for x in grounded_nodes]
  final_cls = [x.datapoints for x in final_nodes]
Example #36
# Predicting house prices: a regression example
from keras.datasets import boston_housing
from utils import normalize_data
from keras import models, layers, optimizers, losses, metrics

# loading the Boston housing dataset
(train_data, train_labels), (test_data, test_labels) = boston_housing.load_data()
# dataset info
print(train_data.shape)
print(test_data.shape)

# preparing the data
# Normalizing the data
train_data = normalize_data(train_data)
test_data = normalize_data(test_data)

# build your network
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss=losses.mse, metrics=[metrics.mae])
    return model

network = build_model()
# train
network.fit(train_data, train_labels, epochs=80)
loss, mae = network.evaluate(test_data, test_labels)
print(loss, mae)
import numpy as np
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
from keras.datasets import boston_housing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from utils import normalize_data, init_keras

init_keras()

(x_train, y_train),(x_test, y_test) = boston_housing.load_data()

x_train = normalize_data(x_train)
x_test = normalize_data(x_test)

k = 4
epochs = 500
mae_histories_4_k_fold = []
for i in range(k):
    x_train, x_cv, y_train, y_cv = train_test_split(
        x_train, y_train, test_size=1.0/k)

    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(x_train.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))

    model.compile(optimizer=optimizers.RMSprop(lr=0.001), loss=losses.mse, metrics=[metrics.mae])
def get_testing_batch(dtype=torch.cuda.FloatTensor):
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch
def read_data(trainfile, batchsize=100000, platenamefile=None):
    """
    Reads a datafile created by the generate_data_files function.
    It will reset the file after having read through it.
    
    Yields data in batches with a specified batchsize.
    
    Parameters:
        trainfile:  The file to read data from.
                    It must have a seek function taking an integer as its parameter.

        batchsize:  How many samples to yield at a time.
                    Defaults to 100000.

        platenamefile:
                    A file to read plate names from, whose lines should correspond to the rows in the trainfile.
                    If None, no plate names will be read or yielded.
                    Defaults to None.

    Returns:
        Data in the format (features, target), or (features, target, platenames) if platenamefile is set,
        in batches of size `batchsize`.
    """

    while True:
        x = trainfile.readline()
        y = trainfile.readline()
        if y == '':
            break

        if platenamefile:
            platestring = platenamefile.readline()
            platestring = platestring.split(' ')
            platestring[1] = int(platestring[1])
            platestring[2] = int(platestring[2])
            platenames = [platestring]

        try:
            x = [list(map(float, x.split(' ')[:-1]))]
            y = [list(map(int, y.split(' ')[:-1]))]
        except:
            x = []
            y = []
            if platenamefile:
                del(platenames[-1])

        while len(x) < batchsize:
            newx = trainfile.readline()
            newy = trainfile.readline()
            if newy == '':
                break

            if platenamefile:
                platestring = platenamefile.readline()
                platestring = platestring.split(' ')
                platestring[1] = int(platestring[1])
                platestring[2] = int(platestring[2])
                platenames.append(platestring)
            # Remove newlines and convert to the correct datatypes
            try:
                x.append(list(map(float, newx.split(' ')[:-1])))
                y.append(list(map(int, newy.split(' ')[:-1])))
            except:
                if platenamefile:
                    del(platenames[-1])
                continue

        x = utils.normalize_data(x)
        if platenamefile:
            yield(x,y,platenames)
        else:
            yield(x,y)
    trainfile.seek(0)
    if platenamefile:
        platenamefile.seek(0)
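A hypothetical way to drive the generator (the file names are assumptions); both files have to stay open for the generator's lifetime because it seeks back to the start once exhausted:

with open('train_data.txt') as trainfile, open('platenames.txt') as platenamefile:
    for features, targets, platenames in read_data(trainfile,
                                                   batchsize=50000,
                                                   platenamefile=platenamefile):
        pass  # consume one batch at a time here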
Example #40
def get_testing_batch():
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch