def main(K=6):
    # set seed
    np.random.seed(1)
    # set numpy print options (pretty print matrix)
    np.set_printoptions(precision=4, suppress=True, floatmode='fixed')
    # load data
    data = read_csv()
    # preprocess
    X = normalize_data([d[0] for d in data])
    Y = normalize_data([d[1] for d in data])
    data = np.array([X, Y]).transpose()
    model = VBGMM(data, K)
    # run mixGaussBayesFit
    posterior, loglikHist = model.fit()
    model.plot_posterior_alpha()
    # plot likelihood
    plt.plot(loglikHist, '-', marker='*', lw=3)
    plt.yticks(np.arange(-1100, -601, 50))
    plt.xlim([0, 100])
    plt.xlabel('iterations')
    plt.ylabel('lower bound on log marginal likelihood')
    plt.title('variational Bayes objective for GMM on old faithful data')
    plt.show()
def test_digits(model, digits, labels, ensemble_size, reshape_fun):
    steps_results = {'c_error': {}, 'entropy': {}}
    dnum = 80
    for i in range(1, 101):
        dless, dmore = salt_and_pepper(digits, i * dnum)
        d = utils.normalize_data(reshape_fun(dmore))
        entropy = ann.test_model(model, [d] * ensemble_size, labels, metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size, labels, metric='c_error')
        steps_results['entropy'][i] = entropy
        steps_results['c_error'][i] = c_error
        d = utils.normalize_data(reshape_fun(dless))
        entropy = ann.test_model(model, [d] * ensemble_size, labels, metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size, labels, metric='c_error')
        steps_results['entropy'][-1 * i] = entropy
        steps_results['c_error'][-1 * i] = c_error
    return steps_results
def iris_dataset_classification():
    # Set the seed to make results reproducible
    np.random.seed(50)
    # Load the iris dataset
    iris = datasets.load_iris()
    train_data, test_data, train_labels, test_labels = split_and_shuffle_train_test(
        iris.data, iris.target)
    # One-hot encode and transpose the labels for use in the network
    encoding, train_labels = one_hot_encoding(train_labels)
    train_labels = train_labels.T
    # Normalize the data
    train_data = normalize_data(train_data).T
    # Range of hidden-layer sizes to test
    hidden_layer_neurons = range(200, 600, 30)
    # Store the precision for each hidden-layer configuration
    total_precision = []
    for neuron in hidden_layer_neurons:
        network = TwoLayerNetwork(4, neuron, 3)
        network.train(train_data, train_labels, 10000, 0.001)
        network.plot_training_cost()
        predictions = network.predict(normalize_data(test_data).T)
        cm = ConfusionMatrix(predictions, test_labels)
        cm.matrix_summary()
        total_precision.append(cm.total_precision())
    plot_precision_vs_number_neurons(hidden_layer_neurons, total_precision)
def test(self):
    # load dataset
    file1_va = h5py.File('./data_da/data1750.h5', 'r')
    train_data = file1_va['data1750_x'][:]
    train_data = train_data.reshape(len(train_data), 1, 4096, 1)
    train_label = file1_va['data1750_y'][:]
    file2_va = h5py.File('./data_da/data1730.h5', 'r')
    test_data = file2_va['data1730_x'][:]
    test_data = test_data.reshape(len(test_data), 1, 4096, 1)
    test_label = file2_va['data1730_y'][:]
    train_data = utils.normalize_data(train_data, 'std')
    test_data = utils.normalize_data(test_data, 'std')
    ms = self.build_M()
    ms.load_weights('./net_weights/best_mt.hdf5')
    ms.compile(optimizer=keras.optimizers.Adam(), loss='mse')
    c = self.build_C()
    c.load_weights('./net_weights/best_c.hdf5')
    c.compile(optimizer=keras.optimizers.Adam(), loss='categorical_crossentropy')
    train_fea = ms.predict(train_data)
    test_fea = ms.predict(test_data)
    test_pre = c.predict(test_fea)
    sio.savemat(
        'gan_fea_pca4test_BA_7.mat', {
            'train_fea': train_fea,
            'train_label': train_label,
            'test_fea': test_fea,
            'test_label': test_label,
            'test_pre': test_pre
        })
    # layer_name = 'conv1'
def extract_normalize_images(self):
    x_train_valid = self.data_df.iloc[:, 1:].values.reshape(
        -1, self.img_h, self.img_w, 1)  # (42000, 28, 28, 1) array
    x_train_valid = x_train_valid.astype(np.float32)  # convert from int64 to float32
    x_train_valid = utils.normalize_data(x_train_valid)
    x_test = self.test_df.iloc[:, 0:].values.reshape(
        -1, self.img_h, self.img_w, 1)  # (28000, 28, 28, 1) array
    x_test = x_test.astype(np.float32)
    x_test = utils.normalize_data(x_test)
    image_size = 784
    # extract image labels
    y_train_valid_labels = self.data_df.iloc[:, 0].values  # (42000, 1) array
    labels_count = np.unique(y_train_valid_labels).shape[0]  # number of different labels = 10
    # plot some images and labels
    # plt.figure(figsize=(15, 9))
    # for i in range(50):
    #     plt.subplot(5, 10, 1 + i)
    #     plt.title(y_train_valid_labels[i])
    #     plt.imshow(x_train_valid[i].reshape(28, 28), cmap=cm.binary)
    # labels in one hot representation
    y_train_valid = utils.dense_to_one_hot(y_train_valid_labels, labels_count).astype(np.uint8)
    return (x_train_valid, y_train_valid, x_test)
def get_testing_batch():
    while True:
        for sequence in test_loader:
            sequence_0, sequence_match = sequence
            batch_0 = utils.normalize_data(opt, dtype, sequence_0)
            batch_match = []
            for i in range(5):
                batch_match.append(utils.normalize_data(opt, dtype, sequence_match[i]))
            yield batch_0, batch_match
def get_batch_generator(data_loader):
    while True:
        for sequence in data_loader:
            if not opt.use_action:
                batch = utils.normalize_data(opt, dtype, sequence)
                yield batch
            else:
                images, actions = sequence
                images = utils.normalize_data(opt, dtype, images)
                actions = utils.sequence_input(actions.transpose_(0, 1), dtype)
                yield images, actions
def normalize(self, model=settings.MODEL):
    if model == "mlp" or model == "test":
        self.images_outer_flat = normalize_data(self.images_outer_flat)
        self.images_inner_flat = normalize_data(self.images_inner_flat)
    elif model == "conv_mlp":
        self.images_outer2d = normalize_data(self.images_outer2d)
        self.images_inner_flat = normalize_data(self.images_inner_flat)
    elif model == "conv_deconv" or model == "lasagne_conv_deconv":
        self.images_outer2d = normalize_data(self.images_outer2d)
        self.images_inner2d = normalize_data(self.images_inner2d)
    elif model == "dcgan" or model == "wgan" or model == "lsgan":
        self.images = normalize_data(self.images)
        self.images_inner2d = normalize_data(self.images_inner2d)
    elif model == "vgg16":
        self.images_outer2d = self.images_outer2d.astype('float32')
        self.images_inner2d = self.images_inner2d.astype('float32')
        for col_index, subtract in enumerate([103.939, 116.779, 123.68]):
            self.images_outer2d[:, col_index, :, :] -= subtract
            self.images_inner2d[:, col_index, :, :] -= subtract
        r, g, b = self.images_outer2d[:, 0, :, :], self.images_outer2d[:, 1, :, :], self.images_outer2d[:, 2, :, :]
        self.images_outer2d[:, 0, :, :], self.images_outer2d[:, 1, :, :], self.images_outer2d[:, 2, :, :] = b, g, r
        r, g, b = self.images_inner2d[:, 0, :, :], self.images_inner2d[:, 1, :, :], self.images_inner2d[:, 2, :, :]
        self.images_inner2d[:, 0, :, :], self.images_inner2d[:, 1, :, :], self.images_inner2d[:, 2, :, :] = b, g, r
def predict_feature_map(self):
    input_data = pd.read_csv(self.annotations_path)
    n_samples = input_data.shape[0]
    self.feature_map = np.empty((n_samples, ) + self.model.layers[-1].output.shape[1:])
    for i in range(0, n_samples // self.batch_size):
        # TODO: get dtype from model
        images = np.empty((self.batch_size, ) + self.get_input_shape(), dtype=int)
        for j, image_path in enumerate(
                input_data.iloc[i * self.batch_size:(i + 1) * self.batch_size]["image_path"]):
            image = PIL.Image.open(os.path.join(self.image_path, image_path))
            if len(np.array(image).shape) != 3:
                rgbimg = PIL.Image.new("RGB", image.size)
                rgbimg.paste(image)
                image = rgbimg
            image = image.resize(self.get_input_shape()[:-1])
            image = np.array(image)
            images[j] = image
        self.feature_map[i * self.batch_size:(i + 1) * self.batch_size] = self.model(images)
    self.feature_map = normalize_data(self.feature_map)
def get_testing_batch():
    while True:
        for image_seq, action_seq in test_loader:
            image_seq = utils.normalize_data(opt, dtype, image_seq)
            action_seq = utils.sequence_input(action_seq.transpose_(0, 1), dtype)
            yield image_seq, action_seq
def main():
    # cleaning
    utils.remove_all_files_inside_folder('./results/')
    utils.remove_all_files_inside_folder('./training_checkpoints/')
    # prepare dataset
    (train_images, _), (_, _) = utils.get_fmnist_data()
    train_dataset = utils.normalize_data(train_images)
    # create models
    generator = utils.Generator()
    discriminator = utils.Discriminator()
    # Defun gives 10 secs/epoch performance boost
    generator.call = tf.contrib.eager.defun(generator.call)
    discriminator.call = tf.contrib.eager.defun(discriminator.call)
    # training helpers
    checkpoint = utils.setup_checkpoint(generator, discriminator)
    random_vector = utils.generate_constant_random_vector(NOISE_DIM, NUM_EXAMPLES_TO_GENERATE)
    # training
    history = utils.train(dataset=train_dataset,
                          epochs=EPOCHS,
                          noise_dim=NOISE_DIM,
                          generator=generator,
                          discriminator=discriminator,
                          checkpoint=checkpoint,
                          random_vector=random_vector)
    # reporting
    generator.summary()
    discriminator.summary()
    utils.plot_loss(history)
    utils.create_gif()
def test_digits(model, digits, labels, ensemble_size, reshape_fun):
    steps_results = {'c_error': {}, 'entropy': {}}
    dnum = 200
    pb = ProgressBar(total=100, prefix='Sim trial progress', length=25, fill='=', zfill='_')
    for i in range(1, 101):
        dnoice = salt_and_pepper(digits, i * dnum)
        d = utils.normalize_data(reshape_fun(dnoice))
        entropy = ann.test_model(model, [d] * ensemble_size, labels, metric='entropy')
        c_error = ann.test_model(model, [d] * ensemble_size, labels, metric='c_error')
        steps_results['entropy'][i] = entropy
        steps_results['c_error'][i] = c_error
        pb.print_progress_bar(i)
    return steps_results
def pre_process_data(self):
    in_features = self.data_config['in_features']
    out_features = self.data_config['out_features']
    data_obj = DATA(freq=self.data_config['freq'])
    all_data = data_obj.get_df()
    # copy the required data
    df = all_data[in_features].copy()
    for out in out_features:
        df[out] = all_data[out].copy()
    if self.verbosity > 0:
        print('shape of whole dataset', df.shape)
    # assuming that pandas will add the 'datetime' column as the last column; this column will only
    # be used to keep track of the indices of the train and test data.
    df['datetime'] = list(map(int, np.array(df.index.strftime('%Y%m%d%H%M'))))
    # The columns containing target data may have nan values because missing values are represented
    # by nans, so convert those nans to 0s. This assumes that the actual target data does not contain
    # 0s. They are converted to zeros because in the LSTM (and at other places as well) the data is
    # selected with a mask such as values > 0.0, which is not possible if the target data has zeros.
    dataset = nan_to_num(df.values, len(out_features) + 1, replace_with=0.0)
    if self.data_config['normalize']:
        dataset, self.scalers['all'] = normalize_data(dataset, df.columns, 1)
    return dataset  # , scalers
def patch(self, id):
    try:
        measurement = Measurement.objects(id=id).first()
        if measurement is not None:
            if get_formatted_date(measurement.created) != get_formatted_date(get_today_date()):
                raise BadRequest(
                    f'Cannot update a measurement for {get_formatted_date(measurement.created)}')
            data = self.reqparse.parse_args()
            data = normalize_data(data)
            measurement.update(**data)
            measurement.reload()
            return measurement.to_dict(), 200
        abort(404, message=f'Measurement ID={id} was not found')
    except BadRequest as e:
        app.logger.error(e)
        raise e
    except NotFound as e:
        app.logger.error(e)
        raise e
    except Exception as e:
        app.logger.error(e)
        abort(500, message=str(e))
def train(dataset, model_name, timestep=20):
    """Train an LSTM model."""
    positions = []
    for i in range(len(dataset[0])):
        # model_period = f"{model_name}_period{i}.h5"
        x_train, y_train = generate_time_series_sample(
            normalize_data(dataset[0][i][0]), dataset[0][i][1].values, timestep)
        x_test, y_test = generate_time_series_sample(
            normalize_data(dataset[1][i][0]), dataset[1][i][1].values, timestep)
        x_train = x_train.transpose((0, 2, 1))
        x_train = np.reshape(x_train, (x_train.shape[0] * x_train.shape[1], timestep))
        y_train = np.reshape(y_train, (y_train.shape[0] * y_train.shape[1]))
        x_test = x_test.transpose((0, 2, 1))
        x_test = np.reshape(x_test, (x_test.shape[0] * x_test.shape[1], timestep))
        y_test = np.reshape(y_test, (y_test.shape[0] * y_test.shape[1]))
        print(f"x train shape: {x_train.shape}")
        print(f"y train shape: {y_train.shape}")
        print(f"x test shape: {x_test.shape}")
        print(f"y test shape: {y_test.shape}")
        clf = RandomForestClassifier(n_jobs=2, random_state=0, max_depth=5)
        clf.fit(x_train, y_train)
        predict = clf.predict(x_test)
        predict = predict.reshape(predict.shape[0] // 31, 31)[-250:]
        position = dataset[1][i][1].values[-250:, :]
        result = sum(sum(predict == position)) / predict.size
        predict1 = clf.predict(x_test)
        predict1 = predict1.reshape(predict1.shape[0] // 31, 31)[-300:-250]
        position1 = dataset[1][i][1].values[-300:-250, :]
        result1 = sum(sum(predict1 == position1)) / predict1.size
        positions.append(predict)
        print(result)
        print(result1)
    all_positions = np.concatenate(positions, axis=0)
    print(all_positions.shape)
def pts_process(label_path, bbox, img_size):
    landmark_ori = np.genfromtxt(label_path, skip_header=3, skip_footer=1)
    landmark = np.multiply(
        np.clip(
            normalize_data(landmark_ori, bbox, occlu_include=False, label_ext=".pts"),
            0, 1),
        img_size)
    return landmark
def return_test_data(self):
    X_test = []
    for i in range(self.test_images.shape[0]):
        X = np.copy(self.test_images[i])
        center = (int(np.floor(X.shape[1] / 2.)), int(np.floor(X.shape[2] / 2.)))
        X[:, center[0] - 16:center[0] + 16, center[1] - 16:center[1] + 16] = 0
        X_test.append(X)
    y_test = []
    for i in range(self.test_images.shape[0]):
        y = np.copy(self.test_images[i])
        center = (int(np.floor(y.shape[1] / 2.)), int(np.floor(y.shape[2] / 2.)))
        y_test.append(y[:, center[0] - 16:center[0] + 16, center[1] - 16:center[1] + 16])
    return normalize_data(np.array(X_test)), normalize_data(np.array(y_test))
def train_deepnn(model_file, inputs, outputs, model, num_epochs):
    x_train, x_valid, y_train, y_valid = train_test_split(inputs, outputs,
                                                          test_size=0.2,
                                                          random_state=36)
    means, std_dev = get_mean_stddev(x_train)
    filepath = '/'.join(model_file.split("/")[:-1])
    filename = model_file.split("_")[2] + "_" + str(x_train.shape[2])
    np.save(filepath + "/means_" + filename + ".npy", means)
    np.save(filepath + "/stddev_" + filename + ".npy", std_dev)
    x_train = normalize_data(x_train, means, std_dev)
    x_valid = normalize_data(x_valid, means, std_dev)
    y_train = labels_to_categorical(y_train)
    y_valid = labels_to_categorical(y_valid)
    for epoch in range(num_epochs):
        history_train = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1, verbose=0)
        history_valid = model.evaluate(x_valid, y_valid, verbose=0, batch_size=BATCH_SIZE)
        key_list = list(history_train.history.keys())
        score_train = history_train.history["loss"][0]
        acc_train = history_train.history["acc"][0]
        print()
        print("Epoch {}/{}".format(epoch + 1, num_epochs))
        print(" - loss: {:.4f} - acc: {:.4f}".format(score_train, acc_train))
        print()
        print("logloss score: %.4f" % history_valid[0])
        print("Validation set Accuracy: %.4f" % history_valid[1])
        add_history(model_file, history_train.history, history_valid, key_list)
    return model
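The three-argument `normalize_data(x, means, std_dev)` used by `train_deepnn` above is not defined anywhere in this collection. A minimal standardization helper with that signature, reusing statistics computed on the training split, might look like the sketch below; the epsilon term and broadcasting behaviour are assumptions rather than details taken from the original code.

import numpy as np


def normalize_data(x, means, std_dev, eps=1e-8):
    # Illustrative sketch only: standardize x with externally supplied per-feature
    # means and standard deviations. The real utils helper may handle axes or
    # zero standard deviations differently.
    return (np.asarray(x, dtype=float) - means) / (std_dev + eps)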
def get_data(opt, data, indices):
    if not isinstance(indices, tuple):
        indices = indices.split('_')
        indices = (indices[0], int(indices[1]), int(indices[2]), int(indices[3]))
    x, flist = data.get_sequence_idx(*indices)
    x = tobatch(x)
    x = utils.normalize_data(opt, torch.cuda.FloatTensor, x)
    name = '_'.join(list(map(str, indices)))
    return x, name, flist
def get_batches_new(self, mode):
    print('\n', '*' * 14)
    print("creating data for {} mode".format(mode))
    print('*' * 14)
    in_features = self.data_config['in_features']
    out_features = self.data_config['out_features']
    data_obj = DATA(freq=self.data_config['freq'])
    all_data = data_obj.get_df_from_rf('opt_set.mat')
    # INPUT
    # copy the required data
    df = all_data[in_features].copy()
    for out in out_features:
        df[out] = all_data[out].copy()
    if self.verbosity > 0:
        print('shape of whole dataset', df.shape)
    # assuming that pandas will add the 'datetime' column as the last column; this column will only
    # be used to keep track of the indices of the train and test data.
    df['datetime'] = list(map(int, np.array(df.index.strftime('%Y%m%d%H%M'))))
    index = all_data[mode + '_index']
    ttk = index.dropna()
    self.args[mode + '_args']['no_of_samples'] = len(ttk)
    ttk_idx = list(map(int, np.array(ttk.index.strftime('%Y%m%d%H%M'))))  # list
    df['to_keep'] = 0
    df['to_keep'][ttk.index] = ttk_idx
    dataset = nan_to_num(df.values, len(out_features) + 2, replace_with=0.0)
    if self.data_config['normalize']:
        dataset, self.scalers[mode] = normalize_data(dataset, df.columns, 2)
    self.batches[mode + '_x'], \
        self.batches[mode + '_y'], \
        self.nn_config[mode + '_no_of_batches'], \
        self.batches[mode + '_index'], \
        self.batches[mode + '_tk_index'] = generate_sample_based_batches(
            self.args[mode + '_args'],
            self.nn_config['batch_size'],
            dataset,
            self.intervals[mode + '_intervals'])
    return
def __init__(self, config, train=True):
    self.config = config
    self.train = train
    self.formatdata = FormatData(config)
    if train:
        subjects = os.listdir('{0}/{1}/{2}'.format(config.data_root, 'train', config.filename))
    else:
        subjects = os.listdir('{0}/{1}/{2}'.format(config.data_root, 'test', config.filename))
    set = []
    complete_train = []
    for sub in subjects:
        if train:
            folderdir = '{0}/{1}/{2}/{3}'.format(config.data_root, 'train', config.filename, sub)
        else:
            folderdir = '{0}/{1}/{2}/{3}'.format(config.data_root, 'test', config.filename, sub)
        for file in os.listdir(folderdir):
            filedir = '{0}/{1}'.format(folderdir, file)
            rawdata = np.load(filedir)['poses'][:, :66]
            rawdata = self.frame_filter(rawdata)  # drop sequences with too few frames
            if rawdata.shape[0] > 150:
                set.append(rawdata)
        if len(complete_train) == 0:
            # use the last action sequence of each subject to compute the mean and std
            complete_train = copy.deepcopy(set[-1])
        else:
            complete_train = np.append(complete_train, set[-1], axis=0)
    if train:
        print('video num for training:', len(set))
    else:
        print('video num for test:', len(set))
    if not train and config.data_mean is None:
        print('Load train dataset first!')
    if train:
        data_mean, data_std, dim_to_ignore, dim_to_use = utils.normalization_stats(complete_train)
        config.data_mean = data_mean
        config.data_std = data_std
        config.dim_to_ignore = dim_to_ignore
        config.dim_to_use = dim_to_use
    set = utils.normalize_data(set, config.data_mean, config.data_std,
                               config.dim_to_use)  # [S_num, frame_for_S, 60]
    self.data = set
def test_out_digits(model, data, labels):
    # print("===== TESTING THE CURRENT DIGITS =====")
    rescaled = list(map(dutils.unpad_img, data))
    rescaled = list(
        map(lambda img: dutils.center_box_image(dutils.resize_image(img, 20), 20, 4),
            rescaled))
    testing_data = np.array(rescaled)
    testing_data = utils.normalize_data(testing_data)
    testing_data_size = testing_data.shape[0]
    return ann.test_model(model, testing_data.reshape(testing_data_size, 28, 28, 1), labels)
def main():
    # load the datasets
    train_x = np.loadtxt("train_x")
    train_y = np.loadtxt("train_y")
    test_x = np.loadtxt("test_x")
    train_x, train_y = utils.shuffle(train_x, train_y)
    # data normalization
    utils.normalize_data(train_x, 'min_max')
    # create the neural network
    net = nn.NeuralNetwork(train_x.shape)
    # train the model
    net.train(train_x, train_y)
    # testing
    result = net.test(test_x)
    # write the test results to file
    with open('test_y', 'w+') as file:
        for y in result:
            file.write("{}\n".format(y))
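`utils.normalize_data(train_x, 'min_max')` above is called with a mode string, but its implementation does not appear in this collection. A minimal helper compatible with that call might look like the following sketch; the actual helper may instead modify the array in place (which would explain why the snippet above does not reassign the result), so treat this only as an illustration.

import numpy as np


def normalize_data(data, method='min_max'):
    # Illustrative sketch of a column-wise feature scaler; not the original utils implementation.
    data = np.asarray(data, dtype=float)
    if method == 'min_max':
        dmin, dmax = data.min(axis=0), data.max(axis=0)
        return (data - dmin) / np.where(dmax - dmin == 0, 1, dmax - dmin)
    if method == 'std':
        mean, std = data.mean(axis=0), data.std(axis=0)
        return (data - mean) / np.where(std == 0, 1, std)
    raise ValueError("unknown normalization method: {}".format(method))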
def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """
    input: datapoints, table, type of method
    output: two lists of data, one for each cluster
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)
    coeff, oa, ob = _tcf(datapoints)
    c_left = []
    c_right = []
    r_bp = []
    l_bp = []
    r_nbp = []
    l_nbp = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        unit_len = sum(coeff[:-1] ** 2) ** 0.5
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)
        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)
    # left, right, in-boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """
    input: datapoints, table, type of method
    output: two lists of data, one for each cluster
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)
    coeff, oa, ob = _tcf(datapoints)
    c_left = []
    c_right = []
    r_bp = []
    l_bp = []
    r_nbp = []
    l_nbp = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        unit_len = sum(coeff[:-1]**2)**0.5
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)
        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)
    # left, right, in-boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def rwm_cut(orig_datapoints, boundary_width=0.1, n=2):
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)
    in_boundary = 0
    size, dim = datapoints.shape
    c_left = []
    c_right = []
    coeff = _rwm(datapoints)
    r_bp = []
    l_bp = []
    r_nbp = []
    l_nbp = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        unit_len = sum(coeff[:-1] ** 2) ** 0.5
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)
        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)
    # left, right, in-boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)
    digits_data = utils.load_processed_data('digits_og_and_optimal')
    digits = digits_data['optimal_lw']
    labels = utils.create_one_hot(digits_data['labels'].astype('uint'))
    ensemble_size = 20
    epochs = 50
    small_digits = reshape_fun(np.array(list(map(scale_down, digits))))
    small_digits = utils.normalize_data(small_digits)
    trials = 5
    for t in range(1, trials + 1):
        gc.collect()
        l_xtrain = []
        l_xval = []
        l_ytrain = []
        l_yval = []
        for _ in range(ensemble_size):
            t_xtrain, t_ytrain, t_xval, t_yval = utils.create_validation(xtrain, ytrain, (1 / 6))
            l_xtrain.append(t_xtrain)
            l_xval.append(t_xval)
            l_ytrain.append(t_ytrain)
            l_yval.append(t_yval)
        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
            [network_model], pop_per_type=ensemble_size, merge_type="Average")
        es = clb.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
        train_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['acc'])
        train_model.fit(x=l_xtrain, y=l_ytrain, verbose=1, batch_size=100,
                        epochs=epochs, validation_data=(l_xval, l_yval), callbacks=[es])
        merge_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['acc'])
        results = test_digits(merge_model, digits, labels, ensemble_size, reshape_fun)
        # entropy = ann.test_model(merge_model, [small_digits] * ensemble_size, labels, metric='entropy')
        # c_error = ann.test_model(merge_model, [small_digits] * ensemble_size, labels, metric='c_error')
        # results['c_error'][0] = c_error
        # results['entropy'][0] = entropy
        filename = "saltpepper_norm_trial-%s" % t
        utils.save_processed_data(results, filename)
def __init__(self, config, train=True):
    self.config = config
    self.train = train
    self.formatdata = FormatData(config)
    if config.datatype == 'smpl':
        train_path = config.data_root
    else:
        print('CMUDataset only supports the smpl datatype')
        sys.exit(1)
    if config.filename != 'all':
        if train:
            subjects = config.subjects_train
        else:
            subjects = config.subjects_test
    else:
        print('Only walking and dance actions are supported')
        sys.exit(1)
    set = []
    complete_train = []
    for sub in subjects:
        folderdir = '{0}/{1}'.format(train_path, sub)
        for file in os.listdir(folderdir):
            filedir = '{0}/{1}'.format(folderdir, file)
            rawdata = np.load(filedir)['poses'][:, :66]
            rawdata = self.frame_filter(rawdata)
            if rawdata.shape[0] > 70:
                set.append(rawdata)
        if len(complete_train) == 0:
            # use the last action sequence of each subject to compute the mean and std
            complete_train = copy.deepcopy(set[-1])
        else:
            complete_train = np.append(complete_train, set[-1], axis=0)
    print('number of videos:', len(set))
    if not train and config.data_mean is None:
        print('Load train dataset first!')
    if train and config.datatype == 'smpl':
        data_mean, data_std, dim_to_ignore, dim_to_use = utils.normalization_stats(complete_train)
        config.data_mean = data_mean
        config.data_std = data_std
        config.dim_to_ignore = dim_to_ignore
        config.dim_to_use = dim_to_use
    set = utils.normalize_data(set, config.data_mean, config.data_std,
                               config.dim_to_use)  # [S_num, frame_for_S, 66]
    self.data = set
def single_file_reader(savename):
    f = open(savename)
    full_x = []
    full_y = []
    for line in f:
        (x, y) = line.split(' ')
        x = list(map(float, x.split(',')[:-1]))
        y = list(map(int, y.split(',')[:-1]))
        full_x.append(x)
        full_y.append(y)
    full_x = utils.normalize_data(full_x)
    return (full_x, full_y)
def preprocess_and_save(batch_id):
    images, labels = load_cifar10_batch(batch_id)
    images = utils.normalize_data(images)
    labels = utils.one_hot_encode(labels, 10)
    train_images, train_labels, valid_images, valid_labels, test_images, test_labels = \
        utils.split_data(images, labels, train_size=0.8, valid_size=0.1, test_size=0.1)
    batch = {
        'train_images': train_images,
        'train_labels': train_labels,
        'valid_images': valid_images,
        'valid_labels': valid_labels,
        'test_images': test_images,
        'test_labels': test_labels
    }
    batch_path = os.path.join(folder_path, 'preprocess_batch_' + str(batch_id))
    np.save(batch_path, np.asarray(batch))
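The `utils.one_hot_encode(labels, 10)` helper used above is likewise not shown in this collection. A minimal version for integer class labels could look like the sketch below; the real helper may differ, for example in dtype or label validation.

import numpy as np


def one_hot_encode(labels, num_classes):
    # Illustrative sketch: map a 1-D array of integer labels to an (n_samples, num_classes) one-hot matrix.
    labels = np.asarray(labels, dtype=int)
    encoded = np.zeros((labels.shape[0], num_classes), dtype=np.uint8)
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded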
def select_features_stepwise_forward(dataFrame, n_news, original_cols):
    """
    Stepwise forward selection of variables; uses the feature importances of a
    random forest model to rank the best variables.

    Parameters:
    - dataFrame -- pandas DataFrame with the data for all candidate variables
    - n_news -- integer, maximum number of variables to be selected
    - original_cols -- list with the names of the original columns of the problem,
      used to identify the selected variables

    Returns:
    Nothing (the selected variables are written to the file 'data_selected.csv'
    in the data directory)
    """
    n_features = dataFrame.shape[1]
    dataFrame.columns = original_cols
    # params
    n_news -= 1
    features = set(dataFrame.columns)
    features.remove(list(dataFrame.columns)[0])
    missing = features.copy()
    inside = [list(dataFrame.columns)[0]]
    from sklearn.ensemble import RandomForestRegressor
    while (n_news):
        fts = list(inside)
        best = ''
        best_importance = 0
        for ft in missing:
            fts = fts + [ft]
            scaled, scaler = utils.normalize_data(dataFrame[fts].values)
            x, y = series_to_supervised(scaled)
            model = RandomForestRegressor(n_estimators=100)
            model.fit(x, y)
            importances = model.feature_importances_
            if (importances[-1] > best_importance):
                best = fts[-1]
                best_importance = importances[-1]
        inside.append(best)
        missing.remove(best)
        n_news -= 1
    df = dataFrame[inside]
    df.to_csv('data/data_selected.csv')
def cut_by_coeff(orig_datapoints, coeff):
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)
    c_left = []
    c_right = []
    unit_len = sum(coeff[:-1] ** 2) ** 0.5
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    return (c_left, c_right)
def cut_by_coeff(orig_datapoints, coeff):
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)
    c_left = []
    c_right = []
    unit_len = sum(coeff[:-1]**2)**0.5
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    return (c_left, c_right)
def plot_hilbert_spectra(time, frequency, amplitude, title, plotter=plt, fs=100):
    # Scale factor (to plot frequency with decimal precision)
    scale_freq = 10
    # Max scaled frequency
    max_freq = int(0.5 * scale_freq * fs)
    # Creating the time axis
    time_ax = np.linspace(0, len(time) - 1, len(time))
    # Allocating memory for the power and the rounded frequency
    power_array = np.zeros(np.shape(frequency))
    freq_rounded_array = np.zeros(np.shape(power_array), int)
    # Create a grid based on the time axis and the maximum frequency
    yi = np.linspace(0, max_freq, max_freq + 1)
    Z = np.ones((max_freq + 1, len(time_ax))) * -200
    X, Y = np.meshgrid(time_ax, yi)
    # Enter the loop if more than one IMF exists
    if isinstance(frequency[0], np.ndarray):
        n_inst_frequencies = len(frequency)
        for i in range(n_inst_frequencies):
            # Normalize the amplitude (0 <= a <= 1)
            amplitude[i] = utils.normalize_data(amplitude[i])
            # Power equals the amplitude squared
            power_array[i] = np.multiply(amplitude[i], amplitude[i])
            # Round the frequency to the nearest value (OK resolution if scale_freq > 1, e.g. scale_freq=10)
            freq_rounded_array[i] = np.ceil(frequency[i] * scale_freq)
            # Compute the logarithmic power, adding it to the previous value if the same inst. frequency exists.
            for k in range(len(time_ax)):
                if power_array[i, k] == 0.0:
                    power_array[i, k] = 0.00000001
                current_amplitude = Z[int(freq_rounded_array[i, k]), int(time_ax[k])]
                if current_amplitude > -200:
                    Z[int(freq_rounded_array[i, k]), int(time_ax[k])] = \
                        current_amplitude + 20.0 * np.log10(power_array[i, k])
                else:
                    Z[int(freq_rounded_array[i, k]), int(time_ax[k])] = 20.0 * np.log10(power_array[i, k])
    else:
        # Normalize the amplitude (0 <= a <= 1)
        amplitude = utils.normalize_data(amplitude)
        # Power equals the amplitude squared
        power_array = np.multiply(amplitude, amplitude)
        # Round the frequency to the nearest value (OK resolution if scale_freq > 1, e.g. scale_freq=10)
        freq_rounded_array = np.ceil(frequency * scale_freq)
        # Compute the logarithmic power, adding it to the previous value if the same inst. frequency exists.
        for k in range(len(time_ax)):
            Z[int(freq_rounded_array[k]), int(time_ax[k])] = 20.0 * np.log10(power_array[k])
    # Create the figure and subplot; set titles and labels.
    fig = plotter.figure()
    suptitle = 'Hilbert Spectra - Channel: ' + title
    fig.suptitle(suptitle)
    ax = plotter.subplot(111)
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Frequency [Hz]')
    # Contour plot of time, frequency and logarithmic power; scale frequencies back to the original values.
    n_levels = 200
    cax = ax.contourf(X, Y / scale_freq, Z, n_levels)
    # Assign a color bar to the contour plot
    cb = fig.colorbar(cax)
    # Set the label and draw the plot
    cb.set_label('Amplitude [dB]')
    plotter.draw()
    for cls in tmp_clusters:
        res_cls.append(cls)
    print("#cls {} -> {}".format(len(clusters), len(res_cls)))
    print(calc_num_point(res_cls))
    return res_cls


if __name__ == '__main__':
    doctest.testmod()
    points, label = utl.read_from_text('2d5c_noncycle')
    points = utl.centralize_data(points)
    points = utl.normalize_data(points)
    # points, label = utl.read_from_text('2d5c_cov')
    # points, label = utl.read_from_text('hand_write_digit_2d')
    # seleted = datasets.load_digits()
    # points = seleted.data
    # label = seleted.target
    ms_tree = ms2c(points)
    # paint_tree(ms_tree, ms_tree)
    final_nodes = ms_tree.merge()
    grounded_nodes = ms_tree.grounded_nodes
    grounded_cls = [x.datapoints for x in grounded_nodes]
    final_cls = [x.datapoints for x in final_nodes]
# Predicting house prices: a regression example
from keras.datasets import boston_housing

from utils import normalize_data

from keras import models, layers, optimizers, losses, metrics

# loading the Boston housing dataset
(train_data, train_labels), (test_data, test_labels) = boston_housing.load_data()

# dataset info
print(train_data.shape)
print(test_data.shape)

# preparing the data: normalizing it
train_data = normalize_data(train_data)
test_data = normalize_data(test_data)


# build your network
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=optimizers.Adam(lr=0.001),
                  loss=losses.mse,
                  metrics=[metrics.mae])
    return model


network = build_model()

# train and evaluate
network.fit(train_data, train_labels, epochs=80)
loss, mae = network.evaluate(test_data, test_labels)
print(loss, mae)
import numpy as np
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
from keras.datasets import boston_housing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from utils import normalize_data, init_keras

init_keras()

(x_train, y_train), (x_test, y_test) = boston_housing.load_data()
x_train = normalize_data(x_train)
x_test = normalize_data(x_test)

k = 4
epochs = 500
mae_histories_4_k_fold = []
for i in range(k):
    x_train, x_cv, y_train, y_cv = train_test_split(x_train, y_train, test_size=1.0 / k)
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(x_train.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=optimizers.RMSprop(lr=0.001),
                  loss=losses.mse,
                  metrics=[metrics.mae])
def get_testing_batch(dtype=torch.cuda.FloatTensor):
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch
def read_data(trainfile, batchsize=100000, platenamefile=None):
    """
    Reads a data file created by the generate_data_files function. It will reset the file
    after having read through it. Yields data in batches of a specified batch size.

    Parameters:
        trainfile: The file to read data from. It must have a seek function taking an
            integer as the parameter.
        batchsize: How many samples to yield at a time. Defaults to 100000.
        platenamefile: A file to read plate names from, whose rows should correspond to
            the rows in the trainfile. If None, no plate names will be read or yielded.
            Defaults to None.

    Returns:
        Data in the format (features, target), or (features, target, platenames) if
        platenamefile is set, in batches of size `batchsize`.
    """
    while True:
        x = trainfile.readline()
        y = trainfile.readline()
        if y == '':
            break
        if platenamefile:
            platestring = platenamefile.readline()
            platestring = platestring.split(' ')
            platestring[1] = int(platestring[1])
            platestring[2] = int(platestring[2])
            platenames = [platestring]
        try:
            x = [list(map(float, x.split(' ')[:-1]))]
            y = [list(map(int, y.split(' ')[:-1]))]
        except Exception:
            x = []
            y = []
            if platenamefile:
                del platenames[-1]
        while len(x) < batchsize:
            newx = trainfile.readline()
            newy = trainfile.readline()
            if newy == '':
                break
            if platenamefile:
                platestring = platenamefile.readline()
                platestring = platestring.split(' ')
                platestring[1] = int(platestring[1])
                platestring[2] = int(platestring[2])
                platenames.append(platestring)
            # Remove newlines and convert to the correct datatypes
            try:
                x.append(list(map(float, newx.split(' ')[:-1])))
                y.append(list(map(int, newy.split(' ')[:-1])))
            except Exception:
                if platenamefile:
                    del platenames[-1]
                continue
        x = utils.normalize_data(x)
        if platenamefile:
            yield (x, y, platenames)
        else:
            yield (x, y)
    trainfile.seek(0)
    if platenamefile:
        platenamefile.seek(0)
def get_testing_batch():
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch