def readIris(classes=3, one_hot=False):
    """Load the scikit-learn Iris data set with a fixed 40/10 split per class.

    The samples are taken in the order scikit-learn returns them: for every
    kept class the first 40 rows of its block of 50 go to the training set
    and the last 10 rows go to the test set.

    Args:
        classes: ``3`` keeps all three classes; any other value keeps only
            the first two. The value is also forwarded as the class count to
            ``utils.convert_to_onehot`` when ``one_hot`` is set.
        one_hot: when true, labels are encoded with
            ``utils.convert_to_onehot`` and returned exactly as that helper
            produces them; otherwise they are flat 1-D integer vectors.

    Returns:
        ``(train, test)``: two dicts with keys ``'x'`` (features) and
        ``'y'`` (labels).
    """
    from sklearn.datasets import load_iris

    iris = load_iris()
    x = iris['data']
    y = iris['target']

    samples_per_class = 50
    train_per_class = 40
    kept_classes = 3 if classes == 3 else 2

    train_x_parts, train_y_parts = [], []
    test_x_parts, test_y_parts = [], []
    for class_idx in range(kept_classes):
        start = class_idx * samples_per_class
        split = start + train_per_class
        stop = start + samples_per_class
        train_x_parts.append(x[start:split, :])
        train_y_parts.append(y[start:split].reshape(-1, 1))
        test_x_parts.append(x[split:stop, :])
        test_y_parts.append(y[split:stop].reshape(-1, 1))

    train_x = np.vstack(train_x_parts)
    train_y = np.vstack(train_y_parts)
    test_x = np.vstack(test_x_parts)
    test_y = np.vstack(test_y_parts)

    if one_hot:
        # Keep the encoded labels as returned by the helper: flattening them
        # (as was done before) would merge all rows into one long vector that
        # no longer lines up with the rows of 'x'.
        train_y = utils.convert_to_onehot(train_y, classes)
        test_y = utils.convert_to_onehot(test_y, classes)
    else:
        train_y = train_y.reshape([-1])
        test_y = test_y.reshape([-1])

    return {'x': train_x, 'y': train_y}, {'x': test_x, 'y': test_y}
def readMnist(path, one_hot=False):
    """Read the four gzip-compressed MNIST files found under *path*.

    *path* is used as a plain string prefix (the file names are appended to
    it), so it must end with a path separator when it names a directory.

    Args:
        path: prefix of the ``*-ubyte.gz`` files.
        one_hot: when true, labels are encoded with
            ``utils.convert_to_onehot`` (10 classes) and returned exactly as
            that helper produces them; otherwise they are flat 1-D ``int64``
            vectors.

    Returns:
        ``(train, test)``: two dicts with keys ``'x'`` (``float32`` array of
        shape ``(n, 28, 28, 1)`` holding the raw, unnormalised byte values)
        and ``'y'`` (labels).
    """
    import gzip

    IMAGE_SIZE = 28

    def extract_data(filename, num_images):
        """Read *num_images* images into a (num_images, 28, 28, 1) array."""
        print('Extracting', filename)
        with gzip.open(filename) as bytestream:
            bytestream.read(16)  # skip the 16-byte header preceding the pixels
            buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images)
        data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
        return data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1)

    def extract_labels(filename, num_images):
        """Extract the labels into a (num_images, 1) array of int64 IDs."""
        print('Extracting', filename)
        with gzip.open(filename) as bytestream:
            bytestream.read(8)  # skip the 8-byte header preceding the labels
            buf = bytestream.read(1 * num_images)
        labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
        return labels.reshape(num_images, 1)

    train_x = extract_data(path + 'train-images-idx3-ubyte.gz', 60000)
    train_y = extract_labels(path + 'train-labels-idx1-ubyte.gz', 60000)
    test_x = extract_data(path + 't10k-images-idx3-ubyte.gz', 10000)
    test_y = extract_labels(path + 't10k-labels-idx1-ubyte.gz', 10000)

    if one_hot:
        # Do not flatten the encoded labels: reshaping them to 1-D (as was
        # done before) would collapse the per-sample rows into one vector.
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)
    else:
        train_y = train_y.reshape([-1])
        test_y = test_y.reshape([-1])

    return {'x': train_x, 'y': train_y}, {'x': test_x, 'y': test_y}
def readCifar10(path, one_hot=False):
    """Read the python-version CIFAR-10 batches found under *path*.

    *path* is used as a plain string prefix (file names are appended to it).
    On Windows (``os.name == "nt"``) batches are read with
    ``utils.load_model`` and indexed with ``str`` keys; elsewhere they are
    read with ``utils.load_encoding_model(..., encode='bytes')`` and indexed
    with ``bytes`` keys. The same choice is applied to the training batches
    and to the test batch.

    Args:
        path: prefix of the batch files.
        one_hot: when true, labels are encoded with
            ``utils.convert_to_onehot`` (10 classes); otherwise they are
            flat 1-D vectors.

    Returns:
        ``(train, test, meta)``: ``train``/``test`` are dicts with keys
        ``'x'`` (images, shape ``(n, 32, 32, 3)``) and ``'y'`` (labels);
        ``meta`` is ``{'text_labels': ...}`` with the class names read from
        ``batches.meta``.
    """
    on_windows = os.name == "nt"
    data_key = 'data' if on_windows else b'data'
    labels_key = 'labels' if on_windows else b'labels'

    def load_batch(name):
        """Load one batch file with the loader matching the key type."""
        if on_windows:
            return utils.load_model(path + name)
        return utils.load_encoding_model(path + name, encode='bytes')

    def batch_images(batch):
        """Return the batch images as an (n, 32, 32, 3) channel-last array."""
        channel_first = batch[data_key].reshape((-1, 3, 32, 32))
        channel_last = channel_first.swapaxes(1, 3).swapaxes(1, 2)
        # The intermediate flat reshape forces a contiguous copy.
        return channel_last.reshape(-1, 32 * 32 * 3).reshape(-1, 32, 32, 3)

    def batch_labels(batch):
        """Return the batch labels as an (n, 1) column."""
        return np.array(batch[labels_key]).reshape(-1, 1)

    text_labels = utils.load_model(path + 'batches.meta')['label_names']

    # Collect the five training batches and stack them once instead of
    # re-copying the growing array on every iteration.
    image_parts, label_parts = [], []
    for batch_no in range(1, 6):
        batch = load_batch('data_batch_' + str(batch_no))
        image_parts.append(batch_images(batch))
        label_parts.append(batch_labels(batch))
    train_x = np.vstack(image_parts)
    train_y = np.vstack(label_parts)

    # Previously the test batch was always loaded with bytes keys but was
    # indexed with str keys on Windows; use the same loader as for training.
    test_batch = load_batch('test_batch')
    test_x = batch_images(test_batch)
    test_y = batch_labels(test_batch)

    if one_hot:
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)
    else:
        test_y = np.reshape(test_y, (-1))
        train_y = np.reshape(train_y, (-1))

    return {'x': train_x, 'y': train_y}, {'x': test_x, 'y': test_y}, {'text_labels': text_labels}
def readCifar100(path="/media/batman/ent/datasets/cifar-100-python", oneHot=False, labelType="fine"):
    """Read the python-version CIFAR-100 ``train``/``test``/``meta`` files.

    Args:
        path: directory containing the ``train``, ``test`` and ``meta`` files.
        oneHot: when true, labels are encoded with
            ``utils.convert_to_onehot``.
        labelType: ``"fine"`` selects the 100 fine labels; any other value
            selects the 20 coarse (superclass) labels.

    Returns:
        ``(train, test, textLabels)``: ``train``/``test`` are dicts with keys
        ``'x'`` (images, shape ``(n, 32, 32, 3)``), ``'y'`` (labels) and
        ``"filenames"`` (human-readable names derived from the file names);
        ``textLabels`` is the list of class names matching *labelType*.
    """
    use_fine = labelType == "fine"
    # CIFAR-100 has 100 fine classes grouped into 20 coarse superclasses.
    n_classes = 100 if use_fine else 20
    label_field = 'fine_labels' if use_fine else 'coarse_labels'
    names_field = 'fine_label_names' if use_fine else 'coarse_label_names'

    def field(record, name):
        """Fetch *name* whether the record was decoded with str or bytes keys."""
        if name in record:
            return record[name]
        return record[name.encode()]

    def readable_names(raw_names):
        """Turn e.g. ``bos_taurus_s_000507.png`` into ``bos taurus``."""
        names = []
        for raw in raw_names:
            # str() on a bytes object would keep the "b'" prefix, so decode.
            text = raw.decode() if isinstance(raw, bytes) else str(raw)
            names.append(text.split('_s_')[0].replace("_", " "))
        return names

    def images(record):
        """Return the images as an (n, 32, 32, 3) channel-last array."""
        # Each row stores the three colour planes one after another, so it
        # must be read as (3, 32, 32) and then moved to channel-last; a
        # direct reshape to (32, 32, 3) would interleave the planes.
        channel_first = np.array(field(record, 'data')).reshape((-1, 3, 32, 32))
        return channel_first.transpose(0, 2, 3, 1)

    textLabels = utils.load_model(os.path.join(path, 'meta'))[names_field]

    if os.name == "nt":
        train = utils.load_model(os.path.join(path, 'train'))
        test = utils.load_model(os.path.join(path, 'test'))
    else:
        train = utils.load_encoding_model(os.path.join(path, 'train'), encode='bytes')
        test = utils.load_encoding_model(os.path.join(path, 'test'), encode='bytes')

    # Fields are looked up by name rather than by position in the key list,
    # which depended on the ordering of the unpickled dict.
    trainFilenames = readable_names(field(train, 'filenames'))
    testFilenames = readable_names(field(test, 'filenames'))

    trainY = np.array(field(train, label_field))
    testY = np.array(field(test, label_field))
    trainX = images(train)
    testX = images(test)

    if oneHot:
        trainY = utils.convert_to_onehot(trainY, n_classes)
        # The encoded test labels belong in testY; they used to overwrite
        # the test images.
        testY = utils.convert_to_onehot(testY, n_classes)

    return ({'x': trainX, 'y': trainY, "filenames": trainFilenames},
            {'x': testX, 'y': testY, "filenames": testFilenames},
            textLabels)
def read_cifar100(path, one_hot=True):
    """Read five ``data_batch_*`` files plus ``test_batch`` found under *path*.

    *path* is used as a plain string prefix (file names are appended to it).
    On Windows (``os.name == 'nt'``) batches are read with
    ``utils.load_model`` and indexed with ``str`` keys; elsewhere they are
    read with ``utils.load_encoding_model(..., encode='bytes')`` and indexed
    with ``bytes`` keys, so the function has a working code path on every
    platform.

    NOTE(review): despite its name this reads CIFAR-10 style batch files
    (``data_batch_N`` / ``'labels'``) and one-hot encodes with 10 classes,
    while the class names come from ``meta['fine_label_names']``. Confirm
    which data set this is meant for before relying on it.

    Args:
        path: prefix of the batch files.
        one_hot: when true, labels are encoded with
            ``utils.convert_to_onehot`` (10 classes); otherwise they stay
            ``(n, 1)`` integer columns.

    Returns:
        A dict with keys ``'train_x'``, ``'train_y'``, ``'test_x'``,
        ``'test_y'`` and ``'text_labels'``.
    """
    import os

    on_windows = os.name == 'nt'
    data_key = 'data' if on_windows else b'data'
    labels_key = 'labels' if on_windows else b'labels'

    def load_batch(name):
        """Load one batch file with the loader matching the key type."""
        if on_windows:
            return utils.load_model(path + name)
        return utils.load_encoding_model(path + name, encode='bytes')

    def label_column(batch):
        """Return the batch labels as an (n, 1) column."""
        return np.array(batch[labels_key]).reshape(-1, 1)

    text_labels = utils.load_model(path + 'meta')['fine_label_names']

    # Gather all training batches first and stack once.
    image_parts, label_parts = [], []
    for batch_no in range(1, 6):
        batch = load_batch('data_batch_' + str(batch_no))
        image_parts.append(batch[data_key])
        label_parts.append(label_column(batch))
    train_x = np.vstack(image_parts)
    train_y = np.vstack(label_parts)

    test_batch = load_batch('test_batch')
    test_x = test_batch[data_key]
    test_y = label_column(test_batch)

    if one_hot == True:
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)

    return {'train_x': train_x, 'train_y': train_y,
            'test_x': test_x, 'test_y': test_y,
            'text_labels': text_labels}
def update(self):
    """Run one optimisation step for every agent's critic and actor.

    A mini-batch is sampled from ``self.replay_memory`` and moved to
    ``self.device``. For each agent ``idx``:

    * the critic is regressed towards
      ``gamma * old_critic(new_states, target_actions) * (1 - dones) + rewards``
      where ``target_actions`` are the batch actions with agent ``idx``'s
      slice replaced by its old actor's output and passed through
      ``convert_to_onehot``;
    * the actor is updated to maximise the critic's value when agent
      ``idx``'s slice of the batch actions is replaced by the actor's
      ``gumbel_softmax`` output.

    Gradients are cleared immediately before each backward pass and clipped
    (max norm 0.5) after it, right before the optimizer step.

    Returns:
        ``(mean_actor_loss, mean_critic_loss)`` averaged over the agents.
    """
    (batch_states, batch_actions, batch_rewards, batch_new_states,
     batch_dones) = self.replay_memory.sample_mini_batch(
         batch_size=self.batch_size)
    batch_states = batch_states.to(self.device)
    batch_actions = batch_actions.to(self.device)
    batch_rewards = batch_rewards.to(self.device)
    batch_new_states = batch_new_states.to(self.device)
    batch_dones = batch_dones.to(self.device)

    critic_loss_per_agent = []
    actor_loss_per_agent = []
    for idx in range(len(self.actors)):
        actor = self.actors[idx]
        critic = self.critics[idx]
        old_actor = self.old_actors[idx]
        old_critic = self.old_critics[idx]
        actor_optimizer = self.actor_optimizers[idx]
        critic_optimizer = self.critic_optimizers[idx]

        # ---- update critic ----
        predict_Q = critic(state=batch_states,
                           actions=batch_actions).squeeze(-1)
        old_actor_actions = old_actor(batch_new_states)
        target_actions = batch_actions.clone().detach()
        target_actions[:, idx, :] = old_actor_actions
        target_actions = convert_to_onehot(target_actions,
                                           epsilon=self.epsilon)
        target_Q = self.gamma * old_critic(
            state=batch_new_states,
            actions=target_actions).squeeze(-1) * (1 - batch_dones) + batch_rewards
        c_loss = self.critic_loss(input=predict_Q, target=target_Q.detach())

        # Clear first: the actor's backward pass below also deposits
        # gradients in the critic's parameters, and those must not leak
        # into the next critic step.
        critic_optimizer.zero_grad()
        c_loss.backward()
        torch.nn.utils.clip_grad_norm_(critic.parameters(), 0.5)
        critic_optimizer.step()
        critic_loss_per_agent.append(c_loss.item())

        # ---- update actor ----
        actor_actions = gumbel_softmax(actor(batch_states), hard=True)
        predict_actions = batch_actions.clone().detach()
        predict_actions[:, idx, :] = actor_actions
        a_loss = -critic(state=batch_states,
                         actions=predict_actions).squeeze(-1)
        a_loss = a_loss.mean()

        actor_optimizer.zero_grad()
        a_loss.backward()
        # Clipping has to happen after backward(); before it there are no
        # fresh gradients to clip.
        torch.nn.utils.clip_grad_norm_(actor.parameters(), 0.5)
        actor_optimizer.step()
        actor_loss_per_agent.append(a_loss.item())

    mean_actor_loss = sum(actor_loss_per_agent) / len(actor_loss_per_agent)
    mean_critic_loss = sum(critic_loss_per_agent) / len(critic_loss_per_agent)
    return mean_actor_loss, mean_critic_loss
def encode_text(text):
    """One-hot encode *text* over a fixed 71-entry character table.

    The text is first mapped to indices by ``convert_one_sentence`` (target
    length 201); the indices are shifted down by one and handed to
    ``convert_to_onehot`` with one class per table entry.

    Args:
        text: the sentence to encode.

    Returns:
        Whatever ``convert_to_onehot`` produces for the shifted indices.
    """
    # A character's position in this table is what the index helper works
    # with, so the order (including the second "-") must not change.
    symbols = (
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "-,;.!?:'\"\\/|_@#$%^&*~`+-=<>()[]{}"
        "\n "
    )
    alphabet = list(symbols)

    indices = convert_one_sentence(text, alphabet, target_len=201)
    # presumably the helper's indices are 1-based; verify against it
    return convert_to_onehot(data=indices - 1, n_class=len(alphabet))
def _load_with_correct_shape(self, data_type):
    """Load images and labels and lay each image out as a two-column chain.

    Images are fetched with ``load_mnist`` (keyed by this class's name and
    *data_type*), divided by 255.0 and, when ``self.shrink`` is set, reduced
    with a 2x2 max ``block_reduce``. Every image is then flattened and paired
    with a column of ones.

    Args:
        data_type: forwarded unchanged to ``load_mnist``.

    Returns:
        ``(data, labels)``: ``data`` has shape
        ``(n_images, height * width, 2)`` with column 0 all ones and column 1
        the pixel values; ``labels`` is ``convert_to_onehot(raw_labels)``.
    """
    images, raw_labels = load_mnist(type(self).__name__, data_type)
    images = images / 255.0

    if self.shrink == True:
        images = np.array([
            skimage.measure.block_reduce(picture, (2, 2), np.max)
            for picture in images
        ])

    shape = images.shape
    pixels_per_image = shape[1] * shape[2]
    bias_column = np.ones(pixels_per_image)
    flat_images = images.reshape(shape[0], pixels_per_image)

    _spinner = spinner(jump=300)  # created but not used further here

    data = np.array([
        np.column_stack((bias_column, pixels)) for pixels in flat_images
    ])
    print(data.shape)

    return (data, convert_to_onehot(raw_labels))
# Report the size and shape of the loaded image/label arrays.
m_test = len(test_images)
num_px = train_images.shape[1]  # second axis of the image array
print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ")")
print("train_images shape: " + str(train_images.shape))
print("train_labels shape: " + str(train_labels.shape))
print("test_images shape: " + str(test_images.shape))
print("test_labels shape: " + str(test_labels.shape))

# Flatten every image into one vector and transpose, so each COLUMN of the
# result is one example.
train_images_flatten = train_images.reshape(train_images.shape[0], -1).T
test_images_flatten = test_images.reshape(test_images.shape[0], -1).T
# Encode the labels over 10 classes.
train_labels_onehot = convert_to_onehot(train_labels, 10)
test_labels_onehot = convert_to_onehot(test_labels, 10)
print("train_images_flatten shape: " + str(train_images_flatten.shape))
print("train_labels_onehot shape: " + str(train_labels_onehot.shape))
print("test_images_flatten shape: " + str(test_images_flatten.shape))
print("test_labels_onehot shape: " + str(test_labels_onehot.shape))

# Scale by 1/255 (assumes pixel values are in 0..255 -- TODO confirm).
train_set_x = train_images_flatten / 255.0
test_set_x = test_images_flatten / 255.0

# Layer-by-layer description of the network; the list continues below.
nn_architecture = [
    {
        "layer_size": 784,
        "activation": "none"
    },  # input layer
def _load_all_data(self):
    """Read the ten ``Participant_<n>.csv`` files into FFT feature windows.

    Rows are accumulated into windows of ``self.data_length`` samples of
    three float columns (CSV fields 1..3). Each completed window is turned
    into scaled FFT magnitudes with a leading column of ones and stored
    together with the value of the label looked up in ``activityLabels``.

    The results are kept in ``self._all_data`` as ``(datapoints, labels)``
    and also written with ``np.save`` to ``self._all_data_path`` and
    ``self._all_labels_path``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file.

    :return: nothing
    """
    _all_datapoints = []
    _all_labels = []
    counter = 0  # not used further in this method
    # Scale applied to the FFT magnitudes.
    factor = 2 / self.data_length
    # Number of FFT rows kept per window: rfft yields data_length // 2 + 1
    # rows and the last one is dropped below (matches for even data_length).
    new_length = int(self.data_length / 2)
    ones = np.ones(new_length)
    _spinner = spinner()
    for i in range(10):
        #_spinner.print_spinner(0.0)
        percentage = int((i / 10) * 100)
        filename = self._uncompressed_data_path + "Participant_" + str(
            i + 1) + ".csv"
        with open(filename, 'r') as f:
            reader = csv.reader(f)
            # The first two rows are consumed and not treated as data.
            header = next(reader)
            headings = next(reader)
            jump_index = 0  # rows with a smaller index are skipped
            index = 0
            prev_row_label = None
            row_label = 0
            data = []
            for index, row in enumerate(reader):
                _spinner.print_spinner(percentage)
                if index >= jump_index:
                    # On a window boundary, emit the rows gathered so far.
                    if index != 0 and (index) % self.data_length == 0:
                        data = np.abs(np.fft.rfft(data, axis=0) * factor)[:-1]
                        data = np.column_stack((ones, data))
                        _all_datapoints.append(np.array(data))
                        # Labelled with the label of the most recent row.
                        _all_labels.append(row_label.value)
                        data = []
                        prev_row_label = None
                    data.append(
                        np.array([
                            np.float32(row[1]),
                            np.float32(row[2]),
                            np.float32(row[3])
                        ]))
                    row_label = activityLabels[row[-1]]
                    if prev_row_label is not None:
                        if row_label != prev_row_label:
                            # Label changed inside a window: set the index
                            # from which rows are accepted again.
                            # NOTE(review): `index % data_length` is the
                            # offset inside the window, not the window
                            # number -- confirm this jump target is intended.
                            modulo = index % self.data_length
                            jump_index = (modulo + 1) * self.data_length
                    prev_row_label = row_label
    _all_datapoints = np.array(_all_datapoints)
    _all_labels = convert_to_onehot(np.array(_all_labels))
    # The permutation is computed but not applied (shuffling is disabled).
    permutation = np.random.permutation(len(_all_datapoints))
    #_all_datapoints = _all_datapoints[permutation]
    #_all_labels = _all_labels[permutation]
    #_all_datapoints[:,:,1:] = np.tanh(_all_datapoints[:,:,1:])
    _spinner.print_spinner(100.0)
    print(_all_datapoints.shape)
    print(_all_labels.shape)
    print(_all_labels[0])
    self._all_data = (_all_datapoints, _all_labels)
    np.save(self._all_data_path, _all_datapoints)
    np.save(self._all_labels_path, _all_labels)
# One "<signal>test.txt" path per input signal type, under the test split's
# "Inertial Signals/" directory.
X_test_signals_paths = [
    DATASET_PATH + TEST + "Inertial Signals/" + signal + "test.txt"
    for signal in INPUT_SIGNAL_TYPES
]

# Load the signal files and the label files of both splits.
x_train = load_X(X_train_signals_paths)
x_test = load_X(X_test_signals_paths)
y_train_path = DATASET_PATH + TRAIN + "y_train.txt"
y_test_path = DATASET_PATH + TEST + "y_test.txt"
y_train = load_y(y_train_path)
y_test = load_y(y_test_path)

# Package both splits as {'x': inputs, 'y': one-hot labels over 6 classes};
# the test split is used as the validation set.
train = {}
train['x'] = x_train
train['y'] = utils.convert_to_onehot(y_train, 6)
val = {}
val['x'] = x_test
val['y'] = utils.convert_to_onehot(y_test, 6)
print(train['x'].shape, train['y'].shape, val['x'].shape, val['y'].shape)

import rnn_classifier

text_labels = LABELS

# Configure the RNN classifier.
model = rnn_classifier.model()
model.batch_size = 128
model.epochs = 10
model.learning_rate = 0.0001
model.sequence_dimensions = 9
model.sequence_length = 128
model.no_of_cell = 2
model.cell_size = 32
val_x = []
val_y = []
# Split every class in order: the first 85% of its items go to the training
# lists, the rest to the validation lists. The dict key, cast to int, is the
# label.
for k in data.keys():
    #random.shuffle(data[k])
    for i in range(len(data[k])):
        if i < (len(data[k]) * 0.85):
            train_x.append(data[k][i])
            train_y.append(int(k))
        else:
            val_x.append(data[k][i])
            val_y.append(int(k))

# Package both splits as {'x': images reshaped to (n, 128, 128, 3),
# 'y': one-hot labels over 17 classes}.
train = {}
val = {}
train['x'] = np.array(train_x).reshape([-1, 128, 128, 3])
train['y'] = utils.convert_to_onehot(np.array(train_y), 17)
val['x'] = np.array(val_x).reshape([-1, 128, 128, 3])
val['y'] = utils.convert_to_onehot(np.array(val_y), 17)
print(train['x'].shape, train['y'].shape, val['x'].shape, val['y'].shape)


def model_fun(x, is_training):
    # Shape of one input sample (batch axis dropped).
    x_shape = x.get_shape().as_list()[1:]
    # Kernel shapes; 'c1' takes its third entry from the input's last axis.
    # NOTE(review): 'c2' lists 20 as its third entry while 'c1' ends in 64 --
    # confirm where 'c2' is applied.
    kernel = {'c1': [5, 5, x_shape[2], 64], 'c2': [5, 5, 20, 50]}
    strides = {'1': [1, 1, 1, 1], '2': [1, 2, 2, 1]}
    pool_win_size = {'2': [1, 2, 2, 1]}
    # First convolution followed by a 3x3 max-pool with unit stride.
    conv = ops.conv2d(x, 'conv1', kernel['c1'], strides['1'], 'SAME')
    conv = ops.max_pool(conv, [1, 3, 3, 1], [1, 1, 1, 1])
test_df=pd.read_csv(data_test_file) text_labels=['T-shirt/top' 'Trouser' 'Pullover' 'Dress' 'Coat' 'Sandal' 'Shirt' 'Sneaker' 'Bag' 'Ankle boot'] train={} val={} train['y']=utils.convert_to_onehot(np.array(train_df['label']),10) del train_df['label'] train['x'] = np.reshape(np.array(train_df), [-1,28,28]) val['y']=utils.convert_to_onehot(np.array(test_df['label']),10) del test_df['label'] val['x'] = np.reshape(np.array(test_df), [-1, 28,28]) print train['x'].shape,train['y'].shape,val['x'].shape,val['y'].shape model=rnn_classifier.model() model.batch_size=128 model.epochs=10 model.learning_rate=0.0001 model.sequence_dimensions=28 model.sequence_length=28
def _load_all_data(self):
    """Read the records listed in ``REFERENCE.csv`` into FFT feature windows.

    For each CSV row the matching ``.mat`` record is loaded, cut into windows
    of ``self.data_length`` samples, and every window is converted into
    normalised FFT magnitudes with a leading column of ones. A per-class
    counter drives which records are skipped, windowed further, or
    duplicated.

    The results are kept in ``self._all_data`` as ``(datapoints, labels)``
    and also written with ``np.save`` to ``self._all_data_path`` and
    ``self._all_labels_path``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file.
    """
    _all_datapoints = []
    _all_labels = []
    counter = 0  # overwritten by the per-class array below
    new_length = int(self.data_length / 2)  # not used further in this method
    result_length = self.result_length
    ones = np.ones(result_length)
    _spinner = spinner(200)
    # Number of stored windows per label value (four label values).
    counter = np.array([0, 0, 0, 0])
    csv_filename = self._uncompressed_data_path + "REFERENCE.csv"
    with open(csv_filename, 'r') as f:
        reader = csv.reader(f)
        for index, row in enumerate(reader):
            current_data = []
            current_loc = self.data_length
            # presumably 8528 is the number of rows in REFERENCE.csv -- verify
            percentage = int(100 * index / 8528.0)
            _spinner.print_spinner(percentage)
            # "~" in the label column maps to the noisy label; anything else
            # is looked up by name.
            label = cardioLabels.noisy
            if row[1] != "~":
                label = cardioLabels[row[1]]
            # Label values ordered from least to most frequent so far.
            sorted_indices = counter.argsort()
            # Skip records of the currently most frequent class.
            if label.value != sorted_indices[-1]:
                record = self._uncompressed_data_path + row[0] + ".mat"
                mat_data = scipy.io.loadmat(record)
                samples = mat_data["val"]
                samples = samples.flatten()
                len_left = len(samples)
                # First window: FFT magnitudes, bins 5 .. result_length + 4,
                # scaled so the largest kept magnitude is 1.
                data = samples[:self.data_length]
                data = np.abs(np.fft.rfft(data))[5:result_length + 5]
                factor = 1 / np.amax(data)
                data = data * factor
                data = np.column_stack((ones, data))
                _all_labels.append(label.value)
                _all_datapoints.append(data)
                current_data.append(data)
                counter[label.value] = counter[label.value] + 1
                len_left -= self.data_length
                # Take further windows from the same record only for label
                # value 0 or the currently least frequent class, and never
                # for label value 3.
                while (len_left > self.data_length and
                       (label.value == 0 or label.value == sorted_indices[0])
                       and label.value != 3):
                    data = samples[current_loc:current_loc + self.data_length]
                    data = np.abs(np.fft.rfft(data))[5:result_length + 5]
                    factor = 1 / np.amax(data)
                    data = data * factor
                    data = np.column_stack((ones, data))
                    label = cardioLabels.noisy
                    if row[1] != "~":
                        label = cardioLabels[row[1]]
                    _all_labels.append(label.value)
                    _all_datapoints.append(data)
                    current_data.append(data)
                    counter[label.value] = counter[label.value] + 1
                    # NOTE(review): the read position advances by
                    # data_length + 1 while len_left shrinks by data_length
                    # -- confirm the one-sample gap is intended.
                    current_loc += self.data_length + 1
                    len_left -= self.data_length
                # Oversample: store every window of this record two more
                # times when the class is still behind or has label value 0.
                if counter[label.value] < counter[
                        sorted_indices[2]] or label.value == 0:
                    for data in current_data:
                        _all_datapoints.append(data)
                        _all_labels.append(label.value)
                        _all_datapoints.append(data)
                        _all_labels.append(label.value)
                        counter[label.value] = counter[label.value] + 1
                        counter[label.value] = counter[label.value] + 1
    _all_datapoints = np.array(_all_datapoints)
    _all_labels = convert_to_onehot(np.array(_all_labels))
    _spinner.print_spinner(100.0)
    print(_all_datapoints.shape)
    print(_all_labels.shape)
    print(_all_labels[0])
    self._all_data = (_all_datapoints, _all_labels)
    print("datapoints by class:", counter)
    np.save(self._all_data_path, _all_datapoints)
    np.save(self._all_labels_path, _all_labels)