Example #1
def readIris(classes=3, one_hot=False):
    import numpy as np
    from sklearn.datasets import load_iris
    # load_iris returns samples sorted by class, 50 per class
    iris = load_iris()
    x = iris['data']
    y = iris['target']
    train_x1 = x[0:40, :]
    train_x2 = x[50:90, :]
    train_x3 = x[100:140, :]
    train_y1 = y[0:40].reshape(40, 1)
    train_y2 = y[50:90].reshape(40, 1)
    train_y3 = y[100:140].reshape(40, 1)
    test_x1 = x[40:50, :]
    test_x2 = x[90:100, :]
    test_x3 = x[140:150, :]
    test_y1 = y[40:50].reshape(10, 1)
    test_y2 = y[90:100].reshape(10, 1)
    test_y3 = y[140:150].reshape(10, 1)
    train_x = np.vstack((train_x1, train_x2))
    train_y = np.vstack((train_y1, train_y2))
    test_x = np.vstack((test_x1, test_x2))
    test_y = np.vstack((test_y1, test_y2))
    if classes == 3:
        train_x = np.vstack((train_x, train_x3))
        train_y = np.vstack((train_y, train_y3))
        test_x = np.vstack((test_x, test_x3))
        test_y = np.vstack((test_y, test_y3))
    if one_hot:
        train_y = utils.convert_to_onehot(train_y, classes)
        test_y = utils.convert_to_onehot(test_y, classes)
    else:
        # flatten integer labels only; flattening one-hot labels would
        # destroy the (n, classes) structure
        train_y = train_y.reshape([-1])
        test_y = test_y.reshape([-1])
    return {'x': train_x, 'y': train_y}, {'x': test_x, 'y': test_y}
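Every example on this page relies on a convert_to_onehot helper (usually via utils) whose source is not shown here. A minimal NumPy sketch consistent with the calls above and below, offered only as an assumption about its behavior:

import numpy as np

def convert_to_onehot(data, n_class=None):
    # hypothetical implementation: flatten the integer labels and index an
    # identity matrix, one row of the encoding per label
    data = np.asarray(data).reshape(-1).astype(np.int64)
    if n_class is None:
        n_class = int(data.max()) + 1  # infer the class count when omitted
    return np.eye(n_class, dtype=np.float32)[data]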
Example #2
def readMnist(path, one_hot=False):
    import gzip
    def extract_data(filename, num_images):
        IMAGE_SIZE = 28
        print('Extracting', filename)
        with gzip.open(filename) as bytestream:
            # skip the 16-byte IDX header (magic number, image count, rows, cols)
            bytestream.read(16)
            buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images)
            data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
            # data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
            data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1)
            return data

    def extract_labels(filename, num_images):
        """Extract the labels into a vector of int64 label IDs."""
        print('Extracting', filename)
        with gzip.open(filename) as bytestream:
            # skip the 8-byte IDX header (magic number, label count)
            bytestream.read(8)
            buf = bytestream.read(1 * num_images)
            labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
        return labels.reshape(num_images, 1)

    train_x = extract_data(path + 'train-images-idx3-ubyte.gz', 60000)
    train_y = extract_labels(path + 'train-labels-idx1-ubyte.gz', 60000)
    test_x = extract_data(path + 't10k-images-idx3-ubyte.gz', 10000)
    test_y = extract_labels(path + 't10k-labels-idx1-ubyte.gz', 10000)
    if one_hot:
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)
    else:
        # flatten integer labels only; flattening one-hot labels would
        # destroy the (n, 10) structure
        train_y = train_y.reshape([-1])
        test_y = test_y.reshape([-1])
    return {'x': train_x.reshape([-1, 28, 28, 1]), 'y': train_y}, \
           {'x': test_x.reshape([-1, 28, 28, 1]), 'y': test_y}
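A quick shape check, assuming the four standard MNIST .gz files sit in a local ./data/mnist/ directory (a hypothetical path):

train, test = readMnist('./data/mnist/', one_hot=True)
print(train['x'].shape, train['y'].shape)  # (60000, 28, 28, 1) (60000, 10)
print(test['x'].shape, test['y'].shape)    # (10000, 28, 28, 1) (10000, 10)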
Example #3
def readCifar10(path, one_hot=False):
    text_labels = utils.load_model(path + 'batches.meta')['label_names']
    # Windows pickles load directly; elsewhere they need bytes encoding,
    # which also makes the dict keys bytes
    on_nt = os.name == "nt"
    data_key = 'data' if on_nt else b'data'
    labels_key = 'labels' if on_nt else b'labels'

    def to_rows(batch):
        # each row is 3072 bytes in channel-first (CHW) order; reorder to
        # HWC, then flatten back to one row per image
        return batch.reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32 * 32 * 3)

    for i in range(5):
        if on_nt:
            data = utils.load_model(path + 'data_batch_' + str(i + 1))
        else:
            data = utils.load_encoding_model(path + 'data_batch_' + str(i + 1), encode='bytes')
        if i == 0:
            train_x = to_rows(data[data_key])
            train_y = np.array(data[labels_key]).reshape(10000, 1)
            continue
        train_x = np.vstack((train_x, to_rows(data[data_key])))
        train_y = np.vstack((train_y, np.array(data[labels_key]).reshape(10000, 1)))
    if on_nt:
        data = utils.load_model(path + 'test_batch')
    else:
        data = utils.load_encoding_model(path + 'test_batch', encode='bytes')
    test_x = to_rows(data[data_key])
    test_y = np.array(data[labels_key]).reshape(10000, 1)
    if one_hot:
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)
    else:
        test_y = np.reshape(test_y, (-1))
        train_y = np.reshape(train_y, (-1))
    train_x = np.reshape(train_x, (-1, 32, 32, 3))
    test_x = np.reshape(test_x, (-1, 32, 32, 3))
    return {'x': train_x, 'y': train_y}, {'x': test_x, 'y': test_y}, {'text_labels': text_labels}
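For reference, the swapaxes(1, 3).swapaxes(1, 2) chain in to_rows is the NCHW-to-NHWC reordering, equivalent to a single transpose; a quick self-contained check:

import numpy as np

rows = np.arange(4 * 3072).reshape(4, 3072)   # four fake CIFAR rows
a = rows.reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2)
b = rows.reshape((-1, 3, 32, 32)).transpose(0, 2, 3, 1)
assert (a == b).all() and a.shape == (4, 32, 32, 3)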
Example #4
def readCifar100(path="/media/batman/ent/datasets/cifar-100-python", oneHot=False, labelType="fine"):
    textLabels = utils.load_model(os.path.join(path, 'meta'))[
        'fine_label_names' if labelType == "fine" else 'coarse_label_names']

    if os.name == "nt":
        train = utils.load_model(os.path.join(path, 'train'))
        test = utils.load_model(os.path.join(path, 'test'))
    else:
        train = utils.load_encoding_model(os.path.join(path, 'train'), encode='bytes')
        test = utils.load_encoding_model(os.path.join(path, 'test'), encode='bytes')
    trainFilenames = []
    # key order assumed below: [filenames, batch_label, fine_labels,
    # coarse_labels, data]
    trainDict = list(train.keys())
    testDict = list(test.keys())
    for f in train[trainDict[0]]:
        f = str(f)
        trainFilenames.append((f.split('_s_')[0]).replace("_", " "))
    testFilenames = []
    for f in test[testDict[0]]:
        f = str(f)
        testFilenames.append((f.split('_s_')[0]).replace("_", " "))

    if labelType == "fine":
        trainY = np.array(train[trainDict[2]])
        testY = np.array(test[testDict[2]])
    else:
        trainY = np.array(train[trainDict[3]])
        testY = np.array(test[testDict[3]])

    # rows are stored channel-first (CHW); reorder to HWC as in readCifar10
    trainX = np.array(train[trainDict[4]]).reshape((-1, 3, 32, 32)).transpose(0, 2, 3, 1)
    testX = np.array(test[testDict[4]]).reshape((-1, 3, 32, 32)).transpose(0, 2, 3, 1)

    if oneHot:
        # CIFAR-100 has 100 fine and 20 coarse classes
        nClass = 100 if labelType == "fine" else 20
        trainY = utils.convert_to_onehot(trainY, nClass)
        testY = utils.convert_to_onehot(testY, nClass)
    return {'x': trainX, 'y': trainY, "filenames": trainFilenames}, \
           {'x': testX, 'y': testY, "filenames": testFilenames}, textLabels
Example #5
def read_cifar100(path, one_hot=True):
    # Note: despite its name, this reads the CIFAR-10 batch layout
    # (data_batch_1..5 and test_batch, 10 classes).
    import os
    if os.name != 'nt':
        # non-Windows pickles would need the bytes-encoded loading used in
        # readCifar10 above; only the Windows path is implemented here
        raise NotImplementedError("read_cifar100 currently supports os.name == 'nt' only")
    text_labels = utils.load_model(path + 'meta')['fine_label_names']
    for i in range(5):
        data = utils.load_model(path + 'data_batch_' + str(i + 1))
        if i == 0:
            train_x = data['data']
            train_y = np.array(data['labels']).reshape(10000, 1)
            continue
        train_x = np.vstack((train_x, data['data']))
        train_y = np.vstack((train_y, np.array(data['labels']).reshape(10000, 1)))
    data = utils.load_model(path + 'test_batch')
    test_x = data['data']
    test_y = np.array(data['labels']).reshape(10000, 1)
    if one_hot:
        train_y = utils.convert_to_onehot(train_y, 10)
        test_y = utils.convert_to_onehot(test_y, 10)
    return {'train_x': train_x, 'train_y': train_y,
            'test_x': test_x, 'test_y': test_y, 'text_labels': text_labels}
Example #6
    def update(self):
        batch_states, batch_actions, batch_rewards, batch_new_states, batch_dones = self.replay_memory.sample_mini_batch(
            batch_size=self.batch_size)
        batch_states = batch_states.to(self.device)
        batch_actions = batch_actions.to(self.device)
        batch_rewards = batch_rewards.to(self.device)
        batch_new_states = batch_new_states.to(self.device)
        batch_dones = batch_dones.to(self.device)
        critic_loss_per_agent = []
        actor_loss_per_agent = []
        for idx in range(len(self.actors)):
            actor = self.actors[idx]
            critic = self.critics[idx]
            old_actor = self.old_actors[idx]
            old_critic = self.old_critics[idx]
            actor_optimizer = self.actor_optimizers[idx]
            critic_optimizer = self.critic_optimizers[idx]

            # update critic
            predict_Q = critic(state=batch_states,
                               actions=batch_actions).squeeze(-1)
            old_actor_actions = old_actor(batch_new_states)

            target_actions = batch_actions.clone().detach()
            target_actions[:, idx, :] = old_actor_actions
            target_actions = convert_to_onehot(target_actions,
                                               epsilon=self.epsilon)
            target_Q = self.gamma * old_critic(
                state=batch_new_states, actions=target_actions).squeeze(-1) * (
                    1 - batch_dones) + batch_rewards
            c_loss = self.critic_loss(input=predict_Q,
                                      target=target_Q.detach())
            # zero first: the actor update below also backprops through this
            # critic and would otherwise leave stale gradients behind
            critic_optimizer.zero_grad()
            c_loss.backward()
            # clip_grad_norm_ is the in-place clipping API in current PyTorch
            torch.nn.utils.clip_grad_norm_(critic.parameters(), 0.5)
            critic_optimizer.step()
            critic_loss_per_agent.append(c_loss.item())

            # update actor
            actor_actions = actor(batch_states)
            actor_actions = gumbel_softmax(actor_actions, hard=True)
            predict_actions = batch_actions.clone().detach()
            predict_actions[:, idx, :] = actor_actions
            a_loss = -critic(state=batch_states,
                             actions=predict_actions).squeeze(-1)
            a_loss = a_loss.mean()
            actor_optimizer.zero_grad()
            a_loss.backward()
            # gradients must exist before they can be clipped
            torch.nn.utils.clip_grad_norm_(actor.parameters(), 0.5)
            actor_optimizer.step()
            actor_loss_per_agent.append(a_loss.item())
        return sum(actor_loss_per_agent) / len(actor_loss_per_agent), sum(
            critic_loss_per_agent) / len(critic_loss_per_agent)
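For reference, the critic target built above is the one-step TD target, bootstrapped from the target ("old") networks; the same arithmetic on dummy tensors (all names hypothetical):

import torch

gamma = 0.95
rewards = torch.tensor([1.0, 0.0])
dones = torch.tensor([0.0, 1.0])     # 1.0 marks a terminal transition
next_q = torch.tensor([2.0, 3.0])    # Q'(s', a') from the target critic
target_q = gamma * next_q * (1 - dones) + rewards
print(target_q)  # tensor([2.9000, 0.0000]): no bootstrap at terminal steps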
Example #7
def encode_text(text):
    # character vocabulary; note that "-" appears twice in this list
    alphabet = [
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-", ",", ";",
        ".", "!", "?", ":", "'", "\"", "\\", "/", "|", "_", "@", "#", "$",
        "%", "^", "&", "*", "~", "`", "+", "-", "=", "<", ">", "(", ")",
        "[", "]", "{", "}", "\n", " "
    ]
    text_idx = convert_one_sentence(text, alphabet, target_len=201)
    # convert_one_sentence presumably returns 1-based indices, so shift to
    # 0-based before one-hot encoding
    text_onehot = convert_to_onehot(data=text_idx - 1,
                                    n_class=len(alphabet))
    return text_onehot
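Assuming convert_one_sentence pads or truncates to target_len, the output has one row per character position; a hypothetical shape check:

onehot = encode_text("hello, world!")
print(onehot.shape)  # expected: (201, 71), i.e. target_len x len(alphabet)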
Example #8
    def _load_with_correct_shape(self, data_type):
        raw_images, raw_labels = load_mnist(type(self).__name__, data_type)
        raw_images = raw_images / 255.0
        if self.shrink:
            # halve each spatial dimension with 2x2 max-pooling
            new_images = []
            for image in raw_images:
                new_images.append(
                    skimage.measure.block_reduce(image, (2, 2), np.max))
            raw_images = np.array(new_images)
        current_shape = raw_images.shape
        chain_length = current_shape[1] * current_shape[2]
        ones = np.ones(chain_length)
        raw_data = raw_images.reshape(current_shape[0], chain_length)
        _spinner = spinner(jump=300)
        data = []
        for element in raw_data:
            # pair every pixel with a constant 1 so each site carries a
            # (1, pixel) feature vector
            new_element = np.column_stack((ones, element))
            data.append(new_element)
        data = np.array(data)
        print(data.shape)
        labels = convert_to_onehot(raw_labels)
        return (data, labels)
Example #9
m_train = len(train_images)
m_test = len(test_images)
num_px = train_images.shape[1]

print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ")")
print("train_images shape: " + str(train_images.shape))
print("train_labels shape: " + str(train_labels.shape))
print("test_images shape: " + str(test_images.shape))
print("test_labels shape: " + str(test_labels.shape))

train_images_flatten = train_images.reshape(train_images.shape[0], -1).T
test_images_flatten = test_images.reshape(test_images.shape[0], -1).T

train_labels_onehot = convert_to_onehot(train_labels, 10)
test_labels_onehot = convert_to_onehot(test_labels, 10)

print("train_images_flatten shape: " + str(train_images_flatten.shape))
print("train_labels_onehot shape: " + str(train_labels_onehot.shape))
print("test_images_flatten shape: " + str(test_images_flatten.shape))
print("test_labels_onehot shape: " + str(test_labels_onehot.shape))

train_set_x = train_images_flatten / 255.0
test_set_x = test_images_flatten / 255.0

nn_architecture = [
    {
        "layer_size": 784,
        "activation": "none"
    },  # input layer
Example #10
    def _load_all_data(self):
        """
        _load_all_data is responsible for reading the .csv files downloaded in the initialisation.
        The results are saved into _all_data
        :return: nothing
        """
        _all_datapoints = []
        _all_labels = []
        counter = 0
        factor = 2 / self.data_length
        new_length = int(self.data_length / 2)
        ones = np.ones(new_length)
        _spinner = spinner()
        for i in range(10):

            #_spinner.print_spinner(0.0)
            percentage = int((i / 10) * 100)

            filename = self._uncompressed_data_path + "Participant_" + str(
                i + 1) + ".csv"
            with open(filename, 'r') as f:
                reader = csv.reader(f)
                header = next(reader)
                headings = next(reader)
                jump_index = 0
                index = 0
                prev_row_label = None
                row_label = 0
                data = []
                for index, row in enumerate(reader):

                    _spinner.print_spinner(percentage)

                    if index >= jump_index:
                        if index != 0 and index % self.data_length == 0:
                            # window complete: keep the first half of the
                            # magnitude spectrum, then prepend a ones column
                            data = np.abs(np.fft.rfft(data, axis=0) *
                                          factor)[:-1]
                            data = np.column_stack((ones, data))
                            _all_datapoints.append(np.array(data))
                            _all_labels.append(row_label.value)
                            data = []
                            prev_row_label = None
                        data.append(
                            np.array([
                                np.float32(row[1]),
                                np.float32(row[2]),
                                np.float32(row[3])
                            ]))
                        row_label = activityLabels[row[-1]]
                        if prev_row_label is not None:
                            if row_label != prev_row_label:
                                # label changed mid-window: skip ahead to the
                                # start of the next full window
                                jump_index = (index // self.data_length
                                              + 1) * self.data_length
                        prev_row_label = row_label
        _all_datapoints = np.array(_all_datapoints)
        _all_labels = convert_to_onehot(np.array(_all_labels))
        permutation = np.random.permutation(len(_all_datapoints))
        #_all_datapoints = _all_datapoints[permutation]
        #_all_labels = _all_labels[permutation]
        #_all_datapoints[:,:,1:] = np.tanh(_all_datapoints[:,:,1:])

        _spinner.print_spinner(100.0)

        print(_all_datapoints.shape)
        print(_all_labels.shape)
        print(_all_labels[0])
        self._all_data = (_all_datapoints, _all_labels)
        np.save(self._all_data_path, _all_datapoints)
        np.save(self._all_labels_path, _all_labels)
Example #11
X_test_signals_paths = [
    DATASET_PATH + TEST + "Inertial Signals/" + signal + "test.txt"
    for signal in INPUT_SIGNAL_TYPES
]

x_train = load_X(X_train_signals_paths)
x_test = load_X(X_test_signals_paths)

y_train_path = DATASET_PATH + TRAIN + "y_train.txt"
y_test_path = DATASET_PATH + TEST + "y_test.txt"

y_train = load_y(y_train_path)
y_test = load_y(y_test_path)
train = {}
train['x'] = x_train
train['y'] = utils.convert_to_onehot(y_train, 6)
val = {}
val['x'] = x_test
val['y'] = utils.convert_to_onehot(y_test, 6)
print(train['x'].shape, train['y'].shape, val['x'].shape, val['y'].shape)

import rnn_classifier
text_labels = LABELS
model = rnn_classifier.model()
model.batch_size = 128
model.epochs = 10
model.learning_rate = 0.0001
model.sequence_dimensions = 9
model.sequence_length = 128
model.no_of_cell = 2
model.cell_size = 32
Example #12
train_x = []
train_y = []
val_x = []
val_y = []
for k in data.keys():
    #random.shuffle(data[k])
    for i in range(len(data[k])):
        if i < (len(data[k]) * 0.85):  # 85/15 train/validation split per class
            train_x.append(data[k][i])
            train_y.append(int(k))
        else:
            val_x.append(data[k][i])
            val_y.append(int(k))

train = {}
val = {}
train['x'] = np.array(train_x).reshape([-1, 128, 128, 3])
train['y'] = utils.convert_to_onehot(np.array(train_y), 17)
val['x'] = np.array(val_x).reshape([-1, 128, 128, 3])
val['y'] = utils.convert_to_onehot(np.array(val_y), 17)

print(train['x'].shape, train['y'].shape, val['x'].shape, val['y'].shape)


def model_fun(x, is_training):
    x_shape = x.get_shape().as_list()[1:]
    kernel = {'c1': [5, 5, x_shape[2], 64], 'c2': [5, 5, 20, 50]}
    strides = {'1': [1, 1, 1, 1], '2': [1, 2, 2, 1]}
    pool_win_size = {'2': [1, 2, 2, 1]}

    conv = ops.conv2d(x, 'conv1', kernel['c1'], strides['1'], 'SAME')

    conv = ops.max_pool(conv, [1, 3, 3, 1], [1, 1, 1, 1])
Example #13
test_df = pd.read_csv(data_test_file)

text_labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

train = {}
val = {}
train['y'] = utils.convert_to_onehot(np.array(train_df['label']), 10)
del train_df['label']
train['x'] = np.reshape(np.array(train_df), [-1, 28, 28])

val['y'] = utils.convert_to_onehot(np.array(test_df['label']), 10)
del test_df['label']
val['x'] = np.reshape(np.array(test_df), [-1, 28, 28])

print(train['x'].shape, train['y'].shape, val['x'].shape, val['y'].shape)

model = rnn_classifier.model()
model.batch_size = 128
model.epochs = 10
model.learning_rate = 0.0001
model.sequence_dimensions = 28
model.sequence_length = 28
Example #14
    def _load_all_data(self):
        _all_datapoints = []
        _all_labels = []
        result_length = self.result_length
        ones = np.ones(result_length)
        _spinner = spinner(200)
        # per-class sample counts, used below to balance the four classes
        counter = np.array([0, 0, 0, 0])
        csv_filename = self._uncompressed_data_path + "REFERENCE.csv"
        with open(csv_filename, 'r') as f:
            reader = csv.reader(f)
            for index, row in enumerate(reader):
                current_data = []
                current_loc = self.data_length
                percentage = int(100 * index / 8528.0)
                _spinner.print_spinner(percentage)
                label = cardioLabels.noisy
                if row[1] != "~":
                    label = cardioLabels[row[1]]
                sorted_indices = counter.argsort()
                # skip records whose class currently has the most samples
                if label.value != sorted_indices[-1]:
                    record = self._uncompressed_data_path + row[0] + ".mat"
                    mat_data = scipy.io.loadmat(record)
                    samples = mat_data["val"]
                    samples = samples.flatten()
                    len_left = len(samples)
                    data = samples[:self.data_length]
                    data = np.abs(np.fft.rfft(data))[5:result_length + 5]
                    factor = 1 / np.amax(data)
                    data = data * factor
                    data = np.column_stack((ones, data))
                    _all_labels.append(label.value)
                    _all_datapoints.append(data)
                    current_data.append(data)
                    counter[label.value] = counter[label.value] + 1
                    len_left -= self.data_length
                    while (len_left > self.data_length
                           and (label.value == 0
                                or label.value == sorted_indices[0])
                           and label.value != 3):
                        data = samples[current_loc:current_loc +
                                       self.data_length]
                        data = np.abs(np.fft.rfft(data))[5:result_length + 5]
                        factor = 1 / np.amax(data)
                        data = data * factor
                        data = np.column_stack((ones, data))
                        label = cardioLabels.noisy
                        if row[1] != "~":
                            label = cardioLabels[row[1]]
                        _all_labels.append(label.value)
                        _all_datapoints.append(data)
                        current_data.append(data)
                        counter[label.value] = counter[label.value] + 1
                        current_loc += self.data_length + 1
                        len_left -= self.data_length
                    if counter[label.value] < counter[
                            sorted_indices[2]] or label.value == 0:
                        # oversample under-represented classes: append each
                        # window twice more
                        for data in current_data:
                            _all_datapoints.append(data)
                            _all_labels.append(label.value)
                            _all_datapoints.append(data)
                            _all_labels.append(label.value)
                            counter[label.value] = counter[label.value] + 1
                            counter[label.value] = counter[label.value] + 1
        _all_datapoints = np.array(_all_datapoints)
        _all_labels = convert_to_onehot(np.array(_all_labels))

        _spinner.print_spinner(100.0)

        print(_all_datapoints.shape)
        print(_all_labels.shape)
        print(_all_labels[0])
        self._all_data = (_all_datapoints, _all_labels)
        print("datapoints by class:", counter)
        np.save(self._all_data_path, _all_datapoints)
        np.save(self._all_labels_path, _all_labels)
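The windowing above keeps result_length magnitude bins per window and scales each window's peak to 1; a minimal self-contained sketch of the same feature construction (window and bin counts are hypothetical):

import numpy as np

window = np.random.randn(3000)                   # one fixed-length segment
spectrum = np.abs(np.fft.rfft(window))[5:105]    # skip the lowest bins, keep 100
spectrum = spectrum / np.amax(spectrum)          # peak-normalize to 1
features = np.column_stack((np.ones(100), spectrum))
print(features.shape)                            # (100, 2)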