def grab_data(config, examples, labels, is_train=True):
    params = {
        'batch_size': config['batch_size'],
        'num_workers': 4,
        'pin_memory': True,
        'sampler': None
    }
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Training pipeline uses random crop/flip augmentation; test pipeline uses a center crop.
    tr_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(0.5, 4.0 / 3)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    ts_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if is_train:
        params['shuffle'] = True
        data_set = data.DataLoader(
            DataSet(config, examples, labels, tr_transforms, is_train),
            **params)
    else:
        params['shuffle'] = False
        data_set = data.DataLoader(
            DataSet(config, examples, labels, ts_transforms, is_train),
            **params)
    return data_set
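# A minimal, hypothetical usage sketch for grab_data (the config keys and the
# example/label arrays are placeholders; the project-specific DataSet class is
# not shown here):
#
#     config = {'batch_size': 32}
#     train_loader = grab_data(config, train_examples, train_labels, is_train=True)
#     for images, targets in train_loader:
#         ...  # forward pass, loss computation, optimizer step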
def extractImageFile(path_img, path_labels):
    # Structure is as follows:
    # [offset] [type]          [value]          [description]
    # 0000     32 bit integer  0x00000803(2051) magic number
    # 0004     32 bit integer  60000            number of images
    # 0008     32 bit integer  28               number of rows
    # 0012     32 bit integer  28               number of columns
    # 0016     unsigned byte   ??               pixel
    # 0017     unsigned byte   ??               pixel
    # xxxx     unsigned byte   ??               pixel
    with open(path_img, "rb") as f:
        # HEADER
        int.from_bytes(f.read(4), byteorder="big")  # magic number, discarded
        number_of_images = int.from_bytes(f.read(4), byteorder="big")
        row = int.from_bytes(f.read(4), byteorder="big")
        col = int.from_bytes(f.read(4), byteorder="big")
        labels = extractLabelFile(path_labels)
        # DATA: read one pixel byte at a time until EOF
        data = np.zeros(number_of_images * row * col)
        EOF = b''
        byte = f.read(1)
        i = 0
        while byte != EOF:
            data[i] = nomalyzeByte(int.from_bytes(byte, byteorder="big"))
            byte = f.read(1)
            i += 1
    dataset = DataSet(number_of_images, labels, row, col, data)
    return dataset
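# Hedged, optional alternative to the byte-by-byte loop above (not part of the
# original code): the IDX header documented in extractImageFile is a fixed 16
# bytes, so the pixel block can be read in one call with numpy. The division by
# 255 assumes the nomalyzeByte helper simply scales pixels into [0, 1].
import numpy as np

def extract_image_file_fast(path_img):
    with open(path_img, "rb") as f:
        raw = f.read()
    number_of_images = int.from_bytes(raw[4:8], byteorder="big")
    row = int.from_bytes(raw[8:12], byteorder="big")
    col = int.from_bytes(raw[12:16], byteorder="big")
    # Pixel data starts at byte 16; read it all at once instead of looping.
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16).astype(np.float64)
    return pixels.reshape(number_of_images, row, col) / 255.0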
Example #3
def load(path: str = 'dataset/mnist',
         valid_size: int = 5000,
         mean_subtraction=True,
         normalization=True):
    images_train_valid = load_images(os.path.join(path, train_images_filename))
    labels_train_valid = load_labels(os.path.join(path, train_labels_filename))

    images_valid = images_train_valid[:valid_size]
    labels_valid = labels_train_valid[:valid_size]

    images_train = images_train_valid[valid_size:]
    labels_train = labels_train_valid[valid_size:]

    images_test = load_images(os.path.join(path, test_images_filename))
    labels_test = load_labels(os.path.join(path, test_labels_filename))

    # mean subtraction
    if mean_subtraction:
        train_mean = np.mean(images_train, axis=0)
        images_train -= train_mean
        images_valid -= train_mean
        images_test -= train_mean

    # normalization
    if normalization:
        train_std = np.std(images_train, axis=0)
        # Replace zero std (constant pixels) with 1 to avoid division by zero.
        train_std += (train_std == 0).astype(int)
        images_train /= train_std
        images_valid /= train_std
        images_test /= train_std

    return DataSet(
        train=(images_train, labels_train),
        validation=(images_valid, labels_valid),
        test=(images_test, labels_test)
    )
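# Standalone illustration of the zero-std guard used above (the array values
# are made up for the example): a feature column with zero variance would
# otherwise produce a division by zero and NaNs.
import numpy as np

X = np.array([[0.0, 1.0, 5.0],
              [0.0, 3.0, 7.0]])          # first column is constant
std = np.std(X, axis=0)                  # -> [0., 1., 1.]
std += (std == 0).astype(int)            # -> [1., 1., 1.]
X_scaled = (X - np.mean(X, axis=0)) / std
# The constant column stays 0 instead of becoming NaN.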
Example #4
class Test:

    def __init__(self):
        self.ds = DataSet()

    def test(self):
        X, Y = self.ds.load_dataset()
        
        print("dataset size {}".format(X.shape))

        with open(str(model_dir.joinpath('model.json')), 'r') as json_file:
            loaded_model_json = json_file.read()
        
        model = tf.keras.models.model_from_json(loaded_model_json)
        model.load_weights(str(model_dir.joinpath('model.h5')))
        print("Loaded model from disk")

        model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        
        results = model.evaluate(X, Y)
        print('test loss, test acc:', results)

        for i, x in enumerate(X):
            pred = model.predict(np.array([x]))
            # Print the predicted class and its probability for each sample
            # (indexing pred[0] by the sample index i would go out of range).
            print("{}: {}: max: {}".format(i, np.argmax(pred[0]), np.max(pred[0])))
Example #5
class Train:
    
    def __init__(self):
        self.ds = DataSet()

    def train(self, lr=1e-3, epochs=12):
        X, Y = self.ds.load_dataset()
        
        print("dataset size {}".format(X.shape))

        batch, num_features, num_channel = X.shape
        xception = Xception(num_classes=13, num_features=num_features)

        checkpoint_filepath = str(model_dir.joinpath('model.h5'))
        
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            monitor='accuracy',  # metric is logged as 'accuracy' in TF 2.x
            mode='max',
            verbose=1,
            save_best_only=True)

        
        model = xception.get_model()
        adam = tf.keras.optimizers.Adam(learning_rate=lr)

        model.compile(optimizer=adam, loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])    

        history = model.fit(X, Y, epochs=epochs,
                            callbacks=[model_checkpoint_callback],
                            verbose=2)

        model_json = model.to_json()
        with open(str(model_dir.joinpath('model.json')), "w") as json_file:
            json_file.write(model_json)
        
        model.save_weights(str(model_dir.joinpath('model.h5')))
        print("Saved model to disk")

        plt.plot(history.history['accuracy'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train'], loc='upper left')
        plt.savefig(str(model_dir.joinpath('acc.png')))
        plt.close()


        plt.plot(history.history['loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train'], loc='upper left')
        plt.savefig(str(model_dir.joinpath('loss.png')))
Example #6
def load(path: str = 'dataset/cifar10/cifar-10-batches-py',
         valid_size: int = 5000,
         mean_subtraction=True,
         normalization=True):

    images_train_valid = np.zeros(shape=(train_size, channels, image_height,
                                         image_width),
                                  dtype=float)
    labels_train_valid = np.zeros(shape=train_size, dtype=int)

    start = 0
    for i in range(train_files):
        images_batch, labels_batch = load_data(
            os.path.join(path, "data_batch_{}".format(i + 1)))
        size = images_batch.shape[0]
        images_train_valid[start:start + size] = images_batch
        labels_train_valid[start:start + size] = labels_batch
        start = start + size

    images_valid = images_train_valid[:valid_size]
    labels_valid = labels_train_valid[:valid_size]

    images_train = images_train_valid[valid_size:]
    labels_train = labels_train_valid[valid_size:]

    images_test, labels_test = load_data(os.path.join(path, "test_batch"))

    # mean subtraction
    if mean_subtraction:
        train_mean = np.mean(images_train, axis=0)
        images_train -= train_mean
        images_valid -= train_mean
        images_test -= train_mean

    # normalization
    if normalization:
        train_std = np.std(images_train, axis=0)
        # Replace zero std (constant pixels) with 1 to avoid division by zero.
        train_std += (train_std == 0).astype(int)
        images_train /= train_std
        images_valid /= train_std
        images_test /= train_std

    return DataSet(train=(images_train, labels_train),
                   validation=(images_valid, labels_valid),
                   test=(images_test, labels_test))
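# Hedged usage sketch (not part of the original snippet): assumes the CIFAR-10
# batch files referenced above are on disk, the module-level names
# (train_size, train_files, channels, image_height, image_width, load_data)
# are defined, and the returned DataSet exposes train_set()/validation_set()
# as used by the train() method in a later example.
if __name__ == '__main__':
    cifar = load(valid_size=5000)
    X_train, y_train = cifar.train_set()
    X_valid, y_valid = cifar.validation_set()
    print(X_train.shape, X_valid.shape)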
Example #7
class Train:
    def __init__(self):
        self.ds = DataSet()

    def train(self, lr=1e-3, epochs=12):
        X, Y = self.ds.load_dataset()

        batch, num_features, num_channel = X.shape
        xception = Xception(num_classes=4, num_features=num_features)

        mc = tf.keras.callbacks.ModelCheckpoint(
            str(model_dir.joinpath('model.h5')),
            monitor='accuracy',  # 'train_acc' is never logged, so nothing would be saved
            mode='auto',
            verbose=1,
            save_best_only=True)

        model = xception.get_model()
        adam = tf.keras.optimizers.Adam(learning_rate=lr)

        model.compile(optimizer=adam,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        # Pass the checkpoint callback defined above (it was otherwise unused).
        history = model.fit(X, Y, epochs=epochs, verbose=2, callbacks=[mc])
Example #8
#pca_dim = np.load(osp.join(args.pca_dir, 'pca_dim.npy'))
#pca_dir = np.load(osp.join(args.pca_dir, 'pca_dir.npy')).item()

pca_dim, pca_dir = PCA_Dim_Compute(os.path.join(args.data_dir, 'features'),
                                   args.pca_ratio)
model = VGG(pca_dir=pca_dir,
            num_classes=args.num_classes,
            dim=pca_dim,
            pca=True)
model.initialize_weights(pretrained=True,
                         weight_path=osp.join(args.pretrained_ra),
                         feat_path=os.path.join(args.data_dir, 'features'))

cudnn.benchmark = True

trainloader = data.DataLoader(DataSet(args.data_dir, mean=IMG_MEAN),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              pin_memory=True)

trainloader_iter = enumerate(trainloader)

testloader = data.DataLoader(TestDataSet(args.data_dir, mean=IMG_MEAN),
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             pin_memory=True)

testloader_iter = enumerate(testloader)
Example #9
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 

import tensorflow as tf
from dataset.dataset import DataSet
from train.train import Train
from test.test import Test
from model.xception import Xception

'''
    This project uses tensorflow 2.1.0
'''

print('tensorflow version {}'.format(tf.__version__))

ds = DataSet()
xception = Xception()

# xception.plot_model()
train = Train()
train.train(epochs=3)
Example #10
    def train(self,
              data_set: DataSet,
              method: str,
              num_passes: int = 20,
              batch_size: int = 128,
              verbose: bool = True):

        if verbose:
            print('\ntrain method: {}'.format(method),
                  '\nnum_passes: {}'.format(num_passes),
                  '\nbatch_size: {}\n'.format(batch_size))

        start_total_time = time.time()

        X_train, y_train = data_set.train_set()
        X_valid, y_valid = data_set.validation_set()
        """ initalize layers """
        input_size = X_train[0].shape
        for layer in self.layers:
            input_size = layer.initialize(input_size, self.num_classes, method)
            layer.reset_params()

        step = 0
        for epoch in range(num_passes):
            """ decay learning rate if necessary """
            if self.lr_decay > 0 and epoch > 0 and (
                    epoch % self.lr_decay_interval) == 0:
                self.optimizer.decay_learning_rate(self.lr_decay)
                if verbose:
                    print("Decreased learning rate by {}".format(
                        self.lr_decay))

            for batch in data.mini_batches(X_train, y_train, batch_size):
                X_batch, y_batch = batch
                """ forward pass """
                start_forward_time = time.time()
                for layer in self.layers:
                    X_batch = layer.forward(X_batch, mode='train')
                self.statistics['forward_time'] += (
                    time.time() - start_forward_time)
                """ loss """
                loss, delta = self.loss.calculate(X_batch, y_batch)
                """ backward pass """
                gradients = []
                start_backward_time = time.time()
                if method == 'dfa':
                    for layer in self.layers:
                        dW, db = layer.dfa(delta)
                        gradients.append((layer, dW, db))
                elif method == 'bp':
                    dX = delta
                    for layer in reversed(self.layers):
                        dX, dW, db = layer.back_prob(dX)
                        gradients.append((layer, dW, db))
                    gradients.reverse()
                else:
                    raise ValueError(
                        "Invalid train method '{}'".format(method))
                self.statistics['backward_time'] += (
                    time.time() - start_backward_time)
                """ regularization (L2) """
                start_regularization_time = time.time()
                if self.regularization > 0:
                    reg_term = 0
                    for layer, dW, db in gradients:
                        if layer.has_weights():
                            dW += self.regularization * layer.W
                            reg_term += np.sum(np.square(layer.W))
                    reg_term *= self.regularization / 2.
                    reg_term /= y_batch.shape[0]
                    loss += reg_term
                self.statistics['regularization_time'] += (
                    time.time() - start_regularization_time)
                """ update """
                start_update_time = time.time()
                update = UpdateLayer(self.optimizer)
                self.layers = [update(x) for x in gradients]
                self.statistics['update_time'] += (
                    time.time() - start_update_time)
                """ log statistics """
                accuracy = (np.argmax(X_batch, axis=1)
                            == y_batch).sum() / y_batch.shape[0]
                self.statistics['train_loss'].append(loss)
                self.statistics['train_accuracy'].append(accuracy)

                if (step % 10) == 0 and verbose:
                    print("epoch {}, step {}, loss = {:07.5f}, accuracy = {}".
                          format(epoch, step, loss, accuracy))

                step += 1
            """ log statistics """
            valid_loss, valid_accuracy = self.cost(X_valid, y_valid)
            self.statistics['valid_step'].append(step)
            self.statistics['valid_loss'].append(valid_loss)
            self.statistics['valid_accuracy'].append(valid_accuracy)

            if verbose:
                print(
                    "validation after epoch {}: loss = {:07.5f}, accuracy = {}"
                    .format(epoch, valid_loss, valid_accuracy))

        self.statistics['total_time'] = time.time() - start_total_time
        return self.statistics
# Load numpy and the classifier used below
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Set random seed
#np.random.seed(3)
good_annot = ["Live_In", "NONE", "Work_For"]

def print_annot_file(annot_sent_dic, label):
    line = 'sent{}\t{}\t{}\t{}\t( {} )\n'.format(
        annot_sent_dic['id'], annot_sent_dic['ent1'], good_annot[label],
        annot_sent_dic['ent2'], annot_sent_dic['sent'])
    return line



if __name__ == "__main__":
    ds = DataSet('dataset')
    # Create a random forest Classifier. By convention, clf means 'Classifier'
    clf = RandomForestClassifier(n_estimators=14, random_state=3, class_weight={0:4, 1:1, 2:5}, max_depth=12)
    #clf = LogisticRegression(random_state=0, class_weight={0:4, 1:1, 2:4})


    # Train the classifier to take the training features and learn how they
    # relate to the training labels (the relation classes)
    X, y = ds.train
    clf.fit(X, y)
    X_dev, y_dev = ds.dev
    annot_data_dev = ds.dev_annot_data
    y_pred = clf.predict(X_dev)
    from sklearn import metrics
    print('f1-WRK-LIV:', metrics.f1_score(y_dev, y_pred, labels=[0, 2], average='weighted'))
    print('f1-NON:', metrics.f1_score(y_dev, y_pred, labels=[1], average='weighted'))
import os
import cv2
import numpy as np  # used below for np.load and np.zeros
import startup
import config
from dataset.dataset import DataSet
from algorithms.analysisframework import AnalysisFramework
from algorithms.distance import *
from utils import *

# Read data associated with Net
label_file = "/media/blcv/drive_2TB/CODE/FV-Benchmark/DataSets/Felix/labels.txt"
main_folder = "/media/blcv/drive_2TB/CODE/FV-Benchmark/"
config_file = main_folder + "Nets/configs_net/casia_ver_conv52_correct_felix.json"
config_file = parse_deep_config(config_file)
data = DataSet("deep", config_file)
labels = data.labels
labels_set = set(labels)

# After calculating matches outside IPython (using parallel etc.), load scores and extract the best images
score_file = "/media/blcv/drive_2TB/CODE/FV-Benchmark/FELIX_data/calculate_matching/divide_pairs/scores_all.txt"
with open(score_file, 'r') as f:
    lines_score = [line.strip() for line in f]
lines_score = sorted(
    lines_score,
    key=lambda x: int(x.split('/')[-1].split('.')[0].split("_")[1]),
    reverse=False)

verification_task = np.load(
    "/media/blcv/drive_2TB/CODE/FV-Benchmark/notebooks/verification_felix.npy")

scores_matrix = np.zeros((verification_task.shape[0]))
Example #13
    def __init__(self):
        self.ds = DataSet()