def main():
    """Parse CLI flags, prepare cached MNIST subsets, and launch training.

    Flags:
        -start: iteration count to resume training after (default 0).
        -gpu:   Caffe GPU device id (default 0).
    """
    parser = argparse.ArgumentParser(description='start parser')
    parser.add_argument('-start', action='store', default=0, type=int)
    parser.add_argument('-gpu', action='store', default=0, type=int)
    arg = parser.parse_args()
    # FIX: the original Python 2 `print` statements are a SyntaxError on
    # Python 3; the parenthesized form below behaves identically on both.
    print('---------------------------------------------------')
    print('GPU ID %d ' % arg.gpu)
    print('Continue training after %d iterations' % arg.start)
    print('---------------------------------------------------')
    caffe.set_device(arg.gpu)
    caffe.set_mode_gpu()
    # Load the cached random training subset, rebuilding it on first run.
    if not (os.path.isfile('mnist_5000_trainset.npy')
            and os.path.isfile('mnist_5000_trainlabels.npy')):
        trainset, trainlabels = load_mnist(dataset="training",
                                           imsize=image_size[1:],
                                           path="../MNIST")
        assert trainset.shape[0] == 60000
        trainmean = np.expand_dims(np.mean(trainset, axis=0), axis=0)
        # Save a visual sanity check of the dataset mean image.
        plt.figure()
        plt.imshow(np.uint8(trainmean[0]))
        plt.axis('off')
        plt.savefig('trainmean.png', cmap='gray')
        plt.close('all')
        # Random subset, drawn without replacement, then cached to disk.
        ind = np.random.choice(trainset.shape[0], trainset_size,
                               replace=False)
        trainset = trainset[ind]
        trainlabels = trainlabels[ind]
        np.save('mnist_5000_trainset.npy', trainset)
        np.save('mnist_5000_trainlabels.npy', trainlabels)
        np.save('mnist_5000_trainmean.npy', trainmean)
    else:
        trainset = np.load('mnist_5000_trainset.npy')
        trainlabels = np.load('mnist_5000_trainlabels.npy')
        trainmean = np.load('mnist_5000_trainmean.npy')
    assert trainset.shape[
        0] == trainset_size, 'trainset has wrong number of samples: %d vs %d' % (
            trainset.shape[0], trainset_size)
    testset, testlabels = load_mnist(dataset="testing",
                                     imsize=image_size[1:],
                                     path="../MNIST")
    assert testset.shape[
        0] == testset_size, 'testset has wrong number of samples: %d vs %d' % (
            testset.shape[0], testset_size)
    testmean = np.expand_dims(np.mean(testset, axis=0), axis=0)
    trainset = data_preprocess(trainset, trainmean)
    testset = data_preprocess(testset, testmean)
    # Train the network, resuming after arg.start iterations.
    train(trainset, trainlabels, testset, testlabels, arg.start)
def main():
    """Evaluate the saved model on the clean test set and print accuracy."""
    features, labels = data_loader(clean_data_filename)
    features = data_preprocess(features)
    model = keras.models.load_model(model_filename)
    # Predicted class = argmax over the softmax outputs, per sample.
    predicted = np.argmax(model.predict(features), axis=1)
    accuracy = np.mean(np.equal(predicted, labels)) * 100
    print('Classification accuracy:', accuracy)
def infer(start: list):
    """Run one tracker step from ``start``.

    :param start: initial point (x, y, z) in physical coordinates
    :return: shell offsets ``[sx, sy, sz]`` for the candidate moves, the
             direction indices sorted by descending confidence, the
             predicted radius, and the current termination probability;
             ``None`` when the patch around ``start`` leaves the volume.
    """
    max_z, max_x, max_y = (re_spacing_img.shape[0], re_spacing_img.shape[1],
                           re_spacing_img.shape[2])
    cut_size = 9
    spacing_x, spacing_y, spacing_z = spacing[0], spacing[1], spacing[2]
    # Physical coordinates -> voxel indices in the resampled volume.
    center_x_pixel = get_spacing_res2(start[0], spacing_x, resize_factor[1])
    center_y_pixel = get_spacing_res2(start[1], spacing_y, resize_factor[2])
    center_z_pixel = get_spacing_res2(start[2], spacing_z, resize_factor[0])
    left_x, right_x = center_x_pixel - cut_size, center_x_pixel + cut_size
    left_y, right_y = center_y_pixel - cut_size, center_y_pixel + cut_size
    left_z, right_z = center_z_pixel - cut_size, center_z_pixel + cut_size

    # Guard clause: the cube must lie entirely inside the volume.
    outside = (left_x < 0 or right_x < 0 or left_y < 0 or right_y < 0
               or left_z < 0 or right_z < 0 or left_x >= max_x
               or right_x >= max_x or left_y >= max_y or right_y >= max_y
               or left_z >= max_z or right_z >= max_z)
    if outside:
        return None

    side = cut_size * 2 + 1
    patch = np.zeros((side, side, side))
    for slice_idx in range(left_z, right_z + 1):
        plane = re_spacing_img[slice_idx].copy()
        patch[slice_idx - left_z] = plane[left_y:right_y + 1,
                                          left_x:right_x + 1]

    input_data = data_preprocess(patch)
    inputs = input_data.to(device)
    outputs = infer_model(inputs.float())
    outputs = outputs.view((len(input_data), max_points + 1))
    # First max_points columns: direction scores; last column: radius.
    direction_scores = outputs[:, :len(outputs[0]) - 1]
    radius_out = outputs[:, -1]
    direction_probs = torch.nn.functional.softmax(direction_scores, 1)
    indexs = np.argsort(direction_probs.cpu().detach().numpy()[0])[::-1]
    curr_prob = prob_terminates(direction_probs,
                                max_points).cpu().detach().numpy()[0]
    curr_r = radius_out.cpu().detach().numpy()[0]
    sx, sy, sz = get_shell(max_points, curr_r)
    return [sx, sy, sz], indexs, curr_r, curr_prob
def my_generator(Xtrain, Ytrain, length, n_channel, n_classes, random_noise,
                 normalized, batch_size):
    """Yield endless (x, y) training batches of random fixed-length windows.

    Every epoch reshuffles the sample order; for each batch one random
    window start is drawn (shared by the whole batch) and ``length``
    timesteps are cropped from each selected sample, then passed through
    ``myutils.data_preprocess``.

    :param Xtrain: array of shape (n_samples, n_timesteps, n_channel)
    :param Ytrain: labels of shape (n_samples, n_classes)
    :param length: window length cropped from each sample
    :param random_noise: forwarded to ``myutils.data_preprocess``
    :param normalized: forwarded to ``myutils.data_preprocess``
    :yield: (x, y) arrays of shapes (batch_size, length, n_channel) and
            (batch_size, n_classes)
    """
    n_sample = Xtrain.shape[0]
    ind = list(range(n_sample))
    # FIX: np.float was removed in NumPy 1.24; builtin float is the
    # documented drop-in replacement (both mean float64 here).
    x = np.empty((batch_size, length, n_channel), dtype=float)
    y = np.empty((batch_size, n_classes), dtype=int)
    while True:
        np.random.shuffle(ind)
        for i in range(n_sample // batch_size):
            # One random window start, shared by the whole batch.
            st = random.choice(np.arange(0, Xtrain.shape[1] - length))
            i_batch = ind[i * batch_size:(i + 1) * batch_size]
            for j, k in enumerate(i_batch):
                x[j, :] = myutils.data_preprocess(
                    Xtrain[k, st:(st + length), :],
                    random_noise=random_noise,
                    normalized=normalized)
                y[j, :] = Ytrain[k, :]
            yield x, y
def search_seeds_ostias(max_size=(200, 10)):
    '''
    Find seed-point and ostium-point candidates by scoring random patches.

    :param max_size: keep the top max_size[0] seed points and the top
        max_size[1] ostia points (sorted by descending predicted proximity)
    :return: (res_seeds, res_ostia) lists of ((x, y, z), score) pairs
    '''
    print("search seeds and ostias")
    spacing_x, spacing_y, spacing_z = spacing[0], spacing[1], spacing[2]
    re_spacing_img, curr_spacing, resize_factor = resample(
        src_array, np.array([spacing_z, spacing_x, spacing_y]),
        np.array([1, 1, 1]))
    re_spacing_img, meam_minc, mean_minr, mean_maxc, mean_maxr = crop_heart(
        re_spacing_img)
    cut_size = 9
    res_seeds = {}
    res_ostia = {}
    random_point_size = 80000
    batch_size = 1000
    new_patch_list = []
    center_coord_list = []
    z, h, w = re_spacing_img.shape
    offset_size = 10
    # FIX: np.random.random_integers was removed from NumPy; randint with
    # an inclusive upper bound (high + 1) is the documented replacement.
    x_list = np.random.randint(meam_minc - offset_size,
                               mean_maxc + offset_size + 1,
                               (random_point_size, 1))
    y_list = np.random.randint(mean_minr - offset_size,
                               mean_maxr + offset_size + 1,
                               (random_point_size, 1))
    z_list = np.random.randint(0, z + 1, (random_point_size, 1))
    index = np.concatenate([x_list, y_list, z_list], axis=1)
    # Deduplicate sampled coordinates.
    index = list(set(tuple(x) for x in index))

    def _score_queued_patches():
        # Run both models on the queued patches and record the scores.
        input_data = torch.cat(new_patch_list, dim=0)  # torch uses dim=, not axis=
        inputs = input_data.to(device)
        seeds_outputs = seeds_model(inputs.float())
        seeds_outputs = seeds_outputs.view((len(input_data)))
        seeds_proximity = seeds_outputs.cpu().detach().numpy()
        ostia_outputs = ostia_model(inputs.float())
        ostia_outputs = ostia_outputs.view(len(input_data))
        ostia_proximity = ostia_outputs.cpu().detach().numpy()
        for j in range(len(center_coord_list)):
            res_seeds[center_coord_list[j]] = seeds_proximity[j]
            res_ostia[center_coord_list[j]] = ostia_proximity[j]
        new_patch_list.clear()
        center_coord_list.clear()

    for i in index:
        center_x_pixel, center_y_pixel, center_z_pixel = i[0], i[1], i[2]
        left_x = center_x_pixel - cut_size
        right_x = center_x_pixel + cut_size
        left_y = center_y_pixel - cut_size
        right_y = center_y_pixel + cut_size
        left_z = center_z_pixel - cut_size
        right_z = center_z_pixel + cut_size
        # Only keep cubes that lie fully inside the resampled volume
        # (NOTE(review): x is checked against h and y against w as in the
        # original — confirm the intended axis order against re_spacing_img).
        if left_x >= 0 and right_x < h and left_y >= 0 and right_y < w and left_z >= 0 and right_z < z:
            new_patch = np.zeros(
                (cut_size * 2 + 1, cut_size * 2 + 1, cut_size * 2 + 1))
            for ind in range(left_z, right_z + 1):
                src_temp = re_spacing_img[ind].copy()
                new_patch[ind - left_z] = src_temp[left_y:right_y + 1,
                                                   left_x:right_x + 1]
            new_patch_list.append(data_preprocess(new_patch))
            center_coord_list.append(
                (center_x_pixel, center_y_pixel, center_z_pixel))
            if len(new_patch_list) == batch_size:
                _score_queued_patches()
    # BUG FIX: the original only scored full batches, silently discarding
    # up to batch_size - 1 queued patches at the end of the loop.
    if new_patch_list:
        _score_queued_patches()

    res_seeds = sorted(res_seeds.items(), key=lambda item: item[1],
                       reverse=True)
    res_ostia = sorted(res_ostia.items(), key=lambda item: item[1],
                       reverse=True)
    res_seeds = res_seeds[:max_size[0]]
    res_ostia = res_ostia[:max_size[1]]
    return res_seeds, res_ostia
def train(args):
    """Train the video-caption model, optionally resuming from a checkpoint.

    When ``args.init_from`` is set, the saved config/vocab are loaded and
    validated against the current command line; otherwise a fresh config and
    vocabulary are written to ``args.save_dir``. Training runs for
    ``args.n_epoch`` epochs, logging per-batch loss to ``log/loss.txt`` and
    saving a checkpoint every ``args.save_every`` epochs.
    """
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        # get ckpt
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        # get vocab
        with open(os.path.join(args.init_from, 'vocab.pkl'), 'rb') as f:
            vocab = cPickle.load(f)
        vocab_inv = {v: k for k, v in vocab.items()}
        # read data
        _, _, train_feat_id, train_caption, test_feat_id, test_caption = data_preprocess(
            args.train_label_json, args.test_label_json)
        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_args = cPickle.load(f)
        need_be_same = [
            "dim_image", "dim_hidden", "n_lstm_step", "n_video_step",
            "n_caption_step"
        ]
        for checkme in need_be_same:
            assert vars(saved_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme
        # complete arguments to fulfill different versions (older saved
        # configs may predate the schedule_sampling option)
        if ("schedule_sampling" in vars(saved_args)):
            print("schedule_sampling: %d" %
                  vars(saved_args)["schedule_sampling"])
        else:
            vars(saved_args)["schedule_sampling"] = 0.0
    else:
        # fresh run: persist config and vocabulary so it can be resumed later
        with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
            cPickle.dump(args, f)
        vocab, vocab_inv, train_feat_id, train_caption, test_feat_id, test_caption = data_preprocess(
            args.train_label_json, args.test_label_json)
        with open(os.path.join(args.save_dir, 'vocab.pkl'), 'wb') as f:
            cPickle.dump(vocab, f)
    model = Video_Caption_Generator(args, n_vocab=len(vocab), infer=False)
    # add gpu options
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        print("Initialized")
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        loss_fd = open('log/loss.txt', 'w')
        loss_to_draw = []
        for epoch in range(0, args.n_epoch):
            if (model.schedule_sampling > 0.0):
                # [pseudo] prob of schedule sampling linearly increases with epochs
                model.schedule_sampling = np.min(
                    [model.schedule_sampling * (1.0 + epoch / 50), 1.0])
            # shuffle sample order each epoch
            index = np.array(range(len(train_feat_id)))
            np.random.shuffle(index)
            epoch_train_feat_id = train_feat_id[index]
            epoch_train_caption = train_caption[index]
            loss_to_draw_epoch = []
            # iterate over full minibatches (a trailing partial batch is skipped)
            for start, end in zip(
                    range(0, len(epoch_train_feat_id), model.batch_size),
                    range(model.batch_size, len(epoch_train_feat_id),
                          model.batch_size)):
                start_time = time.time()
                # get one minibatch
                batch_feat_id = epoch_train_feat_id[start:end]
                batch_caption = epoch_train_caption[start:end]
                # get video features
                current_feat, current_feat_mask = get_video_feat(
                    args.train_video_feat_path, batch_feat_id)
                # randomly select one caption per video and pad captions
                # to maxlen = n_caption_step + 1
                current_caption, current_caption_mask = get_padding_caption(
                    vocab, batch_caption, maxlen=model.n_caption_step + 1)
                # run train_op to optimize tf_loss
                _, loss_val = sess.run(
                    [model.train_op, model.tf_loss],
                    feed_dict={
                        model.video: current_feat,
                        model.video_mask: current_feat_mask,
                        model.caption: current_caption,
                        model.caption_mask: current_caption_mask
                    })
                loss_to_draw_epoch.append(loss_val)
                print('idx: ', start, " Epoch: ", epoch, " loss: ", loss_val,
                      ' Elapsed time: ', str((time.time() - start_time)))
                loss_fd.write('epoch ' + str(epoch) + ' loss ' +
                              str(loss_val) + '\n')
            # checkpoint every args.save_every epochs
            if np.mod(epoch, args.save_every) == 0:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=epoch)
                print("Epoch ", epoch,
                      "model saved to {}".format(checkpoint_path))
        loss_fd.close()
# Activity-recognition pipeline: load the CSV dataset, split it 80/20,
# train a neural net on the train split, and report test accuracy.
from models import nn
from utils import read_file, data_preprocess, print_accuracy_stats

dataset = read_file("activity_recognition_dataset.csv")
# data_preprocess(dataset, 0.8) -> (train, test) split at an 0.8 ratio.
train, test = data_preprocess(dataset, 0.8)
# Ground-truth labels are the last column of the test split.
ts_y = test[:, -1:]
x = nn.NeuralNet();
x.train(train)
pred = x.test(test)
print_accuracy_stats(pred, ts_y)
# Evaluate a ResNet50 model on the CIFAR-10 test split.
import utils
import model
from model import MyModel

# Load CIFAR-10 and prepare the test data (preprocess + one-hot labels).
(train_img, train_lab), (test_img, test_lab) = data_loader("CIFAR10")
test_img = utils.data_preprocess(test_img)
test_lab = utils.one_hot_encoder(test_lab)
epochs = 10
resnet50_model = MyModel()
resnet50_model = resnet50_model.ResNet50()


def evaluate(test_im, test_lab):
    """Run Keras-style evaluate on the global resnet50_model.

    Returns the metrics list; index 0 is printed as loss and index 1 as
    accuracy below.
    """
    test_result = resnet50_model.evaluate(test_im, test_lab, verbose=0)
    return test_result


test_result = evaluate(test_img, test_lab)
print("ResNet50 loss: ", test_result[0])
print("ResNet50 accuracy: ", test_result[1])
# hyperparameters. flags.DEFINE_integer('batch_size', 18, 'Batch Size (default: 64), should be tuned according to data size.') flags.DEFINE_integer('num_epochs', 1000, 'Number of training epochs (default: 500), early stop.') flags.DEFINE_integer('folds', 10, 'Number of folds in cross validation (default: 10)') flags.DEFINE_integer('class_size', 2, 'Classification Size (default: 2), should be tuned according to different datasets.') flags.DEFINE_integer('seq_len', 18, 'Number of selected nodes (default: 18 MUTAG), should be tuned according to different datasets.') flags.DEFINE_integer('order_len', 45, 'Number of 3-order length (default: 45 MUTAG), should be tuned according to different datasets.') flags.DEFINE_float('learning_rate', 1e-3, 'MomentumOptimizer/AdamOptimizer learning rate (default: 0.001)') flags.DEFINE_float('momentum', 0.9, 'MomentumOptimizer learning rate decay (default: 0.9)') flags.DEFINE_string('data_fn', 'datasets/mutag_data.npy', 'training & test file name, including data matrix (default: mutag_data.npy)') flags.DEFINE_string('label_fn', 'datasets/mutag_label.npy', 'training & test file name, including label vector (default: mutag_label.npy)') if __name__ == "__main__": # divide train set and test set. data, label = utils.data_preprocess(FLAGS.data_fn, FLAGS.label_fn) test_size = int(data.shape[0]/FLAGS.folds) train_size = data.shape[0]-test_size with tf.Session() as sess: build_time = time.time() net = models.MotifAttGCN(sess, FLAGS.batch_size, FLAGS.class_size, FLAGS.seq_len, FLAGS.order_len) # list containing each accuracy calculated from each fold data. accs = [] for fold in range(FLAGS.folds): sess.run(tf.global_variables_initializer()) begin_time = time.time() print('--------this fold initialization(build model+init) takes %.3f minutes\n'%((begin_time-build_time)/60)) # get batch data. if fold < FLAGS.folds - 1:
import os # tf.config.experimental_run_functions_eagerly(True) os.environ["CUDA_VISIBLE_DEVICES"] = "0" # (train_img, train_lab),(test_img, test_lab) = utils.data_loader("CIFAR10") model = san.san(sa_type=1, layers=(2, 1, 2, 4, 1), kernels=[3, 7, 7, 7, 7]) model.build(input_shape=(config.BATCH_SIZE, config.channels, config.image_height, config.image_width)) model.summary() train_img, train_lab, test_img, test_lab = utils.read_train_test_data( "/Users/hamnamoieez/Desktop/Projects/self-attention-image-recognition/dataset" ) train_img = utils.data_preprocess(train_img) train_lab = utils.one_hot_encoder(train_lab) X_train, X_val, y_train, y_val = utils.validation_data(train_img, train_lab) train_generator, val_generator = utils.data_augmentation( X_train, y_train, X_val, y_val) # define loss and optimizer loss_object = tf.keras.losses.CategoricalCrossentropy() optimizer = tf.keras.optimizers.Adam() train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') valid_loss = tf.keras.metrics.Mean(name='valid_loss') valid_accuracy = tf.keras.metrics.CategoricalAccuracy(name='valid_accuracy') # @tf.function
epochs = 200 nhid1 = 512 elif args.dataset == "citeseer": learning_rate = 0.03 epochs = 50 nhid1 = 512 elif args.dataset == "pubmed": learning_rate = 0.05 epochs = 200 nhid1 = 256 elif args.dataset == "dblp": learning_rate = 0.03 epochs = 100 nhid1 = 256 print("Dataset :",args.dataset) line_content, contentset, hyper_incidence_matrix, adj_line, train_node, test_node, val_node, labelset, label, classes,splits = data_preprocess(args.dataset) features = sp.csr_matrix(line_content, dtype=np.float32) features = normalize(features) features = torch.FloatTensor(np.array(features.todense())) hyper_incidence_tensor = torch.FloatTensor(hyper_incidence_matrix) adj_line = sp.csr_matrix(adj_line, dtype=np.float32) adj = sparse_mx_to_torch_sparse_tensor(adj_line) idx_train = torch.LongTensor(train_node) idx_test = torch.LongTensor(test_node) idx_val = torch.LongTensor(val_node) labels = torch.LongTensor(np.where(labelset)[1]) model = HAIN(nfeat=contentset.shape[1], nhid1=nhid1, nclass=len(classes), dropout=dropout)
def reset_threshold(self, retrained_data_filename):
    """Recompute the anomaly threshold from a retraining dataset.

    Loads and preprocesses the data, measures the autoencoder's MAE
    reconstruction loss, and sets ``self.threshold`` to
    mean(loss) + std(loss) + 0.03.
    """
    inputs, _ = data_loader(retrained_data_filename)
    inputs = data_preprocess(inputs)
    recon = self.auto_encoder.predict(inputs)
    losses = keras.losses.mae(recon, inputs)
    self.threshold = np.mean(losses) + np.std(losses) + 0.03
# PANDAS version: DataFrame.to_numpy exists only from pandas 0.24.0 on;
# older versions must fall back to the .values attribute.
pd_version = pd.__version__.split('.')
# BUG FIX: the original compared each component independently
# (major >= 0 AND minor >= 24 AND patch >= 0), which misclassifies e.g.
# pandas 1.0.x (minor 0 < 24). Comparing the whole version tuple is correct.
if tuple(int(p) for p in pd_version[:3]) >= (0, 24, 0):
    # PD version >= 0.24.0
    data = df.to_numpy(dtype=np.int32)
else:
    data = df.values
    data = data.astype(np.int32)
# Compute pip count
pip_o, pip_x = utils.pip_count(data)
# Compute reward
reward = utils.cal_reward(pip_x, pip_o)
# Preprocess
data_processed = utils.data_preprocess(data)
# Prepare for dataloader creation
x = data_processed
y = reward
batch_size = 256
# Shuffle indices: first 1M samples train, remainder validation.
N = len(data)
idx = np.random.permutation(N)
idx_train, idx_val = idx[:1000000], idx[1000000:]
# Split data into train and validation
X = {'train': x[idx_train], 'val': x[idx_val]}
Y = {'train': y[idx_train], 'val': y[idx_val]}