def main():
    """Parse CLI flags, prepare cached MNIST subsets, and launch training.

    Flags:
        -start: iteration count to resume training after (default 0).
        -gpu:   Caffe GPU device id (default 0).
    """
    parser = argparse.ArgumentParser(description='start parser')
    parser.add_argument('-start', action='store', default=0, type=int)
    parser.add_argument('-gpu', action='store', default=0, type=int)
    arg = parser.parse_args()
    # FIX: the original Python 2 `print` statements are a SyntaxError on
    # Python 3; the parenthesized form below behaves identically on both.
    print('---------------------------------------------------')
    print('GPU ID %d ' % arg.gpu)
    print('Continue training after %d iterations' % arg.start)
    print('---------------------------------------------------')
    caffe.set_device(arg.gpu)
    caffe.set_mode_gpu()
    # Load the cached random training subset, rebuilding it on first run.
    if not (os.path.isfile('mnist_5000_trainset.npy')
            and os.path.isfile('mnist_5000_trainlabels.npy')):
        trainset, trainlabels = load_mnist(dataset="training",
                                           imsize=image_size[1:],
                                           path="../MNIST")
        assert trainset.shape[0] == 60000
        trainmean = np.expand_dims(np.mean(trainset, axis=0), axis=0)
        # Save a visual sanity check of the dataset mean image.
        plt.figure()
        plt.imshow(np.uint8(trainmean[0]))
        plt.axis('off')
        plt.savefig('trainmean.png', cmap='gray')
        plt.close('all')
        # Random subset, drawn without replacement, then cached to disk.
        ind = np.random.choice(trainset.shape[0], trainset_size,
                               replace=False)
        trainset = trainset[ind]
        trainlabels = trainlabels[ind]
        np.save('mnist_5000_trainset.npy', trainset)
        np.save('mnist_5000_trainlabels.npy', trainlabels)
        np.save('mnist_5000_trainmean.npy', trainmean)
    else:
        trainset = np.load('mnist_5000_trainset.npy')
        trainlabels = np.load('mnist_5000_trainlabels.npy')
        trainmean = np.load('mnist_5000_trainmean.npy')
    assert trainset.shape[
        0] == trainset_size, 'trainset has wrong number of samples: %d vs %d' % (
            trainset.shape[0], trainset_size)
    testset, testlabels = load_mnist(dataset="testing",
                                     imsize=image_size[1:],
                                     path="../MNIST")
    assert testset.shape[
        0] == testset_size, 'testset has wrong number of samples: %d vs %d' % (
            testset.shape[0], testset_size)
    testmean = np.expand_dims(np.mean(testset, axis=0), axis=0)
    trainset = data_preprocess(trainset, trainmean)
    testset = data_preprocess(testset, testmean)
    # Train the network, resuming after arg.start iterations.
    train(trainset, trainlabels, testset, testlabels, arg.start)
def main():
    """Evaluate the saved model on the clean test set and print accuracy."""
    features, labels = data_loader(clean_data_filename)
    features = data_preprocess(features)
    model = keras.models.load_model(model_filename)
    # Predicted class = argmax over the softmax outputs, per sample.
    predicted = np.argmax(model.predict(features), axis=1)
    accuracy = np.mean(np.equal(predicted, labels)) * 100
    print('Classification accuracy:', accuracy)
def infer(start: list):
    """Run one tracker step from ``start``.

    :param start: initial point (x, y, z) in physical coordinates
    :return: shell offsets ``[sx, sy, sz]`` for the candidate moves, the
             direction indices sorted by descending confidence, the
             predicted radius, and the current termination probability;
             ``None`` when the patch around ``start`` leaves the volume.
    """
    max_z, max_x, max_y = (re_spacing_img.shape[0], re_spacing_img.shape[1],
                           re_spacing_img.shape[2])
    cut_size = 9
    spacing_x, spacing_y, spacing_z = spacing[0], spacing[1], spacing[2]
    # Physical coordinates -> voxel indices in the resampled volume.
    center_x_pixel = get_spacing_res2(start[0], spacing_x, resize_factor[1])
    center_y_pixel = get_spacing_res2(start[1], spacing_y, resize_factor[2])
    center_z_pixel = get_spacing_res2(start[2], spacing_z, resize_factor[0])
    left_x, right_x = center_x_pixel - cut_size, center_x_pixel + cut_size
    left_y, right_y = center_y_pixel - cut_size, center_y_pixel + cut_size
    left_z, right_z = center_z_pixel - cut_size, center_z_pixel + cut_size

    # Guard clause: the cube must lie entirely inside the volume.
    outside = (left_x < 0 or right_x < 0 or left_y < 0 or right_y < 0
               or left_z < 0 or right_z < 0 or left_x >= max_x
               or right_x >= max_x or left_y >= max_y or right_y >= max_y
               or left_z >= max_z or right_z >= max_z)
    if outside:
        return None

    side = cut_size * 2 + 1
    patch = np.zeros((side, side, side))
    for slice_idx in range(left_z, right_z + 1):
        plane = re_spacing_img[slice_idx].copy()
        patch[slice_idx - left_z] = plane[left_y:right_y + 1,
                                          left_x:right_x + 1]

    input_data = data_preprocess(patch)
    inputs = input_data.to(device)
    outputs = infer_model(inputs.float())
    outputs = outputs.view((len(input_data), max_points + 1))
    # First max_points columns: direction scores; last column: radius.
    direction_scores = outputs[:, :len(outputs[0]) - 1]
    radius_out = outputs[:, -1]
    direction_probs = torch.nn.functional.softmax(direction_scores, 1)
    indexs = np.argsort(direction_probs.cpu().detach().numpy()[0])[::-1]
    curr_prob = prob_terminates(direction_probs,
                                max_points).cpu().detach().numpy()[0]
    curr_r = radius_out.cpu().detach().numpy()[0]
    sx, sy, sz = get_shell(max_points, curr_r)
    return [sx, sy, sz], indexs, curr_r, curr_prob
def my_generator(Xtrain, Ytrain, length, n_channel, n_classes, random_noise,
                 normalized, batch_size):
    """Yield endless (x, y) training batches of random fixed-length windows.

    Every epoch reshuffles the sample order; for each batch one random
    window start is drawn (shared by the whole batch) and ``length``
    timesteps are cropped from each selected sample, then passed through
    ``myutils.data_preprocess``.

    :param Xtrain: array of shape (n_samples, n_timesteps, n_channel)
    :param Ytrain: labels of shape (n_samples, n_classes)
    :param length: window length cropped from each sample
    :param random_noise: forwarded to ``myutils.data_preprocess``
    :param normalized: forwarded to ``myutils.data_preprocess``
    :yield: (x, y) arrays of shapes (batch_size, length, n_channel) and
            (batch_size, n_classes)
    """
    n_sample = Xtrain.shape[0]
    ind = list(range(n_sample))
    # FIX: np.float was removed in NumPy 1.24; builtin float is the
    # documented drop-in replacement (both mean float64 here).
    x = np.empty((batch_size, length, n_channel), dtype=float)
    y = np.empty((batch_size, n_classes), dtype=int)
    while True:
        np.random.shuffle(ind)
        for i in range(n_sample // batch_size):
            # One random window start, shared by the whole batch.
            st = random.choice(np.arange(0, Xtrain.shape[1] - length))
            i_batch = ind[i * batch_size:(i + 1) * batch_size]
            for j, k in enumerate(i_batch):
                x[j, :] = myutils.data_preprocess(
                    Xtrain[k, st:(st + length), :],
                    random_noise=random_noise,
                    normalized=normalized)
                y[j, :] = Ytrain[k, :]
            yield x, y
def search_seeds_ostias(max_size=(200, 10)):
    '''
    Find seed-point and ostium-point candidates by scoring random patches.

    :param max_size: keep the top max_size[0] seed points and the top
        max_size[1] ostia points (sorted by descending predicted proximity)
    :return: (res_seeds, res_ostia) lists of ((x, y, z), score) pairs
    '''
    print("search seeds and ostias")
    spacing_x, spacing_y, spacing_z = spacing[0], spacing[1], spacing[2]
    re_spacing_img, curr_spacing, resize_factor = resample(
        src_array, np.array([spacing_z, spacing_x, spacing_y]),
        np.array([1, 1, 1]))
    re_spacing_img, meam_minc, mean_minr, mean_maxc, mean_maxr = crop_heart(
        re_spacing_img)
    cut_size = 9
    res_seeds = {}
    res_ostia = {}
    random_point_size = 80000
    batch_size = 1000
    new_patch_list = []
    center_coord_list = []
    z, h, w = re_spacing_img.shape
    offset_size = 10
    # FIX: np.random.random_integers was removed from NumPy; randint with
    # an inclusive upper bound (high + 1) is the documented replacement.
    x_list = np.random.randint(meam_minc - offset_size,
                               mean_maxc + offset_size + 1,
                               (random_point_size, 1))
    y_list = np.random.randint(mean_minr - offset_size,
                               mean_maxr + offset_size + 1,
                               (random_point_size, 1))
    z_list = np.random.randint(0, z + 1, (random_point_size, 1))
    index = np.concatenate([x_list, y_list, z_list], axis=1)
    # Deduplicate sampled coordinates.
    index = list(set(tuple(x) for x in index))

    def _score_queued_patches():
        # Run both models on the queued patches and record the scores.
        input_data = torch.cat(new_patch_list, dim=0)  # torch uses dim=, not axis=
        inputs = input_data.to(device)
        seeds_outputs = seeds_model(inputs.float())
        seeds_outputs = seeds_outputs.view((len(input_data)))
        seeds_proximity = seeds_outputs.cpu().detach().numpy()
        ostia_outputs = ostia_model(inputs.float())
        ostia_outputs = ostia_outputs.view(len(input_data))
        ostia_proximity = ostia_outputs.cpu().detach().numpy()
        for j in range(len(center_coord_list)):
            res_seeds[center_coord_list[j]] = seeds_proximity[j]
            res_ostia[center_coord_list[j]] = ostia_proximity[j]
        new_patch_list.clear()
        center_coord_list.clear()

    for i in index:
        center_x_pixel, center_y_pixel, center_z_pixel = i[0], i[1], i[2]
        left_x = center_x_pixel - cut_size
        right_x = center_x_pixel + cut_size
        left_y = center_y_pixel - cut_size
        right_y = center_y_pixel + cut_size
        left_z = center_z_pixel - cut_size
        right_z = center_z_pixel + cut_size
        # Only keep cubes that lie fully inside the resampled volume
        # (NOTE(review): x is checked against h and y against w as in the
        # original — confirm the intended axis order against re_spacing_img).
        if left_x >= 0 and right_x < h and left_y >= 0 and right_y < w and left_z >= 0 and right_z < z:
            new_patch = np.zeros(
                (cut_size * 2 + 1, cut_size * 2 + 1, cut_size * 2 + 1))
            for ind in range(left_z, right_z + 1):
                src_temp = re_spacing_img[ind].copy()
                new_patch[ind - left_z] = src_temp[left_y:right_y + 1,
                                                   left_x:right_x + 1]
            new_patch_list.append(data_preprocess(new_patch))
            center_coord_list.append(
                (center_x_pixel, center_y_pixel, center_z_pixel))
            if len(new_patch_list) == batch_size:
                _score_queued_patches()
    # BUG FIX: the original only scored full batches, silently discarding
    # up to batch_size - 1 queued patches at the end of the loop.
    if new_patch_list:
        _score_queued_patches()

    res_seeds = sorted(res_seeds.items(), key=lambda item: item[1],
                       reverse=True)
    res_ostia = sorted(res_ostia.items(), key=lambda item: item[1],
                       reverse=True)
    res_seeds = res_seeds[:max_size[0]]
    res_ostia = res_ostia[:max_size[1]]
    return res_seeds, res_ostia
def train(args):
    """Train the video-caption model, optionally resuming from a checkpoint.

    When ``args.init_from`` is set, the saved config/vocab are loaded and
    validated against the current command line; otherwise a fresh config and
    vocabulary are written to ``args.save_dir``. Training runs for
    ``args.n_epoch`` epochs, logging per-batch loss to ``log/loss.txt`` and
    saving a checkpoint every ``args.save_every`` epochs.
    """
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        # get ckpt
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        # get vocab
        with open(os.path.join(args.init_from, 'vocab.pkl'), 'rb') as f:
            vocab = cPickle.load(f)
        vocab_inv = {v: k for k, v in vocab.items()}
        # read data
        _, _, train_feat_id, train_caption, test_feat_id, test_caption = data_preprocess(
            args.train_label_json, args.test_label_json)
        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_args = cPickle.load(f)
        need_be_same = [
            "dim_image", "dim_hidden", "n_lstm_step", "n_video_step",
            "n_caption_step"
        ]
        for checkme in need_be_same:
            assert vars(saved_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme
        # complete arguments to fulfill different versions (older saved
        # configs may predate the schedule_sampling option)
        if ("schedule_sampling" in vars(saved_args)):
            print("schedule_sampling: %d" %
                  vars(saved_args)["schedule_sampling"])
        else:
            vars(saved_args)["schedule_sampling"] = 0.0
    else:
        # fresh run: persist config and vocabulary so it can be resumed later
        with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
            cPickle.dump(args, f)
        vocab, vocab_inv, train_feat_id, train_caption, test_feat_id, test_caption = data_preprocess(
            args.train_label_json, args.test_label_json)
        with open(os.path.join(args.save_dir, 'vocab.pkl'), 'wb') as f:
            cPickle.dump(vocab, f)
    model = Video_Caption_Generator(args, n_vocab=len(vocab), infer=False)
    # add gpu options
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        print("Initialized")
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        loss_fd = open('log/loss.txt', 'w')
        loss_to_draw = []
        for epoch in range(0, args.n_epoch):
            if (model.schedule_sampling > 0.0):
                # [pseudo] prob of schedule sampling linearly increases with epochs
                model.schedule_sampling = np.min(
                    [model.schedule_sampling * (1.0 + epoch / 50), 1.0])
            # shuffle sample order each epoch
            index = np.array(range(len(train_feat_id)))
            np.random.shuffle(index)
            epoch_train_feat_id = train_feat_id[index]
            epoch_train_caption = train_caption[index]
            loss_to_draw_epoch = []
            # iterate over full minibatches (a trailing partial batch is skipped)
            for start, end in zip(
                    range(0, len(epoch_train_feat_id), model.batch_size),
                    range(model.batch_size, len(epoch_train_feat_id),
                          model.batch_size)):
                start_time = time.time()
                # get one minibatch
                batch_feat_id = epoch_train_feat_id[start:end]
                batch_caption = epoch_train_caption[start:end]
                # get video features
                current_feat, current_feat_mask = get_video_feat(
                    args.train_video_feat_path, batch_feat_id)
                # randomly select one caption per video and pad captions
                # to maxlen = n_caption_step + 1
                current_caption, current_caption_mask = get_padding_caption(
                    vocab, batch_caption, maxlen=model.n_caption_step + 1)
                # run train_op to optimize tf_loss
                _, loss_val = sess.run(
                    [model.train_op, model.tf_loss],
                    feed_dict={
                        model.video: current_feat,
                        model.video_mask: current_feat_mask,
                        model.caption: current_caption,
                        model.caption_mask: current_caption_mask
                    })
                loss_to_draw_epoch.append(loss_val)
                print('idx: ', start, " Epoch: ", epoch, " loss: ", loss_val,
                      ' Elapsed time: ', str((time.time() - start_time)))
                loss_fd.write('epoch ' + str(epoch) + ' loss ' +
                              str(loss_val) + '\n')
            # checkpoint every args.save_every epochs
            if np.mod(epoch, args.save_every) == 0:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=epoch)
                print("Epoch ", epoch,
                      "model saved to {}".format(checkpoint_path))
        loss_fd.close()
# Activity-recognition pipeline: load the CSV dataset, split it 80/20,
# train a neural net on the train split, and report test accuracy.
from models import nn
from utils import read_file, data_preprocess, print_accuracy_stats

dataset = read_file("activity_recognition_dataset.csv")
# data_preprocess(dataset, 0.8) -> (train, test) split at an 0.8 ratio.
train, test = data_preprocess(dataset, 0.8)
# Ground-truth labels are the last column of the test split.
ts_y = test[:, -1:]
x = nn.NeuralNet();
x.train(train)
pred = x.test(test)
print_accuracy_stats(pred, ts_y)
# Evaluate a ResNet50 model on the CIFAR-10 test split.
import utils
import model
from model import MyModel

# Load CIFAR-10 and prepare the test data (preprocess + one-hot labels).
(train_img, train_lab), (test_img, test_lab) = data_loader("CIFAR10")
test_img = utils.data_preprocess(test_img)
test_lab = utils.one_hot_encoder(test_lab)
epochs = 10
resnet50_model = MyModel()
resnet50_model = resnet50_model.ResNet50()


def evaluate(test_im, test_lab):
    """Run Keras-style evaluate on the global resnet50_model.

    Returns the metrics list; index 0 is printed as loss and index 1 as
    accuracy below.
    """
    test_result = resnet50_model.evaluate(test_im, test_lab, verbose=0)
    return test_result


test_result = evaluate(test_img, test_lab)
print("ResNet50 loss: ", test_result[0])
print("ResNet50 accuracy: ", test_result[1])
# hyperparameters. flags.DEFINE_integer('batch_size', 18, 'Batch Size (default: 64), should be tuned according to data size.') flags.DEFINE_integer('num_epochs', 1000, 'Number of training epochs (default: 500), early stop.') flags.DEFINE_integer('folds', 10, 'Number of folds in cross validation (default: 10)') flags.DEFINE_integer('class_size', 2, 'Classification Size (default: 2), should be tuned according to different datasets.') flags.DEFINE_integer('seq_len', 18, 'Number of selected nodes (default: 18 MUTAG), should be tuned according to different datasets.') flags.DEFINE_integer('order_len', 45, 'Number of 3-order length (default: 45 MUTAG), should be tuned according to different datasets.') flags.DEFINE_float('learning_rate', 1e-3, 'MomentumOptimizer/AdamOptimizer learning rate (default: 0.001)') flags.DEFINE_float('momentum', 0.9, 'MomentumOptimizer learning rate decay (default: 0.9)') flags.DEFINE_string('data_fn', 'datasets/mutag_data.npy', 'training & test file name, including data matrix (default: mutag_data.npy)') flags.DEFINE_string('label_fn', 'datasets/mutag_label.npy', 'training & test file name, including label vector (default: mutag_label.npy)') if __name__ == "__main__": # divide train set and test set. data, label = utils.data_preprocess(FLAGS.data_fn, FLAGS.label_fn) test_size = int(data.shape[0]/FLAGS.folds) train_size = data.shape[0]-test_size with tf.Session() as sess: build_time = time.time() net = models.MotifAttGCN(sess, FLAGS.batch_size, FLAGS.class_size, FLAGS.seq_len, FLAGS.order_len) # list containing each accuracy calculated from each fold data. accs = [] for fold in range(FLAGS.folds): sess.run(tf.global_variables_initializer()) begin_time = time.time() print('--------this fold initialization(build model+init) takes %.3f minutes\n'%((begin_time-build_time)/60)) # get batch data. if fold < FLAGS.folds - 1:
import os # tf.config.experimental_run_functions_eagerly(True) os.environ["CUDA_VISIBLE_DEVICES"] = "0" # (train_img, train_lab),(test_img, test_lab) = utils.data_loader("CIFAR10") model = san.san(sa_type=1, layers=(2, 1, 2, 4, 1), kernels=[3, 7, 7, 7, 7]) model.build(input_shape=(config.BATCH_SIZE, config.channels, config.image_height, config.image_width)) model.summary() train_img, train_lab, test_img, test_lab = utils.read_train_test_data( "/Users/hamnamoieez/Desktop/Projects/self-attention-image-recognition/dataset" ) train_img = utils.data_preprocess(train_img) train_lab = utils.one_hot_encoder(train_lab) X_train, X_val, y_train, y_val = utils.validation_data(train_img, train_lab) train_generator, val_generator = utils.data_augmentation( X_train, y_train, X_val, y_val) # define loss and optimizer loss_object = tf.keras.losses.CategoricalCrossentropy() optimizer = tf.keras.optimizers.Adam() train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') valid_loss = tf.keras.metrics.Mean(name='valid_loss') valid_accuracy = tf.keras.metrics.CategoricalAccuracy(name='valid_accuracy') # @tf.function
epochs = 200 nhid1 = 512 elif args.dataset == "citeseer": learning_rate = 0.03 epochs = 50 nhid1 = 512 elif args.dataset == "pubmed": learning_rate = 0.05 epochs = 200 nhid1 = 256 elif args.dataset == "dblp": learning_rate = 0.03 epochs = 100 nhid1 = 256 print("Dataset :",args.dataset) line_content, contentset, hyper_incidence_matrix, adj_line, train_node, test_node, val_node, labelset, label, classes,splits = data_preprocess(args.dataset) features = sp.csr_matrix(line_content, dtype=np.float32) features = normalize(features) features = torch.FloatTensor(np.array(features.todense())) hyper_incidence_tensor = torch.FloatTensor(hyper_incidence_matrix) adj_line = sp.csr_matrix(adj_line, dtype=np.float32) adj = sparse_mx_to_torch_sparse_tensor(adj_line) idx_train = torch.LongTensor(train_node) idx_test = torch.LongTensor(test_node) idx_val = torch.LongTensor(val_node) labels = torch.LongTensor(np.where(labelset)[1]) model = HAIN(nfeat=contentset.shape[1], nhid1=nhid1, nclass=len(classes), dropout=dropout)
def reset_threshold(self, retrained_data_filename):
    """Recompute the anomaly threshold from a retraining dataset.

    Loads and preprocesses the data, measures the autoencoder's MAE
    reconstruction loss, and sets ``self.threshold`` to
    mean(loss) + std(loss) + 0.03.
    """
    inputs, _ = data_loader(retrained_data_filename)
    inputs = data_preprocess(inputs)
    recon = self.auto_encoder.predict(inputs)
    losses = keras.losses.mae(recon, inputs)
    self.threshold = np.mean(losses) + np.std(losses) + 0.03
# PANDAS version: DataFrame.to_numpy exists only from pandas 0.24.0 on;
# older versions must fall back to the .values attribute.
pd_version = pd.__version__.split('.')
# BUG FIX: the original compared each component independently
# (major >= 0 AND minor >= 24 AND patch >= 0), which misclassifies e.g.
# pandas 1.0.x (minor 0 < 24). Comparing the whole version tuple is correct.
if tuple(int(p) for p in pd_version[:3]) >= (0, 24, 0):
    # PD version >= 0.24.0
    data = df.to_numpy(dtype=np.int32)
else:
    data = df.values
    data = data.astype(np.int32)
# Compute pip count
pip_o, pip_x = utils.pip_count(data)
# Compute reward
reward = utils.cal_reward(pip_x, pip_o)
# Preprocess
data_processed = utils.data_preprocess(data)
# Prepare for dataloader creation
x = data_processed
y = reward
batch_size = 256
# Shuffle indices: first 1M samples train, remainder validation.
N = len(data)
idx = np.random.permutation(N)
idx_train, idx_val = idx[:1000000], idx[1000000:]
# Split data into train and validation
X = {'train': x[idx_train], 'val': x[idx_val]}
Y = {'train': y[idx_train], 'val': y[idx_val]}