def main():
    """Build the AlexNet inference graph and freeze it to constants.

    Creates a float32 NHWC input placeholder (224x224x3) named "input",
    attaches a softmax head named "softmax", then runs
    convert_variables_to_constants inside an NPU-configured session.
    """
    args = parse_args()
    tf.reset_default_graph()
    # Ensure the output directory for the frozen graph exists.
    if not tf.gfile.Exists(args.graph_dir):
        tf.gfile.MakeDirs(args.graph_dir)
    # set inputs node, here you should use placeholder.
    x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="input")
    # create inference graph
    pred = alexnet(x, args.chip.lower(), class_num=args.num_classes)
    y = tf.nn.softmax(logits=pred, axis=-1, name="softmax")
    # NPU-specific session configuration: enable the NpuOptimizer graph
    # rewriter and disable TF's remapping pass (required for NPU offload).
    config = tf.ConfigProto()
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        convert_variables_to_constants(sess, args)
        # freeze_model_graph(sess, args)
    print("Done")
def main():
    """Optimize a random image to maximize one AlexNet neuron's activation.

    Gradient ascent is performed on the image pixels themselves (the model
    weights are frozen in eval mode). Iteration stops at MAX_ITER or when the
    loss change falls below EPS.

    Returns
    -------
    opt_images : list of HxWx3 numpy arrays -- the image after each
        iteration; index 0 is the initial random image.
    losses : list of per-iteration scalar losses.
    """
    net = alexnet(pretrained=True, to_select=[LAYER_IDX]).to(DEVICE).eval()
    # The image is the only parameter being optimized.
    opt_img = Variable(torch.rand(BATCH_SIZE, 3, 128, 128).to(DEVICE),
                       requires_grad=True)
    optimizer = optim.SGD([opt_img], lr=LR, weight_decay=WEIGHT_DECAY)

    prev_loss = np.inf  # BUGFIX: np.Inf alias was removed in NumPy 2.0
    opt_images = [opt_img[0].data.cpu().numpy().transpose(1, 2, 0)]
    losses = []
    for i in range(MAX_ITER):
        # BUGFIX: Python 3 print call (original used a Py2 print statement).
        print("Iteration {}: Loss {}".format(i, prev_loss))
        optimizer.zero_grad()
        # Forward prop: activation of a single unit in the selected layer.
        activation = net(opt_img)[LAYER_IDX][0, CHAN, NEURON_X, NEURON_Y]
        # Compute loss
        loss = loss_func(activation)
        curr_loss = loss.data.cpu().numpy()
        # Back propagation (pixels only).
        loss.backward()
        optimizer.step()
        # Save image. Index is 0 since only optimizing for one image right now.
        opt_images.append(opt_img[0].data.cpu().numpy().transpose(1, 2, 0))
        # BUGFIX: record the loss before the convergence test so the final
        # (converged) value is not silently dropped as it was originally.
        losses.append(curr_loss)
        # Check convergence
        if np.abs(curr_loss - prev_loss) <= EPS:
            break
        prev_loss = curr_loss
    return opt_images, losses
def main():
    """Time repeated single-image inference of the Keras AlexNet model.

    Runs one warm-up predict (so graph construction is excluded from
    timing), then STAT_REPEAT timed predictions, and writes the average
    per-call latency to tmp-ck-timer.json in CK timer format.
    """
    import json
    import os
    import time

    timers = {}
    # Repetition count, overridable via the STAT_REPEAT env var.
    # BUGFIX/idiom: `or 50` covers both unset (None) and empty string,
    # replacing the original `== ''` / `== None` chain.
    repeat = int(os.environ.get('STAT_REPEAT') or 50)

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.gpu_options.allocator_type = 'BFC'
    # Cap the GPU memory fraction (percent taken from the environment).
    config.gpu_options.per_process_gpu_memory_fraction = float(
        os.getenv('CK_TF_GPU_MEMORY_PERCENT', 33)) / 100.0
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    """ Call model construction function and run model multiple times. """
    model = alexnet()
    test_x = np.random.rand(224, 224, 3)
    # Warm-up call: graph/weight initialization is not part of the timing.
    x = model.predict(np.array([test_x]))
    dt = time.time()
    for _ in range(repeat):
        x = model.predict(np.array([test_x]))
    print(x)
    t = (time.time() - dt) / repeat
    timers['execution_time_classify'] = t
    timers['execution_time'] = t
    with open('tmp-ck-timer.json', 'w') as ftimers:
        json.dump(timers, ftimers, indent=2)
def train(pertrained=False, resume_file=None):
    """Train AlexNet, optionally from pretrained weights and/or a checkpoint.

    Parameters
    ----------
    pertrained : bool
        (sic -- name kept for caller compatibility) If True, build
        ``model.alexnet(pretrained=True)``; otherwise a freshly
        initialized ``model.AlexNet``.
    resume_file : str or None
        Path to a checkpoint produced by this function; when the file
        exists, training resumes from its stored epoch and weights.
    """
    if pertrained:
        from model import alexnet
        net = alexnet(pretrained=True, num_classes=NUMBER_CLASSES)
    else:
        from model import AlexNet
        net = AlexNet(num_classes=NUMBER_CLASSES)

    valid_precision = 0
    policies = net.parameters()
    optimizer = optim.SGD(policies, lr=LR, momentum=MOMENTUM,
                          weight_decay=WEIGHT_DECAY)

    # Per-run timestamped log files for train and validation metrics.
    train_log = open(
        "logs/train_logs_{}.log".format(
            time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())), "w")
    valid_log = open(
        "logs/valid_logs_{}.log".format(
            time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())), "w")
    train_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))
    valid_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))

    # Resume training. BUGFIX: start_epoch is now always initialized -- the
    # original left it undefined (NameError at the epoch loop) whenever
    # resume_file was None/empty.
    start_epoch = 0
    if resume_file:
        if os.path.isfile(resume_file):
            print(("=> loading checkpoint '{}'".format(resume_file)))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['model_state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                resume_file, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(resume_file)))
    # valid_precision = valid(net)

    for epoch in range(start_epoch, EPOCHES):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        correct = AverageMeter()
        end = time.time()
        optimizer = adjust_learning_rate(optimizer, epoch, LR, LR_steps,
                                         WEIGHT_DECAY)
        for i_batch, sample_batched in enumerate(train_dataloader):
            # measure data loading time
            data_time.update(time.time() - end)
            inputs, labels = sample_batched
            if CUDA_AVALIABLE:
                outputs = net.forward(inputs.cuda())
                labels = labels.long().flatten().cuda()
            else:
                outputs = net.forward(inputs)
                labels = labels.long().flatten()
            outputs = outputs.reshape([-1, NUMBER_CLASSES])
            loss = criterion(outputs, labels)

            # Update running loss / accuracy statistics.
            losses.update(loss.item(), inputs.size(0))
            _, predicted = torch.max(outputs.data, 1)
            correct.update(
                (predicted == labels.long()).sum().item() / len(labels),
                inputs.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_batch % 10 == 0:
                print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                           epoch, i_batch, len(train_dataloader),
                           batch_time=batch_time, data_time=data_time,
                           loss=losses, top1=correct,
                           lr=optimizer.param_groups[-1]['lr'])))
        train_log.write("{:5d}\t{:.5f}\t{:.5f}\n".format(
            epoch, losses.avg, correct.avg))
        train_log.flush()
        if epoch % 1 == 0:
            valid_precision = valid(net, epoch, valid_log)
        # Save the network every 10 epochs and at the final epoch.
        if (epoch > 0 and epoch % 10 == 0) or epoch == EPOCHES - 1:
            save_path = os.path.join(
                "models",
                "{:d}_{}_{:d}_{:d}_{:.5f}.pt".format(int(time.time()),
                                                     "alexnet", epoch,
                                                     BATCHSIZE,
                                                     valid_precision))
            print("[INFO] Save weights to " + save_path)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': net.state_dict(),
                    # BUGFIX: call state_dict() -- the original stored the
                    # bound method object, not the optimizer state. The
                    # (misspelled) key name is kept for loader compatibility.
                    'optimizer_state_dir': optimizer.state_dict(),
                    'loss': loss
                }, save_path)
    train_log.close()
    valid_log.close()
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
# NOTE(review): scipy.misc.imread/imresize were removed in SciPy >= 1.3;
# imageio / PIL are the usual replacements.
from scipy.misc import imread
from scipy.misc import imresize
import tensorflow as tf

################################################################################
#Train
# Graph inputs: NHWC float images, one-hot labels over 3 classes, and a
# dropout keep-probability fed at run time.
x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
x_image = tf.reshape(x, [-1, 224, 224, 3])  # no-op reshape; x is already NHWC
y_ = tf.placeholder(tf.float32, shape=[None, 3])
keep_prob = tf.placeholder(tf.float32)
y_conv = model.alexnet(x_image, keep_prob)

# Batched train/test splits; sort_data.Dataset is defined elsewhere.
train_image_batch, test_image_batch, train_label_batch, test_label_batch = sort_data.Dataset(
)

#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
# NOTE(review): hand-rolled cross-entropy; tf.log(y_conv) yields -inf/NaN if
# the network ever outputs an exact 0 -- tf.nn.softmax_cross_entropy_with_logits
# on pre-softmax logits would be numerically safer. Left as-is.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# SGD with Nesterov momentum (lr=0.001, momentum=0.9).
train_step = tf.train.MomentumOptimizer(
    0.001, 0.9, use_locking=False, name='Momentum',
    use_nesterov=True).minimize(cross_entropy)
# Fraction of samples whose argmax prediction matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
transform=data_transforms_val) val_loader = torch.utils.data.DataLoader(val_datasets, batch_size=batch_size, shuffle=False, num_workers=1) dataloaders = {'train': train_loader, 'val': val_loader} dataset_sizes = {'train': len(train_datasets), 'val': len(val_datasets)} class_names_train = train_datasets.classes # Neural network and optimizer # We define neural net in model.py so that it can be reused by the evaluate.py script from model import Net, simple_cnn, alexnet, vgg11, resnet34 model_fit = alexnet() model = model_fit[0] input_size = model_fit[1] if use_cuda: print('Using GPU') model.cuda() else: print('Using CPU') params_to_update = model.parameters() print("Params to learn:") if feature_extract: params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad == True:
# folder containing all images to be tested testdir = '../HemaCam-Data/Segmented_Cells/Cell_Images' ##### MAIN STARTS HERE if __name__ == "__main__": # This is the Graph input # x is input images and y is the label for the images x = tf.placeholder(tf.float32, [None, fine_size, fine_size, c]) y = tf.placeholder(tf.int64, None) keep_dropout = tf.placeholder(tf.float32) train_phase = tf.placeholder(tf.bool) # Construct model logits = alexnet(x, keep_dropout, train_phase) # Define loss and optimizer loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)) train_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) # Evaluate model accuracy1 = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)) # define initialization init = tf.global_variables_initializer() # define saver for checkpoint saver = tf.train.Saver() opt_data_test = {
def main():
    """Entry point: parse CLI args, build the selected (optionally quantized)
    model, optionally resume from a checkpoint, train and/or evaluate on
    CIFAR, then save a final checkpoint.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    # model cfg
    # parser.add_argument('--pretrained', action='store_true', default=False,
    #                     help='load pretrained model')
    parser.add_argument('--model-type', type=str, default="",
                        help="type of the model.")
    parser.add_argument('--model-structure', type=int, default=0, metavar='N',
                        help='model structure to be trained (default: 0)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint, (default: None)')
    parser.add_argument('--e', '--evaluate', dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--Quantized', action='store_true', default=False,
                        help='use quantized model')
    parser.add_argument('--qbit', default='4,8',
                        help='activation/weight qbit')
    # dataset
    parser.add_argument('--dataset-root', type=str, default="../datasets",
                        help="load dataset path.")
    parser.add_argument('--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--train-batch-size', type=int, default=128,
                        metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=128,
                        metavar='N',
                        help='input batch size for testing (default: 128)')
    # train cfg
    parser.add_argument('--epochs', type=int, default=80, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful to restarts)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    # optimizer
    parser.add_argument('--optim', type=str, default="Adam",
                        help="optim type Adam/SGD")
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--wd', default=5e-4, type=float, metavar='W',
                        help='weight decay (default: 5e-4)')
    # scheduler
    parser.add_argument('--schedule', type=int, nargs='+',
                        default=[150, 225],
                        help='Decrease learning rate at these epochs.')
    parser.add_argument('--gamma', type=float, default=0.1,
                        help='LR is multiplied by gamma on schedule.')
    parser.add_argument('--decreasing-lr', default='16,30,54',
                        help='decreasing strategy')
    # device init cfg
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    # result output cfg
    parser.add_argument('--detail', action='store_true', default=False,
                        help='show log in detial')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    parser.add_argument('--checkpoint-path', type=str, default="",
                        help="save model path.")
    args = parser.parse_args()
    print("+++", args)

    # Train the network on the training data
    # Test the network on the test data
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)

    # net = alexnet(args.pretrained, args.resume, num_classes=10, structure=args.model_structure)
    # create model
    # config: '--qbit' is "<weight_qbit>,<activation_qbit>"; the activation
    # qbit is also reused as the input qbit.
    qbit_list = list(map(int, args.qbit.split(',')))
    quantize_cfg = {
        'input_qbit': qbit_list[1],
        'weight_qbit': qbit_list[0],
        'activation_qbit': qbit_list[1]
    }
    # Model selection; unknown model types fall back to the cifar10 model.
    if args.model_type == 'VGG16':
        net = model.VGG16()
    elif args.model_type == 'cifar10' or args.model_type == 'VGG8':
        net = model.cifar10(n_channel=128, quantized=args.Quantized,
                            **quantize_cfg)
    elif args.model_type == 'alexnet':
        net = model.alexnet(num_classes=10, structure=args.model_structure)
    elif args.model_type == 'resnet18':
        net = model.resnet18()
    else:
        net = model.cifar10(n_channel=128, quantized=args.Quantized,
                            **quantize_cfg)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)
    # for param in net.parameters():
    #     param = nn.init.normal_(param)

    # config
    milestones = list(map(int, args.decreasing_lr.split(',')))
    print(milestones)
    # optimizer = optim.SGD(net.parameters(), lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY) # not good enough 68%
    # optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)
    # NOTE(review): the SGD branch does not pass args.momentum -- confirm
    # whether momentum was meant to be used here.
    if args.optim == "Adam":
        optimizer = optim.Adam(net.parameters(), lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == "SGD":
        optimizer = optim.SGD(net.parameters(), lr=args.lr,
                              weight_decay=args.wd)
    else:
        optimizer = optim.Adam(net.parameters(), lr=args.lr,
                               weight_decay=args.wd)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                         gamma=args.gamma)

    # optionlly resume from a checkpoint
    if args.resume:
        print("=> using pre-trained model '{}'".format(args.model_type))
    else:
        print("=> creating model '{}'".format(args.model_type))
    # NOTE(review): best_prec1 is assumed to be defined at module level --
    # if no checkpoint sets it, the final torch.save below still reads it.
    global best_prec1
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading
    kwargs = {
        'num_workers': args.workers,
        'pin_memory': True
    } if use_cuda else {}
    trainloader, valloader, testloader = getcifar(args, 'pad', 4, True, 32,
                                                  True, **kwargs)
    print(len(trainloader), len(valloader), len(testloader))

    # Wall-clock the train/test run and print it as H:MM:SS.
    t_s = time.monotonic()
    if not args.evaluate:
        print("!!train!!")
        run(args, net, device, trainloader, valloader, scheduler, optimizer)
    print("!!test!!")
    test(args, net, device, testloader)
    t_e = time.monotonic()
    m, s = divmod(t_e - t_s, 60)
    h, m = divmod(m, 60)
    print("%d:%02d:%02d" % (h, m, s))

    # Save the final checkpoint next to the requested checkpoint path.
    PATH = args.checkpoint_path + '_' + args.model_type + '_' + str(
        args.model_structure) + '_final.pth'
    torch.save(
        {
            'epoch': args.epochs + 1,
            'arch': args.model_type,
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict()
        }, PATH)
    print('Finished Training')
# NOTE(review): this chunk mixes imports with top-level script statements;
# os, sys and time are used below but presumably imported earlier -- confirm.
import numpy as np
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
from torch.nn import functional as F

from runner import LoadWeight, PreHeating, Logger
from model import alexnet, CifarConfig

# CIFAR-10 class labels, indexed by predicted class id.
class_name = ['airplane', 'automobile', 'bird', 'cat', 'deer',
              'dog', 'frog', 'horse', 'ship', 'truck']

# Load configuration.
config = CifarConfig()
# config.preheating = True

# Load the network with the given saved weights.
# NOTE(review): the name LoadWeight is rebound from the imported class to
# this instance, shadowing the import.
LoadWeight = LoadWeight(net = alexnet(), weight_name = "cifar10_alexnet_5")
net = LoadWeight.net

# Model warm-up.
if config.preheating:
    PreHeating(net)

# Tee printed output into a text file.
path = os.path.abspath(os.path.dirname(__file__))
# type = sys.getfilesystemencoding()
sys.stdout = Logger('inference.txt')

print(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())))
print('------------------')
# Dump every attribute of the config object as "name:value".
print('\n'.join(['%s:%s' % item for item in config.__dict__.items()]))
print('------------------')
print( 'Dataset split into {:d} training and {:d} validation samples'.format( len(ds_train), len(ds_validation))) with tf.variable_scope('model', reuse=tf.AUTO_REUSE): # Note: We need to use placeholders for inputs and outputs. Otherwise, # the batch size would be fixed and we could not use the trained model with # a different batch size. In addition, the names of these tensors must be "inputs" # and "labels" such that we can find them on the evaluation server. DO NOT CHANGE THIS! x = tf.placeholder(tf.float32, [None] + [224, 224] + [1], 'inputs') labels = tf.placeholder(tf.float32, [None] + [NUM_CLASSES], 'labels') prediction_logits = [] if MODEL == 'alexnet': prediction_logits = model.alexnet(x, len(CLASSNAMES), dropout_rate=0.55) if MODEL == 'resnet': prediction_logits = model.resnet(x, len(CLASSNAMES)) if MODEL == 'resnet_m': prediction_logits = model.resnet_m(x, len(CLASSNAMES)) if MODEL == 'inception_resnet': prediction_logits, auxiliary = model.inception_resnet( x, len(CLASSNAMES)) # apply loss # TODO : Implement suitable loss function for multi-label problem loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=prediction_logits)) if MODEL == 'inception_resnet':
# Seed RNGs for reproducibility; cudnn.benchmark trades exact determinism
# for autotuned convolution speed.
random.seed(manualSeed)
torch.manual_seed(manualSeed)
cudnn.benchmark = True

# setup gpu driver
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load datasets
train_dataloader, test_dataloader = load_datasets(opt.datasets, opt.dataroot,
                                                  opt.batch_size)

# Load model
# NOTE(review): for any dataset other than "cifar100", model is set to the
# empty string and model.to(device) below will raise AttributeError --
# confirm whether other datasets are meant to be supported.
if opt.datasets == "cifar100":
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(alexnet())
    else:
        model = alexnet()
else:
    model = ""
print(opt)

model.to(device)
print(model)

# Loss function
criterion = torch.nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
# Split the dataset into training and validation parts.
ds_train, ds_validation = ds.split_train_validation(TRAIN_PART)
print(
    'Dataset split into {:d} training and {:d} validation samples'.format(
        len(ds_train), len(ds_validation)))

with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
    # Note: We need to use placeholders for inputs and outputs. Otherwise,
    # the batch size would be fixed and we could not use the trained model with
    # a different batch size. In addition, the names of these tensors must be "inputs"
    # and "labels" such that we can find them on the evaluation server. DO NOT CHANGE THIS!
    x = tf.placeholder(tf.float32, [None] + [224, 224] + [1], 'inputs')
    labels = tf.placeholder(tf.float32, [None] + [NUM_CLASSES], 'labels')

    # Select the architecture by name.
    # NOTE(review): prediction_logits stays unbound if MODEL matches
    # neither branch, making the loss line below raise NameError.
    if MODEL == 'alexnet':
        prediction_logits = model.alexnet(x, len(CLASSNAMES))
    if MODEL == 'resnet':
        prediction_logits = model.resnet(x, len(CLASSNAMES))

    # apply loss
    # TODO : Implement suitable loss function for multi-label problem
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels,
                                           logits=prediction_logits)
    mse_loss = tf.losses.mean_squared_error(labels=labels,
                                            predictions=prediction_logits)

    # convert into binary (boolean) predictions
    prediction_sigmoid = tf.nn.sigmoid(prediction_logits)
    prediction_int = tf.round(prediction_sigmoid)
    prediction_bin = tf.cast(prediction_int, tf.bool)
import tensorflow as tf from model import alexnet from data import read_dataset, image_to_array, path_to_4dtensor """ To.do - Apply tensorboard callback """ # Dataset Path train_path = 'resources/traing_dataset.csv' valid_path = 'resources/valid_dataset.csv' test_path = 'resources/test_dataset.csv' X, Y, is_training, cost, optimizer, accuracy, merged, model = alexnet( dropout=1) # Read dataset. classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip'] train = read_dataset(train_path) valid = read_dataset(valid_path) tran_labels = train[classes] valid_labels = valid[classes] # Setting for learning batch_size = 100 iteration = int(len(train) / 100) epochs = 100 valid_size = 10 print("batch_size: {}, iteration: {}, valid_size: {}".format(
def main(_):
    """Train AlexNet on an image-folder dataset: logs summaries, saves the
    best-validation-accuracy checkpoint, and optionally dumps a GPU
    Chrome-trace timeline profile.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("**********")
    print("===>>>dataset:{}".format(FLAGS.dataset))
    print("===>>>result:{}".format(FLAGS.result))
    print("===>>>train_step:{}".format(FLAGS.train_step))
    ## print all parameters
    for attr, flag_obj in sorted(FLAGS.__flags.items()):
        print("{} = {}".format(attr.lower(), flag_obj.value))

    ##============Obtain Data================
    data = Data(batch_size=FLAGS.batch_size,
                num_classes=FLAGS.num_classes,
                data_path=os.path.join(FLAGS.dataset, "train"),
                val_data=os.path.join(FLAGS.dataset, "val"))
    tf.logging.info("Label dict = %s", data.labels_dict)

    # Graph inputs: NHWC float images and one-hot int labels.
    x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
    y = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_classes])

    # construction model
    pred = alexnet(x, FLAGS.chip.lower(), class_num=FLAGS.num_classes)

    # define loss function and optimizer
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    global_step = tf.Variable(0, trainable=False)
    # Exponentially decayed (staircase) learning rate driven by global_step.
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step, FLAGS.decay_step,
                                               FLAGS.decay_rate,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        cost, global_step=global_step)

    # definition accuracy
    prediction_correction = tf.equal(tf.cast(tf.argmax(pred, 1),
                                             dtype=tf.int32),
                                     tf.cast(tf.argmax(y, 1),
                                             dtype=tf.int32),
                                     name='prediction')
    accuracy = tf.reduce_mean(tf.cast(prediction_correction,
                                      dtype=tf.float32),
                              name='accuracy')
    tf.summary.scalar('loss', cost)
    tf.summary.scalar('accuracy', accuracy)
    summary_op = tf.summary.merge_all()

    ## gpu profiling configuration
    if FLAGS.chip.lower() == 'gpu' and FLAGS.profiling:
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    else:
        options = None
        run_metadata = None

    config = make_config(FLAGS)

    # start training
    with tf.Session(config=config) as sess:
        init_op = tf.group(tf.local_variables_initializer(),
                           tf.global_variables_initializer())
        sess.run(init_op)
        train_writer = tf.summary.FileWriter(logdir=os.path.join(
            FLAGS.result, "train"), graph=sess.graph)
        test_writer = tf.summary.FileWriter(logdir=os.path.join(
            FLAGS.result, "test"), graph=sess.graph)

        # saver is used to save the model
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
        if FLAGS.resume_path is not None:
            tf.logging.info('Loading checkpoint from {}...'.format(
                FLAGS.resume_path))
            model_file = tf.train.latest_checkpoint(FLAGS.resume_path)
            saver.restore(sess, model_file)

        max_acc = 0
        for step in range(FLAGS.train_step):
            batch_num = data.get_batch_num()
            tf.logging.info(" step = %d, batch_num=%d", step, batch_num)
            train_accuracy_list = []
            train_loss_list = []
            for batch_count in range(batch_num):
                # Time data loading and the forward+backward pass separately.
                start = datetime.datetime.now()
                train_images, train_labels = data.get_batch(batch_count)
                end = datetime.datetime.now()
                data_deltatime = (end - start).total_seconds() * 1000

                start = datetime.datetime.now()
                feed_dict = {x: train_images, y: train_labels}
                _, train_loss, train_acc, summary = sess.run(
                    [optimizer, cost, accuracy, summary_op],
                    feed_dict=feed_dict,
                    options=options,
                    run_metadata=run_metadata)
                end = datetime.datetime.now()
                train_deltatime = (end - start).total_seconds() * 1000

                if batch_count % 30 == 0:
                    tf.logging.info(
                        "Time Used===>>>[FP+BP]:{:.3f} (ms), [Get Data]:{:.3f} (ms)\n"
                        .format(train_deltatime, data_deltatime))
                train_loss_list.append(train_loss)
                train_accuracy_list.append(train_acc)
            train_writer.add_summary(summary, step)
            tf.logging.info("train_acc = %s", np.mean(train_accuracy_list))
            tf.logging.info("train_loss = %s", np.mean(train_loss_list))

            # Periodic validation + checkpointing.
            if (step + 1) % FLAGS.save_step == 0:
                test_accuracy_list = []
                test_loss_list = []
                val_images, val_labels = data.get_val_data()
                val_feed = {x: val_images, y: val_labels}
                # NOTE(review): these two ops duplicate the accuracy ops
                # built above and are re-created on every save step, growing
                # the graph each time -- presumably unintentional; confirm.
                prediction_correction = tf.equal(tf.cast(tf.argmax(pred, 1),
                                                         dtype=tf.int32),
                                                 tf.cast(tf.argmax(y, 1),
                                                         dtype=tf.int32),
                                                 name='prediction')
                accuracy = tf.reduce_mean(tf.cast(prediction_correction,
                                                  dtype=tf.float32),
                                          name='accuracy')
                test_loss, test_acc, summary = sess.run(
                    [cost, accuracy, summary_op], feed_dict=val_feed)
                test_accuracy_list.append(test_acc)
                test_loss_list.append(test_loss)
                test_writer.add_summary(summary, step)
                tf.logging.info("test_acc = %s", test_accuracy_list)
                tf.logging.info("test_loss = %s", test_loss_list)

                # save model only when validation accuracy improves
                if test_acc > max_acc:
                    saver.save(sess=sess,
                               save_path=os.path.join(FLAGS.result, "model"))
                    test_writer.add_summary(summary=summary,
                                            global_step=step)
                    max_acc = test_acc
        train_writer.close()
        test_writer.close()

    # Dump the Chrome-trace timeline collected during profiled runs.
    if FLAGS.chip.lower() == 'gpu' and FLAGS.profiling:
        work_dir = os.getcwd()
        timeline_path = os.path.join(work_dir, 'timeline.ctf.json')
        with open(timeline_path, 'w') as trace_file:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file.write(trace.generate_chrome_trace_format())

    # On ModelArts, copy results back to OBS storage.
    if FLAGS.platform.lower() == 'modelarts':
        from help_modelarts import modelarts_result2obs
        modelarts_result2obs(FLAGS)
def train(FLAGS):
    """Train AlexNet on MNIST batches with periodic validation.

    Builds the graph (placeholders, model, loss/accuracy, train op), runs
    max_steps training iterations feeding mnist.train batches, validates on
    mnist.test every 100 steps, logs TensorBoard summaries, and saves the
    loss/accuracy curves plus a final checkpoint under FLAGS.save_dir.
    """
    valid_steps = FLAGS.valid_steps
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    base_learning_rate = FLAGS.base_learning_rate
    input_shape = FLAGS.input_shape  # image shape = 28 * 28
    num_classes = FLAGS.num_classes
    keep_prob = FLAGS.keep_prob
    save_dir = FLAGS.save_dir
    tb_path = FLAGS.tb_path

    train_loss, train_acc = [], []
    valid_loss, valid_acc = [], []

    tf.reset_default_graph()  # define default tensor graph
    with tf.Graph().as_default():
        images_pl = tf.placeholder(tf.float32, shape=[None, input_shape])
        labels_pl = tf.placeholder(tf.float32, shape=[None, num_classes])
        # define a variable global_steps
        global_steps = tf.Variable(0, trainable=False)

        # build a graph that calculates the logits prediction from the model
        logits = alexnet(images_pl, num_classes, keep_prob)
        loss, acc, _ = calc_loss_acc(labels_pl, logits)
        # train op: one batch of examples updates the model params.
        # BUGFIX: the original also built a second train_op and ran it during
        # validation, silently training on validation data; validation now
        # only evaluates loss/acc.
        training_op = train_op(loss, global_steps, base_learning_rate)

        # define the model saver
        saver = tf.train.Saver(tf.global_variables())
        # single merged summary op (the original merged twice, identically)
        summary_op = tf.summary.merge_all()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # start queue runners
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            train_writter = tf.summary.FileWriter(tb_path, sess.graph)
            train_writter2 = tf.summary.FileWriter('./tb_logs/Second/',
                                                   sess.graph)

            # start training
            for step in range(max_steps):
                train_image_batch, train_label_batch = mnist.train.next_batch(
                    batch_size)
                train_feed_dict = {
                    images_pl: train_image_batch,
                    labels_pl: train_label_batch
                }
                _, _loss, _acc, _summary_op = sess.run(
                    [training_op, loss, acc, summary_op],
                    feed_dict=train_feed_dict)
                # store loss and accuracy value
                train_loss.append(_loss)
                train_acc.append(_acc)
                print("Iteration " + str(step) + ", Mini-batch Loss= " +
                      "{:.6f}".format(_loss) + ", Training Accuracy= " +
                      "{:.5f}".format(_acc))
                train_writter.add_summary(_summary_op, global_step=step)

                if step % 100 == 0:
                    _valid_loss, _valid_acc = [], []
                    print('Start validation process')
                    for itr in range(valid_steps):
                        valid_image_batch, valid_label_batch = \
                            mnist.test.next_batch(batch_size)
                        valid_feed_dict = {
                            images_pl: valid_image_batch,
                            labels_pl: valid_label_batch
                        }
                        # Evaluation only -- no training op here.
                        _loss, _acc, _valid_summary = sess.run(
                            [loss, acc, summary_op],
                            feed_dict=valid_feed_dict)
                        train_writter2.add_summary(_valid_summary,
                                                   global_step=itr)
                        _valid_loss.append(_loss)
                        _valid_acc.append(_acc)
                    valid_loss.append(np.mean(_valid_loss))
                    valid_acc.append(np.mean(_valid_acc))
                    # BUGFIX: the original passed the values as extra print
                    # arguments instead of formatting them.
                    print("Iteration {}: Train Loss {}, Train Acc {}, "
                          "Val Loss {}, Val Acc {}".format(
                              step, train_loss[-1], train_acc[-1],
                              valid_loss[-1], valid_acc[-1]))

            # Persist curves and the final checkpoint.
            np.save(os.path.join(save_dir, 'accuracy_loss', 'train_loss'),
                    train_loss)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'train_acc'),
                    train_acc)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_loss'),
                    valid_loss)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_acc'),
                    valid_acc)
            checkpoint_path = os.path.join(save_dir, 'model', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
            coord.request_stop()
            coord.join(threads)
            sess.close()
- Apply tensorboard callback """ # Dataset Path train_path = 'resources/traing_dataset.csv' valid_path = 'resources/valid_dataset.csv' test_path = 'resources/test_dataset.csv' # Setting for learning batch_size = 100 iteration = 10 epochs = 10 valid_size = 5 X, Y, is_training, cost, optimizer, accuracy, merged = alexnet() # Read dataset. classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip'] train = read_dataset(train_path) valid = read_dataset(valid_path) tran_labels = train[classes] valid_labels = valid[classes] sess = tf.Session() # For tensorboard logdir='log/' train_writer = tf.summary.FileWriter(logdir + '/train', sess.graph) valid_writer = tf.summary.FileWriter(logdir + '/valid')
def train(FLAGS):
    """Train AlexNet on fixed MNIST-derived slices with periodic validation.

    Variant of the MNIST trainer that feeds the constant slices
    ``imf[1:128, :]`` / ``mnist.*.labels[1:128]`` each step instead of
    drawing fresh batches. Logs TensorBoard summaries, saves loss/accuracy
    curves and a final checkpoint under FLAGS.save_dir.

    NOTE(review): validation pairs imf[1:128] images with mnist.test labels
    while training pairs them with mnist.train labels -- confirm that imf
    rows correspond to both label sets as intended.
    """
    valid_steps = FLAGS.valid_steps
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    base_learning_rate = FLAGS.base_learning_rate
    input_shape = FLAGS.input_shape  # image shape = 28 * 28
    num_classes = FLAGS.num_classes
    keep_prob = FLAGS.keep_prob
    save_dir = FLAGS.save_dir
    tb_path = FLAGS.tb_path

    train_loss, train_acc = [], []
    valid_loss, valid_acc = [], []

    tf.reset_default_graph()  # define default tensor graph
    with tf.Graph().as_default():
        images_pl = tf.placeholder(tf.float32, shape=[None, input_shape])
        labels_pl = tf.placeholder(tf.float32, shape=[None, num_classes])
        # define a variable global_steps
        global_steps = tf.Variable(0, trainable=False)

        # build a graph that calculates the logits prediction from the model
        logits = alexnet(images_pl, num_classes, keep_prob)
        loss, acc, _ = calc_loss_acc(labels_pl, logits)
        # train op: one batch of examples updates the model params.
        # BUGFIX: the original built a second train_op and ran it during
        # validation, silently training on validation data; validation now
        # only evaluates loss/acc. (Dead commented-out resize experiments
        # from the original were removed.)
        training_op = train_op(loss, global_steps, base_learning_rate)

        # define the model saver
        saver = tf.train.Saver(tf.global_variables())
        # single merged summary op (the original merged twice, identically)
        summary_op = tf.summary.merge_all()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # start queue runners
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            train_writter = tf.summary.FileWriter(tb_path, sess.graph)
            train_writter2 = tf.summary.FileWriter('./tb_logs/Second/',
                                                   sess.graph)

            # start training
            for step in range(max_steps):
                # Fixed training slice fed every step (see docstring note).
                training_data = imf[1:128, :]
                training_label = mnist.train.labels[1:128]
                train_feed_dict = {
                    images_pl: training_data,
                    labels_pl: training_label
                }
                _, _loss, _acc, _summary_op = sess.run(
                    [training_op, loss, acc, summary_op],
                    feed_dict=train_feed_dict)
                # store loss and accuracy value
                train_loss.append(_loss)
                train_acc.append(_acc)
                print("Iteration " + str(step) + ", Mini-batch Loss= " +
                      "{:.6f}".format(_loss) + ", Training Accuracy= " +
                      "{:.5f}".format(_acc))
                train_writter.add_summary(_summary_op, global_step=step)

                if step % 100 == 0:
                    _valid_loss, _valid_acc = [], []
                    print('Start validation process')
                    for itr in range(valid_steps):
                        valid_image_batch = imf[1:128, :]
                        valid_label_batch = mnist.test.labels[1:128]
                        valid_feed_dict = {
                            images_pl: valid_image_batch,
                            labels_pl: valid_label_batch
                        }
                        # Evaluation only -- no training op here.
                        _loss, _acc, _valid_summary = sess.run(
                            [loss, acc, summary_op],
                            feed_dict=valid_feed_dict)
                        train_writter2.add_summary(_valid_summary,
                                                   global_step=itr)
                        _valid_loss.append(_loss)
                        _valid_acc.append(_acc)
                    valid_loss.append(np.mean(_valid_loss))
                    valid_acc.append(np.mean(_valid_acc))
                    # BUGFIX: the original passed the values as extra print
                    # arguments instead of formatting them.
                    print("Iteration {}: Train Loss {}, Train Acc {}, "
                          "Val Loss {}, Val Acc {}".format(
                              step, train_loss[-1], train_acc[-1],
                              valid_loss[-1], valid_acc[-1]))

            # Persist curves and the final checkpoint.
            np.save(os.path.join(save_dir, 'accuracy_loss', 'train_loss'),
                    train_loss)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'train_acc'),
                    train_acc)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_loss'),
                    valid_loss)
            np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_acc'),
                    valid_acc)
            checkpoint_path = os.path.join(save_dir, 'model', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
            coord.request_stop()
            coord.join(threads)
            sess.close()
def main():
    """Smoke-test entry point: build an AlexNet model and run 50 predictions.

    Constructs the model via the module-level ``alexnet()`` factory, then
    feeds the same random 224x224x3 image through ``model.predict`` 50
    times. Useful as a quick sanity/warm-up run; no value is returned.
    """
    model = alexnet()
    # Random dummy input in HWC layout; presumably the model expects
    # 224x224 RGB — TODO confirm against the alexnet() definition.
    test_x = np.random.rand(224, 224, 3)
    # Fix: add the batch dimension ONCE outside the loop instead of
    # rebuilding the same np.array([test_x]) on every iteration.
    batch = np.array([test_x])
    for _ in range(50):
        model.predict(batch)
def main():
    """CLI entry point: parse arguments, build a crossbar (crxb) network,
    merge a pretrained checkpoint into its state dict, and run inference
    on the CIFAR test split via ``test(...)``.

    Flags cover model selection, dataset/loader settings, optimizer and
    scheduler hyper-parameters, and crossbar hardware parameters
    (cell conductances, supply voltage, IR-drop / SAF / noise switches).
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    # model cfg
    # parser.add_argument('--pretrained', action='store_true', default=False,
    #                     help='load pretrained model')
    parser.add_argument('--model-type', type=str, default="",
                        help="type of the model.")
    parser.add_argument('--model-structure', type=int, default=0, metavar='N',
                        help='model structure to be trained (default: 0)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint, (default: None)')
    parser.add_argument('--e', '--evaluate', dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    # dataset
    parser.add_argument('--dataset-root', type=str, default="../datasets",
                        help="load dataset path.")
    parser.add_argument('--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--train-batch-size', type=int, default=128,
                        metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=128,
                        metavar='N',
                        help='input batch size for testing (default: 128)')
    # train cfg
    # NOTE(review): help text says "default: 10" but the actual default is 80.
    parser.add_argument('--epochs', type=int, default=80, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful to restarts)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    # optimizer
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--wd', default=5e-4, type=float, metavar='W',
                        help='weight decay (default: 5e-4)')
    # scheduler
    parser.add_argument('--schedule', type=int, nargs='+', default=[150, 225],
                        help='Decrease learning rate at these epochs.')
    parser.add_argument('--gamma', type=float, default=0.1,
                        help='LR is multiplied by gamma on schedule.')
    # NOTE(review): the scheduler below actually uses --decreasing-lr, not
    # --schedule; --schedule appears unused in this function.
    parser.add_argument('--decreasing-lr', default='16,30,54',
                        help='decreasing strategy')
    # device init cfg
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    # result output cfg
    # NOTE(review): typo "detial" in the user-visible help string (left
    # untouched here since help text is runtime output).
    parser.add_argument('--detail', action='store_true', default=False,
                        help='show log in detial')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    parser.add_argument('--checkpoint-path', type=str, default="",
                        help="save model path.")
    # crossbar hardware parameters (typos "corssbar"/"conductacne" are in the
    # runtime help strings; noted but not changed here)
    parser.add_argument('--crxb-size', type=int, default=64,
                        help='corssbar size')
    parser.add_argument('--vdd', type=float, default=3.3,
                        help='supply voltage')
    parser.add_argument('--gwire', type=float, default=0.0357,
                        help='wire conductacne')
    parser.add_argument('--gload', type=float, default=0.25,
                        help='load conductance')
    parser.add_argument('--gmax', type=float, default=0.000333,
                        help='maximum cell conductance')
    parser.add_argument('--gmin', type=float, default=0.000000333,
                        help='minimum cell conductance')
    parser.add_argument('--ir-drop', action='store_true', default=False,
                        help='switch to turn on ir drop analysis')
    parser.add_argument('--scaler-dw', type=float, default=1,
                        help='scaler to compress the conductance')
    parser.add_argument('--test', action='store_true', default=False,
                        help='switch to turn inference mode')
    parser.add_argument('--enable_noise', action='store_true', default=False,
                        help='switch to turn on noise analysis')
    parser.add_argument('--enable_SAF', action='store_true', default=False,
                        help='switch to turn on SAF analysis')
    # NOTE(review): mixed underscore/hyphen in the flag name; argparse maps
    # the hyphen to an underscore, so the dest is args.enable_ec_SAF.
    parser.add_argument('--enable_ec-SAF', action='store_true', default=False,
                        help='switch to turn on SAF error correction')
    # NOTE(review): the help strings for --freq and --temp look copy-pasted
    # from --scaler-dw; they do not describe these flags.
    parser.add_argument('--freq', type=float, default=10e6,
                        help='scaler to compress the conductance')
    parser.add_argument('--temp', type=float, default=300,
                        help='scaler to compress the conductance')
    args = parser.parse_args()
    print("+++", args)
    # Train the network on the training data
    # Test the network on the test data
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)
    # net = alexnet(args.pretrained, args.resume, num_classes=10, structure=args.model_structure)
    # create model
    # Bundle every crossbar hardware parameter for the model constructors.
    crxb_cfg = {
        'crxb_size': args.crxb_size,
        'gmax': args.gmax,
        'gmin': args.gmin,
        'gwire': args.gwire,
        'gload': args.gload,
        'vdd': args.vdd,
        'ir_drop': args.ir_drop,
        'device': device,
        'freq': args.freq,
        'temp': args.temp,
        'enable_SAF': args.enable_SAF,
        'enable_noise': args.enable_noise,
        'enable_ec_SAF': args.enable_ec_SAF,
        'quantize': 64
    }
    # Select the network; unknown model types fall through to the physical
    # cifar10 crossbar model (physical=True) as the default.
    if args.model_type == 'VGG16':
        net = model.VGG16()
    elif args.model_type == 'cifar10' or args.model_type == 'VGG8':
        net = model.cifar10(n_channel=128, physical=0, **crxb_cfg)
    elif args.model_type == 'alexnet':
        net = model.alexnet(num_classes=10, structure=args.model_structure)
    elif args.model_type == 'resnet18':
        net = model.resnet18()
    else:
        net = model.cifar10(n_channel=128, physical=True, **crxb_cfg)
    # Multi-GPU: wrapping in DataParallel prefixes state-dict keys with
    # "module.", which the checkpoint-key remapping below relies on.
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)
    # for param in net.parameters():
    #     param = nn.init.normal_(param)
    # config
    milestones = list(map(int, args.decreasing_lr.split(',')))
    print(milestones)
    # optimizer = optim.SGD(net.parameters(), lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY) # not good enough 68%
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                         gamma=args.gamma)
    # optionlly resume from a checkpoint
    if args.resume:
        print("=> using pre-trained model '{}'".format(args.model_type))
    else:
        print("=> creating model '{}'".format(args.model_type))
    global best_prec1
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         print("=> loading checkpoint '{}'".format(args.resume))
    #         checkpoint = torch.load(args.resume)
    #         args.start_epoch = checkpoint['epoch']
    #         best_prec1 = checkpoint['best_prec1']
    #         net.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    #     else:
    #         print("=> no checkpoint found at '{}'".format(args.resume))
    # Data loading
    kwargs = {
        'num_workers': args.workers,
        'pin_memory': True
    } if use_cuda else {}
    trainloader, valloader, testloader = getcifar(args, 'pad', 4, True, 32,
                                                  True, **kwargs)
    print(len(trainloader), len(valloader), len(testloader))
    print('\r\n1!!!model_dict')
    model_dict = net.state_dict()
    print(model_dict.keys(), "\r\n2!!!model parameters")
    # NOTE(review): parm is populated nowhere below — appears to be dead.
    parm = {}
    for name, parameters in net.named_parameters():
        print(name)
    print('\r\n3!!!pretrained_dict')
    # NOTE(review): torch.load(args.resume) runs unconditionally; with the
    # default --resume '' this raises — confirm a checkpoint is always given.
    checkpoint = torch.load(args.resume)
    # print(type(checkpoint),'\r\n!!!')
    # print(checkpoint.keys(),'\r\n!!!')
    pretrained_dict = checkpoint['state_dict']
    print(pretrained_dict.keys(), '\r\n4!!!new_dict')
    import re
    # Remap checkpoint keys into this model's naming scheme: BatchNorm
    # buffers saved as module.features.<i>.<buf> become
    # module.features.<i>.bn.<buf>; keys already present are copied as-is,
    # anything else is dropped.
    new_dict = {}
    for k, v in pretrained_dict.items():
        if k not in model_dict:
            bn_detect = re.match(
                r'module\.features\.(1|4|8|11|15|18|22)\.(running_mean|num_batches_tracked|running_var)',
                k)
            if bn_detect:
                k = 'module.features.{}.bn.{}'.format(bn_detect.group(1),
                                                      bn_detect.group(2))
                print(k)
                new_dict[k] = v
            else:
                pass
        else:
            new_dict[k] = v
    print(new_dict.keys(), '\r\n5!!!')
    print([k for k, v in new_dict.items() if k not in model_dict], '\r\n')
    print([k for k, v in model_dict.items() if k not in new_dict])
    # print('net buffers')
    # print([n for n,v in net.named_buffers()], '\r\n !!!ideal_buffer')
    # Fill any still-missing entries (crossbar-specific buffers) from an
    # "ideal" reference checkpoint. NOTE(review): hard-coded relative path.
    ideal_buffer = torch.load("../models/cifar10_crxb_ideal_VGG8_0_final.pth")
    # buffer_list = [k for k, v in ideal_buffer['state_dict'].items() if k not in new_dict]
    for k, v in ideal_buffer['state_dict'].items():
        if k not in new_dict:
            new_dict[k] = v
    # Sanity check: the merged dict should now cover every model key.
    print("\r\ncheck:", new_dict.keys() == model_dict.keys())
    model_dict.update(new_dict)
    # model_dict.update(ideal_buffer['state_dict'])
    net.load_state_dict(model_dict)
    # print('vvv')
    # print([k for k,v in ideal_buffer['state_dict'].items() if k not in model_dict])
    # net.load_state_dict(ideal_buffer['state_dict'])
    test(args, net, device, testloader)
def loadweight(self, weight_name):
    """Build a ``LoadWeight`` helper for a fresh AlexNet.

    Args:
        weight_name: Name of the weight file/entry passed through to
            ``LoadWeight``.

    Returns:
        A ``LoadWeight`` instance wrapping a newly constructed ``alexnet()``.
    """
    backbone = alexnet()
    return LoadWeight(net=backbone, weight_name=weight_name)