def run():
    """Freeze the latest Xception checkpoint into a single GraphDef file.

    Rebuilds the inference graph (``is_training=False``) behind a float32
    placeholder named ``Placeholder_only``, restores the newest checkpoint
    found in ``./log``, folds the variables into constants and writes the
    result to ``./frozen_model_xception.pb``.
    """
    side = 299
    class_count = 5
    checkpoint_dir = './log'
    frozen_graph_path = "./frozen_model_xception.pb"
    output_node_names = "Xception/Predictions/Softmax"

    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

    with tf.Graph().as_default() as graph:
        input_images = tf.placeholder(
            dtype=tf.float32,
            shape=[None, side, side, 3],
            name='Placeholder_only')

        # Build the network only for its graph side effects; the returned
        # tensors are addressed by node name below.
        with slim.arg_scope(xception_arg_scope()):
            xception(input_images,
                     num_classes=class_count,
                     is_training=False)

        restorer = tf.train.Saver(slim.get_variables_to_restore())

        # Snapshot the graph definition after the saver ops were added.
        graph_def = graph.as_graph_def()

        with tf.Session() as sess:
            restorer.restore(sess, latest_checkpoint)

            print("Exporting graph...")
            frozen_graph_def = graph_util.convert_variables_to_constants(
                sess, graph_def, output_node_names.split(","))

            with tf.gfile.GFile(frozen_graph_path, "wb") as out_file:
                out_file.write(frozen_graph_def.SerializeToString())
def load_model(sess, checkpoint_path='/model.ckpt'):
    """Build the Xception inference graph and restore its weights.

    The network is created in the current default graph with
    ``is_training=False`` and is fed by a float32 placeholder named
    ``Placeholder_only`` of shape ``[None, image_size, image_size, 3]``
    (``image_size`` and ``NUM_CLASSES`` are read from module scope).

    Args:
        sess: TensorFlow session the checkpoint is restored into.
        checkpoint_path: Checkpoint to restore. Defaults to
            ``'/model.ckpt'``, the path that used to be hard-coded here.

    Returns:
        The tensor ``argmax(end_points['Predictions'], 1)``.
    """
    print("Loading model...")
    placeholder = tf.placeholder(shape=[None, image_size, image_size, 3],
                                 dtype=tf.float32,
                                 name='Placeholder_only')

    # Create the inference model with is_training=False.
    with slim.arg_scope(xception_arg_scope()):
        _, end_points = xception(placeholder,
                                 num_classes=NUM_CLASSES,
                                 is_training=False)

    probabilities = end_points['Predictions']
    predictions = tf.argmax(probabilities, 1)

    # The saver is created after the model so that it sees its variables.
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    return predictions
def testBuild(self):
    """Check the static shapes of the Xception end points for 299x299 input."""
    batch_size = 5
    height, width = 299, 299
    num_classes = 1001

    # (end point name, expected static shape), in network order.
    expected_shapes = [
        # Entry flow
        ('Xception/block1_res_conv', [batch_size, 74, 74, 128]),
        ('Xception/block2_res_conv', [batch_size, 37, 37, 256]),
        ('Xception/block3_res_conv', [batch_size, 19, 19, 728]),
    ]
    # Middle flow: blocks 5..12 all keep the 19x19x728 shape.
    expected_shapes += [
        ('Xception/block%d_dws_conv3' % block, [batch_size, 19, 19, 728])
        for block in range(5, 13)
    ]
    expected_shapes += [
        ('Xception/block12_res_conv', [batch_size, 10, 10, 1024]),
        # Exit flow
        ('Xception/block14_dws_conv1', [batch_size, 10, 10, 1536]),
        ('Xception/block14_dws_conv2', [batch_size, 10, 10, 2048]),
        ('Xception/block15_avg_pool', [batch_size, 1, 1, 2048]),
        ('Xception/block15_conv1', [batch_size, 1, 1, 2048]),
        ('Xception/block15_conv2', [batch_size, 1, 1, num_classes]),
    ]

    with self.test_session():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        logits, end_points = xception.xception(inputs, num_classes)

        # assertEqual replaces the deprecated assertEquals alias, which
        # no longer exists on Python 3.12+.
        for name, shape in expected_shapes:
            self.assertEqual(end_points[name].get_shape().as_list(),
                             shape,
                             msg=name)

        # Check outputs
        self.assertListEqual(logits.get_shape().as_list(),
                             [batch_size, num_classes])
        self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                             [batch_size, num_classes])
def main(_):
    """Run one task (parameter server or worker) of a distributed job.

    The role is taken from ``settings.FLAGS.job_name``. A ``ps`` task
    blocks in ``server.join()``. A ``worker`` task builds the model
    (Xception or the tutorial model, chosen interactively), then trains
    asynchronously inside a ``MonitoredTrainingSession`` until
    ``settings.FLAGS.max_steps`` is reached.

    Args:
        _: Unused positional argument.
    """
    flags = settings.FLAGS

    # Only worker task 0 fetches the dataset.
    if flags.job_name == "worker" and flags.task_index == 0:
        model_inputs.maybe_download_and_extract()

    ps_hosts = flags.ps_hosts.split(",")
    worker_hosts = flags.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=flags.job_name,
                             task_index=flags.task_index)

    if flags.job_name == "ps":
        server.join()
    elif flags.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % flags.task_index,
                cluster=cluster)):
            isXception = trainer_functions.query_yes_no(
                "Would you like to use the Xception model \n(if no, the model will default to that of the TensorFlow turorial)?")

            # Build model. The input pipeline call is the same for both
            # branches, so it is made once.
            images, labels = trainer_functions.distorted_inputs(isXception)
            if isXception:
                with slim.arg_scope(xception.xception_arg_scope()):
                    logits, end_points = xception.xception(
                        images, num_classes=10, is_training=True)
            else:
                logits = trainer_functions.tutorial_model(images)

            # Calculate loss.
            loss = trainer_functions.loss(logits, labels)
            global_step = tf.contrib.framework.get_or_create_global_step()

            train_op = tf.train.AdagradOptimizer(0.01).minimize(
                loss, global_step=global_step)

        # The StopAtStepHook handles stopping after running given steps.
        hooks = [tf.train.StopAtStepHook(last_step=flags.max_steps)]

        # The MonitoredTrainingSession takes care of session initialization,
        # restoring from a checkpoint, saving to a checkpoint, and closing
        # when done or an error occurs.
        with tf.train.MonitoredTrainingSession(
                master=server.target,
                is_chief=(flags.task_index == 0),
                checkpoint_dir="./train_logs",
                hooks=hooks) as mon_sess:
            prev_time = time.time()
            while not mon_sess.should_stop():
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional
                # details on how to perform *synchronous* training.
                # mon_sess.run handles AbortedError in case of preempted PS.
                #
                # Step and loss are fetched in the SAME run call as the
                # train op. Separate run() calls would raise RuntimeError
                # once StopAtStepHook has requested a stop, and a separate
                # loss fetch would also pull an extra batch from the input
                # pipeline. The fetched step may be the value just before
                # or just after this step's increment.
                _, step, loss_value = mon_sess.run(
                    [train_op, global_step, loss])

                # Log every `log_frequency` steps, the same interval the
                # throughput formula below assumes.
                if step % flags.log_frequency == 0:
                    now = time.time()
                    duration = now - prev_time
                    prev_time = now
                    examples_per_sec = (
                        flags.log_frequency * flags.batch_size / duration)
                    print("examples/sec: %d" % examples_per_sec +
                          ", loss: %f" % loss_value)
def __init__(self, num_labels):
    """Wrap an ImageNet-pretrained Xception with a per-label 3-way head.

    Args:
        num_labels: Number of labels; the classifier emits
            ``num_labels * 3`` values.
    """
    super(Xception, self).__init__()

    self.num_labels = num_labels
    self.num_classes = 3  # [p0, p1, p2] for each label

    # input size >= 299x299
    backbone = xception.xception(pretrained='imagenet')
    self.xception = backbone

    # The head consumes the backbone's own class scores (1000 for the
    # ImageNet weights, per the original comment).
    backbone_outputs = backbone.num_classes
    self.classifier = nn.Linear(backbone_outputs,
                                self.num_labels * self.num_classes)
def testForward(self):
    """Run one forward pass and check that the logits are not all zero."""
    input_shape = (1, 299, 299, 3)

    with self.test_session() as sess:
        random_images = tf.random_uniform(input_shape)
        logits, _ = xception.xception(random_images)

        sess.run(tf.global_variables_initializer())
        logits_value = sess.run(logits)

        self.assertTrue(logits_value.any())
def main():
    """Classify every image listed in a file with a trained Paddle model.

    Command-line arguments:
        data_list: Text file with one image path per line.
        model: Name of the network architecture to build.
        params_path: Gzipped tar file holding the trained parameters.

    Prints the top-1 label index for each listed image.
    """
    # parse the argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'data_list',
        help='The path of data list file, which consists of one image path per line'
    )
    parser.add_argument(
        'model',
        help='The model for image classification',
        choices=[
            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
            'inception-resnet-v2', 'inception_v4', 'xception'
        ])
    parser.add_argument(
        'params_path', help='The file which stores the parameters')
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(DATA_DIM))

    # argparse `choices` guarantees that exactly one branch matches.
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
    elif args.model == 'googlenet':
        out, _, _ = googlenet.googlenet(image, class_dim=CLASS_DIM)
    elif args.model == 'inception-resnet-v2':
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(
            image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
    elif args.model == 'inception_v4':
        out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
    elif args.model == 'xception':
        out = xception.xception(image, class_dim=CLASS_DIM)

    # load parameters
    with gzip.open(args.params_path, 'r') as f:
        parameters = paddle.parameters.Parameters.from_tar(f)

    # Read the list through a context manager so the handle is closed.
    with open(args.data_list) as list_file:
        file_list = [line.strip() for line in list_file]

    # Each sample is a 1-tuple holding the flattened float32 image.
    test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False)
                  .flatten().astype('float32'), ) for image_file in file_list]

    probs = paddle.infer(
        output_layer=out, parameters=parameters, input=test_data)

    # Sort class indices by descending probability; column 0 is the top-1.
    lab = np.argsort(-probs)
    for file_name, result in zip(file_list, lab):
        # Single-argument print() is valid on both Python 2 and Python 3.
        print("Label of %s is: %d" % (file_name, result[0]))
def testEvaluation(self):
    """Check logits and argmax prediction shapes with is_training=False."""
    batch_size, num_classes = 1, 1001
    image_shape = (batch_size, 299, 299, 3)

    with self.test_session():
        eval_inputs = tf.random_uniform(image_shape)
        logits, _ = xception.xception(eval_inputs, is_training=False)
        predictions = tf.argmax(logits, 1)

        logits_shape = logits.get_shape().as_list()
        predictions_shape = predictions.get_shape().as_list()

        self.assertListEqual(logits_shape, [batch_size, num_classes])
        self.assertListEqual(predictions_shape, [batch_size])
def return_pytorch04_xception_ft(pretrained=True, pretrain3epoch=True):
    """Return an Xception model, optionally initialised from a checkpoint.

    Args:
        pretrained: When false, a freshly initialised ``xception`` model is
            returned and no file is read. (Previously this path raised
            ``UnboundLocalError`` because ``model`` was never assigned.)
        pretrain3epoch: Selects which checkpoint is loaded when
            ``pretrained`` is true. True loads ``xception-b5690688.pth``
            into the default-head model; False loads
            ``xception-b5690688-after3epochs-noNT-Big.pth`` into a 2-class
            model.
            NOTE(review): the flag name reads as the opposite of the files
            it selects - confirm the intended mapping.

    Returns:
        The constructed model.
    """
    # Raises warning "src not broadcastable to dst" but thats fine
    if not pretrained:
        return xception(pretrained=False)

    # Load model in torch 0.4+
    if pretrain3epoch:
        model = xception(pretrained=False)

        # Expose the head as `fc` while loading, then rename it back;
        # presumably the checkpoint stores the head under that key.
        model.fc = model.last_linear
        del model.last_linear

        state_dict = torch.load(
            '/home/jc/Faceforensics_onServer/Model/xception-b5690688.pth')
        # Pointwise weights get two trailing singleton dims appended
        # before loading.
        for name, weights in state_dict.items():
            if 'pointwise' in name:
                state_dict[name] = weights.unsqueeze(-1).unsqueeze(-1)
        model.load_state_dict(state_dict)

        model.last_linear = model.fc
        del model.fc
    else:
        model = xception(num_classes=2, pretrained=False)
        state_dict = torch.load(
            '/home/jc/Faceforensics_onServer/Model/xception-b5690688-after3epochs-noNT-Big.pth'
        )
        model.load_state_dict(state_dict)
    return model
def return_pytorch04_xception(init_checkpoint=None):
    """Build an Xception model and optionally load weights from a file.

    Args:
        init_checkpoint: Path handed to ``torch.load``; when ``None`` the
            freshly constructed model is returned unchanged.

    Returns:
        The Xception model.
    """
    # Raises warning "src not broadcastable to dst" but thats fine
    net = xception(pretrained=False)
    if init_checkpoint is None:
        return net

    # Load model in torch 0.4+
    # The head is exposed as `fc` only for the duration of the load.
    net.fc = net.last_linear
    del net.last_linear

    weights_by_name = torch.load(init_checkpoint)
    pointwise_keys = [key for key in weights_by_name if 'pointwise' in key]
    for key in pointwise_keys:
        # Append two trailing singleton dimensions to pointwise weights.
        expanded = weights_by_name[key].unsqueeze(-1).unsqueeze(-1)
        weights_by_name[key] = expanded
    net.load_state_dict(weights_by_name)

    net.last_linear = net.fc
    del net.fc
    return net
def __init__(self):
    """Load the 2-class Xception checkpoint and prepare it for inference.

    Reads ``./ckpt_iter.pth.tar``, copies its ``state_dict`` entry into the
    network through ``pretrain``, moves the network to CUDA and switches it
    to eval mode. Also builds the 299x299 resize + to-tensor transform.
    """
    self.num_class = 2
    self.new_width = self.new_height = 299

    # Network and weights.
    self.net = xception(num_classes=self.num_class)
    checkpoint = torch.load('./ckpt_iter.pth.tar')
    pretrain(self.net, checkpoint['state_dict'])

    # Input preprocessing.
    target_size = (self.new_width, self.new_height)
    preprocessing_steps = [
        torchvision.transforms.Resize(target_size),
        torchvision.transforms.ToTensor(),
    ]
    self.transform = torchvision.transforms.Compose(preprocessing_steps)

    self.net.cuda()
    self.net.eval()
def return_pytorch04_xception(pretrained=True):
    """Build an Xception model, optionally loading ``XCEPTION_MODEL``.

    Args:
        pretrained: When true, weights are read from the file named by the
            module-level ``XCEPTION_MODEL``; otherwise the freshly
            constructed model is returned unchanged.

    Returns:
        The Xception model.
    """
    # Raises warning "src not broadcastable to dst" but thats fine
    net = xception(pretrained=False)
    if not pretrained:
        return net

    # Load model in torch 0.4+
    # The head is exposed as `fc` only for the duration of the load.
    net.fc = net.last_linear
    del net.last_linear

    weights_by_name = torch.load(XCEPTION_MODEL)
    pointwise_keys = [key for key in weights_by_name if 'pointwise' in key]
    for key in pointwise_keys:
        # Append two trailing singleton dimensions to pointwise weights.
        expanded = weights_by_name[key].unsqueeze(-1).unsqueeze(-1)
        weights_by_name[key] = expanded
    net.load_state_dict(weights_by_name)

    net.last_linear = net.fc
    del net.fc
    return net
def build_backbone(backbone_name, pretrained=False, os=16):
    """Create the backbone network selected by name.

    Args:
        backbone_name: One of ``'res50_atrous'``, ``'res101_atrous'``,
            ``'res152_atrous'``, ``'xception'`` or ``'Xception'``.
        pretrained: Forwarded to the backbone factory.
        os: Forwarded to the backbone factory as its ``os`` argument.

    Returns:
        The network returned by the selected factory.

    Raises:
        ValueError: If ``backbone_name`` is not one of the names above.
    """
    factory_kwargs = dict(pretrained=pretrained, os=os)

    if backbone_name == 'res50_atrous':
        return atrousnet.resnet50_atrous(**factory_kwargs)
    if backbone_name == 'res101_atrous':
        return atrousnet.resnet101_atrous(**factory_kwargs)
    if backbone_name == 'res152_atrous':
        return atrousnet.resnet152_atrous(**factory_kwargs)
    if backbone_name == 'xception' or backbone_name == 'Xception':
        return xception.xception(**factory_kwargs)

    raise ValueError(
        'backbone.py: The backbone named %s is not supported yet.' %
        backbone_name)
# from tensorboardX import SummaryWriter
from torch.utils.tensorboard import SummaryWriter
from xception import xception
import torch
#from model_ref import AlignedXception

# All-zero batch of 32 three-channel 299x299 images, used only to trace
# the model graph. Named `dummy_input` so the builtin `input` is not
# shadowed.
dummy_input = torch.zeros(32, 3, 299, 299)
#model_ref = AlignedXception(8)
model = xception()

# The context manager closes the writer even if add_graph raises.
with SummaryWriter('./graph_torch') as writer:
    writer.add_graph(model, dummy_input)
sig = str(datetime.datetime.now()) #os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu) random.seed(opt.seed) torch.manual_seed(opt.seed) torch.cuda.manual_seed_all(opt.seed) #os.makedirs('%s/modules/%s' % (opt.save_dir, sig), exist_ok=True) CNN_embed_dim = 2048 RNN_hidden_layers = 3 RNN_hidden_nodes = 512 RNN_FC_dim = 256 dropout_p = 0.0 k = 2 print("Initializing Networks") cnn_encoder = xception(2, load_pretrain=True).to(device) rnn_decoder = DecoderRNN(CNN_embed_dim=CNN_embed_dim, h_RNN_layers=RNN_hidden_layers, h_RNN=RNN_hidden_nodes, h_FC_dim=RNN_FC_dim, drop_p=dropout_p, num_classes=k).to(device) if torch.cuda.device_count() > 1: print("Using", torch.cuda.device_count(), "GPUs!") cnn_encoder = nn.DataParallel(cnn_encoder) rnn_decoder = nn.DataParallel(rnn_decoder) crnn_params = list(cnn_encoder.parameters()) + list(rnn_decoder.parameters()) #optimizer_xcp = optim.Adam(model.parameters(), lr=opt.lr) #model.cuda() optimizer = torch.optim.Adam(crnn_params, lr=opt.lr)
# Text-overlay settings. They are not used in the statements visible
# here; presumably drawing code further down the script consumes them.
# org
org = (50, 50)
# fontScale
fontScale = 1
# Blue color in BGR
color = (255, 0, 0)
# Line thickness of 2 px
thickness = 2

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

m = xception(pretrained=True, device=device, path='127_x_net.pth')
m.to(device)
m.eval()

vidcap = cv2.VideoCapture(
    '/home/nick/projects/comma/speedchallenge/data/test.mp4')
success, prev_img = vidcap.read()
while success:
    success, image = vidcap.read()
    if not success:
        # read() did not deliver a frame (end of the video): stop before
        # the missing frame reaches process().
        break

    img1 = process(image)
    img2 = process(prev_img)
    # Move the tensors to the selected device instead of an unconditional
    # .cuda(), which fails on CPU-only machines although `device` already
    # falls back to "cpu".
    img1 = torch.transpose(img1, 3, 1).to(device).float()
    img2 = torch.transpose(img2, 3, 1).to(device).float()
    # NOTE(review): `prev_img` is never advanced in the statements visible
    # here, so every frame is paired with the first one - confirm that it
    # is updated elsewhere in the loop.
    output = m(img1, img2)
def process(img): img = cv2.resize(img, (480, 480)) img = img / 255.0 img = img - np.array([0.485, 0.456, 0.406]) img = img / np.array([0.229, 0.224, 0.225]) # (shape: (256, 256, 3)) img = img.astype(np.float32) img = torch.from_numpy(img) return img loser = nn.MSELoss() use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") print(device) m = xception(device=device).to(device) m.train() optimizer = optim.Adadelta(m.parameters(), lr=0.1) epochs = 200 start = time.time() for e in range(epochs): vidcap = cv2.VideoCapture( '/home/nick/projects/comma/speedchallenge/data/train.mp4') f = open("/home/nick/projects/comma/speedchallenge/data/train.txt", "r") success, prev_img = vidcap.read() speed = float(f.readline()) count = 0 running_loss = 0.0 while success:
break WIDTH = 300 HEIGHT = 200 LR = 1e-3 EPOCHS = 5 MODEL_NAME = '07-06-test-2' # .format(EPOCHS, LR) PREV_MODEL = '' logdir = f".\\logs\\{MODEL_NAME}" tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, profile_batch=5) LOAD_MODEL = False model = alexnet_model_modified_v2(img_shape=(HEIGHT, WIDTH, 3), n_classes=9) mode = xception(img_shape=(HEIGHT, WIDTH, 3), n_classes=9) if LOAD_MODEL: model.load(PREV_MODEL) print('We have loaded a previous model!!!!') for e in range(EPOCHS): print(f"--------------_______________***************_____________________EPOCH:{e+1}___________________***************_________________--------------") data_order = [i for i in range(1, FILE_I_END)] shuffle(data_order) for count, i in enumerate(data_order): try: file_name = path.format(i) train_data = np.load(file_name, allow_pickle=True) print(f'training_data-{i}.npy, _EPOCH:{e+1}/{EPOCHS}, ({count+1}/{FILE_I_END-1})')
def test_full_image_network(data_path,
                            model_path,
                            output_path,
                            start_frame=0,
                            end_frame=None,
                            cuda=True):
    """Run the 2-class Xception model on every image in a directory.

    Each file in ``data_path`` is read with ``cv2.imread`` and passed to
    ``predict_with_model``. One line per evaluated image is appended to
    ``output_path``. Files are visited in sorted name order so that the
    frame numbers written to the output are reproducible.

    :param data_path: directory containing the images to evaluate
    :param model_path: path to a state dict for the model, or None to use
        a randomly initialised model
    :param output_path: text file the predictions are appended to
    :param start_frame: only used to size the progress bar
    :param end_frame: only used to size the progress bar; when None the
        bar is sized to the number of files found
    :param cuda: enable cuda
    :return: None
    """
    # Load model
    model = xception(num_classes=2, pretrained=False)
    if model_path is not None:
        # Map the weights to the CPU when CUDA is disabled, so that a
        # GPU-saved checkpoint can be loaded on a CPU-only machine.
        state_dict = torch.load(model_path,
                                map_location='cuda:0' if cuda else 'cpu')
        model.load_state_dict(state_dict)
        print('Model found in {}'.format(model_path))
    else:
        print('No model found, initializing random model.')
    if cuda:
        print('CUDA!')
        model = model.cuda()

    file_names = sorted(os.listdir(data_path))

    # `end_frame - start_frame` raised TypeError for the default
    # end_frame=None; fall back to the number of files.
    if end_frame is None:
        total = len(file_names)
    else:
        total = end_frame - start_frame
    pbar = tqdm(total=total)

    frame_num = 0
    try:
        # The context manager guarantees that the prediction log is closed.
        with open(output_path, 'a') as f_prediction:
            for file in file_names:
                print(file)
                image_path = data_path + '/' + file
                print(image_path)
                image = cv2.imread(image_path)
                pbar.update(1)
                if image is None:
                    # cv2.imread returns None for files it cannot decode.
                    print('Could not read image, skipping.')
                    continue

                # --- Prediction ----------------------------------------
                # Actual prediction using our model
                prediction, output = predict_with_model(image,
                                                        model,
                                                        cuda=cuda)
                print('prediction:')
                print(prediction)
                f_prediction.write('Frame: ' + str(frame_num) +
                                   ', Prediction:' + str(prediction) + '\n')
                print('output:')
                print(output)
                # Count evaluated images so each line gets its own index.
                frame_num += 1
                cv2.waitKey(33)  # About 30 fps
    finally:
        pbar.close()
def run():
    """Evaluate the Xception checkpoint on the validation split.

    Builds the inference graph, restores ``checkpoint_file``, and runs
    ``num_epochs`` passes of streaming accuracy over the validation
    batches, writing summaries to ``log_eval``. Afterwards it plots up to
    ten images of the last batch with their predictions and saves the
    supervisor's checkpoint. All configuration (``log_eval``,
    ``dataset_dir``, ``batch_size``, ``num_epochs``, ``checkpoint_file``)
    is read from module scope.
    """
    # Create log_dir for evaluation information
    if not os.path.exists(log_eval):
        os.mkdir(log_eval)

    # Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)

        # Get the dataset first and load one batch of validation images and
        # labels tensors. Set is_training as False so as to use the
        # evaluation preprocessing
        dataset = get_split('validation', dataset_dir)
        images, raw_images, labels = load_batch(dataset,
                                                batch_size=batch_size,
                                                is_training=False)

        # Whole batches per epoch. Integer division keeps the epoch
        # boundary test `step % num_batches_per_epoch == 0` meaningful;
        # with true division the value is a float that only step 0
        # normally divides.
        num_batches_per_epoch = dataset.num_samples // batch_size

        # Now create the inference model but set is_training=False
        with slim.arg_scope(xception_arg_scope()):
            logits, end_points = xception(images,
                                          num_classes=dataset.num_classes,
                                          is_training=False)

        # Get all the variables to restore from the checkpoint file and
        # create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        # Just define the metrics to track without the loss or whatsoever
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)

        # Create the global step and an increment op for monitoring.
        # There is no apply_gradient call here, so the step is increased
        # manually.
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(global_step, global_step + 1)

        # Create a evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Simply takes in a session, runs the metrics op and some logging information.

            The `global_step` argument is accepted but not read; the
            increment op from the enclosing scope is run instead.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)

        # Confusion matrix summary. It is sized with the same class count
        # as the model head so that every prediction index is in range.
        with tf.name_scope('confusion_matrix'):
            confusion_matrix = tf.as_string(
                tf.confusion_matrix(labels=labels,
                                    predictions=predictions,
                                    num_classes=dataset.num_classes,
                                    name='Confusion'))
            tf.summary.text("confusion_matrix", confusion_matrix)

        my_summary_op = tf.summary.merge_all()

        # Get your supervisor
        sv = tf.train.Supervisor(logdir=log_eval,
                                 summary_op=None,
                                 init_fn=restore_fn)

        # Now we are ready to run in one session
        with sv.managed_session() as sess:
            for step in range(int(num_batches_per_epoch * num_epochs)):
                # Print vital information every start of the epoch
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s',
                                 step // num_batches_per_epoch + 1,
                                 num_epochs)
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                # Every step evaluates; every 10th also writes summaries.
                eval_step(sess,
                          metrics_op=metrics_op,
                          global_step=sv.global_step)
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            # Visualize one more batch to see what the model predicted.
            # The fetched values get their own names instead of
            # overwriting the graph tensors.
            image_values, label_values, prediction_values, probability_values = sess.run(
                [raw_images, labels, predictions, probabilities])

            # Show at most ten samples; a batch smaller than ten used to
            # raise IndexError.
            for i in range(min(10, len(image_values))):
                image = image_values[i]
                label = label_values[i]
                prediction = prediction_values[i]
                probability = probability_values[i]
                prediction_name = dataset.labels_to_name[prediction]
                label_name = dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])
                img_plot = plt.imshow(image)

                # Set up the plot and hide axes
                plt.title(text)
                img_plot.axes.get_yaxis().set_ticks([])
                img_plot.axes.get_xaxis().set_ticks([])
                plt.show()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
hidden_feature=512, n_classes=args.n_classes) elif args.arch == 'ResNet50': model = models.resnet50(pretrained=True) # model.fc = nn.Linear(in_features=2048, out_features=args.n_classes, bias=True) # model.fc = nn.Sequential( # nn.Linear(in_features=2048, out_features=512, bias=True), # nn.BatchNorm1d(num_features=512), # nn.ReLU(), # nn.Linear(in_features=512, out_features=args.n_classes) # ) model.fc = MLP_Classifier(in_feature=2048, hidden_feature=512, n_classes=args.n_classes) elif args.arch == 'Xception': model = xception(pretrained='imagenet') # model.last_linear = nn.Linear(in_features=2048, out_features=args.n_classes, bias=True) # model.last_linear = nn.Sequential( # nn.Linear(in_features=2048, out_features=512, bias=True), # nn.BatchNorm1d(num_features=512), # nn.ReLU(), # nn.Linear(in_features=512, out_features=args.n_classes) # ) model.last_linear = MLP_Classifier(in_feature=2048, hidden_feature=512, n_classes=args.n_classes) # 设置输出目录 args.out_dir = './out/' + args.arch if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) state_log = open(os.path.join(args.out_dir, 'test_acc.log'), 'w')
def __init__(self, pretrained=True, num_classes=-1):
    """Assemble the DeepLabv3+ head on top of an Xception backbone.

    Args:
        pretrained: Forwarded to ``xception.xception``.
        num_classes: Output channels of the final 1x1 classifier conv.
    """
    super(deeplabv3plus, self).__init__()

    bn_momentum = 0.0003
    output_stride = 16
    head_channels = 256

    def shortcut(in_channels, out_channels):
        # 1x1 conv + BN + ReLU on a shallow feature map.
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, 1, padding=1 // 2,
                      bias=True),
            SynchronizedBatchNorm2d(out_channels, momentum=bn_momentum),
            nn.ReLU(inplace=True),
        )

    def refine(in_channels):
        # Two 3x3 conv + BN + ReLU stages with dropout 0.5 then 0.1.
        return nn.Sequential(
            nn.Conv2d(in_channels, head_channels, 3, 1, padding=1,
                      bias=True),
            SynchronizedBatchNorm2d(head_channels, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Conv2d(head_channels, head_channels, 3, 1, padding=1,
                      bias=True),
            SynchronizedBatchNorm2d(head_channels, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
        )

    # Placeholders; the real backbone is attached after weight init below.
    self.backbone = None
    self.backbone_layers = None

    self.aspp = ASPP(dim_in=2048,
                     dim_out=head_channels,
                     rate=output_stride // 16,
                     bn_mom=bn_momentum)
    self.dropout1 = nn.Dropout(0.5)
    self.upsample4 = nn.UpsamplingBilinear2d(scale_factor=4)
    self.upsample_sub = nn.UpsamplingBilinear2d(
        scale_factor=output_stride // 8)  #16//4

    shallow1_dim = 64
    self.shortcut_conv1_1 = shortcut(728, shallow1_dim)
    self.cat_conv1_1 = refine(head_channels + shallow1_dim)

    shallow2_dim = 32
    self.shortcut_conv1_2 = shortcut(256, shallow2_dim)
    self.cat_conv1_2 = refine(head_channels + shallow2_dim)

    self.predict5x5 = refine(head_channels)
    self.cls_conv = nn.Conv2d(head_channels, num_classes, 1, 1, padding=0)

    # Initialise the head only: the backbone is not yet a submodule here,
    # so this loop leaves its weights untouched.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(module, SynchronizedBatchNorm2d):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)

    self.backbone = xception.xception(pretrained=pretrained,
                                      os=output_stride)
    self.backbone_layers = self.backbone.get_layers()
def __init__(self, num_inputs, num_outputs):
    """Create an Xception model with a ``2048 -> num_outputs`` ``fc`` layer.

    Args:
        num_inputs: Accepted but not read by this constructor.
        num_outputs: Passed to ``xception`` as ``num_classes`` and used as
            the output width of the ``fc`` layer.
    """
    super(Image_Xception, self).__init__()
    backbone = xception(num_classes=num_outputs)
    backbone.fc = nn.Linear(2048, num_outputs)
    self.model = backbone
def _xception_small(self,
                    inputs,
                    num_classes=None,
                    is_training=True,
                    global_pool=True,
                    output_stride=None,
                    regularize_depthwise=True,
                    reuse=None,
                    scope='xception_small'):
    """A shallow and thin Xception for faster tests."""

    def make_block(name, width, skip, stride, num_units=1,
                   activate_in_separable=False):
        # Every block uses the same depth for its three separable convs.
        return xception.xception_block(
            name,
            depth_list=[width, width, width],
            skip_connection_type=skip,
            activation_fn_in_separable_conv=activate_in_separable,
            regularize_depthwise=regularize_depthwise,
            num_units=num_units,
            stride=stride)

    entry_flow = [
        make_block('entry_flow/block1', 1, 'conv', 2),
        make_block('entry_flow/block2', 2, 'conv', 2),
        make_block('entry_flow/block3', 4, 'conv', 1),
        make_block('entry_flow/block4', 4, 'conv', 2),
    ]
    middle_flow = [
        make_block('middle_flow/block1', 4, 'sum', 1, num_units=2),
    ]
    exit_flow = [
        make_block('exit_flow/block1', 8, 'conv', 2),
        make_block('exit_flow/block2', 16, 'none', 1,
                   activate_in_separable=True),
    ]

    return xception.xception(inputs,
                             blocks=entry_flow + middle_flow + exit_flow,
                             num_classes=num_classes,
                             is_training=is_training,
                             global_pool=global_pool,
                             output_stride=output_stride,
                             reuse=reuse,
                             scope=scope)
transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) speed_dataset = speedDataset.ImageToSpeedDataset(csv='data/im_im_sp.csv', root_dir='data/images/') dataset_loader = torch.utils.data.DataLoader(speed_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=10) loser = nn.MSELoss() use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") print(device) m = xception.xception(pretrained=False, device=device) optimizer = optim.AdamW(m.parameters(), lr=0.001) if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") m = nn.DataParallel(m) m.to(device) m.train() epochs = 200 print("ready to train") loss_history = [] eval_history = [] start = time.time()
def main():
    """Train an image-classification network with PaddlePaddle v2.

    Command-line arguments:
        -m / --model: Network architecture to train (required).
        -r / --retrain_file: Gzipped parameter tar to initialise from;
            empty (the default) trains from scratch.

    Parameters are written to ``params_pass_<pass_id>.tar.gz`` every 100
    batches and at the end of each pass. The model is tested on the
    validation reader after each pass.
    """
    # parse the argument
    parser = argparse.ArgumentParser()
    # `required=True`: without a model no branch below assigns `out`, and
    # the run used to end in a NameError.
    parser.add_argument('-m',
                        '--model',
                        required=True,
                        help='The model for image classification',
                        choices=[
                            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet',
                            'googlenet', 'inception-resnet-v2',
                            'inception_v4', 'xception'
                        ])
    parser.add_argument(
        '-r',
        '--retrain_file',
        type=str,
        default='',
        help="The model file to retrain, none is for train from scratch")
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(name="image",
                              type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(name="label",
                            type=paddle.data_type.integer_value(CLASS_DIM))

    extra_layers = None
    learning_rate = 0.0001
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        conv, pool, out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
        learning_rate = 0.1
    elif args.model == 'googlenet':
        # The two auxiliary outputs get their own weighted losses and
        # error evaluators.
        out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
        loss1 = paddle.layer.cross_entropy_cost(input=out1,
                                                label=lbl,
                                                coeff=0.3)
        paddle.evaluator.classification_error(input=out1, label=lbl)
        loss2 = paddle.layer.cross_entropy_cost(input=out2,
                                                label=lbl,
                                                coeff=0.3)
        paddle.evaluator.classification_error(input=out2, label=lbl)
        extra_layers = [loss1, loss2]
    elif args.model == 'inception-resnet-v2':
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(image,
                                                      class_dim=CLASS_DIM,
                                                      dropout_rate=0.5,
                                                      data_dim=DATA_DIM)
    elif args.model == 'inception_v4':
        conv, pool, out = inception_v4.inception_v4(image,
                                                    class_dim=CLASS_DIM)
    elif args.model == 'xception':
        out = xception.xception(image, class_dim=CLASS_DIM)

    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)
    for k, v in parameters.__param_conf__.items():
        print(" config key {0}\t\t\tval{1}".format(k, v))
    print("-" * 50)

    if args.retrain_file:
        print("restore parameters from {0}".format(args.retrain_file))
        # Parameters whose name starts with '___fc_layer_0__' are not
        # restored from the file.
        exclude_params = [
            param for param in parameters.names()
            if param.startswith('___fc_layer_0__')
        ]
        # Close the archive once the parameters are read.
        with gzip.open(args.retrain_file) as retrain_tar:
            parameters.init_from_tar(retrain_tar, exclude_params)

    # Create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                         BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp",
    )

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            # flowers.train(),
            # To use other data, replace the above line with:
            reader.train_reader('valid_train0.lst'),
            buf_size=2048),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        # flowers.valid(),
        # To use other data, replace the above line with:
        reader.test_reader('valid_val.lst'),
        batch_size=BATCH_SIZE)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 extra_layers=extra_layers)

    # End batch and end pass event handler
    def event_handler(event):
        # `step` and `start` are module-level counters shared with the
        # module-level loss/accuracy scalar recorders.
        global step
        global start
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                # Single-argument print() works on Python 2 and 3.
                print("\nPass %d, Batch %d, Cost %f, %s, %s" %
                      (event.pass_id, event.batch_id, event.cost,
                       event.metrics, time.time() - start))
                loss_scalar.add_record(step, event.cost)
                acc_scalar.add_record(
                    step,
                    1 - event.metrics['classification_error_evaluator'])
                # Restart the interval timer once, after the records are
                # written (an earlier redundant reset was dropped).
                start = time.time()
                step += 1
            if event.batch_id % 100 == 0:
                with gzip.open('params_pass_%d.tar.gz' % event.pass_id,
                               'w') as f:
                    trainer.save_parameter_to_tar(f)

        if isinstance(event, paddle.event.EndPass):
            with gzip.open('params_pass_%d.tar.gz' % event.pass_id,
                           'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(reader=test_reader)
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    trainer.train(reader=train_reader,
                  num_passes=200,
                  event_handler=event_handler)
def model(inputs, region, is_training):
    """Build the Xception encoder, ConvLSTM and decoder graph and return logits.

    The names ``data_format``, ``batch_norm_decay``, ``pre_trained_model`` and
    ``num_classes`` are read from the enclosing scope.

    Args:
        inputs: Image batch tensor. The decoder concatenates on axis 3 and
            reads the spatial size from axes 1 and 2.
        region: Second input tensor, forwarded to ``xception.xception``
            together with ``inputs``.
        is_training: Python bool. Selects batch-norm training mode, enables
            ConvLSTM input dropout and (with ``pre_trained_model``) checkpoint
            initialisation.

    Returns:
        Logits tensor with ``num_classes`` channels and no activation,
        resized to the spatial size taken from ``inputs``.
    """
    if data_format == 'channels_first':
        # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
        # See https://www.tensorflow.org/performance/performance_guide#data_formats
        # NOTE(review): after this transpose, `tf.shape(inputs)[1:3]` and the
        # `axis=3` concats below no longer address (H, W) / channels. Confirm
        # whether 'channels_first' is ever actually used with this model.
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        region = tf.transpose(region, [0, 3, 1, 2])

    # Encoder.
    with tf.contrib.slim.arg_scope(
            xception.xception_arg_scope(batch_norm_decay=batch_norm_decay)):
        _, end_points, low_level_features = xception.xception(
            inputs, region, num_classes=None, is_training=is_training)

    if is_training and pre_trained_model is not None:
        # Initialise every variable created so far from the checkpoint,
        # keyed by its name without the ':0' output suffix.
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(
            exclude=None)
        tf.train.init_from_checkpoint(
            pre_trained_model,
            {v.name.split(':')[0]: v for v in variables_to_restore})

    inputs_size = tf.shape(inputs)[1:3]
    net = end_points['Logits']
    encoder_output = atrous_spatial_pyramid_pooling(
        net, batch_norm_decay, is_training)

    with tf.variable_scope("lstm"):
        with tf.contrib.slim.arg_scope(
                xception.xception_arg_scope(
                    batch_norm_decay=batch_norm_decay)):
            with arg_scope([layers.batch_norm], is_training=is_training):
                # Static spatial size of the encoder output; used for both
                # the ConvLSTM input shape and the reshapes around it.
                k_size = encoder_output.get_shape().as_list()[2]
                net = layers_lib.conv2d(encoder_output, 64, [1, 1],
                                        stride=1, scope="conv1_1x1")
                # Regroup the batch into sequences of 4 feature maps.
                rnn_input = tf.reshape(net, [-1, 4, k_size, k_size, 64])
                cell_1 = tf.contrib.rnn.ConvLSTMCell(
                    conv_ndims=2,
                    input_shape=[k_size, k_size, 64],
                    output_channels=64,
                    kernel_shape=[3, 3])
                multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([cell_1])
                # Dropout must only be active while training. With a keep
                # probability of 1.0 the wrapper is a no-op, so the graph
                # structure and variable names stay the same in both modes.
                input_keep_prob = 0.4 if is_training else 1.0
                dropout_rnn = tf.contrib.rnn.DropoutWrapper(
                    multi_rnn_cell, input_keep_prob=input_keep_prob)
                rnn_outputs, _ = tf.nn.dynamic_rnn(cell=dropout_rnn,
                                                   inputs=rnn_input,
                                                   time_major=False,
                                                   dtype=tf.float32)

    with tf.variable_scope("decoder"):
        with tf.contrib.slim.arg_scope(
                xception.xception_arg_scope(
                    batch_norm_decay=batch_norm_decay)):
            with arg_scope([layers.batch_norm], is_training=is_training):
                with tf.variable_scope("low_level_features"):
                    low_level_features = layers_lib.conv2d(
                        low_level_features, 24, [1, 1],
                        stride=1, scope='conv_1x1')
                    low_level_features_size = tf.shape(
                        low_level_features)[1:3]

                with tf.variable_scope("upsampling_logits"):
                    # Flatten the (sequence, time) axes back into the batch.
                    encoder_output_re = tf.reshape(
                        rnn_outputs, [-1, k_size, k_size, 64])
                    net = layers_lib.conv2d(encoder_output_re, 64, [3, 3],
                                            stride=1, scope='conv_3x3_1')
                    net = layers_lib.conv2d(net, 64, [3, 3],
                                            stride=1, scope='conv_3x3_2')
                    net = tf.image.resize_bilinear(
                        net, low_level_features_size, name='upsample_1')
                    net = tf.concat([net, low_level_features],
                                    axis=3, name='concat')
                    net = layers_lib.conv2d(net, 44, [3, 3],
                                            stride=1, scope='conv_3x3_3')
                    net = layers_lib.conv2d(net, 44, [3, 3],
                                            stride=1, scope='conv_3x3_4')
                    net = tf.image.resize_bilinear(
                        net, inputs_size, name='upsample_2')
                    # A 1x1 projection of the raw inputs is concatenated as a
                    # second, full-resolution skip connection.
                    low_level_features_two = layers_lib.conv2d(
                        inputs, 1, [1, 1],
                        stride=1, scope='low_level_feature_conv_1x1')
                    net = tf.concat([net, low_level_features_two],
                                    axis=3, name='concat_2')
                    logits = layers_lib.conv2d(net, num_classes, [1, 1],
                                               activation_fn=None,
                                               normalizer_fn=None,
                                               scope='outputs')

    return logits
X,Y_name = iterator.get_next() ###data.api end ########Is_training Is_training = tf.placeholder(tf.bool)#标记是否为True ###end ###dropout KEEP_PROB = tf.placeholder(tf.float32) ###dropout end ###网络定义从这里开始 with slim.arg_scope(xception_arg_scope()): Y_prediction,end_points = xception(X, num_classes=16, is_training=Is_training, scope='xception', keep_prob=KEEP_PROB) ###网络结构这里结束 Y_softmax = tf.nn.softmax(Y_prediction) initialization()#初始化函数,包括初始化训练集和测试集,得到训练集和测试集的个数,取出id对应的种类 preprocess.main()#creat TFrecord variables_to_restore = slim.get_variables_to_restore() ###saver saver = tf.train.Saver(variables_to_restore,max_to_keep = 1) # 保存所有的变量,最多保存10个 model_file=tf.train.latest_checkpoint('./save/')#尝试加载上次最新的训练结果 with open("./prediction-split-softmax.csv", 'a', newline='') as csv_file:
def run():
    """Train the Xception classifier on the 'train' split and save a checkpoint.

    Builds the input pipeline, model, loss, Adam optimizer with a staircase
    exponentially decaying learning rate, streaming-accuracy metric and
    summaries in a fresh graph, then runs ``num_steps_per_epoch * num_epochs``
    training steps inside a ``tf.train.Supervisor`` managed session.
    Configuration (``log_dir``, ``dataset_dir``, ``file_pattern``,
    ``batch_size``, ``num_epochs``, ``num_epochs_before_decay``,
    ``initial_learning_rate``, ``learning_rate_decay_factor``) is read from
    module-level names.
    """
    # Create the log directory here rather than at import time, so that merely
    # importing this module does not touch the filesystem. makedirs also
    # handles a nested log_dir whose parents do not exist yet.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # ======================= TRAINING PROCESS =========================
    # Construct the graph and build the model.
    with tf.Graph().as_default() as graph:
        # Emit INFO-level log records.
        tf.logging.set_verbosity(tf.logging.INFO)

        # Create the dataset and load one batch.
        dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
        images, _, labels = load_batch(dataset, batch_size=batch_size)

        # Number of steps per epoch and before each learning-rate decay.
        num_batches_per_epoch = dataset.num_samples // batch_size
        num_steps_per_epoch = num_batches_per_epoch  # one step == one batch
        decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)

        # Create the model inference.
        with slim.arg_scope(xception_arg_scope()):
            logits, end_points = xception(images,
                                          num_classes=dataset.num_classes,
                                          is_training=True)

        # One-hot encode the labels.
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Cross-entropy loss; get_total_loss() adds the regularization losses.
        loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                               logits=logits)
        total_loss = tf.losses.get_total_loss()

        # Global step used for the learning-rate schedule and training.
        global_step = get_or_create_global_step()

        # Exponentially decaying learning rate.
        lr = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=learning_rate_decay_factor,
            staircase=True)

        # Optimizer driven by the decaying learning rate.
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        # Training op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # Metrics. `predictions` holds class indices, not one-hot vectors.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # Summaries to monitor, merged into a single op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        def train_step(sess, train_op, global_step):
            '''
            Run the train op, the global step and the metrics op in one
            session call and log the loss and the time the step took.
            '''
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            logging.info('global step %s: loss: %.4f (%.2f sec/step)',
                         global_step_count, total_loss, time_elapsed)

            return total_loss, global_step_count

        # Supervisor for running a managed session. Do not run the summary_op
        # automatically or else it will consume too much memory.
        sv = tf.train.Supervisor(logdir=log_dir, summary_op=None)

        with sv.managed_session() as sess:
            for step in range(num_steps_per_epoch * num_epochs):
                # At the start of every epoch, show the vital information.
                if step % num_batches_per_epoch == 0:
                    # Floor division keeps the epoch number an integer on
                    # Python 3 as well.
                    logging.info('Epoch %s/%s',
                                 step // num_batches_per_epoch + 1,
                                 num_epochs)
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])
                    logging.info('Current Learning Rate: %s',
                                 learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s',
                                 accuracy_value)

                    # Print a few logits and predictions as a sanity check.
                    (logits_value, probabilities_value,
                     predictions_value, labels_value) = sess.run(
                         [logits, probabilities, predictions, labels])
                    print('logits: \n', logits_value[:5])
                    print('Probabilities: \n', probabilities_value[:5])
                    print('predictions: \n', predictions_value[:5])
                    print('Labels:\n:', labels_value[:5])

                loss, _ = train_step(sess, train_op, sv.global_step)

                # Log the summaries every 10 steps.
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # Log the final training loss and accuracy.
            logging.info('Final Loss: %s', loss)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            # Write the final checkpoint explicitly. Without this the only
            # checkpoints are the Supervisor's periodic ones, which may
            # predate the last training steps.
            logging.info('Finished training! Saving model to disk now.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
def main():
    """Train the image classifier named on the command line with PaddlePaddle.

    The single positional argument selects the network. The function builds
    the selected network on the ``image`` / ``label`` data layers, attaches a
    classification cost, and trains for 200 passes on the flowers dataset
    with momentum SGD. After every pass it writes the parameters to
    ``params_pass_<pass_id>.tar.gz`` and prints the validation metrics.
    """
    # Parse the command line.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'model',
        help='The model for image classification',
        choices=[
            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
            'inception-resnet-v2', 'inception_v4', 'xception'
        ])
    args = arg_parser.parse_args()
    model_name = args.model

    # PaddlePaddle init.
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(CLASS_DIM))

    extra_layers = None
    learning_rate = 0.01

    # Networks that take only (image, class_dim) and return a single output.
    # Builders are lazy so that only the selected network is constructed.
    plain_builders = {
        'alexnet': lambda: alexnet.alexnet(image, class_dim=CLASS_DIM),
        'vgg13': lambda: vgg.vgg13(image, class_dim=CLASS_DIM),
        'vgg16': lambda: vgg.vgg16(image, class_dim=CLASS_DIM),
        'vgg19': lambda: vgg.vgg19(image, class_dim=CLASS_DIM),
        'resnet': lambda: resnet.resnet_imagenet(image, class_dim=CLASS_DIM),
        'inception_v4': lambda: inception_v4.inception_v4(
            image, class_dim=CLASS_DIM),
        'xception': lambda: xception.xception(image, class_dim=CLASS_DIM),
    }

    if model_name == 'googlenet':
        # GoogLeNet has two auxiliary heads; each gets a down-weighted loss
        # and its own error evaluator.
        out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
        loss1 = paddle.layer.cross_entropy_cost(
            input=out1, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out1, label=lbl)
        loss2 = paddle.layer.cross_entropy_cost(
            input=out2, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out2, label=lbl)
        extra_layers = [loss1, loss2]
    elif model_name == 'inception-resnet-v2':
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(
            image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
    elif model_name in plain_builders:
        out = plain_builders[model_name]()
        if model_name == 'resnet':
            learning_rate = 0.1

    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters.
    parameters = paddle.parameters.create(cost)

    # Create optimizer.
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(
            rate=0.0005 * BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp", )

    # To use other data, replace flowers.train() with
    # reader.train_reader('train.list').
    shuffled_train = paddle.reader.shuffle(flowers.train(), buf_size=1000)
    train_reader = paddle.batch(shuffled_train, batch_size=BATCH_SIZE)

    # To use other data, replace flowers.valid() with
    # reader.test_reader('val.list').
    test_reader = paddle.batch(flowers.valid(), batch_size=BATCH_SIZE)

    # Create trainer.
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 extra_layers=extra_layers)

    def event_handler(event):
        """Report per-batch cost; on end of pass, checkpoint and evaluate."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1 == 0:
                batch_report = "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
                print(batch_report)
        if isinstance(event, paddle.event.EndPass):
            archive_name = 'params_pass_%d.tar.gz' % event.pass_id
            with gzip.open(archive_name, 'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(reader=test_reader)
            pass_report = "\nTest with Pass %d, %s" % (event.pass_id,
                                                       result.metrics)
            print(pass_report)

    trainer.train(reader=train_reader,
                  num_passes=200,
                  event_handler=event_handler)