def get_initial_weights(self, model_type):
    tf.reset_default_graph()
    if model_type == "perceptron":
        m = Perceptron()
        inputs = tf.placeholder(tf.float32, shape=(None, 28 * 28))
        _ = m.get_model(features={"x": inputs}, labels=None, mode='predict', params=None)
    elif model_type == 'cnn-mnist':
        m = CNN()
        inputs = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        _ = m.get_model(features={"x": inputs}, labels=None, mode='predict', params=None)
    elif model_type == 'cnn-cifar10':
        m = CNN()
        inputs = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        _ = m.get_model(features={"x": inputs}, labels=None, mode='predict', params=None)
    else:
        raise ValueError(
            "Model {model_type} not supported.".format(model_type=model_type))
    with tf.Session().as_default() as sess:
        sess.run(tf.global_variables_initializer())
        collection = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        weights = {tensor.name: sess.run(tensor) for tensor in collection}
    tf.reset_default_graph()
    return weights
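# Hypothetical usage sketch for get_initial_weights above (not from the original
# source): `node` stands in for an instance of whatever class defines the method.
# The call returns a dict mapping trainable-variable names to numpy arrays.
# weights = node.get_initial_weights("cnn-mnist")
# for name, value in weights.items():
#     print(name, value.shape)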
def test(args):
    np.random.seed(1234)
    train, _, test = load_dataset(args.data_dir, valid=args.validation,
                                  dataset_seed=args.dataset_seed)
    print("N_train:{}".format(train.N))
    enc = CNN(n_outputs=args.n_categories, dropout_rate=args.dropout_rate)
    chainer.serializers.load_npz(args.trained_model_path, enc)
    if args.gpu > -1:
        chainer.cuda.get_device(args.gpu).use()
        enc.to_gpu()
    print("Finetune")
    for i in range(args.finetune_iter):
        x, _ = train.get(args.batchsize_finetune, gpu=args.gpu)
        enc(x)
    acc_sum = 0
    test_x, test_t = test.get()
    N_test = test.N
    for i in range(0, N_test, args.batchsize_eval):
        x = test_x[i:i + args.batchsize_eval]
        t = test_t[i:i + args.batchsize_eval]
        if args.gpu > -1:
            x, t = cuda.to_gpu(x, device=args.gpu), cuda.to_gpu(t, device=args.gpu)
        logit = enc(x, train=False)
        acc = F.accuracy(logit, t).data
        acc_sum += acc * x.shape[0]
    acc_test = acc_sum / N_test
    print("test acc: ", acc_test)
def get_predictions(self, frames, scope):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    if self.operation == 'training':
        cnn_output = cnn.create_model(frames, cnn.conv_filters,
                                      keep_prob=self.keep_prob)
    else:
        cnn_output = cnn.create_model(frames, cnn.conv_filters, keep_prob=1.0)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    rnn_output = rnn.create_model(cnn_output, scope + '_rnn')
    if self.is_attention:
        attention = Attention(self.batch_size)
        attention_output = attention.create_model(rnn_output, scope + '_attention')
        fc = FC(self.num_classes)
        outputs = fc.create_model(attention_output, scope + '_fc')
    else:
        rnn_output = rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        outputs = fc.create_model(rnn_output, scope + '_fc')
    return outputs
def GetDefaultCNN():
    cnn = CNN(
        in_features=(32, 32, 3),
        out_features=10,
        conv_filters=[32, 32, 64, 64],
        conv_kernel_size=[3, 3, 3, 3],
        conv_strides=[1, 1, 1, 1],
        conv_pad=[0, 0, 0, 0],
        max_pool_kernels=[None, (2, 2), None, (2, 2)],
        max_pool_strides=[None, 2, None, 2],
        use_dropout=False,
        use_batch_norm=False,
        actv_func=[None, "relu", None, "relu"],
        device=device
    )

    # Create MLP
    # Calculate the input shape
    s = cnn.GetCurShape()
    in_features = s[0] * s[1] * s[2]

    mlp = MLP(in_features, 10, [], [],
              use_batch_norm=False, use_dropout=False,
              use_softmax=False, device=device)
    # mlp = DefaultCifar10MLP(device=device, in_features=in_features)

    cnn.AddMLP(mlp)
    return cnn
def setup_model(self, model_type):
    self.model_type = model_type
    if model_type == "perceptron":
        self.model = Perceptron()
    elif model_type == "cnn-mnist":
        self.model = CNN()
    elif model_type == "cnn-cifar10":
        self.model = CNN()
    else:
        raise ValueError("Model {0} not supported.".format(model_type))
def __init__(self, config, pad_idx, train_iter=None, valid_iter=None, test_iter=None):
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.config = config
    self.pad_idx = pad_idx

    # Train mode
    if self.config.mode == 'train':
        self.train_iter = train_iter
        self.valid_iter = valid_iter
    # Test mode
    else:
        self.test_iter = test_iter

    model_type = {
        'vanilla_rnn': RNN(self.config, self.pad_idx),
        'bidirectional_lstm': BidirectionalLSTM(self.config, self.pad_idx),
        'cnn': CNN(self.config),
    }
    self.model = model_type[self.config.model]
    self.model.to(self.device)

    # SGD updates all parameters with the 'same' learning rate
    # Adam adapts the learning rate for each parameter
    optim_type = {
        'SGD': optim.SGD(self.model.parameters(), lr=self.config.lr),
        'Adam': optim.Adam(self.model.parameters()),
    }
    self.optimizer = optim_type[self.config.optim]

    # BCEWithLogitsLoss carries out both the sigmoid and the binary cross-entropy steps.
    self.criterion = nn.BCEWithLogitsLoss()
    self.criterion.to(self.device)
def main(args):
    # Get dataset
    datasource = DataSource(None, [args.input_image],
                            shape=tuple(args.eye_shape),
                            data_format=args.data_format,
                            heatmap_scale=args.heatmap_scale)

    # Get model
    learning_schedule = [{
        'loss_terms_to_optimize': {
            'heatmaps_mse': ['hourglass'],
            'radius_mse': ['radius'],
        },
        'learning_rate': 1e-3,
    }]
    model = CNN(datasource.tensors, datasource.x_shape, learning_schedule,
                data_format=args.data_format)

    # Get evaluator
    evaluator = Trainer(model, model_checkpoint=args.model_checkpoint)

    # Predict
    output, losses = evaluator.run_predict(datasource)
    input_data = util.load_pickle(args.input_image)
    print('Losses', losses)
    util.plot_predictions(output, input_data, tuple(args.eye_shape))
def __init__(self, args, device, rel2id, word_emb=None):
    lr = args.lr
    lr_decay = args.lr_decay
    self.cpu = torch.device('cpu')
    self.device = device
    self.args = args
    self.max_grad_norm = args.max_grad_norm
    if args.model == 'pa_lstm':
        self.model = PositionAwareLSTM(args, rel2id, word_emb)
    elif args.model == 'bgru':
        self.model = BGRU(args, rel2id, word_emb)
    elif args.model == 'cnn':
        self.model = CNN(args, rel2id, word_emb)
    elif args.model == 'pcnn':
        self.model = PCNN(args, rel2id, word_emb)
    elif args.model == 'lstm':
        self.model = LSTM(args, rel2id, word_emb)
    else:
        raise ValueError("Unknown model: {}".format(args.model))
    self.model.to(device)
    self.criterion = nn.CrossEntropyLoss()
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    # self.parameters = self.model.parameters()
    self.optimizer = torch.optim.SGD(self.parameters, lr)
def __init__(self, args, device, rel2id, word_emb=None):
    lr = args.lr
    lr_decay = args.lr_decay
    self.cpu = torch.device('cpu')
    self.device = device
    self.args = args
    self.rel2id = rel2id
    self.max_grad_norm = args.max_grad_norm
    if args.model == 'pa_lstm':
        self.model = PositionAwareRNN(args, rel2id, word_emb)
    elif args.model == 'bgru':
        self.model = BGRU(args, rel2id, word_emb)
    elif args.model == 'cnn':
        self.model = CNN(args, rel2id, word_emb)
    elif args.model == 'pcnn':
        self.model = PCNN(args, rel2id, word_emb)
    elif args.model == 'lstm':
        self.model = LSTM(args, rel2id, word_emb)
    else:
        raise ValueError("Unknown model: {}".format(args.model))
    self.model.to(device)
    self.criterion = nn.CrossEntropyLoss()
    if args.fix_bias:
        self.model.flinear.bias.requires_grad = False
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    # self.parameters = self.model.parameters()
    self.optimizer = torch.optim.SGD(self.parameters, lr)
    self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min',
                                                    patience=3, factor=lr_decay)
def main(args):
    # Get dataset
    test_files = glob.glob(os.path.join(args.test_path, '*.pickle'))
    datasource = DataSource(None, test_files,
                            shape=tuple(args.eye_shape),
                            data_format=args.data_format,
                            heatmap_scale=args.heatmap_scale)

    # Get model
    learning_schedule = [{
        'loss_terms_to_optimize': {
            'heatmaps_mse': ['hourglass'],
            'radius_mse': ['radius'],
        },
        'learning_rate': 1e-3,
    }]
    model = CNN(datasource.tensors, datasource.x_shape, learning_schedule)

    # Get evaluator
    evaluator = Trainer(model, model_checkpoint=args.model_checkpoint)

    # Evaluate
    avg_losses = evaluator.run_eval(datasource)
    print('Average Losses', avg_losses)
def setup(self):
    # Load a TensorFlow model into memory.
    # If needed, freeze the graph to get better performance.
    data_format = self.args.data_format
    shape = tuple(self.args.eye_shape)
    preprocessor = ImgPreprocessor(data_format)
    datasource = ImgDataSource(shape=shape, data_format=data_format)

    # Get model
    model = CNN(datasource.tensors, datasource.x_shape, None,
                data_format=data_format, predict_only=True)

    # Start session
    saver = tf.train.Saver()
    sess = tf.Session()

    # Init variables
    init = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()
    sess.run(init)
    sess.run(init_l)

    # Restore model checkpoint
    saver.restore(sess, self.args.model_checkpoint)

    return datasource, preprocessor, sess, model
def main(args):
    # Get dataset
    train_files = glob.glob(os.path.join(args.train_path, '*.pickle'))
    eval_files = glob.glob(os.path.join(args.eval_path, '*.pickle'))
    datasource = DataSource(train_files, eval_files,
                            shape=tuple(args.eye_shape),
                            batch_size=args.batch_size,
                            data_format=args.data_format,
                            heatmap_scale=args.heatmap_scale)

    # Get model
    learning_schedule = [
        {
            'loss_terms_to_optimize': {
                'heatmaps_mse': ['hourglass'],
                'radius_mse': ['radius'],
            },
            'learning_rate': 1e-3,
        },
    ]
    model = CNN(datasource.tensors, datasource.x_shape, learning_schedule)

    # Get trainer
    trainer = Trainer(model, eval_steps=args.eval_steps)

    # Train for args.steps steps
    return trainer.run_training(datasource, args.steps)
def multi_get_attention(self, frames):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    cnn_output = cnn.create_model(frames, cnn.conv_filters)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    rnn_output = rnn.create_model(cnn_output)
    if self.is_attention:
        attention = Attention(self.batch_size)
        attention_output = attention.attention_analysis(rnn_output)
        return attention_output
    else:
        rnn_output = rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        outputs = fc.create_model(rnn_output)
        return outputs
def main(args):
    model = CNN()
    chainer.serializers.load_npz(args.init_weights, model)
    model = chainer.links.Classifier(model)

    paths = glob.glob(os.path.join(args.dataset_dir, '**/*.wav'))
    testset = ESDataset(paths, label_index=args.label_index)

    y_targs = []
    y_preds = []
    for data in testset:
        x, y = data
        y_targs.append(int(y))
        y_preds.append(int(predict(model, x)))

    # Emotion class labels (Japanese): neutral, calm, happy, sad, angry,
    # fearful, disgusted, surprised
    class_names = ['中立', '穏やか', '幸せ', '悲しみ', '怒り', '恐怖', '嫌悪', '驚き']
    accuracy = accuracy_score(y_targs, y_preds)
    # Plot title format: "<title> accuracy: <percentage>"
    plot_confusion_matrix(y_targs, y_preds, classes=class_names, normalize=True,
                          title='{0} 精度:{1:.1%}'.format(args.title, accuracy))
    plt.savefig(os.path.join(args.out_dir, 'confusion_matrix.png'))
class CNNPredictor:
    def __init__(self, name="WikiContrvCNN", input_name=None):
        if input_name is None:
            input_name = name
        self.hp = hyperparams.HPCNN()
        self.sess = init_session()
        self.sess.run(tf.global_variables_initializer())
        self.dropout_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.seq_max = self.hp.seq_max
        self.word2idx = cache.load_cache(input_name + ".voca")
        init_emb = cache.load_cache("init_emb_word2vec")
        self.model = CNN("controv", self.seq_max, 2, [2, 3, 4], 128,
                         init_emb, self.hp.embedding_size, self.dropout_prob)
        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, self.seq_max],
                                         name="comment_input")
        self.sout = self.model.network(self.input_text)
        self.tokenize = lambda x: tokenize(x, set(), False)
        loader = tf.train.Saver()
        loader.restore(self.sess, cpath.get_model_full_path(name))

    def encode(self, docs):
        def convert(word):
            if word in self.word2idx:
                return self.word2idx[word]
            else:
                return OOV

        data = []
        for doc in docs:
            entry = []
            for token in self.tokenize(doc):
                entry.append(convert(token))
            entry = entry[:self.seq_max]
            while len(entry) < self.seq_max:
                entry.append(PADDING)
            data.append((entry, 0))
        return data

    def score(self, docs):
        inputs = self.encode(docs)

        def forward_run(inputs):
            batches = get_batches_ex(inputs, self.hp.batch_size, 2)
            logit_list = []
            for batch in batches:
                x, y = batch
                logits, = self.sess.run([self.sout],
                                        feed_dict={
                                            self.input_text: x,
                                            self.dropout_prob: 1.0,
                                        })
                logit_list.append(logits)
            return np.concatenate(logit_list)

        output = forward_run(inputs)[:, 1]
        return output
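# Hypothetical usage of CNNPredictor above (cache files and checkpoint paths are
# assumed to exist as in the original project; not verified):
# predictor = CNNPredictor()
# scores = predictor.score(["first document text", "second document text"])
# # `scores` is a 1-D array with one positive-class logit per input document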
def get_model(args):
    mean = np.array([640., 476.23620605, 88.2875590389])
    std = np.array([227.59802246, 65.00177002, 52.7303319245])
    if "scale" not in args.model:
        mean, std = mean[:2], std[:2]
    logger.info("Mean: {}, std: {}".format(mean, std))
    if args.model == "cnn" or args.model == "cnn_scale":
        model = CNN(mean, std, args.gpu, args.channel_list, args.deconv_list,
                    args.ksize_list, args.dc_ksize_list, args.inter_list,
                    args.last_list, args.pad_list)
    elif args.model == "cnn_pose" or args.model == "cnn_pose_scale":
        model = CNN_Pose(mean, std, args.gpu, args.channel_list, args.deconv_list,
                         args.ksize_list, args.dc_ksize_list, args.inter_list,
                         args.last_list, args.pad_list)
    elif args.model == "cnn_ego" or args.model == "cnn_ego_scale":
        model = CNN_Ego(mean, std, args.gpu, args.channel_list, args.deconv_list,
                        args.ksize_list, args.dc_ksize_list, args.inter_list,
                        args.last_list, args.pad_list, args.ego_type)
    elif args.model == "cnn_ego_pose" or args.model == "cnn_ego_pose_scale":
        model = CNN_Ego_Pose(mean, std, args.gpu, args.channel_list, args.deconv_list,
                             args.ksize_list, args.dc_ksize_list, args.inter_list,
                             args.last_list, args.pad_list, args.ego_type)
    else:
        logger.info("Invalid argument: model={}".format(args.model))
        exit(1)
    if args.resume != "":
        serializers.load_npz(args.resume, model)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    return model
def models(m):
    if m == 'rnn':
        return RNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'lstm':
        return LSTM(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'qrnn':
        return QRNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'cnn':
        return CNN(1, opt.hidden_size, 1, opt.cuda)
def get_multi_predictions(self, frames):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    if self.operation == 'training':
        cnn_output = cnn.create_model(frames, cnn.conv_filters,
                                      keep_prob=self.keep_prob)
    else:
        cnn_output = cnn.create_model(frames, cnn.conv_filters, keep_prob=1.0)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    arousal_rnn_output = rnn.create_model(cnn_output, 'arousal_rnn')
    valence_rnn_output = rnn.create_model(cnn_output, 'valence_rnn')
    dominance_rnn_output = rnn.create_model(cnn_output, 'dominance_rnn')
    if self.is_attention:
        attention = Attention(self.batch_size)
        arousal_attention_output = attention.create_model(arousal_rnn_output,
                                                          'arousal_attention')
        valence_attention_output = attention.create_model(valence_rnn_output,
                                                          'valence_attention')
        dominance_attention_output = attention.create_model(dominance_rnn_output,
                                                            'dominance_attention')
        fc = FC(self.num_classes)
        arousal_fc_outputs = fc.create_model(arousal_attention_output, 'arousal_fc')
        valence_fc_outputs = fc.create_model(valence_attention_output, 'valence_fc')
        dominance_fc_outputs = fc.create_model(dominance_attention_output, 'dominance_fc')
    else:
        arousal_rnn_output = arousal_rnn_output[:, -1, :]
        valence_rnn_output = valence_rnn_output[:, -1, :]
        dominance_rnn_output = dominance_rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        arousal_fc_outputs = fc.create_model(arousal_rnn_output, 'arousal_fc')
        valence_fc_outputs = fc.create_model(valence_rnn_output, 'valence_fc')
        dominance_fc_outputs = fc.create_model(dominance_rnn_output, 'dominance_fc')
    return arousal_fc_outputs, valence_fc_outputs, dominance_fc_outputs
def __init__(self, filed=80):
    super(TD_LSTM, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(self.filed)
    self.rnn_l = nn.LSTM(input_size=40, hidden_size=64, num_layers=4,
                         batch_first=True)
    self.cnn_r = CNN(self.filed)
    self.rnn_r = nn.LSTM(input_size=40, hidden_size=64, num_layers=4,
                         batch_first=True)
    self.linear = nn.Sequential(
        nn.Linear(128, 64),
        nn.Linear(64, 2),
    )
def __init__(self, filed=80):
    super(MemNet, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(filed=self.filed)
    self.attention = Attention(40, score_function='mlp')
    self.x_linear = nn.Sequential(nn.Linear(40, 40))
    self.linear = nn.Sequential(
        nn.Linear(40, 64),
        nn.Linear(64, 2),
    )
def get_predictor():
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    cnn = CNN("agree",
              sequence_length=FLAGS.comment_length,
              num_classes=3,
              filter_sizes=[1, 2, 3],
              num_filters=64,
              init_emb=load_local_pickle("init_embedding"),
              embedding_size=FLAGS.embedding_size,
              dropout_prob=dropout_keep_prob)
    input_comment = tf.placeholder(tf.int32, shape=[None, FLAGS.comment_length],
                                   name="comment_input")
    # sout = model.cnn.network(input_comment)
    sout = cnn.network(input_comment)
    sess = init_session()
    batch_size = 512
    path = os.path.join(model_path, "runs", "agree", "model-36570")
    variables = tf.contrib.slim.get_variables_to_restore()
    for v in variables:
        print(v.name)
    loader = tf.train.Saver(variables)
    loader.restore(sess, path)

    def predict(comments):
        batches = get_batches_ex(comments, batch_size, 1)
        all_scores = []
        ticker = TimeEstimator(len(batches))
        for batch in batches:
            scores, = sess.run([sout],
                               feed_dict={
                                   input_comment: batch[0],
                                   dropout_keep_prob: 1.0,
                               })
            all_scores.append(scores)
            ticker.tick()
        return np.concatenate(all_scores)

    return predict
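# Hypothetical usage of get_predictor above (names and data layout assumed, not
# taken from the original source): `encoded_comments` would be token-id
# sequences already padded to FLAGS.comment_length.
# predict = get_predictor()
# scores = predict(encoded_comments)   # ndarray of per-comment class scores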
def __init__(self, prior, init_emb):
    self.comment_length = FLAGS.comment_length
    self.comment_count = FLAGS.comment_count
    self.embedding_size = FLAGS.embedding_size
    self.prior = prior
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.input_comment = tf.placeholder(
        tf.int32,
        shape=[None, self.comment_count, self.comment_length],
        name="input_reaction")
    self.input_comment_y = tf.placeholder(
        tf.int32,
        shape=[None, self.comment_count],
        name="input_y_comment")  # agree label for comments
    self.input_y = tf.placeholder(tf.int32, shape=[None, ], name="input_y")  # controversy label
    self.cnn = CNN("agree",
                   sequence_length=self.comment_length,
                   num_classes=3,
                   filter_sizes=[1, 2, 3],
                   num_filters=64,
                   init_emb=init_emb,
                   embedding_size=self.embedding_size,
                   dropout_prob=self.dropout_keep_prob)
    self.score = self.controversy(self.input_comment)
    self.acc = accuracy(self.score, self.input_y, axis=1)
    self.agree_logit = self.predict_2d(self.input_comment)
    self.agree_acc = accuracy(self.agree_logit, self.input_comment_y, axis=2)
    self.agree_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.agree_logit, labels=self.input_comment_y))
def __init__(self, args):
    super(CNN_MLP, self).__init__()
    self.args = args

    self.device = args["device"]
    self.batch_size = args["batch_size"]
    self.input_dim = args["input_dim"]
    self.obs_len = args["input_length"]
    self.kernel_size = args["kernel_size"]
    self.nb_conv = args["nb_conv"]
    self.nb_kernel = args["nb_kernel"]
    self.cnn_feat_size = args["cnn_feat_size"]
    self.mlp_layers = args["mlp_layers"]
    self.output_size = args["output_size"]

    self.cnn = CNN(num_inputs=self.input_dim,
                   nb_kernel=self.nb_kernel,
                   cnn_feat_size=self.cnn_feat_size,
                   obs_len=self.obs_len,
                   kernel_size=self.kernel_size,
                   nb_conv=self.nb_conv)

    # self.cnn = nn.Sequential()
    # padding = int((self.kernel_size-1)/2.0)
    # for i in range(self.nb_conv):
    #     conv = nn.Conv1d(self.nb_kernel, self.nb_kernel, self.kernel_size, padding=padding)
    #     if i == 0:
    #         conv = nn.Conv1d(self.coord_embedding_size, self.nb_kernel, self.kernel_size, padding=padding)
    #     self.cnn.add_module("conv0", conv)
    # self.project_cnn = nn.Linear(self.obs_len*self.nb_kernel, self.cnn_feat_size)

    self.mlp = nn.Sequential()
    self.mlp.add_module("layer0", nn.Linear(self.cnn_feat_size, self.mlp_layers[0]))
    self.mlp.add_module("relu0", nn.ReLU())
    for i in range(1, len(self.mlp_layers)):
        self.mlp.add_module("layer{}".format(i),
                            nn.Linear(self.mlp_layers[i - 1], self.mlp_layers[i]))
        self.mlp.add_module("relu{}".format(i), nn.ReLU())
    self.mlp.add_module("layer{}".format(len(self.mlp_layers)),
                        nn.Linear(self.mlp_layers[-1], self.output_size))
def __init__(self, filed=80):
    super(ATAE_LSTM, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(filed=self.filed)
    self.rnn_l = nn.LSTM(input_size=110, hidden_size=64, num_layers=4,
                         batch_first=True)
    self.attention = NoQueryAttention(128, score_function='bi_linear')
    self.linear = nn.Sequential(
        nn.Linear(64, 64),
        nn.Linear(64, 2),
    )
def GetCNN():
    cnn = CNN(
        in_features=(32, 32, 3),
        out_features=10,
        conv_filters=[32, 32, 64, 64],
        conv_kernel_size=[3, 3, 3, 3],
        conv_strides=[1, 1, 1, 1],
        conv_pad=[0, 0, 0, 0],
        max_pool_kernels=[None, (2, 2), None, (2, 2)],
        max_pool_strides=[None, 2, None, 2],
        use_dropout=False,
        use_batch_norm=True,  # False
        actv_func=["relu", "relu", "relu", "relu"],
        device=device)
    return cnn
def setup_model(self, model_type):
    self.model_type = model_type
    if model_type == "perceptron":
        self.model = Perceptron()
        self.weights_metadata = self.model.get_weights_shape()
    elif model_type == "cnn":
        # TODO: Support CNN
        self.model = CNN()
    elif model_type == "lstm":
        # TODO: Support LSTM
        self.model = LSTM()
    elif model_type == "gan":
        self.model = ConversationalNetwork()
        self.model.build_model(is_retraining=True)
    else:
        raise ValueError("Model {0} not supported.".format(model_type))
def __init__(self, filed=80):
    super(IAN_LSTM, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(filed=self.filed)
    self.rnn_l = nn.LSTM(input_size=55, hidden_size=64, num_layers=4,
                         batch_first=True)
    self.attention_aspect = Attention(64, score_function='bi_linear')
    self.attention_context = Attention(64, score_function='bi_linear')
    self.linear = nn.Sequential(
        nn.Linear(128, 64),
        nn.Linear(64, 2),
    )
def get_model(args, parallel=True, ckpt_path=False):
    if args.clf == 'fcn':
        print('Initializing FCN...')
        model = FCN(args.input_size, args.output_size)
    elif args.clf == 'mlp':
        print('Initializing MLP...')
        model = MLP(args.input_size, args.output_size)
    elif args.clf == 'svm':
        print('Initializing SVM...')
        model = SVM(args.input_size, args.output_size)
    elif args.clf == 'cnn':
        print('Initializing CNN...')
        model = CNN(nc=args.num_channels, fs=args.cnn_view)
    elif args.clf == 'resnet18':
        print('Initializing ResNet18...')
        model = resnet.resnet18(num_channels=args.num_channels,
                                num_classes=args.output_size)
    elif args.clf == 'vgg19':
        print('Initializing VGG19...')
        model = VGG(vgg_name=args.clf, num_channels=args.num_channels,
                    num_classes=args.output_size)
    elif args.clf == 'unet':
        print('Initializing UNet...')
        model = UNet(in_channels=args.num_channels,
                     out_channels=args.output_size)
    else:
        raise ValueError("Classifier {} not supported.".format(args.clf))

    num_params, num_layers = get_model_size(model)
    print("# params: {}\n# layers: {}".format(num_params, num_layers))

    if ckpt_path:
        model.load_state_dict(torch.load(ckpt_path))
        print('Load init: {}'.format(ckpt_path))

    if parallel:
        model = nn.DataParallel(model.to(get_device(args)),
                                device_ids=args.device_id)
    else:
        model = model.to(get_device(args))

    loss_type = 'hinge' if args.clf == 'svm' else args.loss_type
    print("Loss: {}".format(loss_type))
    return model, loss_type
def predict_cnn(config):
    # load tokenizer and torchtext Field
    pickle_tokenizer = open('pickles/tokenizer.pickle', 'rb')
    cohesion_scores = pickle.load(pickle_tokenizer)
    tokenizer = LTokenizer(scores=cohesion_scores)

    pickle_vocab = open('pickles/text.pickle', 'rb')
    text = pickle.load(pickle_vocab)

    model = CNN(config)
    model.load_state_dict(torch.load(config.save_model))
    model.to(device)
    model.eval()

    tokenized = tokenizer.tokenize(config.input)
    min_len = config.filter_sizes[-1]

    # if the user's input sentence is shorter than the largest filter size,
    # add pad tokens to the input sentence
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))

    indexed = [text.vocab.stoi[token] for token in tokenized]
    length = [len(indexed)]

    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    length_tensor = torch.LongTensor(length)

    prediction = torch.sigmoid(model(tensor, length_tensor))
    label = torch.round(prediction)

    if label == 1:
        label = 'Positive'
    else:
        label = 'Negative'

    sentiment_percent = prediction.item()
    print(f'[in] >> {config.input}')
    print(f'[out] >> {sentiment_percent*100:.2f} % : {label}')
train_dataset, train_dataset_args = read_data_args(train_data_spec)
valid_dataset, valid_dataset_args = read_data_args(valid_data_spec)

# reading data
train_sets, train_xy, train_x, train_y = read_dataset(train_dataset, train_dataset_args)
valid_sets, valid_xy, valid_x, valid_y = read_dataset(valid_dataset, valid_dataset_args)

numpy_rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
log('> ... building the model')

# construct the cnn architecture
cnn = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, batch_size=batch_size,
          n_outs=n_outs, conv_layer_configs=conv_layer_configs,
          hidden_layers_sizes=hidden_layers_sizes,
          conv_activation=conv_activation, full_activation=full_activation,
          use_fast=use_fast, update_layers=update_layers)

total_layer_number = len(cnn.layers)
if full_ptr_layer_number > 0:
    _file2nnet(cnn.layers[len(conv_layer_configs):total_layer_number],
               set_layer_num=full_ptr_layer_number,
               filename=full_ptr_file, withfinal=False)
if conv_ptr_layer_number > 0:
    _file2cnn(cnn.layers[0:len(conv_layer_configs)], filename=conv_ptr_file)

# get the training, validation and testing function for the model
log('> ... getting the finetuning functions')
train_fn, valid_fn = cnn.build_finetune_functions(
    (train_x, train_y), (valid_x, valid_y), batch_size=batch_size)
if 'ptr_file' in arguments and 'ptr_layer_number' in arguments:
    ptr_file = arguments['ptr_file']
    ptr_layer_number = int(arguments['ptr_layer_number'])

# check the working dir to see whether it's resuming training
resume_training = False
if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
    resume_training = True
    cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
    log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch '
        + str(cfg.lrate.epoch))

numpy_rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
log('> ... initializing the model')

# construct the cnn architecture
cnn = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

# load the pre-training networks, if any, for parameter initialization
if (ptr_layer_number > 0) and (resume_training is False):
    _file2nnet(cnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)
if resume_training:
    _file2nnet(cnn.layers, filename=wdir + '/nnet.tmp')

# get the training, validation and testing function for the model
log('> ... getting the finetuning functions')
train_fn, valid_fn = cnn.build_finetune_functions(
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    batch_size=cfg.batch_size)

log('> ... finetuning the model')
while cfg.lrate.get_rate() != 0:
    # one epoch of sgd training