def __init__(self, gpu, basename, input_size=32, Resampling=500):
    if basename == 'cnnLstm':
        if gpu >= 0:
            self.model = clstm(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = clstm(gpu, input_size, Resampling)
    elif basename == 'cnnVoting':  # use the CNN voting approach
        if gpu >= 0:
            self.model = cnnVoting(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = cnnVoting(gpu, input_size, Resampling)
    elif basename == 'cnnTransformer':
        if gpu >= 0:
            self.model = cnnTransformer(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = cnnTransformer(gpu, input_size, Resampling)
    elif basename == 'cnnSvm':
        if gpu >= 0:
            self.model = cnnSvm(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = cnnSvm(gpu, input_size, Resampling)
    elif basename == 'vdCnn':
        if gpu >= 0:
            self.model = VDCNN(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = VDCNN(gpu, input_size, Resampling)
    elif basename == 'dpCnn':
        if gpu >= 0:
            self.model = DPCNN(gpu, input_size, Resampling).cuda(gpu)
        else:
            self.model = DPCNN(gpu, input_size, Resampling)
    else:
        raise ValueError("unknown model name: {}".format(basename))
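# The ladder above repeats the same two-branch GPU logic for every model.
# A sketch of an equivalent table-driven construction, using the same
# constructors and arguments; the helper name build_model is illustrative:
_MODEL_CLASSES = {
    'cnnLstm': clstm,
    'cnnVoting': cnnVoting,
    'cnnTransformer': cnnTransformer,
    'cnnSvm': cnnSvm,
    'vdCnn': VDCNN,
    'dpCnn': DPCNN,
}

def build_model(gpu, basename, input_size=32, Resampling=500):
    if basename not in _MODEL_CLASSES:
        raise ValueError("unknown model name: {}".format(basename))
    model = _MODEL_CLASSES[basename](gpu, input_size, Resampling)
    return model.cuda(gpu) if gpu >= 0 else model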
def __init__(self, model_weights_dir, num_channel=1, device="gpu", device_id=0,
             variable_reuse=None, is_chinese=False):
    """
    :param model_weights_dir: string | directory the model weights were saved to
    :param num_channel: int | number of channels of the input
    :param device: string | "cpu" or "gpu"
    :param device_id: int | cpu or gpu device id
    :param variable_reuse: bool | whether to reuse variables during prediction
        (needed for multiple GPUs; see the examples below)
    :param is_chinese: bool | whether the model input is Chinese
    """
    self.model_weights_dir = model_weights_dir
    self.per_process_gpu_memory_fraction = .95
    self.num_channel = num_channel
    self.device = device
    self.device_id = device_id
    self.variable_reuse = variable_reuse
    self.is_chinese = is_chinese

    # Build the character vocabulary.
    self.vocabulary = learn.preprocessing.CategoricalVocabulary()
    for token in config.ALPHABET:
        self.vocabulary.add(token)
    self.vocabulary.freeze()

    self.index2label = pickle.load(
        open(os.path.join(self.model_weights_dir[:-11], 'index2label.pk'), 'rb'))
    self.num_class = len(self.index2label)

    max_document_length = config.FEATURE_LEN
    self.vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length, vocabulary=self.vocabulary, tokenizer_fn=list)
    self.is_training = tf.placeholder('bool', [], name='is_training')

    # Build the model on the requested device.
    with tf.device(self.device + ":" + str(self.device_id)):
        self.model = VDCNN(
            feature_len=config.FEATURE_LEN,
            num_classes=self.num_class,
            vocab_size=70,  # fixed to 70: <unk> plus the 69 chars in config
            embedding_size=config.CHAR_EBD_SIZE,
            is_training=self.is_training,
            depth=9)

    # Create the session used for prediction (replaces the bound method
    # with the session it returns).
    self.model_session = self.model_session()
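# Hypothetical usage of the wrapper above; the class name VDCNNPredictor, the
# predict() method, and the weights path are assumptions for illustration:
predictor = VDCNNPredictor(model_weights_dir="runs/checkpoints",
                           device="gpu", device_id=0)
labels = predictor.predict(["an example document"])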
def main(_):
    depth = 9
    use_he_uniform = True
    optional_shortcut = False

    currentPath = os.path.dirname(os.path.abspath(__file__))
    save_path = os.path.join(currentPath, args.model_path)
    if not os.path.exists(save_path):
        raise ValueError("{} does not exist!".format(save_path))
    ckpt = tf.train.get_checkpoint_state(save_path)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = args.model_name
    else:
        raise ValueError("no checkpoint found under {}".format(save_path))

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    cnn_model = VDCNN(input_dim=[context_window_size, feat_size],
                      batchsize=batchsize,
                      depth=depth,
                      downsampling_type=args.downsampling_type,
                      use_he_uniform=use_he_uniform,
                      optional_shortcut=optional_shortcut)

    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(save_path, ckpt_name))
    print("[*] Read {}".format(ckpt_name))

    test_path = os.path.join(currentPath, "data", "test")
    test_list = [os.path.join(test_path, file)
                 for file in os.listdir(test_path)
                 if file.endswith(".txt")]
    ReadARKFile(sess, test_list, cnn_model)
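# Entry point for the script above; a sketch assuming the usual TF 1.x
# runner, which parses flags and then calls main:
if __name__ == "__main__":
    tf.app.run()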
# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    is_training = tf.placeholder('bool', [], name='is_training')
    with sess.as_default():
        vdcnn = VDCNN(feature_len=FLAGS.feature_len,
                      num_classes=num_classes,
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      l2_reg_lambda=FLAGS.l2_reg_lambda,
                      is_training=is_training,
                      depth=9)

        # Define the training procedure.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_ops = vdcnn.build_train_op(FLAGS.lr, global_step)

        # Output directory for models and summaries.
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, FLAGS.TRAIN_DIR, timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
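# A minimal sketch of one training step with the graph above; the placeholder
# names (vdcnn.input_x, vdcnn.input_y) are assumptions and must match the
# actual VDCNN definition:
def train_step(x_batch, y_batch):
    feed_dict = {vdcnn.input_x: x_batch,
                 vdcnn.input_y: y_batch,
                 is_training: True}
    _, step, loss = sess.run([train_ops, global_step, vdcnn.loss], feed_dict)
    print("step {}, loss {:g}".format(step, loss))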
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=15)
test_dataset = TextDataset(test_x_path.get(dataset), test_y_path.get(dataset))
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=15)
end = time.time()
print(end - start)
print('Dataset loaded...')

## Model initialization
model = VDCNN(vocab_size, embed_size, depth, downsample, args.shortcut,
              kmaxpool, num_classes)
if is_cuda:
    model.cuda()
    model = nn.DataParallel(model,
                            device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
if args.load is not None:
    model.load_state_dict(torch.load(args.load))

## Optimizer
if optimi == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.7)
criterion = nn.CrossEntropyLoss()
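# A minimal epoch loop over the objects set up above (model, criterion,
# optimizer, scheduler, train_loader); a sketch that assumes each batch is a
# (data, label) pair as produced by TextDataset:
def train_one_epoch():
    model.train()
    for data, label in train_loader:
        if is_cuda:
            data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()              # clear gradients from the last step
        loss = criterion(model(data), label)
        loss.backward()
        optimizer.step()
    scheduler.step()                       # decay the learning rate per epoch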
def train(epoch=10, batch_size=128, embedding_size=16, class_n=10,
          maxlen=1014, gpu=None):
    test_ratio = .2

    # fake dataset
    # vocab_n = 100
    # X = np.random.randint(vocab_n, size=(1000, 1, maxlen)).astype(np.int32)
    # T = np.random.randint(10, size=(1000)).astype(np.int32)
    # train_x, test_x = X[:int(len(X)*(1-test_ratio))], X[-int(len(X)*test_ratio):]
    # train_t, test_t = T[:int(len(T)*(1-test_ratio))], T[-int(len(T)*test_ratio):]

    vocab_n = len(token_dict)
    ag = AGCorpus('./datas/newsspace200.xml')
    T, X = ag.get_data()
    X = util.np_int32([
        util.convert_one_of_m_vector_char(x, token_dict, maxlen)
            .astype(np.int32).reshape(1, maxlen)
        for x in X])
    T = util.np_int32(T)
    train_x, test_x = X[:int(len(X)*(1-test_ratio))], X[-int(len(X)*test_ratio):]
    train_t, test_t = T[:int(len(T)*(1-test_ratio))], T[-int(len(T)*test_ratio):]
    train_n = len(train_x)
    test_n = len(test_x)

    model = VDCNN(vocab_n, embedding_size, class_n)
    if gpu:
        chainer.cuda.get_device(gpu).use()
        model.to_gpu()
        xp = chainer.cuda.cupy
    else:
        xp = np

    optimizer = optimizers.MomentumSGD()
    optimizer.setup(model)

    s.s_print('epoch: {}'.format(epoch))
    s.s_print('batch size: {}'.format(batch_size))
    s.s_print('embedding size: {}'.format(embedding_size))
    s.s_print('class n: {}'.format(class_n))
    s.s_print('vocab n: {}'.format(vocab_n))
    s.s_print('train n: {}'.format(train_n))
    s.s_print('test n: {}'.format(test_n))

    for e in range(epoch):
        loss_acc = 0
        order = np.random.permutation(train_n)
        train_iter_x = Iterator(train_x, batch_size, order=order)
        train_iter_t = Iterator(train_t, batch_size, order=order)
        for x, t in tqdm(zip(train_iter_x, train_iter_t)):
            x = model.prepare_input(x, dtype=xp.int32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            model.cleargrads()  # clear gradients; they accumulate otherwise
            loss = model(x, t)
            loss.backward()
            optimizer.update()
            loss_acc += float(loss.data)
        # average per-batch loss (assumes the model returns the batch mean)
        print('loss: {}'.format(loss_acc / (train_n / batch_size)))

        loss_acc = 0  # reset before evaluating, or train loss leaks in
        order = np.random.permutation(test_n)
        test_iter_x = Iterator(test_x, batch_size, order=order)
        test_iter_t = Iterator(test_t, batch_size, order=order)
        for x, t in tqdm(zip(test_iter_x, test_iter_t)):
            x = model.prepare_input(x, dtype=xp.int32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            loss = model(x, t)
            loss_acc += float(loss.data)
        print('test loss: {}'.format(loss_acc / (test_n / batch_size)))
def main(_):
    # batchsize, context_window_size and feat_size are module-level globals.
    depth = 9
    use_he_uniform = True
    optional_shortcut = False
    learning_rate = 1e-3

    currentPath = os.path.dirname(os.path.abspath(__file__))
    saver_path = os.path.join(currentPath, "model_save")
    if not os.path.exists(saver_path):
        os.mkdir(saver_path)  # create the save dir

    TFRecord = os.path.join(currentPath, "data", args.TFRecord)
    num_example = 0
    for record in tf.python_io.tf_record_iterator(TFRecord):
        num_example += 1
    print("Total examples in TFRecord {}: {}".format(TFRecord, num_example))
    num_batches = num_example / batchsize
    num_iters = int(num_batches * num_epochs) + 1

    sliced_feat_op, sliced_noise_feat_op = read_and_decode(
        TFRecord, context_window_size, feat_size)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    # Filter the device list: with more than one device, skip the GPUs.
    udevice = []
    for device in devices:
        if len(devices) > 1 and device.device_type == "GPU":
            continue
        udevice.append(device)
    sess = tf.Session(config=config)

    if model_type == "ANFCN":
        cnn_model = ANFCN(input_dim=[context_window_size, feat_size],
                          batchsize=batchsize,
                          is_ref=True,
                          do_prelu=True)
    elif model_type == "VDCNN":
        cnn_model = VDCNN(input_dim=[context_window_size, feat_size],
                          batchsize=batchsize,
                          depth=depth,
                          downsampling_type=args.downsampling_type,
                          use_he_uniform=use_he_uniform,
                          optional_shortcut=optional_shortcut)
    else:
        print("Model type error!!")
        sys.exit(1)

    # Run the batch-norm moving-average updates together with each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # TODO: decay over num_epochs * num_batches_per_epoch steps
        learning_rate = tf.train.exponential_decay(learning_rate, global_step,
                                                   num_epochs, 0.95,
                                                   staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        gradients, variables = zip(*optimizer.compute_gradients(cnn_model.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 7.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)

    print("Initializing all variables.")
    sess.run(tf.global_variables_initializer())

    if not os.path.exists(os.path.join(saver_path, "train")):
        os.mkdir(os.path.join(saver_path, "train"))
    tf.summary.scalar("loss", cnn_model.loss)
    merge_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(os.path.join(saver_path, "train"),
                                   sess.graph)
    saver = tf.train.Saver()  # local model saver

    with sess:
        for i in range(num_iters):
            sliced_feat, sliced_noise_feat = sess.run(
                [sliced_feat_op, sliced_noise_feat_op])
            feed = {cnn_model.input_x: sliced_noise_feat,
                    cnn_model.input_y: sliced_feat,
                    cnn_model.is_training: True}
            # Fetch the summary in the same run as the train op instead of
            # re-running the graph on the same feed.
            _, step, loss, train_summary = sess.run(
                [train_op, global_step, cnn_model.loss, merge_summary], feed)
            print("step {}/{}, loss {:g}".format(step, num_iters, loss))
            if i % save_freq == 0 or i == (num_iters - 1):
                saver.save(sess, os.path.join(saver_path, "saver"),
                           global_step=i)
                writer.add_summary(train_summary, step)
n_dev_samples = 200000
# TODO: create a proper cross-validation procedure
x_train, x_dev = x_shuffled[:-n_dev_samples], x_shuffled[-n_dev_samples:]
y_train, y_dev = y_shuffled[:-n_dev_samples], y_shuffled[-n_dev_samples:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = VDCNN()

        # Define the training procedure. The batch-norm moving mean/variance
        # updates live in UPDATE_OPS, so wrap the optimizer in a control
        # dependency to ensure they run before each minimization step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Keep track of gradient values and sparsity (optional).
        grad_summaries = []
        for g, v in grads_and_vars:
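            # A typical body for the summary loop above (a sketch; the tag
            # names are illustrative):
            if g is not None:
                grad_summaries.append(
                    tf.summary.histogram("{}/grad/hist".format(v.name), g))
                grad_summaries.append(
                    tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                      tf.nn.zero_fraction(g)))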