def main():
    args = configurations.get_args()

    ref_labels = dataloader.read_labels_file(args.reflabelpath)
    classes_num = len(np.unique(ref_labels))
    ref_images_paths = dataloader.get_images_path(args.refpath)
    target_images_paths = get_target_images_by_classes(args.targetpath, ["knife", "sword"])

    ref_dataloader = dataloader.Dataloader(ref_images_paths, classes_num, ref_labels)
    target_dataloader = dataloader.Dataloader(target_images_paths, classes_num)

    network = utils.get_network(args.nntype)
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    trainer = Trainer(network, optimizer, args.lambd, compactnes_loss, descriptiveness_loss)

    # Iterations per epoch; integer division keeps the count whole.
    num_iterations = max(len(ref_images_paths) // args.batches, 1)
    train(ref_dataloader, target_dataloader, trainer, args.batches, num_iterations, args.epochs)
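# Editor's sketch (hypothetical, not taken from the original project): one possible shape for
# the train() function called above. The signature matches the call in main(); the Dataloader
# method next_batch() and the Trainer method train_step() are assumed names used only for
# illustration.
def train(ref_dataloader, target_dataloader, trainer, batch_size, num_iterations, epochs):
    for epoch in range(epochs):
        for _ in range(int(num_iterations)):
            ref_images, ref_labels = ref_dataloader.next_batch(batch_size)    # assumed API
            target_images = target_dataloader.next_batch(batch_size)          # assumed API
            loss = trainer.train_step(ref_images, ref_labels, target_images)  # assumed API
        print("epoch %d/%d, loss %.4f" % (epoch + 1, epochs, loss))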
def train(self):
    model = cool_model.create_model()
    model.compile(optimizer=Adam(Params.learn_rate), loss=Params.losses, metrics=Params.metrics)
    model.summary()  # summary() prints the architecture itself and returns None

    # callbacks
    logger = CSVLogger(f'out/logs/{Params.model_name}_logs.csv')
    checkpointer = ModelCheckpoint(f'out/checkpoints/{Params.model_name}_{{epoch:02d}}.hdf5',
                                   verbose=1, save_best_only=True)

    # dataloaders
    train_loader = dataloader.Dataloader(dataloader.Dataloader.mode_train, split_size=50000)
    validation_loader = dataloader.Dataloader(dataloader.Dataloader.mode_validation,
                                              split_size=int(50000 * .42))
    num_train_batches = len(train_loader)
    num_valid_batches = int(num_train_batches * 0.42)

    model.fit_generator(generator=train_loader,
                        validation_data=validation_loader,
                        callbacks=[checkpointer, logger],
                        steps_per_epoch=num_train_batches,
                        epochs=100,
                        validation_steps=num_valid_batches,
                        shuffle=False)
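# Editor's note (sketch, not from the original project): Model.fit_generator is deprecated in
# recent tf.keras releases, and Model.fit accepts generators / keras.utils.Sequence objects
# directly. Assuming dataloader.Dataloader is such a Sequence, the same run could be written as:
#
#     model.fit(train_loader,
#               validation_data=validation_loader,
#               callbacks=[checkpointer, logger],
#               steps_per_epoch=num_train_batches,
#               epochs=100,
#               validation_steps=num_valid_batches,
#               shuffle=False)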
def __init__(self):
    # Input shape
    self.height = 36
    self.width = 128
    self.shape = (self.height, self.width)

    # Datasets
    self.loader = dataloader.Dataloader('cache36_suzuki.pkl', 'cache36_kinoshita.pkl', shape=self.shape)
    self.loader.loadData()

    # Loss weights
    self.lambda_cycle = 10.                   # Cycle-consistency loss
    self.lambda_id = 0.1 * self.lambda_cycle  # Identity loss

    generator_optimizer = Adam(lr=0.0002, beta_1=0.5)
    discriminator_optimizer = Adam(lr=0.0001, beta_1=0.5)

    # Build and compile the discriminators
    self.d_A = models.build_PatchGAN_Discriminator(self.shape)
    self.d_B = models.build_PatchGAN_Discriminator(self.shape)
    self.d_A.compile(loss='mse', optimizer=discriminator_optimizer, metrics=['accuracy'])
    self.d_B.compile(loss='mse', optimizer=discriminator_optimizer, metrics=['accuracy'])

    # Freeze the discriminators inside the combined model
    self.d_A.trainable = False
    self.d_B.trainable = False

    # Build the generators (g_AB: A -> B, g_BA: B -> A)
    self.g_AB = models.build_212CNN_Generator(self.shape)
    self.g_BA = models.build_212CNN_Generator(self.shape)

    input_A = Input(shape=self.shape)
    input_B = Input(shape=self.shape)

    # Translate to the other domain
    fake_B = self.g_AB(input_A)
    fake_A = self.g_BA(input_B)  # g_BA maps B -> A (the original mistakenly used g_AB here)

    # Cycle back to the original domain
    reconstr_A = self.g_BA(fake_B)
    reconstr_B = self.g_AB(fake_A)

    # Identity mappings
    id_A = self.g_BA(input_A)
    id_B = self.g_AB(input_B)

    # Discriminator scores for the translated images
    valid_A = self.d_A(fake_A)
    valid_B = self.d_B(fake_B)

    self.combined = Model(inputs=[input_A, input_B],
                          outputs=[valid_A, valid_B, reconstr_A, reconstr_B, id_A, id_B])
    self.combined.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
                          loss_weights=[1, 1,
                                        self.lambda_cycle, self.lambda_cycle,
                                        self.lambda_id, self.lambda_id],
                          optimizer=generator_optimizer)
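# Editor's sketch (not part of the original class): one combined-model update, mirroring the
# standard Keras CycleGAN recipe. It assumes numpy is imported as np and that imgs_A / imgs_B
# are batches with shape (batch_size,) + self.shape; the PatchGAN target shape is read from
# d_A.output_shape rather than hard-coded.
def train_step_sketch(self, imgs_A, imgs_B):
    batch_size = imgs_A.shape[0]
    patch_shape = self.d_A.output_shape[1:]
    valid = np.ones((batch_size,) + patch_shape)   # labels for real patches
    fake = np.zeros((batch_size,) + patch_shape)   # labels for translated patches

    # Translate images to the opposite domain
    fake_B = self.g_AB.predict(imgs_A)
    fake_A = self.g_BA.predict(imgs_B)

    # Train the discriminators (real classified as 1, translated as 0); they were compiled
    # before being frozen, so train_on_batch still updates them.
    dA_loss_real = self.d_A.train_on_batch(imgs_A, valid)
    dA_loss_fake = self.d_A.train_on_batch(fake_A, fake)
    dB_loss_real = self.d_B.train_on_batch(imgs_B, valid)
    dB_loss_fake = self.d_B.train_on_batch(fake_B, fake)

    # Train the generators through the combined model (adversarial + cycle + identity terms)
    g_loss = self.combined.train_on_batch([imgs_A, imgs_B],
                                          [valid, valid, imgs_A, imgs_B, imgs_A, imgs_B])
    return g_loss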
import os
import copy

import torch

import dataloader
import dataset
import transform

Batch_size = 32
imageType = ['Gray', 'Colour']
dataAddress = './data/'
saveAddress2 = '%s/PreprocessData2.0' % dataAddress
dataType = ['frequency_domain', 'time_domain']
dataAddr = '%s/labeleddata_%s.pkl' % (saveAddress2, dataType[0])
type = 'Gray'
mode = 'Test'

D = dataloader.Dataloader()
p = dataset.Dataset(data_transform=transform.horizontalFlip())

# Load the preprocessed data from the pickle cache; if it does not exist yet,
# run the (slower) preprocessing step first to create it, then load the result.
if not os.path.isfile(dataAddr):
    dataAddr = p.preprocessing(dataType=dataType[0], DataperSample=1300)  # data with label
labeldata = torch.load(dataAddr)

trainData = D.loader(
    dataset=labeldata,
    batch_size=Batch_size,
    mode='Train',
    shuffle=True,
)  # 16*89*1301
def test():
    # Below is a function test; to use this for text classification, transform each
    # sentence into vocabulary indices first, then feed the data to the graph.
    num_classes = 50
    learning_rate = 0.01
    batch_size = 30
    decay_steps = 1000
    decay_rate = 0.9
    sequence_length = 37  # 148
    vocab_size = 400000
    embed_size = 300
    attention_size = 50
    is_training = True
    dropout_keep_prob = 0.5
    epoch = 300
    SGD = False
    print("learning_rate : ", learning_rate)
    print("batch_size :", batch_size)

    textRNN = TextRNN(num_classes, learning_rate, batch_size, decay_steps, decay_rate,
                      sequence_length, vocab_size, embed_size, is_training, attention_size)

    data_loader_train = dataloader.Dataloader(batch_size)
    data_loader_validation = dataloader.Dataloader(batch_size)
    data_loader_test = dataloader.Dataloader(batch_size)

    best_acc = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        embedding = data_loader_train.load_train_data(Training_path, word_embedding_path)
        data_loader_validation.load_train_data(Validation_path, word_embedding_path)
        data_loader_test.load_train_data(Test_path, word_embedding_path)

        for f in range(epoch):
            data_loader_train.set_folder(f)
            data_loader_train.reset_pointer()
            cnt_train = 0
            cnt_test = 0
            acc_avg_train = 0
            acc_avg_test = 0
            cnt_all = 0

            # Training pass
            for it in range(0, data_loader_train.num_batch):
                input_x, input_y, input_z = data_loader_train.Train_next_batch()
                outputs, loss, acc, predict, _ = sess.run(
                    [textRNN.attention_output, textRNN.loss_val, textRNN.accuracy,
                     textRNN.predictions, textRNN.train_op],
                    feed_dict={
                        textRNN.input_x: input_x,
                        textRNN.input_y: input_y,
                        textRNN.dropout_keep_prob: dropout_keep_prob,
                        textRNN.Embedding_placeholder: embedding,
                        textRNN.SGD: SGD
                    })
                cnt_all = cnt_all + np.shape(outputs)[0]
                cnt_train = cnt_train + 1
                acc_avg_train = acc_avg_train + acc

            # Switch to SGD once the average training accuracy is high enough
            if acc_avg_train / cnt_train > 0.7:
                SGD = True

            # Validation pass
            for it in range(data_loader_validation.num_batch):
                input_x, input_y, input_z = data_loader_validation.Train_next_batch()
                loss, acc, predict = sess.run(
                    [textRNN.loss_val, textRNN.accuracy, textRNN.predictions],
                    feed_dict={
                        textRNN.input_x: input_x,
                        textRNN.input_y: input_y,
                        textRNN.dropout_keep_prob: dropout_keep_prob,
                        textRNN.Embedding_placeholder: embedding
                    })
                acc_avg_test = acc_avg_test + acc
                cnt_test = cnt_test + 1

            # Write test predictions whenever the validation accuracy improves
            if acc_avg_test / cnt_test > best_acc:
                best_acc = acc_avg_test / cnt_test
                output = open('u6022937.csv', 'w')
                output.write('id,category\n')
                for it in range(data_loader_test.num_batch):
                    input_x, input_z = data_loader_test.Test_next_batch()
                    predict = sess.run(textRNN.predictions,
                                       feed_dict={
                                           textRNN.input_x: input_x,
                                           textRNN.dropout_keep_prob: dropout_keep_prob,
                                           textRNN.Embedding_placeholder: embedding
                                       })
                    cnt_output = 0
                    for i in predict:
                        output.write('%s,%s\n' % (input_z[cnt_output], i))
                        cnt_output = cnt_output + 1

            print("Epoch : ", f, "Training acc : ", acc_avg_train / cnt_train,
                  ", Test acc : ", acc_avg_test / cnt_test, ", Best test acc : ", best_acc)
def log_string(out_str):
    LOG_FOUT.write(out_str + "\n")
    LOG_FOUT.flush()
    print(out_str)


log_string("pid: %s" % str(os.getpid()))
log_string("use_cuda: %s" % str(torch.cuda.is_available()))

# dataset
TRAIN_DATASET = dataloader.Dataloader(root="./datasets",
                                      dataset=DATASET,
                                      split="train",
                                      normalization=NORMALIZATION,
                                      batch_size=BATCH_SIZE,
                                      max_rul=MAX_RUL,
                                      quantity=QUANTITY)
VALIDATION_DATASET = dataloader.Dataloader(root="./datasets",
                                           dataset=DATASET,
                                           split="validation",
                                           normalization=NORMALIZATION,
                                           batch_size=BATCH_SIZE)
log_string("Dataset: " + DATASET)

input_size = (TRAIN_DATASET.num_channels, TRAIN_DATASET.window, TRAIN_DATASET.num_features)
def main(_):
    pp = pprint.PrettyPrinter()
    pp.pprint(FLAGS.__flags)

    n_classes = 2

    # load dataset
    data = dl.Dataloader(n_classes=n_classes,
                         test_path=FLAGS.testset,
                         embedding_path=FLAGS.word_vec,
                         split_ratio=FLAGS.valid_ratio,
                         max_sen=FLAGS.max_sen,
                         max_len=FLAGS.max_len)

    # build model
    x = tf.placeholder("int32", [None, FLAGS.max_sen, FLAGS.max_len], name="input")
    y = tf.placeholder("float32", [None, n_classes], name="target")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    ham = model.HAM(vocabsize=data.vocab_size + 1,
                    hiddensize=data.hidden_dim,
                    rnnsize=FLAGS.rnnsize,
                    docsize=FLAGS.docsize,
                    max_sen=FLAGS.max_sen,
                    max_len=FLAGS.max_len)
    pred = ham.build(x, keep_prob, n_classes, data.embedding)
    pred_label = tf.argmax(pred, 1)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    correct_pred = tf.equal(pred_label, tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # load testset
    data2_path = "./data/testset2.txt"
    data2 = dl.Dataloader(n_classes=n_classes, test_path=data2_path,
                          max_sen=FLAGS.max_sen, max_len=FLAGS.max_len)

    # load short-text testset
    data3_path = "./data/testset3.txt"
    data3 = dl.Dataloader(n_classes=n_classes, test_path=data3_path,
                          max_sen=FLAGS.max_sen, max_len=FLAGS.max_len)

    metrics = [accuracy, pred_label]

    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True,
                            device_count={'GPU': 0})
    # config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_path = os.path.join(FLAGS.save, "model.ckpt")
        saver.restore(sess, model_path)

        # test: main dataset
        print("main testset results:")
        res1 = "data/testset.res"
        feed_dict = {x: data.testset.data, y: data.testset.label, keep_prob: 1.0}
        batch_eval(sess, metrics, feed_dict, data.testset, save=res1)

        # test: dataset 2
        print("testset 2 results:")
        feed_dict = {x: data2.testset.data, y: data2.testset.label, keep_prob: 1.0}
        batch_eval(sess, metrics, feed_dict, data2.testset)

        # test: dataset 3 (short text)
        print("testset 3 results: (short text)")
        feed_dict = {x: data3.testset.data, y: data3.testset.label, keep_prob: 1.0}
        batch_eval(sess, metrics, feed_dict, data3.testset)

        # test data, one doc per run
        batch_size = 1
def log_string(out_str):
    LOG_FOUT.write(out_str + "\n")
    LOG_FOUT.flush()
    print(out_str)


log_string("pid: %s" % str(os.getpid()))
log_string("use_cuda: %s" % str(torch.cuda.is_available()))

# dataset
DATA_PATH = os.path.join("datasets", DATASET)
TEST_DATASET = dataloader.Dataloader(root="./datasets",
                                     dataset=DATASET,
                                     split="test",
                                     normalization=NORMALIZATION,
                                     batch_size=BATCH_SIZE)
log_string("Dataset: " + DATASET)

input_size = (TEST_DATASET.num_channels, TEST_DATASET.window, TEST_DATASET.num_features)

# get class name
clss = [m[0] for m in inspect.getmembers(module, inspect.isclass)
        if m[1].__module__ == FLAGS.model]
assert len(clss) == 1
cls = clss[0]