def main():
    args = configurations.get_args()
    ref_labels = dataloader.read_labels_file(args.reflabelpath)
    classes_num = len(np.unique(ref_labels))
    ref_images_paths = dataloader.get_images_path(args.refpath)
    target_images_paths = get_target_images_by_classes(args.targetpath,
                                                       ["knife", "sword"])
    ref_dataloader = dataloader.Dataloader(ref_images_paths, classes_num,
                                           ref_labels)
    target_dataloader = dataloader.Dataloader(target_images_paths, classes_num)
    network = utils.get_network(args.nntype)
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    trainer = Trainer(network, optimizer, args.lambd, compactnes_loss,
                      descriptiveness_loss)

    num_iterations = max(len(ref_images_paths) // args.batches, 1)  # integer division; at least one iteration

    train(ref_dataloader, target_dataloader, trainer, args.batches,
          num_iterations, args.epochs)
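
The train() called above is not shown in this snippet; a minimal sketch of a loop consistent with its arguments follows (the next_batch and train_step names are assumptions, not confirmed by the source):

# Hypothetical train() loop matching the call above.
def train(ref_dataloader, target_dataloader, trainer, batch_size,
          num_iterations, epochs):
    for epoch in range(epochs):
        for _ in range(num_iterations):
            ref_batch = ref_dataloader.next_batch(batch_size)        # labeled reference data
            target_batch = target_dataloader.next_batch(batch_size)  # unlabeled target data
            trainer.train_step(ref_batch, target_batch)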
Example #2
    def train(self):
        model = cool_model.create_model()
        model.compile(optimizer=Adam(Params.learn_rate), loss=Params.losses, metrics=Params.metrics)
        model.summary()  # summary() prints itself; wrapping it in print() just adds "None"

        # callbacks
        logger = CSVLogger(f'out/logs/{Params.model_name}_logs.csv')
        checkpointer = ModelCheckpoint(f'out/checkpoints/{Params.model_name}_{{epoch:02d}}.hdf5', verbose=1,
                                       save_best_only=True)

        # dataloaders
        train_loader = dataloader.Dataloader(dataloader.Dataloader.mode_train, split_size=50000)
        validation_loader = dataloader.Dataloader(dataloader.Dataloader.mode_validation, split_size=int(50000 * .42))

        num_train_batches = len(train_loader)
        num_valid_batches = int(num_train_batches * 0.42)

        model.fit_generator(generator=train_loader, validation_data=validation_loader,
                            callbacks=[checkpointer, logger], steps_per_epoch=num_train_batches,
                            epochs=100, validation_steps=num_valid_batches, shuffle=False)
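
Keras callbacks typically do not create missing directories, so the CSVLogger and ModelCheckpoint paths above fail on the first write if out/ is absent. A small guard before training avoids that (paths taken from the snippet above):

# Create the output directories the callbacks above write into.
import os
os.makedirs('out/logs', exist_ok=True)
os.makedirs('out/checkpoints', exist_ok=True)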
Example #3
    def __init__(self):
        # Input Shape
        self.height = 36
        self.width = 128
        self.shape = (self.height, self.width)

        # Datasets
        self.loader = dataloader.Dataloader('cache36_suzuki.pkl', 'cache36_kinoshita.pkl', shape=self.shape)
        self.loader.loadData()

        # Loss weights
        self.lambda_cycle = 10.  # Cycle-consistency loss
        self.lambda_id = 0.1 * self.lambda_cycle  # Identity loss

        generator_optimizer = Adam(lr=0.0002, beta_1=0.5)
        discriminator_optimizer = Adam(lr=0.0001, beta_1=0.5)

        # Build and compile the discriminators
        self.d_A = models.build_PatchGAN_Discriminator(self.shape)
        self.d_B = models.build_PatchGAN_Discriminator(self.shape)
        self.d_A.compile(loss='mse', optimizer=discriminator_optimizer, metrics=['accuracy'])
        self.d_B.compile(loss='mse', optimizer=discriminator_optimizer, metrics=['accuracy'])
        self.d_A.trainable = False
        self.d_B.trainable = False


        # Build and compile the generators
        self.g_AB = models.build_212CNN_Generator(self.shape)
        self.g_BA = models.build_212CNN_Generator(self.shape)

        input_A = Input(shape=self.shape)
        input_B = Input(shape=self.shape)

        fake_B = self.g_AB(input_A)
        fake_A = self.g_BA(input_B)  # was g_AB: fake_A must come from the B-to-A generator

        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)


        id_A = self.g_BA(input_A)
        id_B = self.g_AB(input_B)

        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        self.combined = Model(inputs=[input_A, input_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B, id_A, id_B])
        self.combined.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
                              loss_weights=[1, 1, self.lambda_cycle, self.lambda_cycle, self.lambda_id, self.lambda_id],
                              optimizer=generator_optimizer)
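
Because d_A and d_B are compiled first and only then frozen inside self.combined, training alternates discriminator and generator updates. A minimal sketch of such a step (assuming numpy is imported as np, real_A/real_B are batches from self.loader, and disc_patch matches the PatchGAN output shape; all three are assumptions):

    def train_step(self, real_A, real_B, batch_size, disc_patch):
        # Target labels sized to the PatchGAN output (disc_patch is assumed).
        valid = np.ones((batch_size,) + disc_patch)
        fake = np.zeros((batch_size,) + disc_patch)

        # 1) Update the discriminators on real vs. translated samples.
        #    (d_A/d_B still train here: they were compiled before being frozen.)
        fake_B = self.g_AB.predict(real_A)
        fake_A = self.g_BA.predict(real_B)
        dA_loss = 0.5 * np.add(self.d_A.train_on_batch(real_A, valid),
                               self.d_A.train_on_batch(fake_A, fake))
        dB_loss = 0.5 * np.add(self.d_B.train_on_batch(real_B, valid),
                               self.d_B.train_on_batch(fake_B, fake))

        # 2) Update both generators through the combined model
        #    (adversarial, cycle, and identity targets in compile order).
        g_loss = self.combined.train_on_batch(
            [real_A, real_B],
            [valid, valid, real_A, real_B, real_A, real_B])
        return dA_loss, dB_loss, g_loss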
Example #4
import dataloader
import dataset
import transform
import os
import copy
import torch  # torch.load is used below

Batch_size = 32
imageType = ['Gray', 'Colour']
dataAddress = './data/'
saveAddress2 = '%s/PreprocessData2.0' % dataAddress
dataType = ['frequency_domain', 'time_domain']
dataAddr = '%s/labeleddata_%s.pkl' % (saveAddress2, dataType[0])
type = 'Gray'
mode = 'Test'

D = dataloader.Dataloader()
p = dataset.Dataset(data_transform=transform.horizontalFlip())

# Load the preprocessed data from the pickle cache; if the cache does not
# exist yet, preprocessing runs first and takes longer.
if not os.path.isfile(dataAddr):
    labeldata = p.preprocessing(dataType=dataType[0],
                                DataperSample=1300)  # data with labels
else:
    labeldata = torch.load(dataAddr)

trainData = D.loader(
    dataset=labeldata,
    batch_size=Batch_size,
    mode='Train',
    shuffle=True,
)  # 16*89*1301
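
The cache branch above only reads; for later runs to hit the fast torch.load path, the freshly preprocessed data has to be written back once (a sketch, assuming labeldata is picklable):

# Persist the preprocessed data so the os.path.isfile check above succeeds
# on subsequent runs.
if not os.path.isfile(dataAddr):
    torch.save(labeldata, dataAddr)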
Example #5
def test():
    # Function-level smoke test. To use this for text classification, first
    # transform each sentence into vocabulary indices, then feed the data to
    # the graph (see the sketch after this example).
    num_classes = 50
    learning_rate = 0.01
    batch_size = 30
    decay_steps = 1000
    decay_rate = 0.9
    sequence_length = 37  #148
    vocab_size = 400000
    embed_size = 300
    attention_size = 50
    is_training = True
    dropout_keep_prob = 0.5
    epoch = 300
    SGD = False

    print("learning_rate : ", learning_rate)
    print("batch_size :", batch_size)

    textRNN = TextRNN(num_classes, learning_rate, batch_size, decay_steps,
                      decay_rate, sequence_length, vocab_size, embed_size,
                      is_training, attention_size)

    data_loader_train = dataloader.Dataloader(batch_size)
    data_loader_validation = dataloader.Dataloader(batch_size)
    data_loader_test = dataloader.Dataloader(batch_size)
    best_acc = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        embedding = data_loader_train.load_train_data(Training_path,
                                                      word_embedding_path)
        data_loader_validation.load_train_data(Validation_path,
                                               word_embedding_path)
        data_loader_test.load_train_data(Test_path, word_embedding_path)
        for f in range(epoch):

            data_loader_train.set_folder(f)
            data_loader_train.reset_pointer()
            cnt_train = 0
            cnt_test = 0
            acc_avg_train = 0
            acc_avg_test = 0
            cnt_all = 0
            for it in range(0, data_loader_train.num_batch):
                input_x, input_y, input_z = data_loader_train.Train_next_batch()
                outputs, loss, acc, predict, _ = sess.run(
                    [
                        textRNN.attention_output, textRNN.loss_val,
                        textRNN.accuracy, textRNN.predictions, textRNN.train_op
                    ],
                    feed_dict={
                        textRNN.input_x: input_x,
                        textRNN.input_y: input_y,
                        textRNN.dropout_keep_prob: dropout_keep_prob,
                        textRNN.Embedding_placeholder: embedding,
                        textRNN.SGD: SGD
                    })
                cnt_all += np.shape(outputs)[0]
                cnt_train += 1
                acc_avg_train += acc
            # Switch the model to SGD updates once training accuracy passes 0.7.
            if acc_avg_train / cnt_train > 0.7:
                SGD = True
            for it in range(data_loader_validation.num_batch):
                input_x, input_y, input_z = data_loader_validation.Train_next_batch()
                loss, acc, predict = sess.run(
                    [textRNN.loss_val, textRNN.accuracy, textRNN.predictions],
                    feed_dict={
                        textRNN.input_x: input_x,
                        textRNN.input_y: input_y,
                        textRNN.dropout_keep_prob: dropout_keep_prob,
                        textRNN.Embedding_placeholder: embedding
                    })
                acc_avg_test += acc
                cnt_test += 1
            if acc_avg_test / cnt_test > best_acc:
                best_acc = acc_avg_test / cnt_test
                # Write test predictions whenever validation accuracy improves.
                with open('u6022937.csv', 'w') as output:
                    output.write('id,category\n')
                    for it in range(data_loader_test.num_batch):
                        input_x, input_z = data_loader_test.Test_next_batch()
                        predict = sess.run(textRNN.predictions,
                                           feed_dict={
                                               textRNN.input_x: input_x,
                                               textRNN.dropout_keep_prob: dropout_keep_prob,
                                               textRNN.Embedding_placeholder: embedding
                                           })
                        for idx, label in enumerate(predict):
                            output.write('%s,%s\n' % (input_z[idx], label))

            print("Epoch : ", f, "Training acc : ", acc_avg_train / cnt_train,
                  ", Test acc : ", acc_avg_test / cnt_test,
                  ",Best test acc : ", best_acc)
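
As the comment at the top of test() notes, raw sentences must first be mapped to vocabulary indices. A minimal sketch of that step (the vocab dict and unknown-token index are assumptions):

def sentence_to_indices(sentence, vocab, sequence_length, unk_index=0):
    # Look up each token, then pad or truncate to the fixed sequence_length.
    indices = [vocab.get(token, unk_index) for token in sentence.split()]
    indices = indices[:sequence_length]
    indices += [unk_index] * (sequence_length - len(indices))
    return indices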

Example #6
def log_string(out_str):
    # LOG_FOUT is assumed to be a log-file handle opened earlier in the script.
    LOG_FOUT.write(out_str + "\n")
    LOG_FOUT.flush()
    print(out_str)


log_string("pid: %s" % str(os.getpid()))
log_string("use_cuda: %s" % str(torch.cuda.is_available()))

# dataset
TRAIN_DATASET = dataloader.Dataloader(root="./datasets",
                                      dataset=DATASET,
                                      split="train",
                                      normalization=NORMALIZATION,
                                      batch_size=BATCH_SIZE,
                                      max_rul=MAX_RUL,
                                      quantity=QUANTITY)

VALIDATION_DATASET = dataloader.Dataloader(root="./datasets",
                                           dataset=DATASET,
                                           split="validation",
                                           normalization=NORMALIZATION,
                                           batch_size=BATCH_SIZE)

log_string("Dataset: " + DATASET)

input_size = (TRAIN_DATASET.num_channels, TRAIN_DATASET.window,
              TRAIN_DATASET.num_features)
Example #7
def main(_):
    pp = pprint.PrettyPrinter()
    pp.pprint(FLAGS.__flags)

    n_classes = 2
    # load dataset
    data = dl.Dataloader(n_classes=n_classes,
                         test_path=FLAGS.testset,
                         embedding_path=FLAGS.word_vec,
                         split_ratio=FLAGS.valid_ratio,
                         max_sen=FLAGS.max_sen,
                         max_len=FLAGS.max_len)
    # build model
    x = tf.placeholder("int32", [None, FLAGS.max_sen, FLAGS.max_len],
                       name="input")
    y = tf.placeholder("float32", [None, n_classes], name="target")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    ham = model.HAM(vocabsize=data.vocab_size + 1,
                    hiddensize=data.hidden_dim,
                    rnnsize=FLAGS.rnnsize,
                    docsize=FLAGS.docsize,
                    max_sen=FLAGS.max_sen,
                    max_len=FLAGS.max_len)
    pred = ham.build(x, keep_prob, n_classes, data.embedding)
    pred_label = tf.argmax(pred, 1)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    correct_pred = tf.equal(pred_label, tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # load testset
    data2_path = "./data/testset2.txt"
    data2 = dl.Dataloader(n_classes=n_classes,
                          test_path=data2_path,
                          max_sen=FLAGS.max_sen,
                          max_len=FLAGS.max_len)
    # load short text testset
    data3_path = "./data/testset3.txt"
    data3 = dl.Dataloader(n_classes=n_classes,
                          test_path=data3_path,
                          max_sen=FLAGS.max_sen,
                          max_len=FLAGS.max_len)

    metrics = [accuracy, pred_label]
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True,
                            device_count={'GPU': 0})
    #config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(init)
        model_path = os.path.join(FLAGS.save, "model.ckpt")
        saver.restore(sess, model_path)

        # test: data
        print "main testset results:"
        res1 = "data/testset.res"
        feed_dict = {
            x: data.testset.data,
            y: data.testset.label,
            keep_prob: 1.0
        }
        batch_eval(sess, metrics, feed_dict, data.testset, save=res1)
        # test: dataset 2
        print "testset 2 results:"
        feed_dict = {
            x: data2.testset.data,
            y: data2.testset.label,
            keep_prob: 1.0
        }
        batch_eval(sess, metrics, feed_dict, data2.testset)
        # test: dataset 3
        print "testset 3 results: (short text)"
        feed_dict = {
            x: data3.testset.data,
            y: data3.testset.label,
            keep_prob: 1.0
        }
        batch_eval(sess, metrics, feed_dict, data3.testset)

        # test data, one doc per run
        batch_size = 1

Example #8
def log_string(out_str):
    LOG_FOUT.write(out_str + "\n")
    LOG_FOUT.flush()
    print(out_str)


log_string("pid: %s" % str(os.getpid()))
log_string("use_cuda: %s" % str(torch.cuda.is_available()))

# dataset
DATA_PATH = os.path.join("datasets", DATASET)

TEST_DATASET = dataloader.Dataloader(root="./datasets",
                                     dataset=DATASET,
                                     split="test",
                                     normalization=NORMALIZATION,
                                     batch_size=BATCH_SIZE)

log_string("Dataset: " + DATASET)

input_size = (TEST_DATASET.num_channels, TEST_DATASET.window,
              TEST_DATASET.num_features)

# get class name
clss = [
    m[0] for m in inspect.getmembers(module, inspect.isclass)
    if m[1].__module__ == FLAGS.model
]
assert len(clss) == 1
cls = clss[0]
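
A likely next step after discovering the single model class by name is to instantiate it via getattr (the constructor signature here is an assumption, not confirmed by the source):

# Instantiate the discovered class; taking `input_size` as the sole
# constructor argument is an assumption.
ModelClass = getattr(module, cls)
model = ModelClass(input_size)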