Example #1
def keras_train():  # TODO custom loop...
    tabnet_add_dense = models.tabnet_model()
    train_dataset = dataset.make_dataset('train_1.csv', config.COLUMNS, config.BATCH_SIZE,
                                         onehot=True,
                                         shuffle=True,
                                         train=True)  # MapDataset (512, 676)
    validation_dataset = dataset.make_dataset('validation_1.csv', config.COLUMNS, config.BATCH_SIZE,
                                              onehot=True,
                                              shuffle=True,
                                              train=True)  # MapDataset (512, 676)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")
    dt_value = datetime.datetime.now()
    filename = 'checkpoint/checkpoint-epoch-{epoch:04d}.ckpt'  # TODO: could we include the F1 score in the name?
    checkpoint = ModelCheckpoint(filename,  # specify the checkpoint file name
                                 verbose=1,
                                 save_freq='epoch',  # formerly period=1; 'period' is deprecated in recent Keras
                                 save_weights_only=True
                                 )
    tabnet_add_dense.compile(
        loss=focal_loss(),
        #loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        optimizer=optimizer,
        metrics=['accuracy', metrics.f1_score]
    )
    tabnet_add_dense.load_weights("checkpoint/checkpoint-epoch-0017.ckpt")
    tabnet_add_dense.fit(train_dataset, validation_data=validation_dataset,
                         epochs=100000, verbose=1,
                         callbacks=[tensorboard_callback, checkpoint],
                         steps_per_epoch=200,
                         validation_steps=3)

    return tabnet_add_dense
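focal_loss() and metrics.f1_score above come from the surrounding project and are not shown. A minimal binary focal loss compatible with how it is used here could look like the sketch below; the gamma and alpha defaults are assumptions, not values from the original.

import tensorflow as tf

def focal_loss(gamma=2.0, alpha=0.25):
    """Binary focal loss for sigmoid outputs in [0, 1] (sketch)."""
    def loss_fn(y_true, y_pred):
        y_true = tf.cast(y_true, y_pred.dtype)
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)
        # p_t: probability the model assigns to the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        return -tf.reduce_mean(alpha_t * tf.pow(1.0 - p_t, gamma) * tf.math.log(p_t))
    return loss_fn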
Example #2
def main(*args):
    path, csv, test_csv, test_labels, identifier, infer, model_name, result_dir, sub_fn_path, load, exp_name = args

    config = global_config[exp_name]
    config['path'] = path
    config['csv'] = csv
    config['test_csv'] = test_csv
    config['test_labels'] = test_labels
    config['model_name'] = model_name
    config['result_dir'] = result_dir
    config['identifier'] = identifier

    if infer:
        vocab, trn_ds, vld_ds, _, emb_matrix = make_dataset(config)
        trn_dl, vld_dl, _ = make_iterator(config, vocab, trn_ds, vld_ds, _)

        config['vocab_size'] = len(vocab.itos)
        config['pad_idx'] = vocab.stoi[PAD_TOKEN]

        model = make_model(config, emb_matrix)

        # load the model saved to disk by a previous run
        if load:
            print(
                'Loading model from disk from {}'.format(config['result_dir'] +
                                                         config['model_name'] +
                                                         '.pth'))

            model_dict = load_model(config['result_dir'] +
                                    config['model_name'] + '.pth')
            model.load_state_dict(model_dict)  # load_state_dict mutates the model in place
        else:
            model = learn(model, trn_dl, vld_dl, vocab, config)

    else:
        vocab, trn_ds, _, tst_ds, emb_matrix = make_dataset(config)
        trn_dl, _, tst_dl = make_iterator(config, vocab, trn_ds, _, tst_ds)

        config['vocab_size'] = len(vocab.itos)
        config['emb_matrix'] = emb_matrix
        config['pad_idx'] = vocab.stoi[PAD_TOKEN]

        model = make_model(config, emb_matrix)

        if load:
            print(
                'Loading model from disk from {}'.format(config['result_dir'] +
                                                         config['model_name'] +
                                                         '_full.pth'))

            model_dict = load_model(config['result_dir'] +
                                    config['model_name'] + '_full.pth')
            model.load_state_dict(model_dict)  # load_state_dict mutates the model in place
        else:
            model = learn(model, trn_dl, _, vocab, config)

        test_labels = read_csv(config['test_labels'])
        _ = predictions(model, tst_dl, None, test_labels, sub_fn_path)
Example #3
def make_model(conn, uid, ptype, epochs, batch, learning_rate, project,
               classes):

    # retrieving data after image augmentation

    message_to_send = "Loading Dataset...".encode("UTF-8")
    conn.send(len(message_to_send).to_bytes(2, byteorder='big'))
    conn.send(message_to_send)

    data, labels = make_dataset(uid, ptype, project, classes)

    data = np.array(data)

    #one-hot encoding
    labels = pd.Series(labels)
    labels = pd.get_dummies(labels).values

    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.2,
                                                        random_state=42)

    message_to_send = "Training Model...".encode("UTF-8")
    conn.send(len(message_to_send).to_bytes(2, byteorder='big'))
    conn.send(message_to_send)

    CNN(conn, x_train, x_test, y_train, y_test, int(epochs), int(batch),
        float(learning_rate), len(classes), project, uid, ptype)
Example #4
def make_data_loader(data_dir, dataset_type, plane, device=None, shuffle=False):
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    dataset = make_dataset(data_dir, dataset_type, plane, device=device)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=shuffle)

    return data_loader
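Hypothetical usage of the loader above; 'data/', 'train', and 'sagittal' are placeholder arguments, not values from the original project.

loader = make_data_loader('data/', 'train', 'sagittal', shuffle=True)
first_batch = next(iter(loader))  # each batch holds one sample, since batch_size is fixed at 1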
Example #5
def main():
    options = parse_args()

    N_train = options.n_train
    N_valid = options.n_valid
    D_in = options.n_input
    D_out = options.n_output
    epochs = options.n_epoch
    batch_size = options.n_batch
    n_sampling_combs = options.n_sampling

    X_data, X_valid, y_data, y_valid = make_dataset(N_train, N_valid, D_in)

    net = Net(D_in, D_out)
    opt = optim.Adam(net.parameters())

    for epoch in range(1, epochs + 1):
        index = torch.randperm(N_train)

        X_train = X_data[index]
        y_train = y_data[index]

        for cur_batch in range(0, N_train, batch_size):
            X_batch = X_train[cur_batch:cur_batch + batch_size]
            y_batch = y_train[cur_batch:cur_batch + batch_size]

            opt.zero_grad()
            batch_loss = torch.zeros(1)
            if len(X_batch) > 0:  # slicing never yields None; skip an empty tail batch instead
                preds = net(X_batch)

                for _ in range(n_sampling_combs):
                    i, j = np.random.choice(range(preds.shape[0]), 2)
                    s_i = preds[i]
                    s_j = preds[j]

                    if y_batch[i] > y_batch[j]:
                        S_ij = 1
                    elif y_batch[i] == y_batch[j]:
                        S_ij = 0
                    else:
                        S_ij = -1

                    loss = pairwise_loss(s_i, s_j, S_ij)
                    batch_loss += loss

            batch_loss.backward(retain_graph=True)
            opt.step()

        with torch.no_grad():
            valid_preds = net(X_valid)
            valid_swapped_pairs = swapped_pairs(valid_preds, y_valid)
            print(
                f"epoch: {epoch} valid swapped pairs: {valid_swapped_pairs}/{N_valid*(N_valid-1)//2}"
            )

    print('DONE')
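pairwise_loss and swapped_pairs are defined elsewhere in the project. Given how pairwise_loss is called (two predicted scores plus a label S_ij in {-1, 0, 1}), a RankNet-style loss fits; the sketch below is that assumption, not the original implementation.

import torch.nn.functional as F

def pairwise_loss(s_i, s_j, S_ij, sigma=1.0):
    # RankNet: C = (1 - S_ij) * sigma * (s_i - s_j) / 2 + log(1 + exp(-sigma * (s_i - s_j)))
    diff = sigma * (s_i - s_j)
    return (1.0 - S_ij) * diff / 2.0 + F.softplus(-diff)  # softplus(-d) == log(1 + exp(-d))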
Example #6
def get_data(i):
    import dataset
    imgs = dataset.make_dataset(r"E:\360Downloads\dataset\fingerprint\val")
    imgx = []
    imgy = []
    for img in imgs:
        imgx.append(img[0])
        imgy.append(img[1])
    return imgx[i], imgy[i]
Example #7
def get_data(i):
    import dataset
    imgs = dataset.make_dataset(r"D:\project\data_sets\liver\val")
    imgx = []
    imgy = []
    for img in imgs:
        imgx.append(img[0])
        imgy.append(img[1])
    return imgx[i], imgy[i]
Example #8
def resize(root, num):
    folderA = os.path.join(root, 'target')
    folderB = os.path.join(root, 'example')
    folderA = make_dataset(folderA)
    folderB = make_dataset(folderB)
    os.makedirs(os.path.join(root, 'target_resize'), exist_ok=True)
    os.makedirs(os.path.join(root, 'example_resize'), exist_ok=True)

    for i in range(num):
        if i % 100 == 0:
            print(i)
        imgA = cv2.imread(folderA[i])
        imgB = cv2.imread(folderB[i])
        imgA = cv2.resize(imgA, (128, 128))
        imgB = cv2.resize(imgB, (128, 128))

        cv2.imwrite(folderA[i].replace('target', 'target_resize'), imgA)
        cv2.imwrite(folderB[i].replace('example', 'example_resize'), imgB)
Example #9
def get_data(i):
    import dataset
    imgs = dataset.make_dataset(
        r"/Users/apple/Desktop/FYPtest_2288970/data/val")
    imgx = []
    imgy = []
    for img in imgs:
        imgx.append(img[0])
        imgy.append(img[1])
    return imgx[i], imgy[i]
Example #10
def get_data(i):
    import dataset
    imgs = dataset.make_dataset(
        r"H:\BaiduNetdisk\BaiduDownload\u_net_liver-master\data\val")
    imgx = []
    imgy = []
    for img in imgs:
        imgx.append(img[0])
        imgy.append(img[1])
    return imgx[i], imgy[i]
Example #11
def main(options):
    seed = options.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    frames, targets = dataset.make_dataset()

    trainer.train_net(frames, targets)
Example #12
    def __init__(self):
        # initializing the weights here with Xavier initialisation
        # (by multiplying with 1/sqrt(n)).
        self.weights = torch.randn(784, 10) / math.sqrt(784)
        self.weights.requires_grad_()
        self.bias = torch.zeros(10, requires_grad=True)
        self.bs = 64

        (self.x_train, self.y_train, self.x_valid,
         self.y_valid) = dataset.make_dataset()
        self.n, self.c = self.x_train.shape
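The __init__ above only prepares parameters and data; the forward pass is not shown. Following the usual from-scratch pattern for this setup (784 inputs, 10 classes), it would plausibly be one linear layer plus log-softmax. The function below is an assumption, not the original code.

def forward(xb, weights, bias):
    # xb: (batch, 784) float tensor -> (batch, 10) log-probabilities
    logits = xb @ weights + bias
    return logits - logits.exp().sum(-1, keepdim=True).log()  # log-softmax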
Example #13
def main(options):
    seed = options.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    d = options.dim
    n = options.num_samples

    x, y = dataset.make_dataset(d, n)

    trainer.train_net(x, y)
Example #14
def calculate_probability(config):
    model = models.tabnet_model()
    model.load_weights(config.CHCK_PATH)

    test_dataset = dataset.make_dataset(config.TEST_DIR,
                                        config.COLUMNS,
                                        config.TEST_BATCH_SIZE,
                                        onehot=True,
                                        train=False)
    # test_dataset = dataset.make_dataset(config.TEST_DIR, config.COLUMNS, config.TEST_BATCH_SIZE, onehot=False, train=False)
    probs = np.empty((0, 1))  # start empty; np.empty((1, 1)) would prepend an uninitialized row
    for num, chunk in enumerate(test_dataset):
        print(f"Process batch: {num + 1}")  # to 48
        prob = model(chunk).numpy()
        probs = np.append(probs, prob, axis=0)
    return probs
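Appending to a NumPy array inside the loop copies the whole buffer on every batch. An equivalent sketch, using the same model and test_dataset as above, collects the batch outputs in a list and concatenates once:

probs = np.concatenate(
    [model(chunk).numpy() for chunk in test_dataset], axis=0)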
Example #15
def train_rpn(**kwargs):
    transformed_dataset = make_dataset(**kwargs)
    dataloader = DataLoader(transformed_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=4)

    net = build_rpn(pretrained=True)
    optimizer = optim.SGD(
        [param for param in net.parameters() if param.requires_grad],
        lr=rpn_learning_rate)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[60000],
                                               gamma=0.1)
    criterion = RPNLoss()

    iteration = 0
    rpn_epochs = rpn_iterations // len(transformed_dataset)
    print("Total number of epoch is {}".format(rpn_epochs))
    for epoch in range(rpn_epochs):
        running_loss = 0.0
        for i, data in enumerate(dataloader, 0):
            # get the inputs
            inputs, labels = data

            # Extends the labels with the image size
            labels["size"] = inputs.size()[2:]

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()  # step the LR schedule after optimizer.step() (PyTorch >= 1.1 ordering)

            # print statistics
            running_loss += loss.item()
            if i % 2 == 1:
                #if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss))
                running_loss = 0.0
            iteration += 1
Example #16
def main(options):
    seed = options.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)

    net = network.Net()
    d = torch.load('trained_cnn_model.pth')
    net.load_state_dict(d['state_dict'])

    net.double()
    net.cuda()

    train_frames, _ = dataset.make_dataset()
    frames, _ = dataset.make_test_dataset()
    count = 0

    #"""
    for f in train_frames:
        f = deepcopy(f.numpy())
        J = fast_jacobian(net, f, 3 * SIZE * SIZE)
        J = J.view(-1, 3 * SIZE * SIZE)
        J = J.cpu().data.numpy()
        s, _ = eigs(J, k=1, tol=1e-3)
        top = np.abs(s)
        print(top)
        if top < 1:
            count += 1
        del J
    print("Attractors: ", count)
    #"""

    #"""
    avg_error = 0
    count = 0  # reset; the attractor loop above reused this counter
    for f in frames:
        f = deepcopy(f.numpy())
        count += 1
        error = iterate(net, f)
        #print(error)
        avg_error += error
    print(count)
    print("AVERAGE ERROR: ", avg_error / count)
Example #17
def train(config):

    basemodel_name = config["basemodel"]
    framework = config["framework"]
    lr = config["lr"]
    momentum = config["momentum"]
    weight_decay = config["weight_decay"]
    train_batch_size = config["train_bs"]
    val_batch_size = config["val_bs"]

    data_loader, data_loader_test, num_classes = ds.make_dataset(
        IMAGES,
        ANNOS,
        train_batch_size=train_batch_size,
        val_batch_size=val_batch_size,
        train_fraction=0.7)

    basemodel = bm.fetch_basemodel(basemodel_name, framework)
    model = fm.set_basemodel_in_framework(basemodel, framework, num_classes)

    # some pipeline configs (put these into the pipeline module )
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    if framework == "FasterRCNN":
        pipeline = pp.PipelineFasterRCNN(num_epochs=10,
                                         model=model,
                                         lr=lr, momentum=momentum, weight_decay=weight_decay,
                                         data_loader=data_loader,
                                         data_loader_test=data_loader_test,
                                         device=device,
                                         print_freq=1)

    elif framework == "SSD":
        pipeline = pp.PipelineSSD(num_epochs=10,
                                         model=model,
                                         lr=lr, momentum=momentum, weight_decay=weight_decay,
                                         data_loader=data_loader,
                                         data_loader_test=data_loader_test,
                                         device=device,
                                         print_freq=1)
    pipeline.train()
Example #18
def test_rpn_training(**kwargs):
    transformed_dataset = make_dataset(**kwargs)
    dataloader = DataLoader(transformed_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=4)

    net = build_rpn(pretrained=True)
    optimizer = optim.SGD(
        [param for param in net.parameters() if param.requires_grad],
        lr=rpn_learning_rate)
    criterion = RPNLoss()

    rpn_epochs = rpn_iterations // len(transformed_dataset)
    data = next(iter(dataloader))

    print("Total number of epoch is {}".format(rpn_epochs))
    for epoch in range(rpn_epochs):
        running_loss = 0.0
        # get the inputs
        inputs, labels = data

        # Extends the labels with the image size
        labels["size"] = inputs.size()[2:]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        print('[%d] loss: %.3f' % (epoch + 1, running_loss))
        running_loss = 0.0
Example #19
def main():
    max_iter_step = 60000
    labeledset = make_dataset(txs, tys)
    trainset = make_dataset(trainx, trainy)
    testset = make_dataset(testx, testy)
    with tf.device(device):
        real_data, real_label, unlabeled_data, y, z, opt_c, opt_e_z, opt_e_y, opt_g, fake_x, c_loss, e_loss_z, e_loss_y, g_loss, true_y = build_graph(
        )
    merged_all = tf.summary.merge_all()
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    with tf.Session(config=config) as sess:

        def next_y():
            return np.random.multinomial(1, [1 / 10.] * 10, size=batch_size)

        def next_z():
            return np.random.normal(0, 1,
                                    [batch_size, z_dim]).astype(np.float32)

        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

        print("Training semi-supervised wgan...")
        for i in range(max_iter_step):
            citers = Citers
            for j in range(citers):
                unlabeled_img, _ = trainset.next_batch(batch_size)
                if i % 100 == 99 and j == 0:
                    train_img, train_label = labeledset.next_batch(batch_size)
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, merged, loss_c_ = sess.run(
                        [opt_c, merged_all, c_loss],
                        feed_dict={
                            real_data: train_img,
                            real_label: train_label,
                            y: next_y(),
                            z: next_z(),
                            unlabeled_data: unlabeled_img
                        },
                        options=run_options,
                        run_metadata=run_metadata)
                    summary_writer.add_summary(merged, i)
                    summary_writer.add_run_metadata(
                        run_metadata, 'critic_metadata {}'.format(i), i)
                else:
                    _ = sess.run([opt_c],
                                 feed_dict={
                                     unlabeled_data: unlabeled_img,
                                     y: next_y(),
                                     z: next_z()
                                 })

            train_img, train_label = labeledset.next_batch(batch_size)
            unlabeled_img, _ = trainset.next_batch(batch_size)
            bz = next_z()
            by = next_y()
            if i % 100 == 99:
                _, _, _, merged, loss_e_y_, loss_e_z_, loss_g_ = sess.run(
                    [
                        opt_e_y, opt_e_z, opt_g, merged_all, e_loss_y,
                        e_loss_z, g_loss
                    ],
                    feed_dict={
                        real_data: train_img,
                        real_label: train_label,
                        y: by,
                        z: bz,
                        unlabeled_data: unlabeled_img
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata,
                    'generator_and_encoder_metadata {}'.format(i), i)
            else:
                _, _, _ = sess.run(
                    [opt_e_y, opt_e_z, opt_g],
                    feed_dict={
                        real_data: train_img,
                        real_label: train_label,
                        y: by,
                        z: bz,
                        unlabeled_data: unlabeled_img
                    })

            if i % 100 == 99:
                print(
                    "Training ite %d, c_loss: %f, e_loss_z: %f, e_loss_y: %f, g_loss: %f"
                    % (i, loss_c_, loss_e_z_, loss_e_y_, loss_g_))
                batch_y = []
                batch_z = []
                tmp = np.random.normal(0, 1, [10, z_dim]).astype(np.float32)
                for j in range(10):
                    batch_z.append(tmp)
                    tmpy = np.zeros((10, 10))
                    tmpy[:, j] = 1
                    batch_y.append(tmpy)
                batch_z = np.concatenate(batch_z, 0)
                batch_y = np.concatenate(batch_y, 0)

                bx = sess.run(fake_x, feed_dict={y: batch_y, z: batch_z})
                fig = plt.figure(image_dir + '.semi-wgan')
                grid_show(fig, (bx + 1) / 2, [32, 32, channel])
                if not os.path.exists('./logs/{}/{}'.format(
                        image_dir, args.logdir)):
                    os.makedirs('./logs/{}/{}'.format(image_dir, args.logdir))
                fig.savefig('./logs/{}/{}/{}.png'.format(
                    image_dir, args.logdir, (i - 99) // 100))

            if i % 1000 == 999:
                saver.save(sess,
                           os.path.join(ckpt_dir, "model.ckpt"),
                           global_step=i)
                testset._index_in_epoch = 0
                preds = np.zeros(
                    (testset.num_examples // batch_size * batch_size))
                gts = np.zeros(
                    (testset.num_examples // batch_size * batch_size))
                for j in range(testset.num_examples // batch_size):
                    test_img, test_label = testset.next_batch(batch_size)
                    by = sess.run(true_y, feed_dict={real_data: test_img})
                    preds[j * batch_size:(j + 1) * batch_size] = np.argmax(
                        by, 1)
                    gts[j * batch_size:(j + 1) * batch_size] = np.argmax(
                        test_label, 1)
                acc = np.sum(preds == gts) / float(gts.shape[0])
                print("Training ite %d, testing acc: %f" % (i, acc))
parser.add_argument('--w_tv', type=float, default=0.000005)

args = parser.parse_args()

if __name__ == '__main__':
    SEED = 0
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint_dir, image_dir = prepare_sub_folder(args.output_path,
                                                   delete_first=True)

    dataset = make_dataset(args)
    dataloader = DataLoader(dataset=dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            drop_last=False,
                            num_workers=args.num_workers)

    warper = Warper(args)
    warper.to(device)
    warper.train()

    paras = list(warper.parameters())
    opt = optim.Adam([p for p in paras if p.requires_grad],
                     lr=args.lr,
                     betas=(0.5, 0.999),
                     weight_decay=1e-5)
Example #21
def main():
    mus = np.concatenate([np.identity(10), np.zeros((10, z_dim - 10))], 1)
    covs = np.stack([np.identity(z_dim) * args.sigma] * 10, 0)
    max_iter_step = 60000
    trainset = make_dataset(trainx, trainy)
    testset = make_dataset(testx, testy)
    with tf.device(device):
        unlabeled_data, z, opt_c_z, opt_c_x, opt_e, opt_g, fake_x, c_loss_z, c_loss_x, e_loss, g_loss, unlabeled_z = build_graph(
        )
    merged_all = tf.summary.merge_all()
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    with tf.Session(config=config) as sess:

        def sample_z(gmm):
            if gmm is None:
                # before the first GMM fit, sample from a standard normal prior
                return np.random.normal(0, 1,
                                        [batch_size, z_dim]).astype(np.float32)
                # unreachable alternative kept from the original: sample from
                # the fixed Gaussian-mixture prior defined by mus/covs
                # ys = np.random.randint(10, size=batch_size)
                # return np.concatenate([
                #     np.random.multivariate_normal(mus[y], covs[y], 1)
                #     for y in ys
                # ], 0)
            else:
                return gmm.sample(batch_size)[0]

        def sample_z_given_y(gmm, inx):
            if gmm is None:
                return np.random.normal(0, 1, [1, z_dim]).astype(np.float32)
                # unreachable alternative kept from the original:
                # return np.random.multivariate_normal(mus[inx], covs[inx], 1)
            else:
                return np.random.multivariate_normal(gmm.means_[inx],
                                                     gmm.covariances_[inx], 1)

        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

        gmm = None
        print("Training clustering wgan...")
        for i in range(max_iter_step):
            if i < 25 or i % 500 == 0:
                citers = 100
            else:
                citers = Citers

            for j in range(citers):
                if i % 100 == 99 and j == 0:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, _, merged, loss_c_z_, loss_c_x_ = sess.run(
                        [opt_c_z, opt_c_x, merged_all, c_loss_z, c_loss_x],
                        feed_dict={
                            z: sample_z(gmm),
                            unlabeled_data: trainset.next_batch(batch_size)[0]
                        },
                        options=run_options,
                        run_metadata=run_metadata)
                    summary_writer.add_summary(merged, i)
                    summary_writer.add_run_metadata(
                        run_metadata, 'critic_metadata {}'.format(i), i)
                else:
                    _, _ = sess.run(
                        [opt_c_z, opt_c_x],
                        feed_dict={
                            unlabeled_data: trainset.next_batch(batch_size)[0],
                            z: sample_z(gmm)
                        })

            if i % 100 == 99:
                _, _, merged, loss_e_, loss_g_ = sess.run(
                    [opt_g, opt_e, merged_all, e_loss, g_loss],
                    feed_dict={
                        z: sample_z(gmm),
                        unlabeled_data: trainset.next_batch(batch_size)[0]
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata,
                    'generator_and_encoder_metadata {}'.format(i), i)
            else:
                _, _ = sess.run(
                    [opt_g, opt_e],
                    feed_dict={
                        z: sample_z(gmm),
                        unlabeled_data: trainset.next_batch(batch_size)[0]
                    })

            if i % 100 == 99:
                print(
                    "Training ite %d, c_loss_z: %f, c_loss_x: %f, e_loss: %f, g_loss: %f"
                    % (i, loss_c_z_, loss_c_x_, loss_e_, loss_g_))
                batch_y = []
                for j in range(10):
                    for k in range(10):
                        batch_y.append(j)
                batch_z = np.concatenate(
                    [sample_z_given_y(gmm, y) for y in batch_y[:64]], 0)
                bx = sess.run(fake_x, feed_dict={z: batch_z})
                batch_z = np.concatenate(
                    [sample_z_given_y(gmm, y) for y in batch_y[36:]], 0)
                bx1 = sess.run(fake_x, feed_dict={z: batch_z})
                bx = np.concatenate([bx, bx1[28:]], 0)

                fig = plt.figure(image_dir + '.clustering-wgan')
                grid_show(fig, (bx + 1) / 2, [32, 32, channel])
                if not os.path.exists('./logs/{}/{}'.format(
                        image_dir, args.logdir)):
                    os.makedirs('./logs/{}/{}'.format(image_dir, args.logdir))
                fig.savefig('./logs/{}/{}/{}.png'.format(
                    image_dir, args.logdir, (i - 99) // 100))

            if i % 500 == 0:
                trainset._index_in_epoch = 0
                true_zs = np.zeros(
                    (trainset.num_examples // batch_size * batch_size, z_dim))
                gts = np.zeros(
                    (trainset.num_examples // batch_size * batch_size))
                for j in range(trainset.num_examples // batch_size):
                    train_img, train_label = trainset.next_batch(batch_size)
                    bz = sess.run(unlabeled_z,
                                  feed_dict={unlabeled_data: train_img})
                    true_zs[j * batch_size:(j + 1) * batch_size] = bz
                    gts[j * batch_size:(j + 1) * batch_size] = train_label
                gmm = mixture.BayesianGaussianMixture(
                    n_components=10, covariance_type='full').fit(true_zs)
                preds = gmm.predict(true_zs)
                print("Training ite %d, acc: %f, nmi: %f" %
                      (i, cluster_acc(preds, gts), cluster_nmi(preds, gts)))
                #preds1 = np.argmax(np.sum(np.square(true_zs[:, np.newaxis, :] - mus[np.newaxis, :, :]), 2), 1)
                #print("Training ite %d, new gmm, acc: %f, nmi: %f; prior gmm, acc: %f, nmi: %f" % (i, cluster_acc(preds, gts), cluster_nmi(preds, gts), cluster_acc(preds1, gts), cluster_nmi(preds1, gts)))

            if i % 1000 == 999:
                saver.save(sess,
                           os.path.join(ckpt_dir, "model.ckpt"),
                           global_step=i)
Example #22
def main(path_to_train_data, path_to_validation_data):
    args = Args()

    df_trn, df_val = dataset.make_dataset(path_to_train_data,
                                          path_to_validation_data)

    if args.should_continue:
        sorted_checkpoints = _sorted_checkpoints(args)
        if len(sorted_checkpoints) == 0:
            raise ValueError(
                "Used --should_continue but no checkpoint was found in --output_dir."
            )
        else:
            args.model_name_or_path = sorted_checkpoints[-1]

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir
            and not args.should_continue):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    device = torch.device("cuda")
    args.n_gpu = torch.cuda.device_count()
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    utils.set_seed(args)

    config = AutoConfig.from_pretrained(args.config_name,
                                        cache_dir=args.cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name,
                                              cache_dir=args.cache_dir)
    model = AutoModelWithLMHead.from_pretrained(
        args.model_name_or_path,
        from_tf=False,
        config=config,
        cache_dir=args.cache_dir,
    )
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = utils.load_and_cache_examples(args,
                                                      tokenizer,
                                                      df_trn,
                                                      df_val,
                                                      evaluate=False)

        global_step, tr_loss = train.train(args, train_dataset, model,
                                           tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer, you can reload them using from_pretrained()
    if args.do_train:
        # Create output directory if needed
        os.makedirs(args.output_dir, exist_ok=True)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelWithLMHead.from_pretrained(args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                "/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            model = AutoModelWithLMHead.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate.evaluate(args,
                                       model,
                                       tokenizer,
                                       df_trn,
                                       df_val,
                                       prefix=prefix)
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

    return results
Example #23
import pathlib


def get_filenames_of_path(path: pathlib.Path, ext: str = '*'):
    """Returns a list of files in a directory/path. Uses pathlib."""
    filenames = [
        file for file in path.glob(ext + '/**/*.npy') if file.is_file()
    ]
    return filenames


root = pathlib.Path('/data/dk/datasets_CROPS/crops_fixed_scale_uint8/valid')

# class_to_idx = {'FP': 0, 'LAD': 1, 'LCX': 2, 'RCA': 3, 'LM': 4}
class_to_idx = ['bengin', 'TP', 'negative', 'nipple']
train_samples = make_dataset(root, class_to_idx, extensions='npy')
print(len(train_samples))
dataset_train = ClassificationDataset(train_samples)
print(len(dataset_train))
train_sampler = BalancedBatchSampler(dataset_train)
# dataloader_train = DataLoader(dataset=dataset_train, sampler=train_sampler, batch_size=4)
dataloader_train = DataLoader(dataset=dataset_train,
                              batch_size=4,
                              shuffle=True)
x, y = next(iter(dataloader_train))

print(x)
print(y)
Example #24
    weights[_w_name] = checkpoint['state_dict'][w_name]
big_model.load_state_dict(weights)
big_model.cuda()

model = get_model(num_classes=27, sample_size=224, width_mult=1.0)
try:
    checkpoint = torch.load('./best_classifier_checkpoint.pth.tar',
                            map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint)
except Exception:  # fall back to the autoencoder pretraining weights
    checkpoint = torch.load(
        './pretrain/models/imagenet_mobilenetv2_autoencoder.pth')
    model.load_feature_detector_from_autoencoder(checkpoint['state_dict'])
model.cuda()

train_ds, test_ds = make_dataset()
loss_fn = nn.CrossEntropyLoss()


def SoftmaxWithTemperature(temp=1, dim=1):
    def fn(x):
        exp = torch.exp(x / temp)
        denom = torch.sum(exp, dim, keepdim=True)  # keepdim so the division broadcasts over the class dim
        return exp / denom

    return fn


act_fn = SoftmaxWithTemperature(temp=10)
learning_rate = 0.001
clip_value = 0.05
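A quick sanity check of the closure above (hypothetical usage, not from the original): with keepdim=True in the sum, every row of the output is a proper distribution and matches torch.softmax(x / temp, dim=1).

x = torch.randn(2, 27)  # fake logits for 27 classes
probs = act_fn(x)
print(probs.sum(dim=1))  # ~tensor([1., 1.])
print(torch.allclose(probs, torch.softmax(x / 10, dim=1)))  # True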
Example #25
def main():
    """ main function """

    RANDOM_SEED = 42
    paths = {'train': '../data/train.csv', 'test': '../data/test.csv'}
    target, test, train = make_dataset(paths)
    X, X_valid, y, y_valid = train_test_split(train,
                                              target,
                                              test_size=0.2,
                                              random_state=RANDOM_SEED)

    forest = RandomForestRegressor(n_jobs=-1,
                                   n_estimators=200,
                                   random_state=RANDOM_SEED,
                                   max_features=50,
                                   min_samples_leaf=2,
                                   min_samples_split=2,
                                   max_depth=20)
    forest.fit(X, y)
    predictions = forest.predict(X_valid)

    # gboost = GradientBoostingRegressor(n_estimators=1500,
    #                                    learning_rate=0.03,
    #                                    max_features=40,
    #                                    min_samples_leaf=2,
    #                                    min_samples_split=12,
    #                                    random_state=RANDOM_SEED)
    # gboost.fit(train, target)

    # lasso = Lasso(alpha=0.0005, max_iter=5000, random_state=RANDOM_SEED)
    # lasso.fit(X, y)
    # predictions = lasso.predict(X_valid)

    # ridge = Ridge(alpha=7.5, random_state=RANDOM_SEED)
    # ridge.fit(train, target)

    def cv_rmse(model):
        # cross_val_score returns an array, so use np.sqrt
        # (math.sqrt would fail on array input)
        rmse = np.sqrt(-cross_val_score(model,
                                        train,
                                        target,
                                        scoring="neg_mean_squared_error",
                                        cv=5,
                                        n_jobs=-1))
        return rmse

    rf_score = cv_rmse(forest)
    # las_score = cv_rmse(lasso)
    # rid_score = cv_rmse(ridge)
    # gb_score = cv_rmse(gboost)

    print("RandomForest CV score is:      {:.4f} ({:.4f})".format(
        rf_score.mean(), rf_score.std()))
    # print("Gradient Boosting CV score is: {:.4f} ({:.4f})".format(
    #     gb_score.mean(), gb_score.std()))
    # print("Lasso CV score is:             {:.4f} ({:.4f})".format(
    # las_score.mean(), las_score.std()))
    # print("Ridge CV score is:             {:.4f} ({:.4f})".format(
    #     rid_score.mean(), rid_score.std()))

    # sns.scatterplot(x=y_valid, y=predictions)
    # plt.show()

    print("RandomForest score is: {:.4f}".format(
        sqrt(mean_squared_error(y_valid, predictions)).mean()))
    print("RandomForest R2 score is : {:.4f}".format(
        forest.score(X_valid, y_valid)))
Example #26
params = open_config_file(args.config)

params.gpu_ids = [params.gpu_ids]
# set gpu ids
if len(params.gpu_ids) > 0:
    torch.cuda.set_device(params.gpu_ids[0])

params.nThreads = 1  # test code only supports nThreads = 1
params.batchSize = 1  # test code only supports batchSize = 1
params.serial_batches = True  # no shuffle
params.no_flip = True  # no flip

###

create_dir(Path(params.results_dir))
inference_images = make_dataset(params.source_dir)
num_inference_images = len(inference_images)
print(f'#inference images = {num_inference_images}')

model = create_model(params)
transform = get_transform(params)
start_time = time.time()

for i, img_path in enumerate(inference_images):

    if params.how_many:
        if i >= params.how_many:
            print(f'how_many: {params.how_many}')
            break

    frameid = img_path.split('/')[-1].replace('.png', '')
Example #27
    }

if __name__ == '__main__':
    model_dict = {
        'n_res_blocks': config.n_res,
        'n_classes': 10,
        'n_channels': 128,
    }
    os.makedirs(config.save_dir, exist_ok=True)

    np_dataset = pickle.load(open(config.dataset, 'rb'))
    np_train_dataset, np_val_dataset = np_dataset

    torch.manual_seed(config.seed)

    train_rel_dataset, train_nonrel_dataset = make_dataset(np_train_dataset, rel_augmentation=True)
    train_dataset = torch.utils.data.ConcatDataset([train_rel_dataset, train_nonrel_dataset])
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=config.n_cpu)

    val_rel_dataset, val_nonrel_dataset = make_dataset(np_val_dataset)
    val_rel_dataloader = torch.utils.data.DataLoader(val_rel_dataset, batch_size=config.batch_size, pin_memory=True)
    val_nonrel_dataloader = torch.utils.data.DataLoader(val_nonrel_dataset, batch_size=config.batch_size, pin_memory=True)


    model_name = '{}_{}_{}'.format(model_dict['n_res_blocks'], model_dict['n_channels'], config.tag)
    tb = tensorboard.tf_recorder(model_name, config.log_dir)
    net = make_model(model_dict).cuda()
    optimizer = optim.Adam(net.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    criterion = nn.CrossEntropyLoss().cuda()

    if config.resume:
Example #28
def features():
    start_time = time.time()
    make_dataset(config)
    end_time = time.time()
    return end_time - start_time
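For timing short calls like this, time.perf_counter() offers higher resolution than time.time(). A drop-in variant (same behavior, different clock):

def features():
    start = time.perf_counter()
    make_dataset(config)
    return time.perf_counter() - start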
Example #29
from train import train_model, save_model
from dataset import make_dataset, save_dataset, load_dataset
from config import make_config
import sys
import time

config = make_config("""{
    "target":"example",
    "activation":"relu",
    "solver":"adam",
    "limit": 1000
}""")

try:
    dataset = load_dataset(config)
except Exception:  # no cached dataset yet; build and save it
    dataset = make_dataset(config)
    save_dataset(config, dataset)

print("loaded dataset")


def features():
    start_time = time.time()
    make_dataset(config)
    end_time = time.time()
    return end_time - start_time


def train(dataset):
    X, Y = dataset
    start_time = time.time()
Example #30
def main():
    max_iter_step = 60000
    mnist_trainset = make_dataset(mnist_trainX, mnist_trainY)
    svhn_trainset = make_dataset(svhn_trainX, svhn_trainY)

    with tf.device(device):
        x1, f1, y1, x2, f2, opt, loss1, y1_test, y2_test, feature1, feature2, opt_2, loss2_critic, loss2_clf, f1_test, f2_test, reset, opt_3, loss3, epsilon, init = build_graph(
        )
    merged_all = tf.summary.merge_all()
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = False
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

        F1 = np.zeros([svhn_trainY.shape[0], 8, 8, 64])
        Y1 = np.zeros(svhn_trainY.shape)
        F2 = np.zeros([mnist_trainY.shape[0], 8, 8, 64])
        Y2 = np.zeros(mnist_trainY.shape)
        print("Pretraining...")
        for i in range(10000):
            svhn_bx, svhn_by = svhn_trainset.next_batch(batch_size)
            mnist_bx, _ = mnist_trainset.next_batch(batch_size)
            [_, loss1_] = sess.run([opt, loss1],
                                   feed_dict={
                                       x1: svhn_bx,
                                       y1: svhn_by,
                                       x2: mnist_bx
                                   })

            if i % 100 == 99:
                print("Pretraining ite %d, loss1: %f" % (i, loss1_))

            if i % 1000 == 999:
                acc1_ = []
                for j in range(26):
                    y1_test_ = sess.run(
                        [y1_test],
                        feed_dict={x1: svhn_testX[j * 1000:(j + 1) * 1000]})
                    acc1_.append(
                        np.mean(
                            np.equal(svhn_testY[j * 1000:(j + 1) * 1000],
                                     y1_test_).astype(np.float32)))
                acc2_ = []
                for j in range(10):
                    y2_test_ = sess.run(
                        [y2_test],
                        feed_dict={x2: mnist_testX[j * 1000:(j + 1) * 1000]})
                    acc2_.append(
                        np.mean(
                            np.equal(mnist_testY[j * 1000:(j + 1) * 1000],
                                     y2_test_).astype(np.float32)))
                print("--------->Pretraining ite %d, acc1: %f, acc2: %f" %
                      (i, np.mean(acc1_), np.mean(acc2_)))
        for i in range(int(math.ceil(svhn_trainset._num_examples /
                                     batch_size))):
            start = i * batch_size
            end = min((i + 1) * batch_size, svhn_trainset._num_examples)
            F1[start:end] = sess.run(
                f1, feed_dict={x1: svhn_trainset._images[start:end]})
            Y1[start:end] = svhn_trainset._labels[start:end]
        for i in range(
                int(math.ceil(mnist_trainset._num_examples / batch_size))):
            start = i * batch_size
            end = min((i + 1) * batch_size, mnist_trainset._num_examples)
            F2[start:end] = sess.run(
                f2, feed_dict={x2: mnist_trainset._images[start:end]})
            Y2[start:end] = mnist_trainset._labels[start:end]
        np.savez('features.npz', F1=F1, Y1=Y1, F2=F2, Y2=Y2)

        #         npz = np.load('features.npz')
        #         F1 = npz['F1']
        #         F2 = npz['F2']
        #         Y1 = npz['Y1']
        #         Y2 = npz['Y2']
        print("Trainging...")
        F1_set = make_dataset(F1, Y1)
        F2_set = make_dataset(F2, Y2)
        for ite in range(100):
            sess.run(init)
            for i in range(2000):
                f1_, y1_ = F1_set.next_batch(batch_size)
                f2_, y2_ = F2_set.next_batch(batch_size)
                [_, loss2_clf_,
                 loss2_critic_] = sess.run([opt_2, loss2_clf, loss2_critic],
                                           feed_dict={
                                               feature1: f1_,
                                               y1: y1_,
                                               feature2: f2_
                                           })
                if i % 100 == 99:
                    [f1_test_, f2_test_] = sess.run([f1_test, f2_test],
                                                    feed_dict={
                                                        feature1: f1_,
                                                        feature2: f2_
                                                    })
                    print(
                        "Training epoch %d, ite %d, loss2_clf: %f, loss2_critic: %f, acc1: %f, acc2: %f"
                        %
                        (ite, i, loss2_clf_, loss2_critic_,
                         np.mean(np.equal(y1_, f1_test_).astype(np.float32)),
                         np.mean(np.equal(y2_, f2_test_).astype(np.float32))))

            tmp = np.zeros(F1_set._images.shape)
            for i in range(int(math.ceil(F1_set._num_examples / batch_size))):
                print("Training adversarial samples at batch %d" % i)
                start = i * batch_size
                end = min((i + 1) * batch_size, svhn_trainset._num_examples)
                f1_ = F1_set._images[start:end]
                sess.run(reset)
                for j in range(100):
                    [_, loss3_] = sess.run([opt_3, loss3],
                                           feed_dict={feature1: f1_})
                    #print(i, j, loss3_)
                tmp[start:end] = epsilon.eval()[:end - start] + f1_

            F1_set._images = tmp