Example #1
    def __init__(self,
                 lstm,
                 state_vocab,
                 object_vocab,
                 args,
                 map_dim=10,
                 batch_size=32):
        super(UVFA_text, self).__init__()

        self.state_vocab = state_vocab
        self.object_vocab = object_vocab
        self.lstm = lstm
        self.rank = args.rank
        self.map_dim = map_dim
        self.batch_size = batch_size
        self.positions = self.__agent_pos()

        ## add one for agent position
        self.state_dim = (self.state_vocab + 1) * (map_dim**2)
        # self.state_dim = self.state_vocab * map_dim**2
        self.state_layers = [self.state_dim, 128, 128, args.rank]
        self.state_mlp = models.MLP(self.state_layers)

        self.object_dim = self.object_vocab * (map_dim**2)
        self.object_layers = [self.object_dim, 128, 128, args.rank]
        self.object_mlp = models.MLP(self.object_layers)
Example #2
    def __init__(self,
                 state_vocab,
                 object_vocab,
                 args,
                 map_dim=10,
                 batch_size=32):
        super(UVFA_pos, self).__init__()

        self.state_vocab = state_vocab
        self.object_vocab = object_vocab
        self.total_vocab = state_vocab + object_vocab
        self.pos_size = 2

        self.rank = args.rank
        self.map_dim = map_dim
        self.batch_size = batch_size
        self.positions = self.__agent_pos()

        ## add one for agent position
        self.input_dim = (self.total_vocab + 1) * (map_dim**2)
        self.world_layers = [self.input_dim, 128, 128, args.rank]
        self.world_mlp = models.MLP(self.world_layers)

        # self.object_dim = self.object_vocab * (map_dim**2)
        self.pos_layers = [self.pos_size, 128, 128, args.rank]
        self.pos_mlp = models.MLP(self.pos_layers)
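
A quick sanity check of the input-dimension arithmetic shared by Examples #1 and #2 (the vocabulary sizes below are made-up values, not taken from the original code):

# Hypothetical sizes, used only to illustrate the dimension arithmetic.
state_vocab, object_vocab, map_dim = 5, 3, 10

# Example #1: one extra channel encodes the agent position.
state_dim = (state_vocab + 1) * (map_dim ** 2)   # (5 + 1) * 100 = 600
object_dim = object_vocab * (map_dim ** 2)       # 3 * 100 = 300

# Example #2: state and object vocabularies share a single grid encoding.
input_dim = (state_vocab + object_vocab + 1) * (map_dim ** 2)  # 9 * 100 = 900
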
Example #3
    def __init__(self,
                 actions,
                 observation_size,
                 hidden_size,
                 emp_num_steps,
                 beta,
                 mem_size,
                 mem_fields,
                 max_batch_size,
                 device='cpu'):
        assert type(actions) is dict
        self.device = device
        self.beta = beta
        self.max_batch_size = max_batch_size

        self.actions = actions
        actions_id = [str(x) for x in self.actions.values()]
        self.actions_keys = [
            ''.join(act_seq)
            for act_seq in product(actions_id, repeat=emp_num_steps)
        ]

        self.actions_seqs = {}
        for actions_key in self.actions_keys:
            self.actions_seqs[actions_key] = self.actions_keys.index(
                actions_key)

        # model used to compute likelihood of action sequences
        self.decoder = models.MLP(2 * observation_size, hidden_size,
                                  len(self.actions_seqs))
        self.decoder.to(self.device)

        self.obj_decoder = nn.CrossEntropyLoss()
        self.optim_decoder = optim.Adam(self.decoder.parameters())

        # model used to compute the source distribution of actions
        self.model_source_distr = models.MLP(observation_size, hidden_size,
                                             len(self.actions_seqs))
        self.model_source_distr.to(self.device)

        self.model_phi = models.MLP(observation_size, hidden_size, 1)
        self.model_phi.to(self.device)

        self.obj_source = nn.MSELoss(reduction='mean')
        self.optim_source = optim.Adam(
            list(self.model_source_distr.parameters()) + \
            list(self.model_phi.parameters())
        )

        self.memory = utils.Memory(mem_size, *mem_fields)
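
The action-sequence bookkeeping in Examples #3 and #4 enumerates every sequence of emp_num_steps actions with itertools.product and assigns each sequence an index. A minimal standalone sketch of that pattern (the two-action, two-step setup is invented for illustration):

from itertools import product

actions = {'left': 0, 'right': 1}   # hypothetical action dictionary
emp_num_steps = 2

actions_id = [str(x) for x in actions.values()]
actions_keys = [''.join(seq) for seq in product(actions_id, repeat=emp_num_steps)]
# enumerate() produces the same mapping as the repeated list.index() lookups above
actions_seqs = {key: idx for idx, key in enumerate(actions_keys)}
print(actions_seqs)   # {'00': 0, '01': 1, '10': 2, '11': 3}
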
Example #4
    def __init__(self,
                 actions,
                 observation_size,
                 hidden_size,
                 emp_num_steps,
                 beta,
                 alpha,
                 mem_size,
                 mem_fields,
                 max_batch_size,
                 path_source_distr,
                 device='cpu'):
        assert type(actions) is dict
        self.device = device
        self.beta = beta
        self.alpha = alpha
        self.max_batch_size = max_batch_size

        self.actions = actions
        actions_id = [str(x) for x in self.actions.values()]
        self.actions_keys = [
            ''.join(act_seq)
            for act_seq in product(actions_id, repeat=emp_num_steps)
        ]

        self.actions_seqs = {}
        for actions_key in self.actions_keys:
            self.actions_seqs[actions_key] = self.actions_keys.index(
                actions_key)
        self.num_actions_seqs = len(self.actions_seqs)

        # model used to compute score (or marginals/joint) of (s', a) conditioned on s
        self.model_score = models.MLP(
            2 * observation_size + len(self.actions_seqs), hidden_size, 1)
        self.model_score.to(self.device)

        self.obj_score = utils.UnbiasedMine(ema_weight=self.beta)
        self.optim_score = optim.Adam(self.model_score.parameters())

        # model used to compute the source distribution of actions
        self.model_source_distr = models.MLP(observation_size, hidden_size,
                                             len(self.actions_seqs))
        self.model_source_distr.load(path_source_distr)
        self.model_source_distr.to(self.device)

        self.memory = utils.Memory(mem_size, *mem_fields)
        self.seq_onehot = None
        self.empowerment_states = np.zeros(observation_size)
Example #5
def predict(hparams,
            model_design,
            X,
            Y,
            data,
            data_dir="models/mlp",
            splits=5):

    x = torch.tensor(X).type(dtype=torch.float)
    y = torch.tensor(Y).type(dtype=torch.float)

    mae = np.zeros(splits)
    nse = np.zeros(splits)
    preds = np.zeros((splits, len(Y)))

    for i in range(splits):

        model = models.MLP(model_design["layer_sizes"])
        model.load_state_dict(
            torch.load(os.path.join(data_dir, f"{data}_model{i}.pth")))
        model.eval()

        with torch.no_grad():
            preds[i, :] = model(x).squeeze(1)
            mae[i] = metrics.mean_absolute_error(y, preds[i, :])
            nse[i] = utils.nash_sutcliffe(y.numpy().squeeze(1), preds[i, :])

    return preds, mae, nse
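
Example #5 returns per-fold predictions of shape (splits, len(Y)); a common follow-up is to average them into a single ensemble prediction. A minimal sketch with a stand-in array (the commented-out line shows the real call from the example; the data label is hypothetical):

import numpy as np

# preds, mae, nse = predict(hparams, model_design, X, Y, data="mlp_run")  # as in Example #5
preds = np.random.rand(5, 100)       # stand-in for the (splits, len(Y)) prediction array
ensemble_pred = preds.mean(axis=0)   # average the k per-fold models
print(ensemble_pred.shape)           # (100,)
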
Example #6
def net_fn(inputs, theta=None, is_training=True):
  if args.model == 'mlp':
    mlp = models.MLP(
        nlayers=args.nlayers, nhid=args.nhid, with_bias=True, batch_norm=False)
    return mlp(inputs, theta, is_training)
  elif args.model == 'resnet':
    network = models.Net(args.model_size)
    return network(inputs, is_training)
Example #7
    def testMLP(self):
        resolutions = [
            [1, 2, 2],
            [1, 3, 3],
            [1, 4, 4],
            [1, 5, 5],
            [1, 4, 5],
            [1, 5, 4],
            [1, 27, 32],
            [1, 32, 27],
            [1, 32, 32],
            [3, 32, 32],
        ]
        units = [
            [10],
            [10, 10, 10, 10],
            [1000],
        ]
        activations = [
            torch.nn.ReLU,
            torch.nn.Sigmoid,
            torch.nn.Tanh,
        ]
        normalizations = [
            None,
            torch.nn.BatchNorm1d
        ]

        clamps = [
            True,
            False
        ]

        scales_and_whitens = [
            (False, False),
            (True, False),
            (False, True),
        ]

        classes = 10
        for resolution in resolutions:
            for unit in units:
                for activation in activations:
                    for normalization in normalizations:
                        for clamp in clamps:
                            for scale_and_whiten in scales_and_whitens:
                                original_model = models.MLP(classes, resolution, clamp=clamp, scale=scale_and_whiten[0], whiten=scale_and_whiten[1], units=unit, activation=activation, normalization=normalization)
                                for parameters in original_model.parameters():
                                    parameters.data.zero_()

                                common.state.State.checkpoint(self.filepath, original_model)
                                state = common.state.State.load(self.filepath)
                                loaded_model = state.model

                                for parameters in loaded_model.parameters():
                                    self.assertEqual(torch.sum(parameters).item(), 0)
Example #8
    def __init__(self, config, flow, attn, event_dim):
        super().__init__()
        self.config = config
        self.event_dim = event_dim

        distrib_augment_net = models.MLP(
            config['latent_dim'], config['net_cif_dist_hidden_dims'],
            (config['cif_latent_dim'] - config['latent_dim']) * 2,
            nonlin=torch.nn.GELU())
        distrib_augment = models.ConditionalNormal(
            net=distrib_augment_net, split_dim=event_dim,
            clamp=config['clamp_dist'])
        self.act_norm = models.ActNormBijectionCloud(config['cif_latent_dim'])
        distrib_slice = distrib_augment
        self.augmenter = models.Augment(
            distrib_augment, config['latent_dim'], split_dim=event_dim)

        pre_attention_mlp = models.MLP(
            config['latent_dim'] // 2, config['pre_attention_mlp_hidden_dims'],
            config['attn_input_dim'], torch.nn.GELU(), residual=True)

        self.affine_cif = models.AffineCoupling(
            config['cif_latent_dim'], config['affine_cif_hidden'], nn.GELU(),
            scale_fn_type='sigmoid',
            split_dim=config['cif_latent_dim'] - config['latent_dim'])
        self.flow = models.PreConditionApplier(
            flow(config['latent_dim'], config['attn_dim']),
            CouplingPreconditionerAttn(attn(), pre_attention_mlp,
                                       config['latent_dim'] // 2,
                                       event_dim=event_dim))
        self.slicer = models.Slice(distrib_slice, config['latent_dim'],
                                   dim=self.event_dim)

        self.reverse = models.Reverse(config['cif_latent_dim'], dim=-1)
Example #9
def provide_models(
    encoder,
    num_intents=22
):  # generator, that provides all possible model configurations
    modes = ['sim', 'mlp']
    multilabels = [True, False]
    for mode, multilabel in itertools.product(modes, multilabels):
        if mode == 'sim':
            yield mode, multilabel, models.Similarity(encoder,
                                                      multilabel=multilabel)
        else:
            yield mode, multilabel, models.MLP(encoder,
                                               multilabel=multilabel,
                                               num_intents=num_intents)
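
Example #9 is a generator, so the model configurations are produced lazily; a hedged usage sketch (the encoder object and the training/evaluation steps are placeholders, not part of the original):

for mode, multilabel, model in provide_models(encoder, num_intents=22):
    print('configuration: mode={}, multilabel={}'.format(mode, multilabel))
    # project-specific training / evaluation of `model` would go here
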
Example #10
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        with tf.variable_scope(self.scope):
            self.input = tf.placeholder(tf.float32, [None] +
                                        list(self.env.observation_space.shape),
                                        name='input')

            self.cnn_output = models.CNN(scope='cnn',
                                         convs=kwargs['convs'],
                                         hiddens=kwargs['hiddens'],
                                         inpt=self.input)

            self.mlp_output = models.MLP(scope='mlp',
                                         hiddens=kwargs['hiddens'],
                                         inpt=self.cnn_output)
Example #11
    def testMLP(self):
        resolutions = [
            [1, 2, 2],
            [1, 3, 3],
            [1, 4, 4],
            [1, 5, 5],
            [1, 4, 5],
            [1, 5, 4],
            [1, 27, 32],
            [1, 32, 27],
            [1, 32, 32],
            [3, 32, 32],
        ]
        units = [
            [10],
            [10, 10],
            [10, 10, 10],
            [10, 10, 10, 10],
            [1000],
        ]
        activations = [
            torch.nn.ReLU,
            torch.nn.Sigmoid,
            torch.nn.Tanh,
        ]
        normalizations = [None, torch.nn.BatchNorm1d]

        classes = 10
        batch_size = 100
        for resolution in resolutions:
            for unit in units:
                for activation in activations:
                    for normalization in normalizations:
                        model = models.MLP(classes,
                                           resolution,
                                           clamp=True,
                                           units=unit,
                                           activation=activation,
                                           normalization=normalization)
                        output = model(
                            torch.autograd.Variable(
                                torch.zeros([batch_size] + resolution)))
                        self.assertEqual(output.size()[0], batch_size)
                        self.assertEqual(output.size()[1], classes)
Example #12
def get_model(args, num_embeddings, num_classes):
    non_linearities = {
        'tanh': torch.tanh,
        'relu': torch.nn.ReLU(),
        'leaky-relu': torch.nn.LeakyReLU()
    }
    if args.model == 'dcnn':
        model = models.DCNN(num_embeddings,
                            args.embedding_dim,
                            num_classes,
                            kernel_sizes=args.kernel_sizes,
                            num_filters=args.num_filters,
                            non_linearity=non_linearities[args.non_linearity])
    elif args.model == 'mlp':
        max_length = max(len(x.text) for x in train_iter.data())
        model = models.MLP(num_embeddings, args.embedding_dim, max_length,
                           num_classes)
    if torch.cuda.is_available():
        model = model.cuda()
    return model
Example #13
def build_model(config):
    model_type = config['model_type']
    # import ipdb as pdb; pdb.set_trace()
    if model_type =='lenet':
        model = models.LENET(config)
    elif model_type == 'mlp':
        model = models.MLP(config)
    else:
        print("model_type={0} is not supported yet!".fortmat(model_type))
    if config['operation_mode'] == "retrain" or config['operation_mode'] == "inference":
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    else:
        # Loss and Optimizer
        model.weights_init(config)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.99)
    return model, criterion, optimizer, scheduler
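
A hedged sketch of consuming Example #13's return tuple; the config keys mirror the ones the function reads, but the values (and any extra keys models.MLP itself may expect) are assumptions:

config = {
    'model_type': 'mlp',
    'operation_mode': 'train',   # anything other than "retrain"/"inference" calls weights_init
    'lr': 1e-3,
}
model, criterion, optimizer, scheduler = build_model(config)
# typical loop: loss = criterion(model(x), y); loss.backward(); optimizer.step(); scheduler.step()
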
Example #14
def make_NN(n_classes, params):
    if params.nn_type == 'CNN_ND':
        NN = models.CNN_ND(n_layers=params.n_layers,
                           n_dim=params.n_dim,
                           n_filters=params.n_filters,
                           filter_size=params.filter_size,
                           pad_size=(params.filter_size - 1) /
                           2 if type(params.filter_size) is int else map(
                               lambda x: (x - 1) / 2, params.filter_size),
                           n_output=n_classes,
                           use_bn=params.use_bn)
    elif params.nn_type == 'CNN_ND_GAP':
        NN = models.CNN_ND_GAP(n_output=n_classes)
    elif params.nn_type == 'MLP':
        NN = models.MLP(n_layers=params.n_layers,
                        n_hidden=params.n_hidden_fc,
                        n_output=n_classes,
                        use_bn=params.use_bn)
    else:
        raise Exception('No such model specified')
    return NN
Example #15
def setup_and_run(args, criterion, device, train_loader, test_loader,
                  val_loader, logging, results):
    global BEST_ACC
    print("\n#### Running REF ####")

    # architecture
    if args.architecture == "MLP":
        model = models.MLP(args.input_dim, args.hidden_dim,
                           args.output_dim).to(device)
    elif args.architecture == "LENET300":
        model = models.LeNet300(args.input_dim, args.output_dim).to(device)
    elif args.architecture == "LENET5":
        model = models.LeNet5(args.input_channels, args.im_size,
                              args.output_dim).to(device)
    elif "VGG" in args.architecture:
        assert (args.architecture == "VGG11" or args.architecture == "VGG13"
                or args.architecture == "VGG16"
                or args.architecture == "VGG19")
        model = models.VGG(args.architecture, args.input_channels,
                           args.im_size, args.output_dim).to(device)
    elif args.architecture == "RESNET18":
        model = models.ResNet18(args.input_channels, args.im_size,
                                args.output_dim).to(device)
    elif args.architecture == "RESNET34":
        model = models.ResNet34(args.input_channels, args.im_size,
                                args.output_dim).to(device)
    elif args.architecture == "RESNET50":
        model = models.ResNet50(args.input_channels, args.im_size,
                                args.output_dim).to(device)
    elif args.architecture == "RESNET101":
        model = models.ResNet101(args.input_channels, args.im_size,
                                 args.output_dim).to(device)
    elif args.architecture == "RESNET152":
        model = models.ResNet152(args.input_channels, args.im_size,
                                 args.output_dim).to(device)
    else:
        print('Architecture type "{0}" not recognized, exiting ...'.format(
            args.architecture))
        exit()

    # optimizer
    if args.optimizer == "ADAM":
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learning_rate,
                               weight_decay=args.weight_decay)
    elif args.optimizer == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            nesterov=args.nesterov,
            weight_decay=args.weight_decay,
        )
    else:
        print('Optimizer type "{0}" not recognized, exiting ...'.format(
            args.optimizer))
        exit()

    # lr-scheduler
    if args.lr_decay == "STEP":
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=1,
                                              gamma=args.lr_scale)
    elif args.lr_decay == "EXP":
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer,
                                                     gamma=args.lr_scale)
    elif args.lr_decay == "MSTEP":
        x = args.lr_interval.split(",")
        lri = [int(v) for v in x]
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=lri,
                                                   gamma=args.lr_scale)
        args.lr_interval = 1  # lr_interval handled in scheduler!
    else:
        print('LR decay type "{0}" not recognized, exiting ...'.format(
            args.lr_decay))
        exit()

    init_weights(model, xavier=True)
    logging.info(model)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("Number of parameters: %d", num_parameters)

    start_epoch = -1
    iters = 0  # total no of iterations, used to do many things!
    # optionally resume from a checkpoint
    if args.eval:
        logging.info('Loading checkpoint file "{0}" for evaluation'.format(
            args.eval))
        if not os.path.isfile(args.eval):
            print(
                'Checkpoint file "{0}" for evaluation not recognized, exiting ...'
                .format(args.eval))
            exit()
        checkpoint = torch.load(args.eval)
        model.load_state_dict(checkpoint["state_dict"])

    elif args.resume:
        checkpoint_file = args.resume
        logging.info('Loading checkpoint file "{0}" to resume'.format(
            args.resume))
        if not os.path.isfile(checkpoint_file):
            print('Checkpoint file "{0}" not recognized, exiting ...'.format(
                checkpoint_file))
            exit()
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint["epoch"]
        assert args.architecture == checkpoint["architecture"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        BEST_ACC = checkpoint["best_acc1"]
        iters = checkpoint["iters"]
        logging.debug("best_acc1: {0}, iters: {1}".format(BEST_ACC, iters))

    if not args.eval:
        logging.info("Training...")
        model.train()
        st = timer()

        for e in range(start_epoch + 1, args.num_epochs):
            for i, (data, target) in enumerate(train_loader):
                l = train_step(model, device, data, target, optimizer,
                               criterion)
                if i % args.log_interval == 0:
                    acc1, acc5 = evaluate(args,
                                          model,
                                          device,
                                          val_loader,
                                          training=True)
                    logging.info(
                        "Epoch: {0},\t Iter: {1},\t Loss: {loss:.5f},\t Val-Acc1: {acc1:.2f} "
                        "(Best: {best:.2f}),\t Val-Acc5: {acc5:.2f}".format(
                            e, i, loss=l, acc1=acc1, best=BEST_ACC, acc5=acc5))

                if iters % args.lr_interval == 0:
                    lr = args.learning_rate
                    for param_group in optimizer.param_groups:
                        lr = param_group["lr"]
                    scheduler.step()
                    for param_group in optimizer.param_groups:
                        if lr != param_group["lr"]:
                            logging.info("lr: {0}".format(
                                param_group["lr"]))  # print if changed
                iters += 1

            # save checkpoint
            acc1, acc5 = evaluate(args,
                                  model,
                                  device,
                                  val_loader,
                                  training=True)
            results.add(
                epoch=e,
                iteration=i,
                train_loss=l,
                val_acc1=acc1,
                best_val_acc1=BEST_ACC,
            )
            util.save_checkpoint(
                {
                    "epoch": e,
                    "architecture": args.architecture,
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "scheduler": scheduler.state_dict(),
                    "best_acc1": BEST_ACC,
                    "iters": iters,
                },
                is_best=False,
                path=args.save_dir,
            )
            results.save()

        et = timer()
        logging.info("Elapsed time: {0} seconds".format(et - st))

        acc1, acc5 = evaluate(args, model, device, val_loader, training=True)
        logging.info(
            "End of training, Val-Acc: {acc1:.2f} (Best: {best:.2f}), Val-Acc5: {acc5:.2f}"
            .format(acc1=acc1, best=BEST_ACC, acc5=acc5))
        # load saved model
        saved_model = torch.load(args.save_name)
        model.load_state_dict(saved_model["state_dict"])
    # end of training

    # eval-set
    if args.eval_set != "TRAIN" and args.eval_set != "TEST":
        print('Evaluation set "{0}" not recognized ...'.format(args.eval_set))

    logging.info("Evaluating REF on the {0} set...".format(args.eval_set))
    st = timer()
    if args.eval_set == "TRAIN":
        acc1, acc5 = evaluate(args, model, device, train_loader)
    else:
        acc1, acc5 = evaluate(args, model, device, test_loader)
    et = timer()
    logging.info("Accuracy: top-1: {acc1:.2f}, top-5: {acc5:.2f}%".format(
        acc1=acc1, acc5=acc5))
    logging.info("Elapsed time: {0} seconds".format(et - st))
Example #16
        image_output = img_model(image)
        dist = F.pairwise_distance(video_output, image_output)
        print(dist)
        pred = (dist < 10.0)
        print(label)
        correct += (pred == label).sum().float()
        total += len(label)
    acc = (100 * correct * 1.0 / total)

    return acc


resnet = models.ResNet(block=models.BasicBlock, num_blocks=[3, 3, 3])
cnn3d = models.CNN3D()
cnn = models.CNN()
mlp = models.MLP()
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.Adam(list(mlp.parameters()) +
                             list(resnet.parameters()),
                             lr=lr)
# optimizer = torch.optim.SGD(list(cnn3d.parameters()) + list(resnet.parameters()), lr=lr, momentum=0.9, weight_decay=1e-4)

task2_ds = matching_dataset(cat="whiteboard_spray", transforms=transform_train)
val_len = len(task2_ds) // 10
train_len = len(task2_ds) - val_len
train_ds, val_ds = torch.utils.data.random_split(task2_ds,
                                                 [train_len, val_len])
train_loader = torch.utils.data.DataLoader(train_ds, 10, False, num_workers=10)
val_loader = torch.utils.data.DataLoader(val_ds, 10, False, num_workers=10)

# print(len(task2_ds))
Example #17
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, vm, subtask, _run):

    #Logger
    directory_checkpoints = f"results/checkpoints/{_run._id}/"
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    if "BERT" in model_name:  #Default = False, if BERT model is used then use_bert is set to True
        use_bert = True
    else:
        use_bert = False

    if vm == "google":
        directory = f"results-bert-google/{_run._id}/"
    elif vm == "aws":
        directory = f"results-bert-aws/{_run._id}/"

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes, subtask)
    else:
        embedding_dim, vocab_size, embedding_matrix, train_dataloader, val_dataloader, test_dataloader = get_data(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size,
            subtask=subtask)

    #Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    if model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 14, dropout,
                                    output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
        print(model)
    elif model_name == "BERTFreeze":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        for param in model.bert.parameters():
            param.requires_grad = False
            print(param)
            print(param.requires_grad)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreeze":
        model = models.BertLinearFreeze(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreezeEmbeddings":
        model = models.BertLinearFreezeEmbeddings(hidden_dim, dropout,
                                                  output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, bidirectional, output_dim)
        print(model)
    elif model_name == "BERTNonLinear":
        model = models.BertNonLinear(dropout, output_dim)
        print(model)
    elif model_name == "BERTNorm":
        model = models.BertNorm(dropout, output_dim)
        print(model)

    model = model.to(device)

    #Loss and optimizer
    #optimizer = optim.Adam([{'params': model.parameters(), 'weight_decay': 0.1}], lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    #Scheduler
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 50], gamma=0.1)

    #Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory_checkpoints,
        use_bert, use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    #Test
    print('Testing...')
    load_checkpoint(directory_checkpoints + "best_model.pth.tar", model)

    #Add artifacts
    #ex.add_artifact(directory+"best_model.pth.tar")
    #ex.add_artifact(directory+"last_model.pth.tar")

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo: log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    #test_metrics_df = pd.DataFrame(test_metrics, index=["NOT","OFF"])
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'

    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results
Example #18
    hparams = random_search.hparams_search(layersizes)  # 0.005, 16
else:
    layersizes = [7, 32, 32, 16, 1]
    hparams = [0.005, 16]

hparams_setting = {
    "epochs": 1000,
    "batchsize": hparams[1],
    "learningrate": hparams[0],
    "history": 1
}
model_design = {"layer_sizes": layersizes}

#%%
import models
model = models.MLP(model_design["layer_sizes"])


def get_n_params(model):
    pp = 0
    for p in list(model.parameters()):
        nn = 1
        for s in list(p.size()):
            nn = nn * s
        pp += nn
    return pp


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
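
Both helpers in Example #18 count weights; a quick check against the MLP built just above (layer sizes taken from the non-search branch of the same example):

model = models.MLP([7, 32, 32, 16, 1])
print(get_n_params(model))       # total parameter count
print(count_parameters(model))   # trainable parameters; equal for a freshly built MLP
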
Example #19
def MLP_model_training(train_ds, test_ds, textField, labelField, maxlen,
                       embed_size, batch_size, no_epochs, saver_path,
                       saver_name, results_file, earlystopping=early_stopping):
    AUC_scores = []
    F1_scores = []
    training_time = []
    micro_F1_scores = []
    macro_F1_scores = []
    for i in range(5):
        start_time = time.time()
        # split the train dataset into train and validation
        X_train, X_valid, y_train, y_valid = train_test_split(train_ds[textField], train_ds[labelField], test_size=0.3, stratify=train_ds[labelField])

        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(train_ds[textField])
        vocab_size = len(tokenizer.word_index) + 1

        X_train = tokenizer.texts_to_sequences(X_train)
        X_valid = tokenizer.texts_to_sequences(X_valid)
        X_test = tokenizer.texts_to_sequences(test_ds[textField])

        x_train = pad_sequences(X_train, maxlen=maxlen)
        x_valid = pad_sequences(X_valid, maxlen=maxlen)
        x_test =  pad_sequences(X_test, maxlen=maxlen)

        print(x_train.shape, "padded Training sequences")
        print(x_valid.shape, "padded validation sequences")
        print(x_test.shape, "padded testing sequences")

        x_train = np.asmatrix(x_train)
        x_valid = np.asmatrix(x_valid)

        training_generator = helpers._data_generator(
            x_train, y_train, maxlen, batch_size)
        validation_generator = helpers._data_generator(
            x_valid, y_valid, maxlen, batch_size)

        # Get number of training steps. This indicates the number of steps it takes
        # to cover all samples in one epoch.
        steps_per_epoch = x_train.shape[0] // batch_size
        if x_train.shape[0] % batch_size:
            steps_per_epoch += 1
        # Get number of validation steps.
        validation_steps = x_valid.shape[0] // batch_size
        if x_valid.shape[0] % batch_size:
            validation_steps += 1

        saver = ModelCheckpoint(saver_path +"/"+saver_name)
        # MLP model
        MLP_model = models.MLP(x_train.shape[1],vocab_size,embed_size,False)
        MLP_training_history = MLP_model.fit_generator(
            generator=training_generator,
            validation_data = validation_generator,
            steps_per_epoch=steps_per_epoch,
            validation_steps=validation_steps,
            callbacks=[earlystopping,saver],
            epochs=no_epochs,
            verbose=0)

        predicted_labels = MLP_model.predict(x_test)
        print("iteration" + str(i))
        print("AUC score", roc_auc_score(test_ds[labelField], predicted_labels))
        print("F1 score", f1_score(test_ds[labelField], np.rint(predicted_labels)))
        print("micro F1 score", f1_score(test_ds[labelField], np.rint(predicted_labels), average="micro"))
        print("macro F1 score", f1_score(test_ds[labelField], np.rint(predicted_labels), average="macro"))

        exc_time = time.time() - start_time
        AUC_scores.append(roc_auc_score(test_ds[labelField], predicted_labels))
        F1_scores.append(f1_score(test_ds[labelField],np.rint(predicted_labels)))
        macro_F1_scores.append(f1_score(test_ds[labelField], np.rint(predicted_labels), average="macro"))
        micro_F1_scores.append(f1_score(test_ds[labelField], np.rint(predicted_labels), average="micro"))
        training_time.append(exc_time)
        keras.backend.clear_session()
        print("End iteration" + str(i))

    test_ds["MLP_prediction"] = predicted_labels
    print("AUC_avg", np.mean(AUC_scores))
    print("f1_avg", np.mean(F1_scores))
    print("macro_f1_avg", np.mean(macro_F1_scores))
    print("micro_f1_avg", np.mean(micro_F1_scores))
    f = open(results_file, "w")
    f.write(saver_name)
    f.write("\n")
    f.write("AUC_mean: " + str(np.mean(AUC_scores)))
    f.write("\n")
    f.write("F1_mean: " + str(np.mean(F1_scores)))
    f.write("\n")
    f.write("macro_F1_mean: " + str(np.mean(macro_F1_scores)))
    f.write("\n")
    f.write("micro_F1_mean: " + str(np.mean(micro_F1_scores)))
    f.write("\n")
    f.write("Excution Time: " + str(np.mean(training_time)))
    f.write("--------------------------------------------------------------------------------")
    f.write("\n")
    f.close()
    print("Done!")
    return test_ds
Example #20
if run_PairSetup_MLP:
    torch.manual_seed(random_seed)
    print('************** Running MLP model (for PairSetup) *****************')
    in_dim, out_dim = 14 * 14 * 2, 2
    # We test for different hidden layer count vs. neurons per layer count
    L_range = list(range(2, 18, 2))
    h_range = list(range(10, 50, 5))
    test_error_means = np.zeros((len(h_range), len(L_range)))
    test_error_std = np.zeros((len(h_range), len(L_range)))
    for L_idx in range(0, len(L_range)):
        for h_idx in range(0, len(h_range)):
            torch.manual_seed(random_seed)
            L = L_range[L_idx]
            h = h_range[h_idx]
            print('testing with L = {} and h = {}'.format(L, h))
            model = models.MLP(L, h, in_dim, out_dim)
            test_err_mean, test_err_std, _, _ = training_functions.rounds_train(
                model,
                'PairSetup',
                lr=0.0001,
                epochs=200,
                use_crossentropy=True,
                rounds=10)
            test_error_means[h_idx, L_idx] = test_err_mean
            test_error_std[h_idx, L_idx] = test_err_std
    # Plot heat table
    plots.plot_error_table(
        h_range, L_range, test_error_means, test_error_std,
        'Pair Setup MLP models mean/std minimum test error',
        './plots/pairSetup_MLP.svg')
Example #21
def train(hparams, model_design, X, Y, data, data_dir="models/mlp", splits=5):
    """
    
    
    """
    epochs = hparams["epochs"]

    kf = KFold(n_splits=splits, shuffle=False)
    kf.get_n_splits(X)

    #rmse_train = np.zeros((splits, epochs))
    #rmse_val = np.zeros((splits, epochs))
    mae_train = np.zeros((splits, epochs))
    mae_val = np.zeros((splits, epochs))

    i = 0

    #performance = []
    #y_tests = []
    #y_preds = []

    for train_index, test_index in kf.split(X):

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        X_test = torch.tensor(X_test).type(dtype=torch.float)
        y_test = torch.tensor(y_test).type(dtype=torch.float)
        X_train = torch.tensor(X_train).type(dtype=torch.float)
        y_train = torch.tensor(y_train).type(dtype=torch.float)

        model = models.MLP(model_design["layer_sizes"])

        optimizer = optim.Adam(model.parameters(), lr=hparams["learningrate"])
        criterion = nn.MSELoss()

        #early_stopping = utils.EarlyStopping()

        for epoch in range(epochs):

            # Training
            model.train()

            x, y = utils.create_batches(X_train, y_train, hparams["batchsize"],
                                        hparams["history"])

            x = torch.tensor(x).type(dtype=torch.float)
            y = torch.tensor(y).type(dtype=torch.float)

            output = model(x)

            # Compute training loss
            loss = criterion(output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Evaluate current model at test set
            model.eval()

            with torch.no_grad():
                pred_train = model(X_train)
                pred_test = model(X_test)
                #rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                #rmse_val[i, epoch] = utils.rmse(y_test, pred_test)
                val_loss = metrics.mean_absolute_error(y_test, pred_test)
                #early_stopping(val_loss)
                #if early_stopping.early_stop:
                #    break

                mae_train[i, epoch] = metrics.mean_absolute_error(
                    y_train, pred_train)
                mae_val[i, epoch] = val_loss

        # Predict with fitted model
        #with torch.no_grad():
        #    preds_train = model(X_train)
        #    preds_test = model(X_test)
        #    performance.append([utils.rmse(y_train, preds_train),
        #                        utils.rmse(y_test, preds_test),
        #                        metrics.mean_absolute_error(y_train, preds_train.numpy()),
        #                        metrics.mean_absolute_error(y_test, preds_test.numpy())])

        torch.save(model.state_dict(),
                   os.path.join(data_dir, f"{data}_model{i}.pth"))

        #y_tests.append(y_test.numpy())
        #y_preds.append(preds_test.numpy())

        i += 1

    running_losses = {
        "mae_train": mae_train,
        "mae_val": mae_val
    }  #, "rmse_val":rmse_val, "rmse_train":rmse_train, }

    return running_losses  #, performance #, y_tests, y_preds
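
Examples #5 and #21 appear to come from the same project (same default data_dir and the same f"{data}_model{i}.pth" checkpoint names), so training and prediction can be chained. A hedged sketch, with X, Y and the data label standing in as placeholders:

hparams = {"epochs": 1000, "batchsize": 16, "learningrate": 0.005, "history": 1}
model_design = {"layer_sizes": [7, 32, 32, 16, 1]}

running_losses = train(hparams, model_design, X, Y, data="site0", splits=5)
preds, mae, nse = predict(hparams, model_design, X, Y, data="site0", splits=5)
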
Example #22
            episode_q.append(out)
            action = out.max(1)[1].item()
            next_obs, reward, done, info = env.step(action)
            R += reward
            obs = next_obs
        rewards.append(R)
        episode_qs.append(torch.stack(episode_q))
    return torch.tensor(rewards, dtype=torch.float).mean(), torch.stack(episode_qs)


if __name__ == "__main__":
    args = get_args()

    if args.env == 'CartPole-v0':
        env = gym.make('CartPole-v0')
        net = models.MLP(4, 2)
    elif args.env == 'MNIST':
        transform = transforms.Compose([transforms.ToTensor()])
        dataset = torchvision.datasets.MNIST('.data', train=False, download=True, transform=transform)
        env = environments.ClassEnv(dataset, 10)
        net = models.CNN(1, 28, 28, 10)
    else:
        raise ValueError('Not a valid env')

    device = torch.device('cpu')
    net.load_state_dict(torch.load(args.params_path, map_location=device))
    save_folder = os.path.split(args.params_path)[0]

    test_R, episode_qs = test(env, net, args.episodes, args.render, device)
    print(f'Average reward over {args.episodes} episodes is {test_R:.1f}')
    n_wrong = (len(dataset) - test_R) / 2
Example #23
def default(id_val, algo, dataset_fname, expdir, env_atts_types, \
            feateng_type='-1', d_size=-1, bin_env=False, eq_estrat=-1, SEED=100,
            test_info='-1',val_split=0.0, irm_args={}, linear_irm_args={}, \
            mlp_args={}, linreg_args={}, logreg_args={}, \
            toy_data=[False], testing=False):

    '''
    :param id_val: Numerical identifier for run (str)
    :param algo: Name of algo to be applied (str)
    :param dataset_fname: path to dataset (str)
    :param expdir: directory where results stored (str)
    :param env_atts_types: environment variables in a list (list len=1)
    :param feateng_type: The feature engineering steps to be taken (str)
    :param d_size: subsampling number on dataset (int)
    :param bin_env: 0 to not bin, 1 to bin (0-1 int)
    :param eq_estrat: -1 to not apply, int for min num samples per env (int)
    :param seed: random seed used (int)
    :param test_info: -1 or name of test environment (str)
    '''

    random.seed(SEED)

    #Meta-function Accounting
    unid = '''{}_{}_{}_{}_{}_{}'''.format(id_val, feateng_type,\
                                          dname_from_fpath(dataset_fname), \
                                          str(d_size), \
                                          str(SEED), \
                                          ''.join([str(f) \
                                                   for f in env_atts_types])
                                         )
    logger_fname = os.path.join(expdir, 'log_{}.txt'.format(unid))
    logging.basicConfig(filename=logger_fname, level=logging.DEBUG)
    logging.info('id: {}'.format(id_val))
    logging.info('algo: {}'.format(algo))
    logging.info('fteng: {}'.format(feateng_type))
    logging.info('dataset: {}'.format(dname_from_fpath(dataset_fname)))
    logging.info('env_atts: {}'.format(str(env_atts_types)))
    logging.info('dataset size: {}'.format(str(d_size)))
    logging.info('binarize envs: {}'.format(str(bin_env)))
    logging.info('equalize envs: {}'.format(str(eq_estrat)))
    logging.info('seed: {}'.format(str(SEED)))
    logging.info('test_info: {}'.format(test_info))

    #Select correct dataset
    data, y_all, d_atts = dp.data_loader(dataset_fname, \
                                proc_fteng(feateng_type), dsize=d_size, \
                                binar=bin_env, toy=toy_data, testing=testing)
    logging.info('{} Dataset loaded - size {}'.format(\
                                dataset_fname.split('/')[-1], str(data.shape)))

    # #Remove Validation and Test Data
    data, y_all, d_atts, _, _, _, _ = dp.train_val_test_split(\
                                         data, y_all, d_atts, val_split, \
                                         test_info, SEED)

    logging.info('Val, Test Environment Removed - Dataset size {}'.format(\
                                                              str(data.shape)))

    if algo == 'icp':
        icp = models.InvariantCausalPrediction()
        icp.run(data, y_all, d_atts, unid, expdir, proc_fteng(feateng_type), \
                 env_atts_types)

    elif algo == 'irm':
        assert len(irm_args) > 0
        logging.info('irm_params: {}'.format(str(irm_args)))
        irm = models.InvariantRiskMinimization()
        irm.run(data, y_all, d_atts, unid, expdir, SEED, env_atts_types, \
                eq_estrat, irm_args)

    elif algo == 'linear-irm':
        assert len(linear_irm_args) > 0
        logging.info('linear-irm_params: {}'.format(str(linear_irm_args)))
        l_irm = models.LinearInvariantRiskMinimization()
        l_irm.run(data, y_all, d_atts, unid, expdir, SEED, env_atts_types, \
                  eq_estrat, linear_irm_args)

    elif algo == 'mlp':
        mlp = models.MLP()
        mlp.run(data, y_all, unid, expdir, mlp_args)

    elif algo == 'linreg':
        linreg = models.Linear()
        linreg.run(data, y_all, unid, expdir, linreg_args)

    elif algo == 'logreg':
        logreg = models.LogisticReg()
        logreg.run(data, y_all, unid, expdir, logreg_args)

    else:
        raise Exception('Algorithm not Implemented')
Example #24
def train(args):
  import models
  import numpy as np
  np.random.seed(1234)

  if args.dataset == 'digits':
    n_dim, n_out, n_channels = 8, 10, 1
    X_train, y_train, X_val, y_val = data.load_digits()
  elif args.dataset == 'mnist':
    n_dim, n_out, n_channels = 28, 10, 1
    X_train, y_train, X_val, y_val, _, _ = data.load_mnist()
  elif args.dataset == 'svhn':
    n_dim, n_out, n_channels = 32, 10, 3
    X_train, y_train, X_val, y_val = data.load_svhn()
    X_train, y_train, X_val, y_val = data.prepare_dataset(X_train, y_train, X_val, y_val)
  elif args.dataset == 'cifar10':
    n_dim, n_out, n_channels = 32, 10, 3
    X_train, y_train, X_val, y_val = data.load_cifar10()
    X_train, y_train, X_val, y_val = data.prepare_dataset(X_train, y_train, X_val, y_val)
  elif args.dataset == 'random':
    n_dim, n_out, n_channels = 2, 2, 1
    X_train, y_train = data.load_noise(n=1000, d=n_dim)
    X_val, y_val = X_train, y_train
  else:
    raise ValueError('Invalid dataset name: %s' % args.dataset)
  print 'dataset loaded, dim:', X_train.shape

  # set up optimization params
  p = { 'lr' : args.lr, 'b1': args.b1, 'b2': args.b2 }

  # create model
  if args.model == 'softmax':
    model = models.Softmax(n_dim=n_dim, n_out=n_out, n_superbatch=args.n_superbatch, 
                           opt_alg=args.alg, opt_params=p)
  elif args.model == 'mlp':
    model = models.MLP(n_dim=n_dim, n_out=n_out, n_superbatch=args.n_superbatch, 
                       opt_alg=args.alg, opt_params=p)
  elif args.model == 'cnn':
    model = models.CNN(n_dim=n_dim, n_out=n_out, n_chan=n_channels, model=args.dataset,
                       n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)  
  elif args.model == 'kcnn':
    model = models.KCNN(n_dim=n_dim, n_out=n_out, n_chan=n_channels, model=args.dataset,
                       n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)    
  elif args.model == 'resnet':
    model = models.Resnet(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)    
  elif args.model == 'vae':
    model = models.VAE(n_dim=n_dim, n_out=n_out, n_chan=n_channels, n_batch=args.n_batch,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p,
                          model='bernoulli' if args.dataset in ('digits', 'mnist') 
                                            else 'gaussian')    
  elif args.model == 'convvae':
    model = models.ConvVAE(n_dim=n_dim, n_out=n_out, n_chan=n_channels, n_batch=args.n_batch,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p,
                          model='bernoulli' if args.dataset in ('digits', 'mnist') 
                                            else 'gaussian')    
  elif args.model == 'convadgm':
    model = models.ConvADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels, n_batch=args.n_batch,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p,
                          model='bernoulli' if args.dataset in ('digits', 'mnist') 
                                            else 'gaussian')    
  elif args.model == 'sbn':
    model = models.SBN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)      
  elif args.model == 'adgm':
    model = models.ADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels, n_batch=args.n_batch,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p,
                          model='bernoulli' if args.dataset in ('digits', 'mnist') 
                                            else 'gaussian')
  elif args.model == 'hdgm':
    model = models.HDGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels, n_batch=args.n_batch,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)        
  elif args.model == 'dadgm':
    model = models.DADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p) 
  elif args.model == 'dcgan':
    model = models.DCGAN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)   
  elif args.model == 'ssadgm':
    X_train_lbl, y_train_lbl, X_train_unl, y_train_unl \
      = data.split_semisup(X_train, y_train, n_lbl=args.n_labeled)
    model = models.SSADGM(X_labeled=X_train_lbl, y_labeled=y_train_lbl, n_out=n_out,
                          n_superbatch=args.n_superbatch, opt_alg=args.alg, opt_params=p)
    X_train, y_train = X_train_unl, y_train_unl
  else:
    raise ValueError('Invalid model')
  
  # train model
  model.fit(X_train, y_train, X_val, y_val, 
            n_epoch=args.epochs, n_batch=args.n_batch,
            logname=args.logname)
Example #25
    def getModel(cls):
        return models.MLP(10, [1, 28, 28], units=[100, 100, 100], activation=torch.nn.Sigmoid)
Example #26
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, _run):

    #Logger
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    if "BERT" in model_name:  #Default = False, if BERT model is used then use_bert is set to True
        use_bert = True
    else:
        use_bert = False

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes)
    else:
        embedding_dim, vocab_size, embedding_matrix, train_dataloader, val_dataloader, test_dataloader = get_data_features(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size)

    #Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    if model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 13, dropout,
                                    output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, output_dim)
        print(model)

    model = model.to(device)

    #Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    #Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory, use_bert,
        use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    #Test
    print('Testing...')
    load_checkpoint(directory + "best_model.pth.tar", model)

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo: log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'

    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results

Example #27
def MacroF1Metric(preds, dtrain):
    """
    LGB Params used to train this loss
        lgb_params = {'objective': 'multiclass', 'num_class': 11, 'metric': 'multi_logloss', 'learning_rate': 0.01,
                      'lambda_l1': 0.001, 'lambda_l2': 0.18977, 'num_leaves': 180, 'feature_fraction': 0.587338,
                      'bagging_fraction': 0.705783, 'bagging_freq': 4 }
    """
    labels = dtrain.get_label()
    num_labels = 11
    preds = preds.reshape(num_labels, len(preds) // num_labels)
    preds = np.argmax(preds, axis=0)
    score = f1_score(labels, preds, average="macro")
    return ('KaggleMetric', score, True)
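
MacroF1Metric follows LightGBM's custom-metric (feval) convention of returning a (name, value, is_higher_better) tuple; a hedged sketch of plugging it into lgb.train, with the feature/label arrays as placeholders:

import lightgbm as lgb

# X_train, y_train, X_valid, y_valid are placeholders for the project's data;
# lgb_params is the parameter dict quoted in the docstring above.
dtrain = lgb.Dataset(X_train, label=y_train)
dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)
booster = lgb.train(lgb_params, dtrain, valid_sets=[dvalid], feval=MacroF1Metric)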


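# NOTE: assigning to `models` here rebinds the name and shadows the imported `models` module below this point.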
models = {
    "dt": models.DecisionTree(),
    "rf": models.RandomForest(),
    "lr": models.LR(),
    "xgb": models.XGBoost(),
    "svm": models.SVM(),
    "lgb": models.LGB(),
    "mlp": models.MLP(),
    "lstm": models.LSTM()
}

# To get the final accuracy, take the mean across runs; the mean absolute error should be
# reported as a percentage, since the goal is to show performance.
Example #28
    test_set_x = np.asarray(test_set_x, dtype=np.float32)
    test_set_y = np.asarray(test_set_t, dtype=np.int32)

    N_features = valid_set_x.shape[1]
    N_hidden = 500
    N_labels = len(np.unique(valid_set_y))
    seed = 123

    theano.config.mode = 'FAST_RUN'
    #import theano.sandbox.cuda
    #theano.sandbox.cuda.use("gpu0")

    classifier = models.MLP(
                        X=theano.shared(valid_set_x,borrow=True),
                        y=theano.shared(valid_set_y,borrow=True),
                        N_features=N_features,
                        N_hidden=N_hidden,
                        N_labels=N_labels,
                        activationFunction=T.tanh,
                        seed=123)

    tr_model = train_model(classifier=classifier,
                           X=train_set_x,
                           y=train_set_y,
                           X_valid=valid_set_x,
                           y_valid=valid_set_y,
                           L1_reg=0.01,L2_reg=0.0)

    N = train_set_x.shape[0]
    train(tr_model,
          max_epoch=1000,
          batch_size=4000,
Example #29
    train_file = args.data_dir + "/debug.json"
dev_file = args.data_dir + "/dev.json"

full_dataset = data_loader.InvertedIndexData(args, train_file)
dev_dataset = data_loader.InvertedIndexData(args, dev_file)

output_prefix = args.data_dir + "/models/"

# ###############################################################################
# # Build the model
# ###############################################################################
for q in args.quantiles:
    my_print("Training for quantile q = {}".format(q))
    model_file = output_prefix + "/" + create_file_name(args, q) + ".model"
    my_print("Writing model to file", model_file)
    model = models.MLP(args.layers)
    model = model.to(device=args.device)
    my_print(model)

    # # Loop over epochs.
    lr = args.lr
    best_val_loss = None

    # # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            my_print("start epoch {}/{}".format(epoch, args.epochs))
            for start in range(0, len(full_dataset), 1000000):
                subset = data_loader.InvertedIndexSubSet(
                    full_dataset, start, 1000000, args.device)
Example #30
def test_task2(root_path):
    '''
    :param root_path: root path of test data, e.g. ./dataset/task2/test/0/
    :return results: a dict of classification results
    results = {'audio_0000.pkl': 23, 'audio_0001': 11, ...}
    This means audio 'audio_0000.pkl' is matched to video 'video_0023' and 'audio_0001' is matched to 'video_0011'.
    '''
    results = {}

    audio_preds = []
    audio_names = []
    audio_ds = my_dataset("test", transform, root_path)
    audio_loader = torch.utils.data.DataLoader(audio_ds,
                                               4,
                                               False,
                                               num_workers=0)
    audio_model = models.ResNet(block=models.BasicBlock, num_blocks=[3, 3, 3])
    audio_model = nn.DataParallel(audio_model).cuda()
    audio_model.load_state_dict(torch.load("task1resnet18.pkl"))
    audio_model.eval()
    for data in audio_loader:
        image, name = data['image'], data['name']
        image = image.cuda()
        image = torch.autograd.Variable(image)
        output = audio_model(image)
        pred = torch.argmax(output, 1)
        audio_names.extend(name)
        audio_preds.extend(pred)
    audio_preds = [int(i) for i in audio_preds]
    audio_names = [i.split('/')[-1][:-4] for i in audio_names]
    audio_results = dict(zip(audio_names, audio_preds))

    video_preds = []
    video_names = []
    video_ds = video_dataset("test", transform, root_path)
    video_loader = torch.utils.data.DataLoader(video_ds,
                                               4,
                                               False,
                                               num_workers=0)
    video_model = models.ResNet(in_ch=1,
                                in_stride=(1, 1),
                                fc_size=64,
                                block=models.BasicBlock,
                                num_blocks=[3, 3, 3])
    video_model = nn.DataParallel(video_model).cuda()
    video_model.load_state_dict(torch.load("new_video_resnet.pkl"))
    video_model.eval()
    for data in video_loader:
        image, name = data['image'], data['name']
        image = image.cuda()
        image = torch.autograd.Variable(image)
        output = video_model(image)
        pred = torch.argmax(output, 1)
        video_names.extend(name)
        video_preds.extend(pred)
    video_preds = [int(i) for i in video_preds]
    video_names_unique = list(set(video_names))
    video_preds_max = []
    for name in video_names_unique:
        indices = [i for i, x in enumerate(video_names) if x == name]
        pred = [video_preds[i] for i in indices]
        pred = max(pred, key=pred.count)
        video_preds_max.append(pred)
    video_results = dict(zip(video_names_unique, video_preds_max))

    audio_num = len(audio_names)
    for i in range(10):
        class_name = classes[i]
        matching_resnet_model = models.ResNet(block=models.BasicBlock,
                                              num_blocks=[3, 3, 3])
        matching_resnet_model = nn.DataParallel(matching_resnet_model).cuda()
        matching_resnet_model.load_state_dict(
            torch.load(class_name + "_resnet.pkl"))
        matching_resnet_model.eval()
        matching_mlp_model = models.MLP()
        matching_mlp_model = nn.DataParallel(matching_mlp_model).cuda()
        matching_mlp_model.load_state_dict(torch.load(class_name + "_mlp.pkl"))
        matching_mlp_model.eval()

        audio_i = [k for k, v in audio_results.items() if v == i]
        video_i = [k for k, v in video_results.items() if v == i]
        print(audio_i)
        print(video_i)

        matching_ds = matching_dataset(mode="test",
                                       transforms=transform,
                                       test_path=root_path,
                                       test_audio=audio_i,
                                       test_video=video_i)
        matching_loader = torch.utils.data.DataLoader(matching_ds,
                                                      4,
                                                      False,
                                                      num_workers=0)

        distance_matrix = np.zeros((len(audio_i), len(video_i)))

        for data in matching_loader:
            raw, name = data['raw'], data['name']
            image = raw[0]
            video = raw[1]
            image = torch.autograd.Variable(image.cuda())
            video = torch.autograd.Variable(video.cuda())
            video_output = matching_mlp_model(video)
            image_output = matching_resnet_model(image)
            dist = F.pairwise_distance(video_output, image_output)
            for j in range(len(dist)):
                audio_num = audio_i.index(name[0][j])
                video_num = video_i.index(name[1][j])
                distance_matrix[audio_num][video_num] = dist[j]

        print(distance_matrix)
        row_ind, col_ind = linear_sum_assignment(distance_matrix)
        print(row_ind)
        print(col_ind)
        for j in range(len(row_ind)):
            audio_name = audio_i[row_ind[j]]
            video_name = video_i[col_ind[j]]
            results[audio_name] = video_name

    audio_set = list(set(audio_names) - set([k for k, v in results.items()]))
    video_set = list(set(video_names) - set([v for k, v in results.items()]))
    perm = np.random.permutation(len(audio_set))
    for j in perm:
        audio_name = audio_set[j]
        video_name = video_set[j]
        results[audio_name] = video_name
    for k, v in results.items():
        results[k] = int(v[-4:])
    print(results)
    return results