Example #1
def load_model(path_model, path_config, vocab):
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")

    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=None,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unkwown model name: %s" % model_name
        sys.exit(-1)
    serializers.load_npz(path_model, model)
    return model
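Note: load_model above relies on a Config helper exposing getstr and getint, which the snippet does not define. A minimal, purely hypothetical sketch of such a wrapper around Python's configparser could look like this:

import configparser


class Config:
    """Hypothetical stand-in for the Config object used above."""

    def __init__(self, path_config, section="DEFAULT"):
        self._parser = configparser.ConfigParser()
        self._parser.read(path_config)
        self._section = section

    def getstr(self, key):
        return self._parser.get(self._section, key)

    def getint(self, key):
        return self._parser.getint(self._section, key)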
Example #2
    def build_model(self):
        """Creates and initializes the shared and controller models."""
        self.shared = models.RNN(self.args, self.dataset)
        self.controller = models.Controller(self.args)

        if self.args.num_gpu == 1:
            self.shared.cuda()
            self.controller.cuda()
        elif self.args.num_gpu > 1:
            raise NotImplementedError('`num_gpu > 1` is in progress')
Example #3
def main(model_name="CNN", is_training=True):
    config = Config()
    if model_name == "CNN":
        model = models.CNN(config)
    elif model_name == "RNN":
        model = models.RNN(config)
    elif model_name == "RCNN":
        model = models.RCNN(config)
    else:
        model = models.FC(config)

    if is_training:
        model.train()
    else:
        model.restore_model()
        model.predict()
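Note: the if/elif chain in main (and in several later examples) can also be written as a dictionary lookup. A sketch only, assuming the same models module and Config class shown above:

# Dictionary-based dispatch equivalent to the if/elif chain in main().
MODEL_REGISTRY = {"CNN": models.CNN, "RNN": models.RNN, "RCNN": models.RCNN}


def build_model_by_name(model_name, config):
    # Unknown names fall back to the fully connected model, mirroring the else branch.
    return MODEL_REGISTRY.get(model_name, models.FC)(config)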
Example #4
    def build_model(self):
        """Creates and initializes the shared and controller models."""
        if self.args.network_type == 'rnn':
            self.shared = models.RNN(self.args, self.dataset)
        elif self.args.network_type == 'cnn':
            self.shared = models.CNN(self.args, self.dataset)
        else:
            raise NotImplementedError(f'Network type '
                                      f'`{self.args.network_type}` is not '
                                      f'defined')
        self.controller = models.Controller(self.args)

        if self.args.num_gpu == 1:
            self.shared.cuda()
            self.controller.cuda()
        elif self.args.num_gpu > 1:
            raise NotImplementedError('`num_gpu > 1` is in progress')
Example #5
    def build_model(self):
        """Creates and initializes the shared and controller models."""
        if self.args.network_type == 'rnn':
            self.shared = models.RNN(self.args, self.dataset)
        elif self.args.network_type == 'cnn':
            self.shared = models.CNN(self.args, self.dataset)
        else:
            raise NotImplementedError(
                'Network type `{0}` is not defined'.format(
                    self.args.network_type))
        self.controller = models.Controller(
            self.args
        )  # builds a forward pass: Embedding(130, 100) -> LSTM(100, 100) -> a list of decoders (25 decoders in total)

        if self.args.num_gpu == 1:
            self.shared.cuda()
            self.controller.cuda()
        elif self.args.num_gpu > 1:
            raise NotImplementedError('`num_gpu > 1` is in progress')
Example #6
def main(args):
    ######

    # 3.2 Processing of the data
    TEXT = data.Field(sequential=True, include_lengths=True, tokenize='spacy')
    LABEL = data.Field(sequential=False, use_vocab=False)

    train, val, test = data.TabularDataset.splits(
        path=
        "/Users/RobertAdragna/Documents/Third Year/Fall Term/MIE 324 - Introduction to Machine Intelligence/mie324/a4",
        train='train.tsv',
        validation='validation.tsv',
        test='test.tsv',
        format='tsv',
        skip_header=True,
        fields=[('text', TEXT), ('label', LABEL)])

    # train_itr = data.BucketIterator(train, 64, sort_key=lambda x: len(x.TEXT), sort_within_batch=True, repeat=False)
    # val_itr = data.BucketIterator(val, 64, sort_key=lambda x: len(x.TEXT), sort_within_batch=True, repeat=False)
    # test_itr = data.BucketIterator(test, 64, sort_key=lambda x: len(x.TEXT), sort_within_batch=True, repeat=False)

    ######
    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test),
        sort_key=lambda x: len(x.text),
        sort_within_batch=True,
        repeat=False,
        batch_sizes=(64, 64, 64),
        device=-1)
    # train_iter, val_iter, test_iter = data.BucketIterator.splits(
    #     (train, val, test), sort_key=lambda x: len(x.text), sort_within_batch=True, repeat=False,
    #     batch_sizes=(64, 64, 64), device=-1)
    TEXT.build_vocab(train)
    vocab = TEXT.vocab
    vocab.load_vectors(torchtext.vocab.GloVe(name='6B', dim=100))

    ######

    # 5 Training and Evaluation
    base_model = models.Baseline(100, vocab)
    rnn_model = models.RNN(100, vocab, 100)
    cnn_model = models.CNN(100, vocab, 50, (2, 4))
    train_func(rnn_model, train_iter, val_iter, test_iter, 20, "rnn")
Example #7
    def build_model(self):
        """Creates and initializes the shared and controller models."""
        if self.args.network_type == 'rnn':
            self.shared = models.RNN(self.args, self.dataset)
        elif self.args.network_type == 'cnn':
            print("----- begin to init cnn------")
            self.shared = models.CNN(self.args, self.dataset)
            # self.shared = self.shared.cuda()
        else:
            raise NotImplementedError(f'Network type '
                                      f'`{self.args.network_type}` is not '
                                      f'defined')
        print("---- begin to init controller-----")
        self.controller = models.Controller(self.args)
        # self.controller = self.controller.cuda()
        print("===begin to cuda")
        if self.args.num_gpu == 1:
            print("cuda")
            self.shared.cuda()
            self.controller.cuda()
            print("finish cuda")
        elif self.args.num_gpu > 1:
            raise NotImplementedError('`num_gpu > 1` is in progress')
Example #8
    def build_model(self):
        """Creates and initializes the shared and controller models."""
        if self.args.network_type == 'rnn':
            self.shared = models.RNN(self.args, self.dataset)
            self.controller = models.Controller(self.args)
        elif self.args.network_type == 'micro_cnn':
            self.shared = models.CNN(self.args, self.dataset)
            self.controller = models.CNNMicroController(self.args)
        else:
            raise NotImplementedError(f'Network type '
                                      f'`{self.args.network_type}` is not '
                                      f'defined')

        if self.args.num_gpu == 1:
            if torch.__version__ == '0.3.1':
                self.shared.cuda()
                self.controller.cuda()
            else:
                self.shared.to(self.device)
                self.controller.to(self.device)

        elif self.args.num_gpu > 1:
            raise NotImplementedError('`num_gpu > 1` is in progress')
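Note: Example #8 branches on torch.__version__ to choose between .cuda() and .to(device). On any PyTorch release that provides torch.device (0.4 and later), the .to(device) form alone covers both CPU and GPU. A minimal self-contained sketch, with nn.Linear standing in for the shared/controller models:

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

shared = nn.Linear(8, 8)      # placeholder for models.RNN / models.CNN
controller = nn.Linear(8, 8)  # placeholder for models.Controller
shared.to(device)
controller.to(device)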
Example #9
def initialize_and_train(N,
                         X_clusters,
                         n_lag,
                         n_hold,
                         n_out,
                         X_dim,
                         num_classes,
                         clust_sig=0.1,
                         model_seed=2,
                         hid_nonlin='tanh',
                         num_epochs=20,
                         learning_rate=0.005,
                         patience_before_stopping=10,
                         batch_size=10,
                         loss='cce',
                         optimizer='rmsprop',
                         momentum=0,
                         l2_regularization=0,
                         dropout_p=0,
                         unit_injected_noise=0,
                         scheduler='plateau',
                         learning_patience=5,
                         scheduler_factor=0.5,
                         network='vanilla_rnn',
                         Win='orthog',
                         use_biases=True,
                         Wrec_rand_proportion=1,
                         input_scale=1.,
                         wout_scale=1,
                         g_radius=1.,
                         dt=0.01,
                         num_train_samples_per_epoch=None,
                         num_test_samples_per_epoch=None,
                         freeze_input=False,
                         train_output_weights=True,
                         input_style='hypercube',
                         saves_per_epoch=1,
                         rerun=False,
                         table_path=None,
                         multiprocess_lock=None):
    """
    Parameters
    ----------
    N : int
        Number of units in the "hidden" layer, i.e. number of neurons making
        up the recurrent layer.
    X_clusters : int
        Number of clusters.
    n_lag : int
        Number of timesteps from stimulus onset to end of loss evaluation.
    n_hold : int
        Number of timesteps for which the input is presented.
    n_out : int
        Number of timesteps for which loss is evaluated.
    X_dim : int
        Dimension of the ambient space in which clusters are generated.
    num_classes : int
        Number of class labels.
    clust_sig : float
        Standard deviation of each cluster.
    model_seed : int
        Seed for generating input and model weights.
    hid_nonlin : str
        Activation function for the hidden units or, if using a
        Sompolinsky-style recurrent network, the nonlinear transfer function.
    num_epochs : int
        The number of epochs to train for.
    learning_rate : float
        Learning rate for optimizer.
    patience_before_stopping : int
        Number of consecutive epochs to wait for which there is no
        improvement to the (cumulative average) validation
        loss before ending training.
    batch_size : int
        Size of each training data minibatch.
    loss : str
        The loss function to use. Options are "mse" for mean squared error
        and "cce" for categorical cross entropy.
    optimizer : str
        The optimizer to use. Options are "sgd" for standard stochastic
        gradient descent and "rmsprop" for RMSProp.
    momentum : float
        Momentum value to give to optimizer. If optimizer is 'adam' then
        momentum is set to 0.
    l2_regularization : float
        Weighting factor for l2 regularization of parameters. Default: 0.
    dropout_p : float
        Probability value for dropout applied to the hidden units of the
        feedforward network or recurrent units at each recurrent timestep.
        Default: 0. If 0, a dropout layer isn't added.
    unit_injected_noise : float
        Magnitude of i.i.d. Gaussian noise to inject into each unit of each
        hidden layer or on each recurrent timestep. Default: 0.
    scheduler : str
        The strategy used to adjust the learning rate through training.
        Options are None for a constant learning rate, "plateau" for reducing
        the learning rate by a multiplicative factor after the validation
        loss has plateaued for a certain number of epochs, and "steplr" for
        reducing the learning rate by a multiplicative factor every fixed
        number of epochs. In both cases, the number of epochs is specified by
        learning_patience and the multiplicative factor by scheduler_factor.
    learning_patience : int
        If using plateau scheduler, this is the number of epochs over which
        to measure that a plateau has been
        reached. If using steplr scheduler, this is the number of epochs
        after which to reduce the learning rate.
    scheduler_factor : float
        The multiplicative factor by which to reduce the learning rate.
    network : str
        The type of network architecture to use. Options are "vanilla_rnn"
        for a vanilla RNN, "sompolinsky" for a
        Sompolinsky style RNN, and "feedforward" for a feedforward network.
    Win : str
        Type of input weights to use. Can be 'diagonal_first_two' for feeding
        inputs to only the first two neurons in the network, or 'orthogonal'
        for a (truncated) orthogonal matrix.
    Wrec_rand_proportion : float
        The proportion of Wrec that should initially be random. Only applies
        if the network is Sompolinsky style (network='sompolinsky'). Wrec
        will be initialized as a convex combination of a random matrix and an
        orthogonal matrix, weighted by Wrec_rand_proportion.
    input_scale : float
        Global scaling of the inputs.
    wout_scale : float
        Scaling of output weights.
    g_radius : float
        Magnitude of the largest eigenvalue of the random part of the
        recurrent weight matrix. This holds exactly
        (i.e. the random matrix is rescaled so that this is satisfied
        exactly), not just on average.
    dt : float
        Size of the timestep to use for the discretization of the dynamics if
        'network' is an RNN ('vanilla_rnn' or 'sompolinsky'). If
        network='vanilla_rnn', the recurrent weight matrix will be
        (1-dt)*I + dt*J, where I is the identity matrix and J is a random
        matrix. The entries of J are i.i.d. normally distributed, and scaled
        so that the largest eigenvalue of J has magnitude equal to g_radius.
    num_train_samples_per_epoch : int
        Number of training samples to use per epoch.
    num_test_samples_per_epoch : int
        Number of testing samples to use per epoch.
    input_style: str
        Input style. Currently 'hypercube' is the only valid option.
    freeze_input: bool
        Whether or not to present the same input every epoch. If False,
        new input samples are drawn every epoch.
    saves_per_epoch: Union[int,float,Iterable[int]]
        The number of times model parameters are saved to disk, per epoch.
        If this is a fraction, then multiple epochs will be completed per
        save: the equation is
        saves_per_epoch = round(1/epochs_per_save).
        If this is an iterable (such as a list), then it must have length
        num_epochs. Each entry in the list specifies
        how many saves should be in that epoch. For example, if num_epochs =
        3, then setting saves_per_epoch = [2,0,1]
        will cause the model to be saved twice during epoch 1, not saved
        during epoch 2, and saved once (at the end of) epoch 3. The first
        save (check_0.pt) always corresponds to the initial network, the next
        save is called check_1.pt, and so on.
    rerun: bool
        Whether or not to run the simulation again even if a matching run is
        found on disk. True means run the
        simulation again. This parameter is not written to the output table.
    table_path: str
        Path to the output table.
    multiprocess_lock: Optional[Lock]
        A multiprocessing Lock for ensuring that writing to the output table
        in a parallel way doesn't cause conflicts.
        This parameter is not written to the output table.

    Returns
    -------
        torch.nn.Module
            The trained network model.
        dict
            A collection of all the (meta) parameters used to specify the
            run. This is basically a dictionary of the
            input arguments to this function.
        str
            The directory where the model parameters over training are stored.
    """
    if (unit_injected_noise or dropout_p) and network != 'vanilla_rnn':
        raise NotImplementedError(
            "Noise injection is only implemented in vanilla_rnn")
    if table_path is None:
        table_path = DEFAULT_TABLE_PATH
    if num_test_samples_per_epoch in (None, 'None', 'NA', 'na'):
        num_test_samples_per_epoch = round(.15 * num_train_samples_per_epoch)
    if hasattr(saves_per_epoch, '__len__'):
        saves_per_epoch_copy = copy.copy(saves_per_epoch)
        saves_per_epoch = str(
            saves_per_epoch)  # Make a string copy to save to arg_dict below
    network = network.lower()
    loss = loss.lower()
    scheduler = scheduler.lower()
    optimizer = optimizer.lower()
    learning_patience_copy = copy.copy(learning_patience)
    if hasattr(learning_patience, '__len__'):
        learning_patience = '_'.join([str(x) for x in learning_patience])
    if optimizer == 'adam':
        momentum = 0

    ## Record the input parameters in a dictionary
    loc = locals()
    args = inspect.getfullargspec(initialize_and_train)[0]
    arg_dict = {arg: loc[arg] for arg in args}
    del arg_dict['table_path']
    del arg_dict['rerun']
    del arg_dict['multiprocess_lock']

    learning_patience = learning_patience_copy

    ## Redefine parameter options for consistency
    for key, value in arg_dict.items():
        if value in (None, 'None', 'NA'):
            arg_dict[key] = 'na'

    learning_patience = learning_patience_copy
    if isinstance(saves_per_epoch, str):
        saves_per_epoch = saves_per_epoch_copy

    ## Initialize Data.
    print('==> Preparing data..')
    torch.manual_seed(model_seed)
    np.random.seed(model_seed)

    ## Training datasets
    if network == 'feedforward':
        out = classification_task.delayed_mixed_gaussian(
            num_train_samples_per_epoch,
            num_test_samples_per_epoch,
            X_dim,
            num_classes,
            X_clusters,
            0,
            0,
            clust_sig,
            cluster_seed=2 * model_seed + 1,
            assignment_and_noise_seed=3 * model_seed + 13,
            avg_magn=1,
            freeze_input=freeze_input)
    else:
        out = classification_task.delayed_mixed_gaussian(
            num_train_samples_per_epoch,
            num_test_samples_per_epoch,
            X_dim,
            num_classes,
            X_clusters,
            n_hold,
            n_lag,
            clust_sig,
            cluster_seed=2 * model_seed + 1,
            assignment_and_noise_seed=3 * model_seed + 13,
            avg_magn=1,
            freeze_input=freeze_input)

    datasets, centers, cluster_class_label = out
    trainset = datasets['train']
    testset = datasets['val']

    if num_train_samples_per_epoch != 'na':
        subset_indices = range(num_train_samples_per_epoch)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=0)
    else:
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  num_workers=0,
                                                  shuffle=False)

    if num_test_samples_per_epoch != 'na':
        subset_indices = range(num_test_samples_per_epoch)
        testloader = torch.utils.data.DataLoader(
            testset,
            batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                subset_indices),
            num_workers=0)
    else:
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=batch_size,
                                                 num_workers=0,
                                                 shuffle=False)

    datasets = {'train': trainset, 'val': testset}
    dataloaders = {'train': trainloader, 'val': testloader}

    ## Convenience functions and variable definitions
    def ident(x):
        return x

    def zero_fun():
        return 0

    if hid_nonlin == 'linear'.casefold():
        nonlin = ident
    elif hid_nonlin == 'tanh'.casefold():
        nonlin = torch.tanh
    elif hid_nonlin == 'relu'.casefold():
        nonlin = torch.relu
    else:
        raise ValueError('Unrecognized option for hid_nonlin')

    ## Find requested network model and put model on appropriate device
    if Win in ('identity', 'diagonal_first_two'):
        Win_instance = input_scale * torch.eye(N, X_dim)
    elif Win in ('orth', 'orthogonal', 'orthog'):
        temp = torch.empty(N, X_dim)
        temp = torch.nn.init.orthogonal_(temp)
        temp = temp / torch.mean(torch.abs(temp))
        temp = input_scale * temp / math.sqrt(X_dim)
        Win_instance = temp
    else:
        raise AttributeError("Win option not recognized.")

    if loss == "mse_scalar":
        Wout_instance = wout_scale * torch.randn(1, N) * (.3 / np.sqrt(N))
        bout = torch.zeros(1)
    else:
        Wout_instance = wout_scale * torch.randn(num_classes,
                                                 N) * (.3 / np.sqrt(N))
        bout = torch.zeros(num_classes)

    brec = torch.zeros(N)

    J = torch.randn(N, N) / math.sqrt(N)
    top_ew = get_max_eigval(J)[0]
    top_ew_mag = torch.sqrt(top_ew[0]**2 + top_ew[1]**2)
    J_scaled = g_radius * (J / top_ew_mag)
    # J_scaled = g_radius*J
    if network in ('somp', 'sompolinsky', 'sompolinskyrnn'):
        Q = torch.nn.init.orthogonal_(torch.empty(N, N))
        Q_scaled = g_radius * Q
        Wrec = Wrec_rand_proportion * J_scaled + (1 - Wrec_rand_proportion) * Q
        model = models.SompolinskyRNN(Win_instance,
                                      Wrec,
                                      Wout_instance,
                                      brec,
                                      bout,
                                      nonlin,
                                      dt=dt,
                                      train_recurrent_bias=use_biases,
                                      train_output_bias=use_biases,
                                      output_over_recurrent_time=True)

    elif network == 'vanilla_rnn'.casefold():
        Wrec = (1 - dt) * torch.eye(N, N) + dt * J_scaled
        model = models.RNN(Win_instance,
                           Wrec,
                           Wout_instance,
                           brec,
                           bout,
                           nonlin,
                           output_over_recurrent_time=True,
                           train_output=train_output_weights,
                           train_recurrent_bias=use_biases,
                           train_output_bias=use_biases,
                           dropout_p=dropout_p,
                           unit_injected_noise=unit_injected_noise)

    elif network == 'feedforward'.casefold():
        Wrec = (1 - dt) * torch.eye(N, N) + dt * g_radius * (J / top_ew_mag)
        layer_weights = [Win_instance.T.clone()]
        biases = [torch.zeros(N)]
        nonlinearities = [nonlin]
        for i0 in range(n_lag + n_hold - 1):
            layer_weights.append(Wrec.clone())
            biases.append(torch.zeros(N))
            nonlinearities.append(nonlin)
        layer_weights.append(Wout_instance.T.clone())
        if loss == 'mse_scalar':
            biases.append(torch.zeros(1))
        else:
            biases.append(torch.zeros(num_classes))
        nonlinearities.append(ident)
        layer_train = [True] * len(layer_weights)
        bias_train = [use_biases] * len(biases)
        if not train_output_weights:
            layer_train[-1] = False
            bias_train[-1] = False

        model = models.FeedForward(layer_weights, biases, nonlinearities,
                                   layer_train, bias_train)
    else:
        raise AttributeError('Option for net_architecture not recognized.')
    if torch.cuda.device_count() == 2:
        device = [torch.device("cuda:0"), torch.device("cuda:1")]
    elif torch.cuda.device_count() == 1:
        device = [torch.device("cuda:0")]
    else:
        device = [torch.device("cpu")]

    print("Using {}".format(device[0]))
    model = model.to(device[0])
    loss_points = torch.arange(n_lag - n_out, n_lag + n_hold - 1)

    ## Initialize regularizer
    def l2_regularizer(param_list):
        # mean is over all elements of p, even if p is a matrix
        reg = 0
        cnt = 0
        for p in param_list:
            reg += torch.norm(p, 'fro') / N
            cnt = cnt + 1
        return reg / cnt

    if l2_regularization > 0:

        def l2_regularization_f():
            return l2_regularization * l2_regularizer(model.parameters())
    else:
        l2_regularization_f = zero_fun

    ## Initializing loss functions
    if loss in ('categorical_crossentropy', 'cce'):
        loss_unregularized = torch.nn.CrossEntropyLoss()
        if network == 'feedforward':

            def loss_function_unregularized(output, label):
                return loss_unregularized(output, label)
        else:

            def loss_function_unregularized(output, label):
                return loss_unregularized(
                    output[:, loss_points].transpose(1, 2), label[:,
                                                                  loss_points])
    elif loss in ('mean_square_error', 'mse'):
        criterion_mse = torch.nn.MSELoss()

        def criterion_single_timepoint(output,
                                       label):  # The output does not have a
            # time dimension
            label_onehot = torch.zeros(label.shape[0], num_classes)
            for i0 in range(num_classes):
                label_onehot[label == i0, i0] = 1
            return criterion_mse(output, .7 * label_onehot)

        if network == 'feedforward':

            def loss_function_unregularized(output, label):
                return criterion_single_timepoint(output, label)
        else:

            def loss_function_unregularized(output, label):
                cum_loss = 0
                for i0 in loss_points:
                    cum_loss += criterion_single_timepoint(
                        output[:, i0], label[:, i0])
                cum_loss = cum_loss / len(loss_points)
                return cum_loss
    elif loss == 'mse_scalar':
        criterion_mse = torch.nn.MSELoss()
        if network == 'feedforward':

            def loss_function_unregularized(output, label):
                label_t = .7 * (2. * label - 1.)
                return criterion_mse(output.flatten(), label_t.flatten())
        else:

            def loss_function_unregularized(output, label):
                label = .7 * (2. * label - 1.)
                # raise AttributeError("scalar loss with recurrent network
                # needs to be checked.")
                crit = criterion_mse(
                    output[:, loss_points].transpose(1, 2).flatten(),
                    label[:, loss_points].flatten())
                return crit
    elif loss == 'zero':

        def loss_function_unregularized(output, label):
            return 0
    else:
        raise AttributeError("loss option not recognized.")

    def loss_function(output, label):
        return loss_function_unregularized(output,
                                           label) + l2_regularization_f()

    ## Initialize optimizer and learning scheduler
    if optimizer == 'sgd':
        optimizer_instance = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                                    model.parameters()),
                                             lr=learning_rate,
                                             momentum=momentum)
    elif optimizer == 'rmsprop':
        # noinspection PyUnresolvedReferences
        optimizer_instance = torch.optim.RMSprop(filter(
            lambda p: p.requires_grad, model.parameters()),
                                                 lr=learning_rate,
                                                 alpha=0.9,
                                                 momentum=momentum)
    elif optimizer == 'adam':
        # noinspection PyUnresolvedReferences
        optimizer_instance = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                                     model.parameters()),
                                              lr=learning_rate)
    else:
        raise AttributeError('optimizer option not recognized.')
    if scheduler == 'plateau':
        learning_scheduler_instance = model_trainer.ReduceLROnPlateau(
            optimizer_instance,
            factor=scheduler_factor,
            patience=learning_patience,
            threshold=1e-7,
            threshold_mode='abs',
            min_lr=0,
            verbose=False)
    elif scheduler == 'steplr':
        learning_scheduler_instance = model_trainer.StepLR(
            optimizer_instance,
            step_size=learning_patience,
            gamma=scheduler_factor)
    elif scheduler == 'multisteplr':
        learning_scheduler_instance = model_trainer.MultiStepLR(
            optimizer_instance, learning_patience, scheduler_factor)
    elif scheduler == 'cyclic':
        learning_scheduler_instance = model_trainer.CyclicLR(
            optimizer_instance,
            max_lr=learning_rate,
            base_lr=scheduler_factor * learning_rate,
            step_size_up=learning_patience,
            cycle_momentum=False)
    elif scheduler == 'cyclic_halving':
        learning_scheduler_instance = model_trainer.CyclicLR(
            optimizer_instance,
            mode='triangular2',
            max_lr=learning_rate,
            base_lr=scheduler_factor * learning_rate,
            step_size_up=learning_patience,
            cycle_momentum=False)
    elif 'onecyclelr' in scheduler:
        if '_' in scheduler:
            final_div_factor = float(scheduler.split('_')[-1])
        else:
            final_div_factor = 1e4
        pct_start = learning_patience / patience_before_stopping
        learning_scheduler_instance = model_trainer.OneCycleLR(
            optimizer_instance,
            total_steps=patience_before_stopping,
            pct_start=pct_start,
            max_lr=learning_rate,
            div_factor=scheduler_factor,
            final_div_factor=final_div_factor,
            cycle_momentum=False)
    else:
        raise AttributeError('scheduler option not recognized.')

    ## Determine if the training needs to be run again or if it can be loaded
    # from disk
    verbose = True
    if isinstance(multiprocess_lock, Lock):
        multiprocess_lock.acquire()
        print("Locking output table.")
        # verbose = False
        verbose = True
    else:
        print("No locking")
    dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(
        arg_dict, table_path, ['num_epochs'], maximize='num_epochs')
    # breakpoint()
    if len(dirs) == 0:
        run_id, run_dir = mom.make_dir_for_run(arg_dict, table_path)
    else:
        run_id = ids[0]
        run_dir = dirs[0]
    if isinstance(multiprocess_lock, Lock):
        time.sleep(.5)
        print("Releasing output table.")
        multiprocess_lock.release()
    ## Now train the model (if necessary)
    saves_per_epoch_is_number = not hasattr(saves_per_epoch, '__len__')
    batches_per_epoch = len(trainloader)
    if saves_per_epoch_is_number and saves_per_epoch > 1:
        mod_factor = int(math.ceil((batches_per_epoch - 1) / saves_per_epoch))
        print(mod_factor)

        def save_model_criterion(stat_dict):
            return stat_dict['batch'] % mod_factor == 0
    elif saves_per_epoch_is_number:
        epochs_per_save = round(1 / saves_per_epoch)

        def save_model_criterion(stat_dict):
            save_epoch = stat_dict['epoch'] % epochs_per_save == 0
            return stat_dict['epoch_end'] and save_epoch
    else:

        def save_model_criterion(stat_dict):
            saves_this_epoch = saves_per_epoch[stat_dict['epoch']]
            if saves_this_epoch == 1 and stat_dict['epoch_end']:
                return True
            elif saves_this_epoch > 1:
                mod_factor = int(
                    math.ceil((batches_per_epoch - 1) / saves_this_epoch))
                return stat_dict['batch'] % mod_factor == 0
            else:
                return False

    print('\n==> Training/loading network')
    load_prev = not rerun
    # This modifies model by reference
    model_trainer.train_model(model,
                              dataloaders,
                              device[0],
                              loss_function,
                              optimizer_instance,
                              num_epochs,
                              run_dir,
                              load_prev,
                              learning_scheduler=learning_scheduler_instance,
                              save_model_criterion=save_model_criterion,
                              verbose=verbose)
    params = dict(dataloaders=dataloaders, datasets=datasets)
    loss_function_info = dict(
        loss_function=loss_function,
        loss_function_unregularized=loss_function_unregularized,
        l2_regularization_f=l2_regularization_f)
    params.update(arg_dict)
    params.update(loss_function_info)
    return model, params, run_dir
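Note: the docstring and code above both state that the random part of the recurrent matrix is rescaled so its largest eigenvalue has magnitude exactly g_radius, and that for 'vanilla_rnn' the recurrent weights are (1-dt)*I + dt*J_scaled. A small standalone check of that scaling, using torch.linalg.eigvals in place of the example's get_max_eigval helper (sizes are illustrative):

import torch

N, g_radius, dt = 200, 1.5, 0.01
J = torch.randn(N, N) / N ** 0.5
top_mag = torch.linalg.eigvals(J).abs().max()
J_scaled = g_radius * (J / top_mag)                 # spectral radius rescaled to g_radius
Wrec = (1 - dt) * torch.eye(N) + dt * J_scaled      # vanilla_rnn initialization
print(torch.linalg.eigvals(J_scaled).abs().max())   # ~= g_radius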
Example #10
import models
import utils
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

dataset, labels = utils.load_dataset("./dataset.csv")
dataset, labels = utils.create_seq(dataset, labels, utils.SEQU_SIZE)
X_train, y_train, X_test, y_test = utils.split(dataset, labels, 0.5)

rnn = models.RNN(1, 50, 1)

EPOCHS = 1
LR = 0.001

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)


def train(t, label):
    hidden = rnn.init_hidden()

    for i in range(t.size()[0]):
        output, hidden = rnn(t[i], hidden)

    optimizer.zero_grad()
    loss = criterion(output, label)
    loss.backward()
    optimizer.step()

    return output, loss
Example #11
c_to_i = pickle.load(open(args.c_to_i, 'rb'))
i_to_c = pickle.load(open(args.i_to_c, 'rb'))
n_char = len(c_to_i)

dataloaders = []

with open('data/testing.txt') as f:
    lines = f.readlines()
    lines = [l.strip().split('\t') for l in lines]
    s_to_human_score = {l[1]: l[2] for l in lines}

if args.model == 'Trans':
    model = models.TransformerModel(args, n_char, i_to_c)
else:
    model = models.RNN(args, n_char, i_to_c)

model = utils.initialize_model(model, device, args.save_files)

print("number of parameters :",
      sum(p.numel() for p in model.parameters() if p.requires_grad))

model.eval()

log_likelihoods = []
synthesis = []
sascores = []
ok_sascore = []
no_sascore = []
ok_ourscore = []
no_ourscore = []
Example #12
            loss = th.mean(th.norm(out - targets)**2)
            # print(loss.item())
            loss.backward()
            optimizer.step()

    # rnn1 = th.jit.script(RNN_builtin(win, wrec, wout, brec, bout))
    # optimizer1 = th.optim.SGD(filter(lambda p: p.requires_grad, rnn1.parameters()), lr=lr)

    # rnn2 = th.jit.script(models.RNN(win, wrec, wout, brec, bout, nonlinearity='tanh', train_input=True))
    # rnn2 = th.jit.trace(models.RNN(win, wrec, wout, brec, bout, nonlinearity='tanh', train_input=True),
    #                     th.zeros(b, T, d))
    rnn2 = models.RNN(win,
                      wrec,
                      wout,
                      brec,
                      bout,
                      nonlinearity='tanh',
                      train_input=True)
    optimizer2 = th.optim.SGD(filter(lambda p: p.requires_grad,
                                     rnn2.parameters()),
                              lr=lr)

    # tic = time.time()
    # train_model(rnn1, optimizer1)
    # toc = time.time()
    # print(toc-tic)

    tic = time.time()
    train_model(rnn2, optimizer2)
    toc = time.time()
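Note: Example #12 times only the eager models.RNN and leaves the torch.jit.script / torch.jit.trace variants commented out. A self-contained illustration of scripting a module and comparing wall-clock time (the Toy module is a placeholder, not models.RNN):

import time

import torch
import torch.nn as nn


class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(64, 64)

    def forward(self, x):
        # A small loop so TorchScript has something to optimize.
        for _ in range(100):
            x = torch.tanh(self.lin(x))
        return x


x = torch.zeros(32, 64)
for name, module in [("eager", Toy()), ("scripted", torch.jit.script(Toy()))]:
    tic = time.time()
    for _ in range(50):
        module(x)
    print(name, time.time() - tic)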
Example #13
# reshape input to be [time steps,samples,features]
train_x, train_y = np.expand_dims(train_x, 1), np.expand_dims(train_y, 1)
validate_x, validate_y = np.expand_dims(validate_x,
                                        1), np.expand_dims(validate_y, 1)
test_x, test_y = np.expand_dims(test_x, 1), np.expand_dims(test_y, 1)

seed = FLAGS.seed
torch.manual_seed(seed)

rmse_list = []
mae_list = []

if FLAGS.algorithm == 'RNN':
    model = models.RNN(input_size=FLAGS.input_size,
                       hidden_size=FLAGS.hidden_size,
                       output_size=FLAGS.output_size)
elif FLAGS.algorithm == 'LSTM':
    model = models.LSTM(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size)
elif FLAGS.algorithm == 'mRNN_fixD':
    model = models.MRNNFixD(input_size=FLAGS.input_size,
                            hidden_size=FLAGS.hidden_size,
                            output_size=FLAGS.output_size,
                            k=FLAGS.K)
elif FLAGS.algorithm == 'mRNN':
    model = models.MRNN(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size,
                        k=FLAGS.K)
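Note: the reshape comment in Example #13 says the arrays become [time steps, samples, features]; np.expand_dims(x, 1) inserts the singleton sample axis. A tiny shape check (the sizes are made up):

import numpy as np

train_x = np.zeros((2500, 1))          # (time steps, features)
train_x = np.expand_dims(train_x, 1)   # -> (2500, 1, 1): [time steps, samples, features]
print(train_x.shape)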
Example #14
def get_model(model,
              model_path,
              pdtb_category,
              vocab_size=10000,
              embedding_size=50,
              classes=4,
              rnn_model='LSTM',
              mean_seq=False,
              hidden_size=128,
              embed=None,
              layers=2,
              lr=0.005,
              weight_decay=1e-4):
    if pdtb_category:
        model_file_path = model_path + '/' + model + '_' + pdtb_category + '_50.pkl'
    else:
        model_file_path = model_path + '/' + model + '_50.pkl'

    if model == 'rnn':
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.RNN(vocab_size=vocab_size,
                                embed_size=embedding_size,
                                num_output=classes,
                                rnn_model=rnn_model,
                                use_last=(not mean_seq),
                                hidden_size=hidden_size,
                                embedding_tensor=embed,
                                num_layers=layers,
                                batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)

    elif model == 'rnnatt17':
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.RNNATT17(vocab_size=vocab_size,
                                     embed_size=embedding_size,
                                     num_output=classes,
                                     rnn_model=rnn_model,
                                     use_last=(not mean_seq),
                                     hidden_size=hidden_size,
                                     embedding_tensor=embed,
                                     num_layers=layers,
                                     batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)

    elif model == 'grn16':
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.GRN16(vocab_size=vocab_size,
                                  embed_size=embedding_size,
                                  num_output=classes,
                                  rnn_model=rnn_model,
                                  use_last=(not mean_seq),
                                  hidden_size=hidden_size,
                                  embedding_tensor=embed,
                                  num_layers=layers,
                                  batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)

    elif model == 'keann':
        if pdtb_category:
            model_file_path = model_path + '/' + model + '_' + pdtb_category + '_10.pkl'
        else:
            model_file_path = model_path + '/' + model + '_10.pkl'
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.KEANN(vocab_size=vocab_size,
                                  embed_size=embedding_size,
                                  num_output=classes,
                                  rnn_model=rnn_model,
                                  use_last=(not mean_seq),
                                  hidden_size=hidden_size,
                                  embedding_tensor=embed,
                                  num_layers=layers,
                                  batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)

    elif model == 'keann_kg':
        if pdtb_category:
            model_file_path = model_path + '/' + model + '_' + pdtb_category + '_10.pkl'
        else:
            model_file_path = model_path + '/' + model + '_10.pkl'
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.KEANNKG(vocab_size=vocab_size,
                                    embed_size=embedding_size,
                                    num_output=classes,
                                    rnn_model=rnn_model,
                                    use_last=(not mean_seq),
                                    hidden_size=hidden_size,
                                    embedding_tensor=embed,
                                    num_layers=layers,
                                    batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)

    elif model == 'trans_s':
        # Todo
        if os.path.exists(model_file_path):
            _model = joblib.load(model_file_path)
        else:
            _model = models.KEANNKG(vocab_size=vocab_size,
                                    embed_size=embedding_size,
                                    num_output=classes,
                                    rnn_model=rnn_model,
                                    use_last=(not mean_seq),
                                    hidden_size=hidden_size,
                                    embedding_tensor=embed,
                                    num_layers=layers,
                                    batch_first=True)

        # optimizer and loss
        _optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             _model.parameters()),
                                      lr=lr,
                                      weight_decay=weight_decay)

        _criterion = nn.CrossEntropyLoss()

        return (_model, _optimizer, _criterion)
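Note: every branch of get_model in Example #14 repeats the same Adam/CrossEntropyLoss setup. A small helper (a sketch, not part of the original code) would remove that duplication:

import torch
import torch.nn as nn


def make_optimizer_and_criterion(model, lr=0.005, weight_decay=1e-4):
    # Optimize only parameters with requires_grad=True, as in the branches above.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=lr,
        weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss()
    return optimizer, criterion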
Example #15
parser.add_argument('--model', dest='model_path', type=str, default=None)
args = parser.parse_args()
if args.model_path is None:
    if args.direct_model is None:
        raise ValueError("Must pass first positional argument as model, or --model argument, e.g. summary/experiment-0/models/model-7")
    else:
        saved_model_path = args.direct_model[0]
else:
    saved_model_path = args.model_path

all_state_dicts = torch.load(saved_model_path)
models_file_path = get_saved_model_defs(saved_model_path)
sys.path.insert(0, models_file_path); import models; sys.path.remove(models_file_path)
config_file_path = get_saved_model_config(saved_model_path)
sys.path.insert(0, config_file_path); from config import *; sys.path.remove(config_file_path)
model = models.RNN(minibatch_size, chars_len, hidden_size, chars_len, n_layers, minibatch_size).to(DEVICE)
model.load_state_dict(all_state_dicts["RNN"])
model = model.to(DEVICE)

sample_len = 100
gen_strings = ["I" for i in range(minibatch_size)]
hidden = Variable(model.create_hidden()).to(DEVICE)
random_state = np.random.RandomState(11)
for i in range(sample_len):
    batch = [char_to_index[gen_strings[i][-1]] for i in range(minibatch_size)]
    batch = torch.LongTensor(batch)[None, :].to(DEVICE)
    out, hidden = model(batch, hidden)
    p_out = F.softmax(out, dim=-1)
    p = p_out.cpu().data.numpy()
    p = p / (p.sum(axis=-1)[:, None] + 1E-3)
    sampled = [np.argmax(random_state.multinomial(1, p[i])) for i in range(len(p))]
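Note: the sampling loop in Example #15 draws a single multinomial sample per string and takes the argmax of the resulting one-hot vector to recover the sampled character index. A self-contained illustration of that trick:

import numpy as np

random_state = np.random.RandomState(11)
p = np.array([0.1, 0.7, 0.2])             # normalized distribution over 3 symbols
draw = random_state.multinomial(1, p)     # one-hot draw, e.g. array([0, 1, 0])
sampled = int(np.argmax(draw))            # index of the sampled symbol
print(draw, sampled)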
Example #16
FILTER_SIZES = [3,4,5] # for CNN
OUTPUT_DIM = 1
N_LAYERS = 2 # for LSTM
BIDIRECTIONAL = True # for LSTM
DROPOUT = 0.5

model = models.FastText(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM)
PATH = 'models/FastText.pt'

if args.model == 'RNN':
	INPUT_DIM = len(TEXT.vocab)
	EMBEDDING_DIM = 300
	HIDDEN_DIM = 256
	OUTPUT_DIM = 1

	model = models.RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
	PATH = 'models/RNN.pt'
elif args.model == 'LSTM':
	INPUT_DIM = len(TEXT.vocab)
	EMBEDDING_DIM = 300
	HIDDEN_DIM = 256
	OUTPUT_DIM = 1
	N_LAYERS = 2
	BIDIRECTIONAL = True
	DROPOUT = 0.5

	model = models.LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
	PATH = 'models/LSTM.pt'
elif args.model == 'FastText':
	INPUT_DIM = len(TEXT.vocab)
	EMBEDDING_DIM = 300
Example #17
def main(_):
    if not os.path.exists(FLAGS.local_path_in) or FLAGS.use_optimizer:
        utils_gcs.download_files_from_gcs(FLAGS.local_path_in,
                                          FLAGS.gcs_path_in)
    logging.info('Data downloaded successfully!')

    sequence_df = pd.read_hdf(
        os.path.join(FLAGS.local_path_in, FLAGS.seq_file), 'df')
    if FLAGS.balance_df:
        balance_df = pd.read_hdf(
            os.path.join(FLAGS.local_path_in, FLAGS.balance_df), 'df')
        sequence_df = sequence_df[sequence_df['url'].isin(balance_df['url'])]
    embeddings_dict = utils.get_n2v_graph_embedding(os.path.join(
        FLAGS.local_path_in, FLAGS.g_emb),
                                                    graph_gen=False,
                                                    normalize_type='minmax')
    x_sequence, y_label, label_list = utils.load_input_with_label(
        sequence_df, embeddings_dict, FLAGS.task)

    train_idx, val_idx, test_idx = utils.split_data_idx(
        len(x_sequence), FLAGS.train_ratio, FLAGS.val_ratio)
    train_batches = np.array_split(train_idx,
                                   len(train_idx) / FLAGS.batch_size)
    val_batches = np.array_split(val_idx, len(val_idx) / FLAGS.batch_size)
    test_batches = np.array_split(test_idx, len(test_idx) / FLAGS.batch_size)

    # model training/testing
    logging.info('FLAGS.epochs: %s', FLAGS.epochs)
    logging.info('FLAGS.batch_size: %s', FLAGS.batch_size)
    logging.info('FLAGS.learning_rate: %s', FLAGS.lr)

    dropout = 0.0 if FLAGS.num_layers == 1 else FLAGS.dropout

    print_gpu_info()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info('Current device is %s', device.type)
    if FLAGS.model == 'rnn':
        tm_model = models.RNN(in_dim=FLAGS.dim,
                              hid_dim=FLAGS.hid_dim,
                              num_label=len(label_list),
                              num_layers=FLAGS.num_layers,
                              dropout=dropout).to(device)
    elif FLAGS.model == 'lstm':
        tm_model = models.LSTM(in_dim=FLAGS.dim,
                               hid_dim=FLAGS.hid_dim,
                               num_label=len(label_list),
                               num_layers=FLAGS.num_layers,
                               dropout=dropout,
                               bi_direct=FLAGS.bi).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(tm_model.parameters(),
                                 lr=FLAGS.lr,
                                 weight_decay=1e-6)
    if FLAGS.use_optimizer:
        # example trial_name:
        # 'projects/{project_id}/locations/{region}/studies/{study_id}/trials/{trial_id}'
        trial_name_split = FLAGS.trial_name.split('/')
        project_id = trial_name_split[1]
        region = trial_name_split[3]
        study_id = trial_name_split[-3]
        trial_id = trial_name_split[-1]
        logging.info('project_id: %s, region: %s, study_id: %s, trial_id: %s',
                     project_id, region, study_id, trial_id)
        ml_client = optimizer_client.create_or_load_study(
            project_id, region, study_id, json.loads(FLAGS.study_config))
    for epoch in range(FLAGS.epochs):
        logging.info('Epoch %s', epoch)
        start_time = time.time()
        train(tm_model, x_sequence, y_label, train_batches, criterion,
              optimizer, device, FLAGS.print_step)
        val_f1 = val(tm_model, x_sequence, y_label, val_batches, device)
        test(tm_model, x_sequence, y_label, test_batches, device)
        if FLAGS.use_optimizer:
            elapsed_secs = int(time.time() - start_time)
            metric_list = [{'metric': 'valf1', 'value': float(val_f1)}]
            ml_client.report_intermediate_objective_value(
                epoch, elapsed_secs, metric_list, trial_id)

    logging.info('Experiment finished.')

    if FLAGS.save_model:
        filename = '%s_%s_%s' % (FLAGS.task, FLAGS.model, FLAGS.name)
        utils.save_model(tm_model, optimizer, FLAGS.local_path_out, filename)
        utils_gcs.upload_files_to_gcs(local_folder=FLAGS.local_path_out,
                                      gcs_path=FLAGS.gcs_path_out)
Example #18
def main():
    batch_size = 1
    start = 0
    end = 100
    # read data
    df_data = pd.read_csv('data/' + FLAGS.dataset + '.csv')
    # split train/val/test
    if FLAGS.dataset == 'tree7':
        train_size = 2500
        validate_size = 1000
    if FLAGS.dataset == 'DJI':
        train_size = 2500
        validate_size = 1500
    if FLAGS.dataset == 'traffic':
        train_size = 1200
        validate_size = 200
    if FLAGS.dataset == 'arfima':
        train_size = 2000
        validate_size = 1200
    rmse_list = []
    mae_list = []
    for i in range(start, end):
        seed = i
        print('seed ----------------------------------', seed)
        x = np.array(df_data['x'])
        y = np.array(df_data['x'])
        x = x.reshape(-1, FLAGS.input_size)
        y = y.reshape(-1, FLAGS.output_size)
        # normalize the data
        scaler = MinMaxScaler(feature_range=(0, 1))
        x = scaler.fit_transform(x)
        y = scaler.fit_transform(y)
        # use this function to prepare the data for modeling
        data_x, data_y = create_dataset(x, y)

        # split into train and test sets
        train_x, train_y = data_x[0:train_size], data_y[0:train_size]
        validate_x, validate_y = data_x[train_size:train_size +
                                                    validate_size], \
                                 data_y[train_size:train_size +
                                                   validate_size]
        test_x, test_y = data_x[train_size + validate_size:len(data_y)], \
                         data_y[train_size + validate_size:len(data_y)]

        # reshape input to be [time steps,samples,features]
        train_x = np.reshape(train_x,
                             (train_x.shape[0], batch_size, FLAGS.input_size))
        validate_x = np.reshape(
            validate_x, (validate_x.shape[0], batch_size, FLAGS.input_size))

        test_x = np.reshape(test_x,
                            (test_x.shape[0], batch_size, FLAGS.input_size))
        train_y = np.reshape(train_y,
                             (train_y.shape[0], batch_size, FLAGS.output_size))
        validate_y = np.reshape(
            validate_y, (validate_y.shape[0], batch_size, FLAGS.output_size))
        test_y = np.reshape(test_y,
                            (test_y.shape[0], batch_size, FLAGS.output_size))

        torch.manual_seed(seed)
        # initialize model
        if FLAGS.algorithm == 'RNN':
            model = models.RNN(input_size=FLAGS.input_size,
                               hidden_size=FLAGS.hidden_size,
                               output_size=FLAGS.output_size)
        elif FLAGS.algorithm == 'LSTM':
            model = models.LSTM(input_size=FLAGS.input_size,
                                hidden_size=FLAGS.hidden_size,
                                output_size=FLAGS.output_size)
        elif FLAGS.algorithm == 'mRNN_fixD':
            model = models.MRNNFixD(input_size=FLAGS.input_size,
                                    hidden_size=FLAGS.hidden_size,
                                    output_size=FLAGS.output_size,
                                    k=FLAGS.K)
        elif FLAGS.algorithm == 'mRNN':
            model = models.MRNN(input_size=FLAGS.input_size,
                                hidden_size=FLAGS.hidden_size,
                                output_size=FLAGS.output_size,
                                k=FLAGS.K)
        elif FLAGS.algorithm == 'mLSTM_fixD':
            model = models.MLSTMFixD(input_size=FLAGS.input_size,
                                     hidden_size=FLAGS.hidden_size,
                                     output_size=FLAGS.output_size,
                                     k=FLAGS.K)
        elif FLAGS.algorithm == 'mLSTM':
            model = models.MLSTM(input_size=FLAGS.input_size,
                                 hidden_size=FLAGS.hidden_size,
                                 output_size=FLAGS.output_size,
                                 k=FLAGS.K)
        else:
            print('Algorithm selection ERROR!!!')
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=FLAGS.lr)
        best_loss = np.infty
        best_train_loss = np.infty
        stop_criterion = 1e-5
        rec = np.zeros((FLAGS.epochs, 3))
        epoch = 0
        val_loss = -1
        train_loss = -1
        cnt = 0

        def train():
            model.train()
            optimizer.zero_grad()
            target = torch.from_numpy(train_y).float()
            output, hidden_state = model(torch.from_numpy(train_x).float())
            with torch.no_grad():
                val_y, _ = model(
                    torch.from_numpy(validate_x).float(), hidden_state)
                target_val = torch.from_numpy(validate_y).float()
                val_loss = criterion(val_y, target_val)

            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            return loss, val_loss

        def compute_test(best_model):
            model = best_model
            train_predict, hidden_state = model(to_torch(train_x))
            train_predict = train_predict.detach().numpy()
            val_predict, hidden_state = model(to_torch(validate_x),
                                              hidden_state)
            test_predict, _ = model(to_torch(test_x), hidden_state)
            test_predict = test_predict.detach().numpy()
            # invert predictions
            test_predict_r = scaler.inverse_transform(test_predict[:, 0, :])
            test_y_r = scaler.inverse_transform(test_y[:, 0, :])
            # calculate error
            test_rmse = math.sqrt(
                mean_squared_error(test_y_r[:, 0], test_predict_r[:, 0]))
            test_mape = (abs((test_predict_r[:, 0] - test_y_r[:, 0]) /
                             test_y_r[:, 0])).mean()
            test_mae = mean_absolute_error(test_predict_r[:, 0], test_y_r[:,
                                                                          0])
            return test_rmse, test_mape, test_mae

        while epoch < FLAGS.epochs:
            _time = time.time()
            loss, val_loss = train()
            if val_loss < best_loss:
                best_loss = val_loss
                best_epoch = epoch
                best_model = deepcopy(model)
            # stop_criteria = abs(criterion(val_Y, target_val) - val_loss)
            if (best_train_loss - loss) > stop_criterion:
                best_train_loss = loss
                cnt = 0
            else:
                cnt += 1
            if cnt == FLAGS.patience:
                break
            # save training records
            time_elapsed = time.time() - _time
            rec[epoch, :] = np.array([loss.item(), val_loss.item(), time_elapsed])
            print("epoch: {:2.0f} train_loss: {:2.5f} val_loss: {:2.5f} "
                  "time: {:2.1f}s".format(epoch, loss.item(), val_loss.item(),
                                          time_elapsed))
            epoch = epoch + 1

        # make predictions
        test_rmse, test_mape, test_mae = compute_test(best_model)

        rmse_list.append(test_rmse)
        mae_list.append(test_mae)
        print('RMSE:{}'.format(rmse_list))
        print('MAE:{}'.format(mae_list))
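
The snippet above reads all of its hyperparameters from a FLAGS namespace that is defined elsewhere in the script. A minimal, purely illustrative sketch of how such a namespace could be declared with argparse is shown below; the flag names mirror the ones used above, but the defaults are hypothetical.

import argparse

# Hypothetical flag definitions matching the names used above; defaults are illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument('--algorithm', type=str, default='mRNN',
                    help='one of RNN, LSTM, mRNN_fixD, mRNN, mLSTM_fixD, mLSTM')
parser.add_argument('--input_size', type=int, default=1)
parser.add_argument('--hidden_size', type=int, default=32)
parser.add_argument('--output_size', type=int, default=1)
parser.add_argument('--K', type=int, default=4)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--epochs', type=int, default=500)
parser.add_argument('--patience', type=int, default=20)
FLAGS = parser.parse_args()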
Example #19
0
import torch
import datasets
import models

# UIUC video dataset, fold 1, loaded in batches of 8
dataset_loader = datasets.uiuc_video(1, batch_size=8)
epoch_size = 200
device = 'cuda:0'

# recurrent classifier trained with cross-entropy and SGD
model = models.RNN().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=1e-4,
                            momentum=0.8,
                            weight_decay=1e-2)
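
The excerpt ends after the optimizer is constructed. A minimal training-loop sketch over these objects is given below; the (clips, labels) batch format and the logits output are assumptions, since neither appears in the original.

for epoch in range(epoch_size):
    running_loss, num_batches = 0.0, 0
    for clips, labels in dataset_loader:        # assumed batch format
        clips, labels = clips.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(clips)                   # assumed to return class logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    print('epoch {}: mean loss {:.4f}'.format(epoch, running_loss / max(num_batches, 1)))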
Example #20
0
    train_loaders.append(temp)

# train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
validation_loader = DataLoader(dataset=validation_dataset,
                               batch_size=args.batch_size,
                               shuffle=False)

# Model, loss, and optimizer
if args.model == 'lstm':
    model = models.LSTM(args.input_size, args.hidden_size, args.num_layers,
                        args.num_classes, args.noise_std).to(device)
elif args.model == 'gru':
    model = models.GRU(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)
elif args.model == 'rnn':
    model = models.RNN(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)
else:
    raise ValueError(f'Unknown model: {args.model}')

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# Train the model
# total_step = len(train_loader)
# total_train_step = len(train_loader)
# total_val_step = len(validation_loader)


def evaluate(dataloader):
    total_loss = 0.0
    num_batches = 0.0
    model.eval()
    with torch.no_grad():
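
The body of evaluate() is cut off in this excerpt. One plausible completion, averaging the criterion over the loader, is sketched below; the (sequences, targets) batch format and the single-tensor model output are assumptions.

def evaluate(dataloader):
    total_loss = 0.0
    num_batches = 0.0
    model.eval()
    with torch.no_grad():
        for sequences, targets in dataloader:   # assumed batch format
            sequences, targets = sequences.to(device), targets.to(device)
            outputs = model(sequences)          # the model may also return a hidden state
            total_loss += criterion(outputs, targets).item()
            num_batches += 1
    model.train()
    return total_loss / max(num_batches, 1.0)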
Example #21
0
    print("Running on " + str(device))

    #Switch between RNN and GRU
    model_type = 'GRU'

    if (model_type == 'RNN'):

        #Generate samples using RNN
        dir = 'models/rnn/best_params.pt'

        print("RNN model loaded.")

        model = models.RNN(emb_size=200,
                           hidden_size=1500,
                           seq_len=35,
                           batch_size=10,
                           vocab_size=10000,
                           num_layers=2,
                           dp_keep_prob=0.35)

    elif (model_type == 'GRU'):

        # Generate samples using GRU
        dir = 'models/gru/best_params.pt'

        print("GRU model loaded.")

        model = models.GRU(emb_size=200,
                           hidden_size=1500,
                           seq_len=35,
                           batch_size=10,
Example #22
0
def main(gpu, path_corpus, path_config, path_word2vec):
    MAX_EPOCH = 50
    EVAL = 200
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab,
                                                word2vec,
                                                dim=word_dim,
                                                scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]

            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words,
                                          train=True,
                                          tail=False,
                                          mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)

            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size))  # (TN, |V|)
            ts = F.reshape(ts, (-1, ))  # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)

            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, data_i+batch_size, n_train,
                        float(data_i+batch_size)/n_train*100,
                        perp, acc_data*100)

            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val,
                                               ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                        % (it, epoch, perp, acc_data*100)

                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                    model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" %
                    (it, epoch), utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
Example #23
0
def run_experiment(args, seed):
    if args.train is not None:
        train_dl = MTCDataLoader(args.train)
        if args.dev is None:
            train_data, dev_data = train_dl.train_test_split(
                test_size=args.devsize)
        else:
            dev_dl = MTCDataLoader(args.dev)
            train_data = list(train_dl.sequences())
            dev_data = list(dev_dl.sequences())
        if args.test is not None:
            test_dl = MTCDataLoader(args.test)
            test_data = list(test_dl.sequences())
        else:
            test_data = []
        print(
            f'Train: {len(train_data)}, Dev: {len(dev_data)}, Test: {len(test_data)}'
        )

    elif args.dataconf:
        if args.dataconf not in CONFIGS:
            print(f"Error. {args.dataconf} is not a valid data configuration.",
                  file=sys.stderr)
            print(f"Choose one of: {' '.join(DataConf.confs.keys())}",
                  file=sys.stderr)
            raise SystemExit
        train_data, dev_data, test_data = DataConf().getData(
            args.dataconf, args.devsize, args.testsize, args.cross_class_size)
        if args.test:
            print("Warning. Command line argument --test_data ignored.",
                  file=sys.stderr)

    if args.savetrain:
        MTCDataLoader.writeJSON(args.savetrain, train_data)
    if args.savedev:
        MTCDataLoader.writeJSON(args.savedev, dev_data)
    if args.savetest:
        MTCDataLoader.writeJSON(args.savetest, test_data)

    cat_encoders = [CategoricalEncoder(f) for f in args.categorical_features]
    scaler = (StandardScaler if args.scaler == 'zscore' else
              MinMaxScaler if args.scaler == 'minmax' else IdentityScaler)
    cont_encoders = [
        ContinuousEncoder(f, scaler=scaler) for f in args.continuous_features
    ]
    encoders = cat_encoders + cont_encoders

    train_selector, dev_selector = None, None
    if args.precompute_examples:
        if args.example_type == 'pairs':
            train_selector = samplers.PairSelector(
                pos_neg_ratio=args.pn_ratio, random_sample=args.sample_ratio)
            dev_selector = samplers.PairSelector(pos_neg_ratio=args.pn_ratio)
        else:
            train_selector = samplers.TripletSelector(
                sample_ratio=args.sample_ratio)
            dev_selector = samplers.TripletSelector(
                sample_ratio=args.sample_ratio)

    dataset_constructor = (datasets.Dataset if args.online_sampler else
                           datasets.DupletDataset if args.example_type
                           == 'pairs' else datasets.TripletDataset)

    train = dataset_constructor(train_data,
                                *encoders,
                                batch_size=args.batch_size,
                                selector=train_selector,
                                label='tunefamily',
                                train=True).fit()

    dev = dataset_constructor(dev_data,
                              *encoders,
                              batch_size=args.batch_size,
                              selector=dev_selector,
                              label='tunefamily',
                              train=False).fit()

    if args.precompute_examples:
        print(train_selector)
        print(dev_selector)

    collate_fn = datasets.collate_fn

    if args.balanced_batch_sampler:
        train_batch_sampler = datasets.BalancedBatchSampler(
            train.labels, n_classes=args.n_classes, n_samples=args.n_samples)
        dev_batch_sampler = datasets.BalancedBatchSampler(
            dev.labels, n_classes=args.n_classes, n_samples=args.n_samples)
        train_loader = DataLoader(train,
                                  batch_sampler=train_batch_sampler,
                                  collate_fn=collate_fn,
                                  num_workers=args.n_workers)
        dev_loader = DataLoader(dev,
                                batch_sampler=dev_batch_sampler,
                                collate_fn=collate_fn,
                                num_workers=args.n_workers)
    elif not args.precompute_examples:
        train_loader = DataLoader(train,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  num_workers=args.n_workers)
        dev_loader = DataLoader(dev,
                                batch_size=args.batch_size,
                                collate_fn=collate_fn,
                                num_workers=args.n_workers)
    else:
        train_loader, dev_loader = datasets.DataLoader(
            train), datasets.DataLoader(dev)

    device = 'cuda' if args.cuda else 'cpu'

    emb_dims = [(encoder.size(), args.emb_dim) for encoder in cat_encoders]
    if args.model.lower() == 'rnn':
        network = models.RNN(emb_dims,
                             args.hid_dim,
                             cont_features=len(cont_encoders),
                             n_layers=args.n_layers,
                             cell=args.cell,
                             dropout=args.dropout,
                             bidirectional=args.bidirectional)
    elif args.model.lower() == 'cnn':
        network = models.CNN(emb_dims,
                             cont_features=len(cont_encoders),
                             kernel_sizes=tuple(args.kernel_sizes),
                             highway_layers=args.highway_layers,
                             out_channels=args.out_channels,
                             dropout=args.dropout)
    else:
        network = models.CNNRNN(emb_dims,
                                cont_features=len(cont_encoders),
                                kernel_sizes=tuple(args.kernel_sizes),
                                highway_layers=args.highway_layers,
                                out_channels=args.out_channels,
                                dropout=args.dropout,
                                cell=args.cell,
                                bidirectional=args.bidirectional,
                                n_layers=args.n_layers)

    if args.example_type == 'pairs':
        if not args.online_sampler:
            if args.loss == 'cosine':
                loss_fn = models.CosinePairLoss(weight=args.weight,
                                                margin=args.margin)
            else:
                loss_fn = models.EuclideanPairLoss(margin=args.margin)
            model = models.TwinNetwork(network,
                                       loss_fn).to(device)  # margin 0.16, 0.4
        else:
            if args.loss == 'cosine':
                loss_fn = models.OnlineCosinePairLoss(
                    samplers.HardNegativePairSelector(),
                    weight=args.weight,
                    margin=args.margin,
                    cutoff=args.cutoff_cosine)
            else:
                loss_fn = models.OnlineEuclideanPairLoss(
                    samplers.HardNegativePairSelector(), margin=args.margin)
            model = models.Network(network, loss_fn).to(device)
    else:
        if not args.online_sampler:
            if args.loss == 'cosine':
                loss_fn = models.CosineTripletLoss(margin=args.margin)
            else:
                loss_fn = models.EuclidianTripletLoss(margin=args.margin)
            model = models.TripletNetwork(network, loss_fn).to(device)
        else:
            if args.loss == 'cosine':
                loss_fn = models.OnlineCosineTripletLoss(
                    samplers.NegativeTripletSelector(
                        method=args.negative_pair_selector,
                        margin=args.margin),
                    margin=args.margin)
            else:
                loss_fn = models.OnlineEuclideanTripletLoss(
                    samplers.NegativeTripletSelector(
                        method=args.negative_pair_selector,
                        margin=args.margin),
                    margin=args.margin)
            model = models.Network(network, loss_fn).to(device)

    print(model)

    for embedding in model.network.embs:
        embedding.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'min',
        verbose=True,
        patience=args.lr_scheduler,
        threshold=1e-4,
        cooldown=5)
    print(
        f'Number of parameters: {sum(p.nelement() for p in model.parameters())}'
    )

    try:
        early_stop = fit_model(train_loader,
                               dev_loader,
                               model,
                               optimizer,
                               scheduler,
                               args.epochs,
                               args.log_interval,
                               plot=False,
                               patience=args.patience,
                               early_stop_score=args.early_stop_score,
                               eval_metric=args.loss)
        best_score = early_stop['best']
        fails = early_stop['fails']
        best_params = early_stop['best_params']
        val_scores = early_stop['val_scores']
    except EarlyStopException as e:
        print("Early stopping training")
        best_score = e.best
        fails = e.fails
        best_params = e.best_params
        val_scores = e.val_scores

    model.load_state_dict(best_params)
    model.eval()
    # serialize model if necessary

    print("Best", args.early_stop_score, best_score)

    if args.save_encodings is not None and args.dev is not None:
        utils.save_encodings(dev, model, args.save_encodings, 'dev')

    if args.test is not None:
        train_label_set = list(set(train_loader.dataset.labels))
        test = dataset_constructor(test_data,
                                   *encoders,
                                   batch_size=args.batch_size,
                                   label='tunefamily',
                                   train=False).fit()
        test_scores = metrics.evaluate_ranking(model,
                                               test,
                                               train_label_set=train_label_set,
                                               metric=args.loss)
        message = 'Testing:\n'
        message += f'  silhouette: {test_scores["silhouette"]:.3f}\n'
        message += f'  MAP: {test_scores["MAP"]:.3f}\n'
        message += f'  MAP (seen): {test_scores["MAP seen labels"]:.3f}\n'
        message += f'  MAP (unseen): {test_scores["MAP unseen labels"]:.3f}\n'
        message += f'  Margin: {test_scores["margin_score"]:.3f}'
        print(message)

    with open(f'{args.results_dir}/{args.results_path}', 'a+') as f:
        f.write(
            json.dumps(
                {
                    "params": vars(args),
                    "dev_score": float(best_score),
                    "val_scores": val_scores,
                    "test_scores":
                    test_scores if args.test is not None else {},
                    "fails": fails,
                    "seed": seed,
                    "now": str(datetime.now())
                },
                cls=NumpyEncoder) + '\n')

    if args.save_encodings is not None and args.test is not None:
        utils.save_encodings(test, model, args.save_encodings, 'test')

    return model
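
The NumpyEncoder passed to json.dumps above is not defined in this excerpt. A minimal sketch of such an encoder is shown below; this is an assumption about its behavior, not the repository's actual class.

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    # Minimal sketch: convert numpy scalars and arrays so the results dict
    # above can be serialized with json.dumps(..., cls=NumpyEncoder).
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)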