def parse_args(args):
    """Parse the command-line options of the feature-count convergence plot.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``seeds`` and ``num_ps`` are
        replaced by ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a list into the string
    form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(description="""
            Plot for the simulation convergence check: what if num features grows
            """)
    parser.add_argument('--result-folder',
                        type=str,
                        default="simulation_convergence_num_p")
    parser.add_argument(
        '--spinn-template',
        type=str,
        default="_output/seed_%d/num_train_200/num_p_%d/fitted_spinn.pkl")
    parser.add_argument(
        '--file-template',
        type=str,
        default="_output/seed_%d/num_train_200/num_p_%d/fitted_spinn.csv")
    parser.add_argument('--num-ps',
                        type=str,
                        default=make_params([25, 50, 100, 200, 400]))
    parser.add_argument('--lasso-ratio', type=float, default=0.1)
    parser.add_argument('--max-relevant-idx', type=int, default=6)
    parser.add_argument('--seeds', type=str, default=make_params(range(1, 21)))
    parser.add_argument('--out-plot',
                        type=str,
                        default="_output/plot_simulation_mse.png")
    parser.add_argument('--out-weight-plot',
                        type=str,
                        default="_output/plot_simulation_weights.png")
    args = parser.parse_args(args)
    # Convert the string-valued list options into typed values.
    args.seeds = process_params(args.seeds, int)
    args.num_ps = process_params(args.num_ps, int)
    return args
예제 #2
0
def parse_args(args):
    """Parse the command-line options of the main-simulation (alpha) plots.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``group_lasso`` and ``lasso`` are
        replaced by ``process_params(<raw option string>, float)`` and
        ``seeds`` by ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a sequence into the
    string form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(
        description="Plot for the main simulations")
    parser.add_argument(
        '--result-folder',
        type=str,
        default="simulation_alpha")
    parser.add_argument(
        '--lasso',
        type=str,
        default=make_params(np.arange(-.5, -6, step=-.5)))
    parser.add_argument(
        '--group-lasso',
        type=str,
        default=make_params(np.arange(-.5, -6, step=-.5)))
    parser.add_argument(
        '--max-relevant-idx',
        type=int,
        default=6)
    parser.add_argument(
        '--seeds',
        type=str,
        default="4,5,6,7,8,9,10,11")
    parser.add_argument(
        '--file-template',
        type=str,
        default="_output/seed_%d/group_lasso_%.2f/lasso_%.2f/fitted_spinn.%s")
    parser.add_argument(
        '--out-mse-plot',
        type=str,
        default="_output/plot_alpha_mse.png")
    parser.add_argument(
        '--out-irrelev-weight-plot',
        type=str,
        default="_output/plot_alpha_irrelev_weight.png")
    parser.add_argument(
        '--out-relev-weight-plot',
        type=str,
        default="_output/plot_alpha_relev_weight.png")
    parser.add_argument(
        '--out-nonzero-hidden-plot',
        type=str,
        default="_output/plot_alpha_nonzero_hidden.png")
    parser.add_argument(
        '--out-nonzero-inputs-plot',
        type=str,
        default="_output/plot_alpha_nonzero_inputs.png")
    args = parser.parse_args(args)
    # Convert the string-valued list options into typed values.
    args.group_lasso = process_params(args.group_lasso, float)
    args.lasso = process_params(args.lasso, float)
    args.seeds = process_params(args.seeds, int)
    return args
예제 #3
0
class MultiplicativeLSTM(Benchmark):
    """Benchmark that repeatedly applies ``mlstm.MultiplicativeLSTMCell``.

    ``prepare`` allocates random input, state and weight Variables;
    ``time`` calls the cell ``N_ITER`` times, passing each result back in as
    the second argument of the next call.
    """
    # parameters taken from the paper
    default_params = dict(batch_size=3,
                          input_size=100,
                          hidden_size=400,
                          embed_size=400,
                          cuda=True)
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Create the random tensors used by ``time``.

        Every tensor is moved to the GPU when ``p.cuda`` is true.
        """
        def cast(tensor):
            return tensor.cuda() if p.cuda else tensor

        self.input = Variable(cast(torch.randn(p.batch_size, p.input_size)))
        self.hiddens = (Variable(cast(torch.randn(p.batch_size,
                                                  p.hidden_size))),
                        Variable(cast(torch.randn(p.batch_size,
                                                  p.hidden_size))))
        self.w_xm = Variable(cast(torch.randn(p.embed_size, p.input_size)))
        self.w_hm = Variable(cast(torch.randn(p.embed_size, p.hidden_size)))
        self.w_ih = Variable(cast(torch.randn(4 * p.hidden_size,
                                              p.input_size)))
        self.w_mh = Variable(cast(torch.randn(4 * p.hidden_size,
                                              p.embed_size)))

    def time(self, p):
        """Run the cell ``N_ITER`` times on the prepared tensors."""
        # TODO: this is totally bogus
        h = self.hiddens
        for _ in range(N_ITER):
            # TODO: Don't keep using the same input
            h = mlstm.MultiplicativeLSTMCell(self.input, h, self.w_xm,
                                             self.w_hm, self.w_ih, self.w_mh)
        if p.cuda:
            # CUDA kernels are queued asynchronously; wait for them so the
            # timed region covers the actual GPU work, not just the launches.
            torch.cuda.synchronize()
예제 #4
0
def parse_args(args):
    """Parse the command-line options of the riboflavin plot.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``methods`` is split on commas into
        a list of strings and ``seeds`` is replaced by
        ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a sequence into the
    string form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(description="Plot riboflavin")
    parser.add_argument('--result-folder', type=str, default="riboflavin")
    parser.add_argument(
        '--spinn-file-template',
        type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_spinn.pkl")
    parser.add_argument(
        '--nn-file-template',
        type=str,
        #default="_output/seed_%d/relu_0/layer_10,10:2,10:4/fitted_%s.csv")
        default="_output/seed_%d/relu_0/layer_3,10/fitted_%s.csv")
    parser.add_argument('--file-template',
                        type=str,
                        default="_output/seed_%d/fitted_%s.csv")
    parser.add_argument('--methods',
                        type=str,
                        default="lasso,trees,spinn,spam,ridge_nn")
    parser.add_argument('--seeds',
                        type=str,
                        default=make_params(range(40, 70)))
    args = parser.parse_args(args)
    # Convert the string-valued list options into lists.
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    return args
예제 #5
0
def parse_args(args):
    """Parse the command-line options of the sample-size convergence plot.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``seeds`` and ``n_trains`` are
        replaced by ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a sequence into the
    string form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(
        description="Plot for the simulation convergence check")
    parser.add_argument(
        '--result-folder',
        type=str,
        default="simulation_convergence_num_obs")
    parser.add_argument(
        '--spinn-template',
        type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.pkl")
    parser.add_argument(
        '--file-template',
        type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.csv")
    parser.add_argument(
        '--n-trains',
        type=str,
        default=make_params([100, 200, 400, 800, 1600, 3200]))
    parser.add_argument(
        '--lasso-ratio',
        type=float,
        default=0.1)
    parser.add_argument(
        '--max-relevant-idx',
        type=int,
        default=6)
    parser.add_argument(
        '--seeds',
        type=str,
        default=make_params(range(11, 31)))
    parser.add_argument(
        '--out-plot',
        type=str,
        default="_output/plot_simulation_mse.png")
    parser.add_argument(
        '--out-weight-plot',
        type=str,
        default="_output/plot_simulation_weights.png")
    args = parser.parse_args(args)
    # Convert the string-valued list options into typed values.
    args.seeds = process_params(args.seeds, int)
    args.n_trains = process_params(args.n_trains, int)
    return args
예제 #6
0
def parse_args(args):
    """Parse the command-line options of the univariate-additive simulation plots.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``methods`` is split on commas into
        a list of strings; ``seeds`` and ``n_trains`` are replaced by
        ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a sequence into the
    string form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(
        description="Plot for the main simulations")
    parser.add_argument('--result-folder',
                        type=str,
                        default="simulation_univar_additive")
    parser.add_argument(
        '--spinn-template',
        type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.pkl")
    parser.add_argument('--file-template',
                        type=str,
                        default="_output/seed_%d/n_train_%d/fitted_%s.csv")
    parser.add_argument(
        '--methods',
        type=str,
        default="spam,lasso,spinn,gam,trees,ridge_nn,oracle_nn")
    parser.add_argument('--max-relevant-idx', type=int, default=6)
    parser.add_argument(
        '--n-trains',
        type=str,
        #default=make_params([125, 250, 500, 1000]))
        default=make_params([125, 250, 500, 1000, 2000, 4000, 8000]))
    parser.add_argument('--seeds', type=str, default=make_params(range(2, 22)))
    parser.add_argument('--out-plot',
                        type=str,
                        default="_output/plot_simulation_mse.png")
    parser.add_argument('--out-weight-plot',
                        type=str,
                        default="_output/plot_simulation_weights.png")
    parser.add_argument('--show-legend', action='store_true')
    args = parser.parse_args(args)
    # Convert the string-valued list options into lists.
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    args.n_trains = process_params(args.n_trains, int)
    return args
예제 #7
0
def parse_args(args):
    """Parse the command-line options of the peptide-results plot.

    Args:
        args: sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed ``argparse.Namespace``. ``methods`` is split on commas into
        a list of strings and ``seeds`` is replaced by
        ``process_params(<raw option string>, int)``.

    NOTE(review): ``make_params`` presumably serialises a sequence into the
    string form that ``process_params`` parses back -- confirm against their
    definitions.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would make this text the program name
    # shown in the usage line and in error messages.
    parser = argparse.ArgumentParser(description="Plot peptide results")
    parser.add_argument(
        '--result-folder',
        type=str,
        default="peptide_binding/_output")
    parser.add_argument(
        '--hla',
        type=str,
        default="A")
    parser.add_argument(
        '--spinn-file-template',
        type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_spinn.pkl")
    parser.add_argument(
        '--nn-file-template',
        type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_%s.csv")
        #default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_%s_final.csv")
    parser.add_argument(
        '--file-template',
        type=str,
        default="extractor_1/prop_0.20/seed_%d/fitted_%s.csv")
    parser.add_argument(
        '--methods',
        type=str,
        default="lasso,trees,spam,spinn,ridge_nn")
    parser.add_argument(
        '--seeds',
        type=str,
        default=make_params(range(3, 23)))
    args = parser.parse_args(args)
    # Convert the string-valued list options into lists.
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    return args
예제 #8
0
class BNLSTM(Benchmark):
    """Benchmark running forward/backward passes through a ``bnlstm.LSTM`` model.

    ``prepare`` builds the model, the loss and all-zero input/target batches;
    ``time`` runs one forward and backward pass per batch.
    """
    default_params = dict(
        hidden_size=100,
        max_length=784,
        pmnist=False,
        num_batches=1,
    )
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Create the model, criterion and data for the configuration ``p``."""
        # The CPU version is slow...
        if p.cuda:
            p['batch_size'] = 20
        else:
            p['batch_size'] = 5

        def to_device(tensor):
            # Move to the GPU only for CUDA runs.
            if p.cuda:
                return tensor.cuda()
            return tensor

        def random_state(data):
            # Fresh N(0, 0.1) tensor with one row per sample, created with
            # the same tensor type as ``data``.
            blank = data.data.new(data.size(0), p.hidden_size)
            return Variable(blank.normal_(0, 0.1))

        class Model(nn.Module):
            """Batch-norm LSTM followed by a linear layer with 10 outputs."""

            def __init__(self):
                super(Model, self).__init__()
                self.rnn = bnlstm.LSTM(
                    cell_class=bnlstm.BNLSTMCell,
                    input_size=1,
                    hidden_size=p.hidden_size,
                    batch_first=True,
                    max_length=p.max_length,
                )
                # 10 digits in mnist
                self.fc = nn.Linear(in_features=p.hidden_size,
                                    out_features=10)

            def forward(self, data):
                if p.pmnist:
                    hx = None
                else:
                    # Hidden state first, then cell state.
                    hx = (random_state(data), random_state(data))
                _, (h_n, _) = self.rnn(input_=data, hx=hx)
                return self.fc(h_n[0])

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()

        self.data_batches = []
        for _ in range(p.num_batches):
            inputs = torch.zeros(p.batch_size, 28 * 28, 1)
            self.data_batches.append(Variable(to_device(inputs)))

        self.target_batches = []
        for _ in range(p.num_batches):
            labels = to_device(torch.zeros(p.batch_size)).long()
            self.target_batches.append(Variable(labels))

        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()

    def time(self, p):
        """Run a forward and backward pass over every prepared batch."""
        total_loss = 0
        batches = zip(self.data_batches, self.target_batches)
        for inputs, labels in batches:
            loss = self.criterion(input=self.model(inputs), target=labels)
            loss.backward()
            total_loss += loss.data  # CUDA sync point
        if p.cuda:
            torch.cuda.synchronize()
예제 #9
0
class WLM(Benchmark):
    """Benchmark of an embedding -> RNN -> linear decoder model.

    ``prepare`` builds the model, the loss and all-zero integer batches;
    ``time`` runs one forward and backward pass per batch.
    """
    default_params = dict(
        rnn_type='LSTM',
        num_tokens=10000,
        embedding_size=200,
        hidden_size=200,
        num_layers=2,
        batch_size=20,
        bptt=35,
        dropout=0.5,
        num_batches=10,
        cuda=True,
    )
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Create the model, criterion and data for the configuration ``p``."""

        def build_rnn():
            # 'LSTM' / 'GRU' map straight onto the nn class of that name;
            # anything else must be one of the two plain-RNN variants.
            kind = p.rnn_type
            if kind not in ('LSTM', 'GRU'):
                activation = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[kind]
                return nn.RNN(p.embedding_size,
                              p.hidden_size,
                              p.num_layers,
                              nonlinearity=activation,
                              dropout=p.dropout)
            rnn_cls = getattr(nn, kind)
            return rnn_cls(p.embedding_size,
                           p.hidden_size,
                           p.num_layers,
                           dropout=p.dropout)

        class Model(nn.Module):
            """Embedding, recurrent layers and a linear decoder with dropout."""

            def __init__(self):
                super(Model, self).__init__()
                self.drop = nn.Dropout(p.dropout)
                self.rnn = build_rnn()
                self.encoder = nn.Embedding(p.num_tokens, p.embedding_size)
                self.decoder = nn.Linear(p.hidden_size, p.num_tokens)

            def forward(self, token_ids):
                embedded = self.drop(self.encoder(token_ids))
                rnn_out, hidden = self.rnn(embedded)
                rnn_out = self.drop(rnn_out)
                dim0, dim1 = rnn_out.size(0), rnn_out.size(1)
                # Flatten the first two dimensions for the linear decoder,
                # then restore them on the way out.
                flat = rnn_out.view(dim0 * dim1, rnn_out.size(2))
                decoded = self.decoder(flat)
                return decoded.view(dim0, dim1, decoded.size(1)), hidden

        def as_long(tensor):
            # Integer tensor, moved to the GPU only for CUDA runs.
            converted = tensor.long()
            if p.cuda:
                return converted.cuda()
            return converted

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()

        self.data_batches = []
        for _ in range(p.num_batches):
            inputs = torch.zeros(p.bptt, p.batch_size)
            self.data_batches.append(Variable(as_long(inputs)))

        self.target_batches = []
        for _ in range(p.num_batches):
            labels = torch.zeros(p.bptt * p.batch_size)
            self.target_batches.append(Variable(as_long(labels)))

        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()

    def time(self, p):
        """Run a forward and backward pass over every prepared batch."""
        total_loss = 0
        batches = zip(self.data_batches, self.target_batches)
        for inputs, labels in batches:
            output, _ = self.model(inputs)
            flat_output = output.view(-1, output.size(2))
            loss = self.criterion(flat_output, labels)
            loss.backward()
            total_loss += loss.data  # CUDA sync point
        if p.cuda:
            torch.cuda.synchronize()