def parse_args(args):
    """Parse arguments for the convergence-in-num-features simulation plot.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``seeds`` and ``num_ps`` are converted from their
        comma-joined string form into lists of ints via ``process_params``.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`. Passing the title positionally made `--help` display the
    # whole sentence as the program name; pass it as `description` instead.
    parser = argparse.ArgumentParser(
        description="Plot for the simulation convergence check: what if num features grows")
    parser.add_argument(
        '--result-folder', type=str,
        default="simulation_convergence_num_p")
    parser.add_argument(
        '--spinn-template', type=str,
        default="_output/seed_%d/num_train_200/num_p_%d/fitted_spinn.pkl")
    parser.add_argument(
        '--file-template', type=str,
        default="_output/seed_%d/num_train_200/num_p_%d/fitted_spinn.csv")
    parser.add_argument(
        '--num-ps', type=str,
        default=make_params([25, 50, 100, 200, 400]))
    parser.add_argument('--lasso-ratio', type=float, default=0.1)
    parser.add_argument('--max-relevant-idx', type=int, default=6)
    parser.add_argument(
        '--seeds', type=str,
        default=make_params(range(1, 21)))
    parser.add_argument(
        '--out-plot', type=str,
        default="_output/plot_simulation_mse.png")
    parser.add_argument(
        '--out-weight-plot', type=str,
        default="_output/plot_simulation_weights.png")
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.seeds = process_params(args.seeds, int)
    args.num_ps = process_params(args.num_ps, int)
    return args
def parse_args(args):
    """Parse arguments for the penalty-parameter (alpha) simulation plots.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``group_lasso`` and ``lasso`` become lists of floats
        and ``seeds`` a list of ints via ``process_params``.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`; pass the title as `description` so `--help` is sane.
    parser = argparse.ArgumentParser(description="Plot for the main simulations")
    parser.add_argument(
        '--result-folder', type=str,
        default="simulation_alpha")
    # Penalty grids are log-scale exponents, swept from -0.5 down to -5.5.
    parser.add_argument(
        '--lasso', type=str,
        default=make_params(np.arange(-.5, -6, step=-.5)))
    parser.add_argument(
        '--group-lasso', type=str,
        default=make_params(np.arange(-.5, -6, step=-.5)))
    parser.add_argument(
        '--max-relevant-idx', type=int,
        default=6)
    parser.add_argument(
        '--seeds', type=str,
        default="4,5,6,7,8,9,10,11")
    parser.add_argument(
        '--file-template', type=str,
        default="_output/seed_%d/group_lasso_%.2f/lasso_%.2f/fitted_spinn.%s")
    parser.add_argument(
        '--out-mse-plot', type=str,
        default="_output/plot_alpha_mse.png")
    parser.add_argument(
        '--out-irrelev-weight-plot', type=str,
        default="_output/plot_alpha_irrelev_weight.png")
    parser.add_argument(
        '--out-relev-weight-plot', type=str,
        default="_output/plot_alpha_relev_weight.png")
    parser.add_argument(
        '--out-nonzero-hidden-plot', type=str,
        default="_output/plot_alpha_nonzero_hidden.png")
    parser.add_argument(
        '--out-nonzero-inputs-plot', type=str,
        default="_output/plot_alpha_nonzero_inputs.png")
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.group_lasso = process_params(args.group_lasso, float)
    args.lasso = process_params(args.lasso, float)
    args.seeds = process_params(args.seeds, int)
    return args
class MultiplicativeLSTM(Benchmark):
    """Benchmark driving a multiplicative-LSTM cell forward repeatedly."""

    # parameters taken from the paper
    default_params = dict(batch_size=3, input_size=100, hidden_size=400,
                          embed_size=400, cuda=True)
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Allocate the random input, hidden state, and weight matrices."""
        def rand_var(*shape):
            # Build a random tensor, move it to the GPU if requested, and
            # wrap it in a Variable.
            t = torch.randn(*shape)
            return Variable(t.cuda() if p.cuda else t)

        self.input = rand_var(p.batch_size, p.input_size)
        self.hiddens = (rand_var(p.batch_size, p.hidden_size),
                        rand_var(p.batch_size, p.hidden_size))
        self.w_xm = rand_var(p.embed_size, p.input_size)
        self.w_hm = rand_var(p.embed_size, p.hidden_size)
        self.w_ih = rand_var(4 * p.hidden_size, p.input_size)
        self.w_mh = rand_var(4 * p.hidden_size, p.embed_size)

    def time(self, p):
        # TODO: this is totally bogus
        state = self.hiddens
        for _ in range(N_ITER):
            # TODO: Don't keep using the same input
            state = mlstm.MultiplicativeLSTMCell(
                self.input, state, self.w_xm, self.w_hm, self.w_ih, self.w_mh)
def parse_args(args):
    """Parse arguments for the riboflavin data-set result plots.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``methods`` is split into a list of method names and
        ``seeds`` converted to a list of ints.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`; pass the title as `description` so `--help` is sane.
    parser = argparse.ArgumentParser(description="Plot riboflavin")
    parser.add_argument(
        '--result-folder', type=str,
        default="riboflavin")
    parser.add_argument(
        '--spinn-file-template', type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_spinn.pkl")
    # Cleaned up: dropped the stale commented-out alternative default here.
    parser.add_argument(
        '--nn-file-template', type=str,
        default="_output/seed_%d/relu_0/layer_3,10/fitted_%s.csv")
    parser.add_argument(
        '--file-template', type=str,
        default="_output/seed_%d/fitted_%s.csv")
    parser.add_argument(
        '--methods', type=str,
        default="lasso,trees,spinn,spam,ridge_nn")
    parser.add_argument(
        '--seeds', type=str,
        default=make_params(range(40, 70)))
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    return args
def parse_args(args):
    """Parse arguments for the convergence-in-num-observations plot.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``seeds`` and ``n_trains`` are converted to lists of
        ints via ``process_params``.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`; pass the title as `description` so `--help` is sane.
    parser = argparse.ArgumentParser(
        description="Plot for the simulation convergence check")
    parser.add_argument(
        '--result-folder', type=str,
        default="simulation_convergence_num_obs")
    parser.add_argument(
        '--spinn-template', type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.pkl")
    parser.add_argument(
        '--file-template', type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.csv")
    parser.add_argument(
        '--n-trains', type=str,
        default=make_params([100, 200, 400, 800, 1600, 3200]))
    parser.add_argument(
        '--lasso-ratio', type=float,
        default=0.1)
    parser.add_argument(
        '--max-relevant-idx', type=int,
        default=6)
    parser.add_argument(
        '--seeds', type=str,
        default=make_params(range(11, 31)))
    parser.add_argument(
        '--out-plot', type=str,
        default="_output/plot_simulation_mse.png")
    parser.add_argument(
        '--out-weight-plot', type=str,
        default="_output/plot_simulation_weights.png")
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.seeds = process_params(args.seeds, int)
    args.n_trains = process_params(args.n_trains, int)
    return args
def parse_args(args):
    """Parse arguments for the univariate-additive main simulation plots.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``methods`` is split into a list, ``seeds`` and
        ``n_trains`` are converted to lists of ints.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`; pass the title as `description` so `--help` is sane.
    parser = argparse.ArgumentParser(description="Plot for the main simulations")
    parser.add_argument(
        '--result-folder', type=str,
        default="simulation_univar_additive")
    # Cleaned up: dropped the commented-out duplicate of this default.
    parser.add_argument(
        '--spinn-template', type=str,
        default="_output/seed_%d/n_train_%d/fitted_spinn.pkl")
    parser.add_argument(
        '--file-template', type=str,
        default="_output/seed_%d/n_train_%d/fitted_%s.csv")
    parser.add_argument(
        '--methods', type=str,
        default="spam,lasso,spinn,gam,trees,ridge_nn,oracle_nn")
    parser.add_argument('--max-relevant-idx', type=int, default=6)
    # Cleaned up: dropped the stale commented-out smaller training-size grid.
    parser.add_argument(
        '--n-trains', type=str,
        default=make_params([125, 250, 500, 1000, 2000, 4000, 8000]))
    parser.add_argument(
        '--seeds', type=str,
        default=make_params(range(2, 22)))
    parser.add_argument(
        '--out-plot', type=str,
        default="_output/plot_simulation_mse.png")
    parser.add_argument(
        '--out-weight-plot', type=str,
        default="_output/plot_simulation_weights.png")
    parser.add_argument('--show-legend', action='store_true')
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    args.n_trains = process_params(args.n_trains, int)
    return args
def parse_args(args):
    """Parse arguments for the peptide-binding result plots.

    Parameters
    ----------
    args : list of str
        Raw command-line arguments (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Parsed options; ``methods`` is split into a list of method names and
        ``seeds`` converted to a list of ints.
    """
    # BUGFIX: ArgumentParser's first positional parameter is `prog`, not
    # `description`; pass the title as `description` so `--help` is sane.
    parser = argparse.ArgumentParser(description="Plot peptide results")
    parser.add_argument(
        '--result-folder', type=str,
        default="peptide_binding/_output")
    parser.add_argument(
        '--hla', type=str,
        default="A")
    parser.add_argument(
        '--spinn-file-template', type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_spinn.pkl")
    # Cleaned up: dropped the stale commented-out `_final.csv` variant.
    parser.add_argument(
        '--nn-file-template', type=str,
        default="extractor_1/prop_0.20/seed_%d/relu_0/fitted_%s.csv")
    parser.add_argument(
        '--file-template', type=str,
        default="extractor_1/prop_0.20/seed_%d/fitted_%s.csv")
    parser.add_argument(
        '--methods', type=str,
        default="lasso,trees,spam,spinn,ridge_nn")
    parser.add_argument(
        '--seeds', type=str,
        default=make_params(range(3, 23)))
    # NOTE: removed the argument-less `parser.set_defaults()` call — it was a
    # no-op.
    args = parser.parse_args(args)
    args.methods = args.methods.split(",")
    args.seeds = process_params(args.seeds, int)
    return args
class BNLSTM(Benchmark):
    """Benchmark: forward + backward through a batch-normalized LSTM classifier."""

    default_params = dict(hidden_size=100, max_length=784, pmnist=False,
                          num_batches=1)
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Build the model, loss, and zero-filled data/target batches."""
        # The CPU version is slow...
        p['batch_size'] = 20 if p.cuda else 5

        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.rnn = bnlstm.LSTM(cell_class=bnlstm.BNLSTMCell,
                                       input_size=1,
                                       hidden_size=p.hidden_size,
                                       batch_first=True,
                                       max_length=p.max_length)
                # 10 digits in mnist
                self.fc = nn.Linear(in_features=p.hidden_size, out_features=10)

            def forward(self, data):
                if p.pmnist:
                    hx = None
                else:
                    # Randomly initialized hidden/cell state per forward pass.
                    make_state = lambda: Variable(
                        data.data.new(data.size(0), p.hidden_size).normal_(0, 0.1))
                    hx = (make_state(), make_state())
                _, (h_n, _) = self.rnn(input_=data, hx=hx)
                return self.fc(h_n[0])

        def to_device(tensor):
            return tensor.cuda() if p.cuda else tensor

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()
        self.data_batches = [
            Variable(to_device(torch.zeros(p.batch_size, 28 * 28, 1)))
            for _ in range(p.num_batches)
        ]
        self.target_batches = [
            Variable(to_device(torch.zeros(p.batch_size)).long())
            for _ in range(p.num_batches)
        ]
        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()

    def time(self, p):
        """Run one forward/backward pass per prepared batch."""
        running_loss = 0
        for data, targets in zip(self.data_batches, self.target_batches):
            logits = self.model(data)
            loss = self.criterion(input=logits, target=targets)
            loss.backward()
            running_loss += loss.data  # CUDA sync point
        if p.cuda:
            torch.cuda.synchronize()
class WLM(Benchmark):
    """Benchmark: word-language-model style RNN forward + backward pass."""

    default_params = dict(rnn_type='LSTM', num_tokens=10000, embedding_size=200,
                          hidden_size=200, num_layers=2, batch_size=20, bptt=35,
                          dropout=0.5, num_batches=10, cuda=True)
    params = make_params(cuda=over(True, False))

    def prepare(self, p):
        """Build embedding -> RNN -> decoder model plus zero data batches."""
        def build_rnn():
            # LSTM/GRU are looked up by name; otherwise fall back to a plain
            # RNN with the requested nonlinearity.
            if p.rnn_type in ['LSTM', 'GRU']:
                rnn_cls = getattr(nn, p.rnn_type)
                return rnn_cls(p.embedding_size, p.hidden_size, p.num_layers,
                               dropout=p.dropout)
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[p.rnn_type]
            return nn.RNN(p.embedding_size, p.hidden_size, p.num_layers,
                          nonlinearity=nonlinearity, dropout=p.dropout)

        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.drop = nn.Dropout(p.dropout)
                self.rnn = build_rnn()
                self.encoder = nn.Embedding(p.num_tokens, p.embedding_size)
                self.decoder = nn.Linear(p.hidden_size, p.num_tokens)

            def forward(self, input):
                emb = self.drop(self.encoder(input))
                output, hidden = self.rnn(emb)
                output = self.drop(output)
                # Flatten (seq, batch, hid) to (seq*batch, hid) for decoding,
                # then restore the sequence/batch dimensions.
                flat = output.view(output.size(0) * output.size(1),
                                   output.size(2))
                decoded = self.decoder(flat)
                reshaped = decoded.view(output.size(0), output.size(1),
                                        decoded.size(1))
                return reshaped, hidden

        def cast(tensor):
            t = tensor.long()
            return t.cuda() if p.cuda else t

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()
        self.data_batches = [
            Variable(cast(torch.zeros(p.bptt, p.batch_size)))
            for _ in range(p.num_batches)
        ]
        self.target_batches = [
            Variable(cast(torch.zeros(p.bptt * p.batch_size)))
            for _ in range(p.num_batches)
        ]
        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()

    def time(self, p):
        """Run one forward/backward pass per prepared batch."""
        running_loss = 0
        for data, targets in zip(self.data_batches, self.target_batches):
            output, _ = self.model(data)
            loss = self.criterion(output.view(-1, output.size(2)), targets)
            loss.backward()
            running_loss += loss.data  # CUDA sync point
        if p.cuda:
            torch.cuda.synchronize()