import torch.nn as nn
import torch.nn.functional as F
# `summary` is assumed to be torchsummary's summary(); `ModelTrainer` is a project-local
# training helper defined elsewhere in this repository.
from torchsummary import summary


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, name="Model"):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.name = name
        self.trainer = None
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        # The first block of each stage may downsample (stride > 1); the rest keep stride 1.
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return F.log_softmax(out, dim=-1)

    def summary(self, input_size):
        summary(self, input_size=input_size)

    def gotrain(self, optimizer, train_loader, test_loader, epochs, statspath,
                scheduler=None, batch_scheduler=False, L1lambda=0):
        self.trainer = ModelTrainer(self, optimizer, train_loader, test_loader, statspath,
                                    scheduler, batch_scheduler, L1lambda)
        self.trainer.run(epochs)

    def stats(self):
        return self.trainer.stats if self.trainer else None
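# --- Usage sketch (not part of the original snippet) ---
# ResNet expects `block` to expose an `expansion` attribute and an
# `(in_planes, planes, stride)` constructor. The BasicBlock below is an assumed,
# standard residual block matching that interface, with a ResNet-18-style factory.
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            # Project the identity path when the shape changes.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)
        return F.relu(out)


def ResNet18(num_classes=10):
    # ResNet-18 layout: two BasicBlocks per stage.
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)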
class Net(nn.Module):
    """Base network that defines helper functions, summary and mapping to device."""

    def __init__(self, name="Model"):
        super(Net, self).__init__()
        self.trainer = None
        self.name = name

    def conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1, groups=1,
               padding=1, bias=False, padding_mode="zeros"):
        return [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=kernel_size, groups=groups, dilation=dilation,
                          padding=padding, bias=bias, padding_mode=padding_mode)]

    def separable_conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1,
                         padding=1, bias=False, padding_mode="zeros"):
        # Referenced by create_depthwise_conv2d but not shown in the original snippet;
        # the body below is an assumed standard depthwise-separable implementation
        # (depthwise convolution followed by a 1x1 pointwise convolution).
        return [nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
                          kernel_size=kernel_size, groups=in_channels, dilation=dilation,
                          padding=padding, bias=bias, padding_mode=padding_mode),
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=(1, 1), bias=bias)]

    def activate(self, l, out_channels, bn=True, dropout=0, relu=True):
        if bn:
            l.append(nn.BatchNorm2d(out_channels))
        if dropout > 0:
            l.append(nn.Dropout(dropout))
        if relu:
            l.append(nn.ReLU())
        return nn.Sequential(*l)

    def create_conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1, groups=1,
                      padding=1, bias=False, bn=True, dropout=0, relu=True, padding_mode="zeros"):
        return self.activate(self.conv2d(in_channels=in_channels, out_channels=out_channels,
                                         kernel_size=kernel_size, groups=groups, dilation=dilation,
                                         padding=padding, bias=bias, padding_mode=padding_mode),
                             out_channels, bn, dropout, relu)

    def create_depthwise_conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1,
                                padding=1, bias=False, bn=True, dropout=0, relu=True,
                                padding_mode="zeros"):
        return self.activate(self.separable_conv2d(in_channels=in_channels, out_channels=out_channels,
                                                   kernel_size=kernel_size, dilation=dilation,
                                                   padding=padding, bias=bias,
                                                   padding_mode=padding_mode),
                             out_channels, bn, dropout, relu)

    def summary(self, input_size):  # e.g. input_size=(1, 28, 28)
        summary(self, input_size=input_size)

    def gotrain(self, optimizer, train_loader, test_loader, epochs, statspath,
                scheduler=None, batch_scheduler=False, L1lambda=0):
        self.trainer = ModelTrainer(self, optimizer, train_loader, test_loader, statspath,
                                    scheduler, batch_scheduler, L1lambda)
        self.trainer.run(epochs)

    def stats(self):
        return self.trainer.stats if self.trainer else None
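# --- Usage sketch (not part of the original snippet) ---
# A small CIFAR-10-style model built on the Net helpers above; the layer widths,
# dropout value and input shape are illustrative assumptions.
class SmallNet(Net):
    def __init__(self, name="SmallNet"):
        super(SmallNet, self).__init__(name=name)
        self.block1 = self.create_conv2d(3, 32)
        self.block2 = self.create_depthwise_conv2d(32, 64, dropout=0.05)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        x = self.block2(self.block1(x))
        x = self.pool(x).view(x.size(0), -1)
        return F.log_softmax(self.fc(x), dim=-1)


# model = SmallNet()
# model.summary((3, 32, 32))
# model.gotrain(optimizer, train_loader, test_loader, epochs=20, statspath="stats.json")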
import copy
import os
import time
import cPickle as pkl   # Python 2 code (print statements, xrange); use `pickle` on Python 3

import numpy

# TextIterator, RNNLM, ModelTrainer, TextSampler, prepare_data, pred_probs and
# f_log_probs are assumed to be provided elsewhere in this project.


def train(train_path, validation_path, dictionary_path, model_path,
          reload_state=False,
          dim_word=100,        # word vector dimensionality
          dim=1000,            # the number of LSTM units
          encoder='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          diag_c=0.,
          lrate=0.01,
          n_words=100000,
          maxlen=100,          # maximum length of the description
          optimizer='rmsprop',
          batch_size=16,
          valid_batch_size=16,
          validFreq=1000,
          saveFreq=1000,       # save the parameters after every saveFreq updates
          sampleFreq=100,      # generate some text samples after every sampleFreq updates
          profile=False):

    # Model options
    model_options = locals().copy()

    # Build word -> code and code -> word dictionaries (codes 0 and 1 are reserved).
    worddicts = dict()
    worddicts_r = dict()
    with open(dictionary_path, 'rb') as f:
        for (i, line) in enumerate(f):
            word = line.strip()
            code = i + 2
            worddicts_r[code] = word
            worddicts[word] = code

    # Reload options
    if reload_state and os.path.exists(model_path):
        with open('%s.pkl' % model_path, 'rb') as f:
            model_options = pkl.load(f)

    print '### Loading data.'
    train = TextIterator(train_path, worddicts,
                         n_words_source=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(validation_path, worddicts,
                         n_words_source=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print '### Building neural network.'
    rnnlm = RNNLM(model_options)
    trainer = ModelTrainer(rnnlm, optimizer, model_options)
    sampler = TextSampler(rnnlm, model_options)

    print '### Training neural network.'
    best_params = None
    bad_counter = 0
    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x in train:
            n_samples += len(x)
            uidx += 1

            x, x_mask = prepare_data(x, maxlen=maxlen, n_words=n_words)
            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = trainer.f_grad_shared(x, x_mask)
            trainer.f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                # Save the best parameters, or the current state if best_params is None.
                rnnlm.save_params(best_params)
                # Save the training options.
                pkl.dump(model_options, open('%s.pkl' % model_path, 'wb'))

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(5):
                    sample, score = sampler.generate()
                    print 'Sample ', jj, ': ',
                    ss = sample
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r:
                            print worddicts_r[vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                valid_errs = pred_probs(f_log_probs, prepare_data, model_options, valid)
                valid_err = valid_errs.mean()
                rnnlm.error_history.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(rnnlm.error_history).min():
                    best_params = rnnlm.get_param_values()
                    bad_counter = 0
                if len(rnnlm.error_history) > patience and \
                        valid_err >= numpy.array(rnnlm.error_history)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_params is not None:
        rnnlm.set_param_values(best_params)

    valid_err = pred_probs(f_log_probs, prepare_data, model_options, valid).mean()
    print 'Valid ', valid_err

    params = copy.copy(best_params)
    numpy.savez(model_path,
                zipped_params=best_params,
                error_history=rnnlm.error_history,
                **params)

    return valid_err
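# --- Usage sketch (not part of the original snippet) ---
# Example invocation of train(); all paths and the smaller hyperparameters below
# are placeholders, not values from the original project.
if __name__ == '__main__':
    final_valid_err = train('data/train.txt',
                            'data/valid.txt',
                            'data/dictionary.txt',
                            'models/rnnlm',
                            dim_word=128,
                            dim=512,
                            max_epochs=10,
                            batch_size=32,
                            validFreq=500,
                            saveFreq=500,
                            sampleFreq=500)
    print 'Final validation error: ', final_valid_err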