def train(self, num_epochs):
  self.print_net_summary()
  util.log('Starting training...')
  start_epoch = self.curr_epoch

  while (self.curr_epoch - start_epoch <= num_epochs and
         self.should_continue_training()):
    batch_start = time.time()
    train_data = self.train_dp.get_next_batch(self.batch_size)
    self.curr_epoch = train_data.epoch
    self.curr_batch += 1

    input, label = train_data.data, train_data.labels
    self.net.train_batch(input, label)
    cost, correct, numCase = self.net.get_batch_information()
    self.train_outputs += [({'logprob': [cost, 1 - correct]}, numCase, self.elapsed())]
    print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
        self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - batch_start)

    if self.check_test_data():
      self.get_test_error()

    if self.factor != 1.0 and self.check_adjust_lr():
      self.adjust_lr()

    if self.check_save_checkpoint():
      self.save_checkpoint()

  #self.get_test_error()
  self.save_checkpoint()
  self.report()
  self._finished_training()
def dump(self, checkpoint, suffix=0):
  if self.checkpoint_dir is None:
    return

  cp_pattern = os.path.join(self.checkpoint_dir, '*')
  cp_files = [(f, os.stat(f)) for f in glob.glob(cp_pattern)]
  cp_files = list(reversed(sorted(cp_files, key=lambda f: f[1].st_mtime)))

  # Newest checkpoints come first; drop the oldest until we fit under max_cp_size.
  while sum([f[1].st_size for f in cp_files]) > self.max_cp_size:
    os.remove(cp_files.pop()[0])

  checkpoint_filename = "%d" % suffix
  checkpoint_filename = os.path.join(self.checkpoint_dir, checkpoint_filename)

  util.log('Writing checkpoint to %s', checkpoint_filename)
  sf = shelve.open(checkpoint_filename, flag='c', protocol=-1, writeback=False)
  for k, v in checkpoint.iteritems():
    sf[k] = v
  sf.sync()
  sf.close()
  util.log('save file finished')
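# Illustrative sketch (not from the original code): the size-capping policy above,
# shown standalone with stdlib calls only.  'checkpoint_dir' and 'max_cp_size' are
# hypothetical stand-ins for the dumper's attributes.
import glob
import os

def prune_old_checkpoints(checkpoint_dir, max_cp_size):
  # Sort newest-first so that pop() always removes the oldest file.
  files = [(f, os.stat(f)) for f in glob.glob(os.path.join(checkpoint_dir, '*'))]
  files = sorted(files, key=lambda item: item[1].st_mtime, reverse=True)
  while sum(st.st_size for _, st in files) > max_cp_size:
    os.remove(files.pop()[0])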
def __init__(self, name, poolSize=2, stride=2, start=0, disableBprop=False):
  Layer.__init__(self, name, 'pool', disableBprop)
  self.pool = 'avg'
  self.poolSize = poolSize
  self.stride = stride
  self.start = start
  util.log("pool_size:%s stride:%s start:%s", self.poolSize, self.stride, self.start)
def __init__(self, name, n_out, epsW=0.001, epsB=0.002, initW=0.01, initB=0.0,
             momW=0.0, momB=0.0, wc=0.0, dropRate=0.0, weight=None, bias=None,
             weightIncr=None, biasIncr=None, disableBprop=False):
  self.outputSize = n_out
  self.dropRate = dropRate

  WeightedLayer.__init__(self, name, 'fc', epsW, epsB, initW, initB, momW, momB, wc,
                         weight, bias, weightIncr, biasIncr, disableBprop)
  util.log('output_size:%s epsW:%s epsB:%s initW:%s initB:%s momW:%s momB:%s wc:%s dropRate:%s',
           self.outputSize, self.epsW, self.epsB, self.initW, self.initB, self.momW,
           self.momB, self.wc, self.dropRate)
def get_checkpoint(self):
  if self.checkpoint_dir is None:
    return None

  if self.test_id == '':
    cp_pattern = self.checkpoint_dir
  else:
    cp_pattern = os.path.join(self.checkpoint_dir, "*")

  cp_files = glob.glob(cp_pattern)
  if not cp_files:
    util.log_info('No checkpoint files found in %s' % cp_pattern)
    return None

  checkpoint_file = sorted(cp_files, key=os.path.getmtime)[-1]
  util.log('Loading from checkpoint file: %s', checkpoint_file)

  try:
    return shelve.open(checkpoint_file, flag='r', protocol=-1, writeback=False)
  except:
    # Not a shelve file; fall back to the zip-based checkpoint format.
    dict = {}
    with zipfile.ZipFile(checkpoint_file) as zf:
      for k in zf.namelist():
        dict[k] = cPickle.loads(zf.read(k))
    return dict
def __init__(self, name, pow=0.75, size=9, scale=0.001, disableBprop=False):
  Layer.__init__(self, name, 'rnorm', disableBprop)
  self.pow = pow
  self.size = size
  self.scale = scale
  self.scaler = self.scale / self.size ** 2
  self.denom = None
  util.log("pow:%s size:%s scale:%s scaler:%s", self.pow, self.size, self.scale, self.scaler)
def __init__(self, name, pow=0.75, size=9, scale=0.001, blocked=False, disableBprop=False):
  ResponseNormLayer.__init__(self, name, pow, size, scale, disableBprop)
  self.type = 'cmrnorm'
  self.scaler = self.scale / self.size
  self.blocked = blocked
  util.log("pow:%s size:%s scale:%s scaler:%s", self.pow, self.size, self.scale, self.scaler)
def attach(self, prev):
  input_shape = prev.get_output_shape()
  self.inputSize = int(np.prod(input_shape[0:3]))
  self.batchSize = input_shape[3]
  self.weightShape = (self.outputSize, self.inputSize)
  self.biasShape = (self.outputSize, 1)
  util.log('%s %s %s', input_shape, self.weightShape, self.biasShape)
  self._init_weights(self.weightShape, self.biasShape)
def train(self, num_epochs):
  self.print_net_summary()
  util.log('Starting training...')
  start_epoch = self.curr_epoch

  # Identity matrix used to replace the noise layer's weight on clean batches.
  clear_w = gpuarray.to_gpu(np.eye(self.net.layers[-2].weight.shape[0], dtype=np.float32))

  while (self.curr_epoch - start_epoch <= num_epochs and
         self.should_continue_training()):
    batch_start = time.time()
    train_data = self.train_dp.get_next_batch(self.batch_size)
    self.curr_epoch = train_data.epoch
    self.curr_batch += 1

    input, label = train_data.data, train_data.labels

    if self.train_dp.is_curr_batch_noisy == False:
      # Clean batch: freeze the noise layer and swap in the identity weight.
      assert self.net.layers[-2].name == 'noise'
      noisy_eps = self.net.layers[-2].epsW
      self.net.layers[-2].epsW = 0
      noisy_w = self.net.layers[-2].weight
      self.net.layers[-2].weight = clear_w
    else:
      if hasattr(self, 'noisy_factor'):
        self.net.adjust_learning_rate(self.noisy_factor)

    self.net.train_batch(input, label)

    if self.train_dp.is_curr_batch_noisy == False:
      # Restore the noise layer after the clean batch.
      self.net.layers[-2].epsW = noisy_eps
      self.net.layers[-2].weight = noisy_w
    else:
      if hasattr(self, 'noisy_factor'):
        self.net.adjust_learning_rate(1. / self.noisy_factor)

    cost, correct, numCase = self.net.get_batch_information()
    self.train_outputs += [({'logprob': [cost, 1 - correct]}, numCase, self.elapsed())]
    print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
        self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - batch_start)

    if self.check_test_data():
      # Evaluate with the identity weight so the noise layer does not affect test error.
      noisy_w = self.net.layers[-2].weight
      self.net.layers[-2].weight = clear_w
      self.get_test_error()
      self.net.layers[-2].weight = noisy_w

    if self.factor != 1.0 and self.check_adjust_lr():
      self.adjust_lr()

    if self.check_save_checkpoint():
      self.save_checkpoint()

  #self.get_test_error()
  self.save_checkpoint()
  self.report()
  self._finished_training()
def save_checkpoint(self):
  weight = self.net.get_weight_by_name(self.first_layer_name)
  self.base_weight = self._get_layer_weight(self.first_layer_name)
  weight_diff = weight - self.base_weight
  diff = np.sum(np.abs(weight_diff))
  self.diff_list.append(diff)
  util.log('%f', diff)
  self._log()
  trainer.Trainer.save_checkpoint(self)
def get_next_batch(self):
  self.get_next_index()
  filename = os.path.join(self.data_dir + '.%s' % self.curr_batch)
  util.log('reading from %s', filename)

  data_dic = util.load(filename)
  data = data_dic[self.data_name].transpose()
  labels = data_dic['labels']
  data = np.require(data, requirements='C', dtype=np.float32)

  return BatchData(data, labels, self.curr_epoch)
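# Illustrative sketch (not from the original code): np.require, as used above,
# guarantees a C-contiguous float32 array before the batch is handed to the GPU
# kernels, and copies only when the input does not already satisfy the requirements.
import numpy as np

raw = np.arange(12, dtype=np.float64).reshape(3, 4).T  # transposed, so not C-contiguous
batch = np.require(raw, dtype=np.float32, requirements='C')
assert batch.flags['C_CONTIGUOUS'] and batch.dtype == np.float32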
def _finished_training(self):
  dumper = getattr(self, 'train_layer_output_dumper', None)
  if dumper is not None:
    cache_outputs(self.net, self.train_dp, dumper, index=-3)
  else:
    util.log('There is no dumper for train data')

  dumper = getattr(self, 'test_layer_output_dumper', None)
  if dumper is not None:
    cache_outputs(self.net, self.test_dp, dumper, index=-3)
  else:
    util.log('There is no dumper for test data')
def predict(self, save_layers=None, filename=None):
  self.net.save_layerouput(save_layers)
  self.print_net_summary()
  util.log('Starting predict...')
  save_output = []

  total_cost = 0
  total_correct = 0
  total_numcase = 0

  if self.net.layers[-2].name == 'noise':
    # Replace the noise layer's weight with a (near-)identity matrix for testing.
    w = np.eye(self.net.layers[-2].weight.shape[0], dtype=np.float32)
    if self.net.layers[-2].weight.shape[0] == 11:
      w[:, 10] = 0.1
      w[10, 10] = 0
    clear_w = gpuarray.to_gpu(w)
    noisy_w = self.net.layers[-2].weight
    self.net.layers[-2].weight = clear_w

  while self.curr_epoch < 2:
    start = time.time()
    test_data = self.test_dp.get_next_batch(self.batch_size)

    input, label = test_data.data, test_data.labels
    self.net.train_batch(input, label, TEST)
    cost, correct, numCase = self.net.get_batch_information()

    self.curr_epoch = test_data.epoch
    self.curr_batch += 1

    print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
        self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)
    if save_layers is not None:
      save_output.extend(self.net.get_save_output())

    total_cost += cost * numCase
    total_correct += correct * numCase
    total_numcase += numCase

  if self.net.layers[-2].name == 'noise':
    self.net.layers[-2].weight = noisy_w

  if save_layers is not None:
    if filename is not None:
      with open(filename, 'w') as f:
        cPickle.dump(save_output, f, protocol=-1)
        util.log('save layer output finished')

  total_cost /= total_numcase
  total_correct /= total_numcase

  print >> sys.stderr, '---- test ----'
  print >> sys.stderr, 'error: %f logreg: %f' % (1 - total_correct, total_cost)
def dump(self, checkpoint, suffix):
  # Remove any previous checkpoints for this test id before writing the new one.
  saved_filename = [f for f in os.listdir(self.checkpoint_dir) if self.regex.match(f)]
  for f in saved_filename:
    os.remove(os.path.join(self.checkpoint_dir, f))

  checkpoint_filename = "%s-%d" % (self.test_id, suffix)
  self.checkpoint_file = os.path.join(self.checkpoint_dir, checkpoint_filename)
  print >> sys.stderr, self.checkpoint_file

  with zipfile.ZipFile(self.checkpoint_file, mode='w') as output:
    for k, v in checkpoint.iteritems():
      output.writestr(k, cPickle.dumps(v, protocol=-1))

  util.log('save file finished')
def __init__(self, name, num_filters, filter_shape, padding=2, stride=1, initW=None,
             initB=None, epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004,
             bias=None, weight=None, weightIncr=None, biasIncr=None, disable_bprop=False):
  self.numFilter = num_filters
  assert filter_shape[0] == filter_shape[1], 'Non-square filters not yet supported.'
  self.filterSize = filter_shape[0]
  self.padding = padding
  self.stride = stride

  WeightedLayer.__init__(self, name, 'local', epsW, epsB, initW, initB, momW, momB, wc,
                         weight, bias, weightIncr, biasIncr, disable_bprop)

  util.log('numFilter:%s padding:%s stride:%s initW:%s initB:%s, w: %s, b: %s',
           self.numFilter, self.padding, self.stride, self.initW, self.initB,
           self.weight, self.bias)
def get_checkpoint(self):
  cp_pattern = self.checkpoint_dir + '/%s-*' % self.test_id
  cp_files = glob.glob(cp_pattern)
  if not cp_files:
    return None

  checkpoint_file = sorted(cp_files, key=os.path.getmtime)[-1]
  util.log('Loading from checkpoint file: %s', checkpoint_file)

  dict = {}
  with zipfile.ZipFile(checkpoint_file, mode='r') as zip_in:
    for fname in zip_in.namelist():
      with zip_in.open(fname, mode='r') as entry_f:
        dict[fname] = cPickle.load(entry_f)

  return dict
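# Illustrative sketch (not from the original code): a round trip through the
# zip-based checkpoint format used above -- one zip entry per key, each entry a
# pickled value.  The file name is hypothetical.
import cPickle
import zipfile

state = {'layers': [1, 2, 3], 'epoch': 7}
with zipfile.ZipFile('/tmp/example-checkpoint', mode='w') as zf:
  for k, v in state.iteritems():
    zf.writestr(k, cPickle.dumps(v, protocol=-1))

restored = {}
with zipfile.ZipFile('/tmp/example-checkpoint', mode='r') as zf:
  for name in zf.namelist():
    restored[name] = cPickle.loads(zf.read(name))
assert restored == state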
def drop_layer_from(self, name):
  found = False
  for i, layer in enumerate(self.layers):
    if layer.name == name:
      found = True
      break

  if not found:
    util.log('Layer: %s not found.', name)
    return []

  return_layers = self.layers[i:]
  self.layers = self.layers[0:i]
  print 'Dropping layers starting from', name
  print 'The last remaining layer is', self.layers[-1].name
  return return_layers
def flush(self):
  if self.sz == 0:
    return

  out = {}
  for k in self.data[0].keys():
    items = [d[k] for d in self.data]
    out[k] = np.concatenate(items, axis=0)

  filename = '%s.%d' % (self.target_path, self.count)
  with open(filename, 'w') as f:
    cPickle.dump(out, f, -1)

  util.log('Wrote layer dump to %s', filename)
  self.data = []
  self.sz = 0
  self.count += 1
def __init__(self, name, num_filters, filter_shape, padding=2, stride=1, initW=0.01,
             initB=0.0, partialSum=0, sharedBiases=0, epsW=0.001, epsB=0.002,
             momW=0.0, momB=0.0, wc=0.0, bias=None, weight=None, weightIncr=None,
             biasIncr=None, disableBprop=False):
  self.numFilter = num_filters
  assert filter_shape[0] == filter_shape[1], 'Non-square filters not yet supported.'
  self.filterSize = filter_shape[0]
  self.padding = padding
  self.stride = stride
  self.partialSum = partialSum
  self.sharedBiases = sharedBiases

  WeightedLayer.__init__(self, name, 'conv', epsW, epsB, initW, initB, momW, momB, wc,
                         weight, bias, weightIncr, biasIncr, disableBprop)

  util.log('num_filter:%d padding:%d stride:%d initW:%s initB:%s, epsW:%s epsB:%s, momW:%s momB:%s wc:%s',
           self.numFilter, self.padding, self.stride, self.initW, self.initB,
           self.epsW, self.epsB, self.momW, self.momB, self.wc)
def __init__(self, target_path, max_mem_size=500e5):
  self.target_path = target_path
  dirname = os.path.dirname(self.target_path)
  if not os.path.exists(dirname):
    os.makedirs(dirname)
    util.log('%s does not exist, creating a new directory', dirname)

  self.data = []
  self.sz = 0
  self.count = 0
  self.max_mem_size = max_mem_size

  util.log('dumper established')
  util.log('target path: %s', self.target_path)
  util.log('max_memory: %s', self.max_mem_size)
def __init__(self, data_dir, batch_range=None, multiview=False, category_range=None,
             scale=1, batch_size=1024):
  DataProvider.__init__(self, data_dir, batch_range)
  self.multiview = multiview
  self.batch_size = batch_size
  self.scale = scale

  self.img_size = ImageNetDataProvider.img_size / scale
  self.border_size = ImageNetDataProvider.border_size / scale
  self.inner_size = self.img_size - self.border_size * 2

  if self.multiview:
    self.batch_size = 12

  self.images = _prepare_images(data_dir, category_range, batch_range, self.batch_meta)
  self.num_view = 5 * 2 if self.multiview else 1

  assert len(self.images) > 0
  self._shuffle_batches()

  if 'data_mean' in self.batch_meta:
    data_mean = self.batch_meta['data_mean']
  else:
    data_mean = util.load(data_dir + 'image-mean.pickle')['data']

  self.data_mean = (data_mean
      .astype(np.single)
      .T
      .reshape((3, 256, 256))[:,
                              self.border_size:self.border_size + self.inner_size,
                              self.border_size:self.border_size + self.inner_size]
      .reshape((self.data_dim, 1)))
  util.log('Starting data provider with %d batches', len(self.batches))
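# Illustrative sketch (not from the original code): cropping a stored 256x256 mean
# image down to the network's input window and flattening it to a column, mirroring
# the data_mean computation above.  The border/inner sizes are hypothetical.
import numpy as np

border, inner = 16, 224
mean_flat = np.zeros((256 * 256 * 3,), dtype=np.single)  # stored mean image, flattened
mean_img = mean_flat.reshape((3, 256, 256))
crop = mean_img[:, border:border + inner, border:border + inner]
data_mean = crop.reshape((3 * inner * inner, 1))
assert data_mean.shape == (150528, 1)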
def cut_off_chunk(self):
  if len(self.memory_chunk) == 0:
    util.log('There is no chunk to cut off')
    return

  size = 0
  for k, v in self.memory_chunk[0].iteritems():
    size += self.memory_chunk[0][k].nbytes

  del self.memory_chunk[0]
  self.total_data_size -= size
  self.count -= 1

  util.log('dropped the first memory chunk')
  util.log('dropped chunk size: %s', size)
  util.log('total data size: %s', self.total_data_size)
def run(self):
  while not self._stop:
    util.log('Fetching...')
    self.queue.put(self.dp.get_next_batch())
    util.log('%s', self.dp.curr_batch_index)
    util.log('Done.')

  self._running = False
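# Illustrative sketch (not from the original code): the reader-thread pattern above,
# with a plain function standing in for dp.get_next_batch().  All names here are
# hypothetical; only threading and Queue from the stdlib are assumed.
import threading
import Queue

def start_reader(get_next_batch, queue, stop_event):
  def run():
    while not stop_event.is_set():
      queue.put(get_next_batch())  # blocks when the queue is full
  t = threading.Thread(target=run)
  t.daemon = True
  t.start()
  return t

batches = Queue.Queue(maxsize=4)
stop = threading.Event()
reader = start_reader(lambda: 'batch', batches, stop)
print batches.get()  # consume one prefetched batch
stop.set()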
def print_learning_rates(self):
  util.log('Learning rates:')
  for layer in self.layers:
    if isinstance(layer, WeightedLayer):
      util.log('%s: %s %s %s', layer.name, layer.__class__.__name__,
               layer.weight.epsilon, layer.bias.epsilon)
    else:
      util.log('%s: %s', layer.name, layer.__class__.__name__)
def dump(self, checkpoint, suffix=0):
  if self.checkpoint_dir is None:
    return

  cp_pattern = os.path.join(self.checkpoint_dir, '*')
  cp_files = [(f, os.stat(f)) for f in glob.glob(cp_pattern)]
  cp_files = list(reversed(sorted(cp_files, key=lambda f: f[1].st_mtime)))
  #while sum([f[1].st_size for f in cp_files]) > self.max_cp_size:
  #  os.remove(cp_files.pop())

  checkpoint_filename = "%d" % suffix
  checkpoint_filename = os.path.join(self.checkpoint_dir, checkpoint_filename)

  util.log('Writing checkpoint to %s', checkpoint_filename)
  if checkpoint_filename.startswith('/hdfs'):
    # Write to a local temp file first, then move it into HDFS.
    print 'Writing to hdfs'
    suf = ''
    for i in range(6):
      suf += random.choice(string.ascii_letters)
    tempfilename = '/tmp/' + suf
    print 'temp filename is', tempfilename

    sf = shelve.open(tempfilename, flag='c', protocol=-1, writeback=False)
    for k, v in checkpoint.iteritems():
      sf[k] = v
    sf.sync()
    sf.close()
    #shutil.copy2(tempfilename, checkpoint_filename)
    os.system('mv %s %s' % (tempfilename, checkpoint_filename))
  else:
    sf = shelve.open(checkpoint_filename, flag='c', protocol=-1, writeback=False)
    for k, v in checkpoint.iteritems():
      sf[k] = v
    sf.sync()
    sf.close()

  util.log('save file finished')
def __init__(self, data_dir='.', batch_range=None):
  self.data_dir = data_dir
  self.meta_file = os.path.join(data_dir, 'batches.meta')

  self.curr_batch_index = 0
  self.curr_batch = None
  self.curr_epoch = 1

  if os.path.exists(self.meta_file):
    self.batch_meta = util.load(self.meta_file)
  else:
    util.log_warn('Missing metadata for loader.')

  if batch_range is None:
    self.batch_range = self.get_batch_indexes()
  else:
    self.batch_range = batch_range

  util.log('Batch range: %s', self.batch_range)
  random.shuffle(self.batch_range)

  self.index = 0
  self._handle_new_epoch()
def train(self, num_epochs=1000):
  self.print_net_summary()
  util.log('Starting training...')

  start_epoch = self.curr_epoch
  last_print_time = time.time()

  while (self.curr_epoch - start_epoch < num_epochs and
         self.should_continue_training()):
    batch_start = time.time()
    train_data = self.train_dp.get_next_batch(self.batch_size)
    self.curr_epoch = train_data.epoch
    self.curr_batch += 1

    input, label = train_data.data, train_data.labels
    self.net.train_batch(input, label)
    cost, correct, numCase = self.net.get_batch_information()
    self.train_outputs += [({'logprob': [cost, 1 - correct]}, numCase, self.elapsed())]

    if time.time() - last_print_time > 1:
      print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
          self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - batch_start)
      last_print_time = time.time()

    if self.check_test_data():
      self.get_test_error()

    if self.factor != 1.0 and self.check_adjust_lr():
      self.adjust_lr()

    if self.check_save_checkpoint():
      self.save_checkpoint()

  self.get_test_error()
  self.save_checkpoint()
  self.report()
  self._finished_training()
def predict(self, save_layers=None, filename=None):
  self.net.save_layerouput(save_layers)
  self.print_net_summary()
  util.log('Starting predict...')
  save_output = []

  total_cost = 0
  total_correct = 0
  total_numcase = 0

  while self.curr_epoch < 2:
    start = time.time()
    test_data = self.test_dp.get_next_batch(self.batch_size)

    input, label = test_data.data, test_data.labels
    self.net.train_batch(input, label, TEST)
    cost, correct, numCase = self.net.get_batch_information()

    self.curr_epoch = test_data.epoch
    self.curr_batch += 1

    print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
        self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)
    if save_layers is not None:
      save_output.extend(self.net.get_save_output())

    total_cost += cost * numCase
    total_correct += correct * numCase
    total_numcase += numCase

  if save_layers is not None:
    if filename is not None:
      with open(filename, 'w') as f:
        cPickle.dump(save_output, f, protocol=-1)
        util.log('save layer output finished')

  total_cost /= total_numcase
  total_correct /= total_numcase

  print >> sys.stderr, '---- test ----'
  print >> sys.stderr, 'error: %f logreg: %f' % (1 - total_correct, total_cost)
def __init__(self, single_memory_size=50e6, total_memory_size=4e9):
  self.single_memory_size = single_memory_size
  self.total_memory_size = total_memory_size
  self.single_data_size = 0
  self.total_data_size = 0
  self.count = 0
  self.data = []
  self.memory_chunk = []

  util.log('memory data holder established')
  util.log('total memory size: %s', self.total_memory_size)
  util.log('single memory size: %s', self.single_memory_size)
def flush(self):
  if self.single_data_size == 0:
    return

  dic = {}
  for k in self.data[0].keys():
    items = [d[k] for d in self.data]
    dic[k] = np.concatenate(items, axis=0)

  self.memory_chunk.append(dic)

  util.log('added another memory chunk')
  util.log('memory chunk size: %s', self.single_data_size)
  util.log('total data size: %s', self.total_data_size)

  self.data = []
  self.single_data_size = 0
  self.count += 1
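# Illustrative sketch (not from the original code): how a list of per-batch dicts
# of arrays collapses into a single chunk, as in flush() above.  The key names and
# shapes are hypothetical.
import numpy as np

data = [{'label': np.zeros((4, 1)), 'fc': np.ones((4, 8))},
        {'label': np.zeros((2, 1)), 'fc': np.ones((2, 8))}]

chunk = {}
for k in data[0].keys():
  chunk[k] = np.concatenate([d[k] for d in data], axis=0)

assert chunk['fc'].shape == (6, 8) and chunk['label'].shape == (6, 1)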
def __init__(self, learningRate, imgShape, init_model=None):
  self.learningRate = learningRate
  self.numColor, self.imgSize, _, self.batchSize = imgShape
  self.imgShapes = [imgShape]
  self.inputShapes = [(self.numColor * (self.imgSize ** 2), self.batchSize)]

  self.layers = []
  self.outputs = []
  self.grads = []

  self.output = None
  self.save_layers = None
  self.save_output = []

  self.numCase = self.cost = self.correct = 0.0
  self.numConv = 0

  if init_model is None:
    util.log('no initial model provided; the network starts without any layers')
    return

  if 'layers' in init_model:
    # Loading from a checkpoint
    add_layers(FastNetBuilder(), self, init_model['layers'])
  else:
    if is_cudaconvnet_config(init_model):
      # AlexK config file
      add_layers(CudaconvNetBuilder(), self, init_model)
    else:
      # FastNet config file
      add_layers(FastNetBuilder(), self, init_model)
    self.adjust_learning_rate(self.learningRate)

  util.log('Learning rates:')
  for l in self.layers:
    if isinstance(l, WeightedLayer):
      util.log('%s: %s %s', l.name, getattr(l, 'epsW', 0), getattr(l, 'epsB', 0))
  param_dict['num_batch'] = num_batch[0]
else:
  param_dict['num_batch'] = num_batch

param_dict['num_group_list'] = util.string_to_int_list(args.num_group_list)
param_dict['num_caterange_list'] = util.string_to_int_list(args.num_caterange_list)
param_dict['output_dir'] = args.output_dir
param_dict['output_method'] = args.output_method
param_dict['replaynet_epoch'] = args.replaynet_epoch
param_dict['frag_epoch'] = args.frag_epoch

train_layer_output_dumper = None
test_layer_output_dumper = None
if param_dict['output_method'] == 'disk':
  if param_dict['output_dir'] != '':
    train_layer_output_path = os.path.join(param_dict['output_dir'], 'train_data.pickle')
    param_dict['train_layer_output_path'] = train_layer_output_path
    train_layer_output_dumper = DataDumper(train_layer_output_path)

    test_layer_output_path = os.path.join(param_dict['output_dir'], 'test_data.pickle')
    param_dict['test_layer_output_path'] = test_layer_output_path
    test_layer_output_dumper = DataDumper(test_layer_output_path)
elif param_dict['output_method'] == 'memory':
  train_layer_output_dumper = MemoryDataHolder()
  test_layer_output_dumper = MemoryDataHolder()

param_dict['train_layer_output_dumper'] = train_layer_output_dumper
param_dict['test_layer_output_dumper'] = test_layer_output_dumper

trainer = Trainer.get_trainer_by_name(trainer, param_dict)

util.log('start to train...')
trainer.train(args.num_epoch)
def _start_read(self):
  util.log('Starting reader...')
  assert self._reader is None
  self._reader = ReaderThread(self._data_queue, self.dp)
  self._reader.start()
def __init__(self, data_dir, batch_range=None, category_range=None, batch_size=1024):
  DataProvider.__init__(self, data_dir, batch_range)
  self.img_size = 256
  self.border_size = 16
  self.inner_size = 224
  self.batch_size = batch_size

  # self.multiview = dp_params['multiview_test'] and test
  self.multiview = 0
  self.num_views = 5 * 2
  self.data_mult = self.num_views if self.multiview else 1
  self.buffer_idx = 0

  dirs = glob.glob(data_dir + '/n*')
  synid_to_dir = {}
  for d in dirs:
    synid_to_dir[basename(d)[1:]] = d

  if category_range is None:
    cat_dirs = dirs
  else:
    cat_dirs = []
    for i in category_range:
      synid = self.batch_meta['label_to_synid'][i]
      # util.log('Using category: %d, synid: %s, label: %s', i, synid, self.batch_meta['label_names'][i])
      cat_dirs.append(synid_to_dir[synid])

  self.images = []
  batch_dict = dict((k, k) for k in self.batch_range)

  for d in cat_dirs:
    img_files = list()
    img_files.extend(glob.glob(d + '/*.jpg'))
    img_files.extend(glob.glob(d + '/*.jpeg'))
    img_files.extend(glob.glob(d + '/*.JPG'))
    img_files.extend(glob.glob(d + '/*.JPEG'))
    img_files.sort()
    imgs = [v for i, v in enumerate(img_files) if i in batch_dict]
    self.images.extend(imgs)

  self.images = np.array(self.images)

  # Build an index vector into 'images' and split it into groups of batch_size.
  image_index = np.arange(len(self.images))
  np.random.shuffle(image_index)
  self.batches = np.array_split(image_index,
                                util.divup(len(self.images), batch_size))

  self.batch_range = range(len(self.batches))
  util.log('Starting data provider with %d batches', len(self.batches))
  np.random.shuffle(self.batch_range)

  imagemean = cPickle.loads(open(data_dir + "image-mean.pickle").read())
  self.data_mean = (imagemean['data']
      .astype(np.single)
      .T
      .reshape((3, 256, 256))[:,
                              self.border_size:self.border_size + self.inner_size,
                              self.border_size:self.border_size + self.inner_size]
      .reshape((self.get_data_dims(), 1)))
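# Illustrative sketch (not from the original code): shuffling an index vector and
# splitting it into roughly equal batches, as done above.  Plain ceiling division
# stands in for the project's util.divup helper; the sizes are hypothetical.
import numpy as np

num_images, batch_size = 10, 4
image_index = np.arange(num_images)
np.random.shuffle(image_index)

num_batches = (num_images + batch_size - 1) // batch_size  # ceiling division
batches = np.array_split(image_index, num_batches)
assert len(batches) == 3 and sum(len(b) for b in batches) == num_images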
def _log(self, fmt, *args):
  util.log('%s :: %s', rank, fmt % args)