def __init__(self,
             # Runner-general
             data_location='./normalized.pkl.gz',
             test_run=False,
             cross_val=True,
             # Classifier-general
             epochs=1000,
             batch_size=100,
             max_layer_sizes=0,
             encdecs_name="",
             model_name="",
             # Classifier-specific
             encdec_optimizers=('rmsprop',),
             class_optimizers=('adadelta',),
             class_losses=('categorical_crossentropy',),
             drop_rates=(0.0,),
             gauss_base_sigmas=(0.0,),
             gauss_sigma_factors=(1.0,),
             l1s=(0.0,),
             l2s=(0.0,),
             enc_use_drop=False,
             enc_use_noise=True,
             mod_use_drop=True,
             mod_use_noise=False,
             catalog_name="",
             starting_index=0):
    """Load the data set and store the hyper-parameter grid for ``run()``.

    Args:
        data_location: Path of a gzip-compressed pickle. A falsy value
            falls back to the module-level ``default_data_location``.
        test_run: When true, ``epochs`` and ``batch_size`` are overridden
            with 4 and 1000, and ``DummyCatalogManager`` is used.
        cross_val: When true, the keys of the loaded data become the
            cross-validation drops and ``starting_index`` is the first
            index handled by ``run()``.
        epochs: Number of epochs, coerced to ``int``.
        batch_size: Batch size, coerced to ``int``.
        max_layer_sizes: Coerced to ``int`` and stored; ``run()`` does not
            use it yet.
        encdecs_name: Base name for the encoder/decoders. Empty falls back
            to ``model_name``, then to ``time_str()``.
        model_name: Base name for the models. Empty falls back to
            ``encdecs_name``, then to ``time_str()``.
        encdec_optimizers: Values coerced to ``str``; one grid axis.
        class_optimizers: Values coerced to ``str``; one grid axis.
        class_losses: Values coerced to ``str``; one grid axis.
        drop_rates: Values coerced to ``float``; one grid axis.
        gauss_base_sigmas: Values coerced to ``float``.
        gauss_sigma_factors: Values coerced to ``float``.
        l1s: Values coerced to ``float``; one grid axis.
        l2s: Values coerced to ``float``; one grid axis.
        enc_use_drop: Forwarded by ``run()`` to ``new_encdecs``.
        enc_use_noise: Forwarded by ``run()`` to ``new_encdecs``.
        mod_use_drop: Forwarded by ``run()`` to ``new_model``.
        mod_use_noise: Forwarded by ``run()`` to ``new_model``.
        catalog_name: When non-empty (and not a test run), the catalog
            class is obtained from ``named_catalog(catalog_name)``.
        starting_index: Initial cross-validation index; ignored when
            ``cross_val`` is false.
    """
    self.data_location = data_location or default_data_location
    # Open the *resolved* path so the fallback above actually takes effect.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with gzip.open(self.data_location, 'rb') as f:
        self.data = cPickle.load(f)

    self.cross_val_drops = list(self.data.keys()) if cross_val else []
    # -1 together with an empty drop list makes the loop condition in
    # run() (index < len(drops)) true exactly once.
    self.cross_val_index = starting_index if cross_val else -1
    self.cross_val_test_data = None

    if test_run:
        self.epochs = 4
        self.batch_size = 1000
    else:
        self.epochs = int(epochs)
        self.batch_size = int(batch_size)
    self.max_layer_sizes = int(max_layer_sizes)

    self.encdecs_name = str(encdecs_name) or str(model_name) or time_str()
    self.model_name = str(model_name) or str(encdecs_name) or time_str()

    # Materialise real lists: run() iterates these repeatedly inside nested
    # loops and tests membership, which a one-shot iterator (what map()
    # returns on Python 3) cannot support.
    self.encdec_optimizers = [str(v) for v in encdec_optimizers]
    self.class_optimizers = [str(v) for v in class_optimizers]
    self.class_losses = [str(v) for v in class_losses]
    self.drop_rates = [float(v) for v in drop_rates]
    self.gauss_base_sigmas = [float(v) for v in gauss_base_sigmas]
    self.gauss_sigma_factors = [float(v) for v in gauss_sigma_factors]
    self.l1s = [float(v) for v in l1s]
    self.l2s = [float(v) for v in l2s]

    self.enc_use_drop = bool(enc_use_drop)
    self.enc_use_noise = bool(enc_use_noise)
    self.mod_use_drop = bool(mod_use_drop)
    self.mod_use_noise = bool(mod_use_noise)

    if test_run:
        self.cat = DummyCatalogManager
    elif catalog_name:
        self.cat = named_catalog(catalog_name)
    else:
        self.cat = CatalogManager
def run(self):
    """Pretrain and fine-tune a classifier for every grid combination.

    The outer loop advances ``self.cross_val_index`` until it reaches
    ``len(self.cross_val_drops)``. For each combination of encoder/decoder
    optimizer, drop rate, Gaussian-noise setting, l1 and l2, a
    ``Classifier`` is built and pretrained once; it is then fine-tuned for
    each classifier optimizer/loss pair. Each fine-tuning run is recorded
    in the catalog and its history is pickled next to
    ``default_data_location``.
    """
    while self.cross_val_index < len(self.cross_val_drops):
        labels, data = self.split_data(bool(self.cross_val_drops))

        # A zero base sigma yields a single (0, 1) entry; every non-zero
        # base sigma is paired with every sigma factor.
        noise_settings = []
        if 0 in self.gauss_base_sigmas:
            noise_settings.append((0, 1))
        noise_settings.extend(
            (base, factor)
            for base in self.gauss_base_sigmas
            for factor in self.gauss_sigma_factors
            if base != 0)

        # Name fragment identifying the current cross-validation index.
        if self.cross_val_drops:
            fold_tag = "-x%s" % self.cross_val_index
        else:
            fold_tag = ""

        for eo_idx, encdec_opt in enumerate(self.encdec_optimizers):
            for dr_idx, drop_rate in enumerate(self.drop_rates):
                for gc_idx, (base_sigma, sigma_factor) in enumerate(
                        noise_settings):
                    for l1_idx, l1 in enumerate(self.l1s):
                        for l2_idx, l2 in enumerate(self.l2s):
                            pretrain_name = self.model_name + fold_tag + (
                                "-%s%s%s%s" % (
                                    dr_idx, gc_idx, l1_idx, l2_idx))
                            enc_name = self.encdecs_name + fold_tag + (
                                "-%s%s%s%s%s" % (
                                    eo_idx, dr_idx, gc_idx, l1_idx, l2_idx))
                            pc = Classifier(
                                data=data,
                                labels=labels,
                                batch_size=self.batch_size,
                                epochs=self.epochs,
                                model_name=pretrain_name,
                                model_dir="",
                                encdecs_name=enc_name,
                                encdec_optimizer=encdec_opt,
                                # Not used for now, set later
                                class_optimizer=None,
                                # idem
                                class_loss=None,
                                drop_rate=drop_rate,
                                gauss_base_sigma=base_sigma,
                                gauss_sigma_factor=sigma_factor,
                                l1=l1,
                                l2=l2,
                                catalog_class=self.cat)
                            # TODO do something with max-layer-sizes here
                            pc.new_encdecs(
                                compile=True,
                                use_dropout=self.enc_use_drop,
                                use_noise=self.enc_use_noise)
                            pc.pretrain()
                            pc.cap_data()

                            for co_idx, class_opt in enumerate(
                                    self.class_optimizers):
                                for cl_idx, class_loss in enumerate(
                                        self.class_losses):
                                    suffix = "-%s%s%s%s%s%s%s" % (
                                        eo_idx, dr_idx, gc_idx, l1_idx,
                                        l2_idx, co_idx, cl_idx)
                                    current_name = (
                                        self.model_name + fold_tag + suffix)
                                    start_time = time_str()

                                    # Mark this run as started.
                                    started_entry = {
                                        'cross_validation': {
                                            'enabled': bool(
                                                self.cross_val_drops),
                                            'drops': self.cross_val_drops,
                                            'index_reached':
                                                self.cross_val_index
                                        },
                                        'settings': {
                                            '0-eo': self.encdec_optimizers,
                                            '1-dr': self.drop_rates,
                                            '2-(gbs,gsf)': noise_settings,
                                            '3-l1s': self.l1s,
                                            '4-l2s': self.l2s,
                                            '5-co': self.class_optimizers,
                                            '6-cl': self.class_losses
                                        },
                                        fold_tag + suffix: {
                                            'finished': False,
                                            'start_time': start_time
                                        }
                                    }
                                    pc.catalog_manager.update(
                                        started_entry, self.model_name)

                                    pc.model_name = current_name
                                    pc.cls_opt = class_opt
                                    pc.cls_lss = class_loss
                                    pc.new_model(
                                        fresh=False,
                                        compile=True,
                                        use_dropout=self.mod_use_drop,
                                        use_noise=self.mod_use_noise)
                                    history = pc.finetune()

                                    history_path = os.path.join(
                                        os.path.dirname(
                                            default_data_location),
                                        current_name + '.history.pkl.gz')
                                    with gzip.open(history_path, 'wb') as f:
                                        cPickle.dump(history, f, 2)

                                    # Mark this run as finished.
                                    finished_entry = {
                                        'finished': True,
                                        'test_accuracy':
                                            pc._model_info['test_accuracy'],
                                        'start_time': start_time,
                                        'end_time': time_str()
                                    }
                                    pc.catalog_manager.set(
                                        finished_entry,
                                        self.model_name,
                                        fold_tag + suffix)
        self.cross_val_index += 1
def __init__(self,
             # Runner-general
             data_location='./normalized.pkl.gz',
             test_run=False,
             cross_val=True,
             # Classifier-general
             epochs=1000,
             batch_size=100,
             max_layer_sizes=0,
             encdecs_name="",
             model_name="",
             # Classifier-specific
             encdec_optimizers=('rmsprop',),
             class_optimizers=('adadelta',),
             class_losses=('categorical_crossentropy',),
             drop_rates=(0.0,),
             gauss_base_sigmas=(0.0,),
             gauss_sigma_factors=(1.0,),
             l1s=(0.0,),
             l2s=(0.0,),
             enc_use_drop=False,
             enc_use_noise=True,
             mod_use_drop=True,
             mod_use_noise=False,
             catalog_name="",
             starting_index=0):
    """Load the data set and store the hyper-parameter grid for ``run()``.

    Args:
        data_location: Path of a gzip-compressed pickle. A falsy value
            falls back to the module-level ``default_data_location``.
        test_run: When true, ``epochs`` and ``batch_size`` are overridden
            with 4 and 1000, and ``DummyCatalogManager`` is used.
        cross_val: When true, the keys of the loaded data become the
            cross-validation drops and ``starting_index`` is the first
            index handled by ``run()``.
        epochs: Number of epochs, coerced to ``int``.
        batch_size: Batch size, coerced to ``int``.
        max_layer_sizes: Coerced to ``int`` and stored; ``run()`` does not
            use it yet.
        encdecs_name: Base name for the encoder/decoders. Empty falls back
            to ``model_name``, then to ``time_str()``.
        model_name: Base name for the models. Empty falls back to
            ``encdecs_name``, then to ``time_str()``.
        encdec_optimizers: Values coerced to ``str``; one grid axis.
        class_optimizers: Values coerced to ``str``; one grid axis.
        class_losses: Values coerced to ``str``; one grid axis.
        drop_rates: Values coerced to ``float``; one grid axis.
        gauss_base_sigmas: Values coerced to ``float``.
        gauss_sigma_factors: Values coerced to ``float``.
        l1s: Values coerced to ``float``; one grid axis.
        l2s: Values coerced to ``float``; one grid axis.
        enc_use_drop: Forwarded by ``run()`` to ``new_encdecs``.
        enc_use_noise: Forwarded by ``run()`` to ``new_encdecs``.
        mod_use_drop: Forwarded by ``run()`` to ``new_model``.
        mod_use_noise: Forwarded by ``run()`` to ``new_model``.
        catalog_name: When non-empty (and not a test run), the catalog
            class is obtained from ``named_catalog(catalog_name)``.
        starting_index: Initial cross-validation index; ignored when
            ``cross_val`` is false.
    """
    self.data_location = data_location or default_data_location
    # Open the *resolved* path so the fallback above actually takes effect.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with gzip.open(self.data_location, 'rb') as f:
        self.data = cPickle.load(f)

    self.cross_val_drops = list(self.data.keys()) if cross_val else []
    # -1 together with an empty drop list makes the loop condition in
    # run() (index < len(drops)) true exactly once.
    self.cross_val_index = starting_index if cross_val else -1
    self.cross_val_test_data = None

    if test_run:
        self.epochs = 4
        self.batch_size = 1000
    else:
        self.epochs = int(epochs)
        self.batch_size = int(batch_size)
    self.max_layer_sizes = int(max_layer_sizes)

    self.encdecs_name = str(encdecs_name) or str(model_name) or time_str()
    self.model_name = str(model_name) or str(encdecs_name) or time_str()

    # Materialise real lists: run() iterates these repeatedly inside nested
    # loops and tests membership, which a one-shot iterator (what map()
    # returns on Python 3) cannot support.
    self.encdec_optimizers = [str(v) for v in encdec_optimizers]
    self.class_optimizers = [str(v) for v in class_optimizers]
    self.class_losses = [str(v) for v in class_losses]
    self.drop_rates = [float(v) for v in drop_rates]
    self.gauss_base_sigmas = [float(v) for v in gauss_base_sigmas]
    self.gauss_sigma_factors = [float(v) for v in gauss_sigma_factors]
    self.l1s = [float(v) for v in l1s]
    self.l2s = [float(v) for v in l2s]

    self.enc_use_drop = bool(enc_use_drop)
    self.enc_use_noise = bool(enc_use_noise)
    self.mod_use_drop = bool(mod_use_drop)
    self.mod_use_noise = bool(mod_use_noise)

    if test_run:
        self.cat = DummyCatalogManager
    elif catalog_name:
        self.cat = named_catalog(catalog_name)
    else:
        self.cat = CatalogManager
def run(self):
    """Pretrain and fine-tune a classifier for every grid combination.

    The outer loop advances ``self.cross_val_index`` until it reaches
    ``len(self.cross_val_drops)``. For each combination of encoder/decoder
    optimizer, drop rate, Gaussian-noise setting, l1 and l2, a
    ``Classifier`` is built and pretrained once; it is then fine-tuned for
    each classifier optimizer/loss pair. Each fine-tuning run is recorded
    in the catalog and its history is pickled next to
    ``default_data_location``.
    """
    while self.cross_val_index < len(self.cross_val_drops):
        labels, data = self.split_data(bool(self.cross_val_drops))

        # A zero base sigma yields a single (0, 1) entry; every non-zero
        # base sigma is paired with every sigma factor.
        noise_settings = []
        if 0 in self.gauss_base_sigmas:
            noise_settings.append((0, 1))
        noise_settings.extend(
            (base, factor)
            for base in self.gauss_base_sigmas
            for factor in self.gauss_sigma_factors
            if base != 0)

        # Name fragment identifying the current cross-validation index.
        if self.cross_val_drops:
            fold_tag = "-x%s" % self.cross_val_index
        else:
            fold_tag = ""

        for eo_idx, encdec_opt in enumerate(self.encdec_optimizers):
            for dr_idx, drop_rate in enumerate(self.drop_rates):
                for gc_idx, (base_sigma, sigma_factor) in enumerate(
                        noise_settings):
                    for l1_idx, l1 in enumerate(self.l1s):
                        for l2_idx, l2 in enumerate(self.l2s):
                            pretrain_name = self.model_name + fold_tag + (
                                "-%s%s%s%s" % (
                                    dr_idx, gc_idx, l1_idx, l2_idx))
                            enc_name = self.encdecs_name + fold_tag + (
                                "-%s%s%s%s%s" % (
                                    eo_idx, dr_idx, gc_idx, l1_idx, l2_idx))
                            pc = Classifier(
                                data=data,
                                labels=labels,
                                batch_size=self.batch_size,
                                epochs=self.epochs,
                                model_name=pretrain_name,
                                model_dir="",
                                encdecs_name=enc_name,
                                encdec_optimizer=encdec_opt,
                                # Not used for now, set later
                                class_optimizer=None,
                                # idem
                                class_loss=None,
                                drop_rate=drop_rate,
                                gauss_base_sigma=base_sigma,
                                gauss_sigma_factor=sigma_factor,
                                l1=l1,
                                l2=l2,
                                catalog_class=self.cat)
                            # TODO do something with max-layer-sizes here
                            pc.new_encdecs(
                                compile=True,
                                use_dropout=self.enc_use_drop,
                                use_noise=self.enc_use_noise)
                            pc.pretrain()
                            pc.cap_data()

                            for co_idx, class_opt in enumerate(
                                    self.class_optimizers):
                                for cl_idx, class_loss in enumerate(
                                        self.class_losses):
                                    suffix = "-%s%s%s%s%s%s%s" % (
                                        eo_idx, dr_idx, gc_idx, l1_idx,
                                        l2_idx, co_idx, cl_idx)
                                    current_name = (
                                        self.model_name + fold_tag + suffix)
                                    start_time = time_str()

                                    # Mark this run as started.
                                    started_entry = {
                                        'cross_validation': {
                                            'enabled': bool(
                                                self.cross_val_drops),
                                            'drops': self.cross_val_drops,
                                            'index_reached':
                                                self.cross_val_index
                                        },
                                        'settings': {
                                            '0-eo': self.encdec_optimizers,
                                            '1-dr': self.drop_rates,
                                            '2-(gbs,gsf)': noise_settings,
                                            '3-l1s': self.l1s,
                                            '4-l2s': self.l2s,
                                            '5-co': self.class_optimizers,
                                            '6-cl': self.class_losses
                                        },
                                        fold_tag + suffix: {
                                            'finished': False,
                                            'start_time': start_time
                                        }
                                    }
                                    pc.catalog_manager.update(
                                        started_entry, self.model_name)

                                    pc.model_name = current_name
                                    pc.cls_opt = class_opt
                                    pc.cls_lss = class_loss
                                    pc.new_model(
                                        fresh=False,
                                        compile=True,
                                        use_dropout=self.mod_use_drop,
                                        use_noise=self.mod_use_noise)
                                    history = pc.finetune()

                                    history_path = os.path.join(
                                        os.path.dirname(
                                            default_data_location),
                                        current_name + '.history.pkl.gz')
                                    with gzip.open(history_path, 'wb') as f:
                                        cPickle.dump(history, f, 2)

                                    # Mark this run as finished.
                                    finished_entry = {
                                        'finished': True,
                                        'test_accuracy':
                                            pc._model_info['test_accuracy'],
                                        'start_time': start_time,
                                        'end_time': time_str()
                                    }
                                    pc.catalog_manager.set(
                                        finished_entry,
                                        self.model_name,
                                        fold_tag + suffix)
        self.cross_val_index += 1