def norm_iterator(iterable):
    """Return a normalized iterable of (name, element) tuples.

    Args:
        iterable(list): the iterable to normalize

    Returns:
        a zipped iterable pairing generated names ('list_0', 'list_1',
        ...) with the elements of `iterable`

    Raises:
        NotImplementedError: if `iterable` is not a list
    """
    if isinstance(iterable, list):
        # range(len(...)) avoids binding an unused element variable
        # (the original used `for i, j in enumerate(...)` with j unused)
        names = ['list_' + str(i) for i in range(len(iterable))]
        return szip(names, iterable)
    else:
        raise NotImplementedError('Iterables other than lists '
                                  'cannot be passed to this function')
def _prepare_message(self, model, data, data_val, kwargs, generator=False):
    """Prepare the elements to be passed to the backend

    Args:
        model(supported model): the model to be prepared
        data(list): the list of dicts or generators used for training
        data_val(list): the list of dicts or generator used for validation

    Returns:
        the transformed data object, the transformed validation data
        object, the data_hash, and the generator setup codes
        (1 = many-to-one, 2 = one-to-many, 3 = equal chunk counts)
    """
    self._check_compile(model, kwargs)
    kwargs = self._check_serialize(kwargs)
    gen_setup = []
    if generator:
        nb_data_chunks = [get_nb_chunks(d) for d in data]
        nb_data_val_chunks = [get_nb_chunks(dv) for dv in data_val]
        for d_c, dv_c in szip(nb_data_chunks, nb_data_val_chunks):
            is_val_one = dv_c == 1
            is_train_one = d_c == 1
            # when dv_c is None there is no validation generator for this
            # pair, so no setup code is recorded
            if dv_c is not None:
                # many to one
                if d_c > dv_c and is_val_one:
                    gen_setup.append(1)
                # one to many
                elif d_c < dv_c and is_train_one:
                    gen_setup.append(2)
                # equal
                elif d_c == dv_c:
                    gen_setup.append(3)
                else:  # pragma: no cover
                    # BUG FIX: the exception was created but never raised,
                    # silently accepting incompatible chunk counts; also
                    # add the missing space between the two fragments.
                    raise Exception('Nb batches in train generator and '
                                    'validation generator not compatible')
        data_hash = cm.create_gen_hash(data)
        data, data_val = pickle_gen(data, data_val)
    else:
        data_hash = cm.create_data_hash(data)
    return data, data_val, data_hash, gen_setup
def test_experiment_fit_gen_async_nogenval(self, get_model_data_expe):
    '''
        Main case: gen on train, data on val
        Subcases:
        10 chunks on train
        1 chunk on train
    '''
    data, data_val, is_classif, model, metric, expe = get_model_data_expe
    # Nchunks_gen=True builds a multi-chunk training generator,
    # False a single-chunk one; expected_value is the matching
    # number of recorded scores (presumably one per chunk —
    # NOTE(review): confirm against make_gen).
    for Nchunks_gen, expected_value in szip([True, False], [10, 1]):
        gen_train, data_train, data_stream_train = make_gen(
            Nchunks_gen, is_classif, train=True)
        # run once without validation data and once with plain
        # (non-generator) validation data
        for data_val_loc in [None, data_val]:
            _, thread = expe.fit_gen_async([gen_train], [data_val_loc],
                                           model=model,
                                           overwrite=True,
                                           metrics=metric)
            # block until the async fit finishes before asserting
            thread.join()
            assert len(expe.full_res['metrics'][
                'score']) == expected_value
            assert len(expe.full_res['metrics'][
                'val_score']) == expected_value
            if data_val_loc is not None:
                # validation data was given: every val score is filled in
                assert None not in expe.full_res['metrics'][
                    'val_score']
            else:
                # no validation data: val scores are NaN placeholders
                assert np.all([np.isnan(v) for v in expe.full_res[
                    'metrics']['val_score']])
            assert expe.data_id is not None
            assert expe.mod_id is not None
            assert expe.params_dump is not None
            assert expe
        close_gens(gen_train, data_train, data_stream_train)
    print(self)
def test_experiment_fit_gen_async_withgenval(self, get_model_data_expe):
    '''
        Main case: gen on train, gen on val
        Subcases:
        10 chunks on train / 10 chunks on val
        10 chunks on train / 1 chunk on val
        1 chunk on train / 10 chunks on val
    '''
    data, data_val, is_classif, model, metric, expe = get_model_data_expe
    # each pair selects a (train, val) generator size combination:
    # True -> multi-chunk generator, False -> single-chunk generator
    for Nchunks_gen, Nchunks_val in szip([True, True, False],
                                         [True, False, True]):
        gen_train, data_train, data_stream_train = make_gen(
            Nchunks_gen, is_classif, train=True)
        gen_test, data_test, data_stream_test = make_gen(
            Nchunks_val, is_classif, train=False)
        _, thread = expe.fit_gen_async(
            [gen_train], [gen_test], overwrite=True, metrics=metric)
        # block until the async fit finishes before asserting
        thread.join()
        # one score per training chunk
        expected_value_gen = 10
        if not Nchunks_gen:
            expected_value_gen = 1
        assert len(expe.full_res['metrics'][
            'score']) == expected_value_gen
        # val_score always ends up with 10 entries here —
        # NOTE(review): presumably the backend broadcasts the
        # single-chunk side; confirm against the train backend.
        assert len(expe.full_res['metrics'][
            'val_score']) == 10
        assert expe.data_id is not None
        assert expe.mod_id is not None
        assert expe.params_dump is not None
        assert expe
        close_gens(gen_train, data_train, data_stream_train)
    print(self)
def train(model, data, data_val, size_gen, generator=False, *args, **kwargs):
    """Fit a model given hyperparameters and a serialized model

    Args:
        model(dict): a serialized keras.Model
        data(list): a list of dict mapping inputs and outputs to lists
            or dictionnaries mapping the inputs names to np.arrays
        data_val(list): same structure than `data` but for validation
        size_gen(list): generator setup codes; a value of 1 means the
            matching validation generator holds a single chunk and is
            unrolled into plain data
        generator(bool): whether `data`/`data_val` hold pickled generators

    Returns:
        the loss (list), the validation loss (list), the number of
        iterations, and the model
    """
    if generator:
        # reload theano so generator-based fits start from a clean state
        from six.moves import reload_module as sreload
        import theano
        sreload(theano)
    results = dict()
    results['metrics'] = dict()
    custom_objects = None
    callbacks = []
    fit_gen_val = False
    suf = 'val_'

    if 'custom_objects' in kwargs:
        custom_objects = kwargs.pop('custom_objects')

    # load model
    model = model_from_dict_w_opt(model, custom_objects=custom_objects)

    if 'callbacks' in kwargs:
        callbacks = kwargs.pop('callbacks')
        callbacks = [deserialize(**callback) for callback in callbacks]
        for i, callback in enumerate(callbacks):
            if inspect.isfunction(callback):
                callbacks[i] = callback()
            else:
                raise TypeError('Your callback is not wrapped in a function')

    metrics_names = model.metrics_names
    for metric in metrics_names:
        results['metrics'][metric] = []
        results['metrics'][suf + metric] = []

    mod_name = model.__class__.__name__

    if generator:
        data = [pickle.loads(d.encode('raw_unicode_escape')) for d in data]
        data = [cm.transform_gen(d, mod_name) for d in data]
        # batch_size is meaningless for fit_generator
        kwargs.pop('batch_size')

    if all(v is None for v in data_val):
        val_gen = 0
    else:
        val_gen = check_gen(data_val)

    if val_gen > 0:
        if generator:
            data_val = [pickle.loads(dv.encode('raw_unicode_escape'))
                        for dv in data_val]
            data_val = [cm.transform_gen(dv, mod_name) for dv in data_val]
            for i, check in enumerate(size_gen):
                # BUG FIX: `check is 1` compared identity and only worked
                # because CPython interns small ints; use equality.
                if check == 1:
                    data_val[i] = next(data_val[i])
            fit_gen_val = True
        else:
            raise Exception("You should also pass a generator for the training"
                            " data.")

    # fit the model according to the input/output type
    # BUG FIX: `mod_name is "Sequential"` compared string identity,
    # which is implementation-dependent; use equality.
    if mod_name == "Sequential" or mod_name == "Model":
        for d, dv in szip(data, data_val):
            validation = check_validation(dv)
            if not fit_gen_val:
                if dv is not None:
                    dv = (dv['X'], dv['y'])
            if generator:
                h = model.fit_generator(generator=d,
                                        validation_data=dv,
                                        callbacks=callbacks,
                                        *args, **kwargs)
            else:
                X, y = d['X'], d['y']
                h = model.fit(x=X, y=y, validation_data=dv,
                              callbacks=callbacks, *args, **kwargs)
            for metric in metrics_names:
                results['metrics'][metric] += h.history[metric]
                if validation:
                    results['metrics'][
                        suf + metric] += h.history[suf + metric]
                else:
                    # pad with NaN so train/val series stay aligned
                    results['metrics'][suf + metric] += [np.nan] * \
                        len(h.history[metric])
        results['metrics']['iter'] = h.epoch[-1] * len(data)
    else:
        # BUG FIX: missing space between the two string fragments
        # ("modelis not supported")
        raise NotImplementedError("This type of model "
                                  "is not supported: {}".format(mod_name))
    return results, model
def train(model, data, data_val, size_gen, generator=False, *args, **kwargs):
    """Fit a model given hyperparameters and a serialized model

    Args:
        model(dict): a serialized keras.Model
        data(list): a list of dict mapping inputs and outputs to lists
            or dictionnaries mapping the inputs names to np.arrays
        data_val(list): same structure than `data` but for validation
        size_gen(list): generator setup codes; a value of 1 means the
            matching validation generator holds a single chunk and is
            unrolled into plain data
        generator(bool): whether `data`/`data_val` hold pickled generators

    Returns:
        the loss (list), the validation loss (list), the number of
        iterations, and the model
    """
    if generator:
        # reload theano so generator-based fits start from a clean state
        from six.moves import reload_module as sreload
        import theano
        sreload(theano)
    results = dict()
    results['metrics'] = dict()
    custom_objects = None
    callbacks = []
    fit_gen_val = False
    suf = 'val_'

    if 'custom_objects' in kwargs:
        custom_objects = kwargs.pop('custom_objects')

    # load model
    model = model_from_dict_w_opt(model, custom_objects=custom_objects)

    if 'callbacks' in kwargs:
        callbacks = kwargs.pop('callbacks')
        callbacks = [deserialize(**callback) for callback in callbacks]
        for i, callback in enumerate(callbacks):
            if inspect.isfunction(callback):
                callbacks[i] = callback()
            else:
                raise TypeError('Your callback is not wrapped in a function')

    metrics_names = model.metrics_names
    for metric in metrics_names:
        results['metrics'][metric] = []
        results['metrics'][suf + metric] = []

    mod_name = model.__class__.__name__

    if generator:
        data = [pickle.loads(d.encode('raw_unicode_escape')) for d in data]
        data = [cm.transform_gen(d, mod_name) for d in data]
        # batch_size is meaningless for fit_generator
        kwargs.pop('batch_size')

    if all(v is None for v in data_val):
        val_gen = 0
    else:
        val_gen = check_gen(data_val)

    if val_gen > 0:
        if generator:
            data_val = [
                pickle.loads(dv.encode('raw_unicode_escape'))
                for dv in data_val
            ]
            data_val = [cm.transform_gen(dv, mod_name) for dv in data_val]
            for i, check in enumerate(size_gen):
                # BUG FIX: `check is 1` compared identity and only worked
                # because CPython interns small ints; use equality.
                if check == 1:
                    data_val[i] = next(data_val[i])
            fit_gen_val = True
        else:
            raise Exception("You should also pass a generator for the training"
                            " data.")

    # fit the model according to the input/output type
    # BUG FIX: `mod_name is "Sequential"` compared string identity,
    # which is implementation-dependent; use equality.
    if mod_name == "Sequential" or mod_name == "Model":
        for d, dv in szip(data, data_val):
            validation = check_validation(dv)
            if not fit_gen_val:
                if dv is not None:
                    dv = (dv['X'], dv['y'])
            if generator:
                h = model.fit_generator(generator=d,
                                        validation_data=dv,
                                        callbacks=callbacks,
                                        *args, **kwargs)
            else:
                X, y = d['X'], d['y']
                h = model.fit(x=X, y=y, validation_data=dv,
                              callbacks=callbacks, *args, **kwargs)
            for metric in metrics_names:
                results['metrics'][metric] += h.history[metric]
                if validation:
                    results['metrics'][suf + metric] += h.history[suf + metric]
                else:
                    # pad with NaN so train/val series stay aligned
                    results['metrics'][suf + metric] += [np.nan] * \
                        len(h.history[metric])
        results['metrics']['iter'] = h.epoch[-1] * len(data)
    else:
        # BUG FIX: missing space between the two string fragments
        # ("modelis not supported")
        raise NotImplementedError("This type of model "
                                  "is not supported: {}".format(mod_name))
    return results, model
def train(model, data, data_val, size_gen, generator=False, *args, **kwargs):
    """Fit a model given parameters and a serialized model

    Args:
        model(dict): a serialized sklearn model
        data(list): a list of dict mapping inputs and outputs to lists
            or dictionnaries mapping the inputs names to np.arrays
            XOR a list of fuel generators
        data_val(list): same structure than `data` but for validation.
            it is possible to feed generators for data and plain data
            for data_val. it is not possible the other way around.
        size_gen(list): generator setup codes
            (1 = many-to-one, 2 = one-to-many, 3 = equal chunk counts)
        generator(bool): whether `data` holds pickled generators

    Returns:
        the loss (list), the validation loss (list), the number of
        iterations, and the model
    """
    # Local variables
    import sklearn.metrics

    results = dict()
    results['metrics'] = dict()
    custom_objects = None
    predondata = []
    predonval = []
    fit_gen_val = False

    # Load custom_objects
    if 'custom_objects' in kwargs:  # pragma: no cover
        custom_objects = kwargs.pop('custom_objects')

    # Load model and get metrics
    model, metrics = model_from_dict_w_opt(model,
                                           custom_objects=custom_objects)

    # instantiates metrics
    # there is at least one mandatory metric for sklearn models
    metrics_names = ["score"]
    if metrics:
        for metric in metrics:
            metrics_names.append(metric)
    for metric in metrics_names:
        results['metrics'][metric] = []
        results['metrics']["val_" + metric] = []

    # pickle data if generator
    if generator:
        data = [pickle.loads(d.encode('raw_unicode_escape')) for d in data]

    # check if data_val is in generator
    if all(v is None for v in data_val):
        val_gen = 0
    else:
        val_gen = check_gen(data_val)

    # if so pickle data_val
    if val_gen > 0:
        if generator:
            data_val = [
                pickle.loads(dv.encode('raw_unicode_escape'))
                for dv in data_val
            ]
            fit_gen_val = True
        else:
            raise Exception("You should also pass a generator for the training"
                            " data.")

    # Fit the model and validates it
    if len(size_gen) == 0:
        size_gen = [0] * len(data)

    # loop over the data/generators
    for d, dv, s_gen in szip(data, data_val, size_gen):
        # check if we have a data_val object.
        # if not, no evaluation of the metrics on data_val.
        validation = dv is not None

        # not treating the case "not generator and fit_gen_val"
        # since it is catched above
        # case A : dict for data and data_val
        if not generator and not fit_gen_val:
            X, y = d['X'], d['y']
            model.fit(X, y, *args, **kwargs)
            predondata.append(model.predict(X))
            for metric in metrics_names:
                # BUG FIX: `metric is not 'score'` compared string
                # identity, which is implementation-dependent; use !=.
                if metric != 'score':
                    computed_metric = getattr(sklearn.metrics,
                                              metric)(y, predondata[-1])
                    results['metrics'][metric].append(computed_metric)
                else:
                    computed_metric = model.score(X, y)
                    results['metrics']['score'].append(computed_metric)
                    # TODO : optimization
            if validation:
                X_val, y_val = dv['X'], dv['y']
                predonval.append(model.predict(X_val))
                for metric in metrics_names:
                    if metric != 'score':
                        computed_metric = getattr(sklearn.metrics,
                                                  metric)(y_val,
                                                          predonval[-1])
                    else:
                        computed_metric = model.score(X_val, y_val)
                        # TODO : optimization
                    results['metrics']['val_' + metric].append(
                        computed_metric)
            else:
                for metric in metrics_names:
                    results['metrics']['val_' + metric].append(np.nan)

        # case B : generator for data and no generator for data_val
        # could be dict or None
        elif generator and not fit_gen_val:
            if validation:
                X_val, y_val = dv['X'], dv['y']
            for batch_data in d.get_epoch_iterator():
                X, y = batch_data
                model.fit(X, y, *args, **kwargs)
                predondata.append(model.predict(X))
                if validation:
                    predonval.append(model.predict(X_val))
                for metric in metrics_names:
                    if metric != 'score':
                        results['metrics'][metric].append(
                            getattr(sklearn.metrics,
                                    metric)(y, predondata[-1]))
                        if validation:
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['val_' + metric].append(np.nan)
                    else:
                        results['metrics']['score'].append(model.score(X, y))
                        if validation:
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))
                        else:
                            results['metrics']['val_score'].append(np.nan)

        # case C : generator for data and for data_val
        else:
            # case C1: N chunks in gen, 1 chunk in val, many to one
            if s_gen == 1:
                X_val, y_val = snext(dv.get_epoch_iterator())
                for batch_data in d.get_epoch_iterator():
                    X, y = batch_data
                    model.fit(X, y, *args, **kwargs)
                    predondata.append(model.predict(X))
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics'][metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y, predondata[-1]))
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['score'].append(
                                model.score(X, y))
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))

            # case C2 : 1 chunk in gen, N chunks in val, one to many
            elif s_gen == 2:
                X, y = snext(d.get_epoch_iterator())
                model.fit(X, y, *args, **kwargs)
                predondata.append(model.predict(X))
                for metric in metrics_names:
                    if metric != 'score':
                        results['metrics'][metric].append(
                            getattr(sklearn.metrics,
                                    metric)(y, predondata[-1]))
                    else:
                        results['metrics']['score'].append(model.score(X, y))
                for batch_val in dv.get_epoch_iterator():
                    X_val, y_val = batch_val
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))

            # case C3 : same numbers of chunks, many to many
            elif s_gen == 3:
                for batch_data, batch_val in szip(d.get_epoch_iterator(),
                                                  dv.get_epoch_iterator()):
                    X, y = batch_data
                    X_val, y_val = batch_val
                    model.fit(X, y, *args, **kwargs)
                    predondata.append(model.predict(X))
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics'][metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y, predondata[-1]))
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['score'].append(
                                model.score(X, y))
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))
            else:  # pragma: no cover
                raise Exception(
                    'Incoherent generator size for train and validation')

    # for compatibility with keras backend
    results['metrics']['iter'] = np.nan

    return results, model
def train(model, data, data_val, size_gen, generator=False, *args, **kwargs):
    """Fit a model given parameters and a serialized model

    Args:
        model(dict): a serialized sklearn model
        data(list): a list of dict mapping inputs and outputs to lists
            or dictionnaries mapping the inputs names to np.arrays
            XOR a list of fuel generators
        data_val(list): same structure than `data` but for validation.
            it is possible to feed generators for data and plain data
            for data_val. it is not possible the other way around.
        size_gen(list): generator setup codes
            (1 = many-to-one, 2 = one-to-many, 3 = equal chunk counts)
        generator(bool): whether `data` holds pickled generators

    Returns:
        the loss (list), the validation loss (list), the number of
        iterations, and the model
    """
    # Local variables
    import sklearn.metrics

    results = dict()
    results['metrics'] = dict()
    custom_objects = None
    predondata = []
    predonval = []
    fit_gen_val = False

    # Load custom_objects
    if 'custom_objects' in kwargs:  # pragma: no cover
        custom_objects = kwargs.pop('custom_objects')

    # Load model and get metrics
    model, metrics = model_from_dict_w_opt(model,
                                           custom_objects=custom_objects)

    # instantiates metrics
    # there is at least one mandatory metric for sklearn models
    metrics_names = ["score"]
    if metrics:
        for metric in metrics:
            metrics_names.append(metric)
    for metric in metrics_names:
        results['metrics'][metric] = []
        results['metrics']["val_" + metric] = []

    # pickle data if generator
    if generator:
        data = [pickle.loads(d.encode('raw_unicode_escape')) for d in data]

    # check if data_val is in generator
    if all(v is None for v in data_val):
        val_gen = 0
    else:
        val_gen = check_gen(data_val)

    # if so pickle data_val
    if val_gen > 0:
        if generator:
            data_val = [pickle.loads(dv.encode('raw_unicode_escape'))
                        for dv in data_val]
            fit_gen_val = True
        else:
            raise Exception("You should also pass a generator for the training"
                            " data.")

    # Fit the model and validates it
    if len(size_gen) == 0:
        size_gen = [0] * len(data)

    # loop over the data/generators
    for d, dv, s_gen in szip(data, data_val, size_gen):
        # check if we have a data_val object.
        # if not, no evaluation of the metrics on data_val.
        validation = dv is not None

        # not treating the case "not generator and fit_gen_val"
        # since it is catched above
        # case A : dict for data and data_val
        if not generator and not fit_gen_val:
            X, y = d['X'], d['y']
            model.fit(X, y, *args, **kwargs)
            predondata.append(model.predict(X))
            for metric in metrics_names:
                # BUG FIX: `metric is not 'score'` compared string
                # identity, which is implementation-dependent; use !=.
                if metric != 'score':
                    computed_metric = getattr(
                        sklearn.metrics, metric)(y, predondata[-1])
                    results['metrics'][metric].append(
                        computed_metric)
                else:
                    computed_metric = model.score(X, y)
                    results['metrics']['score'].append(
                        computed_metric)
                    # TODO : optimization
            if validation:
                X_val, y_val = dv['X'], dv['y']
                predonval.append(model.predict(X_val))
                for metric in metrics_names:
                    if metric != 'score':
                        computed_metric = getattr(
                            sklearn.metrics, metric)(y_val, predonval[-1])
                    else:
                        computed_metric = model.score(X_val, y_val)
                        # TODO : optimization
                    results['metrics']['val_' + metric].append(
                        computed_metric)
            else:
                for metric in metrics_names:
                    results['metrics']['val_' + metric].append(np.nan)

        # case B : generator for data and no generator for data_val
        # could be dict or None
        elif generator and not fit_gen_val:
            if validation:
                X_val, y_val = dv['X'], dv['y']
            for batch_data in d.get_epoch_iterator():
                X, y = batch_data
                model.fit(X, y, *args, **kwargs)
                predondata.append(model.predict(X))
                if validation:
                    predonval.append(model.predict(X_val))
                for metric in metrics_names:
                    if metric != 'score':
                        results['metrics'][metric].append(
                            getattr(sklearn.metrics,
                                    metric)(y, predondata[-1]))
                        if validation:
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics'][
                                'val_' + metric].append(np.nan)
                    else:
                        results['metrics']['score'].append(
                            model.score(X, y))
                        if validation:
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))
                        else:
                            results['metrics']['val_score'].append(np.nan)

        # case C : generator for data and for data_val
        else:
            # case C1: N chunks in gen, 1 chunk in val, many to one
            if s_gen == 1:
                X_val, y_val = snext(dv.get_epoch_iterator())
                for batch_data in d.get_epoch_iterator():
                    X, y = batch_data
                    model.fit(X, y, *args, **kwargs)
                    predondata.append(model.predict(X))
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics'][metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y, predondata[-1]))
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['score'].append(
                                model.score(X, y))
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))

            # case C2 : 1 chunk in gen, N chunks in val, one to many
            elif s_gen == 2:
                X, y = snext(d.get_epoch_iterator())
                model.fit(X, y, *args, **kwargs)
                predondata.append(model.predict(X))
                for metric in metrics_names:
                    if metric != 'score':
                        results['metrics'][metric].append(
                            getattr(sklearn.metrics,
                                    metric)(y, predondata[-1]))
                    else:
                        results['metrics']['score'].append(model.score(X, y))
                for batch_val in dv.get_epoch_iterator():
                    X_val, y_val = batch_val
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))

            # case C3 : same numbers of chunks, many to many
            elif s_gen == 3:
                for batch_data, batch_val in szip(d.get_epoch_iterator(),
                                                  dv.get_epoch_iterator()):
                    X, y = batch_data
                    X_val, y_val = batch_val
                    model.fit(X, y, *args, **kwargs)
                    predondata.append(model.predict(X))
                    predonval.append(model.predict(X_val))
                    for metric in metrics_names:
                        if metric != 'score':
                            results['metrics'][metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y, predondata[-1]))
                            results['metrics']['val_' + metric].append(
                                getattr(sklearn.metrics,
                                        metric)(y_val, predonval[-1]))
                        else:
                            results['metrics']['score'].append(
                                model.score(X, y))
                            results['metrics']['val_score'].append(
                                model.score(X_val, y_val))
            else:  # pragma: no cover
                raise Exception(
                    'Incoherent generator size for train and validation')

    # for compatibility with keras backend
    results['metrics']['iter'] = np.nan

    return results, model