def fit(model_str, data_s, nb_train, nb_test, offset, nb_epoch=10,
        batch_size=32, custom_objects=None, callbacks=None, cuts_shutd=True):
    """Train a model given a dataset, a serialized model, custom objects,
    callbacks, nb_epoch and batch size.

    Args:
        model_str(str): the model dumped with the `to_json` method
        data_s(dict): a description of the dataset to load; must contain
            the keys 'data_path', 'elec' and 'cell'
        nb_train(int): the number of train datapoints to take
        nb_test(int): the number of test datapoints to take
        offset(int): how many datapoints to burn
        nb_epoch(int, optional): the number of epochs to train the model
        batch_size(int, optional): the batch size of the mini batches
        custom_objects(list, optional): custom objects needed to rebuild
            the model
        callbacks(list, optional): a list of callbacks used in the fit
        cuts_shutd(bool, optional): whether to cut the data at shutdowns

    Returns:
        the unique id (md5 hex digest) of the model"""
    from utils import sliced
    from databasesetup import get_models
    from datetime import datetime
    import hashlib
    import json

    import numpy as np

    import dr  # assumed local module providing DataReader and make_datasets

    if custom_objects is None:
        custom_objects = []
    if callbacks is None:
        callbacks = []

    # the model arrives already serialized to a JSON string
    model_json = model_str
    # get the models collection
    models = get_models()

    # load data
    data_path = data_s.pop('data_path')
    elec = data_s.pop('elec')
    cell = data_s.pop('cell')
    if data_path != 'test':
        root = data_s.pop('root')
        reader = dr.DataReader(data_path, root)
        reader.get_files_info()
        data = reader.commit(data_s)
    else:
        cuts_shutd = False
        data = data_s.copy()
        data['current'] = []
        data['bins'] = []
        root = None
        # numpy arrays are not BSON-serializable: store plain lists instead
        data_s = {key: data_s['data'][key].tolist() for key in data_s['data']}

    # slice data into train and validation windows
    beg, endt, endv = sliced(data['data'], nb_train, nb_test, offset)
    data_t = {n: data['data'][n][beg:endt] for n in data['data']}
    data_val = {n: data['data'][n][endt:endv] for n in data['data']}
    current_t = data['current'][beg:endt]
    current_val = data['current'][endt:endv]
    bins_t = data['bins'][beg:endt]
    bins_val = data['bins'][endt:endv]

    # cut the data at shutdowns
    if cuts_shutd:
        datas = dr.make_datasets(data_t, current_t, bins_t)
        datas_val = dr.make_datasets(data_val, current_val, bins_val)
        # pad the validation list so both lists have the same length
        diff = len(datas) - len(datas_val)
        if diff > 0:
            datas_val += [datas_val[-1]] * diff
    else:
        datas = [data_t]
        datas_val = [data_val]

    # TODO: implement cut dataset to match batch sizes
    if 'stateful' in model_json:
        pass

    # get a unique descriptor of the dataset
    first = next(iter(data_t))
    un_data = data_t[first].mean()

    # create the hash from the stringified json
    m = hashlib.md5()
    m.update((json.dumps(model_str) + str(un_data) + str(batch_size)).encode('utf-8'))
    hexdi = m.hexdigest()
    params_dump = "/parameters_h5/" + hexdi + '.h5'

    # record the full training description in the db
    full_json = {'keras_model': model_json,
                 'datetime': datetime.now(),
                 'hashed_mod': hexdi,
                 'data_id': str(un_data),
                 'params_dump': params_dump,
                 'batch_size': batch_size,
                 'trained': 0,
                 'cell': cell,
                 'elec': elec,
                 'data_path': data_path,
                 'root': root,
                 'data_s': data_s}
    mod_id = models.insert_one(full_json).inserted_id

    try:
        loss, val_loss, model = train_model(model_str, custom_objects,
                                            datas, datas_val,
                                            batch_size, nb_epoch, callbacks)
        models.update_one({"_id": mod_id},
                          {'$set': {'train_loss': loss,
                                    'min_tloss': np.min(loss),
                                    'valid_loss': val_loss,
                                    'min_vloss': np.min(val_loss),
                                    'iter_stopped': nb_epoch * len(datas),
                                    'trained': 1,
                                    'date_finished_trained': datetime.now()}})
        model.save_weights(params_dump, overwrite=True)
    except MemoryError:
        # drop the db record if training could not fit in memory
        models.delete_one({'hashed_mod': hexdi})
        raise
    except Exception:
        models.update_one({"_id": mod_id}, {'$set': {'error': 1}})
        raise
    return hexdi
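# A minimal usage sketch for `fit` (illustrative only: the Keras model, the
# array name 'voltage' and the 'anode'/'A1' values are assumptions, not part
# of this module; with data_path='test' the arrays in data_s['data'] must be
# numpy arrays, since they are sliced and averaged directly):
#
#     from keras.models import Sequential
#     from keras.layers import Dense
#     import numpy as np
#
#     model = Sequential([Dense(1, input_dim=1)])
#     model.compile(loss='mse', optimizer='adam')
#     data_s = {'data_path': 'test', 'elec': 'anode', 'cell': 'A1',
#               'data': {'voltage': np.arange(100.0)}}
#     hexdi = fit(model.to_json(), data_s, nb_train=64, nb_test=16, offset=0)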
def hosts_for_domain(self, domain):
    # apply `sliced` to every host entry registered under the given domain
    return [sliced(x, 1) for x in self.hosts_by_domain()[dots(domain)]]
def dot_(string, *args):
    # fall back to the original string when `sliced` returns a falsy value
    return sliced(string, *args) or string
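# `dot_` relies on the `value or default` fallback idiom: when sliced()
# yields a falsy result ('' or None), the caller gets the original string
# back. A self-contained sketch of the same pattern, with no project
# helpers involved (the function name is illustrative):
def _or_fallback(value, default):
    # '' / None / 0 are all falsy, so they fall through to the default
    return value or default

# _or_fallback('', 'example.com')   -> 'example.com'
# _or_fallback('ex', 'example.com') -> 'ex'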