class Project(object):
    @staticmethod
    def create(path):
        log.info("initializing project %s ...", path)
        os.makedirs(path, exist_ok=True)
        for filename, data in Templates.items():
            log.info("creating %s", filename)
            with open(os.path.join(path, filename), 'wt') as fp:
                fp.write(data.strip())

    @staticmethod
    def clean(path, full):
        Dataset.clean(path)
        if full:
            # clean everything
            clean_if_exist(path, (
                '__pycache__',
                'logs',
                'model.yml',
                'model.png',
                'model.h5',
                'model.fdeep',
                'model.stats',  # legacy
                'stats.txt',
                'stats.json',
                'history.json'))

    def __init__(self, path):
        # base info
        self.path = os.path.abspath(path)
        self.logic = Logic(self.path)
        # model related data
        self.model = None
        self.accu = None
        self.model_path = os.path.join(self.path, 'model.yml')
        self.model_img_path = os.path.join(self.path, 'model.png')
        self.weights_path = os.path.join(self.path, 'model.h5')
        self.fdeep_path = os.path.join(self.path, 'model.fdeep')
        # training related data
        self.dataset = Dataset(self.path)
        self.txt_stats_path = os.path.join(self.path, 'stats.txt')
        self.json_stats_path = os.path.join(self.path, 'stats.json')
        self.history_path = os.path.join(self.path, 'history.json')
        self.history = None
        self.what = {
            'train': "Training --------------------------------------------\n",
            'val': "Validation ------------------------------------------\n",
            'test': "Test ------------------------------------------------\n"
        }

    def exists(self):
        return os.path.exists(self.path)

    def is_trained(self):
        return os.path.exists(self.weights_path)

    def load(self):
        log.info("loading project %s ...", self.path)
        if not self.exists():
            return "%s does not exist" % self.path

        err = self.logic.load()
        if err is not None:
            return err

        if os.path.exists(self.weights_path):
            log.debug("loading model from %s ...", self.weights_path)
            self.model = load_model(self.weights_path)
            # https://github.com/keras-team/keras/issues/6462
            self.model._make_predict_function()
        elif os.path.exists(self.model_path):
            log.debug("loading model from %s ...", self.model_path)
            with open(self.model_path, 'r') as fp:
                self.model = model_from_yaml(fp.read())
        else:
            self.model = self.logic.builder(True)

        if os.path.exists(self.history_path):
            log.debug("loading history from %s ...", self.history_path)
            with open(self.history_path, 'r') as fp:
                self.history = json.load(fp)

        return None

    def accuracy_for(self, X, Y, repo_as_dict=False):
        Y_tpred = np.argmax(self.model.predict(X), axis=1)
        repo = classification_report(np.argmax(Y, axis=1), Y_tpred,
                                     output_dict=repo_as_dict)
        cm = confusion_matrix(np.argmax(Y, axis=1), Y_tpred)
        return repo, cm

    def accuracy(self):
        train, tr_cm = self.accuracy_for(self.dataset.X_train, self.dataset.Y_train)
        test, ts_cm = self.accuracy_for(self.dataset.X_test, self.dataset.Y_test)
        val, val_cm = self.accuracy_for(self.dataset.X_val, self.dataset.Y_val)
        return {
            'train': (train, tr_cm),
            'test': (test, ts_cm),
            'val': (val, val_cm)
        }

    def _save_model(self):
        log.info("updating %s ...", self.model_path)
        with open(self.model_path, 'w') as fp:
            fp.write(self.model.to_yaml())
        log.info("updating %s ...", self.weights_path)
        self.model.save(self.weights_path)

    def _save_history(self):
        log.info("updating %s ...", self.history_path)
        with open(self.history_path, 'w') as fp:
            json.dump(self.history, fp)

    def _emit_txt_stats(self, where):
        for who, header in self.what.items():
            vals = self.accu[who]
            where.write(header)
            where.write(vals[0])
            where.write("\n\n")
            where.write("confusion matrix:")
            where.write("\n\n")
            where.write("%s\n" % vals[1])
            where.write("\n")
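
    # The text report emitted above looks roughly like the sketch below: the
    # header comes from self.what, then the sklearn classification_report
    # string, then the printed confusion matrix (numbers here are made up):
    #
    #   Training --------------------------------------------
    #                 precision    recall  f1-score   support
    #   ...
    #
    #   confusion matrix:
    #
    #   [[90  2]
    #    [ 3 85]]
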
    def _emit_json_stats(self, where):
        stats = {}
        for who in self.what:
            report, cm = self.accu[who]
            stats[who] = {
                'accuracy': serialize_classification_report(report),
                'cm': serialize_cm(cm)
            }
        json.dump(stats, where)

    def _save_stats(self):
        log.info("updating %s ...", self.txt_stats_path)
        with open(self.txt_stats_path, 'w') as fp:
            self._emit_txt_stats(fp)
        log.info("updating %s ...", self.json_stats_path)
        with open(self.json_stats_path, 'wt') as fp:
            self._emit_json_stats(fp)

    def _from_file(self, filename):
        log.info("preparing data from %s ...", filename)
        return self.logic.prepare_dataset(filename)

    def _from_sum(self, source):
        m = re.findall(r"^sum:\/\/([^@]+)@([^:]+:\d+)$", source)
        # re.findall returns a (possibly empty) list, never None
        if not m:
            raise Exception(
                "no valid source provided, format is: 'sum:///etc/sumd/creds/cert.pem@localhost:50051'"
            )

        conn = m[0][1]
        cert = m[0][0]
        log.info("connecting to sumd instance %s using certificate %s ...", conn, cert)

        size = 100 * 1024 * 1024
        opts = [('grpc.max_send_message_length', size),
                ('grpc.max_receive_message_length', size)]
        cli = sumpy.Client(conn, cert, opts=opts)
        per_page = 4096
        count = cli.list_records(0, per_page)
        left = count.total
        page = 1
        data = []

        log.info("fetching %d records (%d pages)...", count.total, count.pages)
        while left > 0:
            log.debug(" page %d/%d (left %d records)", page, count.pages, left)
            resp = cli.list_records(page, per_page)
            page += 1
            for r in resp.records:
                data.append(r.data)
                left -= 1

        return pd.DataFrame(data)
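
    # A quick sketch of what the regex above captures, using the example
    # source from the exception message:
    #
    #   >>> re.findall(r"^sum:\/\/([^@]+)@([^:]+:\d+)$",
    #   ...            "sum:///etc/sumd/creds/cert.pem@localhost:50051")
    #   [('/etc/sumd/creds/cert.pem', 'localhost:50051')]
    #
    # i.e. m[0][0] is the client certificate path and m[0][1] the host:port.
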
    def prepare(self, source, p_test, p_val, shuffle=True):
        if source.startswith('sum://'):
            data = self._from_sum(source)
        else:
            data = self._from_file(source)
        log.info("data shape: %s", data.shape)
        return self.dataset.source(data, p_test, p_val, shuffle)

    def train(self, gpus):
        # async datasets saver might be running, wait before training
        self.dataset.saver.wait()
        # train
        if self.model is None:
            self.model = self.logic.builder(True)

        to_train = self.model
        if gpus > 1:
            log.info("training with %d GPUs", gpus)
            to_train = multi_gpu_model(self.model, gpus=gpus)

        past = self.history.copy() if self.history is not None else None
        present = self.logic.trainer(to_train, self.dataset).history
        if past is None:
            self.history = present
        else:
            self.history = {}
            for name, past_values in past.items():
                self.history[name] = past_values + present[name]

        self.accu = self.accuracy()

        print("")
        self._emit_txt_stats(sys.stdout)

        # save model structure and weights
        self._save_model()
        # save training history
        self._save_history()
        # save model accuracy statistics
        self._save_stats()

    def _view_model(self):
        import matplotlib.image as mpimg
        import matplotlib.pyplot as plt

        if self.model is not None:
            self.model.summary()
            log.info("saving model to %s ...", self.model_img_path)
            plot_model(self.model,
                       to_file=self.model_img_path,
                       show_shapes=True,
                       show_layer_names=True)
            img = mpimg.imread(self.model_img_path)
            plt.figure("model structure")
            plt.imshow(img)

    def _view_stats(self):
        import matplotlib.pyplot as plt

        if os.path.exists(self.txt_stats_path):
            with open(self.txt_stats_path, 'rt') as fp:
                print(fp.read().strip())

        if os.path.exists(self.json_stats_path):
            with open(self.json_stats_path, 'rt') as fp:
                stats = json.load(fp)
            for who, header in self.what.items():
                orig = np.array(stats[who]['cm'])
                cm = np.array(stats[who]['cm'])
                tot = cm.sum()
                # normalize each row so cells read as per-class percentages
                cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
                title = "%s confusion matrix (%d samples)" % (
                    header.strip(" -\n").lower(), tot)
                plt.figure(title)
                plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
                plt.title(title)
                plt.colorbar()
                classes = range(0, cm.shape[0])
                tick_marks = np.arange(len(classes))
                plt.xticks(tick_marks, classes, rotation=45)
                plt.yticks(tick_marks, classes)
                thresh = cm.max() / 2.
                for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
                    plt.text(j, i,
                             "%.1f%% (%d)" % (cm[i, j] * 100, orig[i, j]),
                             horizontalalignment="center",
                             color="white" if cm[i, j] > thresh else "black")
                plt.tight_layout()
                plt.ylabel('truth')
                plt.xlabel('prediction')

    def _view_history(self):
        import matplotlib.pyplot as plt

        if self.history is not None:
            plt.figure("training history")
            # Plot training & validation accuracy values
            # (Keras < 2.3 names these keys 'acc'/'val_acc'; newer versions
            # use 'accuracy'/'val_accuracy')
            plt.subplot(2, 1, 1)
            plt.plot(self.history['acc'])
            plt.plot(self.history['val_acc'])
            plt.title('Model accuracy')
            plt.ylabel('Accuracy')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='lower right')
            # Plot training & validation loss values
            plt.subplot(2, 1, 2)
            plt.plot(self.history['loss'])
            plt.plot(self.history['val_loss'])
            plt.title('Model loss')
            plt.ylabel('Loss')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='upper right')
            plt.tight_layout()

    def view(self):
        import matplotlib.pyplot as plt

        self._view_model()
        self._view_stats()
        self._view_history()
        plt.show()
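
# A minimal end-to-end sketch of the API above (the project path, data file
# and split ratios are placeholders):
#
#   Project.create('/tmp/example')           # scaffold the template files
#   prj = Project('/tmp/example')
#   err = prj.load()                         # returns an error string or None
#   if err is None:
#       prj.prepare('data.csv', 0.15, 0.15)  # also accepts a 'sum://...' source
#       prj.train(gpus=1)
#       prj.view()
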
class Project(object):
    def __init__(self, path):
        # base info
        self.path = os.path.abspath(path)
        self.logic = Logic(self.path)
        # model related data
        self.model = None
        self.accu = None
        self.model_path = os.path.join(self.path, 'model.yml')
        self.weights_path = os.path.join(self.path, 'model.h5')
        self.fdeep_path = os.path.join(self.path, 'model.fdeep')
        # training related data
        self.dataset = Dataset(self.path)
        self.txt_stats_path = os.path.join(self.path, 'stats.txt')
        self.json_stats_path = os.path.join(self.path, 'stats.json')
        self.history_path = os.path.join(self.path, 'history.json')
        self.classes_path = os.path.join(self.path, 'classes.json')
        self.history = None
        self.classes = None
        self.what = {
            'train': "Training --------------------------------------------\n",
            'val': "Validation ------------------------------------------\n",
            'test': "Test ------------------------------------------------\n"
        }

    def exists(self):
        return os.path.exists(self.path)

    def is_trained(self):
        return os.path.exists(self.weights_path)

    def load(self):
        log.info("loading project %s ...", self.path)
        if not self.exists():
            return "%s does not exist" % self.path

        err = self.logic.load()
        if err is not None:
            return err

        if os.path.exists(self.weights_path):
            log.debug("loading model from %s ...", self.weights_path)
            self.model = load_model(self.weights_path)
            # https://github.com/keras-team/keras/issues/6462
            self.model._make_predict_function()
        elif os.path.exists(self.model_path):
            log.debug("loading model from %s ...", self.model_path)
            with open(self.model_path, 'r') as fp:
                self.model = model_from_yaml(fp.read())
        else:
            self.model = self.logic.builder(True)

        if os.path.exists(self.history_path):
            log.debug("loading history from %s ...", self.history_path)
            with open(self.history_path, 'r') as fp:
                self.history = json.load(fp)

        if os.path.exists(self.classes_path):
            log.debug("loading classes from %s ...", self.classes_path)
            with open(self.classes_path, 'r') as fp:
                # JSON object keys are always strings, convert them back to
                # integer class ids
                self.classes = {int(k): v for k, v in json.load(fp).items()}

        return None
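
    # classes.json presumably maps integer class ids to human readable
    # labels; a sketch of the expected layout (label names are made up):
    #
    #   {"0": "normal", "1": "anomalous"}
    #
    # after the conversion above, self.classes becomes
    # {0: 'normal', 1: 'anomalous'}.
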
    def accuracy_for(self, X, Y, repo_as_dict=False):
        Y_tpred = np.argmax(self.model.predict(X), axis=1)
        repo = classification_report(np.argmax(Y, axis=1), Y_tpred,
                                     output_dict=repo_as_dict)
        cm = confusion_matrix(np.argmax(Y, axis=1), Y_tpred)
        return repo, cm

    def accuracy(self):
        train, tr_cm = self.accuracy_for(self.dataset.X_train, self.dataset.Y_train)
        test, ts_cm = self.accuracy_for(self.dataset.X_test, self.dataset.Y_test)
        val, val_cm = self.accuracy_for(self.dataset.X_val, self.dataset.Y_val)
        return {
            'train': (train, tr_cm),
            'test': (test, ts_cm),
            'val': (val, val_cm)
        }

    def reload_model(self):
        K.clear_session()
        if os.path.exists(self.weights_path):
            self.model = load_model(self.weights_path)
            # https://github.com/keras-team/keras/issues/6462
            self.model._make_predict_function()
        elif os.path.exists(self.model_path):
            with open(self.model_path, 'r') as fp:
                self.model = model_from_yaml(fp.read())
        else:
            self.model = self.logic.builder(True)
        gc.collect()

    def _save_model(self):
        log.info("updating %s ...", self.model_path)
        with open(self.model_path, 'w') as fp:
            fp.write(self.model.to_yaml())
        log.info("updating %s ...", self.weights_path)
        self.model.save(self.weights_path)

    def _save_history(self):
        log.info("updating %s ...", self.history_path)
        with open(self.history_path, 'w') as fp:
            json.dump(self.history, fp)

    def _emit_txt_stats(self, where):
        for who, header in self.what.items():
            vals = self.accu[who]
            where.write(header)
            where.write(vals[0])
            where.write("\n\n")
            where.write("confusion matrix:")
            where.write("\n\n")
            where.write("%s\n" % vals[1])
            where.write("\n")

    def _emit_json_stats(self, where):
        stats = {}
        for who in self.what:
            report, cm = self.accu[who]
            stats[who] = {
                'accuracy': serialize_classification_report(report),
                'cm': serialize_cm(cm)
            }
        json.dump(stats, where)

    def _save_stats(self):
        log.info("updating %s ...", self.txt_stats_path)
        with open(self.txt_stats_path, 'w') as fp:
            self._emit_txt_stats(fp)
        log.info("updating %s ...", self.json_stats_path)
        with open(self.json_stats_path, 'wt') as fp:
            self._emit_json_stats(fp)

    def _from_file(self, filename):
        log.info("preparing data from %s ...", filename)
        return self.logic.prepare_dataset(filename)

    def prepare(self, source, p_test, p_val, shuffle=True):
        data = self._from_file(source)
        num_labels = None
        if self.model is not None:
            # assuming a single dense output layer, its width tells us how
            # many labels the dataset should be encoded with
            num_labels = self.model.outputs[-1].shape[1]
        log.info("data shape: %s", data.shape)
        return self.dataset.source(data, p_test, p_val, shuffle, num_labels)

    def train(self, gpus):
        # async datasets saver might be running, wait before training
        self.dataset.saver.wait()
        # train
        if self.model is None:
            self.model = self.logic.builder(True)

        to_train = self.model
        if gpus > 1:
            log.info("training with %d GPUs", gpus)
            to_train = multi_gpu_model(self.model, gpus=gpus)

        past = self.history.copy() if self.history is not None else None
        present = self.logic.trainer(to_train, self.dataset).history
        if past is None:
            self.history = present
        else:
            self.history = {}
            for name, past_values in past.items():
                self.history[name] = past_values + present[name]

        self.accu = self.accuracy()

        print("")
        self._emit_txt_stats(sys.stdout)

        # save model structure and weights
        self._save_model()
        # save training history
        self._save_history()
        # save model accuracy statistics
        self._save_stats()

    def view(self, img_only=False):
        import ergo.views as views

        views.model(self, img_only)
        views.roc(self, img_only)
        views.stats(self, img_only)
        views.history(self, img_only)
        views.show(img_only)
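
# A few calls specific to this revision (sketch; 'prj' is a loaded Project
# instance and the dict keys follow sklearn's classification_report output):
#
#   report, cm = prj.accuracy_for(prj.dataset.X_test, prj.dataset.Y_test,
#                                 repo_as_dict=True)
#   print(report['weighted avg']['f1-score'], cm.shape)
#   prj.reload_model()        # clear the Keras session and reload from disk
#   prj.view(img_only=True)   # rendering is delegated to ergo.views
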
class Project(object):
    @staticmethod
    def create(path):
        log.info("initializing project %s ...", path)
        for filename, data in Templates.items():
            log.info("creating %s", filename)
            with open(os.path.join(path, filename), 'wt') as fp:
                fp.write(data.strip())

    @staticmethod
    def clean(path, full):
        Dataset.clean(path)
        if full:
            # clean everything
            clean_if_exist(path, (
                '__pycache__',
                'logs',
                'model.yml',
                'model.png',
                'model.h5',
                'model.fdeep',
                'model.stats',
                'history.json'))

    def __init__(self, path):
        # base info
        self.path = os.path.abspath(path)
        self.logic = Logic(self.path)
        # model related data
        self.model = None
        self.accu = None
        self.model_path = os.path.join(self.path, 'model.yml')
        self.model_img_path = os.path.join(self.path, 'model.png')
        self.weights_path = os.path.join(self.path, 'model.h5')
        self.fdeep_path = os.path.join(self.path, 'model.fdeep')
        # training related data
        self.dataset = Dataset(self.path)
        self.stats_path = os.path.join(self.path, 'model.stats')
        self.history_path = os.path.join(self.path, 'history.json')
        self.history = None
        self.what = {
            'train': "Training --------------------------------------------\n",
            'val': "Validation ------------------------------------------\n",
            'test': "Test ------------------------------------------------\n"
        }

    def exists(self):
        return os.path.exists(self.path)

    def is_trained(self):
        return os.path.exists(self.weights_path)

    def load(self):
        log.info("loading project %s ...", self.path)
        if not self.exists():
            return "%s does not exist" % self.path

        err = self.logic.load()
        if err is not None:
            return err

        if os.path.exists(self.weights_path):
            log.debug("loading model from %s ...", self.weights_path)
            self.model = load_model(self.weights_path)
            # https://github.com/keras-team/keras/issues/6462
            self.model._make_predict_function()
        elif os.path.exists(self.model_path):
            log.debug("loading model from %s ...", self.model_path)
            with open(self.model_path, 'r') as fp:
                self.model = model_from_yaml(fp.read())
        else:
            self.model = self.logic.builder(True)

        if os.path.exists(self.history_path):
            log.debug("loading history from %s ...", self.history_path)
            with open(self.history_path, 'r') as fp:
                self.history = json.load(fp)

        return None

    def accuracy_for(self, X, Y, repo_as_dict=False):
        Y_tpred = np.argmax(self.model.predict(X), axis=1)
        repo = classification_report(np.argmax(Y, axis=1), Y_tpred,
                                     output_dict=repo_as_dict)
        cm = confusion_matrix(np.argmax(Y, axis=1), Y_tpred)
        return repo, cm

    def accuracy(self):
        train, tr_cm = self.accuracy_for(self.dataset.X_train, self.dataset.Y_train)
        test, ts_cm = self.accuracy_for(self.dataset.X_test, self.dataset.Y_test)
        val, val_cm = self.accuracy_for(self.dataset.X_val, self.dataset.Y_val)
        return {
            'train': (train, tr_cm),
            'test': (test, ts_cm),
            'val': (val, val_cm)
        }

    def _save_model(self):
        log.info("updating %s ...", self.model_path)
        with open(self.model_path, 'w') as fp:
            fp.write(self.model.to_yaml())
        log.info("updating %s ...", self.weights_path)
        self.model.save(self.weights_path)

    def _save_history(self):
        log.info("updating %s ...", self.history_path)
        with open(self.history_path, 'w') as fp:
            json.dump(self.history, fp)

    def _out_stats(self, where):
        for who, header in self.what.items():
            vals = self.accu[who]
            where.write(header)
            where.write(vals[0])
            where.write("\n\n")
            where.write("confusion matrix:")
            where.write("\n\n")
            where.write("%s\n" % vals[1])
            where.write("\n")

    def _save_stats(self):
        log.info("updating %s ...", self.stats_path)
        with open(self.stats_path, 'w') as fp:
            self._out_stats(fp)
    def prepare(self, filename, p_test, p_val):
        log.info("preparing data from %s ...", filename)
        data = self.logic.prepare_dataset(filename)
        return self.dataset.source(data, p_test, p_val)

    def train(self, gpus):
        # async datasets saver might be running, wait before training
        self.dataset.saver.wait()
        # train
        if self.model is None:
            self.model = self.logic.builder(True)

        to_train = self.model
        if gpus > 1:
            log.info("training with %d GPUs", gpus)
            to_train = multi_gpu_model(self.model, gpus=gpus)

        self.history = self.logic.trainer(to_train, self.dataset).history
        self.accu = self.accuracy()

        print("")
        self._out_stats(sys.stdout)

        # save model structure and weights
        self._save_model()
        # save training history
        self._save_history()
        # save model accuracy statistics
        self._save_stats()

    def view(self):
        import matplotlib.pyplot as plt
        import matplotlib.image as mpimg

        if self.model is not None:
            self.model.summary()
            log.info("saving model to %s ...", self.model_img_path)
            plot_model(self.model,
                       to_file=self.model_img_path,
                       show_shapes=True,
                       show_layer_names=True)
            img = mpimg.imread(self.model_img_path)
            plt.figure()
            plt.imshow(img)

        if self.history is not None:
            plt.figure()
            # Plot training & validation accuracy values
            plt.subplot(2, 1, 1)
            plt.plot(self.history['acc'])
            plt.plot(self.history['val_acc'])
            plt.title('Model accuracy')
            plt.ylabel('Accuracy')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='lower right')
            # Plot training & validation loss values
            plt.subplot(2, 1, 2)
            plt.plot(self.history['loss'])
            plt.plot(self.history['val_loss'])
            plt.title('Model loss')
            plt.ylabel('Loss')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='upper right')
            plt.tight_layout()

        plt.show()