def train(path):
    """Train an XGBoost storm-petrel call classifier on one feature CSV.

    Grid-searches XGBoost hyper-parameters with 4-fold CV, persists the best
    estimator to ``<name>_model.pkl``, and logs parameters, metrics and
    figures to a Comet.ml experiment.

    :param path: path to a CSV of acoustic features; the first ``split_idx``
        rows are assumed to be noise-only recordings — TODO confirm against
        the feature-extraction step.
    """
    # Experiment/model name derives from the CSV file name (no extension).
    name = os.path.splitext(os.path.basename(path))[0]
    print('Processing: ', name)

    features = pd.read_csv(path, index_col=None)
    # `selected_features` is a module-level list of (name, description)
    # pairs; renamed loop variable so it no longer shadows `name` above.
    selected_features_names = [feat_name for feat_name, _desc in selected_features]
    features = features[selected_features_names]

    # The first `split_idx` rows are treated as noise-only and held out of
    # the train/test split; they are scored separately below.
    split_idx = 1200
    features = features.drop(['sound.files'], axis=1)
    noise_only_df, df = features.iloc[:split_idx], features.iloc[split_idx:]

    # pop() removes the label column in place, so both frames keep only
    # feature columns afterwards (df.columns then matches X's columns).
    y = df.pop('petrel')
    X = df.values
    y_noise = noise_only_df.pop('petrel')
    X_noise = noise_only_df.values

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y)

    hyperparams = {
        'n_estimators': [100, 300, 500, 1000],
        'learning_rate': [0.1],
        'gamma': [0.0, 0.5],
        'max_depth': [2, 3, 4],
        'min_child_weight': [1, 2],
        'subsample': [1.0, 0.8],
        'reg_alpha': [0.0, 0.1],
        'reg_lambda': [1, 2, 3]
    }

    clf = model_selection.GridSearchCV(
        estimator=xg.XGBClassifier(objective='binary:logistic', n_jobs=-1),
        param_grid=hyperparams,
        cv=4)
    fit_params = clf.fit(X_train, y_train)
    estimator = fit_params.best_estimator_
    joblib.dump(estimator, name + '_model.pkl')

    test_pred = estimator.predict(X_test)
    metrics = calculate_metrics(test_pred, y_test)

    # Noise-only hold-out: accuracy here measures how well the model avoids
    # false positives on pure-noise recordings.
    noise_pred = estimator.predict(X_noise)
    noise_detection_accuracy = accuracy_score(y_noise, noise_pred)

    # SECURITY(review): hard-coded Comet.ml API key — move to an environment
    # variable or config file before sharing this code.
    experiment = Experiment(api_key="4PdGdUZmGf6P8QsMa5F2zB4Ui",
                            project_name="storm petrels",
                            workspace="tracewsl")
    experiment.set_name(name)
    experiment.log_parameter('name', name)
    experiment.log_multiple_params(fit_params.best_params_)
    experiment.log_multiple_metrics(metrics)
    experiment.log_metric('Noise detection accuracy', noise_detection_accuracy)
    experiment.log_figure('Confusion matrix',
                          get_confusion_matrix_figure(test_pred, y_test))
    # Fixed typo in the figure title ('importnace' -> 'importance').
    experiment.log_figure('Feature importance',
                          get_feature_importance_figure(estimator, list(df.columns.values)))
class Logger:
    """TensorFlow 1.x summary logger that mirrors metrics to Comet.ml.

    Writes train/test summaries via ``tf.summary.FileWriter`` and, when the
    config carries a ``comet_api_key`` entry, also forwards each metrics dict
    to a Comet.ml experiment.
    """

    def __init__(self, sess, config):
        # NOTE(review): `config` is used with attribute access
        # (config.summary_dir) AND item access (config['comet_api_key']) —
        # presumably a Bunch-style mapping; verify against the config loader.
        self.sess = sess
        self.config = config
        self.summary_placeholders = {}
        self.summary_ops = {}
        self.train_summary_writer = tf.summary.FileWriter(os.path.join(self.config.summary_dir, "train"),
                                                          self.sess.graph)
        self.test_summary_writer = tf.summary.FileWriter(
            os.path.join(self.config.summary_dir, "test"))
        # Comet logging is optional: only enabled when an API key is present.
        if "comet_api_key" in config:
            from comet_ml import Experiment
            self.experiment = Experiment(
                api_key=config['comet_api_key'], project_name=config['exp_name'])
            self.experiment.disable_mp()
            self.experiment.log_multiple_params(config)

    # It can summarize scalars and images.
    def summarize(self, step, summarizer="train", scope="", summaries_dict=None):
        """
        :param step: the step of the summary
        :param summarizer: use the train summary writer or the test one
        :param scope: variable scope
        :param summaries_dict: the dict of the summaries values (tag,value)
        :return:
        """
        summary_writer = self.train_summary_writer if summarizer == "train" else self.test_summary_writer
        with tf.variable_scope(scope):
            if summaries_dict is not None:
                summary_list = []
                for tag, value in summaries_dict.items():
                    # Placeholders/ops are created lazily, once per tag, and
                    # cached so later calls reuse the same graph nodes.
                    if tag not in self.summary_ops:
                        # Rank <= 1 values become scalar summaries; higher
                        # ranks are logged as image batches.
                        if len(value.shape) <= 1:
                            self.summary_placeholders[tag] = tf.placeholder(
                                'float32', value.shape, name=tag)
                        else:
                            self.summary_placeholders[tag] = tf.placeholder('float32',
                                                                            [None] + list(value.shape[1:]),
                                                                            name=tag)
                        if len(value.shape) <= 1:
                            self.summary_ops[tag] = tf.summary.scalar(
                                tag, self.summary_placeholders[tag])
                        else:
                            self.summary_ops[tag] = tf.summary.image(
                                tag, self.summary_placeholders[tag])
                    summary_list.append(self.sess.run(self.summary_ops[tag], {
                        self.summary_placeholders[tag]: value}))
                for summary in summary_list:
                    summary_writer.add_summary(summary, step)
                if hasattr(self, 'experiment') and self.experiment is not None:
                    # NOTE(review): image-valued entries are forwarded here
                    # too; confirm Comet accepts non-scalar metric values.
                    self.experiment.log_multiple_metrics(
                        summaries_dict, step=step)
                summary_writer.flush()
class Logger(object):
    """TensorBoard + optional Comet.ml logger for one training run.

    The run is identified as ``<dataset_name>-<model_name>``; TensorBoard
    event files go under ``hp.logdir``. Comet logging is enabled only when
    ``hp.comet_ml_api_key`` is set.
    """

    def __init__(self, dataset_name, model_name):
        self.model_name = model_name
        self.project_name = "%s-%s" % (dataset_name, self.model_name)
        self.logdir = os.path.join(hp.logdir, self.project_name)
        self.writer = SummaryWriter(log_dir=self.logdir)
        self.experiment = None  # Experiment(api_key="luY5eUQDsBynS168WxJiRPJmJ", project_name=self.project_name, log_code=False)
        if hp.comet_ml_api_key is not None:
            self.experiment = Experiment(api_key=hp.comet_ml_api_key,
                                         project_name=self.project_name, log_code=False)
            # Log every non-dunder attribute of the hyper-parameter module
            # `hp` as a Comet parameter.
            self.experiment.log_multiple_params(
                dict((name, getattr(hp, name)) for name in dir(hp) if not name.startswith('__')))

    def log_step(self, phase, step, loss_dict, image_dict):
        # Step-level logging is throttled and train-only: scalars every 50
        # steps, images every 1000 steps.
        if phase == 'train':
            if step % 50 == 0:
                if self.experiment is not None:
                    with self.experiment.train():
                        self.experiment.log_multiple_metrics(loss_dict, step=step)
                # self.writer.add_scalar('lr', get_lr(), step)
                # self.writer.add_scalar('%s-step/loss' % phase, loss, step)
                # sorted() gives a deterministic tag order in TensorBoard.
                for key in sorted(loss_dict):
                    self.writer.add_scalar('%s-step/%s' % (phase, key), loss_dict[key], step)
            if step % 1000 == 0:
                for key in sorted(image_dict):
                    self.writer.add_image('%s/%s' % (self.model_name, key), image_dict[key], step)

    def log_epoch(self, phase, step, loss_dict):
        # Epoch-level scalars are written for every phase; only validation
        # metrics are mirrored to Comet.
        for key in sorted(loss_dict):
            self.writer.add_scalar('%s/%s' % (phase, key), loss_dict[key], step)
        if phase == 'valid':
            if self.experiment is not None:
                with self.experiment.validate():
                    self.experiment.log_multiple_metrics(loss_dict, step=step)
padding='same', activation=params['activation'])) model.add(Dropout(params['dropout'])) model.add(Flatten()) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer=params['optimizer'], metrics=['accuracy']) #print model.summary() to preserve automatically in `Output` tab print(model.summary()) params.update({'total_number_of_parameters': model.count_params()}) #will log metrics with the prefix 'train_' with experiment.train(): model.fit(X_train, y_train, epochs=params['epochs'], batch_size=params['batch_size'], verbose=1, validation_data=(X_test, y_test)) #will log metrics with the prefix 'test_' with experiment.test(): loss, accuracy = model.evaluate(X_test, y_test) metrics = {'loss': loss, 'accuracy': accuracy} experiment.log_multiple_metrics(metrics) experiment.log_multiple_params(params) experiment.log_dataset_hash(X_train) #creates and logs a hash of your data
# NOTE(review): this chunk continues a larger script — y_prob, y_train,
# model, the *_dev arrays, P_THRESHOLD, to_file and `experiment` are all
# defined earlier in the file, outside this view.

# Binarize training-set probabilities at P_THRESHOLD.
y_pred = y_prob.copy()
y_pred[y_pred >= P_THRESHOLD] = 1
y_pred[y_pred < P_THRESHOLD] = 0

print('train micro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='micro', sample_weight=None)))
print('train macro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='macro', sample_weight=None)))
print('train weightedmacro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='weighted', sample_weight=None)))

# F1 under three averaging schemes, mirrored to Comet.ml.
train_metrics = {
    "train_micro": f1_score(y_train, y_pred, average='micro', sample_weight=None),
    "train_macro": f1_score(y_train, y_pred, average='macro', sample_weight=None),
    "train_weighted_macro": f1_score(y_train, y_pred, average='weighted', sample_weight=None)
}
experiment.log_multiple_metrics(train_metrics)

# Dev
with experiment.validate():
    y_prob_dev = model.predict([meta_dev, title_dev, desc_dev, x_dev])
    # NOTE(review): y_train (not y_dev) is passed here — looks suspicious;
    # verify to_file()'s expected arguments before changing.
    to_file(y_prob_dev, "dev_results", y_train)
    # Same thresholding as above, applied to dev predictions.
    y_pred_dev = y_prob_dev.copy()
    y_pred_dev[y_pred_dev >= P_THRESHOLD] = 1
    y_pred_dev[y_pred_dev < P_THRESHOLD] = 0
    print('dev micro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='micro', sample_weight=None)))
    print('dev macro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='macro', sample_weight=None)))
    print('dev weightedmacro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='weighted', sample_weight=None)))
plt.plot(y_pred, y_test) # Visualising the Test set results plt.scatter(X_test, y_test, color = 'red') plt.plot(X_train, regressor.predict(X_train), color = 'blue') plt.title('Salary vs Experience (Test set)') plt.xlabel('Years of Experience') plt.ylabel('Salary') plt.show() mse = mean_squared_error(y_test, y_pred) mae = mean_absolute_error(y_test, y_pred) evs = explained_variance_score(y_test, y_pred) #these will be logged to your sklearn-demos project on Comet.ml params={"random_state":0, "model_type":"simple regression", "scaler":"none", "stratify":True } metrics = {"mse":mse, "mae":mae, "evs":evs } exp.log_dataset_hash(X_train) exp.log_multiple_params(params) exp.log_multiple_metrics(metrics)
class Trainer:
    """Training driver for a RetinaNet object detector (COCO or CSV data).

    All options are hard-coded as attributes in ``__init__`` instead of CLI
    flags. ``iterate()`` is the entry point: it builds the datasets and
    model, then runs the epoch loop, optionally mirroring loss averages to a
    Comet.ml experiment.
    """

    def __init__(self):
        # Simple training script for training a RetinaNet network.
        # Dataset type, must be one of csv or coco.
        self.dataset = 'coco'
        # Path to COCO directory
        self.coco_path = './data'
        # Path to file containing training annotations (see readme)
        self.csv_train = None
        # Path to file containing class list (see readme)
        self.csv_classes = None
        # Path to file containing validation annotations (optional, see readme)
        self.csv_val = None
        # Resnet depth, must be one of 18, 34, 50, 101, 152
        self.depth = 50
        # batch_size
        self.bs = 8
        # learning rate
        self.lr = 1e-5
        # Number of epochs
        self.epochs = 10
        # set device
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # set focal loss
        self.focal_loss = losses.FocalLoss()
        # module calculating nms
        self.nms = NMS(BBoxTransform, ClipBoxes)
        # index of the saving model (used in checkpoint file names)
        self.save_name = 2
        # use comet_ml
        self.cml = True
        # running average of the classification loss
        self.cls_loss_meter = AverageMeter()
        # running average of the regression loss
        self.rgrs_loss_meter = AverageMeter()
        self.set_comet_ml()

    def set_comet_ml(self):
        """Create the Comet.ml experiment (or None) and log hyper-parameters."""
        params = {
            'epochs': self.epochs,
            'batch_size': self.bs,
            'lr': self.lr,
            'resnet_depth': self.depth,
            'save_name': self.save_name,
        }
        # SECURITY(review): hard-coded API key. Also NOTE(review): the two
        # `if self.cml` checks could be merged into one branch.
        if self.cml:
            self.experiment = Experiment(api_key="xK18bJy5xiPuPf9Dptr43ZuMk",
                                         project_name="retinanet-coco",
                                         workspace="tanimutomo")
        else:
            self.experiment = None
        if self.cml:
            self.experiment.log_multiple_params(params)

    def set_dataset(self):
        """Build (dataset_train, dataset_val) for the configured source.

        :raises ValueError: if required paths are missing or ``self.dataset``
            is neither 'coco' nor 'csv'.
        """
        # Create the data loaders
        if self.dataset == 'coco':
            if self.coco_path is None:
                raise ValueError(
                    'Must provide --coco_path when training on COCO,')
            dataset_train = CocoDataset(self.coco_path, set_name='train2017',
                                        transform=transforms.Compose([
                                            Normalizer(), Augmenter(), Resizer()
                                        ]))
            dataset_val = CocoDataset(self.coco_path, set_name='val2017',
                                      transform=transforms.Compose(
                                          [Normalizer(), Resizer()]))
        elif self.dataset == 'csv':
            if self.csv_train is None:
                raise ValueError(
                    'Must provide --csv_train when training on COCO,')
            if self.csv_classes is None:
                raise ValueError(
                    'Must provide --csv_classes when training on COCO,')
            dataset_train = CSVDataset(train_file=self.csv_train,
                                       class_list=self.csv_classes,
                                       transform=transforms.Compose([
                                           Normalizer(), Augmenter(), Resizer()
                                       ]))
            # Validation set is optional for CSV data.
            if self.csv_val is None:
                dataset_val = None
                print('No validation annotations provided.')
            else:
                dataset_val = CSVDataset(train_file=self.csv_val,
                                         class_list=self.csv_classes,
                                         transform=transforms.Compose(
                                             [Normalizer(), Resizer()]))
        else:
            raise ValueError(
                'Dataset type not understood (must be csv or coco), exiting.')
        return dataset_train, dataset_val

    def set_models(self, dataset_train):
        """Instantiate the RetinaNet backbone, optimizer and LR scheduler.

        :param dataset_train: training dataset (supplies num_classes()).
        :raises ValueError: on an unsupported ResNet depth.
        """
        # Create the model
        if self.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif self.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            # NOTE(review): DataParallel is only applied in the multi-GPU
            # branch, but iterate() calls self.retinanet.module.freeze_bn(),
            # which requires the DataParallel wrapper — confirm single-GPU
            # runs are supported.
            retinanet = nn.DataParallel(retinanet)
        self.retinanet = retinanet.to(self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=self.lr)
        # This lr_scheduler reduces the learning rate based on the model's
        # validation loss.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                              patience=3,
                                                              verbose=True)
        # Rolling window of the last 500 iteration losses (for the running
        # loss shown in the progress print).
        self.loss_hist = collections.deque(maxlen=500)
        # self.retinanet.train()
        # self.retinanet.freeze_bn()

    def iterate(self):
        """Entry point: build data/model, then run the full epoch loop."""
        dataset_train, dataset_val = self.set_dataset()
        sampler = AspectRatioBasedSampler(dataset_train, batch_size=self.bs,
                                          drop_last=False)
        dataloader_train = DataLoader(dataset_train, num_workers=0,
                                      collate_fn=collater, batch_sampler=sampler)
        # if dataset_val is not None:
        #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        #     dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)
        print('Num training images: {}'.format(len(dataset_train)))
        self.set_models(dataset_train)
        for epoch_num in range(self.epochs):
            epoch_loss = []
            # NOTE(review): metrics are logged at the TOP of each epoch, so
            # epoch N's Comet entry reflects the meters as left by epoch N-1
            # (and the meters' initial values before the first epoch).
            metrics = {
                'classification_loss': self.cls_loss_meter.avg,
                'regression_loss': self.rgrs_loss_meter.avg,
                'entire_loss': self.cls_loss_meter.avg + self.rgrs_loss_meter.avg
            }
            if self.experiment is not None:
                self.experiment.log_multiple_metrics(metrics, step=epoch_num)
            self.retinanet.train()
            # Keep BatchNorm statistics frozen during fine-tuning.
            self.retinanet.module.freeze_bn()
            epoch_loss = self.train(epoch_num, epoch_loss, dataloader_train)
            self.retinanet.eval()
            self.evaluate(epoch_num, dataset_val)
            torch.save(
                self.retinanet.state_dict(),
                os.path.join(
                    './saved_models',
                    'model{}_final_{}.pth'.format(self.save_name, epoch_num)))
            # torch.save(self.retinanet.module, '{}_self.retinanet_{}.pt'.format(self.dataset, epoch_num))
            # self.retinanet.load_state_dict(torch.load("./saved_models/model_final_0.pth"))
            # Scheduler steps on the mean training loss of the epoch.
            self.scheduler.step(np.mean(epoch_loss))
        self.retinanet.eval()

    def train(self, epoch_num, epoch_loss, dataloader_train):
        """Run one training epoch; returns ``epoch_loss`` extended in place.

        :param epoch_num: current epoch index (for logging only).
        :param epoch_loss: list accumulating per-iteration losses.
        :param dataloader_train: training DataLoader.
        """
        for iter_num, data in enumerate(dataloader_train):
            # NOTE(review): the broad `except Exception` below silently skips
            # any failing batch — consider narrowing/logging a traceback.
            try:
                self.optimizer.zero_grad()
                input = data['img'].to(self.device).float()
                annot = data['annot'].to(self.device)
                regression, classification, anchors = self.retinanet(input)
                classification_loss, regression_loss = self.focal_loss.calcurate(
                    classification, regression, anchors, annot)
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                self.cls_loss_meter.update(classification_loss)
                self.rgrs_loss_meter.update(regression_loss)
                loss = classification_loss + regression_loss
                # Skip batches that produced no loss (e.g. no annotations).
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.retinanet.parameters(), 0.1)
                self.optimizer.step()
                self.loss_hist.append(float(loss.item()))
                epoch_loss.append(float(loss.item()))
                # NOTE(review): duplicated clip_grad_norm_ call — clipping
                # after optimizer.step() has no effect on this update.
                torch.nn.utils.clip_grad_norm_(self.retinanet.parameters(), 0.1)
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(self.loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
            # if iter_num == 10:
            #     break
        return epoch_loss

    def evaluate(self, epoch_num, dataset_val):
        """Evaluate on the validation set using the dataset-specific helper."""
        if self.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, self.retinanet, self.nms,
                                    self.device)
        elif self.dataset == 'csv' and self.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, self.retinanet)
def train(args):
    """Train the embedding link-prediction model and log the run to Comet.ml.

    Builds the Keras model via ``build_model``, trains with checkpointing and
    TensorBoard callbacks, then logs test-set loss/accuracy and ROC-AUC
    scores to Comet.ml.

    :param args: namespace with batch_size, epochs, learning_rate,
        embedding_dimension, output, id, logdir, n_entities, n_relationships.
    """
    experiment = Experiment(
        api_key=API_KEY, project_name="fasttext")
    params = {
        "batch_size": args.batch_size,
        "epochs": args.epochs,
        "learning_rate": args.learning_rate,
        "embedding_dimension": args.embedding_dimension
    }
    experiment.log_multiple_params(params)

    # Create directories atomically: makedirs(..., exist_ok=True) replaces
    # the racy isdir()-then-makedirs() pattern (TOCTOU).
    model_path = os.path.join(str(args.output), "model")
    os.makedirs(model_path, exist_ok=True)
    # Checkpoint file name embeds epoch, val_loss and the run id.
    filepath = model_path + \
        "/weights-{epoch:02d}-{val_loss:.3f}-" + \
        args.id + ".hdf5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=5
    )
    logdir = args.logdir
    os.makedirs(logdir, exist_ok=True)
    tensorboard = TensorBoard(
        log_dir=logdir,
        histogram_freq=0,
        write_grads=True,
        write_graph=False,
        write_images=False
    )

    n_entities = args.n_entities
    n_relationships = args.n_relationships
    model = build_model(
        n_entities=n_entities,
        n_relationships=n_relationships,
        embedding_dimension=args.embedding_dimension
    )
    optimizer = optimizers.Adam(lr=args.learning_rate, decay=0.0)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=['accuracy']
    )

    # `data` maps split name -> (inputs, labels) pairs.
    data = load_data(n_entities=n_entities, n_relationships=n_relationships)
    model.fit(
        data["train"][0],
        data["train"][1],
        verbose=1,
        epochs=args.epochs,
        batch_size=args.batch_size,
        shuffle=True,
        validation_data=data["validation"],
        callbacks=[checkpoint, tensorboard]
    )

    # Final test-set evaluation and AUC under two averaging schemes.
    evaluation = model.evaluate(
        data["test"][0], data["test"][1], verbose=0)
    predictions = model.predict(data["test"][0])
    auc_score = roc_auc_score(
        data["test"][1], predictions, average='samples')
    auc_score_micro = roc_auc_score(
        data["test"][1], predictions, average='micro')
    metrics = {
        "evaluation_loss": evaluation[0],
        "evaluation_accuracy": evaluation[1],
        "auc_score": auc_score,
        "auc_score_micro": auc_score_micro
    }
    experiment.log_multiple_metrics(metrics)
def main(_):
    """Train the RPN3D detector (TF 1.x), mirroring the run to Comet.ml.

    Restores from the latest checkpoint in ``save_model_dir`` when one
    exists, runs the epoch/batch training loop with periodic TensorBoard and
    validation summaries, dumps prediction files (and optional
    visualizations) every 10 epochs, then runs the external KITTI evaluation
    script. Relies on module-level ``args``, ``cfg``, ``save_model_dir``,
    ``log_dir``, ``train_dir`` and ``val_dir``.
    """
    # SECURITY(review): hard-coded Comet.ml API key — move to config/env.
    experiment = Experiment(api_key="xXtJguCo8yFdU7dpjEpo6YbHw",
                            project_name=args.experiment_name)
    hyper_params = {
        "learning_rate": args.lr,
        "num_epochs": args.max_epoch,
        "batch_size": args.single_batch_size,
        "alpha": args.alpha,
        "beta": args.beta,
        "gamma": args.gamma,
        "loss": args.loss
    }
    experiment.log_multiple_params(hyper_params)
    # TODO: split file support
    with tf.Graph().as_default():
        global save_model_dir
        start_epoch = 0
        global_counter = 0
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION,
            visible_device_list=cfg.GPU_AVAILABLE,
            allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            device_count={
                "GPU": cfg.GPU_USE_COUNT,
            },
            allow_soft_placement=True,
            log_device_placement=False,
        )
        with tf.Session(config=config) as sess:
            model = RPN3D(cls=cfg.DETECT_OBJ,
                          single_batch_size=args.single_batch_size,
                          learning_rate=args.lr,
                          max_gradient_norm=5.0,
                          alpha=args.alpha,
                          beta=args.beta,
                          gamma=args.gamma,
                          loss_type=args.loss,
                          avail_gpus=cfg.GPU_AVAILABLE.split(','))
            # Parameter init/restore: resume epoch and step counters from the
            # checkpoint when one exists.
            if tf.train.get_checkpoint_state(save_model_dir):
                print("Reading model parameters from %s" % save_model_dir)
                model.saver.restore(sess,
                                    tf.train.latest_checkpoint(save_model_dir))
                start_epoch = model.epoch.eval() + 1
                global_counter = model.global_step.eval() + 1
            else:
                print("Created model with fresh parameters.")
                tf.global_variables_initializer().run()

            # Train and validate.
            is_summary = False
            summary_interval = 5
            summary_val_interval = 10
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            experiment.set_model_graph(sess.graph)
            epochs = args.max_epoch  # loop-invariant, hoisted out of the loop

            # Training
            with experiment.train():
                for epoch in range(start_epoch, args.max_epoch):
                    counter = 0
                    batch_time = time.time()
                    experiment.log_current_epoch(epoch)
                    for batch in iterate_data(
                            train_dir, shuffle=True, aug=True,
                            is_testset=False,
                            batch_size=args.single_batch_size * cfg.GPU_USE_COUNT,
                            multi_gpu_sum=cfg.GPU_USE_COUNT):
                        counter += 1
                        global_counter += 1
                        experiment.set_step(global_counter)
                        # Only request TF summaries every summary_interval
                        # batches to keep the event file small.
                        is_summary = counter % summary_interval == 0
                        start_time = time.time()
                        ret = model.train_step(sess, batch, train=True,
                                               summary=is_summary)
                        forward_time = time.time() - start_time
                        batch_time = time.time() - batch_time
                        # ret[0..3] = total / cls / cls_pos / cls_neg losses.
                        params = {
                            "loss": ret[0],
                            "cls_loss": ret[1],
                            "cls_pos_loss": ret[2],
                            "cls_neg_loss": ret[3]
                        }
                        experiment.log_multiple_metrics(params)
                        print(
                            'train: {} @ epoch:{}/{} loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'
                            .format(counter, epoch, epochs, ret[0], ret[1],
                                    ret[2], ret[3], forward_time, batch_time))
                        if counter % summary_interval == 0:
                            print("summary_interval now")
                            summary_writer.add_summary(ret[-1], global_counter)
                        if counter % summary_val_interval == 0:
                            print("summary_val_interval now")
                            batch = sample_test_data(
                                val_dir,
                                args.single_batch_size * cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT)
                            ret = model.validate_step(sess, batch, summary=True)
                            summary_writer.add_summary(ret[-1], global_counter)
                            # Prediction is best-effort: narrowed the bare
                            # `except:` to `except Exception` so SystemExit /
                            # KeyboardInterrupt still propagate.
                            try:
                                ret = model.predict_step(sess, batch,
                                                         summary=True)
                                summary_writer.add_summary(
                                    ret[-1], global_counter)
                            except Exception:
                                print("prediction skipped due to error")
                        # External pause flag: checkpoint and exit cleanly.
                        if check_if_should_pause(args.tag):
                            model.saver.save(sess, os.path.join(
                                save_model_dir, 'checkpoint'),
                                global_step=model.global_step)
                            print('pause and save model @ {} steps:{}'.format(
                                save_model_dir, model.global_step.eval()))
                            sys.exit(0)
                        batch_time = time.time()
                    experiment.log_epoch_end(epoch)
                    sess.run(model.epoch_add_op)
                    model.saver.save(sess, os.path.join(save_model_dir,
                                                        'checkpoint'),
                                     global_step=model.global_step)
                    # Dump test data every 10 epochs.
                    if (epoch + 1) % 10 == 0:
                        # Create output folders.
                        os.makedirs(os.path.join(args.output_path, str(epoch)),
                                    exist_ok=True)
                        os.makedirs(os.path.join(args.output_path, str(epoch),
                                                 'data'), exist_ok=True)
                        if args.vis:
                            os.makedirs(os.path.join(args.output_path,
                                                     str(epoch), 'vis'),
                                        exist_ok=True)
                        for batch in iterate_data(
                                val_dir, shuffle=False, aug=False,
                                is_testset=False,
                                batch_size=args.single_batch_size * cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT):
                            if args.vis:
                                tags, results, front_images, bird_views, heatmaps = model.predict_step(
                                    sess, batch, summary=False, vis=True)
                            else:
                                tags, results = model.predict_step(
                                    sess, batch, summary=False, vis=False)
                            # Write one KITTI-format label file per sample.
                            for tag, result in zip(tags, results):
                                of_path = os.path.join(args.output_path,
                                                       str(epoch), 'data',
                                                       tag + '.txt')
                                with open(of_path, 'w+') as f:
                                    labels = box3d_to_label(
                                        [result[:, 1:8]], [result[:, 0]],
                                        [result[:, -1]],
                                        coordinate='lidar')[0]
                                    for line in labels:
                                        f.write(line)
                                    print('write out {} objects to {}'.format(
                                        len(labels), tag))
                            # Dump visualizations.
                            if args.vis:
                                for tag, front_image, bird_view, heatmap in zip(
                                        tags, front_images, bird_views,
                                        heatmaps):
                                    front_img_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_front.jpg')
                                    bird_view_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_bv.jpg')
                                    heatmap_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_heatmap.jpg')
                                    cv2.imwrite(front_img_path, front_image)
                                    cv2.imwrite(bird_view_path, bird_view)
                                    cv2.imwrite(heatmap_path, heatmap)
                        # Execute the external KITTI evaluation script.
                        cmd_1 = "./kitti_eval/launch_test.sh"
                        cmd_2 = os.path.join(args.output_path, str(epoch))
                        cmd_3 = os.path.join(args.output_path, str(epoch),
                                             'log')
                        os.system(" ".join([cmd_1, cmd_2, cmd_3]))
            print('train done. total epoch:{} iter:{}'.format(
                epoch, model.global_step.eval()))
            # Finally save the model.
            model.saver.save(sess, os.path.join(save_model_dir, 'checkpoint'),
                             global_step=model.global_step)
class DefinedSummarizer:
    """TF 1.x summarizer with a fixed, pre-declared set of summary tags.

    Unlike a lazy logger, all scalar/image summary ops are created up front
    in ``init_summary_ops``; ``summarize`` then only feeds values. When the
    module-level ``config`` carries a ``comet_api_key``, metrics are also
    mirrored to Comet.ml.
    """

    def __init__(self, sess, summary_dir, scalar_tags=None, images_tags=None):
        """
        :param sess: The Graph tensorflow session used in your graph.
        :param summary_dir: the directory which will save the summaries of the graph
        :param scalar_tags: The tags of summaries you will use in your training loop
        :param images_tags: The tags of image summaries you will use in your training loop
        """
        self.sess = sess
        self.scalar_tags = scalar_tags
        self.images_tags = images_tags
        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        self.init_summary_ops()
        self.summary_writer = tf.summary.FileWriter(summary_dir)
        # NOTE(review): `config` is NOT a parameter here — this relies on a
        # module-level global (compare Logger.__init__, which takes it
        # explicitly). Confirm the global exists wherever this is used.
        if "comet_api_key" in config:
            from comet_ml import Experiment
            self.experiment = Experiment(api_key=config['comet_api_key'],
                                         project_name=config['exp_name'])
            self.experiment.log_multiple_params(config)

    def set_summaries(self, scalar_tags=None, images_tags=None):
        # Replace the tag sets and rebuild the summary ops.
        self.scalar_tags = scalar_tags
        self.images_tags = images_tags
        self.init_summary_ops()

    def init_summary_ops(self):
        # Pre-create one placeholder + summary op per declared tag.
        with tf.variable_scope('summary_ops'):
            if self.scalar_tags is not None:
                for tag in self.scalar_tags:
                    self.summary_tags += [tag]
                    self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag)
                    self.summary_ops[tag] = tf.summary.scalar(
                        tag, self.summary_placeholders[tag])
            if self.images_tags is not None:
                # images_tags entries are (tag, shape) pairs.
                for tag, shape in self.images_tags:
                    self.summary_tags += [tag]
                    self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag)
                    self.summary_ops[tag] = tf.summary.image(
                        tag, self.summary_placeholders[tag], max_outputs=10)

    def summarize(self, step, summaries_dict=None, summaries_merged=None):
        """
        Add the summaries to tensorboard
        :param step: the number of iteration in your training
        :param summaries_dict: the dictionary which contains your summaries .
        :param summaries_merged: Merged summaries which they come from your graph
        :return:
        """
        if summaries_dict is not None:
            # One sess.run for all requested tags; feed values by placeholder.
            summary_list = self.sess.run(
                [self.summary_ops[tag] for tag in summaries_dict.keys()], {
                    self.summary_placeholders[tag]: value
                    for tag, value in summaries_dict.items()
                })
            for summary in summary_list:
                self.summary_writer.add_summary(summary, step)
        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)
        if hasattr(self, 'experiment') and self.experiment is not None:
            # NOTE(review): if only summaries_merged was given, this passes
            # None to Comet — confirm log_multiple_metrics tolerates it.
            self.experiment.log_multiple_metrics(summaries_dict, step=step)

    def finalize(self):
        # Flush any buffered events to disk.
        self.summary_writer.flush()
while total_timesteps < args.max_timesteps: if done: if total_timesteps != 0: betas = np.array(betas) mean_beta, var_beta = betas.mean(), betas.var() print("Total T: ", total_timesteps, " Episode Num: ", episode_num, " Episode T: ", episode_timesteps, " Reward: ", episode_reward, "beta mean: ", mean_beta, "beta var: ", var_beta) if args.log: experiment.log_multiple_metrics( { "Episode reward": episode_reward, 'Episode Beta Mean': mean_beta, 'Episode Beta Var': var_beta }, step=total_timesteps) if args.policy_name == "TD3": policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau, args.policy_noise, args.noise_clip, args.policy_freq) else: policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau, args.n_backprop) # Evaluate episode if timesteps_since_eval >= args.eval_freq: