Ejemplo n.º 1
0
def train(path):
    """Train an XGBoost petrel/noise classifier from a feature CSV.

    Reads the feature table at ``path``, keeps only the globally configured
    ``selected_features``, holds out the first 1200 rows as a noise-only
    evaluation set, grid-searches XGBoost hyper-parameters on the remainder,
    saves the best estimator to ``<name>_model.pkl`` and logs metrics and
    figures to Comet.ml.

    :param path: path to a CSV file of extracted acoustic features;
        must contain 'sound.files' and 'petrel' columns.
    """
    name = os.path.splitext(os.path.basename(path))[0]
    print('Processing: ', name)
    features = pd.read_csv(path, index_col=None)
    # Distinct loop variable so the comprehension does not shadow `name`.
    selected_features_names = [feat_name for feat_name, desc in selected_features]
    features = features[selected_features_names]
    # First 1200 rows are treated as noise-only recordings -- TODO confirm
    # this matches how the feature CSVs are generated.
    split_idx = 1200
    features = features.drop(['sound.files'], axis=1)
    noise_only_df, df = features.iloc[:split_idx], features.iloc[split_idx:]
    y = df.pop('petrel')
    X = df.values
    y_noise = noise_only_df.pop('petrel')
    X_noise = noise_only_df.values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y)
    hyperparams = {
        'n_estimators': [100, 300, 500, 1000],
        'learning_rate': [0.1],
        'gamma': [0.0, 0.5],
        'max_depth': [2, 3, 4],
        'min_child_weight': [1, 2],
        'subsample': [1.0, 0.8],
        'reg_alpha': [0.0, 0.1],
        'reg_lambda': [1, 2, 3]
    }

    # `fit` returns the fitted GridSearchCV itself; name it accordingly
    # (the previous name `fit_params` was misleading).
    search = model_selection.GridSearchCV(
        estimator=xg.XGBClassifier(objective='binary:logistic', n_jobs=-1),
        param_grid=hyperparams,
        cv=4)
    search.fit(X_train, y_train)
    estimator = search.best_estimator_
    joblib.dump(estimator, name + '_model.pkl')

    test_pred = estimator.predict(X_test)
    metrics = calculate_metrics(test_pred, y_test)

    # Sanity check: accuracy on the noise-only hold-out rows.
    noise_pred = estimator.predict(X_noise)
    noise_detection_accuracy = accuracy_score(y_noise, noise_pred)

    # NOTE(review): hard-coded API key; should come from an env var/config.
    experiment = Experiment(api_key="4PdGdUZmGf6P8QsMa5F2zB4Ui",
                            project_name="storm petrels",
                            workspace="tracewsl")
    experiment.set_name(name)
    experiment.log_parameter('name', name)
    experiment.log_multiple_params(search.best_params_)
    experiment.log_multiple_metrics(metrics)
    experiment.log_metric('Noise detection accuracy', noise_detection_accuracy)
    experiment.log_figure('Confusion matrix', get_confusion_matrix_figure(test_pred, y_test))
    # Typo fixed in the logged figure title ('importnace' -> 'importance').
    experiment.log_figure('Feature importance', get_feature_importance_figure(estimator, list(df.columns.values)))
Ejemplo n.º 2
0
class Logger:
    """Writes TensorFlow summaries (scalars and images) for train/test runs
    and, when a Comet.ml API key is configured, mirrors metrics there."""

    def __init__(self, sess, config):
        self.sess = sess
        self.config = config
        self.summary_placeholders = {}
        self.summary_ops = {}
        self.train_summary_writer = tf.summary.FileWriter(
            os.path.join(self.config.summary_dir, "train"), self.sess.graph)
        self.test_summary_writer = tf.summary.FileWriter(
            os.path.join(self.config.summary_dir, "test"))

        if "comet_api_key" in config:
            from comet_ml import Experiment
            self.experiment = Experiment(
                api_key=config['comet_api_key'], project_name=config['exp_name'])
            self.experiment.disable_mp()
            self.experiment.log_multiple_params(config)

    # it can summarize scalars and images.
    def summarize(self, step, summarizer="train", scope="", summaries_dict=None):
        """
        :param step: the step of the summary
        :param summarizer: use the train summary writer or the test one
        :param scope: variable scope
        :param summaries_dict: the dict of the summaries values (tag,value)
        :return:
        """
        writer = (self.train_summary_writer
                  if summarizer == "train" else self.test_summary_writer)
        with tf.variable_scope(scope):
            if summaries_dict is None:
                return

            computed = []
            for tag, value in summaries_dict.items():
                if tag not in self.summary_ops:
                    # Rank <= 1 values are logged as scalars, everything
                    # else as image batches.
                    is_scalar = len(value.shape) <= 1
                    if is_scalar:
                        placeholder = tf.placeholder('float32', value.shape,
                                                     name=tag)
                    else:
                        placeholder = tf.placeholder(
                            'float32', [None] + list(value.shape[1:]),
                            name=tag)
                    self.summary_placeholders[tag] = placeholder
                    op_factory = tf.summary.scalar if is_scalar else tf.summary.image
                    self.summary_ops[tag] = op_factory(tag, placeholder)

                computed.append(self.sess.run(
                    self.summary_ops[tag],
                    {self.summary_placeholders[tag]: value}))

            for summary in computed:
                writer.add_summary(summary, step)

            if getattr(self, 'experiment', None) is not None:
                self.experiment.log_multiple_metrics(summaries_dict, step=step)

            writer.flush()
Ejemplo n.º 3
0
class Logger(object):
    """TensorBoard logger with optional Comet.ml mirroring.

    A Comet experiment is created only when ``hp.comet_ml_api_key`` is set;
    otherwise ``self.experiment`` stays None and only TensorBoard is used.
    """

    def __init__(self, dataset_name, model_name):
        self.model_name = model_name
        self.project_name = "%s-%s" % (dataset_name, self.model_name)
        self.logdir = os.path.join(hp.logdir, self.project_name)
        self.writer = SummaryWriter(log_dir=self.logdir)

        self.experiment = None
        if hp.comet_ml_api_key is not None:
            self.experiment = Experiment(api_key=hp.comet_ml_api_key,
                                         project_name=self.project_name,
                                         log_code=False)
            # Log every public attribute of the hyper-parameter module.
            hp_dict = {attr: getattr(hp, attr)
                       for attr in dir(hp) if not attr.startswith('__')}
            self.experiment.log_multiple_params(hp_dict)

    def log_step(self, phase, step, loss_dict, image_dict):
        """Log per-step losses every 50 steps and images every 1000 steps
        (training phase only)."""
        if phase != 'train':
            return

        if step % 50 == 0:
            if self.experiment is not None:
                with self.experiment.train():
                    self.experiment.log_multiple_metrics(loss_dict, step=step)

            for key in sorted(loss_dict):
                self.writer.add_scalar('%s-step/%s' % (phase, key),
                                       loss_dict[key], step)

        if step % 1000 == 0:
            for key in sorted(image_dict):
                self.writer.add_image('%s/%s' % (self.model_name, key),
                                      image_dict[key], step)

    def log_epoch(self, phase, step, loss_dict):
        """Log per-epoch losses; validation metrics also go to Comet."""
        for key in sorted(loss_dict):
            self.writer.add_scalar('%s/%s' % (phase, key), loss_dict[key],
                                   step)

        if phase == 'valid' and self.experiment is not None:
            with self.experiment.validate():
                self.experiment.log_multiple_metrics(loss_dict, step=step)
Ejemplo n.º 4
0
           padding='same',
           activation=params['activation']))
model.add(Dropout(params['dropout']))

model.add(Flatten())
# Single sigmoid unit: binary classification head.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer=params['optimizer'],
              metrics=['accuracy'])
# print model.summary() to preserve it automatically in the Comet `Output` tab
print(model.summary())
# Record the parameter count alongside the other hyper-parameters.
params.update({'total_number_of_parameters': model.count_params()})

# Metrics produced inside this context are logged with the prefix 'train_'.
with experiment.train():
    model.fit(X_train,
              y_train,
              epochs=params['epochs'],
              batch_size=params['batch_size'],
              verbose=1,
              validation_data=(X_test, y_test))

# Metrics produced inside this context are logged with the prefix 'test_'.
with experiment.test():
    loss, accuracy = model.evaluate(X_test, y_test)
    metrics = {'loss': loss, 'accuracy': accuracy}
    experiment.log_multiple_metrics(metrics)

experiment.log_multiple_params(params)
experiment.log_dataset_hash(X_train)  # creates and logs a hash of your data
Ejemplo n.º 5
0
    # NOTE(review): this chunk begins inside an outer block (presumably
    # `with experiment.train():`) whose opening is not visible here.
    # Binarise predicted probabilities at the global threshold.
    y_pred = y_prob.copy()
    y_pred[y_pred >= P_THRESHOLD] = 1
    y_pred[y_pred < P_THRESHOLD] = 0

    print('train micro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='micro', sample_weight=None)))
    print('train macro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='macro', sample_weight=None)))
    print('train weightedmacro: {}'.format(precision_recall_fscore_support(y_train, y_pred, average='weighted', sample_weight=None)))

    # F1 under the three common multi-label averaging schemes.
    train_metrics = {
        "train_micro":f1_score(y_train, y_pred, average='micro', sample_weight=None),
        "train_macro":f1_score(y_train, y_pred, average='macro', sample_weight=None),
        "train_weighted_macro":f1_score(y_train, y_pred, average='weighted', sample_weight=None)
        }

    experiment.log_multiple_metrics(train_metrics)

# Dev
# Metrics logged inside this context get the prefix 'validate_'.
with experiment.validate():
    y_prob_dev = model.predict([meta_dev, title_dev, desc_dev, x_dev])
    to_file(y_prob_dev, "dev_results", y_train)

    y_pred_dev = y_prob_dev.copy()
    y_pred_dev[y_pred_dev >= P_THRESHOLD] = 1
    y_pred_dev[y_pred_dev < P_THRESHOLD] = 0

    print('dev micro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='micro', sample_weight=None)))
    print('dev macro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='macro', sample_weight=None)))
    print('dev weightedmacro: {}'.format(precision_recall_fscore_support(y_dev, y_pred_dev, average='weighted', sample_weight=None)))

Ejemplo n.º 6
0
# Plot predictions against the true test targets.
plt.plot(y_pred, y_test)

# Visualise the test-set points against the fitted regression line.
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Regression quality metrics on the test split.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
evs = explained_variance_score(y_test, y_pred)

# These will be logged to the sklearn-demos project on Comet.ml.
params = {
    "random_state": 0,
    "model_type": "simple regression",
    "scaler": "none",
    "stratify": True,
}

metrics = {
    "mse": mse,
    "mae": mae,
    "evs": evs,
}

exp.log_dataset_hash(X_train)
exp.log_multiple_params(params)
exp.log_multiple_metrics(metrics)
Ejemplo n.º 7
0
class Trainer:
    """Training harness for a RetinaNet object detector.

    All hyper-parameters are hard-coded in ``__init__`` (dataset type and
    paths, resnet depth, batch size, learning rate, epoch count).  Metrics
    are optionally streamed to Comet.ml.  Typical usage::

        Trainer().iterate()
    """

    def __init__(self):
        # Simple training script for training a RetinaNet network.

        # Dataset type, must be one of csv or coco.
        self.dataset = 'coco'

        # Path to COCO directory
        self.coco_path = './data'

        # Path to file containing training annotations (see readme)
        self.csv_train = None

        # Path to file containing class list (see readme)
        self.csv_classes = None

        # Path to file containing validation annotations (optional, see readme)
        self.csv_val = None

        # Resnet depth, must be one of 18, 34, 50, 101, 152
        self.depth = 50

        # batch_size
        self.bs = 8

        # learning rate
        self.lr = 1e-5

        # Number of epochs
        self.epochs = 10

        # set device
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        # set focal loss
        self.focal_loss = losses.FocalLoss()

        # module calcurating nms  (sic: spelling kept from original)
        self.nms = NMS(BBoxTransform, ClipBoxes)

        # index of the saving model (used in the checkpoint filename)
        self.save_name = 2

        # use comet_ml
        self.cml = True

        # running average of the classification loss
        self.cls_loss_meter = AverageMeter()

        # running average of the regression loss
        self.rgrs_loss_meter = AverageMeter()

        self.set_comet_ml()

    def set_comet_ml(self):
        """Create ``self.experiment`` (a Comet.ml Experiment, or None when
        ``self.cml`` is False) and log the run's hyper-parameters."""
        params = {
            'epochs': self.epochs,
            'batch_size': self.bs,
            'lr': self.lr,
            'resnet_depth': self.depth,
            'save_name': self.save_name,
        }

        # NOTE(review): hard-coded API key; consider loading from env/config.
        if self.cml:
            self.experiment = Experiment(api_key="xK18bJy5xiPuPf9Dptr43ZuMk",
                                         project_name="retinanet-coco",
                                         workspace="tanimutomo")
        else:
            self.experiment = None

        if self.cml:
            self.experiment.log_multiple_params(params)

    def set_dataset(self):
        """Build and return ``(dataset_train, dataset_val)`` for the
        configured ``self.dataset`` type ('coco' or 'csv').

        :raises ValueError: if a required path is missing or the dataset
            type is not recognised.
        """
        # Create the data loaders
        if self.dataset == 'coco':

            if self.coco_path is None:
                raise ValueError(
                    'Must provide --coco_path when training on COCO,')

            dataset_train = CocoDataset(self.coco_path,
                                        set_name='train2017',
                                        transform=transforms.Compose([
                                            Normalizer(),
                                            Augmenter(),
                                            Resizer()
                                        ]))
            dataset_val = CocoDataset(self.coco_path,
                                      set_name='val2017',
                                      transform=transforms.Compose(
                                          [Normalizer(),
                                           Resizer()]))

        elif self.dataset == 'csv':

            if self.csv_train is None:
                raise ValueError(
                    'Must provide --csv_train when training on COCO,')

            if self.csv_classes is None:
                raise ValueError(
                    'Must provide --csv_classes when training on COCO,')

            dataset_train = CSVDataset(train_file=self.csv_train,
                                       class_list=self.csv_classes,
                                       transform=transforms.Compose([
                                           Normalizer(),
                                           Augmenter(),
                                           Resizer()
                                       ]))

            if self.csv_val is None:
                dataset_val = None
                print('No validation annotations provided.')
            else:
                dataset_val = CSVDataset(train_file=self.csv_val,
                                         class_list=self.csv_classes,
                                         transform=transforms.Compose(
                                             [Normalizer(),
                                              Resizer()]))

        else:
            raise ValueError(
                'Dataset type not understood (must be csv or coco), exiting.')

        return dataset_train, dataset_val

    def set_models(self, dataset_train):
        """Instantiate the ResNet backbone matching ``self.depth``, move it
        to the device (DataParallel on multi-GPU) and set up the Adam
        optimizer, plateau LR scheduler and loss history."""
        # Create the model
        if self.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif self.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            retinanet = nn.DataParallel(retinanet)

        self.retinanet = retinanet.to(self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=self.lr)

        # This lr_shceduler reduce the learning rate based on the models's validation loss
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                              patience=3,
                                                              verbose=True)

        self.loss_hist = collections.deque(maxlen=500)

        # self.retinanet.train()
        # self.retinanet.freeze_bn()

    def iterate(self):
        """Run the full training loop: build datasets/loaders, train each
        epoch, evaluate, checkpoint, and step the LR scheduler."""
        dataset_train, dataset_val = self.set_dataset()
        sampler = AspectRatioBasedSampler(dataset_train,
                                          batch_size=self.bs,
                                          drop_last=False)
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=0,
                                      collate_fn=collater,
                                      batch_sampler=sampler)

        # if dataset_val is not None:
        #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        #     dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)
        print('Num training images: {}'.format(len(dataset_train)))

        self.set_models(dataset_train)

        for epoch_num in range(self.epochs):
            epoch_loss = []

            # NOTE(review): these averages are logged *before* this epoch's
            # batches run, so they reflect the previous epoch (and whatever
            # AverageMeter.avg is for an empty meter on epoch 0) -- confirm
            # this is the intended logging semantics.
            metrics = {
                'classification_loss': self.cls_loss_meter.avg,
                'regression_loss': self.rgrs_loss_meter.avg,
                'entire_loss':
                self.cls_loss_meter.avg + self.rgrs_loss_meter.avg
            }

            if self.experiment is not None:
                self.experiment.log_multiple_metrics(metrics, step=epoch_num)

            self.retinanet.train()
            self.retinanet.module.freeze_bn()

            epoch_loss = self.train(epoch_num, epoch_loss, dataloader_train)

            self.retinanet.eval()

            self.evaluate(epoch_num, dataset_val)

            torch.save(
                self.retinanet.state_dict(),
                os.path.join(
                    './saved_models',
                    'model{}_final_{}.pth'.format(self.save_name, epoch_num)))
            # torch.save(self.retinanet.module, '{}_self.retinanet_{}.pt'.format(self.dataset, epoch_num))

            # self.retinanet.load_state_dict(torch.load("./saved_models/model_final_0.pth"))

            self.scheduler.step(np.mean(epoch_loss))
            self.retinanet.eval()

    def train(self, epoch_num, epoch_loss, dataloader_train):
        """Train one epoch over ``dataloader_train``.

        Appends each batch loss to ``epoch_loss`` and returns it.  Any
        exception inside a batch is printed and the batch is skipped.
        """
        for iter_num, data in enumerate(dataloader_train):
            try:
                self.optimizer.zero_grad()

                input = data['img'].to(self.device).float()
                annot = data['annot'].to(self.device)

                regression, classification, anchors = self.retinanet(input)

                # (sic: 'calcurate' is the method name in the losses module)
                classification_loss, regression_loss = self.focal_loss.calcurate(
                    classification, regression, anchors, annot)

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                self.cls_loss_meter.update(classification_loss)
                self.rgrs_loss_meter.update(regression_loss)

                loss = classification_loss + regression_loss

                # Skip backprop when both losses are exactly zero.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.retinanet.parameters(),
                                               0.1)

                self.optimizer.step()

                self.loss_hist.append(float(loss.item()))

                epoch_loss.append(float(loss.item()))

                # NOTE(review): this second clip after optimizer.step() has
                # no effect on the step just taken -- likely redundant.
                torch.nn.utils.clip_grad_norm_(self.retinanet.parameters(),
                                               0.1)
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(self.loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                # NOTE(review): broad catch keeps training alive but can
                # hide real bugs; the error is only printed.
                print(e)
                continue

            # if iter_num == 10:
            #     break

        return epoch_loss

    def evaluate(self, epoch_num, dataset_val):
        """Evaluate on the validation set: COCO evaluation for 'coco',
        mAP via csv_eval for 'csv' (only when csv_val was provided)."""
        if self.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, self.retinanet, self.nms,
                                    self.device)

        elif self.dataset == 'csv' and self.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, self.retinanet)
Ejemplo n.º 8
0
def train(args):
    """Train the embedding link-prediction model and log results to Comet.ml.

    Builds the model from ``args``, fits it with checkpointing and
    TensorBoard callbacks, then evaluates on the test split and logs
    loss/accuracy plus ROC-AUC scores to the Comet experiment.

    :param args: namespace providing batch_size, epochs, learning_rate,
        embedding_dimension, output, id, logdir, n_entities and
        n_relationships.
    """
    experiment = Experiment(
        api_key=API_KEY, project_name="fasttext")

    params = {
        "batch_size": args.batch_size,
        "epochs": args.epochs,
        "learning_rate": args.learning_rate,
        "embedding_dimension": args.embedding_dimension
    }
    experiment.log_multiple_params(params)

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    model_path = os.path.join(str(args.output), "model")
    os.makedirs(model_path, exist_ok=True)

    filepath = (model_path +
                "/weights-{epoch:02d}-{val_loss:.3f}-" +
                args.id + ".hdf5")
    # Keep only the best model (by validation loss), checked every 5 epochs.
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=5
    )

    logdir = args.logdir
    os.makedirs(logdir, exist_ok=True)

    tensorboard = TensorBoard(
        log_dir=logdir,
        histogram_freq=0,
        write_grads=True,
        write_graph=False,
        write_images=False
    )

    n_entities = args.n_entities
    n_relationships = args.n_relationships

    model = build_model(
        n_entities=n_entities,
        n_relationships=n_relationships,
        embedding_dimension=args.embedding_dimension
    )

    optimizer = optimizers.Adam(lr=args.learning_rate, decay=0.0)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=['accuracy']
    )

    data = load_data(n_entities=n_entities, n_relationships=n_relationships)
    model.fit(
        data["train"][0],
        data["train"][1],
        verbose=1,
        epochs=args.epochs,
        batch_size=args.batch_size,
        shuffle=True,
        validation_data=data["validation"],
        callbacks=[checkpoint, tensorboard]
    )
    evaluation = model.evaluate(
        data["test"][0], data["test"][1], verbose=0)

    # ROC-AUC on the test split, both per-sample and micro-averaged.
    predictions = model.predict(data["test"][0])
    auc_score = roc_auc_score(
        data["test"][1], predictions, average='samples')
    auc_score_micro = roc_auc_score(
        data["test"][1], predictions, average='micro')

    metrics = {
        "evaluation_loss": evaluation[0],
        "evaluation_accuracy": evaluation[1],
        "auc_score": auc_score,
        "auc_score_micro": auc_score_micro
    }
    experiment.log_multiple_metrics(metrics)
Ejemplo n.º 9
0
def main(_):
    """Train (and periodically validate) the RPN3D detection model.

    Restores from the latest checkpoint in ``save_model_dir`` when one
    exists, then runs the epoch loop: per-batch training with Comet.ml and
    TensorBoard summaries, periodic validation/prediction summaries, a
    prediction dump plus KITTI evaluation every 10 epochs, and a final
    checkpoint save.
    """
    # NOTE(review): hard-coded API key; consider loading from env/config.
    experiment = Experiment(api_key="xXtJguCo8yFdU7dpjEpo6YbHw",
                            project_name=args.experiment_name)
    hyper_params = {
        "learning_rate": args.lr,
        "num_epochs": args.max_epoch,
        "batch_size": args.single_batch_size,
        "alpha": args.alpha,
        "beta": args.beta,
        "gamma": args.gamma,
        "loss": args.loss
    }
    experiment.log_multiple_params(hyper_params)

    # TODO: split file support
    with tf.Graph().as_default():
        # save_model_dir is a module-level name defined outside this chunk.
        global save_model_dir
        start_epoch = 0
        global_counter = 0

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION,
            visible_device_list=cfg.GPU_AVAILABLE,
            allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            device_count={
                "GPU": cfg.GPU_USE_COUNT,
            },
            allow_soft_placement=True,
            log_device_placement=False,
        )
        with tf.Session(config=config) as sess:
            # sess=tf_debug.LocalCLIDebugWrapperSession(sess,ui_type='readline')
            model = RPN3D(cls=cfg.DETECT_OBJ,
                          single_batch_size=args.single_batch_size,
                          learning_rate=args.lr,
                          max_gradient_norm=5.0,
                          alpha=args.alpha,
                          beta=args.beta,
                          gamma=args.gamma,
                          loss_type=args.loss,
                          avail_gpus=cfg.GPU_AVAILABLE.split(','))
            # param init/restore: resume from checkpoint when available.
            if tf.train.get_checkpoint_state(save_model_dir):
                print("Reading model parameters from %s" % save_model_dir)
                model.saver.restore(sess,
                                    tf.train.latest_checkpoint(save_model_dir))
                start_epoch = model.epoch.eval() + 1
                global_counter = model.global_step.eval() + 1
            else:
                print("Created model with fresh parameters.")
                tf.global_variables_initializer().run()

            # train and validate
            # NOTE(review): is_summary_image / is_validate are never used below.
            is_summary, is_summary_image, is_validate = False, False, False

            summary_interval = 5
            summary_val_interval = 10
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            experiment.set_model_graph(sess.graph)

            # training
            with experiment.train():
                for epoch in range(start_epoch, args.max_epoch):
                    counter = 0
                    batch_time = time.time()
                    experiment.log_current_epoch(epoch)

                    for batch in iterate_data(
                            train_dir,
                            shuffle=True,
                            aug=True,
                            is_testset=False,
                            batch_size=args.single_batch_size *
                            cfg.GPU_USE_COUNT,
                            multi_gpu_sum=cfg.GPU_USE_COUNT):

                        counter += 1
                        global_counter += 1
                        experiment.set_step(global_counter)
                        # Emit a TF summary every `summary_interval` batches.
                        if counter % summary_interval == 0:
                            is_summary = True
                        else:
                            is_summary = False
                        epochs = args.max_epoch
                        start_time = time.time()
                        ret = model.train_step(sess,
                                               batch,
                                               train=True,
                                               summary=is_summary)
                        forward_time = time.time() - start_time
                        batch_time = time.time() - batch_time
                        # ret layout: [loss, cls_loss, cls_pos_loss,
                        # cls_neg_loss, ..., summary] -- TODO confirm against
                        # RPN3D.train_step.
                        param = ret
                        params = {
                            "loss": param[0],
                            "cls_loss": param[1],
                            "cls_pos_loss": param[2],
                            "cls_neg_loss": param[3]
                        }
                        experiment.log_multiple_metrics(params)
                        # print(ret)
                        print(
                            'train: {} @ epoch:{}/{} loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'
                            .format(counter, epoch, epochs, ret[0], ret[1],
                                    ret[2], ret[3], forward_time, batch_time))
                        # with open('log/train.txt', 'a') as f:
                        # f.write( 'train: {} @ epoch:{}/{} loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'.format(counter,epoch, epochs, ret[0], ret[1], ret[2], ret[3], forward_time, batch_time))

                        #print(counter, summary_interval, counter % summary_interval)
                        if counter % summary_interval == 0:
                            print("summary_interval now")
                            summary_writer.add_summary(ret[-1], global_counter)

                        #print(counter, summary_val_interval, counter % summary_val_interval)
                        if counter % summary_val_interval == 0:
                            print("summary_val_interval now")
                            batch = sample_test_data(
                                val_dir,
                                args.single_batch_size * cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT)

                            ret = model.validate_step(sess,
                                                      batch,
                                                      summary=True)
                            summary_writer.add_summary(ret[-1], global_counter)

                            # NOTE(review): bare except swallows everything,
                            # including KeyboardInterrupt; narrow if possible.
                            try:
                                ret = model.predict_step(sess,
                                                         batch,
                                                         summary=True)
                                summary_writer.add_summary(
                                    ret[-1], global_counter)
                            except:
                                print("prediction skipped due to error")

                        # External pause flag: checkpoint and exit cleanly.
                        if check_if_should_pause(args.tag):
                            model.saver.save(sess,
                                             os.path.join(
                                                 save_model_dir, 'checkpoint'),
                                             global_step=model.global_step)
                            print('pause and save model @ {} steps:{}'.format(
                                save_model_dir, model.global_step.eval()))
                            sys.exit(0)

                        batch_time = time.time()
                    experiment.log_epoch_end(epoch)
                    sess.run(model.epoch_add_op)

                    model.saver.save(sess,
                                     os.path.join(save_model_dir,
                                                  'checkpoint'),
                                     global_step=model.global_step)

                    # dump test data every 10 epochs
                    if (epoch + 1) % 10 == 0:
                        # create output folder
                        os.makedirs(os.path.join(args.output_path, str(epoch)),
                                    exist_ok=True)
                        os.makedirs(os.path.join(args.output_path, str(epoch),
                                                 'data'),
                                    exist_ok=True)
                        if args.vis:
                            os.makedirs(os.path.join(args.output_path,
                                                     str(epoch), 'vis'),
                                        exist_ok=True)

                        for batch in iterate_data(
                                val_dir,
                                shuffle=False,
                                aug=False,
                                is_testset=False,
                                batch_size=args.single_batch_size *
                                cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT):

                            if args.vis:
                                tags, results, front_images, bird_views, heatmaps = model.predict_step(
                                    sess, batch, summary=False, vis=True)
                            else:
                                tags, results = model.predict_step(
                                    sess, batch, summary=False, vis=False)

                            # Write KITTI-format label files for each sample.
                            for tag, result in zip(tags, results):
                                of_path = os.path.join(args.output_path,
                                                       str(epoch), 'data',
                                                       tag + '.txt')
                                with open(of_path, 'w+') as f:
                                    labels = box3d_to_label(
                                        [result[:, 1:8]], [result[:, 0]],
                                        [result[:, -1]],
                                        coordinate='lidar')[0]
                                    for line in labels:
                                        f.write(line)
                                    print('write out {} objects to {}'.format(
                                        len(labels), tag))
                            # dump visualizations
                            if args.vis:
                                for tag, front_image, bird_view, heatmap in zip(
                                        tags, front_images, bird_views,
                                        heatmaps):
                                    front_img_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_front.jpg')
                                    bird_view_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_bv.jpg')
                                    heatmap_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_heatmap.jpg')
                                    cv2.imwrite(front_img_path, front_image)
                                    cv2.imwrite(bird_view_path, bird_view)
                                    cv2.imwrite(heatmap_path, heatmap)

                        # execute evaluation code
                        cmd_1 = "./kitti_eval/launch_test.sh"
                        cmd_2 = os.path.join(args.output_path, str(epoch))
                        cmd_3 = os.path.join(args.output_path, str(epoch),
                                             'log')
                        os.system(" ".join([cmd_1, cmd_2, cmd_3]))

            print('train done. total epoch:{} iter:{}'.format(
                epoch, model.global_step.eval()))

            # finallly save model
            model.saver.save(sess,
                             os.path.join(save_model_dir, 'checkpoint'),
                             global_step=model.global_step)
Ejemplo n.º 10
0
class DefinedSummarizer:
    """Manage TensorFlow summary ops for scalar and image tags.

    Builds placeholder-fed summary ops once up front, writes them to a
    ``tf.summary.FileWriter``, and — when a ``comet_api_key`` is present in
    the module-level ``config`` — mirrors scalar metrics to a comet.ml
    Experiment.
    """

    def __init__(self, sess, summary_dir, scalar_tags=None, images_tags=None):
        """
        :param sess: The Graph tensorflow session used in your graph.
        :param summary_dir: the directory which will save the summaries of the graph
        :param scalar_tags: The tags of summaries you will use in your training loop
        :param images_tags: The tags of image summaries you will use in your training loop
        """
        self.sess = sess

        self.scalar_tags = scalar_tags
        self.images_tags = images_tags

        # Registered tag names, and per-tag feed placeholders / summary ops.
        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}

        self.init_summary_ops()

        self.summary_writer = tf.summary.FileWriter(summary_dir)

        # Optional comet.ml mirroring; `config` is a module-level mapping
        # defined elsewhere in this file.
        if "comet_api_key" in config:
            from comet_ml import Experiment
            self.experiment = Experiment(api_key=config['comet_api_key'],
                                         project_name=config['exp_name'])
            self.experiment.log_multiple_params(config)

    def set_summaries(self, scalar_tags=None, images_tags=None):
        """Replace the tag lists and (re)build the summary ops for them."""
        self.scalar_tags = scalar_tags
        self.images_tags = images_tags
        self.init_summary_ops()

    def init_summary_ops(self):
        """Create a float32 placeholder and summary op for every tag."""
        with tf.variable_scope('summary_ops'):
            if self.scalar_tags is not None:
                for tag in self.scalar_tags:
                    self.summary_tags += [tag]
                    self.summary_placeholders[tag] = tf.placeholder('float32',
                                                                    None,
                                                                    name=tag)
                    self.summary_ops[tag] = tf.summary.scalar(
                        tag, self.summary_placeholders[tag])
            if self.images_tags is not None:
                # Image tags come as (tag, shape) pairs so the placeholder
                # can carry the expected image batch shape.
                for tag, shape in self.images_tags:
                    self.summary_tags += [tag]
                    self.summary_placeholders[tag] = tf.placeholder('float32',
                                                                    shape,
                                                                    name=tag)
                    self.summary_ops[tag] = tf.summary.image(
                        tag, self.summary_placeholders[tag], max_outputs=10)

    def summarize(self, step, summaries_dict=None, summaries_merged=None):
        """
        Add the summaries to tensorboard
        :param step: the number of iteration in your training
        :param summaries_dict: the dictionary which contains your summaries .
        :param summaries_merged: Merged summaries which they come from your graph
        :return:
        """
        if summaries_dict is not None:
            summary_list = self.sess.run(
                [self.summary_ops[tag] for tag in summaries_dict.keys()], {
                    self.summary_placeholders[tag]: value
                    for tag, value in summaries_dict.items()
                })
            for summary in summary_list:
                self.summary_writer.add_summary(summary, step)
            # BUG FIX: this comet.ml call was previously nested under the
            # `summaries_merged` branch, so it was skipped when only
            # summaries_dict was supplied and raised a TypeError
            # (summaries_dict is None) when only summaries_merged was.
            # It logs summaries_dict, so it belongs in this branch.
            if getattr(self, 'experiment', None) is not None:
                self.experiment.log_multiple_metrics(summaries_dict, step=step)
        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)

    def finalize(self):
        """Flush any buffered summaries to disk."""
        self.summary_writer.flush()
# --- Ejemplo n.º 11 (score: 0) ---
    while total_timesteps < args.max_timesteps:
        if done:
            if total_timesteps != 0:
                betas = np.array(betas)
                mean_beta, var_beta = betas.mean(), betas.var()

                print("Total T: ", total_timesteps, " Episode Num: ",
                      episode_num, " Episode T: ", episode_timesteps,
                      " Reward: ", episode_reward, "beta mean: ", mean_beta,
                      "beta var: ", var_beta)

                if args.log:
                    experiment.log_multiple_metrics(
                        {
                            "Episode reward": episode_reward,
                            'Episode Beta Mean': mean_beta,
                            'Episode Beta Var': var_beta
                        },
                        step=total_timesteps)
                if args.policy_name == "TD3":
                    policy.train(replay_buffer, episode_timesteps,
                                 args.batch_size, args.discount, args.tau,
                                 args.policy_noise, args.noise_clip,
                                 args.policy_freq)
                else:
                    policy.train(replay_buffer, episode_timesteps,
                                 args.batch_size, args.discount, args.tau,
                                 args.n_backprop)

                # Evaluate episode
            if timesteps_since_eval >= args.eval_freq: