Example #1
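Trains an XGBoost classifier for storm-petrel call detection: it grid-searches boosting hyperparameters with 4-fold cross-validation, saves the best estimator with joblib, and logs the chosen parameters, test metrics, noise-detection accuracy, confusion matrix, and feature-importance figure to a Comet experiment.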
import os

import joblib
import pandas as pd
import xgboost as xg
from comet_ml import Experiment
from sklearn import model_selection
from sklearn.metrics import accuracy_score

# `selected_features`, `calculate_metrics`, `get_confusion_matrix_figure`, and
# `get_feature_importance_figure` are assumed to be defined elsewhere in this
# module.


def train(path):
    name = os.path.splitext(os.path.basename(path))[0]
    print('Processing: ', name)
    features = pd.read_csv(path, index_col=None)
    selected_features_names = [feat_name for feat_name, desc in selected_features]
    features = features[selected_features_names]
    # The first 1200 rows are noise-only recordings; the rest are used for
    # train/test.
    split_idx = 1200
    features = features.drop(['sound.files'], axis=1)
    noise_only_df, df = features.iloc[:split_idx].copy(), features.iloc[split_idx:].copy()
    y = df.pop('petrel')
    X = df.values
    y_noise = noise_only_df.pop('petrel')
    X_noise = noise_only_df.values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
    hyperparams = {
        'n_estimators': [100, 300, 500, 1000],
        'learning_rate': [0.1],
        'gamma': [0.0, 0.5],
        'max_depth': [2, 3, 4],
        'min_child_weight': [1, 2],
        'subsample': [1.0, 0.8],
        'reg_alpha': [0.0, 0.1],
        'reg_lambda': [1, 2, 3]
    }
    #
    # hyperparams = {
    #     'n_estimators': [100],
    #     'learning_rate': [0.1],
    #     'gamma': [0.0],
    #     'max_depth': [2],
    #     'min_child_weight': [1],
    #     'subsample': [1.0],
    #     'reg_alpha': [0.0],
    #     'reg_lambda': [1]
    # }

    clf = model_selection.GridSearchCV(estimator=xg.XGBClassifier(objective='binary:logistic', n_jobs=-1),
                                       param_grid=hyperparams,
                                       cv=4)
    search = clf.fit(X_train, y_train)
    estimator = search.best_estimator_
    joblib.dump(estimator, name + '_model.pkl')

    test_pred = estimator.predict(X_test)
    metrics = calculate_metrics(test_pred, y_test)

    noise_pred = estimator.predict(X_noise)
    noise_detection_accuracy = accuracy_score(y_noise, noise_pred)

    experiment = Experiment(api_key="4PdGdUZmGf6P8QsMa5F2zB4Ui",
                            project_name="storm petrels",
                            workspace="tracewsl")
    experiment.set_name(name)
    experiment.log_parameter('name', name)
    experiment.log_multiple_params(search.best_params_)
    experiment.log_multiple_metrics(metrics)
    experiment.log_metric('Noise detection accuracy', noise_detection_accuracy)
    experiment.log_figure('Confusion matrix', get_confusion_matrix_figure(test_pred, y_test))
    experiment.log_figure('Feature importance', get_feature_importance_figure(estimator, list(df.columns.values)))
Example #2
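Tail end of an anomaly-detection run (the snippet opens mid-call): after choosing the threshold that maximizes F1, it computes test predictions and a confusion matrix, then logs the RNN hyperparameters, the AP/F1/precision/recall metrics, training time, several diagnostic figures, and the confusion matrix to Comet before ending the experiment.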
                            th=f1_max_th)

y_pred, conf_matrix = predict(score_test, f1_max_th, y_test, labels)

experiment.add_tags([data, 'rmse'])
parameters = {
    'sequence_length': sequence_length,
    'number_of_vars': number_of_vars,
    'unit1': unit1,
    'unit2': unit2,
    'drop_rate': drop_rate,
    'batch_size': batch_size,
    'epochs': epochs,
    'learning_rate': learning_rate,
    'decay_rate': decay_rate,
    'decay_step': decay_step
}
experiment.log_parameters(parameters)
experiment.log_metric('ap', ap)
experiment.log_metric('f1', f1)
experiment.log_metric('precision', precision)
experiment.log_metric('recall', recall)
experiment.log_metric('train_time', rnn.time_)
experiment.log_parameter('th_f1', f1_max_th)
experiment.log_figure('losses', fig_loss)
experiment.log_figure('score_test', fig_score_test)
experiment.log_figure('precision_recall', fig_pre_rec)
experiment.log_figure('th_pre_rec_f1', fig_th_pre_rec)
experiment.log_confusion_matrix(matrix=conf_matrix, labels=labels)
experiment.end()
Example #3
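A Logger class that wraps a Comet Experiment for a reinforcement-learning WiFi simulation: it tracks throughput over a sliding window and logs per-round reward, megabytes sent, the chosen contention window (CW), station count, fairness index, observations, and losses.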
from collections import deque

import numpy as np
from comet_ml import Experiment


class Logger:
    def __init__(self, send_logs, tags, parameters, experiment=None):
        self.stations = 5
        self.send_logs = send_logs
        if self.send_logs:
            if experiment is None:
                self.experiment = Experiment(
                    api_key="OZwyhJHyqzPZgHEpDFL1zxhyI",
                    project_name="rl-in-wifi",
                    workspace="wwydmanski")
            else:
                self.experiment = experiment
        self.sent_mb = 0
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        self.last_speed = 0
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep, sigma, theta,
                      step_time):
        self.step_time = step_time
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Steps per episode", steps_per_ep)
            self.experiment.log_parameter("theta", theta)
            self.experiment.log_parameter("sigma", sigma)

    def log_round(self, states, reward, cumulative_reward, info, loss,
                  observations, step):
        if self.send_logs:
            self.experiment.log_histogram_3d(states,
                                             name="Observations",
                                             step=step)
        # Each info entry is a "|"-separated string of per-station values;
        # parse them and average column-wise.
        info = np.mean(np.array([i.split("|") for i in info],
                                dtype=np.float32),
                       axis=0)
        try:
            # round_mb = np.mean([float(i.split("|")[0]) for i in info])
            round_mb = info[0]
        except Exception as e:
            print(info)
            print(reward)
            raise e
        self.speed_window.append(round_mb)
        self.current_speed = np.mean(
            np.asarray(self.speed_window) / self.step_time)
        self.sent_mb += round_mb
        # CW = np.mean([float(i.split("|")[1]) for i in info])
        CW = info[1]
        # stations = np.mean([float(i.split("|")[2]) for i in info])
        self.stations = info[2]
        fairness = info[3]

        if self.send_logs:
            self.experiment.log_metric("Round reward",
                                       np.mean(reward),
                                       step=step)
            self.experiment.log_metric("Per-ep reward",
                                       np.mean(cumulative_reward),
                                       step=step)
            self.experiment.log_metric("Megabytes sent",
                                       self.sent_mb,
                                       step=step)
            self.experiment.log_metric("Round megabytes sent",
                                       round_mb,
                                       step=step)
            self.experiment.log_metric("Chosen CW", CW, step=step)
            self.experiment.log_metric("Station count",
                                       self.stations,
                                       step=step)
            self.experiment.log_metric("Current throughput",
                                       self.current_speed,
                                       step=step)
            self.experiment.log_metric("Fairness index", fairness, step=step)

            for i, obs in enumerate(observations):
                self.experiment.log_metric(f"Observation {i}", obs, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, speed, step):
        if self.send_logs:
            self.experiment.log_metric("Cumulative reward",
                                       cumulative_reward,
                                       step=step)
            self.experiment.log_metric("Speed", speed, step=step)

        self.sent_mb = 0
        self.last_speed = speed
        self.speed_window = deque(maxlen=100)
        self.current_speed = 0

    def end(self):
        if self.send_logs:
            self.experiment.end()
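Example #4

Fragment of a TasNet training script (truncated at both ends): it finishes constructing the model from `hparams`, counts the model's trainable parameters and logs the total to Comet, wraps the model in DataParallel, builds an Adam optimizer, and initializes a per-epoch dictionary for accumulating the tracked losses.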
        n_sources=hparams['n_sources'],
        afe_dir_path=hparams['afe_dir'],
        afe_reg=hparams['afe_reg'],
        weighted_norm=hparams['weighted_norm'])
else:
    raise NotImplementedError(
        'Tasnet type: {} is not yet available.'.format(hparams['tasnet_type']))




# Count the model's trainable parameters.
numparams = 0
for f in model.parameters():
    if f.requires_grad:
        numparams += f.numel()
experiment.log_parameter('Parameters', numparams)
print(numparams)

model = torch.nn.DataParallel(model).cuda()
opt = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'])
all_losses = [back_loss_tr_loss_name] + \
             [k for k in sorted(val_losses.keys())] + \
             [k for k in sorted(tr_val_losses.keys())]

tr_step = 0
val_step = 0
for i in range(hparams['n_epochs']):
    res_dic = {}
    for loss_name in all_losses:
        res_dic[loss_name] = {'mean': 0., 'std': 0., 'acc': []}
    print("TasNet Mask Experiment: {} - {} || Epoch: {}/{}".format(
Example #5
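An AllenNLP-style training entry point: it seeds the environment, prepares a serialization directory, optionally attaches a Comet experiment (logging the full Jsonnet config as code along with hyperparameters, seeds, and host info), trains, optionally evaluates on a test set with an external evaluation command, and archives the resulting model.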
def main(args: argparse.Namespace):

    for package_name in args.include_package:
        import_module_and_submodules(package_name)

    params = Params.from_file(args.param_path, args.overrides)

    # Use fixed seeds when --fix is given; otherwise draw fresh ones (they are
    # still written into the params and logged, so the run stays reproducible).
    random_seed, numpy_seed, pytorch_seed = 41, 11, 302
    if not args.fix:
        random_seed = random.randint(0, 999999999)
        numpy_seed = random.randint(0, 999999999)
        pytorch_seed = random.randint(0, 999999999)

    params["random_seed"] = random_seed
    params["numpy_seed"] = numpy_seed
    params["pytorch_seed"] = pytorch_seed
    prepare_environment(params)
    serialization_dir = args.serialization_dir
    create_serialization_dir(params, serialization_dir, args.recover,
                             args.force)
    prepare_global_logging(serialization_dir, args.file_friendly_logging)

    hyperparams = list(
        get_hyperparams(params.as_dict(infer_type_and_cast=True)))

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    test_file = params.params.get("test_data_path", None)
    validation_data_path = params.get("validation_data_path", None)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    test_command = None
    if evaluate_on_test:
        test_command = BaseEvaluationCommand.from_params(
            params.pop("test_command"))

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    train_model = TrainPipelineModel.from_params(
        params=params, serialization_dir=serialization_dir, local_rank=0)

    trainer = train_model.trainer

    if trainer.validation_command is not None:
        trainer.validation_command.maybe_set_gold_file(validation_data_path)

    params.assert_empty('base train command')

    if args.comet is not None:
        experiment = Experiment(api_key=args.comet,
                                workspace=args.workspace,
                                project_name=args.project,
                                parse_args=False,
                                auto_output_logging=None)
        if args.tags:
            experiment.add_tags(args.tags)
        with open(args.param_path) as fil:
            code = fil.read()
        code += "\n\n#=============Full details=============\n\n"
        full_details = _jsonnet.evaluate_file(args.param_path)
        code += full_details
        code += "\n\n#=============IMPORTANT: overwritten options============\n\n"
        code += args.overrides
        experiment.set_code(code, overwrite=True)

        for key, val in hyperparams:
            experiment.log_parameter(key, val)

        experiment.log_parameter("model_directory", serialization_dir)
        experiment.log_parameter("cuda_device", cuda_device)
        experiment.log_parameter("hostname", socket.gethostname())
        experiment.log_parameter("random_seed", random_seed)
        experiment.log_parameter("numpy_seed", numpy_seed)
        experiment.log_parameter("pytorch_seed", pytorch_seed)
    else:
        experiment = None

    try:
        metrics = trainer.train(experiment)
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir)
        raise

    # Evaluate
    if test_file and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights (see pred_test.txt)."
        )
        trainer.annotator.annotate_file(
            trainer.model, test_file,
            os.path.join(serialization_dir, "pred_test.txt"))

        if test_command:
            logger.info("Comparing against gold standard.")
            test_command.maybe_set_gold_file(test_file)
            test_metrics = test_command.evaluate(
                os.path.join(serialization_dir, "pred_test.txt"))
            if experiment:
                with experiment.test():
                    experiment.log_metrics({
                        k: v
                        for k, v in test_metrics.items() if np.isscalar(v)
                    })
            metrics = merge_dicts(metrics, "test", test_metrics)

    dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                 metrics,
                 log=True)

    if not args.no_archive:
        # Now tar up results
        archive_model(serialization_dir)
Example #6
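A complete training loop for a one-class softmax (OC-Softmax) model on FashionMNIST, where some classes are treated as real and others as fake: it logs parameters to Comet, trains with SGD, and records per-epoch accuracy, EER, and loss both to a CSV log and under Comet's train/validate contexts.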
import csv
import os
import sys
from pprint import pprint

import numpy as np
import torch
import torch.nn as nn
import yaml
from comet_ml import Experiment
from torch.utils.data import DataLoader
from tqdm import tqdm

# `API_KEY`, `Model_base`, `fashionMNIST`, `utils`, and
# `adjust_learning_rate` are assumed to be project-local definitions.


def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.gpu_idx)
    # if len(args.gpu_idx):
    #     device = torch.device("cuda" )
    #     print(device)
    # else:
    #     device = torch.device("cpu")

    # dataloader
    input_channel = 1
    if args.dataset == 'fashionmnist':
        args.num_classes = 2

        # args.train_path = '/storage/fei/data/'
        # args.val_path = '/storage/fei/data/'
        # transform = transforms.Compose([transforms.ToTensor(),
        #                                 transforms.Normalize((0.1307,), (0.3081,))])
        #
        # train_set = torchvision.datasets.FashionMNIST(
        #     root=args.train_path,
        #     train=True,
        #     transform=transform
        # )
        # val_set = torchvision.datasets.FashionMNIST(
        #     root=args.val_path,
        #     train=False,
        #     transform=transform
        # )

        from keras.datasets import fashion_mnist
        (trainX, trainy), (testX, testy) = fashion_mnist.load_data()
        # train_set = fashionMNIST(trainX, trainy, real=[5, 7, 9], fake=[0, 1, 2, 3])
        # val_set = fashionMNIST(testX, testy, real=[5, 7, 9], fake=[0, 1, 2, 3, 4, 6, 8])

        real = [3]
        fake_val = [0, 2]
        fake_test = [4, 6]

        train_set = fashionMNIST(trainX, trainy, real=real, fake=fake_val)
        val_set = fashionMNIST(testX, testy, real=real, fake=fake_val)
        test_set = fashionMNIST(testX, testy, real=real, fake=fake_test)
    else:
        raise ValueError(
            'Dataset should be: voxceleb1, imagenet, fashionmnist!')
    #
    train_dataloader = DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4,
                                  drop_last=True)
    val_dataloader = DataLoader(val_set,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=4)
    test_dataloader = DataLoader(test_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4)

    model = Model_base(args).cuda()

    experiment = Experiment(API_KEY, project_name='OC-Softmax')
    experiment.log_parameters(vars(args))
    experiment.set_name(args.model_dir)
    numparams = 0
    for f in model.parameters():
        if f.requires_grad:
            numparams += f.numel()
    experiment.log_parameter('Parameters', numparams)
    print('Total number of parameters: {}'.format(numparams))

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model.backbone = nn.DataParallel(model.backbone)

    model = model.cuda()

    # Optimizer
    # optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=0.9,
                                weight_decay=1e-4)
    # optimizer = optim.SGD([{'params': model.backbone.parameters()},
    #                        {'params': model.softmax_layer.parameters()}],
    #                       lr=args.lr, momentum=0.9, nesterov=False)
    # scheduler = StepLR(optimizer, step_size=30, gamma=0.6)

    # Save config
    model_path = os.path.join(args.exp_dir, args.model_dir)
    log_path = os.path.join(model_path, 'logs')
    if os.path.exists(log_path):
        res = input("Experiment {} already exists, continue? (y/n)".format(
            args.model_dir))
        print(res)
        if res == 'n':
            sys.exit()
    os.makedirs(log_path, exist_ok=True)
    conf_path = os.path.join(log_path, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(vars(args), outfile)

    log_file = '{}/stats.csv'.format(log_path)
    log_content = [
        'Epoch', 'tr_acc', 'val_acc', 'test_acc', 'val_eer', 'test_eer',
        'tr_loss', 'val_loss', 'test_loss'
    ]

    if not os.path.exists(log_file):
        with open(log_file, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(log_content)

    # Train model
    tr_step = 0
    val_step = 0
    new_lr = args.lr
    halving = False
    best_val_loss = float("inf")  # lower is better, so start at +inf
    val_no_impv = 0

    # training
    iteration = 0
    for epoch in range(args.epochs):
        metric_dic = {}
        for m in log_content[1:]:
            metric_dic[m] = []
        current_lr = adjust_learning_rate(optimizer, tr_step, args.lr)
        # print('Epoch:', epoch,'LR:', optimizer.param_groups[0]['lr'], optimizer.param_groups[1]['lr'])
        print('Epoch: {}, learning rate: {}'.format(epoch + 1, current_lr))
        # train_utils.val_step(spk_classifier, embedding, val_dataloader,  iteration, val_log_path)

        # Training
        model.train()
        for data in tqdm(train_dataloader,
                         desc='{} Training'.format(
                             args.model_dir)):  # mini-batch
            # one batch of training data
            # input_feature, target = data['input_feature'].to(device), data['target'].to(device)
            input_feature, target = data[0].cuda(), data[1].cuda()

            # gradient accumulates
            optimizer.zero_grad()

            # embedding
            # embeddings = model.backbone(input_feature)
            output, loss = model(input_feature, target)
            metric_dic['tr_loss'].append(loss.detach().cpu())

            # if args.center > 0:
            #     l_c = 0
            #     for i in range(model.embeddings.shape[0]):
            #         l_c = l_c + 0.5 * (model.embeddings[i] - W[:, target[i]]).pow(2).sum()
            #     l_c = l_c / model.embeddings.shape[0]
            #     loss = loss + args.center * l_c
            #     metric_dic['tr_center_loss'].append(l_c.detch().cpu())
            #
            # if args.w_ortho > 0:
            #     W = F.normalize(model.softmax_layer.W, p=2, dim=0)
            #     l_w_reg = (W.T @ W - torch.eye(W.shape[1]).cuda()).norm(p=2)
            #     loss = loss + args.w_ortho * l_w_reg
            #     metric_dic['tr_w_reg'].append(l_w_reg.detach().cpu())

            train_acc = utils.accuracy(output, target)[0]  # Top-1 acc
            metric_dic['tr_acc'].append(train_acc.cpu())

            loss.backward()
            #             torch.nn.utils.clip_grad_norm_(embedding.parameters(), 1.0)
            #             torch.nn.utils.clip_grad_norm_(spk_classifier.parameters(), 1.0)
            optimizer.step()

            if iteration % 100 == 0:
                print('Train loss: {:.2f}, Acc: {:.2f}%'.format(
                    loss.item(), train_acc))

            iteration += 1
        tr_step += 1

        # res_dic['tr_loss']['acc'] += l.tolist()

        # Validation
        if val_dataloader is not None:
            model.eval()
            outputs = []
            targets = []
            with torch.no_grad():
                for data in tqdm(val_dataloader,
                                 desc='Validation'):  # mini-batch
                    # input_feature, target = data['input_feature'].to(device), data['target'].to(device)
                    input_feature, target = data[0].cuda(), data[1].cuda()

                    output, loss = model(input_feature, target)

                    # val_acc = utils.accuracy(output, target)[0] # Top-1 acc
                    # metric_dic['val_acc'].append(val_acc.cpu())
                    metric_dic['val_loss'].append(loss.cpu())
                    outputs.append(output)
                    targets.append(target)
            metric_dic['val_acc'] = utils.accuracy(
                torch.cat(outputs).cpu(),
                torch.cat(targets).cpu())[0].item()

            # The score polarity is unknown a priori, so compute the EER for
            # both signs and keep the smaller value.
            eer1, _ = utils.compute_eer(
                torch.cat(outputs).cpu()[:, 0],
                torch.cat(targets).cpu())
            eer2, _ = utils.compute_eer(-torch.cat(outputs).cpu()[:, 0],
                                        torch.cat(targets).cpu())
            metric_dic['val_eer'] = min(eer1, eer2)

        # Test
        if test_dataloader is not None:
            model.eval()
            outputs = []
            targets = []
            with torch.no_grad():
                for data in tqdm(test_dataloader,
                                 desc='Test'):  # mini-batch
                    # input_feature, target = data['input_feature'].to(device), data['target'].to(device)
                    input_feature, target = data[0].cuda(), data[1].cuda()

                    output, loss = model(input_feature, target)

                    # val_acc = utils.accuracy(output, target)[0] # Top-1 acc
                    # metric_dic['val_acc'].append(val_acc.cpu())
                    metric_dic['test_loss'].append(loss.cpu())
                    outputs.append(output)
                    targets.append(target)
            metric_dic['test_acc'] = utils.accuracy(
                torch.cat(outputs).cpu(),
                torch.cat(targets).cpu())[0].item()

            eer1, _ = utils.compute_eer(
                torch.cat(outputs).cpu()[:, 0],
                torch.cat(targets).cpu())
            eer2, _ = utils.compute_eer(-torch.cat(outputs).cpu()[:, 0],
                                        torch.cat(targets).cpu())
            metric_dic['test_eer'] = min(eer1, eer2)

        for metric in metric_dic.keys():
            if isinstance(metric_dic[metric], list):
                metric_dic[metric] = np.mean(metric_dic[metric])
            if metric[:3] == 'tr_':
                with experiment.train():
                    experiment.log_metric(metric[3:],
                                          metric_dic[metric],
                                          step=tr_step)
            if metric[:4] == 'val_':
                with experiment.validate():
                    experiment.log_metric(metric[4:],
                                          metric_dic[metric],
                                          step=tr_step)

        pprint(metric_dic)

        # Write logs
        with open(log_file, 'a') as f:
            writer = csv.writer(f)
            write_content = [tr_step
                             ] + [metric_dic[m] for m in metric_dic.keys()]
            writer.writerow(write_content)

        Model_base.save_if_best(save_dir=model_path,
                                model=model,
                                optimizer=optimizer,
                                epoch=tr_step,
                                tr_metric=metric_dic['tr_acc'],
                                val_metric=metric_dic['val_eer'],
                                metric_name='eer',
                                save_every=10)
Example #7
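Loop over pretrained DeepForest models (the snippet opens mid-list): each iteration copies the base config, creates a fresh Comet experiment in "retrain_sequence" mode, logs the config and a timestamped start time, and creates snapshot and image output directories.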
    "SJER", "MLBS", "NIWO", "TEAK"
]

#For each model, match the hand annotations with the pretraining model
models.sort()
results = []
for model in models:

    #load config
    DeepForest_config = copy.deepcopy(original_DeepForest_config)

    #Replace config file and experiment
    experiment = Experiment(api_key="ypQZhYfs3nSyKzOfz13iuJpj2",
                            project_name='deeplidar',
                            log_code=False)
    experiment.log_parameter("mode", "retrain_sequence")

    print("Running pretraining model {}".format(model))

    #Log experiments
    dirname = datetime.now().strftime("%Y%m%d_%H%M%S")
    experiment.log_parameters(DeepForest_config)
    experiment.log_parameter("Start Time", dirname)

    #Make a new dir and reformat args
    save_snapshot_path = os.path.join(DeepForest_config["save_snapshot_path"],
                                      dirname)
    save_image_path = os.path.join(DeepForest_config["save_image_path"],
                                   dirname)
    os.mkdir(save_snapshot_path)