Example #1
def main():
    print('keras.__version__=' + str(keras.__version__))
    print('tf.__version__=' + str(tf.__version__))
    print('PIL.__version__=' + str(PIL.__version__))
    print('Using GPU ' + str(os.environ["CUDA_VISIBLE_DEVICES"]) +
          '  Good luck...')

    model_dir = 'F:/AMATEUR/models_mask_rcnn/'
    coco_model_path = 'C:/Users/cj3272/PycharmProjects/Mask_RCNN/mask_rcnn_coco.h5'
    if C.is_running_on_casir():
        model_dir = '/gpfs/home/cj3272/amateur/modeles/segmentation/MaskRCNN_win/logs/'
        coco_model_path = '/gpfs/home/cj3272/amateur/modeles/segmentation/MaskRCNN_win/mask_rcnn_coco.h5'

    config = AmateurTrain()
    assert config.BASE_DIR is None
    dataset_id = '30MAI2019'
    run_name = 'training'
    if C.is_running_on_casir():
        config.BASE_DIR = '/gpfs/groups/gc014a/AMATEUR/dataset/segmentation/' + dataset_id + '/GEN_segmentation/'
        config.IMAGES_PER_GPU = 4
        config.STEPS_PER_EPOCH = 1000
        config.VALIDATION_STEPS = 200
        n_train = None
        n_val = None
    else:
        config.BASE_DIR = 'F:/AMATEUR/segmentation/13JUIN2019/GEN_segmentation/'
        config.IMAGES_PER_GPU = 1
        config.IMAGE_MIN_DIM = 400
        config.IMAGE_MAX_DIM = 512
        n_train = 1
        n_val = 1

    # Datasets
    seed = 10
    val_size = 0.2
    image_ids = [
        f for f in listdir(config.BASE_DIR)
        if isfile(join(config.BASE_DIR, f)) and f.endswith('gz')
    ]
    train_list, val_list = train_test_split(image_ids,
                                            test_size=val_size,
                                            random_state=seed)
    print('train size=' + str(len(train_list)))
    print('val size=' + str(len(val_list)))

    # Training dataset
    dataset_train = dataset_util.AmateurDatasetOnDiskMRCNN()
    # if C.is_running_on_casir():
    #    dataset_train = dataset_util.AmateurDatasetMemoryMRCNN()
    dataset_train.load(config.BASE_DIR, train_list, n=n_train)
    dataset_train.prepare()

    # Validation dataset
    dataset_val = dataset_util.AmateurDatasetOnDiskMRCNN()
    dataset_val.load(config.BASE_DIR, val_list, n=n_val)
    dataset_val.prepare()

    print(str(len(dataset_train.image_ids)) + ' images for training.')
    print(str(len(dataset_val.image_ids)) + ' images for validating.')
    assert config.STEPS_PER_EPOCH * config.IMAGES_PER_GPU >= len(
        dataset_train.image_ids)
    assert config.VALIDATION_STEPS * config.IMAGES_PER_GPU >= len(
        dataset_val.image_ids)

    # Create model in training mode
    model = model_lib.MaskRCNN(mode="training",
                               config=config,
                               model_dir=model_dir)

    # Which weights to start with?
    init_with = "coco"  # imagenet, coco, or last

    if init_with == "imagenet":
        model.load_weights(model.get_imagenet_weights(), by_name=True)
    elif init_with == "coco":
        # Load weights trained on MS COCO, but skip layers that
        # are different due to the different number of classes
        # See README for instructions to download the COCO weights
        model.load_weights(coco_model_path,
                           by_name=True,
                           exclude=[
                               "mrcnn_class_logits", "mrcnn_bbox_fc",
                               "mrcnn_bbox", "mrcnn_mask"
                           ])
    elif init_with == "last":
        # Load the last model you trained and continue training
        model.load_weights(model.find_last(), by_name=True)

    mlflow.set_tracking_uri("http://10.7.248.206:3389")
    mlflow.set_experiment("AMATEUR_SEGMENTATION")
    mlflow.start_run(run_name=run_name)
    mlflow.log_params({
        'model_dir': str(model_dir),
        'n_train': str(n_train),
        'n_val': str(n_val),
        'init_with': init_with,
        'os': os.name,
        'dataset_id': dataset_id,
        'CUDA_VISIBLE_DEVICES': os.environ["CUDA_VISIBLE_DEVICES"]
    })
    mlflow.log_params(config.get_parameters())

    # Train the head branches
    # Passing layers="heads" freezes all layers except the head
    # layers. You can also pass a regular expression to select
    # which layers to train by name pattern.
    # model.train(dataset_train, dataset_val,
    #             learning_rate=config.LEARNING_RATE,
    #             epochs=6,
    #             layers='heads')

    # Fine tune all layers
    # Passing layers="all" trains all layers. You can also
    # pass a regular expression to select which layers to
    # train by name pattern.
    custom_callbacks = [
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.8,
                                          patience=7,
                                          verbose=1,
                                          mode='auto',
                                          min_delta=0.01,
                                          cooldown=0,
                                          min_lr=10e-7),
        keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(logs.keys())),
        keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: mlflow.log_metrics(logs))
    ]
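    # Note: mlflow.log_metrics also accepts a `step` argument, so passing the
    # epoch (e.g. mlflow.log_metrics(logs, step=epoch)) would index each metric
    # by epoch instead of by call order.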
    model.train(dataset_train,
                dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=150,
                layers="all",
                custom_callbacks=custom_callbacks)
Example #2
                        default=os.environ['SM_NUM_GPUS'])

    args = parser.parse_args()

    experiment_name = args.experiment_name
    mlflow.set_tracking_uri(args.mlflow_server)

    mlflow.set_experiment(experiment_name)

    experiment_id = mlflow.get_experiment_by_name(
        experiment_name).experiment_id
    tags = {"engineering": "ML Platform"}

    best_model, max_map = train(args)

    with mlflow.start_run(tags=tags) as run:
        params = {
            "batch-size": args.batch_size,
            "test-batch-size": args.test_batch_size,
            "epochs": args.epochs,
            "lr": args.lr,
            "momentum": args.momentum
        }
        mlflow.log_params(params)
        mlflow.log_metric(key='max_map', value=max_map)
        run_id = run.info.run_id

    metadata_dict = {'run_id': run_id}
    metadata_path = os.path.join(args.model_dir, 'metadata.json')

    with open(metadata_path, "w") as f:
        json.dump(metadata_dict, f)
Example #3
def test_log_image_raises_exception_for_unsupported_image_object_type():
    with mlflow.start_run(), pytest.raises(
            TypeError, match="Unsupported image object type"):
        mlflow.log_image("not_image", "image.png")
    mlflow.set_experiment(exp_name)

    test_size = None

    if args.test_size:
        test_size = args.test_size
        LOGGER.info(f'Test size: {test_size}')
        mlflow.log_param(f'Test size', test_size)

    if DATA_AUGMENTATION:
        mlflow.log_param(f'AUGMENTATION_SIZE', AUGMENTATION_SIZE)
        LOGGER.info(f'Augmentation size: {AUGMENTATION_SIZE}')

    from models.naive_bayes import NaiveBayesEmotionDetection

    with mlflow.start_run() as run:
        naive_model = NaiveBayesEmotionDetection(test_size, DATA_AUGMENTATION,
                                                 AUGMENTATION_SIZE)
        LOGGER.info(f'Experiment {exp_name} started.')
        naive_model.experiment()

        if MODEL_FILENAME:
            with open(MODEL_FILENAME, 'wb') as fl:
                pickle.dump(naive_model.model, fl)
            LOGGER.info(
                f'Model {exp_name} saved to {MODEL_FILENAME} successfully!')

        LOGGER.info(f'Experiment {exp_name} completed.')

if args.model == 'svm':
    LOGGER.info(f'Using SVM model')
Example #5
def test_log_figure_raises_error_for_unsupported_figure_object_type():
    with mlflow.start_run(), pytest.raises(
            TypeError, match="Unsupported figure object type"):
        mlflow.log_figure("not_figure", "figure.png")
Example #6
def test_log_image_numpy_raises_exception_for_invalid_array_shape():
    import numpy as np

    with mlflow.start_run(), pytest.raises(
            ValueError, match="`image` must be a 2D or 3D array"):
        mlflow.log_image(np.zeros((1, ), dtype=np.uint8), "image.png")
Example #7
    targetcol = 'default'
    y = mydf[targetcol]

    experimento = mlflow.get_experiment_by_name('Risco de Credito')
    if experimento is None:
        experimento = mlflow.create_experiment('Risco de Credito')
        experimento = mlflow.get_experiment_by_name('Risco de Credito')

    # Create the classifier (model 1 with all columns and default scikit-learn parameters)
    independentcols = [
        'renda', 'idade', 'etnia', 'sexo', 'casapropria', 'outrasrendas',
        'estadocivil', 'escolaridade'
    ]
    x = mydf[independentcols]
    run_name = get_a_funnyName()
    with mlflow.start_run(experiment_id=experimento.experiment_id,
                          run_name=("MyNameIs.. " + run_name)):
        from sklearn.ensemble import RandomForestClassifier as rfc

        clf = rfc()
        clf.fit(X=x, y=y)

        clf.independentcols = independentcols
        clf_acuracia = clf.score(X=x, y=y)
        print(
            "Model 01 (classifier) created with accuracy: [{0}]".format(
                clf_acuracia))

        mlflow.log_param("criterion", clf.criterion)
        mlflow.log_param("n_estimators", clf.n_estimators)
        mlflow.log_param("min_samples_leaf", clf.min_samples_leaf)
        mlflow.log_param("max_depth", clf.max_depth)
Example #8
def main(params: dict, output_dir: str):
    import mlflow
    print("start params={}".format(params))
    model_id = "all"
    logger = get_logger()
    df = pd.read_pickle(
        "../input/riiid-test-answer-prediction/train_merged.pickle")
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/split10/train_0.pickle").sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    if is_debug:
        df = df.head(30000)
    df["prior_question_had_explanation"] = df[
        "prior_question_had_explanation"].fillna(-1)
    df["answered_correctly"] = df["answered_correctly"].replace(-1, np.nan)
    column_config = {
        ("content_id", "content_type_id"): {
            "type": "category"
        },
        "user_answer": {
            "type": "leakage_feature"
        },
        "answered_correctly": {
            "type": "leakage_feature"
        },
        "part": {
            "type": "category"
        },
        "prior_question_elapsed_time_bin300": {
            "type": "category"
        },
        "duration_previous_content_bin300": {
            "type": "category"
        },
        "prior_question_had_explanation": {
            "type": "category"
        },
        "rating_diff_content_user_id": {
            "type": "numeric"
        },
        "task_container_id_bin300": {
            "type": "category"
        },
        "previous_answer_index_question_id": {
            "type": "category"
        },
        "previous_answer_question_id": {
            "type": "category"
        },
        "timediff-elapsedtime_bin500": {
            "type": "category"
        },
        "timedelta_log10": {
            "type": "category"
        }
    }

    if not load_pickle or is_debug:
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"][
            "DurationPreviousContent"] = DurationPreviousContent(
                is_partial_fit=True)
        feature_factory_dict["user_id"][
            "ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_dict["user_id"][
            "UserContentRateEncoder"] = UserContentRateEncoder(
                rate_func="elo", column="user_id")
        feature_factory_dict["user_id"]["PreviousAnswer2"] = PreviousAnswer2(
            groupby="user_id",
            column="question_id",
            is_debug=is_debug,
            model_id=model_id,
            n=300)
        feature_factory_dict["user_id"][
            "StudyTermEncoder2"] = StudyTermEncoder2(is_partial_fit=True)
        feature_factory_dict["user_id"][
            f"MeanAggregatorStudyTimebyUserId"] = MeanAggregator(
                column="user_id", agg_column="study_time", remove_now=False)

        feature_factory_dict["user_id"][
            "ElapsedTimeMeanByContentIdEncoder"] = ElapsedTimeMeanByContentIdEncoder(
            )
        feature_factory_dict["post"] = {
            "DurationFeaturePostProcess": DurationFeaturePostProcess()
        }

        feature_factory_manager = FeatureFactoryManager(
            feature_factory_dict=feature_factory_dict,
            logger=logger,
            split_num=1,
            model_id=model_id,
            load_feature=not is_debug,
            save_feature=not is_debug)
        print("all_predict")
        df = feature_factory_manager.all_predict(df)

        def f(x):
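            # Integer-divide by 1000 and clip to [-100, 400]; the result feeds
            # the bounded categorical feature "timediff-elapsedtime_bin500".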
            x = x // 1000
            if x < -100:
                return -100
            if x > 400:
                return 400
            return x

        df["task_container_id_bin300"] = [
            x if x < 300 else 300 for x in df["task_container_id"]
        ]
        df["timediff-elapsedtime_bin500"] = [
            f(x) for x in df["timediff-elapsedtime"].values
        ]
        df["timedelta_log10"] = np.log10(
            df["duration_previous_content"].values)
        df["timedelta_log10"] = df["timedelta_log10"].replace(
            -np.inf, -1).replace(np.inf, -1).fillna(-1).astype("int8")
        df = df[[
            "user_id", "content_id", "content_type_id", "part", "user_answer",
            "answered_correctly", "prior_question_elapsed_time_bin300",
            "duration_previous_content_bin300",
            "prior_question_had_explanation", "rating_diff_content_user_id",
            "task_container_id_bin300", "previous_answer_index_question_id",
            "previous_answer_question_id", "row_id",
            "timediff-elapsedtime_bin500", "timedelta_log10"
        ]]
        print(df.head(10))

        print("data preprocess")

    ff_for_transformer = FeatureFactoryForTransformer(
        column_config=column_config,
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    ff_for_transformer.make_dict(df=df)
    n_skill = len(ff_for_transformer.embbed_dict[("content_id",
                                                  "content_type_id")])

    if not load_pickle or is_debug:
        df_val_row = pd.read_feather(
            "../input/riiid-test-answer-prediction/train_transformer_last2500k_only_row_id.feather"
        )
        if is_debug:
            df_val_row = df_val_row.head(3000)
        df_val_row["is_val"] = 1

        df = pd.merge(df, df_val_row, how="left", on="row_id")
        df["is_val"] = df["is_val"].fillna(0)

        print(df["is_val"].value_counts())

        w_df = df[df["is_val"] == 0]
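        # Split each training user's history into chunks of at most
        # params["max_seq"] rows (counted back from the most recent row) and
        # give every chunk its own synthetic user_id, bounding sequence length.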
        w_df["group"] = (
            w_df.groupby("user_id")["user_id"].transform("count") -
            w_df.groupby("user_id").cumcount()) // params["max_seq"]
        w_df["user_id"] = w_df["user_id"].astype(
            str) + "_" + w_df["group"].astype(str)

        group = ff_for_transformer.all_predict(w_df)

        dataset_train = SAKTDataset(group,
                                    n_skill=n_skill,
                                    max_seq=params["max_seq"])

        del w_df
        gc.collect()

    ff_for_transformer = FeatureFactoryForTransformer(
        column_config=column_config,
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    if not load_pickle or is_debug:
        group = ff_for_transformer.all_predict(df[df["content_type_id"] == 0])
        dataset_val = SAKTDataset(group,
                                  is_test=True,
                                  n_skill=n_skill,
                                  max_seq=params["max_seq"])

    os.makedirs("../input/feature_engineering/model275_all", exist_ok=True)
    if not is_debug and not load_pickle:
        with open(f"../input/feature_engineering/model275_all/train.pickle",
                  "wb") as f:
            pickle.dump(dataset_train, f)
        with open(f"../input/feature_engineering/model275_all/val.pickle",
                  "wb") as f:
            pickle.dump(dataset_val, f)

    if not is_debug and load_pickle:
        with open(f"../input/feature_engineering/model275_all/train.pickle",
                  "rb") as f:
            dataset_train = pickle.load(f)
        with open(f"../input/feature_engineering/model275_all/val.pickle",
                  "rb") as f:
            dataset_val = pickle.load(f)
        print("loaded!")
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=params["batch_size"],
                                  shuffle=True)
    dataloader_val = DataLoader(dataset_val,
                                batch_size=params["batch_size"],
                                shuffle=False)

    model = SAKTModel(n_skill,
                      embed_dim=params["embed_dim"],
                      max_seq=params["max_seq"],
                      dropout=dropout,
                      cont_emb=params["cont_emb"])

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
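    # Common transformer recipe: apply weight decay to every parameter except
    # biases and LayerNorm weights, which go into a second group with decay 0.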
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.1
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=params["lr"],
        weight_decay=0.1,
    )
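    # Size the linear warmup/decay schedule for 20 passes over the training
    # loader; the loop below then runs for `epochs` epochs.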
    num_train_optimization_steps = int(len(dataloader_train) * 20)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=params["num_warmup_steps"],
        num_training_steps=num_train_optimization_steps)
    criterion = nn.BCEWithLogitsLoss()

    model.to(device)
    criterion.to(device)
    auc_val = 0
    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train,
                                              dataloader_val, optimizer,
                                              criterion, scheduler, epoch,
                                              output_dir, device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".
              format(epoch, loss, auc, auc_val))
        torch.save(
            model.state_dict(),
            f"{output_dir}/transformers_epoch{epoch}_auc{round(auc_val, 4)}.pth"
        )

    # df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)
    """
    df_oof2 = pd.read_csv("../output/ex_237/20201213110353/oof_train_0_lgbm.csv")
    df_oof2.columns = ["row_id", "predict_lgbm", "target"]
    df_oof2 = pd.merge(df_oof, df_oof2, how="inner")

    auc_lgbm = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values)
    print("lgbm: {:.4f}".format(auc_lgbm))

    print("ensemble")
    max_auc = 0
    max_nn_ratio = 0
    for r in np.arange(0, 1.05, 0.05):
        auc = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values*(1-r) + df_oof2["predict"].values*r)
        print("[nn_ratio: {:.2f}] AUC: {:.4f}".format(r, auc))

        if max_auc < auc:
            max_auc = auc
            max_nn_ratio = r
    print(len(df_oof2))
    """
    if not is_debug:
        mlflow.start_run(experiment_id=10, run_name=os.path.basename(__file__))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_val)
        mlflow.end_run()
    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")
    del model
    torch.cuda.empty_cache()
    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)
    if is_make_feature_factory:
        # feature factory
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"][
            "DurationPreviousContent"] = DurationPreviousContent(
                is_partial_fit=True)
        feature_factory_dict["user_id"][
            "ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(
            feature_factory_dict=feature_factory_dict,
            logger=logger,
            split_num=1,
            model_id="all",
            load_feature=not is_debug,
            save_feature=not is_debug)

        ff_for_transformer = FeatureFactoryForTransformer(
            column_config=column_config,
            dict_path="../feature_engineering/",
            sequence_length=params["max_seq"],
            logger=logger)
        df = pd.read_pickle(
            "../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        feature_factory_manager.fit(df)
        df = feature_factory_manager.all_predict(df)
        for dicts in feature_factory_manager.feature_factory_dict.values():
            for factory in dicts.values():
                factory.logger = None
        feature_factory_manager.logger = None
        with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f:
            pickle.dump(feature_factory_manager, f)

        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(
                f"{output_dir}/feature_factory_manager_for_transformer.pickle",
                "wb") as f:
            pickle.dump(ff_for_transformer, f)
Example #9
    return "".join(
        random.sample(string.ascii_letters, random.randint(1, max_len)))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--large",
        help=
        "If true, will also generate larger datasets for testing UI performance.",
        action="store_true")
    args = parser.parse_args()
    client = MlflowClient()
    # Simple run
    for l1, alpha in itertools.product([0, 0.25, 0.5, 0.75, 1], [0, 0.5, 1]):
        with mlflow.start_run(run_name='ipython'):
            parameters = {
                'l1': str(l1),
                'alpha': str(alpha),
            }
            metrics = {
                'MAE': [rand()],
                'R2': [rand()],
                'RMSE': [rand()],
            }
            log_params(parameters)
            log_metrics(metrics)

    # Big parameter values
    with mlflow.start_run(run_name='ipython'):
        parameters = {
Example #10
    def log_torch_model(self, model, epoch):
        with mlflow.start_run(self.run_id):
            mlflow.pytorch.log_model(model, "model_%04d" % epoch)

if __name__ == '__main__':
    with open('test.file', 'wb') as f:
        test = range(10)
        joblib.dump(test, f)

    local_registry = "sqlite:///mlruns.db"
    print(f"Running local model registry={local_registry}")
    mlflow.set_tracking_uri(local_registry)

    clt = MlflowClient()
    exp_a_uri = "file:///tmp/exp_A"
    exp_a_id = get_exp_id("experiment_A", local_registry, local_registry,
                          exp_a_uri)
    exp_b_uri = "file:///tmp/exp_B"
    exp_b_id = get_exp_id("experiment_B", local_registry, local_registry,
                          exp_b_uri)

    for i in range(3):
        with mlflow.start_run(experiment_id=exp_a_id):
            mlflow.log_metric("MEAN SQUARE ERROR", 0.25 * random())
            mlflow.log_artifact('test.file')
            print(f"artifact_uri={mlflow.get_artifact_uri()}")
    print("-" * 75)
    for i in range(3):
        with mlflow.start_run(experiment_id=exp_b_id):
            mlflow.log_metric("MEAN SQUARE ERROR", 0.25 * random())
            mlflow.log_artifact('test.file')
            print(f"artifact_uri={mlflow.get_artifact_uri()}")
Example #12
def eval_Slim(params, cfg, train_mat, eval_mat, experiment):
    # This function is what Hyperopt is going to optimize (minimize 'loss' value)
    print(experiment)
    with mlflow.start_run(experiment_id=experiment):

        # Log the config
        utils.config_helpers.log_config(dict(cfg.model))

        n_users, n_items = train_mat.shape
        np.random.seed(seed=cfg.model.seed)

        # Log relevant parameters for this run.
        mlflow.log_param("alpha", params['alpha'])
        mlflow.log_param("l1_ratio", params['l1_ratio'])
        mlflow.log_param("max_iter", params['max_iter'])
        mlflow.log_param("tol", params['tol'])

        # Log this run
        log.info(
            f"Testing  alpha: {params['alpha']},  l1_ratio: {params['l1_ratio']}, max_iter: {params['max_iter']} and tol: {params['tol']}"
        )

        start = time.time()
        # Create model
        slim = RecModel.Slim(num_items=n_items, num_users=n_users)

        # Train Model
        slim.train(X=train_mat.copy(),
                   alpha=params['alpha'],
                   l1_ratio=params['l1_ratio'],
                   max_iter=params['max_iter'],
                   tolerance=params['tol'],
                   cores=1,
                   verbose=int(cfg.model.verbose))

        # Log run-time
        mlflow.log_metric("Runtime", int(round(time.time() - start, 0)))

        # Evaluate model
        perf_all = slim.eval_topn(eval_mat.copy(),
                                  rand_sampled=int(cfg.model.rand_sampled),
                                  topn=np.array(cfg.model.top_n_performances,
                                                dtype=np.int32),
                                  random_state=int(cfg.model.seed),
                                  cores=int(cfg.model.cores))

        # Log the performance of the model
        for pos in range(len(cfg.model.top_n_performances)):
            mlflow.log_metric(
                f"recallAT{cfg.model.top_n_performances[pos]}_of_{cfg.model.rand_sampled}",
                perf_all[f"Recall@{cfg.model.top_n_performances[pos]}"])
        mlflow.log_metric('MAE_train', slim.eval_prec(train_mat.copy()))
        mlflow.log_metric('MAE_eval', slim.eval_prec(eval_mat.copy()))

        # We will always choose the first top-n performance. Hopefully that is
        # also the smallest and the most relevant one for us.
        rel_topn_perf = perf_all[f"Recall@{cfg.model.top_n_performances[0]}"]

        log.info(
            f"Current recallAT{cfg.model.top_n_performances[0]}_of_{cfg.model.rand_sampled} performance was {rel_topn_perf}"
        )
        loss = -rel_topn_perf
        return {'loss': loss, 'status': hp.STATUS_OK, 'eval_time': time.time()}
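
# A minimal driver sketch, not part of the original snippet: eval_Slim is the
# objective Hyperopt minimizes, so it would typically be handed to fmin
# together with a search space over the four logged hyper-parameters. The
# function name, parameter ranges and max_evals below are illustrative
# assumptions, not the project's actual tuning setup.
def tune_slim(cfg, train_mat, eval_mat, experiment, max_evals=50):
    from functools import partial

    from hyperopt import Trials, fmin, hp, tpe

    # Hypothetical search space over the parameters eval_Slim logs.
    space = {
        'alpha': hp.loguniform('alpha', -8, 0),
        'l1_ratio': hp.uniform('l1_ratio', 0.0, 1.0),
        'max_iter': hp.choice('max_iter', [50, 100, 200]),
        'tol': hp.loguniform('tol', -10, -4),
    }
    # fmin passes the sampled params dict as the first positional argument.
    objective = partial(eval_Slim,
                        cfg=cfg,
                        train_mat=train_mat,
                        eval_mat=eval_mat,
                        experiment=experiment)
    return fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=Trials())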
Example #13
def test_xgb_autolog_persists_manually_created_run(bst_params, dtrain):
    mlflow.xgboost.autolog()
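    # autolog should attach to the already-active run rather than creating or
    # ending its own; the assertions below verify exactly that.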
    with mlflow.start_run() as run:
        xgb.train(bst_params, dtrain)
        assert mlflow.active_run()
        assert mlflow.active_run().info.run_id == run.info.run_id
Example #14
def create_gen3_heatmap(json_file_path):
    """ Create elapsed time and speed heatmaps from a json file resulting from 'gen3_eval', which contains the times
    elapsed and speeds for a number of evaluation runs.

    Args:
        json_file_path: Path to the json file containing the run evaluation elapsed times and speeds
    """

    # start mlflow run
    with mlflow.start_run() as mlrun:
        # if the json file does not exist, raise error
        if not os.path.exists(json_file_path) or not os.path.isfile(
                json_file_path):
            raise ValueError('{} does not exist'.format(json_file_path))

        # initialize chunk_sizes and chunks lists to contain all the powers of 2 between
        # 2^MIN_EXPONENT and 2^MAX_EXPONENT (included)
        chunk_sizes_iterator = [
            2**x
            for x in range(MIN_CHUNK_SIZE_EXPONENT, MAX_CHUNK_SIZE_EXPONENT +
                           1)
        ]
        chunks_iterator = [
            2**x for x in range(MIN_CHUNKS_EXPONENT, MAX_CHUNKS_EXPONENT + 1)
        ]

        # open json file and load its content in the data dict
        with open(json_file_path, 'r') as f:
            data = json.load(f)
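            # Expected layout, inferred from the accesses below:
            #   {"elapsed_times": {"<chunk_size>": {"<chunks>": [runs...] or null}},
            #    "speeds":        {"<chunk_size>": {"<chunks>": [runs...] or null}}}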

            # instantiate empty numpy arrays for containing the average values and standard deviations for
            # both elapsed times and speeds
            elapsed_times_avg = np.empty(shape=(len(chunk_sizes_iterator),
                                                len(chunks_iterator)),
                                         dtype=np.float32)
            speeds_avg = np.empty(shape=(len(chunk_sizes_iterator),
                                         len(chunks_iterator)),
                                  dtype=np.float32)
            elapsed_times_std = np.empty(shape=(len(chunk_sizes_iterator),
                                                len(chunks_iterator)),
                                         dtype=np.float32)
            speeds_std = np.empty(shape=(len(chunk_sizes_iterator),
                                         len(chunks_iterator)),
                                  dtype=np.float32)

            # assign NaN (Not a Number) value to all positions of the numpy arrays just defined
            # (NaN will be ignored when plotting the heatmap)
            elapsed_times_avg[:] = np.NaN
            speeds_avg[:] = np.NaN
            elapsed_times_std[:] = np.NaN
            speeds_std[:] = np.NaN

            # for each chunk size
            for i, cs in enumerate(chunk_sizes_iterator):
                # for each chunks number
                for j, c in enumerate(chunks_iterator):
                    # if the value in data dict corresponding to the combination of chunk size and chunks number is None
                    # for speeds or times, continue to next combination
                    if data['elapsed_times'][str(cs)][str(c)] is None or data[
                            'speeds'][str(cs)][str(c)] is None:
                        continue

                    # compute average values and standard deviations for the elapsed times and speeds
                    elapsed_times_avg[i][j] = np.average(
                        data['elapsed_times'][str(cs)][str(c)])
                    elapsed_times_std[i][j] = np.std(
                        data['elapsed_times'][str(cs)][str(c)])
                    speeds_avg[i][j] = np.average(
                        data['speeds'][str(cs)][str(c)])
                    speeds_std[i][j] = np.std(data['speeds'][str(cs)][str(c)])

            # create elapsed times and speeds figures
            time_fig, time_ax = plt.subplots(figsize=(10, 9))
            speed_fig, speed_ax = plt.subplots(figsize=(10, 9))

            # compute elapsed times heatmap
            time_im, time_cbar = heatmap(elapsed_times_avg,
                                         row_labels=chunk_sizes_iterator,
                                         row_title='Chunk sizes',
                                         col_labels=chunks_iterator,
                                         col_title='Chunks',
                                         ax=time_ax,
                                         cmap="BuGn",
                                         cbarlabel="elapsed time [s]")

            # compute speeds heatmap
            speed_im, speed_cbar = heatmap(speeds_avg,
                                           row_labels=chunk_sizes_iterator,
                                           row_title='Chunk sizes',
                                           col_labels=chunks_iterator,
                                           col_title='Chunks',
                                           ax=speed_ax,
                                           cmap="BuGn",
                                           cbarlabel="speed [it/s]")

            # compute elapsed times and speeds masks
            time_mask = [[np.isnan(c) for c in cs] for cs in elapsed_times_avg]
            speed_mask = [[np.isnan(c) for c in cs] for cs in speeds_avg]

            # annotate the elapsed times and speeds heatmaps given the corresponding masks
            time_texts = annotate_heatmap(time_im,
                                          elapsed_times_std,
                                          time_mask,
                                          ann_format='time',
                                          fontsize='x-small')
            speed_texts = annotate_heatmap(speed_im,
                                           speeds_std,
                                           speed_mask,
                                           ann_format='speed',
                                           fontsize='x-small')

            # adjust subplot params so that the subplots fit into the figure area
            time_fig.tight_layout()
            speed_fig.tight_layout()

            # create temporary directory
            with tempfile.TemporaryDirectory() as tmpdir:
                # save both elapsed times and speeds heatmaps to temporary files
                time_filename = os.path.join(tmpdir, 'times.png')
                speed_filename = os.path.join(tmpdir, 'speeds.png')
                time_fig.savefig(time_filename)
                speed_fig.savefig(speed_filename)

                # log temporary files as artifacts
                mlflow.log_artifact(time_filename)
                mlflow.log_artifact(speed_filename)
Example #15
def benchmark_t(args):
    from plaidbench import cli
    results = cli.plaidbench(args)
    print('results....  ', type(results), '   ', results)


devices = ['metal_amd', 'opencl_amd', 'metal_uhd', 'opencl_uhd', 'cpu']
small_networks = 'mobilenet|nasnet_mobile|imdb_lstm'.split('|')

for device in devices:
    for network in small_networks:
        name = f'TRIAL_{device}_{network}'
        if path.exists(getcwd() + '/' + name):
            continue
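        # Select the PlaidML backend for this trial by copying the
        # device-specific config over ~/.plaidml before benchmarking.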
        popen(f'cp ~/{device}.json ~/.plaidml')
        with mlflow.start_run(experiment_id=1, run_name=name[6:]):
            x = Thread(
                target=benchmark_t,
                kwargs={'args': [f'--results=./{name}', 'keras', network]})
            start_time = process_time()
            x.start()
            x.join()
            mlflow.log_metric('ttl_exec_time', process_time() - start_time)
            mlflow.log_param('device', device)
            mlflow.log_param('network', network)
            sleep(2)
            mlflow.log_artifact(getcwd() + '/' + name + '/result.json')
            with open(name + '/result.json') as json_file:
                result = json_load(json_file)

                mlflow.log_param('examples', result['examples'])
Example #16
File: model037.py Project: kurupical/riiid
def main(params: dict):
    import mlflow
    logger = get_logger()
    print("start params={}".format(params))
    if is_full_data:
        df = pd.read_pickle(
            "../input/riiid-test-answer-prediction/train_merged.pickle")
    else:
        df = pd.read_pickle(
            "../input/riiid-test-answer-prediction/split10/train_0.pickle"
        ).sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/split10/train_0.pickle").sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    if is_debug:
        df = df.head(30000)
    df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)

    df = df[[
        "user_id", "content_id", "content_type_id", "part", "user_answer",
        "answered_correctly"
    ]]

    train_idx = []
    val_idx = []
    np.random.seed(0)
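    # User-level split: ~10% of users go entirely to validation; for the rest,
    # the first 90% of each user's rows are train and the last 10% validation.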
    for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
        if np.random.random() < 0.1:
            # all val
            val_idx.extend(w_df.index.tolist())
        else:
            train_num = int(len(w_df) * 0.9)
            train_idx.extend(w_df[:train_num].index.tolist())
            val_idx.extend(w_df[train_num:].index.tolist())

    df["is_val"] = 0
    df["is_val"].loc[val_idx] = 1
    w_df = df[df["is_val"] == 0]
    w_df["group"] = (w_df.groupby("user_id")["user_id"].transform("count") -
                     w_df.groupby("user_id").cumcount()) // params["max_seq"]
    w_df["user_id"] = w_df["user_id"].astype(str) + "_" + w_df["group"].astype(
        str)
    ff_for_transformer = FeatureFactoryForTransformer(
        column_config={
            ("content_id", "content_type_id"): {
                "type": "category"
            },
            "user_answer": {
                "type": "category"
            },
            "part": {
                "type": "category"
            }
        },
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    group = ff_for_transformer.all_predict(w_df)
    del w_df
    gc.collect()

    n_skill = len(ff_for_transformer.embbed_dict[("content_id",
                                                  "content_type_id")])
    print(group)
    dataset_train = SAKTDataset(group,
                                n_skill=n_skill,
                                max_seq=params["max_seq"])

    ff_for_transformer = FeatureFactoryForTransformer(
        column_config={
            ("content_id", "content_type_id"): {
                "type": "category"
            },
            "user_answer": {
                "type": "category"
            },
            "part": {
                "type": "category"
            }
        },
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    df["group"] = (df.groupby("user_id")["user_id"].transform("count") -
                   df.groupby("user_id").cumcount()) // params["max_seq"]
    group = ff_for_transformer.all_predict(df)
    dataset_val = SAKTDataset(group,
                              is_test=True,
                              n_skill=n_skill,
                              max_seq=params["max_seq"])

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=64,
                                  shuffle=True,
                                  num_workers=1)
    dataloader_val = DataLoader(dataset_val,
                                batch_size=64,
                                shuffle=False,
                                num_workers=1)

    model = SAKTModel(n_skill,
                      embed_dim=params["embed_dim"],
                      max_seq=params["max_seq"])
    optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
    criterion = nn.BCEWithLogitsLoss()

    model.to(device)
    criterion.to(device)

    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train,
                                              dataloader_val, optimizer,
                                              criterion, device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".
              format(epoch, loss, auc, auc_val))

    preds = []
    labels = []
    for d in tqdm(dataloader_val):
        x = d[0].to(device).long()
        qa = d[1].to(device).long()
        target_id = d[2].to(device).long()
        part = d[3].to(device).long()
        label = d[4].to(device).long()

        output, atten_weight = model(x, qa, target_id, part)

        preds.extend(torch.nn.Sigmoid()(
            output[:, -1]).view(-1).data.cpu().numpy().tolist())
        labels.extend(label[:, -1].view(-1).data.cpu().numpy())
    df_oof = pd.DataFrame()
    df_oof["row_id"] = df.loc[val_idx].index
    df_oof["predict"] = preds
    df_oof["target"] = df.loc[val_idx]["answered_correctly"].values

    df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)

    df_oof2 = pd.read_csv(
        "../output/ex_172/20201202080625/oof_train_0_lgbm.csv")
    df_oof2.columns = ["row_id", "predict_lgbm", "target"]
    df_oof2 = pd.merge(df_oof, df_oof2, how="inner")

    auc_transformer = roc_auc_score(df_oof2["target"].values,
                                    df_oof2["predict"].values)
    auc_lgbm = roc_auc_score(df_oof2["target"].values,
                             df_oof2["predict_lgbm"].values)
    print("single transformer: {:.4f}".format(auc_transformer))
    print("lgbm: {:.4f}".format(auc_lgbm))

    print("ensemble")
    max_auc = 0
    max_nn_ratio = 0
    for r in np.arange(0, 1.05, 0.05):
        auc = roc_auc_score(
            df_oof2["target"].values, df_oof2["predict_lgbm"].values *
            (1 - r) + df_oof2["predict"].values * r)
        print("[nn_ratio: {:.2f}] AUC: {:.4f}".format(r, auc))

        if max_auc < auc:
            max_auc = auc
            max_nn_ratio = r
    print(len(df_oof2))

    if not is_debug:
        mlflow.start_run(experiment_id=10, run_name=os.path.basename(__file__))

        mlflow.log_param("count_row", len(df))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_transformer)
        mlflow.log_metric("auc_lgbm", auc_lgbm)
        mlflow.log_metric("auc_ensemble", max_auc)
        mlflow.log_metric("ensemble_nn_ratio", max_nn_ratio)
        mlflow.end_run()
    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")

    del df, dataset_train, dataset_val, dataloader_train, dataloader_val
    gc.collect()

    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)
    if is_make_feature_factory:
        ff_for_transformer = FeatureFactoryForTransformer(
            column_config={
                ("content_id", "content_type_id"): {
                    "type": "category"
                },
                "user_answer": {
                    "type": "category"
                },
                "part": {
                    "type": "category"
                }
            },
            dict_path="../feature_engineering/",
            sequence_length=params["max_seq"],
            logger=logger)
        df = pd.read_pickle(
            "../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(
                f"{output_dir}/feature_factory_manager_for_transformer.pickle",
                "wb") as f:
            pickle.dump(ff_for_transformer, f)
Example #17
def main(params: dict, output_dir: str):
    import mlflow
    print("start params={}".format(params))
    model_id = "train_0"
    logger = get_logger()
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
    df = pd.read_pickle(
        "../input/riiid-test-answer-prediction/split10/train_0.pickle"
    ).sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    if is_debug:
        df = df.head(30000)
    df["prior_question_had_explanation"] = df[
        "prior_question_had_explanation"].fillna(-1)
    column_config = {
        ("content_id", "content_type_id"): {
            "type": "category"
        },
        "user_answer": {
            "type": "leakage_feature"
        },
        "answered_correctly": {
            "type": "leakage_feature"
        },
        "part": {
            "type": "category"
        },
        "prior_question_elapsed_time_bin300": {
            "type": "category"
        },
        "duration_previous_content_bin300": {
            "type": "category"
        },
        "prior_question_had_explanation": {
            "type": "category"
        },
        "rating_diff_content_user_id": {
            "type": "numeric"
        },
        "task_container_id_bin300": {
            "type": "category"
        },
        "previous_answer_index_content_id": {
            "type": "category"
        },
        "previous_answer_content_id": {
            "type": "category"
        }
    }

    if not load_pickle or is_debug:
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"][
            "DurationPreviousContent"] = DurationPreviousContent()
        feature_factory_dict["user_id"][
            "ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_dict["user_id"][
            "UserContentRateEncoder"] = UserContentRateEncoder(
                rate_func="elo", column="user_id")
        feature_factory_dict["user_id"]["PreviousAnswer2"] = PreviousAnswer2(
            groupby="user_id",
            column="content_id",
            is_debug=is_debug,
            model_id=model_id,
            n=300)
        feature_factory_manager = FeatureFactoryManager(
            feature_factory_dict=feature_factory_dict,
            logger=logger,
            split_num=1,
            model_id="train_0",
            load_feature=not is_debug,
            save_feature=not is_debug)

        print("all_predict")
        df = feature_factory_manager.all_predict(df)
        df["task_container_id_bin300"] = [
            x if x < 300 else 300 for x in df["task_container_id"]
        ]
        df = df[[
            "user_id", "content_id", "content_type_id", "part", "user_answer",
            "answered_correctly", "prior_question_elapsed_time_bin300",
            "duration_previous_content_bin300",
            "prior_question_had_explanation", "rating_diff_content_user_id",
            "task_container_id_bin300", "previous_answer_index_content_id",
            "previous_answer_content_id"
        ]]
        print(df.head(10))

        print("data preprocess")

        train_idx = []
        val_idx = []
        np.random.seed(0)
        for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
            if np.random.random() < 0.01:
                # all val
                val_idx.extend(w_df.index.tolist())
            else:
                train_num = int(len(w_df) * 0.95)
                train_idx.extend(w_df[:train_num].index.tolist())
                val_idx.extend(w_df[train_num:].index.tolist())
    ff_for_transformer = FeatureFactoryForTransformer(
        column_config=column_config,
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    ff_for_transformer.make_dict(df=df)
    n_skill = len(ff_for_transformer.embbed_dict[("content_id",
                                                  "content_type_id")])
    if not load_pickle or is_debug:
        df["is_val"] = 0
        df["is_val"].loc[val_idx] = 1
        w_df = df[df["is_val"] == 0]
        w_df["group"] = (
            w_df.groupby("user_id")["user_id"].transform("count") -
            w_df.groupby("user_id").cumcount()) // params["max_seq"]
        w_df["user_id"] = w_df["user_id"].astype(
            str) + "_" + w_df["group"].astype(str)

        group = ff_for_transformer.all_predict(w_df)

        dataset_train = SAKTDataset(group,
                                    n_skill=n_skill,
                                    max_seq=params["max_seq"])

        del w_df
        gc.collect()

    ff_for_transformer = FeatureFactoryForTransformer(
        column_config=column_config,
        dict_path="../feature_engineering/",
        sequence_length=params["max_seq"],
        logger=logger)
    if not load_pickle or is_debug:
        group = ff_for_transformer.all_predict(df[df["content_type_id"] == 0])
        dataset_val = SAKTDataset(group,
                                  is_test=True,
                                  n_skill=n_skill,
                                  max_seq=params["max_seq"])

    os.makedirs("../input/feature_engineering/model155", exist_ok=True)
    if not is_debug and not load_pickle:
        with open(f"../input/feature_engineering/model155/train.pickle",
                  "wb") as f:
            pickle.dump(dataset_train, f)
        with open(f"../input/feature_engineering/model155/val.pickle",
                  "wb") as f:
            pickle.dump(dataset_val, f)

    if not is_debug and load_pickle:
        with open(f"../input/feature_engineering/model155/train.pickle",
                  "rb") as f:
            dataset_train = pickle.load(f)
        with open(f"../input/feature_engineering/model155/val.pickle",
                  "rb") as f:
            dataset_val = pickle.load(f)
        print("loaded!")
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=params["batch_size"],
                                  shuffle=True,
                                  num_workers=1)
    dataloader_val = DataLoader(dataset_val,
                                batch_size=params["batch_size"],
                                shuffle=False,
                                num_workers=1)

    model = SAKTModel(n_skill,
                      embed_dim=params["embed_dim"],
                      max_seq=params["max_seq"],
                      dropout=dropout,
                      cont_emb=params["cont_emb"])

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    optimizer = AdaBelief(
        optimizer_grouped_parameters,
        lr=params["lr"],
    )
    num_train_optimization_steps = int(len(dataloader_train) * 20)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=params["num_warmup_steps"],
        num_training_steps=num_train_optimization_steps)
    criterion = nn.BCEWithLogitsLoss()

    model.to(device)
    criterion.to(device)

    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train,
                                              dataloader_val, optimizer,
                                              criterion, scheduler, epoch,
                                              device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".
              format(epoch, loss, auc, auc_val))

    preds = []
    labels = []
    with torch.no_grad():
        for item in tqdm(dataloader_val):
            x = item["x"].to(device).long()
            target_id = item["target_id"].to(device).long()
            part = item["part"].to(device).long()
            label = item["label"].to(device).float()
            elapsed_time = item["elapsed_time"].to(device).long()
            duration_previous_content = item["duration_previous_content"].to(
                device).long()
            prior_question_had_explanation = item["prior_q"].to(device).long()
            user_answer = item["user_answer"].to(device).long()
            rate_diff = item["rate_diff"].to(device).float()
            container_id = item["container_id"].to(device).long()
            prev_ans_idx = item["previous_answer_index_content_id"].to(
                device).long()
            prev_answer_content_id = item["previous_answer_content_id"].to(
                device).long()

            output = model(x, target_id, part, elapsed_time,
                           duration_previous_content,
                           prior_question_had_explanation, user_answer,
                           rate_diff, container_id, prev_ans_idx,
                           prev_answer_content_id)

            preds.extend(torch.nn.Sigmoid()(
                output[:, -1]).view(-1).data.cpu().numpy().tolist())
            labels.extend(label[:, -1].view(-1).data.cpu().numpy().tolist())

    auc_transformer = roc_auc_score(labels, preds)
    print("single transformer: {:.4f}".format(auc_transformer))
    df_oof = pd.DataFrame()
    # df_oof["row_id"] = df.loc[val_idx].index
    print(len(dataloader_val))
    print(len(preds))
    df_oof["predict"] = preds
    df_oof["target"] = labels

    df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)
    """
    df_oof2 = pd.read_csv("../output/ex_237/20201213110353/oof_train_0_lgbm.csv")
    df_oof2.columns = ["row_id", "predict_lgbm", "target"]
    df_oof2 = pd.merge(df_oof, df_oof2, how="inner")

    auc_lgbm = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values)
    print("lgbm: {:.4f}".format(auc_lgbm))

    print("ensemble")
    max_auc = 0
    max_nn_ratio = 0
    for r in np.arange(0, 1.05, 0.05):
        auc = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values*(1-r) + df_oof2["predict"].values*r)
        print("[nn_ratio: {:.2f}] AUC: {:.4f}".format(r, auc))

        if max_auc < auc:
            max_auc = auc
            max_nn_ratio = r
    print(len(df_oof2))
    """
    if not is_debug:
        mlflow.start_run(experiment_id=10, run_name=os.path.basename(__file__))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_transformer)
        mlflow.end_run()
    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")
    del model
    torch.cuda.empty_cache()
    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)
    if is_make_feature_factory:
        # feature factory
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"][
            "DurationPreviousContent"] = DurationPreviousContent(
                is_partial_fit=True)
        feature_factory_dict["user_id"][
            "ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(
            feature_factory_dict=feature_factory_dict,
            logger=logger,
            split_num=1,
            model_id="all",
            load_feature=not is_debug,
            save_feature=not is_debug)

        ff_for_transformer = FeatureFactoryForTransformer(
            column_config=column_config,
            dict_path="../feature_engineering/",
            sequence_length=params["max_seq"],
            logger=logger)
        df = pd.read_pickle(
            "../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        feature_factory_manager.fit(df)
        df = feature_factory_manager.all_predict(df)
        for dicts in feature_factory_manager.feature_factory_dict.values():
            for factory in dicts.values():
                factory.logger = None
        feature_factory_manager.logger = None
        with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f:
            pickle.dump(feature_factory_manager, f)

        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(
                f"{output_dir}/feature_factory_manager_for_transformer.pickle",
                "wb") as f:
            pickle.dump(ff_for_transformer, f)
Example #18
def test_search_runs():
    mlflow.set_experiment("exp-for-search")
    # Create a run and verify that the current active experiment is the one we just set
    logged_runs = {}
    with mlflow.start_run() as active_run:
        logged_runs["first"] = active_run.info.run_id
        mlflow.log_metric("m1", 0.001)
        mlflow.log_metric("m2", 0.002)
        mlflow.log_metric("m1", 0.002)
        mlflow.log_param("p1", "a")
        mlflow.set_tag("t1", "first-tag-val")
    with mlflow.start_run() as active_run:
        logged_runs["second"] = active_run.info.run_id
        mlflow.log_metric("m1", 0.008)
        mlflow.log_param("p2", "aa")
        mlflow.set_tag("t2", "second-tag-val")

    def verify_runs(runs, expected_set):
        assert set([r.info.run_id for r in runs
                    ]) == set([logged_runs[r] for r in expected_set])

    experiment_id = MlflowClient().get_experiment_by_name(
        "exp-for-search").experiment_id

    # 2 runs in this experiment
    assert len(MlflowClient().list_run_infos(experiment_id,
                                             ViewType.ACTIVE_ONLY)) == 2

    # 2 runs have metric "m1" > 0.0001
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.0001")
    verify_runs(runs, ["first", "second"])

    # 1 run has metric "m1" > 0.002
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.002")
    verify_runs(runs, ["second"])

    # no runs with metric "m1" > 0.1
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.1")
    verify_runs(runs, [])

    # 1 run with metric "m2" > 0
    runs = MlflowClient().search_runs([experiment_id], "metrics.m2 > 0")
    verify_runs(runs, ["first"])

    # 1 run each with param "p1" and "p2"
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 != 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 = 'aa'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # 1 run each with tag "t1" and "t2"
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t1 = 'first-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "tags.t2 != 'qwerty'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t2 = 'second-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # delete the "first" run; it should still appear under ALL and DELETED_ONLY, but not ACTIVE_ONLY
    MlflowClient().delete_run(logged_runs["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.DELETED_ONLY)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ACTIVE_ONLY)
    verify_runs(runs, [])
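For comparison, the same filters can also be issued through the fluent mlflow.search_runs API, which returns a pandas DataFrame rather than Run entities. A minimal sketch, reusing only the experiment name from the test above:

import mlflow

exp = mlflow.get_experiment_by_name("exp-for-search")
# one DataFrame row per matching run; metric and param values become columns
df = mlflow.search_runs([exp.experiment_id],
                        filter_string="metrics.m1 > 0.002 and params.p2 = 'aa'")
print(df[["run_id", "metrics.m1", "params.p2"]])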
Example #19
0
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        source_name = "rest test"

        source_name_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_name",
            return_value=source_name)
        source_type_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_type",
            return_value=SourceType.LOCAL)
        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http, \
                mock.patch('mlflow.tracking._tracking_service.utils._get_store',
                           return_value=store), \
                mock.patch('mlflow.tracking.context.default_context._get_user',
                           return_value=user_name), \
                mock.patch('time.time', return_value=13579), \
                source_name_patch, source_type_patch:
            with mlflow.start_run(experiment_id="43"):
                cr_body = message_to_json(
                    CreateRun(experiment_id="43",
                              user_id=user_name,
                              start_time=13579000,
                              tags=[
                                  ProtoRunTag(key='mlflow.source.name',
                                              value=source_name),
                                  ProtoRunTag(key='mlflow.source.type',
                                              value='LOCAL'),
                                  ProtoRunTag(key='mlflow.user',
                                              value=user_name)
                              ]))
                expected_kwargs = self._args(creds, "runs/create", "POST",
                                             cr_body)

                assert mock_http.call_count == 1
                actual_kwargs = mock_http.call_args[1]

                # Test the passed tag values separately from the rest of the request
                # Tag order is inconsistent on Python 2 and 3, but the order does not matter
                expected_tags = expected_kwargs['json'].pop('tags')
                actual_tags = actual_kwargs['json'].pop('tags')
                assert (sorted(expected_tags,
                               key=lambda t: t['key']) == sorted(
                                   actual_tags, key=lambda t: t['key']))
                assert expected_kwargs == actual_kwargs

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(
                LogParam(run_uuid="some_uuid",
                         run_id="some_uuid",
                         key="k1",
                         value="v1"))
            self._verify_requests(mock_http, creds, "runs/log-parameter",
                                  "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_experiment_tag("some_id",
                                     ExperimentTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetExperimentTag(experiment_id="some_id",
                                 key="t1",
                                 value="abcd" * 1000))
            self._verify_requests(mock_http, creds,
                                  "experiments/set-experiment-tag", "POST",
                                  body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetTag(run_uuid="some_uuid",
                       run_id="some_uuid",
                       key="t1",
                       value="abcd" * 1000))
            self._verify_requests(mock_http, creds, "runs/set-tag", "POST",
                                  body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_tag("some_uuid", "t1")
            body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
            self._verify_requests(mock_http, creds, "runs/delete-tag", "POST",
                                  body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(
                LogMetric(run_uuid="u2",
                          run_id="u2",
                          key="m1",
                          value=0.87,
                          timestamp=12345,
                          step=3))
            self._verify_requests(mock_http, creds, "runs/log-metric", "POST",
                                  body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            metrics = [
                Metric("m1", 0.87, 12345, 0),
                Metric("m2", 0.49, 12345, -1),
                Metric("m3", 0.58, 12345, 2)
            ]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2",
                            metrics=metrics,
                            params=params,
                            tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(
                LogBatch(run_id="u2",
                         metrics=metric_protos,
                         params=param_protos,
                         tags=tag_protos))
            self._verify_requests(mock_http, creds, "runs/log-batch", "POST",
                                  body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds, "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds, "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/delete", "POST",
                message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/restore", "POST",
                message_to_json(RestoreExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            response = mock.MagicMock()
            response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
            mock_http.return_value = response
            result = store.search_runs(["0", "1"],
                                       "params.p1 = 'a'",
                                       ViewType.ACTIVE_ONLY,
                                       max_results=10,
                                       order_by=["a"],
                                       page_token="12345abcde")

            expected_message = SearchRuns(experiment_ids=["0", "1"],
                                          filter="params.p1 = 'a'",
                                          run_view_type=ViewType.to_proto(
                                              ViewType.ACTIVE_ONLY),
                                          max_results=10,
                                          order_by=["a"],
                                          page_token="12345abcde")
            self._verify_requests(mock_http, creds, "runs/search", "POST",
                                  message_to_json(expected_message))
            assert result.token == "67890fghij"
Example #20
0
def test_log_image_numpy_raises_exception_for_invalid_array_data_type():
    import numpy as np

    with mlflow.start_run(), pytest.raises(TypeError,
                                           match="Invalid array data type"):
        mlflow.log_image(np.tile("a", (1, 1, 3)), "image.png")
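The TypeError above comes from log_image accepting only numeric arrays. A minimal sketch of a call that does succeed, assuming a tiny uint8 RGB image:

import numpy as np
import mlflow

with mlflow.start_run():
    # a 1x1 black RGB image; uint8 values in [0, 255] are valid for log_image
    mlflow.log_image(np.zeros((1, 1, 3), dtype=np.uint8), "image.png")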
Example #21
0
def valid_rf(race_results_df_processed_valid, model_rf, parameters):
    # mlflow
    print('FILE_DIR: ' + FILE_DIR)
    mlflow.set_tracking_uri(FILE_DIR + '/../../../logs/mlruns/')
    mlflow.set_experiment('forecast_keiba_valid')
    run_info = mlflow.start_run()
    mlflow.set_tag('model', 'rf')

    # prepare the validation data
    # explanatory variables
    X_valid = race_results_df_processed_valid.drop(['rank'], axis=1)
    # target variable
    y_valid = race_results_df_processed_valid['rank']

    # run inference
    y_valid_pred = model_rf.predict(X_valid)

    # reshape results for aggregation
    valid_results_df = pd.DataFrame({'pred': y_valid_pred, 'actual': y_valid})
    race_id_list = list(set(list(valid_results_df.index)))
    valid_results_list = valid_results_df.reset_index().values.tolist()
    # shuffle the rows
    random.shuffle(valid_results_list)

    # tally correct predictions (exacta)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1]:
            for i in range(len(valid_results_list)):
                # within the target race id, the horse predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 1 and (valid_results_list[i][2]
                                                  == 1):
                        cnt_by_race += 1
        if cnt_by_race == 1:
            correct_count += 1
    # note: the denominator assumes the validation set contains 100 races
    acc_exacta_1 = correct_count / 100
    print('acc_exacta_1: ' + str(acc_exacta_1))

    # tally correct predictions (quinella)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2]:
            for i in range(len(valid_results_list)):
                # within the target race id, the horse predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 2 and (valid_results_list[i][2] == 1
                                                  or valid_results_list[i][2]
                                                  == 2):
                        cnt_by_race += 1
        if cnt_by_race == 2:
            correct_count += 1
    acc_quinella_2 = correct_count / 100
    print('acc_quinella_2: ' + str(acc_quinella_2))

    # tally correct predictions (trio)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2, 3]:
            for i in range(len(valid_results_list)):
                # within the target race id, the horse predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 3 and (
                            valid_results_list[i][2] == 1
                            or valid_results_list[i][2] == 2
                            or valid_results_list[i][2] == 3):
                        cnt_by_race += 1
        if cnt_by_race == 3:
            correct_count += 1
    acc_trio_3 = correct_count / 100
    print('acc_trio_3: ' + str(acc_trio_3))

    mlflow.log_metric("acc_exacta_1", acc_exacta_1)
    mlflow.log_metric("acc_quinella_2", acc_quinella_2)
    mlflow.log_metric("acc_trio_3", acc_trio_3)

    # send a LINE notification
    if parameters['is_notify']:
        run_result_dict = mlflow.get_run(run_info.info.run_id).to_dictionary()
        run_result_str = json.dumps(run_result_dict, indent=4)

        conf_paths = [
            FILE_DIR + "/../../../conf/base", FILE_DIR + "/../../../conf/local"
        ]
        conf_loader = ConfigLoader(conf_paths)
        credentials = conf_loader.get("credentials*", "credentials*/**")
        token = credentials['dev_line']['access_token']

        url = "https://notify-api.line.me/api/notify"
        headers = {"Authorization": "Bearer " + token}
        payload = {"message": "model_rf" + run_result_str}
        requests.post(url, headers=headers, data=payload)

    mlflow.end_run()
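The exacta tally above effectively picks, per race, one horse predicted to finish 1st (order randomized by the shuffle) and checks whether it actually won. A rough pandas sketch of the same idea, assuming valid_results_df is indexed by race id and that the validation set holds 100 races:

def exacta_accuracy(valid_results_df, n_races=100):
    # for each race, take one horse predicted in 1st place and check it actually won
    hits = 0
    for _, race in valid_results_df.groupby(level=0):
        picks = race[race['pred'] == 1]
        if len(picks) > 0 and picks.iloc[0]['actual'] == 1:
            hits += 1
    return hits / n_races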
Example #22
0
def test_log_image_numpy_raises_exception_for_invalid_channel_length():
    import numpy as np

    with mlflow.start_run(), pytest.raises(ValueError,
                                           match="Invalid channel length"):
        mlflow.log_image(np.zeros((1, 1, 5), dtype=np.uint8), "image.png")
Example #23
0
from random import random, randint
from sklearn.ensemble import RandomForestRegressor

import mlflow
import mlflow.sklearn

with mlflow.start_run(run_name="YOUR_RUN_NAME") as run:
    params = {"n_estimators": 5, "random_state": 42}
    sk_learn_rfr = RandomForestRegressor(**params)

    # Log parameters and metrics using the MLflow APIs
    mlflow.log_params(params)
    mlflow.log_param("param_1", randint(0, 100))
    mlflow.log_metrics({"metric_1": random(), "metric_2": random() + 1})

    # Log the sklearn model and register as version 1
    mlflow.sklearn.log_model(
        sk_model=sk_learn_rfr,
        artifact_path="sklearn-model",
        registered_model_name="sk-learn-random-forest-reg-model"
    )
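Once registered, the model can be fetched back by name and version. A minimal sketch, assuming a tracking backend with model-registry support and that version 1 is the version created above:

import mlflow.sklearn

# load version 1 of the registered model; this returns the original sklearn estimator
loaded_rfr = mlflow.sklearn.load_model("models:/sk-learn-random-forest-reg-model/1")
print(loaded_rfr.get_params()["n_estimators"])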
Example #24
0
def test_get_artifact_uri_with_artifact_path_unspecified_returns_artifact_root_dir(
):
    with mlflow.start_run() as active_run:
        assert mlflow.get_artifact_uri(
            artifact_path=None) == active_run.info.artifact_uri
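Passing an artifact_path instead returns a URI inside the run's artifact root, which is useful for predicting where log_artifact will place a file. A minimal sketch with an assumed path:

import mlflow

with mlflow.start_run():
    # with a path, the URI points below the run's artifact root
    print(mlflow.get_artifact_uri(artifact_path="features/features.txt"))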
Example #25
0
File: train.py Project: Daiver/torch_fuze
def main():
    torch.manual_seed(42)
    random.seed(42)
    np.random.seed(42)
    torch.backends.cudnn.deterministic = True
    # lr = 0.01
    n_epochs = 40
    batch_size = 64
    # device = "cpu"
    device = "cuda:0"

    mlflow.start_run()
    mlflow.log_param("n_epochs", n_epochs)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("device", device)

    run_start_time = mlflow.active_run().info.start_time
    readable_start_time = time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime(run_start_time / 1000))

    trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
    train_set = CIFAR10(root="data/", train=True, transform=trans, download=True)
    test_set = CIFAR10(root="data/", train=False, transform=trans, download=True)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=False, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=False, num_workers=4)

    model = Net()
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters())
    best_lr, summary = find_lr_supervised(model, criterion, optimizer, train_loader, 1e-9, 1, device=device)
    # torch_fuze.utils.set_lr(optimizer, best_lr * 0.1)

    torch.manual_seed(42)
    random.seed(42)
    np.random.seed(42)

    # plt.plot(np.log10(summary.learning_rates), summary.losses)
    # plt.plot(np.log10(summary.learning_rates), summary.smoothed_losses)
    # plt.draw()
    # plt.pause(10)

    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 8], gamma=0.3)
    # scheduler = OneCycleLR(optimizer, best_lr * 0.01, best_lr * 0.8, 1e-6, n_total_epochs=n_epochs, cycle_fraction=0.8)
    # scheduler = OneCycleLR(optimizer, best_lr * 0.01, best_lr * 0.5, 1e-6, n_total_epochs=n_epochs, cycle_fraction=0.8)
    # scheduler = OneCycleLR(optimizer, best_lr * 0.01, best_lr * 0.1, 1e-6, n_total_epochs=n_epochs, cycle_fraction=0.8)
    scheduler = None

    metrics = OrderedDict([
        ("loss", criterion),
        ("acc", torch_fuze.metrics.Accuracy())
    ])
    callbacks = [
        torch_fuze.callbacks.ProgressCallback(),
        torch_fuze.callbacks.BestModelSaverCallback(
            model, "checkpoints/best.pt", metric_name="acc", lower_is_better=False),
        torch_fuze.callbacks.TensorBoardXCallback(f"logs/{readable_start_time}/", remove_old_logs=True),
        torch_fuze.callbacks.MLFlowCallback(
            metrics_to_track={"valid_loss", "valid_acc", "train_acc"},
            lowest_metrics_to_track={"valid_loss"},
            highest_metrics_to_track={"valid_acc"},
            files_to_save_at_every_batch={"checkpoints/best.pt"})
    ]
    trainer = torch_fuze.SupervisedTrainer(model, criterion, device)
    trainer.run(
        train_loader, test_loader, optimizer, scheduler=scheduler, n_epochs=n_epochs, callbacks=callbacks, metrics=metrics)
Example #26
0
File: runRF.py Project: mindis/MLnotebooks
# COMMAND ----------

print(n_estimators)
print(max_depth)

rfHyperOpt = RandomForestClassifier(labelCol="label",
                                    featuresCol="features",
                                    maxDepth=max_depth,
                                    numTrees=n_estimators,
                                    featureSubsetStrategy="all",
                                    seed=42,
                                    maxBins=100)

rfHyperOptFitted = rfHyperOpt.fit(train_data)

loss = 1 - evaluator.evaluate(
    rfHyperOptFitted.transform(test_data))  # 1 - f-score

# COMMAND ----------

import mlflow
import mlflow.spark
with mlflow.start_run(run_id=run_id, experiment_id=experiment_id) as run:
    mlflow.spark.log_model(rfHyperOptFitted, "model")

# COMMAND ----------

dbutils.notebook.exit(str(loss))

# COMMAND ----------
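The Spark model logged above can later be reloaded from the same run. A minimal sketch, assuming the run_id used in the start_run call and the same test_data DataFrame:

import mlflow.spark

# reload the model logged under artifact path "model" and score the held-out data
loaded_model = mlflow.spark.load_model("runs:/" + run_id + "/model")
predictions = loaded_model.transform(test_data)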
Example #27
0
run_name = parser["run_name"]

if mx.context.num_gpus() > 0 and using_cuda:
    GPU_COUNT = mx.context.num_gpus()
else:
    GPU_COUNT = 0

# On Windows, the code below must run under __name__ == "__main__" to avoid freeze_support errors.
if __name__ == "__main__":

    print("\n실행 경로 : " + __file__)
    if training:
        if using_mlflow:
            ml.set_tracking_uri("./mlruns")  # ./mlruns is the default tracking location
            ex_id = ml.set_experiment("CENTER_" + "RES" + str(base))
            ml.start_run(run_name=run_name, experiment_id=ex_id)
            ml.log_param("height", input_size[0])
            ml.log_param("width", input_size[1])
            ml.log_param("pretrained_base", pretrained_base)
            ml.log_param("train dataset path", train_dataset_path)
            ml.log_param("valid dataset path", valid_dataset_path)
            ml.log_param("test dataset path", test_dataset_path)
            ml.log_param("epoch", epoch)
            ml.log_param("batch size", batch_size)
            ml.log_param("multiscale", multiscale)
            ml.log_param("data augmentation", data_augmentation)
            ml.log_param("optimizer", optimizer)
            ml.log_param("learning rate", learning_rate)
            ml.log_param("decay lr", decay_lr)

        train.run(mean=image_mean,
Example #28
0
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV

import mlflow
#mlflow.set_tracking_uri("http://training.itu.dk:5000/")
#mlflow.set_experiment("Hermes Demo")
mlflow.sklearn.autolog()

num_samples = 100  # not defined in the original snippet; assumed value

def get_ys(xs):
    signal = -0.1*xs**3 + xs**2 - 5*xs - 5
    noise = np.random.normal(0,200,(len(xs),1))
    return signal + noise

X = np.random.uniform(-20,20,num_samples).reshape((num_samples,1))
y = get_ys(X)

poly_params = {
    'Poly__degree': range(1,8),
}
mlflow.end_run()
with mlflow.start_run():

    mlflow.log_param("num_samples", num_samples)

    model = Pipeline([
        ("Poly", PolynomialFeatures()),
        ("LinReg", LinearRegression())
    ])

    gridsearch = GridSearchCV(model, poly_params, scoring="r2")
    gridsearch.fit(X,y)


Example #29
0
def test_list_run():
    with mlflow.start_run(run_name='apple'):
        pass
    result = CliRunner().invoke(list_run, ["--experiment-id", "0"])
    assert 'apple' in result.output
Example #30
0
File: main.py Project: Daiver/jff
import mlflow
import os
import time
from mlflow import log_metric, log_param, log_artifact


if __name__ == "__main__":
    mlflow.set_experiment("First")
    with mlflow.start_run():
        # Log a parameter (key-value pair)
        log_param("param1", 5)

        # Log a metric; metrics can be updated throughout the run
        for i in range(200):
            time.sleep(0.1)
            log_metric("foo1", 1 * i)
            log_metric("foo2", 2 * i)
            log_metric("foo3", 3 * i)
            log_metric("foo4", 3 * i)
            log_metric("foo5", 3 * i)
            log_metric("foo6", 3 * i)
            log_metric("foo7", 3 * i)
            log_metric("foo8", 3 * i)
            log_metric("foo9", 3 * i)
            log_metric("foo10", 3 * i)
            log_metric("foo11", 3 * i)
            log_metric("foo12", 3 * i)
            log_metric("foo13", 3 * i)
            log_metric("foo14", 3 * i)
            log_metric("foo15", 3 * i)
            log_metric("foo16", 3 * i)
Example #31
0
def gen3_eval(
    ds_path,  # path of the directory where to find the pre-processed dataset (containing .dat files)
    # path to a new or an existent (from a previous run) json file to be used to store run
    # evaluation elapsed times and speeds
    json_file_path,
    net_type='mtje',  # network to use
    batch_size=8192,  # how many samples per batch to load
    min_mul=1,  # minimum product between chunks and chunk_size to consider (in # of batches)
    max_mul=32,  # maximum product between chunks and chunk_size to consider (in # of batches)
    epochs=1,  # number of epochs to perform evaluation for
    training_n_samples=0,  # number of training samples to consider (used to access the right files)
    use_malicious_labels=1,  # whether or not (1/0) to use malware/benignware labels as a target
    use_count_labels=1,  # whether or not (1/0) to use the counts as an additional target
    use_tag_labels=1,  # whether or not (1/0) to use the tags as additional targets
    feature_dimension=2381,  # The input dimension of the model
    # if provided, seed random number generation with this value (defaults None, no seeding)
    random_seed=None,
    # how many worker (threads) should the dataloader use (default: 0 -> use multiprocessing.cpu_count())
    workers=0):
    """ Evaluate generator alt3 speed changing values for 'chunk_size' and 'chunks' variables. The evaluation is done
    for 'epochs' epochs for each combination of values. The resulting elapsed times and speeds are save to a json file.

    Args:
        ds_path: Path of the directory where to find the pre-processed dataset (containing .dat files)
        json_file_path: Path to a new or an existent (from a previous run) json file to be used to store run
                        evaluation elapsed times and speeds
        net_type: Network to use between 'mtje', 'mtje_cosine', 'mtje_pairwise_distance' and 'aloha' (default: 'mtje')
        batch_size: How many samples per batch to load (default: 8192)
        min_mul: Minimum product between chunks and chunk_size to consider (in # of batches) (default: 1)
        max_mul: Maximum product between chunks and chunk_size to consider (in # of batches) (default: 32)
        epochs: How many epochs to run the evaluation for (default: 1)
        training_n_samples: Number of training samples to consider (used to access the right files) (default: 0 -> all)
        use_malicious_labels: Whether or not (1/0) to use malware/benignware labels as a target (default: 1)
        use_count_labels: Whether or not (1/0) to use the counts as an additional target (default: 1)
        use_tag_labels: Whether or not (1/0) to use the tags as additional targets (default: 1)
        feature_dimension: The input dimension of the model (default: 2381 -> EMBER 2.0 feature size)
        random_seed: If provided, seed random number generation with this value (default: None -> no seeding)
        workers: How many worker (threads) should the dataloader use (default: 0 -> use multiprocessing.cpu_count())
    """

    # dynamically import some classes, functions and variables from modules depending on the current net type
    Net, run_additional_params = import_modules(net_type=net_type)

    # start mlflow run
    with mlflow.start_run() as mlrun:
        if net_type.lower() != 'aloha':
            # joint embedding nets have use_tag_labels set to 1 by default
            use_tag_labels = 1

        # if workers has a value (it is not None) then convert it to int if it is > 0, otherwise set it to None
        workers = workers if workers is None else int(
            workers) if int(workers) > 0 else None

        if random_seed is not None:  # if a seed was provided
            logger.info(f"Setting random seed to {int(random_seed)}.")
            # set the seed for generating random numbers
            torch.manual_seed(int(random_seed))

        logger.info('Running generator alternative 3 cross evaluation..')

        # initialize chunk_sizes and chunks lists to contain all the powers of 2 between
        # 2^MIN_EXPONENT and 2^MAX_EXPONENT (included)
        chunk_sizes_iterator = [
            2**x
            for x in range(MIN_CHUNK_SIZE_EXPONENT, MAX_CHUNK_SIZE_EXPONENT +
                           1)
        ]
        chunks_iterator = [
            2**x for x in range(MIN_CHUNKS_EXPONENT, MAX_CHUNKS_EXPONENT + 1)
        ]

        # create json file parent directory if it did not already exist
        os.makedirs(os.path.dirname(json_file_path), exist_ok=True)

        # if the json file path provided points to an existing file, open it and load its content into data dict
        if os.path.exists(json_file_path) and os.path.isfile(json_file_path):
            with open(json_file_path, 'r') as f:
                data = json.load(f)
        else:  # otherwise initialize the data dict with two nested dicts (elapsed_times and speeds) filled with None values
            data = {
                'elapsed_times': {
                    str(cs): {str(c): None
                              for c in chunks_iterator}
                    for cs in chunk_sizes_iterator
                },
                'speeds': {
                    str(cs): {str(c): None
                              for c in chunks_iterator}
                    for cs in chunk_sizes_iterator
                }
            }

        # for each chunk size
        for cs in chunk_sizes_iterator:
            # for each chunk number
            for c in chunks_iterator:

                # if the product between current chunk size and number of chunks is outside the valid range,
                # skip evaluation
                if cs * c < batch_size * min_mul or cs * c > batch_size * max_mul:
                    continue

                # if the values in the data dict corresponding to the current combination of chunk size and chunks
                # number are None, initialize them to empty lists
                if data['elapsed_times'][str(cs)][str(
                        c)] is None or data['speeds'][str(cs)][str(c)] is None:
                    data['elapsed_times'][str(cs)][str(c)] = []
                    data['speeds'][str(cs)][str(c)] = []

                # create Network model
                model = Net(use_malware=bool(use_malicious_labels),
                            use_counts=bool(use_count_labels),
                            use_tags=bool(use_tag_labels),
                            n_tags=len(Dataset.tags),
                            feature_dimension=feature_dimension,
                            layer_sizes=run_additional_params['layer_sizes'],
                            dropout_p=run_additional_params['dropout_p'],
                            activation_function=run_additional_params[
                                'activation_function'],
                            normalization_function=run_additional_params[
                                'normalization_function'])

                # select the optimizer based on the additional run parameters read from the config file
                # if adam optimizer is selected
                if run_additional_params['optimizer'].lower() == 'adam':
                    # use Adam optimizer on all the model parameters
                    opt = torch.optim.Adam(
                        model.parameters(),
                        lr=run_additional_params['lr'],
                        weight_decay=run_additional_params['weight_decay'])
                # else if sgd optimizer is selected
                elif run_additional_params['optimizer'].lower() == 'sgd':
                    # use stochastic gradient descent on all the model parameters
                    opt = torch.optim.SGD(
                        model.parameters(),
                        lr=run_additional_params['lr'],
                        weight_decay=run_additional_params['weight_decay'],
                        momentum=run_additional_params['momentum'])
                else:  # otherwise raise error
                    raise ValueError(
                        'Unknown optimizer {}. Try "adam" or "sgd".'.format(
                            run_additional_params['optimizer']))

                # create train generator (a.k.a. Dataloader)
                generator = get_generator(
                    ds_root=ds_path,
                    batch_size=batch_size,
                    chunk_size=cs,
                    chunks=c,
                    mode='train',
                    num_workers=workers,
                    n_samples=training_n_samples,
                    use_malicious_labels=bool(use_malicious_labels),
                    use_count_labels=bool(use_count_labels),
                    use_tag_labels=bool(use_tag_labels))

                # get number of steps per epoch (# of total batches) from generator
                steps_per_epoch = len(generator)

                # allocate model to selected device
                model.to(device)

                # instantiate a new dictionary-like object called loss_histories
                loss_histories = defaultdict(list)
                # set the model mode to 'train'
                model.train()

                # initialize current elapsed times and speeds vectors with zeroes
                current_elapsed_times = [0.0 for _ in range(epochs)]
                current_speeds = [0.0 for _ in range(epochs)]

                # loop for the selected number of epochs
                for epoch in range(epochs):

                    # set current epoch start time
                    start_time = time.time()

                    # for all the training batches
                    for i, (features, labels) in enumerate(generator):
                        opt.zero_grad(
                        )  # clear old gradients from the last step

                        # copy current features and allocate them on the selected device (CPU or GPU)
                        features = deepcopy(features).to(device)

                        # perform a forward pass through the network
                        out = model(features)

                        # compute loss given the predicted output from the model
                        loss_dict = model.compute_loss(
                            out,
                            deepcopy(labels),
                            loss_wts=run_additional_params['loss_wts'])

                        # extract total loss
                        loss = loss_dict['total']

                        # compute gradients
                        loss.backward()

                        # update model parameters
                        opt.step()

                        # for all the calculated losses in loss_dict
                        for k in loss_dict.keys():
                            # if the loss is 'total' then append it to loss_histories['total'] after having detached it
                            # and passed it to the cpu
                            if k == 'total':
                                loss_histories[k].append(
                                    deepcopy(
                                        loss_dict[k].detach().cpu().item()))
                            # otherwise append the loss to loss_histories without having to detach it
                            else:
                                loss_histories[k].append(loss_dict[k])

                        # compute current epoch elapsed time (in seconds)
                        elapsed_time = time.time() - start_time

                        # create loss string with the current losses
                        loss_str = " ".join([
                            f"{key} loss:{value:7.3f}"
                            for key, value in loss_dict.items()
                        ])
                        loss_str += " | "
                        loss_str += " ".join([
                            f"{key} mean:{np.mean(value):7.3f}"
                            for key, value in loss_histories.items()
                        ])

                        # write on standard out the loss string + other information (elapsed time,
                        # predicted total epoch completion time, current mean speed and main memory usage)
                        sys.stdout.write(
                            '\r Epoch: {}/{} {}/{} '.format(
                                epoch + 1, epochs, i + 1, steps_per_epoch) +
                            '[{}/{}, {:6.3f}it/s, RAM used: {:4.1f}%, chunk_size: {}, chunks: {}] '
                            .format(
                                time.strftime(
                                    "%H:%M:%S",
                                    time.gmtime(elapsed_time)),  # elapsed time
                                time.strftime(
                                    "%H:%M:%S",  # predict total epoch completion time
                                    time.gmtime(steps_per_epoch *
                                                elapsed_time / (i + 1))),
                                (i + 1) /
                                elapsed_time,  # compute current mean speed (it/s)
                                psutil.virtual_memory().
                                percent,  # get percentage of main memory used
                                cs,  # chunk size
                                c)  # chunks number
                            + loss_str)  # append loss string

                        # flush standard output
                        sys.stdout.flush()
                        del features, labels  # to avoid weird references that lead to generator errors

                    print()

                    # save final elapsed time and speed for the current epoch
                    current_elapsed_times[epoch] = elapsed_time
                    current_speeds[epoch] = steps_per_epoch / elapsed_time

                # save current chunk size - chunks combination elapsed times and speeds extending the lists
                data['elapsed_times'][str(cs)][str(c)].extend(
                    current_elapsed_times)
                data['speeds'][str(cs)][str(c)].extend(current_speeds)

        # save content of data dict to json file
        with open(json_file_path, 'w') as f:
            json.dump(data, f)

        logger.info('...done')
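A minimal sketch of consuming the JSON written above, averaging the recorded speeds for every evaluated chunk_size/chunks combination; the file path is assumed to be the same json_file_path used in the run:

import json

json_file_path = "gen3_eval_times.json"  # assumed path
with open(json_file_path, 'r') as f:
    data = json.load(f)

# mean iterations/second per (chunk_size, chunks) combination; skipped combinations stay None
for cs, by_chunks in data['speeds'].items():
    for c, speeds in by_chunks.items():
        if speeds:
            print(f"chunk_size={cs}, chunks={c}: {sum(speeds) / len(speeds):.3f} it/s")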