コード例 #1
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_product_classifier_from_embeddings():
    """Train the product classifier from an embeddings input through the CLI.

    Runs ``train products --model classifier`` against the transformed
    embeddings fixture, expects exit code 0, and checks that the classifier
    output directory and each expected artifact exist. The work directory is
    handed to ``common.delete_path`` once all checks have passed.
    """
    cli_runner = CliRunner()
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    # The input here is the embeddings fixture rather than raw documents.
    embeddings_input = common.get_full_path(
        tests_path, common.TRANSFORMED_EMBEDDINGS_PATH)

    train_params = (
        "from_embeddings=True,label_col=categories,doc_col=document,classes=10,test_size=0.3,"
        "lr=0.01,epochs=10,vec_size=300"
    )
    cli_args = [
        '--work-dir', work_dir,
        "train", "products",
        "--model", "classifier",
        "--params", train_params,
        embeddings_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)
    assert outcome.exit_code == 0

    # The classifier output directory must exist...
    classifier_dir = f"{work_dir}/products/models/classifier"
    assert os.path.exists(classifier_dir)

    # ...and hold every expected artifact.
    expected_artifacts = (
        "category_encoder.model",
        "prod_classifier-def.yaml",
        "prod_classifier-weights.h5",
        "training_history.png",
        "classification_report.png",
    )
    for artifact in expected_artifacts:
        assert os.path.exists(f"{classifier_dir}/{artifact}")

    common.delete_path(work_dir)
コード例 #2
0
def test_describe_tweets_no_filters_should_fail():
    """``describe tweets`` invoked without ``--filters`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-f`` / ``--filters`` in the command output.
    """
    runner = CliRunner()
    input_data = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    output_path = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    # Command-line arguments are strings; pass the count as "10" instead of
    # an int so the invocation does not depend on how the parser happens to
    # handle non-string values.
    result = runner.invoke(
        cli.describe,
        ['tweets', "--output-dir", output_path, "--min-count", "10", input_data])
    assert result.exit_code != 0
    assert "Error: Missing option '-f' / '--filters'" in result.output
コード例 #3
0
def test_transform_no_params_should_fail():
    """``predict clusters`` invoked without ``--params`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-p`` / ``--params`` in the command output.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    # Note: no "--params" entry in the argument list.
    cli_args = [
        "--work-dir", work_dir,
        "predict", "clusters",
        "--filters", "place_name='Los Angeles'",
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-p' / '--params'" in outcome.output
コード例 #4
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_products_no_params_should_fail():
    """``train products`` invoked without ``--params`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-p`` / ``--params`` in the command output.
    """
    cli_runner = CliRunner()
    products_input = common.get_full_path(tests_path, common.RAW_PRODUCTS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    # Note: no "--params" entry in the argument list.
    cli_args = [
        '--work-dir', work_dir,
        "train", "products",
        "--model", "embeddings",
        products_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-p' / '--params'" in outcome.output
コード例 #5
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_tweets_no_params_should_fail():
    """The ``train_tweets`` command invoked without ``--params`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-p`` / ``--params`` in the command output.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    # Note: no "--params" entry in the argument list.
    cli_args = [
        '--filters', "place_name='Los Angeles'",
        "--output-dir", work_dir,
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.train_tweets, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-p' / '--params'" in outcome.output
コード例 #6
0
def test_transform_no_filters_should_fail():
    """``predict clusters`` invoked without ``--filters`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-f`` / ``--filters`` in the command output.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    cluster_params = "center='34;-118',eps=0.04,n_neighbors=2,location_column=location_geometry,min_score=0.1"

    # Note: no "--filters" entry in the argument list.
    cli_args = [
        "--work-dir", work_dir,
        "predict", "clusters",
        "--params", cluster_params,
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-f' / '--filters'" in outcome.output
コード例 #7
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_tweets_no_filters_should_fail():
    """The ``train_tweets`` command invoked without ``--filters`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-f`` / ``--filters`` in the command output.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    # Note: no "--filters" entry in the argument list.
    cli_args = [
        '--model', "nearest-neighbors",
        "--params", "n_neighbors=10,location_column=location_geometry",
        "--output-dir", work_dir,
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.train_tweets, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-f' / '--filters'" in outcome.output
コード例 #8
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_products_no_model_should_fail():
    """``train products`` invoked without ``--model`` must fail.

    Expects a non-zero exit code and the missing-option message for
    ``-m`` / ``--model`` in the command output.
    """
    cli_runner = CliRunner()
    products_input = common.get_full_path(tests_path, common.RAW_PRODUCTS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    train_params = "label_col=category,doc_col=document,lr=0.0002,epochs=10,vec_size=300,alpha=0.025,min_alpha=0.00025"

    # Note: no "--model" entry in the argument list.
    cli_args = [
        '--work-dir', work_dir,
        "train", "products",
        "--params", train_params,
        products_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing option '-m' / '--model'" in outcome.output
コード例 #9
0
def test_describe_tweets():
    """Run ``describe tweets`` with a country filter and check its outputs.

    Expects exit code 0 and that the ``tweets/country_code=US`` directory
    under the output path exists and contains ``counts.csv`` and
    ``bar_chart.png``. The result directory is handed to
    ``common.delete_path`` once all checks have passed.
    """
    runner = CliRunner()
    input_data = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    output_path = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    # Command-line arguments are strings; pass the count as "10" instead of
    # an int so the invocation does not depend on how the parser happens to
    # handle non-string values.
    result = runner.invoke(cli.describe, [
        'tweets', "--filters", "country_code=US", "--output-dir", output_path,
        "--min-count", "10", input_data
    ])
    assert result.exit_code == 0
    # validate the existence of the output directory
    tweets_describe_result = f"{output_path}/tweets/country_code=US"
    assert os.path.exists(tweets_describe_result)
    # and the content
    assert os.path.exists(f"{tweets_describe_result}/counts.csv")
    assert os.path.exists(f"{tweets_describe_result}/bar_chart.png")
    common.delete_path(tweets_describe_result)
コード例 #10
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_product_embeddings():
    """Train the product embeddings model through the CLI.

    Runs ``train products --model embeddings`` on the raw products fixture,
    expects exit code 0, and checks that the embeddings output directory and
    ``d2v_model.model`` exist. The work directory is handed to
    ``common.delete_path`` once all checks have passed.
    """
    cli_runner = CliRunner()
    products_input = common.get_full_path(tests_path, common.RAW_PRODUCTS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    train_params = "label_col=categories,doc_col=document,lr=0.0002,epochs=10,vec_size=300,alpha=0.025,min_alpha=0.00025,min_count=1"

    cli_args = [
        '--work-dir', work_dir,
        "train", "products",
        "--model", "embeddings",
        "--params", train_params,
        products_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)
    assert outcome.exit_code == 0

    # The embeddings output directory must exist and hold the model file.
    embeddings_dir = f"{work_dir}/products/models/embeddings"
    assert os.path.exists(embeddings_dir)
    model_file = f"{embeddings_dir}/d2v_model.model"
    assert os.path.exists(model_file)

    common.delete_path(work_dir)
コード例 #11
0
def test_describe_products_with_explode():
    """Run ``describe products`` grouped by categories with ``--explode``.

    Expects exit code 0 and that the ``documents`` directory under the output
    path exists and contains ``counts.csv`` and ``bar_chart.png``. The result
    directory is handed to ``common.delete_path`` once all checks have passed.
    """
    runner = CliRunner()
    input_data = common.get_full_path(tests_path, common.RAW_PRODUCTS_PATH)
    output_path = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    # Command-line arguments are strings; pass the count as "10" instead of
    # an int so the invocation does not depend on how the parser happens to
    # handle non-string values.
    result = runner.invoke(cli.describe, [
        'products', "--output-dir", output_path, "--groupby", "categories",
        "--min-count", "10", "--explode", input_data
    ])
    assert result.exit_code == 0
    # validate the existence of the output directory
    products_describe_result = f"{output_path}/documents"
    assert os.path.exists(products_describe_result)
    # and the content
    assert os.path.exists(f"{products_describe_result}/counts.csv")
    assert os.path.exists(f"{products_describe_result}/bar_chart.png")
    common.delete_path(products_describe_result)
コード例 #12
0
def test_describe_with_invalid_filter_value():
    """``Describer.describe`` must raise ``DescribeException`` for a bad filter.

    Builds a ``Describer`` with the filter ``lalala=US`` and checks that
    describing the raw tweets fixture raises ``DescribeException`` carrying
    the generic data-processing error message.
    """
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    bad_filter = "lalala=US"
    subject = Describer("", "place_name", "tweet", "tweets", bad_filter)

    with pytest.raises(DescribeException) as excinfo:
        subject.describe(tweets_input)

    raised_message = str(excinfo.value)
    assert "There is a problem processing the data, see the error message" in raised_message
コード例 #13
0
def test_describe_products_no_input_data_should_fail():
    """``describe products`` invoked without the input path must fail.

    Expects a non-zero exit code and the missing-argument message for
    ``INPUT_DATA`` in the command output.
    """
    runner = CliRunner()
    output_path = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    # Command-line arguments are strings; pass the count as "10" instead of
    # an int so the invocation does not depend on how the parser happens to
    # handle non-string values.
    result = runner.invoke(
        cli.describe,
        ['products', "--output-dir", output_path, "--min-count", "10"])
    assert result.exit_code != 0
    assert "Error: Missing argument 'INPUT_DATA'" in result.output
コード例 #14
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_tweets_n_neighbors():
    """Run the ``train_tweets`` command with the nearest-neighbors model.

    Expects exit code 0 and that the ``tweets/place_name=Los Angeles``
    directory under the output path exists and contains both plot images.
    The output path is handed to ``common.delete_path`` once all checks have
    passed.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    cli_args = [
        '--model', "nearest-neighbors",
        "--filters", "place_name='Los Angeles'",
        "--params", "n_neighbors=10,location_column=location_geometry",
        "--output-dir", work_dir,
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.train_tweets, cli_args)
    assert outcome.exit_code == 0

    # The per-filter output directory must exist...
    plots_dir = f"{work_dir}/tweets/place_name=Los Angeles"
    assert os.path.exists(plots_dir)

    # ...and hold both generated plots.
    for plot_name in ("nearest_neighbors.png", "scatter_plot.png"):
        assert os.path.exists(f"{plots_dir}/{plot_name}")

    common.delete_path(work_dir)
コード例 #15
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_products_no_input_data_should_fail():
    """``train products`` invoked without the input path must fail.

    Expects a non-zero exit code and the missing-argument message for
    ``INPUT_DATA`` in the command output.
    """
    cli_runner = CliRunner()
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    train_params = "label_col=category,doc_col=document,lr=0.0002,epochs=10,vec_size=300,alpha=0.025,min_alpha=0.00025"

    # Note: the trailing positional input path is left out.
    cli_args = [
        '--work-dir', work_dir,
        "train", "products",
        "--model", "embeddings",
        "--params", train_params,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing argument 'INPUT_DATA'" in outcome.output
コード例 #16
0
def test_transform_no_input_data_should_fail():
    """``predict clusters`` invoked without the input path must fail.

    Expects a non-zero exit code and the missing-argument message for
    ``INPUT_DATA`` in the command output.
    """
    cli_runner = CliRunner()
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    cluster_params = "center='34;-118',eps=0.04,n_neighbors=2,location_column=location_geometry,min_score=0.1"

    # Note: the trailing positional input path is left out.
    cli_args = [
        "--work-dir", work_dir,
        "predict", "clusters",
        "--params", cluster_params,
        "--filters", "place_name='Los Angeles'",
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing argument 'INPUT_DATA'" in outcome.output
コード例 #17
0
ファイル: test_train.py プロジェクト: Ohtar10/wtsp
def test_train_tweets_no_input_data_should_fail():
    """The ``train_tweets`` command invoked without the input path must fail.

    Expects a non-zero exit code and the missing-argument message for
    ``INPUT_DATA`` in the command output.
    """
    cli_runner = CliRunner()
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)

    # Note: the trailing positional input path is left out.
    cli_args = [
        '--model', "nearest-neighbors",
        "--filters", "place_name='Los Angeles'",
        "--params", "n_neighbors=10,location_column=location_geometry",
        "--output-dir", work_dir,
    ]
    outcome = cli_runner.invoke(cli.train_tweets, cli_args)

    assert outcome.exit_code != 0
    assert "Error: Missing argument 'INPUT_DATA'" in outcome.output
コード例 #18
0
def test_transform_embeddings():
    """Run ``predict embeddings`` with model assets copied in beforehand.

    Copies the ``products`` assets folder into the work directory's models
    path, invokes the command on the raw products fixture, expects exit code
    0, and checks that both output files under ``embeddings`` exist. The work
    directory is handed to ``common.delete_path`` once all checks have passed.
    """
    cli_runner = CliRunner()
    products_input = common.get_full_path(tests_path, common.RAW_PRODUCTS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    models_dir = f"{work_dir}/products/models/"

    # Seed the work directory with model assets so the command has a model
    # available (the test assumes one was trained earlier).
    assets_root = common.get_full_path(tests_path, common.ASSETS_PATH)
    copy_folder_recursively(f"{assets_root}/products", models_dir)

    cli_args = ["--work-dir", work_dir, "predict", "embeddings", products_input]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)
    assert outcome.exit_code == 0

    # Both output files must have been written.
    embeddings_file = f"{work_dir}/embeddings/document_embeddings.npz"
    assert os.path.exists(embeddings_file)
    encoder_file = f"{work_dir}/embeddings/category_encoder.save"
    assert os.path.exists(encoder_file)

    common.delete_path(work_dir)
コード例 #19
0
def test_transform_where_to_sell_products():
    """Run ``predict clusters`` on tweets with model assets copied in beforehand.

    Copies the ``products`` assets folder into the work directory's models
    path, invokes the command with a place filter and clustering params,
    expects exit code 0, and checks that the
    ``where_to_sell_in/place_name=Los Angeles`` directory exists and contains
    the CSV and HTML outputs. The work directory is handed to
    ``common.delete_path`` once all checks have passed.
    """
    cli_runner = CliRunner()
    tweets_input = common.get_full_path(tests_path, common.RAW_TWEETS_PATH)
    work_dir = common.get_full_path(tests_path, common.TEST_WORK_DIR_PATH)
    models_dir = f"{work_dir}/products/models/"

    # Seed the work directory with model assets so the command has a model
    # available (the test assumes one was trained earlier).
    assets_root = common.get_full_path(tests_path, common.ASSETS_PATH)
    copy_folder_recursively(f"{assets_root}/products", models_dir)

    cluster_params = "center='34;-118',eps=0.04,n_neighbors=2,location_column=location_geometry,min_score=0.1"
    cli_args = [
        "--work-dir", work_dir,
        "predict", "clusters",
        "--filters", "place_name='Los Angeles'",
        "--params", cluster_params,
        tweets_input,
    ]
    outcome = cli_runner.invoke(cli.wtsp, cli_args)
    assert outcome.exit_code == 0

    # The per-filter output directory must exist...
    clusters_dir = f"{work_dir}/where_to_sell_in/place_name=Los Angeles"
    assert os.path.exists(clusters_dir)

    # ...and hold both report formats.
    for report_name in ("classified_clusters.csv", "classified_clusters.html"):
        assert os.path.exists(f"{clusters_dir}/{report_name}")

    common.delete_path(work_dir)