Code example #1
0
def test_prepare_deep_without_embedding_columns():
	"""DeepPreprocessor given only continuous_cols should standard-scale them.

	Builds a random integer DataFrame, fits a DeepPreprocessor with no
	embedding columns, and checks each transformed column has mean ~0 and
	std ~1 (i.e. the internal scaler ran).
	"""
	errors = []
	df_randint = pd.DataFrame(np.random.choice(np.arange(100), (100, 2)))
	df_randint.columns = ['col1', 'col2']
	preprocessor3 = DeepPreprocessor(continuous_cols=['col1', 'col2'])

	# Bind X_randint up front: previously a failure in fit_transform left it
	# undefined and the code below crashed with NameError instead of failing
	# the assert cleanly.
	X_randint = None
	try:
		X_randint = preprocessor3.fit_transform(df_randint)
	except Exception:  # narrowed from bare `except:` (don't swallow SystemExit etc.)
		errors.append('Fundamental Error')

	# Only check scaling if fit_transform actually produced output.
	if X_randint is not None:
		out_booleans = []
		means, stds = np.mean(X_randint, axis=0), np.std(X_randint, axis=0)
		for mean, std in zip(means, stds):
			out_booleans.append(np.isclose(mean, 0.))
			out_booleans.append(np.isclose(std, 1.))
		if not np.all(out_booleans):
			errors.append('There is something going on with the scaler')

	assert not errors, "errors occured:\n{}".format("\n".join(errors))
Code example #2
0
    # Columns fed one-hot / label-encoded into the wide (linear) component.
    wide_cols = [
        'age_buckets', 'education', 'relationship', 'workclass', 'occupation',
        'native_country', 'gender'
    ]
    # Pairwise feature crosses, also consumed by the wide component.
    crossed_cols = [('education', 'occupation'),
                    ('native_country', 'occupation')]
    # (column_name, embedding_dim) pairs for the deep component's
    # categorical embeddings.
    cat_embed_cols = [('education', 10), ('relationship', 8),
                      ('workclass', 10), ('occupation', 10),
                      ('native_country', 10)]
    continuous_cols = ["age", "hours_per_week"]
    # NOTE(review): `target` is first the label column name, then immediately
    # rebound to the label values array — works, but shadows the string.
    target = 'income_label'
    target = df[target].values
    # Fit/transform the inputs for each model component.
    prepare_wide = WidePreprocessor(wide_cols=wide_cols,
                                    crossed_cols=crossed_cols)
    X_wide = prepare_wide.fit_transform(df)
    prepare_deep = DeepPreprocessor(embed_cols=cat_embed_cols,
                                    continuous_cols=continuous_cols)
    X_deep = prepare_deep.fit_transform(df)
    # Assemble the WideDeep model: linear wide part + dense deep part.
    wide = Wide(wide_dim=X_wide.shape[1], output_dim=1)
    deepdense = DeepDense(hidden_layers=[64, 32],
                          dropout=[0.2, 0.2],
                          deep_column_idx=prepare_deep.deep_column_idx,
                          embed_input=prepare_deep.embeddings_input,
                          continuous_cols=continuous_cols)
    model = WideDeep(wide=wide, deepdense=deepdense)

    # Separate optimizer + StepLR scheduler per component (Adam for wide,
    # RAdam for deepdense), keyed by component name below.
    wide_opt = torch.optim.Adam(model.wide.parameters())
    deep_opt = RAdam(model.deepdense.parameters())
    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

    optimizers = {'wide': wide_opt, 'deepdense': deep_opt}
Code example #3
0
	]
	)
def test_label_encoder_with_custom_encoder(input_df, encoding_dict, output_df):
	tmp_df = input_df.copy()
	for c in input_df.columns:
		tmp_df[c] = tmp_df[c].map(encoding_dict[c])
	assert tmp_df.equals(output_df)

################################################################################
# Test the DeepPreprocessor: only categorical columns to be represented with
# embeddings
###############################################################################

# (column_name, embedding_dim) pairs used by both fixture preprocessors below.
cat_embed_cols = [('col1',5), ('col2',5)]

# Fixture 1: fit on a DataFrame of letters (df_letters is defined elsewhere
# in this file). decoding_dict_* inverts each column's value->code mapping
# back to code->value.
preprocessor1 = DeepPreprocessor(cat_embed_cols)
X_letters = preprocessor1.fit_transform(df_letters)
embed_input_letters = preprocessor1.embeddings_input
decoding_dict_letters = {c: {k:v for v,k in preprocessor1.encoding_dict[c].items()} for c in preprocessor1.encoding_dict.keys()}

# Fixture 2: same setup fit on a DataFrame of numbers (df_numbers defined
# elsewhere in this file).
preprocessor2 = DeepPreprocessor(cat_embed_cols)
X_numbers = preprocessor2.fit_transform(df_numbers)
embed_input_numbers = preprocessor2.embeddings_input
decoding_dict_numbers = {c: {k:v for v,k in preprocessor2.encoding_dict[c].items()} for c in preprocessor2.encoding_dict.keys()}


# Shared mutable error list passed into the parametrized test below.
errors = []
@pytest.mark.parametrize('input_df, X_deep, embed_input, decoding_dict, error_list',
	[
	(df_letters, X_letters, embed_input_letters, decoding_dict_letters, errors),
	(df_numbers, X_numbers, embed_input_numbers, decoding_dict_numbers, errors),