Beispiel #1
0
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".

### Get data
# Pickle data is a byte stream, so the file must be opened in binary mode:
# text mode ("r") breaks pickle.load on Python 3 and can corrupt the stream
# through newline translation on Windows.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this dataset file from a trusted source.
with open("final_project_dataset.pkl", "rb") as data_file:
    data_dict = pickle.load(data_file)
df = prepare_data_with_transformed_emails(data_dict)

# Get initial features and labels
labels = df['poi'].astype(float)
# features 1: exclude email_address (and the 'poi' label column itself)
features_df = df.drop(['poi', 'email_address'], axis=1)
# convert ints to float
features_df = features_df.applymap(float)
# Captured before 'poi' is re-attached below, so it lists feature columns only.
feature_names = features_df.columns.values

# get scaled features
scaled_features = scale_features(features_df)
# Wrap the scaled values in a frame with the same index and column labels as
# the unscaled features.
scaled_features_df = pd.DataFrame(scaled_features,
                                  index=features_df.index,
                                  columns=feature_names)

# Get features
features_list = select_features_with_lasso(scaled_features,
                                           feature_names,
                                           labels,
                                           coefficient_threshold=0.001,
                                           my_alpha=0.015)

### Task 2: Remove outliers
### Task 3: Create new feature(s)
### Store to my_dataset for easy export below.
# use non-scaled features
# Re-attach the label column to the unscaled feature frame.
features_df['poi'] = df['poi']
Beispiel #2
0
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".

### Get data
# Pickle data is a byte stream, so the file must be opened in binary mode:
# text mode ("r") breaks pickle.load on Python 3 and can corrupt the stream
# through newline translation on Windows.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this dataset file from a trusted source.
with open("final_project_dataset.pkl", "rb") as data_file:
    data_dict = pickle.load(data_file)
df = prepare_data_with_transformed_emails(data_dict)

# Get initial features and labels
labels = df['poi'].astype(float)
# features 1: exclude email_address (and the 'poi' label column itself)
features_df = df.drop(['poi', 'email_address'], axis=1)
# convert ints to float
features_df = features_df.applymap(float)
# Captured before 'poi' is re-attached below, so it lists feature columns only.
feature_names = features_df.columns.values

# get scaled features
scaled_features = scale_features(features_df)
# Wrap the scaled values in a frame with the same index and column labels as
# the unscaled features.
scaled_features_df = pd.DataFrame(scaled_features,
                                  index=features_df.index,
                                  columns=feature_names)

# Get features
features_list = select_features_with_lasso(scaled_features,
                                           feature_names,
                                           labels,
                                           coefficient_threshold=0.001,
                                           my_alpha=0.015)

### Task 2: Remove outliers
### Task 3: Create new feature(s)
### Store to my_dataset for easy export below.
# use non-scaled features
# Re-attach the label column to the unscaled feature frame.
features_df['poi'] = df['poi']