from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn import preprocessing
from sklearn import cross_validation
import enron_evaluate

# Visual separator lines: `sep` is a run of '#' characters and `sep2` a run of
# '+' characters. Presumably printed between report sections -- they are not
# used in the visible part of this script, so confirm against the rest of it.
sep = '##############################################################################################'
sep2 = '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'

### Load the dictionary containing the dataset
# The context manager closes the file handle as soon as unpickling finishes;
# the bare open() call used previously left it open until garbage collection.
# The "r" mode is kept unchanged so the file is read exactly as before.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load a dataset that comes from a trusted source.
with open("final_project_dataset.pkl", "r") as dataset_file:
    data_dict = pickle.load(dataset_file)

### Functions from enron_tools that are handed to enron_tools.add_features below
add_feature_function_list = [
    enron_tools.add_poi_to_ratio,
    enron_tools.add_poi_from_ratio,
    enron_tools.add_poi_interaction_ratio,
]

## Pass the function list and data_dict to add_features; the return value is
## not used here, so the new features are presumably written into data_dict
## in place (confirm in enron_tools).
enron_tools.add_features(add_feature_function_list, data_dict)

### Task 1: Select what features you'll use.
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".

features_list = enron_tools.get_features(data_dict)
data_label = "poi"


## email_address does not help with prediction and causes an exception, so
## it is dropped from the feature list
features_list.remove("email_address")

## 'other' is not a well-defined feature; its removal is currently disabled
#features_list.remove('other')
Exemple #2
0
#         outliers_dict[count] += [employee_name]
#     else:
#         outliers_dict[count] = [employee_name]
# print outliers_dict

# Keys removed from data_dict before any further processing
outliers = ["TOTAL", "LOCKHART EUGENE E", "THE TRAVEL AGENCY IN THE PARK"]
for outlier_key in outliers:
    # Supplying a default makes pop() a silent no-op when the key is absent
    data_dict.pop(outlier_key, None)

### Task 3: Create new feature(s)

# Calculate fractions (presumably first field divided by second for each
# entry of data_dict -- confirm in enron_tools.calculate_fraction)
fraction_from_poi_email = enron_tools.calculate_fraction(
    data_dict, "from_poi_to_this_person", "to_messages"
)
fraction_to_poi_email = enron_tools.calculate_fraction(
    data_dict, "from_this_person_to_poi", "from_messages"
)
# Store the computed values in data_dict under the new feature names;
# add_features returns the dictionary that is used from here on
data_dict = enron_tools.add_features(
    data_dict, "fraction_from_poi_email", fraction_from_poi_email
)
data_dict = enron_tools.add_features(
    data_dict, "fraction_to_poi_email", fraction_to_poi_email
)

# Appending the new features to the feature list is currently disabled
# features_list += ["fraction_from_poi_email", "fraction_to_poi_email"]

### Store to my_dataset for easy export below.
my_dataset = data_dict

### Extract features and labels from dataset for local testing
data = featureFormat(
    my_dataset,
    features_list,
    sort_keys=True
)
labels, features = targetFeatureSplit(data)

### Task 4: Try a variety of classifiers
### Please name your classifier clf for easy export below.
### Note that if you want to do PCA or other multi-stage operations,