#'loan_advances', #'long_term_incentive', 'poi_messages_pctg', #'restricted_stock', #'salary', #'shared_receipt_with_poi', 'total_payments', 'total_stock_value' ]
# NOTE(review): the line above is the tail of a list literal whose opening
# lies before this section. It is kept verbatim here -- confirm which entries
# are meant to be active before splitting it back into one entry per line.

### Load the dictionary containing the dataset
# Binary mode is what pickle expects, and the context manager closes the file
# handle as soon as the dictionary has been loaded.
# NOTE: unpickling can execute arbitrary code; only load a dataset file that
# comes from a trusted source.
with open("final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

### Task 2: Remove outliers
outliers = ['TOTAL', 'THE TRAVEL AGENCY IN THE PARK', 'LOCKHART EUGENE E']
remove_outliers(data_dict, outliers)

### Task 3: Create new feature(s)
### Fields to be considered as cash received
cash_fields = ['salary', 'bonus', 'exercised_stock_options', 'loan_advances']
### Add cash received fields
calc_cash_received(data_dict, cash_fields)
### Add POI message percentage
calc_poi_messages_pctg(data_dict)

### Store to my_dataset for easy export below
my_dataset = data_dict

### Extract features and labels from dataset for local testing
data = featureFormat(my_dataset, features_list, sort_keys=True)
labels, features = targetFeatureSplit(data)

# Create Min/Max Scaler
unsorted_pairs = zip(features_list[1:], scores) # Sort list sorted_pairs = list(reversed(sorted(unsorted_pairs, key=lambda x: x[1]))) # Create dict if k == 'all': k_best_features = dict(sorted_pairs) else: k_best_features = dict(sorted_pairs[:k]) return k_best_features if __name__ == '__main__': # Load the dictionary containing the dataset data_dict = pickle.load(open("../project/final_project_dataset.pkl", "r")) outliers = ['TOTAL', 'THE TRAVEL AGENCY IN THE PARK', 'LOCKHART EUGENE E'] # Remove outliers remove_outliers(data_dict, outliers) # Fields to be considered as cash received cash_fields = ['salary','bonus','exercised_stock_options','loan_advances'] # Add cash received fields calc_cash_received(data_dict, cash_fields) # Add POI message % calc_poi_messages_pctg(data_dict) #pprint(data_dict) # Create features list features_list = ['poi', 'bonus', 'cash_received', 'cash_received_pctg', 'deferral_payments', 'deferred_income', 'director_fees',