def perform_pca_prim(results,
                     classify,
                     exclude=['model', 'policy'],
                     subsets={},
                     peel_alpha=0.05,
                     paste_alpha=0.05,
                     mass_min=0.05,
                     threshold=None,
                     pasting=True,
                     threshold_type=1,
                     obj_func=prim.def_obj_func):
    '''
    Perform (un)constrained PCA-PRIM.

    The cases of interest are identified. Next, the experiments are rotated
    to the eigen space of the covariance matrix of the experiments of
    interest. Finally, PRIM is run on the rotated experiments.

    :param results: the return from perform_experiments, a tuple of
                    (experiments, outcomes). The experiments must be a
                    structured numpy array.
    :param classify: the classify function to be used in PRIM. It is called
                     here as ``classify(outcomes)``; the cases for which it
                     returns 1 are the cases of interest.
                     NOTE(review): perform_prim may also accept a string
                     naming the outcome of interest, but this function
                     invokes classify directly, so a callable is required.
    :param exclude: The uncertainties that should be excluded, optional
                    argument. Only read, never modified.
    :param subsets: optional kwarg, expects a dictionary with group name as
                    key and a list of uncertainty names as values. If this
                    is used, a constrained PCA-PRIM is executed. If empty,
                    all uncertainties with a non-object dtype form a single
                    group named ``"r"`` (for rotation). The passed
                    dictionary is not modified.

                    **note:** the list of uncertainties should not
                    contain categorical uncertainties.
    :param peel_alpha: parameter controlling the peeling stage
                       (default = 0.05).
    :param paste_alpha: parameter controlling the pasting stage
                        (default = 0.05).
    :param mass_min: minimum mass of a box (default = 0.05).
    :param threshold: the threshold of the output space that boxes should
                      meet.
    :param pasting: perform pasting stage (default=True)
    :param threshold_type: If 1, the boxes should go above the threshold,
                           if -1 the boxes should go below the threshold,
                           if 0, the algorithm looks for both +1 and -1.
    :param obj_func: The objective function to use. Default is
                     :func:`def_obj_func`
    :return: a 5-tuple of the rotation_matrix (square, one block per group
             on the diagonal), the row_names (original uncertainty names),
             the column_names (rotated names, ``<group>_<index>``), the
             rotated_experiments, and the boxes found by prim
    :raises EMAError: if an uncertainty occurs in more than one subset

    '''
    orig_experiments, outcomes = results

    dtypes = orig_experiments.dtype.fields
    object_dtype = np.dtype(object)
    excluded = set(exclude)

    # uncertainties stored as python objects cannot be rotated; they are
    # copied unchanged into the rotated experiments further down
    object_dtypes = [name for name, field in dtypes.items()
                     if field[0] == object_dtype]

    # get experiments of interest
    logical = classify(outcomes) == 1

    # if no subsets are provided all uncertainties with non dtype object are
    # in the same subset, the name of this is r, for rotation.
    # Compare dtype against dtype: comparing the dtype *name* (a string)
    # only works through numpy's implicit string-to-dtype coercion.
    if not subsets:
        subsets = {"r": [name for name, field in dtypes.items()
                         if field[0] != object_dtype]}

    # remove uncertainties that are in exclude and check whether
    # uncertainties occur in more then one subset. The cleaned groups are
    # kept in a local list so the caller's dict is left untouched and every
    # later loop sees the groups in the same order.
    groups = []
    seen = set()
    for group_name, members in subsets.items():
        kept = []
        kept_lookup = set()
        for member in members:
            # drop excluded names and duplicates, keep the given order
            if member in excluded or member in kept_lookup:
                continue
            kept.append(member)
            kept_lookup.add(member)

        if seen & kept_lookup:
            raise EMAError("uncertainty occurs in more then one subset")
        seen |= kept_lookup
        groups.append((group_name, kept))

    # prepare the dtypes for the new rotated experiments recarray
    new_dtypes = []
    for group_name, members in groups:
        assert_dtypes(members, dtypes)

        # the names of the rotated columns are based on the group name
        # and an index
        new_dtypes.extend(("%s_%s" % (group_name, i), float)
                          for i in range(len(members)))

    # add the uncertainties with object dtypes to the end
    included_object_dtypes = [name for name in object_dtypes
                              if name not in excluded]
    new_dtypes.extend((name, object) for name in included_object_dtypes)

    # make a new empty recarray
    rotated_experiments = np.recarray((orig_experiments.shape[0],),
                                      dtype=new_dtypes)

    # put the uncertainties with object dtypes already into the new recarray
    for name in included_object_dtypes:
        rotated_experiments[name] = orig_experiments[name]

    # iterate over the subsets, rotate them, and put them into the new
    # recarray; each group fills its own block on the diagonal of the
    # overall rotation matrix
    size = sum(len(members) for _, members in groups)
    rotation_matrix = np.zeros((size, size))
    row_names = []
    column_names = []
    offset = 0
    for group_name, members in groups:
        n_members = len(members)
        subset_rotation_matrix, subset_experiments = rotate_subset(
            members, orig_experiments, logical)

        end = offset + n_members
        rotation_matrix[offset:end, offset:end] = subset_rotation_matrix
        row_names.extend(members)
        offset = end

        for i in range(n_members):
            name = "%s_%s" % (group_name, i)
            rotated_experiments[name] = subset_experiments[:, i]
            column_names.append(name)

    rotated_results = rotated_experiments, outcomes

    # perform prim in the usual way
    boxes = prim.perform_prim(rotated_results,
                              classify,
                              peel_alpha=peel_alpha,
                              paste_alpha=paste_alpha,
                              mass_min=mass_min,
                              threshold=threshold,
                              pasting=pasting,
                              threshold_type=threshold_type,
                              obj_func=obj_func)

    return rotation_matrix, row_names, column_names, rotated_experiments, boxes
# Driver script: run PCA-PRIM on a stored set of results, report the rotation,
# and run plain PRIM on the unrotated experiments for comparison.
results = load_results(r'../Data/1000_runs_neoclassical_rational.bz2')
# Optional preprocessing step, currently disabled:
# results = restrict_to_after_2010(results)

# perform PCA prim
# perform_pca_prim returns five values; the rotated experiments come before
# the boxes, so all five have to be unpacked here.
(rotation_matrix,
 row_names,
 column_names,
 rotated_experiments,
 boxes) = perform_pca_prim(results, classify, threshold=0.7)

# print the original uncertainty names followed by the rotated column names
for entry in row_names:
    print(entry)
for entry in column_names:
    print(entry)

np.savetxt('rotation matrix.txt', rotation_matrix, delimiter=',')

import pylab as p
p.matshow(rotation_matrix)
p.show()

# perform prim in the usual way unrotated for comparison
boxes2 = prim.perform_prim(results, classify, threshold=0.8)
prim.show_boxes_together(boxes2, results)
prim.write_prim_to_stdout(boxes2)
plt.show()