def get_df_inst(self, time=None, fname=None):
    """Get the data from a specified time or filename.

    Either the time or the filename must be specified.

    Parameters
    ----------
    time : Optional[float]
        The time at which to extract the dataframe.
    fname : Optional[str]
        The filename to read (defaults to self.fdict[time]).

    Returns
    -------
    df_inst : pandas.DataFrame
        DataFrame of the time.

    Raises
    ------
    ValueError
        If neither `time` nor `fname` is given.
    """
    if time is None and fname is None:
        # Previously this only printed an error (a Python 2 print statement)
        # and then fell through to load_data(None); fail loudly instead.
        raise ValueError('must specify either the time or filename of the '
                         'desired data.')
    if time is not None:
        # the time takes precedence: look up the corresponding filename
        fname = self.fdict[time]
    # read the CSV data file
    df_inst = load_data(fname)
    return df_inst
def test_load_data_when_loading_training_data_then_the_count_is_correct(
        self):
    """load_data should read every one of the 514 training rows."""
    q1_instance = Q1()
    loaded_rows = cu.load_data(q1_instance.path_to_cancer_training,
                               Q1.cancer_dataset_column_headers)
    self.assertEqual(514, len(loaded_rows))
def read_grid_dims(self, filename):
    """Read the grid dimensions of a file.

    Parameters
    ----------
    filename : str
        Path to the data file to inspect.

    Returns
    -------
    grid_dims : dict
        Dictionary of grid dimension.
    """
    # load the file and unpack only the grid-dimension part of the result
    frame = load_data(filename)
    grid_dims = self.fielddata_from_df(frame)[1]
    return grid_dims
def main(self, path_to_dataset, k_value=5, alg_to_use='euclidean', p=1):
    """Run k-nearest-neighbour regression over the dataset and report R².

    Parameters
    ----------
    path_to_dataset : str
        Path of the CSV dataset to load (read with no header names).
    k_value : int
        Number of neighbours used for the regression (default 5).
    alg_to_use : str
        Distance algorithm passed to calculate_distances.
    p : int
        Order parameter for the distance algorithm.
    """
    dataset = cu.load_data(path_to_dataset, None)
    predictions = []  # predicted value for each row, in iteration order
    for _, sample in dataset.iterrows():
        # columns 0..11 are features; column 12 is the regression target
        distances, neighbour_order = self.calculate_distances(
            dataset.loc[:, 0:11].values, sample[0:12].values, alg_to_use, p)
        predictions.append(
            self.calculate_regression(dataset, neighbour_order, k_value))
    r2_score = self.calculate_r_squared(dataset[12], predictions)
    print('R\u00b2 (R squared) coefficient is {0}'.format(r2_score))
    print('Accuracy of the model is: {0} %'.format(r2_score * 100))
def main(self, path_to_data=path_to_cancer_training, headers=
        cancer_dataset_column_headers, k_value=3, alg_to_use='euclidean', p=1):
    """Classify the cancer dataset with weighted kNN and print the accuracy.

    Parameters
    ----------
    path_to_data : str
        Path of the CSV dataset to load.
    headers : list
        Column headers for the dataset.
    k_value : int
        Number of neighbours used for classification (default 3).
    alg_to_use : str
        Distance algorithm passed to calculate_distances.
    p : int
        Order parameter for the distance algorithm.
    """
    data_points = cu.load_data(path_to_data, headers)
    df_training, row_count_removed = cu.clean_cancer_dataset(data_points)
    print('The dataset has been cleaned of the impossible values. {0} rows have been removed'.format(row_count_removed))
    correctly_classified = 0
    incorrectly_classified = 0
    # BUG FIX: iterate over the cleaned dataset (df_training) instead of the
    # raw data_points — previously df_training was computed and announced but
    # never used, so the impossible values just removed were still classified.
    for index, row in df_training.iterrows():
        # features are the 'bi_rads'..'density' columns; column 5 is the label
        dist_matrix, sorted_matrix_indices = self.calculate_distances(
            df_training.loc[:, 'bi_rads':'density'].values,
            row[0:5].values, alg_to_use, p)
        classification = self.classify_points_with_weight(
            dist_matrix, sorted_matrix_indices, df_training.values, k_value)
        if classification == row.values[5]:
            correctly_classified += 1
        else:
            incorrectly_classified += 1
    accuracy = cu.compute_classification_accuracy(correctly_classified,
                                                  incorrectly_classified)
    print('For the k = {0} using {1} distance weighing algorithm, the accuracy is: {2} %,'.format(k_value, alg_to_use, accuracy))
def __init__(self, run_directory, case_name, input_fname='', geom_fname='',
             load_field_output=True, load_wakeelem_output=True,
             load_probe_output=True,
             wakeelem_fnames_pattern='*WakeElemData_*.csv',
             field_fnames_pattern='*FieldData_*.csv',
             probe_fnames_pattern='probe_*.csv*', quiet=False):
    """Initialize the class, reading some data to memory.

    This method relies on recursive searches within the specified run
    directory to find the appropriate CACTUS output files. Therefore, each
    run directory should only contain one set of output files (or else the
    behavior cannot be guaranteed).

    Parameters
    ----------
    run_directory : str
        Path to the directory containing the CACTUS run.
    case_name : str
        'case name' which precedes all input and output files.
    input_fname : Optional[str]
        Input filename (default `./[case_name].in`).
    geom_fname : Optional[str]
        Geometry filename (default `./[case_name].geom`)
    load_field_output : bool
        True (default) to load field data, False otherwise.
    load_wakeelem_output : bool
        True (default) to load wake element data, False otherwise.
    load_probe_output : bool
        True (default) to load probe data, False otherwise.
    wakeelem_fnames_pattern : Optional[str]
        Glob pattern for wake element data filenames (default is
        `*WakeElemData_*.csv`)
    field_fnames_pattern : Optional[str]
        Glob pattern for field data filenames (default is
        `*FieldData_*.csv`)
    probe_fnames_pattern : Optional[str]
        Glob pattern for probe filenames (default is `probe_*.csv`)
    quiet : Optional[bool]
        Set True to hide print statements (default is False).
    """
    # if an input file is specified, use that
    if input_fname:
        self.input_fname = os.path.abspath(os.path.join(run_directory,
                                                        input_fname))
    else:
        # otherwise, look for one using [case_name].in as a glob pattern
        self.input_fname = self.__find_single_file(run_directory,
                                                   case_name + '.in')

    # if a geom file is specified, use that
    if geom_fname:
        self.geom_fname = os.path.abspath(os.path.join(run_directory,
                                                       geom_fname))
    else:
        # otherwise, look for one using [case_name].geom as a glob pattern
        self.geom_fname = self.__find_single_file(run_directory,
                                                  case_name + '.geom')

    # assemble filename patterns
    bladeelem_fname_pattern = case_name + '_ElementData.csv'
    param_fname_pattern = case_name + '_Param.csv'
    rev_fname_pattern = case_name + '_RevData.csv'
    time_fname_pattern = case_name + '_TimeData.csv'

    # Load the input, geometry, blade element, rev-averaged, parameter,
    # and time data. Only one of each file should be expected. The function
    # find_single_file is used to warn if multiple files (or none) are
    # found.

    # NOTE: the Python 2 `print` statements below were converted to py3
    # print() calls for consistency with the rest of the codebase.

    # load the input namelist
    if self.input_fname:
        tic = pytime.time()
        self.input = CactusInput(self.input_fname)
        if not quiet:
            print('Read input namelist in %2.2f s' % (pytime.time() - tic))
    else:
        warnings.warn("Input file not loaded.")

    # load geometry data
    if self.geom_fname:
        tic = pytime.time()
        # load the geometry data
        self.geom = CactusGeom(self.geom_fname)
        if not quiet:
            print('Read geometry file in %2.2f s' % (pytime.time() - tic))
    else:
        warnings.warn("Geometry file not loaded.")

    # load parameter data
    self.param_fname = self.__find_single_file(
        run_directory, param_fname_pattern)
    if self.param_fname:
        tic = pytime.time()
        self.param_data = load_data(self.param_fname)
        if not quiet:
            print('Read parameter data in %2.2f s' % (pytime.time() - tic))
    else:
        warnings.warn("Parameter data file not loaded.")

    # load revolution-averaged data
    self.rev_fname = self.__find_single_file(
        run_directory, rev_fname_pattern)
    if self.rev_fname:
        tic = pytime.time()
        self.rev_data = load_data(self.rev_fname)
        if not quiet:
            print('Read revolution-averaged data in %2.2f s' %
                  (pytime.time() - tic))
    else:
        warnings.warn("Revolution-averaged data file not loaded.")

    # load blade element data
    self.bladeelem_fname = self.__find_single_file(
        run_directory, bladeelem_fname_pattern)
    if self.bladeelem_fname:
        tic = pytime.time()
        self.bladeelem_data = CactusBladeElem(self.bladeelem_fname)
        if not quiet:
            print('Read blade element data in %2.2f s' %
                  (pytime.time() - tic))
    else:
        warnings.warn("Blade element data file not loaded.")

    # time data
    self.time_fname = self.__find_single_file(
        run_directory, time_fname_pattern)
    if self.time_fname:
        tic = pytime.time()
        self.time_data = load_data(self.time_fname)
        if not quiet:
            print('Read time data in %2.2f s' % (pytime.time() - tic))
    else:
        warnings.warn("Time data file not loaded.")

    # The following sections initialize the CactusWakeElems, CactusField,
    # and CactusProbes classes. Initializing these classes will search for
    # files in the run_directory and parse the first line of each. This may
    # be slow, depending on the number of files

    # search for wake element, field files, and probe files anywhere in
    # the run directory
    if load_wakeelem_output:
        self.wake_filenames = sorted(recursive_glob(run_directory,
                                                    wakeelem_fnames_pattern))
        if self.wake_filenames:
            self.wakeelems = CactusWakeElems(self.wake_filenames)
        else:
            if not quiet:
                print('Warning: Could not find any wake element data files '
                      'in the work directory matching %s.' %
                      (wakeelem_fnames_pattern))

    if load_field_output:
        self.field_filenames = sorted(recursive_glob(run_directory,
                                                     field_fnames_pattern))
        if self.field_filenames:
            self.field = CactusField(self.field_filenames)
        else:
            if not quiet:
                print('Warning: Could not find any field data files in '
                      'the work directory matching %s.' %
                      (field_fnames_pattern))

    if load_probe_output:
        self.probe_filenames = sorted(recursive_glob(run_directory,
                                                     probe_fnames_pattern))
        if self.probe_filenames:
            self.probes = CactusProbes(self.probe_filenames)
        else:
            if not quiet:
                print('Warning: Could not find any probe data files in '
                      'the work directory matching %s.' %
                      (probe_fnames_pattern))

    if not quiet:
        print('Loaded case `%s` from path `%s`\n' % (case_name,
                                                     run_directory))
def __init__(self, filename):
    """Remember the filename and immediately load its contents."""
    self.filename = filename
    # eager load: the data is read once at construction time
    self.data = load_data(self.filename)