    def get_df_inst(self, time=None, fname=None):
        """Get the data from a specified time or filename.

        Either the time or the filename must be specified.

        Parameters
        ----------
        time : Optional[float]
            The time at which to extract the dataframe.
        fname : Optional[str]
            The filename to read (defaults to self.fdict[time]).

        Returns
        -------
        df_inst : pandas.DataFrame
            DataFrame containing the data at the requested time (or read
            from the specified file).
        """
        if (time is None) and (fname is None):
            raise ValueError('Must specify either the time or the filename '
                             'of the desired data.')

        if time is not None:
            # if the time is specified, look up the corresponding filename
            fname = self.fdict[time]
        # otherwise, use the filename given

        # read the CSV data file
        df_inst = load_data(fname)

        return df_inst
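A minimal usage sketch, assuming `field` is an instance of the enclosing class whose `fdict` maps output times to CSV filenames (the instance name and the filename below are hypothetical):

# look up the snapshot stored for a known output time
df = field.get_df_inst(time=0.25)

# or bypass the time lookup and read a specific file directly
df = field.get_df_inst(fname='FieldData_00010.csv')
print(df.head())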
    def test_load_data_when_loading_training_data_then_the_count_is_correct(
            self):
        expected_count = 514
        under_test = Q1()
        result = cu.load_data(under_test.path_to_cancer_training,
                              Q1.cancer_dataset_column_headers)
        self.assertEqual(expected_count, len(result))
    def read_grid_dims(self, filename):
        """Read the grid dimensions of a file.

        Returns
        -------
        grid_dims : dict
            Dictionary of grid dimension.
        """
        # get grid dimensions
        df = load_data(filename)
        _, grid_dims = self.fielddata_from_df(df)

        # return the grid dimensions dictionary
        return grid_dims
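A brief usage sketch under the same assumptions as above (the exact keys of the returned dictionary depend on what `fielddata_from_df` produces):

grid_dims = field.read_grid_dims(field.fdict[0.25])
print(grid_dims)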
    def main(self, path_to_dataset, k_value=5, alg_to_use='euclidean', p=1):
        data_points = cu.load_data(path_to_dataset, None)

        prediction_values = []  # container for the predicted values

        for index, row in data_points.iterrows():
            dist_matrix, sorted_matrix_indices = self.calculate_distances(
                data_points.loc[:, 0:11].values, row[0:12].values, alg_to_use,
                p)

            prediction_values.append(
                self.calculate_regression(data_points, sorted_matrix_indices,
                                          k_value))

        r2_score = self.calculate_r_squared(data_points[12], prediction_values)
        #print('{0}, {1}, {2}'.format(k_value, alg_to_use, r2_score))
        print('R\u00b2 (R squared) coefficient is {0}'.format(r2_score))
        print('Accuracy of the model is: {0} %'.format(r2_score * 100))
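The calculate_r_squared helper is defined elsewhere in this module; a minimal sketch of the standard coefficient of determination it presumably computes (an assumption, not the original code) is:

import numpy as np

def r_squared(y_true, y_pred):
    """Coefficient of determination: R^2 = 1 - SS_res / SS_tot."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)          # residual sum of squares
    ss_tot = np.sum((y_true - y_true.mean()) ** 2)   # total sum of squares
    return 1.0 - ss_res / ss_tot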
    def main(self, path_to_data=path_to_cancer_training,
             headers=cancer_dataset_column_headers, k_value=3,
             alg_to_use='euclidean', p=1):
        data_points = cu.load_data(path_to_data, headers)
        df_training, row_count_removed = cu.clean_cancer_dataset(data_points)

        print('The dataset has been cleaned of impossible values; '
              '{0} rows have been removed.'.format(row_count_removed))

        correctly_classified = 0
        incorrectly_classified = 0

        for index, row in data_points.iterrows():
            dist_matrix, sorted_matrix_indices = self.calculate_distances(
                data_points.loc[:, 'bi_rads':'density'].values,
                row[0:5].values, alg_to_use, p)

            classification = self.classify_points_with_weight(
                dist_matrix, sorted_matrix_indices, data_points.values,
                k_value)

            if classification == row.values[5]:
                correctly_classified += 1
            else:
                incorrectly_classified += 1

        accuracy = cu.compute_classification_accuracy(
            correctly_classified, incorrectly_classified)

        print('For k = {0} using the {1} distance weighting algorithm, '
              'the accuracy is: {2} %'.format(k_value, alg_to_use, accuracy))
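cu.compute_classification_accuracy is also defined elsewhere; consistent with how it is used above, it presumably reduces to a correct-over-total percentage (a sketch, not the original helper):

def compute_classification_accuracy(correct, incorrect):
    """Return the percentage of correctly classified samples."""
    total = correct + incorrect
    return 100.0 * correct / total if total else 0.0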
    def __init__(self, run_directory, case_name,
                 input_fname='',
                 geom_fname='',
                 load_field_output=True,
                 load_wakeelem_output=True,
                 load_probe_output=True,
                 wakeelem_fnames_pattern='*WakeElemData_*.csv',
                 field_fnames_pattern='*FieldData_*.csv',
                 probe_fnames_pattern='probe_*.csv*',
                 quiet=False):
        """Initialize the class, reading some data to memory.

        This method relies on recursive searches within the specified run
        directory to find the appropriate CACTUS output files. Therefore, each
        run directory should only contain one set of output files (or else the
        behavior cannot be guaranteed).

        Parameters
        ----------
        run_directory : str
            Path to the directory containing the CACTUS run.
        case_name : str
            'case name' which precedes all input and output files.
        input_fname : Optional[str]
            Input filename (default `./[case_name].in`).
        geom_fname : Optional[str]
            Geometry filename (default `./[case_name].geom`).
        load_field_output : bool
            True (default) to load field data, False otherwise.
        load_wakeelem_output : bool
            True (default) to load wake element data, False otherwise.
        load_probe_output : bool
            True (default) to load probe data, False otherwise.
        wakeelem_fnames_pattern : Optional[str]
            Glob pattern for wake element data filenames (default is
            `*WakeElemData_*.csv`).
        field_fnames_pattern : Optional[str]
            Glob pattern for field data filenames (default is
            `*FieldData_*.csv`).
        probe_fnames_pattern : Optional[str]
            Glob pattern for probe data filenames (default is
            `probe_*.csv*`).
        quiet : Optional[bool]
            Set True to hide print statements (default is False).
        """
        # if an input file is specified, use that
        if input_fname:
            self.input_fname = os.path.abspath(os.path.join(run_directory,
                                                            input_fname))
        else:
            # otherwise, look for one using [case_name].in as a glob pattern
            self.input_fname = self.__find_single_file(run_directory,
                                                       case_name + '.in')

        # if a geom file is specified, use that
        if geom_fname:
            self.geom_fname = os.path.abspath(os.path.join(run_directory,
                                                           geom_fname))
        else:
            # otherwise, look for one using [case_name].geom as a glob pattern
            self.geom_fname = self.__find_single_file(run_directory,
                                                      case_name + '.geom')

        # assemble filename patterns
        bladeelem_fname_pattern = case_name + '_ElementData.csv'
        param_fname_pattern     = case_name + '_Param.csv'
        rev_fname_pattern       = case_name + '_RevData.csv'
        time_fname_pattern      = case_name + '_TimeData.csv'

        # Load the input, geometry, blade element, rev-averaged, parameter,
        # and time data. Only one of each file is expected; the helper
        # __find_single_file warns if multiple files (or none) are found.

        # load the input namelist
        if self.input_fname:
            tic = pytime.time()
            self.input = CactusInput(self.input_fname)
            if not quiet:
                print('Read input namelist in %2.2f s' %
                      (pytime.time() - tic))
        else:
            warnings.warn("Input file not loaded.")

        # load geometry data
        if self.geom_fname:
            tic = pytime.time()
            # load the geometry data
            self.geom = CactusGeom(self.geom_fname)
            if not quiet:
                print('Read geometry file in %2.2f s' %
                      (pytime.time() - tic))
        else:
            warnings.warn("Geometry file not loaded.")

        # load parameter data
        self.param_fname = self.__find_single_file(
            run_directory, param_fname_pattern)
        if self.param_fname:
            tic = pytime.time()
            self.param_data  = load_data(self.param_fname)
            if not quiet:
                print('Read parameter data in %2.2f s' %
                      (pytime.time() - tic))
        else:
            warnings.warn("Parameter data file not loaded.")

        # load revolution-averaged data
        self.rev_fname = self.__find_single_file(
            run_directory, rev_fname_pattern)
        if self.rev_fname:
            tic = pytime.time()
            self.rev_data  = load_data(self.rev_fname)
            if not quiet:
                print('Read revolution-averaged data in %2.2f s' %
                      (pytime.time() - tic))

        else:
            warnings.warn("Revolution-averaged data file not loaded.")

        # load blade element data
        self.bladeelem_fname = self.__find_single_file(
            run_directory, bladeelem_fname_pattern)
        if self.bladeelem_fname:
            tic = pytime.time()
            self.bladeelem_data  = CactusBladeElem(self.bladeelem_fname)
            if not quiet:
                print('Read blade element data in %2.2f s' %
                      (pytime.time() - tic))
        else:
            warnings.warn("Blade element data file not loaded.")

        # time data
        self.time_fname = self.__find_single_file(
            run_directory, time_fname_pattern)
        if self.time_fname:
            tic = pytime.time()
            self.time_data  = load_data(self.time_fname)
            if not quiet:
                print('Read time data in %2.2f s' % (pytime.time() - tic))
        else:
            warnings.warn("Time data file not loaded.")

        # The following sections initialize the CactusWakeElems, CactusField,
        # and CactusProbes classes. Initializing these classes will search for
        # files in the run_directory and parse the first line of each. This
        # may be slow, depending on the number of files.

        # search for wake element, field files, and probe files anywhere in
        # the run directory
        if load_wakeelem_output:
            self.wake_filenames = sorted(recursive_glob(run_directory,
                                                        wakeelem_fnames_pattern))
            if self.wake_filenames:
                self.wakeelems = CactusWakeElems(self.wake_filenames)
            else:
                if not quiet:
                    print('Warning: Could not find any wake element data '
                          'files in the run directory matching %s.' %
                          wakeelem_fnames_pattern)

        if load_field_output:
            self.field_filenames = sorted(recursive_glob(run_directory,
                                                         field_fnames_pattern))
            if self.field_filenames:
                self.field = CactusField(self.field_filenames)
            else:
                if not quiet:
                    print('Warning: Could not find any field data files in '
                          'the run directory matching %s.' %
                          field_fnames_pattern)

        if load_probe_output:
            self.probe_filenames = sorted(recursive_glob(run_directory,
                                                         probe_fnames_pattern))
            if self.probe_filenames:
                self.probes = CactusProbes(self.probe_filenames)
            else:
                if not quiet:
                    print('Warning: Could not find any probe data files in '
                          'the run directory matching %s.' %
                          probe_fnames_pattern)

        if not quiet:
            print('Loaded case `%s` from path `%s`\n' % (case_name,
                                                         run_directory))
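A hypothetical usage sketch for this constructor. The class name and the paths below are assumptions; only the keyword arguments shown in the snippet above are taken from it:

# assuming the enclosing class is importable as CactusRun (name not shown above)
run = CactusRun('/path/to/run_directory', 'my_case',
                load_wakeelem_output=False,  # skip the (possibly large) wake files
                quiet=True)

# rev-averaged, parameter, and time data are DataFrames loaded via load_data()
print(run.rev_data.head())
print(run.time_data.columns.tolist())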
    def __init__(self, filename):
        """Initialize class, read in data."""
        self.filename = filename
        self.data = load_data(self.filename)