Example #1
0
def get_runup_data_for_locations_from_file(gauge_filename,
                                           sww_filename,
                                           runup_filename,
                                           size=10,
                                           verbose=False):
    """Read gauge locations from a csv file (header x,y) and, for each
    location, search a square of side 2*'size' centred on it for the
    'max_inundation_height' in 'sww_filename'.  The findings are written
    to 'runup_filename' as easting,northing,runup rows.

    gauge_filename  csv file of gauge positions with columns x and y
    sww_filename    sww file to interrogate for maximum inundation
    runup_filename  output csv file (overwritten)
    size            half-side of the square search region around a gauge
    verbose         True to log each runup found

    WARNING: NO TESTS! 
    """

    from anuga.shallow_water.data_manager import \
        get_maximum_inundation_data

    #read gauge csv file to dictionary
    attribute_dic, title_index_dic = load_csv_as_dict(gauge_filename)
    northing = [float(x) for x in attribute_dic["y"]]
    easting = [float(x) for x in attribute_dic["x"]]

    log.critical('Reading %s' % sww_filename)

    # Open the output once for the whole run; the original re-opened the
    # file in append mode for every gauge.  File contents are unchanged.
    fd = open(runup_filename, "w")
    fd.write("easting,northing,runup \n ")

    for i in range(len(northing)):
        # Corners of the axis-aligned square (side 2*size) around gauge i.
        poly = [[int(easting[i] + size),
                 int(northing[i] + size)],
                [int(easting[i] + size),
                 int(northing[i] - size)],
                [int(easting[i] - size),
                 int(northing[i] - size)],
                [int(easting[i] - size),
                 int(northing[i] + size)]]

        run_up, x_y = get_maximum_inundation_data(filename=sww_filename,
                                                  polygon=poly,
                                                  verbose=False)

        # If no runup was found report 0 instead of None.
        if run_up is None: run_up = 0
        if x_y is None: x_y = [0, 0]

        if verbose:
            log.critical('maximum inundation runup near %s is %s meters' %
                         (x_y, run_up))

        #writes to file
        fd.write('%s,%s,%s \n' % (x_y[0], x_y[1], run_up))

    fd.close()
Example #2
0
def get_runup_data_for_locations_from_file(gauge_filename,
                                           sww_filename,
                                           runup_filename,
                                           size=10,
                                           verbose=False):
    """Read gauge positions (csv with header x,y) and report, for each,
    the 'max_inundation_height' found in 'sww_filename' within a square
    of side 2*'size' centred on the gauge.  Results are written to
    'runup_filename' as easting,northing,runup rows.

    gauge_filename  csv file of gauge positions with columns x and y
    sww_filename    sww file to interrogate for maximum inundation
    runup_filename  output csv file (overwritten)
    size            half-side of the square search region around a gauge
    verbose         True to log each runup found

    WARNING: NO TESTS! 
    """

    from anuga.shallow_water.data_manager import \
        get_maximum_inundation_data

    #read gauge csv file to dictionary
    attribute_dic, title_index_dic = load_csv_as_dict(gauge_filename)
    northing = [float(x) for x in attribute_dic["y"]]
    easting = [float(x) for x in attribute_dic["x"]]

    log.critical('Reading %s' % sww_filename)

    # One open for the entire run instead of an open/append/close per
    # gauge; the written file contents are unchanged.
    with open(runup_filename, "w") as outfile:
        outfile.write("easting,northing,runup \n ")

        for i in range(len(northing)):
            # corners of the 2*size square centred on gauge i
            poly = [[int(easting[i]+size), int(northing[i]+size)],
                    [int(easting[i]+size), int(northing[i]-size)],
                    [int(easting[i]-size), int(northing[i]-size)],
                    [int(easting[i]-size), int(northing[i]+size)]]

            run_up, x_y = get_maximum_inundation_data(filename=sww_filename,
                                                      polygon=poly,
                                                      verbose=False)

            #if no runup will return 0 instead of None
            if run_up is None: run_up = 0
            if x_y is None: x_y = [0, 0]

            if verbose:
                log.critical('maximum inundation runup near %s is %s meters'
                             % (x_y, run_up))

            #writes to file
            outfile.write('%s,%s,%s \n' % (x_y[0], x_y[1], run_up))
    def _check_generated_sts(self):
        """ check that we can read data out of the file

        Compares the contents of the generated sts (NetCDF) file against
        the csv data it was built from, then removes the file.
        """
        # sts_out, testfile_csv, lat and lon are module-level fixtures,
        # presumably set up by the test that generated the file -- TODO
        # confirm against the enclosing test module.
        sts = NetCDFFile(sts_out,'r')
        
        data, names = load_csv_as_dict(testfile_csv, delimiter=' ', d_type = num.float64)
        
        assert sts.latitude == lat, 'latitude does not match'
        assert sts.longitude == lon, 'longitude does not match'
        
        # every csv column must have become exactly one sts variable
        assert len(sts.variables) == len(data), 'num variables does not match'
        
        # make sure data is returned in exactly the expected format
        for key, values in data.items():
            assert list(sts.variables[key][:]) == values, \
                                        'stored data does not match'

        if not sys.platform == 'win32':
            # Windows cannot delete the file for some reason.
            os.remove(sts_out)
Example #4
0
    def _check_generated_sts(self):
        """Verify the generated sts file round-trips the source csv data,
        then clean the file up (except on Windows, where deletion fails).
        """
        sts = NetCDFFile(sts_out, 'r')

        data, names = load_csv_as_dict(testfile_csv, delimiter=' ',
                                       d_type=num.float64)

        # position attributes must match the fixtures
        assert sts.latitude == lat, 'latitude does not match'
        assert sts.longitude == lon, 'longitude does not match'

        # one sts variable per csv column, no more and no fewer
        assert len(sts.variables) == len(data), 'num variables does not match'

        # each column must come back exactly as it went in
        for key, values in data.items():
            stored = list(sts.variables[key][:])
            assert stored == values, 'stored data does not match'

        if sys.platform != 'win32':
            # Windows cannot delete the file for some reason.
            os.remove(sts_out)
Example #5
0
def csv2sts(infile, outfile, latitude = None, longitude = None,
                    verbose = False):
    """
        Take a csv file and convert it to an sts file.
        
        May be used for timeseries, or any other data.

        infile: space-delimited csv whose header row names the columns.
        outfile: sts (NetCDF) file to create.
        latitude, longitude: optional position attributes to store on the
            output file.  0.0 is a valid coordinate and is stored.
        verbose: True to log the input data and its length.

        Raises IOError if the input file is empty or unreadable.
    """
        
    timeseries_data, col_names = load_csv_as_dict(infile, delimiter=' ')
    
    if not col_names:
        raise IOError('csv2sts: file %s is empty or unreadable.' % infile)
    
    if verbose:
        log.critical('csv2sts input data:')
        for col in col_names:
            log.critical('column ' + col + ':')
            log.critical(timeseries_data[col])        

    # All columns share the same length; size the dimension from any one.
    # next(iter(...)) works on both Python 2 and 3 (dict.values()[0] is
    # Python 2 only).
    data_len = len(next(iter(timeseries_data.values())))
    if verbose:
        log.critical('   data length = %d.' % data_len)
    
    fid = NetCDFFile(outfile, netcdf_mode_w)

    fid.createDimension('number_of_timesteps', data_len)

    # 'is not None' so that a 0.0 coordinate (equator / prime meridian) is
    # stored; the original truthiness test silently dropped it.
    if latitude is not None:
        fid.latitude = latitude
        
    if longitude is not None:
        fid.longitude = longitude
    
    for col in col_names:
        fid.createVariable(col, netcdf_float, ('number_of_timesteps',))
        
        fid.variables[col][:] = timeseries_data[col]

    fid.close()
Example #6
0
    def __init__(self,
                 file_name,
                 latitude_title=LAT_TITLE,
                 longitude_title=LONG_TITLE,
                 is_x_y_locations=None,
                 x_title=X_TITLE,
                 y_title=Y_TITLE,
                 refine_polygon=None,
                 title_check_list=None):
        """
        This class is for handling the exposure csv file.
        It reads the file in and converts the lats and longs to a geospatial
        data object.
        Use the methods to read and write columns.

        The format of the csv files it reads is;
           The first row is a title row.
           comma's are the delimiters
           each column is a 'set' of data

        Feel free to use/expand it to read other csv files.

        It is not for adding and deleting rows

        Parameters:
            file_name: csv file to load.
            latitude_title / longitude_title: column titles holding
                lat/long values.
            is_x_y_locations: if True, location is taken from the x/y
                columns instead of lat/long; if None it is inferred when
                the lat/long columns are missing.
            x_title / y_title: column titles holding x/y values.
            refine_polygon: accepted but unused here -- TODO confirm
                whether any subclass or caller relies on it.
            title_check_list: passed through to load_csv_as_dict for
                header validation.

        Can geospatial handle string attributes? It's not made for them.
        Currently it can't load and save string att's.

        So just use geospatial to hold the x, y and georef? Bad, since
        different att's are in diferent structures.  Not so bad, the info
        to write if the .csv file is saved is in attribute_dic

        The location info is in the geospatial attribute.
        """

        self._file_name = file_name
        self._geospatial = None  #

        # self._attribute_dic is a dictionary.
        #The keys are the column titles.
        #The values are lists of column data

        # self._title_index_dic is a dictionary.
        #The keys are the column titles.
        #The values are the index positions of file columns.
        self._attribute_dic, self._title_index_dic = \
            load_csv_as_dict(self._file_name, \
            title_check_list=title_check_list)
        try:
            #Have code here that handles caps or lower
            lats = self._attribute_dic[latitude_title]
            longs = self._attribute_dic[longitude_title]
        except KeyError:
            # Lat/long columns are absent: fall back to x/y locations
            # unless the caller explicitly passed is_x_y_locations=False.
            # maybe a warning..
            #Let's see if this works..
            if False != is_x_y_locations:
                is_x_y_locations = True
            pass
        else:
            self._geospatial = Geospatial_data(latitudes=lats,
                                               longitudes=longs)

        if is_x_y_locations is True:
            if self._geospatial is not None:
                pass  #fixme throw an error
            try:
                xs = self._attribute_dic[x_title]
                ys = self._attribute_dic[y_title]
                # map(None, xs, ys) is the Python 2 pad-with-None zip;
                # here xs and ys are presumably equal length -- TODO confirm.
                points = [[float(i), float(j)] for i, j in map(None, xs, ys)]
            except KeyError:
                # maybe a warning..
                msg = "Could not find location information."
                # Python 2 raise syntax; this file predates Python 3.
                raise TitleValueError, msg
            else:
                self._geospatial = Geospatial_data(data_points=points)
Example #7
0
def csv2timeseries_graphs(directories_dic={},
                          output_dir='',
                          base_name=None,
                          plot_numbers='',
                          quantities=['stage'],
                          extra_plot_name='',
                          assess_all_csv_files=True,
                          create_latex=False,
                          verbose=False):
    """
    Read in csv files that have the right header information and
    plot time series such as Stage, Speed, etc. Will also plot several
    time series on one plot. Filenames must follow this convention,
    <base_name><plot_number>.csv eg gauge_timeseries3.csv
    
    NOTE: relies that 'elevation' is in the csv file!

    Each file represents a location and within each file there are
    time, quantity columns.
    
    For example:    
    if "directories_dic" defines 4 directories and in each directories
    there is a csv files corresponding to the right "plot_numbers", 
    this will create a plot with 4 lines one for each directory AND 
    one plot for each "quantities".  ??? FIXME: unclear.
    
    Usage:
        csv2timeseries_graphs(directories_dic={'slide'+sep:['Slide',0, 0],
                                       'fixed_wave'+sep:['Fixed Wave',0,0]},
                            output_dir='fixed_wave'+sep,
                            base_name='gauge_timeseries_',
                            plot_numbers='',
                            quantities=['stage','speed'],
                            extra_plot_name='',
                            assess_all_csv_files=True,
                            create_latex=False,
                            verbose=True)
            this will create one plot for stage with both 'slide' and 
            'fixed_wave' lines on it for stage and speed for each csv
            file with 'gauge_timeseries_' as the prefix. The graphs 
            will be in the output directory 'fixed_wave' and the graph
            axis will be determined by assessing all the 
    
    ANOTHER EXAMPLE
        new_csv2timeseries_graphs(directories_dic={'slide'+sep:['Slide',0, 0],
                                       'fixed_wave'+sep:['Fixed Wave',0,0]},
                            output_dir='fixed_wave'+sep,
                            base_name='gauge_timeseries_',
                            plot_numbers=['1-3'],
                            quantities=['stage','speed'],
                            extra_plot_name='',
                            assess_all_csv_files=False,
                            create_latex=False,
                            verbose=True)
        This will plot csv files called gauge_timeseries_1.csv and 
        gauge_timeseries3.csv from both 'slide' and 'fixed_wave' directories
        to 'fixed_wave'. There will be 4 plots created two speed and two stage
        one for each csv file. There will be two lines on each of these plots.
        And the axis will have been determined from only these files, had 
        assess_all_csv_files = True all csv file with 'gauges_timeseries_'
        prefix would have been assessed.
    
    ANOTHER EXAMPLE    
         csv2timeseries_graphs({'J:'+sep+'anuga_validation'+sep:['new',20,-.1],
                                   'J:'+sep+'conical_island'+sep:['test',0,0]},
                                   output_dir='',
                                   plot_numbers=['1','3'],
                                   quantities=['stage','depth','bearing'],
                                   base_name='gauge_b',
                                   assess_all_csv_files=True,
                                  verbose=True)    
        
            This will produce one plot for each quantity (therefore 3) in the
            current directory, each plot will have 2 lines on them. The first
            plot named 'new' will have the time offseted by 20secs and the stage
            height adjusted by -0.1m
        
    Inputs:
        directories_dic: dictionary of directory with values (plot 
                         legend name for directory), (start time of 
                         the time series) and the (value to add to 
                         stage if needed). For example
                         {dir1:['Anuga_ons',5000, 0],
                          dir2:['b_emoth',5000,1.5],
                          dir3:['b_ons',5000,1.5]}
                         Having multiple directories defined will plot them on 
                         one plot, therefore there will be 3 lines on each of
                         these plot. If you only want one line per plot call
                         csv2timeseries_graph separately for each directory,
                         eg only have one directory in the 'directories_dic' in
                         each call. 
                         
        output_dir: directory for the plot outputs. Only important to define when
                    you have more than one directory in your directories_dic, if
                    you have not defined it and you have multiple directories in
                    'directories_dic' there will be plots in each directory,
                    however only one directory will contain the complete
                    plot/graphs.
        
        base_name: Is used a couple of times.
                   1) to find the csv files to be plotted if there is no
                      'plot_numbers' then csv files with 'base_name' are plotted
                   2) in the title of the plots, the length of base_name is 
                      removed from the front of the filename to be used in the
                      title. 
                   This could be changed if needed. 
                   Note is ignored if assess_all_csv_files=True
        
        plot_numbers: a String list of numbers to plot. For example 
                      [0-4,10,15-17] will read and attempt to plot
                      the follow 0,1,2,3,4,10,15,16,17
                      NOTE: if no plot numbers this will create one plot per
                            quantity, per gauge

        quantities: Will get available quantities from the header in the csv
                    file.  Quantities must be one of these.
                    NOTE: ALL QUANTITY NAMES MUST BE lower case!
                    
        extra_plot_name: A string that is appended to the end of the 
                         output filename.
                    
        assess_all_csv_files: if true it will read ALL csv file with
                             "base_name", regardless of 'plot_numbers'
                              and determine a uniform set of axes for 
                              Stage, Speed and Momentum. IF FALSE it 
                              will only read the csv file within the
                             'plot_numbers'
                             
        create_latex: NOT IMPLEMENTED YET!! sorry Jane....
        
    OUTPUTS: saves the plots to 
              <output_dir><base_name><plot_number><extra_plot_name>.png
    """

    #     try:
    #         import pylab
    #     except ImportError:
    #         msg='csv2timeseries_graphs needs pylab to be installed correctly'
    #         raise Exception(msg)
    #             #ANUGA don't need pylab to work so the system doesn't
    #             #rely on pylab being installed
    #         return

    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as pylab
    except ImportError:
        # Plotting is optional; a missing/broken matplotlib skips it.
        # (Was a bare 'except:' which also swallowed KeyboardInterrupt.)
        return

    from os import sep
    from anuga.utilities.file_utils import get_all_files_with_extension

    # Work on a copy: 'time' is inserted below, and the original code
    # mutated both the caller's list and the mutable default argument
    # (so repeated calls accumulated extra 'time' entries).
    quantities = list(quantities)

    seconds_in_hour = 3600
    seconds_in_minutes = 60

    quantities_label = {}
    #    quantities_label['time'] = 'time (hours)'
    quantities_label['time'] = 'time (minutes)'
    quantities_label['stage'] = 'wave height (m)'
    quantities_label['speed'] = 'speed (m/s)'
    quantities_label['momentum'] = 'momentum (m^2/sec)'
    quantities_label['depth'] = 'water depth (m)'
    quantities_label['xmomentum'] = 'momentum (m^2/sec)'
    quantities_label['ymomentum'] = 'momentum (m^2/sec)'
    quantities_label['bearing'] = 'degrees (o)'
    quantities_label['elevation'] = 'elevation (m)'

    if extra_plot_name != '':
        extra_plot_name = '_' + extra_plot_name

    new_plot_numbers = []
    #change plot_numbers to list, eg ['0-4','10']
    #to ['0','1','2','3','4','10']
    for i, num_string in enumerate(plot_numbers):
        if '-' in num_string:
            start = int(num_string[:num_string.rfind('-')])
            end = int(num_string[num_string.rfind('-') + 1:]) + 1
            for x in range(start, end):
                new_plot_numbers.append(str(x))
        else:
            new_plot_numbers.append(num_string)

    #finds all the files that fit the specs provided and return a list of them
    #so to help find a uniform max and min for the plots...
    list_filenames = []
    all_csv_filenames = []
    if verbose: log.critical('Determining files to access for axes ranges.')

    for i, directory in enumerate(directories_dic.keys()):
        all_csv_filenames.append(
            get_all_files_with_extension(directory, base_name, '.csv'))

        filenames = []
        if plot_numbers == '':
            list_filenames.append(
                get_all_files_with_extension(directory, base_name, '.csv'))
        else:
            for number in new_plot_numbers:
                filenames.append(base_name + number)
            list_filenames.append(filenames)

    #use all the files to get the values for the plot axis
    max_start_time = -1000.
    min_start_time = 100000

    if verbose: log.critical('Determining uniform axes')

    #this entire loop is to determine the min and max range for the
    #axes of the plots

    #    quantities.insert(0,'elevation')
    quantities.insert(0, 'time')

    directory_quantity_value = {}
    #    quantity_value={}
    min_quantity_value = {}
    max_quantity_value = {}

    for i, directory in enumerate(directories_dic.keys()):
        filename_quantity_value = {}
        if assess_all_csv_files == False:
            which_csv_to_assess = list_filenames[i]
        else:
            #gets list of filenames for directory "i"
            which_csv_to_assess = all_csv_filenames[i]

        for j, filename in enumerate(which_csv_to_assess):
            quantity_value = {}

            dir_filename = join(directory, filename)
            attribute_dic, title_index_dic = load_csv_as_dict(dir_filename +
                                                              '.csv')
            directory_start_time = directories_dic[directory][1]
            directory_add_tide = directories_dic[directory][2]

            if verbose: log.critical('reading: %s.csv' % dir_filename)

            #add time to get values
            for k, quantity in enumerate(quantities):
                quantity_value[quantity] = [
                    float(x) for x in attribute_dic[quantity]
                ]

                #add tide to stage if provided
                if quantity == 'stage':
                    quantity_value[quantity] = num.array(
                        quantity_value[quantity],
                        num.float) + directory_add_tide

                #condition to find max and mins for all the plots
                # populate the list with something when i=0 and j=0 and
                # then compare to the other values to determine abs max and min
                if i == 0 and j == 0:
                    min_quantity_value[quantity], \
                        max_quantity_value[quantity] = \
                            get_min_max_values(quantity_value[quantity])

                    if quantity != 'time':
                        min_quantity_value[quantity] = \
                            min_quantity_value[quantity] *1.1
                        max_quantity_value[quantity] = \
                            max_quantity_value[quantity] *1.1
                else:
                    # q_min/q_max: renamed from 'min'/'max' which shadowed
                    # the builtins for the rest of this loop body.
                    q_min, q_max = get_min_max_values(quantity_value[quantity])

                    # min and max are multipled by "1+increase_axis" to get axes
                    # that are slighty bigger than the max and mins
                    # so the plots look good.

                    increase_axis = (q_max - q_min) * 0.05
                    if q_min <= min_quantity_value[quantity]:
                        if quantity == 'time':
                            min_quantity_value[quantity] = q_min
                        else:
                            if round(q_min, 2) == 0.00:
                                min_quantity_value[quantity] = -increase_axis
#                                min_quantity_value[quantity] = -2.
#min_quantity_value[quantity] = \
#    -max_quantity_value[quantity]*increase_axis
                            else:
                                #                                min_quantity_value[quantity] = \
                                #                                    min*(1+increase_axis)
                                min_quantity_value[
                                    quantity] = q_min - increase_axis

                    if q_max > max_quantity_value[quantity]:
                        if quantity == 'time':
                            max_quantity_value[quantity] = q_max
                        else:
                            max_quantity_value[quantity] = q_max + increase_axis


#                            max_quantity_value[quantity]=max*(1+increase_axis)

#set the time... ???
            if min_start_time > directory_start_time:
                min_start_time = directory_start_time
            if max_start_time < directory_start_time:
                max_start_time = directory_start_time

            filename_quantity_value[filename] = quantity_value

        directory_quantity_value[directory] = filename_quantity_value

    #final step to uniform axis for the graphs
    quantities_axis = {}

    for i, quantity in enumerate(quantities):
        quantities_axis[quantity] = (float(min_start_time) \
                                         / float(seconds_in_minutes),
                                     (float(max_quantity_value['time']) \
                                          + float(max_start_time)) \
                                              / float(seconds_in_minutes),
                                     min_quantity_value[quantity],
                                     max_quantity_value[quantity])

        if verbose and (quantity != 'time' and quantity != 'elevation'):
            log.critical(
                'axis for quantity %s are x:(%s to %s)%s '
                'and y:(%s to %s)%s' %
                (quantity, quantities_axis[quantity][0],
                 quantities_axis[quantity][1], quantities_label['time'],
                 quantities_axis[quantity][2], quantities_axis[quantity][3],
                 quantities_label[quantity]))

    cstr = ['b', 'r', 'g', 'c', 'm', 'y', 'k']

    if verbose: log.critical('Now start to plot')

    i_max = len(directories_dic.keys())
    legend_list_dic = {}
    legend_list = []
    for i, directory in enumerate(directories_dic.keys()):
        if verbose:
            log.critical('Plotting in %s %s' % (directory, new_plot_numbers))

        # FIXME THIS SORT IS VERY IMPORTANT
        # Without it the assigned plot numbers may not work correctly
        # there must be a better way
        list_filenames[i].sort()
        for j, filename in enumerate(list_filenames[i]):
            if verbose: log.critical('Starting %s' % filename)

            directory_name = directories_dic[directory][0]
            directory_start_time = directories_dic[directory][1]
            directory_add_tide = directories_dic[directory][2]

            # create an if about the start time and tide height if don't exist
            attribute_dic, title_index_dic = load_csv_as_dict(directory + sep +
                                                              filename +
                                                              '.csv')
            #get data from dict in to list
            #do maths to list by changing to array
            t = (num.array(
                directory_quantity_value[directory][filename]['time']) +
                 directory_start_time) / seconds_in_minutes

            #finds the maximum elevation, used only as a test
            # and as info in the graphs
            elevation = [float(x) for x in attribute_dic["elevation"]]

            min_ele, max_ele = get_min_max_values(elevation)

            if min_ele != max_ele:
                # Report the file actually read here; the original logged
                # the stale 'dir_filename' left over from the axes loop.
                log.critical("Note! Elevation changes in %s"
                             % (directory + sep + filename))

            # creates a dictionary with keys that is the filename and attributes
            # are a list of lists containing 'directory_name' and 'elevation'.
            # This is used to make the contents for the legends in the graphs,
            # this is the name of the model and the elevation.  All in this
            # great one liner from DG. If the key 'filename' doesn't exist it
            # creates the entry if the entry exist it appends to the key.

            legend_list_dic.setdefault(filename,[]) \
                .append([directory_name, round(max_ele, 3)])

            # creates a LIST for the legend on the last iteration of the
            # directories which is when "legend_list_dic" has been fully
            # populated. Creates a list of strings which is used in the legend
            # only runs on the last iteration for all the gauges(csv) files
            # empties the list before creating it

            if i == i_max - 1:
                legend_list = []

                for name_and_elevation in legend_list_dic[filename]:
                    legend_list.append('%s (elevation = %sm)'\
                                       % (name_and_elevation[0],
                                          name_and_elevation[1]))

            #skip time and elevation so it is not plotted!
            for k, quantity in enumerate(quantities):
                if quantity != 'time' and quantity != 'elevation':
                    pylab.figure(int(k * 100 + j))
                    pylab.ylabel(quantities_label[quantity])
                    pylab.plot(t,
                               directory_quantity_value[directory]\
                                                       [filename][quantity],
                               c = cstr[i], linewidth=1)
                    pylab.xlabel(quantities_label['time'])
                    pylab.axis(quantities_axis[quantity])
                    pylab.legend(legend_list, loc='upper right')

                    pylab.title('%s at %s gauge' %
                                (quantity, filename[len(base_name):]))

                    if output_dir == '':
                        figname = '%s%s%s_%s%s.png' \
                                  % (directory, sep, filename, quantity,
                                     extra_plot_name)
                    else:
                        figname = '%s%s%s_%s%s.png' \
                                  % (output_dir, sep, filename, quantity,
                                     extra_plot_name)

                    if verbose: log.critical('saving figure here %s' % figname)

                    pylab.savefig(figname)

    if verbose: log.critical('Closing all plots')

    pylab.close('all')
    del pylab

    if verbose: log.critical('Finished closing plots')
Example #8
0
def csv2timeseries_graphs(directories_dic={},
                          output_dir='',
                          base_name=None,
                          plot_numbers='',
                          quantities=['stage'],
                          extra_plot_name='',
                          assess_all_csv_files=True,
                          create_latex=False,
                          verbose=False):
    """
    Read in csv files that have the right header information and
    plot time series such as Stage, Speed, etc. Will also plot several
    time series on one plot. Filenames must follow this convention,
    <base_name><plot_number>.csv eg gauge_timeseries3.csv

    NOTE: relies that 'elevation' is in the csv file!

    Each file represents a location and within each file there are
    time, quantity columns.

    For example:
    if "directories_dic" defines 4 directories and in each directories
    there is a csv files corresponding to the right "plot_numbers",
    this will create a plot with 4 lines one for each directory AND
    one plot for each "quantities".  ??? FIXME: unclear.

    Usage:
        csv2timeseries_graphs(directories_dic={'slide'+sep:['Slide',0, 0],
                                       'fixed_wave'+sep:['Fixed Wave',0,0]},
                            output_dir='fixed_wave'+sep,
                            base_name='gauge_timeseries_',
                            plot_numbers='',
                            quantities=['stage','speed'],
                            extra_plot_name='',
                            assess_all_csv_files=True,
                            create_latex=False,
                            verbose=True)
            this will create one plot for stage with both 'slide' and
            'fixed_wave' lines on it for stage and speed for each csv
            file with 'gauge_timeseries_' as the prefix. The graphs
            will be in the output directory 'fixed_wave' and the graph
            axis will be determined by assessing all the

    ANOTHER EXAMPLE
        new_csv2timeseries_graphs(directories_dic={'slide'+sep:['Slide',0, 0],
                                       'fixed_wave'+sep:['Fixed Wave',0,0]},
                            output_dir='fixed_wave'+sep,
                            base_name='gauge_timeseries_',
                            plot_numbers=['1-3'],
                            quantities=['stage','speed'],
                            extra_plot_name='',
                            assess_all_csv_files=False,
                            create_latex=False,
                            verbose=True)
        This will plot csv files called gauge_timeseries_1.csv and
        gauge_timeseries3.csv from both 'slide' and 'fixed_wave' directories
        to 'fixed_wave'. There will be 4 plots created two speed and two stage
        one for each csv file. There will be two lines on each of these plots.
        And the axis will have been determined from only these files, had
        assess_all_csv_files = True all csv file with 'gauges_timeseries_'
        prefix would have been assessed.

    ANOTHER EXAMPLE
         csv2timeseries_graphs({'J:'+sep+'anuga_validation'+sep:['new',20,-.1],
                                   'J:'+sep+'conical_island'+sep:['test',0,0]},
                                   output_dir='',
                                   plot_numbers=['1','3'],
                                   quantities=['stage','depth','bearing'],
                                   base_name='gauge_b',
                                   assess_all_csv_files=True,
                                  verbose=True)

            This will produce one plot for each quantity (therefore 3) in the
            current directory, each plot will have 2 lines on them. The first
            plot named 'new' will have the time offseted by 20secs and the stage
            height adjusted by -0.1m

    Inputs:
        directories_dic: dictionary of directory with values (plot
                         legend name for directory), (start time of
                         the time series) and the (value to add to
                         stage if needed). For example
                         {dir1:['Anuga_ons',5000, 0],
                          dir2:['b_emoth',5000,1.5],
                          dir3:['b_ons',5000,1.5]}
                         Having multiple directories defined will plot them on
                         one plot, therefore there will be 3 lines on each of
                         these plot. If you only want one line per plot call
                         csv2timeseries_graph separately for each directory,
                         eg only have one directory in the 'directories_dic' in
                         each call.

        output_dir: directory for the plot outputs. Only important to define
                    when you have more than one directory in your
                    directories_dic, if you have not defined it and you have
                    multiple directories in 'directories_dic' there will be
                    plots in each directory, however only one directory will
                    contain the complete plot/graphs.

        base_name: Is used a couple of times.
                   1) to find the csv files to be plotted if there is no
                      'plot_numbers' then csv files with 'base_name' are plotted
                   2) in the title of the plots, the length of base_name is
                      removed from the front of the filename to be used in the
                      title.
                   This could be changed if needed.
                   Note is ignored if assess_all_csv_files=True

        plot_numbers: a String list of numbers to plot. For example
                      [0-4,10,15-17] will read and attempt to plot
                      the follow 0,1,2,3,4,10,15,16,17
                      NOTE: if no plot numbers this will create one plot per
                            quantity, per gauge

        quantities: Will get available quantities from the header in the csv
                    file.  Quantities must be one of these.
                    NOTE: ALL QUANTITY NAMES MUST BE lower case!

        extra_plot_name: A string that is appended to the end of the
                         output filename.

        assess_all_csv_files: if true it will read ALL csv file with
                             "base_name", regardless of 'plot_numbers'
                              and determine a uniform set of axes for
                              Stage, Speed and Momentum. IF FALSE it
                              will only read the csv file within the
                             'plot_numbers'

        create_latex: NOT IMPLEMENTED YET!! sorry Jane....

    OUTPUTS: saves the plots to
              <output_dir><base_name><plot_number><extra_plot_name>.png
    """

    # ANUGA does not require matplotlib to work, so quietly skip plotting
    # if it is unavailable (only ImportError is swallowed; anything else
    # is a real problem and should propagate).
    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as pylab
    except ImportError:
        return

    from os import sep
    from anuga.utilities.file_utils import get_all_files_with_extension

    seconds_in_minutes = 60

    # Axis labels keyed by quantity name (time currently plotted in minutes).
    quantities_label = {}
    quantities_label['time'] = 'time (minutes)'
    quantities_label['stage'] = 'wave height (m)'
    quantities_label['speed'] = 'speed (m/s)'
    quantities_label['momentum'] = 'momentum (m^2/sec)'
    quantities_label['depth'] = 'water depth (m)'
    quantities_label['xmomentum'] = 'momentum (m^2/sec)'
    quantities_label['ymomentum'] = 'momentum (m^2/sec)'
    quantities_label['bearing'] = 'degrees (o)'
    quantities_label['elevation'] = 'elevation (m)'

    if extra_plot_name != '':
        extra_plot_name = '_' + extra_plot_name

    # Expand plot_numbers ranges, eg ['0-4','10'] -> ['0','1','2','3','4','10']
    new_plot_numbers = []
    for num_string in plot_numbers:
        if '-' in num_string:
            start = int(num_string[:num_string.rfind('-')])
            end = int(num_string[num_string.rfind('-') + 1:]) + 1
            for x in range(start, end):
                new_plot_numbers.append(str(x))
        else:
            new_plot_numbers.append(num_string)

    # Find all the files that fit the specs provided and return a list of
    # them so to help find a uniform max and min for the plots...
    list_filenames = []
    all_csv_filenames = []
    if verbose: log.critical('Determining files to access for axes ranges.')

    for i, directory in enumerate(directories_dic.keys()):
        all_csv_filenames.append(get_all_files_with_extension(directory,
                                                              base_name, '.csv'))

        filenames = []
        if plot_numbers == '':
            list_filenames.append(get_all_files_with_extension(directory,
                                                               base_name, '.csv'))
        else:
            for number in new_plot_numbers:
                filenames.append(base_name + number)
            list_filenames.append(filenames)

    # Use all the files to get the values for the plot axis
    max_start_time = -1000.
    min_start_time = 100000

    if verbose: log.critical('Determining uniform axes')

    # This entire loop is to determine the min and max range for the
    # axes of the plots.

    # Work on a local copy: the original code mutated the caller's list
    # (and the shared default argument) with the insert below, so repeated
    # calls accumulated extra 'time' entries.
    quantities = list(quantities)
    quantities.insert(0, 'time')

    directory_quantity_value = {}
    min_quantity_value = {}
    max_quantity_value = {}

    for i, directory in enumerate(directories_dic.keys()):
        filename_quantity_value = {}
        if assess_all_csv_files == False:
            which_csv_to_assess = list_filenames[i]
        else:
            # Gets list of filenames for directory "i"
            which_csv_to_assess = all_csv_filenames[i]

        for j, filename in enumerate(which_csv_to_assess):
            quantity_value = {}

            dir_filename = join(directory, filename)
            attribute_dic, title_index_dic = load_csv_as_dict(dir_filename
                                                              + '.csv')
            directory_start_time = directories_dic[directory][1]
            directory_add_tide = directories_dic[directory][2]

            if verbose: log.critical('reading: %s.csv' % dir_filename)

            # Add time to get values
            for k, quantity in enumerate(quantities):
                quantity_value[quantity] = [float(x)
                                            for x in attribute_dic[quantity]]

                # Add tide to stage if provided.
                # NOTE: num.float was removed in NumPy >= 1.24; the builtin
                # float is the documented replacement.
                if quantity == 'stage':
                    quantity_value[quantity] = \
                        num.array(quantity_value[quantity],
                                  float) + directory_add_tide

                # Condition to find max and mins for all the plots:
                # populate the list with something when i=0 and j=0 and
                # then compare to the other values to determine abs max/min.
                if i == 0 and j == 0:
                    min_quantity_value[quantity], \
                        max_quantity_value[quantity] = \
                            get_min_max_values(quantity_value[quantity])

                    if quantity != 'time':
                        min_quantity_value[quantity] = \
                            min_quantity_value[quantity] * 1.1
                        max_quantity_value[quantity] = \
                            max_quantity_value[quantity] * 1.1
                else:
                    # q_min/q_max rather than min/max: don't shadow builtins
                    q_min, q_max = get_min_max_values(quantity_value[quantity])

                    # q_min and q_max are padded by "increase_axis" to get
                    # axes that are slightly bigger than the max and mins
                    # so the plots look good.
                    increase_axis = (q_max - q_min) * 0.05
                    if q_min <= min_quantity_value[quantity]:
                        if quantity == 'time':
                            min_quantity_value[quantity] = q_min
                        else:
                            if round(q_min, 2) == 0.00:
                                min_quantity_value[quantity] = -increase_axis
                            else:
                                min_quantity_value[quantity] = \
                                    q_min - increase_axis

                    if q_max > max_quantity_value[quantity]:
                        if quantity == 'time':
                            max_quantity_value[quantity] = q_max
                        else:
                            max_quantity_value[quantity] = \
                                q_max + increase_axis

            # Track the extreme start times over all directories
            if min_start_time > directory_start_time:
                min_start_time = directory_start_time
            if max_start_time < directory_start_time:
                max_start_time = directory_start_time

            filename_quantity_value[filename] = quantity_value

        directory_quantity_value[directory] = filename_quantity_value

    # Final step to uniform axis for the graphs
    quantities_axis = {}

    for i, quantity in enumerate(quantities):
        quantities_axis[quantity] = (float(min_start_time) \
                                         / float(seconds_in_minutes),
                                     (float(max_quantity_value['time']) \
                                          + float(max_start_time)) \
                                              / float(seconds_in_minutes),
                                     min_quantity_value[quantity],
                                     max_quantity_value[quantity])

        if verbose and (quantity != 'time' and quantity != 'elevation'):
            log.critical('axis for quantity %s are x:(%s to %s)%s '
                         'and y:(%s to %s)%s'
                         % (quantity, quantities_axis[quantity][0],
                            quantities_axis[quantity][1],
                            quantities_label['time'],
                            quantities_axis[quantity][2],
                            quantities_axis[quantity][3],
                            quantities_label[quantity]))

    # One line colour per directory (cycled by index i)
    cstr = ['b', 'r', 'g', 'c', 'm', 'y', 'k']

    if verbose: log.critical('Now start to plot')

    i_max = len(directories_dic.keys())
    legend_list_dic = {}
    legend_list = []
    for i, directory in enumerate(directories_dic.keys()):
        if verbose: log.critical('Plotting in %s %s'
                                 % (directory, new_plot_numbers))

        # FIXME THIS SORT IS VERY IMPORTANT
        # Without it the assigned plot numbers may not work correctly
        # there must be a better way
        list_filenames[i].sort()
        for j, filename in enumerate(list_filenames[i]):
            if verbose: log.critical('Starting %s' % filename)

            directory_name = directories_dic[directory][0]
            directory_start_time = directories_dic[directory][1]
            directory_add_tide = directories_dic[directory][2]

            # Create an if about the start time and tide height if don't exist
            attribute_dic, title_index_dic = load_csv_as_dict(directory + sep
                                                      + filename + '.csv')
            # Get data from dict in to list;
            # do maths to list by changing to array
            t = (num.array(directory_quantity_value[directory][filename]['time'])
                     + directory_start_time) / seconds_in_minutes

            # Finds the maximum elevation, used only as a test
            # and as info in the graphs
            elevation = [float(x) for x in attribute_dic["elevation"]]
            min_ele, max_ele = get_min_max_values(elevation)

            # Log the file currently being plotted (the original code logged
            # a stale dir_filename left over from the assessment loop above).
            if min_ele != max_ele:
                log.critical("Note! Elevation changes in %s"
                             % join(directory, filename))

            # Creates a dictionary with keys that is the filename and
            # attributes are a list of lists containing 'directory_name' and
            # 'elevation'. This is used to make the contents for the legends
            # in the graphs, this is the name of the model and the elevation.
            # If the key 'filename' doesn't exist it creates the entry, if
            # the entry exists it appends to the key.
            legend_list_dic.setdefault(filename, []) \
                .append([directory_name, round(max_ele, 3)])

            # Creates a LIST for the legend on the last iteration of the
            # directories which is when "legend_list_dic" has been fully
            # populated. Creates a list of strings which is used in the legend.
            # Only runs on the last iteration for all the gauges(csv) files;
            # empties the list before creating it.
            if i == i_max - 1:
                legend_list = []

                for name_and_elevation in legend_list_dic[filename]:
                    legend_list.append('%s (elevation = %sm)'\
                                       % (name_and_elevation[0],
                                          name_and_elevation[1]))

            # Skip time and elevation so it is not plotted!
            for k, quantity in enumerate(quantities):
                if quantity != 'time' and quantity != 'elevation':
                    pylab.figure(int(k * 100 + j))
                    pylab.ylabel(quantities_label[quantity])
                    pylab.plot(t,
                               directory_quantity_value[directory]\
                                                       [filename][quantity],
                               c=cstr[i], linewidth=1)
                    pylab.xlabel(quantities_label['time'])
                    pylab.axis(quantities_axis[quantity])
                    pylab.legend(legend_list, loc='upper right')

                    pylab.title('%s at %s gauge'
                                % (quantity, filename[len(base_name):]))

                    if output_dir == '':
                        figname = '%s%s%s_%s%s.png' \
                                  % (directory, sep, filename, quantity,
                                     extra_plot_name)
                    else:
                        figname = '%s%s%s_%s%s.png' \
                                  % (output_dir, sep, filename, quantity,
                                     extra_plot_name)

                    if verbose: log.critical('saving figure here %s' % figname)

                    pylab.savefig(figname)

    if verbose: log.critical('Closing all plots')

    pylab.close('all')
    del pylab

    if verbose: log.critical('Finished closing plots')
Example #9
0
    def __init__(
        self,
        file_name,
        latitude_title=LAT_TITLE,
        longitude_title=LONG_TITLE,
        is_x_y_locations=None,
        x_title=X_TITLE,
        y_title=Y_TITLE,
        refine_polygon=None,
        title_check_list=None,
    ):
        """
        This class is for handling the exposure csv file.
        It reads the file in and converts the lats and longs to a geospatial
        data object.
        Use the methods to read and write columns.

        The format of the csv files it reads is;
           The first row is a title row.
           comma's are the delimiters
           each column is a 'set' of data

        Feel free to use/expand it to read other csv files.

        It is not for adding and deleting rows

        Can geospatial handle string attributes? It's not made for them.
        Currently it can't load and save string att's.

        So just use geospatial to hold the x, y and georef? Bad, since
        different att's are in diferent structures.  Not so bad, the info
        to write if the .csv file is saved is in attribute_dic

        The location info is in the geospatial attribute.
        """

        self._file_name = file_name
        self._geospatial = None

        # self._attribute_dic is a dictionary.
        # The keys are the column titles.
        # The values are lists of column data

        # self._title_index_dic is a dictionary.
        # The keys are the column titles.
        # The values are the index positions of file columns.
        self._attribute_dic, self._title_index_dic = load_csv_as_dict(
            self._file_name, title_check_list=title_check_list
        )
        try:
            # Have code here that handles caps or lower
            lats = self._attribute_dic[latitude_title]
            longs = self._attribute_dic[longitude_title]
        except KeyError:
            # No lat/long columns: fall back to x/y locations unless the
            # caller explicitly passed is_x_y_locations=False.
            if is_x_y_locations is not False:
                is_x_y_locations = True
        else:
            self._geospatial = Geospatial_data(latitudes=lats, longitudes=longs)

        if is_x_y_locations is True:
            if self._geospatial is not None:
                pass  # FIXME: should raise (both lat/long and x/y supplied)
            try:
                xs = self._attribute_dic[x_title]
                ys = self._attribute_dic[y_title]
                # zip replaces the Python-2-only map(None, xs, ys) idiom;
                # both pair up the x and y columns element-wise.
                points = [[float(i), float(j)] for i, j in zip(xs, ys)]
            except KeyError:
                # Python 3 raise syntax (the original used the Python 2
                # form `raise TitleValueError, msg`, a SyntaxError today).
                msg = "Could not find location information."
                raise TitleValueError(msg)
            else:
                self._geospatial = Geospatial_data(data_points=points)