 def read_timeseries_data(self, data_column_names='value', hide_exceptions=False, **filters):  # This function needs to be sped up
     """Reads timeseries data from the database into a dataframe, stored in self.raw_values."""
     # rowmap is used in ordering the data when read from the sql table
     headers = util.sql_read_headers(self.sql_data_table)
     rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
     data_col_ind = [headers.index(data_col) for data_col in util.put_in_list(data_column_names)]
     # read each line of the data_table matching an id and assign the value to self.raw_values
     data = []
     if filters:
         merged_dict = dict({self.data_id_key: self.id}, **filters)
         read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
     else:
         read_data = util.sql_read_table(self.sql_data_table, return_iterable=True,
                                         **{self.data_id_key: self.id})
     if read_data:
         for row in read_data:
             try:
                 data.append([row[i] for i in rowmap] +
                             [row[i] * (self.unit_prefix if hasattr(self, 'unit_prefix') else 1) for i in data_col_ind ])
             except Exception:
                 if not hide_exceptions:
                     print(self.id, row)
         column_names = self.df_index_names + util.put_in_list(data_column_names)
         self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
     else:
         self.raw_values = None
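Every example here leans on a small util module for database access. The sketch below is only a guess at that interface, backed by sqlite3, to make the snippets runnable in isolation; the connection handling, NULL-aware filtering, and single-column flattening are assumptions, not the actual energyPATHWAYS implementation.

import sqlite3

_CON = sqlite3.connect('example.db')  # hypothetical connection, for illustration only

def put_in_list(value):
    """Wrap a scalar in a list; pass lists and tuples through as a list."""
    return list(value) if isinstance(value, (list, tuple)) else [value]

def sql_read_headers(table_name):
    """Return the column names of table_name in table order."""
    cursor = _CON.execute('SELECT * FROM "{}" LIMIT 0'.format(table_name))
    return [description[0] for description in cursor.description]

def sql_read_table(table_name, column_names='*', return_iterable=False,
                   return_unique=False, **filters):
    """Read rows from table_name, filtered by keyword equality conditions."""
    if not isinstance(column_names, str):
        column_names = ', '.join(column_names)
    query = 'SELECT {}{} FROM "{}"'.format(
        'DISTINCT ' if return_unique else '', column_names, table_name)
    if filters:
        # "IS ?" rather than "= ?" so a None filter matches NULL rows,
        # as the sensitivity handling in Example #6 requires
        query += ' WHERE ' + ' AND '.join('"{}" IS ?'.format(k) for k in filters)
    rows = _CON.execute(query, list(filters.values())).fetchall()
    if rows and len(rows[0]) == 1:
        rows = [row[0] for row in rows]  # flatten single-column reads, as read_index_levels expects
    return rows if return_iterable else (rows[0] if rows else None)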
Example #2
 def read_timeseries_data(self, **filters):  # This function needs to be sped up
     """reads timeseries data to dataframe from database. Stored in self.raw_values"""
     # rowmap is used in ordering the data when read from the sql table
     headers = util.sql_read_headers(self.sql_data_table)
     rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
     data_col_ind = headers.index('value')
     # read each line of the data_table matching an id and assign the value to self.raw_values
     data = []
     if filters:
         merged_dict = dict({self.primary_key: self.id}, **filters)
         read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
     else:
         read_data = util.sql_read_table(self.sql_data_table, return_iterable=True,
                                         **{self.primary_key: self.id})
     if read_data:
         for row in read_data:
             try:
                 data.append([row[i] for i in rowmap] +
                             [row[data_col_ind] * (self.unit_prefix if hasattr(self, 'unit_prefix') else 1)])
             except Exception:
                 print(self.id, row)
         column_names = self.df_index_names + ['value']
         self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
         self.data = True
     else:
         self.raw_values = None
         self.data = False
Example #3
    def read_geography_data(self):
        # example lookup on the resulting dataframe:
        # df2.loc[('kentucky', 'total', 'east north central', 'western interconnection'), 'households']
        headers = util.sql_read_headers('GeographyMap')

        # colmap and rowmap are used in ordering the data when read from the sql table
        colmap = [headers.index(col) for col in self.map_keys]
        rowmap = [headers.index(key) for key in self.geographies.keys()]
        for row in util.sql_read_table('GeographyMap'):
            self.values.loc[tuple([row[i] for i in rowmap]), tuple(self.map_keys)] = [row[i] for i in colmap]
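The loop above writes one GeographyMap row at a time into a preallocated MultiIndex dataframe. A minimal standalone sketch of that .loc pattern, with hypothetical geography levels and map keys:

import pandas as pd

# hypothetical index levels and map-key columns, standing in for self.geographies and self.map_keys
index = pd.MultiIndex.from_tuples(
    [('kentucky', 'total', 'east north central', 'western interconnection')],
    names=['state', 'sector', 'census division', 'interconnection'])
map_keys = ['households', 'employment']
values = pd.DataFrame(index=index, columns=map_keys)  # preallocated, like self.values

# one raw GeographyMap row: index columns first, then the mapped values
row = ('kentucky', 'total', 'east north central', 'western interconnection', 1.7e6, 1.9e6)
rowmap, colmap = [0, 1, 2, 3], [4, 5]
values.loc[tuple(row[i] for i in rowmap), map_keys] = [row[i] for i in colmap]

print(values.loc[('kentucky', 'total', 'east north central', 'western interconnection'), 'households'])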
Example #4
 def read_index_levels(self):
     """
     creates a dictionary to store level headings (for database lookup) and
     level elements. Stored as attr 'index_level'
     """
     data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x not in self.data_id_key]
     for id, index_level, column_name in util.sql_read_table('IndexLevels'):
         if column_name not in data_table_columns:
             continue
         elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                        return_iterable=True, return_unique=True,
                                        **{self.data_id_key: self.id})
         if len(elements):
             self.index_levels[index_level] = elements
             self.column_names[index_level] = column_name
Example #5
 def read_index_levels(self):
     """
     creates a dictionary to store level headings (for database lookup) and
     level elements. Stored as attr 'index_level'
     """
     data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x not in self.primary_key]
     for id, index_level, column_name in util.sql_read_table('IndexLevels'):
         if column_name not in data_table_columns:
             continue
         elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                        return_iterable=True, return_unique=True,
                                        **{self.primary_key: self.id})
         if len(elements):
             self.index_levels[index_level] = elements
             self.column_names[index_level] = column_name
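The two dictionaries that read_index_levels fills are exactly what read_timeseries_data's rowmap consumes: rowmap reorders each raw database row into the dataframe's index-level order. A toy walk-through, with hypothetical column names:

headers = ['id', 'parent_id', 'gau_id', 'year', 'sensitivity', 'value']  # hypothetical data table
column_names = {'geography': 'gau_id', 'year': 'year'}     # as built by read_index_levels
index_levels = {'geography': [1, 2], 'year': [2015, 2020]}

rowmap = [headers.index(column_names[level]) for level in index_levels]  # [2, 3]
row = (7, 3, 2, 2020, None, 41.5)
print([row[i] for i in rowmap])  # [2, 2020] -- the index portion of one dataframe row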
Example #6
    def read_timeseries_data(self, data_column_names='value', **filters):  # This function needs to be sped up
        """reads timeseries data to dataframe from database. Stored in self.raw_values"""
        # rowmap is used in ordering the data when read from the sql table
        headers = util.sql_read_headers(self.sql_data_table)
        filters[self.data_id_key] = self.id

        # Check for a sensitivity specification for this table and id. If there is no relevant sensitivity specified
        # but the data table has a sensitivity column, we set the sensitivity filter to "None", which will filter
        # the data table rows down to those where sensitivity is NULL, which is the default, no-sensitivity condition.
        if 'sensitivity' in headers:
            filters['sensitivity'] = None
            if hasattr(self, 'scenario'):
                # Note that this will return None if the scenario doesn't specify a sensitivity for this table and id
                filters['sensitivity'] = self.scenario.get_sensitivity(self.sql_data_table, self.id)

        # read each line of the data_table matching an id and assign the value to self.raw_values
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **filters)
        self.inspect_index_levels(headers, read_data)
        self._validate_other_indexes(headers, read_data)

        rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
        data_col_ind = [headers.index(data_col) for data_col in util.put_in_list(data_column_names)]

        unit_prefix = self.unit_prefix if hasattr(self, 'unit_prefix') else 1
        if read_data:
            data = []
            for row in read_data:
                try:
                    data.append([row[i] for i in rowmap] + [row[i] * unit_prefix for i in data_col_ind])
                except:
                    logging.warning('error reading table: {}, row: {}'.format(self.sql_data_table, row))
                    raise
            column_names = self.df_index_names + util.put_in_list(data_column_names)
            self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
            # log any duplicate index values
            duplicate_index = self.raw_values.index.duplicated(keep=False)  # keep=False flags every member of a duplicated group
            if any(duplicate_index):
                logging.warning('Duplicate indices in table: {}, parent id: {}, by default the first index will be kept.'.format(self.sql_data_table, self.id))
                logging.warning(self.raw_values[duplicate_index])
                self.raw_values = self.raw_values.groupby(level=self.raw_values.index.names).first()
        else:
            self.raw_values = None
            # We didn't find any timeseries data for this object, so now we want to let the user know if that
            # might be a problem. We only expect to find timeseries data if self actually existed in the database
            # (as opposed to being a placeholder). The existence of self in the database is flagged by self.data.
            if self.data:
                if getattr(self, 'reference_tech_id', None):
                    logging.debug('No {} found for {} with id {}; using reference technology values instead.'.format(
                        self.sql_data_table, self.sql_id_table, self.id
                    ))
                else:
                    msg = 'No {} or reference technology found for {} with id {}.'.format(
                        self.sql_data_table, self.sql_id_table, self.id
                    )
                    if re.search("Cost(New|Replacement)?Data$", self.sql_data_table):
                        # The model can run fine without cost data and this is sometimes useful during model
                        # development so we just gently note if cost data is missing.
                        logging.debug(msg)
                    else:
                        # Any other missing data is likely to be a real problem so we complain
                        logging.critical(msg)
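The duplicate guard in Example #6 is easier to see in isolation: index.duplicated(keep=False) flags every member of a duplicated group, not just the repeats, and groupby(...).first() then keeps one row per index value. A minimal sketch with made-up data:

import pandas as pd

raw = pd.DataFrame({'value': [1.0, 2.0, 3.0]},
                   index=pd.MultiIndex.from_tuples([(1, 2015), (1, 2015), (2, 2015)],
                                                   names=['gau_id', 'year']))
duplicate_index = raw.index.duplicated(keep=False)
print(raw[duplicate_index])       # both (1, 2015) rows are flagged
deduped = raw.groupby(level=raw.index.names).first()
print(deduped)                    # (1, 2015) keeps the first value, 1.0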