def channel_names(dataframe, **kwargs):
    """
    Gets a named channel related to a generic ADC channel.
    For instance, an ADC with I2C address 48 has three
    channels: 0, 1, 2 and 3. 48_0 could corresponde to the working electrode
    of a CO sensor, so the channel ADC_48_0 would be copied over to a new
    channel named CO_WE. for the ADC_48_1, the CO auxiliary electrode
    it would be copied to CO_AE. The naming convention is specified in the
    hardware folder
    Parameters
    ----------
        channel: string
            Channel name, i.e. ADC_48_0
    Returns
    -------
        Channel named accordingly
    """
    # BUG FIX: the original read kwargs['channel'] before checking the
    # flag_error it had just set, raising KeyError when the kwarg is missing.
    # Validate presence first, then membership in the dataframe.
    if 'channel' not in kwargs:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['channel'] not in dataframe:
        std_out(f"Channel {kwargs['channel']} not in dataframe. Ignoring",
                'WARNING')
        return None

    # Make copy
    df = dataframe.copy()
    return df[kwargs['channel']]
Example #2
0
def to_csv(self, path=None, forced_overwrite=False):
    """
    Exports the readings of every device in the test as csv files
    Parameters
    ----------
        path: string
            None
            Directory to export the csv(s) into. If None, exports to
            test_path/processed/
        forced_overwrite: boolean
            False
            Overwrite already existing files
    Returns
    -------
        True if every device exported successfully
    """
    # Default export location lives under the test path
    epath = join(self.path, 'processed') if path is None else path

    # Every device must export OK for the overall result to be OK
    export_ok = True
    for device in self.devices.values():
        export_ok &= device.export(epath, forced_overwrite=forced_overwrite)

    if export_ok:
        std_out(f'Test {self.full_name} exported successfully', 'SUCCESS')
    else:
        std_out(f'Test {self.full_name} not exported successfully', 'ERROR')

    return export_ok
    def fill_template(individual_descriptor,
                      descriptor_file_name,
                      upload_type='dataset'):
        """
        Fills a Zenodo upload template with the values from the individual
        descriptor, writes the result into the uploads directory and
        returns the filled template as a json string.
        """
        # Pick the base template that contains all the expected keys
        if upload_type == 'dataset':
            template_file_name = 'template_zenodo_dataset'
        elif upload_type == 'publication':
            template_file_name = 'template_zenodo_publication'

        template_path = join('zenodo_templates', f'{template_file_name}.json')
        with open(template_path, 'r') as template_file:
            filled_template = json.load(template_file)

        # Copy over every descriptor value whose key the template knows about
        for key, value in individual_descriptor.items():
            if key in filled_template['metadata']:
                filled_template['metadata'][key] = value

        output_path = join(config.paths['dataDirectory'], 'uploads',
                           descriptor_file_name)
        with open(output_path, 'w') as descriptor_json:
            json.dump(filled_template, descriptor_json, ensure_ascii=True)
            std_out(f'Created descriptor file for {descriptor_file_name}',
                    'SUCCESS')

        return json.dumps(filled_template)
    def __update_descriptor__(self):
        """
        Synchronises the in-memory test descriptor (defaults, details and
        devices) and writes it out as test_description.yaml.
        """
        if self.descriptor == {}: self.std_out('No descriptor file to update')

        # Backfill any default field the descriptor is missing
        for field, default in self._default_fields.items():
            if field not in self.descriptor:
                self.descriptor[field] = default

        # Details always win over whatever the descriptor already had
        for detail, value in self.details.items():
            self.descriptor[detail] = value

        # Add devices to descriptor, dropping discarded variables
        for device in self.devices.values():
            if device.source == 'csv':
                device.processed_data_file = (self.full_name + '_' +
                                              str(device.id) + '.csv')

            dvars = vars(device).copy()
            for discvar in config._discvars:
                dvars.pop(discvar, None)

            self.descriptor['devices'][device.id] = dvars

        # Create yaml with test description
        with open(join(self.path, 'test_description.yaml'), 'w') as yaml_file:
            yaml.dump(self.descriptor, yaml_file)

        std_out('Descriptor file updated')
Example #5
0
def export_csv_file(path, file_name, df, forced_overwrite=False):
    '''
    Exports pandas dataframe to a csv file
    Parameters
    ----------
        path: String
            Directory path
        file_name: String
            File name for the resulting csv
        df: pandas.DataFrame
            Dataframe to export
        forced_overwrite: boolean
            False
            If file exists, overwrite it or not
    Returns
    ---------
        True if exported, False if not (if file exists returns False)
    '''

    # If path does not exist, create it
    if not exists(path):
        makedirs(path)

    # Build the target file path once, instead of re-concatenating
    # path + '/' + file_name + '.csv' at every use
    file_path = join(path, str(file_name) + '.csv')

    # Export unless the file already exists and overwrite was not requested
    if not exists(file_path) or forced_overwrite:
        df.to_csv(file_path, sep=",")
        std_out('File saved to: \n' + file_path, 'SUCCESS')
    else:
        std_out(
            "File Already exists - delete it first, I was not asked to overwrite anything!",
            'ERROR')
        return False

    return True
    def post(self, with_post_info=True):
        '''
        Posts devices metrics. Only available for parent of ScApiDevice
        Parameters
        ----------
            with_post_info: boolean
                Default True
                Add the post info to the package
        Returns
        ----------
            boolean
            True if posted ok, False otherwise
        '''
        # Posting is only implemented for the SmartCitizen API handler
        if self.sources[self.source]['handler'] != 'ScApiDevice':
            std_out('Only supported processing post is to SmartCitizen API',
                    'ERROR')
            return False

        post_ok = True
        # Post every metric flagged for posting, one series at a time
        for metric, mdef in self.metrics.items():
            if mdef['post'] != True:
                continue
            series_df = DataFrame(self.readings[metric])
            post_ok &= self.api_device.post_device_data(
                series_df, sensor_id=mdef['id'])

        # Post info if requested. It should be updated elsewhere
        if with_post_info: self.api_device.post_postprocessing_info()

        return post_ok
    def load_postprocessing_info(self):
        """
        Loads the SmartCitizen postprocessing information onto the device
        and switches to the hardware blueprint when one is known.
        Returns the postprocessing info dict, or None when unavailable.
        """
        # Only SmartCitizen API devices carry postprocessing info
        if self.source != 'api':
            return None
        if self.sources[self.source]['handler'] != 'ScApiDevice':
            return None

        # Request to get postprocessing information
        if self.api_device.get_postprocessing_info() is None:
            return None

        info = self.api_device.postprocessing_info

        # Mirror the relevant fields onto the device itself
        self.hw_id = info['hardware_id']
        self.hw_updated_at = info['updated_at']
        self.hw_post_blueprint = info['postprocessing_blueprint']
        self.latest_postprocessing = info['latest_postprocessing']

        # Use postprocessing info blueprint when we have one for it
        if self.hw_post_blueprint in config.blueprints.keys():
            std_out(
                f'Using hardware postprocessing blueprint: {self.hw_post_blueprint}'
            )
            self.blueprint = self.hw_post_blueprint

        return info
Example #8
0
 def export(self, path, forced_overwrite=False, file_format='csv'):
     '''
     Exports Device.readings to file
     Parameters
     ----------
         path: String
             Path to export file to, does not include filename.
             The filename will be the Device.id property
         forced_overwrite: boolean
             False
             Force data export in case of already existing file
         file_format: String
             'csv'
             File format to export. Current supported format CSV
     Returns
     ---------
         True if exported ok, False otherwise
     '''
     # CSV is the only supported export format for now; guard early
     if file_format != 'csv':
         std_out('Not supported format', 'ERROR')
         return False

     return export_csv_file(path,
                            str(self.id),
                            self.readings,
                            forced_overwrite=forced_overwrite)
    def post_postprocessing_info(self):
        '''
            POST postprocessing info into the device in the SmartCitizen API
            Updates all the post info. Changes need to be made info the keys of the postprocessing_info outside of here

            # Example postprocessing_info:
            # {
            #   "updated_at": "2020-10-29T04:35:23Z",
            #   "postprocessing_blueprint": 'sck_21_gps',
            #   "hardware_id": "SCS20100",
            #   "latest_postprocessing": "2020-10-29T08:35:23Z"
            # }
        '''
        # An auth bearer token is mandatory for PATCHing the device
        if 'SC_BEARER' not in environ:
            std_out('Cannot post without Auth Bearer', 'ERROR')
            return

        headers = {'Authorization': 'Bearer ' + environ['SC_BEARER'],
                   'Content-type': 'application/json'}

        post_json = dumps(self.postprocessing_info)
        std_out(f'Posting post-processing info:\n {post_json}')
        response = patch(f'https://api.smartcitizen.me/v0/devices/{self.id}/',
                         data=post_json, headers=headers)

        # Both 200 (updated) and 201 (created) count as success
        return response.status_code in (200, 201)
def deconvolution(dataframe, **kwargs):
    """
    Calculates pollutant concentration for convolved metrics, such as NO2+O3.
    Needs convolved metric, and target pollutant sensitivities
    Parameters
    ----------
        source: string
            Name of convolved metric containing both pollutants (such as NO2+O3)
        base: string
            Name of one of the already deconvolved pollutants (for instance NO2)
        id: int
            Sensor ID
        pollutant: string
            Pollutant name. Must be included in the corresponding LUTs for unit convertion and additional parameters:
            MOLECULAR_WEIGHTS, config._background_conc, CHANNEL_LUT
    Returns
    -------
        calculation of pollutant based on: 6.36 * sensitivity(working - zero_working)/(auxiliary - zero_auxiliary)
    """
    # Validate all required kwargs before touching any of them
    flag_error = False
    if 'source' not in kwargs: flag_error = True
    if 'base' not in kwargs: flag_error = True
    if 'id' not in kwargs: flag_error = True
    if 'pollutant' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    # NOTE(review): the original also computed 'target_2' and a working/aux
    # zero-current ratio (nWA), plus empty 'result'/'baseline' Series, none of
    # which were ever used — they have been removed as dead code
    sensitivity_1 = config.calibrations.loc[kwargs['id'], 'sensitivity_1']
    sensitivity_2 = config.calibrations.loc[kwargs['id'], 'sensitivity_2']
    target_1 = config.calibrations.loc[kwargs['id'], 'target_1']

    if target_1 != kwargs['pollutant']:
        std_out(
            f"Sensor {kwargs['id']} doesn't coincide with calibration data",
            'ERROR')
        return None

    # Unit conversion factors (from ppm) for target and base pollutant
    factor_unit_1 = get_units_convf(kwargs['pollutant'], from_units='ppm')
    factor_unit_2 = get_units_convf(kwargs['base'], from_units='ppm')

    result = factor_unit_1 * (
        config._alphadelta_pcb * dataframe[kwargs['source']] -
        dataframe[kwargs['base']] / factor_unit_2 *
        abs(sensitivity_2)) / abs(sensitivity_1)

    # Add Background concentration
    result += config._background_conc[kwargs['pollutant']]

    return result
    def add_content(self,
                    title,
                    figure=None,
                    text=None,
                    iframe=None,
                    show_title=True,
                    force=False):
        '''
            Adds content for the rendered flask template of the test. Content is a dict()
            which contains a key per title (replacing ' ' with '_') and the content in it.

            Parameters
            ----------
            title
                None
                Content title. Needs to not be None
            figure
                None
                matplotlib or similar figure that can be converted to base64
            text
                None
                Text to be converted to <p> html tag with additional inner html (jinja2 safe rendered)
            iframe
                None
                HTML iframe contanining anything
            show_title
                True
                show title in HTML <h3> tag
            force
                If already added content with this title

            Returns
            ----------
            True if content added, false otherwise

        '''
        # Guard: docstring requires a non-None title, and sub() below
        # would raise TypeError on None anyway
        if title is None:
            std_out('Cannot add content without title', 'ERROR')
            return False

        # BUG FIX: raw string — '\W' in a plain string is an invalid
        # escape sequence (SyntaxWarning in modern Python)
        title_cor = sub(r'\W|^(?=\d)', '_', title)

        if title_cor not in self.content or force:
            self.content[title_cor] = dict()

            if title is not None:
                self.content[title_cor]['title'] = title
            if figure is not None:
                self.content[title_cor]['image'] = to_png_b64(figure)
            if text is not None:
                self.content[title_cor]['text'] = text
            if iframe is not None:
                self.content[title_cor]['iframe'] = iframe

            self.content[title_cor]['show_title'] = show_title

            std_out('Item added', 'SUCCESS')
            return True

        else:
            std_out('Item not added as its already in content', 'ERROR')
            return False
 def add_device(self, device):
     '''
         Adds a device to the test. The device has to be an instance of 'scdata.device.Device'
     '''
     # Keep device ids unique within the test
     if device.id in self.devices.keys():
         std_out(f'Device {device.id} is duplicated', 'WARNING')
     else:
         self.devices[device.id] = device
    def post_device_data(self, df, sensor_id, clean_na = 'drop'):
        '''
            POST data in the SmartCitizen API
            Parameters
            ----------
                df: pandas DataFrame
                    Contains data in a DataFrame format. 
                    Data is posted regardless the name of the dataframe
                    It uses the sensor id provided, not the name
                    Data is posted in UTC TZ so dataframe needs to have located 
                    timestamp
                sensor_id: int
                    The sensor id
                clean_na: string, optional
                    'drop'
                    'drop', 'fill'
            Returns
            -------
                True if the data was posted succesfully
        '''
        # An admin bearer token is mandatory for posting readings
        if 'SC_ADMIN_BEARER' not in environ:
            std_out('Cannot post without Auth Bearer', 'ERROR')
            return

        headers = {'Authorization':'Bearer ' + environ['SC_ADMIN_BEARER'], 'Content-type': 'application/json'}

        # Get sensor name
        sensor_name = list(df.columns)[0]
        # Clean df of nans
        df = clean(df, clean_na, how = 'all')

        # Process dataframe
        df['id'] = sensor_id
        df.index.name = 'recorded_at'
        df.rename(columns = {sensor_name: 'value'}, inplace = True)
        df.columns = MultiIndex.from_product([['sensors'], df.columns])
        # BUG FIX: to_dict('r') was a deprecated abbreviation removed in
        # pandas 1.x — 'records' is the supported orient
        j = (df.groupby('recorded_at', as_index = True)
                .apply(lambda x: x['sensors'][['value', 'id']].to_dict('records'))
        )

        # Prepare json post
        payload = {"data":[]}
        for item in j.index:
            payload["data"].append(
                {
                    "recorded_at": localise_date(item, 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ'),
                    "sensors": j[item]
                }
            )

        payload_json = dumps(payload)

        response = post(f'https://api.smartcitizen.me/v0/devices/{self.id}/readings', data = payload_json, headers = headers)
        if response.status_code == 200 or response.status_code == 201:
            return True

        return False
    def get_device_added_at(self):
        # Lazily resolve 'added_at' from the device json on first call
        if self.added_at is None and self.get_device_json() is not None:
            self.added_at = self.devicejson['added_at']

        std_out('Device {} was added at {}'.format(self.id, self.added_at))

        return self.added_at
    def get_device_last_reading(self, update = False):
        # Refresh from the device json when unknown or when forced
        needs_fetch = self.last_reading_at is None or update
        if needs_fetch and self.get_device_json(update) is not None:
            self.last_reading_at = self.devicejson['last_reading_at']

        std_out ('Device {} has last reading at {}'.format(self.id, self.last_reading_at))

        return self.last_reading_at
Example #16
0
    def del_metric(self, metricn=''):
        """
        Removes a metric from both self.metrics and the readings dataframe.
        Returns True when the metric is gone from both, False otherwise;
        returns None when the device has no metrics at all.
        """
        if 'metrics' not in vars(self): return
        if metricn in self.metrics: self.metrics.pop(metricn, None)
        # Idiom fix: 'del df[col]' instead of calling __delitem__ directly
        if metricn in self.readings.columns: del self.readings[metricn]

        if metricn not in self.readings and metricn not in self.metrics:
            std_out(f'Metric {metricn} removed from metrics', 'SUCCESS')
            return True
        return False
Example #17
0
 def check_existing(self, comment):
     '''
     Checks whether a cron job carrying the given comment already exists.
     Returns True if at least one matching job is found, False otherwise.
     '''
     matches = []
     jobs = self.cron.find_comment(comment)
     for job in jobs:
         # BUG FIX: the original appended the iterator 'c' for every item
         # instead of the item itself
         matches.append(job)
     if matches:
         std_out(f'{comment} already running')
         return True
     else:
         std_out(f'{comment} not running')
         return False
    def get_device_location(self):
        """
        Resolves and caches the device timezone from its coordinates.
        Returns the timezone name, or None when it cannot be resolved.
        """
        if self.location is None:
            latitude, longitude = self.get_device_lat_long()
            # Localize it — guard against missing coordinates, consistent
            # with the update-aware variant of this method elsewhere
            if latitude is not None and longitude is not None:
                self.location = tz_where.tzNameAt(latitude, longitude)

        std_out('Device {} timezone is {}'.format(self.id, self.location))

        return self.location
def prepare(self, measurand, inputs, options=dict()):
    """
    Prepares a test for a regression model
    Parameters
    ----------
        measurand: dict
            measurand = {'8019043': ['NO2']}
        inputs: dict
            inputs per device and reading
                inputs = {'devicename': ['reading-1', 'reading-2']}
        options: dict
            Options including data processing. Defaults in config._model_def_opt
    Returns
    -------
        df = pandas Dataframe
        measurand_name = string
    """
    options = dict_fmerge(options, config._model_def_opt)

    # Measurand: a single device / single metric pair
    measurand_device = list(measurand.keys())[0]
    measurand_metric = measurand[measurand_device][0]
    measurand_name = measurand_metric + '_' + measurand_device

    df = DataFrame()
    df[measurand_name] = (
        self.devices[measurand_device].readings[measurand_metric])

    # Merge in the requested readings of every input device
    for input_device in inputs:
        df = df.combine_first(
            self.combine(devices=[input_device],
                         readings=inputs[input_device]))

    if options['common_avg']:
        # Channels shared by every input device
        device_names = list(inputs.keys())
        common_channels = inputs[device_names[0]]
        for input_device in device_names:
            common_channels = list(
                set(common_channels).intersection(set(inputs[input_device])))
        std_out(f'Performing avg in common columns {common_channels}')

        # Average each common channel across devices
        for channel in common_channels:
            columns_list = [channel + '_' + device
                            for device in device_names]
            df[channel + '_AVG'] = df[columns_list].mean(axis=1)

        # Keep only the averaged columns plus the measurand itself
        df = df.loc[:,
                    df.columns.str.contains("_AVG")
                    | df.columns.str.contains(measurand_name)]

    if options['clean_na'] is not None:
        df = clean(df, options['clean_na'], how='any')

    return df, measurand_name
    def __fill_metrics__(self):
        """
        Populates device metrics from the hardware description found in
        config.hardware_info for this device's hardware id (self.hw_id).
        For each Alphasense ('AS*') slot it builds a processing metric
        entry and registers it via self.add_metric.
        Returns False when the hardware id is unknown; otherwise None.
        """
        if self.hw_id in config.hardware_info:
            std_out('Hardware ID found in history', "SUCCESS")
            hw_info = config.hardware_info[self.hw_id]
        else:
            std_out(f"Hardware id: {self.hw_id} not found in hardware_info",
                    'ERROR')
            return False

        # Now go through sensor versions and parse them
        for version in hw_info.keys():

            # Validity period for this hardware version
            from_date = hw_info[version]["from"]
            to_date = hw_info[version]["to"]

            for slot in hw_info[version]["ids"]:

                # Alphasense type
                if slot.startswith('AS'):

                    sensor_id = hw_info[version]["ids"][slot]
                    # First three characters of the sensor id encode its type
                    as_type = config._as_sensor_codes[sensor_id[0:3]]
                    # Pollutant name is whatever follows the first underscore
                    pollutant = as_type[as_type.index('_') + 1:]
                    platform_sensor_id = config._platform_sensor_ids[pollutant]
                    # TODO - USE POLLUTANT OR PLATFORM SENSOR ID?
                    process = 'alphasense_803_04'

                    # Working/auxiliary electrode ADC channel names.
                    # NOTE(review): str.strip('AS_') removes ANY leading or
                    # trailing 'A'/'S'/'_' characters (it is not a prefix
                    # removal), and the slice indices come from the
                    # UN-stripped slot — confirm this yields the intended
                    # 'ADC_<address>_<channel>' names for all slot formats
                    wen = f"ADC_{slot.strip('AS_')[:slot.index('_')]}_{slot.strip('AS_')[slot.index('_')+1]}"
                    aen = f"ADC_{slot.strip('AS_')[:slot.index('_')]}_{slot.strip('AS_')[slot.index('_')+2]}"

                    # metric_name = f'{pollutant}_V{version}_S{list(hw_info[version]["ids"]).index(slot)}'
                    metric_name = f'{pollutant}'

                    metric = {
                        metric_name: {
                            'process': process,
                            'desc':
                            f'Calculation of {pollutant} based on AAN 803-04',
                            'units': 'ppb',  # always for alphasense sensors,
                            'id': platform_sensor_id,
                            'post': True,
                            'kwargs': {
                                'from_date': from_date,
                                'to_date': to_date,
                                'id': sensor_id,
                                'we': wen,
                                'ae': aen,
                                't': 'EXT_TEMP',  # With external temperature?
                                'location': self.location
                            }
                        }
                    }

                # NOTE(review): this call sits outside the 'AS' branch, so a
                # non-AS slot re-adds the previous metric (or raises
                # NameError when it is the first slot seen) — confirm
                # whether it should be indented into the branch above
                self.add_metric(metric)
Example #21
0
def apply_regressor(dataframe, **kwargs):
    '''
    Applies a regressor model based on a pretrained model
    Parameters
    ----------
        model: sklearn predictor
            Model with .predict method
        options: dict
            Options for data preprocessing. Defaults in config.model_def_opt
        variables: dict
            variables dictionary with:
                {
                'measurand': {'measurand-device-name': ['measurand']},
                'inputs': {'input-device-names': ['input-1', 'input_2', 'input-3']}
                }
    Returns
    ----------
    pandas series containing the prediction
    '''
    # Union of all input channels across all input devices
    inputs = list()
    for device in kwargs['variables']['inputs']:
        inputs = list(
            set(inputs).union(set(kwargs['variables']['inputs'][device])))

    try:
        inputdf = dataframe[inputs].copy()
        # Sort columns so feature order matches the model's training order
        inputdf = inputdf.reindex(sorted(inputdf.columns), axis=1)
    except KeyError:
        std_out('Inputs not in dataframe', 'ERROR')
        return None

    if 'model' not in kwargs:
        std_out('Model not in inputs', 'ERROR')
        # BUG FIX: the original fell through here and later raised
        # NameError on the undefined 'model'
        return None
    model = kwargs['model']

    if 'options' not in kwargs:
        options = config.model_def_opt
    else:
        options = dict_fmerge(config.model_def_opt, kwargs['options'])

    # Remove na
    inputdf = clean(inputdf, options['clean_na'], how='any')

    features = array(inputdf)
    result = DataFrame(model.predict(features)).set_index(inputdf.index)

    return result
    def __check_sensors__(self):
        # Drop any declared sensor that has no matching readings column
        remove_sensors = [sensor for sensor in self.sensors
                          if sensor not in self.readings.columns]

        if remove_sensors != []:
            std_out(f'Removing sensors from device: {remove_sensors}',
                    'WARNING')
        for sensor_to_remove in remove_sensors:
            self.sensors.pop(sensor_to_remove, None)
        std_out(f'Device sensors after removal: {list(self.sensors.keys())}')
    def get_device_location(self, update = False):
        # Resolve the timezone from the device coordinates, caching it
        if self.location is None or update:
            latitude, longitude = self.get_device_lat_long(update)
            # Only localize when both coordinates are known
            if latitude is not None and longitude is not None:
                self.location = tz_where.tzNameAt(latitude, longitude)

        std_out ('Device {} timezone is {}'.format(self.id, self.location))

        return self.location
 def get_device_json(self, update = False):
     '''
     Returns the cached device json from the API, requesting it when it
     has not been fetched yet or when update is True. Best-effort: on
     failure the cached value (possibly None) is returned.
     '''
     if self.devicejson is None or update:
         try:
             deviceR = get(self.API_BASE_URL + '{}/'.format(self.id))
             if deviceR.status_code == 200 or deviceR.status_code == 201:
                 self.devicejson = deviceR.json()
             else:
                 std_out('API reported {}'.format(deviceR.status_code), 'ERROR')
         # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
         # are not swallowed; network errors stay best-effort
         except Exception:
             std_out('Failed request. Probably no connection', 'ERROR')
     return self.devicejson
def dispersion_summary(self):
    # Mean of each channel's STD column from the dispersion analysis
    self._dispersion_summary = dict()

    if self.dispersion_df is None:
        std_out('Perform dispersion analysis first!', 'ERROR')
        return None

    for channel in self.common_channels:
        # Skip channels explicitly excluded from dispersion metrics
        if channel in config._dispersion['ignore_channels']: continue
        std_column = channel + '_STD'
        self._dispersion_summary[channel] = (
            self.dispersion_df[std_column].mean())

    return self._dispersion_summary
 def get_user_json_by_username(self):
     '''
     Returns the cached user json from the API, requesting it by
     username when not yet fetched. Best-effort on failure.
     '''
     if self.userjson is None:
         try:
             userR = get(self.API_BASE_URL + '{}/'.format(self.username), headers = self.headers)
             # BUG FIX: removed leftover debug 'print(userR)'
             if userR.status_code == 200 or userR.status_code == 201:
                 self.userjson = userR.json()
             else:
                 std_out('API reported {}'.format(userR.status_code), 'ERROR')
         # Narrowed from a bare 'except:' so interrupts are not swallowed
         except Exception:
             std_out('Failed request. Probably no connection', 'ERROR')
     return self.userjson
    def get_device_json(self):
        '''
        Returns the cached device data, requesting it as csv from the
        API (keyed by codi_eoi id) when not yet fetched. Best-effort:
        on failure the cached value (possibly None) is returned.
        '''
        if self.devicejson is None:
            try:
                s = get(self.API_BASE_URL + f'codi_eoi={self.id}')
                if s.status_code == 200 or s.status_code == 201:
                    self.devicejson = read_csv(StringIO(s.content.decode('utf-8')))
                else:
                    std_out('API reported {}'.format(s.status_code), 'ERROR')
            # Narrowed from a bare 'except:' so interrupts are not swallowed
            except Exception:
                std_out('Failed request. Probably no connection', 'ERROR')

        return self.devicejson
Example #28
0
    def set_descriptor_attrs(self):
        # Copy descriptor entries onto matching, already-declared attributes;
        # unknown keys are reported and skipped
        for ditem in self.description:
            if ditem not in vars(self):
                std_out(f'Ignoring {ditem} from input')
                continue
            current = self.__getattribute__(ditem)
            if type(current) == dict:
                # Merge dict attributes instead of replacing them wholesale
                self.__setattr__(
                    ditem, dict_fmerge(current, self.description[ditem]))
            else:
                self.__setattr__(ditem, self.description[ditem])
Example #29
0
def dprocess(device, dryrun=False):
    '''
        This function processes a device from SC API assuming there
        is postprocessing information in it and that it's valid for doing
        so 
    '''
    std_out(f'[CHUPIFLOW] Processing instance for device {device}')
    # Create device from SC API
    d = Device(descriptor={'source': 'api', 'id': f'{device}'})
    if d.validate():
        # Load only unprocessed
        if d.load(only_unprocessed=True, max_amount=config._max_load_amount):
            # Process it
            d.process()
            # Post results. BUG FIX: the original referenced an undefined
            # 'dry_run' name instead of the 'dryrun' parameter (NameError)
            d.post_metrics(dry_run=dryrun,
                           max_retries=config._max_forward_retries)
            # Forward it if requested
            if d.forwarding_request is not None:
                std_out(f'[CHUPIFLOW] Forwarding {device}')
                d.forward(dry_run=dryrun,
                          max_retries=config._max_forward_retries)
            d.update_postprocessing(dry_run=dryrun)
    else:
        std_out(f'[CHUPIFLOW] Device {device} not valid', 'ERROR')
    std_out(f'[CHUPIFLOW] Concluded job for {device}')
    def process(self, only_new=False):
        ''' 
        Calculates all the metrics in each of the devices
        Returns True if done OK
        '''
        # Every device must process OK for the test to be considered OK
        process_ok = True
        for device in self.devices.values():
            process_ok &= device.process(only_new=only_new)

        # Cosmetic output
        if process_ok:
            std_out(f'Test {self.full_name} processed', 'SUCCESS')
        else:
            std_out(f'Test {self.full_name} not processed', 'ERROR')

        return process_ok