def channel_names(dataframe, **kwargs):
    """
    Gets a named channel related to a generic ADC channel. For instance,
    an ADC with I2C address 48 has four channels: 0, 1, 2 and 3. 48_0 could
    correspond to the working electrode of a CO sensor, so the channel
    ADC_48_0 would be copied over to a new channel named CO_WE. Likewise,
    ADC_48_1, the CO auxiliary electrode, would be copied to CO_AE.
    The naming convention is specified in the hardware folder.
    Parameters
    ----------
        channel: string
            Channel name, i.e. ADC_48_0
    Returns
    -------
        Channel named accordingly
    """

    # Check inputs (check the key first, so a missing kwarg never raises KeyError)
    flag_error = False
    if 'channel' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['channel'] not in dataframe:
        std_out(f"Channel {kwargs['channel']} not in dataframe. Ignoring", 'WARNING')
        return None

    # Make copy
    df = dataframe.copy()

    return df[kwargs['channel']]
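# Usage sketch for channel_names (not part of the library). The data and the
# column name are hypothetical; assumes std_out is available in scope, as the
# function above relies on it:
#
#   from pandas import DataFrame
#   raw = DataFrame({'ADC_48_0': [0.12, 0.13, 0.11]})
#   co_we = channel_names(raw, channel='ADC_48_0')  # Series to be stored as CO_WE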
def to_csv(self, path=None, forced_overwrite=False):
    """
    Exports devices in test to desired path
    Parameters
    ----------
        path: string
            None
            The path (directory) to export the csv(s) into.
            If None, exports to test_path/processed/
        forced_overwrite: boolean
            False
            To overwrite existing files
    Returns
    -------
        True if export successful
    """
    export_ok = True

    if path is None:
        epath = join(self.path, 'processed')
    else:
        epath = path

    # Export to csv
    for device in self.devices.keys():
        export_ok &= self.devices[device].export(epath, forced_overwrite=forced_overwrite)

    if export_ok:
        std_out(f'Test {self.full_name} exported successfully', 'SUCCESS')
    else:
        std_out(f'Test {self.full_name} not exported successfully', 'ERROR')

    return export_ok
def fill_template(individual_descriptor, descriptor_file_name, upload_type='dataset'):
    # Open base template with all keys
    if upload_type == 'dataset':
        template_file_name = 'template_zenodo_dataset'
    elif upload_type == 'publication':
        template_file_name = 'template_zenodo_publication'

    with open(join('zenodo_templates', f'{template_file_name}.json'), 'r') as template_file:
        template = json.load(template_file)

    filled_template = template

    # Fill it up for each key
    for key in individual_descriptor.keys():
        value = individual_descriptor[key]
        if key in filled_template['metadata'].keys():
            filled_template['metadata'][key] = value

    with open(join(config.paths['dataDirectory'], 'uploads', descriptor_file_name), 'w') as descriptor_json:
        json.dump(filled_template, descriptor_json, ensure_ascii=True)

    std_out(f'Created descriptor file for {descriptor_file_name}', 'SUCCESS')

    return json.dumps(filled_template)
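# Usage sketch for fill_template (hypothetical descriptor contents; assumes the
# zenodo_templates folder and config.paths['dataDirectory'] exist as above):
#
#   descriptor = {
#       'title': 'Deployment in Barcelona',
#       'description': 'Air quality test data'
#   }
#   fill_template(descriptor, 'barcelona_deployment.json', upload_type='dataset')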
def __update_descriptor__(self):
    if self.descriptor == {}:
        std_out('No descriptor file to update')

    for field in self._default_fields:
        if field not in self.descriptor.keys():
            self.descriptor[field] = self._default_fields[field]

    # Add details to descriptor, or update them if there is anything in details
    for detail in self.details.keys():
        self.descriptor[detail] = self.details[detail]

    # Add devices to descriptor
    for device_name in self.devices.keys():
        device = self.devices[device_name]
        if device.source == 'csv':
            device.processed_data_file = self.full_name + '_' + str(device.id) + '.csv'
        dvars = vars(device).copy()
        for discvar in config._discvars:
            if discvar in dvars:
                dvars.pop(discvar)
        self.descriptor['devices'][device.id] = dvars

    # Create yaml with test description
    with open(join(self.path, 'test_description.yaml'), 'w') as yaml_file:
        yaml.dump(self.descriptor, yaml_file)

    std_out('Descriptor file updated')
def export_csv_file(path, file_name, df, forced_overwrite=False):
    '''
    Exports pandas dataframe to a csv file
    Parameters
    ----------
        path: String
            Directory path
        file_name: String
            File name for the resulting csv
        df: pandas.DataFrame
            Dataframe to export
        forced_overwrite: boolean
            False
            If file exists, overwrite it or not
    Returns
    ---------
        True if exported, False if not (returns False if the file exists
        and overwrite is not forced)
    '''

    # If path does not exist, create it
    if not exists(path):
        makedirs(path)

    file_path = join(path, f'{file_name}.csv')

    # If file does not exist, or overwrite is forced
    if not exists(file_path) or forced_overwrite:
        df.to_csv(file_path, sep=",")
        std_out(f'File saved to: \n{file_path}', 'SUCCESS')
    else:
        std_out('File already exists - delete it first, I was not asked to overwrite anything!', 'ERROR')
        return False

    return True
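# Usage sketch for export_csv_file (hypothetical path and data):
#
#   from pandas import DataFrame, date_range
#   readings = DataFrame({'TEMP': [21.3, 21.5]},
#                        index=date_range('2020-10-29', periods=2, freq='min'))
#   export_csv_file('/tmp/scdata_exports', 'device_10001', readings,
#                   forced_overwrite=True)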
def post(self, with_post_info=True):
    '''
    Posts device metrics. Only available for parent of ScApiDevice
    Parameters
    ----------
        with_post_info: boolean
            Default True
            Add the post info to the package
    Returns
    ----------
        boolean
        True if posted ok, False otherwise
    '''
    post_ok = True

    if self.sources[self.source]['handler'] != 'ScApiDevice':
        std_out('Only supported processing post is to SmartCitizen API', 'ERROR')
        return False

    for metric in self.metrics:
        if self.metrics[metric]['post'] == True:
            # Get single series for post
            df = DataFrame(self.readings[metric])
            sensor_id = self.metrics[metric]['id']
            post_ok &= self.api_device.post_device_data(df, sensor_id=sensor_id)

    # Post info if requested. It should be updated elsewhere
    if with_post_info:
        self.api_device.post_postprocessing_info()

    return post_ok
def load_postprocessing_info(self):

    if self.source != 'api':
        return None

    if self.sources[self.source]['handler'] != 'ScApiDevice':
        return None

    # Request to get postprocessing information
    if self.api_device.get_postprocessing_info() is None:
        return None

    # Put it where it goes
    self.hw_id = self.api_device.postprocessing_info['hardware_id']
    self.hw_updated_at = self.api_device.postprocessing_info['updated_at']
    self.hw_post_blueprint = self.api_device.postprocessing_info['postprocessing_blueprint']
    self.latest_postprocessing = self.api_device.postprocessing_info['latest_postprocessing']

    # Use postprocessing info blueprint
    if self.hw_post_blueprint in config.blueprints.keys():
        std_out(f'Using hardware postprocessing blueprint: {self.hw_post_blueprint}')
        self.blueprint = self.hw_post_blueprint

    return self.api_device.postprocessing_info
def export(self, path, forced_overwrite=False, file_format='csv'):
    '''
    Exports Device.readings to file
    Parameters
    ----------
        path: String
            Path to export file to, does not include filename.
            The filename will be the Device.id property
        forced_overwrite: boolean
            False
            Force data export in case of already existing file
        file_format: String
            'csv'
            File format to export. Currently only CSV is supported
    Returns
    ---------
        True if exported ok, False otherwise
    '''

    # Export device
    if file_format == 'csv':
        return export_csv_file(path, str(self.id), self.readings,
                               forced_overwrite=forced_overwrite)

    std_out('File format not supported', 'ERROR')
    return False
def post_postprocessing_info(self):
    '''
    POST postprocessing info into the device in the SmartCitizen API
    Updates all the post info. Changes need to be made to the keys of
    postprocessing_info outside of here

    # Example postprocessing_info:
    # {
    #   "updated_at": "2020-10-29T04:35:23Z",
    #   "postprocessing_blueprint": "sck_21_gps",
    #   "hardware_id": "SCS20100",
    #   "latest_postprocessing": "2020-10-29T08:35:23Z"
    # }
    '''

    if 'SC_BEARER' not in environ:
        std_out('Cannot post without Auth Bearer', 'ERROR')
        return False

    headers = {'Authorization': 'Bearer ' + environ['SC_BEARER'],
               'Content-type': 'application/json'}

    post_json = dumps(self.postprocessing_info)
    std_out(f'Posting post-processing info:\n {post_json}')

    response = patch(f'https://api.smartcitizen.me/v0/devices/{self.id}/',
                     data=post_json, headers=headers)

    if response.status_code == 200 or response.status_code == 201:
        return True

    return False
def deconvolution(dataframe, **kwargs):
    """
    Calculates pollutant concentration for convolved metrics, such as NO2+O3.
    Needs convolved metric, and target pollutant sensitivities
    Parameters
    ----------
        source: string
            Name of convolved metric containing both pollutants (such as NO2+O3)
        base: string
            Name of one of the already deconvolved pollutants (for instance NO2)
        id: int
            Sensor ID
        pollutant: string
            Pollutant name. Must be included in the corresponding LUTs for unit
            conversion and additional parameters:
            MOLECULAR_WEIGHTS, config._background_conc, CHANNEL_LUT
    Returns
    -------
        Calculation of pollutant based on:
        6.36 * sensitivity * (working - zero_working) / (auxiliary - zero_auxiliary)
    """

    result = Series()
    baseline = Series()

    # Check inputs
    flag_error = False
    if 'source' not in kwargs: flag_error = True
    if 'base' not in kwargs: flag_error = True
    if 'id' not in kwargs: flag_error = True
    if 'pollutant' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    sensitivity_1 = config.calibrations.loc[kwargs['id'], 'sensitivity_1']
    sensitivity_2 = config.calibrations.loc[kwargs['id'], 'sensitivity_2']
    target_1 = config.calibrations.loc[kwargs['id'], 'target_1']
    target_2 = config.calibrations.loc[kwargs['id'], 'target_2']
    nWA = (config.calibrations.loc[kwargs['id'], 'w_zero_current'] /
           config.calibrations.loc[kwargs['id'], 'aux_zero_current'])

    if target_1 != kwargs['pollutant']:
        std_out(f"Sensor {kwargs['id']} doesn't coincide with calibration data", 'ERROR')
        return None

    factor_unit_1 = get_units_convf(kwargs['pollutant'], from_units='ppm')
    factor_unit_2 = get_units_convf(kwargs['base'], from_units='ppm')

    result = factor_unit_1 * (config._alphadelta_pcb * dataframe[kwargs['source']] -
                              dataframe[kwargs['base']] / factor_unit_2 *
                              abs(sensitivity_2)) / abs(sensitivity_1)

    # Add Background concentration
    result += config._background_conc[kwargs['pollutant']]

    return result
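# Usage sketch for deconvolution (hypothetical sensor id and channel names).
# Assumes config.calibrations holds a row for this id with target_1 == 'O3',
# and that the dataframe contains both the convolved and the base channel:
#
#   o3 = deconvolution(df, source='NO2+O3', base='NO2',
#                      id=212602904, pollutant='O3')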
def add_content(self, title, figure=None, text=None, iframe=None, show_title=True, force=False):
    '''
    Adds content for the rendered flask template of the test. Content is
    a dict() which contains a key per title (replacing ' ' with '_') and
    the content in it.
    Parameters
    ----------
        title
            None
            Content title. Cannot be None
        figure
            None
            matplotlib or similar figure that can be converted to base64
        text
            None
            Text to be converted to <p> html tag with additional inner
            html (jinja2 safe rendered)
        iframe
            None
            HTML iframe containing anything
        show_title
            True
            Show title in HTML <h3> tag
        force
            False
            If True, overwrite already added content with this title
    Returns
    ----------
        True if content added, False otherwise
    '''

    title_cor = sub(r'\W|^(?=\d)', '_', title)

    if title_cor not in self.content or force:
        self.content[title_cor] = dict()

        if title is not None:
            self.content[title_cor]['title'] = title
        if figure is not None:
            self.content[title_cor]['image'] = to_png_b64(figure)
        if text is not None:
            self.content[title_cor]['text'] = text
        if iframe is not None:
            self.content[title_cor]['iframe'] = iframe

        self.content[title_cor]['show_title'] = show_title
        std_out('Item added', 'SUCCESS')
        return True
    else:
        std_out("Item not added as it's already in content", 'ERROR')
        return False
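# Usage sketch for add_content (hypothetical test instance, device and figure):
#
#   import matplotlib.pyplot as plt
#   fig, ax = plt.subplots()
#   ax.plot(test.devices['10001'].readings['NO2'])
#   test.add_content(title='NO2 time series', figure=fig,
#                    text='Raw NO2 readings for device 10001')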
def add_device(self, device):
    '''
    Adds a device to the test. The device has to be an instance
    of 'scdata.device.Device'
    '''
    if device.id not in self.devices.keys():
        self.devices[device.id] = device
    else:
        std_out(f'Device {device.id} is duplicated', 'WARNING')
def post_device_data(self, df, sensor_id, clean_na='drop'):
    '''
    POST data in the SmartCitizen API
    Parameters
    ----------
        df: pandas DataFrame
            Contains data in a DataFrame format.
            Data is posted using the sensor id provided, regardless of the
            dataframe's column name.
            Data is posted in UTC TZ, so the dataframe needs to have a
            localised timestamp index.
        sensor_id: int
            The sensor id
        clean_na: string, optional
            'drop'
            'drop', 'fill'
    Returns
    -------
        True if the data was posted successfully
    '''
    if 'SC_ADMIN_BEARER' not in environ:
        std_out('Cannot post without Auth Bearer', 'ERROR')
        return False

    headers = {'Authorization': 'Bearer ' + environ['SC_ADMIN_BEARER'],
               'Content-type': 'application/json'}

    # Get sensor name
    sensor_name = list(df.columns)[0]

    # Clean df of nans
    df = clean(df, clean_na, how='all')

    # Process dataframe
    df['id'] = sensor_id
    df.index.name = 'recorded_at'
    df.rename(columns={sensor_name: 'value'}, inplace=True)
    df.columns = MultiIndex.from_product([['sensors'], df.columns])

    j = (df.groupby('recorded_at', as_index=True)
           .apply(lambda x: x['sensors'][['value', 'id']].to_dict('records')))

    # Prepare json post
    payload = {"data": []}
    for item in j.index:
        payload["data"].append({
            "recorded_at": localise_date(item, 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ'),
            "sensors": j[item]
        })

    payload_json = dumps(payload)

    response = post(f'https://api.smartcitizen.me/v0/devices/{self.id}/readings',
                    data=payload_json, headers=headers)

    if response.status_code == 200 or response.status_code == 201:
        return True

    return False
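# Usage sketch for post_device_data (hypothetical device and sensor id; requires
# SC_ADMIN_BEARER in the environment and a tz-aware index, as documented above):
#
#   from pandas import DataFrame, date_range
#   idx = date_range('2020-10-29 08:00', periods=3, freq='min', tz='UTC')
#   df = DataFrame({'TEMP': [21.1, 21.2, 21.3]}, index=idx)
#   device.api_device.post_device_data(df, sensor_id=55)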
def get_device_added_at(self):
    if self.added_at is None:
        if self.get_device_json() is not None:
            self.added_at = self.devicejson['added_at']

    std_out('Device {} was added at {}'.format(self.id, self.added_at))

    return self.added_at
def get_device_last_reading(self, update=False):
    if self.last_reading_at is None or update:
        if self.get_device_json(update) is not None:
            self.last_reading_at = self.devicejson['last_reading_at']

    std_out('Device {} has last reading at {}'.format(self.id, self.last_reading_at))

    return self.last_reading_at
def del_metric(self, metricn=''):
    if 'metrics' not in vars(self):
        return
    if metricn in self.metrics:
        self.metrics.pop(metricn, None)
    if metricn in self.readings.columns:
        del self.readings[metricn]

    if metricn not in self.readings and metricn not in self.metrics:
        std_out(f'Metric {metricn} removed from metrics', 'SUCCESS')
        return True
    return False
def check_existing(self, comment):
    l = []
    c = self.cron.find_comment(comment)
    for item in c:
        l.append(item)

    if l:
        std_out(f'{comment} already running')
        return True

    std_out(f'{comment} not running')
    return False
def get_device_location(self):
    if self.location is None:
        latitude, longitude = self.get_device_lat_long()
        # Localize it
        self.location = tz_where.tzNameAt(latitude, longitude)

    std_out('Device {} timezone is {}'.format(self.id, self.location))

    return self.location
def prepare(self, measurand, inputs, options=dict()):
    """
    Prepares a test for a regression model
    Parameters
    ----------
        measurand: dict
            measurand = {'8019043': ['NO2']}
        inputs: dict
            inputs per device and reading
            inputs = {'devicename': ['reading-1', 'reading-2']}
        options: dict
            Options including data processing. Defaults in config._model_def_opt
    Returns
    -------
        df = pandas DataFrame
        measurand_name = string
    """

    options = dict_fmerge(options, config._model_def_opt)

    # Measurand
    measurand_device = list(measurand.keys())[0]
    measurand_metric = measurand[measurand_device][0]
    measurand_name = measurand[measurand_device][0] + '_' + measurand_device

    df = DataFrame()
    df[measurand_name] = self.devices[measurand_device].readings[measurand_metric]

    for input_device in inputs.keys():
        combined_df = self.combine(devices=[input_device], readings=inputs[input_device])
        df = df.combine_first(combined_df)

    if options['common_avg']:

        common_channels = inputs[list(inputs.keys())[0]]
        for input_device in inputs.keys():
            common_channels = list(set(common_channels).intersection(set(inputs[input_device])))
        std_out(f'Performing avg in common columns {common_channels}')
        for channel in common_channels:
            columns_list = [channel + '_' + device for device in list(inputs.keys())]
            df[channel + '_AVG'] = df[columns_list].mean(axis=1)

        df = df.loc[:, df.columns.str.contains("_AVG") | df.columns.str.contains(measurand_name)]

    if options['clean_na'] is not None:
        df = clean(df, options['clean_na'], how='any')

    return df, measurand_name
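# Usage sketch for prepare (hypothetical devices and channel names):
#
#   measurand = {'8019043': ['NO2']}
#   inputs = {'kit_1': ['NO2_WE', 'NO2_AE', 'TEMP'],
#             'kit_2': ['NO2_WE', 'NO2_AE', 'TEMP']}
#   df, target = test.prepare(measurand, inputs, options={'common_avg': True})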
def __fill_metrics__(self):

    if self.hw_id in config.hardware_info:
        std_out('Hardware ID found in history', 'SUCCESS')
        hw_info = config.hardware_info[self.hw_id]
    else:
        std_out(f"Hardware id: {self.hw_id} not found in hardware_info", 'ERROR')
        return False

    # Now go through sensor versions and parse them
    for version in hw_info.keys():

        from_date = hw_info[version]["from"]
        to_date = hw_info[version]["to"]

        for slot in hw_info[version]["ids"]:

            # Alphasense type
            if slot.startswith('AS'):

                sensor_id = hw_info[version]["ids"][slot]
                as_type = config._as_sensor_codes[sensor_id[0:3]]
                pollutant = as_type[as_type.index('_') + 1:]
                platform_sensor_id = config._platform_sensor_ids[pollutant]
                # TODO - USE POLLUTANT OR PLATFORM SENSOR ID?
                process = 'alphasense_803_04'
                wen = f"ADC_{slot.strip('AS_')[:slot.index('_')]}_{slot.strip('AS_')[slot.index('_')+1]}"
                aen = f"ADC_{slot.strip('AS_')[:slot.index('_')]}_{slot.strip('AS_')[slot.index('_')+2]}"

                # metric_name = f'{pollutant}_V{version}_S{list(hw_info[version]["ids"]).index(slot)}'
                metric_name = f'{pollutant}'

                metric = {
                    metric_name: {
                        'process': process,
                        'desc': f'Calculation of {pollutant} based on AAN 803-04',
                        'units': 'ppb',  # always for alphasense sensors
                        'id': platform_sensor_id,
                        'post': True,
                        'kwargs': {
                            'from_date': from_date,
                            'to_date': to_date,
                            'id': sensor_id,
                            'we': wen,
                            'ae': aen,
                            't': 'EXT_TEMP',  # With external temperature?
                            'location': self.location
                        }
                    }
                }

                self.add_metric(metric)
def apply_regressor(dataframe, **kwargs):
    '''
    Applies a regressor model based on a pretrained model
    Parameters
    ----------
        model: sklearn predictor
            Model with .predict method
        options: dict
            Options for data preprocessing. Defaults in config.model_def_opt
        variables: dict
            variables dictionary with:
            {
                'measurand': {
                    'measurand-device-name': ['measurand']
                },
                'inputs': {
                    'input-device-names': ['input-1', 'input_2', 'input-3']
                }
            }
    Returns
    ----------
        pandas series containing the prediction
    '''

    inputs = list()
    for device in kwargs['variables']['inputs']:
        inputs = list(set(inputs).union(set(kwargs['variables']['inputs'][device])))

    try:
        inputdf = dataframe[inputs].copy()
        inputdf = inputdf.reindex(sorted(inputdf.columns), axis=1)
    except KeyError:
        std_out('Inputs not in dataframe', 'ERROR')
        return None

    if 'model' not in kwargs:
        # Without a model there is nothing to predict with
        std_out('Model not in inputs', 'ERROR')
        return None
    model = kwargs['model']

    if 'options' not in kwargs:
        options = config.model_def_opt
    else:
        options = dict_fmerge(config.model_def_opt, kwargs['options'])

    # Remove na
    inputdf = clean(inputdf, options['clean_na'], how='any')
    features = array(inputdf)
    result = DataFrame(model.predict(features)).set_index(inputdf.index)

    return result
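# Usage sketch for apply_regressor (hypothetical pretrained model and devices).
# The variables dict mirrors the structure documented in the docstring above:
#
#   variables = {'measurand': {'ref_station': ['NO2']},
#                'inputs': {'kit_1': ['NO2_WE', 'NO2_AE', 'TEMP']}}
#   prediction = apply_regressor(df, model=trained_rf, variables=variables,
#                                options={'clean_na': 'drop'})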
def __check_sensors__(self):
    remove_sensors = list()
    for sensor in self.sensors:
        if sensor not in self.readings.columns:
            remove_sensors.append(sensor)

    if remove_sensors != []:
        std_out(f'Removing sensors from device: {remove_sensors}', 'WARNING')
    for sensor_to_remove in remove_sensors:
        self.sensors.pop(sensor_to_remove, None)

    std_out(f'Device sensors after removal: {list(self.sensors.keys())}')
def get_device_location(self, update=False):
    if self.location is None or update:
        latitude, longitude = self.get_device_lat_long(update)
        # Localize it
        if latitude is not None and longitude is not None:
            self.location = tz_where.tzNameAt(latitude, longitude)

    std_out('Device {} timezone is {}'.format(self.id, self.location))

    return self.location
def get_device_json(self, update=False):
    if self.devicejson is None or update:
        try:
            deviceR = get(self.API_BASE_URL + '{}/'.format(self.id))
            if deviceR.status_code == 200 or deviceR.status_code == 201:
                self.devicejson = deviceR.json()
            else:
                std_out('API reported {}'.format(deviceR.status_code), 'ERROR')
        except:
            std_out('Failed request. Probably no connection', 'ERROR')

    return self.devicejson
def dispersion_summary(self):
    self._dispersion_summary = dict()

    if self.dispersion_df is None:
        std_out('Perform dispersion analysis first!', 'ERROR')
        return None

    for channel in self.common_channels:
        if channel in config._dispersion['ignore_channels']:
            continue
        # Calculate
        self._dispersion_summary[channel] = self.dispersion_df[channel + '_STD'].mean()

    return self._dispersion_summary
def get_user_json_by_username(self):
    if self.userjson is None:
        try:
            userR = get(self.API_BASE_URL + '{}/'.format(self.username), headers=self.headers)
            if userR.status_code == 200 or userR.status_code == 201:
                self.userjson = userR.json()
            else:
                std_out('API reported {}'.format(userR.status_code), 'ERROR')
        except:
            std_out('Failed request. Probably no connection', 'ERROR')

    return self.userjson
def get_device_json(self):
    if self.devicejson is None:
        try:
            s = get(self.API_BASE_URL + f'codi_eoi={self.id}')
            if s.status_code == 200 or s.status_code == 201:
                self.devicejson = read_csv(StringIO(s.content.decode('utf-8')))
            else:
                std_out('API reported {}'.format(s.status_code), 'ERROR')
        except:
            std_out('Failed request. Probably no connection', 'ERROR')

    return self.devicejson
def set_descriptor_attrs(self):
    # Descriptor attributes
    for ditem in self.description.keys():
        if ditem not in vars(self):
            std_out(f'Ignoring {ditem} from input')
            continue
        if type(self.__getattribute__(ditem)) == dict:
            self.__setattr__(ditem, dict_fmerge(self.__getattribute__(ditem),
                                                self.description[ditem]))
        else:
            self.__setattr__(ditem, self.description[ditem])
def dprocess(device, dry_run=False):
    '''
    This function processes a device from the SC API, assuming there is
    postprocessing information in it and that it's valid for doing so
    '''
    std_out(f'[CHUPIFLOW] Processing instance for device {device}')
    # Create device from SC API
    d = Device(descriptor={'source': 'api', 'id': f'{device}'})

    if d.validate():
        # Load only unprocessed
        if d.load(only_unprocessed=True, max_amount=config._max_load_amount):
            # Process it
            d.process()
            # Post results
            d.post_metrics(dry_run=dry_run, max_retries=config._max_forward_retries)
            # Forward it if requested
            if d.forwarding_request is not None:
                std_out(f'[CHUPIFLOW] Forwarding {device}')
                d.forward(dry_run=dry_run, max_retries=config._max_forward_retries)
            d.update_postprocessing(dry_run=dry_run)
    else:
        std_out(f'[CHUPIFLOW] Device {device} not valid', 'ERROR')

    std_out(f'[CHUPIFLOW] Concluded job for {device}')
def process(self, only_new=False):
    '''
    Calculates all the metrics in each of the devices
    Returns True if done OK
    '''
    process_ok = True
    for device in self.devices:
        process_ok &= self.devices[device].process(only_new=only_new)

    # Cosmetic output
    if process_ok:
        std_out(f'Test {self.full_name} processed', 'SUCCESS')
    else:
        std_out(f'Test {self.full_name} not processed', 'ERROR')

    return process_ok