def start_publishing(self, message):
    """Enable delivery confirmations and schedule the first message
    to be sent to RabbitMQ.
    """
    LOGGER.info('Issuing publisher related RPC commands')
    self._enable_delivery_confirmations()
    if self._channel is None or not self._channel.is_open:
        return  # No open channel
    # hdrs = {u'مفتاح': u' قيمة', u'键': u'值', u'キー': u'値'}
    # message = u'مفتاح قيمة 键 值 キー 値'
    # Wrap the payload in a dict with a unique id before publishing:
    message = dict(data=message, _id=str(uuid4()))
    LOGGER.info('Message to dispatch: %s', message)
    properties = pika.BasicProperties(
        app_id='example-publisher',
        content_type='application/json')  # , headers=hdrs
    self._channel.basic_publish(self._server.EXCHANGE_CORE_TO_SENDER,
                                self._server.ROUTING_KEY,
                                json.dumps(message),
                                properties)
    self._message_number += 1
    self._deliveries.append(self._message_number)
    LOGGER.info('Published message # %i', self._message_number)
def push(self, message, *args, **kwargs):
    LOGGER.info("Publishing: {}".format(message))
    try:
        self._channel.basic_publish(exchange=self._exchange,
                                    routing_key=self._routing_key,
                                    body=json.dumps(message))
    except Exception as e:
        print(repr(e))
        traceback.print_exc()
        raise
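# A minimal, self-contained sketch of the publish path used by push() above,
# assuming a RabbitMQ broker on localhost. The exchange and routing-key names
# ('example-exchange', 'example.key') are hypothetical placeholders; this
# illustrates the pika calls involved, not this project's actual wiring.
import json
import pika


def publish_once(message):
    # Open a short-lived blocking connection and channel, publish one JSON
    # message, and close the connection.
    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    channel = connection.channel()
    try:
        channel.basic_publish(
            exchange='example-exchange',
            routing_key='example.key',
            body=json.dumps(message),
            properties=pika.BasicProperties(content_type='application/json'))
    finally:
        connection.close()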
def wrapper(*args, **kwargs):
    dataset = func(*args, **kwargs)
    raw_dataframe = dataset['raw_dataframe']
    dataset['dataframe_info'] = json.loads(
        json.dumps(stats.extract_dataframe_info(raw_dataframe)))
    if 'feature_column_labels' not in dataset:
        dataset['feature_column_labels'] = raw_dataframe.columns.drop(
            dataset['target_column_label'])
    if 'MSE_baseline' not in dataset:
        y = raw_dataframe[dataset['target_column_label']]
        dataset['MSE_baseline'] = ((y - y.mean()) ** 2).mean().compute()
    per_column_statistic = dataset['dataframe_info']['per_column_statistic']
    dataset['columns_info'] = {
        column_id: cloudsml.models.BaseDataTransformationColumn(
            id=column_id,
            name=column_name,
            statistics=per_column_statistic[column_name],
            data_type=per_column_statistic[column_name]['type'],
            data_format=per_column_statistic[column_name]['format'])
        for column_id, column_name in zip(
            sorted(random.sample(range(100000), len(raw_dataframe.columns))),
            dataset['dataframe_info']['columns'])
    }
    dataset['columns_info_by_name'] = {
        column.name: column for column in dataset['columns_info'].values()
    }
    dataframe = raw_dataframe.rename(columns={
        column.name: column.id for column in dataset['columns_info'].values()
    })
    dataset['dataframe'] = dataframe
    dataset['target_column_id'] = dataset['columns_info_by_name'][
        dataset['target_column_label']].id
    dataset['feature_column_ids'] = dataset['dataframe'].columns.drop(
        dataset['target_column_id']).values.tolist()
    return dataset
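# The wrapper above is the inner function of a dataset-preparing decorator.
# A sketch of how such a decorator shell might look; the name prepare_dataset
# and the surrounding layout are assumptions, not this project's actual code.
import functools


def prepare_dataset(func):
    # Hypothetical outer decorator: ``func`` is expected to return a dict with
    # at least 'raw_dataframe' and 'target_column_label'; ``wrapper`` (as
    # defined above) then enriches that dict with column info and statistics.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        dataset = func(*args, **kwargs)
        # ... enrichment as in the wrapper above ...
        return dataset
    return wrapper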
def get_inverters():
    try:
        url = ENVOY_URL + '/inventory.json'
        logger.info('Getting data from [%s]...' % url)
        resp = requests.get(url,
                            auth=HTTPDigestAuth(ENVOY_USERNAME, ENVOY_PASSWORD),
                            timeout=9)
        devices = False
        if resp.status_code == 200:
            # Count active inverters
            devices = json.loads('{ "inverters": ' + resp.text + ' }')
        else:
            logger.error('Failed to get data from [%s]. Error code [%i]'
                         % (url, resp.status_code))
        url = ENVOY_URL + '/api/v1/production/inverters'
        logger.info('Getting data from [%s]...' % url)
        resp = requests.get(url,
                            auth=HTTPDigestAuth(ENVOY_USERNAME, ENVOY_PASSWORD),
                            timeout=9)
        if resp.status_code == 200:
            readings = json.loads('{ "readings": ' + resp.text + ' }')
            if devices:
                for device in devices['inverters'][0]['devices']:
                    # Match the production reading by serial number
                    for reading in readings['readings']:
                        if str(reading['serialNumber']) == device['serial_num']:
                            device['serialNumber'] = reading['serialNumber']
                            device['lastReportDate'] = reading['lastReportDate']
                            device['lastReportWatts'] = reading['lastReportWatts']
                            device['maxReportWatts'] = reading['maxReportWatts']
                            break
            else:
                devices = readings
            data = '{ "inverters": %s }' % json.dumps(
                devices['inverters'][0]['devices'])
            redis.set_state(REDIS_INVERTER_DATA, data)
            write_influxdb_inverter_data.delay(dates.to_string(dates.utcnow()), data)
        else:
            logger.error('Failed to insert new data. Error code [%i]'
                         % resp.status_code)
    except Exception:
        logger.error('Failed to insert new data.')
        logger.exception('message')
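# Illustrative shapes of the two Envoy responses merged in get_inverters(),
# inferred only from the field accesses above; real payloads carry more fields
# and these example values are hypothetical.
EXAMPLE_INVENTORY = [  # /inventory.json, wrapped above as {"inverters": ...}
    {'devices': [{'serial_num': '123456789012'}]},
]
EXAMPLE_READINGS = [   # /api/v1/production/inverters, wrapped as {"readings": ...}
    {'serialNumber': 123456789012, 'lastReportDate': 1690000000,
     'lastReportWatts': 240, 'maxReportWatts': 300},
]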
def extract_dataset_info(dataset_url, data_transformation_id,
                         dataset_transformations=None):
    """
    Select the appropriate reader function, load the data into a dask
    DataFrame, and retrieve per-column info from it.
    """
    from app.extensions import cloudsml

    data_transformation = cloudsml.data_api.get_data_transformation_by_id(
        data_transformation_id)
    if data_transformation.transformation_type == 'blank':
        columns_info = None
    elif data_transformation.transformation_type == 'add_column':
        columns_info = cloudsml.data_api.get_data_transformation_columns(
            data_transformation_id,
            initial=True,
            # XXX: We must read the list in batches when the number of columns exceeds 1000
            limit=1000)
    else:
        raise NotImplementedError(
            "Transformation type '%s' cannot be handled"
            % data_transformation.transformation_type)
    dataset_df = dask_universal_read(dataset_url, columns_info=columns_info)
    if dataset_transformations:
        dataset_df = transform_dask_dataframe(
            dataset_df,
            fetch_data_transformations_by_id_in_pfa(dataset_transformations))
    if data_transformation.transformation_type == 'add_column':
        dataset_df = dataset_df[['dt%s' % (data_transformation.id)]]
    dataframe_info = extract_dataframe_info(dataset_df)
    per_column_statistic = dataframe_info['per_column_statistic']
    for index, column_name in enumerate(dataset_df.columns):
        assert column_name in per_column_statistic
        if data_transformation.transformation_type == 'blank':
            column_title = column_name
        else:
            if len(per_column_statistic) == 1:
                column_title = data_transformation.name
            else:
                column_name = '%s__%d' % (column_name, index)
                column_title = '%s #%d' % (data_transformation.name, index)
        cloudsml.data_api.create_data_transformation_column(
            data_transformation_id,
            name=column_name,
            title=column_title,
            statistics=json.dumps(per_column_statistic[column_name]),
            data_type=per_column_statistic[column_name]['type'].name,
            data_format=per_column_statistic[column_name]['format'].name,
        )
    cloudsml.data_api.patch_data_transformation_by_id(
        data_transformation_id,
        [{
            'op': 'replace',
            'path': '/status',
            'value': 'ready'
        }, {
            'op': 'replace',
            'path': '/rows_count',
            'value': dataframe_info['rows_count']
        }])
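# A sketch of one way the batched read flagged by the XXX comments above could
# look. It assumes the columns endpoint accepts an ``offset`` argument alongside
# ``limit``; that parameter is an assumption, not a verified part of the
# cloudsml client API.
def iter_data_transformation_columns(data_api, data_transformation_id,
                                     batch_size=1000, **query):
    # Hypothetical helper: page through the column list in fixed-size batches
    # instead of relying on a single limit=1000 request.
    offset = 0
    while True:
        batch = data_api.get_data_transformation_columns(
            data_transformation_id, offset=offset, limit=batch_size, **query)
        if not batch:
            break
        for column in batch:
            yield column
        if len(batch) < batch_size:
            break
        offset += batch_size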
def toJSON(self):
    # json.dumps() cannot serialize a map object under Python 3, so
    # materialize the converted items into a list first.
    return json.dumps(
        [item.toDict() for item in self.asSortedList()],
        separators=(',', ':'),
    )
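# Quick illustration of the Python 3 pitfall fixed in toJSON() above:
# json.dumps() raises TypeError for a map object but serializes the
# equivalent list fine. The sample data here is illustrative only.
import json

items = [{'a': 1}, {'b': 2}]
# json.dumps(map(dict.copy, items))   # TypeError: map object is not JSON serializable
print(json.dumps([d.copy() for d in items], separators=(',', ':')))
# -> [{"a":1},{"b":2}]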
def stream():
    result = redis.get_state(redis.REDIS_METER_DATA, False)
    if not result:
        return json.dumps({'records': 0})
    else:
        return result
def inverter():
    result = redis.get_state(redis.REDIS_INVERTER_DATA, False)
    if not result:
        return json.dumps({'records': 0})
    else:
        return result
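# stream() and inverter() above follow the same pattern: return the cached JSON
# payload from redis, or a {'records': 0} placeholder when nothing is cached.
# A sketch of a shared helper; the name is a suggestion, not existing code.
def _cached_json_or_empty(state_key):
    # Hypothetical shared helper for the two endpoints above.
    result = redis.get_state(state_key, False)
    return result if result else json.dumps({'records': 0})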
def test_serialization(input_object, expected_output):
    assert json.dumps(input_object) == expected_output
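# test_serialization() takes (input_object, expected_output), which suggests a
# parametrized test. A sketch of how the cases might be supplied with pytest;
# the example cases are illustrative, not the project's actual fixtures.
import json
import pytest


@pytest.mark.parametrize('input_object, expected_output', [
    ({'a': 1}, '{"a": 1}'),
    ([1, 2, 3], '[1, 2, 3]'),
    (None, 'null'),
])
def test_serialization(input_object, expected_output):
    assert json.dumps(input_object) == expected_output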
def build_predictive_model(learn_dataset_url, dataset_transformations,
                           predictive_analysis_method_name,
                           predictive_analysis_options, predictive_model_id):
    """
    Builds a predictive model, saves it to SeaWeedFS, and patches the
    predictive model through the API by its id, storing the SeaweedFS ids of
    both the model and the model info.

    Args:
        learn_dataset_url (str): link to a learn dataset.
        dataset_transformations (list): a list of data transformation ids of
            the learn dataset in PFA format.
        predictive_analysis_method_name (str): name of the predictive method
            that is requested to be used.
        predictive_analysis_options (dict): kwargs to the predictive analysis
            method.
        predictive_model_id (int): id of the model in the API to patch.
    """
    log.info("New %s model #%d is going to be built...",
             predictive_analysis_method_name, predictive_model_id)
    initial_columns_info = cloudsml.data_api.get_data_transformation_columns(
        dataset_transformations[-1],
        initial=True,
        # XXX: We must read the list in batches when the number of columns exceeds 1000
        limit=1000)
    learn_dataset_df = dask_universal_read(learn_dataset_url,
                                           columns_info=initial_columns_info)
    learn_dataset_df = transform_dask_dataframe(
        learn_dataset_df,
        fetch_data_transformations_by_id_in_pfa(dataset_transformations))
    target_column_id = predictive_analysis_options['target_column_id']
    feature_column_ids = predictive_analysis_options['feature_column_ids']
    selected_columns_info = {
        column.id: column
        for column in cloudsml.data_api.get_data_transformation_columns(
            dataset_transformations[-1],
            id=([target_column_id] + feature_column_ids),
            # XXX: The API server limits the maximum possible number of columns per
            # single request to 1000 to avoid too long response times. Thus, we must
            # implement querying the columns info in batches. Yet, this might be
            # hidden behind a convenient wrapper.
            limit=1000)
    }
    learn_dataset_df = learn_dataset_df[sorted(selected_columns_info.keys())]
    missing_values_encoder = missing_values_encoding.missing_values_encoder
    learn_dataset_df, missing_values_substitution_map = missing_values_encoder(
        learn_dataset_df, selected_columns_info)
    learn_dataset_df, selected_columns_info = one_hot_encoding.OneHotEncoder(
        categorical_columns_ids=predictive_analysis_options[
            'categorical_column_ids'],
        columns_info=selected_columns_info).update(learn_dataset_df,
                                                   selected_columns_info)
    test_partition_ratio = predictive_analysis_options.get(
        'test_partition_ratio', 0.4)
    test_learn_splitter = SplitSampling(test_partition_ratio, random_state=0)
    test_dataset_df, learn_dataset_df = test_learn_splitter.split(
        learn_dataset_df)
    predictive_analysis_method = PREDICTIVE_ANALYSIS_METHODS[
        predictive_analysis_method_name]
    log.info('Model #%d is being fitted with data...', predictive_model_id)
    model = predictive_analysis_method(learn_dataset_df,
                                       columns_info=selected_columns_info,
                                       **predictive_analysis_options)
    log.info('Model #%d is being exported to PFA...', predictive_model_id)
    one_hot_pfa_decoder = OneHotPFADecoder({
        column.id: column.virtual_columns
        for column in selected_columns_info.values()
        if hasattr(column, 'virtual_columns')
    })
    missing_values_pfa_decoder = MissingValuesPFADecoder(
        missing_values_substitution_map)
    translated_model = missing_values_pfa_decoder.transform(
        one_hot_pfa_decoder.transform(model.to_pfa()))
    model_file_id = seaweedfs.upload_file(
        stream=json.dumps(translated_model),
        name='model_%s.pfa' % predictive_model_id)
    log.info('Model #%d information is being collected...', predictive_model_id)
    model_info = {
        'learn': model.get_info(learn_dataset_df),
    }
    if test_partition_ratio > 0.0:
        model_info['test'] = model.get_info(test_dataset_df)
    model_info = ModelInfoShema().load({'performance_stats': model_info}).data
    model_info_id = seaweedfs.upload_file(
        stream=json.dumps(model_info),
        name='model_info_%s.json' % predictive_model_id)
    cloudsml.predictive_analytics_api.patch_predictive_model_by_id(
        predictive_model_id,
        [
            {"op": "replace", "path": "/model_seaweed_id", "value": model_file_id},
            {"op": "replace", "path": "/model_info_seaweed_id", "value": model_info_id},
            {"op": "replace", "path": "/status", "value": "fitted"},  # TODO: use a constant here
        ])
def build_predictive_pipeline(
        pipeline_id,
        pipeline_options,
        learn_dataset_url,
        dataset_transformations,
):
    """
    Reads ``pipeline_options`` to retrieve the parameters for building a bunch
    of models, and updates the Pipeline through the API to change its status
    and models_count.
    """
    models_count = (len(pipeline_options['target_column_ids'])
                    * len(pipeline_options['predictive_analysis_options']))
    pipeline = cloudsml.predictive_analytics_api.patch_pipeline_by_id(
        pipeline_id=pipeline_id,
        body=[{
            'op': 'replace',
            'path': '/models_count',
            'value': models_count
        }])
    predictive_models_to_build = []
    pipeline_model_index = 0
    static_feature_column_ids = [
        int(feature_column_id)
        for feature_column_id, feature_column_options in (
            pipeline_options['features'].items()
        ) if feature_column_options['type'] == 'static'
    ]
    # TODO: extend the support for cross-validation
    test_partition_ratio = pipeline_options['testing_settings'][
        'test_partition_ratio']
    for target_column_id in pipeline_options['target_column_ids']:
        for (method_name, method_parameters
             ) in pipeline_options['predictive_analysis_options'].items():
            pipeline_model_index += 1
            # json.dumps() below cannot serialize a set, so keep the
            # intersection as a sorted list.
            categorical_column_ids = sorted(
                set(pipeline_options['categorical_column_ids'])
                & set(static_feature_column_ids))
            predictive_analysis_options = dict(
                target_column_id=target_column_id,
                # TODO: implement optional and random predictors selection.
                feature_column_ids=static_feature_column_ids,
                categorical_column_ids=categorical_column_ids,
                # TODO: extend the support for cross-validation
                test_partition_ratio=test_partition_ratio,
                method_parameters=method_parameters)
            predictive_model = cloudsml.predictive_analytics_api.fit_predictive_model(
                predictive_analysis_method=method_name,
                name='{pipeline_name} #{pipeline_model_index}'.format(
                    pipeline_name=pipeline.name,
                    pipeline_model_index=pipeline_model_index),
                predictive_analysis_options=json.dumps(
                    predictive_analysis_options),
                data_transformation_id=dataset_transformations[-1],
                pipeline_id=pipeline_id)
            predictive_models_to_build.append({
                'predictive_analysis_options': predictive_analysis_options,
                'predictive_analysis_method_name': method_name,
                'predictive_model_id': predictive_model.id,
            })
    for predictive_model_params in predictive_models_to_build:
        build_predictive_model(learn_dataset_url=learn_dataset_url,
                               dataset_transformations=dataset_transformations,
                               **predictive_model_params)
    cloudsml.predictive_analytics_api.patch_pipeline_by_id(
        pipeline_id=pipeline_id,
        body=[{
            'op': 'replace',
            'path': '/status',
            'value': 'ready'
        }])
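# The shape of ``pipeline_options`` that build_predictive_pipeline() expects,
# inferred from the key lookups above. The concrete ids, the method name, and
# the parameter values are illustrative assumptions only; method names must
# match keys of PREDICTIVE_ANALYSIS_METHODS.
EXAMPLE_PIPELINE_OPTIONS = {
    'target_column_ids': [101],
    'categorical_column_ids': [102, 103],
    'features': {
        '102': {'type': 'static'},
        '103': {'type': 'static'},
        '104': {'type': 'optional'},   # non-static features are skipped
    },
    'testing_settings': {'test_partition_ratio': 0.4},
    'predictive_analysis_options': {
        'linear_regression': {},       # method name -> method_parameters
    },
}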