Example #1
    def start_publishing(self, message):
        """Enable delivery confirmations and publish the given message to
        RabbitMQ.

        """
        LOGGER.info('Issuing publisher related RPC commands')

        if self._channel is None or not self._channel.is_open:
            return  # No open channel

        self._enable_delivery_confirmations()

        #hdrs = {u'مفتاح': u' قيمة', u'键': u'值', u'キー': u'値'}
        #message = u'مفتاح قيمة 键 值 キー 値'

        # Wrap the payload with a unique id before publishing:
        message = dict(data=message, _id=str(uuid4()))

        LOGGER.info('Message to dispatch: %s', message)

        properties = pika.BasicProperties(
            app_id='example-publisher',
            content_type='application/json')  #, headers=hdrs

        self._channel.basic_publish(self._server.EXCHANGE_CORE_TO_SENDER,
                                    self._server.ROUTING_KEY,
                                    json.dumps(message), properties)
        self._message_number += 1
        self._deliveries.append(self._message_number)

        LOGGER.info('Published message #%i', self._message_number)
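The method above relies on a `_enable_delivery_confirmations` helper that is not shown. A minimal sketch of it, modeled on pika's asynchronous publisher pattern; the `_on_delivery_confirmation` callback name is an assumption, not part of the original snippet:

    def _enable_delivery_confirmations(self):
        # Issue Confirm.Select so RabbitMQ acks or nacks every message
        # published on this channel.
        LOGGER.info('Issuing Confirm.Select RPC command')
        self._channel.confirm_delivery(self._on_delivery_confirmation)

    def _on_delivery_confirmation(self, method_frame):
        # method_frame.method is Basic.Ack or Basic.Nack; drop the confirmed
        # delivery tag from the list of outstanding deliveries.
        confirmation_type = method_frame.method.NAME.split('.')[1].lower()
        LOGGER.info('Received %s for delivery tag: %i',
                    confirmation_type, method_frame.method.delivery_tag)
        if method_frame.method.delivery_tag in self._deliveries:
            self._deliveries.remove(method_frame.method.delivery_tag)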
Example #2
    def push(self, message, *args, **kwargs):
        LOGGER.info("Publishing: %s", message)

        try:
            self._channel.basic_publish(exchange=self._exchange,
                                        routing_key=self._routing_key,
                                        body=json.dumps(message))
        except Exception:
            # Log the full traceback and re-raise without losing it.
            LOGGER.exception('Failed to publish message')
            raise
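push() relies on self._channel, self._exchange and self._routing_key being set up elsewhere. A minimal sketch of a constructor that could provide them, using pika's BlockingConnection; the class name and parameter names are assumptions:

import json
import logging

import pika

LOGGER = logging.getLogger(__name__)


class Publisher(object):  # hypothetical owner of the push() method above
    def __init__(self, amqp_url, exchange, routing_key):
        self._exchange = exchange
        self._routing_key = routing_key
        # A blocking connection keeps the sketch simple; the original class
        # may well use pika's asynchronous connection instead.
        self._connection = pika.BlockingConnection(pika.URLParameters(amqp_url))
        self._channel = self._connection.channel()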
Example #3
    def wrapper(*args, **kwargs):
        """Enrich the dataset returned by ``func`` with dataframe statistics,
        per-column metadata and an MSE baseline for the target column."""
        dataset = func(*args, **kwargs)

        raw_dataframe = dataset['raw_dataframe']
        # Round-trip through JSON to coerce the statistics into plain,
        # JSON-serializable types.
        dataset['dataframe_info'] = json.loads(
            json.dumps(stats.extract_dataframe_info(raw_dataframe)))
        if 'feature_column_labels' not in dataset:
            dataset['feature_column_labels'] = raw_dataframe.columns.drop(
                dataset['target_column_label'])

        if 'MSE_baseline' not in dataset:
            y = raw_dataframe[dataset['target_column_label']]
            dataset['MSE_baseline'] = ((y - y.mean())**2).mean().compute()

        per_column_statistic = dataset['dataframe_info'][
            'per_column_statistic']
        dataset['columns_info'] = {
            column_id: cloudsml.models.BaseDataTransformationColumn(
                id=column_id,
                name=column_name,
                statistics=per_column_statistic[column_name],
                data_type=per_column_statistic[column_name]['type'],
                data_format=per_column_statistic[column_name]['format'])
            for column_id, column_name in zip(
                sorted(random.sample(range(100000), len(raw_dataframe.columns))
                       ), dataset['dataframe_info']['columns'])
        }
        dataset['columns_info_by_name'] = {
            column.name: column
            for column in dataset['columns_info'].values()
        }
        dataframe = raw_dataframe.rename(columns={
            column.name: column.id
            for column in dataset['columns_info'].values()
        })
        dataset['dataframe'] = dataframe
        dataset['target_column_id'] = dataset['columns_info_by_name'][
            dataset['target_column_label']].id
        dataset['feature_column_ids'] = dataset['dataframe'].columns.drop(
            dataset['target_column_id']).values.tolist()
        return dataset
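``wrapper`` reads like the inner function of a dataset-fixture decorator. A minimal usage sketch under that assumption; the decorator name ``prepare_dataset`` and the toy loader are made up:

import dask.dataframe as dd
import pandas as pd


@prepare_dataset  # hypothetical decorator whose inner function is `wrapper` above
def load_toy_dataset():
    pdf = pd.DataFrame({'x1': [1, 2, 3, 4],
                        'x2': [0, 1, 0, 1],
                        'y': [1.0, 2.0, 3.0, 4.0]})
    return {
        'raw_dataframe': dd.from_pandas(pdf, npartitions=1),
        'target_column_label': 'y',
    }


dataset = load_toy_dataset()
# The wrapper adds 'dataframe_info', 'columns_info', 'dataframe' (columns
# renamed to ids), 'MSE_baseline', 'target_column_id' and 'feature_column_ids'.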
Example #4
def get_inverters():
    try:
        url = ENVOY_URL+'/inventory.json'
        logger.info('Getting data from [%s]...' % url)
        resp = requests.get(url, auth=HTTPDigestAuth(ENVOY_USERNAME, ENVOY_PASSWORD), timeout=9)
        devices = False
        if resp.status_code == 200:
            #Count active inverters
            devices = json.loads('{ "inverters": ' + resp.text + ' }')
        else:
            logger.error('Failed to get data from [%s]. Error code [%i]' % (url, resp.status_code))

        url = ENVOY_URL+'/api/v1/production/inverters'
        logger.info('Getting data from [%s]...' % url)
        resp = requests.get(url, auth=HTTPDigestAuth(ENVOY_USERNAME, ENVOY_PASSWORD), timeout=9)
        if resp.status_code == 200:
            readings = json.loads('{ "readings": ' + resp.text + ' }')
            if devices:
                for device in devices['inverters'][0]['devices']:
                    # Match the reading to the inventory device by serial number
                    for reading in readings['readings']:
                        if str(reading['serialNumber']) == device['serial_num']:
                            device['serialNumber'] = reading['serialNumber']
                            device['lastReportDate'] = reading['lastReportDate']
                            device['lastReportWatts'] = reading['lastReportWatts']
                            device['maxReportWatts'] = reading['maxReportWatts']
                            break
            else:
                # No inventory data: fall back to the raw readings list
                devices = {'inverters': [{'devices': readings['readings']}]}

            data = '{ "inverters": %s }' % json.dumps(devices['inverters'][0]['devices'])
            redis.set_state(REDIS_INVERTER_DATA, data)
            write_influxdb_inverter_data.delay(dates.to_string(dates.utcnow()), data)
        else:
            logger.error('Failed to insert new data. Error code [%i]' % resp.status_code)
    except Exception:
        logger.exception('Failed to insert new data.')
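The hand-built JSON strings above work because both Envoy endpoints return JSON arrays. A slightly safer sketch of the same idea, letting requests parse the body and wrapping it in a dict (same assumption about the response shape; the constants are those used in the snippet):

import requests
from requests.auth import HTTPDigestAuth

resp = requests.get(ENVOY_URL + '/inventory.json',
                    auth=HTTPDigestAuth(ENVOY_USERNAME, ENVOY_PASSWORD),
                    timeout=9)
# resp.json() parses the array; wrapping it avoids manual string concatenation.
devices = {'inverters': resp.json()} if resp.status_code == 200 else False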
Example #5
def extract_dataset_info(dataset_url,
                         data_transformation_id,
                         dataset_transformations=None):
    """
    Select reader function. Data loaded in dask DataFrame.
    Retrieve info from dask DataFrame
    """
    from app.extensions import cloudsml
    data_transformation = cloudsml.data_api.get_data_transformation_by_id(
        data_transformation_id)
    if data_transformation.transformation_type == 'blank':
        columns_info = None
    elif data_transformation.transformation_type == 'add_column':
        columns_info = cloudsml.data_api.get_data_transformation_columns(
            data_transformation_id,
            initial=True,
            # XXX: We must read the list in batches when the number of columns exceeds 1000
            limit=1000)
    else:
        raise NotImplementedError(
            "Transformation type '%s' cannot be handled" %
            data_transformation.transformation_type)

    dataset_df = dask_universal_read(dataset_url, columns_info=columns_info)
    if dataset_transformations:
        dataset_df = transform_dask_dataframe(
            dataset_df,
            fetch_data_transformations_by_id_in_pfa(dataset_transformations))

    if data_transformation.transformation_type == 'add_column':
        dataset_df = dataset_df[['dt%s' % (data_transformation.id)]]

    dataframe_info = extract_dataframe_info(dataset_df)
    per_column_statistic = dataframe_info['per_column_statistic']

    for index, column_name in enumerate(dataset_df.columns):
        assert column_name in per_column_statistic
        if data_transformation.transformation_type == 'blank':
            column_title = column_name
        else:
            if len(per_column_statistic) == 1:
                column_title = data_transformation.name
            else:
                column_name = '%s__%d' % (column_name, index)
                column_title = '%s #%d' % (data_transformation.name, index)
        cloudsml.data_api.create_data_transformation_column(
            data_transformation_id,
            name=column_name,
            title=column_title,
            statistics=json.dumps(per_column_statistic[column_name]),
            data_type=per_column_statistic[column_name]['type'].name,
            data_format=per_column_statistic[column_name]['format'].name,
        )
    cloudsml.data_api.patch_data_transformation_by_id(
        data_transformation_id, [{
            'op': 'replace',
            'path': '/status',
            'value': 'ready'
        }, {
            'op': 'replace',
            'path': '/rows_count',
            'value': dataframe_info['rows_count']
        }])
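For orientation, a sketch of how extract_dataset_info might be called; the URL and transformation ids below are illustrative only:

# Hypothetical invocation; the URL and ids are made up.
extract_dataset_info(
    dataset_url='http://seaweedfs.local/3,0123456789abcdef',
    data_transformation_id=101,
    dataset_transformations=[100, 101],  # optional chain of transformation ids (PFA)
)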
Example #6
    def toJSON(self):
        # Serialize the sorted items compactly (no spaces after separators).
        return json.dumps(
            [item.toDict() for item in self.asSortedList()],
            separators=(',', ':'),
        )
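A minimal usage sketch; the Item and ItemCollection classes are made up to show the shape of the objects toJSON() expects:

import json


class Item(object):
    def __init__(self, name, value):
        self.name, self.value = name, value

    def toDict(self):
        return {'name': self.name, 'value': self.value}


class ItemCollection(object):
    def __init__(self, items):
        self._items = items

    def asSortedList(self):
        return sorted(self._items, key=lambda item: item.name)

    def toJSON(self):
        return json.dumps(
            [item.toDict() for item in self.asSortedList()],
            separators=(',', ':'),
        )


print(ItemCollection([Item('b', 2), Item('a', 1)]).toJSON())
# -> [{"name":"a","value":1},{"name":"b","value":2}]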
Example #7
def stream():
    result = redis.get_state(redis.REDIS_METER_DATA, False)
    if not result:
        return json.dumps({'records': 0})
    else:
        return result
Example #8
def inverter():
    result = redis.get_state(redis.REDIS_INVERTER_DATA, False)
    if not result:
        return json.dumps({'records': 0})
    else:
        return result
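These two view functions return cached JSON straight from Redis. A minimal sketch of how they could be exposed over HTTP, assuming a Flask application; the app object and route paths are assumptions and the original project may register them differently:

from flask import Flask

app = Flask(__name__)  # hypothetical application object

# Expose the cached Redis payloads (already JSON strings) as HTTP endpoints.
app.add_url_rule('/stream', 'stream', stream)
app.add_url_rule('/inverter', 'inverter', inverter)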
Example #9
def test_serialization(input_object, expected_output):
    assert json.dumps(input_object) == expected_output
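The input_object and expected_output fixtures are not shown in the snippet. A plausible sketch of how such pairs could be supplied with pytest.mark.parametrize; the concrete pairs below are illustrative, not the project's own:

import json

import pytest


@pytest.mark.parametrize('input_object,expected_output', [
    ({'a': 1}, '{"a": 1}'),
    ([1, 2, 3], '[1, 2, 3]'),
    ('text', '"text"'),
    (None, 'null'),
])
def test_serialization(input_object, expected_output):
    assert json.dumps(input_object) == expected_output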
Example #10
def build_predictive_model(learn_dataset_url, dataset_transformations,
                           predictive_analysis_method_name,
                           predictive_analysis_options, predictive_model_id):
    """
    This function is building predictive model, saves it to SeaWeedFS and patching through API
    the predictive model by it's id, putting there a seaweedfs' id to model and to model info.

    Args:
        learn_dataset_url (str): link to a learn dataset.
        dataset_transformations (list): a list of data transformation ids of
            the learn dataset in PFA format.
        predictive_analysis_method_name (str): name of the predictive method
            that is requested to be used.
        predictive_analysis_options (dict): kwargs to predictive analysis
            method.
        predictive_model_id (int): id of the model in API to patch it
    """
    log.info("New %s model #%d is going to be built...",
             predictive_analysis_method_name, predictive_model_id)

    initial_columns_info = cloudsml.data_api.get_data_transformation_columns(
        dataset_transformations[-1],
        initial=True,
        # XXX: We must read the list in batches when the number of columns exceeds 1000
        limit=1000)

    learn_dataset_df = dask_universal_read(learn_dataset_url,
                                           columns_info=initial_columns_info)
    learn_dataset_df = transform_dask_dataframe(
        learn_dataset_df,
        fetch_data_transformations_by_id_in_pfa(dataset_transformations))

    target_column_id = predictive_analysis_options['target_column_id']
    feature_column_ids = predictive_analysis_options['feature_column_ids']
    selected_columns_info = {
        column.id: column
        for column in cloudsml.data_api.get_data_transformation_columns(
            dataset_transformations[-1],
            id=([target_column_id] + feature_column_ids),
            # XXX: API server limits the maximum possible limit of columns per single
            # request at 1000 to avoid too long response times. Thus, we must implement
            # querying the columns info in batches. Yet, this might be hidden behind
            # a convenient wrapper.
            limit=1000)
    }
    learn_dataset_df = learn_dataset_df[sorted(selected_columns_info.keys())]

    missing_values_encoder = missing_values_encoding.missing_values_encoder
    learn_dataset_df, missing_values_substitution_map = missing_values_encoder(
        learn_dataset_df, selected_columns_info)

    learn_dataset_df, selected_columns_info = one_hot_encoding.OneHotEncoder(
        categorical_columns_ids=predictive_analysis_options[
            'categorical_column_ids'],
        columns_info=selected_columns_info).update(learn_dataset_df,
                                                   selected_columns_info)

    test_partition_ratio = predictive_analysis_options.get(
        'test_partition_ratio', 0.4)
    test_learn_splitter = SplitSampling(test_partition_ratio, random_state=0)
    test_dataset_df, learn_dataset_df = test_learn_splitter.split(
        learn_dataset_df)

    predictive_analysis_method = PREDICTIVE_ANALYSIS_METHODS[
        predictive_analysis_method_name]
    log.info('Model #%d is being fitted with data...', predictive_model_id)
    model = predictive_analysis_method(learn_dataset_df,
                                       columns_info=selected_columns_info,
                                       **predictive_analysis_options)

    log.info('Model #%d is being exported to PFA...', predictive_model_id)
    one_hot_pfa_decoder = OneHotPFADecoder({
        column.id: column.virtual_columns
        for column in selected_columns_info.values()
        if hasattr(column, 'virtual_columns')
    })
    missing_values_pfa_decoder = MissingValuesPFADecoder(
        missing_values_substitution_map)

    translated_model = missing_values_pfa_decoder.transform(
        one_hot_pfa_decoder.transform(model.to_pfa()))

    model_file_id = seaweedfs.upload_file(stream=json.dumps(translated_model),
                                          name='model_%s.pfa' %
                                          predictive_model_id)

    log.info('Model #%d information is being collected...',
             predictive_model_id)
    model_info = {
        'learn': model.get_info(learn_dataset_df),
    }
    if test_partition_ratio > 0.0:
        model_info['test'] = model.get_info(test_dataset_df)

    model_info = ModelInfoShema().load({'performance_stats': model_info}).data

    model_info_id = seaweedfs.upload_file(stream=json.dumps(model_info),
                                          name='model_info_%s.json' %
                                          predictive_model_id)

    cloudsml.predictive_analytics_api.patch_predictive_model_by_id(
        predictive_model_id,
        [
            {
                "op": "replace",
                "path": "/model_seaweed_id",
                "value": model_file_id
            },
            {
                "op": "replace",
                "path": "/model_info_seaweed_id",
                "value": model_info_id
            },
            {
                "op": "replace",
                "path": "/status",
                "value": "fitted"
            },  # TODO use constant here
        ])
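A sketch of how build_predictive_model might be invoked. All ids, the URL and the method name are made up; only the option keys are taken from the code above:

# Hypothetical invocation; values are illustrative only.
build_predictive_model(
    learn_dataset_url='http://seaweedfs.local/5,00ab12cd34ef',
    dataset_transformations=[101, 102],   # the last id is used to fetch columns
    predictive_analysis_method_name='linear_regression',
    predictive_analysis_options={
        'target_column_id': 7,
        'feature_column_ids': [1, 2, 3],
        'categorical_column_ids': [2],
        'test_partition_ratio': 0.4,
        'method_parameters': {},
    },
    predictive_model_id=42,
)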
Example #11
def build_predictive_pipeline(
    pipeline_id,
    pipeline_options,
    learn_dataset_url,
    dataset_transformations,
):
    """
    Reads ``pipeline_options`` to retrieve parameters for building a bunch of models.
    Updates Pipeline through the API to change the status and models_count
    """
    models_count = (len(pipeline_options['target_column_ids']) *
                    len(pipeline_options['predictive_analysis_options']))
    pipeline = cloudsml.predictive_analytics_api.patch_pipeline_by_id(
        pipeline_id=pipeline_id,
        body=[{
            'op': 'replace',
            'path': '/models_count',
            'value': models_count
        }])
    predictive_models_to_build = []
    pipeline_model_index = 0
    static_feature_column_ids = [
        int(feature_column_id)
        for feature_column_id, feature_column_options in (
            pipeline_options['features'].items())
        if feature_column_options['type'] == 'static'
    ]
    # TODO: extend the support for cross-validation
    test_partition_ratio = pipeline_options['testing_settings'][
        'test_partition_ratio']
    for target_column_id in pipeline_options['target_column_ids']:
        for (method_name, method_parameters
             ) in pipeline_options['predictive_analysis_options'].items():
            pipeline_model_index += 1
            categorical_column_ids = (
                set(pipeline_options['categorical_column_ids'])
                & set(static_feature_column_ids))
            predictive_analysis_options = dict(
                target_column_id=target_column_id,
                # TODO: implement optional and random predictors selection.
                feature_column_ids=static_feature_column_ids,
                categorical_column_ids=categorical_column_ids,
                # TODO: extend the support for cross-validation
                test_partition_ratio=test_partition_ratio,
                method_parameters=method_parameters)
            predictive_model = cloudsml.predictive_analytics_api.fit_predictive_model(
                predictive_analysis_method=method_name,
                name='{pipeline_name} #{pipeline_model_index}'.format(
                    pipeline_name=pipeline.name,
                    pipeline_model_index=pipeline_model_index),
                predictive_analysis_options=json.dumps(
                    predictive_analysis_options),
                data_transformation_id=dataset_transformations[-1],
                pipeline_id=pipeline_id)
            predictive_models_to_build.append({
                'predictive_analysis_options': predictive_analysis_options,
                'predictive_analysis_method_name': method_name,
                'predictive_model_id': predictive_model.id,
            })

    for predictive_model_params in predictive_models_to_build:
        build_predictive_model(learn_dataset_url=learn_dataset_url,
                               dataset_transformations=dataset_transformations,
                               **predictive_model_params)

    cloudsml.predictive_analytics_api.patch_pipeline_by_id(
        pipeline_id=pipeline_id,
        body=[{
            'op': 'replace',
            'path': '/status',
            'value': 'ready'
        }])
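For reference, a sketch of the ``pipeline_options`` structure this function expects, reconstructed from the keys it reads; all concrete values are made up:

# Hypothetical options; only the keys are derived from the code above.
pipeline_options = {
    'target_column_ids': [7],
    'predictive_analysis_options': {
        'linear_regression': {},      # method name -> method_parameters
    },
    'features': {
        '1': {'type': 'static'},      # feature column id -> feature options
        '2': {'type': 'static'},
    },
    'categorical_column_ids': [2],
    'testing_settings': {'test_partition_ratio': 0.4},
}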