def validate(pipeline_params, model, encoder, track, date_cutoff, max_date):
    """Evaluate *model* on the validation date window and persist results.

    Streams rows passing ``validate_filter``, encodes them, predicts one row
    at a time, computes r2 against the raw ``unit_sales`` column, then logs
    metrics/artifacts and writes predictions, the model file and a plot.

    Args:
        pipeline_params: pipeline configuration passed to ``stream_data``.
        model: fitted estimator exposing ``predict``.
        encoder: encoder exposing ``encode_data_stream``.
        track: experiment tracker (``log_metrics`` / ``log_artifact``).
        date_cutoff, max_date: window bounds forwarded to ``validate_filter``.
    """
    target_name = 'unit_sales'
    target = []

    def _filtered_rows():
        # Single pass over the data: capture the target value while feeding
        # rows to the encoder. The previous implementation streamed the whole
        # dataset a second time just to rebuild the target list.
        for row in stream_data(pipeline_params):
            if validate_filter(row, date_cutoff, max_date):
                target.append(row[target_name])
                yield row

    encoded_validate_stream = encoder.encode_data_stream(_filtered_rows())
    # Per-row predict keeps memory flat; the comprehension fully consumes the
    # stream, so `target` is complete once this line finishes.
    validation_predictions = [float(model.predict([row]))
                              for row in encoded_validate_stream]
    print("Calculating metrics")
    validation_metrics = {'r2_score': metrics.r2_score(
        y_true=target, y_pred=validation_predictions)}
    track.log_metrics(validation_metrics)
    fluentd_logger.log('validation_metrics', validation_metrics)
    write_predictions_and_score(validation_metrics)
    print("Evaluation done with metrics {}.".format(
        json.dumps(validation_metrics)))
    write_model(model)
    track.log_artifact(file_names['model'])
    make_validation_plot(target, validation_predictions, track)
def run_ml_model(pipeline_params, encoder, track, date_cutoff, seed=None):
    """Encode the training window, log params, and train a model.

    Args:
        pipeline_params: pipeline configuration; must contain ``model_name``
            and ``model_params``.
        encoder: encoder exposing ``encode_data_stream``.
        track: experiment tracker (``log_ml_params`` / ``log_pipeline_params``).
        date_cutoff: upper bound forwarded to ``train_filter``.
        seed: optional RNG seed forwarded to ``train_model``.

    Returns:
        Tuple ``(trained_model, params)`` as produced by ``train_model``.
    """
    target_name = 'unit_sales'
    target = []

    def _filtered_rows():
        # Single pass: collect the target while streaming rows to the
        # encoder, instead of re-reading the whole dataset a second time.
        for row in stream_data(pipeline_params):
            if train_filter(row, date_cutoff):
                target.append(row[target_name])
                yield row

    print('Encoding data')
    # batch step, read it all in; `target` fills up as a side effect
    encoded_train_data = list(encoder.encode_data_stream(_filtered_rows()))
    print('Getting target')

    model_name = pipeline_params['model_name']
    params = pipeline_params['model_params'][model_name]
    track.log_ml_params(params)
    track.log_pipeline_params(pipeline_params)
    trained_model, params = train_model(encoded_train_data, target,
                                        model_name, params, seed=seed)
    return trained_model, params
def test_get_encoder_from_stream():
    """Encoder built from a 100-row sample round-trips rows and maps unseen
    categorical levels to the UNKNOWN sentinel."""
    source = stream_data(pipeline_params)
    sample = (next(source) for _ in range(100))
    encoder = get_encoder_from_stream(sample)
    assert isinstance(encoder, OneHotEncoder)

    # Re-open the stream and check a fresh 100-row sample encodes 1:1.
    source = stream_data(pipeline_params)
    sample = (next(source) for _ in range(100))
    assert len(list(encoder.encode_data_stream(sample))) == 100

    row_in = {
        'id': '88219279',
        'date': '2016-08-16',
        'item_nbr': '103520',
        'unit_sales': '10.0',
        'family': 'GROCERY I',
        'class': '1028',
        'perishable': '0',
        'year': '2016',
        'month': '8',
        'day': '16',
        'dayofweek': '1',
        'days_til_end_of_data': '364',
        'dayoff': 'False'
    }
    decoded = encoder.decode_data(encoder.encode_data([row_in]))
    print(decoded[0].keys())
    print(row_in.keys())
    # Encoder drops these three columns; compare against the remainder.
    del row_in['date'], row_in['id'], row_in['unit_sales']
    assert decoded[0].keys() == row_in.keys()

    # A categorical level never seen during fitting.
    row_in['class'] = 'FOO'
    decoded = encoder.decode_data(encoder.encode_data([row_in]))
    print(decoded[0].keys())
    print(row_in.keys())
    assert decoded[0].keys() == row_in.keys()
    print(decoded)
    assert decoded[0]['class'] == 'UNKNOWN_CATEGORICAL_LEVEL'
def test_stream_data():
    """First streamed row is a dict with an int 'perishable' field."""
    first_row = next(stream_data(pipeline_params))
    assert isinstance(first_row, dict)
    assert 'perishable' in first_row
    assert isinstance(first_row['perishable'], int)
def get_max_date(pipeline_params):
    """Scan the full data stream and return the latest date as 'YYYY-MM-DD'.

    Batch step — consumes the entire stream (e.g. yields '2017-08-15').
    """
    print('Getting max date')
    latest = max(date_string_to_date(row["date"])
                 for row in stream_data(pipeline_params))
    print('Max date: %s' % latest)
    return latest.strftime('%Y-%m-%d')