Esempi in Python per ETLUtils.write_row_to_json

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: etl

Classe/tipologia: ETLUtils

Metodo/funzione: write_row_to_json

Esempi su hotexamples.com: 4

ETLUtils.write_row_to_json in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per etl.ETLUtils.write_row_to_json, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

load_json_file(30)

filter_records(23)

save_json_file(13)

save_csv_file(12)

split_train_test(7)

drop_fields(6)

load_csv_file(6)

filter_out_records(5)

select_fields(4)

split_train_test_copy(4)

add_transpose_list_column(2)

write_row_to_csv(2)

write_row_to_json(2)

count_frequency(1)

keep_fields(1)

search_sentences(1)

Esempio n. 1

Mostra file

File: topic_model_stability.py Progetto: swarnamd/yelp

def full_cycle(metric):
    """Evaluate the topic model for ``metric`` and write the outcome to disk.

    The result of ``evaluate_topic_model(metric)`` is printed, merged with a
    copy of the current properties (``Constants.get_properties_copy()``) and
    then handed to ``ETLUtils.write_row_to_csv`` and
    ``ETLUtils.write_row_to_json``.

    :param metric: passed to ``evaluate_topic_model`` and also used by
        ``Constants.generate_file_name`` to build the output file names.
    :return: ``None``.
    """
    # One generated file name per output format; the comprehension walks the
    # tuple in order, so the 'csv' name is generated before the 'json' one.
    output_paths = {
        extension: Constants.generate_file_name(
            metric, extension, Constants.RESULTS_FOLDER, None, None, False)
        for extension in ('csv', 'json')
    }
    print(output_paths['json'])
    print(output_paths['csv'])

    # The properties snapshot is taken before the evaluation runs.
    run_properties = Constants.get_properties_copy()
    row = evaluate_topic_model(metric)
    print(row)
    # Property values overwrite evaluation values on key collisions.
    row.update(run_properties)

    ETLUtils.write_row_to_csv(output_paths['csv'], row)
    ETLUtils.write_row_to_json(output_paths['json'], row)

Esempio n. 2

Mostra file

File: topic_model_stability.py Progetto: melqkiades/yelp

def full_cycle(metric):
    """Run the topic model evaluation for ``metric`` and store one result row.

    The row returned by ``evaluate_topic_model(metric)`` is printed, extended
    with the values of ``Constants.get_properties_copy()`` and passed to both
    ``ETLUtils.write_row_to_csv`` and ``ETLUtils.write_row_to_json``.

    :param metric: forwarded to ``evaluate_topic_model`` and to
        ``Constants.generate_file_name`` when naming the output files.
    :return: ``None``.
    """

    def results_path(extension):
        # Same arguments for every output format; only the extension varies.
        return Constants.generate_file_name(
            metric, extension, Constants.RESULTS_FOLDER, None, None, False)

    csv_path = results_path('csv')
    json_path = results_path('json')
    print(json_path)
    print(csv_path)

    # Snapshot the properties before running the evaluation.
    extra_fields = Constants.get_properties_copy()
    row = evaluate_topic_model(metric)
    print(row)
    # On duplicate keys the property value replaces the evaluation value.
    row.update(extra_fields)

    ETLUtils.write_row_to_csv(csv_path, row)
    ETLUtils.write_row_to_json(json_path, row)

Esempio n. 3

Mostra file

File: classifier_evaluator.py Progetto: srividya89/yelp

def full_cycle():
    """Evaluate every classifier in ``PARAM_GRID_MAP`` and persist the results.

    The function:

    1. Calls ``plant_random_seeds()``, loads and preprocesses the records and
       transforms them into ``x_matrix`` / ``y_vector``.
    2. Error estimation: for each ``(classifier, params)`` pair in
       ``PARAM_GRID_MAP`` the mean of ``error_estimation(...)`` is used as the
       classifier's score, and the highest-scoring classifier is remembered.
    3. Model selection: runs ``model_selection`` with the parameter grid of
       the best classifier and prints its best score and best parameters.
    4. Writes one row per classifier to the ``classifier_results.csv`` and
       ``classifier_results.json`` paths (prefixed with
       ``Constants.RESULTS_FOLDER``) and stores the best hyper-parameters
       with ``save_parameters``.

    Takes no arguments and returns ``None``.
    """

    plant_random_seeds()
    my_records = load_records()
    preprocess_records(my_records)
    x_matrix, y_vector = transform(my_records)
    count_specific_generic(my_records)

    # Error estimation
    error_estimation_results = []
    best_classifier = None
    # Start below every possible score. Starting at 0.0 left best_classifier
    # as None whenever all scores were <= 0 (e.g. negated "neg_*" metrics),
    # which then broke the PARAM_GRID_MAP lookup in the model selection step.
    best_score = float('-inf')
    for classifier, params in PARAM_GRID_MAP.items():
        cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
        score = error_estimation(x_matrix, y_vector, params, cv,
                                 SCORE_METRIC).mean()
        # NOTE: the 'accuracy' key holds the SCORE_METRIC score, whatever
        # metric SCORE_METRIC actually names.
        error_estimation_results.append({
            'classifier':
            classifier,
            'accuracy':
            score,
            Constants.BUSINESS_TYPE_FIELD:
            Constants.ITEM_TYPE
        })
        print('%s score: %f' % (classifier, score))

        # Strict comparison: on ties the first classifier seen is kept.
        if score > best_score:
            best_score = score
            best_classifier = classifier

    # Model selection
    # NOTE(review): best_classifier is still None here if PARAM_GRID_MAP is
    # empty or every score is NaN; the lookup below will then fail.
    cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
    grid_search_cv = model_selection(x_matrix, y_vector,
                                     PARAM_GRID_MAP[best_classifier], cv,
                                     SCORE_METRIC)
    print('%s: %f' % (SCORE_METRIC, grid_search_cv.best_score_))
    print('best params', grid_search_cv.best_params_)

    # Fixed output names: the same two files are used on every run.
    csv_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.csv'
    json_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.json'

    # One row per evaluated classifier, written to both formats.
    for result in error_estimation_results:
        ETLUtils.write_row_to_csv(csv_file_name2, result)
        ETLUtils.write_row_to_json(json_file_name2, result)

    best_hyperparams_file_name = Constants.generate_file_name(
        'best_hyperparameters', 'json', Constants.CACHE_FOLDER, None, None,
        False)
    save_parameters(best_hyperparams_file_name, grid_search_cv.best_params_)

Esempio n. 4

Mostra file

File: classifier_evaluator.py Progetto: melqkiades/yelp

def full_cycle():
    """Score all classifiers, tune the best one and save the outcome.

    Workflow:

    1. ``plant_random_seeds()`` is called, then the records are loaded,
       preprocessed and transformed into ``x_matrix`` and ``y_vector``.
    2. Error estimation: every ``(classifier, params)`` entry of
       ``PARAM_GRID_MAP`` is scored with the mean of
       ``error_estimation(...)``; the classifier with the highest score is
       kept as the best one.
    3. Model selection: ``model_selection`` is run on the best classifier's
       parameter grid; its best score and best parameters are printed.
    4. Each per-classifier result row is written to
       ``classifier_results.csv`` and ``classifier_results.json`` (both
       prefixed with ``Constants.RESULTS_FOLDER``), and the best
       hyper-parameters are stored through ``save_parameters``.

    Takes no arguments and returns ``None``.
    """

    plant_random_seeds()
    my_records = load_records()
    preprocess_records(my_records)
    x_matrix, y_vector = transform(my_records)
    count_specific_generic(my_records)

    # Error estimation
    error_estimation_results = []
    best_classifier = None
    # Begin at negative infinity so that any real score wins the first
    # comparison. The previous 0.0 start never selected a classifier when all
    # scores were <= 0 (as with negated "neg_*" metrics), and the later
    # PARAM_GRID_MAP[best_classifier] lookup then failed on None.
    best_score = float('-inf')
    for classifier, params in PARAM_GRID_MAP.items():
        cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
        score = error_estimation(x_matrix, y_vector, params, cv, SCORE_METRIC).mean()
        # NOTE: the value stored under 'accuracy' is the SCORE_METRIC score.
        error_estimation_results.append(
            {
                'classifier': classifier,
                'accuracy': score,
                Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE
            }
        )
        print('%s score: %f' % (classifier, score))

        # Strict '>' keeps the earliest classifier when scores are tied.
        if score > best_score:
            best_score = score
            best_classifier = classifier

    # Model selection
    # NOTE(review): with an empty PARAM_GRID_MAP, or if every score is NaN,
    # best_classifier is still None and the lookup below will fail.
    cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
    grid_search_cv = model_selection(
        x_matrix, y_vector, PARAM_GRID_MAP[best_classifier], cv, SCORE_METRIC)
    print('%s: %f' % (SCORE_METRIC, grid_search_cv.best_score_))
    print('best params', grid_search_cv.best_params_)

    # Fixed output names: every run targets the same two files.
    csv_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.csv'
    json_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.json'

    # Write each classifier's row to both output formats.
    for result in error_estimation_results:
        ETLUtils.write_row_to_csv(
            csv_file_name2, result)
        ETLUtils.write_row_to_json(json_file_name2, result)

    best_hyperparams_file_name = Constants.generate_file_name(
        'best_hyperparameters', 'json', Constants.CACHE_FOLDER, None,
        None, False)
    save_parameters(best_hyperparams_file_name, grid_search_cv.best_params_)