Python DataManager.divide_into_foldsの例

プログラミング言語: Python

名前空間/パッケージ名: DataManager

クラス/型: DataManager

メソッド/関数: divide_into_folds

hotexamples.comのコード掲載数: 1

Python DataManager.divide_into_folds - 1件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのDataManager.DataManager.divide_into_foldsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

DataManager(30)

set_data(12)

split_data(11)

getRows(7)

gen_data(6)

close(5)

getRow(5)

building_setpoints(4)

gen_word(3)

encode_object(2)

finishOneChangeItem(2)

addAChange(2)

addChangeRecord(2)

batch_iter(2)

getByRange(2)

closeFile(2)

continueFlushingQueue(2)

continueTrainingLoop(2)

build_price_lut(2)

add_data(2)

Add(1)

edit_user(1)

edit_credit_card(1)

edit_address(1)

divide_into_folds(1)

decode_object(1)

date_by_offset(1)

data_analyst(1)

display_posting_list(1)

getAfterDate(1)

execute_lp_solver(1)

getTargetCapacity(1)

setting(1)

push_point(1)

pop_point(1)

mesh_point_mapping(1)

insertGameLogIntoDb(1)

initSettings(1)

getSprinklerWaterRates(1)

genDataForModel(1)

getPreviousWateringAmounts(1)

getPredictedRainfall(1)

getLatestRainfall(1)

getLatestMoisture(1)

create_state_proxy(1)

gen_batch(1)

csvToDb(1)

compute_navigation_target(1)

create_first_population(1)

addAddress(1)

コード例 #1

ファイルを表示

def main():
    '''
    Runs cross validation on the input Twitter data.

    Reads its configuration from the command-line arguments returned by
    ``parser.parse_args()``: ``train_path``, ``cache_path``, ``num_folds``,
    ``topic_numbers`` and ``results_path``.

    For every candidate number of topics ``k``, an LDA model is trained once
    per fold and scored on that fold's validation set. The average score for
    each ``k`` is appended as one CSV row to ``results_path`` (a header row is
    written first when the file does not exist yet), and the best ``k`` is
    printed at the end.
    '''
    args = parser.parse_args()

    # Extract the data for LDA and divide into folds
    dm = DataManager(args.train_path, 'twitter')
    if settings.DEBUG: print("Loading data...")

    # Time the process of loading in the data.
    start = time.perf_counter()

    # Load the data (possibly from the cache, if it exists)
    dm.load_data(args.cache_path)
    # The number of folds is passed in as a command-line arg
    dm.divide_into_folds(args.num_folds)
    end = time.perf_counter()
    if settings.DEBUG:
        print(
            f"Preparing the data (loading, dividing into folds) took {end-start:0.4f} seconds."
        )

    # Initialize the best k and best likelihood, along with the list of k values to try
    best_k = None
    best_likelihood = -float("inf")

    # Get the list of topic numbers to try as a command line arg too.
    possible_k_values = args.topic_numbers

    # Decide about the header BEFORE opening: opening in append mode creates
    # the file, so the existence check would always succeed afterwards.
    write_header = not os.path.exists(args.results_path)

    # Append mode keeps rows from earlier runs instead of truncating them.
    # newline="" is what the csv module expects so that it controls the line
    # endings itself. The with-block closes the file even if training fails.
    with open(args.results_path, "a", newline="") as fout:
        out_writer = csv.writer(fout)
        if write_header:
            out_writer.writerow([
                "Model", "k", "Average Likelihood", "Number of Documents",
                "Source"
            ])

        # Run cross validation once for each parameter value
        for k in possible_k_values:

            if settings.DEBUG: print(f"Trying k={k} components...")

            # Collect one validation score per fold
            likelihoods = []
            for i in range(dm.get_num_folds()):
                if settings.DEBUG:
                    print(f"    Iteration {i+1}/{dm.get_num_folds()}")

                # Update the validation fold.
                dm.set_validation(i)

                # Retrieve the training data and validation set.
                train, validate = get_data_for_LDA(dm)
                start = time.perf_counter()
                # Train the model with the param choice.
                lda_model = run_LDA_for_CV(train, k)
                # Score the trained model on the validation set.
                likelihood = lda_model.score(validate)
                end = time.perf_counter()
                if settings.DEBUG:
                    print(f"        likelihood = {likelihood}")
                    print(f"        Training took {end-start:0.4f} seconds.")

                likelihoods.append(likelihood)

            avg_likelihood = sum(likelihoods) / len(likelihoods)
            out_writer.writerow([
                "LDA", k, avg_likelihood,
                len(dm.get_all_fold_data()), settings.TWITTER_DIR
            ])
            if settings.DEBUG: print(f"    avg_likelihood = {avg_likelihood}")

            # Strictly greater: on ties the earliest k in the list wins.
            if avg_likelihood > best_likelihood:
                best_likelihood = avg_likelihood
                best_k = k

    print(
        f"Best average likelihood found was {best_likelihood} with parameter value k={best_k}"
    )