Python CVOutputParser.read_est_obs_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: parsers

클래스/타입: CVOutputParser

메소드/함수: read_est_obs_file

hotexamples.com에서의 예제들: 2

Python CVOutputParser.read_est_obs_file - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python parsers.CVOutputParser.read_est_obs_file의 평점이 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

read_est_obs_file_disc_version_2(4)

read_est_obs_file(2)

read_est_obs_file_disc_version(2)

read_merged_file_disc_version(2)

예제 #1

파일 보기

파일: interval.py 프로젝트: AndersHqst/SAAS-2014

def plot_intervals(output_folder):
    """Plot averaged estimator errors per triple interval.

    Given the output folder of a cross validation run, the observed triples
    are split into intervals with ``Preprocessor.triple_intervals``. For each
    interval, every sampled estimate output (``<i>_data.tsv`` and
    ``<i>_data_extrapolation.tsv`` for i = 0, 1, ... until the max-ent file
    is missing) is read and the triples of the interval are looked up in that
    sample. The per-sample error averages are averaged again per interval and
    plotted: max-ent in blue, extrapolation in red.

    NOTE(review): the statements that appended per-triple errors to
    ``MAPE_errors`` / ``MAPE_errors_ext`` were commented out in the original,
    so ``avg`` currently receives empty lists. That behaviour is preserved
    here; confirm whether the error recording should be re-enabled. The
    heuristic estimator mentioned in the original notes is likewise disabled.

    Args:
        output_folder: Path of the cross validation output folder, with or
            without a trailing '/'.
    """
    from parsers import CVOutputParser
    from preprocessing import Preprocessor
    from utils import avg
    import os

    # Normalise to a trailing slash; an empty string is left alone so the
    # file names below resolve relative to the current directory instead of
    # raising IndexError on output_folder[-1].
    if output_folder and not output_folder.endswith('/'):
        output_folder += '/'
    intervals = 30
    triple_intervals = Preprocessor.triple_intervals(
        output_folder + 'observed_frequent_items.out', intervals=intervals)

    avg_max_ent_errors = []
    avg_ext_errors = []
    # Binned ratios 0.0, 0.1, ..., 1.0 and the error accumulated per bin.
    pair_triple_ratios = [i / 10. for i in range(11)]
    max_ent_ratio_error = [0 for _ in range(11)]
    ext_ratio_error = [0 for _ in range(11)]

    for index, triple_interval in enumerate(triple_intervals):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Triple interval {} of {}'.format(index, intervals))
        iteration = 0
        MAPE_avg_errors = []
        MAPE_avg_errors_ext = []
        while True:
            max_ent_est_file = output_folder + str(iteration) + '_data.tsv'
            ext_est_file = output_folder + str(iteration) + '_data_extrapolation.tsv'
            # Read samples until no further max-ent output file is found.
            if not os.path.exists(max_ent_est_file):
                break

            max_ent_est = CVOutputParser.read_est_obs_file(max_ent_est_file)
            ext_est = CVOutputParser.read_est_obs_file(ext_est_file)

            MAPE_errors = []
            MAPE_errors_ext = []

            for triple in triple_interval:
                # Only triples estimated in both samples can be compared;
                # the original raised KeyError when the extrapolation file
                # lacked the triple.
                if triple not in max_ent_est or triple not in ext_est:
                    continue

                max_ent_record = max_ent_est[triple]
                # Per the original notes: index 0 is the estimate, index 1
                # the observed value (identical for every estimator) and
                # index 2 the pair/triple ratio.
                est = max_ent_record[0]
                obs = max_ent_record[1]
                ratio = max_ent_record[2]
                est2 = ext_est[triple][0]

                # The relative error is undefined for a zero observation.
                if obs == 0:
                    continue

                # Bin the ratio to one decimal. A ratio outside 0.0..1.0 has
                # no bin and is skipped, matching how plot_ratios treats the
                # resulting ValueError.
                try:
                    bin_index = pair_triple_ratios.index(round(ratio, 1))
                except ValueError:
                    continue

                max_ent_ratio_error[bin_index] += abs(est - obs) / float(obs)
                ext_ratio_error[bin_index] += abs(est2 - obs) / float(obs)

            MAPE_avg_errors.append(avg(MAPE_errors))
            MAPE_avg_errors_ext.append(avg(MAPE_avg_errors_ext) if False else avg(MAPE_errors_ext))
            iteration += 1

        avg_max_ent_errors.append(avg(MAPE_avg_errors))
        avg_ext_errors.append(avg(MAPE_avg_errors_ext))

    # NOTE(review): `plot` is not imported in this function; it is presumably
    # provided at module level (not visible here) -- confirm.
    plot(range(len(avg_max_ent_errors)), avg_max_ent_errors, color='blue')
    plot(range(len(avg_ext_errors)), avg_ext_errors, color='red')

예제 #2

파일 보기

파일: ratios.py 프로젝트: AndersHqst/SAAS-2014

def plot_ratios(output_folder):
    """Accumulate estimator errors against binned pair/triple ratios.

    Every sampled estimate output in ``output_folder`` (``<i>_data.tsv`` and
    ``<i>_data_extrapolation.tsv`` for i = 0, 1, ... until the max-ent file
    is missing) is read. For each triple with an observed value of at least
    200, the max-ent and extrapolation errors, ``abs(est - obs) / sqrt(obs)``,
    are added to the bin of the triple's pair/triple ratio. Ratios are binned
    to one decimal in the range 0.0 to 1.0. The function also counts, per
    bin, which estimator had the smaller error.

    NOTE(review): nothing is plotted or returned in the visible portion of
    this function; the accumulators are only filled. Confirm whether the
    plotting code is missing from this excerpt.

    Args:
        output_folder: Path of the cross validation output folder, with or
            without a trailing '/'.
    """
    from parsers import CVOutputParser
    # Not used in the visible portion of this function; kept in case the
    # excerpt is incomplete.
    from utils import interpolate
    import math
    from collections import Counter
    import os

    # Normalise to a trailing slash; an empty string is left alone so the
    # file names below resolve relative to the current directory instead of
    # raising IndexError on output_folder[-1].
    if output_folder and not output_folder.endswith('/'):
        output_folder += '/'

    # Bin values 0.0, 0.1, ..., 1.0 and the per-bin accumulators.
    pair_triple_ratios = [i / 10. for i in range(11)]
    max_ent_ratio_error = [0 for _ in range(11)]
    ext_ratio_error = [0 for _ in range(11)]
    maxent_better_ratio = [0 for _ in range(11)]
    ext_better_ratio = [0 for _ in range(11)]
    values_binned = 0
    values_ignored = 0  # never updated in the visible code
    iteration = 0
    pair_counts = Counter()
    trip_counts = Counter()
    while True:
        max_ent_est_file = output_folder + str(iteration) + '_data.tsv'
        ext_est_file = output_folder + str(iteration) + '_data_extrapolation.tsv'
        # Read samples until no further max-ent output file is found.
        if not os.path.exists(max_ent_est_file):
            break

        max_ent_est = CVOutputParser.read_est_obs_file(max_ent_est_file)
        ext_est = CVOutputParser.read_est_obs_file(ext_est_file)

        for triple in max_ent_est:
            max_ent_record = max_ent_est[triple]

            # Index 3 unpacks into seven values; only the pair and triple
            # entries are tallied here.
            (s1, s2, s3, s12, s13, s23, s123) = max_ent_record[3]
            pair_counts[s12] += 1
            pair_counts[s13] += 1
            pair_counts[s23] += 1
            trip_counts[s123] += 1

            # Per the original notes, index 1 holds the observed value,
            # which is the same for every estimator.
            obs = max_ent_record[1]

            # Triples observed fewer than 200 times are left out.
            if obs < 200:
                continue

            # The original raised KeyError when the extrapolation file
            # lacked the triple; such triples cannot be compared.
            if triple not in ext_est:
                continue

            # Index 0 holds the estimate of the respective estimator.
            est = max_ent_record[0]
            est2 = ext_est[triple][0]

            # Per the original notes, index 2 holds the pair/triple ratio,
            # identical for every estimator. It is binned to one decimal.
            ratio = max_ent_record[2]
            ratio_binned = round(ratio, 1)

            maxent_error = abs(est - obs) / math.sqrt(obs)
            ext_error = abs(est2 - obs) / math.sqrt(obs)

            # Look the bin up once. A ratio outside 0.0..1.0 has no bin and
            # is skipped, as the original did by swallowing ValueError.
            try:
                bin_index = pair_triple_ratios.index(ratio_binned)
            except ValueError:
                continue

            # Record which estimator was better for this ratio bin.
            if maxent_error < ext_error:
                maxent_better_ratio[bin_index] += 1
            elif maxent_error > ext_error:
                ext_better_ratio[bin_index] += 1

            # Count the value only once it really has a bin; the original
            # incremented the counter before the lookup could fail.
            values_binned += 1
            max_ent_ratio_error[bin_index] += maxent_error
            ext_ratio_error[bin_index] += ext_error