Python CVOutputParser.read_merged_file_disc_version 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: parsers

클래스/타입: CVOutputParser

메소드/함수: read_merged_file_disc_version

hotexamples.com에서의 예제들: 2

Python CVOutputParser.read_merged_file_disc_version - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 parsers.CVOutputParser.read_merged_file_disc_version에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

read_est_obs_file_disc_version_2(4)

read_est_obs_file(2)

read_est_obs_file_disc_version(2)

read_merged_file_disc_version(2)

예제 #1

파일 보기

파일: ratios.py 프로젝트: AndersHqst/SAAS-2014

def error_ratios_cross_val(output_folder):
    """
    Cross validation on the error ratios to find optimal
    triangle values
    """

    from parsers import CVOutputParser
    from utils import avg

    if not output_folder[-1] == '/':
        output_folder += '/'


    singleton_thresholds = [0, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000]
    pair_thresholds = [0, 1, 2, 3, 4, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100]
    triple_thresholds = [0, 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250]
    # Results are inserted at an offset
    # acc_error, count, maxent_best, ext_best
    c = [[[(0,0,0,0, (0,0,0)) for z in range(len(triple_thresholds))] for y in range(len(pair_thresholds))] for x in range(len(singleton_thresholds))]


    merged_file = output_folder + 'merged_estimates.tsv'

    iteration = 0
    for (n1, n2, n3), (est, ext, obs, ratio, triangle) in CVOutputParser.read_merged_file_disc_version(merged_file):

        s1, s2, s3, s12, s13, s23, s123 = triangle

        # Calculate errors and add the to the result matrix
        # Ratio error between estiamtes
        error = 0
        # check if both estimaters are spot on:
        if abs(ext-obs) == 0 and abs(est-obs) == 0:
            error = 1.
        # check that we are not dividing be a very small floating point
        # from extrapolation. If below one we just treat the error as
        # if it was 1
        if abs(ext-obs) < 1:
            error = float(abs(est-obs))
        # Get error ratio, avoid division by zero
        elif abs(ext-obs) != 0:
            error = abs(est-obs) / float(abs(ext-obs))
        # ratio_errors.append(error)
        for singleton_index, singleton_threshold in enumerate(singleton_thresholds):
            if not min(s1, s2, s3) > singleton_threshold:
                break
            for pair_index, pair_threshold in enumerate(pair_thresholds):
                if not min(s12, s13, s23) > pair_threshold:
                    break
                for triple_index, triple_threshold in enumerate(triple_thresholds):
                    if not s123 < triple_threshold:
                        continue
                    acc_error, count, maxent_best, ext_best, (s, p, t) = c[singleton_index][pair_index][triple_index]
                    acc_error += error
                    count += 1
                    if error < 1:
                        maxent_best += 1
                    elif error > 1:
                        ext_best += 1
                    c[singleton_index][pair_index][triple_index] = (acc_error, count, maxent_best, ext_best, (singleton_threshold, pair_threshold, triple_threshold))
        if iteration % 1000000 == 0:
            print 'iteration: ', iteration
        iteration += 1

        # maxent_errors.append(est / float(obs))
        # ext_errors.append(ext / float(obs))

    # Compute average errors
    for singleton_index, singleton_threshold in enumerate(singleton_thresholds):
        for pair_index, pair_threshold in enumerate(pair_thresholds):
            for triple_index, triple_threshold in enumerate(triple_thresholds):
                (acc_error, count, maxent_best, ext_best, (s,p,t)) = c[singleton_index][pair_index][triple_index]
                if count > 0:
                    c[singleton_index][pair_index][triple_index] = (acc_error / float(count), count, maxent_best, ext_best, (s,p,t))

    # ratio_error = sum(ratio_errors) / float(len(ratio_errors))
    # ext_ratio = avg(ext_errors)
    # maxent_ratio = avg(maxent_errors)

    # print 'Singletons done for threshold: ', singleton_threshold

    # fd.close()

    # fd = open(output_folder + 'parameter_cv.tsv', 'wr')
    # fd.write('singleton\tpair\ttriple\tmax_ent\text\tratio_error\n')
    # fd.write(singleton + '\t' + pair + '\t' + triple + '\t' + maxent_ratio + '\t' + ext_ratio + '\t' + ratio_error + '\n')
    # max_val = 1000
    # offset = 30
    # hist([x for x in range(max_val)[offset:]], ratio_errors[offset:max_val], color='green')

    return c

예제 #2

파일 보기

파일: ratios.py 프로젝트: AndersHqst/SAAS-2014

def error_ratios(output_folder, s_min=None, p_min=None, t_max=None, obs_min=None):
    """
    Error ratio against triple count in sample on a CV result.
    Needs the merged_estimates.tsv file that can be created
    with the relevant script in utils.py
    """

    from parsers import CVOutputParser
    from utils import interpolate, avg
    import math
    from collections import Counter
    import os

    if not output_folder[-1] == '/':
        output_folder += '/'

    max_singleton_occurrence = -1
    max_pair_occurrence = -1
    max_triple_occurrence = -1
    #max ent
    occurrence_ratio_errors = [0 for x in range(100000)]
    ratio_errors = []
    occurrences = [0 for x in range(100000)]
    merged_file = output_folder + 'merged_estimates.tsv'
    maxent_errors = []
    ext_errors = []
    iteration = 0
    maxent_was_best_estimates = []
    ext_was_best = []
    for (n1, n2, n3), (est, ext, obs, ratio, triangle) in CVOutputParser.read_merged_file_disc_version(merged_file):

        s1, s2, s3, s12, s13, s23, s123 = triangle

        iteration += 1
        if iteration % 1000000 == 0:
            print 'iteration: ', iteration

        if not s_min is None:
            if not min(s1,s2,s3) > s_min:
                continue
        if not p_min is None:
            if not min(s12,s23,s13) > p_min:
                continue
        if not t_max is None:
            if not s123 < t_max:
                continue

        if not obs_min is None:
            if not obs > obs_min:
                continue



        if max(s1,s2,s3) > max_singleton_occurrence:
            max_singleton_occurrence = max(s1,s2,s3)
        if max(s12,s13,s23) > max_pair_occurrence:
            max_pair_occurrence = max(s12,s13,s23)
        if s123 > max_triple_occurrence:
            max_triple_occurrence = s123

        # get the absolute errors, 
        # if this is below one we 
        # set it to one to avoid problems
        # with dividing with numbers < 1
        abs_ext_obs = abs(ext-obs)
        if abs_ext_obs < 1:
            abs_ext_obs = 1
        abs_est_obs = abs(est-obs)
        if abs_est_obs < 1:
            abs_est_obs = 1

        error = math.log(abs_est_obs / abs_ext_obs)
        ratio_errors.append(error)

        # low max ent estimate, magic numer is the value for the estiamtes
        # when a pair value was 1 for maxent, or 1 for ext
        # if est <= 104.0324:
        if error < 0:
            maxent_was_best_estimates.append(((n1, n2, n3), (est, ext, obs, ratio, triangle)))
        elif error > 0:
            ext_was_best.append(((n1, n2, n3), (est, ext, obs, ratio, triangle)))


        maxent_errors.append(est / float(obs))
        ext_errors.append(ext / float(obs))
        try:
            occurrences[int(obs)] += 1
            occurrence_ratio_errors[int(obs)] += error
        except IndexError, e:
            pass