def test_propagate_infection(params_path):
    """
    Consistency check for HomogeneousEnvironment.propagate_infection.

    The propagation is run 100 times, each time over a freshly generated
    population of 400 people, and the test asserts that the mean number of
    returned infections differs from the median over all runs by less than 10.
    """
    Params.load_from(params_path)
    DiseaseState.init_infectiousness_list()
    percent_infected = 0.1
    contact_prob = 0.25
    num_of_people = 400
    loops = 100

    # One weight per person, drawn once and reused in every run; each weight
    # is at least 0.3.
    weight_list = []
    for _ in range(num_of_people):
        weight_list.append(max(0.3, random.random() - 0.1))

    env = HomogeneousEnvironment(contact_prob, "test")
    infections = []
    for _ in range(loops):
        # Build a brand new population (ages 20..60) for this run.
        population = {}
        for weight in weight_list:
            population[Person(random.randint(20, 60))] = weight
        env._person_dict = population

        for person, weight in env._person_dict.items():
            # NOTE(review): a person becomes infectious when the draw is
            # ABOVE percent_infected, i.e. for roughly 1 - percent_infected
            # of the population -- confirm this is the intended share.
            if random.random() > percent_infected:
                person._disease_state = DiseaseState.ASYMPTOMATICINFECTIOUS
            else:
                person._disease_state = DiseaseState.SUSCEPTIBLE
            person._change()
            env.sign_up_for_today(person, weight)

        today = date(year=2020, month=12, day=1)
        infections.append(len(env.propagate_infection(today)))

    avg = sum(infections) / loops
    assert abs(avg - median(infections)) < 10
def truncate_map(occurence_map):
    """Truncate an occurence map by removing its most over-represented counts

    The function builds the distribution "occurence value -> number of keys
    having that value", takes the frequencies lying strictly above the median
    frequency, and discards every occurence value whose frequency exceeds the
    99% position of that sorted upper half.

    Parameters:
        occurence_map (dict): Dictionary containing word as key and occurence as value

    Returns:
        dict: New truncated map. An empty input gives an empty dict, and a
            plain copy of the input is returned when no frequency lies above
            the median (nothing to truncate).
    """
    if not occurence_map:
        return {}

    # Get occurences distribution (a sorted list works with any median()).
    distribution = Counter(occurence_map.values())
    frequencies = sorted(distribution.values())
    dist_median = median(frequencies)

    # Compute upper bound
    limit = 0.99
    dist_upper_median = [v for v in frequencies if v > dist_median]
    if not dist_upper_median:
        # Every frequency is at or below the median: indexing the empty list
        # would raise IndexError, so return the map unchanged.
        return dict(occurence_map)
    dist_upper_bound = int(floor(len(dist_upper_median) * limit))

    # Clamp at 0 so a single-element upper half selects that element
    # explicitly instead of relying on index -1 wrapping around.
    min_dist_value = dist_upper_median[max(dist_upper_bound - 1, 0)]

    # Occurence values that are frequent enough to be kept.
    kept_values = {k for k, v in distribution.items() if v <= min_dist_value}

    # Return new occurence map
    return {k: v for k, v in occurence_map.items() if v in kept_values}
Exemple #3
0
def _compute_stats_function(values):
    stats = None
    if len(values)>1:
        stats = {}
        stats['min'] = min(values)
        stats['max'] = max(values)
        stats['mean'] = mean(values)
        stats['median'] = median(values)
        stats['1st-quartile'] = percentile(values,25)
        stats['3rd-quartile'] = percentile(values,75)
        stats['std-error'] = std(values)
        
    return stats
Exemple #4
0
    def computeTranslation(img1,
                           img2,
                           img1Points,
                           maxTranslation2,
                           minNMatches,
                           windowSize=(5, 5),
                           level=5,
                           criteria=(cv2.TERM_CRITERIA_EPS, 0, 0.01)):
        '''Computes the translation of img2 with respect to img1
        (loaded using OpenCV as numpy arrays)
        img1Points are used to compute the translation

        The points are tracked from img1 to img2 with
        cv2.calcOpticalFlowPyrLK (windowSize, level and criteria are passed
        as winSize, maxLevel and criteria). Only points whose status entry
        equals 1 and whose squared displacement is strictly below
        maxTranslation2 are kept.

        Returns the median (along axis 0) of the kept displacements when at
        least minNMatches of them exist. Otherwise prints the last tracked
        displacement (None when no point was tracked) and returns None.

        TODO add diagnostic if data is all over the place, and it most likely is not a translation (eg zoom, other non linear distortion)'''
        # Public numpy namespace instead of the private numpy.core /
        # numpy.lib.function_base modules; also avoids shadowing builtin sum.
        import numpy as np

        nextPoints = np.array([])
        (img2Points, status,
         track_error) = cv2.calcOpticalFlowPyrLK(img1,
                                                 img2,
                                                 img1Points,
                                                 nextPoints,
                                                 winSize=windowSize,
                                                 maxLevel=level,
                                                 criteria=criteria)
        # calcOpticalFlowPyrLK(prevImg, nextImg, prevPts[, nextPts[, status[, err[, winSize[, maxLevel[, criteria[, derivLambda[, flags]]]]]]]]) -> nextPts, status, err
        delta = []
        # Pre-bind dp so the diagnostic print below cannot raise
        # UnboundLocalError when no point has status 1 (or there are none).
        dp = None
        for (k, (p1, p2)) in enumerate(zip(img1Points, img2Points)):
            if status[k] == 1:
                dp = p2 - p1
                d = np.sum(dp**2)
                if d < maxTranslation2:
                    delta.append(dp)
        if len(delta) >= minNMatches:
            return np.median(delta, axis=0)
        else:
            print(dp)
            return None
Exemple #5
0
def truncate_map(occurence_map):
    """Truncate an occurence map by removing its most over-represented counts

    A distribution "occurence value -> number of keys with that value" is
    computed first. Among the frequencies strictly above the median
    frequency, the one at the 99% position (in sorted order) becomes the
    cut-off: occurence values that are more frequent than the cut-off are
    dropped from the result.

    Parameters:
        occurence_map (dict): Dictionary containing word as key and occurence as value

    Returns:
        dict: New truncated map. Empty input gives ``{}``; when no frequency
            is above the median the input is returned as an unmodified copy.
    """
    # Nothing to analyse: median() of an empty sequence is undefined.
    if not occurence_map:
        return {}

    # Get occurences distribution
    distribution = Counter(occurence_map.values())
    sorted_freqs = sorted(distribution.values())
    dist_median = median(sorted_freqs)

    # Compute upper bound
    limit = 0.99
    dist_upper_median = [freq for freq in sorted_freqs if freq > dist_median]
    if not dist_upper_median:
        # All frequencies are <= median, so there is no cut-off to apply
        # (the original indexing raised IndexError here).
        return dict(occurence_map)
    dist_upper_bound = int(floor(len(dist_upper_median) * limit))

    # Compute new distribution; max(..., 0) replaces the implicit index -1
    # wrap-around for a single-element upper half with an explicit index 0.
    min_dist_value = dist_upper_median[max(dist_upper_bound - 1, 0)]
    kept_values = {
        value for value, freq in distribution.items() if freq <= min_dist_value
    }

    # Return new occurence map
    return {k: v for k, v in occurence_map.items() if v in kept_values}
Exemple #6
0
    def computeTranslation(
        img1,
        img2,
        img1Points,
        maxTranslation2,
        minNMatches,
        windowSize=(5, 5),
        level=5,
        criteria=(cv2.TERM_CRITERIA_EPS, 0, 0.01),
    ):
        """Computes the translation of img2 with respect to img1
        (loaded using OpenCV as numpy arrays)
        img1Points are used to compute the translation

        img1Points are tracked into img2 with cv2.calcOpticalFlowPyrLK
        (windowSize -> winSize, level -> maxLevel, criteria -> criteria).
        A displacement is kept when the point's status entry equals 1 and
        its squared length is strictly below maxTranslation2.

        Returns the per-axis median (axis 0) of the kept displacements if
        there are at least minNMatches of them; otherwise prints the last
        tracked displacement (None if no point was tracked) and returns None.

        TODO add diagnostic if data is all over the place, and it most likely is not a translation (eg zoom, other non linear distortion)"""
        # Use the public numpy API: the private numpy.core / numpy.lib
        # submodules are not a stable import location, and importing `sum`
        # by name shadowed the builtin.
        import numpy as np

        nextPoints = np.array([])
        (img2Points, status, track_error) = cv2.calcOpticalFlowPyrLK(
            img1, img2, img1Points, nextPoints, winSize=windowSize, maxLevel=level, criteria=criteria
        )
        # calcOpticalFlowPyrLK(prevImg, nextImg, prevPts[, nextPts[, status[, err[, winSize[, maxLevel[, criteria[, derivLambda[, flags]]]]]]]]) -> nextPts, status, err
        delta = []
        # dp must exist even if the loop never assigns it, otherwise the
        # diagnostic print in the failure branch raises UnboundLocalError.
        dp = None
        for (k, (p1, p2)) in enumerate(zip(img1Points, img2Points)):
            if status[k] == 1:
                dp = p2 - p1
                d = np.sum(dp ** 2)
                if d < maxTranslation2:
                    delta.append(dp)
        if len(delta) >= minNMatches:
            return np.median(delta, axis=0)
        else:
            print(dp)
            return None
from scipy.stats.morestats import wilcoxon
from numpy.lib.function_base import average, median

#tree = [96.19047619047619, 96.28571428571429, 95.61904761904762, 96.0, 96.57142857142857, 96.57142857142857, 95.14285714285714, 96.0, 96.19047619047619, 95.9047619047619, 96.28571428571429, 95.61904761904762, 97.33333333333333, 94.85714285714286, 95.14285714285714, 94.28571428571429, 94.19047619047619, 96.38095238095238, 95.04761904761905, 94.85714285714286, 96.19047619047619, 96.38095238095238, 96.38095238095238, 96.47619047619048, 96.19047619047619, 94.57142857142857, 96.38095238095238, 96.47619047619048, 96.47619047619048, 94.95238095238095, 96.19047619047619, 95.14285714285714, 95.71428571428571, 94.85714285714286, 95.9047619047619, 96.66666666666667, 94.95238095238095, 94.57142857142857, 94.19047619047619, 94.19047619047619, 95.9047619047619, 95.9047619047619, 94.66666666666667, 96.0952380952381, 96.0, 95.9047619047619, 97.14285714285714, 96.19047619047619, 96.28571428571429, 96.19047619047619]
#C457NN = [96.0, 94.95238095238095, 94.28571428571429, 95.61904761904762, 95.23809523809524, 96.38095238095238, 95.23809523809524, 96.0, 95.04761904761905, 96.0952380952381, 96.19047619047619, 94.57142857142857, 96.47619047619048, 96.38095238095238, 95.42857142857143, 94.85714285714286, 94.57142857142857, 96.19047619047619, 94.76190476190476, 94.47619047619048, 94.95238095238095, 95.61904761904762, 96.19047619047619, 96.0, 95.04761904761905, 95.33333333333333, 96.28571428571429, 95.52380952380952, 96.47619047619048, 95.61904761904762, 95.80952380952381, 95.14285714285714, 96.0952380952381, 95.04761904761905, 95.33333333333333, 96.85714285714286, 95.23809523809524, 96.0952380952381, 93.52380952380952, 95.52380952380952, 95.71428571428571, 96.0, 94.57142857142857, 96.66666666666667, 96.0, 95.61904761904762, 96.38095238095238, 95.61904761904762, 96.0, 95.61904761904762]


tree = [96.18604651162791, 96.18604651162791, 96.37209302325581, 96.46511627906976, 96.74418604651163, 96.09302325581395, 95.90697674418605, 96.18604651162791, 96.27906976744185, 96.27906976744185, 96.09302325581395, 96.18604651162791, 96.65116279069767, 96.18604651162791, 95.81395348837209, 96.0, 96.55813953488372, 96.18604651162791, 95.72093023255815, 95.72093023255815, 96.0, 95.90697674418605, 96.09302325581395, 96.0, 96.46511627906976, 96.18604651162791, 96.09302325581395, 95.90697674418605, 95.81395348837209, 96.55813953488372, 96.0, 96.46511627906976, 96.0, 96.09302325581395, 96.18604651162791, 95.72093023255815, 96.27906976744185, 95.62790697674419, 94.79069767441861, 95.81395348837209, 96.09302325581395, 96.18604651162791, 96.37209302325581, 96.37209302325581, 96.27906976744185, 96.09302325581395, 96.46511627906976, 96.74418604651163, 96.0, 96.18604651162791]

C457NN = [94.13953488372093, 95.53488372093024, 96.46511627906976, 95.44186046511628, 95.72093023255815, 95.53488372093024, 95.06976744186046, 96.0, 94.88372093023256, 95.81395348837209, 94.97674418604652, 94.69767441860465, 96.37209302325581, 95.25581395348837, 94.79069767441861, 95.53488372093024, 96.46511627906976, 96.0, 95.90697674418605, 95.62790697674419, 95.81395348837209, 94.32558139534883, 95.16279069767442, 94.4186046511628, 94.97674418604652, 96.0, 95.81395348837209, 95.34883720930233, 95.72093023255815, 95.90697674418605, 95.53488372093024, 95.72093023255815, 95.25581395348837, 95.62790697674419, 96.55813953488372, 96.37209302325581, 96.09302325581395, 94.51162790697674, 95.16279069767442, 94.79069767441861, 95.25581395348837, 94.69767441860465, 96.46511627906976, 95.44186046511628, 95.81395348837209, 96.55813953488372, 95.25581395348837, 96.46511627906976, 94.97674418604652, 94.97674418604652]


# Report the average and median of both samples. print() calls replace the
# Python 2 print statements (a SyntaxError on Python 3); the text written to
# stdout is unchanged because print() also joins its arguments with a space.
print('average tree = ', average(tree))
print('average C4.5(7NN) = ', average(C457NN))
print('median tree = ', median(tree))
print('median C4.5(7NN) = ', median(C457NN))


# Result of scipy's wilcoxon() applied to the two samples.
print('wilcoxon test for J48 or C4.5(7NN):', wilcoxon(tree, C457NN))


from numpy.core.fromnumeric import mean
from numpy.lib.function_base import median
from projeto.Evaluation import Evaluation
from projeto.instancia import Instances

from projeto.kmeans import KMeans

# Load the data set and evaluate a 2-cluster k-means over 30 runs.
instancias = Instances('fertility_Diagnosisnormalized.csv')

kmeans = KMeans(instancias, 2)
kmeans.setDistanceFunction(valor="2")
evaluation = Evaluation(kmeans, instancias)
resultado = evaluation.multipleRuns(30)

# print() calls replace the Python 2 print statements (a SyntaxError on
# Python 3); the output is unchanged.
# NOTE(review): resultado[0] and resultado[1] look like two per-run result
# series -- confirm their meaning against Evaluation.multipleRuns.
print(median(resultado[0]), median(resultado[1]))
print(resultado)
# Disabled per-run debug output:
# for g1, g2 in zip(resultado[0], resultado[1]):
#     print("Group 1: %s  - Group 2: %s" % (g2, g1))

evaluation.maxDaviesBouldin(1)
print(evaluation.BestPartition())
def itemsList2models(source):
    """Print per-model and best-of-two-models performance statistics.

    ``source`` is the path of a comma-separated text file whose first line is
    a header. The columns 'id', 'binaryResponse', 'truthful' and one column
    per model name are read. For every data row and every model the score
    ``1 - abs(binaryResponse - prediction)`` is stored per person ('id').
    Three baselines are scored the same way with the predictions 0.5
    ('BaselineRandom'), 1.0/0.0 depending on whether the 'truthful' field
    contains 'T' ('CorrectReply') and 0 ('AlwaysReject'). The model
    'WMSupprByMood' is only evaluated when ``source`` contains '3'.

    Two reports are printed:

    * For every unordered pair of models, each person is assigned the model
      of the pair with the higher mean score. Pairs in which fewer than two
      models win at least one person are skipped. Each remaining pair yields
      a tuple (persons per winning model, mean, std, median and MAD of the
      per-person best mean score). The tuples are sorted with the
      module-level ``order`` key and the first five are printed.
    * For every model and baseline: mean, median and MAD of the per-person
      mean scores.

    Blank lines in the file are ignored. Returns None.
    """
    from itertools import combinations

    baseline = ['BaselineRandom', 'CorrectReply', 'AlwaysReject']

    models = [
        'CR&time', 'ClassicReas', 'FFT-Max', 'FFT-ZigZag(Z+)', 'HeurRecogn',
        'HeurRecogn-lin.', 'S2MR', 'SentimentAnalysis'
    ]
    if '3' in source:
        models = models + ['WMSupprByMood']

    perPerson = {}
    ind = None  # column name -> column index, filled from the header line

    # The context manager closes the file even if a row fails to parse.
    with open(source) as lines:
        for line in lines:
            listLine = line.replace('\r', '').replace('\n', '').split(',')
            if ind is None:
                ind = {key: pos for pos, key in enumerate(listLine)}
                continue
            if not line.strip():
                # A blank (e.g. trailing) line has no columns to read.
                continue

            response = float(listLine[ind['binaryResponse']])
            scores = perPerson.setdefault(listLine[ind['id']], {})

            for model in models:
                scores.setdefault(model, []).append(
                    1 - abs(response - float(listLine[ind[model]])))

            baselinePrediction = {
                'BaselineRandom': float(0.5),
                'CorrectReply': float('T' in listLine[ind['truthful']]),
                'AlwaysReject': float(0),
            }
            for model in baseline:
                scores.setdefault(model, []).append(
                    1 - abs(response - baselinePrediction[model]))

    pairs = []
    # combinations() yields each unordered pair exactly once, in the same
    # order the nested loops with string-based de-duplication visited them.
    for model1, model2 in combinations(models, 2):
        maxModels = {}
        maxPerfs = {}
        for pers, scores in perPerson.items():
            maxperf, maxmodel = 0, None
            for model in (model1, model2):
                if model not in scores:
                    continue
                perf = mean(scores[model])
                # Strict comparison: on a tie the first model of the pair
                # wins, and a person stays unassigned (None) when neither
                # mean is above 0.
                if perf > maxperf:
                    maxperf, maxmodel = perf, model
            maxPerfs[pers] = maxperf
            maxModels[pers] = maxmodel

        numberOfModelAsMax = {}
        for maxmodel in maxModels.values():
            numberOfModelAsMax[maxmodel] = numberOfModelAsMax.get(maxmodel, 0) + 1

        if len([a for a in numberOfModelAsMax if a is not None]) < 2:
            continue
        allPersPerfList = list(maxPerfs.values())
        pairs.append((numberOfModelAsMax, mean(allPersPerfList),
                      std(allPersPerfList), median(allPersPerfList),
                      median_absolute_deviation(allPersPerfList)))
    pairs.sort(key=order)
    print(pairs[:5])

    for model in models + baseline:
        meanresperpers = [mean(scores[model]) for scores in perPerson.values()]
        print(model, ':',
              int(20 - len(model)) * ' ', 'mean',
              round(mean(meanresperpers), 2), 'median',
              round(median(meanresperpers), 2), 'MAD',
              round(median_absolute_deviation(meanresperpers), 2))
def itemsList(source):
    """Print how often each model is a person's best model, plus summary stats.

    ``source`` is the path of a comma-separated text file whose first line is
    a header. The columns 'id', 'binaryResponse' and one column per model
    name are read. For every data row and every model the score
    ``1 - abs(binaryResponse - prediction)`` is stored per person ('id').
    The model 'WMSupprByMood' is only evaluated when ``source`` contains '3'.

    Each person is assigned the model with the highest mean score (strict
    comparison, so the first model wins a tie and a person whose means are
    all 0 is assigned ``None``). The function prints

    * a dict mapping each assigned model to the fraction of persons it was
      assigned to, and
    * the mean, median and MAD of the per-person best mean scores.

    Blank lines in the file are ignored. Returns None.
    """
    models = [
        'CR&time', 'ClassicReas', 'FFT-Max', 'FFT-ZigZag(Z+)', 'HeurRecogn',
        'HeurRecogn-lin.', 'S2MR', 'SentimentAnalysis'
    ]
    if '3' in source:
        models = models + ['WMSupprByMood']

    perPerson = {}
    ind = None  # column name -> column index, filled from the header line

    # The context manager closes the file even if a row fails to parse.
    with open(source) as lines:
        for line in lines:
            listLine = line.replace('\r', '').replace('\n', '').split(',')
            if ind is None:
                ind = {key: pos for pos, key in enumerate(listLine)}
                continue
            if not line.strip():
                # A blank (e.g. trailing) line has no columns to read.
                continue

            response = float(listLine[ind['binaryResponse']])
            scores = perPerson.setdefault(listLine[ind['id']], {})
            for model in models:
                scores.setdefault(model, []).append(
                    1 - abs(response - float(listLine[ind[model]])))

    maxModels = {}
    maxPerfs = {}
    for pers, scores in perPerson.items():
        maxperf, maxmodel = 0, None
        for model, modelScores in scores.items():
            perf = mean(modelScores)
            if perf > maxperf:
                maxperf, maxmodel = perf, model
        maxPerfs[pers] = maxperf
        maxModels[pers] = maxmodel

    numberOfModelAsMax = {}
    for maxmodel in maxModels.values():
        numberOfModelAsMax[maxmodel] = numberOfModelAsMax.get(maxmodel, 0) + 1

    allPersPerfList = list(maxPerfs.values())

    # The total is loop-invariant, so compute it once instead of per key.
    total = sum(count for count in numberOfModelAsMax.values())
    percOfModelsAsMax = {
        model: float(count) / total
        for model, count in numberOfModelAsMax.items()
    }

    print(percOfModelsAsMax)
    print('mean', round(mean(allPersPerfList), 2), 'median',
          round(median(allPersPerfList), 2), 'MAD',
          round(median_absolute_deviation(allPersPerfList), 2))