Example #1
import os
import re

import pandas as pd

import metrics  # project-local module with confusion-matrix helpers

# `result_dir`, `path` and `how` are assumed to be defined by the
# surrounding script.
p_dict = {}
r_dict = {}
f_dict = {}
w_dict = {}

# Collect every result CSV in the directory
files = [f for f in os.listdir(result_dir) if f.endswith('.csv')]
pattern = re.compile('[0-9]+')

for f in files:
    # The percentage is encoded as digits in the file name
    percentage = int(pattern.search(f).group())

    result = pd.read_csv(result_dir + 'result_' + str(percentage) + '.csv',
                         index_col=0)
    if how == 'soft':
        matrix = metrics.confusion_matrix(result)
    elif how == 'hard':
        matrix = metrics.hard_matrix(result)
    else:
        raise ValueError("how must be 'soft' or 'hard'")

    matrix.to_csv(path + 'Metricas/' + how + '_matrix_' + str(percentage) + '.csv')

    w_dict[percentage] = metrics.weighted_f_score(matrix)

    # Per-class precision, recall and f-score
    clases = matrix.columns.tolist()
    p = [metrics.precision(matrix, c) for c in clases]
    r = [metrics.recall(matrix, c) for c in clases]
    f = [metrics.f_score(matrix, c) for c in clases]

    p_dict[percentage] = p
    r_dict[percentage] = r
    f_dict[percentage] = f

save_dir = path + 'Metricas/'
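
Note that `metrics` above is a project-local module, not scikit-learn, and its implementation is not shown. What follows is a rough, hypothetical sketch of what its per-class helpers may compute, assuming the confusion matrix is a pandas DataFrame of counts with true classes as rows and predicted classes as columns, and that each helper returns a negative sentinel when a quantity is undefined (which would explain the `>= 0` checks in the last example below):

# Hypothetical sketch of the project-local `metrics` helpers; not the
# actual implementation. Assumes `matrix` is a pandas DataFrame of counts
# with true classes as rows and predicted classes as columns, and that a
# negative value signals "undefined".
import pandas as pd

def precision(matrix, c):
    predicted = matrix[c].sum()   # everything predicted as class c
    return matrix.at[c, c] / predicted if predicted > 0 else -1

def recall(matrix, c):
    actual = matrix.loc[c].sum()  # everything that truly is class c
    return matrix.at[c, c] / actual if actual > 0 else -1

def f_score(matrix, c):
    p, r = precision(matrix, c), recall(matrix, c)
    return 2 * p * r / (p + r) if p > 0 and r > 0 else -1

def weighted_f_score(matrix):
    # Per-class f-scores averaged with class support as weights
    support = matrix.sum(axis=1)
    scores = pd.Series({c: f_score(matrix, c) for c in matrix.columns})
    return (scores * support).sum() / support.sum()

The real module also distinguishes a soft `confusion_matrix` from a `hard_matrix`; that distinction is not reproduced in the sketch.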
Example #2
    # Snippet from a larger script. Assumes: from functools import partial;
    # from multiprocessing import Pool; plus the project-local modules
    # `parallel` and `metrics`.
    feature_filter = args.feature_filter

    paths = [sets_path + catalog + '_sampled_' + str(i) + '.csv' for i in range(100)]
    paths = paths[0:10]

    # Train and classify with decision trees
    partial_fit = partial(parallel.fit_tree, feature_filter=feature_filter, folds=folds)
    pool = Pool(processes=n_processes, maxtasksperchild=2)
    resultados_tree = pool.map(partial_fit, paths)
    pool.close()
    pool.join()

    # Print and save the results
    for i, r in enumerate(resultados_tree):
        r.to_csv(result_path + 'result_tree_' + str(i) + '.csv')
        matrix = metrics.hard_matrix(r)
        print('Tree ' + str(i) + ' f_score: ' + str(metrics.weighted_f_score(matrix)))

    # Train and classify with random forests
    partial_fit = partial(parallel.fit_rf, feature_filter=feature_filter, folds=folds)
    pool = Pool(processes=n_processes, maxtasksperchild=2)
    resultados_rf = pool.map(partial_fit, paths)
    pool.close()
    pool.join()

    # Print and save the results
    for i, r in enumerate(resultados_rf):
        r.to_csv(result_path + 'result_rf_' + str(i) + '.csv')
        matrix = metrics.hard_matrix(r)
        print('RF ' + str(i) + ' f_score: ' + str(metrics.weighted_f_score(matrix)))
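
Here `parallel.fit_tree` and `parallel.fit_rf` belong to the same project and are not shown. The pattern itself is standard library: `functools.partial` freezes the shared keyword arguments so that `Pool.map` only has to supply one path per task. A minimal self-contained sketch of that pattern, with a hypothetical `fit_tree` standing in for the real worker:

# Minimal sketch of the partial + Pool.map pattern; `fit_tree` here is a
# hypothetical stand-in for the project's parallel.fit_tree.
from functools import partial
from multiprocessing import Pool

def fit_tree(path, feature_filter=None, folds=10):
    # Placeholder: the real function would load `path`, run k-fold
    # cross-validation with a decision tree and return a result frame.
    return (path, feature_filter, folds)

if __name__ == '__main__':
    paths = ['set_0.csv', 'set_1.csv', 'set_2.csv']
    # Freeze the shared keyword arguments; Pool.map supplies only `path`
    partial_fit = partial(fit_tree, feature_filter=['period'], folds=5)
    pool = Pool(processes=2, maxtasksperchild=2)
    results = pool.map(partial_fit, paths)
    pool.close()
    pool.join()
    print(results)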
Example #3
        valores_accuracy = []
        valores_recall = []
        valores_fscore = []
        x_values = []
        x_values_fscore = []

        # For each confidence percentage
        for i in range(100):

            # Keep only the predictions whose confidence exceeds the threshold
            porcentaje = float(i) / 100

            aux = result[result['trust'] > porcentaje]

            # matrix = metrics.confusion_matrix(aux)
            matrix = metrics.hard_matrix(aux)

            # A negative value means no predictions exceeded this
            # confidence level
            precision = metrics.accuracy(matrix, clase)
            if precision >= 0:
                valores_accuracy.append(precision)
                valores_recall.append(metrics.recall(matrix, clase))
                x_values.append(porcentaje)

            # A negative f_score means no predictions exceeded this
            # confidence level
            f_score = metrics.f_score(matrix, clase)
            if f_score >= 0:
                valores_fscore.append(f_score)
                x_values_fscore.append(porcentaje)

        # graf(clase, x_values, valores_accuracy, 'Accuracy')
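
For context, `result` above is a DataFrame of predictions with a `trust` column holding each prediction's confidence; the loop sweeps the threshold from 0.00 to 0.99 and recomputes the metrics on the surviving rows. A toy illustration of just the filtering step (class names and values invented here):

# Toy illustration of the confidence-threshold sweep; data is invented.
import pandas as pd

result = pd.DataFrame({
    'predicted': ['RRL', 'RRL', 'EB', 'CEPH'],
    'trust': [0.9, 0.6, 0.4, 0.8],
})

for i in range(0, 100, 25):
    porcentaje = float(i) / 100
    aux = result[result['trust'] > porcentaje]
    # Fewer predictions survive as the confidence bar rises
    print(porcentaje, len(aux))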