Exemple #1
0
def genScatterPlot(filename, table, xIndex, yIndex, xLabel, yLabel, title):
    """Save a scatter plot of one table column against another.

    The output path is ``PDFs + filename``.

    :param filename: name appended to ``PDFs`` to form the output path
    :param table: the table handed to ``getCol``
    :param xIndex: column index used for the x values
    :param yIndex: column index used for the y values
    :param xLabel: x-axis label (also shown in the progress message)
    :param yLabel: y-axis label
    :param title: figure title
    """
    output.update("... Plot %s" % xLabel)
    figure = pyplot.figure()
    try:
        ys = getCol(table, yIndex)
        xs = getCol(table, xIndex)

        # blue point markers; low alpha so dense regions show up darker
        pyplot.plot(xs, ys, 'b.', alpha=0.2)
        pyplot.xlabel(xLabel)
        pyplot.ylabel(yLabel)
        pyplot.suptitle(title)

        pyplot.savefig(PDFs + filename)
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly;
        # without this, each call leaves one more figure in memory.
        pyplot.close(figure)
Exemple #2
0
def genFrequencyGraph(filename, table, index, label, title):
    """Save a 100-bin histogram of one table column.

    The output path is ``PDFs + filename``.

    :param filename: name appended to ``PDFs`` to form the output path
    :param table: the table handed to ``getCol``
    :param index: index of the column to plot
    :param label: x-axis label (also shown in the progress message)
    :param title: figure title
    """
    output.update("... Plot %s" % label)

    figure = pyplot.figure()
    try:
        xs = getCol(table, index)
        pyplot.hist(xs, bins=100)
        pyplot.suptitle(title)
        pyplot.xlabel(label)

        pyplot.savefig(PDFs + filename)
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly;
        # without this, each call leaves one more figure in memory.
        pyplot.close(figure)
def att_freqs(instances, att_index, class_index):
    """ Counts, per attribute value, how often each class label occurs

    :param instances: a table
    :param att_index: the index of the attribute whose values are grouped on
    :param class_index: the index of the class_labels
    :return: {att_val:[{class1: freq, class2: freq, ...}, total], ...}
    """
    # distinct values found in the attribute column and in the class column
    attribute_values = set(table_utils.getCol(instances, att_index))
    class_labels = set(table_utils.getCol(instances, class_index))

    # every attribute value starts with a zeroed per-class counter and total
    freqs = {}
    for value in attribute_values:
        freqs[value] = [dict.fromkeys(class_labels, 0), 0]

    # tally each row into its attribute value's entry
    for record in instances:
        class_label, value = record[class_index], record[att_index]
        entry = freqs[value]
        entry[0][class_label] += 1
        entry[1] += 1
    return freqs
Exemple #4
0
def summary(table):
    """Log a min/max/mean/median table for the first six columns of *table*.

    Column ``i`` of the table is reported under the i-th name in the fixed
    attribute list below. Nothing is returned; the rendered table is sent to
    ``logging.info``.

    :param table: the table handed to ``getCol``
    """
    column_names = (
        "Score", "Link Ratio", "Tag Ratio", "Entities", "Sentences",
        "Similarity",
    )

    def stats_row(position, name):
        # one output row: the attribute name followed by its four statistics
        values = getCol(table, position)
        return [name, min(values), max(values), mean(values), median(values)]

    rows = [["Attributes", "Min", "Max", "Mean", "Median"]]
    rows.extend(
        stats_row(position, name)
        for position, name in enumerate(column_names))

    # NOTE(review): "fancy" may not be a format name tabulate recognises
    # ("fancy_grid" is) -- confirm which style was intended.
    rendered = tabulate(rows, headers="firstrow", tablefmt="fancy")
    logging.info('\n' + str(rendered))
Exemple #5
0
def confusion_matrix(labels, class_label_name):
    """ Logs the confusion matrix of the given labels

    Rows are actual labels, columns are predicted labels, followed by a
    per-row 'Total' and 'Recognition (%)' (share of the row's points whose
    prediction equals the row's label).

    Labels that only ever appear as a prediction also get a row and a column;
    their row is all zeros with a recognition of 0.0.

    :param labels: A list of tuples of class labels [(actual, predicted),...]
    :param class_label_name: The name of the class label
    """
    # Actual labels come first, in the same order as before; labels that were
    # only predicted are appended so that looking them up cannot fail.
    class_labels = list(set(getCol(labels, 0)))
    known = set(class_labels)
    for _, predicted in labels:
        if predicted not in known:
            known.add(predicted)
            class_labels.append(predicted)

    the_headers = [class_label_name]
    the_headers.extend(class_labels)
    the_headers.extend(['Total', 'Recognition (%)'])

    # Column positions are tracked explicitly rather than searched for in
    # the_headers, so a class label that happens to equal 'Total',
    # 'Recognition (%)' or class_label_name cannot hit the wrong cell.
    column_of = {label: pos for pos, label in enumerate(class_labels, 1)}
    total_column = len(class_labels) + 1
    recognition_column = total_column + 1

    # a table of zeros: one row per class label, one column per header
    _confusion_matrix = [[0] * len(the_headers) for _ in class_labels]

    # fills out the confusion matrix with the predicted vs. actual
    for actual, predicted in labels:
        _confusion_matrix[column_of[actual] - 1][column_of[predicted]] += 1

    # add the rest of the values to the confusion matrix
    for label, row in zip(class_labels, _confusion_matrix):
        # only the count cells are non-zero at this point
        total = sum(row)
        row[total_column] = total
        row[0] = label

        # % of the points in this row that were predicted correctly; a
        # predicted-only label has no points, so report 0.0 instead of
        # dividing by zero
        if total:
            row[recognition_column] = row[column_of[label]] / float(total) * 100
        else:
            row[recognition_column] = 0.0

    logging.info(
        '\n' +
        str(tabulate(_confusion_matrix, headers=the_headers, tablefmt="rst")))
Exemple #6
0
def normalize_table(table, except_for=None):
    """ Assumes table has been cleaned of all NA values

    Builds a new table column by column: every column whose index is not in
    ``except_for`` is passed through ``normalized_value``; the others are
    copied as returned by ``table_utils.getCol``. The input table is not
    modified by this function itself.

    :param table: a data_table
    :param except_for: a list of indexes to not normalize in the table;
        ``None`` (the default) means every column is normalized
    :return: A normalized table (``[]`` for an empty table)
    """
    # an empty table has no first row to take the column count from
    if len(table) == 0:
        return []

    # the default None previously made the membership test below raise
    skipped = () if except_for is None else except_for

    new_table = [[] for _ in range(len(table))]

    for index in range(len(table[0])):  # one pass per column of the first row
        data_column = table_utils.getCol(table, index)
        if index not in skipped:
            data_column = normalized_value(data_column)  # normalize data in column

        # puts the values of the data column into the new_table
        for row_index, new_row in enumerate(new_table):
            new_row.append(data_column[row_index])

    return new_table