def __init__(self, data, attr, targetIndex, m):
    """Populate ``self.root`` from the given data, or leave it empty.

    When ``targetIndex`` is negative, ``self.root`` is set to ``None``.
    Otherwise ``self.root`` receives the result of ``id3.ID3`` called with
    ``data``, the full ``attr`` sequence, ``attr`` with the entry at
    ``targetIndex`` removed, ``None``, ``targetIndex`` and ``m``.
    """
    if targetIndex < 0:
        self.root = None
        return

    # Every attribute except the one sitting at targetIndex.
    remaining_attrs = attr[:targetIndex] + attr[targetIndex + 1:]
    self.root = id3.ID3(data, attr, remaining_attrs, None, targetIndex, m)
Beispiel #2
0
def main(argv):
    #-------------------------------------------------------------------------------
    """ usage:
    """
    # NOTE: the docstring above is handed to oss.gopt below (main.__doc__) as
    # part of the usage text, so it is runtime data and is kept unchanged.
    #
    # Walks every '*.mp3' under AMAZON_PATH, prints "artist --- title" taken
    # from its ID3 tags and, when the 'sync' option is set, copies the file to
    # BU_PATH/<artist>/<title>.mp3 if that destination does not exist yet.
    # With the 'dup' option an already existing destination for which
    # oss.cmp(...) is falsy is reported as a duplicate song.
    # Per-file errors are printed and the loop moves on to the next file.
    args, opts = oss.gopt(argv[1:], [('s', 'sync'), ('d', 'dup')], [],
                          main.__doc__ + __doc__)

    path = AMAZON_PATH

    for i in oss.find(path, '*.mp3'):
        ii = id3.ID3(i)
        try:
            title = chkName(ii['TITLE'])
            artist = chkName(ii['ARTIST'])
            print(artist, '---', title)

            if opts.sync:
                artist_dir = BU_PATH + '/' + artist
                try:
                    if not oss.exists(artist_dir):
                        oss.mkdir(artist_dir)
                except IOError as ex:
                    print(i, "IOError: %s" % str(ex))
                    raise MyException('artist name error')
                except TypeError as ex:
                    # Label the message with the exception actually caught.
                    print(i, "TypeError: %s" % str(ex))
                    raise MyException('artist name error')

                fn = artist_dir + '/' + title + '.mp3'
                if not oss.exists(fn):
                    print('%s --> %s' % (i, fn))
                    cp(i, fn)
                elif opts.dup and not oss.cmp(i, fn):
                    raise MyException('duplicate song')

        except KeyError as ex:
            print('%s -- KeyError: %s' % (i, str(ex)))
        except UnicodeDecodeError as ex:
            print('%s -- UnicodeDecodeError: %s' % (i, str(ex)))
        except IOError as ex:
            print('%s -- IOError: %s' % (i, str(ex)))
        except TypeError as ex:
            print('%s -- TypeError: %s' % (i, str(ex)))
        except MyException as ex:
            print('%s -- MyExceptionError: %s' % (i, str(ex)))

    oss.exit(0)
Beispiel #3
0
def ID3(k, df, pruning, is_result_continuous):
    """Train one tree per fold of a k-fold split and return the most accurate.

    The rows of ``df`` are cut into ``k`` consecutive folds of
    ``round(len(df.index) / k)`` rows each. For every fold, the fold itself is
    the validation set and all remaining rows form the training set. Each
    tree and its accuracy are printed as they are produced.

    Args:
        k: Number of folds (and therefore of trees built).
        df: pandas DataFrame holding the full data set.
        pruning: Forwarded unchanged to ``id3.ID3``.
        is_result_continuous: Forwarded unchanged to ``id3.count_accuracy``.

    Returns:
        The tree with the highest validation accuracy; on ties the tree of
        the earliest fold wins.
    """
    fold_size = round(len(df.index) / k)
    trees = []
    trees_accuracy = []

    # Creating models and counting accuracy
    for i in range(k):
        fold_start = i * fold_size
        fold_stop = fold_start + fold_size  # exclusive upper bound

        # Training data is everything outside [fold_start, fold_stop).
        # ignore_index gives every fold a clean 0..n-1 index instead of the
        # duplicate labels produced by concatenating two re-indexed frames.
        df_training = pandas.concat(
            [df.iloc[:fold_start, :], df.iloc[fold_stop:, :]],
            ignore_index=True)

        # The validation fold covers exactly the rows left out of training.
        # (Slicing up to the inclusive last index dropped one row per fold.)
        df_validation = df.iloc[fold_start:fold_stop, :].reset_index(drop=True)

        print("-------------------- MODEL", i+1, "--------------------")

        # The full data frame is passed as the first argument alongside the
        # training and validation splits.
        result_tree = id3.ID3(df, df_training, df_validation, pruning)
        trees.append(result_tree)

        result_accuracy = id3.count_accuracy(result_tree, [], df_validation, is_result_continuous)
        trees_accuracy.append(result_accuracy)

        # Printing each model and its accuracy
        id3.print_tree(result_tree, 0)
        print("ACCURACY:", result_accuracy)
        print("")

    # max() keeps the first of several equal maxima, matching a strict '>' scan.
    best_tree_idx = max(range(k), key=trees_accuracy.__getitem__)
    return trees[best_tree_idx]
def ID3(df, pruning, is_result_continuous):
    """Train a tree on the first 90% of ``df`` and evaluate it on the rest.

    The resulting tree, its accuracy on the held-out rows and the output of
    ``ConfusionMatrixID3`` are printed; the tree is returned.
    """
    # Split training and test data with a 9:1 ratio.
    split_at = round((9 / 10) * len(df.index))
    training_rows = df.iloc[:split_at, :].reset_index(drop=True)
    held_out_rows = df.iloc[split_at:, :].reset_index(drop=True)

    # Learn from the training rows.
    tree = id3.ID3(df, training_rows, held_out_rows, pruning)

    # Measure performance on the held-out rows.
    accuracy = id3.count_accuracy(
        tree, [], held_out_rows, is_result_continuous)

    # Show the resulting tree and its metrics.
    id3.print_tree(tree, 0)
    print('Akurasi :', accuracy)
    print('Confussion Matrix untuk Virginica, Versicolor, Setosa :')
    print(ConfusionMatrixID3(tree, held_out_rows))

    return tree
Beispiel #5
0
def getSongs(path, verbose=None):
    #-------------------------------------------------------------------------------
    """Collect 'ARTIST/TITLE' tags for every '*.mp3' found under ``path``.

    Returns a tuple ``(tags, by_tag)``: the set of tags and a dict mapping
    each tag to the item yielded by ``oss.find`` for it (a later file with
    the same tag replaces an earlier one). Files whose tags raise KeyError or
    UnicodeDecodeError are silently left out. When ``verbose`` is truthy a
    '*' is printed per file, followed by a final newline.
    """
    tags = set()
    by_tag = {}

    for mp3 in oss.find(path, '*.mp3'):
        if verbose:
            print('*', end='')

        info = id3.ID3(mp3)
        try:
            key = info['ARTIST'] + '/' + info['TITLE']
            tags.add(key)
            by_tag[key] = mp3
        except (KeyError, UnicodeDecodeError):
            # Best effort: files without usable tags are skipped.
            continue

    if verbose:
        print('\n')
    return tags, by_tag
Beispiel #6
0
import csv
import id3
import numpy
import pandas
import sklearn

if __name__ == '__main__':
    # Load the training and test rows as lists of dicts keyed by column name.
    # newline='' is what the csv module requires so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open('../input/train.csv', newline='') as f:
        reader = csv.DictReader(f)
        train = list(reader)
    with open('../input/test.csv', newline='') as f:
        reader = csv.DictReader(f)
        test = list(reader)

    # Every column except the class column and the ignored ones is offered to
    # the tree builder.
    attrs_to_ignore = {'PassengerId', 'Name', 'Ticket', 'Cabin'}
    class_attr = 'Survived'
    attrs = set(train[0].keys())
    significant_attrs = attrs - {class_attr} - attrs_to_ignore

    tree = id3.ID3(train, significant_attrs, class_attr)

    # One header row, then one (PassengerId, tree.query result) row per test row.
    csvData = [['PassengerId', 'Survived']]
    for row in test:
        csvData.append([row['PassengerId'], tree.query(row)])

    # Without newline='' the writer emits an extra blank line after every row
    # on platforms that translate '\n' to '\r\n'. The with-block closes the
    # file, so no explicit close() is needed afterwards.
    with open('person.csv', 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(csvData)
Beispiel #7
0
def main(argv):
    #-------------------------------------------------------------------------------
    """Copy every mp3 below the current directory to BU_PATH/<artist>/<title>.mp3.

    Artist and title come from each file's ID3 tags, passed through
    ``translate``. Files whose artist is '*' or '<<unknown artist>>' are
    reported as errors; files whose artist is in the set returned by
    ``getIgnoreSet`` are skipped. With the 'show' option the copy is only
    printed, not performed. With the 'dup' option an existing destination for
    which ``oss.cmp`` is falsy is reported as a duplicate song. Per-file
    errors are printed and processing continues with the next file.
    """
    args, opts = oss.gopt(argv[1:], [('d', 'dup'), ('s', 'show')], [], __doc__)

    ign = getIgnoreSet()

    print("Cur Dir:", oss.pwd())
    for i in oss.find('.', '*.mp3'):
        ii = id3.ID3(i)
        err = True

        try:
            artist = translate(ii['ARTIST'])

            if artist in ['*', '<<unknown artist>>']:
                print(i, "artist name error: *")
                raise MyException('artist name error')

            if artist in ign:
                continue

            title = translate(ii['TITLE'])

            artist_dir = BU_PATH + '/' + artist
            try:
                if not oss.exists(artist_dir):
                    oss.mkdir(artist_dir)
            except IOError as ex:
                print(i, "IOError: %s" % str(ex))
                raise MyException('artist name error')
            except TypeError as ex:
                # Label the message with the exception actually caught.
                print(i, "TypeError: %s" % str(ex))
                raise MyException('artist name error')

            fn = artist_dir + '/' + title + '.mp3'
            if not oss.exists(fn):
                print('%s --> %s' % (i, fn))
                if not opts.show:
                    cp(i, fn)
            elif opts.dup and not oss.cmp(i, fn):
                raise MyException('duplicate song')

            err = False

        except KeyError as ex:
            print('%s -- KeyError: %s' % (i, str(ex)))
        except UnicodeDecodeError as ex:
            print('%s -- UnicodeDecodeError: %s' % (i, str(ex)))
        except IOError as ex:
            print('%s -- IOError: %s' % (i, str(ex)))
        except TypeError as ex:
            print('%s -- TypeError: %s' % (i, str(ex)))
        except MyException as ex:
            print('%s -- MyExceptionError: %s' % (i, str(ex)))

        # Disabled on purpose by the leading 0: when enabled, files that
        # failed above are copied to BU_PATH/id3_errors.
        if 0 and err:
            error_dir = BU_PATH + '/id3_errors'
            f = error_dir + '/' + oss.basename(i)
            if not oss.exists(f):
                print('error:', i)
                cp(i, f)

    oss.exit(0)
Beispiel #8
0
import sys
import id3
import numpy as np

if __name__ == '__main__':
    result = id3.ID3()
    attr_end = False
    attrs = []
    data = []
    # Read an ARFF document from standard input, line by line.
    for line in sys.stdin:
        # Drop the line terminator, including the '\r' of CRLF input, so the
        # last value of a row does not carry a stray carriage return.
        line = line.rstrip("\r\n")

        # ignore the comments in the file
        if line.startswith("%"):
            continue

        # ARFF keywords are case-insensitive, so match on a lowered copy.
        keyword = line.lower()

        if not attr_end:  # While no "@data" is encountered, it will keep parsing attributes
            if keyword.startswith("@attribute"):
                # reads the line, removes "@attribute", appends the attr name to the array
                attrs.append(line.split()[1])

            # end attribute cycle if data is encountered
            elif keyword.startswith("@data"):
                attr_end = True
        elif line.strip():
            # parse data into array, .arff data is separated by commas.
            # Blank lines are skipped: they would otherwise add a one-element
            # row and make the array below ragged.
            data.append(line.split(','))
    # format the array with NumPy
    data = np.array(data)
Beispiel #9
0
# Print the size of the first training split and derive the increment by
# which the training-set size grows: one tenth of that split (integer division).
print(len(trainset[0]))
step_size = len(trainset[0]) // 10

# For each training-set size, average the accuracy of pruned and unpruned
# trees over five experiments.
for length in range(10, upper_limit, step_size):
    print('Number of Training Instances:', length)
    outputfile_tr.write('Number of Training Instances: ' + str(length) + '\n')

    pruned_accuracies = []
    unpruned_accuracies = []

    for experiment in range(5):
        # Each experiment index selects its own train/test pair; only the
        # first `length` training items are used.
        train = trainset[experiment][:length]
        test = testset[experiment]

        # Pruned variant: build, prune against validation_set, then score.
        # NOTE(review): the return value of id3.prune is discarded, so it
        # presumably modifies `tree` in place -- confirm.
        tree = id3.ID3(train, default)
        id3.prune(tree, validation_set)
        acc = id3.accuracy(tree, test)
        pruned_accuracies.append(acc)

        # Unpruned variant: a fresh tree built from the same data, scored as is.
        tree = id3.ID3(train, default)
        acc = id3.accuracy(tree, test)
        unpruned_accuracies.append(acc)

    # Mean accuracy over the five experiments for this training-set size.
    avg_pruned_accuracies = sum(pruned_accuracies) / len(pruned_accuracies)
    avg_unpruned_accuracies = sum(unpruned_accuracies) / len(
        unpruned_accuracies)

    print('  Accuracy for Pruned tree: ' + str(avg_pruned_accuracies))
    print('Accuracy for Unpruned tree: ' + str(avg_unpruned_accuracies))
Beispiel #10
0
        if len(args) != 4:
            print("Not enough arguments!")
            exit(1)
        iterations = int(args[1])
        percentage_of_data = int(args[2])
        if_print_tree = False
        if(int(args[3]) == 1):
            if_print_tree = True

    list1 = []
    times1 = [0, 0]
    tree1depth = 0
    tree1_attr_used = 0
    for i in range(0, iterations):
        timer1 = time.time()
        dtree = id3.ID3(percentage_of_data)
        dtree.prepare_data("mushroom.txt")
        dtree.root = dtree.modified_ID3(dtree.learningDataSet, dtree.attributes)
        timer2 = time.time()
        times1[0] += timer2 - timer1
        if if_print_tree is True:
            dtree.print_tree(dtree.root)
            print("\n\n")
        timer1 = time.time()
        percent = dtree.test_tree()
        timer2 = time.time()
        times1[1] += timer2 - timer1
        list1.append(percent)
        tree1depth += dtree.max_tree_depth/2
        tree1_attr_used += (22 - len(dtree.attributes))
    list2 = []