Example #1
0
def main():
    """Classify every tuple of a CSV data set using a tree read from XML.

    Command line: ``python classify.py <csvFile> <XMLFile>``.

    ``sys.argv[1]`` is handed to ``ClassificationData`` and parsed;
    ``sys.argv[2]`` is parsed with ``xml.dom.minidom`` and its document
    element is passed, together with each parsed tuple, to
    ``Classifier.classify``.  Statistics are printed at the end through
    ``Classifier.print_stats``.

    Returns:
        None in every case.  A usage message is printed when the argument
        count is wrong.
    """
    # Both paths are mandatory: the code below indexes sys.argv[1] and
    # sys.argv[2], so anything other than exactly two arguments is an error.
    if len(sys.argv) != 3:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    csv_path, xml_path = sys.argv[1], sys.argv[2]

    # Open the XML file first (as before) so a missing tree file is reported
    # ahead of any CSV parsing; the context manager guarantees it is closed.
    with open(xml_path, "r") as decision_file:
        csv_reader = ClassificationData(csv_path)
        csv_reader.parse_tuples()

        classifier = Classifier()

        print(csv_reader.category)

        if len(csv_reader.category) > 0:
            classifier.has_category = True

        tree = xml.dom.minidom.parse(decision_file)

    # The DOM lives in memory, so it stays usable after the file is closed.
    root = tree.documentElement
    for row in csv_reader.tuples:
        classifier.classify(root, row, csv_reader.attributes)

    classifier.print_stats()
Example #2
0
def main():
    """Train a ``Validator`` on the data set named on the command line.

    ``sys.argv[1]`` is handed to ``ClassificationData`` and parsed;
    ``sys.argv[2]`` is passed unchanged to ``Validator.train`` together with
    the parsed data.  An optional ``sys.argv[3]`` is parsed as a
    restrictions file.

    Returns:
        None in every case.  A usage message is printed when the argument
        count is wrong.
    """
    num_args = len(sys.argv)

    # Two mandatory paths plus one optional restrictions file.
    if num_args < 3 or num_args > 4:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    if num_args == 4:
        # NOTE(review): the parsed restrictions are not passed on to the
        # validator below -- confirm whether that is intentional.
        restrictions = ClassificationData(sys.argv[3])
        restrictions.parse_restr_tuples()

    class_data = ClassificationData(sys.argv[1])
    class_data.parse_tuples()

    validator = Validator([])
    validator.train(sys.argv[2], class_data)
Example #3
0
def main():
    """Build a tree with ``Trainer.c45`` and print it as pretty XML.

    Command line::

        python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]

    Returns:
        None after printing the usage message or after a normal run;
        -1 when ``check_file`` rejects either of the two required paths.
    """
    num_args = len(sys.argv)

    # Two mandatory paths plus one optional restrictions file.
    if num_args <= 2 or num_args > 4:
        print(
            'Expected input format: python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]'
        )
        return

    if check_file(sys.argv[1]) == -1 or check_file(sys.argv[2]) == -1:
        return -1

    # Keep the domain file open for as long as the trainer is in use, then
    # close it deterministically instead of leaking the handle.
    with open(sys.argv[1], "r") as domain:
        # Parse the training rows.
        class_data = ClassificationData(sys.argv[2])
        class_data.parse_tuples()

        restriction = None
        if num_args == 4:
            restriction = ClassificationData(sys.argv[3])
            restriction.parse_restr_tuples()

        document = xml.dom.minidom.Document()
        node = document.createElement('Tree')
        document.appendChild(node)

        d = Trainer(domain, class_data, document)

        # Same object as d.attributes: the removals below are done through
        # this single reference, exactly as before.
        attributes = d.attributes
        attributes.remove("Id")
        attributes.remove("Vote")
        if restriction is not None:
            d.rem_restrictions(restriction.restr)

        d.c45(d.data, d.attributes, node, 0)

    print(document.toprettyxml())
Example #4
0
def main():
    """Parse the command-line data set and train a ``Validator`` on it.

    ``sys.argv[1]`` is loaded through ``ClassificationData.parse_tuples``
    and then given, along with ``sys.argv[2]``, to ``Validator.train``.
    When a third argument is present it is parsed as a restrictions file.

    Returns:
        None in every case.  A usage message is printed when the argument
        count is wrong.
    """
    num_args = len(sys.argv)

    # Accept two mandatory arguments and at most one optional one.
    if not 3 <= num_args <= 4:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    if num_args == 4:
        # NOTE(review): nothing below reads the parsed restrictions --
        # confirm whether they were meant to reach the validator.
        restrictions = ClassificationData(sys.argv[3])
        restrictions.parse_restr_tuples()

    class_data = ClassificationData(sys.argv[1])
    class_data.parse_tuples()

    validator = Validator([])
    validator.train(sys.argv[2], class_data)
Example #5
0
def main():
    """Build a tree with ``Trainer.c45`` and print it as pretty XML.

    Command line::

        python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]

    Returns:
        None after printing the usage message or after a normal run;
        -1 when ``check_file`` rejects either of the two required paths.
    """
    num_args = len(sys.argv)

    # Two mandatory paths plus one optional restrictions file.
    if num_args <= 2 or num_args > 4:
        print('Expected input format: python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]')
        return

    if check_file(sys.argv[1]) == -1 or check_file(sys.argv[2]) == -1:
        return -1

    # The domain file stays open while the trainer runs and is closed when
    # the block exits, instead of being leaked.
    with open(sys.argv[1], "r") as domain:
        # Parse the training rows.
        class_data = ClassificationData(sys.argv[2])
        class_data.parse_tuples()

        restriction = None
        if num_args == 4:
            restriction = ClassificationData(sys.argv[3])
            restriction.parse_restr_tuples()

        document = xml.dom.minidom.Document()
        node = document.createElement('Tree')
        document.appendChild(node)

        d = Trainer(domain, class_data, document)
        # Restrictions are applied before the "Id"/"Vote" removals here.
        if restriction is not None:
            d.rem_restrictions(restriction.restr)

        # Same object as d.attributes: both removals go through this single
        # reference, exactly as before.
        attributes = d.attributes
        attributes.remove("Id")
        attributes.remove("Vote")

        d.c45(d.data, d.attributes, node, 0)

    print(document.toprettyxml())