def main():
    """Classify every parsed CSV row with a decision tree read from XML.

    Command line: ``python classify.py <csvFile> <XMLFile>``.

    ``sys.argv[1]`` is handed to ``ClassificationData`` and parsed into
    tuples; ``sys.argv[2]`` is parsed with ``xml.dom.minidom``. Each tuple
    is passed to ``Classifier.classify`` together with the tree's root
    element, and ``Classifier.print_stats`` is called once at the end.

    Returns:
        None. When the argument count is wrong, only the usage line is
        printed.
    """
    # Both argv[1] and argv[2] are indexed below, so exactly two arguments
    # (argv length 3) are required; anything else gets the usage line
    # instead of an IndexError.
    if len(sys.argv) != 3:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    # The file handle is only needed until minidom has built the DOM, so
    # it is released as soon as parsing is done.
    with open(sys.argv[2], "r") as decision_file:
        csv_reader = ClassificationData(sys.argv[1])
        csv_reader.parse_tuples()

        classifier = Classifier()
        print(csv_reader.category)
        if len(csv_reader.category) > 0:
            classifier.has_category = True

        tree = xml.dom.minidom.parse(decision_file)

    root = tree.documentElement
    for row in csv_reader.tuples:
        classifier.classify(root, row, csv_reader.attributes)
    classifier.print_stats()
def main():
    """Parse the command-line inputs and train a ``Validator`` on the CSV data.

    ``sys.argv[1]`` is parsed as classification data and passed, together
    with ``sys.argv[2]``, to ``Validator.train``. An optional third argument
    is parsed as a restrictions file.

    Returns:
        None. When the argument count is wrong, only the usage line is
        printed.
    """
    arg_count = len(sys.argv)

    # Guard clause: two or three arguments after the script name.
    if not 3 <= arg_count <= 4:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    if arg_count == 4:
        # NOTE(review): the parsed restrictions are not referenced again in
        # this function -- confirm whether they should reach the validator.
        restriction_data = ClassificationData(sys.argv[3])
        restriction_data.parse_restr_tuples()

    training_data = ClassificationData(sys.argv[1])
    training_data.parse_tuples()

    Validator([]).train(sys.argv[2], training_data)
def main():
    """Build a decision tree from the command-line inputs and print it as XML.

    Command line:
    ``python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]``

    The training CSV is parsed with ``ClassificationData``; a ``Trainer`` is
    built from the open domain file, the parsed data and a fresh XML
    document whose root element is ``Tree``. The "Id" and "Vote" entries are
    removed from the trainer's attribute list, optional restrictions are
    applied, ``Trainer.c45`` is run, and the document is pretty-printed.

    Returns:
        None after printing the usage line or the tree; -1 when
        ``check_file`` reports -1 for either required file.
    """
    num_args = len(sys.argv)

    # Make sure the right number of input files are specified.
    if num_args <= 2 or num_args > 4:
        print(
            'Expected input format: python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]'
        )
        return

    if check_file(sys.argv[1]) == -1 or check_file(sys.argv[2]) == -1:
        return -1

    # Keep the domain file open for as long as the trainer may read from it,
    # then close it deterministically instead of leaking the handle.
    with open(sys.argv[1], "r") as domain:
        class_data = ClassificationData(sys.argv[2])
        class_data.parse_tuples()

        restriction = None
        if num_args == 4:
            restriction = ClassificationData(sys.argv[3])
            restriction.parse_restr_tuples()

        document = xml.dom.minidom.Document()
        node = document.createElement('Tree')
        document.appendChild(node)

        d = Trainer(domain, class_data, document)

        # Fetched once and edited in place, exactly as the trainer exposes it.
        partial_atts = d.attributes
        partial_atts.remove("Id")
        partial_atts.remove("Vote")

        if restriction is not None:
            d.rem_restrictions(restriction.restr)

        d.c45(d.data, d.attributes, node, 0)

    print(document.toprettyxml())
def main():
    """Read the command-line files and hand the CSV data to a ``Validator``.

    Accepts two or three arguments after the script name. The first is
    parsed via ``ClassificationData.parse_tuples``; the second is forwarded
    unchanged to ``Validator.train``; the optional third is parsed via
    ``ClassificationData.parse_restr_tuples``.

    Returns:
        None in every case; a wrong argument count only prints the usage
        line.
    """
    argc = len(sys.argv)
    has_restrictions = argc == 4

    # Reject anything that is neither the two-file nor the three-file form.
    if argc != 3 and not has_restrictions:
        print('Expected input format: python classify.py <csvFile> <XMLFile>')
        return

    if has_restrictions:
        # NOTE(review): nothing below reads this object -- verify whether the
        # restrictions were meant to be passed on.
        restr = ClassificationData(sys.argv[3])
        restr.parse_restr_tuples()

    data = ClassificationData(sys.argv[1])
    data.parse_tuples()

    trainer = Validator([])
    trainer.train(sys.argv[2], data)
def main():
    """Build a decision tree from the command-line inputs and print it as XML.

    Command line:
    ``python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]``

    The training CSV is parsed with ``ClassificationData``; a ``Trainer`` is
    built from the open domain file, the parsed data and a fresh XML
    document whose root element is ``Tree``. Optional restrictions are
    applied first, then the "Id" and "Vote" entries are removed from the
    trainer's attribute list, ``Trainer.c45`` is run, and the document is
    pretty-printed.

    Returns:
        None after printing the usage line or the tree; -1 when
        ``check_file`` reports -1 for either required file.
    """
    num_args = len(sys.argv)

    # Make sure the right number of input files are specified.
    if num_args <= 2 or num_args > 4:
        print('Expected input format: python inducec45.py <domainFile.xml> <TrainingSetFile.csv> [<restrictionsFile>]')
        return

    if check_file(sys.argv[1]) == -1 or check_file(sys.argv[2]) == -1:
        return -1

    # Keep the domain file open for as long as the trainer may read from it,
    # then close it deterministically instead of leaking the handle.
    with open(sys.argv[1], "r") as domain:
        class_data = ClassificationData(sys.argv[2])
        class_data.parse_tuples()

        restriction = None
        if num_args == 4:
            restriction = ClassificationData(sys.argv[3])
            restriction.parse_restr_tuples()

        document = xml.dom.minidom.Document()
        node = document.createElement('Tree')
        document.appendChild(node)

        d = Trainer(domain, class_data, document)

        if restriction is not None:
            d.rem_restrictions(restriction.restr)

        # Fetched once and edited in place, exactly as the trainer exposes it.
        partial_atts = d.attributes
        partial_atts.remove("Id")
        partial_atts.remove("Vote")

        d.c45(d.data, d.attributes, node, 0)

    print(document.toprettyxml())