def __init__(self, kind_string):
    """Construct a table for the given kind.

    Stores ``kind_string`` as ``_kind``, builds an ``attributes.Attributes``
    from it, and starts with empty image, small-image and tuple lists.
    """
    # Parenthesised so this trace line is valid on both Python 2 and
    # Python 3; the bare ``print "..."`` statement form is a SyntaxError
    # on Python 3. With a single string argument the output is the same.
    print("[table]テーブルのコンストラクタの起動")
    self._kind = kind_string
    self._attributes = attributes.Attributes(self._kind)
    self._images = []
    self._smallImages = []
    self._tuples = []
def __init__(self, kind_string):
    """Construct a table for the given kind.

    Keeps ``kind_string`` on the instance, creates the
    ``attributes.Attributes`` object for that kind, and initialises the
    image, thumbnail and tuple collections as empty lists.
    """
    self._kind_string = kind_string
    self._attributes = attributes.Attributes(kind_string)
    # Three independent, initially empty lists (assigned left to right).
    self._images, self._thumbnails, self._tuples = [], [], []
def __init__(self, classifier, training_data, attribute_set):
    """Store the inputs and build the decision tree.

    ``attribute_set`` is sorted by each attribute's ``name`` and wrapped in
    an ``attributes.Attributes`` (constructed with ``False`` as its first
    argument). ``root`` starts as ``None`` and ``build_decision_tree()`` is
    then called; presumably that call populates ``root``.
    """
    def by_name(attribute):
        return attribute.name

    self.classifier = classifier
    self.training_data = training_data

    name_ordered = sorted(attribute_set, key=by_name)
    self.attribute_set = attributes.Attributes(False, name_ordered)

    self.root = None
    self.build_decision_tree()
dest='attributes_file', required=True) parser.add_argument('--train', type=argparse.FileType('r'), help='Name of the file to use for training', dest='training_file', required=True) parser.add_argument('--test', type=argparse.FileType('r'), dest='testing_file', help='Name of the file to use for testing') args = parser.parse_args() # Read in a complete list of attributes. # global all_attributes all_attributes = attributes.Attributes(args.attributes_file) if args.classifier not in all_attributes.all_names(): sys.stderr.write("Classifier '%s' not a recognized attribute name\n" % args.classifier) sys.exit(1) classifier = all_attributes[args.classifier] # Import the d-tree module, removing the .py extension if found if args.dtree_module.endswith('.py') and len(args.dtree_module) > 3: dtree_pkg = __import__(args.dtree_module[:-3]) else: dtree_pkg = __import__(args.dtree_module) # Train training_data = dataset.DataSet(args.training_file, all_attributes) starting_attrs = copy.copy(all_attributes)
def mainscript(argv=None):
    """Preprocess the data, test and attribute files and write the results.

    The training file, the optional test file and the attribute file are
    each run through ``preprocess_file`` into a sibling
    ``<path>_preprocessed`` file. Data tables are built from the
    preprocessed data files, the attributes are passed through
    ``modify_attributes`` for each table, and the attributes and tables
    are written to the selected output files.

    Args:
        argv: Command-line style argument list to parse. When ``None``
            (the default) the built-in list pointing at the ``adult``
            dataset under ``./dataset`` is used.
    """
    if argv is None:
        # Built-in default run. No "-inter" entry: the parser defines no
        # such option, so passing it makes parse_args() exit with an
        # "unrecognized arguments" error.
        argv = [
            '--datafile', './dataset/src/adult.dat',
            '--attributes', './dataset/src/adult_attrib.txt',
            '--testfile', './dataset/src/adult_test.dat',
            '-oa', './dataset/processed/adult_attrib.txt',
            '-od', './dataset/processed/adult.csv',
            '-ot', './dataset/processed/adult_test.csv',
        ]

    # Parse arguments. The three input options stay plain path strings
    # (not argparse.FileType) because the '_preprocessed' sibling path is
    # derived from them below.
    ap = argparse.ArgumentParser(
        description='Handle Missing Values in Dataset')
    ap.add_argument(
        '--datafile', '-d',
        help='Name of the data file',
        dest='datafile',
        required=True)
    ap.add_argument(
        '--testfile', '-t',
        help='Name of the test file',
        dest='testfile')
    ap.add_argument(
        '--attributes', '-a',
        help='Name of the attribute specification file',
        dest='attributes_file',
        required=True)
    ap.add_argument(
        '--output', '-oa',
        type=argparse.FileType('w'),
        help='Name of attribute output file',
        dest='att_outfile',
        default=sys.stdout)
    ap.add_argument(
        '--output2', '-od',
        type=argparse.FileType('w'),
        help='Name of training set output file',
        dest='train_outfile',
        default=sys.stdout)
    ap.add_argument(
        '--output3', '-ot',
        type=argparse.FileType('w'),
        help='Name of test set output file',
        dest='test_outfile',
        default=sys.stdout)
    args = ap.parse_args(argv)

    # Create the training data table.
    train_preprocessed = args.datafile + '_preprocessed'
    preprocess_file(args.datafile, train_preprocessed)
    datatable = create_data_table(train_preprocessed)

    # Create the testing data table. It stays None when no test file was
    # given, so the later checks never touch an unbound name.
    testtable = None
    if args.testfile:
        test_preprocessed = args.testfile + '_preprocessed'
        preprocess_file(args.testfile, test_preprocessed)
        # Build from the preprocessed *test* file, not the training file.
        testtable = create_data_table(test_preprocessed)

    # Load the attribute specification from its preprocessed copy. The
    # file is closed as soon as Attributes has been constructed, even if
    # construction raises.
    attributes_preprocessed = args.attributes_file + '_preprocessed'
    preprocess_file(args.attributes_file, attributes_preprocessed)
    with open(attributes_preprocessed, 'r') as attributes_source:
        all_attributes = attributes.Attributes(attributes_source)

    mod_attributes = modify_attributes(datatable, all_attributes)
    if testtable:
        mod_attributes = modify_attributes(testtable, mod_attributes)

    # Output the attributes file.
    output_attribute(mod_attributes, args.att_outfile)

    # Output the data tables: training table to the training output, test
    # table (when there is one) to the test output.
    output_datatable(datatable, args.train_outfile)
    if testtable is not None:
        output_datatable(testtable, args.test_outfile)
def parse_attributes(character_info):
    """Build an ``attributes.Attributes`` from a character's attribute table.

    Every entry of ``character_info["Attributes"]`` becomes one keyword
    argument: the key is lower-cased with spaces replaced by underscores,
    and the value is converted with ``int``.
    """
    keyword_values = {}
    for label, raw in character_info["Attributes"].items():
        number = int(raw)
        keyword = label.lower().replace(' ', '_')
        keyword_values[keyword] = number
    return attributes.Attributes(**keyword_values)