Esempio n. 1
0
 def __init__(self, kind_string):
     """Table constructor.

     Announces the construction on standard output, stores ``kind_string``,
     creates the ``attributes.Attributes`` object for that kind, and starts
     with empty image, small-image and tuple lists.
     """
     # Parenthesised single-argument form: prints the same text under the
     # Python 2 print statement and is also valid Python 3 syntax.
     print("[table]テーブルのコンストラクタの起動")
     self._kind = kind_string
     self._attributes = attributes.Attributes(self._kind)
     self._images = []
     self._smallImages = []
     self._tuples = []
Esempio n. 2
0
    def __init__(self, kind_string):
        """Table constructor.

        Records ``kind_string`` and creates the ``attributes.Attributes``
        object for it; the image, thumbnail and tuple lists start out empty.
        """
        self._kind_string = kind_string
        self._attributes = attributes.Attributes(kind_string)
        # Three independent, initially empty containers.
        self._images, self._thumbnails, self._tuples = [], [], []
Esempio n. 3
0
    def __init__(self, classifier, training_data, attribute_set):
        """Store the inputs and build the decision tree immediately.

        ``attribute_set`` is sorted by each attribute's ``name`` and wrapped
        in an ``attributes.Attributes`` object (first argument ``False``).
        ``root`` is ``None`` until ``build_decision_tree()`` runs at the end
        of construction.
        """
        self.classifier = classifier
        self.training_data = training_data

        def by_name(candidate):
            return candidate.name

        ordered = sorted(attribute_set, key=by_name)
        self.attribute_set = attributes.Attributes(False, ordered)

        self.root = None
        self.build_decision_tree()
Esempio n. 4
0
                    dest='attributes_file',
                    required=True)
parser.add_argument(
    '--train',
    dest='training_file',
    type=argparse.FileType('r'),
    required=True,
    help='Name of the file to use for training')
parser.add_argument(
    '--test',
    dest='testing_file',
    type=argparse.FileType('r'),
    help='Name of the file to use for testing')
args = parser.parse_args()

# Load the complete attribute list from the attributes file.
all_attributes = attributes.Attributes(args.attributes_file)

# The requested classifier has to be one of the known attribute names;
# otherwise report the problem on stderr and stop with exit status 1.
if args.classifier not in all_attributes.all_names():
  sys.stderr.write(
      "Classifier '%s' not a recognized attribute name\n" % args.classifier)
  sys.exit(1)
classifier = all_attributes[args.classifier]

# Import the d-tree module by name. A trailing '.py' is dropped, but only
# when the name is longer than the extension itself.
dtree_pkg = __import__(
    args.dtree_module[:-3]
    if args.dtree_module.endswith('.py') and len(args.dtree_module) > 3
    else args.dtree_module)

# Train: read the training set and take a shallow copy of the attributes.
training_data = dataset.DataSet(args.training_file, all_attributes)
starting_attrs = copy.copy(all_attributes)
Esempio n. 5
0
def mainscript():
    """Preprocess the dataset files and write the processed outputs.

    Builds the argument parser and parses a fixed, built-in argument list
    (the process's real command line is not consulted). It then:

    1. preprocesses the training file and builds its data table;
    2. does the same for the test file, when one is given;
    3. preprocesses the attribute file and loads it into
       ``attributes.Attributes``;
    4. passes the attributes through ``modify_attributes`` for each table;
    5. writes the attributes, the training table and the test table to
       their respective output files.
    """
    # parse argument
    ap = argparse.ArgumentParser(
        description='Handle Missing Values in Dataset')
    # The three input arguments are kept as plain path strings (not opened
    # by argparse) because '_preprocessed' is appended to them below.
    ap.add_argument(
        '--datafile',
        '-d',
        help='Name of the data file',
        dest='datafile',
        required=True)
    ap.add_argument(
        '--testfile',
        '-t',
        help='Name of the test file',
        dest='testfile')
    ap.add_argument(
        '--attributes',
        '-a',
        help='Name of the attribute specification file',
        dest='attributes_file',
        required=True)
    ap.add_argument('--output',
                    '-oa',
                    type=argparse.FileType('w'),
                    help='Name of attribute output file',
                    dest='att_outfile',
                    default=sys.stdout)
    ap.add_argument('--output2',
                    '-od',
                    type=argparse.FileType('w'),
                    help='Name of training set output file',
                    dest='train_outfile',
                    default=sys.stdout)
    ap.add_argument('--output3',
                    '-ot',
                    type=argparse.FileType('w'),
                    help='Name of test set output file',
                    dest='test_outfile',
                    default=sys.stdout)
    # NOTE: the argument list is fixed here rather than taken from sys.argv.
    # A stale "-inter <path>" pair used to be part of this list; no such
    # option is declared, so argparse rejected it as an unrecognized
    # argument and exited before any work was done.
    args = ap.parse_args([
        '--datafile', './dataset/src/adult.dat',
        '--attributes', './dataset/src/adult_attrib.txt',
        '--testfile', './dataset/src/adult_test.dat',
        '-oa', './dataset/processed/adult_attrib.txt',
        '-od', './dataset/processed/adult.csv',
        '-ot', './dataset/processed/adult_test.csv',
    ])

    # create training datatable
    train_preprocessed = args.datafile + '_preprocessed'
    preprocess_file(args.datafile, train_preprocessed)
    datatable = create_data_table(train_preprocessed)

    # create testing datatable; stays None when no test file is given so the
    # later checks never touch an unbound name
    testtable = None
    if args.testfile:
        test_preprocessed = args.testfile + '_preprocessed'
        preprocess_file(args.testfile, test_preprocessed)
        # Build the table from the *test* file's preprocessed copy.
        testtable = create_data_table(test_preprocessed)

    # create attribute file
    attrib_preprocessed = args.attributes_file + '_preprocessed'
    preprocess_file(args.attributes_file, attrib_preprocessed)
    # The context manager closes the file even if Attributes() raises.
    with open(attrib_preprocessed, 'r') as attrib_file:
        all_attributes = attributes.Attributes(attrib_file)

    mod_attributes = modify_attributes(datatable, all_attributes)
    if testtable:
        mod_attributes = modify_attributes(testtable, mod_attributes)

    # output attributes file
    output_attribute(mod_attributes, args.att_outfile)
    # output datatables: training table to the training output, test table
    # (when one was built) to the test output
    output_datatable(datatable, args.train_outfile)
    if testtable is not None:
        output_datatable(testtable, args.test_outfile)
Esempio n. 6
0
def parse_attributes(character_info):
    """Build an ``attributes.Attributes`` from ``character_info["Attributes"]``.

    Every key is lower-cased with spaces replaced by underscores, every
    value is converted with ``int``, and the resulting mapping is passed to
    ``attributes.Attributes`` as keyword arguments.
    """
    normalized = {
        label.lower().replace(' ', '_'): int(raw)
        for label, raw in character_info["Attributes"].items()
    }
    return attributes.Attributes(**normalized)