def construct(self, data = None, objectClass = None):
    """
    Create an AttributeSet or an ExampleSet from an iterable of text lines.

    When objectClass is None, every line is split on tabs and wrapped in an
    Attribute that is added to a new AttributeSet. When objectClass is given,
    every line is turned into Example(line, objectClass) and added to a new
    ExampleSet.

    @param data: iterable of text lines; required
    @param objectClass: value passed as second argument to Example(); its
                        presence selects ExampleSet construction
    @return AttributeSet if objectClass is None, otherwise ExampleSet
    @raise ValueError: if data is None
    """
    if data is None:
        raise ValueError("No data specified.")

    # "objectClass is None" and "objectClass is not None" cover every case, so
    # no further fallback branch is needed after these two paths.
    if objectClass is None:
        attributes = AttributeSet()
        for line in data:
            attributes.add(Attribute(line.split('\t')))
        return attributes

    examples = ExampleSet()
    for line in data:
        examples.add(Example(line, objectClass))
    return examples
def build(self, data = None, attributeSet = None):
    """
    Build an AttributeSet or an ExampleSet from an iterable of input items.

    Without an attributeSet, every item whose first character is '@' has that
    prefix removed, is split on tabs and is added to a new AttributeSet as an
    Attribute. With an attributeSet, a new ExampleSet is built: string items
    prefixed with '#' become Example(line[1:], attributeSet), items that are
    ExampleSet instances are added as they are, and everything else is skipped.

    @param data: iterable of raw lines (and possibly ExampleSet objects when
                 building examples); iterating the default None raises TypeError
    @param attributeSet: AttributeSet handed to every Example; None selects
                         attribute construction
    @return AttributeSet if attributeSet is None, otherwise ExampleSet
    """
    if attributeSet is None:
        # Build an attribute set.
        attributes = AttributeSet()
        for line in data:
            # Slice instead of indexing so that an empty line is skipped
            # rather than raising IndexError.
            if line[:1] == '@':
                attributes.add(Attribute(line[1:].split('\t')))
        return attributes

    # Build an example set.
    examples = ExampleSet()
    for line in data:
        if isinstance(line, str):
            if line.startswith('#'):
                examples.add(Example(line[1:], attributeSet))
        # isinstance avoids constructing a throwaway ExampleSet per item just
        # to obtain its type for comparison.
        elif isinstance(line, ExampleSet):
            examples.add(line)
    return examples
def __init__(self, filePath=None):
    """
    Start with no name, no attributes, an empty ExampleSet and an iteration
    index of zero; when a file path is supplied, forward it to
    self.initialize().

    @param filePath: optional value passed to self.initialize(); nothing is
                     initialized from a file when it is None
    """
    self.name, self.attributes = None, None
    self.examples = ExampleSet()
    self.iteration_index = 0

    if filePath is None:
        return
    self.initialize(filePath)
def getTrainValidateTestSet(self, p = .6, v = .5):
    """
    Randomly split the examples into training, validation and test sets.

    int(len(examples) * p) examples are drawn at random for training. Of the
    remaining examples, the first int(len(examples) * (1 - p) * v) in
    iteration order go to validation and the rest go to testing. A one-line
    summary of the resulting sizes is printed to stdout.

    @param p: fraction of all examples used for training
    @param v: fraction of the non-training examples used for validation
    @return (train, valid, tests) tuple of ExampleSet objects
    """
    examples = self.getExamples()
    total = len(examples)
    n = int(total * p)
    m = int(total * ((1. - p) * v))

    # Sample positions rather than the examples themselves. Membership is then
    # an O(1) index lookup, and examples that compare equal to each other can
    # no longer inflate the training set or be dropped from the split.
    chosen = set(sample(range(total), n))

    train = ExampleSet()
    valid = ExampleSet()
    tests = ExampleSet()
    for index, example in enumerate(examples):
        if index in chosen:
            train.add(example)
        elif m != 0:
            valid.add(example)
            m -= 1
        else:
            tests.add(example)

    # Parenthesized single-argument form works on both Python 2 and Python 3.
    print("train: {0} valid: {1} tests: {2} all: {3}".format(len(train), len(valid), len(tests), total))
    return train, valid, tests
def build(self, data=None, attributeSet=None):
    """
    return an AttributeSet or ExampleSet object

    Without an attributeSet, every item of data prefixed with '@' is stripped
    of the prefix, split on tabs and added to a new AttributeSet as an
    Attribute. With an attributeSet, every string item prefixed with '#' is
    turned into Example(line[1:], attributeSet) and added to a new ExampleSet.
    Empty lines and lines without the expected prefix are skipped.

    @param data: input data; iterable of raw (textual) attributes or examples.
                 Iterating the default None raises TypeError.
    @param attributeSet: AttributeSet object required to create ExampleSet objects
    @return AttributeSet or ExampleSet objects
    """
    # Build an AttributeSet object from raw (text) attributes.
    if attributeSet is None:
        attributes = AttributeSet()
        for line in data:
            # Slice rather than index: an empty line then compares unequal to
            # '@' instead of raising IndexError.
            if line[:1] == '@':
                attributes.add(Attribute(line[1:].split('\t')))
        return attributes

    # Build an ExampleSet object from raw (text) examples and an AttributeSet.
    exampleSet = ExampleSet()
    for line in data:
        # Only string items prefixed with '#' are examples. Non-string items
        # are skipped (adding them directly was disabled on 7/13/2016).
        if isinstance(line, str) and line.startswith('#'):
            exampleSet.add(Example(line[1:], attributeSet))
    return exampleSet
def build(self, data = None, attributeSet = None):
    """
    return an AttributeSet or ExampleSet object

    If attributeSet is None, a new AttributeSet is filled with one Attribute
    per item of data that starts with '@' (prefix removed, remainder split on
    tabs). Otherwise a new ExampleSet is filled with one
    Example(line[1:], attributeSet) per string item that starts with '#'.
    Items that are empty or lack the expected prefix are ignored.

    @param data: input data; iterable of raw (textual) attributes or examples.
                 The default None is not iterable and raises TypeError.
    @param attributeSet: AttributeSet object required to create ExampleSet objects
    @return AttributeSet or ExampleSet objects
    """
    if attributeSet is None:
        # Build an AttributeSet object from raw (text) attributes.
        newAttributes = AttributeSet()
        for line in data:
            # line[:1] is '' for an empty line, so blank input lines are
            # skipped instead of failing with IndexError as line[0] would.
            if line[:1] == '@':
                newAttributes.add(Attribute(line[1:].split('\t')))
        return newAttributes

    # Build an ExampleSet object from raw (text) examples and an AttributeSet.
    exampleSet = ExampleSet()
    for line in data:
        # Non-string items are ignored; adding them to the set directly was
        # disabled on 7/13/2016.
        if not isinstance(line, str):
            continue
        if line.startswith('#'):
            exampleSet.add(Example(line[1:], attributeSet))
    return exampleSet
def getTrainTestSet(self, p = .6):
    """
    Randomly split the examples into a training set and a test set.

    int(len(examples) * p) examples are drawn at random for training; every
    other example goes to the test set.

    @param p: fraction of all examples used for training
    @return (train, tests) tuple of ExampleSet objects
    """
    examples = self.getExamples()
    total = len(examples)
    n = int(total * p)

    # Sample positions rather than the examples themselves. Membership is then
    # an O(1) index lookup, and examples that compare equal to a sampled one
    # are no longer all pulled into the training set.
    chosen = set(sample(range(total), n))

    train = ExampleSet()
    tests = ExampleSet()
    for index, example in enumerate(examples):
        if index in chosen:
            train.add(example)
        else:
            tests.add(example)
    return train, tests
def getExamplesByClass(self, i = None):
    """
    Return the examples with label i as a new ExampleSet.

    @param i: label forwarded unchanged to self.examples.getExamples()
    @return ExampleSet constructed from the result of that call
    """
    matching = self.examples.getExamples(i)
    return ExampleSet(matching)