def __init__(self, training):
    """Set up the paired-attribute domain and build the training table.

    Every attribute of the ``WordnetParentsEngine`` domain is cloned twice,
    as ``<name>_w1`` and ``<name>_w2``; the class variable is an enumeration
    over "Larger", "Smaller", "Equal" and "None".

    :param training: training data, passed unchanged to
        ``trainer.WordnetParentsEngine`` and to ``self.makeTable``.
    :raises ValueError: if an engine attribute is neither an
        ``orange.FloatVariable`` nor an ``orange.EnumVariable``.
    """
    self.training = training
    self.wnparents = trainer.WordnetParentsEngine(training)

    labels = ["Larger", "Smaller", "Equal", "None"]
    self.cls_variable = orange.EnumVariable("class", values=labels)

    alist = []
    for var in self.wnparents.domain.attributes:
        if isinstance(var, orange.FloatVariable):
            alist.append(orange.FloatVariable(name="%s_w1" % var.name))
            alist.append(orange.FloatVariable(name="%s_w2" % var.name))
        elif isinstance(var, orange.EnumVariable):
            # Both copies share the value list of the source attribute.
            alist.append(
                orange.EnumVariable(name="%s_w1" % var.name,
                                    values=var.values))
            alist.append(
                orange.EnumVariable(name="%s_w2" % var.name,
                                    values=var.values))
        else:
            # repr() yields the same text as the Python-2-only backticks.
            raise ValueError("Unhandled attribute: " + repr(var))

    self.domain = orange.Domain(alist, self.cls_variable)
    self.training_table = self.makeTable(self.training)
def __call__(self, rule, examples, weights, targetClass):
    """Compute new example weights after ``rule`` has been accepted.

    An example counts as covered when ``rule(example)`` is true and its
    class equals ``rule.classifier(example, orange.GetValue)``.  For a
    covered example the "Coverage" meta value is incremented and the new
    weight becomes ``1 / (coverage + 1)``; every other example keeps the
    weight stored under ``weights``.

    :param rule: the rule whose coverage is being accounted for.
    :param examples: example table; meta attributes are added in place.
    :param weights: meta id of the current weights; when falsy a new
        weight meta (initialised to 1.0) is created first.
    :param targetClass: not used by this implementation.
    :returns: ``(examples, newWeightsID)``.
    """
    if not weights:
        weights = orange.newmetaid()
        examples.addMetaAttribute(weights, 1.)
        examples.domain.addmeta(
            weights, orange.FloatVariable("weights-" + str(weights)), True)

    try:
        coverage = examples.domain.getmeta("Coverage")
    except Exception:
        # No "Coverage" meta registered yet: create it, starting at 0.
        # (Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.)
        coverage = orange.FloatVariable("Coverage")
        examples.domain.addmeta(orange.newmetaid(), coverage, True)
        examples.addMetaAttribute(coverage, 0.0)

    newWeightsID = orange.newmetaid()
    examples.addMetaAttribute(newWeightsID, 1.)
    examples.domain.addmeta(
        newWeightsID,
        orange.FloatVariable("weights-" + str(newWeightsID)), True)

    for example in examples:
        covered = rule(example) and example.getclass() == rule.classifier(
            example, orange.GetValue)
        if covered:
            try:
                example[coverage] += 1.0
            except Exception:
                # Fallback kept from the original for examples whose
                # coverage value cannot be incremented.
                example[coverage] = 1.0
            example[newWeightsID] = 1.0 / (example[coverage] + 1)
        else:
            example[newWeightsID] = example[weights]
    return (examples, newWeightsID)
def sendList(self, selectedInd):
    """Send the selected items of a list-like ``self.data``.

    When ``self.data`` holds plain strings, a table is sent on "Data":
    with ``selectionOptions == 1`` the X/Y coordinates are regular
    attributes next to the name, otherwise the name is the only attribute
    and (for any other truthy ``selectionOptions``) X/Y are attached as
    meta attributes.

    For any other content the selected items are grouped and sent on
    "Structured Data Files" (``None`` when nothing is selected).

    :param selectedInd: indices into ``self.data`` / ``self.mds.points``.
    """
    if self.data and isinstance(self.data[0], str):
        xAttr = orange.FloatVariable("X")
        yAttr = orange.FloatVariable("Y")
        nameAttr = orange.StringVariable("name")
        if self.selectionOptions == 1:
            domain = orange.Domain([xAttr, yAttr, nameAttr])
            selection = orange.ExampleTable(domain)
            for ind in selectedInd:
                # Fix: the name must come from the same selected index as
                # the point (the original used the loop counter instead).
                selection.append(
                    list(self.mds.points[ind]) + [self.data[ind]])
        else:
            domain = orange.Domain([nameAttr])
            if self.selectionOptions:
                domain.addmeta(orange.newmetaid(), xAttr)
                domain.addmeta(orange.newmetaid(), yAttr)
            selection = orange.ExampleTable(domain)
            for row, ind in enumerate(selectedInd):
                # Same index fix as above.
                selection.append([self.data[ind]])
                if self.selectionOptions:
                    point = self.mds.points[ind]
                    selection[row][xAttr] = point[0]
                    selection[row][yAttr] = point[1]
        self.send("Data", selection)
        return

    if not selectedInd:
        self.send("Structured Data Files", None)
        return

    datasets = [self.data[i] for i in selectedInd]
    names = list(set([d.dirname for d in datasets]))
    # NOTE(review): group names come from ``dirname`` but membership is
    # tested on ``strain`` -- kept as in the original; confirm intended.
    data = [(name, [d for d in datasets if d.strain == name])
            for name in names]
    self.send("Structured Data Files", data)
def to_network(self, terms=None):
    """
    Return an Orange.network.Network instance constructed from this
    ontology.

    Nodes are the ontology terms; ``network.items`` lists id, name and
    the first "def" value of every term, and ``network.links`` holds one
    row per connected (source, target) pair with 1-based node indices.

    :param terms: NOTE(review): currently ignored -- it is overwritten
        with ``self.terms()`` as in the original; confirm whether a
        subset was ever meant to be supported.
    :returns: the populated ``orngNetwork.Network``.
    """
    edge_types = self.edge_types()
    terms = self.terms()

    from Orange.orng import orngNetwork
    import orange

    network = orngNetwork.Network(len(terms), True, len(edge_types))
    network.objects = dict((term.id, i) for i, term in enumerate(terms))

    # (source id, related term) -> set of relation types between them.
    edges = defaultdict(set)
    for term in terms:
        for relType, relTerm in self.related_terms(term):
            edges[(term.id, relTerm)].add(relType)

    edgeitems = edges.items()
    for (src, dst), eTypes in edgeitems:
        # One 0/1 flag per known edge type.
        network[src, dst] = [1 if e in eTypes else 0 for e in edge_types]

    domain = orange.Domain([
        orange.StringVariable("id"),
        orange.StringVariable("name"),
        orange.StringVariable("def"),
    ], False)
    items = orange.ExampleTable(domain)
    for term in terms:
        ex = orange.Example(
            domain,
            [term.id, term.name, term.values.get("def", [""])[0]])
        items.append(ex)

    domain = orange.Domain([
        orange.FloatVariable("u"),
        orange.FloatVariable("v"),
        orange.EnumVariable("relationship", values=list(edge_types))
    ], False)
    id2index = dict((term.id, i + 1) for i, term in enumerate(terms))
    links = orange.ExampleTable(domain)
    for (src, dst), eTypes in edgeitems:
        # Only one relation type per pair is recorded; pop() picks an
        # arbitrary member of the set.
        ex = orange.Example(
            domain, [id2index[src], id2index[dst], eTypes.pop()])
        links.append(ex)

    network.items = items
    network.links = links
    network.optimization = None
    return network
def sendpredictions(self):
    """Send ``self.data`` extended with predictor outputs on "Predictions".

    Predictor outputs are attached as computed meta attributes:
    per-class probabilities for ``self.selectedClasses`` and, when
    ``self.showClass`` is set, the predicted class (classification), or
    the predicted value (regression).  With ``self.doPrediction`` the
    class column is filled from the first predictor.  ``None`` is sent
    when there is no data or no output variable.
    """
    if not self.data or not self.outvar:
        self.send("Predictions", None)
        return

    # predictions, data set with class predictions
    classification = self.outvar.varType == orange.VarTypes.Discrete

    metas = []
    if classification:
        if len(self.selectedClasses):
            for c in self.predictors.values():
                # Default arguments bind the current class index and
                # predictor, avoiding the late-binding closure pitfall.
                m = [
                    orange.FloatVariable(
                        name=str("%s(%s)" % (c.name,
                                             str(self.outvar.values[i]))),
                        getValueFrom=lambda ex, rw, cindx=i, c=c:
                        orange.Value(c(ex, c.GetProbabilities)[cindx]))
                    for i in self.selectedClasses
                ]
                metas.extend(m)
        if self.showClass:
            mc = [
                orange.EnumVariable(
                    name=str(c.name),
                    values=self.outvar.values,
                    getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                for c in self.predictors.values()
            ]
            metas.extend(mc)
    else:
        # regression
        mc = [
            orange.FloatVariable(
                name="%s" % c.name,
                getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
            for c in self.predictors.values()
        ]
        metas.extend(mc)

    classVar = self.outvar
    domain = orange.Domain(self.data.domain.attributes + [classVar])
    domain.addmetas(self.data.domain.getmetas())
    for m in metas:
        domain.addmeta(orange.newmetaid(), m)
    predictions = orange.ExampleTable(domain, self.data)

    # list(...) keeps this working when values() is a view, and the
    # emptiness guard avoids an IndexError when no predictor is connected.
    if self.doPrediction and self.predictors:
        c = list(self.predictors.values())[0]
        for ex in predictions:
            ex[classVar] = c(ex)

    predictions.name = self.data.name
    self.send("Predictions", predictions)
    self.changedFlag = False
def expandToFuzzyExamples(self, examples, att, a, b):
    """Annotate ``examples`` IN PLACE with fuzzy-set membership and return it.

    Two meta attributes are registered on the table's domain: 'fuzzy set'
    (name of the fuzzy set, 'yes' or 'no') and 'u' (degree of membership).
    For every example present on entry, with ``v = float(example[att])``:

    * ``a < v < b``: the example is tagged 'yes' with
      ``u = (v - a) / (b - a)``; it is then appended to the table again and
      the appended row is tagged 'no' with ``u = (b - v) / (b - a)``.
    * otherwise: 'yes' with ``u = 1.0`` when ``v > a``, else 'no' with
      ``u = 1.0``.

    Rows appended during the expansion are not revisited.

    :param examples: example table; modified and returned.
    :param att: attribute whose value decides the membership.
    :param a: lower threshold.
    :param b: higher threshold.
    :returns: the same ``examples`` object.
    """
    mu = orange.FloatVariable("u")
    mv = orange.StringVariable("fuzzy set")
    examples.domain.addmeta(FUZZYMETAID, mu)
    examples.domain.addmeta(FUZZYMETAID - 1, mv)

    # The length is captured once so rows appended below are skipped.
    for j in range(len(examples)):
        example = examples[j]
        v = float(example[att])
        if a < v < b:
            # we have to expand this example
            example["fuzzy set"] = 'yes'
            example["u"] = (v - a) / (b - a)
            # presumably append() stores a copy, so re-tagging the last
            # row leaves the 'yes' row intact -- TODO confirm
            examples.append(example)
            examples[-1]["fuzzy set"] = "no"
            examples[-1]["u"] = (b - v) / (b - a)
        elif v > a:
            # u(yes) = 1.0
            example["fuzzy set"] = 'yes'
            example["u"] = 1.0
        else:
            # u(no) = 1.0
            example["fuzzy set"] = 'no'
            example["u"] = 1.0
    return examples
def __call__(self, weights=None):
    """Return a normalised copy of ``self.data`` with a 'score' meta value.

    Steps: every attribute in ``self.ranges`` is divided by the width of
    its range, every feature column is divided by its column sum, and each
    example's score is the weighted sum over the ranged attributes, where
    attributes listed in ``self.minimize`` contribute ``1 - value``.

    :param weights: mapping attribute -> weight (missing entries count as
        1); ``self.user_weights`` is used when falsy.
    :returns: the augmented, normalised example table.
    """
    if not weights:
        weights = self.user_weights

    # New augmented table
    norm_data = orange.ExampleTable(self.data)

    # Choose a meta id below every existing one (and below 0).  The former
    # ``min(keys, 0)`` compared the key list itself with 0, which yields 0
    # on Python 2 (so the id was always -1) and raises on Python 3.
    existing_ids = list(norm_data.domain.get_metas().keys())
    newid = min(existing_ids + [0]) - 1
    score_attr = orange.FloatVariable('score')
    norm_data.domain.add_meta(newid, score_attr)
    norm_data.add_meta_attribute(score_attr)

    # Normalize the attributes to the proper range
    for att, (lower_bound, upper_bound) in self.ranges.items():
        width = upper_bound - lower_bound
        for ex in norm_data:
            ex[att] = ex[att] / width

    # Normalize column-wise
    features = norm_data.domain.features
    col_sum = {}
    for att in features:
        col_sum[att] = float(sum([ex[att] for ex in norm_data]))
    for ex in norm_data:
        for att in features:
            ex[att] = ex[att] / col_sum[att]

    # Use the inverse of an attribute value if it should be minimized.
    def inverse(x, att):
        return 1 - x if att in self.minimize else x

    for ex in norm_data:
        ex['score'] = sum([
            inverse(ex[att].value, att) * weights.get(att, 1)
            for att in self.ranges.keys()
        ])
    return norm_data
def __call__(self, dataset):
    """Project ``dataset`` onto the stored principal components.

    The data is restricted to ``self.domain``, imputed, continuized,
    centred with ``self.center``, optionally scaled by ``self.deviation``
    and multiplied by ``self.loadings``.  The projected matrix and the
    class values are cached in ``self._dataMatrix`` / ``self._classArray``.

    :param dataset: example table to transform.
    :returns: table with attributes ``PC1..PCn`` (``n`` being
        ``len(self.evalues)``), joined with the class column when
        ``dataset`` has a class variable.
    :raises orange.KernelException: if preprocessing raises ``TypeError``.
    """
    try:
        #retain class attribute
        attrDataset = dataset.select(self.domain)
        imputer = self.imputer(attrDataset)
        attrDataset = imputer(attrDataset)
        domain = self.continuizer(attrDataset)
        attrDataset = attrDataset.translate(domain)
    except TypeError:
        raise orange.KernelException("One or more attributes form training set are missing!")

    dataMatrix, classArray, x = attrDataset.toNumpy()

    dataMatrix -= self.center
    # Identity test: ``!= None`` on a numpy array compares element-wise
    # and makes the truth value of the condition ambiguous.
    if self.deviation is not None:
        dataMatrix *= 1. / self.deviation

    #save transformed data
    self._dataMatrix = numpy.dot(dataMatrix, self.loadings)

    attributes = [orange.FloatVariable("PC%d" % (i + 1, ))
                  for i in range(len(self.evalues))]
    new_domain = orange.Domain(attributes)
    new_table = orange.ExampleTable(new_domain, self._dataMatrix)

    if dataset.domain.classVar:
        #suboptimal
        classTable = dataset.select([dataset.domain.classVar.name])
        self._classArray = numpy.array(
            [row.getclass() for row in classTable])
        new_table = orange.ExampleTable([new_table, classTable])

    return new_table
def applySettings(self):
    """Use the widget settings to identify outliers and inliers.

    With input present, the data is copied into ``self.newdata`` with an
    extra "Z score" meta attribute filled from ``self.outlier.zValues()``
    and sent on "Examples with Z-scores"; rows with a score above the
    threshold go to "Outliers", the rest to "Inliers".  Without input,
    ``None`` is sent on all three channels.
    """
    if self.haveInput != 1:
        self.send("Examples with Z-scores", None)
        self.send("Outliers", None)
        self.send("Inliers", None)
        return

    outlier = self.outlier
    outlier.setKNN(self.ks[self.k][1])

    newdomain = orange.Domain(self.data.domain)
    newdomain.addmeta(orange.newmetaid(), orange.FloatVariable("Z score"))
    self.newdata = orange.ExampleTable(newdomain, self.data)

    for i, el in enumerate(outlier.zValues()):
        self.newdata[i]["Z score"] = el

    self.send("Examples with Z-scores", self.newdata)

    # SECURITY NOTE(review): eval() executes whatever text is stored in
    # self.zscore.  Kept because the accepted input format is not visible
    # here; consider float() if only plain numbers are expected.
    # Evaluated once and shared by both filters (previously twice).
    threshold = eval(self.zscore)

    filterout = orange.Filter_values(domain=self.newdata.domain)
    filterout["Z score"] = (orange.Filter_values.Greater, threshold)
    outliers = filterout(self.newdata)

    filterin = orange.Filter_values(domain=self.newdata.domain)
    filterin["Z score"] = (orange.Filter_values.LessEqual, threshold)
    inliers = filterin(self.newdata)

    self.send("Outliers", outliers)
    self.send("Inliers", inliers)
def etForAttribute(datal, a):
    """
    Builds an example table for a single attribute across multiple
    example tables.

    Column ``v<k>`` of the result holds the native values of attribute
    ``a`` in the k-th table of ``datal``; the class variable is the one
    of the first table.

    :param datal: sequence of example tables (must not be empty).
    :param a: attribute to extract from every table.
    :returns: a new example table with one column per input table.
    """

    def getAttrVals(table, attr):
        # Project the table onto the single attribute, then read it back.
        single = orange.Domain([table.domain[attr]], False)
        return [ex[0].native() for ex in orange.ExampleTable(single, table)]

    column_vars = []
    columns = []
    for position, table in enumerate(datal):
        columns.append(getAttrVals(table, a))
        column_vars.append(orange.FloatVariable(name=("v" + str(position))))

    class_var = datal[0].domain.classVar
    # NOTE(review): class values are read from the LAST table (the
    # original relied on the leaked loop variable) while the class
    # variable comes from the first one -- presumably all tables share
    # the same examples; confirm.
    columns.append(getAttrVals(datal[-1], class_var))

    dom = orange.Domain(column_vars, class_var)
    rows = [list(row) for row in zip(*columns)]
    return orange.ExampleTable(dom, rows)
def __makeExampleTable(namesDict, data):
    """Build a two-class example table with one float attribute per gene.

    Attributes are named after the sorted keys of ``data``.  One example is
    created per key of ``namesDict[CONTROL_GROUP_KEY]`` (control rows come
    first) and then per key of ``namesDict[DATA_GROUP_KEY]``; the class
    value of each row is its group key.

    :param namesDict: mapping group key -> mapping of sample names.
    :param data: ``data[geneID][group][sample]`` gives one cell value.
    :returns: the populated ``orange.ExampleTable``.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    geneIDs = sorted(data.keys())
    gene_vars = [orange.FloatVariable(name=str(geneID)) for geneID in geneIDs]
    group_var = orange.EnumVariable(
        name=CLASS_ATRR_NAME, values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(gene_vars, group_var)
    table = orange.ExampleTable(domain)

    # Control group first, then the data group.
    for group in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for sample in namesDict[group].keys():
            row = [data[geneID][group][sample] for geneID in geneIDs]
            row.append(group)
            table.append(orange.Example(domain, row))
    return table
def applySettings(self):
    """Score the input data and send the examples above the z threshold.

    With input present, ``self.newdata`` becomes a copy of the data with a
    "Z score" meta attribute taken from ``self.outlier.zValues()``; it is
    sent on "Examples with Z-scores", and the rows whose score is greater
    than the threshold are stored in ``self.outliers`` and sent on
    "Outliers".  Without input, ``None`` is sent on both channels.
    """
    if self.haveInput != 1:
        self.send("Examples with Z-scores", None)
        self.send("Outliers", None)
        return

    detector = self.outlier
    detector.setKNN(self.ks[self.k][1])

    scored_domain = orange.Domain(self.data.domain)
    scored_domain.addmeta(orange.newmetaid(),
                          orange.FloatVariable("Z score"))
    self.newdata = orange.ExampleTable(scored_domain, self.data)

    z_values = detector.zValues()
    for row, z in enumerate(z_values):
        self.newdata[row]["Z score"] = z
    self.send("Examples with Z-scores", self.newdata)

    above_threshold = orange.Filter_values(domain=self.newdata.domain)
    # NOTE(review): eval() runs whatever text self.zscore holds.
    above_threshold["Z score"] = (orange.Filter_values.Greater,
                                  eval(self.zscore))
    self.outliers = above_threshold(self.newdata)
    self.send("Outliers", self.outliers)
def getSMARTSrecalcDesc(data, smarts):
    """Calculates structural descriptors for test and training data.

    In other words, checks for the substructure occurrence (0/1) in the
    test or prediction molecules. Uses RDK.

    :param data: test/prediction data containing a SMILES attribute.
    :param smarts: non-empty list of SMARTS strings; none of them may
        already be present in ``data.domain``.
    :returns: a copy of ``data`` with one extra float attribute per SMARTS
        (1.0 on a substructure match, 0.0 otherwise), or ``None`` when the
        input check fails.  Rows whose SMILES cannot be parsed are skipped,
        so their new attributes are never assigned.
    """
    smilesName = dataUtilities.getSMILESAttr(data)
    if not smilesName or not isinstance(smarts, list) or not len(smarts):
        print("Please check the input parameters")
        return None

    existingAttrs = [attr for attr in smarts if attr in data.domain]
    if existingAttrs:
        print("The input data cannot contain the smarts to be calculated!")
        return None

    newdomain = orange.Domain(
        data.domain.attributes +
        [orange.FloatVariable(attr, numberOfDecimals=1) for attr in smarts],
        data.domain.classVar)
    newdata = orange.ExampleTable(newdomain, data)

    # Compile every pattern once instead of once per example.
    patterns = [(smrt, rdk.Chem.MolFromSmarts(smrt)) for smrt in smarts]

    for ex in newdata:
        smile = str(ex[smilesName].value)
        mol = rdk.Chem.MolFromSmiles(smile)
        if mol is None:
            continue
        for smrt, patt in patterns:
            ex[smrt] = 1.0 if mol.HasSubstructMatch(patt) else 0.0
    return newdata
def addMetaID(data):
    """Attach a float meta attribute "meta_id" holding each row's index.

    A fresh meta id is drawn until it does not clash with the ids already
    registered on ``data.domain``; every example then gets its position in
    the table as the meta value.  ``data`` is modified in place.
    """
    index_var = orange.FloatVariable("meta_id")

    new_id = orange.newmetaid()
    while new_id in data.domain.getmetas().keys():
        new_id = orange.newmetaid()
    data.domain.addmeta(new_id, index_var)

    for position, example in enumerate(data):
        example[index_var] = position
def __call__(self, rule, examples, weights, targetClass):
    """Multiply the weight of every example that ``rule`` covers correctly.

    An example is covered when ``rule(example)`` is true and its class
    equals ``rule.classifier(example, orange.GetValue)``; its new weight is
    the old one times ``self.mult``.  All other examples keep their weight.

    :param weights: meta id of the current weights; when falsy a new
        weight meta initialised to 1.0 is created first.
    :param targetClass: not used by this implementation.
    :returns: ``(examples, newWeightsID)``.
    """

    def register_weight_meta(meta_id):
        # Same call order as before: fill the examples, then the domain.
        examples.addMetaAttribute(meta_id, 1.)
        examples.domain.addmeta(
            meta_id, orange.FloatVariable("weights-" + str(meta_id)), True)

    if not weights:
        weights = orange.newmetaid()
        register_weight_meta(weights)

    newWeightsID = orange.newmetaid()
    register_weight_meta(newWeightsID)

    for example in examples:
        covered = rule(example) and example.getclass() == rule.classifier(
            example, orange.GetValue)
        if covered:
            example[newWeightsID] = example[weights] * self.mult
        else:
            example[newWeightsID] = example[weights]
    return (examples, newWeightsID)
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a class-less rule-by-term presence table.

    Every term in ``allTerms`` becomes an enum attribute with the values
    ``const.PRESENT`` / ``const.ABSENT``.  One example is created per key
    of ``tableDict`` (in sorted order) and carries three meta attributes:
    the rule name and the rule terms string (both with their first and
    last character stripped) and the sequence number (the key itself).

    :param tableDict: mapping sequence number -> rule description dict.
    :param allTerms: sequence of all terms, one column each.
    :returns: the populated ``orange.ExampleTable``.
    """
    import orange
    import constants as const

    term_vars = []
    for term in allTerms:
        term_vars.append(
            orange.EnumVariable(name=str(term),
                                values=[const.PRESENT, const.ABSENT]))

    # three meta attributes
    ruleName = orange.StringVariable(const.NAME_ATTR)
    name_id = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    terms_id = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    number_id = orange.newmetaid()

    # this is a classless domain
    domain = orange.Domain(term_vars, False)
    for meta_id, meta_var in ((name_id, ruleName),
                              (terms_id, ruleTerms),
                              (number_id, ruleNumber)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)
    for key in sorted(tableDict.keys()):
        cells = [
            orange.Value(
                var,
                const.PRESENT
                if term in tableDict[key][const.RULETERMS_KEY]
                else const.ABSENT)
            for var, term in zip(term_vars, allTerms)
        ]
        example = orange.Example(domain, cells)
        # [1:-1] skips the square brackets around the stored strings
        example[const.NAME_ATTR] = orange.Value(
            ruleName, tableDict[key][const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, tableDict[key][const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, key)
        table.append(example)
    return table
def sendData(self):
    """Send the examples falling into the selected table cells.

    The examples of ``self.res`` whose (actual class, predicted class)
    pair for the first selected learner matches a selected (row, column)
    cell are sent on "Selected Data".  Depending on
    ``self.appendPredictions`` / ``self.appendProbabilities`` the
    learner's prediction and class probabilities are added as meta
    attributes.  ``None`` is sent when there is nothing to send.
    """
    self.selectionDirty = False

    cells = [(index.row(), index.column())
             for index in self.table.selectedIndexes()]
    res = self.res
    if not res or not cells or not self.selectedLearner:
        self.send("Selected Data", None)
        return

    learnerI = self.selectedLearner[0]
    data = None

    has_table = hasattr(res, "examples") and isinstance(
        res.examples, orange.ExampleTable)
    if has_table:
        selectionIndices = []
        for position, tested in enumerate(res.results):
            if (tested.actualClass, tested.classes[learnerI]) in cells:
                selectionIndices.append(position)
        data = res.examples.getitemsref(selectionIndices)

    wants_extras = self.appendPredictions or self.appendProbabilities
    if data is not None and wants_extras:
        # Work on a copy with its own domain so metas can be added.
        domain = orange.Domain(data.domain.attributes, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(domain, data)

        if self.appendPredictions:
            cname = self.learnerNames[learnerI]
            if isinstance(cname, unicode):
                shown_name = cname.encode("utf-8")
            else:
                shown_name = cname
            predVar = type(domain.classVar)(
                "%s(%s)" % (domain.classVar.name, shown_name))
            if hasattr(domain.classVar, "values"):
                predVar.values = domain.classVar.values
            predictionsId = orange.newmetaid()
            domain.addmeta(predictionsId, predVar)
            for position, ex in zip(selectionIndices, data):
                ex[predictionsId] = res.results[position].classes[learnerI]

        if self.appendProbabilities:
            probVars = [
                orange.FloatVariable("p(%s)" % value)
                for value in domain.classVar.values
            ]
            probIds = [orange.newmetaid() for _ in probVars]
            domain.addmetas(dict(zip(probIds, probVars)))
            for position, ex in zip(selectionIndices, data):
                probabilities = res.results[position].probabilities[learnerI]
                for meta_id, p in zip(probIds, probabilities):
                    ex[meta_id] = p

    if data is not None:
        data.name = self.learnerNames[learnerI]
    self.send("Selected Data", data)
def sendExampleTable(self, selectedInd):
    """Send the selected examples on "Data", optionally with coordinates.

    ``self.selectionOptions`` selects the output form: 0 sends the
    selected examples unchanged, 1 prepends X and Y as regular attributes,
    and any other value attaches X and Y as meta attributes.  The
    coordinates come from ``self.mds.points``.

    :param selectedInd: indices of the selected examples.
    """
    if self.selectionOptions == 0:
        self.send("Data",
                  orange.ExampleTable(self.data.getitems(selectedInd)))
        return

    xAttr = orange.FloatVariable("X")
    yAttr = orange.FloatVariable("Y")
    if self.selectionOptions == 1:
        domain = orange.Domain(
            [xAttr, yAttr] + list(self.data.domain.variables))
        domain.addmetas(self.data.domain.getmetas())
    else:
        domain = orange.Domain(self.data.domain)
        domain.addmeta(orange.newmetaid(), xAttr)
        domain.addmeta(orange.newmetaid(), yAttr)

    selection = orange.ExampleTable(domain)
    selection.extend(self.data.getitems(selectedInd))
    for row, source_index in enumerate(selectedInd):
        selection[row][xAttr] = self.mds.points[source_index][0]
        selection[row][yAttr] = self.mds.points[source_index][1]
    self.send("Data", selection)
def initialize(self, examples, weightID, targetClass, apriori):
    """Reset the best-rule list and attach a "Probs" meta attribute.

    ``self.bestRule`` gets one ``None`` slot per example.  A new meta
    attribute (id kept in ``self.probAttribute``) is added with the
    default -1.e-6 and then set, for every example, to
    ``apriori[targetClass] / apriori.abs``.

    :param weightID: not used by this implementation.
    :returns: the same ``examples`` object.
    """
    self.bestRule = [None] * len(examples)

    prob_id = orange.newmetaid()
    self.probAttribute = prob_id
    examples.addMetaAttribute(prob_id, -1.e-6)
    examples.domain.addmeta(prob_id, orange.FloatVariable("Probs"))

    # Every example receives the prior, whatever its own class is.
    for example in examples:
        example[prob_id] = apriori[targetClass] / apriori.abs
    return examples
def PCAOnExampleTable(table, keepOriginal=1, nPCs=-1):
    """Run ``pca`` on ``table`` and return the projection as a table.

    :param table: example table; its attribute matrix is obtained with
        ``toNumpyMA("a")``.
    :param keepOriginal: when truthy the result is ``table`` joined with
        the component columns, otherwise only the component columns.
    :param nPCs: forwarded to ``pca`` as its second argument.
    :returns: an ``orange.ExampleTable`` with columns "PC 1", "PC 2", ...
    """
    matrix = table.toNumpyMA("a")[0]
    projected, components, _eigenvalues = pca(matrix, nPCs)

    pc_vars = []
    for number in range(1, len(components) + 1):
        pc_vars.append(orange.FloatVariable("PC %d" % number))
    pc_table = orange.ExampleTable(orange.Domain(pc_vars, 0), projected.data)

    if not keepOriginal:
        return pc_table
    return orange.ExampleTable([table, pc_table])
class PCAClassifier(object):
    """Projects example tables onto previously computed principal components."""

    def __init__(self, domain, imputer, continuizer, center, deviation,
                 evalues, loadings):
        """Store the preprocessing steps and the PCA parameters.

        :param domain: domain the input data is restricted to.
        :param imputer: callable building an imputer from a table.
        :param continuizer: callable building a continuized domain.
        :param center: value(s) subtracted from the data matrix.
        :param deviation: scaling divisor, or ``None`` for no scaling.
        :param evalues: eigenvalues; their count sets the number of PCs.
        :param loadings: matrix the centred data is multiplied by.
        """
        #data checking and modifying
        self.domain = domain
        self.imputer = imputer
        self.continuizer = continuizer
        #PCA properties
        self.center = center
        self.deviation = deviation
        self.evalues = evalues
        self.loadings = loadings

        #last prediction performed -> used for biplot
        self._dataMatrix = None
        self._classArray = None

    def __call__(self, dataset):
        """Project ``dataset`` onto the stored principal components.

        The projected matrix and the class values are cached in
        ``self._dataMatrix`` / ``self._classArray``.

        :returns: table with attributes ``PC1..PCn``, joined with the
            class column when ``dataset`` has a class variable.
        :raises orange.KernelException: if preprocessing raises
            ``TypeError``.
        """
        try:
            #retain class attribute
            attrDataset = dataset.select(self.domain)
            imputer = self.imputer(attrDataset)
            attrDataset = imputer(attrDataset)
            domain = self.continuizer(attrDataset)
            attrDataset = attrDataset.translate(domain)
        except TypeError:
            # Call-style raise replaces the Python-2-only
            # ``raise Class, "message"`` form; same exception and text.
            raise orange.KernelException(
                "One or more attributes form training set are missing!")

        dataMatrix, classArray, x = attrDataset.toNumpy()

        dataMatrix -= self.center
        # Identity test: ``!= None`` on a numpy array compares
        # element-wise and makes the truth value ambiguous.
        if self.deviation is not None:
            dataMatrix *= 1. / self.deviation

        #save transformed data
        self._dataMatrix = numpy.dot(dataMatrix, self.loadings)

        attributes = [
            orange.FloatVariable("PC%d" % (i + 1, ))
            for i in range(len(self.evalues))
        ]
        new_domain = orange.Domain(attributes)
        new_table = orange.ExampleTable(new_domain, self._dataMatrix)

        if dataset.domain.classVar:
            #suboptimal
            classTable = dataset.select([dataset.domain.classVar.name])
            self._classArray = numpy.array(
                [row.getclass() for row in classTable])
            new_table = orange.ExampleTable([new_table, classTable])

        return new_table
def makeDomain(names):
    """Create the domain: float attributes, a boolean class and metas.

    :param names: names of the float attributes, in order.
    :returns: an ``orange.Domain`` whose class is an enum "class" with the
        values "True"/"False" and which carries eleven meta attributes
        (weight, isInsane, four string metas and five Python-object metas).
    """
    truth_values = ["True", "False"]
    domain = orange.Domain(
        [orange.FloatVariable(n) for n in names],
        orange.EnumVariable("class", values=truth_values))

    # (variable type, name, extra keyword arguments), in registration order
    meta_specs = [
        (orange.FloatVariable, "weight", {}),
        (orange.EnumVariable, "isInsane", {"values": ["True", "False"]}),
        (orange.StringVariable, "filename", {}),
        (orange.StringVariable, "sourceEngineName", {}),
        (orange.StringVariable, "engineName", {}),
        (orange.StringVariable, "landmarkName", {}),
        (orange.PythonVariable, "geometry", {}),
        (orange.PythonVariable, "track", {}),
        (orange.PythonVariable, "drawMap", {}),
        (orange.PythonVariable, "description", {}),
        (orange.PythonVariable, "farAway", {}),
    ]
    for var_type, meta_name, options in meta_specs:
        domain.addmeta(orange.newmetaid(), var_type(meta_name, **options))
    return domain
def get_domain_trans(self):
    """Build the domain for the transition features.

    One float attribute is created per entry of
    ``self.dataset.trans_alphabet``; the final variable "classname" is an
    enum whose values are the strings "0" .. "n*n - 1", with ``n`` the
    size of ``self.dataset.label_alphabet``.
    """
    label_count = len(self.dataset.label_alphabet)
    pair_labels = []
    for code in range(label_count ** 2):
        pair_labels.append(str(code))

    variables = [
        orange.FloatVariable(name) for name in self.dataset.trans_alphabet
    ]
    variables.append(orange.EnumVariable("classname", values=pair_labels))
    return orange.Domain(variables)
def get_domain_obs(self):
    """Build the domain for the observation features.

    One float attribute is created per entry of
    ``self.dataset.obs_alphabet``; the final variable "classname" is an
    enum over the stringified entries of ``self.dataset.label_alphabet``.
    """
    label_names = []
    for label in self.dataset.label_alphabet:
        label_names.append(str(label))

    variables = [
        orange.FloatVariable(name) for name in self.dataset.obs_alphabet
    ]
    variables.append(orange.EnumVariable("classname", values=label_names))
    return orange.Domain(variables)
def learnModel(self, X, y):
    """Train a class-weighted random forest on binary-labelled data.

    The learner is stored in ``self.learner`` and the trained model in
    ``self.classifier``; ``self.worstResponse`` and ``self.domain`` are
    set as a side effect.

    :param X: 2-d array of examples (rows) by features (columns).
    :param y: label array holding exactly two distinct values.
    :raises ValueError: if ``y`` does not hold exactly two distinct values.
    """
    # Computed once (the original evaluated numpy.unique(y) twice).
    classes = numpy.unique(y)
    if classes.shape[0] != 2:
        raise ValueError("Can only operate on binary data")
    self.worstResponse = classes[classes != self.bestResponse][0]

    #We need to convert y into indices
    newY = self.labelsToInds(y)

    XY = numpy.c_[X, newY]
    attrList = []
    for i in range(X.shape[1]):
        attrList.append(orange.FloatVariable("X" + str(i)))

    # The class variable lists the best response first, then the worst.
    attrList.append(orange.EnumVariable("y"))
    attrList[-1].addValue(str(self.bestResponse))
    attrList[-1].addValue(str(self.worstResponse))

    self.domain = orange.Domain(attrList)
    eTable = orange.ExampleTable(self.domain, XY)

    #Weight examples
    preprocessor = orange.Preprocessor_addClassWeight(equalize=1)
    preprocessor.classWeights = [1 - self.weight, self.weight]
    eTable, weightID = preprocessor(eTable)
    eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

    tree = orngTree.TreeLearner(mForPruning=self.m,
                                measure="gainRatio",
                                minExamples=self.minSplit,
                                maxDepth=self.maxDepth).instance()
    self.learner = orngEnsemble.RandomForestLearner(
        learner=tree,
        trees=self.numTrees,
        attributes=numpy.round(X.shape[1] * self.featureSize))
    self.classifier = self.learner(eTable, weightID)
def loadLibSVM(filename):
    """Load a LibSVM-style sparse file into an example table.

    Every non-blank line is split on whitespace: the first token is the
    class, the remaining tokens are ``index:value`` pairs.  One float
    attribute is created per index (ordered by ``int(index)``); indices
    missing from a line become unknown values.  The class is an enum named
    "class" unless some class token contains a ".", in which case it is a
    float variable named "target".

    :param filename: path of the file to read.
    :returns: an ``orange.ExampleTable`` with one example per line.
    """
    # ``with`` closes the handle, which the original left open.
    with open(filename, "rb") as handle:
        lines = handle.read().splitlines()
    data = [line.split() for line in lines if line.strip()]

    class _VariableCache(dict):
        """Creates and remembers a FloatVariable on first lookup."""

        def __missing__(self, key):
            return self.setdefault(key, orange.FloatVariable(key))

    variables = _VariableCache()

    def item(index, raw):
        # (variable, value of that variable parsed from ``raw``)
        return (variables[index], variables[index](raw))

    values = [
        dict([item(*val.split(":")) for val in ex[1:]]) for ex in data
    ]
    classes = [ex[0] for ex in data]
    disc = all(["." not in c for c in classes])

    attributes = sorted(variables.values(), key=lambda var: int(var.name))
    if disc:
        classVar = orange.EnumVariable("class", values=sorted(set(classes)))
    else:
        classVar = orange.FloatVariable("target")
    domain = orange.Domain(attributes, classVar)

    return orange.ExampleTable([
        orange.Example(
            domain,
            [ex.get(attr, attr("?")) for attr in attributes] + [c])
        for ex, c in zip(values, classes)
    ])
def learnModel(self, X, y):
    """Train a class-weighted decision tree on binary-labelled data.

    The learner is stored in ``self.learner`` and the trained model in
    ``self.classifier``; ``self.worstResponse`` and ``self.domain`` are
    set as a side effect.

    :param X: 2-d array of examples (rows) by features (columns).
    :param y: label array holding exactly two distinct values.
    :raises ValueError: if ``y`` does not hold exactly two distinct values.
    """
    # Computed once (the original evaluated numpy.unique(y) twice).
    classes = numpy.unique(y)
    if classes.shape[0] != 2:
        raise ValueError("Can only operate on binary data")
    self.worstResponse = classes[classes != self.bestResponse][0]

    #We need to convert y into indices
    newY = self.labelsToInds(y)

    XY = numpy.c_[X, newY]
    attrList = []
    for i in range(X.shape[1]):
        attrList.append(orange.FloatVariable("X" + str(i)))

    # The class variable lists the best response first, then the worst.
    attrList.append(orange.EnumVariable("y"))
    attrList[-1].addValue(str(self.bestResponse))
    attrList[-1].addValue(str(self.worstResponse))

    self.domain = orange.Domain(attrList)
    eTable = orange.ExampleTable(self.domain, XY)

    #Weight examples and equalise
    #Equalizing computes such weights that the weighted number of examples
    #in each class is equivalent.
    preprocessor = orange.Preprocessor_addClassWeight(equalize=1)
    preprocessor.classWeights = [1 - self.weight, self.weight]
    eTable, weightID = preprocessor(eTable)
    eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

    self.learner = orngTree.TreeLearner(m_pruning=self.m,
                                        measure="gainRatio")
    self.learner.max_depth = self.maxDepth
    self.learner.stop = orange.TreeStopCriteria_common()
    self.learner.stop.min_instances = self.minSplit
    self.classifier = self.learner(eTable, weightID)
def __parseBBRCoutput(self, res):
    """Turn BBRC result lines into a data table with 0/1 descriptors.

    Each line of ``res`` is tab separated: the first field is the
    descriptor name, the second a bracketed, space-separated list of
    1-based compound numbers.  The result has the attributes of
    ``self.data`` plus one float attribute per descriptor, set to 1.0
    for the compounds listed for it and 0.0 otherwise.  With
    ``self.verbose`` a progress bar is written to stdout.

    :param res: iterable of BBRC output lines.
    :returns: a ``dataUtilities.DataTable`` with the extended domain.
    """
    if self.verbose:
        print("Parsing BBRC results. Please wait...")
    nCompounds = len(self.data)

    allDesc = []
    allIDs = []
    for line in res:
        fields = line.split("\t")
        allDesc.append(fields[0].strip())
        # [1:-1] drops the enclosing brackets; a set gives O(1) lookups
        # in the per-compound loop below.
        allIDs.append(
            set(int(x) for x in fields[1][1:-1].strip().split(" ")))

    selDesc = list(allDesc)
    newDomainAttrs = list(self.data.domain.attributes) + \
        [orange.FloatVariable(name) for name in selDesc]
    newDomain = orange.Domain(newDomainAttrs, self.data.domain.classVar)

    if self.verbose:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("Original domain lenght:  %s" % len(self.data.domain))
        print("New domain lenght :  %s" % len(newDomain))
        print("\n0%" + " " * 98 + "100%")
        print("|" + "-" * 100 + "|")
        sys.stdout.write("|")
        sys.stdout.flush()

    newData = dataUtilities.DataTable(newDomain)
    for idx, ex in enumerate(self.data):
        newEx = orange.Example(newDomain, ex)
        if self.verbose:
            # One tick per compound for small sets, else one per percent.
            if nCompounds < 100 or idx % (nCompounds // 100) == 0:
                sys.stdout.write("=")
            sys.stdout.flush()
        # ID is the 1-based position of the compound in self.data.
        ID = idx + 1
        for dIdx, d in enumerate(selDesc):
            newEx[d] = 1.0 if ID in allIDs[dIdx] else 0.0
        newData.append(newEx)

    if self.verbose:
        if nCompounds < 100:
            # Pad the bar to full width.
            sys.stdout.write("=" * (100 - nCompounds + 1))
        print("")
    return newData
def createLogRegExampleTable(data, weightID):
    """Extend ``data`` with re-weighted artificial examples per attribute.

    For every continuous attribute a float indicator ``<name>Disc`` is
    added: it is 0 on the existing rows and 1 on a new copy of the data
    in which the attribute itself is 0 and the weight is multiplied by
    100.  For every discrete attribute an extra value ``<name>X`` is
    added, and a new copy of the data takes that value with the weight
    multiplied by 10.  Each copy is appended to the result.

    :param data: source example table.
    :param weightID: meta id of the example weights.
    :returns: the extended example table.
    """
    finalData = orange.ExampleTable(data)
    origData = orange.ExampleTable(data)
    for at in data.domain.attributes:
        # For every attribute create a new example table ``newData`` and
        # add the new attribute to origData, finalData and newData.
        if at.varType == orange.VarTypes.Continuous:
            # continuous variable
            atDisc = orange.FloatVariable(at.name + "Disc")
            newDomain = orange.Domain(origData.domain.attributes +
                                      [atDisc, data.domain.classVar])
            # Fix: take the metas from origData, as the discrete branch
            # does; ``newData`` was read here before its first assignment.
            newDomain.addmetas(origData.domain.getmetas())
            finalData = orange.ExampleTable(newDomain, finalData)
            newData = orange.ExampleTable(newDomain, origData)
            origData = orange.ExampleTable(newDomain, origData)
            for d in origData:
                d[atDisc] = 0
            for d in finalData:
                d[atDisc] = 0
            for i, d in enumerate(newData):
                d[atDisc] = 1
                d[at] = 0
                d[weightID] = 100 * data[i][weightID]
        elif at.varType == orange.VarTypes.Discrete:
            # In origData, finalData and newData give attribute "at" one
            # more value, named after the attribute plus "X".
            atNew = orange.EnumVariable(at.name,
                                        values=at.values + [at.name + "X"])
            newDomain = orange.Domain(
                [x for x in origData.domain.attributes if x != at] +
                [atNew, origData.domain.classVar])
            newDomain.addmetas(origData.domain.getmetas())

            temp_finalData = orange.ExampleTable(finalData)
            finalData = orange.ExampleTable(newDomain, finalData)
            newData = orange.ExampleTable(newDomain, origData)
            temp_origData = orange.ExampleTable(origData)
            origData = orange.ExampleTable(newDomain, origData)
            for i, d in enumerate(origData):
                d[atNew] = temp_origData[i][at]
            for i, d in enumerate(finalData):
                d[atNew] = temp_finalData[i][at]
            for i, d in enumerate(newData):
                d[atNew] = at.name + "X"
                d[weightID] = 10 * data[i][weightID]
        # NOTE(review): for any other attribute type this re-appends the
        # previous ``newData`` (unchanged from the original).
        finalData.extend(newData)
    return finalData
def test():
    """Manual smoke test: show the widget with an iris distance matrix.

    Loads the iris data set, adds a float meta attribute "a", fills the
    lower triangle (diagonal included) of a symmetric matrix with
    Euclidean distances and hands it to the widget before starting the
    Qt event loop.
    """
    app = QApplication(sys.argv)
    w = OWHierarchicalClustering()
    w.show()

    data = orange.ExampleTable("../../doc/datasets/iris.tab")
    meta_id = orange.newmetaid()
    data.domain.addmeta(meta_id, orange.FloatVariable("a"))
    data.addMetaAttribute(meta_id)

    dist = orange.ExamplesDistanceConstructor_Euclidean(data)
    # Allocated once; the original built the same matrix twice and
    # discarded the first one.
    matrix = orange.SymMatrix(len(data))
    matrix.setattr('items', data)
    for i in range(len(data)):
        for j in range(i + 1):
            matrix[i, j] = dist(data[i], data[j])

    w.set_matrix(matrix)
    app.exec_()