def sendList(self, selectedInd):
    """Send the selected items to the output channels.

    When ``self.data`` is a list of plain strings (names), an example table
    is sent on "Data"; depending on ``self.selectionOptions`` the point
    coordinates from ``self.mds.points`` are added as regular attributes
    (1), as meta attributes (any other truthy value) or not at all (0).

    Otherwise the selected items are grouped by name and sent on
    "Structured Data Files" (``None`` when nothing is selected).

    :param selectedInd: indices into ``self.data`` / ``self.mds.points``.
    """
    if self.data and isinstance(self.data[0], str):
        xAttr = orange.FloatVariable("X")
        yAttr = orange.FloatVariable("Y")
        nameAttr = orange.StringVariable("name")
        if self.selectionOptions == 1:
            domain = orange.Domain([xAttr, yAttr, nameAttr])
            selection = orange.ExampleTable(domain)
            for ind in selectedInd:
                # The name must come from the same index as the point;
                # the previous code used the loop position instead.
                selection.append(list(self.mds.points[ind]) + [self.data[ind]])
        else:
            domain = orange.Domain([nameAttr])
            if self.selectionOptions:
                domain.addmeta(orange.newmetaid(), xAttr)
                domain.addmeta(orange.newmetaid(), yAttr)
            selection = orange.ExampleTable(domain)
            for row, ind in enumerate(selectedInd):
                selection.append([self.data[ind]])
                if self.selectionOptions:
                    selection[row][xAttr] = self.mds.points[ind][0]
                    selection[row][yAttr] = self.mds.points[ind][1]
        self.send("Data", selection)
        return

    if not selectedInd:
        self.send("Structured Data Files", None)
    else:
        datasets = [self.data[i] for i in selectedInd]
        names = list(set([d.dirname for d in datasets]))
        # NOTE(review): groups are keyed by ``dirname`` but members are
        # matched on ``strain`` -- confirm these are meant to coincide.
        data = [(name, [d for d in datasets if d.strain == name])
                for name in names]
        self.send("Structured Data Files", data)
def add_meta_id(data):
    """Add a float meta attribute "meta_id" holding each example's position.

    A fresh meta id is drawn until one is found that the domain does not
    already use. The domain and the examples of ``data`` are modified in
    place; nothing is returned.
    """
    position_var = orange.FloatVariable("meta_id")

    new_id = orange.newmetaid()
    while new_id in data.domain.getmetas().keys():
        new_id = orange.newmetaid()
    data.domain.addmeta(new_id, position_var)

    position = 0
    total = len(data)
    while position < total:
        data[position][position_var] = position
        position += 1
def __make_rule_gene_example_table(tableDict, genes):
    """Build a classless rule-by-gene example table.

    Each row corresponds to one entry of ``tableDict`` (visited in sorted key
    order); each column is a PRESENT/ABSENT attribute telling whether the
    gene is listed under the rule's ``const.TOP_GENES_KEY``. The rule name,
    the rule terms string and the rule key are stored as meta attributes.

    :param tableDict: mapping from rule key to a dict of rule properties.
    :param genes: sequence of genes, one attribute per gene.
    :return: the populated ``orange.ExampleTable``.
    """
    import orange
    import constants as const

    # One binary attribute per gene.
    attrList = [orange.EnumVariable(name=str(gene),
                                    values=[const.PRESENT, const.ABSENT])
                for gene in genes]

    # Three meta attributes: rule name, rule terms and rule number.
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR, startValue=1,
                                      endValue=len(tableDict), stepValue=1,
                                      numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # This is a classless domain.
    domain = orange.Domain(attrList, False)
    domain.addmeta(mid, ruleName, False)
    domain.addmeta(mid1, ruleTerms, False)
    domain.addmeta(mid2, ruleNumber, False)

    table = orange.ExampleTable(domain)

    for k in sorted(tableDict.keys()):
        rule = tableDict[k]
        topGenes = rule[const.TOP_GENES_KEY]
        exampleValues = [
            orange.Value(attr,
                         const.PRESENT if gene in topGenes else const.ABSENT)
            for attr, gene in zip(attrList, genes)
        ]
        example = orange.Example(domain, exampleValues)
        # The metas used to be assigned twice (raw value, then orange.Value);
        # only the final, typed assignment is kept.
        # [1:-1] skips the square brackets surrounding the strings.
        example[const.NAME_ATTR] = orange.Value(
            ruleName, rule[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, rule[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)
        table.append(example)

    return table
def ruleSubsetSelection(self, beam, num_of_rules, data):
    """Greedily pick ``num_of_rules`` rules from ``beam``.

    In every round each remaining rule is scored by summing ``1 / counter``
    over the examples it covers; the best rule is moved from ``beam`` to the
    result and the counters of the examples it covers are increased by one.

    :param beam: list of rules exposing ``filter(example)``; selected rules
        are removed from this list in place.
    :param num_of_rules: number of rules to select.
    :param data: example table; a temporary meta attribute is added while
        selecting and removed before returning.
    :return: the list of selected rules, or ``beam`` itself (untouched) when
        it holds fewer than ``num_of_rules`` rules.
    """
    # Decide before touching ``data`` so the early exit does not leave the
    # temporary counter attribute behind.
    if num_of_rules > len(beam):
        return beam

    SS = []
    c = orange.newmetaid()
    data.addMetaAttribute(c)  # counters start at the default value (1)
    try:
        for _ in range(num_of_rules):
            best_score = 0
            best_rule_index = 0
            for rule_index, rule in enumerate(beam):
                score = 0
                for d in data:  # sum of weights of covered examples
                    if rule.filter(d):
                        score += 1.0 / d.getweight(c)
                if score > best_score:
                    best_score = score
                    best_rule_index = rule_index
            best_rule = beam[best_rule_index]
            for d in data:  # increase the counter of covered examples
                if best_rule.filter(d):
                    d.setweight(c, d.getweight(c) + 1)
            SS.append(best_rule)
            del beam[best_rule_index]
    finally:
        data.removeMetaAttribute(c)
    return SS
def applySettings(self):
    """Use the settings from the widget and identify the outliers.

    With input present, a copy of ``self.data`` extended with a "Z score"
    meta attribute is stored in ``self.newdata`` and sent on
    "Data with z-score"; examples above / at-or-below the ``self.zscore``
    threshold are sent on "Outliers" / "Inliers". Without input, ``None``
    is sent on all three channels.
    """
    if self.haveInput != 1:
        self.send("Data with z-score", None)
        self.send("Outliers", None)
        self.send("Inliers", None)
        return

    outlier = self.outlier
    outlier.setKNN(self.ks[self.k][1])

    newdomain = orange.Domain(self.data.domain)
    newdomain.addmeta(orange.newmetaid(), orange.FloatVariable("Z score"))
    self.newdata = orange.ExampleTable(newdomain, self.data)

    for i, el in enumerate(outlier.zValues()):
        self.newdata[i]["Z score"] = el

    self.send("Data with z-score", self.newdata)

    # NOTE(review): eval() executes arbitrary expressions; if ``zscore`` can
    # come from untrusted input this should become float(). It is evaluated
    # once here so both filters are guaranteed to share the same threshold.
    threshold = eval(self.zscore)

    filterout = orange.Filter_values(domain=self.newdata.domain)
    filterout["Z score"] = (orange.Filter_values.Greater, threshold)
    outliers = filterout(self.newdata)

    filterin = orange.Filter_values(domain=self.newdata.domain)
    filterin["Z score"] = (orange.Filter_values.LessEqual, threshold)
    inliers = filterin(self.newdata)

    self.send("Outliers", outliers)
    self.send("Inliers", inliers)
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a classless rule-by-term example table.

    Rows follow the sorted keys of ``tableDict``; every term in ``allTerms``
    becomes a PRESENT/ABSENT attribute depending on whether it appears under
    the rule's ``const.RULETERMS_KEY``. Rule name, rule terms string and rule
    key are attached as meta attributes.

    :return: the populated ``orange.ExampleTable``.
    """
    import orange
    import constants as const

    attrList = []
    for term in allTerms:
        attrList.append(orange.EnumVariable(
            name=str(term), values=[const.PRESENT, const.ABSENT]))

    # Three meta attributes, each with its own freshly drawn id.
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR, startValue=1,
                                      endValue=len(tableDict), stepValue=1,
                                      numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # Classless domain; the rule descriptors live in the metas.
    domain = orange.Domain(attrList, False)
    for metaId, metaVar in ((mid, ruleName), (mid1, ruleTerms),
                            (mid2, ruleNumber)):
        domain.addmeta(metaId, metaVar, False)

    table = orange.ExampleTable(domain)

    for k in sorted(tableDict.keys()):
        exampleValues = []
        for i, term in enumerate(allTerms):
            if term in tableDict[k][const.RULETERMS_KEY]:
                state = const.PRESENT
            else:
                state = const.ABSENT
            exampleValues.append(orange.Value(attrList[i], state))

        example = orange.Example(domain, exampleValues)
        # [1:-1] drops the square brackets around the stored strings.
        nameText = tableDict[k][const.RULENAME_KEY][1:-1]
        example[const.NAME_ATTR] = orange.Value(ruleName, nameText)
        termsText = tableDict[k][const.RULETERMS_STR_KEY][1:-1]
        example[const.TERMS_ATTR] = orange.Value(ruleTerms, termsText)
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)

        table.append(example)

    return table
def __init__(self, k, rank_id = None, counter_id=None, num_of_rules=0, bdiscretize=True, **kwargs):
    """Store the learner settings and set up the rule beam finder.

    :param k: stored as ``self.k``.
    :param rank_id: meta id for ranks; a new id is drawn when falsy.
    :param counter_id: meta id for counters; a new id is drawn when falsy.
    :param num_of_rules: stored as ``self.max_rules``.
    :param bdiscretize: stored as ``self.bdiscretize``.
    :param kwargs: may contain ``attrs``, a ready-made ``rbf`` and/or a
        ``beamfinder`` factory called as ``beamfinder(rank_id, **kwargs)``.
    :raises KeyError: if neither ``rbf`` nor ``beamfinder`` is supplied.
    """
    self.k = k
    self.counter = counter_id or orange.newmetaid()
    self.rank_id = rank_id or orange.newmetaid()
    self.weightID = orange.newmetaid()
    self.attrs = kwargs.get('attrs', [])
    self.max_rules = num_of_rules
    # Build the default lazily: dict.get() evaluates its default eagerly,
    # which required 'beamfinder' (and constructed a finder) even when an
    # explicit 'rbf' was passed.
    if 'rbf' in kwargs:
        self.rbf = kwargs['rbf']
    else:
        self.rbf = kwargs['beamfinder'](self.rank_id, **kwargs)
    self.bdiscretize = bdiscretize
def add_class_noise(data, noise_level, rnd_seed):
    """Replace the class value of a random share of examples.

    A "noise" meta attribute (values "no"/"yes") is added to ``data`` and
    set to "yes" for every example whose class was changed.

    :param data: Orange dataset; modified in place.
    :param noise_level: percentage (0-100) of examples to alter.
    :param rnd_seed: seed passed to ``orange.setrandseed``; anything that
        cannot be converted to ``int`` falls back to 0.
    :return: tuple ``(sorted list of altered indices, data)``.
    """
    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")

    # Generate random indices for noise insertion
    percent = float(noise_level) / 100
    try:
        rnds = int(rnd_seed)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no usable seed"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        rnds = 0
    print("Random Seed: %s" % rnds)
    orange.setrandseed(rnds)
    # NOTE(review): the indices are drawn from the module-level ``random``,
    # which is not seeded here -- confirm whether that is intended.
    noise_indices = random.sample(range(len(data)),
                                  int(round(percent * len(data))))

    className = data.domain.classVar.name
    for index in noise_indices:
        data[index]["noise"] = "yes"
        temp = data[index][className]
        # Draw random class values until one differs from the current one.
        # NOTE(review): this cannot terminate if the class has one value.
        new_label = data.domain.classVar.randomvalue()
        while new_label == temp:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label

    noise_indices.sort()
    return noise_indices, data
def node_selection_changed(self):
    """Publish the model(s) behind the currently selected network nodes.

    "Model" carries the single selected item (enriched with 'anchors' and
    'classifier' metas when the matching result entry provides them) and
    "Selected Models" carries all selected items. ``None`` is sent when
    there is no graph, no matrix, a size mismatch or no selection.
    """
    self.warning()

    def send_nothing():
        self.send("Model", None)
        self.send("Selected Models", None)

    if self.graph is None or self.graph.items() is None or \
            self.graph_matrix is None:
        send_nothing()
        return

    if self.graph.number_of_nodes() != self.graph_matrix.dim:
        self.warning('Network items and matrix results not of equal length.')
        send_nothing()
        return

    selection = self.networkCanvas.selected_nodes()
    count = len(selection)

    if count == 0:
        self.send('Model', None)
        self.send('Selected Models', None)
        return

    if count > 1:
        self.send('Model', None)
        self.send('Selected Models', self.graph.items().getitems(selection))
        return

    # Exactly one node is selected.
    modelInstance = self.graph.items()[selection[0]]
    # modelInfo - Python Dict; keys: method, classifier, probabilities,
    # results, XAnchors, YAnchors, attributes
    modelInfo = self.graph_matrix.results[modelInstance['uuid'].value]

    def ensure_meta(name):
        # Register a Python-valued meta attribute unless already present.
        if not modelInstance.domain.hasmeta(name):
            modelInstance.domain.addmeta(orange.newmetaid(),
                                         orange.PythonVariable(name))

    if 'YAnchors' in modelInfo and 'XAnchors' in modelInfo:
        ensure_meta('anchors')
        modelInstance['anchors'] = (modelInfo['XAnchors'],
                                    modelInfo['YAnchors'])

    if 'classifier' in modelInfo and modelInfo['classifier'] is not None:
        ensure_meta('classifier')
        modelInstance['classifier'] = modelInfo['classifier']
        self.send('Classifier', modelInfo['classifier'])

    self.send('Model', modelInstance)
    self.send('Selected Models', self.graph.items().getitems(selection))
def sendData(self, km=None):
    """Send the data annotated with cluster labels, plus the centroids.

    :param km: clustering to report; when ``None`` the best optimized run
        (``self.bestRun[1]``) or ``self.km`` is used.

    The cluster variable is placed according to ``self.addIdAs``: 0 makes it
    the class (the original class becomes a meta), 1 appends it as a regular
    attribute, anything else adds it as a meta attribute. ``None`` is sent
    on both channels when there is no data or no clustering.
    """
    if km is None:
        if self.optimized:
            km = self.bestRun[1]
        else:
            km = self.km

    if not self.data or not km:
        self.send("Data", None)
        self.send("Centroids", None)
        return

    labels = ["C%d" % number for number in range(1, km.k + 1)]
    clustVar = orange.EnumVariable(self.classifyName, values=labels)

    origDomain = self.data.domain
    placement = self.addIdAs
    if placement == 0:
        domain = orange.Domain(origDomain.attributes, clustVar)
        if origDomain.classVar:
            domain.addmeta(orange.newmetaid(), origDomain.classVar)
        aid = -1  # index of the class variable
    elif placement == 1:
        domain = orange.Domain(origDomain.attributes + [clustVar],
                               origDomain.classVar)
        aid = len(origDomain.attributes)
    else:
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        aid = orange.newmetaid()
        domain.addmeta(aid, clustVar)

    domain.addmetas(origDomain.getmetas())

    # Copy of the data in the new domain, labelled by cluster membership.
    labelled = orange.ExampleTable(domain, self.data)
    for example, cluster_index in izip(labelled, km.clusters):
        example[aid] = cluster_index

    centroids = orange.ExampleTable(domain, km.centroids)
    for position, centroid in enumerate(centroids):
        centroid[aid] = position
        if origDomain.classVar:
            centroid[origDomain.classVar] = "?"

    self.send("Data", labelled)
    self.send("Centroids", centroids)
def sendData(self):
    """Send the examples that fall into the selected table cells.

    An example is selected when its (actual class, predicted class) pair for
    the chosen learner matches a selected (row, column). Optionally the
    learner's predictions and class probabilities are appended as meta
    attributes. ``None`` is sent when there are no results, no selection or
    no selected learner.
    """
    self.selectionDirty = False

    selected = [(index.row(), index.column())
                for index in self.table.selectedIndexes()]
    res = self.res
    if not res or not selected or not self.selectedLearner:
        self.send("Selected Data", None)
        return

    learnerI = self.selectedLearner[0]
    data = None

    if isinstance(getattr(res, "examples", None), orange.ExampleTable):
        selectionIndices = []
        for position, tested in enumerate(res.results):
            if (tested.actualClass, tested.classes[learnerI]) in selected:
                selectionIndices.append(position)
        data = res.examples.getitemsref(selectionIndices)

    wants_extras = self.appendPredictions or self.appendProbabilities
    if data is not None and wants_extras:
        # Work on a copy in a fresh domain so the new metas do not leak
        # into the original data.
        domain = orange.Domain(data.domain.attributes, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(domain, data)

        if self.appendPredictions:
            cname = self.learnerNames[learnerI]
            if isinstance(cname, unicode):
                shown_name = cname.encode("utf-8")
            else:
                shown_name = cname
            var_name = "%s(%s)" % (domain.classVar.name, shown_name)
            predVar = type(domain.classVar)(var_name)
            if hasattr(domain.classVar, "values"):
                predVar.values = domain.classVar.values
            predictionsId = orange.newmetaid()
            domain.addmeta(predictionsId, predVar)
            for position, example in zip(selectionIndices, data):
                example[predictionsId] = res.results[position].classes[learnerI]

        if self.appendProbabilities:
            probVars = [orange.FloatVariable("p(%s)" % value)
                        for value in domain.classVar.values]
            probIds = [orange.newmetaid() for _ in probVars]
            domain.addmetas(dict(zip(probIds, probVars)))
            for position, example in zip(selectionIndices, data):
                probabilities = res.results[position].probabilities[learnerI]
                for meta_id, probability in zip(probIds, probabilities):
                    example[meta_id] = probability

    if data is not None:
        data.name = self.learnerNames[learnerI]

    self.send("Selected Data", data)
def sendExampleTable(self, selectedInd):
    """Send the selected examples on "Data", optionally with coordinates.

    ``self.selectionOptions`` chooses the layout: 0 sends the examples
    unchanged, 1 prepends "X" and "Y" as regular attributes, any other value
    adds them as meta attributes. Coordinates are read from
    ``self.mds.points``.

    :param selectedInd: indices of the selected examples.
    """
    if self.selectionOptions == 0:
        chosen = self.data.getitems(selectedInd)
        self.send("Data", orange.ExampleTable(chosen))
        return

    xAttr = orange.FloatVariable("X")
    yAttr = orange.FloatVariable("Y")
    if self.selectionOptions == 1:
        domain = orange.Domain([xAttr, yAttr] +
                               list(self.data.domain.variables))
        domain.addmetas(self.data.domain.getmetas())
    else:
        domain = orange.Domain(self.data.domain)
        domain.addmeta(orange.newmetaid(), xAttr)
        domain.addmeta(orange.newmetaid(), yAttr)

    selection = orange.ExampleTable(domain)
    selection.extend(self.data.getitems(selectedInd))
    for row, point_index in enumerate(selectedInd):
        selection[row][xAttr] = self.mds.points[point_index][0]
        selection[row][yAttr] = self.mds.points[point_index][1]
    self.send("Data", selection)
def sendFragmentTerms(self):
    """Send fragment-by-term sensitivity profiles and their transpose.

    For each fragment and term, the values ``self.sensDict[gene][chemical]``
    are averaged over the term's genes (``self.terms[term][0]``) and the
    chemicals whose ``self.fragmentMap`` entry is truthy for the fragment.
    The fragment-row table is sent on "Slim-based fragment profiles", the
    term-row table on "Fragment-based slim profiles".

    :raises ZeroDivisionError: if a term has no genes or a fragment matches
        no chemical.
    """
    term_ids = self.terms.keys()

    matrix = []
    for frag in self.fragments:
        # The matching chemicals depend on the fragment only, so compute
        # them once per fragment instead of once per (fragment, term).
        chem = [c for c in self.fragmentMap.keys()
                if self.fragmentMap[c][frag]]
        vec = []
        for term in term_ids:
            genes = self.terms[term][0]
            avgSens = 0.0
            for g in genes:
                for c in chem:
                    avgSens += self.sensDict[g][self.reverseSmilesDict[c]]
            avgSens /= len(genes) * len(chem)
            vec.append(avgSens)
        matrix.append(vec)

    # Table 1: one row per fragment, one column per term.
    term_vars = [orange.FloatVariable(term) for term in term_ids]
    mid = orange.newmetaid()
    domain = orange.Domain(term_vars, 0)
    domain.addmeta(mid, orange.StringVariable("fragment"))
    table = orange.ExampleTable(domain)
    for frag, vec in zip(self.fragments, matrix):
        e = orange.Example(domain, vec)
        e[mid] = frag
        table.append(e)
    self.send("Slim-based fragment profiles", table)

    # Table 2: the transpose, one row per term, one column per fragment.
    import Numeric
    matrix = Numeric.transpose(matrix)
    frag_vars = [orange.FloatVariable(frag) for frag in self.fragments]
    mid = orange.newmetaid()
    mid1 = orange.newmetaid()
    domain = orange.Domain(frag_vars, 0)
    domain.addmeta(mid, orange.StringVariable("term id"))
    domain.addmeta(mid1, orange.StringVariable("term name"))
    table = orange.ExampleTable(domain)
    for term_id, vec in zip(term_ids, matrix):
        e = orange.Example(domain, list(vec))
        term = self.ontology[term_id]
        e[mid] = term_id
        e[mid1] = term.name
        table.append(e)
    self.send("Fragment-based slim profiles", table)
def sendMoleculeTerms(self):
    """Send chemical-by-term sensitivity profiles and their transpose.

    For every variable after the first in ``self.data.domain.variables`` and
    every term, ``self.sensDict[gene][chemical]`` is averaged over the
    term's genes (``self.terms[term][0]``). Rows labelled with
    ``self.chemicals`` go to "Slim-based molecular profiles"; the transposed
    table, labelled by term id and name, goes to
    "Molecule-based slim profiles".
    """
    def mean_sensitivity(term, chemical):
        members = self.terms[term][0]
        total = 0.0
        for gene in members:
            total += self.sensDict[gene][chemical]
        total /= len(members)
        return total

    matrix = []
    for chemical in self.data.domain.variables[1:]:
        profile = []
        for term in self.terms.keys():
            profile.append(mean_sensitivity(term, chemical))
        matrix.append(profile)

    # First table: one row per chemical.
    termVars = [orange.FloatVariable(term) for term in self.terms.keys()]
    nameId = orange.newmetaid()
    domain = orange.Domain(termVars, 0)
    domain.addmeta(nameId, orange.StringVariable("chemical name"))
    table = orange.ExampleTable(domain)
    for chemical, profile in zip(self.chemicals, matrix):
        row = orange.Example(domain, profile)
        row[nameId] = chemical
        table.append(row)
    self.send("Slim-based molecular profiles", table)

    # Second table: one row per term.
    import Numeric
    matrix = Numeric.transpose(matrix)
    chemVars = [orange.FloatVariable(chemical) for chemical in self.chemicals]
    termId = orange.newmetaid()
    termNameId = orange.newmetaid()
    domain = orange.Domain(chemVars, 0)
    domain.addmeta(termId, orange.StringVariable("term id"))
    domain.addmeta(termNameId, orange.StringVariable("term name"))
    table = orange.ExampleTable(domain)
    for term_id, profile in zip(self.terms.keys(), matrix):
        row = orange.Example(domain, list(profile))
        term = self.ontology[term_id]
        row[termId] = term_id
        row[termNameId] = term.name
        table.append(row)
    self.send("Molecule-based slim profiles", table)
def sendFragments(self):
    """Send a table of per-chemical fragment values.

    Each entry of ``self.fragmentMap`` becomes one row whose attributes are
    the map's values for ``self.fragments`` and whose "chemical name" meta
    holds the map key.
    """
    fragmentVars = [orange.FloatVariable(frag) for frag in self.fragments]
    nameId = orange.newmetaid()
    chemVar = orange.StringVariable("chemical name")
    domain = orange.Domain(fragmentVars, 0)
    domain.addmeta(nameId, chemVar)

    table = orange.ExampleTable(domain)
    for chem, presence in self.fragmentMap.items():
        row = orange.Example(domain,
                             [presence[frag] for frag in self.fragments])
        row[nameId] = chem
        table.append(row)

    # The channel name (including its spelling) is part of the interface.
    self.send("Molecule fragmetns", table)
def __call__(self, instances, origWeight=0):
    """Train up to ``self.t`` classifiers on re-weighted instances.

    After each round the weights of correctly classified instances are
    multiplied by ``epsilon / (1 - epsilon)`` and all weights are
    renormalised to sum to 1. Training stops early when the weighted error
    ``epsilon`` is 0 or at least 0.499.

    :param instances: training data; a temporary weight meta attribute is
        added and removed again before returning.
    :param origWeight: meta id of initial weights, or 0 to start from 1.0.
    :return: a ``BoostedClassifier`` holding ``(classifier, vote)`` pairs.
    """
    weight = orange.newmetaid()
    if origWeight:
        for inst in instances:
            inst.setweight(weight, inst.getweight(origWeight))
    else:
        instances.addMetaAttribute(weight, 1.0)

    n = len(instances)
    classifiers = []
    for _ in range(self.t):
        epsilon = 0.0
        classifier = self.learner(instances, weight)
        corr = []
        for ex in instances:
            if classifier(ex) != ex.getclass():
                epsilon += ex.getweight(weight)
                corr.append(0)
            else:
                corr.append(1)
        epsilon = epsilon / float(
            sum(ex.getweight(weight) for ex in instances))

        # An explicit branch instead of ``a and b or c``: that idiom turned
        # a legitimate vote of 0.0 (epsilon == 0.5) into ``inf``.
        if epsilon:
            vote = math.log((1 - epsilon) / epsilon)
        else:
            vote = inf
        classifiers.append((classifier, vote))

        if epsilon == 0 or epsilon >= 0.499:
            # A too-weak classifier is dropped unless it is the only one.
            if epsilon >= 0.499 and len(classifiers) > 1:
                del classifiers[-1]
            instances.removeMetaAttribute(weight)
            return BoostedClassifier(
                classifiers=classifiers, name=self.name,
                classvar=instances.domain.classVar)

        beta = epsilon / (1 - epsilon)
        for e in range(n):
            if corr[e]:
                instances[e].setweight(
                    weight, instances[e].getweight(weight) * beta)
        f = 1 / float(sum([ex.getweight(weight) for ex in instances]))
        for e in range(n):
            instances[e].setweight(
                weight, instances[e].getweight(weight) * f)

    instances.removeMetaAttribute(weight)
    return BoostedClassifier(
        classifiers=classifiers, name=self.name,
        classvar=instances.domain.classVar)
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Turn the posted node selection into example tables.

    :param postdata: posted form data; ``selected_nodes`` holds a JSON list
        of ``(example index, selected flag, cluster number)`` triples.
    :param input_dict: must provide ``dm`` (distance matrix) and ``linkage``.
    :param output_dict: not used.
    :return: dict with ``centroids``, ``selected_examples`` and
        ``unselected_examples``; all ``None`` when the matrix items are not
        an example table.
    :raises Exception: if ``selected_nodes`` is missing or not valid JSON.
    """
    print("cforange_hierarchical_clustering_finished")
    import json
    import Orange, orange
    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    # Not used below; the lookup is kept so a missing widget id still fails.
    widget_pk = postdata['widget_id'][0]
    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except (TypeError, ValueError, IndexError, KeyError):
        # Missing value (None / empty list) or malformed JSON.
        raise Exception('Please select a threshold for determining clusters.')

    if isinstance(matrix.items, orange.ExampleTable):
        # NOTE(review): the returned tree is not used below -- confirm
        # whether this (potentially expensive) call is still needed.
        root = Clustering.hierarchical_clustering(linkage, matrix)
        cluster_ids = set([cluster for _, _, cluster in selected_nodes])
        selected_clusters = set([cluster
                                 for _, selected, cluster in selected_nodes
                                 if selected])
        clustVar = orange.EnumVariable(
            str('Cluster'),
            values=["Cluster %d" % i for i in cluster_ids] + ["Other"])
        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())

        # Build table with selected clusters
        selected_table = orange.ExampleTable(domain)
        unselected_table = orange.ExampleTable(domain)
        for ex_index, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[ex_index])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)

        # Build table of centroids
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                clusterEx = orange.ExampleTable(
                    [ex for ex in selected_table
                     if ex[clustVar] == "Cluster %d" % cluster])
                # Attribute statistics: average for continuous attributes,
                # modus for discrete ones, "?" when neither is available.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                ex = [cs.avg if cs else (ds.modus() if ds else "?")
                      for cs, ds in zip(contstat, discstat)]
                example = orange.Example(centroids.domain, ex)
                example[clustVar] = clustVar("Cluster %d" % cluster)
                centroids.append(example)
    else:
        # Attribute distance
        centroids, selected_table, unselected_table = None, None, None

    return {'centroids': centroids,
            'selected_examples': selected_table,
            'unselected_examples': unselected_table}
def __call__(self, data, weight=0):
    """Repeatedly replace attribute subsets with induced features.

    Works on a copy of ``data``. In each step the best feature proposed by
    the feature generator replaces the attributes it is built from; the loop
    ends when no feature is proposed or when the best one binds every
    remaining attribute. The result is a lookup classifier built from the
    reduced data.

    :param data: training examples.
    :param weight: weight meta id; when falsy, ``self.weight`` is used if
        present, otherwise a fresh weight attribute is added.
    :raises SystemError: if ``self.alternativeMeasure`` is set.
    """
    import orngLookup

    if self.alternativeMeasure:
        raise SystemError("alternativeMeasure not implemented yet")

    keepDuplicates = getattr(self, "keepDuplicates", 0)

    data = orange.ExampleTable(data)
    if not weight:
        # This is here for backward compatibility
        if hasattr(self, "weight"):
            weight = self.weight
        else:
            weight = orange.newmetaid()
            data.addMetaAttribute(weight)

    if self.redundancyRemover:
        data = self.redundancyRemover(data, weight)
    if not keepDuplicates:
        data.removeDuplicates(weight)

    induced = 0
    featureGenerator = FeatureGenerator(
        featureInducer=self.featureInducer,
        subsetsGenerator=self.subsetsGenerator)

    while True:
        newFeatures = featureGenerator(data, weight)
        if not newFeatures or not len(newFeatures):
            break

        best = orngMisc.selectBest(newFeatures,
                                   orngMisc.compare2_lastBigger)[0]
        bound_count = len(best.getValueFrom.boundset())
        if bound_count == len(data.domain.attributes):
            break

        induced += 1
        best.name = "c%d" % induced

        data = replaceWithInduced(best, data)
        if not keepDuplicates:
            data.removeDuplicates(weight)

    learnerForUnknown = self.learnerForUnknown or orange.BayesLearner()

    return orngLookup.lookupFromExamples(data, weight, learnerForUnknown)
def __loadDataFromES(self, dataType, domain):
    """Load phrases from Elasticsearch into an Orange example table.

    :param dataType: "train" builds a new domain from ``self.features`` and
        loads all training phrases; "holdout" loads the holdout phrases into
        ``domain``; any other value loads the single phrase
        ``self.phraseId`` (also stored in ``self.phraseData``).
    :param domain: domain to use for every ``dataType`` except "train".
    :return: the example table; rows that cannot be converted are logged
        and skipped.
    """
    table = None
    if dataType != "train":
        table = orange.ExampleTable(domain)
    else:
        attributes = [self.__getOrangeVariableForFeature(feature)
                      for feature in self.features]
        classAttribute = orange.EnumVariable("is_good", values = ["0", "1"])
        domain = orange.Domain(attributes, classAttribute)
        domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
        table = orange.ExampleTable(domain)

    # Training and holdout phrases are fetched the same way and differ only
    # in the flag field. Sharing the code also removes the NameError the
    # holdout branch had (it read a count variable it never assigned).
    flagField = {"train": "is_training", "holdout": "is_holdout"}.get(dataType)
    if flagField is not None:
        query = {"query": {"terms": {flagField: ["1", "0"]}}}
        phrasesCount = self.esClient.count(index=self.processorIndex,
                                           doc_type=self.processorPhraseType,
                                           body=query)
        size = phrasesCount["count"]
        phrases = self.esClient.search(index=self.processorIndex,
                                       doc_type=self.processorPhraseType,
                                       body=query, size=size)
        phrases = phrases["hits"]["hits"]
    else:
        self.phraseData = self.esClient.get(index=self.processorIndex,
                                            doc_type=self.processorPhraseType,
                                            id=self.phraseId)
        phrases = [self.phraseData]

    for row in phrases:
        phraseText = "?"  # used in the log message if the row is unusable
        try:
            row = row["_source"]
            phraseText = row["phrase"]
            featureValues = []
            classType = "?"
            for feature in self.features:
                featureValues.append(
                    row["features"][feature["name"]].encode("ascii"))
            if dataType == "train":
                classType = row["is_training"].encode("ascii", "ignore")
            elif dataType == "holdout":
                classType = row["is_holdout"].encode("ascii")
            # Discrete attributes must know a value before it can be used.
            for i, featureValue in enumerate(featureValues):
                attr = domain.attributes[i]
                if type(attr) is orange.EnumVariable:
                    attr.addValue(featureValue)
            example = orange.Example(domain, (featureValues + [classType]))
            example[domain.getmetas().items()[0][0]] = phraseText.encode("ascii")
            table.append(example)
        except Exception:
            # Best effort: log and continue with the next phrase. The
            # message no longer indexes ``row`` again, which could raise
            # from inside the handler.
            self.logger.error("Error classifying phrase '" + phraseText + "'")
    return table
def generateETStruct(path, medaData, numGenes=None):
    """Write one tab-delimited file per replica under ``path/<strain>/``.

    Each replica's raw 2-D data is converted to an example table, given a
    "DDB" string meta attribute filled from the DDB list, and saved as
    ``<replica>.tab``.

    :param path: output directory, created if missing.
    :param medaData: currently ignored (see note below).
    :param numGenes: when truthy, only the first ``numGenes`` examples are
        written.
    """
    ddbList = Dicty.DAnnotation.getDDBList()
    if not os.path.exists(path):
        os.mkdir(path)
    # NOTE(review): the ``medaData`` argument is overwritten here, so the
    # caller's value is never used -- confirm whether that is intended.
    medaData = Dicty.DData.DData_Nancy()
    for st in medaData.strains:
        # os.path.join instead of a hard-coded backslash keeps the function
        # usable outside Windows.
        pathSt = os.path.join(path, st)
        if not os.path.exists(pathSt):
            os.mkdir(pathSt)
        for rep in medaData.strain2replicaList(st):
            ma2d = medaData.getRaw2d(rep)
            et = Meda.Preproc.ma2orng(
                ma2d,
                Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
            et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
            for eIdx, e in enumerate(et):
                e["DDB"] = ddbList[eIdx]
            fileName = os.path.join(pathSt, rep + ".tab")
            if numGenes:
                orange.saveTabDelimited(fileName,
                                        orange.ExampleTable(et[:numGenes]))
            else:
                orange.saveTabDelimited(fileName, et)
def sendpredictions(self):
    """Send the input data extended with the predictors' outputs.

    Every predictor contributes meta attributes computed on demand through
    ``getValueFrom``: class probabilities for ``self.selectedClasses`` and,
    if ``self.showClass`` is set, the predicted class (classification), or
    the predicted value (regression). With ``self.doPrediction`` the class
    column is filled in by the first predictor. ``None`` is sent when there
    is no data or no output variable.
    """
    if not self.data or not self.outvar:
        self.send("Predictions", None)
        return

    # predictions, data set with class predictions
    classification = self.outvar.varType == orange.VarTypes.Discrete

    metas = []
    if not classification:
        # regression
        for c in self.predictors.values():
            metas.append(orange.FloatVariable(
                name="%s" % str(c.name),
                getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex))))
    else:
        if len(self.selectedClasses):
            for c in self.predictors.values():
                probability_vars = []
                for i in self.selectedClasses:
                    label = str("%s(%s)" % (c.name, str(self.outvar.values[i])))
                    # Defaults bind the current class index and predictor.
                    probability_vars.append(orange.FloatVariable(
                        name=label,
                        getValueFrom=lambda ex, rw, cindx=i, c=c:
                            orange.Value(c(ex, c.GetProbabilities)[cindx])))
                metas.extend(probability_vars)
        if self.showClass:
            class_vars = []
            for c in self.predictors.values():
                class_vars.append(orange.EnumVariable(
                    name=str(c.name), values=self.outvar.values,
                    getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex))))
            metas.extend(class_vars)

    classVar = self.outvar
    domain = orange.Domain(self.data.domain.attributes + [classVar])
    domain.addmetas(self.data.domain.getmetas())
    for meta_var in metas:
        domain.addmeta(orange.newmetaid(), meta_var)
    predictions = orange.ExampleTable(domain, self.data)

    if self.doPrediction:
        first_predictor = self.predictors.values()[0]
        for example in predictions:
            example[classVar] = first_predictor(example)

    predictions.name = self.data.name
    self.send("Predictions", predictions)

    self.changedFlag = False
def convert_table(self, table_name, cls_att=None):
    '''
    Returns the target table as an orange example table.

    Column types come from ``self.orng_type``: 'd' becomes a discrete
    variable with the column's known values, 'c' a continuous one, anything
    else a string variable. String columns and primary/foreign key columns
    are stored as meta attributes. Missing (``None``) values become '?'.

    :param table_name: name of the table to convert.
    :param cls_att: optional name of the column to use as the class.
    :raises Exception: if ``cls_att`` names a string column.
    '''
    import orange
    cols = self.db.cols[table_name]
    attributes, metas, class_var = [], [], None
    for col in cols:
        att_type = self.orng_type(table_name, col)
        if att_type == 'd':
            att_vals = self.db.col_vals[table_name][col]
            att_var = orange.EnumVariable(
                str(col), values=[str(val) for val in att_vals])
        elif att_type == 'c':
            att_var = orange.FloatVariable(str(col))
        else:
            att_var = orange.StringVariable(str(col))

        if col == cls_att:
            if att_type == 'string':
                raise Exception('Unsuitable data type for a target variable: %s' % att_type)
            class_var = att_var
            continue

        # Spelled out with parentheses; the grouping is the same as the
        # implicit and/or precedence of the original expression.
        is_pkey = (table_name in self.db.pkeys
                   and col in self.db.pkeys[table_name])
        is_fkey = (table_name in self.db.fkeys
                   and col in self.db.fkeys[table_name])
        if att_type == 'string' or is_pkey or is_fkey:
            metas.append(att_var)
        else:
            attributes.append(att_var)

    domain = orange.Domain(attributes, class_var)
    for meta in metas:
        domain.addmeta(orange.newmetaid(), meta)

    dataset = orange.ExampleTable(domain)
    dataset.name = table_name
    for row in self.db.rows(table_name, cols):
        example = orange.Example(domain)
        for col, val in zip(cols, row):
            # Identity test: only a real None marks a missing value.
            example[str(col)] = str(val) if val is not None else '?'
        dataset.append(example)
    return dataset
# xtest: RANDOM import orange data1 = orange.ExampleTable("merge1") data2 = orange.ExampleTable("merge2", use = data1.domain) a1, a2 = data1.domain.attributes metas = data1.domain.getmetas() m1, m2 = data1.domain["m1"], data1.domain["m2"] m1i, m2i = data1.domain.metaid(m1), data1.domain.metaid(m2) a1, a3 = data2.domain.attributes n1 = orange.FloatVariable("n1") n2 = orange.FloatVariable("n2") newdomain = orange.Domain([a1, a3, m1, n1]) newdomain.addmeta(m2i, m2) newdomain.addmeta(orange.newmetaid(), a2) newdomain.addmeta(orange.newmetaid(), n2) merge = orange.Example(newdomain, [data1[0], data2[0]]) print "First example: ", data1[0] print "Second example: ", data2[0] print "Merge: ", merge
def getSelectionsAsExampleTables(self, attrList, useAnchorData=1, addProjectedPositions=0):
    """Return ``(selected, unselected)`` example tables -- currently disabled.

    The function returns ``(None, None)`` immediately; the code below the
    early return is kept for when the feature is re-enabled.

    :param attrList: names of the attributes used for the projection.
    :param useAnchorData: use ``self.anchor_data`` instead of ``attrList``
        to choose the attribute indices.
    :param addProjectedPositions: 0 adds nothing, 1 prepends the projected
        X/Y/Z positions as attributes, 2 adds them as meta attributes.
    """
    return (None, None)  # TODO: this is disabled for now

    if not self.have_data:
        return (None, None)

    selected = self.get_selected_indices()

    if addProjectedPositions == 0 and not numpy.any(selected):
        return (None, self.raw_data)
    if (useAnchorData and len(self.anchor_data) < 3) or len(attrList) < 3:
        return (None, None)

    x_attr = orange.FloatVariable("X Positions")
    y_attr = orange.FloatVariable("Y Positions")
    z_attr = orange.FloatVariable("Z Positions")

    if addProjectedPositions == 1:
        domain = orange.Domain([x_attr, y_attr, z_attr] +
                               [v for v in self.data_domain.variables])
    elif addProjectedPositions == 2:
        domain = orange.Domain(self.data_domain)
        domain.addmeta(orange.newmetaid(), x_attr)
        domain.addmeta(orange.newmetaid(), y_attr)
        domain.addmeta(orange.newmetaid(), z_attr)
    else:
        domain = orange.Domain(self.data_domain)

    domain.addmetas(self.data_domain.getmetas())

    if useAnchorData:
        indices = [self.attribute_name_index[val[3]]
                   for val in self.anchor_data]
    else:
        indices = [self.attribute_name_index[label] for label in attrList]
    valid_data = self.getValidList(indices)
    if len(valid_data) == 0:
        return (None, None)

    array = self.create_projection_as_numeric_array(
        attrList, scaleFactor=self.scaleFactor,
        useAnchorData=useAnchorData, removeMissingData=0)
    # Identity test: ``== None`` on an array compares element-wise.
    if array is None:
        return (None, None)

    unselected = numpy.logical_not(selected)
    selected_indices, unselected_indices = list(selected), list(unselected)

    if addProjectedPositions:
        selected = orange.ExampleTable(
            domain, self.raw_data.selectref(selected_indices))
        unselected = orange.ExampleTable(
            domain, self.raw_data.selectref(unselected_indices))
        selected_index = 0
        unselected_index = 0
        for i in range(len(selected_indices)):
            if selected_indices[i]:
                selected[selected_index][x_attr] = array[i][0]
                selected[selected_index][y_attr] = array[i][1]
                selected[selected_index][z_attr] = array[i][2]
                selected_index += 1
            else:
                unselected[unselected_index][x_attr] = array[i][0]
                unselected[unselected_index][y_attr] = array[i][1]
                unselected[unselected_index][z_attr] = array[i][2]
                # This counter was never advanced, so every unselected
                # point overwrote row 0.
                unselected_index += 1
    else:
        selected = self.raw_data.selectref(selected_indices)
        unselected = self.raw_data.selectref(unselected_indices)

    if len(selected) == 0:
        selected = None
    if len(unselected) == 0:
        unselected = None
    return (selected, unselected)
# Fragment of a larger script: ``data``, ``meas``, ``fstr``, ``attrs`` and
# ``names`` are defined earlier in the file (not visible here).

# Measure every attribute from a precomputed DomainContingency, addressing
# the attribute by index, by name and by descriptor.
dcont = orange.DomainContingency(data)
print "Computing information gain from DomainContingency"
print fstr % (("- by attribute number:",) + tuple([meas(i, dcont) for i in range(attrs)]))
print fstr % (("- by attribute name:",) + tuple([meas(i, dcont) for i in names]))
print fstr % (("- by attribute descriptor:",) + tuple([meas(i, dcont) for i in data.domain.attributes]))
print

# Same three addressing modes, but from a per-attribute ContingencyAttrClass
# plus the class distribution.
# NOTE(review): the heading printed below repeats the previous one although
# this section uses ContingencyAttrClass -- confirm against the expected
# reference output before changing it.
print "Computing information gain from DomainContingency"
cdist = orange.Distribution(data.domain.classVar, data)
print fstr % (("- by attribute number:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in range(attrs)]))
print fstr % (("- by attribute name:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in names]))
print fstr % (("- by attribute descriptor:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in data.domain.attributes]))
print

# A constructed attribute: one value per combination of the values of
# attributes 2 and 3, computed on the fly through a lookup table.
values = ["v%i" % i for i in range(len(data.domain[2].values)*len(data.domain[3].values))]
cartesian = orange.EnumVariable("cart", values = values)
cartesian.getValueFrom = orange.ClassifierByLookupTable(cartesian, data.domain[2], data.domain[3], values)

print "Information gain of Cartesian product of %s and %s: %6.4f" % (data.domain[2].name, data.domain[3].name, meas(cartesian, data))

# A two-valued meta attribute filled from a seeded generator, measured by
# its meta id.
mid = orange.newmetaid()
data.domain.addmeta(mid, orange.EnumVariable(values = ["v0", "v1"]))
data.addMetaAttribute(mid)

rg = random.Random()
rg.seed(0)
for ex in data:
    ex[mid] = orange.Value(rg.randint(0, 1))

print "Information gain for a random meta attribute: %6.4f" % meas(mid, data)
# Category: basic classes, meta-attributes # Classes: Example # Uses: lenses # Referenced: Example.htm import orange, random data = orange.ExampleTable("lenses") random.seed(0) #id2 = orange.newmetaid() #w2 = orange.FloatVariable("ww") #The below two lines fail (and SHOULD fail): #data[0].setmeta(id, orange.Value(ww, 2.0)) #data[0].setmeta(id2, "2.0") ok_id = orange.newmetaid() ok = orange.EnumVariable("ok?", values=["no", "yes"]) data[0][ok_id] = orange.Value(ok, "yes") data.domain.addmeta(ok_id, ok) data[0][ok_id] = "yes" data[0][ok] = "no" data[0]["ok?"] = "no" no_yes = [orange.Value(ok, "no"), orange.Value(ok, "yes")] for example in data: example.setvalue(no_yes[random.randint(0, 1)]) print data[0][ok_id]
def commit(self):
    """Split the input data by score and send the parts to the outputs.

    The cut-off callable of the current scoring method (item 2 of the
    ``self.score_methods`` entry) is applied to all values of
    ``self.scores`` together with the histogram's lower and upper
    boundaries.  Keys whose result is true are "selected", the others are
    "remaining".

    * If ``self.genes_in_columns`` is true, the keys are used as example
      indices into ``self.data``; a table of selected and a table of
      remaining examples are sent.  With ``self.add_scores_to_output`` the
      score is stored in a meta attribute named after the scoring method.
    * Otherwise the keys are matched against ``self.data.domain.attributes``;
      tables restricted to the selected / remaining attributes are sent,
      plus a "Selected genes" table of (attribute name, score) rows.  With
      ``self.add_scores_to_output`` the attributes are replaced by copies
      whose ``attributes`` dict carries the score as a string.

    Returns immediately, sending nothing, when there is no data or there
    are no scores.
    """
    if not self.data or not self.scores:
        return

    method = self.score_methods[self.method_index]
    method_name, score_test = method[0], method[2]

    cutOffUpper = self.histogram.upperBoundary
    cutOffLower = self.histogram.lowerBoundary

    # Rows of (key, score); the score column is then overwritten with the
    # outcome of the cut-off test, so each row reads (key, passed).
    # list() keeps this working where dict.items() is not already a list.
    scores = np.array(list(self.scores.items()))
    scores[:, 1] = score_test(np.array(scores[:, 1], dtype=float),
                              cutOffLower, cutOffUpper)
    selected = set(key for key, passed in scores if passed)
    remaining = set(key for key, passed in scores if not passed)

    if self.data and self.genes_in_columns:
        if self.add_scores_to_output:
            # One descriptor / meta id pair shared by both output tables.
            score_attr = orange.FloatVariable(method_name)
            mid = orange.newmetaid()

        def rows_subset(keys):
            """Return a table of the examples indexed by `keys`, or None if empty."""
            if not keys:
                return None
            subset = orange.ExampleTable(
                orange.Domain(self.data.domain),
                [self.data[int(i)] for i in keys],
                name=self.data.name
            )
            if self.add_scores_to_output:
                subset.domain.addmeta(mid, score_attr)
                for ex, key in zip(subset, keys):
                    ex[mid] = self.scores[key]
            return subset

        self.send("Example table with selected genes",
                  rows_subset(sorted(selected)))
        self.send("Example table with remaining genes",
                  rows_subset(sorted(remaining)))

    elif self.data and not self.genes_in_columns:

        def columns_subset(attrs):
            """Return (output descriptors, table restricted to them or None)."""
            out_attrs = attrs
            if self.add_scores_to_output:
                attr_scores = [self.scores[attr] for attr in attrs]
                out_attrs = [copy_descriptor(attr) for attr in attrs]
                for attr, score in zip(out_attrs, attr_scores):
                    # Each copy is annotated with its own score.  (The
                    # "remaining" branch used to store str() of the whole
                    # score list here.)
                    attr.attributes[method_name] = str(score)
            newdomain = orange.Domain(out_attrs, self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(
                newdomain,
                self.data,
                name=self.data.name
            )
            return out_attrs, (newdata if out_attrs else None)

        selected_attrs = [attr for attr in self.data.domain.attributes
                          if attr in selected
                          or attr.varType == orange.VarTypes.String]  # ?? why strings
        selected_out, newdata = columns_subset(selected_attrs)
        self.send("Example table with selected genes", newdata)

        remaining_attrs = [attr for attr in self.data.domain.attributes
                           if attr in remaining]
        _, newdata = columns_subset(remaining_attrs)
        self.send("Example table with remaining genes", newdata)

        domain = orange.Domain([orange.StringVariable("label"),
                                orange.FloatVariable(method_name)], False)
        if selected_out:
            # self.scores is keyed by the original descriptors, so the score
            # is looked up through them; the copies made when scores are
            # added to the output are separate objects and may not match
            # those keys (copy_descriptor is defined elsewhere -- to be
            # confirmed).  The name is taken from the descriptor actually
            # sent.
            selected_genes = orange.ExampleTable(
                domain,
                [[out.name, self.scores.get(orig, 0)]
                 for orig, out in zip(selected_attrs, selected_out)])
        else:
            selected_genes = None
        self.send("Selected genes", selected_genes)

    else:
        # Fallback kept from the original; with the guard at the top one of
        # the two branches above is normally taken.
        self.send("Example table with selected genes", None)
        self.send("Example table with remaining genes", None)
        self.send("Selected genes", None)
    self.data_changed_flag = False
def __init__(self, minSupport=0.05, minConfidence=0.8, k=3):
    """Remember the learner settings and reserve a meta id for weights.

    minSupport    -- stored as ``self.minSup``
    minConfidence -- stored as ``self.minConf``
    k             -- stored as ``self.k`` (its use is not visible here)
    """
    # A fresh meta-attribute id, obtained from orange's id generator.
    self.weightID = orange.newmetaid()
    self.minSup, self.minConf, self.k = minSupport, minConfidence, k
def __init__(self, k):
    """Store `k`, reserve two meta ids and set up the rule beam finder."""
    self.k = k
    # Two fresh meta ids; the one for `counter` is requested first.
    self.counter, self.weightID = orange.newmetaid(), orange.newmetaid()
    # The beam finder evaluates rules with RuleEvaluator_WRAcc.
    self.rbf = orange.RuleBeamFinder()
    self.rbf.evaluator = RuleEvaluator_WRAcc()
def __call__(self, data, targetClass, num_of_rules):
    """Build rules for `targetClass` from `data` and return them as SDRules.

    Steps, in order:

    1. Reset the set of already refined rules for the target class.
    2. If the domain has continuous attributes, replace each of them with
       its entropy-discretized version (forceAttribute=True).
    3. Register two new meta attributes on the working data, fill the
       candidate lists and initialise / update the refinement and
       selection beams.
    4. Run a fixed number of refine-and-update rounds.
    5. If `num_of_rules` is non-zero, reduce the selection beam with
       ``self.ruleSubsetSelection``.
    6. If the data was discretized, map the rules back to the original
       data.

    Returns an ``SDRules`` object labelled "SD-inverted", or None when
    ``self.dataOK(data)`` is false.
    """
    self.alredyRefinedRules[str(targetClass)] = set()

    if not self.dataOK(data):
        return None

    original_data = data
    data_discretized = bool(data.domain.hasContinuousAttributes())
    if data_discretized:
        discretize = orange.EntropyDiscretization(forceAttribute=True)
        # Every continuous attribute is kept in its discretized form; the
        # other attributes are passed through unchanged.
        new_domain = [
            discretize(attribute, data)
            if attribute.varType == orange.VarTypes.Continuous
            else attribute
            for attribute in data.domain.attributes
        ]
        data = original_data.select(
            new_domain + [original_data.domain.classVar])

    self.data = self.weigted_data = data
    self.c, self.count = orange.newmetaid(), orange.newmetaid()
    for meta_id in (self.c, self.count):
        self.weigted_data.addMetaAttribute(meta_id)

    self.targetClass = targetClass

    # Candidate lists first, then both beams (empty rules), then a first
    # update of each beam from its candidates.
    self.fillCandidatesList(data, targetClass)
    self.initializeRefinementBeam()
    self.initializeSelectionBeam()
    self.updateRefinementBeam(self.refinementCandidates)
    self.updateSelectionBeam(self.selectionCandidates)

    # Fixed number of rounds (steps 2..max_steps).  The values returned by
    # the update calls are not used to stop early.
    max_steps = 5
    for _step in range(2, max_steps + 1):
        self.refinedRefinementBeam(targetClass)
        self.updateRefinementBeam(self.refinementCandidates)
        self.updateSelectionBeam(self.selectionCandidates)

    if num_of_rules != 0:
        self.SelectionBeam = self.ruleSubsetSelection(
            self.SelectionBeam, num_of_rules, data)

    # The default rule is always built on the data as it was passed in.
    targetClassRule = SDRule(original_data, targetClass, conditions=[], g=self.g)
    if data_discretized:
        # Change the beam so that the rules apply to the original data.
        self.SelectionBeam = [rule.getUndiscretized(original_data)
                              for rule in self.SelectionBeam]

    return SDRules(self.SelectionBeam, targetClassRule, "SD-inverted")