Beispiel #1
0
 def sendList(self, selectedInd):
     """Send the current selection to the widget's output channels.

     If ``self.data`` is a non-empty list whose first item is a string,
     an ExampleTable with one row per selected index is built and sent on
     "Data".  Depending on ``self.selectionOptions`` the point
     coordinates from ``self.mds.points`` are stored as regular
     attributes (``== 1``), as meta attributes (any other truthy value)
     or omitted (falsy).

     Otherwise the selected items are grouped into ``(name, items)``
     pairs and sent on "Structured Data Files"; an empty selection sends
     ``None`` on that channel.

     :param selectedInd: indices of the selected points.  Each index is
         used for both ``self.mds.points`` and ``self.data``.
     """
     if self.data and isinstance(self.data[0], str):
         xAttr = orange.FloatVariable("X")
         yAttr = orange.FloatVariable("Y")
         nameAttr = orange.StringVariable("name")
         if self.selectionOptions == 1:
             domain = orange.Domain([xAttr, yAttr, nameAttr])
             selection = orange.ExampleTable(domain)
             for ind in selectedInd:
                 # The name must come from the same index as the point;
                 # indexing self.data by loop position paired the first
                 # N names with whatever points were selected.
                 selection.append(list(self.mds.points[ind]) + [self.data[ind]])
         else:
             domain = orange.Domain([nameAttr])
             if self.selectionOptions:
                 domain.addmeta(orange.newmetaid(), xAttr)
                 domain.addmeta(orange.newmetaid(), yAttr)
             selection = orange.ExampleTable(domain)
             for row, ind in enumerate(selectedInd):
                 selection.append([self.data[ind]])
                 if self.selectionOptions:
                     point = self.mds.points[ind]
                     selection[row][xAttr] = point[0]
                     selection[row][yAttr] = point[1]
         self.send("Data", selection)
         return

     if not selectedInd:
         self.send("Structured Data Files", None)
     else:
         datasets = [self.data[i] for i in selectedInd]
         # NOTE(review): group names are collected from ``dirname`` but
         # members are matched on ``strain`` -- confirm the two attributes
         # are meant to hold the same value.
         names = list(set([d.dirname for d in datasets]))
         data = [(name, [d for d in datasets if d.strain == name]) for name in names]
         self.send("Structured Data Files", data)
def add_meta_id(data):
    """Add a float meta attribute named "meta_id" holding each row's index.

    Meta ids are drawn from ``orange.newmetaid()`` until one is found that
    is not already a key of ``data.domain.getmetas()``.  The attribute is
    registered on the domain and every example receives its zero-based
    position in ``data``.  ``data`` is modified in place; nothing is
    returned.
    """
    index_var = orange.FloatVariable("meta_id")
    while True:
        candidate = orange.newmetaid()
        if candidate not in data.domain.getmetas().keys():
            break
    data.domain.addmeta(candidate, index_var)
    for position in range(len(data)):
        data[position][index_var] = position
Beispiel #3
0
def __make_rule_gene_example_table(tableDict, genes):
    """Build a rule-by-gene presence table.

    Every gene in ``genes`` becomes a discrete attribute with the values
    ``const.PRESENT`` and ``const.ABSENT``.  Every key of ``tableDict``
    (visited in sorted order) becomes one example whose attribute for a
    gene is PRESENT when that gene is in the entry's
    ``const.TOP_GENES_KEY`` collection.

    Three meta attributes are attached to each example: the rule name and
    the rule terms (both with their first and last character removed --
    the original comment says these are square brackets) and the key
    itself as the sequence number.

    :param tableDict: mapping from a sortable key to a per-rule mapping.
    :param genes: iterable of genes; its order defines the attribute order.
    :return: the populated ``orange.ExampleTable`` (classless domain).
    """
    import orange
    import constants as const

    attrList = [orange.EnumVariable(name=str(gene), values=[const.PRESENT, const.ABSENT]) for gene in genes]

    # Three meta attributes: rule name, rule terms and sequence number.
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR, startValue=1, endValue=len(tableDict), stepValue=1, numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # This is a classless domain.
    domain = orange.Domain(attrList, False)
    domain.addmeta(mid, ruleName, False)
    domain.addmeta(mid1, ruleTerms, False)
    domain.addmeta(mid2, ruleNumber, False)

    table = orange.ExampleTable(domain)

    for k in sorted(tableDict.keys()):
        rule = tableDict[k]
        topGenes = rule[const.TOP_GENES_KEY]
        exampleValues = []
        for (i, gene) in enumerate(genes):
            presence = const.PRESENT if gene in topGenes else const.ABSENT
            exampleValues.append(orange.Value(attrList[i], presence))
        example = orange.Example(domain, exampleValues)

        # Each meta value is set once, as a typed orange.Value.  The earlier
        # raw-value assignments were immediately overwritten by these.
        example[const.NAME_ATTR] = orange.Value(ruleName, rule[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(ruleTerms, rule[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)

        table.append(example)
    return table
Beispiel #4
0
    def ruleSubsetSelection(self, beam, num_of_rules, data):
        """Greedily choose ``num_of_rules`` rules from ``beam``.

        A temporary per-example counter is attached to ``data`` as a meta
        attribute (initialised to 1 according to the original comment).
        In each round every remaining rule is scored by the sum of
        ``1 / counter`` over the examples it covers (``rule.filter(d)``);
        the first rule with the highest score is taken, the counters of
        the examples it covers are incremented, and the rule is removed
        from ``beam``.

        :param beam: list of rules; selected rules are deleted from it.
        :param num_of_rules: number of rules to select.
        :param data: examples used for scoring.
        :return: ``beam`` itself, untouched, when it holds fewer than
            ``num_of_rules`` rules; otherwise the list of selected rules.
        """
        # Nothing to select: return before attaching the temporary counter,
        # which previously was added on this path and never removed.
        if num_of_rules > len(beam):
            return beam

        counter = orange.newmetaid()
        data.addMetaAttribute(counter)   # initialize to 1
        selected = []
        for _ in range(num_of_rules):
            best_score = 0
            best_rule_index = 0
            for rule_index, rule in enumerate(beam):
                score = 0
                for d in data:          # sum of weights of covered examples
                    if rule.filter(d):
                        score += 1.0 / d.getweight(counter)
                if score > best_score:
                    best_score = score
                    best_rule_index = rule_index
            best_rule = beam[best_rule_index]
            for d in data:              # increase example counter
                if best_rule.filter(d):
                    d.setweight(counter, d.getweight(counter) + 1)
            selected.append(best_rule)
            del beam[best_rule_index]
        data.removeMetaAttribute(counter)
        return selected
    def applySettings(self):
        """Use the settings from the widget and identify the outliers.

        With input present (``self.haveInput == 1``) the outlier detector
        is configured with the selected k, a copy of the data with an
        extra "Z score" meta attribute is built and sent on
        "Data with z-score", and the rows are split at the ``self.zscore``
        threshold into "Outliers" (greater) and "Inliers" (less or equal).
        Without input, ``None`` is sent on all three channels.
        """
        if self.haveInput == 1:
            outlier = self.outlier
            outlier.setKNN(self.ks[self.k][1])

            newdomain = orange.Domain(self.data.domain)
            newdomain.addmeta(orange.newmetaid(), orange.FloatVariable("Z score"))

            self.newdata = orange.ExampleTable(newdomain, self.data)

            zv = outlier.zValues()
            for i, el in enumerate(zv):
                self.newdata[i]["Z score"] = el

            self.send("Data with z-score", self.newdata)

            # NOTE(review): self.zscore is run through eval(); if it can
            # hold free-form user text this executes arbitrary code.
            # Consider float() once the accepted formats are confirmed.
            # It is evaluated once so both filters share one threshold.
            threshold = eval(self.zscore)

            filterout = orange.Filter_values(domain=self.newdata.domain)
            filterout["Z score"] = (orange.Filter_values.Greater, threshold)
            outliers = filterout(self.newdata)

            filterin = orange.Filter_values(domain=self.newdata.domain)
            filterin["Z score"] = (orange.Filter_values.LessEqual, threshold)
            inliers = filterin(self.newdata)

            self.send("Outliers", outliers)
            self.send("Inliers", inliers)
        else:
            self.send("Data with z-score", None)
            self.send("Outliers", None)
            self.send("Inliers", None)
Beispiel #6
0
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a rule-by-term presence table.

    Each term in ``allTerms`` becomes a discrete attribute with the values
    ``const.PRESENT`` / ``const.ABSENT``.  Each key of ``tableDict``, in
    sorted order, yields one example marking a term PRESENT when it is in
    the entry's ``const.RULETERMS_KEY`` collection.  The rule name, the
    rule-terms string (both without their first and last character) and
    the key itself are stored as meta attributes.

    :return: the populated ``orange.ExampleTable`` (classless domain).
    """
    import orange
    import constants as const

    term_vars = []
    for term in allTerms:
        term_vars.append(
            orange.EnumVariable(name=str(term), values=[const.PRESENT, const.ABSENT]))

    # The three meta attributes and the ids they are registered under.
    name_var = orange.StringVariable(const.NAME_ATTR)
    name_id = orange.newmetaid()
    terms_var = orange.StringVariable(const.TERMS_ATTR)
    terms_id = orange.newmetaid()
    number_var = orange.FloatVariable(
        const.SEQ_NUM_ATTR,
        startValue=1,
        endValue=len(tableDict),
        stepValue=1,
        numberOfDecimals=0)
    number_id = orange.newmetaid()

    # Classless domain; the rule description lives in the metas.
    domain = orange.Domain(term_vars, False)
    for meta_id, meta_var in ((name_id, name_var),
                              (terms_id, terms_var),
                              (number_id, number_var)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)

    for key in sorted(tableDict.keys()):
        row = []
        for position, term in enumerate(allTerms):
            if term in tableDict[key][const.RULETERMS_KEY]:
                state = const.PRESENT
            else:
                state = const.ABSENT
            row.append(orange.Value(term_vars[position], state))
        example = orange.Example(domain, row)

        # [1:-1] skips the square brackets around the stored strings.
        rule_name = tableDict[key][const.RULENAME_KEY][1:-1]
        example[const.NAME_ATTR] = orange.Value(name_var, rule_name)
        rule_terms = tableDict[key][const.RULETERMS_STR_KEY][1:-1]
        example[const.TERMS_ATTR] = orange.Value(terms_var, rule_terms)
        example[const.SEQ_NUM_ATTR] = orange.Value(number_var, key)

        table.append(example)
    return table
Beispiel #7
0
 def __init__(self,
              k,
              rank_id = None,
              counter_id=None,
              num_of_rules=0,
              bdiscretize=True,
              **kwargs):
     """Store the learner settings and set up the rule beam finder.

     :param k: stored as ``self.k``.
     :param rank_id: meta id stored as ``self.rank_id``; a fresh id is
         drawn when falsy.
     :param counter_id: meta id stored as ``self.counter``; a fresh id is
         drawn when falsy.
     :param num_of_rules: stored as ``self.max_rules``.
     :param bdiscretize: stored as ``self.bdiscretize``.
     :param kwargs: optional ``attrs`` (default ``[]``) and either a
         ready-made ``rbf`` or a ``beamfinder`` factory, which is called
         as ``beamfinder(self.rank_id, **kwargs)``.
     :raises KeyError: if neither ``rbf`` nor ``beamfinder`` is supplied.
     """
     self.k = k
     self.counter = counter_id or orange.newmetaid()
     self.rank_id = rank_id or orange.newmetaid()
     self.weightID = orange.newmetaid()
     self.attrs = kwargs.get('attrs', [])
     self.max_rules = num_of_rules
     # Build the beam finder only when no 'rbf' was supplied.  Passing it as
     # the default of kwargs.get() evaluated the factory call eagerly, so a
     # caller providing only 'rbf' hit a KeyError on 'beamfinder'.
     if 'rbf' in kwargs:
         self.rbf = kwargs['rbf']
     else:
         self.rbf = kwargs['beamfinder'](self.rank_id, **kwargs)
     self.bdiscretize = bdiscretize
def add_class_noise(data, noise_level, rnd_seed):
    """Add class noise to a data set and mark the changed examples.

    A discrete meta attribute "noise" (values "no"/"yes") is added to
    ``data`` and initialised to "no".  A random sample of
    ``round(noise_level% * len(data))`` examples then has its class
    replaced by a different random value and its "noise" meta set to
    "yes".

    :param data: Orange dataset with a class variable; modified in place.
    :param noise_level: percentage of examples to relabel.
    :param rnd_seed: random seed; values ``int()`` cannot convert fall
        back to 0.  It seeds both Orange's generator and the index sample.
    :return: tuple ``(noise_indices, data)`` with the indices sorted.
    :raises ValueError: if noise is requested but the class variable has
        fewer than two values, so no different label exists.
    """
    percent = float(noise_level) / 100
    noise_count = int(round(percent * len(data)))

    # With a single class value the relabelling loop below could never find
    # a different label and would spin forever.
    class_values = getattr(data.domain.classVar, "values", None)
    if noise_count and class_values is not None and len(class_values) < 2:
        raise ValueError("Cannot add class noise: the class variable has fewer than two values.")

    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")

    try:
        rnds = int(rnd_seed)
    except (TypeError, ValueError, OverflowError):
        rnds = 0
    print("Random Seed: %s" % rnds)
    orange.setrandseed(rnds)
    # Draw the indices from a generator seeded with the same value.  The
    # module-level random.sample() was never seeded, so the chosen examples
    # differed between runs despite the seed.
    noise_indices = random.Random(rnds).sample(range(len(data)), noise_count)

    className = data.domain.classVar.name
    for index in noise_indices:
        data[index]["noise"] = "yes"
        temp = data[index][className]
        # Draw random values until one differs from the current class.
        new_label = data.domain.classVar.randomvalue()
        while new_label == temp:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label
    noise_indices.sort()
    return noise_indices, data
 def node_selection_changed(self):
     """Publish the model(s) matching the current node selection.

     Clears the widget warning, then sends ``None`` on "Model" and
     "Selected Models" when there is no graph, no graph items or no
     result matrix, or when the node count differs from the matrix
     dimension (a warning is shown in that case).

     With exactly one selected node the matching item is looked up,
     optionally annotated with 'anchors' and 'classifier' metas taken
     from the stored results, and sent on "Model"; the classifier, when
     present, also goes out on "Classifier".  Any non-empty selection is
     sent on "Selected Models".
     """
     self.warning()

     missing_input = (self.graph is None
                      or self.graph.items() is None
                      or self.graph_matrix is None)
     if missing_input:
         self.send("Model", None)
         self.send("Selected Models", None)
         return

     if self.graph.number_of_nodes() != self.graph_matrix.dim:
         self.warning('Network items and matrix results not of equal length.')
         self.send("Model", None)
         self.send("Selected Models", None)
         return

     selection = self.networkCanvas.selected_nodes()
     selected_count = len(selection)

     if selected_count != 1:
         # Zero or several nodes: there is no single model to publish.
         self.send('Model', None)
         if selected_count > 1:
             self.send('Selected Models', self.graph.items().getitems(selection))
         else:
             self.send('Selected Models', None)
         return

     modelInstance = self.graph.items()[selection[0]]
     # modelInfo - Python dict; keys: method, classifier, probabilities,
     # results, XAnchors, YAnchors, attributes
     modelInfo = self.graph_matrix.results[modelInstance['uuid'].value]

     if 'YAnchors' in modelInfo and 'XAnchors' in modelInfo:
         if not modelInstance.domain.hasmeta('anchors'):
             anchors_var = orange.PythonVariable('anchors')
             modelInstance.domain.addmeta(orange.newmetaid(), anchors_var)
         modelInstance['anchors'] = (modelInfo['XAnchors'], modelInfo['YAnchors'])

     if 'classifier' in modelInfo and modelInfo['classifier'] is not None:
         if not modelInstance.domain.hasmeta('classifier'):
             classifier_var = orange.PythonVariable('classifier')
             modelInstance.domain.addmeta(orange.newmetaid(), classifier_var)
         modelInstance['classifier'] = modelInfo['classifier']
         self.send('Classifier', modelInfo['classifier'])

     self.send('Model', modelInstance)
     self.send('Selected Models', self.graph.items().getitems(selection))
Beispiel #10
0
    def sendData(self, km=None):
        """Send the clustered data and the cluster centroids.

        ``km`` defaults to ``self.bestRun[1]`` when ``self.optimized`` is
        set and to ``self.km`` otherwise.  Without data or a clustering,
        ``None`` is sent on both "Data" and "Centroids".

        The cluster label (values "C1" .. "Ck") is placed according to
        ``self.addIdAs``: 0 makes it the class (the original class, if
        any, becomes a meta), 1 appends it as a regular attribute, any
        other value adds it as a meta attribute.
        """
        if km is None:
            if self.optimized:
                km = self.bestRun[1]
            else:
                km = self.km
        if not (self.data and km):
            self.send("Data", None)
            self.send("Centroids", None)
            return

        labels = ["C%d" % (number + 1) for number in range(km.k)]
        clustVar = orange.EnumVariable(self.classifyName, values=labels)

        source = self.data.domain
        if self.addIdAs == 0:
            target = orange.Domain(source.attributes, clustVar)
            if source.classVar:
                target.addmeta(orange.newmetaid(), source.classVar)
            slot = -1
        elif self.addIdAs == 1:
            target = orange.Domain(source.attributes + [clustVar], source.classVar)
            slot = len(source.attributes)
        else:
            target = orange.Domain(source.attributes, source.classVar)
            slot = orange.newmetaid()
            target.addmeta(slot, clustVar)

        target.addmetas(source.getmetas())

        # Copy of the input data with the cluster assigned by k-means.
        labelled = orange.ExampleTable(target, self.data)
        for example, cluster_index in izip(labelled, km.clusters):
            example[slot] = cluster_index

        centroids = orange.ExampleTable(target, km.centroids)
        for number, centroid in enumerate(centroids):
            centroid[slot] = number
            if source.classVar:
                centroid[source.classVar] = "?"

        self.send("Data", labelled)
        self.send("Centroids", centroids)
Beispiel #11
0
    def sendData(self):
        """Send the examples that fall into the selected table cells.

        Clears ``self.selectionDirty``.  If there are no results, no
        selected cells or no selected learner, ``None`` is sent on
        "Selected Data".  Otherwise the tested examples whose
        (actual class, predicted class) pair for the first selected
        learner is among the selected (row, column) pairs are collected.
        When requested, a copy of them is extended with meta attributes
        for the prediction and/or the class probabilities.
        """
        self.selectionDirty = False

        selected = []
        for index in self.table.selectedIndexes():
            selected.append((index.row(), index.column()))
        res = self.res
        if not (res and selected and self.selectedLearner):
            self.send("Selected Data", None)
            return

        learnerI = self.selectedLearner[0]

        data = None
        if hasattr(res, "examples") and isinstance(res.examples, orange.ExampleTable):
            selectionIndices = []
            for position, tested in enumerate(res.results):
                if (tested.actualClass, tested.classes[learnerI]) in selected:
                    selectionIndices.append(position)
            data = res.examples.getitemsref(selectionIndices)

        if data is not None:
            if self.appendPredictions or self.appendProbabilities:
                # Work on a copy with its own domain so the added metas do
                # not end up on the domain of the tested examples.
                domain = orange.Domain(data.domain.attributes, data.domain.classVar)
                domain.addmetas(data.domain.getmetas())
                data = orange.ExampleTable(domain, data)

                if self.appendPredictions:
                    cname = self.learnerNames[learnerI]
                    if isinstance(cname, unicode):
                        shown_name = cname.encode("utf-8")
                    else:
                        shown_name = cname
                    predVar = type(domain.classVar)("%s(%s)" % (domain.classVar.name, shown_name))
                    if hasattr(domain.classVar, "values"):
                        predVar.values = domain.classVar.values
                    predictionsId = orange.newmetaid()
                    domain.addmeta(predictionsId, predVar)
                    for position, example in zip(selectionIndices, data):
                        example[predictionsId] = res.results[position].classes[learnerI]

                if self.appendProbabilities:
                    probVars = [orange.FloatVariable("p(%s)" % value) for value in domain.classVar.values]
                    probIds = [orange.newmetaid() for _ in probVars]
                    domain.addmetas(dict(zip(probIds, probVars)))
                    for position, example in zip(selectionIndices, data):
                        probabilities = res.results[position].probabilities[learnerI]
                        for meta_id, probability in zip(probIds, probabilities):
                            example[meta_id] = probability

            data.name = self.learnerNames[learnerI]

        self.send("Selected Data", data)
Beispiel #12
0
 def sendExampleTable(self, selectedInd):
     """Send the selected examples on the "Data" channel.

     With ``self.selectionOptions == 0`` the selected rows are sent as
     they are.  Otherwise "X" and "Y" float variables holding the first
     two coordinates of ``self.mds.points`` are added: as leading regular
     attributes when the option is 1, as meta attributes for any other
     value.

     :param selectedInd: indices into ``self.data`` / ``self.mds.points``.
     """
     if self.selectionOptions == 0:
         self.send("Data", orange.ExampleTable(self.data.getitems(selectedInd)))
         return

     xAttr = orange.FloatVariable("X")
     yAttr = orange.FloatVariable("Y")
     if self.selectionOptions == 1:
         originalVars = [var for var in self.data.domain.variables]
         domain = orange.Domain([xAttr, yAttr] + originalVars)
         domain.addmetas(self.data.domain.getmetas())
     else:
         domain = orange.Domain(self.data.domain)
         domain.addmeta(orange.newmetaid(), xAttr)
         domain.addmeta(orange.newmetaid(), yAttr)

     selection = orange.ExampleTable(domain)
     selection.extend(self.data.getitems(selectedInd))
     for row in range(len(selectedInd)):
         pointIndex = selectedInd[row]
         selection[row][xAttr] = self.mds.points[pointIndex][0]
         selection[row][yAttr] = self.mds.points[pointIndex][1]
     self.send("Data", selection)
 def sendFragmentTerms(self):
     """Send fragment-by-term sensitivity profiles in both orientations.

     For every fragment and every term the sensitivities
     ``self.sensDict[gene][self.reverseSmilesDict[chem]]`` are summed
     over the term's genes (``self.terms[term][0]``) and over the
     chemicals whose ``self.fragmentMap`` entry is truthy for that
     fragment, then divided by ``len(genes) * len(chem)``.

     The matrix is sent with one row per fragment on
     "Slim-based fragment profiles" and, transposed, with one row per
     term (annotated with the term id and the name from
     ``self.ontology``) on "Fragment-based slim profiles".
     """
     termIds = self.terms.keys()

     matrix = []
     for frag in self.fragments:
         # The chemicals containing this fragment do not depend on the
         # term, so they are collected once per fragment.  A real list is
         # needed because len() is taken below.
         chem = [c for c in self.fragmentMap.keys() if self.fragmentMap[c][frag]]
         vec = []
         for term in termIds:
             genes = self.terms[term][0]
             avgSens = 0.0
             for g in genes:
                 for c in chem:
                     avgSens += self.sensDict[g][self.reverseSmilesDict[c]]
             # NOTE(review): raises ZeroDivisionError when a term has no
             # genes or no chemical contains the fragment -- confirm the
             # inputs rule this out.
             avgSens /= len(genes) * len(chem)
             vec.append(avgSens)
         matrix.append(vec)

     # One row per fragment, one column per term.
     vars = [orange.FloatVariable(term) for term in termIds]
     mid = orange.newmetaid()
     domain = orange.Domain(vars, 0)
     domain.addmeta(mid, orange.StringVariable("fragment"))
     table = orange.ExampleTable(domain)
     for frag, vec in zip(self.fragments, matrix):
         e = orange.Example(domain, vec)
         e[mid] = frag
         table.append(e)
     self.send("Slim-based fragment profiles", table)

     # Transposed view: one row per term, one column per fragment.
     import Numeric
     matrix = Numeric.transpose(matrix)
     vars = [orange.FloatVariable(frag) for frag in self.fragments]
     mid = orange.newmetaid()
     mid1 = orange.newmetaid()
     domain = orange.Domain(vars, 0)
     domain.addmeta(mid, orange.StringVariable("term id"))
     domain.addmeta(mid1, orange.StringVariable("term name"))
     table = orange.ExampleTable(domain)
     for term_id, vec in zip(termIds, matrix):
         e = orange.Example(domain, list(vec))
         term = self.ontology[term_id]
         e[mid] = term_id
         e[mid1] = term.name
         table.append(e)
     self.send("Fragment-based slim profiles", table)
 def sendMoleculeTerms(self):
     """Send molecule-by-term sensitivity profiles in both orientations.

     For each variable of ``self.data.domain`` after the first, and for
     each term, the mean of ``self.sensDict[gene][variable]`` over the
     term's genes (``self.terms[term][0]``) is computed.  The rows are
     paired with ``self.chemicals`` and sent on
     "Slim-based molecular profiles"; the transposed matrix, one row per
     term with the term id and ontology name as metas, is sent on
     "Molecule-based slim profiles".
     """
     profiles = []
     for chemVar in self.data.domain.variables[1:]:
         row = []
         for termId in self.terms.keys():
             termGenes = self.terms[termId][0]
             total = 0.0
             for gene in termGenes:
                 total += self.sensDict[gene][chemVar]
             total /= len(termGenes)
             row.append(total)
         profiles.append(row)

     # One row per chemical, one column per term.
     termVars = [orange.FloatVariable(termId) for termId in self.terms.keys()]
     nameId = orange.newmetaid()
     chemDomain = orange.Domain(termVars, 0)
     chemDomain.addmeta(nameId, orange.StringVariable("chemical name"))
     chemTable = orange.ExampleTable(chemDomain)
     for chemName, row in zip(self.chemicals, profiles):
         example = orange.Example(chemDomain, row)
         example[nameId] = chemName
         chemTable.append(example)
     self.send("Slim-based molecular profiles", chemTable)

     # Transposed view: one row per term, one column per chemical.
     import Numeric
     transposed = Numeric.transpose(profiles)
     chemVars = [orange.FloatVariable(chemName) for chemName in self.chemicals]
     termIdMeta = orange.newmetaid()
     termNameMeta = orange.newmetaid()
     termDomain = orange.Domain(chemVars, 0)
     termDomain.addmeta(termIdMeta, orange.StringVariable("term id"))
     termDomain.addmeta(termNameMeta, orange.StringVariable("term name"))
     termTable = orange.ExampleTable(termDomain)
     for termId, column in zip(self.terms.keys(), transposed):
         example = orange.Example(termDomain, list(column))
         ontologyTerm = self.ontology[termId]
         example[termIdMeta] = termId
         example[termNameMeta] = ontologyTerm.name
         termTable.append(example)
     self.send("Molecule-based slim profiles", termTable)
 def sendFragments(self):
     """Send a chemical-by-fragment table.

     Every entry of ``self.fragmentMap`` becomes one example whose
     values are that entry's values for ``self.fragments`` (in that
     order); the entry's key is stored in the "chemical name" meta
     attribute.  The table is sent on the "Molecule fragmetns" channel.
     """
     fragmentVars = [orange.FloatVariable(frag) for frag in self.fragments]
     nameId = orange.newmetaid()
     nameVar = orange.StringVariable("chemical name")
     domain = orange.Domain(fragmentVars, 0)
     domain.addmeta(nameId, nameVar)

     table = orange.ExampleTable(domain)
     for chemical, fragmentValues in self.fragmentMap.items():
         row = []
         for frag in self.fragments:
             row.append(fragmentValues[frag])
         example = orange.Example(domain, row)
         example[nameId] = chemical
         table.append(example)
     # The channel name (including its spelling) is kept exactly as is.
     self.send("Molecule fragmetns", table)
    def __call__(self, instances, origWeight=0):
        """Train up to ``self.t`` weighted classifiers by boosting.

        A temporary weight meta attribute is attached to ``instances``,
        copied from ``origWeight`` when given and set to 1.0 otherwise.
        Each round trains ``self.learner`` on the weighted instances and
        computes ``epsilon``, the weighted share of misclassified
        instances.  The classifier is stored with the vote
        ``log((1 - epsilon) / epsilon)`` (``inf`` when epsilon is 0).
        Training stops when epsilon is 0 or at least 0.499; in the
        latter case the last classifier is dropped unless it is the only
        one.  Otherwise the weights of correctly classified instances
        are multiplied by ``epsilon / (1 - epsilon)`` and all weights are
        rescaled to sum to 1.

        :param instances: training examples; the temporary weight is
            removed again before returning.
        :param origWeight: id of an existing weight meta attribute, or a
            falsy value for uniform weights.
        :return: a ``BoostedClassifier`` built from the
            (classifier, vote) pairs.
        """
        weight = orange.newmetaid()
        if origWeight:
            for i in instances:
                i.setweight(weight, i.getweight(origWeight))
        else:
            instances.addMetaAttribute(weight, 1.0)

        n = len(instances)
        classifiers = []
        for i in range(self.t):
            epsilon = 0.0
            classifier = self.learner(instances, weight)
            corr = []
            for ex in instances:
                if classifier(ex) != ex.getclass():
                    epsilon += ex.getweight(weight)
                    corr.append(0)
                else:
                    corr.append(1)
            epsilon = epsilon / float(
                sum(ex.getweight(weight) for ex in instances))
            # A conditional expression is required here: with the
            # ``a and b or c`` idiom a vote of exactly 0.0 (epsilon == 0.5)
            # was falsy and got replaced by ``inf``.
            # NOTE(review): epsilon == 1 still makes math.log raise.
            vote = math.log((1 - epsilon) / epsilon) if epsilon else inf
            classifiers.append((classifier, vote))
            if epsilon == 0 or epsilon >= 0.499:
                if epsilon >= 0.499 and len(classifiers) > 1:
                    del classifiers[-1]
                break
            beta = epsilon / (1 - epsilon)
            for e in range(n):
                if corr[e]:
                    instances[e].setweight(
                        weight, instances[e].getweight(weight) * beta)
            # Rescale so that the weights sum to 1 again.
            f = 1 / float(sum(ex.getweight(weight) for ex in instances))
            for e in range(n):
                instances[e].setweight(weight,
                                       instances[e].getweight(weight) * f)

        instances.removeMetaAttribute(weight)
        return BoostedClassifier(
            classifiers=classifiers,
            name=self.name,
            classvar=instances.domain.classVar)
Beispiel #17
0
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Turn the posted cluster selection into example tables.

    ``postdata['selected_nodes'][0]`` must be a JSON list of
    ``(example index, selected flag, cluster id)`` triples.  When the
    items of the distance matrix ``input_dict['dm']`` are an
    ExampleTable, each listed example is copied into a domain extended
    with a "Cluster" meta attribute and placed in the selected or the
    unselected table; one centroid per selected cluster is computed from
    the attribute averages / modes.  For any other kind of items all
    three results are ``None``.

    :param postdata: posted form data (``widget_id``, ``selected_nodes``).
    :param input_dict: widget inputs (``dm``, ``linkage``).
    :param output_dict: not used by this function.
    :return: dict with ``centroids``, ``selected_examples`` and
        ``unselected_examples``.
    :raises Exception: if the selection is missing or is not valid JSON.
    """
    print("cforange_hierarchical_clustering_finished")
    import json
    import Orange
    import orange

    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    # Not used below; kept so a missing 'widget_id' still fails here.
    widget_pk = postdata['widget_id'][0]

    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except (TypeError, ValueError, IndexError, KeyError):
        # Missing field (None is not subscriptable), empty list or
        # malformed JSON.  Other errors are no longer swallowed.
        raise Exception('Please select a threshold for determining clusters.')

    if isinstance(matrix.items, orange.ExampleTable):
        # NOTE(review): the clustering result is not used below; the call
        # is kept in case it is relied on for validation or side effects.
        root = Clustering.hierarchical_clustering(linkage, matrix)
        cluster_ids = set([cluster for _, _, cluster in selected_nodes])
        selected_clusters = set([cluster for _, selected, cluster in selected_nodes if selected])
        clustVar = orange.EnumVariable(str('Cluster'), values=["Cluster %d" % i for i in cluster_ids] + ["Other"])
        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())

        # Build the tables of selected and unselected examples.
        selected_table, unselected_table = orange.ExampleTable(domain), orange.ExampleTable(domain)
        for node_id, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[node_id])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)

        # Build the table of centroids.
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                label = "Cluster %d" % cluster
                clusterEx = orange.ExampleTable([member for member in selected_table if member[clustVar] == label])
                # Attribute statistics: average where one exists, otherwise
                # the most frequent value, otherwise unknown.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                values = [cs.avg if cs else (ds.modus() if ds else "?") for cs, ds in zip(contstat, discstat)]
                example = orange.Example(centroids.domain, values)
                example[clustVar] = clustVar(label)
                centroids.append(example)
    else: # Attribute distance
        centroids, selected_table, unselected_table = None, None, None
    return {'centroids' : centroids, 'selected_examples' : selected_table, 'unselected_examples' : unselected_table}
Beispiel #18
0
  def __call__(self, data, weight=0):
    """Induce features until none remain and return a lookup classifier.

    The data is copied, optionally passed through
    ``self.redundancyRemover`` and de-duplicated.  Then, repeatedly, the
    feature generator proposes new features, the best one is named
    "c<n>" and substituted into the data.  The loop stops when no
    features are proposed or when the best feature's bound set covers
    every attribute.  The result is ``orngLookup.lookupFromExamples``
    over the final data, using ``self.learnerForUnknown`` or a Bayes
    learner when that is not set.

    :param data: training examples.
    :param weight: weight meta id; when falsy, ``self.weight`` is used
        if present, otherwise a fresh weight attribute is added.
    :raises SystemError: if ``self.alternativeMeasure`` is set.
    """
    import orngLookup

    if self.alternativeMeasure:
      raise SystemError("alternativeMeasure not implemented yet")

    keepDuplicates = getattr(self, "keepDuplicates", 0)

    data = orange.ExampleTable(data)
    if not weight:
      if not hasattr(self, "weight"):
        weight = orange.newmetaid()
        data.addMetaAttribute(weight)
      else:
        # This is here for backward compatibility
        weight = self.weight

    if self.redundancyRemover:
      data = self.redundancyRemover(data, weight)
    if not keepDuplicates:
      data.removeDuplicates(weight)

    inducedCount = 0
    featureGenerator = FeatureGenerator(featureInducer=self.featureInducer, subsetsGenerator = self.subsetsGenerator)

    while True:
      candidates = featureGenerator(data, weight)
      if not candidates or len(candidates) == 0:
        break

      best = orngMisc.selectBest(candidates, orngMisc.compare2_lastBigger)[0]
      boundCount = len(best.getValueFrom.boundset())
      if boundCount == len(data.domain.attributes):
        break

      inducedCount += 1
      best.name = "c%d" % inducedCount

      data = replaceWithInduced(best, data)
      if not keepDuplicates:
        data.removeDuplicates(weight)

    if self.learnerForUnknown:
      fallbackLearner = self.learnerForUnknown
    else:
      fallbackLearner = orange.BayesLearner()

    return orngLookup.lookupFromExamples(data, weight, fallbackLearner)
  def __loadDataFromES(self, dataType, domain):
    """Load phrases from Elasticsearch into an Orange example table.

    For ``dataType == "train"`` a new domain is built from
    ``self.features`` with the class "is_good" and a "phrase" string
    meta; for any other type the given ``domain`` is used.  "train" and
    "holdout" fetch every phrase that has the ``is_training`` /
    ``is_holdout`` flag set to "1" or "0"; any other type fetches the
    single phrase ``self.phraseId`` and stores the response in
    ``self.phraseData``.

    Rows that cannot be converted are logged and skipped.

    :param dataType: "train", "holdout" or anything else for one phrase.
    :param domain: domain to use when not training.
    :return: the filled ``orange.ExampleTable``.
    """
    if dataType == "train":
      attributes = [self.__getOrangeVariableForFeature(feature) for feature in self.features]
      classAttribute = orange.EnumVariable("is_good", values = ["0", "1"])
      domain = orange.Domain(attributes, classAttribute)
      domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
    table = orange.ExampleTable(domain)

    if dataType in ("train", "holdout"):
      flagField = "is_training" if dataType == "train" else "is_holdout"
      query = {"query": {"terms": {flagField: ["1", "0"]}}}
      # Ask for the hit count first so the search returns every match.
      # The holdout branch used to read an undefined name here.
      phrasesCount = self.esClient.count(index=self.processorIndex, doc_type=self.processorPhraseType, body=query)
      size = phrasesCount["count"]
      phrases = self.esClient.search(index=self.processorIndex, doc_type=self.processorPhraseType, body=query, size=size)
      phrases = phrases["hits"]["hits"]
    else:
      self.phraseData = self.esClient.get(index=self.processorIndex, doc_type=self.processorPhraseType, id=self.phraseId)
      phrases = [self.phraseData]

    for hit in phrases:
      phrase = None
      try:
        row = hit["_source"]
        phrase = row["phrase"]
        featureValues = []
        classType = "?"
        for feature in self.features:
          featureValues.append(row["features"][feature["name"]].encode("ascii"))
        if dataType == "train":
          classType = row["is_training"].encode("ascii", "ignore")
        elif dataType == "holdout":
          classType = row["is_holdout"].encode("ascii")
        # Register values on discrete attributes before using them.
        for i, featureValue in enumerate(featureValues):
          attr = domain.attributes[i]
          if type(attr) is orange.EnumVariable:
            attr.addValue(featureValue)
        example = orange.Example(domain, (featureValues + [classType]))
        example[domain.getmetas().items()[0][0]] = phrase.encode("ascii")
        table.append(example)
      except Exception:
        # The phrase is captured up front: the handler used to index the
        # row again and could itself raise when "_source" or "phrase"
        # was missing.
        self.logger.error("Error classifying phrase '%s'" % (phrase,))
    return table
Beispiel #20
0
 def generateETStruct(path, medaData, numGenes=None):
     """Write one tab-delimited file per replica, grouped by strain.

     Creates ``path`` and one sub-directory per strain when missing.
     Each replica's raw 2-d data is converted to an example table, given
     a "DDB" string meta attribute filled from the DDB list, and saved
     as ``<path>/<strain>/<replica>.tab``.

     :param path: root output directory.
     :param medaData: currently ignored, see the note in the body.
     :param numGenes: when truthy, only the first ``numGenes`` rows of
         each table are saved.
     """
     ddbList = Dicty.DAnnotation.getDDBList()
     if not os.path.exists(path):
         os.mkdir(path)
     # NOTE(review): the medaData argument is replaced unconditionally
     # here, so callers cannot supply their own data -- confirm intent.
     medaData = Dicty.DData.DData_Nancy()
     for st in medaData.strains:
         # os.path.join instead of a hard-coded "\\" so the layout is
         # also correct on non-Windows systems.
         pathSt = os.path.join(path, st)
         if not os.path.exists(pathSt):
             os.mkdir(pathSt)
         for rep in medaData.strain2replicaList(st):
             ma2d = medaData.getRaw2d(rep)
             et = Meda.Preproc.ma2orng(ma2d,Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
             et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
             for eIdx,e in enumerate(et):
                 e["DDB"] = ddbList[eIdx]
             tabPath = os.path.join(pathSt, rep + ".tab")
             if numGenes:
                 orange.saveTabDelimited(tabPath, orange.ExampleTable(et[:numGenes]))
             else:
                 orange.saveTabDelimited(tabPath, et)
    def sendpredictions(self):
        """Send the data extended with the predictors' outputs.

        Sends ``None`` on "Predictions" when there is no data or no
        output variable.  Otherwise a copy of the data is built whose
        domain has the output variable appended and one computed meta
        attribute per predictor output: for a discrete output the
        probabilities of the selected classes and, if ``self.showClass``
        is set, the predicted class; for a continuous output the
        predicted value.  With ``self.doPrediction`` set, the output
        variable is filled using the first predictor.  Finally
        ``self.changedFlag`` is cleared.
        """
        if not (self.data and self.outvar):
            self.send("Predictions", None)
            return

        # predictions, data set with class predictions
        classification = self.outvar.varType == orange.VarTypes.Discrete

        metas = []
        if classification:
            if len(self.selectedClasses):
                for c in self.predictors.values():
                    for i in self.selectedClasses:
                        probName = str("%s(%s)" % (c.name, str(self.outvar.values[i])))
                        # Defaults bind the current predictor and class index.
                        probVar = orange.FloatVariable(
                            name=probName,
                            getValueFrom=lambda ex, rw, cindx=i, c=c: orange.Value(c(ex, c.GetProbabilities)[cindx]))
                        metas.append(probVar)
            if self.showClass:
                for c in self.predictors.values():
                    classMeta = orange.EnumVariable(
                        name=str(c.name), values=self.outvar.values,
                        getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                    metas.append(classMeta)
        else:
            # regression
            for c in self.predictors.values():
                valueMeta = orange.FloatVariable(
                    name="%s" % str(c.name),
                    getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                metas.append(valueMeta)

        classVar = self.outvar
        domain = orange.Domain(self.data.domain.attributes + [classVar])
        domain.addmetas(self.data.domain.getmetas())
        for metaVar in metas:
            domain.addmeta(orange.newmetaid(), metaVar)
        predictions = orange.ExampleTable(domain, self.data)
        if self.doPrediction:
            firstPredictor = self.predictors.values()[0]
            for example in predictions:
                example[classVar] = firstPredictor(example)

        predictions.name = self.data.name
        self.send("Predictions", predictions)

        self.changedFlag = False
Beispiel #22
0
    def convert_table(self, table_name, cls_att=None):
        '''
        Returns the target table as an orange example table.

        Columns typed 'd' by ``self.orng_type`` become discrete variables
        (values taken from ``self.db.col_vals``), columns typed 'c'
        become continuous variables and everything else becomes a string
        variable.  The column named ``cls_att`` becomes the class;
        columns typed 'string' and primary/foreign key columns become
        meta attributes; the rest are regular attributes.

        :param table_name: name of the database table to convert.
        :param cls_att: optional name of the class column.
        :return: an ``orange.ExampleTable`` named after the table.
        :raises Exception: if the class column is typed 'string'.
        '''
        import orange

        cols = self.db.cols[table_name]
        attributes, metas, class_var = [], [], None
        for col in cols:
            att_type = self.orng_type(table_name,col)
            if att_type == 'd':
                att_vals = self.db.col_vals[table_name][col]
                att_var = orange.EnumVariable(str(col), values=[str(val) for val in att_vals])
            elif att_type == 'c':
                att_var = orange.FloatVariable(str(col))
            else:
                att_var = orange.StringVariable(str(col))
            if col == cls_att:
                if att_type == 'string':
                    raise Exception('Unsuitable data type for a target variable: %s' % att_type)
                class_var = att_var
                continue
            # Same grouping as the former unparenthesised and/or chain.
            is_pkey = table_name in self.db.pkeys and col in self.db.pkeys[table_name]
            is_fkey = table_name in self.db.fkeys and col in self.db.fkeys[table_name]
            if att_type == 'string' or is_pkey or is_fkey:
                metas.append(att_var)
            else:
                attributes.append(att_var)
        domain = orange.Domain(attributes, class_var)
        for meta in metas:
            domain.addmeta(orange.newmetaid(), meta)
        dataset = orange.ExampleTable(domain)
        dataset.name = table_name
        for row in self.db.rows(table_name, cols):
            example = orange.Example(domain)
            for col, val in zip(cols, row):
                # Database NULLs become Orange's unknown value.
                example[str(col)] = str(val) if val is not None else '?'
            dataset.append(example)
        return dataset
# xtest: RANDOM

import orange

# Load two tables; the second load is given data1's domain through `use`
# (presumably so that descriptors shared by both files are reused -- confirm
# against the Orange loader documentation).
data1 = orange.ExampleTable("merge1")
data2 = orange.ExampleTable("merge2", use = data1.domain)

# Regular attributes of the first table.
a1, a2 = data1.domain.attributes

# Meta attributes of the first table, looked up by name, and their ids.
# NOTE: `metas` and `m1i` are not used further down in this script.
metas = data1.domain.getmetas()
m1, m2 = data1.domain["m1"], data1.domain["m2"]
m1i, m2i = data1.domain.metaid(m1), data1.domain.metaid(m2)

# Regular attributes of the second table (a1 is rebound here), plus two
# brand-new variables that appear in neither table.
a1, a3 = data2.domain.attributes
n1 = orange.FloatVariable("n1")
n2 = orange.FloatVariable("n2")

# Target domain: m1 moves into the ordinary variable list, m2 stays a meta
# under its original id, while a2 and n2 become metas under fresh ids.
newdomain = orange.Domain([a1, a3, m1, n1])
newdomain.addmeta(m2i, m2)
newdomain.addmeta(orange.newmetaid(), a2)
newdomain.addmeta(orange.newmetaid(), n2)

# Build one example in the new domain from the first example of each table.
merge = orange.Example(newdomain, [data1[0], data2[0]])
print "First example: ", data1[0]
print "Second example: ", data2[0]
print "Merge: ", merge
    def getSelectionsAsExampleTables(self, attrList, useAnchorData=1, addProjectedPositions=0):
        """Split the raw data into (selected, unselected) example tables.

        NOTE: the method is currently disabled. It returns ``(None, None)``
        straight away and nothing after the first ``return`` runs. The
        remaining body is kept so it can be re-enabled by removing that line.

        attrList -- attribute labels to project when ``useAnchorData`` is
            false; fewer than three labels yields ``(None, None)``.
        useAnchorData -- when true, attribute indices are taken from
            ``self.anchor_data`` (the label is read from ``val[3]``).
        addProjectedPositions -- 0: no positions are added; 1: X/Y/Z
            positions are prepended as regular attributes; 2: X/Y/Z positions
            are added as meta attributes.

        Returns a ``(selected, unselected)`` tuple; either element is ``None``
        when the corresponding table would be empty.
        """
        return (None, None) # TODO: this is disabled for now

        if not self.have_data:
            return (None, None)

        selected = self.get_selected_indices()

        if addProjectedPositions == 0 and not numpy.any(selected):
            return (None, self.raw_data)
        if (useAnchorData and len(self.anchor_data) < 3) or len(attrList) < 3:
            return (None, None)

        x_attr = orange.FloatVariable("X Positions")
        y_attr = orange.FloatVariable("Y Positions")
        z_attr = orange.FloatVariable("Z Positions")

        if addProjectedPositions == 1:
            domain = orange.Domain([x_attr, y_attr, z_attr] + [v for v in self.data_domain.variables])
        elif addProjectedPositions == 2:
            domain = orange.Domain(self.data_domain)
            domain.addmeta(orange.newmetaid(), x_attr)
            domain.addmeta(orange.newmetaid(), y_attr)
            domain.addmeta(orange.newmetaid(), z_attr)
        else:
            domain = orange.Domain(self.data_domain)

        domain.addmetas(self.data_domain.getmetas())

        if useAnchorData:
            indices = [self.attribute_name_index[val[3]] for val in self.anchor_data]
        else:
            indices = [self.attribute_name_index[label] for label in attrList]
        valid_data = self.getValidList(indices)
        if len(valid_data) == 0:
            return (None, None)

        array = self.create_projection_as_numeric_array(attrList, scaleFactor=self.scaleFactor, useAnchorData=useAnchorData, removeMissingData=0)
        # Identity test: `array == None` on a numpy array compares element by
        # element and cannot be used as a plain truth value.
        if array is None:
            return (None, None)

        unselected = numpy.logical_not(selected)
        selected_indices, unselected_indices = list(selected), list(unselected)

        if addProjectedPositions:
            selected = orange.ExampleTable(domain, self.raw_data.selectref(selected_indices))
            unselected = orange.ExampleTable(domain, self.raw_data.selectref(unselected_indices))
            selected_index = 0
            unselected_index = 0
            # Walk the original rows once, copying each projected position
            # into the next free row of whichever table the row belongs to.
            for i, is_selected in enumerate(selected_indices):
                if is_selected:
                    selected[selected_index][x_attr] = array[i][0]
                    selected[selected_index][y_attr] = array[i][1]
                    selected[selected_index][z_attr] = array[i][2]
                    selected_index += 1
                else:
                    unselected[unselected_index][x_attr] = array[i][0]
                    unselected[unselected_index][y_attr] = array[i][1]
                    unselected[unselected_index][z_attr] = array[i][2]
                    # Previously missing: without this every unselected
                    # position was written to row 0.
                    unselected_index += 1
        else:
            selected = self.raw_data.selectref(selected_indices)
            unselected = self.raw_data.selectref(unselected_indices)

        if len(selected) == 0:
            selected = None
        if len(unselected) == 0:
            unselected = None
        return (selected, unselected)
# This fragment relies on names set up earlier in the script and not visible
# here: data, meas, fstr, attrs, names and the random module.

# Score every attribute from a contingency computed once for the whole
# domain, addressing the attribute by index, by name and by descriptor.
dcont = orange.DomainContingency(data)
print "Computing information gain from DomainContingency"
print fstr % (("- by attribute number:",) + tuple([meas(i, dcont) for i in range(attrs)]))
print fstr % (("- by attribute name:",) + tuple([meas(i, dcont) for i in names]))
print fstr % (("- by attribute descriptor:",) + tuple([meas(i, dcont) for i in data.domain.attributes]))
print

# Same three addressing modes, but each score is computed from a
# per-attribute ContingencyAttrClass together with the class distribution.
# NOTE(review): the header below repeats the DomainContingency text although
# this section does not use dcont -- possibly a copy-paste slip; left as is
# because it is part of the script's printed output.
print "Computing information gain from DomainContingency"
cdist = orange.Distribution(data.domain.classVar, data)
print fstr % (("- by attribute number:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in range(attrs)]))
print fstr % (("- by attribute name:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in names]))
print fstr % (("- by attribute descriptor:",) + tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in data.domain.attributes]))
print

# Build a derived attribute with one value per pair of values of attributes
# 2 and 3; its value is obtained from those two through a lookup table.
values = ["v%i" % i for i in range(len(data.domain[2].values)*len(data.domain[3].values))]
cartesian = orange.EnumVariable("cart", values = values)
cartesian.getValueFrom = orange.ClassifierByLookupTable(cartesian, data.domain[2], data.domain[3], values)

print "Information gain of Cartesian product of %s and %s: %6.4f" % (data.domain[2].name, data.domain[3].name, meas(cartesian, data))

# Register an unnamed two-valued meta attribute under a fresh id and add it
# to every example.
mid = orange.newmetaid()
data.domain.addmeta(mid, orange.EnumVariable(values = ["v0", "v1"]))
data.addMetaAttribute(mid)

# Fill the meta attribute with reproducible pseudo-random 0/1 values
# (private generator, fixed seed).
rg = random.Random()
rg.seed(0)
for ex in data:
    ex[mid] = orange.Value(rg.randint(0, 1))

# Score the meta attribute, addressed by its id.
print "Information gain for a random meta attribute: %6.4f" % meas(mid, data)
# Category:    basic classes, meta-attributes
# Classes:     Example
# Uses:        lenses
# Referenced:  Example.htm

import orange, random

data = orange.ExampleTable("lenses")
# Fixed seed so the random assignments below are reproducible.
random.seed(0)
#id2 = orange.newmetaid()
#w2 = orange.FloatVariable("ww")
#The below two lines fail (and SHOULD fail):
#data[0].setmeta(id, orange.Value(ww, 2.0))
#data[0].setmeta(id2, "2.0")

# A fresh meta id and a two-valued variable to store under it.
ok_id = orange.newmetaid()
ok = orange.EnumVariable("ok?", values=["no", "yes"])

# Before the meta attribute is registered in the domain, the value is given
# as an orange.Value that carries its own variable.
data[0][ok_id] = orange.Value(ok, "yes")

# Register the meta attribute in the domain.
data.domain.addmeta(ok_id, ok)

# After registration the same meta value is assigned from a plain string,
# addressed by id, by descriptor and by name.
data[0][ok_id] = "yes"
data[0][ok] = "no"
data[0]["ok?"] = "no"

# Give every example a randomly chosen value of `ok`; setvalue receives only
# the Value, which was constructed with its variable.
no_yes = [orange.Value(ok, "no"), orange.Value(ok, "yes")]
for example in data:
    example.setvalue(no_yes[random.randint(0, 1)])

print data[0][ok_id]
    def commit(self):
        """Split the data by the current score cut-offs and send the results.

        Does nothing when there is no data or no scores. Otherwise the
        selected score method's test function (element 2 of the
        ``score_methods`` entry) is applied to all scores with the histogram's
        lower and upper boundaries, and the keys are divided into "selected"
        and "remaining".

        With ``genes_in_columns`` set, the keys are example indices: two
        tables of selected / remaining examples are sent, optionally carrying
        the score as a meta attribute.

        Otherwise the keys are attributes: two tables restricted to the
        selected / remaining attributes are sent (optionally with the score
        stored in each attribute's ``attributes`` dict), followed by a
        "Selected genes" table of (label, score) rows.

        Clears ``data_changed_flag`` at the end.
        """
        if not self.data or not self.scores:
            return
        test = self.score_methods[self.method_index][2]
        
        cutOffUpper = self.histogram.upperBoundary
        cutOffLower = self.histogram.lowerBoundary
        
        # Column 0 holds the keys, column 1 the scores; column 1 is then
        # overwritten with the outcome of the cut-off test.
        scores = np.array(self.scores.items())
        scores[:, 1] = test(np.array(scores[:, 1], dtype=float), cutOffLower, cutOffUpper)
        selected = set([key for key, passed in scores if passed])
        remaining = set([key for key, passed in scores if not passed])
        if self.data and self.genes_in_columns:
            selected = sorted(selected)
            if selected:
                newdata = orange.ExampleTable(
                    orange.Domain(self.data.domain),
                    [self.data[int(i)] for i in selected],
                    name=self.data.name
                )
            else:
                newdata = None
            if self.add_scores_to_output:
                # One descriptor and id shared by both output tables.
                score_attr = orange.FloatVariable(self.score_methods[self.method_index][0])
                mid = orange.newmetaid()
                
            if self.add_scores_to_output and newdata is not None:
                newdata.domain.addmeta(mid, score_attr)
                for ex, key in zip(newdata, selected):
                    ex[mid] = self.scores[key]
                    
            self.send("Example table with selected genes", newdata)
            
            remaining = sorted(remaining)
            if remaining:
                newdata = orange.ExampleTable(
                    orange.Domain(self.data.domain),
                    [self.data[int(i)] for i in remaining],
                    name=self.data.name
                )
            else:
                newdata = None
            
            if self.add_scores_to_output and newdata is not None:
                newdata.domain.addmeta(mid, score_attr)
                for ex, key in zip(newdata, remaining):
                    ex[mid] = self.scores[key]
                    
            self.send("Example table with remaining genes", newdata)
            
        elif self.data and not self.genes_in_columns:
            method_name = self.score_methods[self.method_index][0]
            selected_attrs = [attr for attr in self.data.domain.attributes
                              if attr in selected or
                              attr.varType == orange.VarTypes.String]  # ?? why strings
            if self.add_scores_to_output:
                # Annotate copies so the input descriptors stay untouched.
                scores = [self.scores[attr] for attr in selected_attrs]
                attrs = [copy_descriptor(attr) for attr in selected_attrs]
                for attr, score in zip(attrs, scores):
                    attr.attributes[method_name] = str(score)
                selected_attrs = attrs

            newdomain = orange.Domain(selected_attrs, self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(
                newdomain, self.data, name=self.data.name
            )
            self.send("Example table with selected genes",
                      newdata if selected_attrs else None)

            remaining_attrs = [attr for attr in self.data.domain.attributes
                               if attr in remaining]
            if self.add_scores_to_output:
                scores = [self.scores[attr] for attr in remaining_attrs]
                attrs = [copy_descriptor(attr) for attr in remaining_attrs]
                for attr, score in zip(attrs, scores):
                    # Store this attribute's own score; the previous code
                    # stringified the whole `scores` list on every attribute.
                    attr.attributes[method_name] = str(score)
                remaining_attrs = attrs

            newdomain = orange.Domain(remaining_attrs, self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(
                newdomain, self.data, name=self.data.name
            )
            self.send("Example table with remaining genes",
                      newdata if remaining_attrs else None)

            domain = orange.Domain([orange.StringVariable("label"),
                                    orange.FloatVariable(self.score_methods[self.method_index][0])],
                                    False)
            if selected_attrs:
                selected_genes = orange.ExampleTable(domain,
                            [[attr.name, self.scores.get(attr, 0)] for attr in selected_attrs])
            else:
                selected_genes = None
            self.send("Selected genes",  selected_genes)
            
        else:
            # Defensive only: the guard at the top already returned when
            # self.data is falsy, so one of the two branches above is taken.
            self.send("Example table with selected genes", None)
            self.send("Example table with remaining genes", None)
            self.send("Selected genes", None)
        self.data_changed_flag = False
Beispiel #28
0
 def __init__(self,  minSupport = 0.05, minConfidence = 0.8, k=3):
     """Store the learner settings and reserve a meta-attribute id.

     minSupport    -- kept as ``self.minSup``.
     minConfidence -- kept as ``self.minConf``.
     k             -- kept as ``self.k``.

     ``self.weightID`` receives a fresh id from ``orange.newmetaid()``.
     """
     self.minSup, self.minConf = minSupport, minConfidence
     self.k = k
     self.weightID = orange.newmetaid()
Beispiel #29
0
 def __init__(self,k):
     """Store ``k``, reserve two meta-attribute ids and set up the rule finder.

     ``self.counter`` and ``self.weightID`` each get a fresh id from
     ``orange.newmetaid()`` (counter first). ``self.rbf`` is a
     ``RuleBeamFinder`` whose evaluator is a ``RuleEvaluator_WRAcc``.
     """
     self.k = k
     # Tuple elements are evaluated left to right, so the counter id is
     # still drawn before the weight id.
     self.counter, self.weightID = orange.newmetaid(), orange.newmetaid()
     beam_finder = orange.RuleBeamFinder()
     beam_finder.evaluator = RuleEvaluator_WRAcc()
     self.rbf = beam_finder
Beispiel #30
0
    def __call__(self, data, targetClass, num_of_rules ):
        """Run the beam search for ``targetClass`` and return the found rules.

        data         -- example table to learn from. If it has continuous
                        attributes they are discretized first and the search
                        runs on the discretized copy.
        targetClass  -- class value the rules are built for; passed on to the
                        candidate/beam helpers and to SDRule.
        num_of_rules -- when non-zero, the final selection beam is reduced
                        with ``ruleSubsetSelection``.

        Returns an ``SDRules`` object labelled "SD-inverted". When
        ``self.dataOK(data)`` is false, no value is returned by the code
        visible here.
        """
        self.alredyRefinedRules[str(targetClass)] = set()
        if self.dataOK(data):  # Checks whether targetClass is discrete
            data_discretized = False
            # If any of the attributes are continuous, discretize them
            if data.domain.hasContinuousAttributes():
                original_data = data
                data_discretized = True
                new_domain = []
                discretize = orange.EntropyDiscretization(forceAttribute=True)
                for attribute in data.domain.attributes:
                    if attribute.varType == orange.VarTypes.Continuous:
                        d_attribute = discretize(attribute, data)
                        # An attribute is irrelevant, if it is discretized into a single interval
#                        if len(d_attribute.getValueFrom.transformer.points) > 0:
                        new_domain.append(d_attribute)
                    else:
                        new_domain.append(attribute)
                data = original_data.select(new_domain + [original_data.domain.classVar])

            self.data = data
            # Same table object as self.data (an alias, not a copy), so the
            # meta attributes added below are added to `data` as well.
            self.weigted_data = data
            self.c = orange.newmetaid()
            self.count = orange.newmetaid()
            self.weigted_data.addMetaAttribute(self.c)
            self.weigted_data.addMetaAttribute(self.count)
            #print self.c
            #print self.weigted_data.domain.attributes
            self.targetClass = targetClass

            #Initialize CanditatesList (all features)
            self.fillCandidatesList(data,targetClass)
            """
            print "Candidates for refinement:\n"
            for rule in self.refinementCandidates:
                print "N: %d\t\tTP: %d\t\t\tFP: %d\t\tRule:\t%s" %(len(rule.TP)+len(rule.FP),len(rule.TP), len(rule.FP), rule.ruleToString())

            print "\nCandidates for selection:\n"
            for rule in self.selectionCandidates:
                print "N: %d\t\tTP: %d\t\t\tFP: %d\t\tRule:\t%s" %(len(rule.TP)+len(rule.FP),len(rule.TP), len(rule.FP), rule.ruleToString())
            """
            """
            print self.refinementCandidates[0].ruleToString()
            print "Best refinement: P %d\tN %d\tp %d\tn %d\tRQ %.3f" %(self.refinementCandidates[0].P,self.refinementCandidates[0].N,len(self.refinementCandidates[0].TP),len(self.refinementCandidates[0].FP), self.refinementCandidates[0].refinement_quality)
            print "\n\n"
            """
            #Initialize RefinementBeam, consisting of refinementBeamWidth empty rules
            self.initializeRefinementBeam()
            #Initialize SelectionBeam, consisting of selectionBeamWidth empty rules
            self.initializeSelectionBeam()

            #update RefinementBeam
            self.updateRefinementBeam(self.refinementCandidates)
            #update SelectionBeam
            #self.chooseSelectionCandidates(self.RefinementBeam)
            """
            print self.selectionCandidates[0].ruleToString()
            print "Best selection: P %d\tN %d\tp %d\tn %d\tSQ %.3f" %(self.selectionCandidates[0].P,self.selectionCandidates[0].N,len(self.selectionCandidates[0].TP),len(self.selectionCandidates[0].FP), self.selectionCandidates[0].selection_quality)
            print "\n\n"
            """
            #print "Before update"
            self.updateSelectionBeam(self.selectionCandidates)
            #print "After update"

            #self.printBeam(self.refinementCandidates, name="Refinement candidates")
            #self.printBeam(self.RefinementBeam, name="Refinement beam")
            #self.printBeam(self.refinementCandidates, name="Refinement candidates")


            #self.printBeam(self.SelectionBeam, name="Selection beam")

            # NOTE: both flags are assigned below but never read, so the loop
            # always runs its fixed number of rounds.
            improvements = True
            refinement_improvements = True

            # ms runs from 2 to max_steps inclusive, i.e. four refinement
            # rounds after the initial beam updates above.
            ms=2
            max_steps=5
            # and i<max_steps and refinement_improvements
            # improvements and i<max_steps and refinement_improvements:
            #while i<max_steps:
            while ms <= max_steps:
                #print "refinement starts, length %d" %i
                self.refinedRefinementBeam(targetClass)
                #self.printBeam(self.refinementCandidates,"Refinement candidates")
                refinement_improvements = self.updateRefinementBeam(self.refinementCandidates)
                #self.printBeam(self.RefinementBeam, name="Refinement beam")
                #unionOfBeams = []; unionOfBeams.extend(self.RefinementBeam); unionOfBeams.extend(self.SelectionBeam)
                #self.chooseSelectionCandidates(unionOfBeams)
                #self.printBeam(self.selectionCandidates, name="Selection candidates")
                #print "Before update"
                improvements = self.updateSelectionBeam(self.selectionCandidates)
                #print "After update"
                #m(self.SelectionBeam, "Selection beam")
                ms=ms+1

            beam = self.SelectionBeam
            #self.printBeam(beam, "Final selection beam.")
            # A num_of_rules of 0 keeps the whole selection beam.
            if num_of_rules != 0:
                beam = self.ruleSubsetSelection(beam, num_of_rules, data)
                #self.printBeam(beam, "After subset selection")
                self.SelectionBeam = beam

            if data_discretized:
                 targetClassRule = SDRule(original_data, targetClass, conditions=[], g=self.g)
                 #targetClassRule = SDRule(original_data, targetClass, conditions=[], g=1, refinement_heuristics=self.refinement_heuristics, selection_heuristics=self.selection_heuristics)
                 # change beam so the rules apply to original data
                 #self.printBeam(self.SelectionBeam, "Before discretization")
                 self.SelectionBeam = [rule.getUndiscretized(original_data) for rule in self.SelectionBeam]
                 #self.printBeam(self.SelectionBeam, "After discretization")

            else:
                 targetClassRule = SDRule(data, targetClass, conditions=[], g =self.g)
                 #targetClassRule = SDRule(data, targetClass, conditions=[], g =1, refinement_heuristics=self.refinement_heuristics, selection_heuristics=self.selection_heuristics)

            #print "Ready to return"
            #self.printBeam(self.SelectionBeam, "This is returned")
            rules = SDRules(self.SelectionBeam, targetClassRule, "SD-inverted")
            #rules.printRules()
            #print "*"*100
            return rules