예제 #1
0
def WrapperFSS(data, learner, verbose=0, folds=10):
  """Greedy forward feature-subset selection wrapped around a learner.

  Starts from a domain that contains only the class variable and, in each
  round, adds the single free attribute that yields the highest
  cross-validated classification accuracy.  Selection stops when no
  candidate improves on the best accuracy seen so far, or when no free
  attributes remain.

  Arguments:
    data    -- example table whose attributes are candidates
    learner -- learner evaluated with orngTest.crossValidation
    verbose -- 0 is silent, 1 reports each accepted step and the stop,
               2 additionally reports the start and every candidate
    folds   -- number of cross-validation folds

  Returns an orange.Domain made of the selected attributes and the class.
  """
  classVar = data.domain.classVar
  currentAtt = []
  freeAttributes = list(data.domain.attributes)

  def accuracy(attributes):
    # Cross-validated classification accuracy using only `attributes`.
    d = data.select(orange.Domain(attributes + [classVar]))
    results = orngTest.crossValidation([learner], d, folds=folds)
    return orngStat.CA(results)[0]

  maxStat = accuracy(currentAtt)
  if verbose>=2:
    print("start (%5.3f)" % maxStat)

  # Looping on freeAttributes (instead of `while 1`) ends the search once
  # every attribute has been selected; max() of an empty candidate list
  # would otherwise raise ValueError.
  while freeAttributes:
    stat = []
    for a in freeAttributes:
      ca = accuracy([a] + currentAtt)
      stat.append(ca)
      if verbose>=2:
        print("  %s gained %5.3f" % (a.name, ca))

    bestStat = max(stat)
    if bestStat <= maxStat:
      if verbose:
        print("stopped (%5.3f)" % (bestStat - maxStat))
      break

    bestVarIndx = stat.index(bestStat)
    if verbose:
      print("gain: %5.3f, attribute: %s" % (bestStat - maxStat, freeAttributes[bestVarIndx].name))
    maxStat = bestStat
    currentAtt = currentAtt + [freeAttributes[bestVarIndx]]
    del freeAttributes[bestVarIndx]

  return orange.Domain(currentAtt + [classVar])
예제 #2
0
파일: orngLR.py 프로젝트: janezd/orange30-c
    def __call__(self, examples):
        """Fit one logistic regression per attribute and collect the statistics.

        The examples are first passed through createFullNoDiscTable.  A
        separate model is then fitted on each (attribute, class) pair, plus
        one model on the class alone.  For every statistic (beta, beta_se,
        P, wald_Z) the result holds element 0 of the class-only model
        followed by element 1 of each single-attribute model.

        Returns a Univariate_LogRegClassifier built from those lists and
        the converted domain.
        """
        examples = createFullNoDiscTable(examples)

        def fit(domain):
            # Project onto `domain`, drop examples with missing values, fit.
            projected = examples.select(domain)
            return LogRegLearner(orange.Preprocessor_dropMissing(projected))

        per_attribute = [
            fit(orange.Domain(attr, examples.domain.classVar))
            for attr in examples.domain.attributes
        ]
        class_only = fit(orange.Domain(examples.domain.classVar))

        def collect(stat):
            # Element 0 of the class-only model, then element 1 of each
            # single-attribute model.
            first = getattr(class_only, stat)[0]
            return [first] + [getattr(model, stat)[1] for model in per_attribute]

        beta = collect("beta")
        beta_se = collect("beta_se")
        P = collect("P")
        wald_Z = collect("wald_Z")

        return Univariate_LogRegClassifier(beta=beta,
                                           beta_se=beta_se,
                                           P=P,
                                           wald_Z=wald_Z,
                                           domain=examples.domain)
예제 #3
0
    def get_example_table(self):
        """Run the query and convert the response to an orange.ExampleTable.

        For the "tsv" format the first response line supplies the column
        names and every column becomes a string variable; None is returned
        when the response has no non-blank data rows.  For the "fasta"
        format every parsed record becomes an (id, sequence) row.

        Raises BioMartError when self.format is neither "tsv" nor "fasta"
        (compared case-insensitively).
        """
        import orange
        data = self.run(count=False, header=True)
        fmt = self.format.lower()

        if fmt == "tsv":
            header, data = data.split("\n", 1)
            domain = orange.Domain(
                [orange.StringVariable(name) for name in header.split("\t")],
                False)
            data = [
                line.split("\t") for line in data.split("\n") if line.strip()
            ]
            return orange.ExampleTable(domain, data) if data else None
        elif fmt == "fasta":
            domain = orange.Domain([
                orange.StringVariable("id"),
                orange.StringVariable("sequence")
            ], False)  # TODO: meaningful id
            examples = []
            from StringIO import StringIO
            from Bio import SeqIO
            for seq in SeqIO.parse(StringIO(data), "fasta"):
                examples.append([seq.id, str(seq.seq)])
            return orange.ExampleTable(domain, examples)
        else:
            # "%s" is required: the previous bare "%" was an incomplete
            # format spec and raised ValueError before BioMartError
            # could be constructed.
            raise BioMartError("Unsupported format: %s" % self.format)
예제 #4
0
    def test_saveTab(self):
        """Save tables to a .tab file and check what is read back.

        Three cases are exercised: the first three iris examples, a
        single-variable domain built with the default second argument,
        and the same kind of domain built with an explicit None.
        The file is removed after each case, even on failure.
        """
        filename = "test-save.tab"

        iris_head = orange.ExampleTable("iris")[:3]
        iris_head.save(filename)
        try:
            reloaded = orange.ExampleTable(filename)
            for saved, loaded in zip(iris_head, reloaded):
                self.assertEqual(saved, loaded)
        finally:
            os.remove(filename)

        # Single variable, default second argument: asserted below to
        # have no attributes and that variable as the class.
        default_domain = orange.Domain([orange.ContinuousVariable("a")])
        table = orange.ExampleTable(default_domain)
        table += [[value] for value in range(3)]
        table.save(filename)
        try:
            reloaded = orange.ExampleTable(filename)
            # NOTE(review): the two domain checks inspect the in-memory
            # table, not the reloaded one -- confirm this is intended.
            self.assertEqual(len(table.domain.attributes), 0)
            self.assertEqual(table.domain.classVar, default_domain[0])
            for value in range(3):
                self.assertEqual(reloaded[value], [value])
        finally:
            os.remove(filename)

        # Single variable, explicit None: asserted below to be an
        # ordinary attribute.
        classless_domain = orange.Domain([orange.ContinuousVariable("a")], None)
        table = orange.ExampleTable(classless_domain)
        table += [[value] for value in range(3)]
        table.save(filename)
        try:
            reloaded = orange.ExampleTable(filename)
            # NOTE(review): as above, the in-memory table is inspected.
            self.assertEqual(len(table.domain.attributes), 1)
            self.assertEqual(table.domain[0], classless_domain[0])
            for value in range(3):
                self.assertEqual(reloaded[value], [value])
        finally:
            os.remove(filename)
예제 #5
0
    def sendData(self, km=None):
        """Send the data annotated with cluster membership, and the centroids.

        When `km` is not given, the clustering is taken from
        self.bestRun[1] if self.optimized is set, otherwise from self.km.
        If there is no data or no clustering, None is sent on both
        channels.

        self.addIdAs selects where the cluster variable is placed:
        0 makes it the class (an existing class is kept as a meta
        attribute), 1 appends it to the attributes, and any other value
        stores it as a meta attribute.
        """
        if km is None:
            if self.optimized:
                km = self.bestRun[1]
            else:
                km = self.km
        if not (self.data and km):
            self.send("Examples", None)
            self.send("Centroids", None)
            return

        clusterNames = ["C%d" % (x + 1) for x in range(km.k)]
        clustVar = orange.EnumVariable(self.classifyName, values=clusterNames)

        source = self.data.domain
        if self.addIdAs == 0:
            domain = orange.Domain(source.attributes, clustVar)
            if source.classVar:
                domain.addmeta(orange.newmetaid(), source.classVar)
            # presumably index -1 addresses the class value -- confirm
            aid = -1
        elif self.addIdAs == 1:
            domain = orange.Domain(source.attributes + [clustVar],
                                   source.classVar)
            aid = len(source.attributes)
        else:
            domain = orange.Domain(source.attributes, source.classVar)
            aid = orange.newmetaid()
            domain.addmeta(aid, clustVar)

        domain.addmetas(source.getmetas())

        # Convert the data to the new domain, then fill in the cluster
        # assigned to each example.
        labelled = orange.ExampleTable(domain, self.data)
        for example, cluster in izip(labelled, km.clusters):
            example[aid] = cluster

        self.send("Examples", labelled)
        self.send("Centroids", orange.ExampleTable(km.centroids))
예제 #6
0
 def sendList(self, selectedInd):
     """Send the items selected in the MDS plot.

     When self.data is a non-empty list of strings, a table is sent on
     "Data".  With self.selectionOptions == 1 it holds X, Y and name as
     regular variables; otherwise it holds only the name, with X and Y
     added as meta attributes when self.selectionOptions is truthy.

     Otherwise the selected items are grouped and sent on
     "Structured Data Files" as a list of (name, items) pairs, or None
     when nothing is selected.

     selectedInd -- indices of the selected points; each index addresses
                    both self.mds.points and self.data.
     """
     if self.data and isinstance(self.data[0], str):
         xAttr=orange.FloatVariable("X")
         yAttr=orange.FloatVariable("Y")
         nameAttr=orange.StringVariable("name")
         if self.selectionOptions == 1:
             domain = orange.Domain([xAttr, yAttr, nameAttr])
             selection = orange.ExampleTable(domain)
             for ind in selectedInd:
                 # The name must come from the same index as the point;
                 # the position within the selection was used before,
                 # which paired points with the wrong names.
                 selection.append(list(self.mds.points[ind]) + [self.data[ind]])
         else:
             domain = orange.Domain([nameAttr])
             if self.selectionOptions:
                 domain.addmeta(orange.newmetaid(), xAttr)
                 domain.addmeta(orange.newmetaid(), yAttr)
             selection = orange.ExampleTable(domain)
             for row, ind in enumerate(selectedInd):
                 selection.append([self.data[ind]])
                 if self.selectionOptions:
                     selection[row][xAttr]=self.mds.points[ind][0]
                     selection[row][yAttr]=self.mds.points[ind][1]
         self.send("Data", selection)
         return

     if not selectedInd:
         self.send("Structured Data Files", None)
     else:
         datasets=[self.data[i] for i in selectedInd]
         # NOTE(review): group names come from `dirname` but membership is
         # tested against `strain` -- confirm the two are meant to match.
         names=list(set([d.dirname for d in datasets]))
         data=[(name, [d for d in datasets if d.strain==name]) for name in names]
         self.send("Structured Data Files",data)
예제 #7
0
    def commit(self):
        """Build the output table and send it on the "Data" channel.

        With self.data present, each attribute is replaced by its
        discretizer when that discretizer has cut-off points, kept
        unchanged when its discretizer is None, and dropped otherwise.
        The class is taken from the original data when
        self.outputOriginalClass is set, else from self.data.

        Without self.data, the output falls back to self.discClassData or
        self.originalData under the conditions below, or None.
        """
        self.clearLineEditFocus()

        newdata = None
        if self.data:
            kept = []
            for attr, disc in zip(self.data.domain.attributes, self.discretizers):
                if disc:
                    # a discretizer without cut-off points drops the attribute
                    if disc.getValueFrom.transformer.points:
                        kept.append(disc)
                elif disc == None:  # can also be False -> remove
                    kept.append(attr)

            if not self.data.domain.classVar:
                classVar = None
            elif self.outputOriginalClass:
                classVar = self.originalData.domain.classVar
            else:
                classVar = self.data.domain.classVar

            newdata = orange.ExampleTable(orange.Domain(kept, classVar),
                                          self.originalData)

        elif self.discClassData and self.outputOriginalClass:
            newdata = self.discClassData

        elif self.originalData:
            # The original data is passed through unless it has a continuous
            # class for which no discretized-class data exists.
            undiscretizedClass = (
                self.originalData.domain.classVar
                and self.originalData.domain.classVar.varType == orange.VarTypes.Continuous
                and not self.discClassData)
            if not undiscretizedClass:
                newdata = self.originalData

        self.send("Data", newdata)
        # NOTE(review): this binds a local name only; possibly
        # self.dataChanged was intended -- confirm.
        dataChanged = False
예제 #8
0
 def test_memory_leaks(self):
     """Repeatedly rebuilding a domain must not change a variable's refcount."""
     import sys
     # One domain is alive at both measurements, so any difference comes
     # from the domains created and discarded in between.
     domain = orange.Domain(self.vars)
     before = sys.getrefcount(self.vars[0])
     for _ in range(1000):
         domain = orange.Domain(self.vars)
     after = sys.getrefcount(self.vars[0])
     self.assertEqual(before, after)
예제 #9
0
 def setUp(self):
     """Create the continuous, discrete and mixed domains used by the tests."""
     self.contvars = [orange.ContinuousVariable(name) for name in "abcde"]
     self.discvars = [
         orange.DiscreteVariable(name, values=["ana", "berta", "cilka"])
         for name in "ABCDE"
     ]
     # NOTE(review): yvar is a one-element list, so allvars below ends in
     # a nested list and the domains receive a list as the class --
     # confirm whether a bare variable was intended.
     self.yvar = [orange.DiscreteVariable("y", values="01")]

     features = self.contvars + self.discvars
     self.contdomain = orange.Domain(self.contvars, self.yvar)
     self.discdomain = orange.Domain(self.discvars, self.yvar)
     self.allvars = features + [self.yvar]
     self.domain = orange.Domain(features, self.yvar)
예제 #10
0
    def to_network(self, terms=None):
        """
        Return an Orange.network.Network instance constructed from
        this ontology.

        The network has one node per term and one edge per
        (term, related term) pair; each edge carries a 0/1 flag for every
        edge type.  `network.items` lists id, name and definition of each
        term; `network.links` lists the endpoints (1-based term
        positions) and one relationship type per edge.

        NOTE(review): the `terms` argument is accepted but ignored -- the
        network always covers everything returned by `self.terms()`.
        Confirm whether restricting to the given terms was intended.

        """
        edge_types = self.edge_types()
        terms = self.terms()
        from Orange.orng import orngNetwork
        import orange

        network = orngNetwork.Network(len(terms), True, len(edge_types))
        network.objects = dict([(term.id, i) for i, term in enumerate(terms)])

        # Collect the set of relationship types for every (source, target).
        edges = defaultdict(set)
        for term in terms:
            for relType, relTerm in self.related_terms(term):
                edges[(term.id, relTerm)].add(relType)

        edgeitems = edges.items()
        for (src, dst), eTypes in edgeitems:
            network[src, dst] = [1 if e in eTypes else 0 for e in edge_types]

        domain = orange.Domain([
            orange.StringVariable("id"),
            orange.StringVariable("name"),
            orange.StringVariable("def"),
        ], False)

        items = orange.ExampleTable(domain)
        for term in terms:
            ex = orange.Example(
                domain, [term.id, term.name,
                         term.values.get("def", [""])[0]])
            items.append(ex)

        domain = orange.Domain([
            orange.FloatVariable("u"),
            orange.FloatVariable("v"),
            orange.EnumVariable("relationship", values=list(edge_types))
        ], False)

        id2index = dict([(term.id, i + 1) for i, term in enumerate(terms)])
        links = orange.ExampleTable(domain)
        for (src, dst), eTypes in edgeitems:
            # Only one relationship type per edge is recorded in the table;
            # pop() takes an arbitrary one from the set.
            ex = orange.Example(domain,
                                [id2index[src], id2index[dst],
                                 eTypes.pop()])
            links.append(ex)

        network.items = items
        network.links = links
        network.optimization = None
        return network
예제 #11
0
    def test_construction_attribute_list(self):
        """Variables given as objects, indices or names are resolved via `source`."""
        source = orange.Domain(self.vars)
        expected = [self.vars[1], self.vars[0], self.vars[2]]

        # Mixed specification, no class given.
        plain = orange.Domain([self.vars[1], 0, "c"], source=source)
        self.assertEqual(plain.variables, expected)

        # Same specification with the class given by name.
        classed = orange.Domain([self.vars[1], 0, "c"], source=source, class_var="d")
        self.assertEqual(classed.variables, expected + [self.vars[3]])
        self.assertEqual(classed.attributes, expected)
        self.assertEqual(classed.class_var, self.vars[3])
예제 #12
0
 def test_memory_leaks_error(self):
     """Domain constructions inside a try/except must not leak references.

     A domain is built 1000 times from a variable and the string "abc";
     any exception raised by the constructor is ignored.  The reference
     count of the variable must be the same before and after.
     """
     import sys
     f = orange.Domain(self.vars)
     refcount = sys.getrefcount(self.vars[0])
     for i in range(1000):
         try:
             f = orange.Domain([self.vars[0], "abc"])
         except Exception:
             # Presumably the constructor rejects the name "abc" -- only
             # the reference count matters here.  Catching Exception
             # rather than using a bare except keeps KeyboardInterrupt
             # and SystemExit from being swallowed mid-loop.
             pass
     refcount2 = sys.getrefcount(self.vars[0])
     self.assertEqual(refcount, refcount2)
예제 #13
0
 def test_convert_through_append(self):
     """Examples appended to a table are converted to that table's domain."""
     iris = orange.ExampleTable("iris")
     source = iris.domain

     first_domain = orange.Domain([source[0], source[2], source[4]])
     first = orange.ExampleTable(first_domain)
     second_domain = orange.Domain([source[1], source[2]], None)
     second = orange.ExampleTable(second_domain)

     for example in iris[:5]:
         first.append(example)
         second.append(example)

     # zip stops at the shortest table, so the five appended examples
     # are compared.
     for original, in_first, in_second in zip(iris, first, second):
         self.assertEqual(original[0], in_first[0])
         self.assertEqual(original[1], in_second[0])
예제 #14
0
    def test_construction(self):
        """Check how orange.Domain splits variables into attributes and class.

        The constructor is called with an empty list, a one-element list
        and the full self.vars list, each time with no second argument,
        with True and with False; finally with an explicit class variable.
        """
        # Empty variable list: no variables, no attributes and no class,
        # whatever the second argument is.
        d = orange.Domain([])
        self.assertEqual(len(d.variables), 0)
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, None)

        d = orange.Domain([], True)
        self.assertEqual(len(d.variables), 0)
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, None)

        d = orange.Domain([], False)
        self.assertEqual(len(d.variables), 0)
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, None)


        # Single variable: it is the class by default and with True ...
        d = orange.Domain(self.vars[:1])
        self.assertEqual(d.variables, self.vars[:1])
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, self.vars[0])

        d = orange.Domain(self.vars[:1], True)
        self.assertEqual(d.variables, self.vars[:1])
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, self.vars[0])

        # ... and an ordinary attribute with False.
        d = orange.Domain(self.vars[:1], False)
        self.assertEqual(d.variables, self.vars[:1])
        self.assertEqual(d.attributes, self.vars[:1])
        self.assertEqual(d.class_var, None)


        # Full list: the last variable is the class by default and with True ...
        d = orange.Domain(self.vars)
        self.assertEqual(d.variables, self.vars)
        self.assertEqual(d.attributes, self.vars[:-1])
        self.assertEqual(d.class_var, self.vars[-1])

        d = orange.Domain(self.vars, True)
        self.assertEqual(d.variables, self.vars)
        self.assertEqual(d.attributes, self.vars[:-1])
        self.assertEqual(d.class_var, self.vars[-1])

        # ... and all variables are attributes with False.
        d = orange.Domain(self.vars, False)            
        self.assertEqual(d.variables, self.vars)
        self.assertEqual(d.attributes, self.vars)
        self.assertEqual(d.class_var, None)


        # Explicit class variable as the second argument: every listed
        # variable is an attribute and the class follows them in `variables`.
        d = orange.Domain(self.vars, self.vars[5])
        self.assertEqual(d.variables, self.vars+[self.vars[5]])
        self.assertEqual(d.attributes, self.vars)
        self.assertEqual(d.class_var, self.vars[5])
예제 #15
0
def getCinfonyDescResults(origData,descList,radius=1):
    """Calculate the Cinfony descriptors in descList for origData.

    The input variables and class are kept and the descriptors computed
    by the Obabel, Rdk, Webel and Cdk back-ends are merged in on the
    SMILES attribute.  NaN descriptor values are replaced by '?'.

    Returns a new data set, or None when origData or descList is empty,
    when no SMILES attribute is found, or when no back-end returned a
    result.
    """
    if not (origData and descList):
        return None
    smilesName = getSMILESAttr(origData)
    if not smilesName:
        return None

    # Working copy that keeps the untouched SMILES in an extra
    # "origSmiles" string attribute.
    workDomain = orange.Domain(
        [attr for attr in origData.domain if attr is not origData.domain.classVar]
        + [orange.StringVariable("origSmiles")],
        origData.domain.classVar)
    data = dataUtilities.DataTable(workDomain, origData)
    for ex in data:
        ex["origSmiles"] = ex[smilesName].value

    #TODO: Create a method in dataUtilities to standardize the attribute smilesName in place having the attr origSmiles as ID
    if "AZutilities.extraUtilities" in sys.modules and hasattr(extraUtilities, "StandardizeSMILES"):
        # StandardizeSMILES is expected to change the smiAttr attribute of
        # data in place, using cName as the compound identifier.
        extraUtilities.StandardizeSMILES(data, smiAttr = smilesName, cName="origSmiles")

    # Run each descriptor back-end in turn and keep the non-empty results.
    calculators = (
        lambda: getObabelDescResult(data,descList),
        lambda: getRdkDescResult(data,descList,radius),
        lambda: getWebelDescResult(data,descList),
        lambda: getCdkDescResult(data,descList),
    )
    results = []
    for calculate in calculators:
        res = calculate()
        if res:
            results.append(res)
    if not results:
        return None

    # Convert any nan to a '?' (nan is the only value unequal to itself).
    for res in results:
        for ex in res:
            for attr in ex.domain:
                if ex[attr] != ex[attr]:
                    ex[attr] = '?'

    # Merge all descriptor tables, then merge them into the working copy.
    resData = results[0]
    for res in results[1:]:
        resData = dataUtilities.horizontalMerge(resData, res, smilesName, smilesName)
    data = dataUtilities.horizontalMerge(data, resData, smilesName, smilesName)

    # Revert the SMILES back to its original state.
    for ex in data:
        ex[smilesName] = ex["origSmiles"]

    # Remove the helper origSmiles attribute.
    finalAttrs = [attr for attr in data.domain
                  if attr.name != "origSmiles" and attr is not data.domain.classVar]
    return dataUtilities.DataTable(orange.Domain(finalAttrs, data.domain.classVar), data)
예제 #16
0
파일: orngLR.py 프로젝트: stefie10/slu_hri
    def __call__(self, examples, weight=0):
        """Fit a logistic regression on `examples` and return the result.

        Optional attributes read from self with defaults: `imputer`,
        `removeMissing`, `stepwiseLR`, `addCrit`, `removeCrit`, `numAttr`.
        `fitter` and `removeSingular` are read directly.

        Returns None when no examples are left after the optional removal
        of examples with missing values; otherwise returns what the
        learner produced once it is no longer an orange.Variable.
        """
        # A missing or falsy imputer is normalized to None.
        imputer = getattr(self, "imputer", None) or None
        if getattr(self, "removeMissing", 0):
            examples = orange.Preprocessor_dropMissing(examples)


##        if hasDiscreteValues(examples.domain):
##            examples = createNoDiscTable(examples)
        if not len(examples):
            return None
        if getattr(self, "stepwiseLR", 0):
            # Stepwise selection: restrict the domain to the attributes
            # returned by StepWiseFSS, keeping the class and the metas.
            addCrit = getattr(self, "addCrit", 0.2)
            removeCrit = getattr(self, "removeCrit", 0.3)
            numAttr = getattr(self, "numAttr", -1)
            attributes = StepWiseFSS(examples,
                                     addCrit=addCrit,
                                     deleteCrit=removeCrit,
                                     imputer=imputer,
                                     numAttr=numAttr)
            tmpDomain = orange.Domain(attributes, examples.domain.classVar)
            tmpDomain.addmetas(examples.domain.getmetas())
            examples = examples.select(tmpDomain)
        learner = orange.LogRegLearner()
        learner.imputerConstructor = imputer
        if imputer:
            # Construct an imputer from the examples, then apply it to them.
            examples = self.imputer(examples)(examples)
        # Anything still missing after imputation is dropped.
        examples = orange.Preprocessor_dropMissing(examples)
        if self.fitter:
            learner.fitter = self.fitter
        if self.removeSingular:
            lr = learner.fitModel(examples, weight)
        else:
            lr = learner(examples, weight)
        # A Variable result is handled by removing that variable from the
        # domain and refitting -- presumably fitModel reports an attribute
        # that makes the fit singular this way; confirm against
        # orange.LogRegLearner.
        while isinstance(lr, orange.Variable):
            # For a continuized discrete attribute, step back to the
            # variable it was derived from.
            if isinstance(lr.getValueFrom,
                          orange.ClassifierFromVar) and isinstance(
                              lr.getValueFrom.transformer,
                              orange.Discrete2Continuous):
                lr = lr.getValueFrom.variable
            attributes = examples.domain.attributes[:]
            if lr in attributes:
                attributes.remove(lr)
            else:
                attributes.remove(lr.getValueFrom.variable)
            newDomain = orange.Domain(attributes, examples.domain.classVar)
            newDomain.addmetas(examples.domain.getmetas())
            examples = examples.select(newDomain)
            lr = learner.fitModel(examples, weight)
        return lr
예제 #17
0
 def build_orange_data_from_eig_vector(self):
     """Wrap the loaded eigenvector in an Orange data table for clustering.

     Each eigenvector entry becomes one instance with a single continuous
     feature, 'eigValue'.  The user id of the graph vertex at the same
     position is attached as a meta attribute.  No group id is attached.
     """
     eig_vector = self.load_eigen_vector(matlab=True)
     user_id_label = SQLDao.LABEL_USER_GROUP_INFO_USERID

     # Features -> domain (no class variable) -> meta attribute.
     new_features = [Orange.feature.Continuous('eigValue')]
     new_domain = orange.Domain(new_features, False)
     new_domain.add_meta(
         Orange.feature.Descriptor.new_meta_id(),
         Orange.feature.Continuous(user_id_label))

     # One instance per eigenvector entry, tagged with the vertex's user id.
     new_datas = []
     for graphIndex, value in enumerate(eig_vector):
         instance = Orange.data.Instance(new_domain, [value])
         instance[user_id_label] = self.g.vs[graphIndex][user_id_label]
         new_datas.append(instance)

     # Table built from the domain and the instance list.
     return Orange.data.Table(new_domain, new_datas)
예제 #18
0
    def applySettings(self):
        """Apply the widget settings, score the examples and split out the outliers.

        Without input (self.haveInput != 1), None is sent on all three
        channels.  Otherwise a copy of the data with a "Z score" meta
        attribute is sent, followed by the examples whose score is above
        the threshold ("Outliers") and the rest ("Inliers").
        """
        if self.haveInput != 1:
            self.send("Examples with Z-scores", None)
            self.send("Outliers", None)
            self.send("Inliers", None)
            return

        outlier = self.outlier
        outlier.setKNN(self.ks[self.k][1])

        # Same domain as the input plus a meta attribute for the score.
        scored_domain = orange.Domain(self.data.domain)
        scored_domain.addmeta(orange.newmetaid(),
                              orange.FloatVariable("Z score"))
        self.newdata = orange.ExampleTable(scored_domain, self.data)

        for position, score in enumerate(outlier.zValues()):
            self.newdata[position]["Z score"] = score

        self.send("Examples with Z-scores", self.newdata)

        # NOTE(review): the threshold string is passed to eval(); if it can
        # hold arbitrary user text this executes arbitrary code -- consider
        # a plain numeric conversion.
        above = orange.Filter_values(domain=self.newdata.domain)
        above["Z score"] = (orange.Filter_values.Greater,
                            eval(self.zscore))
        outliers = above(self.newdata)

        at_or_below = orange.Filter_values(domain=self.newdata.domain)
        at_or_below["Z score"] = (orange.Filter_values.LessEqual,
                                  eval(self.zscore))
        inliers = at_or_below(self.newdata)

        self.send("Outliers", outliers)
        self.send("Inliers", inliers)
예제 #19
0
def __makeExampleTable(namesDict, data):
    """Build an orange.ExampleTable with one row per sample and one column per gene.

    namesDict -- maps each group key to a dict whose keys are sample names
    data      -- data[geneID][groupKey][sampleName] holds the value

    Columns are float variables named after the sorted gene ids; the
    class is the group key.  Control-group rows come first, then the
    data-group rows.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    geneIDs = sorted(data.keys())
    geneVars = [orange.FloatVariable(name=str(geneID)) for geneID in geneIDs]
    groupVar = orange.EnumVariable(name=CLASS_ATRR_NAME,
                                   values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(geneVars, groupVar)
    table = orange.ExampleTable(domain)

    for groupKey in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for sampleName in namesDict[groupKey].keys():
            row = [data[geneID][groupKey][sampleName] for geneID in geneIDs]
            row.append(groupKey)
            table.append(orange.Example(domain, row))

    return table
예제 #20
0
    def relabel(self):
        """Attach item labels to the distance matrix and send it on "Distances".

        With both a matrix and data present, the items are the data's
        attributes or variables (when self.takeAttributeNames is set) or
        its examples, provided the count matches the matrix dimension;
        otherwise an error is reported.  With a matrix and self.labels
        but no data, a one-column string table is built from the labels.
        With neither data nor labels, the items are the stringified
        indices.
        """
        #print 'relabel'
        # Called without a message -- presumably clears a previous error.
        self.error()
        matrix = self.matrix
        if matrix is not None and self.data is not None:
            if self.takeAttributeNames:
                # Label by columns: attributes only, or attributes plus class.
                domain = self.data.domain
                if matrix.dim == len(domain.attributes):
                    matrix.setattr("items", domain.attributes)
                elif matrix.dim == len(domain.variables):
                    matrix.setattr("items", domain.variables)
                else:
                    self.error("The number of attributes doesn't match the matrix dimension")

            else:
                # Label by rows (examples).
                if matrix.dim == len(self.data):
                    matrix.setattr("items", self.data)
                else:
                    self.error("The number of examples doesn't match the matrix dimension")
        elif matrix and self.labels:
            # No data: wrap the labels in a table with a single string
            # variable and name each example after its label.
            lbl = orange.StringVariable('label')
            self.data = orange.ExampleTable(orange.Domain([lbl]), 
                                            [[str(l)] for l in self.labels])
            for e, label in zip(self.data, self.labels):
                e.name = label
            matrix.setattr("items", self.data)
        
        # NOTE(review): from here on the matrix is used unconditionally; if
        # self.matrix is None these lines raise AttributeError -- confirm
        # that callers never reach this method without a matrix.
        if self.data == None and self.labels == None:
            matrix.setattr("items", [str(i) for i in range(matrix.dim)])
        
        self.matrix.matrixType = orange.SymMatrix.Symmetric
        self.send("Distances", self.matrix)
예제 #21
0
    def removeSelectedClassLabel(self):
        """Remove the selected class label together with its examples.

        Does nothing when no label is selected or when only one label is
        left.  Otherwise a new class variable without the label is
        created, the remaining examples are copied to a new domain, the
        graph is refreshed and the previous label in the list (or the
        first one) is selected.
        """
        index = self.selectedClassLabelIndex()
        if index is None or len(self.classValuesModel) <= 1:
            return

        label = self.classValuesModel[index]
        survivors = [
            ex for ex in self.graph.data if str(ex.getclass()) != label
        ]

        values = [val for val in self.classValuesModel if val != label]
        newclass = orange.EnumVariable("Class label", values=values)
        newdomain = orange.Domain(self.graph.data.domain.attributes,
                                  newclass)
        newdata = orange.ExampleTable(newdomain)
        for ex in survivors:
            if ex[self.classVariable] == label:
                continue
            if ex[self.classVariable] in values:
                row = [ex[a] for a in ex.domain.attributes]
                row.append(str(ex.getclass()))
                newdata.append(orange.Example(newdomain, row))

        self.classVariable = newclass
        self.classValuesModel.wrap(self.classVariable.values)

        self.graph.data = newdata
        self.graph.updateGraph()

        newindex = self.classValuesModel.index(max(0, index - 1))
        selectionModel = self.classValuesView.selectionModel()
        selectionModel.select(newindex, QItemSelectionModel.ClearAndSelect)

        self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
예제 #22
0
파일: trainer.py 프로젝트: stefie10/slu_hri
def wordnet_glosses(training, testing):
    stopwords = set(nltk.corpus.stopwords.words())
    gloss_dist = training.gloss_map()
    used_words = [
        k for k in gloss_dist.keys()
        if not k in stopwords and gloss_dist[k] > 2
    ]

    print "words", used_words

    attributes = [
        orange.EnumVariable(a, values=["True", "False"]) for a in used_words
    ]
    print "got", len(used_words), "features"
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            ex["word"] = word
            for a_i, a in enumerate(attributes):
                word_i = used_words[a_i]
                if word_i in annotation.synset(i).definition:
                    ex[a.name] = "True"
                else:
                    ex[a.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
예제 #23
0
    def commit(self):
        """Send the examples and features related to the selected rules.

        Nothing selected: None is sent on both "Data" and "Features".
        Otherwise "Data" carries the examples selected from the rules
        (restricted to the rules' attributes when self.selectedAttrsOnly
        is set) and "Features" carries those attributes in domain order.
        """
        selected = self.tableView.selectionModel().selectedRows()
        rows = [self.proxyModel.mapToSource(index).row() for index in selected]
        selectedRules = [self.classifier.rules[row] for row in rows]

        if not selectedRules:
            self.send("Data", None)
            self.send("Features", None)
        else:
            examples = self.classifier.examples
            selectedExamples = self.selectedExamplesFromRules(
                selectedRules, self.classifier.examples)
            ruleAttrs = self.selectedAttrsFromRules(selectedRules)
            # restore the order the attributes have in the domain
            selectedAttrs = [
                attr for attr in examples.domain.attributes
                if attr in ruleAttrs
            ]
            if self.selectedAttrsOnly:
                domain = orange.Domain(selectedAttrs, examples.domain.classVar)
                domain.addmetas(examples.domain.getmetas())
                selectedExamples = orange.ExampleTable(domain,
                                                       selectedExamples)
            else:
                selectedExamples = orange.ExampleTable(selectedExamples)

            self.send("Data", selectedExamples)
            self.send("Features", orange.VarList(list(selectedAttrs)))

        self.changedFlag = False
예제 #24
0
def bench_orange(X, y, T, valid):
    """Benchmark Orange's nu-SVC with an RBF kernel.

    X, y     -- training samples and labels
    T, valid -- test samples and their expected labels

    Negative labels are mapped to 0 before the data is handed to Orange.
    The caller's arrays are left untouched.

    Returns (score, elapsed): the fraction of test samples predicted
    correctly and the wall-clock time as a timedelta.

    NOTE(review): `sigma` is not defined in this function; it is assumed
    to be a module-level value -- confirm.
    """
    import orange
    start = datetime.now()

    # prepare data in Orange's format
    columns = ["a" + str(i) for i in range(X.shape[1])]
    classValues = ['0', '1']

    domain = orange.Domain([orange.FloatVariable(name) for name in columns],
                   orange.EnumVariable("class", values=classValues))

    # Work on copies: reshaping and relabelling the caller's arrays in
    # place would leak into whatever uses them afterwards.
    train_labels = np.where(np.asarray(y) < 0, 0, y).reshape(len(y), 1)
    orng_train_data = orange.ExampleTable(domain, np.hstack((X, train_labels)))

    test_labels = np.where(np.asarray(valid) < 0, 0, valid).reshape(len(valid), 1)
    orng_test_data = orange.ExampleTable(domain, np.hstack((T, test_labels)))

    learner = orange.SVMLearner(orng_train_data, \
                                svm_type=orange.SVMLearner.Nu_SVC, \
                                kernel_type=orange.SVMLearner.RBF, C=1., \
                                gamma=1. / sigma)

    pred = np.empty(T.shape[0], dtype=np.int32)
    for i, e in enumerate(orng_test_data):
        pred[i] = learner(e)

    # Compare against a flat label vector: comparing the (n,) predictions
    # with the (n, 1) column would broadcast to an n-by-n matrix and give
    # a meaningless mean.
    score = np.mean(pred == test_labels.ravel())
    return score, datetime.now() - start
예제 #25
0
    def addNewClassLabel(self):
        """Append a new, unused "Class N" label to the class variable.

        The first N (starting at 1) whose label is not yet in the model
        is used.  The data is copied to a domain with the extended class
        variable, the graph is refreshed and the new label is selected.
        """
        i = 1
        while "Class %i" % i in self.classValuesModel:
            i += 1
        newlabel = "Class %i" % i

        values = list(self.classValuesModel) + [newlabel]
        newclass = orange.EnumVariable("Class label", values=values)
        newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
        newdata = orange.ExampleTable(newdomain)
        for ex in self.graph.data:
            row = [ex[a] for a in ex.domain.attributes]
            row.append(str(ex.getclass()))
            newdata.append(orange.Example(newdomain, row))

        self.classVariable = newclass
        self.classValuesModel.wrap(self.classVariable.values)

        self.graph.data = newdata
        self.graph.updateGraph()

        # Select the label that was just added (the last one).
        newindex = self.classValuesModel.index(len(self.classValuesModel) - 1)
        selectionModel = self.classValuesView.selectionModel()
        selectionModel.select(newindex, QItemSelectionModel.ClearAndSelect)

        self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
예제 #26
0
파일: trainer.py 프로젝트: stefie10/slu_hri
def wordnet_meronyms(training, testing):

    ancestor_to_count = training.meronym_ancestor_map()

    all_ancestors = list(ancestor_to_count.keys())
    all_ancestors.sort(key=lambda a: ancestor_to_count[a], reverse=True)

    used_ancestors = all_ancestors
    print "name", used_ancestors[0].name
    attributes = [
        orange.EnumVariable(a.name, values=["True", "False"])
        for a in used_ancestors
    ]
    print "got", len(used_ancestors), "features"
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            for a_i, a in enumerate(attributes):
                ancestor_i = used_ancestors[a_i]
                if ancestor_i in ancestors:
                    ex[a.name] = "True"
                else:
                    ex[a.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
예제 #27
0
def starRegression(cache, dimensions, progressCallback=None, **args):
    """Fill ``cache.deltas`` with per-point least-squares coefficients.

    With a single continuous attribute the work is delegated to
    ``triangles1D(cache, True)`` and its result is returned.  Otherwise
    ``cache.points``, ``cache.tri`` and ``cache.stars`` are computed when
    they are empty/unset, and for every point the index lists making up its
    star are concatenated and de-duplicated; a least-squares system is then
    solved with all but the last column of those points as the matrix and
    the last column as the right-hand side.

    cache            -- object carrying contAttributes, data, points, tri,
                        stars and deltas (deltas must be indexable by point
                        position)
    dimensions       -- iterated only to size the '?' placeholder list
                        stored for points whose star is an empty list
    progressCallback -- optional callable, invoked after each solved point
                        with ``index * 100.0 / len(points)``
    **args           -- accepted and ignored

    Returns None in the multi-attribute case.
    """
    if len(cache.contAttributes) == 1:
        return triangles1D(cache, True)

    # Lazily populate the cached point list.
    if not cache.points:
        pointDomain = orange.Domain(cache.contAttributes,
                                    cache.data.domain.classVar)
        cache.points = orange.ExampleTable(pointDomain, cache.data).native(0)
    points = cache.points
    pointCount = len(points)

    # Lazily populate the cached triangulation and the stars built from it.
    if not cache.tri:
        cache.tri = triangulate(cache, points)
    triangulation = cache.tri

    if not cache.stars:
        cache.stars = [star(idx, triangulation) for idx in xrange(pointCount)]

    points = cache.points

    if progressCallback:
        step = 100.0 / len(points)

    for idx, (neighbours, _point) in enumerate(zip(cache.stars, points)):
        if neighbours == []:
            # No star for this point: mark every dimension as unknown.
            cache.deltas[idx] = ['?' for _ in dimensions]
            continue

        # Flatten the star into a list of distinct point indices.
        members = list(set(reduce(lambda left, right: left + right,
                                  neighbours)))
        design = [points[m][:-1] for m in members]
        target = [[points[m][-1]] for m in members]
        solution = numpy.linalg.lstsq(design, target)[0]
        cache.deltas[idx] = [row[0] for row in solution]

        if progressCallback:
            progressCallback(idx * step)
예제 #28
0
    def test_to_numpy_multi(self):
        """Check ``to_numpy`` with the ``multinomial`` argument on iris and zoo.

        Verifies which content specifications raise ValueError, that the
        class array is None with ``multinomial=0``, and that the returned
        arrays match the table contents row by row.
        """
        iris = orange.ExampleTable("iris")
        for spec in ("AC/w", "Ac/Cw"):
            with self.assertRaises(ValueError):
                iris.to_numpy(spec, multinomial=2)
        # These two calls only need to succeed.
        iris.to_numpy(multinomial=2)
        iris.to_numpy("a", multinomial=2)
        _attrs, classes, _weights = iris.to_numpy(multinomial=0)
        self.assertIsNone(classes)

        zoo = orange.ExampleTable("zoo")
        with self.assertRaises(ValueError):
            zoo.to_numpy(multinomial=2)
        with self.assertRaises(ValueError):
            zoo.to_numpy("a", multinomial=2)

        # Default conversion: attributes and class must mirror the table.
        attrs, classes, weights = zoo.to_numpy()
        self.assertIsNone(weights)
        for row in range(len(zoo)):
            self.assertEqual(zoo[row, :-1], attrs[row])
            self.assertEqual(zoo[row, -1], classes[row])

        # multinomial=0: the array must equal the table restricted to
        # variables with at most two values.
        attrs, classes, weights = zoo.to_numpy(multinomial=0)
        self.assertIsNone(classes)
        self.assertIsNone(weights)
        small_vars = [var for var in zoo.domain if len(var.values) <= 2]
        reduced = orange.ExampleTable(orange.Domain(small_vars), zoo)
        for row in range(len(zoo)):
            self.assertEqual(reduced[row], attrs[row])
예제 #29
0
 def test_to_numpy_noclass(self):
     """Check that ``to_numpy`` rejects these specs on a class-less table."""
     iris = orange.ExampleTable("iris")
     # Same attributes as iris, but the domain has no class variable.
     classless = orange.ExampleTable(
         orange.Domain(iris.domain.attributes, None), iris)
     with self.assertRaises(ValueError):
         classless.to_numpy("AC/w")
     with self.assertRaises(ValueError):
         classless.to_numpy("AC/w", multinomial=0)
     with self.assertRaises(ValueError):
         classless.to_numpy("A/Cw", multinomial=0)
예제 #30
0
파일: gsea.py 프로젝트: acopar/orange-bio
def etForAttribute(datal, a):
    """
    Builds an example table for a single attribute across multiple
    example tables.

    datal -- non-empty sequence of example tables; each must contain
             attribute `a`
    a     -- the attribute to extract (anything accepted by
             `table.domain[...]`)

    The result has one float variable per input table, named "v0", "v1",
    ... in input order, plus the class variable of the first table.  Row i
    combines the i-th value of `a` from every table with the i-th class
    value of the first table; because rows are zipped, the result is as
    long as the shortest column.

    Raises ValueError if `datal` is empty.
    """
    if not datal:
        raise ValueError("etForAttribute needs at least one example table")

    def getAttrVals(data, attr):
        # Project the table onto the single variable and read native values.
        dom2 = orange.Domain([data.domain[attr]], False)
        dataa = orange.ExampleTable(dom2, data)
        return [ex[0].native() for ex in dataa]

    first = datal[0]
    classVar = first.domain.classVar

    domainl = [
        orange.FloatVariable(name=("v" + str(index)))
        for index in range(len(datal))
    ]
    valuesl = [getAttrVals(data, a) for data in datal]

    # Class values are taken from the same table that supplies the class
    # variable of the output domain, so the two always agree.
    valuesl.append(getAttrVals(first, classVar))

    dom = orange.Domain(domainl, classVar)
    examples = [list(row) for row in zip(*valuesl)]

    return orange.ExampleTable(dom, examples)