Beispiel #1
0
 def __init__(self, training):
     """Build the paired (``_w1``/``_w2``) domain and the training table.

     Every attribute of the ``WordnetParentsEngine`` domain is duplicated
     into two attributes named ``<name>_w1`` and ``<name>_w2``.  The class
     variable is an enum over "Larger", "Smaller", "Equal" and "None".

     Raises:
         ValueError: if an engine attribute is neither a FloatVariable nor
             an EnumVariable.
     """
     self.training = training
     self.wnparents = trainer.WordnetParentsEngine(training)
     labels = ["Larger", "Smaller", "Equal", "None"]
     self.cls_variable = orange.EnumVariable("class", values=labels)

     alist = []
     for var in self.wnparents.domain.attributes:
         if isinstance(var, orange.FloatVariable):
             pair = [orange.FloatVariable(name="%s_%s" % (var.name, suffix))
                     for suffix in ("w1", "w2")]
         elif isinstance(var, orange.EnumVariable):
             pair = [orange.EnumVariable(name="%s_%s" % (var.name, suffix),
                                         values=var.values)
                     for suffix in ("w1", "w2")]
         else:
             # repr() instead of backticks: same text, valid on Python 2 and 3.
             raise ValueError("Unhandled attribute: " + repr(var))
         alist.extend(pair)

     self.domain = orange.Domain(alist,
                                 self.cls_variable)
     self.training_table = self.makeTable(self.training)
Beispiel #2
0
 def __call__(self, rule, examples, weights, targetClass):
     """Re-weight ``examples`` according to how often each one was covered.

     A fresh weight meta attribute is created on ``examples``.  Examples
     that the rule covers and classifies correctly get their "Coverage"
     meta value incremented and receive the weight ``1 / (coverage + 1)``;
     all other examples keep their previous weight.

     Returns:
         tuple: ``(examples, id of the new weight meta attribute)``.
     """
     if not weights:
         # No weights supplied: create a meta attribute of unit weights.
         weights = orange.newmetaid()
         examples.addMetaAttribute(weights, 1.)
         examples.domain.addmeta(
             weights, orange.FloatVariable("weights-" + str(weights)), True)

     # Reuse the "Coverage" meta attribute if present, otherwise create it.
     try:
         coverage = examples.domain.getmeta("Coverage")
     except:
         coverage = orange.FloatVariable("Coverage")
         examples.domain.addmeta(orange.newmetaid(), coverage, True)
         examples.addMetaAttribute(coverage, 0.0)

     new_weights_id = orange.newmetaid()
     examples.addMetaAttribute(new_weights_id, 1.)
     examples.domain.addmeta(
         new_weights_id,
         orange.FloatVariable("weights-" + str(new_weights_id)),
         True)

     for ex in examples:
         covered = rule(ex) and ex.getclass() == rule.classifier(
             ex, orange.GetValue)
         if not covered:
             ex[new_weights_id] = ex[weights]
             continue
         try:
             ex[coverage] += 1.0
         except:
             # Increment failed; start the coverage count at one.
             ex[coverage] = 1.0
         ex[new_weights_id] = 1.0 / (ex[coverage] + 1)
     return (examples, new_weights_id)
Beispiel #3
0
 def sendList(self, selectedInd):
     """Send the items selected in the MDS plot.

     When ``self.data`` is a list of strings, a table of the selected names
     is sent on "Data".  Depending on ``self.selectionOptions`` the X/Y
     coordinates are added as regular attributes (1), as meta attributes
     (any other truthy value), or omitted (falsy).

     Otherwise the selected items are grouped and sent on
     "Structured Data Files", or ``None`` is sent when nothing is selected.

     Args:
         selectedInd: indices into ``self.data`` / ``self.mds.points``.
     """
     if self.data and type(self.data[0]) == str:
         xAttr = orange.FloatVariable("X")
         yAttr = orange.FloatVariable("Y")
         nameAttr = orange.StringVariable("name")
         if self.selectionOptions == 1:
             domain = orange.Domain([xAttr, yAttr, nameAttr])
             selection = orange.ExampleTable(domain)
             for ind in selectedInd:
                 # Name and coordinates must both refer to the selected item;
                 # the name used to be read from the row counter instead.
                 selection.append(
                     list(self.mds.points[ind]) + [self.data[ind]])
         else:
             domain = orange.Domain([nameAttr])
             if self.selectionOptions:
                 domain.addmeta(orange.newmetaid(), xAttr)
                 domain.addmeta(orange.newmetaid(), yAttr)
             selection = orange.ExampleTable(domain)
             for row, ind in enumerate(selectedInd):
                 selection.append([self.data[ind]])
                 if self.selectionOptions:
                     point = self.mds.points[ind]
                     selection[row][xAttr] = point[0]
                     selection[row][yAttr] = point[1]
         self.send("Data", selection)
         return

     if not selectedInd:
         self.send("Structured Data Files", None)
     else:
         datasets = [self.data[i] for i in selectedInd]
         # NOTE(review): groups are named by ``dirname`` but membership is
         # tested against ``strain`` -- confirm the two are meant to match.
         names = list(set([d.dirname for d in datasets]))
         data = [(name, [d for d in datasets if d.strain == name])
                 for name in names]
         self.send("Structured Data Files", data)
Beispiel #4
0
    def to_network(self, terms=None):
        """
        Return an Orange.network.Network instance constructed from
        this ontology.

        The ``terms`` argument is currently ignored: the network is always
        built from ``self.terms()``.

        The returned network has ``items`` (one row per term: id, name and
        the first "def" value, or "" when absent) and ``links`` (one row per
        related pair, holding 1-based term indices and one relationship
        type).
        """
        edge_types = self.edge_types()
        # NOTE(review): this overwrites the ``terms`` parameter, so callers
        # cannot restrict the network to a subset of terms.
        terms = self.terms()
        from Orange.orng import orngNetwork
        import orange

        network = orngNetwork.Network(len(terms), True, len(edge_types))
        network.objects = dict([(term.id, i) for i, term in enumerate(terms)])

        # Collect the set of relationship types for every (source, target).
        edges = defaultdict(set)
        for term in terms:
            related = self.related_terms(term)
            for relType, relTerm in related:
                edges[(term.id, relTerm)].add(relType)

        edgeitems = edges.items()
        for (src, dst), eTypes in edgeitems:
            # One 0/1 flag per known edge type.
            network[src, dst] = [1 if e in eTypes else 0 for e in edge_types]

        domain = orange.Domain([
            orange.StringVariable("id"),
            orange.StringVariable("name"),
            orange.StringVariable("def"),
        ], False)

        items = orange.ExampleTable(domain)
        for term in terms:
            ex = orange.Example(
                domain, [term.id, term.name,
                         term.values.get("def", [""])[0]])
            items.append(ex)

        domain = orange.Domain([
            orange.FloatVariable("u"),
            orange.FloatVariable("v"),
            orange.EnumVariable("relationship", values=list(edge_types))
        ], False)

        id2index = dict([(term.id, i + 1) for i, term in enumerate(terms)])
        links = orange.ExampleTable(domain)
        for (src, dst), eTypes in edgeitems:
            # pop() removes an arbitrary relationship type from the set, so
            # only one type per pair ends up in the links table.
            ex = orange.Example(domain,
                                [id2index[src], id2index[dst],
                                 eTypes.pop()])
            links.append(ex)

        network.items = items
        network.links = links
        network.optimization = None
        return network
    def sendpredictions(self):
        """Send ``self.data`` extended with predictor outputs on "Predictions".

        Sends ``None`` when there is no data or no output variable.  For a
        discrete output variable, meta attributes are added for the selected
        class probabilities and, if ``self.showClass`` is set, the predicted
        class of every predictor.  Otherwise one meta attribute per predictor
        holds its predicted value.  When ``self.doPrediction`` is set, the
        class column is filled in by the first predictor.
        """
        if not self.data or not self.outvar:
            self.send("Predictions", None)
            return

        # predictions, data set with class predictions
        classification = self.outvar.varType == orange.VarTypes.Discrete

        metas = []
        if classification:
            if len(self.selectedClasses):
                for c in self.predictors.values():
                    # cindx and c are bound as lambda defaults so that each
                    # variable keeps its own class index and predictor.
                    m = [orange.FloatVariable(name=str("%s(%s)" % (c.name, str(self.outvar.values[i]))),
                                              getValueFrom = lambda ex, rw, cindx=i, c=c: orange.Value(c(ex, c.GetProbabilities)[cindx])) \
                         for i in self.selectedClasses]
                    metas.extend(m)
            if self.showClass:
                mc = [
                    orange.EnumVariable(
                        name=str(c.name),
                        values=self.outvar.values,
                        getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                    for c in self.predictors.values()
                ]
                metas.extend(mc)
        else:
            # regression
            mc = [
                orange.FloatVariable(
                    name="%s" % c.name,
                    getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                for c in self.predictors.values()
            ]
            metas.extend(mc)

        # Output domain: original attributes plus the output variable as the
        # last variable, original metas, then the prediction metas.
        classVar = self.outvar
        domain = orange.Domain(self.data.domain.attributes + [classVar])
        domain.addmetas(self.data.domain.getmetas())
        for m in metas:
            domain.addmeta(orange.newmetaid(), m)
        predictions = orange.ExampleTable(domain, self.data)
        if self.doPrediction:
            # NOTE(review): indexing values() relies on Python 2 returning a
            # list here.
            c = self.predictors.values()[0]
            for ex in predictions:
                ex[classVar] = c(ex)

        predictions.name = self.data.name
        self.send("Predictions", predictions)

        self.changedFlag = False
Beispiel #6
0
 def expandToFuzzyExamples(self, examples, att, a, b):
     """
     Annotate ``examples`` IN PLACE with fuzzy-set membership and return it.

     Two meta attributes are registered on the table's domain: 'fuzzy set'
     (the set name, 'yes' or 'no') and 'u' (the degree of membership).

     For each input example, with v = float(example[att]):
       * a < v < b : the example is marked 'yes' with u = (v - a) / (b - a)
                     and a second row is appended, marked 'no' with
                     u = (b - v) / (b - a);
       * v >= b    : 'yes' with u = 1.0;
       * v <= a    : 'no' with u = 1.0.

     The same table object that was passed in is returned, extended by the
     appended rows.
     """
     mu = orange.FloatVariable("u")
     mv = orange.StringVariable("fuzzy set")
     examples.domain.addmeta(FUZZYMETAID, mu)
     examples.domain.addmeta(FUZZYMETAID - 1, mv)
     # range() is evaluated once, so rows appended below are not revisited.
     for j in range(0, len(examples)):
         i = examples[j]
         v = float(i[att])
         if a < v < b:  # we have to expand this example
             i["fuzzy set"] = 'yes'
             i["u"] = (v - a) / (b - a)
             # presumably append() stores a copy of the row, so relabelling
             # examples[-1] leaves the 'yes' row untouched -- TODO confirm
             examples.append(i)
             examples[-1]["fuzzy set"] = "no"
             examples[-1]["u"] = (b - v) / (b - a)
         elif v > a:  # u(yes) = 1.0
             i["fuzzy set"] = 'yes'
             i["u"] = 1.0
         else:  # u(no) = 1.0
             i["fuzzy set"] = 'no'
             i["u"] = 1.0
     return examples
Beispiel #7
0
    def __call__(self, weights=None):
        """Return a normalized copy of ``self.data`` with a ``score`` meta.

        Args:
            weights: mapping from attribute to weight; attributes missing
                from it get weight 1.  When falsy, ``self.user_weights`` is
                used instead.

        Returns:
            A new example table in which the attributes listed in
            ``self.ranges`` are scaled by their range width, every feature
            column is divided by its column sum, and the ``score`` meta
            holds the weighted sum over the ranged attributes (using
            ``1 - value`` for attributes in ``self.minimize``).
        """
        if not weights:
            weights = self.user_weights

        # New augmented table
        norm_data = orange.ExampleTable(self.data)
        # Choose a meta id below every existing id (and below 0).  The old
        # ``min(keys, 0)`` compared the key list with an int instead of
        # taking the minimum over the ids themselves.
        existing_ids = list(norm_data.domain.get_metas().keys())
        newid = min(existing_ids + [0]) - 1
        score_attr = orange.FloatVariable('score')
        norm_data.domain.add_meta(newid, score_attr)
        norm_data.add_meta_attribute(score_attr)

        # Normalize the attributes to the proper range
        for att, (lower_bound, upper_bound) in self.ranges.items():
            for ex in norm_data:
                ex[att] = ex[att] / (upper_bound - lower_bound)

        # Normalize column-wise
        col_sum = {}
        for att in norm_data.domain.features:
            col_sum[att] = float(sum([ex[att] for ex in norm_data]))
        for ex in norm_data:
            for att in norm_data.domain.features:
                ex[att] = ex[att] / col_sum[att]

        # Use the inverse of an attribute value if it should be minimized.
        inverse = lambda x, att: 1 - x if att in self.minimize else x
        for ex in norm_data:
            score = sum([
                inverse(ex[att].value, att) * weights.get(att, 1)
                for att in self.ranges.keys()
            ])
            ex['score'] = score

        return norm_data
Beispiel #8
0
    def __call__(self, dataset):
        """Project ``dataset`` onto the stored principal components.

        The data is restricted to ``self.domain``, imputed, continuized,
        centered with ``self.center`` and, when ``self.deviation`` is set,
        divided by it before being multiplied by ``self.loadings``.

        Returns:
            An example table with attributes "PC1".."PCn" (n being
            ``len(self.evalues)``), joined with the class column when
            ``dataset`` has a class variable.

        Raises:
            orange.KernelException: if preparing the data raises TypeError.
        """
        try:
            #retain class attribute
            attrDataset = dataset.select(self.domain)
            imputer = self.imputer(attrDataset)
            attrDataset = imputer(attrDataset)
            domain = self.continuizer(attrDataset)
            attrDataset = attrDataset.translate(domain)
        except TypeError:
            raise orange.KernelException("One or more attributes form training set are missing!")

        dataMatrix, _classArray, _weights = attrDataset.toNumpy()

        dataMatrix -= self.center
        # Identity test: ``!= None`` on a numpy array compares elementwise
        # and has no single truth value.
        if self.deviation is not None:
            dataMatrix *= 1./self.deviation

        #save transformed data
        self._dataMatrix = numpy.dot(dataMatrix, self.loadings)

        attributes = [orange.FloatVariable("PC%d" % (i + 1, )) for i in range(len(self.evalues))]
        new_domain = orange.Domain(attributes)
        new_table = orange.ExampleTable(new_domain, self._dataMatrix)

        if dataset.domain.classVar:
            #suboptimal
            classTable = dataset.select([dataset.domain.classVar.name])
            self._classArray = numpy.array([row.getclass() for row in classTable])
            new_table = orange.ExampleTable([new_table, classTable])

        return new_table
Beispiel #9
0
    def applySettings(self):
        """Use the settings from the widget to identify the outliers.

        With input present, a copy of the data carrying a "Z score" meta
        attribute is sent on "Examples with Z-scores"; examples above the
        threshold go to "Outliers" and the rest to "Inliers".  Without
        input, ``None`` is sent on all three channels.
        """
        if self.haveInput != 1:
            self.send("Examples with Z-scores", None)
            self.send("Outliers", None)
            self.send("Inliers", None)
            return

        outlier = self.outlier
        outlier.setKNN(self.ks[self.k][1])

        newdomain = orange.Domain(self.data.domain)
        newdomain.addmeta(orange.newmetaid(),
                          orange.FloatVariable("Z score"))

        self.newdata = orange.ExampleTable(newdomain, self.data)

        for i, el in enumerate(outlier.zValues()):
            self.newdata[i]["Z score"] = el

        self.send("Examples with Z-scores", self.newdata)

        # NOTE(security): eval() executes arbitrary code held in
        # self.zscore; kept for compatibility, but a numeric parser would be
        # safer if the setting can come from untrusted input.
        # Evaluated once and shared by both filters.
        threshold = eval(self.zscore)

        filterout = orange.Filter_values(domain=self.newdata.domain)
        filterout["Z score"] = (orange.Filter_values.Greater, threshold)
        outliers = filterout(self.newdata)

        filterin = orange.Filter_values(domain=self.newdata.domain)
        filterin["Z score"] = (orange.Filter_values.LessEqual, threshold)
        inliers = filterin(self.newdata)

        self.send("Outliers", outliers)
        self.send("Inliers", inliers)
Beispiel #10
0
def etForAttribute(datal, a):
    """
    Builds an example table for a single attribute across multiple 
    example tables.

    The result has one float attribute per input table ("v0", "v1", ...)
    holding that table's values of attribute ``a``, with the class variable
    of the first table as its class.
    """

    def getAttrVals(data, attr):
        # Project the table onto the single attribute and read native values.
        dom2 = orange.Domain([data.domain[attr]], False)
        dataa = orange.ExampleTable(dom2, data)
        return [example[0].native() for example in dataa]

    domainl = []
    valuesl = []

    for index, data in enumerate(datal):
        valuesl.append(getAttrVals(data, a))
        domainl.append(orange.FloatVariable(name=("v" + str(index))))

    # NOTE(review): class values are read from the LAST table (``data``
    # after the loop) using the FIRST table's class variable -- confirm
    # that all tables list the same examples in the same order.
    classvals = getAttrVals(data, datal[0].domain.classVar)
    valuesl += [classvals]

    dom = orange.Domain(domainl, datal[0].domain.classVar)
    # Transpose per-table columns into per-example rows.
    examples = [list(row) for row in zip(*valuesl)]

    return orange.ExampleTable(dom, examples)
Beispiel #11
0
def __makeExampleTable(namesDict, data):
    """Build a two-class example table from per-gene measurements.

    One float attribute is created per key of ``data`` (in sorted order).
    One example is created for every name in
    ``namesDict[CONTROL_GROUP_KEY]`` and then for every name in
    ``namesDict[DATA_GROUP_KEY]``; its values are
    ``data[gene][group][name]`` and its class is the group key.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    gene_ids = sorted(data.keys())
    features = [orange.FloatVariable(name=str(gene)) for gene in gene_ids]
    class_var = orange.EnumVariable(
        name=CLASS_ATRR_NAME, values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(features, class_var)
    table = orange.ExampleTable(domain)

    # Control group rows first, then data group rows.
    for group_key in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for sample_name in namesDict[group_key].keys():
            row = [data[gene][group_key][sample_name] for gene in gene_ids]
            row.append(group_key)
            table.append(orange.Example(domain, row))

    return table
Beispiel #12
0
    def applySettings(self):
        """Score the input data and send the examples flagged as outliers.

        With input present, a copy of the data carrying a "Z score" meta
        attribute is sent on "Examples with Z-scores" and the examples above
        the threshold are stored in ``self.outliers`` and sent on
        "Outliers".  Without input, ``None`` is sent on both channels.
        """
        if self.haveInput != 1:
            self.send("Examples with Z-scores", None)
            self.send("Outliers", None)
            return

        outlier = self.outlier
        outlier.setKNN(self.ks[self.k][1])

        newdomain = orange.Domain(self.data.domain)
        newdomain.addmeta(orange.newmetaid(),
                          orange.FloatVariable("Z score"))

        self.newdata = orange.ExampleTable(newdomain, self.data)

        for i, el in enumerate(outlier.zValues()):
            self.newdata[i]["Z score"] = el

        self.send("Examples with Z-scores", self.newdata)

        # NOTE(security): eval() executes arbitrary code held in
        # self.zscore; kept for compatibility, but a numeric parser would be
        # safer if the setting can come from untrusted input.
        threshold = eval(self.zscore)

        # Named ``outlier_filter`` so the builtin ``filter`` is not shadowed.
        outlier_filter = orange.Filter_values(domain=self.newdata.domain)
        outlier_filter["Z score"] = (orange.Filter_values.Greater, threshold)
        self.outliers = outlier_filter(self.newdata)

        self.send("Outliers", self.outliers)
Beispiel #13
0
def getSMARTSrecalcDesc(data, smarts):
    """Calculate structural descriptors for test and training data.

    Checks for the substructure occurrence (0/1) in the test or prediction
    molecules. Uses RDK.

    Args:
        data: the test/prediction data; must have a SMILES attribute.
        smarts: a non-empty list of SMARTS strings, none of which may
            already be in ``data.domain``.

    Returns:
        The data including one new float feature per SMARTS string, or
        ``None`` (after printing a message) when the inputs are invalid.
        Rows whose SMILES cannot be parsed are left without values for the
        new features.
    """
    smilesName = dataUtilities.getSMILESAttr(data)
    if not smilesName or not isinstance(smarts, list) or not smarts:
        print("Please check the input parameters")
        return None

    existingAttrs = [attr for attr in smarts if attr in data.domain]
    if existingAttrs:
        print("The input data cannot contain the smarts to be calculated!")
        return None

    newdomain = orange.Domain(
        data.domain.attributes +
        [orange.FloatVariable(attr, numberOfDecimals=1) for attr in smarts],
        data.domain.classVar)
    newdata = orange.ExampleTable(newdomain, data)

    # Compile every SMARTS pattern once instead of once per molecule.
    patterns = [(smrt, rdk.Chem.MolFromSmarts(smrt)) for smrt in smarts]

    for ex in newdata:
        smile = str(ex[smilesName].value)
        mol = rdk.Chem.MolFromSmiles(smile)
        if mol is None:
            continue
        for smrt, patt in patterns:
            ex[smrt] = 1.0 if mol.HasSubstructMatch(patt) else 0.0
    return newdata
Beispiel #14
0
def addMetaID(data):
    """Add a "meta_id" meta attribute holding each example's row index.

    A new meta id is drawn until one is found that the domain does not
    already use.  The table is modified in place; nothing is returned.
    """
    index_var = orange.FloatVariable("meta_id")
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, index_var)
    for row, example in enumerate(data):
        example[index_var] = row
Beispiel #15
0
 def __call__(self, rule, examples, weights, targetClass):
     """Scale the weights of the examples that the rule covers correctly.

     A fresh weight meta attribute is created on ``examples``.  Examples
     that the rule covers and classifies correctly get their old weight
     multiplied by ``self.mult``; all others keep their old weight.

     Returns:
         tuple: ``(examples, id of the new weight meta attribute)``.
     """
     if not weights:
         # No weights supplied: create a meta attribute of unit weights.
         weights = orange.newmetaid()
         examples.addMetaAttribute(weights, 1.)
         examples.domain.addmeta(
             weights, orange.FloatVariable("weights-" + str(weights)), True)

     new_weights_id = orange.newmetaid()
     examples.addMetaAttribute(new_weights_id, 1.)
     examples.domain.addmeta(
         new_weights_id,
         orange.FloatVariable("weights-" + str(new_weights_id)),
         True)

     for ex in examples:
         covered = rule(ex) and ex.getclass() == rule.classifier(
             ex, orange.GetValue)
         if covered:
             ex[new_weights_id] = ex[weights] * self.mult
         else:
             ex[new_weights_id] = ex[weights]
     return (examples, new_weights_id)
Beispiel #16
0
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a classless table with one row per rule and one column per term.

    Every term in ``allTerms`` becomes an enum attribute with the values
    ``const.PRESENT`` / ``const.ABSENT``.  Three meta attributes carry the
    rule name, the rule's terms as a string, and the rule's sequence number
    (the key in ``tableDict``).  Rows are added in sorted key order.
    """
    import orange
    import constants as const

    term_vars = [
        orange.EnumVariable(name=str(term),
                            values=[const.PRESENT, const.ABSENT])
        for term in allTerms
    ]

    # three meta attributes
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # this is a classless domain
    domain = orange.Domain(term_vars, False)
    for meta_id, meta_var in ((mid, ruleName), (mid1, ruleTerms),
                              (mid2, ruleNumber)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)

    for key in sorted(tableDict.keys()):
        entry = tableDict[key]
        values = []
        for position, term in enumerate(allTerms):
            if term in entry[const.RULETERMS_KEY]:
                state = const.PRESENT
            else:
                state = const.ABSENT
            values.append(orange.Value(term_vars[position], state))
        example = orange.Example(domain, values)

        # [1:-1] skips the square brackets around the stored strings
        example[const.NAME_ATTR] = orange.Value(
            ruleName, entry[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, entry[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, key)

        table.append(example)
    return table
Beispiel #17
0
    def sendData(self):
        """Send the examples behind the selected table cells on "Selected Data".

        Sends ``None`` when there are no results, no selected cells or no
        selected learner.  Otherwise the examples whose (actual class,
        predicted class) pair matches a selected (row, column) cell are
        sent, optionally extended with meta attributes for the selected
        learner's predictions and class probabilities.
        """
        self.selectionDirty = False

        selected = [(x.row(), x.column())
                    for x in self.table.selectedIndexes()]
        res = self.res
        if not res or not selected or not self.selectedLearner:
            self.send("Selected Data", None)
            return

        learnerI = self.selectedLearner[0]

        data = None
        if hasattr(res, "examples") and isinstance(res.examples,
                                                   orange.ExampleTable):
            # Indices of tested examples whose (actual, predicted) class pair
            # is one of the selected cells.
            selectionIndices = [
                i for i, rese in enumerate(res.results)
                if (rese.actualClass, rese.classes[learnerI]) in selected
            ]
            data = res.examples.getitemsref(selectionIndices)

        if data is not None and (self.appendPredictions
                                 or self.appendProbabilities):
            # Build a fresh domain and table so that the metas added below
            # are attached to this new domain.
            domain = orange.Domain(data.domain.attributes,
                                   data.domain.classVar)
            domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(domain, data)

            if self.appendPredictions:
                cname = self.learnerNames[learnerI]
                # Same variable type as the class, named "<class>(<learner>)".
                predVar = type(domain.classVar)(
                    "%s(%s)" % (domain.classVar.name, cname.encode("utf-8")
                                if isinstance(cname, unicode) else cname))
                if hasattr(domain.classVar, "values"):
                    predVar.values = domain.classVar.values
                predictionsId = orange.newmetaid()
                domain.addmeta(predictionsId, predVar)
                for i, ex in zip(selectionIndices, data):
                    ex[predictionsId] = res.results[i].classes[learnerI]

            if self.appendProbabilities:
                # One "p(<value>)" meta per class value.
                probVars = [
                    orange.FloatVariable("p(%s)" % v)
                    for v in domain.classVar.values
                ]
                probIds = [orange.newmetaid() for pv in probVars]
                domain.addmetas(dict(zip(probIds, probVars)))
                for i, ex in zip(selectionIndices, data):
                    for id, p in zip(probIds,
                                     res.results[i].probabilities[learnerI]):
                        ex[id] = p

        if data is not None:
            data.name = self.learnerNames[learnerI]

        self.send("Selected Data", data)
Beispiel #18
0
 def sendExampleTable(self, selectedInd):
     """Send the selected examples on "Data", optionally with MDS coordinates.

     ``self.selectionOptions`` selects the output form: 0 sends the selected
     examples unchanged, 1 prepends X and Y as regular attributes, and any
     other value adds X and Y as meta attributes.
     """
     if self.selectionOptions==0:
         self.send("Data", orange.ExampleTable(self.data.getitems(selectedInd)))
         return

     xAttr = orange.FloatVariable("X")
     yAttr = orange.FloatVariable("Y")
     if self.selectionOptions==1:
         domain = orange.Domain([xAttr, yAttr] + list(self.data.domain.variables))
         domain.addmetas(self.data.domain.getmetas())
     else:
         domain = orange.Domain(self.data.domain)
         domain.addmeta(orange.newmetaid(), xAttr)
         domain.addmeta(orange.newmetaid(), yAttr)

     selection = orange.ExampleTable(domain)
     selection.extend(self.data.getitems(selectedInd))
     # Row ``row`` of the selection corresponds to item ``ind`` of the data.
     for row, ind in enumerate(selectedInd):
         point = self.mds.points[ind]
         selection[row][xAttr] = point[0]
         selection[row][yAttr] = point[1]
     self.send("Data", selection)
Beispiel #19
0
 def initialize(self, examples, weightID, targetClass, apriori):
     """Reset the best-rule list and attach a "Probs" meta to ``examples``.

     Every example's probability meta is set to
     ``apriori[targetClass] / apriori.abs``.  Returns ``examples``.
     """
     self.bestRule = [None] * len(examples)
     prob_id = orange.newmetaid()
     self.probAttribute = prob_id
     examples.addMetaAttribute(prob_id, -1.e-6)
     examples.domain.addmeta(prob_id, orange.FloatVariable("Probs"))
     # Applied to every example; a per-example target-class condition is not
     # in effect.
     for ex in examples:
         ex[prob_id] = apriori[targetClass] / apriori.abs
     return examples
Beispiel #20
0
def PCAOnExampleTable(table, keepOriginal=1, nPCs=-1):
    """Run ``pca`` on the attributes of ``table`` and return the projection.

    Args:
        table: example table to project.
        keepOriginal: when truthy, the result is the original table joined
            with the new "PC n" columns; otherwise only the new columns.
        nPCs: passed through to ``pca``.
    """
    matrix = table.toNumpyMA("a")[0]
    projected, components, _values = pca(matrix, nPCs)

    pc_vars = [
        orange.FloatVariable("PC %d" % number)
        for number in range(1, len(components) + 1)
    ]
    pc_table = orange.ExampleTable(orange.Domain(pc_vars, 0), projected.data)

    if not keepOriginal:
        return pc_table
    return orange.ExampleTable([table, pc_table])
Beispiel #21
0
class PCAClassifier(object):
    """Projects data sets onto previously computed principal components."""

    def __init__(self, domain, imputer, continuizer, center, deviation,
                 evalues, loadings):
        """Store the preprocessing steps and the PCA parameters.

        Args:
            domain: domain that input data is restricted to.
            imputer: callable that, given a table, returns an imputer.
            continuizer: callable that, given a table, returns a domain.
            center: value subtracted from the data matrix.
            deviation: value the centered matrix is divided by, or None.
            evalues: eigenvalues; their count sets the number of PCs.
            loadings: matrix the data is multiplied by.
        """
        #data checking and modifying
        self.domain = domain
        self.imputer = imputer
        self.continuizer = continuizer
        #PCA properties
        self.center = center
        self.deviation = deviation
        self.evalues = evalues
        self.loadings = loadings

        #last prediction performed -> used for biplot
        self._dataMatrix = None
        self._classArray = None

    def __call__(self, dataset):
        """Project ``dataset`` onto the stored principal components.

        Returns:
            An example table with attributes "PC1".."PCn", joined with the
            class column when ``dataset`` has a class variable.

        Raises:
            orange.KernelException: if preparing the data raises TypeError.
        """
        try:
            #retain class attribute
            attrDataset = dataset.select(self.domain)
            imputer = self.imputer(attrDataset)
            attrDataset = imputer(attrDataset)
            domain = self.continuizer(attrDataset)
            attrDataset = attrDataset.translate(domain)
        except TypeError:
            # Call-style raise works on both Python 2 and Python 3.
            raise orange.KernelException(
                "One or more attributes form training set are missing!")

        dataMatrix, _classArray, _weights = attrDataset.toNumpy()

        dataMatrix -= self.center
        # Identity test: ``!= None`` on a numpy array compares elementwise
        # and has no single truth value.
        if self.deviation is not None:
            dataMatrix *= 1. / self.deviation

        #save transformed data
        self._dataMatrix = numpy.dot(dataMatrix, self.loadings)

        attributes = [
            orange.FloatVariable("PC%d" % (i + 1, ))
            for i in range(len(self.evalues))
        ]
        new_domain = orange.Domain(attributes)
        new_table = orange.ExampleTable(new_domain, self._dataMatrix)

        if dataset.domain.classVar:
            #suboptimal
            classTable = dataset.select([dataset.domain.classVar.name])
            self._classArray = numpy.array(
                [row.getclass() for row in classTable])
            new_table = orange.ExampleTable([new_table, classTable])

        return new_table
Beispiel #22
0
def makeDomain(names):
    """Create a domain with float attributes ``names`` and a boolean class.

    The class variable "class" takes the values "True" / "False".  A fixed
    set of meta attributes is attached, each under a freshly drawn meta id.
    """
    features = [orange.FloatVariable(name) for name in names]
    class_var = orange.EnumVariable("class", values=["True", "False"])
    domain = orange.Domain(features, class_var)

    def add_meta(variable_type, name, **options):
        # Draw the id first, then build the variable, as one addmeta call.
        domain.addmeta(orange.newmetaid(), variable_type(name, **options))

    add_meta(orange.FloatVariable, "weight")
    add_meta(orange.EnumVariable, "isInsane", values=["True", "False"])

    for name in ("filename", "sourceEngineName", "engineName",
                 "landmarkName"):
        add_meta(orange.StringVariable, name)

    for name in ("geometry", "track", "drawMap", "description", "farAway"):
        add_meta(orange.PythonVariable, name)

    return domain
    def get_domain_trans(self):
        """Return a new domain for transition features.

        One float attribute is created per entry of
        ``self.dataset.trans_alphabet``.  The class variable "classname"
        has the values "0" .. str(n**2 - 1), where n is the size of
        ``self.dataset.label_alphabet``.  The domain is rebuilt on every
        call; no cached ``self.domain`` is consulted.
        """
        features = []
        for name in self.dataset.trans_alphabet:
            features.append(orange.FloatVariable(name))

        pair_count = len(self.dataset.label_alphabet)**2
        class_values = [str(index) for index in range(pair_count)]
        class_var = orange.EnumVariable("classname", values=class_values)

        return orange.Domain(features + [class_var])
    def get_domain_obs(self):
        """Return a new domain for observation features.

        One float attribute is created per entry of
        ``self.dataset.obs_alphabet``.  The class variable "classname" has
        one value per entry of ``self.dataset.label_alphabet``, converted
        with ``str``.  The domain is rebuilt on every call; no cached
        ``self.domain`` is consulted.
        """
        features = []
        for name in self.dataset.obs_alphabet:
            features.append(orange.FloatVariable(name))

        class_values = [str(label) for label in self.dataset.label_alphabet]
        class_var = orange.EnumVariable("classname", values=class_values)

        return orange.Domain(features + [class_var])
Beispiel #25
0
    def learnModel(self, X, y):
        """Train a class-weighted random forest on binary-labelled data.

        Args:
            X: 2-D feature array; column i becomes the attribute "X<i>".
            y: label array with exactly two distinct values, one of which
                is presumably ``self.bestResponse`` -- verify against caller.

        Raises:
            ValueError: if ``y`` does not have exactly two distinct values.

        Side effects:
            Sets ``self.worstResponse``, ``self.domain``, ``self.learner``
            and ``self.classifier``.
        """
        classes = numpy.unique(y)
        if classes.shape[0] != 2:
            raise ValueError("Can only operate on binary data")

        self.worstResponse = classes[classes != self.bestResponse][0]

        #We need to convert y into indices
        label_indices = self.labelsToInds(y)
        XY = numpy.c_[X, label_indices]

        feature_vars = [
            orange.FloatVariable("X" + str(column))
            for column in range(X.shape[1])
        ]
        class_var = orange.EnumVariable("y")
        class_var.addValue(str(self.bestResponse))
        class_var.addValue(str(self.worstResponse))

        self.domain = orange.Domain(feature_vars + [class_var])
        eTable = orange.ExampleTable(self.domain, XY)

        #Weight examples
        preprocessor = orange.Preprocessor_addClassWeight(equalize=1)
        preprocessor.classWeights = [1 - self.weight, self.weight]
        eTable, weightID = preprocessor(eTable)
        eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

        base_tree = orngTree.TreeLearner(mForPruning=self.m,
                                         measure="gainRatio",
                                         minExamples=self.minSplit,
                                         maxDepth=self.maxDepth).instance()

        feature_count = numpy.round(X.shape[1] * self.featureSize)
        self.learner = orngEnsemble.RandomForestLearner(
            learner=base_tree,
            trees=self.numTrees,
            attributes=feature_count)
        self.classifier = self.learner(eTable, weightID)
Beispiel #26
0
def loadLibSVM(filename):
    """Load a LIBSVM-style sparse data file into an ``orange.ExampleTable``.

    Every non-blank line is split on whitespace.  The first token is the
    target; each remaining token must have the form ``index:value``.  One
    ``orange.FloatVariable`` is created per distinct index, and the attributes
    are ordered by the integer value of that index.

    If no target token contains a ``"."`` the class is an
    ``orange.EnumVariable`` named ``"class"`` over the sorted distinct
    targets; otherwise it is an ``orange.FloatVariable`` named ``"target"``.
    Attributes absent from a line are filled with ``attr("?")``
    (presumably Orange's unknown value - confirm).

    :param filename: path of the file to read.
    :return: ``orange.ExampleTable`` with one example per non-blank line.
    """
    # Text mode keeps the str-based parsing below valid on Python 3 as well,
    # and the context manager closes the handle that used to be leaked.
    with open(filename, "r") as source:
        data = [
            line.split() for line in source.read().splitlines()
            if line.strip()
        ]

    class _VariableCache(dict):
        """Dict that creates the FloatVariable for an index on first use."""

        def __missing__(self, key):
            return self.setdefault(key, orange.FloatVariable(key))

    variables = _VariableCache()

    def item(index, raw):
        # Pair the variable with the raw text converted by that variable.
        variable = variables[index]
        return variable, variable(raw)

    values = [dict(item(*val.split(":")) for val in ex[1:]) for ex in data]
    classes = [ex[0] for ex in data]
    disc = all("." not in c for c in classes)
    attributes = sorted(variables.values(), key=lambda var: int(var.name))
    if disc:
        classVar = orange.EnumVariable("class", values=sorted(set(classes)))
    else:
        classVar = orange.FloatVariable("target")
    domain = orange.Domain(attributes, classVar)
    return orange.ExampleTable([
        orange.Example(domain,
                       [ex.get(attr, attr("?")) for attr in attributes] + [c])
        for ex, c in zip(values, classes)
    ])
Beispiel #27
0
    def learnModel(self, X, y):
        """Fit a class-weighted Orange decision tree on binary-labelled data.

        :param X: array of examples with a ``shape``; every column becomes one
            float feature named ``X0``, ``X1``, ...
        :param y: labels for the rows of ``X``; must hold exactly two distinct
            values.
        :raises ValueError: if ``y`` does not hold exactly two distinct values.

        Side effects: sets ``self.worstResponse``, ``self.domain``,
        ``self.learner`` and ``self.classifier``.
        """
        labels = numpy.unique(y)
        if labels.shape[0] != 2:
            raise ValueError("Can only operate on binary data")

        # Whichever label is not the best response is the worst one.
        self.worstResponse = labels[labels != self.bestResponse][0]

        # Labels have to be turned into indices before building the table.
        encodedY = self.labelsToInds(y)
        rows = numpy.c_[X, encodedY]

        features = [
            orange.FloatVariable("X" + str(col)) for col in range(X.shape[1])
        ]
        target = orange.EnumVariable("y")
        target.addValue(str(self.bestResponse))
        target.addValue(str(self.worstResponse))

        self.domain = orange.Domain(features + [target])
        eTable = orange.ExampleTable(self.domain, rows)

        # Weight and equalise the examples: equalising picks weights such that
        # the weighted number of examples is the same in each class.
        weigher = orange.Preprocessor_addClassWeight(equalize=1)
        weigher.classWeights = [1 - self.weight, self.weight]
        eTable, weightID = weigher(eTable)
        eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

        # Build the learner first, then set depth limit and stop criterion.
        treeLearner = orngTree.TreeLearner(m_pruning=self.m,
                                           measure="gainRatio")
        self.learner = treeLearner
        self.learner.max_depth = self.maxDepth
        self.learner.stop = orange.TreeStopCriteria_common()
        self.learner.stop.min_instances = self.minSplit
        self.classifier = self.learner(eTable, weightID)
Beispiel #28
0
    def __parseBBRCoutput(self, res):
        """Extend ``self.data`` with one 0/1 column per BBRC descriptor.

        Each entry of ``res`` is split on TAB characters.  The first field,
        stripped, becomes the name of a new ``orange.FloatVariable``.  The
        second field, with its first and last character removed, is split on
        single spaces and parsed as integers: the 1-based positions in
        ``self.data`` of the examples that contain the descriptor.

        :param res: iterable of result lines in the format described above.
        :return: a ``dataUtilities.DataTable`` over the original attributes
            plus the new descriptor attributes, with each descriptor set to
            1.0 for the examples listed on its line and 0.0 otherwise.

        When ``self.verbose`` is true a textual progress bar is written to
        standard output.
        """
        # Parse the results to an orange tab file
        if self.verbose:
            print("Parsing BBRC results. Please wait...")
        nCompounds = len(self.data)
        allDesc = []
        allIDs = []
        for line in res:
            fields = line.split("\t")
            allDesc.append(fields[0].strip())
            # A set makes the per-example membership test below O(1).
            allIDs.append(
                set(int(x) for x in fields[1][1:-1].strip().split(" ")))

        newDomainAttrs = list(self.data.domain.attributes) + \
                         [orange.FloatVariable(name) for name in allDesc]
        newDomain = orange.Domain(newDomainAttrs, self.data.domain.classVar)
        if self.verbose:
            # Single-argument print calls emit the same text on Python 2 and
            # also parse on Python 3.
            print("%s %s" % ("Original domain lenght: ",
                             len(self.data.domain)))
            print("%s %s" % ("New domain lenght     : ", len(newDomain)))
            print("\n0%" + " " * 98 + "100%")
            print("|" + "-" * 100 + "|")
            sys.stdout.write("|")
            sys.stdout.flush()

        # Small data sets get one "=" per compound, larger ones one "=" per
        # whole hundredth of the compounds.
        if nCompounds < 100:
            tick = 1
        else:
            tick = nCompounds // 100

        newData = dataUtilities.DataTable(newDomain)
        for idx, ex in enumerate(self.data):
            newEx = orange.Example(newDomain, ex)
            if self.verbose:
                if idx % tick == 0:
                    sys.stdout.write("=")
                sys.stdout.flush()

            # IDs in the BBRC output are 1-based positions in self.data.
            ID = idx + 1
            for name, members in zip(allDesc, allIDs):
                if ID in members:
                    newEx[name] = 1.0
                else:
                    newEx[name] = 0.0
            newData.append(newEx)
        if self.verbose:
            if nCompounds < 100:
                # Pad the bar out to its full width.
                sys.stdout.write("=" * (100 - nCompounds + 1))
            print("")
        return newData
Beispiel #29
0
        def createLogRegExampleTable(data, weightID):
            """Return ``data`` extended with synthetic, re-weighted examples.

            For every attribute of ``data.domain`` one extra copy of the
            examples is built and appended to the result:

            * continuous attribute: a float attribute ``<name>Disc`` is added
              to the domain.  It is 0 in the rows collected so far and 1 in
              the extra rows, where the original attribute is set to 0 and the
              weight is 100 times the weight of the same row in ``data``.
            * discrete attribute: the attribute is replaced by a copy with one
              additional value ``<name>X``.  The extra rows take that value
              and get 10 times the weight of the same row in ``data``.

            Attributes of any other type contribute no extra rows.

            :param data: example table to expand.
            :param weightID: id under which each example's weight is read
                from and written to (``example[weightID]``).
            :return: ``orange.ExampleTable`` with the original and extra rows.
            """
            finalData = orange.ExampleTable(data)
            origData = orange.ExampleTable(data)
            for at in data.domain.attributes:
                # For each attribute create a new example table, newData.
                if at.varType == orange.VarTypes.Continuous:
                    # Add a new continuous attribute to origData, finalData
                    # and newData.
                    atDisc = orange.FloatVariable(at.name + "Disc")
                    newDomain = orange.Domain(origData.domain.attributes +
                                              [atDisc, data.domain.classVar])
                    # Take the metas from origData, as the discrete branch
                    # does: newData is not assigned yet when the first
                    # attribute is continuous.
                    newDomain.addmetas(origData.domain.getmetas())
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    for d in origData:
                        d[atDisc] = 0
                    for d in finalData:
                        d[atDisc] = 0
                    for i, d in enumerate(newData):
                        d[atDisc] = 1
                        d[at] = 0
                        d[weightID] = 100 * data[i][weightID]

                elif at.varType == orange.VarTypes.Discrete:
                    # In origData, finalData and newData give attribute "at"
                    # one more value, named after the attribute plus "X".
                    atNew = orange.EnumVariable(at.name,
                                                values=at.values +
                                                [at.name + "X"])
                    newDomain = orange.Domain(
                        [x for x in origData.domain.attributes if x != at] +
                        [atNew, origData.domain.classVar])
                    newDomain.addmetas(origData.domain.getmetas())
                    temp_finalData = orange.ExampleTable(finalData)
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    temp_origData = orange.ExampleTable(origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    # "at" is gone from the new domain, so copy its values
                    # into the replacement attribute from the saved tables.
                    for i, d in enumerate(origData):
                        d[atNew] = temp_origData[i][at]
                    for i, d in enumerate(finalData):
                        d[atNew] = temp_finalData[i][at]
                    for i, d in enumerate(newData):
                        d[atNew] = at.name + "X"
                        d[weightID] = 10 * data[i][weightID]
                else:
                    # No newData was built for this attribute; do not append
                    # the rows left over from a previous iteration.
                    continue
                finalData.extend(newData)
            return finalData
Beispiel #30
0
def test():
    """Manual smoke test for the hierarchical clustering widget.

    Shows an ``OWHierarchicalClustering`` widget, loads the iris data set
    from ``../../doc/datasets/iris.tab``, registers a float meta attribute
    named ``"a"`` on it, fills the lower triangle (diagonal included) of a
    symmetric matrix with pairwise Euclidean distances, hands the matrix to
    the widget and enters the Qt event loop.
    """
    app = QApplication(sys.argv)
    w = OWHierarchicalClustering()
    w.show()

    data = orange.ExampleTable("../../doc/datasets/iris.tab")
    metaId = orange.newmetaid()
    data.domain.addmeta(metaId, orange.FloatVariable("a"))
    data.addMetaAttribute(metaId)

    dist = orange.ExamplesDistanceConstructor_Euclidean(data)
    # The matrix used to be allocated twice in a row; once is enough.
    matrix = orange.SymMatrix(len(data))
    matrix.setattr('items', data)
    for i in range(len(data)):
        for j in range(i + 1):
            matrix[i, j] = dist(data[i], data[j])

    w.set_matrix(matrix)
    app.exec_()