Beispiel #1
0
    def get_example_table(self):
        """Run the query and return the result as an ``orange.ExampleTable``.

        The response text comes from ``self.run(count=False, header=True)``
        and is parsed according to ``self.format`` (case-insensitive):

        * ``tsv`` -- the first line supplies the column names and every
          column becomes a string variable.  Blank lines are skipped and
          ``None`` is returned when no data rows remain.
        * ``fasta`` -- records are read with ``Bio.SeqIO`` into a table
          with two string columns, ``id`` and ``sequence``.

        Raises:
            BioMartError: if ``self.format`` is neither ``tsv`` nor
                ``fasta``.
        """
        import orange
        data = self.run(count=False, header=True)
        fmt = self.format.lower()

        if fmt == "tsv":
            # partition() also copes with a header-only response (no
            # newline at all), where unpacking split("\n", 1) would fail.
            header, _, body = data.partition("\n")
            domain = orange.Domain(
                [orange.StringVariable(name) for name in header.split("\t")],
                False)
            rows = [
                line.split("\t") for line in body.split("\n") if line.strip()
            ]
            return orange.ExampleTable(domain, rows) if rows else None

        if fmt == "fasta":
            from StringIO import StringIO
            from Bio import SeqIO
            domain = orange.Domain([
                orange.StringVariable("id"),
                orange.StringVariable("sequence")
            ], False)  # TODO: meaningful id
            examples = [[seq.id, str(seq.seq)]
                        for seq in SeqIO.parse(StringIO(data), "fasta")]
            return orange.ExampleTable(domain, examples)

        # "%s" is required here: a bare "%" is an incomplete format spec
        # and raised ValueError instead of the intended BioMartError.
        raise BioMartError("Unsupported format: %s" % self.format)
Beispiel #2
0
    def to_network(self, terms=None):
        """
        Return an Orange.network.Network instance constructed from
        this ontology.

        Every term returned by ``self.terms()`` becomes a node and every
        (term, related term) pair reported by ``self.related_terms()``
        becomes an edge whose weight vector has one 0/1 slot per entry of
        ``self.edge_types()``.  ``network.items`` holds one row per term
        (id, name, first ``def`` value or "") and ``network.links`` one
        row per edge.

        NOTE(review): the ``terms`` argument is ignored -- it is always
        replaced by ``self.terms()``.  It is kept only for interface
        compatibility; confirm whether restricting the network to a
        subset of terms was intended.

        """
        edge_types = self.edge_types()
        terms = self.terms()
        from Orange.orng import orngNetwork
        import orange

        network = orngNetwork.Network(len(terms), True, len(edge_types))
        network.objects = dict((term.id, i) for i, term in enumerate(terms))

        # (source id, target id) -> set of relation types between them.
        edges = defaultdict(set)
        for term in terms:
            for relType, relTerm in self.related_terms(term):
                edges[(term.id, relTerm)].add(relType)

        edgeitems = edges.items()
        for (src, dst), eTypes in edgeitems:
            network[src, dst] = [1 if e in eTypes else 0 for e in edge_types]

        # Node table.
        domain = orange.Domain([
            orange.StringVariable("id"),
            orange.StringVariable("name"),
            orange.StringVariable("def"),
        ], False)

        items = orange.ExampleTable(domain)
        for term in terms:
            items.append(orange.Example(
                domain, [term.id, term.name,
                         term.values.get("def", [""])[0]]))

        # Edge table.  Node numbers here are 1-based, unlike the 0-based
        # positions stored in ``network.objects`` above.
        domain = orange.Domain([
            orange.FloatVariable("u"),
            orange.FloatVariable("v"),
            orange.EnumVariable("relationship", values=list(edge_types))
        ], False)

        id2index = dict((term.id, i + 1) for i, term in enumerate(terms))
        links = orange.ExampleTable(domain)
        for (src, dst), eTypes in edgeitems:
            # NOTE(review): only one (arbitrary) relation type is recorded
            # per edge even when several relate the same pair of terms.
            links.append(orange.Example(
                domain, [id2index[src], id2index[dst], eTypes.pop()]))

        network.items = items
        network.links = links
        network.optimization = None
        return network
Beispiel #3
0
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a classless Orange table with one row per rule.

    Each entry of ``allTerms`` becomes a two-valued (present / absent)
    attribute.  A row is produced for every key of ``tableDict`` (visited
    in sorted order) and marks which terms occur in that rule's
    ``RULETERMS_KEY`` collection.  Three meta attributes accompany every
    row: the rule name and the rule's term string (both with their first
    and last character stripped) and the rule's key as a number.
    """
    import orange
    import constants as const

    term_vars = []
    for term in allTerms:
        term_vars.append(
            orange.EnumVariable(name=str(term),
                                values=[const.PRESENT, const.ABSENT]))

    # Meta variables, each paired with a freshly allocated meta id.
    name_var = orange.StringVariable(const.NAME_ATTR)
    name_id = orange.newmetaid()
    terms_var = orange.StringVariable(const.TERMS_ATTR)
    terms_id = orange.newmetaid()
    number_var = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    number_id = orange.newmetaid()

    # No class variable: the second argument is False.
    domain = orange.Domain(term_vars, False)
    for meta_id, meta_var in ((name_id, name_var),
                              (terms_id, terms_var),
                              (number_id, number_var)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)

    for key in sorted(tableDict):
        cells = [
            orange.Value(
                var,
                const.PRESENT
                if term in tableDict[key][const.RULETERMS_KEY]
                else const.ABSENT)
            for var, term in zip(term_vars, allTerms)
        ]
        row = orange.Example(domain, cells)

        # [1:-1] drops the enclosing square brackets of the stored strings.
        rule_name = tableDict[key][const.RULENAME_KEY][1:-1]
        row[const.NAME_ATTR] = orange.Value(name_var, rule_name)
        rule_terms = tableDict[key][const.RULETERMS_STR_KEY][1:-1]
        row[const.TERMS_ATTR] = orange.Value(terms_var, rule_terms)
        row[const.SEQ_NUM_ATTR] = orange.Value(number_var, key)

        table.append(row)
    return table
Beispiel #4
0
 def expandToFuzzyExamples(self, examples, att, a, b):
     """
     Annotate ``examples`` IN PLACE with fuzzy-set membership and return it.

     Two meta attributes are added to the table's domain: 'fuzzy set'
     (the name of the fuzzy set, 'yes' or 'no') and 'u' (the example's
     degree of membership in that set).  With v = float(example[att]):

     * a < v < b : the example is expanded into two fuzzy examples; the
       original row becomes 'yes' with u = (v - a) / (b - a) and a row
       appended to the end of the table becomes 'no' with
       u = (b - v) / (b - a);
     * otherwise, v > a (i.e. v >= b) gives 'yes' with u = 1.0 and
       anything else (v <= a) gives 'no' with u = 1.0.

     The returned object is the same table that was passed in, not a copy.
     """
     mu = orange.FloatVariable("u")
     mv = orange.StringVariable("fuzzy set")
     examples.domain.addmeta(FUZZYMETAID, mu)
     examples.domain.addmeta(FUZZYMETAID - 1, mv)
     # The index range is fixed before the loop starts, so rows appended
     # below are not visited again.
     for j in range(len(examples)):
         i = examples[j]
         v = float(i[att])
         if a < v < b:  # we have to expand this example
             i["fuzzy set"] = 'yes'
             i["u"] = (v - a) / (b - a)
             # presumably append() stores a copy of the row, otherwise the
             # 'no' values below would overwrite the 'yes' ones -- confirm
             examples.append(i)
             examples[-1]["fuzzy set"] = "no"
             examples[-1]["u"] = (b - v) / (b - a)
         elif v > a:  # u(yes) = 1.0
             i["fuzzy set"] = 'yes'
             i["u"] = 1.0
         else:  # u(no) = 1.0
             i["fuzzy set"] = 'no'
             i["u"] = 1.0
     return examples
Beispiel #5
0
 def parsePubMed(self, filename, attributes=("pmid", "title", "abstract", "mesh"), skipExamplesWithout=("mesh",)):
     """
     Parse PubMed search results (XML) into an Orange example table.

     filename            -- path of the XML file to read
     attributes          -- names of the article fields to extract; each
                            becomes a string variable.  Known names:
                            "pmid", "title", "abstract", "mesh" and
                            "affilation" (historic spelling; "affiliation"
                            is accepted as well)
     skipExamplesWithout -- requested fields that must be non-empty; an
                            article lacking any of them is left out

     Returns the resulting orange.ExampleTable.
     """
     parser = make_parser()
     handler = pubMedHandler()
     parser.setContentHandler(handler)
     # Close the input file deterministically instead of leaking the handle.
     with open(filename) as source:
         parser.parse(source)
     atts = [orange.StringVariable(name) for name in attributes]
     domain = orange.Domain(atts, 0)
     data = orange.ExampleTable(domain)
     print(data.domain.attributes)
     # Position of every supported field within a parsed article record.
     mapping = {"pmid": 0, "title": 1, "abstract": 2, "mesh": 3,
                "affilation": 4, "affiliation": 4}
     for article in handler.articles:
         row = []
         skip = False
         for field in attributes:
             value = article[mapping[field]]
             if field in skipExamplesWithout:
                 if (field == "mesh" and len(value) == 0) or str(value) == "":
                     skip = True
             row.append(str(value))
         if not skip:
             data.append(row)
     return data
    def relabel(self):
        """Attach item labels to ``self.matrix`` and send it on "Distances".

        The labels come from the first source that applies:

        * ``self.data`` -- its attributes (or, failing that, all of its
          variables) when ``self.takeAttributeNames`` is set, otherwise
          its examples.  The count has to equal ``matrix.dim``; if it
          does not, an error is reported and the items are left as is.
        * ``self.labels`` -- wrapped in a one-column string table, which
          is also stored in ``self.data``.
        * the indices "0" .. str(dim - 1) when both of the above are None.

        Nothing is labelled or sent while ``self.matrix`` is None.
        """
        self.error()  # no-argument call; presumably resets the error state
        matrix = self.matrix
        if matrix is None:
            # Without this guard the code below dereferenced None and
            # raised AttributeError.
            return

        if self.data is not None:
            if self.takeAttributeNames:
                domain = self.data.domain
                if matrix.dim == len(domain.attributes):
                    matrix.setattr("items", domain.attributes)
                elif matrix.dim == len(domain.variables):
                    matrix.setattr("items", domain.variables)
                else:
                    self.error("The number of attributes doesn't match the matrix dimension")
            elif matrix.dim == len(self.data):
                matrix.setattr("items", self.data)
            else:
                self.error("The number of examples doesn't match the matrix dimension")
        elif matrix and self.labels:
            lbl = orange.StringVariable('label')
            self.data = orange.ExampleTable(orange.Domain([lbl]),
                                            [[str(l)] for l in self.labels])
            for e, label in zip(self.data, self.labels):
                e.name = label
            matrix.setattr("items", self.data)

        if self.data is None and self.labels is None:
            matrix.setattr("items", [str(i) for i in range(matrix.dim)])

        matrix.matrixType = orange.SymMatrix.Symmetric
        self.send("Distances", matrix)
Beispiel #7
0
 def sendList(self, selectedInd):
     """
     Send the items selected by ``selectedInd`` (indices into self.data).

     When self.data holds plain strings, a table is sent on "Data":
       * selectionOptions == 1      -> columns X, Y and name
       * selectionOptions otherwise -> column name, plus X and Y as meta
         attributes when selectionOptions is non-zero
     with X/Y taken from self.mds.points.

     Otherwise the selected items are grouped by name and sent on
     "Structured Data Files" as a list of (name, [items]) pairs, or None
     when nothing is selected.
     """
     if self.data and isinstance(self.data[0], str):
         xAttr = orange.FloatVariable("X")
         yAttr = orange.FloatVariable("Y")
         nameAttr = orange.StringVariable("name")
         if self.selectionOptions == 1:
             domain = orange.Domain([xAttr, yAttr, nameAttr])
             selection = orange.ExampleTable(domain)
             for ind in selectedInd:
                 # The name must come from the same item as the point;
                 # previously it was looked up by position in the
                 # selection, pairing points with the wrong names.
                 selection.append(list(self.mds.points[ind]) + [self.data[ind]])
         else:
             domain = orange.Domain([nameAttr])
             if self.selectionOptions:
                 domain.addmeta(orange.newmetaid(), xAttr)
                 domain.addmeta(orange.newmetaid(), yAttr)
             selection = orange.ExampleTable(domain)
             for row, ind in enumerate(selectedInd):
                 selection.append([self.data[ind]])
                 if self.selectionOptions:
                     selection[row][xAttr] = self.mds.points[ind][0]
                     selection[row][yAttr] = self.mds.points[ind][1]
         self.send("Data", selection)
         return

     if not selectedInd:
         self.send("Structured Data Files", None)
     else:
         datasets = [self.data[i] for i in selectedInd]
         # NOTE(review): group names are collected from ``dirname`` but
         # members are matched on ``strain`` -- confirm both always agree.
         names = list(set([d.dirname for d in datasets]))
         data = [(name, [d for d in datasets if d.strain == name]) for name in names]
         self.send("Structured Data Files", data)
Beispiel #8
0
 def getAZOdata(self, smi):
     """
     Wrap one SMILES string in a data table.

     The table has a single string attribute named "SMILES" and a single
     row holding ``smi``.
     """
     domain = orange.Domain(orange.StringVariable("SMILES"), 0)
     return dataUtilities.DataTable(domain, [[smi]])
Beispiel #9
0
    def as_orange_table(self):
        """Convert ``self.data`` into an ``orange.ExampleTable``.

        The domain has no ordinary attributes: ``self.orange_class_var``
        is the class and a string meta attribute "word" carries the word.
        One example is produced per (word, label) pair of ``self.data``.
        """
        domain = orange.Domain([], self.orange_class_var)
        word_id = orange.newmetaid()
        domain.addmeta(word_id, orange.StringVariable("word"))

        rows = orange.ExampleTable(domain)
        for pair in self.data:
            word, label = pair
            example = orange.Example(domain)
            example["class"] = label
            example["word"] = word
            rows.append(example)

        return rows
Beispiel #10
0
def makeDomain(names):
    """Create the Orange domain used for the examples.

    Every entry of ``names`` becomes a float attribute; the class is a
    two-valued ("True"/"False") variable called "class".  The domain also
    gets these meta attributes, each under a fresh meta id, in this order:
    weight (float), isInsane (True/False), four string variables and five
    Python-object variables.
    """
    features = [orange.FloatVariable(n) for n in names]
    class_var = orange.EnumVariable("class", values=["True", "False"])
    domain = orange.Domain(features, class_var)

    meta_vars = [
        orange.FloatVariable("weight"),
        orange.EnumVariable("isInsane", values=["True", "False"]),
    ]
    for label in ("filename", "sourceEngineName", "engineName",
                  "landmarkName"):
        meta_vars.append(orange.StringVariable(label))
    for label in ("geometry", "track", "drawMap", "description", "farAway"):
        meta_vars.append(orange.PythonVariable(label))

    for meta_var in meta_vars:
        domain.addmeta(orange.newmetaid(), meta_var)

    return domain
  def __loadDataFromES(self, dataType, domain):
    """Load phrases from Elasticsearch into an Orange example table.

    dataType -- "train": every phrase that has an ``is_training`` flag is
                fetched and a NEW domain is built from ``self.features``
                (the ``domain`` argument is ignored);
                "holdout": every phrase that has an ``is_holdout`` flag is
                fetched and stored using ``domain``;
                anything else: only the phrase ``self.phraseId`` is
                fetched (also kept in ``self.phraseData``) and stored
                using ``domain`` with an unknown ("?") class.
    domain   -- Orange domain for the non-training cases; its first meta
                attribute receives the phrase text.

    Rows that cannot be converted are logged and skipped.  Returns the
    filled orange.ExampleTable.
    """
    if dataType == "train":
      attributes = [self.__getOrangeVariableForFeature(feature) for feature in self.features]
      classAttribute = orange.EnumVariable("is_good", values = ["0", "1"])
      domain = orange.Domain(attributes, classAttribute)
      domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
    table = orange.ExampleTable(domain)

    flagField = {"train": "is_training", "holdout": "is_holdout"}.get(dataType)
    if flagField is not None:
      query = {"query":{"terms":{flagField:["1","0"]}}}
      # Count first so that a single search can return every hit.  The
      # holdout branch used to store the count under a misspelled name and
      # then failed with NameError when reading it back.
      phrasesCount = self.esClient.count(index=self.processorIndex, doc_type=self.processorPhraseType, body=query)
      size = phrasesCount["count"]
      phrases = self.esClient.search(index=self.processorIndex, doc_type=self.processorPhraseType, body=query, size=size)
      phrases = phrases["hits"]["hits"]
    else:
      self.phraseData = self.esClient.get(index=self.processorIndex, doc_type=self.processorPhraseType, id=self.phraseId)
      phrases = [self.phraseData]

    for hit in phrases:
      row = hit
      try:
        row = hit["_source"]
        featureValues = []
        for feature in self.features:
          featureValues.append(row["features"][feature["name"]].encode("ascii"))
        classType = "?"
        if dataType == "train":
          classType = row["is_training"].encode("ascii", "ignore")
        elif dataType == "holdout":
          classType = row["is_holdout"].encode("ascii")
        # Make sure discrete attributes know every value before use.
        for i, featureValue in enumerate(featureValues):
          attr = domain.attributes[i]
          if isinstance(attr, orange.EnumVariable):
            attr.addValue(featureValue)
        example = orange.Example(domain, (featureValues + [classType]))
        phraseMetaId = list(domain.getmetas().items())[0][0]
        example[phraseMetaId] = row["phrase"].encode("ascii")
        table.append(example)
      except Exception:
        # Building the message must not raise in turn (e.g. when the hit
        # has no "_source" or no "phrase" at all).
        try:
          phrase = row["phrase"]
        except Exception:
          phrase = "<unknown>"
        self.logger.error("Error classifying phrase '%s'" % phrase)
    return table
Beispiel #12
0
    def convert_table(self, table_name, cls_att=None):
        '''
        Returns the target table as an orange example table.

        Each column is mapped by ``self.orng_type``: 'd' gives a discrete
        variable over the column's known values, 'c' a continuous one and
        anything else a string variable.  The column named ``cls_att``
        (if any) becomes the class variable; 'string' columns and primary
        or foreign key columns become meta attributes; every other column
        is an ordinary attribute.  Missing (None) values are stored as '?'.

            :param table_name: name of the database table to convert
            :param cls_att: name of the class (target) column, or None
            :raises Exception: if the class column has type 'string'
        '''
        import orange

        cols = self.db.cols[table_name]
        attributes, metas, class_var = [], [], None
        for col in cols:
            att_type = self.orng_type(table_name, col)
            if att_type == 'd':
                att_vals = self.db.col_vals[table_name][col]
                att_var = orange.EnumVariable(
                    str(col), values=[str(val) for val in att_vals])
            elif att_type == 'c':
                att_var = orange.FloatVariable(str(col))
            else:
                att_var = orange.StringVariable(str(col))

            if col == cls_att:
                if att_type == 'string':
                    raise Exception(
                        'Unsuitable data type for a target variable: %s' %
                        att_type)
                class_var = att_var
                continue

            # Parenthesised to make the and/or precedence explicit.
            if (att_type == 'string'
                    or (table_name in self.db.pkeys
                        and col in self.db.pkeys[table_name])
                    or (table_name in self.db.fkeys
                        and col in self.db.fkeys[table_name])):
                metas.append(att_var)
            else:
                attributes.append(att_var)

        domain = orange.Domain(attributes, class_var)
        for meta in metas:
            domain.addmeta(orange.newmetaid(), meta)
        dataset = orange.ExampleTable(domain)
        dataset.name = table_name
        for row in self.db.rows(table_name, cols):
            example = orange.Example(domain)
            for col, val in zip(cols, row):
                example[str(col)] = '?' if val is None else str(val)
            dataset.append(example)
        return dataset
Beispiel #13
0
    def __init__(self, training):
        self.training = training
        self.ancestor_to_count = training.ancestor_map()

        self.all_ancestors = list(self.ancestor_to_count.keys())
        self.all_ancestors.sort(key=lambda a: self.ancestor_to_count[a],
                                reverse=True)

        self.used_ancestors = self.all_ancestors
        print "name", self.used_ancestors[0].name
        self.attributes = [
            orange.EnumVariable(a.name, values=["True", "False"])
            for a in self.used_ancestors
        ]
        #self.attributes = [orange.FloatVariable(a.name)
        #                   for a in self.used_ancestors]
        print "got", len(self.used_ancestors), "features"
        self.domain = orange.Domain(self.attributes, training.orange_class_var)
        self.domain.addmeta(orange.newmetaid(), orange.StringVariable("word"))
        table = self.makeTable(self.training)
        self.classifier = orngEnsemble.RandomForestLearner()(table)
Beispiel #14
0
 def test_pickle(self):
     """Examples survive a pickle round trip, including meta values.

     A row taken straight from the table is expected to refuse pickling
     (PicklingError); a copy made with ``orange.Example`` must round-trip
     to an equal example, both before and after two meta values (a
     number and a string) are attached to it.
     """
     import pickle

     def roundtrip(example):
         return pickle.loads(pickle.dumps(example))

     table = orange.ExampleTable("iris")
     row = table[0]
     self.assertRaises(pickle.PicklingError, pickle.dumps, row)

     row = orange.Example(row)
     restored = roundtrip(row)
     self.assertEqual(row, row)
     self.assertEqual(row, restored)

     # The numeric meta value is set before its variable is registered.
     meta_x = orange.newmetaid()
     row[meta_x] = 33
     table.domain.addmeta(meta_x, orange.ContinuousVariable("x"))
     # The string meta variable is registered first, then the value is set.
     meta_y = orange.newmetaid()
     table.domain.addmeta(meta_y, orange.StringVariable("y"))
     row[meta_y] = "foo"

     restored = roundtrip(row)
     self.assertEqual(row, restored)
     self.assertEqual(restored[meta_x], 33)
     self.assertEqual(restored[meta_y], "foo")
Beispiel #15
0
 def generateETStruct(path, medaData, numGenes=None):
     """
     Write one tab-delimited file per replica under <path>/<strain>/.

     path     -- output directory; created when missing (one level only)
     medaData -- NOTE(review): currently ignored, the data set is always
                 re-created with Dicty.DData.DData_Nancy(); confirm whether
                 the argument was meant to be used
     numGenes -- when truthy, only the first numGenes rows of every table
                 are saved

     Each replica's raw 2-d data is converted to an example table, gets a
     "DDB" string meta attribute filled from the DDB list (by row
     position) and is saved as <replica>.tab.
     """
     ddbList = Dicty.DAnnotation.getDDBList()
     if not os.path.exists(path):
         os.mkdir(path)
     medaData = Dicty.DData.DData_Nancy()
     for st in medaData.strains:
         # os.path.join instead of a hard-coded "\\" keeps the layout
         # correct on non-Windows systems as well.
         pathSt = os.path.join(path, st)
         if not os.path.exists(pathSt):
             os.mkdir(pathSt)
         for rep in medaData.strain2replicaList(st):
             ma2d = medaData.getRaw2d(rep)
             et = Meda.Preproc.ma2orng(
                 ma2d,
                 Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
             et.domain.addmeta(orange.newmetaid(),
                               orange.StringVariable("DDB"))
             for eIdx, e in enumerate(et):
                 e["DDB"] = ddbList[eIdx]
             if numGenes:
                 table = orange.ExampleTable(et[:numGenes])
             else:
                 table = et
             orange.saveTabDelimited(os.path.join(pathSt, rep + ".tab"),
                                     table)
Beispiel #16
0
# Script fragment: `data`, `DataDesc`, `orange`, `dataUtilities`, `random`
# and `os` are defined/imported earlier in the script (not shown here).

# Rename the "[C]([C]=[C])" variable to "Measure" and rebuild the table with
# "Measure" as the class, keeping every other attribute except "activity".
data.domain["[C]([C]=[C])"].name = "Measure"
domain = orange.Domain([
    attr for attr in data.domain.attributes
    if attr.name not in ["activity", "Measure"]
], data.domain["Measure"])
data = orange.ExampleTable(domain, data)
# Fixed seed so the random offsets added below are reproducible.
random.seed(6)
for ex in data:
    # Add a random offset in [0, 1) to the class value.
    ex["Measure"] = ex["Measure"] + random.random()
data.save(DataDesc + "_No_metas_FullNumeric_Train.tab")

#================ Create small test set =================
dataFile = "BinClass_No_metas_SmallTest.tab"
# Keep only the first 33 lines of the test file (relies on the shell
# utility `head`, so this step needs a Unix-like environment).
os.system("head -n 33 BinClass_No_metas_Test.tab > " + dataFile)
data = dataUtilities.DataTable(dataFile)
# Attach a string meta attribute "Comments" under meta id -1.
var = orange.StringVariable("Comments")
data.domain.addmeta(-1, var)
# Rows (0-based) that get the comment "notok"; all others get "ok".
idxs = [2, 5, 12, 25]
for idx, ex in enumerate(data):
    if idx in idxs:
        ex["Comments"] = "notok"
    else:
        ex["Comments"] = "ok"
data.save("BinClass_W_metas_SmallTest.tab")
#================ Test data for BAD things :) =================
# Read the small test file as raw lines.
fileH = open(dataFile, "r")
lines = fileH.readlines()
fileH.close()
#-----------------
# Output file for the next variant; it is written and closed by code that
# follows this fragment.
saveFile = "BinClass_BadVarType.tab"
fileH = open(saveFile, "w")
Beispiel #17
0
def getRdkDescResult(data,descList, radius = 1):
    """ Calculates the descriptors for the descList using RDK
        It expects an attribute containing smiles with a name defined in AZOrangeConfig.SMILESNAMES
        It returns a dataset with the same smiles input variable, and as many variables as the descriptors
        returned by the toolkit

        Only entries of descList carrying the rdk tag are handled. "FingerPrints" requests all
        Morgan fingerprint bits (computed with the given radius); entries containing "FP_" request
        individual bits, which are always present in the result and default to 0.0.
        Compounds for which the descriptors cannot be calculated are left out of the result.
        Returns None if the rdk toolkit is not enabled, no smiles attribute is found, or no rdk
        descriptor is requested.
    """
    if "rdk" not in toolkitsEnabled:
        return None
    smilesName = getSMILESAttr(data)
    if not smilesName:
        return None

    rdkTag = toolkitsDef["rdk"]["tag"]
    myDescList = [desc.replace(rdkTag, "") for desc in descList if rdkTag in desc]
    if not myDescList:
        return None

    FingerPrints = False
    FP_desc = []
    if "FingerPrints" in myDescList:
        FingerPrints = True
        myDescList.remove("FingerPrints")
    if any("FP_" in fp for fp in myDescList):
        # Separate explicitly requested fingerprint bits from ordinary descriptors
        FingerPrints = True
        FP_desc = [attr for attr in myDescList if "FP_" in attr]
        myDescList = [attr for attr in myDescList if "FP_" not in attr]

    #Get fingerprints in advance
    fingerPrintsAttrs = []
    fingerPrintsRes = {}
    if FingerPrints:
        knownFPNames = set()    # names already in fingerPrintsAttrs (avoids rescanning the list)
        for ex in data:
            mol = str(ex[smilesName].value)
            try:
                chemMol = rdk.Chem.MolFromSmiles(mol,True)
                if not chemMol:
                    chemMol = rdk.Chem.MolFromSmiles(mol,False)
                fingerPrint = rdk.AllChem.GetMorganFingerprint(chemMol,radius)
                resDict = fingerPrint.GetNonzeroElements()
            except Exception:
                continue
            fingerPrintsRes[mol] = {}
            for ID in resDict:
                count = resDict[ID]
                name = rdkTag+"FP_"+str(ID)
                if name not in knownFPNames:
                    knownFPNames.add(name)
                    fingerPrintsAttrs.append(orange.FloatVariable(name))
                fingerPrintsRes[mol][name] = float(count)
        #Add FP attributes even if there was no reference to it. Models will need it as FP not present, i.e. equal 0.0 !
        for fpDesc in FP_desc:
            name = rdkTag+fpDesc
            if name not in knownFPNames:
                knownFPNames.add(name)
                fingerPrintsAttrs.append(orange.FloatVariable(name))

    #Test attrTypes: the first compound that can be processed defines the variable types
    attrObj = []    # stays defined even when data is empty (used to raise NameError below)
    for ex in data:
        try:
            attrObj = []
            molStr = str(ex[smilesName].value)
            chemMol = rdk.Chem.MolFromSmiles(molStr,True)
            if not chemMol:
                chemMol = rdk.Chem.MolFromSmiles(molStr,False)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(chemMol))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                if isinstance(moldesc[desc], str):
                    attrObj.append(orange.StringVariable(rdkTag + desc))
                else:
                    attrObj.append(orange.FloatVariable(rdkTag + desc))

            #Process fingerprints
            if FingerPrints:
                # Compare by name; the former test compared a name with variable objects
                existingNames = set(attr.name for attr in attrObj)
                for desc in fingerPrintsAttrs:
                    if desc.name not in existingNames:
                        attrObj.append(desc)
            break
        except Exception:
            continue

    resData = orange.ExampleTable(orange.Domain([data.domain[smilesName]] + attrObj,0))
    badCompounds = 0
    for ex in data:
        newEx = orange.Example(resData.domain)   # All attrs: ?, ?, ?, ..., ?
        newEx[smilesName] = ex[smilesName]
        molStr = str(newEx[smilesName].value)
        # OBS - add something keeping count on the number of unused smiles
        try:
            chemMol = rdk.Chem.MolFromSmiles(molStr,True)
            if not chemMol:
                chemMol = rdk.Chem.MolFromSmiles(molStr,False)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(chemMol))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                newEx[rdkTag+desc] = moldesc[desc]

            #Process fingerprints
            if FingerPrints:
                for desc in fingerPrintsAttrs:
                    if desc.name in fingerPrintsRes[molStr]:
                        newEx[desc.name] = fingerPrintsRes[molStr][desc.name]
                    else:
                        newEx[desc.name] = 0.0
            resData.append(newEx)
        except Exception:
            badCompounds += 1
    # Single-argument print() produces the same output under Python 2 and 3
    print("%s %d" % ("Compounds in original data:       ", len(data)))
    print("%s %d" % ("Compounds able to calculate descs:", len(resData)))
    print("%s %d" % ("Ignored Compounds:                ", badCompounds))

    return resData
Beispiel #18
0
def getCinfonyDescResults(origData,descList,radius=1):
    """Compute the requested Cinfony descriptors for origData.

       The input variables and the class are kept and the descriptor
       columns delivered by the Obabel, RDK, Webel and CDK back-ends are
       merged in on the SMILES attribute.
            Returns a new dataset, or None when there is no input, no
            SMILES attribute, or no back-end produced any result."""
    if not (origData and descList):
        return None
    smilesName = getSMILESAttr(origData)
    if not smilesName:
        return None

    # Working copy with an extra "origSmiles" column that remembers the SMILES as given
    classVar = origData.domain.classVar
    keptAttrs = [attr for attr in origData.domain if attr is not origData.domain.classVar]
    keptAttrs = keptAttrs + [orange.StringVariable("origSmiles")]
    data = dataUtilities.DataTable(orange.Domain(keptAttrs, classVar), origData)
    for ex in data:
        ex["origSmiles"] = ex[smilesName].value

    #TODO: Create a method in dataUtilities to standardize the attribute smilesName in place having the attr origSmiles as ID
    canStandardize = "AZutilities.extraUtilities" in sys.modules and hasattr(extraUtilities, "StandardizeSMILES")
    if canStandardize:
        # Expected to rewrite the smilesName attribute of data in place, using origSmiles as the compound ID
        extraUtilities.StandardizeSMILES(data, smiAttr = smilesName, cName="origSmiles")

    # Run every back-end in turn and keep the non-empty results
    calculators = (
        lambda: getObabelDescResult(data,descList),
        lambda: getRdkDescResult(data,descList,radius),
        lambda: getWebelDescResult(data,descList),
        lambda: getCdkDescResult(data,descList),
    )
    results = []
    for calculate in calculators:
        partial = calculate()
        if partial:
            results.append(partial)

    # return None if no results at all
    if not results:
        return None

    # Convert any nan to a '?' (nan is the only value that differs from itself)
    for partial in results:
        for ex in partial:
            for attr in ex.domain:
                if ex[attr] != ex[attr]:
                    ex[attr] = '?'

    # Join all descriptor tables, then attach them to the working copy
    merged = results[0]
    for partial in results[1:]:
        merged = dataUtilities.horizontalMerge(merged, partial, smilesName, smilesName)
    data = dataUtilities.horizontalMerge(data, merged, smilesName, smilesName)

    # Put the SMILES back as they were given
    for ex in data:
        ex[smilesName] = ex["origSmiles"]

    # Drop the helper origSmiles column again
    finalAttrs = [attr for attr in data.domain if attr.name != "origSmiles" and attr is not data.domain.classVar]
    return dataUtilities.DataTable(orange.Domain(finalAttrs, data.domain.classVar), data)
Beispiel #19
0
    def exportNetwork(self,
                      absolute_int=10,
                      positive_int=0,
                      negative_int=0,
                      best_attributes=0,
                      significant_digits=2,
                      pretty_names=1,
                      widget_coloring=1,
                      pcutoff=1):
        """Build a network of the selected attributes and interactions.

        absolute_int       -- number of entries taken from the end of
                              ``self.abslist``
        positive_int       -- number of entries taken from the end of
                              ``self.list``
        negative_int       -- number of entries taken from the start of
                              ``self.list``
        best_attributes    -- number of entries taken from the end of
                              ``self.attlist``
        significant_digits -- passed to ``_nicefloat`` for edge labels
        pretty_names       -- when true, strip the "ED_", "D_" and "M_"
                              prefixes from node labels and replace
                              " ", "-" and "_" with a literal "\\n"
        widget_coloring    -- when true, edges are coloured "green"/"red";
                              otherwise a quoted numeric colour string is
                              produced
        pcutoff            -- when below 1 (and ``self.plist`` is not
                              empty) only attributes and interactions
                              whose ``self.plut`` value is below it are
                              kept

        All four counts are clipped to the number of available entries.
        Returns the network, with ``items`` set to the node table (index,
        label, norm. gain) and ``links`` set to the edge table (u, v,
        label, dir, color).

        NOTE(review): the 'color' variable only declares the values
        "green" and "red"; confirm that the numeric colour strings
        produced with widget_coloring=0 are accepted by the table.
        """
        ### SELECTION OF INTERACTIONS AND ATTRIBUTES ###

        # prevent crashes
        best_attributes = min(best_attributes, len(self.attlist))
        positive_int = min(positive_int, len(self.list))
        absolute_int = min(absolute_int, len(self.list))
        negative_int = min(negative_int, len(self.list))

        # select the top interactions
        # (the > 0 guards matter: seq[-0:] would select the whole list)
        ins = []
        if positive_int > 0:
            ins += self.list[-positive_int:]
        ins += self.list[:negative_int]
        if absolute_int > 0:
            ins += self.abslist[-absolute_int:]

        # pick best few attributes
        atts = []
        if best_attributes > 0:
            atts += [i for (x, i) in self.attlist[-best_attributes:]]

        # disregard the insignificant attributes, interactions
        filter_by_p = len(self.plist) > 0 and pcutoff < 1
        if filter_by_p:
            atts = [i for i in atts if self.plut[(i, -1)] < pcutoff]
            ins = [y for y in ins
                   if self.plut[(y[1][1], y[1][2], -1)] < pcutoff]

        ints = []
        max_igain = -1e6
        min_gain = 1e6  # lowest information gain of involved attributes
        # remove duplicates and sorting keys
        for (_sort_key, v) in ins:
            if v not in ints:
                ints.append(v)
                # add to attribute list
                (ig, i, j) = v
                max_igain = max(abs(ig), max_igain)
                for att in (i, j):
                    if att not in atts:
                        atts.append(att)
                        min_gain = min(min_gain, self.gains[att])

        # fill-in the attribute list with all possibly more important attributes
        ## todo (min_gain above is kept for this step)

        ### NODE DRAWING ###
        node_of = {}  # attribute index -> node number (0-based)
        graph = Orange.core.Network(len(atts), 0)
        table = []

        prettify = (("ED_", ""), ("D_", ""), ("M_", ""),
                    (" ", "\\n"), ("-", "\\n"), ("_", "\\n"))
        for i, ndx in enumerate(atts):
            node_of[ndx] = i

            t = '%s' % self.names[ndx]
            if pretty_names:
                for old, new in prettify:
                    t = t.replace(old, new)
            # A row is needed for every node whether or not its label was
            # prettified; these two lines used to sit inside the ``if``
            # above, leaving the node table empty for pretty_names=0.
            r = self.gains[ndx] * 100.0 / self.entropy
            table.append([i + 1, t, r])

        d = orange.Domain([
            orange.FloatVariable('index'),
            orange.StringVariable('label'),
            orange.FloatVariable('norm. gain')
        ])
        data = orange.ExampleTable(d, table)
        graph.items = data

        ### EDGE DRAWING ###
        table = []
        for (ig, att_i, att_j) in ints:
            i = node_of[att_i]
            j = node_of[att_j]

            perc = int(
                abs(ig) * 100.0 / max(max_igain, self.attlist[-1][0]) + 0.5)
            # explicit floor division: same result under Python 2 and 3
            graph[i, j] = perc // 30 + 1

            if self.entropy > 1e-6:
                mc = _nicefloat(100.0 * ig / self.entropy,
                                significant_digits) + "%"
            else:
                mc = _nicefloat(0.0, significant_digits)
            if filter_by_p:
                # plut is keyed by ATTRIBUTE indices (as in the filter
                # above), not by the node numbers i and j.
                mc += "\\nP\\<%.3f" % self.plut[(att_i, att_j, -1)]

            if ig > 0:
                if widget_coloring:
                    color = "green"
                else:
                    color = '"0.0 %f 0.9"' % (0.3 + 0.7 * perc / 100.0
                                              )  # adjust saturation
                direction = "both"
            else:
                if widget_coloring:
                    color = "red"
                else:
                    color = '"0.5 %f 0.9"' % (0.3 + 0.7 * perc / 100.0
                                              )  # adjust saturation
                direction = 'none'

            table.append([i, j, mc, direction, color])

        d = orange.Domain([
            orange.FloatVariable('u'),
            orange.FloatVariable('v'),
            orange.StringVariable('label'),
            orange.EnumVariable('dir', values=["both", "none"]),
            orange.EnumVariable('color', values=["green", "red"])
        ])
        data = orange.ExampleTable(d, table)
        graph.links = data

        return graph
Beispiel #20
0
    def createStatData(self, statistics):
        """Build ``self.statistics``, an ExampleTable summarising *statistics*.

        For every key ``ml`` of *statistics* one "Total" row is appended,
        followed by one row for each entry of
        ``statistics[ml]["foldStat"]["nTestCmpds"]``.  The classification
        columns (CA, MCC and the four confusion-matrix cells) are used when
        ``self.isClassDiscrete`` is true, the regression columns (Q2, RMSE)
        otherwise.  "Stability" is the class variable of the domain and is
        only filled in on the "Total" rows.

        Returns the table, which is also kept in ``self.statistics``.
        """
        idVars = [orange.StringVariable("Method"), orange.FloatVariable("Fold")]
        classificationVars = [
            orange.FloatVariable(label)
            for label in ("CA", "MCC", "truePOS", "trueNEG", "falsePOS",
                          "falseNEG")
        ]
        regressionVars = [
            orange.FloatVariable(label) for label in ("Q2", "RMSE")
        ]
        discrete = self.isClassDiscrete
        metricVars = classificationVars if discrete else regressionVars
        sizeVars = [
            orange.FloatVariable(label) for label in ("nTest", "nTrain")
        ]

        self.statistics = orange.ExampleTable(
            orange.Domain(idVars + metricVars + sizeVars,
                          orange.FloatVariable("Stability")))
        table = self.statistics

        for ml in statistics:
            mlStats = statistics[ml]
            isSelected = ml == "selectedML"

            # Summary row over all folds of this method.
            total = orange.Example(table.domain)
            total["Method"] = "Total" if isSelected else ml + " Total"
            total["Stability"] = mlStats["StabilityValue"]
            if discrete:
                total["CA"] = mlStats["CA"]
                total["MCC"] = mlStats["MCC"]
                # Confusion matrix layout: [[TP, FN], [FP, TN]]
                cm = mlStats["CM"]
                total["truePOS"] = cm[0][0]
                total["trueNEG"] = cm[1][1]
                total["falsePOS"] = cm[1][0]
                total["falseNEG"] = cm[0][1]
            else:
                total["Q2"] = mlStats["Q2"]
                total["RMSE"] = mlStats["RMSE"]
            table.append(total)

            # One row per fold.
            foldStats = mlStats["foldStat"]
            for fold, _nTest in enumerate(foldStats["nTestCmpds"]):
                row = orange.Example(table.domain)
                if isSelected:
                    row["Method"] = foldStats["foldSelectedML"][fold]
                else:
                    row["Method"] = ml
                row["Fold"] = fold
                row["nTrain"] = foldStats["nTrainCmpds"][fold]
                row["nTest"] = foldStats["nTestCmpds"][fold]
                if discrete:
                    row["CA"] = foldStats["CA"][fold]
                    row["MCC"] = foldStats["MCC"][fold]
                    # Confusion matrix layout: [[TP, FN], [FP, TN]]
                    foldCM = foldStats["CM"][fold]
                    row["truePOS"] = foldCM[0][0]
                    row["trueNEG"] = foldCM[1][1]
                    row["falsePOS"] = foldCM[1][0]
                    row["falseNEG"] = foldCM[0][1]
                else:
                    row["Q2"] = foldStats["Q2"][fold]
                    row["RMSE"] = foldStats["RMSE"][fold]
                table.append(row)
        return self.statistics
Beispiel #21
0
 def generateGraph(self, N_changed = False):
     """Build a network from the distance matrix ``self.data`` and send it.

     Edges are created by ``fromDistanceMatrix`` from the lower/upper
     threshold spin values, ``self.kNN`` and ``self.andor``.  When
     ``self.dstWeight == 1`` every edge weight ``w`` is replaced by
     ``1 - w`` (an edge whose weight prints as "0" gets weight 1).

     ``self.netOption`` then selects what is kept:
       '1' -- components with more than ``self.excludeLimit`` vertices,
       '2' -- the first component returned by ``getConnectedComponents()``
              (the original comment calls it the largest; presumably the
              components come sorted by size -- TODO confirm),
       '3' -- components containing a vertex whose selected attribute
              contains ``self.label`` (case-insensitive),
       anything else -- the whole graph.

     If the estimated number of edges exceeds 200000 no graph is built and
     an error is shown instead.  Results go out on the "Network",
     "Distance Matrix" and "Examples" channels.

     N_changed -- when true, ``self.netOption`` is reset to 1 first.
     """
     self.searchStringTimer.stop()
     self.attributeCombo.box.setEnabled(False)
     self.error()
     matrix = None
     self.warning('')

     if N_changed:
         self.netOption = 1

     if self.data is None:
         self.infoa.setText("No data loaded.")
         self.infob.setText("")
         return

     # Every histogram bin inside the threshold window contributes its
     # count; the factor 2 comes from the original estimate.
     nEdgesEstimate = 2 * sum([self.histogram.yData[i] for i, e in enumerate(self.histogram.xData) if self.spinLowerThreshold <= e <= self.spinUpperThreshold])

     if nEdgesEstimate > 200000:
         self.graph = None
         nedges = 0
         n = 0
         self.error('Estimated number of edges is too high (%d).' % nEdgesEstimate)
     else:
         graph = orngNetwork.Network(self.data.dim, 0)
         matrix = self.data

         if hasattr(self.data, "items"):
             if isinstance(self.data.items, orange.ExampleTable):
                 graph.items = self.data.items
             else:
                 # Items are not an example table: wrap their string
                 # representation in a single 'label' column.
                 data = [[str(x)] for x in self.data.items]
                 items = orange.ExampleTable(orange.Domain(orange.StringVariable('label'), 0), data)
                 graph.items = list(items)

         # set edges where distance falls inside the threshold window
         nedges = graph.fromDistanceMatrix(self.data, self.spinLowerThreshold, self.spinUpperThreshold, self.kNN, self.andor)
         edges = graph.getEdges()

         if self.dstWeight == 1:
             directed = graph.directed
             for u, v in edges:
                 # In an undirected graph only the u <= v orientation is
                 # rewritten, exactly as before.
                 if directed or u <= v:
                     weight = graph[u, v]
                     if str(weight) != "0":
                         graph[u, v] = 1.0 - float(weight)
                     else:
                         graph[u, v] = 1

         n = len(edges)
         netOption = str(self.netOption)

         if netOption == '1':
             # exclude components that are too small
             components = [c for c in graph.getConnectedComponents() if len(c) > self.excludeLimit]
             include = [vertex for c in components for vertex in c]
             if len(include) > 1:
                 self.graph = orngNetwork.Network(graph.getSubGraph(include))
                 matrix = self.data.getitems(include)
             else:
                 self.graph = None
                 matrix = None
         elif netOption == '2':
             # first connected component only; an empty component list
             # yields no graph instead of an IndexError
             components = graph.getConnectedComponents()
             component = components[0] if components else []
             if len(component) > 1:
                 self.graph = orngNetwork.Network(graph.getSubGraph(component))
                 matrix = self.data.getitems(component)
             else:
                 self.graph = None
                 matrix = None
         elif netOption == '3':
             # connected components holding a vertex matched by label
             self.attributeCombo.box.setEnabled(True)
             self.graph = None
             matrix = None
             if self.attributeCombo.currentText() != '' and self.label != '':
                 components = graph.getConnectedComponents()

                 txt = self.label.lower()
                 nodes = [i for i, values in enumerate(self.data.items) if txt in str(values[str(self.attributeCombo.currentText())]).lower()]
                 if len(nodes) > 0:
                     vertices = []
                     for component in components:
                         if any(node in component for node in nodes):
                             vertices.extend(component)

                     if len(vertices) > 0:
                         vertices = list(set(vertices))
                         self.graph = orngNetwork.Network(graph.getSubGraph(vertices))
                         matrix = self.data.getitems(vertices)
         else:
             self.graph = graph

     self.pconnected = nedges
     self.nedges = n

     dim = self.data.dim
     # Avoid ZeroDivisionError for an empty matrix.
     connectedPercent = nedges / float(dim) * 100 if dim else 0.0
     averageEdges = n / float(dim) if dim else 0.0
     self.infoa.setText("%d vertices" % dim)
     self.infob.setText("%d connected (%3.1f%%)" % (nedges, connectedPercent))
     self.infoc.setText("%d edges (%d average)" % (n, averageEdges))

     if self.graph is not None:
         # matrix is always set together with self.graph above
         matrix.items = self.graph.items

     self.send("Network", self.graph)

     if matrix:
         self.send("Distance Matrix", matrix)

     if self.graph is None:
         self.send("Examples", None)
     else:
         self.send("Examples", self.graph.items)

     self.histogram.setBoundary(self.spinLowerThreshold, self.spinUpperThreshold)
Beispiel #22
0
 def getSmilesData(self, smiles):
     """Store in ``self.smilesData`` a one-row table holding *smiles*.

     The table has a single string attribute named "SMILEStoPred".
     """
     domain = orange.Domain([orange.StringVariable("SMILEStoPred")], 0)
     rows = [[smiles]]
     self.smilesData = dataUtilities.DataTable(domain, rows)
Beispiel #23
0
# Print the examples accepted by the filter as configured so far.
for ex in fcont(data):
    print(ex)

# Condition on the first attribute: Between 4.6 and 5.0.
fcont[0] = (orange.ValueFilter.Between, 4.6, 5.0)
print("\n\nThe first attribute is between 4.6 and 5.0")
for ex in fcont(data):
    print(ex)

# Condition on the first attribute: Outside 4.6 and 7.5.
fcont[0] = (orange.ValueFilter.Outside, 4.6, 7.5)
print("\n\nThe first attribute is outside the interval 4.6 to 7.5")
for ex in fcont(data):
    print(ex)

############ THIS IS WHAT YOU CAN DO WITH STRING ATTRIBUTES

# Add a string meta attribute "name" holding each example's class as text.
data.domain.addmeta(orange.newmetaid(), orange.StringVariable("name"))
for ex in data:
    ex["name"] = str(ex.getclass())

# Filter on a single string value ...
fstr = orange.Filter_values(domain=data.domain)
fstr["name"] = "Iris-setosa"
print("\n\nSetosae")
d = fstr(data)
print("%i examples, starting with %s" % (len(d), d[0]))

# ... and on a list of accepted string values.
fstr["name"] = ["Iris-setosa", "Iris-virginica"]
print("\n\nSetosae and virginicae")
d = fstr(data)
print("%i examples, starting with %s\n  finishing with %s" % (len(d), d[0],
                                                              d[-1]))
Beispiel #24
0
def parse(DOM):
    """ Build an :class:`Orange.network.Graph` from the graph DOM returned
    by the geneMANIA server.

    Every ``node`` element becomes a graph node (numbered from 0) carrying
    ``original_id``, ``symbol`` and ``score``.  Every ``edge`` element adds
    an edge between its ``source`` and ``target`` carrying ``weight`` and
    ``networkGroupId``.  A node table and a link table (one weight column
    per network group, 0 where a group has no edge) are attached through
    ``set_items`` and ``set_links``.

    """
    from collections import defaultdict
    import Orange

    def attributes_of(element):
        # Plain dict copy of the element's XML attributes.
        return dict(element.attributes.items())

    def text_of(element):
        # Concatenated text of the element's direct Text children.
        return u"".join(child.wholeText for child in element.childNodes
                        if isinstance(child, minidom.Text))

    def data_of(element):
        # Map the "key" attribute of each <data> descendant to its text.
        fields = {}
        for data_element in element.getElementsByTagName("data"):
            key = attributes_of(data_element)["key"]
            fields[key] = text_of(data_element)
        return fields

    node_elements = DOM.getElementsByTagName("node")
    edge_elements = DOM.getElementsByTagName("edge")

    graphNodes = {}
    for element in node_elements:
        node_id = attributes_of(element)["id"]
        graphNodes[node_id] = data_of(element)

    # Several <edge> elements may connect the same pair of nodes.
    graphEdges = defaultdict(list)
    for element in edge_elements:
        attrs = attributes_of(element)
        endpoints = (attrs["source"], attrs["target"])
        fields = data_of(element)
        graphEdges[endpoints].append(fields)

    allData = [fields for edge_list in graphEdges.values()
               for fields in edge_list]
    edgeTypes = set(int(fields["networkGroupId"]) for fields in allData)
    groupId2int = dict((group_id, column)
                       for column, group_id in enumerate(edgeTypes))
    groupId2groupCode = dict((int(fields["networkGroupId"]),
                              str(fields["networkGroupCode"]))
                             for fields in allData)
    nodeNumber = dict((node_id, number)
                      for number, node_id in enumerate(graphNodes))

    graph = Orange.network.Graph()
    for node_id, fields in graphNodes.items():
        graph.add_node(nodeNumber[node_id],
                       original_id=str(node_id),
                       symbol=fields["symbol"],
                       score=float(fields["score"]))

    graph.add_nodes_from(sorted(nodeNumber.values()))

    # One link-table row per node pair: 1-based endpoints followed by one
    # weight per network group.
    edge_rows = []
    for (source, target), edge_data in graphEdges.items():
        weights = [0] * len(edgeTypes)
        for fields in edge_data:
            networkGroupId = int(fields["networkGroupId"])
            weight = float(fields["weight"])
            weights[groupId2int[networkGroupId]] = weight
            graph.add_edge(nodeNumber[source],
                           nodeNumber[target],
                           weight=weight,
                           networkGroupId=networkGroupId)
        edge_rows.append([str(nodeNumber[source] + 1),
                          str(nodeNumber[target] + 1)] + weights)

    nodedomain = orange.Domain([
        orange.StringVariable("label"),
        orange.StringVariable("id"),
        orange.FloatVariable("score"),
        orange.StringVariable("symbol"),
        orange.StringVariable("go"),
        orange.EnumVariable("source", values=["true", "false"])
    ], None)

    weight_columns = [
        orange.FloatVariable("weight_%s" % groupId2groupCode[group_id])
        for group_id in edgeTypes
    ]
    edgedomain = orange.Domain(
        [orange.FloatVariable("u"), orange.FloatVariable("v")] +
        weight_columns, None)

    # Node rows follow the node numbering used in the graph.
    ordered_nodes = sorted(graphNodes.items(),
                           key=lambda item: nodeNumber[item[0]])
    node_rows = [[str(fields["symbol"]), str(node_id),
                  float(fields["score"]), str(fields["symbol"]),
                  str(fields["go"]), str(fields["source"])]
                 for node_id, fields in ordered_nodes]

    nodeitems = orange.ExampleTable(nodedomain, node_rows)
    edgeitems = orange.ExampleTable(edgedomain, edge_rows)

    graph.set_items(nodeitems)
    graph.set_links(edgeitems)

    return graph
Beispiel #25
0
    def commit(self):
        """Send the examples/attributes selected by the current cut-offs.

        The third element of the active ``score_methods`` entry is called
        with the score values and the histogram's lower and upper
        boundaries; keys with a truthy result are "selected", all others
        "remaining".

        * ``genes_in_columns`` set: the keys are used as row indices into
          ``self.data``.  Two row subsets are sent, each optionally carrying
          the score as a meta attribute.
        * otherwise: the keys are matched against the domain's attributes.
          Two column subsets are sent (with ``add_scores_to_output`` the
          score is stored in each copied attribute's ``attributes`` dict
          under the method name), plus a "Selected genes" table of
          (label, score) rows.

        Nothing is sent when there is no data or there are no scores.
        """
        if not self.data or not self.scores:
            return
        test = self.score_methods[self.method_index][2]

        cutOffUpper = self.histogram.upperBoundary
        cutOffLower = self.histogram.lowerBoundary

        # Column 0 holds the keys, column 1 is overwritten with the outcome
        # of the cut-off test for each key.
        scores = np.array(self.scores.items())
        scores[:, 1] = test(np.array(scores[:, 1], dtype=float), cutOffLower,
                            cutOffUpper)
        selected = set([key for key, passed in scores if passed])
        remaining = set([key for key, passed in scores if not passed])
        if self.data and self.genes_in_columns:
            selected = sorted(selected)
            if selected:
                newdata = orange.ExampleTable(orange.Domain(
                    self.data.domain), [self.data[int(i)] for i in selected],
                                              name=self.data.name)
            else:
                newdata = None
            if self.add_scores_to_output:
                # The same meta id and variable are reused for both outputs.
                score_attr = orange.FloatVariable(
                    self.score_methods[self.method_index][0])
                mid = orange.newmetaid()

            if self.add_scores_to_output and newdata is not None:
                newdata.domain.addmeta(mid, score_attr)
                for ex, key in zip(newdata, selected):
                    ex[mid] = self.scores[key]

            self.send("Example table with selected genes", newdata)

            remaining = sorted(remaining)
            if remaining:
                newdata = orange.ExampleTable(orange.Domain(
                    self.data.domain), [self.data[int(i)] for i in remaining],
                                              name=self.data.name)
            else:
                newdata = None

            if self.add_scores_to_output and newdata is not None:
                newdata.domain.addmeta(mid, score_attr)
                for ex, key in zip(newdata, remaining):
                    ex[mid] = self.scores[key]

            self.send("Example table with remaining genes", newdata)

        elif self.data and not self.genes_in_columns:
            method_name = self.score_methods[self.method_index][0]
            selected_attrs = [
                attr for attr in self.data.domain.attributes
                if attr in selected or attr.varType == orange.VarTypes.String
            ]  # ?? why strings
            if self.add_scores_to_output:
                scores = [self.scores[attr] for attr in selected_attrs]
                attrs = [copy_descriptor(attr) for attr in selected_attrs]
                for attr, score in zip(attrs, scores):
                    attr.attributes[method_name] = str(score)
                selected_attrs = attrs

            newdomain = orange.Domain(selected_attrs,
                                      self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(newdomain,
                                          self.data,
                                          name=self.data.name)
            self.send("Example table with selected genes",
                      newdata if selected_attrs else None)

            remaining_attrs = [
                attr for attr in self.data.domain.attributes
                if attr in remaining
            ]
            if self.add_scores_to_output:
                scores = [self.scores[attr] for attr in remaining_attrs]
                attrs = [copy_descriptor(attr) for attr in remaining_attrs]
                for attr, score in zip(attrs, scores):
                    # Store this attribute's own score (the previous code
                    # stored the string form of the whole score list).
                    attr.attributes[method_name] = str(score)
                remaining_attrs = attrs

            newdomain = orange.Domain(remaining_attrs,
                                      self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(newdomain,
                                          self.data,
                                          name=self.data.name)
            self.send("Example table with remaining genes",
                      newdata if remaining_attrs else None)

            domain = orange.Domain([
                orange.StringVariable("label"),
                orange.FloatVariable(self.score_methods[self.method_index][0])
            ], False)
            if selected_attrs:
                selected_genes = orange.ExampleTable(
                    domain, [[attr.name, self.scores.get(attr, 0)]
                             for attr in selected_attrs])
            else:
                selected_genes = None
            self.send("Selected genes", selected_genes)

        else:
            # Fallback: clear every output.
            self.send("Example table with selected genes", None)
            self.send("Example table with remaining genes", None)
            self.send("Selected genes", None)
        self.data_changed_flag = False
Beispiel #26
0
def __make_rule_gene_example_table(tableDict, genes):
    """Return a classless ExampleTable with one row per rule of *tableDict*.

    Each gene in *genes* becomes an enumerated attribute whose value is
    ``const.PRESENT`` when the gene is listed under the rule's
    ``const.TOP_GENES_KEY`` entry and ``const.ABSENT`` otherwise.  Three
    meta attributes are attached to every row: the rule name and the rule
    terms (both with their first and last character stripped) and the
    rule's key in *tableDict* as its sequence number.  Rows are added in
    sorted key order.
    """
    import orange
    import constants as const

    # one PRESENT/ABSENT attribute per gene
    attrList = [
        orange.EnumVariable(name=str(gene),
                            values=[const.PRESENT, const.ABSENT])
        for gene in genes
    ]

    # three meta attributes
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # this is a classless domain
    domain = orange.Domain(attrList, False)
    domain.addmeta(mid, ruleName, False)
    domain.addmeta(mid1, ruleTerms, False)
    domain.addmeta(mid2, ruleNumber, False)

    table = orange.ExampleTable(domain)

    for k in sorted(tableDict):
        rule = tableDict[k]
        topGenes = rule[const.TOP_GENES_KEY]
        exampleValues = [
            orange.Value(
                attr, const.PRESENT if gene in topGenes else const.ABSENT)
            for attr, gene in zip(attrList, genes)
        ]
        example = orange.Example(domain, exampleValues)

        # Each meta value is assigned once, as an orange.Value; the earlier
        # raw-value assignments were immediately overwritten by these.
        # [1:-1] skips the square brackets around the stored strings.
        example[const.NAME_ATTR] = orange.Value(
            ruleName, rule[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, rule[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)

        table.append(example)
    return table
Beispiel #27
0
import os
import os.path
import glob

import orange
import orngNetwork

# Columns of the network catalogue: five string columns, two integer-looking
# counts (shown without decimals), then two more string columns.
atts = [
    orange.StringVariable(label)
    for label in ("Network Name", "Network File", "dir", "Item Set",
                  "Edge Set")
]
for label in ("Vertices", "Edges"):
    countVar = orange.FloatVariable(label)
    countVar.numberOfDecimals = 0
    atts.append(countVar)
atts.extend(
    orange.StringVariable(label) for label in ("Date", "Description"))

netlist = orange.ExampleTable(orange.Domain(atts, False))

# Visit every .net file in the current working directory.
for netFile in glob.glob(os.path.join(os.getcwd(), '*.net')):
    net = orngNetwork.Network.read(netFile)
    name, ext = os.path.splitext(netFile)

    # Prefer "<name>_items.tab", fall back to "<name>.tab", else "".
    itemFile = ""
    for candidate in (name + '_items.tab', name + '.tab'):
        if os.path.exists(candidate):
            itemFile = candidate
            break
Beispiel #28
0
    def test_meta_direct(self):
        """Exercise meta values on a single example.

        Covers setting, reading and deleting metas through item access by
        id, variable and name, and through ``set_meta`` / ``has_meta`` /
        ``remove_meta``, including the TypeError raised for values of the
        wrong type.
        """
        e = orange.ExampleTableReader("iris.tab", 3).read()[0]
        d = e.domain
        e[0] = 3.14

        # A meta id that is not registered in the domain.
        mid1 = orange.newmetaid()
        e[mid1] = 2.79
        self.assertEqual(e[0], 3.14)
        self.assertEqual(e[mid1], 2.79)

        # A continuous meta registered in the domain.
        mid2 = orange.newmetaid()
        nf = orange.ContinuousVariable("m2")
        d.addmeta(mid2, nf)
        e[nf] = 6.28
        self.assertEqual(e[nf], 6.28)
        self.assertEqual(e[mid2], 6.28)
        self.assertEqual(e["m2"], 6.28)

        # A string meta registered in the domain.
        mid3 = orange.newmetaid()
        sf = orange.StringVariable("m3")
        d.addmeta(mid3, sf)
        e["m3"] = "pixies"
        self.assertEqual(e[sf], "pixies")
        self.assertEqual(e[mid3], "pixies")
        self.assertEqual(e["m3"], "pixies")

        self.assertEqual(set(e.get_metas().values()), set([2.79, 6.28, "pixies"]))

        # Deleting by id makes the value unreachable by id, variable or name.
        del e[mid1]
        with self.assertRaises(KeyError):
            e[mid1]
        self.assertEqual(set(e.get_metas().values()), set([6.28, "pixies"]))

        del e[mid3]
        with self.assertRaises(KeyError):
            e[sf]
        self.assertEqual(set(e.get_metas().values()), set([6.28]))

        del e[mid2]
        with self.assertRaises(KeyError):
            e["m2"]
        self.assertEqual(set(e.get_metas().values()), set([]))

        # Values of the wrong type are rejected by item assignment ...
        with self.assertRaises(TypeError):
            e[mid1] = "3.15"
        with self.assertRaises(TypeError):
            e[mid3] = 3.15

        # ... and by set_meta.  The second call used to be written as
        # set_meta(mid3 = 3.15), a keyword-argument call that never reached
        # the type check being tested.
        # NOTE(review): assumes set_meta type-checks like item assignment.
        with self.assertRaises(TypeError):
            e.set_meta(mid1, "3.15")
        with self.assertRaises(TypeError):
            e.set_meta(mid3, 3.15)

        e.set_meta(mid1, 3.15)
        e.set_meta("m2", 3.16)
        e.set_meta(sf, "3.17")

        self.assertTrue(e.has_meta(mid1))
        self.assertTrue(e.has_meta(nf))
        self.assertTrue(e.has_meta(sf))

        e.remove_meta(sf)
        self.assertTrue(e.has_meta(mid1))
        self.assertTrue(e.has_meta(nf))
        self.assertFalse(e.has_meta(sf))

        e.remove_meta(mid1)
        self.assertFalse(e.has_meta(mid1))
        self.assertTrue(e.has_meta(nf))
        self.assertFalse(e.has_meta(sf))

        e.remove_meta("m2")
        self.assertFalse(e.has_meta(mid1))
        self.assertFalse(e.has_meta(nf))
        self.assertFalse(e.has_meta(sf))
Beispiel #29
0
 def getVariable(self):
     """Return a new string variable named after this object's text()."""
     variableName = str(self.text())
     return orange.StringVariable(variableName)
Beispiel #30
0
def getSimDescriptors(InActives,
                      InData,
                      methods,
                      active_ids=None,
                      pharmacophore_file=None,
                      callBack=None):
    """Add similarity descriptors against a set of actives to a data set.

    For every entry of ``methods`` and every active, one float attribute
    named ``<method>(active_<i>)`` (``i`` counted from 1) is appended to the
    attributes of the data. Each such attribute is filled, for every
    example, with the value returned by the similarity function that
    belongs to the method.

    Recognised method names are ``rdk_topo_fps``, ``rdk_MACCS_keys``,
    ``rdk_morgan_fps``, ``rdk_morgan_features_fps``, ``rdk_atompair_fps``
    and ``azo_pharmacophore_fps``. Columns are still created for any other
    name, but they are left unfilled.

    Args:
        InActives: the actives to compare against. They are used as SMILES
            when the standardization step is available.
        InData: orange data table; it must contain an attribute that
            ``getSMILESAttr`` recognises.
        methods: sequence of method name strings (see above).
        active_ids: references handed to the pharmacophore similarity
            function in place of the actives; only read for
            ``azo_pharmacophore_fps``.
        pharmacophore_file: forwarded unchanged to
            ``azo_pharmacophore_az_inhouse``.
        callBack: optional callable invoked after each computed value with
            the integer percentage done (0-100), e.g. ``callBack(25)``.
            When it returns a false value the calculation is aborted.

    Returns:
        A new orange.ExampleTable holding the original attributes plus the
        similarity descriptors, or None when no SMILES attribute was found
        or the callback requested an abort.
    """
    # Pre-process input data to standardize the SMILES
    SMILESattr = getSMILESAttr(InData)
    if not SMILESattr:
        return None

    #TODO: Create a method in dataUtilities to standardize the attribute smilesName in place having the attr origSmiles as ID
    if "AZutilities.extraUtilities" in sys.modules and hasattr(
            extraUtilities, "StandardizeSMILES"):
        # Call a method for standardizing the SMILES in Data.
        # The method is expected to change the attribute defined as smiAttr in data object
        cleanedData = True
        # Process InData: keep a copy of the original SMILES in "OrigSMI_ID"
        tmpDomain = orange.Domain([orange.StringVariable("OrigSMI_ID")] +
                                  [attr for attr in InData.domain])
        data = orange.ExampleTable(tmpDomain, InData)
        for ex in data:
            ex["OrigSMI_ID"] = ex[SMILESattr]
        extraUtilities.StandardizeSMILES(data,
                                         smiAttr=SMILESattr,
                                         cName="OrigSMI_ID")
        # Process input actives the same way, through a two-column table
        activesDomain = orange.Domain([
            orange.StringVariable("OrigSMI_ID"),
            orange.StringVariable("SMILES")
        ], 0)
        activesData = orange.ExampleTable(activesDomain)
        for act in InActives:
            activesData.append([act, act])
        extraUtilities.StandardizeSMILES(activesData,
                                         smiAttr="SMILES",
                                         cName="OrigSMI_ID")
        actives = [str(ex["SMILES"].value) for ex in activesData]
    else:
        data = InData
        actives = InActives
        cleanedData = False

    nActives = len(actives)

    # Adjust the header: one block of nActives columns per requested method,
    # in the order of `methods`.
    atts = [
        orange.FloatVariable(m + '(active_' + str(count) + ')')
        for m in methods
        for count in range(1, nActives + 1)
    ]

    newdomain = orange.Domain(data.domain.attributes + atts,
                              data.domain.classVar)
    newdata = orange.ExampleTable(newdomain, data)

    # Method name -> similarity function. Lambdas keep the lookup of the
    # underlying functions lazy, so a function that is not available is only
    # touched when its method is actually requested.
    similarityFuncs = {
        'rdk_topo_fps':
        lambda ref, ex: orng_sim_rdk_topo_fps(ref, ex),
        'rdk_MACCS_keys':
        lambda ref, ex: orng_sim_rdk_MACCS_keys(ref, ex),
        'rdk_morgan_fps':
        lambda ref, ex: orng_sim_rdk_morgan_fps(ref, ex),
        'rdk_morgan_features_fps':
        lambda ref, ex: orng_sim_rdk_morgan_features_fps(ref, ex),
        'rdk_atompair_fps':
        lambda ref, ex: orng_sim_rdk_atompair_fps(ref, ex),
        'azo_pharmacophore_fps':
        lambda ref, ex: azo_pharmacophore_az_inhouse(ref, ex,
                                                     pharmacophore_file),
    }

    # Work plan: (index of the method's first column, references, function).
    # The column index is derived from the position of the method in
    # `methods`, so an unrecognised method cannot shift the values of the
    # following methods into wrongly named columns.
    plan = []
    for methodPos, m in enumerate(methods):
        simFunc = similarityFuncs.get(m)
        if simFunc is None:
            continue
        if m == 'azo_pharmacophore_fps':
            # NOTE(review): active_ids is assumed to be parallel to the
            # actives (same length and order) - confirm against callers.
            references = active_ids
        else:
            references = actives
        plan.append((methodPos * nActives, references, simFunc))

    # Number of values that will actually be computed; used only to report
    # progress through callBack.
    nTotalSteps = len(newdata) * sum(
        len(references) for _firstCol, references, _simFunc in plan)
    stepsDone = 0

    # Fill up the data
    for firstCol, references, simFunc in plan:
        for offset, ref in enumerate(references):
            att = atts[firstCol + offset]
            for instance in newdata:
                instance[att] = orange.Value(att, simFunc(ref, instance))
                if callBack:
                    stepsDone += 1
                    # Floor division keeps the percentage an integer.
                    if not callBack((100 * stepsDone) // nTotalSteps):
                        return None

    if cleanedData:
        # Remove the fixed SMILES and revert to the original SMILES
        newdata = dataUtilities.attributeDeselectionData(newdata, [SMILESattr])
        newdata.domain["OrigSMI_ID"].name = SMILESattr
    return newdata