def get_example_table(self):
    """Run the query and return its result as an ``orange.ExampleTable``.

    The query is executed with ``count=False, header=True`` and the returned
    text is interpreted according to ``self.format`` (case-insensitive):

    * ``"tsv"``   -- the first line is the header; every column becomes an
      ``orange.StringVariable``.  Returns ``None`` when there are no data
      rows (including a reply that consists of the header line only).
    * ``"fasta"`` -- records are parsed with Biopython into a two column
      table (``id``, ``sequence``).

    Raises:
        BioMartError: if ``self.format`` is neither ``"tsv"`` nor ``"fasta"``.
    """
    import orange

    data = self.run(count=False, header=True)
    fmt = self.format.lower()

    if fmt == "tsv":
        # partition() (unlike split("\n", 1) + unpacking) also copes with a
        # reply that has no newline after the header.
        header, _, body = data.partition("\n")
        domain = orange.Domain(
            [orange.StringVariable(name) for name in header.split("\t")],
            False)
        rows = [line.split("\t") for line in body.split("\n") if line.strip()]
        return orange.ExampleTable(domain, rows) if rows else None

    if fmt == "fasta":
        from StringIO import StringIO
        from Bio import SeqIO

        domain = orange.Domain([
            orange.StringVariable("id"),
            orange.StringVariable("sequence"),
        ], False)
        # TODO: meaningful id
        examples = [[seq.id, str(seq.seq)]
                    for seq in SeqIO.parse(StringIO(data), "fasta")]
        return orange.ExampleTable(domain, examples)

    # "%s" (not a bare "%") so that the intended BioMartError is raised
    # instead of a ValueError from the string formatting itself.
    raise BioMartError("Unsupported format: %s" % self.format)
def to_network(self, terms=None):
    """
    Return an Orange.network.Network instance constructed from this
    ontology.

    Every term returned by ``self.terms()`` becomes a vertex; for every
    (term, related term) pair an edge is stored whose value is a 0/1 vector
    over ``self.edge_types()``.  Two example tables are attached to the
    network: ``items`` (id, name, def of each term) and ``links`` (1-based
    vertex numbers ``u``/``v`` and one relationship per edge).

    NOTE: the ``terms`` argument is not consulted; it is replaced by
    ``self.terms()`` on entry.
    """
    edge_types = self.edge_types()
    terms = self.terms()

    from Orange.orng import orngNetwork
    import orange

    network = orngNetwork.Network(len(terms), True, len(edge_types))
    network.objects = dict(
        (term.id, index) for index, term in enumerate(terms))

    # (source id, related term) -> set of relationship types between them
    edges = defaultdict(set)
    for term in self.terms():
        for rel_type, rel_term in self.related_terms(term):
            edges[(term.id, rel_term)].add(rel_type)
    edgeitems = edges.items()

    for (src, dst), present in edgeitems:
        network[src, dst] = [int(etype in present) for etype in edge_types]

    # Vertex descriptions.
    item_domain = orange.Domain([
        orange.StringVariable("id"),
        orange.StringVariable("name"),
        orange.StringVariable("def"),
    ], False)
    items = orange.ExampleTable(item_domain)
    for term in terms:
        definition = term.values.get("def", [""])[0]
        items.append(
            orange.Example(item_domain, [term.id, term.name, definition]))

    # Edge descriptions; vertex numbers are 1-based here.
    link_domain = orange.Domain([
        orange.FloatVariable("u"),
        orange.FloatVariable("v"),
        orange.EnumVariable("relationship", values=list(edge_types)),
    ], False)
    id2index = dict(
        (term.id, index + 1) for index, term in enumerate(terms))
    links = orange.ExampleTable(link_domain)
    for (src, dst), present in edgeitems:
        # Only one (arbitrary) relationship type of the pair is recorded.
        links.append(orange.Example(
            link_domain, [id2index[src], id2index[dst], present.pop()]))

    network.items = items
    network.links = links
    network.optimization = None
    return network
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a classless rule-by-term example table.

    Each key of ``tableDict`` (processed in sorted order) yields one example
    with one PRESENT/ABSENT attribute per entry of ``allTerms``.  Three meta
    attributes are attached: the rule name and the rule terms string (both
    with their first and last character stripped) and the key itself as the
    sequence number.
    """
    import orange
    import constants as const

    termVars = [
        orange.EnumVariable(name=str(term),
                            values=[const.PRESENT, const.ABSENT])
        for term in allTerms
    ]

    # Meta attributes: rule name, rule terms and rule sequence number.
    nameVar = orange.StringVariable(const.NAME_ATTR)
    nameId = orange.newmetaid()
    termsVar = orange.StringVariable(const.TERMS_ATTR)
    termsId = orange.newmetaid()
    numberVar = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                     startValue=1,
                                     endValue=len(tableDict),
                                     stepValue=1,
                                     numberOfDecimals=0)
    numberId = orange.newmetaid()

    # Classless domain carrying the three metas.
    domain = orange.Domain(termVars, False)
    for metaId, metaVar in ((nameId, nameVar),
                            (termsId, termsVar),
                            (numberId, numberVar)):
        domain.addmeta(metaId, metaVar, False)

    table = orange.ExampleTable(domain)
    for key in sorted(tableDict):
        rule = tableDict[key]
        values = []
        for index, term in enumerate(allTerms):
            if term in rule[const.RULETERMS_KEY]:
                state = const.PRESENT
            else:
                state = const.ABSENT
            values.append(orange.Value(termVars[index], state))

        example = orange.Example(domain, values)
        # [1:-1] drops the enclosing square brackets of the stored strings.
        example[const.NAME_ATTR] = orange.Value(
            nameVar, rule[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            termsVar, rule[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(numberVar, key)
        table.append(example)
    return table
def expandToFuzzyExamples(self, examples, att, a, b):
    """
    Annotate ``examples`` in place with fuzzy membership and return it.

    Two meta attributes are added to the table's domain: 'fuzzy set'
    ('yes' / 'no') and 'u' (degree of membership).  For the value ``v`` of
    attribute ``att``:

    * ``a < v < b``: the example is marked 'yes' with u = (v - a) / (b - a)
      and a second example marked 'no' with u = (b - v) / (b - a) is
      appended to the table;
    * ``v > a`` otherwise: 'yes' with u = 1.0;
    * else: 'no' with u = 1.0.

    Only the examples present on entry are visited; the appended ones are
    not processed again.
    """
    membershipVar = orange.FloatVariable("u")
    setVar = orange.StringVariable("fuzzy set")
    examples.domain.addmeta(FUZZYMETAID, membershipVar)
    examples.domain.addmeta(FUZZYMETAID - 1, setVar)

    # The index range is fixed up front, so rows appended below are skipped.
    for position in range(0, len(examples)):
        current = examples[position]
        value = float(current[att])

        if a < value < b:
            # In the fuzzy zone: the example belongs partly to both sets.
            current["fuzzy set"] = 'yes'
            current["u"] = (value - a) / (b - a)
            examples.append(current)
            examples[-1]["fuzzy set"] = "no"
            examples[-1]["u"] = (b - value) / (b - a)
            continue

        # Outside the fuzzy zone: full membership in exactly one set.
        if value > a:
            current["fuzzy set"] = 'yes'
        else:
            current["fuzzy set"] = 'no'
        current["u"] = 1.0
    return examples
def parsePubMed(self, filename, attributes=["pmid", "title", "abstract", "mesh"], skipExamplesWithout=["mesh"]): """ Function parsePubMed can be used to parse (into Orange example table) PubMed search results (in XML). """ parser = make_parser() handler = pubMedHandler() parser.setContentHandler(handler) parser.parse(open(filename)) atts = [] for i in attributes: atts.append(orange.StringVariable(i)) domain = orange.Domain(atts, 0) data = orange.ExampleTable(domain) print data.domain.attributes mapping = {"pmid": 0, "title": 1, "abstract": 2, "mesh": 3, "affilation": 4} for i in handler.articles: r = [] skip = False for f in attributes: if skipExamplesWithout.count(f) > 0: if (f == "mesh" and len(i[mapping[f]]) == 0) or str(i[mapping[f]]) == "": skip = True r.append(str(i[mapping[f]])) if not skip: data.append(r) return data
def relabel(self):
    """Attach item labels to ``self.matrix`` and send it on "Distances".

    Label source, in order of precedence:
    * ``self.data`` (with the matrix present): its attributes/variables
      when ``self.takeAttributeNames`` is set, otherwise its examples; an
      error is reported when the size does not match ``matrix.dim``;
    * ``self.labels``: turned into a one-column example table that is also
      stored in ``self.data``;
    * neither: the stringified indices ``0 .. dim - 1``.
    """
    self.error()  # clear any previously shown error
    matrix = self.matrix

    if matrix is not None and self.data is not None:
        if self.takeAttributeNames:
            domain = self.data.domain
            # Try the attributes first, then attributes + class.
            for items in (domain.attributes, domain.variables):
                if matrix.dim == len(items):
                    matrix.setattr("items", items)
                    break
            else:
                self.error("The number of attributes doesn't match the matrix dimension")
        elif matrix.dim == len(self.data):
            matrix.setattr("items", self.data)
        else:
            self.error("The number of examples doesn't match the matrix dimension")
    elif matrix and self.labels:
        labelVar = orange.StringVariable('label')
        rows = [[str(text)] for text in self.labels]
        self.data = orange.ExampleTable(orange.Domain([labelVar]), rows)
        for example, text in zip(self.data, self.labels):
            example.name = text
        matrix.setattr("items", self.data)

    if self.data == None and self.labels == None:
        matrix.setattr("items", [str(index) for index in range(matrix.dim)])

    self.matrix.matrixType = orange.SymMatrix.Symmetric
    self.send("Distances", self.matrix)
def sendList(self, selectedInd):
    """Send the selected items to the output channels.

    selectedInd -- indices (into ``self.data`` / ``self.mds.points``) of the
                   selected points.

    When ``self.data`` is a list of strings, a table is sent on "Data":
    * ``selectionOptions == 1``: attributes X, Y and name;
    * otherwise: attribute name only, with X and Y added as meta attributes
      when ``selectionOptions`` is truthy.

    Otherwise the selected data sets are grouped and sent on "Structured
    Data Files" (``None`` when nothing is selected).
    """
    if self.data and type(self.data[0]) == str:
        xAttr = orange.FloatVariable("X")
        yAttr = orange.FloatVariable("Y")
        nameAttr = orange.StringVariable("name")
        points = self.mds.points

        if self.selectionOptions == 1:
            domain = orange.Domain([xAttr, yAttr, nameAttr])
            selection = orange.ExampleTable(domain)
            for ind in selectedInd:
                # The label must come from the same index as the
                # coordinates (the selected item), not from the position
                # in the selection list.
                selection.append(list(points[ind]) + [self.data[ind]])
        else:
            domain = orange.Domain([nameAttr])
            if self.selectionOptions:
                domain.addmeta(orange.newmetaid(), xAttr)
                domain.addmeta(orange.newmetaid(), yAttr)
            selection = orange.ExampleTable(domain)
            for row, ind in enumerate(selectedInd):
                selection.append([self.data[ind]])
                if self.selectionOptions:
                    selection[row][xAttr] = points[ind][0]
                    selection[row][yAttr] = points[ind][1]
        self.send("Data", selection)
        return

    if not selectedInd:
        self.send("Structured Data Files", None)
    else:
        datasets = [self.data[i] for i in selectedInd]
        names = list(set([d.dirname for d in datasets]))
        # NOTE(review): groups are named after ``dirname`` but matched on
        # ``strain`` -- presumably the two coincide; verify.
        data = [(name, [d for d in datasets if d.strain == name])
                for name in names]
        self.send("Structured Data Files", data)
def getAZOdata(self, smi):
    """
    Wrap a single SMILES string in a one-row data table.

    The returned dataUtilities.DataTable has one string attribute named
    "SMILES" whose only value is ``smi``.
    """
    domain = orange.Domain(orange.StringVariable("SMILES"), 0)
    return dataUtilities.DataTable(domain, [[smi]])
def as_orange_table(self):
    """Return ``self.data`` as an orange.ExampleTable.

    The table has no ordinary attributes: ``self.orange_class_var`` is the
    class and the word is stored in a string meta attribute "word".  Each
    (word, label) pair of ``self.data`` becomes one example.
    """
    wordVar = orange.StringVariable("word")
    domain = orange.Domain([], self.orange_class_var)
    domain.addmeta(orange.newmetaid(), wordVar)

    table = orange.ExampleTable(domain)
    for word, label in self.data:
        example = orange.Example(domain)
        example["class"] = label
        example["word"] = word
        table.append(example)
    return table
def makeDomain(names):
    """Create the domain used for landmark examples.

    names -- names of the continuous attributes.

    The class is a True/False EnumVariable named "class".  Meta attributes
    are added in this order: "weight" (float), "isInsane" (True/False),
    four string metas and five PythonVariable metas.
    """
    classVar = orange.EnumVariable("class", values=["True", "False"])
    domain = orange.Domain([orange.FloatVariable(n) for n in names], classVar)

    stringMetas = ("filename", "sourceEngineName", "engineName",
                   "landmarkName")
    pythonMetas = ("geometry", "track", "drawMap", "description", "farAway")

    metas = [
        orange.FloatVariable("weight"),
        orange.EnumVariable("isInsane", values=["True", "False"]),
    ]
    metas.extend(orange.StringVariable(n) for n in stringMetas)
    metas.extend(orange.PythonVariable(n) for n in pythonMetas)

    for metaVar in metas:
        domain.addmeta(orange.newmetaid(), metaVar)
    return domain
def __loadDataFromES(self, dataType, domain):
    """Load phrases from Elasticsearch into an orange.ExampleTable.

    dataType -- "train", "holdout", or anything else for the single phrase
                identified by ``self.phraseId``
    domain   -- domain of the resulting table; ignored for "train", where a
                new domain is built from ``self.features`` with the class
                "is_good" and a string meta attribute "phrase"

    For "train"/"holdout" all documents having the corresponding
    ``is_training``/``is_holdout`` flag are fetched and the flag value is
    used as the class; otherwise the class is unknown ("?").  Rows that
    cannot be converted are logged and skipped.

    Returns the filled example table.
    """
    if dataType == "train":
        attributes = map(self.__getOrangeVariableForFeature, self.features)
        classAttribute = orange.EnumVariable("is_good", values=["0", "1"])
        domain = orange.Domain(attributes, classAttribute)
        domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
    table = orange.ExampleTable(domain)

    flagField = {"train": "is_training", "holdout": "is_holdout"}.get(dataType)
    if flagField is not None:
        # Count first so that the search can request every matching hit.
        # (The holdout branch used to read an undefined name here.)
        query = {"query": {"terms": {flagField: ["1", "0"]}}}
        phrasesCount = self.esClient.count(index=self.processorIndex,
                                           doc_type=self.processorPhraseType,
                                           body=query)
        phrases = self.esClient.search(index=self.processorIndex,
                                       doc_type=self.processorPhraseType,
                                       body=query,
                                       size=phrasesCount["count"])
        phrases = phrases["hits"]["hits"]
    else:
        self.phraseData = self.esClient.get(index=self.processorIndex,
                                            doc_type=self.processorPhraseType,
                                            id=self.phraseId)
        phrases = [self.phraseData]

    for hit in phrases:
        # Remembered separately so the error handler never has to index a
        # possibly malformed row again.
        phrase = "<unknown>"
        try:
            row = hit["_source"]
            phrase = row["phrase"]
            featureValues = [
                row["features"][feature["name"]].encode("ascii")
                for feature in self.features
            ]
            if dataType == "train":
                classType = row["is_training"].encode("ascii", "ignore")
            elif dataType == "holdout":
                classType = row["is_holdout"].encode("ascii")
            else:
                classType = "?"

            # Discrete attributes must know a value before it can be used.
            for i, featureValue in enumerate(featureValues):
                attr = domain.attributes[i]
                if type(attr) is orange.EnumVariable:
                    attr.addValue(featureValue)

            example = orange.Example(domain, (featureValues + [classType]))
            example[domain.getmetas().items()[0][0]] = phrase.encode("ascii")
            table.append(example)
        except Exception:
            self.logger.error("Error classifying phrase '%s'" % phrase)
    return table
def convert_table(self, table_name, cls_att=None):
    '''
    Returns the target table as an orange example table.

    table_name -- name of the database table to convert
    cls_att    -- name of the column to use as the class variable, if any

    Columns of orange type 'd' become EnumVariables, 'c' FloatVariables and
    everything else StringVariables.  String columns and primary/foreign
    key columns are stored as meta attributes.  None values are written as
    unknown ('?').

    Raises Exception when the class column is of type 'string'.
    '''
    import orange

    db = self.db
    columns = db.cols[table_name]
    feature_vars = []
    meta_vars = []
    target_var = None

    def is_key_column(column):
        # True for columns listed as primary or foreign key of the table.
        for keys in (db.pkeys, db.fkeys):
            if table_name in keys and column in keys[table_name]:
                return True
        return False

    for column in columns:
        kind = self.orng_type(table_name, column)
        if kind == 'd':
            variable = orange.EnumVariable(
                str(column),
                values=[str(v) for v in db.col_vals[table_name][column]])
        elif kind == 'c':
            variable = orange.FloatVariable(str(column))
        else:
            variable = orange.StringVariable(str(column))

        if column == cls_att:
            if kind == 'string':
                raise Exception(
                    'Unsuitable data type for a target variable: %s' % kind)
            target_var = variable
        elif kind == 'string' or is_key_column(column):
            meta_vars.append(variable)
        else:
            feature_vars.append(variable)

    domain = orange.Domain(feature_vars, target_var)
    for variable in meta_vars:
        domain.addmeta(orange.newmetaid(), variable)

    dataset = orange.ExampleTable(domain)
    dataset.name = table_name
    for row in db.rows(table_name, columns):
        example = orange.Example(domain)
        for column, val in zip(columns, row):
            if val != None:
                example[str(column)] = str(val)
            else:
                example[str(column)] = '?'
        dataset.append(example)
    return dataset
def __init__(self, training): self.training = training self.ancestor_to_count = training.ancestor_map() self.all_ancestors = list(self.ancestor_to_count.keys()) self.all_ancestors.sort(key=lambda a: self.ancestor_to_count[a], reverse=True) self.used_ancestors = self.all_ancestors print "name", self.used_ancestors[0].name self.attributes = [ orange.EnumVariable(a.name, values=["True", "False"]) for a in self.used_ancestors ] #self.attributes = [orange.FloatVariable(a.name) # for a in self.used_ancestors] print "got", len(self.used_ancestors), "features" self.domain = orange.Domain(self.attributes, training.orange_class_var) self.domain.addmeta(orange.newmetaid(), orange.StringVariable("word")) table = self.makeTable(self.training) self.classifier = orngEnsemble.RandomForestLearner()(table)
def test_pickle(self):
    """Check pickling of examples, with and without meta values.

    An example that is a reference into a table must refuse to pickle; a
    free-standing copy must survive a pickle round trip, also after a
    continuous and a string meta value have been added.
    """
    import pickle

    table = orange.ExampleTable("iris")
    row = table[0]
    self.assertRaises(pickle.PicklingError, pickle.dumps, row)

    # A copy detached from the table can be pickled.
    row = orange.Example(row)
    restored = pickle.loads(pickle.dumps(row))
    self.assertEqual(row, row)
    self.assertEqual(row, restored)

    numId = orange.newmetaid()
    row[numId] = 33
    table.domain.addmeta(numId, orange.ContinuousVariable("x"))

    strId = orange.newmetaid()
    table.domain.addmeta(strId, orange.StringVariable("y"))
    row[strId] = "foo"

    restored = pickle.loads(pickle.dumps(row))
    self.assertEqual(row, restored)
    self.assertEqual(restored[numId], 33)
    self.assertEqual(restored[strId], "foo")
def generateETStruct(path, medaData, numGenes=None):
    """Write one tab-delimited file per replica below ``path``.

    For every strain a sub-directory ``path\\<strain>`` is created (as is
    ``path`` itself when missing) and every replica of the strain is saved
    there as ``<replica>.tab`` with a string meta attribute "DDB" taken
    from ``Dicty.DAnnotation.getDDBList()``.

    path     -- output directory
    medaData -- NOTE: not consulted; it is replaced by
                ``Dicty.DData.DData_Nancy()`` on entry
    numGenes -- when truthy, only the first ``numGenes`` examples are saved
    """
    ddbList = Dicty.DAnnotation.getDDBList()
    if not os.path.exists(path):
        os.mkdir(path)

    medaData = Dicty.DData.DData_Nancy()
    for strain in medaData.strains:
        strainDir = path + "\\" + strain
        if not os.path.exists(strainDir):
            os.mkdir(strainDir)

        for replica in medaData.strain2replicaList(strain):
            raw = medaData.getRaw2d(replica)
            tcDomain = Meda.Preproc.getTcDomain(raw.shape[1], False, [], None)
            table = Meda.Preproc.ma2orng(raw, tcDomain)

            table.domain.addmeta(orange.newmetaid(),
                                 orange.StringVariable("DDB"))
            for index, example in enumerate(table):
                example["DDB"] = ddbList[index]

            target = strainDir + "\\" + replica + ".tab"
            if numGenes:
                orange.saveTabDelimited(
                    target, orange.ExampleTable(table[:numGenes]))
            else:
                orange.saveTabDelimited(target, table)
# Rename the "[C]([C]=[C])" attribute to "Measure" and use it as the class of
# a new domain that leaves out the "activity" attribute.
data.domain["[C]([C]=[C])"].name = "Measure"
domain = orange.Domain([
    attr for attr in data.domain.attributes
    if attr.name not in ["activity", "Measure"]
], data.domain["Measure"])
data = orange.ExampleTable(domain, data)
# Fixed seed: the noise added to "Measure" below is the same on every run.
random.seed(6)
for ex in data:
    ex["Measure"] = ex["Measure"] + random.random()
data.save(DataDesc + "_No_metas_FullNumeric_Train.tab")

#================ Create small test set =================
dataFile = "BinClass_No_metas_SmallTest.tab"
# Copies the first 33 lines of the full test file using the external
# `head` command.
os.system("head -n 33 BinClass_No_metas_Test.tab > " + dataFile)
data = dataUtilities.DataTable(dataFile)
# Add a string meta attribute "Comments" (meta id -1) and mark a few
# hand-picked rows as "notok".
var = orange.StringVariable("Comments")
data.domain.addmeta(-1, var)
idxs = [2, 5, 12, 25]
for idx, ex in enumerate(data):
    if idx in idxs:
        ex["Comments"] = "notok"
    else:
        ex["Comments"] = "ok"
data.save("BinClass_W_metas_SmallTest.tab")

#================ Test data for BAD things :) =================
# Read the small test file (the version without metas) back as raw lines.
fileH = open(dataFile, "r")
lines = fileH.readlines()
fileH.close()
#-----------------
saveFile = "BinClass_BadVarType.tab"
fileH = open(saveFile, "w")
def getRdkDescResult(data,descList, radius = 1):
    """ Calculates the descriptors for the descList using RDK
        It expects an attribute containing smiles with a name defined in AZOrangeConfig.SMILESNAMES
        It returns a dataset with the same smiles input variable, and as many variables as the descriptors
        returned by the toolkit

        data     - dataset holding the SMILES attribute
        descList - descriptor names; only those containing the rdk tag
                   (toolkitsDef["rdk"]["tag"]) are handled here. The name
                   "FingerPrints" or any name containing "FP_" enables the
                   Morgan fingerprint descriptors.
        radius   - radius passed to rdk.AllChem.GetMorganFingerprint

        Returns None when rdk is not enabled, when no SMILES attribute is
        found or when descList holds no rdk descriptor. Compounds for which
        the calculation raises are left out of the result and counted as
        ignored in the printed summary.
    """
    if "rdk" not in toolkitsEnabled:
        return None
    FingerPrints = False
    smilesName = getSMILESAttr(data)
    if not smilesName: return None

    FP_desc = []
    # Keep only the rdk descriptors, with the toolkit tag stripped.
    myDescList = [desc.replace(toolkitsDef["rdk"]["tag"],"") for desc in descList if toolkitsDef["rdk"]["tag"] in desc]
    if not myDescList: return None
    if "FingerPrints" in myDescList:
        FingerPrints = True
        myDescList.remove("FingerPrints")
    # Explicitly requested fingerprint bits ("FP_...") are moved to FP_desc.
    if sum(["FP_" in fp for fp in myDescList]):
        tmpDescList = []
        FingerPrints = True
        for attr in myDescList:
            if "FP_" not in attr:
                tmpDescList.append(attr)
            else:
                FP_desc.append(attr)
        myDescList = tmpDescList

    #Get fingerprints in advance
    fingerPrintsAttrs = []
    fingerPrintsRes = {}
    if FingerPrints:
        for ex in data:
            mol = str(ex[smilesName].value)
            try:
                chemMol = rdk.Chem.MolFromSmiles(mol,True)
                # Second attempt with False as the second argument when the
                # first call gave no molecule.
                if not chemMol: chemMol = rdk.Chem.MolFromSmiles(mol,False)
                fingerPrint = rdk.AllChem.GetMorganFingerprint(chemMol,radius)
                resDict = fingerPrint.GetNonzeroElements()
            except:
                # No fingerprint for this compound: fingerPrintsRes gets no
                # entry for it.
                continue
            fingerPrintsRes[mol] = {}
            for ID in resDict:
                count = resDict[ID]
                name = toolkitsDef["rdk"]["tag"]+"FP_"+str(ID)
                if name not in [x.name for x in fingerPrintsAttrs]:
                    fingerPrintsAttrs.append(orange.FloatVariable(name))
                fingerPrintsRes[mol][name] = float(count)
        #Add FP attributes even if there was no reference to it. Models will need it as FP not present, i.e. equal 0.0 !
        for fpDesc in FP_desc:
            name = toolkitsDef["rdk"]["tag"]+fpDesc
            if name not in [str(attr.name) for attr in fingerPrintsAttrs]:
                fingerPrintsAttrs.append(orange.FloatVariable(name))

    #Test attrTypes
    # The variable types are taken from the first compound for which the
    # descriptors can be calculated (the loop stops at the first success).
    # NOTE(review): attrObj is only bound inside this loop, so it is
    # undefined below when data has no examples.
    for ex in data:
        try:
            attrObj = []
            molStr = str(ex[smilesName].value)
            chemMol = rdk.Chem.MolFromSmiles(molStr,True)
            if not chemMol: chemMol = rdk.Chem.MolFromSmiles(molStr,False)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(chemMol))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                if type(moldesc[desc]) == str:
                    attrObj.append(orange.StringVariable(toolkitsDef["rdk"]["tag"] + desc))
                else:
                    attrObj.append(orange.FloatVariable(toolkitsDef["rdk"]["tag"] + desc))
            #Process fingerprints
            if FingerPrints:
                # NOTE(review): this compares a name (fp.name) with the
                # variable objects held in attrObj - confirm it filters as
                # intended.
                for desc in [fp for fp in fingerPrintsAttrs if fp.name not in attrObj]:
                    attrObj.append(desc)
            break
        except:
            continue

    resData = orange.ExampleTable(orange.Domain([data.domain[smilesName]] + attrObj,0))
    badCompounds = 0
    for ex in data:
        newEx = orange.Example(resData.domain) # All attrs: ?, ?, ?, ..., ?
        newEx[smilesName] = ex[smilesName]
        molStr = str(newEx[smilesName].value)
        # OBS - add something keeping count on the number of unused smiles
        try:
            chemMol = rdk.Chem.MolFromSmiles(molStr,True)
            if not chemMol: chemMol = rdk.Chem.MolFromSmiles(molStr,False)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(chemMol))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                newEx[toolkitsDef["rdk"]["tag"]+desc] = moldesc[desc]

            #Process fingerprints
            if FingerPrints:
                # A missing fingerPrintsRes entry for this compound raises
                # here and the compound is counted as ignored below.
                for desc in fingerPrintsAttrs:
                    if desc.name in fingerPrintsRes[molStr]:
                        newEx[desc.name] = fingerPrintsRes[molStr][desc.name]
                    else:
                        newEx[desc.name] = 0.0
            resData.append(newEx)
        except:
            badCompounds += 1
    print "Compounds in original data: ",len(data)
    print "Compounds able to calculate descs:",len(resData)
    print "Ignored Compounds: ",badCompounds
    return resData
def getCinfonyDescResults(origData,descList,radius=1):
    """Calculate the cinfony descriptors for origData.

    The input variables and the class are kept and the descriptors of every
    available toolkit (Open Babel, RDKit, Webel, CDK) are merged in on the
    SMILES attribute. NaN values in the toolkit results are replaced by
    '?'.

    origData - dataset with a SMILES attribute
    descList - names of the descriptors to calculate
    radius   - forwarded to getRdkDescResult

    Returns a new dataset, or None when there is no input, no SMILES
    attribute or no toolkit produced a result.
    """
    if not origData or not descList:
        return None
    smilesName = getSMILESAttr(origData)
    if not smilesName:
        return None

    # Work on a copy that remembers the original SMILES in "origSmiles".
    keptAttrs = [var for var in origData.domain if var is not origData.domain.classVar]
    newDomain = orange.Domain(keptAttrs + [orange.StringVariable("origSmiles")], origData.domain.classVar)
    data = dataUtilities.DataTable(newDomain, origData)
    for example in data:
        example["origSmiles"] = example[smilesName].value

    # Standardize the SMILES in place when the optional utilities are loaded.
    #TODO: Create a method in dataUtilities to standardize the attribute smilesName in place having the attr origSmiles as ID
    if "AZutilities.extraUtilities" in sys.modules and hasattr(extraUtilities, "StandardizeSMILES"):
        extraUtilities.StandardizeSMILES(data, smiAttr = smilesName, cName="origSmiles")

    # Ask every toolkit in turn and keep the non-empty answers.
    candidates = (
        getObabelDescResult(data,descList),
        getRdkDescResult(data,descList,radius),
        getWebelDescResult(data,descList),
        getCdkDescResult(data,descList),
    )
    results = [table for table in candidates if table]
    if not results:
        return None

    # Convert any nan to a '?' (nan is the only value that differs from itself)
    for table in results:
        for example in table:
            for var in example.domain:
                if example[var] != example[var]:
                    example[var] = '?'

    merged = results[0]
    for table in results[1:]:
        merged = dataUtilities.horizontalMerge(merged, table, smilesName, smilesName)
    data = dataUtilities.horizontalMerge(data, merged, smilesName, smilesName)

    # Put the original SMILES back and drop the helper attribute.
    for example in data:
        example[smilesName] = example["origSmiles"]
    finalAttrs = [var for var in data.domain if var.name != "origSmiles" and var is not data.domain.classVar]
    return dataUtilities.DataTable(orange.Domain(finalAttrs, data.domain.classVar), data)
def exportNetwork(self, absolute_int=10, positive_int=0, negative_int=0, best_attributes=0, significant_digits=2, pretty_names=1, widget_coloring=1, pcutoff=1):
    """Build an Orange.core.Network of the selected attribute interactions.

    absolute_int       -- number of interactions taken from the end of
                          self.abslist
    positive_int       -- number of interactions taken from the end of
                          self.list
    negative_int       -- number of interactions taken from the start of
                          self.list
    best_attributes    -- number of attributes taken from the end of
                          self.attlist
    significant_digits -- passed to _nicefloat for the edge labels
    pretty_names       -- when true, "ED_", "D_" and "M_" are removed from
                          the names and " ", "-", "_" become "\\n"
    widget_coloring    -- when true, edge colors are "green"/"red",
                          otherwise quoted HSV strings
    pcutoff            -- when below 1 (and self.plist is not empty), only
                          attributes/interactions whose self.plut value is
                          below the cutoff are kept

    Returns the network with ``items`` (index, label, norm. gain) and
    ``links`` (u, v, label, dir, color) example tables attached.
    """
    NA = len(self.names)

    ### SELECTION OF INTERACTIONS AND ATTRIBUTES ###

    # prevent crashes
    best_attributes = min(best_attributes, len(self.attlist))
    positive_int = min(positive_int, len(self.list))
    absolute_int = min(absolute_int, len(self.list))
    negative_int = min(negative_int, len(self.list))

    # select the top interactions
    # (the > 0 guards matter: a slice [-0:] would select the whole list)
    ins = []
    if positive_int > 0:
        ins += self.list[-positive_int:]
    ins += self.list[:negative_int]
    if absolute_int > 0:
        ins += self.abslist[-absolute_int:]

    # pick best few attributes
    atts = []
    if best_attributes > 0:
        atts += [i for (x, i) in self.attlist[-best_attributes:]]

    # disregard the insignificant attributes, interactions
    if len(self.plist) > 0 and pcutoff < 1:
        # attributes
        oats = atts
        atts = []
        for i in oats:
            if self.plut[(i, -1)] < pcutoff:
                atts.append(i)
        # interactions
        oins = ins
        ins = []
        for y in oins:
            (ig, i, j) = y[1]
            if self.plut[(i, j, -1)] < pcutoff:
                ins.append(y)

    ints = []
    max_igain = -1e6
    min_gain = 1e6  # lowest information gain of involved attributes
    # remove duplicates and sorting keys
    for (x, v) in ins:
        if v not in ints:
            ints.append(v)
            # add to attribute list
            (ig, i, j) = v
            max_igain = max(abs(ig), max_igain)
            for x in [i, j]:
                if x not in atts:
                    atts.append(x)
                    min_gain = min(min_gain, self.gains[x])

    # fill-in the attribute list with all possibly more important attributes
    ## todo

    ### NODE DRAWING ###
    # map: attribute index -> vertex number (shadows the builtin map())
    map = {}
    graph = Orange.core.Network(len(atts), 0)
    table = []

    for i in range(len(atts)):
        map[atts[i]] = i
        ndx = atts[i]
        t = '%s' % self.names[ndx]
        if pretty_names:
            t = string.replace(t, "ED_", "")
            t = string.replace(t, "D_", "")
            t = string.replace(t, "M_", "")
            t = string.replace(t, " ", "\\n")
            t = string.replace(t, "-", "\\n")
            t = string.replace(t, "_", "\\n")
        # gain as a percentage of the entropy
        r = self.gains[ndx] * 100.0 / self.entropy
        table.append([i + 1, t, r])

    d = orange.Domain([
        orange.FloatVariable('index'),
        orange.StringVariable('label'),
        orange.FloatVariable('norm. gain')
    ])
    data = orange.ExampleTable(d, table)
    graph.items = data

    table = []
    for (ig, i, j) in ints:
        # from here on i and j are vertex numbers, not attribute indices
        j = map[j]
        i = map[i]

        perc = int(
            abs(ig) * 100.0 / max(max_igain, self.attlist[-1][0]) + 0.5)
        # perc is an int, so under Python 2 this is an integer division
        graph[i, j] = perc / 30 + 1

        if self.entropy > 1e-6:
            mc = _nicefloat(100.0 * ig / self.entropy,
                            significant_digits) + "%"
        else:
            mc = _nicefloat(0.0, significant_digits)
        if len(self.plist) > 0 and pcutoff < 1:
            # NOTE(review): i and j were remapped to vertex numbers above,
            # while the filtering loop looks self.plut up by attribute
            # indices - confirm this lookup uses the intended keys.
            mc += "\\nP\<%.3f" % self.plut[(i, j, -1)]
        if ig > 0:
            if widget_coloring:
                color = "green"
            else:
                color = '"0.0 %f 0.9"' % (0.3 + 0.7 * perc / 100.0
                                          )  # adjust saturation
            dir = "both"
        else:
            if widget_coloring:
                color = "red"
            else:
                color = '"0.5 %f 0.9"' % (0.3 + 0.7 * perc / 100.0
                                          )  # adjust saturation
            dir = 'none'
        table.append([i, j, mc, dir, color])

    d = orange.Domain([
        orange.FloatVariable('u'),
        orange.FloatVariable('v'),
        orange.StringVariable('label'),
        orange.EnumVariable('dir', values=["both", "none"]),
        orange.EnumVariable('color', values=["green", "red"])
    ])
    data = orange.ExampleTable(d, table)
    graph.links = data
    return graph
def createStatData(self, statistics):
    """Store ``statistics`` as an example table in ``self.statistics``.

    For every key ``ml`` of ``statistics`` one "Total" row is written,
    followed by one row per fold.  The columns are "Method" and "Fold",
    then either the classification measures (CA, MCC and the four
    confusion matrix cells) or the regression measures (Q2, RMSE)
    depending on ``self.isClassDiscrete``, then "nTest" and "nTrain"; the
    class is "Stability".  The key "selectedML" is reported as "Total" and,
    per fold, under the method listed in ``foldSelectedML``.

    Returns the table (also kept in ``self.statistics``).
    """
    columns = [orange.StringVariable("Method"), orange.FloatVariable("Fold")]
    if self.isClassDiscrete:
        measureNames = ["CA", "MCC", "truePOS", "trueNEG", "falsePOS",
                        "falseNEG"]
    else:
        measureNames = ["Q2", "RMSE"]
    columns += [orange.FloatVariable(name) for name in measureNames]
    columns += [orange.FloatVariable("nTest"), orange.FloatVariable("nTrain")]

    self.statistics = orange.ExampleTable(
        orange.Domain(columns, orange.FloatVariable("Stability")))

    def storeConfusionMatrix(row, cm):
        # cm layout: [[TP, FN],[FP, TN]]
        row["truePOS"] = cm[0][0]
        row["trueNEG"] = cm[1][1]
        row["falsePOS"] = cm[1][0]
        row["falseNEG"] = cm[0][1]

    for ml in statistics:
        mlStat = statistics[ml]

        # Total row
        total = orange.Example(self.statistics.domain)
        total["Method"] = "Total" if ml == "selectedML" else ml + " Total"
        total["Stability"] = mlStat["StabilityValue"]
        if self.isClassDiscrete:
            total["CA"] = mlStat["CA"]
            total["MCC"] = mlStat["MCC"]
            storeConfusionMatrix(total, mlStat["CM"])
        else:
            total["Q2"] = mlStat["Q2"]
            total["RMSE"] = mlStat["RMSE"]
        self.statistics.append(total)

        # Fold rows
        foldStat = mlStat["foldStat"]
        for fold, nTest in enumerate(foldStat["nTestCmpds"]):
            row = orange.Example(self.statistics.domain)
            if ml == "selectedML":
                row["Method"] = foldStat["foldSelectedML"][fold]
            else:
                row["Method"] = ml
            row["Fold"] = fold
            row["nTrain"] = foldStat["nTrainCmpds"][fold]
            row["nTest"] = nTest
            if self.isClassDiscrete:
                row["CA"] = foldStat["CA"][fold]
                row["MCC"] = foldStat["MCC"][fold]
                storeConfusionMatrix(row, foldStat["CM"][fold])
            else:
                row["Q2"] = foldStat["Q2"][fold]
                row["RMSE"] = foldStat["RMSE"][fold]
            self.statistics.append(row)
    return self.statistics
def generateGraph(self, N_changed = False):
    """Build a network from the distance matrix in self.data and send it.

    Edges are created by graph.fromDistanceMatrix from the lower/upper
    threshold spin values, self.kNN and self.andor. Depending on
    self.netOption the result is restricted to
      1 - components with more than self.excludeLimit vertices,
      2 - the first component returned by getConnectedComponents(),
      3 - the components containing a vertex whose selected attribute
          contains self.label (case-insensitive),
    any other value keeps the whole graph.

    N_changed -- when true, self.netOption is reset to 1 first.

    Sends "Network", "Distance Matrix" (only when a matrix remains) and
    "Examples", and updates the info labels and the histogram boundary.
    """
    self.searchStringTimer.stop()
    self.attributeCombo.box.setEnabled(False)
    self.error()
    matrix = None
    self.warning('')

    if N_changed:
        self.netOption = 1

    if self.data == None:
        self.infoa.setText("No data loaded.")
        self.infob.setText("")
        return

    # Twice the sum of the histogram counts whose x value lies between the
    # two thresholds.
    nEdgesEstimate = 2 * sum([self.histogram.yData[i] for i,e in enumerate(self.histogram.xData) if self.spinLowerThreshold <= e <= self.spinUpperThreshold])

    if nEdgesEstimate > 200000:
        self.graph = None
        nedges = 0
        n = 0
        self.error('Estimated number of edges is too high (%d).' % nEdgesEstimate)
    else:
        graph = orngNetwork.Network(self.data.dim, 0)
        matrix = self.data

        if hasattr(self.data, "items"):
            # Use the matrix items directly when they already are an
            # ExampleTable, otherwise wrap their string form in a table
            # with a single 'label' attribute.
            if type(self.data.items) == type(orange.ExampleTable(orange.Domain(orange.StringVariable('tmp')))):
                #graph.setattr("items", self.data.items)
                graph.items = self.data.items
            else:
                data = [[str(x)] for x in self.data.items]
                items = orange.ExampleTable(orange.Domain(orange.StringVariable('label'), 0), data)
                #graph.setattr("items", list(items))
                graph.items = list(items)

        # set the threshold
        # set edges where distance is lower than threshold
        nedges = graph.fromDistanceMatrix(self.data, self.spinLowerThreshold, self.spinUpperThreshold, self.kNN, self.andor)
        edges = graph.getEdges()

        # With dstWeight == 1 every edge value w other than "0" is replaced
        # by 1 - w, and "0" by 1.
        if self.dstWeight == 1:
            if graph.directed:
                for u,v in edges:
                    foo = 1
                    if str(graph[u,v]) != "0":
                        foo = 1.0 - float(graph[u,v])
                    graph[u,v] = foo
            else:
                for u,v in edges:
                    # undirected: handle each pair only once
                    if u <= v:
                        foo = 1
                        if str(graph[u,v]) != "0":
                            foo = 1.0 - float(graph[u,v])
                        graph[u,v] = foo

        n = len(edges)
        # exclude unconnected
        if str(self.netOption) == '1':
            components = [x for x in graph.getConnectedComponents() if len(x) > self.excludeLimit]
            if len(components) > 0:
                # concatenate the vertex lists of all kept components
                include = reduce(lambda x,y: x+y, components)
                if len(include) > 1:
                    self.graph = orngNetwork.Network(graph.getSubGraph(include))
                    matrix = self.data.getitems(include)
                else:
                    self.graph = None
                    matrix = None
            else:
                self.graph = None
                matrix = None
        # largest connected component only
        # NOTE(review): takes the first component returned - presumably
        # getConnectedComponents() orders them by size; verify.
        elif str(self.netOption) == '2':
            component = graph.getConnectedComponents()[0]
            if len(component) > 1:
                self.graph = orngNetwork.Network(graph.getSubGraph(component))
                matrix = self.data.getitems(component)
            else:
                self.graph = None
                matrix = None
        # connected component with vertex by label
        elif str(self.netOption) == '3':
            self.attributeCombo.box.setEnabled(True)
            self.graph = None
            matrix = None
            if self.attributeCombo.currentText() != '' and self.label != '':
                components = graph.getConnectedComponents()

                txt = self.label.lower()
                # vertices whose selected attribute contains the search text
                nodes = [i for i, values in enumerate(self.data.items) if txt in str(values[str(self.attributeCombo.currentText())]).lower()]
                if len(nodes) > 0:
                    vertices = []
                    for component in components:
                        for node in nodes:
                            if node in component:
                                if len(component) > 0:
                                    vertices.extend(component)

                    if len(vertices) > 0:
                        # a component is added once per matching node, so
                        # remove the duplicates
                        vertices = list(set(vertices))
                        self.graph = orngNetwork.Network(graph.getSubGraph(vertices))
                        matrix = self.data.getitems(vertices)
        else:
            self.graph = graph

    self.pconnected = nedges
    self.nedges = n
    self.infoa.setText("%d vertices" % self.data.dim)
    self.infob.setText("%d connected (%3.1f%%)" % (nedges, nedges / float(self.data.dim) * 100))
    self.infoc.setText("%d edges (%d average)" % (n, n / float(self.data.dim)))

    if self.graph != None:
        #setattr(matrix, "items", self.graph.items)
        matrix.items = self.graph.items

    self.send("Network", self.graph)

    if matrix:
        self.send("Distance Matrix", matrix)

    if self.graph == None:
        self.send("Examples", None)
    else:
        self.send("Examples", self.graph.items)

    self.histogram.setBoundary(self.spinLowerThreshold, self.spinUpperThreshold)
def getSmilesData(self, smiles):
    """Store ``smiles`` in ``self.smilesData`` as a one-row data table.

    The table has a single string attribute named "SMILEStoPred".
    """
    domain = orange.Domain([orange.StringVariable("SMILEStoPred")], 0)
    rows = [[smiles]]
    self.smilesData = dataUtilities.DataTable(domain, rows)
# Print the examples accepted by the filter as configured so far.
for ex in fcont(data):
    print ex

# Keep examples whose first attribute lies between 4.6 and 5.0.
# NOTE(review): the printed message says 4.5 while the filter uses 4.6.
fcont[0] = (orange.ValueFilter.Between, 4.6, 5.0)
print "\n\nThe first attribute is between to 4.5 and 5.0"
for ex in fcont(data):
    print ex

# Keep examples whose first attribute lies outside 4.6 .. 7.5.
# NOTE(review): the printed message still describes the previous filter.
fcont[0] = (orange.ValueFilter.Outside, 4.6, 7.5)
print "\n\nThe first attribute is between to 4.5 and 5.0"
for ex in fcont(data):
    print ex

############ THIS IS WHAT YOU CAN DO WITH STRING ATTRIBUTES

# Add a string meta attribute "name" holding the class value as text.
data.domain.addmeta(orange.newmetaid(), orange.StringVariable("name"))
for ex in data:
    ex["name"] = str(ex.getclass())

# Filter on a single string value ...
fstr = orange.Filter_values(domain=data.domain)
fstr["name"] = "Iris-setosa"
print "\n\nSetosae"
d = fstr(data)
print "%i examples, starting with %s" % (len(d), d[0])

# ... and on a list of accepted string values.
fstr["name"] = ["Iris-setosa", "Iris-virginica"]
print "\n\nSetosae and virginicae"
d = fstr(data)
print "%i examples, starting with %s\n finishing with %s" % (len(d), d[0], d[-1])
def parse(DOM):
    """ Parse the graph DOM as returned from geneMANIA server and return
    an :class:`Orange.network.Graph` instance.

    Every ``node`` element becomes a graph node and every ``edge`` element
    a graph edge; the ``data`` elements found under an element (keyed by
    their ``key`` attribute) supply its properties.  Nodes are numbered
    from 0 in the iteration order of the parsed node dictionary.

    The returned graph also carries two tables:

    * items -- one row per node (label, id, score, symbol, go, source),
      ordered by node number;
    * links -- one row per distinct ``(source, target)`` pair with 1-based
      end points ``u`` and ``v`` and one ``weight_<networkGroupCode>``
      column per network group (0 where the pair has no edge in that
      group).

    Raises ``KeyError`` if an expected attribute or data key is missing.

    """
    from collections import defaultdict

    def parseAttributes(element):
        # XML attributes of ``element`` as a plain ``name -> value`` dict.
        return dict(element.attributes.items())

    def parseText(element):
        # Concatenated ``wholeText`` of the direct Text children.
        return u"".join(el.wholeText for el in element.childNodes
                        if isinstance(el, minidom.Text))

    def parseData(node):
        # ``key -> text`` for every <data> element found under ``node``.
        return dict((parseAttributes(el)["key"], parseText(el))
                    for el in node.getElementsByTagName("data"))

    graphNodes = {}
    for node in DOM.getElementsByTagName("node"):
        graphNodes[parseAttributes(node)["id"]] = parseData(node)

    # Several <edge> elements may connect the same pair (one per network
    # group), hence a list of data dicts per (source, target).
    graphEdges = defaultdict(list)
    for edge in DOM.getElementsByTagName("edge"):
        attrs = parseAttributes(edge)
        graphEdges[attrs["source"], attrs["target"]].append(parseData(edge))

    # Flatten in one pass (the former reduce(list.__add__, ...) copied the
    # accumulated list on every step).
    allData = [data for edge_data in graphEdges.values() for data in edge_data]

    edgeTypes = set(int(data["networkGroupId"]) for data in allData)
    # Column position of every network group; edgeTypes is not modified
    # afterwards, so iterating it again below yields the same order.
    groupId2int = dict((groupId, i) for i, groupId in enumerate(edgeTypes))
    groupId2groupCode = dict((int(data["networkGroupId"]),
                              str(data["networkGroupCode"]))
                             for data in allData)
    graphNode2nodeNumber = dict((node_id, i)
                                for i, node_id in enumerate(graphNodes))

    import Orange
    graph = Orange.network.Graph()
    for node_id, data in graphNodes.items():
        graph.add_node(graphNode2nodeNumber[node_id],
                       original_id=str(node_id),
                       symbol=data["symbol"],
                       score=float(data["score"]))

    graph.add_nodes_from(sorted(graphNode2nodeNumber.values()))

    # Add the edges and build the links-table rows in the same pass, so a
    # row and its weights can never get out of step.
    edgeRows = []
    for (source, target), edge_data in graphEdges.items():
        u = graphNode2nodeNumber[source]
        v = graphNode2nodeNumber[target]
        weights = [0] * len(edgeTypes)  # 0 == no edge in that group
        for data in edge_data:
            networkGroupId = int(data["networkGroupId"])
            weight = float(data["weight"])
            weights[groupId2int[networkGroupId]] = weight
            graph.add_edge(u, v, weight=weight,
                           networkGroupId=networkGroupId)
        # Table end points are 1-based.
        edgeRows.append([str(u + 1), str(v + 1)] + weights)

    nodedomain = orange.Domain([
        orange.StringVariable("label"),
        orange.StringVariable("id"),
        orange.FloatVariable("score"),
        orange.StringVariable("symbol"),
        orange.StringVariable("go"),
        orange.EnumVariable("source", values=["true", "false"]),
    ], None)

    edgedomain = orange.Domain(
        [orange.FloatVariable("u"), orange.FloatVariable("v")] +
        [orange.FloatVariable("weight_%s" % groupId2groupCode[groupId])
         for groupId in edgeTypes],
        None)

    # Item rows must follow the node numbering used in the graph.
    node_items = sorted(graphNodes.items(),
                        key=lambda t: graphNode2nodeNumber[t[0]])
    nodeitems = orange.ExampleTable(
        nodedomain,
        [[str(node["symbol"]), str(node_id), float(node["score"]),
          str(node["symbol"]), str(node["go"]), str(node["source"])]
         for node_id, node in node_items])

    edgeitems = orange.ExampleTable(edgedomain, edgeRows)

    graph.set_items(nodeitems)
    graph.set_links(edgeitems)

    return graph
def commit(self):
    """Split the data by the current score cut-offs and send the parts.

    Returns immediately (sending nothing) when there is no data or there
    are no scores.  Otherwise the test function of the selected scoring
    method (third item of its ``score_methods`` entry) is applied to all
    scores together with the histogram's lower and upper boundaries, and
    every key of ``self.scores`` counts as *selected* when its result is
    true and as *remaining* otherwise.

    * ``genes_in_columns`` set: the keys are used as row indices into
      ``self.data``; the selected and the remaining rows are sent as two
      tables (``None`` when empty).  With ``add_scores_to_output`` the
      score is added to each row as a meta attribute.
    * otherwise: the keys are matched against ``self.data.domain``
      attributes; two tables with the selected / remaining attributes are
      sent, plus a "Selected genes" table of ``(label, score)`` rows.  With
      ``add_scores_to_output`` each output attribute is a copy annotated
      with its score under the scoring method's name.
    """
    if not self.data or not self.scores:
        return

    method_name = self.score_methods[self.method_index][0]
    test = self.score_methods[self.method_index][2]

    cutOffUpper = self.histogram.upperBoundary
    cutOffLower = self.histogram.lowerBoundary

    # list() keeps the (key, score) pairs a two-column array even where
    # dict.items() returns a view instead of a list.
    scores = np.array(list(self.scores.items()))
    scores[:, 1] = test(np.array(scores[:, 1], dtype=float),
                        cutOffLower, cutOffUpper)
    selected = set(key for key, passed in scores if passed)
    remaining = set(key for key, passed in scores if not passed)

    if self.data and self.genes_in_columns:
        if self.add_scores_to_output:
            # Both output tables use the same meta id and variable.
            score_attr = orange.FloatVariable(method_name)
            mid = orange.newmetaid()

        def rows_table(keys):
            # Table of the data rows indexed by ``keys``; None if empty.
            if not keys:
                return None
            table = orange.ExampleTable(
                orange.Domain(self.data.domain),
                [self.data[int(i)] for i in keys],
                name=self.data.name)
            if self.add_scores_to_output:
                table.domain.addmeta(mid, score_attr)
                for ex, key in zip(table, keys):
                    ex[mid] = self.scores[key]
            return table

        self.send("Example table with selected genes",
                  rows_table(sorted(selected)))
        self.send("Example table with remaining genes",
                  rows_table(sorted(remaining)))

    elif self.data and not self.genes_in_columns:

        def annotated(attrs):
            # Copies of ``attrs``, each tagged with its own score.
            # Attributes without a score (the string attributes kept
            # below) are copied but left untagged instead of raising.
            copies = []
            for attr in attrs:
                clone = copy_descriptor(attr)
                if attr in self.scores:
                    clone.attributes[method_name] = str(self.scores[attr])
                copies.append(clone)
            return copies

        def attrs_table(attrs):
            # Table restricted to ``attrs`` (class and metas kept);
            # None when ``attrs`` is empty.
            if self.add_scores_to_output:
                attrs = annotated(attrs)
            newdomain = orange.Domain(attrs, self.data.domain.classVar)
            newdomain.addmetas(self.data.domain.getmetas())
            newdata = orange.ExampleTable(newdomain, self.data,
                                          name=self.data.name)
            return newdata if attrs else None

        selected_attrs = [attr for attr in self.data.domain.attributes
                          if attr in selected or
                          attr.varType == orange.VarTypes.String]  # ?? why strings
        remaining_attrs = [attr for attr in self.data.domain.attributes
                           if attr in remaining]

        self.send("Example table with selected genes",
                  attrs_table(selected_attrs))
        # The annotation used to store str(<whole score list>) here
        # instead of the attribute's own score.
        self.send("Example table with remaining genes",
                  attrs_table(remaining_attrs))

        domain = orange.Domain([orange.StringVariable("label"),
                                orange.FloatVariable(method_name)], False)
        if selected_attrs:
            # Look the score up by the original descriptor: self.scores is
            # keyed by the attributes of self.data.domain, and an annotated
            # copy may not be found there (which silently yielded 0).
            selected_genes = orange.ExampleTable(
                domain,
                [[attr.name, self.scores.get(attr, 0)]
                 for attr in selected_attrs])
        else:
            selected_genes = None
        self.send("Selected genes", selected_genes)

    else:
        self.send("Example table with selected genes", None)
        self.send("Example table with remaining genes", None)
        self.send("Selected genes", None)

    self.data_changed_flag = False
def __make_rule_gene_example_table(tableDict, genes):
    """Build a rule-by-gene presence table.

    The domain has one two-valued (PRESENT / ABSENT) attribute per entry of
    ``genes`` and no class, plus three meta attributes: the rule name, the
    rule terms and the rule's sequence number.  One example is appended for
    every key of ``tableDict``, in sorted key order; a gene is marked
    PRESENT when it occurs in that entry's ``TOP_GENES_KEY`` collection.

    Returns the filled ``orange.ExampleTable``.
    """
    import orange
    import constants as const

    # One presence/absence attribute per gene.
    attrList = [
        orange.EnumVariable(name=str(gene),
                            values=[const.PRESENT, const.ABSENT])
        for gene in genes
    ]

    # Three meta attributes.
    ruleName = orange.StringVariable(const.NAME_ATTR)
    mid = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    mid1 = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    mid2 = orange.newmetaid()

    # Classless domain; the rule description lives in the metas.
    domain = orange.Domain(attrList, False)
    for meta_id, meta_var in ((mid, ruleName),
                              (mid1, ruleTerms),
                              (mid2, ruleNumber)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)

    for k in sorted(tableDict.keys()):
        rule = tableDict[k]
        top_genes = rule[const.TOP_GENES_KEY]

        exampleValues = [
            orange.Value(attr,
                         const.PRESENT if gene in top_genes else const.ABSENT)
            for attr, gene in zip(attrList, genes)
        ]
        example = orange.Example(domain, exampleValues)

        # [1:-1] skips the square brackets around the stored strings.
        name_text = rule[const.RULENAME_KEY][1:-1]
        terms_text = rule[const.RULETERMS_STR_KEY][1:-1]

        # The metas are first set from the raw values ...
        example[const.NAME_ATTR] = name_text
        example[const.TERMS_ATTR] = terms_text
        example[const.SEQ_NUM_ATTR] = k

        # ... and then again as explicit orange.Value objects.
        example[const.NAME_ATTR] = orange.Value(ruleName, name_text)
        example[const.TERMS_ATTR] = orange.Value(ruleTerms, terms_text)
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)

        table.append(example)

    return table
import os
import os.path
import glob
import orange
import orngNetwork

# Columns of the network catalogue table.
atts = [
    orange.StringVariable("Network Name"),
    orange.StringVariable("Network File"),
    orange.StringVariable("dir"),
    orange.StringVariable("Item Set"),
    orange.StringVariable("Edge Set"),
    orange.FloatVariable("Vertices"),
    orange.FloatVariable("Edges"),
    orange.StringVariable("Date"),
    orange.StringVariable("Description"),
]
# "Vertices" and "Edges" are shown without decimals.
atts[5].numberOfDecimals = 0
atts[6].numberOfDecimals = 0

netlist = orange.ExampleTable(orange.Domain(atts, False))

# Visit every *.net file in the current working directory.
for netFile in glob.glob(os.path.join(os.getcwd(), '*.net')):
    net = orngNetwork.Network.read(netFile)
    name, ext = os.path.splitext(netFile)

    # Companion item file: "<name>_items.tab" is preferred over
    # "<name>.tab"; empty string when neither exists.
    itemFile = ""
    for suffix in ('_items.tab', '.tab'):
        if os.path.exists(name + suffix):
            itemFile = name + suffix
            break
def test_meta_direct(self):
    """Exercise meta values of a single example.

    Covers setting, reading and deleting metas by meta id, by variable and
    by name, type checking on assignment, and the ``set_meta`` /
    ``has_meta`` / ``remove_meta`` methods.
    """
    e = orange.ExampleTableReader("iris.tab", 3).read()[0]
    d = e.domain

    # A meta id that is not registered in the domain.
    e[0] = 3.14
    mid1 = orange.newmetaid()
    e[mid1] = 2.79
    self.assertEqual(e[0], 3.14)
    self.assertEqual(e[mid1], 2.79)

    # A continuous meta registered in the domain.
    mid2 = orange.newmetaid()
    nf = orange.ContinuousVariable("m2")
    d.addmeta(mid2, nf)
    e[nf] = 6.28
    self.assertEqual(e[nf], 6.28)
    self.assertEqual(e[mid2], 6.28)
    self.assertEqual(e["m2"], 6.28)

    # A string meta registered in the domain.
    mid3 = orange.newmetaid()
    sf = orange.StringVariable("m3")
    d.addmeta(mid3, sf)
    e["m3"] = "pixies"
    self.assertEqual(e[sf], "pixies")
    self.assertEqual(e[mid3], "pixies")
    self.assertEqual(e["m3"], "pixies")

    self.assertEqual(set(e.get_metas().values()),
                     {2.79, 6.28, "pixies"})

    # Deleting by id; afterwards lookup by id, variable or name fails.
    del e[mid1]
    with self.assertRaises(KeyError):
        e[mid1]
    self.assertEqual(set(e.get_metas().values()), {6.28, "pixies"})

    del e[mid3]
    with self.assertRaises(KeyError):
        e[sf]
    self.assertEqual(set(e.get_metas().values()), {6.28})

    del e[mid2]
    with self.assertRaises(KeyError):
        e["m2"]
    self.assertEqual(set(e.get_metas().values()), set())

    # Values of the wrong type are rejected.
    with self.assertRaises(TypeError):
        e[mid1] = "3.15"
    with self.assertRaises(TypeError):
        e[mid3] = 3.15

    with self.assertRaises(TypeError):
        e.set_meta(mid1, "3.15")
    with self.assertRaises(TypeError):
        # Was ``e.set_meta(mid3 = 3.15)``: a keyword argument, which most
        # likely raised TypeError for the call signature alone and never
        # reached the value check mirrored from ``e[mid3] = 3.15`` above.
        e.set_meta(mid3, 3.15)

    # set_meta accepts an id, a name or a variable.
    e.set_meta(mid1, 3.15)
    e.set_meta("m2", 3.16)
    e.set_meta(sf, "3.17")
    self.assertTrue(e.has_meta(mid1))
    self.assertTrue(e.has_meta(nf))
    self.assertTrue(e.has_meta(sf))

    # remove_meta accepts the same three kinds of key.
    e.remove_meta(sf)
    self.assertTrue(e.has_meta(mid1))
    self.assertTrue(e.has_meta(nf))
    self.assertFalse(e.has_meta(sf))

    e.remove_meta(mid1)
    self.assertFalse(e.has_meta(mid1))
    self.assertTrue(e.has_meta(nf))
    self.assertFalse(e.has_meta(sf))

    e.remove_meta("m2")
    self.assertFalse(e.has_meta(mid1))
    self.assertFalse(e.has_meta(nf))
    self.assertFalse(e.has_meta(sf))
def getVariable(self):
    """Return a new ``orange.StringVariable`` named ``str(self.text())``."""
    label = str(self.text())
    return orange.StringVariable(label)
def getSimDescriptors(InActives, InData, methods, active_ids=None,
                      pharmacophore_file=None, callBack=None):
    """ Calculate similarity descriptors for a data set (orange object)
        using the given similarity methods against the given actives.

        For every method in ``methods`` and every active, one continuous
        attribute named ``<method>(active_<n>)`` (n counted from 1) is
        appended to the attributes of the data; columns are grouped by
        method, in the order of ``methods``.

        Recognised method strings:
            rdk_topo_fps, rdk_MACCS_keys, rdk_morgan_fps,
            rdk_morgan_features_fps, rdk_atompair_fps,
            azo_pharmacophore_fps
        ``azo_pharmacophore_fps`` is computed against ``active_ids`` (and
        ``pharmacophore_file``) instead of the actives.  Columns of an
        unrecognised method are created but left unfilled.

        callBack, if defined, is called after every computed value with
        the percentage done (0-100), e.g. callBack(25).  It shall return
        True or False: a false return value means the caller wants to stop
        the calculation, in which case None is returned.

        Returns the extended data table, or None when no SMILES attribute
        is found in InData or the calculation was cancelled.
    """
    # Pre-process input data to standardize the SMILES
    SMILESattr = getSMILESAttr(InData)
    if not SMILESattr:
        return None

    #TODO: Create a method in dataUtilities to standardize the attribute smilesName in place having the attr origSmiles as ID
    if "AZutilities.extraUtilities" in sys.modules and hasattr(
            extraUtilities, "StandardizeSMILES"):
        # Call a method for standardizing the SMILES in Data.
        # The method is expected to change the attribute defined as smiAttr in data object
        cleanedData = True

        # Process InData: keep the original SMILES in OrigSMI_ID
        tmpDomain = orange.Domain([orange.StringVariable("OrigSMI_ID")] +
                                  [attr for attr in InData.domain])
        data = orange.ExampleTable(tmpDomain, InData)
        for ex in data:
            ex["OrigSMI_ID"] = ex[SMILESattr]
        extraUtilities.StandardizeSMILES(data, smiAttr=SMILESattr,
                                         cName="OrigSMI_ID")

        # Process input actives the same way
        activesDomain = orange.Domain([
            orange.StringVariable("OrigSMI_ID"),
            orange.StringVariable("SMILES")
        ], 0)
        activesData = orange.ExampleTable(activesDomain)
        for act in InActives:
            activesData.append([act, act])
        extraUtilities.StandardizeSMILES(activesData, smiAttr="SMILES",
                                         cName="OrigSMI_ID")
        actives = [str(ex["SMILES"].value) for ex in activesData]
    else:
        data = InData
        actives = InActives
        cleanedData = False

    # adjust the header: len(actives) new columns per method
    atts = []
    for m in methods:
        for count, _ in enumerate(actives, 1):
            attname = m + '(active_' + str(count) + ')'
            atts.append(orange.FloatVariable(attname))

    newdomain = orange.Domain(data.domain.attributes + atts,
                              data.domain.classVar)
    newdata = orange.ExampleTable(newdomain, data)

    # if callBack is defined, it will be called with the percentage done, i.e. 0-100
    if active_ids:
        nTotalSteps = len(newdata) * (
            (len(methods) - 1) * len(actives) + len(active_ids))
    else:
        nTotalSteps = len(methods) * len(actives) * len(newdata)
    stepsDone = 0

    # Similarity function per method name.  The lambdas defer the lookup
    # of the module-level functions until a method is actually requested.
    PHARMACOPHORE = 'azo_pharmacophore_fps'
    similarity = {
        'rdk_topo_fps':
            lambda a, inst: orng_sim_rdk_topo_fps(a, inst),
        'rdk_MACCS_keys':
            lambda a, inst: orng_sim_rdk_MACCS_keys(a, inst),
        'rdk_morgan_fps':
            lambda a, inst: orng_sim_rdk_morgan_fps(a, inst),
        'rdk_morgan_features_fps':
            lambda a, inst: orng_sim_rdk_morgan_features_fps(a, inst),
        'rdk_atompair_fps':
            lambda a, inst: orng_sim_rdk_atompair_fps(a, inst),
        PHARMACOPHORE:
            lambda a, inst: azo_pharmacophore_az_inhouse(
                a, inst, pharmacophore_file),
    }

    # fill up the data
    for m_idx, m in enumerate(methods):
        sim = similarity.get(m)
        if sim is None:
            # Unrecognised method: nothing to compute.
            continue
        # Always start at this method's own first column, so a skipped
        # method (or a different number of active_ids) cannot shift the
        # values of later methods into the wrong columns.
        att_idx = m_idx * len(actives)
        references = active_ids if m == PHARMACOPHORE else actives
        for a in references:
            att = atts[att_idx]
            for instance in newdata:
                instance[att] = orange.Value(att, sim(a, instance))
                if callBack:
                    stepsDone += 1
                    if not callBack((100 * stepsDone) // nTotalSteps):
                        return None
            att_idx += 1

    if cleanedData:
        #Remove the fixed SMILES and revert to the Original SMILES
        newdata = dataUtilities.attributeDeselectionData(newdata,
                                                         [SMILESattr])
        newdata.domain["OrigSMI_ID"].name = SMILESattr
    return newdata