def random_graph(n=7, k=3, p=0.5, maxwt=100):
    """Build a random weighted undirected graph and random s-t pairs.

    For every vertex pair (i, j) with i < j an edge is created when
    ``random.random() <= p``; its weight is ``random.randint(1, maxwt)``.
    Adjacency and weights are stored symmetrically as n x n lists of lists
    in ``G.Edges`` and ``G.Wt``.

    Parameters:
        n     -- number of vertices
        k     -- number of source/terminal pairs to draw (for multi-cut)
        p     -- edge probability
        maxwt -- largest possible edge weight

    Returns:
        (G, SS) where SS is a list of k ``[s, t]`` pairs with ``s != t``.
        The same pair may occur more than once.

    Raises:
        ValueError -- if pairs are requested (k > 0) but the graph has fewer
                      than two vertices, so no pair with s != t exists.
    """
    # With a single vertex the "redraw t until it differs from s" loop below
    # could never terminate, so reject that request before doing any work.
    if k > 0 and n < 2:
        raise ValueError(
            "need at least 2 vertices to draw distinct s-t pairs (n=%r)" % (n,))

    # random weighted graph
    G = Graph(n)
    E = [[0] * n for _ in range(n)]
    W = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i + 1, n):
            if random.random() <= p:
                E[i][j] = E[j][i] = 1
                W[i][j] = W[j][i] = random.randint(1, maxwt)
    G.Edges = E
    G.Wt = W

    # random s-t pairs for multi-cut
    SS = []
    for _ in range(k):
        si = random.randint(0, n - 1)
        ti = random.randint(0, n - 1)
        while si == ti:
            ti = random.randint(0, n - 1)
        SS.append([si, ti])
    return G, SS
def input_graph():
    """Return a fixed 5-vertex test instance.

    The graph is complete: every off-diagonal entry of both the adjacency
    matrix (``Edges``) and the weight matrix (``Wt``) is 1 and the diagonal
    is 0.  The second return value is the hard-coded list of s-t pairs
    ``[[0, 1], [0, 3], [2, 3]]``.
    """
    order = 5
    graph = Graph(order)
    # Two independent matrices with the same content (unit weights).
    graph.Edges = [[int(row != col) for col in range(order)]
                   for row in range(order)]
    graph.Wt = [[int(row != col) for col in range(order)]
                for row in range(order)]
    pairs = [[0, 1], [0, 3], [2, 3]]
    return graph, pairs
class HMM:
    """Editable hidden Markov model stored on top of a directed graph.

    States are kept in ``self.state`` (graph index -> state object) and
    ``self.id2index`` maps a state's id to its graph index.  Transition
    weights live in ``self.G.edgeWeights[cl]`` (one weight table per
    transition class ``cl``).  The model can be read from and written to a
    GraphML-based XML document.
    """

    def __init__(self, XMLFileName=None, G=None):
        """Create an empty model, optionally loading it from XML.

        XMLFileName -- passed to OpenXML() when not None
        G           -- graph object to use; a fresh Graph() when None
        """
        # self.itsEditor = itsEditor
        if G is None:
            self.G = Graph()
        else:
            self.G = G

        self.G.directed = 1
        self.G.euclidian = 0
        self.G.simple = 0
        self.Pi = {}
        self.id2index = {}

        self.hmmClass = HMMClass()

        # in the case of pair HMMs we have several alphabets
        self.hmmAlphabets = {}
        self.transitionFunctions = {}

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()

        self.state = {}
        self.modelType = 0
        self.name = "NoName"

        self.backgroundDistributions = NamedDistributions(self)

        self.DocumentName = "graphml"
        if XMLFileName is not None:
            self.OpenXML(XMLFileName)

    def Clear(self):
        """Reset the model (and its graph) to the freshly constructed state."""
        self.G.Clear()
        self.Pi = {}
        self.id2index = {}

        self.hmmAlphabets = {}
        # Per-document data that __init__ sets up as well; without resetting
        # it, values of a previously loaded model would leak into the next.
        self.transitionFunctions = {}
        self.modelType = 0
        self.name = "NoName"

        self.hmmClass = HMMClass()
        self.backgroundDistributions = NamedDistributions(self)

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()
        self.state = {}
        self.DocumentName = "graphml"

    def AddState(self, index, label='None'):
        """Create a state for graph vertex ``index`` and return ``index``.

        The new state receives the id ``max(existing ids) + 1`` (1 for the
        first state).  The ``label`` argument is currently not used; the
        state's label is derived from its id.
        """
        state = HMMState(-1, self)
        if self.id2index:
            state.id = max(self.id2index.keys()) + 1
        else:
            state.id = 1
        state.index = index
        self.id2index[state.id] = state.index
        self.state[state.index] = state  # XXX Use canvas id
        state.label = typed_assign(state.label, state.id)
        self.G.labeling[state.index] = "%s" % (state.label)
        return state.index

    def DeleteState(self, index):
        """ The method only deletes a map between index and its state object.
            The caller must delete the corresponding vertex in the owner Graph self.G.
        """
        del self.id2index[self.state[index].id]
        del self.state[index]

    def fromDOM(self, XMLNode):
        """Populate the model from a parsed GraphML document/node.

        Reads class, name, model type, alphabet(s), transition functions
        (pair HMMs only), background distributions, states (``node``
        elements) and transitions (``edge`` elements with a ``prob`` data
        child holding comma separated probabilities, one per class).
        """
        # self.hmmClass.fromDOM(XMLNode.getElementsByTagName("hmm:class")[0])
        for tag in XMLNode.getElementsByTagName("hmm:class"):
            self.hmmClass.fromDOM(tag)

        # The model name belongs in self.name.  It used to be written to
        # self.modelType, which left self.name at "NoName" forever and could
        # select the pair-HMM branch below for a model merely *named* so.
        nameNodes = XMLNode.getElementsByTagName("hmm:name")
        if len(nameNodes) > 0:
            self.name = nameNodes[0].firstChild.nodeValue

        # model type node
        modelTypeNodes = XMLNode.getElementsByTagName("hmm:modeltype")
        if len(modelTypeNodes) > 0:
            self.modelType = modelTypeNodes[0].firstChild.nodeValue

        if self.modelType == "pairHMM":
            for alphabetNode in XMLNode.getElementsByTagName("hmm:alphabet"):
                alphabet = DiscreteHMMAlphabet()
                alphabet.fromDOM(alphabetNode)
                self.hmmAlphabets[alphabet.id] = alphabet
            for transitionFunctionNode in XMLNode.getElementsByTagName(
                    "hmm:transitionfunction"):
                transitionFunction = TransitionFunction()
                transitionFunction.fromDom(transitionFunctionNode)
                self.transitionFunctions[
                    transitionFunction.id] = transitionFunction
        else:
            # Not a pair HMM: exactly one "hmm:alphabet" XML element is used
            self.hmmAlphabets[0] = DiscreteHMMAlphabet()
            self.hmmAlphabets[0].fromDOM(
                XMLNode.getElementsByTagName("hmm:alphabet")[0])

        self.backgroundDistributions.fromDOM(XMLNode)

        for n in XMLNode.getElementsByTagName("node"):
            state = HMMState(-1, self)
            state.fromDOM(n)
            self.state[state.index] = state  # key must be string
            self.id2index[state.id] = state.index
            self.G.embedding[state.index] = state.pos
            self.G.labeling[state.index] = "%s\n%s" % (
                state.id, state.label)  # XXX Hack Aaaargh!

        edges = XMLNode.getElementsByTagName("edge")

        # nr_classes = int(self.hmmClass.high()-self.hmmClass.low())+1
        # search in all states for the maximal kclasses
        nr_classes = 1
        for s in self.state.values():
            if s.kclasses > nr_classes:
                nr_classes = s.kclasses

        for i in range(nr_classes):
            self.G.edgeWeights[i] = EdgeWeight(self.G)

        for edge in edges:
            i = self.id2index[int(edge.attributes['source'].nodeValue)]
            j = self.id2index[int(edge.attributes['target'].nodeValue)]
            source = self.state[i]

            for data in edge.getElementsByTagName("data"):
                dataKey = data.attributes['key'].nodeValue
                if dataKey != 'prob':
                    continue

                # collect all strings from childnodes
                dataValue = "".join(
                    [child.nodeValue for child in data.childNodes])
                # float is the very object types.FloatType names on Python 2
                p = listFromCSV(dataValue, float)
                self.G.AddEdge(i, j)
                if len(p) == 1:
                    # only one class given: pad the remaining classes with 0
                    p.extend([0.0] * (source.kclasses - 1))
                for cl in range(source.kclasses):
                    self.G.edgeWeights[cl][(i, j)] = p[cl]

    def modelCheck(self):
        """Validate the model before it is written.

        Raises NotValidHMMType when all initial probabilities sum to 0 and
        AlphabetErrorType when no alphabet has been defined.
        """
        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state:
            initial_sum = initial_sum + self.state[s].initial

        if initial_sum == 0.0:
            raise NotValidHMMType("Initial state is not specified.")

        if len(self.hmmAlphabets) == 0:
            raise AlphabetErrorType(
                "Alphabet object is empty. You must create alphabet before saving."
            )

    def toDOM(self, XMLDoc, XMLNode):
        """Serialise the model as a ``graphml`` element appended to XMLNode.

        Initial probabilities are handed to the states together with their
        sum, and transition probabilities are divided by the sum of the
        outgoing weights of their source vertex (renormalisation).  Only
        transition class 0 is written.
        """
        graphml = XMLDoc.createElement("graphml")
        # define namespaces (proper XML and new expat needs it)
        graphml.setAttribute('xmlns', 'http://graphml.graphdrawing.org/xmlns')
        graphml.setAttribute('xmlns:gd', 'gdnamespace')  # find the correct URI
        graphml.setAttribute('xmlns:hmm',
                             'http://www.ghmm.org/xml/')  # arbitrary
        XMLNode.appendChild(graphml)

        # Create key elements
        hmmtype = XMLDoc.createElement("key")
        hmmtype.setAttribute('id', 'emissions')
        hmmtype.setAttribute('gd:type',
                             'HigherDiscreteProbDist')  # what's your type?
        hmmtype.setAttribute('for', 'node')
        graphml.appendChild(hmmtype)

        self.hmmClass.toDOM(XMLDoc, graphml)

        if self.modelType == "pairHMM":
            modelType = XMLDoc.createElement("hmm:modeltype")
            modelType.appendChild(XMLDoc.createTextNode("pairHMM"))
            graphml.appendChild(modelType)

        for alphabet in self.hmmAlphabets.values():
            alphabet.toDOM(XMLDoc, graphml)

        self.backgroundDistributions.toDOM(XMLDoc, graphml)

        if len(self.transitionFunctions.keys()) != 0:
            transitionFunctionsNode = XMLDoc.createElement(
                "hmm:transitionfunctions")
            for transitionFunction in self.transitionFunctions.values():
                transitionFunction.toDom(XMLDoc, transitionFunctionsNode)
            graphml.appendChild(transitionFunctionsNode)

        graph = XMLDoc.createElement("graph")

        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state.keys():
            initial_sum = initial_sum + self.state[s].initial

        for s in self.state.keys():
            self.state[s].toDOM(XMLDoc, graph, initial_sum)

        # Compute sums of outgoing probabilities for renormalization of
        # transition probabilities
        # NOTE: need dictionaries here
        out_sum = {}
        nr_classes = int(self.hmmClass.high()) - int(self.hmmClass.low()) + 1
        # The loops below always address class 0, so keep at least one slot
        # even if the class range evaluates to something smaller.
        nr_classes = max(nr_classes, 1)
        for v in self.G.vertices:
            out_sum[v] = [0.0] * nr_classes

        for cl in range(1):  # XXX Assuming one transition class
            weights = self.G.edgeWeights[cl]
            for e in self.G.Edges():
                if weights.has_key(e):
                    out_sum[e[0]][cl] = out_sum[e[0]][cl] + weights[e]

        for e in self.G.Edges():
            transitions = []
            edge_elem = XMLDoc.createElement("edge")
            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)
            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)
            # writeData(XMLDoc, edge_elem, 'prob',
            #           self.G.edgeWeights[cl][e] / out_sum[e[0]])
            # XXX Assuming one transition class for cl in range(nr_classes):
            for cl in range(1):
                # A missing weight or a zero row sum is written as 0.0
                # instead of dividing by zero.
                if self.G.edgeWeights[cl].has_key(e) and out_sum[e[0]][cl]:
                    transitions.append(self.G.edgeWeights[cl][e] /
                                       out_sum[e[0]][cl])
                else:
                    transitions.append(0.0)

            writeData(XMLDoc, edge_elem, 'prob', csvFromList(transitions))
            graph.appendChild(edge_elem)

        graphml.appendChild(graph)

    def AlphabetType(self):
        """ return the type of emission domain
            XXX should call the method in HMMAlphabet
        """
        return int

    def ClassType(self):
        """Placeholder; returns None."""
        pass

    def DistributionType(self):
        """Placeholder; returns None."""
        pass

    def getBackgroundDist(self):
        """Return the background distribution data as a triple.

        The triple is (dist, order, code2name), taken unchanged from
        ``self.backgroundDistributions``.  Per the original note, a
        distribution is a list of real values of length N^(order+1).
        """
        background = self.backgroundDistributions
        return (background.dist, background.order, background.code2name)

    def buildMatrices(self):
        """ return [alphabets_code, A, B, pi, state_orders]

        A  -- one row per state: outgoing weights of transition class 0,
              columns in the iteration order of ``self.state``
        B  -- emissions of every state
        pi -- initial value of every state

        Raises ValueError when (for non pair HMMs) the number of emissions
        of a state differs from alphabet_size ** (order + 1).
        """
        states = self.state.values()
        nstates = len(states)

        # C style position of every state (keyed by graph index),
        # ordering from XML
        orders = {}
        for k, s in enumerate(states):
            orders[s.index] = k

        pi = []
        B = []
        A = []
        state_orders = []
        for s in states:
            pi.append(s.initial)
            state_orders.append(s.order)  # state order

            size = self.hmmAlphabets[s.alphabet_id].size()
            if (self.modelType != "pairHMM"
                    and size**(s.order + 1) != len(s.emissions)):
                # inconsistency between ordering and emission
                raise ValueError(
                    "number of emissions does not match alphabet size and "
                    "state order")
            B.append(s.emissions)  # emission

            # transition probability
            v = s.index
            outprobs = [0.0] * nstates
            for outid in self.G.OutNeighbors(v)[:]:
                outprobs[orders[outid]] = self.G.edgeWeights[0][(v, outid)]
            A.append(outprobs)

        alphabets = self.hmmAlphabets[0].name.values()  # list of alphabets
        return [alphabets, A, B, pi, state_orders]

    def getStateAlphabets(self):
        """Return the alphabet object of every state, in state order."""
        return [self.hmmAlphabets[s.alphabet_id] for s in self.state.values()]

    def getAlphabets(self):
        """Return the dictionary of alphabets (id -> alphabet)."""
        return self.hmmAlphabets

    def getLabels(self):
        """ returns list of state labels and unique labels """
        label_list = []
        labels = {}
        for s in self.state.values():
            name = self.hmmClass.code2name[s.state_class]
            label_list.append(name)
            labels[name] = 0
        return (label_list, labels.keys())

    def getTiedStates(self):
        """ returns list of tied states: for every state the position (in
            state order) of the state it is tied to, or -1 if it is not
            tied to any other state; returns an empty list if no state is
            tied
        """
        # C style position of every state keyed by id, ordering from XML
        orders = {}
        for k, s in enumerate(self.state.values()):
            orders[s.id] = k

        tiedstates = []
        isTied = 0
        for s in self.state.values():
            if s.tiedto == '':
                tiedstates.append(-1)
            else:
                tiedstates.append(orders[int(s.tiedto)])
                isTied = 1

        if not isTied:
            tiedstates = []
        return tiedstates

    def getStateDurations(self):
        """ returns a list of the minimal number of times a state is
            evaluated before the HMM changes to another state; a stored
            duration of 0 is reported as 1; returns an empty list if no
            state has a duration set
        """
        durations = []
        hasduration = 0
        for s in self.state.values():
            if s.duration == 0:
                durations.append(1)
            else:
                durations.append(s.duration)
                hasduration = 1

        if not hasduration:
            durations = []
        return durations

    def OpenXML(self, fileName_file_or_dom):
        """Load the model from a file name, file object or minidom Document.

        Raises FormatError for documents whose root element is ``ghmm``;
        any other root than ``graphml`` fails the assertion.
        """
        if not isinstance(fileName_file_or_dom, xml.dom.minidom.Document):
            dom = xml.dom.minidom.parse(fileName_file_or_dom)
            parsed_here = True
        else:
            dom = fileName_file_or_dom
            parsed_here = False

        if dom.documentElement.tagName == "ghmm":
            sys.stderr.write("Do not support ghmm format")
            # Release the DOM before raising (the unlink used to sit after
            # the raise and never ran).  A document handed in by the caller
            # is left untouched.
            if parsed_here:
                dom.unlink()
            raise FormatError
            #self.DocumentName = "ghmm"
            #ghmmdom = dom
            #ghmml = GHMMXML()
            #dom = ghmml.GraphMLDOM(ghmmdom)
            #ghmmdom.unlink()
        else:
            assert dom.documentElement.tagName == "graphml"
            self.fromDOM(dom)
            # dom.unlink()

    def WriteXML(self, fileName):
        """Write the model as GraphML to ``fileName``.

        An HMMEdError raised while checking or serialising the model is
        reported on stdout and no exception propagates.
        """
        try:
            self.modelCheck()  # raise exceptions here
            doc = xml.dom.minidom.Document()
            try:
                self.toDOM(doc, doc)
                # Serialise before opening so that a failure does not leave
                # a truncated file behind.
                # xml.dom.ext.PrettyPrint(doc, file)
                text = toprettyxml(doc)  # problem with white spaces
                out = open(fileName, 'w')
                try:
                    out.write(text)
                finally:
                    out.close()
            finally:
                doc.unlink()
        except HMMEdError:
            print("HMMEdError: No file was written due to errors in the model.")

    def WriteGHMM(self, fileName):
        """Write the model in ghmm format to ``fileName``.

        Exceptions from modelCheck() propagate to the caller.
        """
        self.modelCheck()  # raise exceptions here
        doc = xml.dom.minidom.Document()
        try:
            ghmm = doc.createElement("ghmm")
            doc.appendChild(ghmm)
            self.toGHMM(doc, ghmm)
            # xml.dom.ext.PrettyPrint(doc, file)
            text = toprettyxml(doc)  # problem with white spaces
            out = open(fileName, 'w')
            try:
                out.write(text)
            finally:
                out.close()
        finally:
            doc.unlink()

    def SaveAs(self, fileName):
        """Save in the format named by ``self.DocumentName``."""
        if self.DocumentName == "graphml":
            self.WriteXML(fileName)
        else:
            self.WriteGHMM(fileName)

    def SaveAsGHMM(self, fileName):
        """Save in ghmm format regardless of ``self.DocumentName``."""
        self.WriteGHMM(fileName)
class HMM:
    """Hidden Markov model with one alphabet and one transition class.

    States are kept in ``self.state`` (graph index -> state object),
    ``self.id2index`` maps the ids read from XML to graph indices and the
    transition weights are stored in ``self.G.edgeWeights[0]``.
    """

    def __init__(self, XMLFileName=None):
        """Create an empty model; load it from ``XMLFileName`` if given."""
        self.G = Graph()
        self.G.directed = 1
        self.G.euclidian = 0
        self.Pi = {}
        self.id2index = {}

        self.hmmAlphabet = DiscreteHMMAlphabet()
        self.hmmClass = HMMClass()

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()

        self.state = {}

        self.backgroundDistributions = NamedDistributions(self)

        if XMLFileName is not None:
            self.OpenXML(XMLFileName)

    def AddState(self, v):
        """Attach a new state object to graph vertex ``v``."""
        state = HMMState(v, self)
        self.state[v] = state

    def DeleteState(self, v):
        """Forget the state stored for graph vertex ``v``.

        Raises KeyError if there is no state for ``v``.
        """
        # States created through AddState() are not registered in id2index,
        # so a missing id entry must not prevent the deletion.
        self.id2index.pop(self.state[v].id, None)
        del self.state[v]

    def fromDOM(self, XMLNode):
        """Populate the model from a parsed GraphML document/node.

        Uses the first ``hmm:class`` and ``hmm:alphabet`` elements, every
        ``node`` element (states) and every ``edge`` element whose ``prob``
        data child gives the transition weight.
        """
        self.hmmClass.fromDOM(
            XMLNode.getElementsByTagName("hmm:class")[0])  # One class!
        self.hmmAlphabet.fromDOM(
            XMLNode.getElementsByTagName("hmm:alphabet")[0])  # One alphabet!
        self.backgroundDistributions.fromDOM(XMLNode)

        for n in XMLNode.getElementsByTagName("node"):
            state = HMMState(-1, self)
            state.fromDOM(n)
            i = state.index
            self.state[i] = state
            self.id2index[state.id] = i

            self.G.embedding[i] = state.pos
            self.G.labeling[i] = "%s\n%s" % (
                state.id, state.label)  # XXX Hack Aaaargh!

        for edge in XMLNode.getElementsByTagName("edge"):
            i = self.id2index[edge.attributes['source'].nodeValue]
            j = self.id2index[edge.attributes['target'].nodeValue]

            for data in edge.getElementsByTagName("data"):
                dataKey = data.attributes['key'].nodeValue
                if dataKey == 'prob':
                    # Read the text only for the element that is actually
                    # used; unrelated <data> elements may be empty.
                    p = float(data.firstChild.nodeValue)
                    self.G.AddEdge(i, j)
                    self.G.edgeWeights[0][(i, j)] = p

    def toDOM(self, XMLDoc, XMLNode):
        """Serialise the model as a ``graphml`` element appended to XMLNode.

        Each transition weight is divided by the sum of the outgoing
        weights of its source vertex; if that sum is zero, 0.0 is written.
        """
        graphml = XMLDoc.createElement("graphml")
        XMLNode.appendChild(graphml)

        self.hmmClass.toDOM(XMLDoc, graphml)
        self.hmmAlphabet.toDOM(XMLDoc, graphml)
        self.backgroundDistributions.toDOM(XMLDoc, graphml)

        graph = XMLDoc.createElement("graph")

        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state:
            initial_sum = initial_sum + self.state[s].initial

        for s in self.state:
            self.state[s].toDOM(XMLDoc, graph, initial_sum)

        # Compute sums of outgoing probabilities for renormalization of
        # transition probabilities
        # NOTE: need dictionaries here
        out_sum = {}
        for v in self.G.vertices:
            out_sum[v] = 0.0

        for e in self.G.Edges():
            out_sum[e[0]] = out_sum[e[0]] + self.G.edgeWeights[0][e]

        for e in self.G.Edges():
            edge_elem = XMLDoc.createElement("edge")
            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)
            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)
            total = out_sum[e[0]]
            if total:
                prob = self.G.edgeWeights[0][e] / total
            else:
                # All outgoing weights are zero: nothing to normalise and
                # dividing would raise ZeroDivisionError.
                prob = 0.0
            writeData(XMLDoc, edge_elem, 'prob', prob)
            graph.appendChild(edge_elem)

        graphml.appendChild(graph)

    def OpenXML(self, fileName):
        """Parse ``fileName`` and load the model from it.

        The root element must be ``graphml`` (checked with an assertion).
        """
        dom = xml.dom.minidom.parse(fileName)
        try:
            assert dom.documentElement.tagName == "graphml"
            self.fromDOM(dom)
        finally:
            # release the DOM even when loading fails
            dom.unlink()

    def WriteXML(self, fileName):
        """Write the model as pretty-printed GraphML to ``fileName``."""
        doc = xml.dom.minidom.Document()
        try:
            self.toDOM(doc, doc)
            # Serialise before opening so that a failure does not leave a
            # truncated file behind.
            text = doc.toprettyxml()
            out = open(fileName, 'w')
            try:
                out.write(text)
            finally:
                out.close()
        finally:
            doc.unlink()

    def SaveAs(self, fileName):
        """Save the model; same as WriteXML()."""
        self.WriteXML(fileName)