Beispiel #1
0
def random_graph(n=7, k=3, p=0.5, maxwt=100):
    """Build a random weighted undirected graph and random s-t pairs.

    Every unordered vertex pair {i, j} becomes an edge when a uniform draw
    from ``random.random()`` is ``<= p``; each edge gets an integer weight
    drawn with ``random.randint(1, maxwt)``.

    Args:
        n: number of vertices.
        k: number of [s, t] pairs to draw for the multi-cut instance.
        p: threshold compared against ``random.random()`` for each pair.
        maxwt: upper bound (inclusive) for the random edge weights.

    Returns:
        ``(G, SS)``: ``G`` is ``Graph(n)`` with ``G.Edges`` and ``G.Wt`` set
        to symmetric n x n 0/1 adjacency and weight matrices; ``SS`` is a
        list of ``k`` two-element lists ``[s, t]`` with ``s != t``.

    Raises:
        ValueError: if pairs are requested (``k > 0``) but fewer than two
            vertices exist, so no pair with distinct endpoints can be drawn.
    """
    # With a single vertex the "redraw until different" loop below could
    # never terminate, so reject that input up front.
    if k > 0 and n < 2:
        raise ValueError(
            "need at least 2 vertices to draw s-t pairs, got n=%r" % (n,))

    # random weighted graph
    G = Graph(n)
    E = [[0] * n for _ in range(n)]
    W = [[0] * n for _ in range(n)]

    # Only the upper triangle is sampled; entries are mirrored to keep both
    # matrices symmetric.
    for i in range(n):
        for j in range(i + 1, n):
            if random.random() <= p:
                E[i][j] = E[j][i] = 1
                W[i][j] = W[j][i] = random.randint(1, maxwt)

    G.Edges = E
    G.Wt = W

    # random s-t pairs for multi-cut
    SS = []
    for _ in range(k):
        si = random.randint(0, n - 1)
        ti = random.randint(0, n - 1)

        # redraw the target until it differs from the source
        while si == ti:
            ti = random.randint(0, n - 1)

        SS.append([si, ti])

    return G, SS
Beispiel #2
0
def input_graph():
    """Return a fixed test instance.

    The graph is ``Graph(5)`` whose ``Edges`` and ``Wt`` matrices both hold
    1 for every off-diagonal entry and 0 on the diagonal. The second element
    of the returned tuple is the list of s-t pairs
    ``[[0, 1], [0, 3], [2, 3]]``.
    """
    size = 5
    graph = Graph(size)

    def off_diagonal_ones():
        # Fresh size x size matrix: 0 on the diagonal, 1 elsewhere.
        return [[int(row != col) for col in range(size)]
                for row in range(size)]

    # Two independent matrices, so later mutation of one leaves the other
    # untouched.
    graph.Edges = off_diagonal_ones()
    graph.Wt = off_diagonal_ones()

    pairs = [[0, 1], [0, 3], [2, 3]]

    return graph, pairs
Beispiel #3
0
class HMM:
    """Hidden Markov model stored on top of a directed ``Graph``.

    States are kept in ``self.state`` keyed by their graph index, and
    ``self.id2index`` maps state ids to those indices. Transition weights
    live in ``self.G.edgeWeights``. The model can be read from and written
    to a GraphML-based XML document (``fromDOM`` / ``toDOM``).
    """

    def __init__(self, XMLFileName=None, G=None):
        """Create an empty model, optionally loading it from XML.

        Args:
            XMLFileName: if not None, passed to ``OpenXML`` after setup.
            G: graph to use; a new ``Graph()`` is created when None.
        """
        # self.itsEditor = itsEditor
        if G is None:
            self.G = Graph()
        else:
            self.G = G

        self.G.directed = 1
        self.G.euclidian = 0
        self.G.simple = 0
        self.Pi = {}
        self.id2index = {}

        # self.hmmAlphabet = DiscreteHMMAlphabet()
        self.hmmClass = HMMClass()

        # in the case of pair HMMs we have several
        self.hmmAlphabets = {}
        self.transitionFunctions = {}

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()

        self.state = {}

        self.modelType = 0
        self.name = "NoName"

        self.backgroundDistributions = NamedDistributions(self)

        self.DocumentName = "graphml"
        if XMLFileName is not None:
            self.OpenXML(XMLFileName)

    def Clear(self):
        """Reset the graph and the model containers to their empty state."""
        # NOTE(review): transitionFunctions, modelType and name are not
        # reset here -- confirm whether that is intentional.
        self.G.Clear()
        self.Pi = {}
        self.id2index = {}

        # self.hmmAlphabet = DiscreteHMMAlphabet()
        self.hmmAlphabets = {}
        self.hmmClass = HMMClass()
        self.backgroundDistributions = NamedDistributions(self)

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()
        self.state = {}
        self.DocumentName = "graphml"

    def AddState(self, index, label='None'):
        """Register a new state for graph vertex ``index``.

        The state id is one more than the largest id in use, or 1 for the
        first state. ``label`` is accepted but not used by this method.

        Returns:
            The ``index`` that was passed in.
        """
        state = HMMState(-1, self)
        # Truthiness test instead of comparing keys() against a list, which
        # only behaves as intended when keys() returns a list.
        if self.id2index:
            state.id = max(self.id2index.keys()) + 1
        else:
            state.id = 1
        state.index = index
        self.id2index[state.id] = state.index
        self.state[state.index] = state  # XXX Use canvas id
        state.label = typed_assign(state.label, state.id)
        self.G.labeling[state.index] = "%s" % (state.label)
        return state.index

    def DeleteState(self, index):
        """Remove the index -> state mapping (and its id -> index entry).

        The caller must delete the corresponding vertex in the owner graph
        ``self.G``; this method does not touch the graph.
        """
        del self.id2index[self.state[index].id]
        del self.state[index]

    def fromDOM(self, XMLNode):
        """Populate the model from a GraphML DOM node.

        Reads class, model type, alphabet(s), transition functions (pair
        HMMs only), background distributions, states (``node`` elements)
        and transitions (``edge`` elements carrying a ``prob`` data entry).
        """
        # self.hmmClass.fromDOM(XMLNode.getElementsByTagName("hmm:class")[0])
        for tag in XMLNode.getElementsByTagName("hmm:class"):
            self.hmmClass.fromDOM(tag)

        # NOTE(review): the content of "hmm:name" is stored in modelType,
        # not in self.name -- kept as-is, confirm whether this is intended.
        nameNodes = XMLNode.getElementsByTagName("hmm:name")
        if len(nameNodes) > 0:
            self.modelType = nameNodes[0].firstChild.nodeValue

        # model type node (overrides the value taken from "hmm:name")
        modelTypeNodes = XMLNode.getElementsByTagName("hmm:modeltype")
        if len(modelTypeNodes) > 0:
            self.modelType = modelTypeNodes[0].firstChild.nodeValue

        if self.modelType == "pairHMM":
            for alphabetNode in XMLNode.getElementsByTagName("hmm:alphabet"):
                alphabet = DiscreteHMMAlphabet()
                alphabet.fromDOM(alphabetNode)
                self.hmmAlphabets[alphabet.id] = alphabet
            transitionFunctionNodes = XMLNode.getElementsByTagName(
                "hmm:transitionfunction")
            for transitionFunctionNode in transitionFunctionNodes:
                transitionFunction = TransitionFunction()
                transitionFunction.fromDom(transitionFunctionNode)
                self.transitionFunctions[
                    transitionFunction.id] = transitionFunction
        else:
            # Not a pair HMM: exactly one "hmm:alphabet" element is read
            # and stored under key 0.
            self.hmmAlphabets[0] = DiscreteHMMAlphabet()
            self.hmmAlphabets[0].fromDOM(
                XMLNode.getElementsByTagName("hmm:alphabet")[0])

        self.backgroundDistributions.fromDOM(XMLNode)

        for node in XMLNode.getElementsByTagName("node"):
            state = HMMState(-1, self)
            state.fromDOM(node)
            self.state[state.index] = state  # key must be string
            self.id2index[state.id] = state.index
            self.G.embedding[state.index] = state.pos
            self.G.labeling[state.index] = "%s\n%s" % (
                state.id, state.label)  # XXX Hack Aaaargh!

        edges = XMLNode.getElementsByTagName("edge")
        # nr_classes = int(self.hmmClass.high()-self.hmmClass.low())+1
        nr_classes = 1
        # search in all states for the maximal kclasses
        for s in self.state.values():
            if s.kclasses > nr_classes:
                nr_classes = s.kclasses

        for i in range(nr_classes):
            self.G.edgeWeights[i] = EdgeWeight(self.G)

        for edge in edges:
            i = self.id2index[int(edge.attributes['source'].nodeValue)]
            j = self.id2index[int(edge.attributes['target'].nodeValue)]
            source = self.state[i]

            # Pick the edge's own 'prob' data element (the last one if there
            # are several). Resetting per edge ensures an edge without data
            # neither reuses a previous edge's values nor hits an unbound
            # name.
            probData = None
            for data in edge.getElementsByTagName("data"):
                if data.attributes['key'].nodeValue == 'prob':
                    probData = data
            if probData is None:
                continue

            # collect all strings from childnodes
            dataValue = "".join(
                [child.nodeValue for child in probData.childNodes])
            p = listFromCSV(dataValue, types.FloatType)
            self.G.AddEdge(i, j)
            if len(p) == 1:  # only one class: pad the others with 0.0
                for cl in range(source.kclasses - 1):
                    p.append(0.0)

            for cl in range(source.kclasses):
                self.G.edgeWeights[cl][(i, j)] = p[cl]

    def modelCheck(self):
        """Validate the model before it is written.

        Raises:
            NotValidHMMType: if the initial probabilities sum to 0.0.
            AlphabetErrorType: if no alphabet has been defined.
        """
        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state:
            initial_sum = initial_sum + self.state[s].initial

        if initial_sum == 0.0:
            raise NotValidHMMType("Initial state is not specified.")

        if len(self.hmmAlphabets) == 0:
            raise AlphabetErrorType(
                "Alphabet object is empty. You must create alphabet before saving."
            )

    def toDOM(self, XMLDoc, XMLNode):
        """Append a ``graphml`` element describing the model to ``XMLNode``.

        Transition weights of class 0 are renormalized by the sum of the
        outgoing weights of their source vertex; 0.0 is written when the
        weight is missing or that sum is zero.
        """
        graphml = XMLDoc.createElement("graphml")
        # define namespaces (proper XML and new expat needs it)
        graphml.setAttribute('xmlns', 'http://graphml.graphdrawing.org/xmlns')
        graphml.setAttribute('xmlns:gd', 'gdnamespace')  # find the correct URI
        graphml.setAttribute('xmlns:hmm',
                             'http://www.ghmm.org/xml/')  # arbitrary
        XMLNode.appendChild(graphml)

        # Create key elements
        hmmtype = XMLDoc.createElement("key")
        hmmtype.setAttribute('id', 'emissions')
        hmmtype.setAttribute('gd:type',
                             'HigherDiscreteProbDist')  # what's your type?
        hmmtype.setAttribute('for', 'node')
        graphml.appendChild(hmmtype)

        self.hmmClass.toDOM(XMLDoc, graphml)

        if self.modelType == "pairHMM":
            modelType = XMLDoc.createElement("hmm:modeltype")
            modelType.appendChild(XMLDoc.createTextNode("pairHMM"))
            graphml.appendChild(modelType)

        for alphabet in self.hmmAlphabets.values():
            alphabet.toDOM(XMLDoc, graphml)
        self.backgroundDistributions.toDOM(XMLDoc, graphml)

        if self.transitionFunctions:
            transitionFunctionsNode = XMLDoc.createElement(
                "hmm:transitionfunctions")
            for transitionFunction in self.transitionFunctions.values():
                transitionFunction.toDom(XMLDoc, transitionFunctionsNode)
            graphml.appendChild(transitionFunctionsNode)

        graph = XMLDoc.createElement("graph")

        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state.keys():
            initial_sum = initial_sum + self.state[s].initial

        for s in self.state.keys():
            self.state[s].toDOM(XMLDoc, graph, initial_sum)

        # Compute sums of outgoing probabilities for renormalization of
        # transition probabilities
        # NOTE: need dictionaries here
        out_sum = {}
        nr_classes = int(self.hmmClass.high()) - int(self.hmmClass.low()) + 1
        for v in self.G.vertices:
            out_sum[v] = [0.0] * nr_classes

        for cl in range(1):  # XXX Assuming one transition class
            for e in self.G.Edges():
                # has_key is kept on purpose: edgeWeights entries are
                # EdgeWeight objects, not plain dicts.
                if self.G.edgeWeights[cl].has_key(e):
                    out_sum[e[0]][cl] = out_sum[
                        e[0]][cl] + self.G.edgeWeights[cl][e]

        for e in self.G.Edges():
            transitions = []
            edge_elem = XMLDoc.createElement("edge")
            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)
            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)
            # XXX Assuming one transition class for cl in range(nr_classes):
            for cl in range(1):
                if self.G.edgeWeights[cl].has_key(e) and out_sum[e[0]][cl]:
                    transitions.append(self.G.edgeWeights[cl][e] /
                                       out_sum[e[0]][cl])
                else:
                    transitions.append(0.0)

            writeData(XMLDoc, edge_elem, 'prob', csvFromList(transitions))

            graph.appendChild(edge_elem)

        graphml.appendChild(graph)

    def AlphabetType(self):
        """Return the type of the emission domain (always ``int`` here).

        XXX should call the method in HMMAlphabet
        """
        return int

    def ClassType(self):
        """Placeholder; returns None."""
        pass

    def DistributionType(self):
        """Placeholder; returns None."""
        pass

    def getBackgroundDist(self):
        """Return the background distribution data as a 3-tuple.

        Returns:
            ``(dist, order, code2name)`` taken from
            ``self.backgroundDistributions``.
        """
        return (self.backgroundDistributions.dist,
                self.backgroundDistributions.order,
                self.backgroundDistributions.code2name)

    def buildMatrices(self):
        """Return ``[alphabets, A, B, pi, state_orders]``.

        Rows and columns follow the iteration order of
        ``self.state.values()``. ``A`` holds class-0 transition weights,
        ``B`` the emission lists, ``pi`` the initial values.

        Raises:
            ValueError: for non-pair HMMs, when a state's emission list
                length is not ``alphabet_size ** (order + 1)``.
        """
        pi = []
        B = []
        A = []
        nstates = len(self.state)

        # position (C style index) of each state index in iteration order
        orders = {}
        for k, s in enumerate(self.state.values()):  # ordering from XML
            orders[s.index] = k

        state_orders = []
        for s in self.state.values():  # a list of indices
            pi.append(s.initial)
            state_orders.append(s.order)  # state order

            size = self.hmmAlphabets[s.alphabet_id].size()
            if (self.modelType != "pairHMM"
                    and size**(s.order + 1) != len(s.emissions)):
                # inconsistency between ordering and emission
                raise ValueError(
                    "state %s: number of emissions does not match "
                    "alphabet size and order" % (s.index,))

            B.append(s.emissions)  # emission

            # transition probability
            v = s.index
            outprobs = [0.0] * nstates
            for outid in self.G.OutNeighbors(v)[:]:
                myorder = orders[outid]
                outprobs[myorder] = self.G.edgeWeights[0][(v, outid)]
            A.append(outprobs)

        alphabets = self.hmmAlphabets[0].name.values()  # list of alphabets
        return [alphabets, A, B, pi, state_orders]

    def getStateAlphabets(self):
        """Return the alphabet object of every state, in state order."""
        alphabets = []
        for s in self.state.values():
            alphabets.append(self.hmmAlphabets[s.alphabet_id])
        return alphabets

    def getAlphabets(self):
        """Return the dictionary of alphabets."""
        return self.hmmAlphabets

    def getLabels(self):
        """Return ``(label_list, unique_labels)`` for all states."""
        label_list = []
        labels = {}
        for s in self.state.values():  # a list of indices
            label_list.append(self.hmmClass.code2name[s.state_class])
            labels[label_list[-1]] = 0
        return (label_list, labels.keys())

    def getTiedStates(self):
        """Return, per state, the position of the state it is tied to.

        An entry is -1 when the state is not tied to any other state. An
        empty list is returned when no state is tied at all.
        """
        tiedstates = []
        isTied = 0

        # position (C style index) of each state id in iteration order
        orders = {}
        for k, s in enumerate(self.state.values()):  # ordering from XML
            orders[s.id] = k

        for s in self.state.values():  # a list of indices
            if s.tiedto == '':
                tiedstates.append(-1)
            else:
                tiedstates.append(orders[int(s.tiedto)])
                isTied = 1

        if not isTied:
            tiedstates = []
        return tiedstates

    def getStateDurations(self):
        """Return the minimal number of times each state is evaluated
        before the HMM changes to another state.

        A stored duration of 0 is reported as 1. An empty list is returned
        when no state has a non-zero duration.
        """
        durations = []
        hasduration = 0

        for s in self.state.values():  # a list of indices
            if s.duration == 0:
                durations.append(1)
            else:
                durations.append(s.duration)
                hasduration = 1

        if not hasduration:
            durations = []
        return durations

    def OpenXML(self, fileName_file_or_dom):
        """Load the model from a file name, file object or minidom Document.

        Raises:
            FormatError: if the document root element is ``ghmm``.
            AssertionError: if the root element is neither ``ghmm`` nor
                ``graphml``.
        """
        parsedHere = not isinstance(fileName_file_or_dom,
                                    xml.dom.minidom.Document)
        if parsedHere:
            dom = xml.dom.minidom.parse(fileName_file_or_dom)
        else:
            dom = fileName_file_or_dom

        if dom.documentElement.tagName == "ghmm":
            sys.stderr.write("Do not support ghmm format")
            # Release the DOM before raising (the cleanup used to sit after
            # the raise and never ran). A caller-supplied Document is left
            # alone.
            if parsedHere:
                dom.unlink()
            raise FormatError

        assert dom.documentElement.tagName == "graphml"
        self.fromDOM(dom)
        # The DOM is intentionally not unlinked after a successful load.

    def WriteXML(self, fileName):
        """Validate the model and write it as GraphML to ``fileName``.

        An ``HMMEdError`` raised on the way is reported on standard output
        and no exception propagates for it.
        """
        try:
            self.modelCheck()  # raise exceptions here
            doc = xml.dom.minidom.Document()
            try:
                self.toDOM(doc, doc)
                # Serialize before opening so a failure does not leave a
                # truncated file behind.
                text = toprettyxml(doc)  # problem with white spaces
                out = open(fileName, 'w')
                try:
                    out.write(text)
                finally:
                    out.close()
            finally:
                doc.unlink()
        except HMMEdError:
            print("HMMEdError: No file was written due to errors in the model.")

    def WriteGHMM(self, fileName):
        """Validate the model and write it in ghmm format to ``fileName``.

        NOTE(review): relies on ``self.toGHMM``, which is not defined in
        this class as shown -- confirm it is provided elsewhere.
        """
        self.modelCheck()  # raise exceptions here
        doc = xml.dom.minidom.Document()
        try:
            ghmm = doc.createElement("ghmm")
            doc.appendChild(ghmm)
            self.toGHMM(doc, ghmm)
            # Serialize before opening so a failure does not leave a
            # truncated file behind.
            text = toprettyxml(doc)  # problem with white spaces
            out = open(fileName, 'w')
            try:
                out.write(text)
            finally:
                out.close()
        finally:
            doc.unlink()

    def SaveAs(self, fileName):
        """Write the model in the format named by ``self.DocumentName``."""
        if self.DocumentName == "graphml":
            self.WriteXML(fileName)
        else:
            self.WriteGHMM(fileName)

    def SaveAsGHMM(self, fileName):
        """Write the model in ghmm format regardless of DocumentName."""
        self.WriteGHMM(fileName)
Beispiel #4
0
class HMM:
    """Hidden Markov model with a single alphabet, stored on a directed
    ``Graph``.

    States are kept in ``self.state`` keyed by graph vertex, and
    ``self.id2index`` maps state ids to those keys. Transition weights are
    stored in ``self.G.edgeWeights[0]``.
    """

    def __init__(self, XMLFileName=None):
        """Create an empty model; load it from ``XMLFileName`` if given."""
        self.G = Graph()
        self.G.directed = 1
        self.G.euclidian = 0
        self.Pi = {}
        self.id2index = {}

        self.hmmAlphabet = DiscreteHMMAlphabet()
        self.hmmClass = HMMClass()

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()

        self.state = {}

        self.backgroundDistributions = NamedDistributions(self)

        if XMLFileName is not None:
            self.OpenXML(XMLFileName)

    def AddState(self, v):
        """Create an ``HMMState`` for vertex ``v`` and store it."""
        state = HMMState(v, self)
        self.state[v] = state

    def DeleteState(self, v):
        """Remove the state of vertex ``v`` and its id -> index entry."""
        del self.id2index[self.state[v].id]
        del self.state[v]

    def fromDOM(self, XMLNode):
        """Populate the model from a GraphML DOM node.

        Reads the first ``hmm:class`` and ``hmm:alphabet`` elements, the
        background distributions, all ``node`` elements (states) and all
        ``edge`` elements (transitions).
        """
        self.hmmClass.fromDOM(
            XMLNode.getElementsByTagName("hmm:class")[0])  # One class!
        self.hmmAlphabet.fromDOM(
            XMLNode.getElementsByTagName("hmm:alphabet")[0])  # One alphabet!
        self.backgroundDistributions.fromDOM(XMLNode)

        for node in XMLNode.getElementsByTagName("node"):
            state = HMMState(-1, self)
            state.fromDOM(node)
            i = state.index
            self.state[i] = state
            self.id2index[state.id] = i

            self.G.embedding[i] = state.pos
            self.G.labeling[i] = "%s\n%s" % (state.id, state.label
                                             )  # XXX Hack Aaaargh!

        for edge in XMLNode.getElementsByTagName("edge"):
            i = self.id2index[edge.attributes['source'].nodeValue]
            j = self.id2index[edge.attributes['target'].nodeValue]

            # Start from a per-edge default so an edge without a 'prob'
            # entry neither reuses the previous edge's probability nor hits
            # an unbound name. The last 'prob' entry wins.
            p = 0.0
            for data in edge.getElementsByTagName("data"):
                if data.attributes['key'].nodeValue == 'prob':
                    p = float(data.firstChild.nodeValue)

            self.G.AddEdge(i, j)
            self.G.edgeWeights[0][(i, j)] = p

    def toDOM(self, XMLDoc, XMLNode):
        """Append a ``graphml`` element describing the model to ``XMLNode``.

        Each transition weight is divided by the sum of the outgoing
        weights of its source vertex; 0.0 is written when that sum is zero.
        """
        graphml = XMLDoc.createElement("graphml")
        XMLNode.appendChild(graphml)

        self.hmmClass.toDOM(XMLDoc, graphml)
        self.hmmAlphabet.toDOM(XMLDoc, graphml)
        self.backgroundDistributions.toDOM(XMLDoc, graphml)

        graph = XMLDoc.createElement("graph")

        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state:
            initial_sum = initial_sum + self.state[s].initial

        for s in self.state:
            self.state[s].toDOM(XMLDoc, graph, initial_sum)

        # Compute sums of outgoing probabilities for renormalization of
        # transition probabilities
        # NOTE: need dictionaries here
        out_sum = {}
        for v in self.G.vertices:
            out_sum[v] = 0.0

        for e in self.G.Edges():
            out_sum[e[0]] = out_sum[e[0]] + self.G.edgeWeights[0][e]

        for e in self.G.Edges():
            edge_elem = XMLDoc.createElement("edge")
            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)
            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)
            # Guard against a zero outgoing sum (all weights 0), which
            # would otherwise divide by zero.
            total = out_sum[e[0]]
            if total:
                prob = self.G.edgeWeights[0][e] / total
            else:
                prob = 0.0
            writeData(XMLDoc, edge_elem, 'prob', prob)
            graph.appendChild(edge_elem)

        graphml.appendChild(graph)

    def OpenXML(self, fileName):
        """Parse ``fileName`` and load the model from it.

        Raises:
            AssertionError: if the document root element is not
                ``graphml``.
        """
        dom = xml.dom.minidom.parse(fileName)
        try:
            assert dom.documentElement.tagName == "graphml"
            self.fromDOM(dom)
        finally:
            # release the DOM even when loading fails
            dom.unlink()

    def WriteXML(self, fileName):
        """Serialize the model as GraphML and write it to ``fileName``."""
        doc = xml.dom.minidom.Document()
        try:
            self.toDOM(doc, doc)
            # Serialize before opening so a failure does not leave a
            # truncated file behind.
            text = doc.toprettyxml()
            out = open(fileName, 'w')
            try:
                out.write(text)
            finally:
                out.close()
        finally:
            doc.unlink()

    def SaveAs(self, fileName):
        """Write the model to ``fileName`` (same as ``WriteXML``)."""
        self.WriteXML(fileName)