Python Digraph.clear Examples

Programming Language: Python

Namespace/Package Name: graphviz

Class/Type: Digraph

Method/Function: clear

Examples at hotexamples.com: 24

The `clear` method of `graphviz.Digraph` resets the graph to an empty body, removing all nodes and edges so that it can be populated again. By default it also clears the graph, node, and edge attribute mappings; pass `keep_attrs=True` to preserve them.

Python Digraph.clear - 24 examples found. These are the top rated real world Python examples of graphviz.Digraph.clear extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Digraph(30)

attr(30)

format(30)

edges(30)

edge(30)

engine(24)

clear(24)

filename(6)

graph_attr(6)

edge_attr(6)

_repr_svg_(5)

copy(4)

directory(3)

add_edge(2)

add_node(2)

append(2)

label(1)

__setattr__(1)

fillcolor(1)

aid(1)

_format(1)

__init__(1)

browser(1)

body(1)

background(1)

quote(1)

Example #1

Show file

def t1():
    """Build a small three-node digraph, open it in the viewer, then empty it."""
    graph = Digraph()
    # Nodes '1' and '2' are declared first, then joined by an edge
    for name, label in (('1', 'a'), ('2', 'b')):
        graph.node(name, label)
    graph.edge('1', '2')
    # Node '3' is added afterwards and also hangs off node '1'
    graph.node('3', 'c')
    graph.edge('1', '3')
    graph.view()
    graph.clear()

Example #2

Show file

 def show(self, file_name):
     """Draw every node and every weighted parent-to-child edge, then open the viewer.

     :param file_name: file name handed to ``Digraph``; output goes under ``./pdf_data``.
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()
     # Declare all nodes first
     for vertex in self._nodes:
         graph.node(name=vertex.name, label=vertex.name)
     # Then connect each node to its children, labelling edges with the weight
     for parent in self._nodes:
         parent_name = parent.name
         for child, weight in parent.son_weight.items():
             graph.edge(tail_name=parent_name,
                        head_name=child.name,
                        label=str(weight))
     graph.view()

Example #3

Show file

File: GP.py Project: arg1998/GeneticProgramming

    def render_current_generation(self, folder_name: str):
        """
        Render every tree of the current population to its own PNG file.

        Each image also contains a box listing the generation number and the
        tree's fitness, depth, width and node count.

        :param folder_name: base folder; output goes to "<folder_name>/Generation <generation>"
        :return: None
        """
        output_dir = folder_name + "/Generation " + str(self.generation)
        info_template = '"00_comment_00" [label="G : {0}\nF : {1}\nD : {2}\nW : {3}\nn : {4}" , shape="box" , color="white"]'

        graph = Digraph()
        graph.format = 'png'
        graph.directory = output_dir

        # The same Digraph object is reused: it is emptied before each tree
        for position, tree in enumerate(self.population, start=1):
            graph.clear()
            graph.body.append(
                info_template.format(self.generation, tree.fitness,
                                     tree.depth, tree.width,
                                     tree.number_of_nodes_in_tree))
            graph.body.append(tree.print_graph())
            graph.render("Individual {0}".format(position))

Example #4

Show file

File: automata.py Project: davp00/automatas

class Automata(Grafo):
    """Automaton whose states and labelled transitions are drawn with Graphviz."""

    def __init__(self):
        """Create an empty automaton and render the (still empty) graph once."""
        super().__init__()
        self.estados = []
        opciones = {
            'filename': 'out/process.gv',
            'engine': 'sfdp',
            'format': "png",
        }
        self.d = Digraph('Automata', **opciones)
        self.d.attr(rankdir='LR', size='8,5')
        self.d.render()

    def nuevo_estado(self, valor):
        """Append a new ``Estado`` built from ``valor`` to the state list."""
        self.estados.append(Estado(valor))

    def nueva_relacion(self, est1, est2, valor):
        """Relate state ``est1`` to state ``est2`` through ``valor``.

        Both states are looked up with ``self.buscar``; if either lookup
        returns ``None`` an error message is printed and nothing is changed.
        """
        origen = self.buscar(est1, self.estados)
        destino = self.buscar(est2, self.estados)

        if origen is None or destino is None:
            print("ERROR: ALGUNO DE LO ESTADOS NO EXISTE")
            return

        origen.relacionar(valor, destino)

    def show(self):
        """Rebuild the graph from the current states and render it."""
        grafo = self.d
        grafo.clear()
        grafo.attr(rankdir='LR', size='8,5')
        # No node is declared between these two attr calls, so the
        # 'doublecircle' shape is not applied to any node by this method.
        grafo.attr('node', shape='doublecircle')
        grafo.attr('node', shape='circle')

        # One edge per outgoing arista of every state
        for estado in self.estados:
            origen = estado.valor
            for arista in estado.aristas:
                grafo.edge(origen, arista.vertice.valor, label=arista.valor)

        grafo.render()

Example #5

Show file

File: heap.py Project: xyycha/data-struct

 def show(self, file_name=None):
     """Draw the heap as a tree and open the result in the viewer.

     :param file_name: file name handed to ``Digraph``; output goes under ``./pdf_data``.
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()

     # names[i] is the graph node name of self.heap[i] (None for empty slots)
     names = []
     for item in self.heap:
         if item is None:
             names.append(None)
         else:
             key = str(id(item))
             graph.node(name=key, label=str(item.value))
             names.append(key)

     # Children of index i sit at 2*i and 2*i + 1; indices start at 1
     for parent in range(1, self.size // 2 + 1):
         for child in (parent * 2, parent * 2 + 1):
             if child <= self.size:
                 graph.edge(head_name=names[child],
                            tail_name=names[parent])
     graph.view()

Example #6

Show file

File: automato.py Project: MrTrotta2010/Automatos-Plus

class Automato:
    def __init__(self):  # Constructor
        """Create an empty automaton together with the SVG graph used to draw it."""
        # Descriptive data
        self.tipo = None
        self.descricao = "Autômato sem descrição"

        # Input alphabet and stack alphabet
        self.alfabeto, self.alfabetoPilha = [], []

        # Transition table and the per-index record of active states
        self.transicoes, self.estadosAtivos = {}, {}

        # Initial state and list of final states
        self.estadoInicial = None
        self.estadosFinais = []

        self.grafo = Digraph(format='svg')

    # Receives the path of the input file, builds the automaton's graph and saves it under the matching name
    def montaGrafo(self, arquivo):

        self.arquivo = arquivo

        self.grafo.attr(rankdir='LR')
        self.grafo.attr('edge', arrowsize="0.3")

        # Um nó invisível, utilizado para criar a seta do estado inicial
        self.grafo.node('',
                        shape='plaintext',
                        fixedsize='true',
                        height='0.1',
                        width='0.1')

        for no in self.transicoes.keys(
        ):  # Para cada estado, cria um nó no grafo

            if no in self.estadosFinais:  # Caso o estado seja final, círculo duplo

                self.grafo.attr('node', shape='doublecircle')

            else:

                self.grafo.attr('node', shape='circle')

            self.grafo.node(no)

        arestas = {}

        for no in self.transicoes.keys(
        ):  # Para cada transição, cria uma aresta no grafo

            if no == self.estadoInicial:  # Cria a seta do estado inicial

                self.grafo.edge('', no, arrowsize='0.5')

            for transicao in self.transicoes[no]:

                for no2 in self.transicoes[no][transicao]:

                    if (no + ' ' + no2) in arestas.keys():
                        arestas[no + ' ' +
                                no2] = arestas[no + ' ' +
                                               no2] + ", " + transicao

                    else:
                        arestas[no + ' ' + no2] = transicao

        for aresta in arestas.keys():

            self.grafo.edge(aresta.split(' ')[0],
                            aresta.split(' ')[1],
                            label=arestas[aresta])

        # Salva o grafo num arquivo svg, pasta "Grafos"
        self.grafo.render(filename=(arquivo.replace("Entradas", "Grafos")),
                          format='svg',
                          cleanup=True)

    # Returns a graph highlighting the states that are active while processing position `indice` of `palavra`
    def montaGrafoPassoAPasso(self, palavra, indice):
        """Return a new graph that highlights the automaton's activity at one step.

        Nodes and edges considered active are coloured red, everything else
        black. Transitions sharing origin and destination are merged into one
        edge.

        Args:
            palavra: the word being processed.
            indice: index into ``palavra`` of the step to draw. ``-1`` stands
                for "before processing": only the initial state is highlighted.

        Returns:
            A ``Digraph`` built from scratch; ``self.grafo`` is not touched.
        """
        grafo = Digraph()
        grafo.attr(rankdir='LR')

        # Invisible node, declared the same way as in the full graph
        grafo.node('',
                   shape='plaintext',
                   fixedsize='true',
                   height='0.1',
                   width='0.1')

        # Decide shape and colour of every state
        for no in self.transicoes:

            formato = 'doublecircle' if no in self.estadosFinais else 'circle'
            grafo.attr('node', shape=formato)

            if indice == -1:
                # Before the word is processed only the initial state is active
                ativo = no == self.estadoInicial
            else:
                ativo = no in self.estadosAtivos[indice]

            grafo.attr('node', color='red' if ativo else 'black')
            grafo.node(no)

        # Maps (origin, destination) -> list of transitions between them, so
        # parallel transitions collapse into one edge. Tuple keys keep both
        # endpoints intact even when a state name contains spaces.
        arestas = {}

        for no, saidas in self.transicoes.items():
            for transicao, destinos in saidas.items():
                for no2 in destinos:
                    arestas.setdefault((no, no2), []).append(transicao)

        # Decide the colour of every merged edge
        for (no, no2), simbolos in arestas.items():

            if indice > 0 and palavra[indice] in simbolos \
                    and no2 in self.estadosAtivos[indice] \
                    and no in self.estadosAtivos[indice - 1]:
                cor = 'red'

            elif indice == 0 and palavra[indice] in simbolos \
                    and no == self.estadoInicial:
                cor = 'red'

            else:
                cor = 'black'

            grafo.attr('edge', color=cor)

            # Label is the list's text form without quotes and brackets
            rotulo = str(simbolos).replace("'", '').replace(']', '').replace('[', '')
            grafo.edge(no, no2, label=rotulo)

        return grafo

    def destroiAutomato(self):  # Reseta todos os dados do autômato

        self.arquivo = None
        self.tipo = None
        self.descricao = "Autômato sem descrição"
        self.alfabeto.clear()
        self.alfabetoPilha.clear()
        self.transicoes.clear()
        self.estadosAtivos.clear()
        self.estadoInicial = None
        self.estadosFinais.clear()
        self.grafo.clear(keep_attrs=False)

    # Receives a word and reports which of its symbols, if any, are not in the automaton's alphabet
    def verificaAlfabeto(self, palavra):

        caracteres = []  # Armazena os símbolos não aceitos

        for c in palavra:

            if c not in self.alfabeto and c not in caracteres:

                caracteres.append(c)

        if len(caracteres) > 0:

            return "Palavra não aceita! - Os símbolos " + str(caracteres)\
            + " não estão no alfabeto\n<< Alfabeto: " + str(self.alfabeto)

        return True

    # Receives a word to test and `passoAPasso`, which is used as a flag: when truthy, the step-by-step record is stored in self.estadosAtivos
    def testaPalavra(self, palavra, passoAPasso):

        if palavra == '':  # Caso a palavra seja vazia

            if self.estadoInicial in self.estadosFinais:
                retorno = "Palavra aceita!"

            else:
                retorno = "Palavra não aceita!"

        else:

            # Verifica se a palavra é aceita pelo alfabeto do autômato
            retorno = self.verificaAlfabeto(palavra)

        if retorno == True:  # Caso seja, testa as transições

            # Índices da palavra
            caractere = 0
            caractereAnterior = 0

            # A busca começa no estado inicial e tenta alcançar o estado final
            estadoAtual = self.estadoInicial

            # A pilha armazena duplas do tipo (estado, indice da palavra a ser processado no estado)
            fila = Queue()
            fila.put([estadoAtual, caractere])

            # Caso o usuário queira exibir o passo-a-passo, limpa o dicionário de estados,
            # pois ele será diferente para cada palavra
            if passoAPasso:

                self.estadosAtivos.clear()

            while True:

                # Se a pilha não estiver vazia, desempílha uma dupla e processa a transição a partir do estado
                if not fila.empty():

                    aux = fila.get()
                    estadoAtual = aux[0]  # Novo estado atual
                    caractere = aux[
                        1]  # Indice da palavra que armazena o caractere a ser processado

                # Se a pilha estiver vazia, significa que, ao final do processamento,
                # nenhum estado final foi atingido
                else:

                    retorno = "Palavra não aceita!"

                    break

                # Se o índice a ser processado for -1, significa que o autômato chegou
                # ao fim de um dos caminhos de processamento da palavra
                if caractere == -1:

                    # Dessa forma, se o estado atual for final, a palavra deve ser aceita
                    if estadoAtual in self.estadosFinais:

                        retorno = "Palavra aceita!"

                        break

                # Do contrário, processa a transição a prtir do estado atual
                else:

                    # Caso a transição exista na tabela de transições
                    if palavra[caractere] in self.transicoes[estadoAtual].keys(
                    ):

                        # O passo-a-passo funciona armazenando em um dicionário todos os estados
                        # ativos ao processar cada símbolo da palavra
                        if passoAPasso:

                            if caractere not in self.estadosAtivos.keys():

                                self.estadosAtivos[caractere] = []

                        # Para todos os estados atingidos a partir da transição
                        for estado in self.transicoes[estadoAtual][
                                palavra[caractere]]:

                            # Caso o caminho de processamento não tenha terminado, empilha o estado
                            # e o próximo índice da palavra a ser processado
                            if caractere + 1 != len(palavra):
                                fila.put([estado, caractere + 1])

                            # Caso o índice da palavra seja o último, fim de um caminho de processamento
                            else:
                                fila.put([estado, -1])

                            # Armazena o estado no dicionário de passo-a-passo
                            if passoAPasso and estado not in self.estadosAtivos[
                                    caractere]:
                                self.estadosAtivos[caractere].append(estado)

        return retorno

    def testaPalavraPilha(self, palavra, passoAPasso):

        if palavra == '':  # Caso a palavra seja vazia

            if self.estadoInicial in self.estadosFinais:
                retorno = "Palavra aceita!"

            else:
                retorno = "Palavra não aceita!"

        else:

            # Verifica se a palavra é aceita pelo alfabeto do autômato
            retorno = self.verificaAlfabeto(palavra)

        if retorno == True:  # Caso seja, testa as transições

            # Índices da palavra
            caractere = 0
            caractereAnterior = 0

            # A busca começa no estado inicial e tenta alcançar o estado final
            estadoAtual = self.estadoInicial

            # A fila armazena duplas do tipo (estado, indice da palavra a ser processado no estado)
            fila = [[estadoAtual, caractere]]

            # A pilha do autômato começa vazia
            pilha = []

            # Variável usada como tipo 'coringa'
            ANYTHING = Any()

            # Caso o usuário queira exibir o passo-a-passo, limpa o dicionário de estados,
            # pois ele será diferente para cada palavra
            if passoAPasso:

                self.estadosAtivos.clear()

            while True:

                # Se a fila não estiver vazia, desempílha uma dupla e processa a transição a partir do estado
                if len(fila) > 0:

                    aux = fila.pop(0)
                    estadoAtual = aux[0]  # Novo estado atual
                    caractere = aux[
                        1]  # Indice da palavra que armazena o caractere a ser processado

                # Se a pilha estiver vazia, significa que, ao final do processamento,
                # nenhum estado final foi atingido
                else:

                    retorno = "Palavra não aceita!"

                    break

                # Se o índice a ser processado for -1, significa que o autômato chegou
                # ao fim de um dos caminhos de processamento da palavra
                if caractere == -1:

                    # Dessa forma, se o estado atual for final, a palavra deve ser aceita
                    if estadoAtual in self.estadosFinais:

                        retorno = "Palavra aceita!"

                        break

                # Do contrário, processa a transição a prtir do estado atual
                else:

                    # Processa as transições vazias
                    self.transicoesVazias(fila, estadoAtual, caractere)
                    print(fila)
                    #input()

                    # Processa as transições interrogativas
                    self.transicoesInterrogativas(fila, estadoAtual, caractere,
                                                  palavra, pilha)

                    # Processa a transição
                    for transicao in self.transicoes[estadoAtual].keys():

                        # Caso a transição exista na tabela de transições
                        if transicao[0] == palavra[caractere]:

                            # O passo-a-passo funciona armazenando em um dicionário todos os estados
                            # ativos ao processar cada símbolo da palavra
                            if passoAPasso:

                                if caractere not in self.estadosAtivos.keys():

                                    self.estadosAtivos[caractere] = []

                            # Para todos os estados atingidos a partir da transição
                            for estado in self.transicoes[estadoAtual][
                                    transicao]:

                                print(estadoAtual, estado)

                                desempilha = transicao[1]
                                empilha = transicao[2]

                                try:
                                    if desempilha != '&' and desempilha != '?':
                                        pilha.reverse()
                                        pilha.remove(desempilha)
                                        pilha.reverse()

                                except:
                                    pass

                                else:
                                    # Caso o caminho de processamento não tenha terminado, empilha o estado
                                    # e o próximo índice da palavra a ser processado
                                    if caractere + 1 != len(palavra):
                                        fila.append([estado, caractere + 1])

                                    # Caso o índice da palavra seja o último, fim de um caminho de processamento
                                    else:
                                        fila.append([estado, -1])

                                    # Armazena o estado no dicionário de passo-a-passo
                                    if passoAPasso and estado not in self.estadosAtivos[
                                            caractere]:
                                        self.estadosAtivos[caractere].append(
                                            estado)

                                    if empilha != '&' and empilha != '?':
                                        pilha.append(empilha)

        return retorno

    def transicoesVazias(self, fila, estado, caractere):

        filaTemp = [estado]
        transVazia = ('&', '&', '&')

        while len(filaTemp) > 0:

            atual = filaTemp.pop(0)

            if transVazia in self.transicoes[atual]:

                for e in self.transicoes[atual][transVazia]:

                    filaTemp.append(e)
                    fila.append([e, caractere])

    def transicoesInterrogativas(self, fila, estado, caractere, palavra,
                                 pilha):

        for transicao in self.transicoes[estado]:

            if transicao[0] == '?':

                if transicao[1] == '?':

                    if caractere == len(palavra) - 1 and len(pilha) == 0:
                        for e in self.transicoes[estado][transicao]:
                            fila.append([e, -1])

                else:
                    if caractere == len(palavra) - 1:
                        for e in self.transicoes[estado][transicao]:
                            fila.append([e, -1])

            elif transicao[1] == '?':

                for e in self.transicoes[estado][transicao]:

                    if caractere == len(palavra) - 1:
                        fila.append([e, -1])

                    else:
                        fila.append([e, caractere + 1])

    # Prints the step-by-step dictionary
    def imprimePassoAPasso(self, palavra):

        print("<< Estado inicial: " +
              (self.estadoInicial.replace('*', '')).replace('+', ''))

        for indice in self.estadosAtivos.keys():

            print("<< Simbolo: " + palavra[indice] + " - Estados ativos: " +
                  str(self.estadosAtivos[indice]))

Example #7

Show file

class ros_msdn:
    def __init__(self):
        """Parse the arguments, load dataset and model, and wire up the ROS topics.

        Steps, in order: parse arguments and derive the model name, seed the
        random generators, load the training set, build the network and load
        the weights given by ``args.resume_model``, then create the image/depth
        subscribers (synchronised, delivered to ``self.callback``) and the
        scene-graph publisher.

        Raises:
            Exception: if ``args.resume_model`` is empty.

        NOTE(review): this code uses Python 2 print statements.
        """
        # To set the model name automatically
        args = parser.parse_args()
        print args
        args = get_model_name(args)
        print 'Model name: {}'.format(args.model_name)
        # Flag read by callback(): the graphs are cleared there only when it is False
        self.check = True

        # To set the random seed (a different offset for each generator)
        random.seed(args.seed)
        torch.manual_seed(args.seed + 1)
        torch.cuda.manual_seed(args.seed + 2)

        # Trailing comma: with the Python 2 print statement the newline is
        # suppressed, so "Done." is printed on the same line
        print("Loading training params"),
        self.train_set = visual_genome('normal', 'train')
        print("Done.")

        self.train_loader = torch.utils.data.DataLoader(self.train_set,
                                                        batch_size=1,
                                                        shuffle=True,
                                                        num_workers=8,
                                                        pin_memory=True)
        # Start of the interval reported as "Model Loading time" below
        end = time.time()
        # Model declaration: sizes and loss weights come from the training set,
        # the remaining options from the parsed arguments
        self.net = Hierarchical_Descriptive_Model(
            nhidden=args.mps_feature_len,
            n_object_cats=self.train_set.num_object_classes,
            n_predicate_cats=self.train_set.num_predicate_classes,
            n_vocab=self.train_set.voc_size,
            voc_sign=self.train_set.voc_sign,
            max_word_length=self.train_set.max_size,
            MPS_iter=args.MPS_iter,
            use_language_loss=not args.disable_language_model,
            object_loss_weight=self.train_set.inverse_weight_object,
            predicate_loss_weight=self.train_set.inverse_weight_predicate,
            dropout=args.dropout,
            use_kmeans_anchors=not args.use_normal_anchors,
            gate_width=args.gate_width,
            nhidden_caption=args.nhidden_caption,
            nembedding=args.nembedding,
            rnn_type=args.rnn_type,
            rnn_droptout=args.caption_use_dropout,
            rnn_bias=args.caption_use_bias,
            use_region_reg=args.region_bbox_reg,
            use_kernel=args.use_kernel_function)

        # Print the size of every parameter, then the network itself
        params = list(self.net.parameters())
        for param in params:
            print param.size()
        print self.net

        # To group up the features
        # (vgg_features_fix is not used again in this method)
        vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
            self.net)

        # Setting the state of the training model
        self.net.cuda()
        self.net.train()
        network.set_trainable(self.net, False)

        # loading model for inference
        print 'Resume training from: {}'.format(args.resume_model)
        # A checkpoint path is mandatory
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, self.net)
        args.train_all = True
        optimizer_select = 2

        # NOTE(review): `optimizer` is not used again in this method — confirm
        # whether get_optimizer is called only for its side effects
        optimizer = network.get_optimizer(args.lr, optimizer_select, args,
                                          vgg_features_var, rpn_features,
                                          hdn_features, language_features)

        # NOTE(review): `target_net` is not used again in this method
        target_net = self.net
        # Switch to evaluation mode after loading
        self.net.eval()
        print('Model Loading time: ', time.time() - end)

        # Set topics
        self.bridge = CvBridge()
        # Two SVG graphs; callback() clears both when self.check is False
        self.dot = Digraph(comment='warehouse', format='svg')
        self.regions_dot = Digraph(comment='regions', format='svg')

        # RGB and depth images are subscribed separately and then synchronised
        self.image_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/rgb/raw_image', Image)
        self.image_depth_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/depth/raw_image', Image)
        self.ts = message_filters.TimeSynchronizer(
            [self.image_sub, self.image_depth_sub], queue_size=1)
        print('calling callback')
        # self.callback receives the (image, depth_image) pair
        self.ts.registerCallback(self.callback)
        self.scenegraph_pub = rospy.Publisher('/turtlebot2i/scene_graph',
                                              SceneGraph,
                                              queue_size=10)

    def callback(self, image, depth_image):

        try:
            print 'inside callback '
            farClippingPlane = 3.5
            nearClippingPlane = 0.0099999
            cv_depth_image = self.bridge.imgmsg_to_cv2(depth_image,
                                                       "passthrough")
            cv_depth_image = cv2.flip(cv_depth_image, 0)
            cv_depth_image = nearClippingPlane + (
                cv_depth_image * (farClippingPlane - nearClippingPlane))
            cv_image = self.bridge.imgmsg_to_cv2(image, "rgb8")

            predicates_frequency = {
                'behind': 1,
                'on': 1,
                'has': 1000000,
                'in_front_of': 1,
                'next_to': 2,
                'beside': 2,
                'with': 1,
                'attach_to': 1,
                'connected_to': 1,
                'charges': 1,
                'in_hands_of': 1
            }
            all_classes = {
                'slidingdoor': 0,
                'wall': 0,
                'shelf': 0,
                'robot': 0,
                'human': 0,
                'conveyorbelt': 0,
                'dockstation': 0,
                'product': 0,
                'floor': 0
            }
            class_names = [
                'floor', 'wall', 'shelf', 'robot', 'human', 'conveyorbelt',
                'dockstation', 'product', 'slidingdoor'
            ]
            allowed_self_relationship = {
                'slidingdoor': [],
                'wall': ['beside', 'attach_to'],
                'shelf': ['beside', 'next_to'],
                'robot': [],
                'human': ['in_front_of', 'behind'],
                'conveyorbelt': [],
                'dockstation': [],
                'product': ['beside', 'next_to'],
                'floor': []
            }

            print("Describing.....")
            if self.check == False:
                self.dot.clear()
                self.regions_dot.clear()
            im, im_info = self.train_set.get_image_info(cv_image)

            end = time.time()

            region_caption, region_list, region_pred_boxes, region_logprobs, class_pred_boxes, class_scores,\
             class_inds, subject_list, object_list, predicate_list, predicate_inds, predicate_scores = self.net.describe(im.unsqueeze(0), [im_info], top_N=[50])

            class_idx = []
            for class_ in all_classes.keys():
                class_idx.append(self.train_set.word2idx[class_])

            predicate_idx = []
            for predicate in predicates_frequency.keys():
                predicate_idx.append(self.train_set.word2idx[predicate])

            classes_name = []

            predicate_scores = predicate_scores.squeeze()[predicate_list]
            subject_scores = class_scores[subject_list].squeeze()
            object_scores = class_scores[object_list].squeeze()
            relationship_scores = predicate_scores * (subject_scores +
                                                      object_scores) / 2.0

            keep_indexes = np.where((subject_scores > 0.7)
                                    & (object_scores > 0.7)
                                    & (predicate_scores > 0.5))[0]
            keep_classes = np.where(class_scores > 0.7)[0]
            class_name_score = dict()
            for i in keep_classes:
                class_name = self.train_set._object_classes[class_inds[i]]
                score = class_scores[i]
                all_classes[class_name] += 1
                if class_name != 'floor':
                    classes_name.append(
                        str(class_name + '#' + str(all_classes[class_name])))
                    class_name_score[str(class_name + '#' +
                                         str(all_classes[class_name]))] = score
                else:
                    classes_name.append(str(class_name))
                    class_name_score[str(class_name)] = score

            #_ = draw_bbox_label_msdn(cv_image, class_pred_boxes[keep_classes], class_inds[keep_classes], class_scores[keep_classes])

            classes_name = np.array(classes_name)
            subject_scores = subject_scores[keep_indexes]
            object_scores = object_scores[keep_indexes]
            subject_list = subject_list[keep_indexes]
            object_list = object_list[keep_indexes]
            predicate_list = predicate_list[keep_indexes]
            relationship_scores = relationship_scores[keep_indexes]
            predicate_scores = predicate_scores[keep_indexes]

            # subject_inds = class_inds[subject_list]
            # object_inds = class_inds[object_list]

            subjects_name = classes_name[subject_list]
            objects_name = classes_name[object_list]
            predicate_inds = predicate_inds.squeeze()[predicate_list]

            #print (class_inds[subject_list[keep_indexes]])
            relationship_dict = dict()

            last_subject = ''
            last_predicate = ''
            temp_score_list = []
            object_ids = []

            for i in range(len(subjects_name)):

                predicate = self.train_set._predicate_classes[
                    predicate_inds[i]]
                subject = subjects_name[i]
                _object = objects_name[i]

                if subject != _object:
                    if (subject == 'floor' and predicate != 'has') or (
                            _object == 'floor' and predicate != 'on'
                    ) or (subject[:-2] == 'wall' and predicate == 'in_front_of'
                          and _object[:-2] == 'dockstation'):
                        print 'unwanted relationship', subject, '-> ', predicate, ' -> ', _object

                    elif subject == 'floor' and predicate == 'has':
                        if subject not in relationship_dict.keys():
                            relationship_dict[subject] = dict()
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif predicate not in relationship_dict[subject].keys(
                        ):
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif _object not in relationship_dict[subject][
                                predicate].keys():
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        else:
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                    elif _object in relationship_dict.keys() and predicate in relationship_dict[_object].keys()\
                             and subject in relationship_dict[_object][predicate].keys() and  predicate_scores[i] > relationship_dict[_object][predicate][subject]:

                        if subject not in relationship_dict.keys():
                            relationship_dict[subject] = dict()
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]
                        elif predicate not in relationship_dict[subject].keys(
                        ):
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif _object not in relationship_dict[subject][
                                predicate].keys():
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        del relationship_dict[_object][predicate][subject]
                        if len(relationship_dict[_object][predicate]) == 0:
                            del relationship_dict[_object][predicate]
                            if len(relationship_dict[_object]) == 0:
                                del relationship_dict[_object]

                    elif subject == last_subject:
                        if predicate == last_predicate:
                            temp_score_list.append(predicate_scores[i])
                            object_ids.append(i)
                        else:
                            if len(temp_score_list) > 1:
                                sorted_scores = np.array(
                                    temp_score_list).argsort()[::-1]
                                indx = np.array(object_ids)[sorted_scores][0]
                            else:
                                indx = object_ids[-1]
                            #print 'Saving relationship 1', subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                            relationship_dict[subject][last_predicate][
                                objects_name[indx]] = predicate_scores[indx]

                            relationship_dict[subject][predicate] = dict()
                            temp_score_list = [predicate_scores[i]]

                            if subject[:
                                       -2] == _object[:
                                                      -2] and predicate not in allowed_self_relationship[
                                                          subject[:-2]]:
                                object_ids = []
                                last_predicate = ''
                                last_subject = ''
                            else:
                                object_ids = [i]
                                last_predicate = predicate
                    else:
                        relationship_dict[subject] = dict()
                        relationship_dict[subject][predicate] = dict()
                        if last_subject != '':
                            if len(temp_score_list) > 1:
                                sorted_scores = np.array(
                                    temp_score_list).argsort()[::-1]
                                indx = np.array(object_ids)[sorted_scores][0]
                            else:
                                indx = object_ids[-1]

                            #print 'Saving relationship 2', last_subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                            relationship_dict[last_subject][last_predicate][
                                objects_name[indx]] = predicate_scores[indx]

                        if subject[:
                                   -2] == _object[:
                                                  -2] and predicate not in allowed_self_relationship[
                                                      subject[:-2]]:
                            object_ids = []
                            last_predicate = ''
                            last_subject = ''
                            temp_score_list = []
                        else:
                            last_subject = subject
                            last_predicate = predicate
                            temp_score_list = [predicate_scores[i]]
                            object_ids = [i]

            if last_subject != '':
                if len(temp_score_list) > 1:
                    sorted_scores = np.array(temp_score_list).argsort()[::-1]
                    indx = np.array(object_ids)[sorted_scores][0]
                else:
                    indx = object_ids[-1]
                #print 'Saving relationship 3', last_subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                relationship_dict[last_subject][last_predicate][
                    objects_name[indx]] = predicate_scores[indx]

            print('Time taken to decribe: ', time.time() - end)

            self.dot.node_attr['shape'] = 'record'
            robot_label = "turtlebot2i"

            #self.dot.node('robot', label=robot_label)
            self.dot.node('warehouse', label='warehouse')
            floor_label = "{floor|Score: 0.7}"
            if 'floor' in class_name_score.keys():
                floor_label = '%s|Score: %.2f' % ('floor',
                                                  class_name_score['floor'])

            self.dot.node('floor', label=floor_label)
            self.dot.edge('warehouse', 'floor')

            list_nodes = ['warehouse', 'floor']

            for subject in relationship_dict.keys():
                for predicate in relationship_dict[subject].keys():
                    for _object in relationship_dict[subject][predicate].keys(
                    ):
                        if subject not in list_nodes:
                            node_label = '%s|Score: %.2f' % (
                                subject, class_name_score[subject])
                            self.dot.node(subject, label=node_label)
                            list_nodes.append(subject)
                        if _object not in list_nodes:
                            node_label = '%s|Score: %.2f' % (
                                _object, class_name_score[_object])
                            self.dot.node(_object, label=node_label)
                            list_nodes.append(_object)
                        self.dot.edge(subject, _object, label=predicate)

                        print 'Subject : ', subject, ' Predicate: ', predicate, ' Object: ', _object, ' Score: ', relationship_dict[
                            subject][predicate][_object]
            print 'END PRINTING Relationships...'

            sorted_regions = region_logprobs.argsort()[::-1]
            regions_dict = dict()
            regions_prob_dict = dict()
            sorted_region_keys = []

            for i in sorted_regions:
                if region_logprobs[i] > -0.5:
                    region_idx = region_caption[i]
                    common = list(frozenset(region_idx) & frozenset(class_idx))
                    #print 'Common classes: ', common
                    if len(common) == 2:
                        class_1 = self.train_set.idx2word[common[0]]
                        class_2 = self.train_set.idx2word[common[1]]
                        if all_classes[class_1] != 0 and all_classes[
                                class_2] != 0:

                            key = frozenset([class_1, class_2])
                            #print key
                            if key not in regions_prob_dict.keys():
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                            elif regions_prob_dict[key] < region_logprobs[i]:
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                    elif len(common) == 1:
                        class_1 = self.train_set.idx2word[common[0]]
                        if all_classes[class_1] != 0:
                            key = frozenset([class_1])
                            #print key
                            if key not in regions_prob_dict.keys():
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                            elif all_classes[class_1] > 1:
                                j = 1
                                while j < all_classes[class_1]:
                                    key = frozenset([class_1 + '#' + str(j)])
                                    #print key

                                    if key not in regions_prob_dict.keys():
                                        regions_prob_dict[
                                            key] = region_logprobs[i]
                                        regions_dict[key] = region_caption[i]
                                        sorted_region_keys.append(key)

                                    elif regions_prob_dict[
                                            key] < region_logprobs[i]:
                                        regions_prob_dict[
                                            key] = region_logprobs[i]
                                        regions_dict[key] = region_caption[i]
                                        sorted_region_keys.append(key)

                                    j += 1

            self.regions_dot.node_attr['shape'] = 'record'
            captions_list = []

            for key in sorted_region_keys:
                region_idx = regions_dict[key]
                log_prob = regions_prob_dict[key]
                caption = ""
                space = ""
                for indx in region_idx:
                    word = self.train_set.idx2word[indx]
                    if word != "<unknown>" and word != "<start>" and word != "<end>":
                        caption += space + word
                        space = " "
                node_label = "%s|Log probability: %.6f" % (caption, log_prob)
                repetition_check = True
                if caption not in captions_list:
                    self.regions_dot.node(caption, label=node_label)
                    repetition_check = False

                if len(captions_list) > 0:
                    self.regions_dot.edge(captions_list[-1], caption)

                if repetition_check == False:
                    captions_list.append(caption)

                #print caption, log_prob
            self.dot.render('scene_graph.gv', view=self.check)
            self.regions_dot.render('region_graph.gv', view=self.check)

            #s = Source(self.dot, filename="scene_graph", format="png")
            #s1 = Source(self.regions_dot, filename="region_graph", format="png")
            # #if self.check == False:
            # s.view()
            # s1.view()

            if self.check == True:
                self.check = False
            print 'END PRINTING Regions...'

        except CvBridgeError as e:
            print(e)

Example #8

Show file

class GraphViewer(QWidget):
    """Widget that renders a graphviz ``Digraph`` as SVG with a vertical scrollbar.

    Objects are registered through :meth:`add`, which declares one graph node
    per object plus edges from the objects it depends on.  :meth:`flush`
    re-renders the SVG after the graph changed and :meth:`clear` resets the
    graph and the bookkeeping of declared objects.
    """

    def __init__(self, parent):
        """Create the viewer as a child of *parent* with an empty graph."""
        super().__init__(parent)
        self._y = 0
        # Last known widget size; refreshed on every paint event.
        self._width = 1
        self._height = 1

        self.dot = Digraph(format='svg', strict=True)
        # Running number used to build unique operator node names.
        self._declared_count = 1
        # Maps every object already added to the name of its graph node.
        self._declared = dict()
        self._renderer = QSvgRenderer(self.dot.pipe(), self)

        # The scrollbar lives in the parent widget, and the parent's wheel
        # events are redirected to this viewer.
        self.scrollbar = QScrollBar(self.parent())
        self.scrollbar.setRange(0, 0)
        self.parent().wheelEvent = self.wheelEvent

    def wheelEvent(self, event):
        """Forward wheel events inside the drawing area to the scrollbar."""
        # Ignore events over the scrollbar column or below the widget.
        if event.x() > self.getScrollWidth():
            return
        if event.y() > self._height:
            return
        self.scrollbar.wheelEvent(event)

    def add(self, data):
        """Declare *data* (and, recursively, what it depends on) in the graph.

        *data* is expected to expose ``is_variable``, ``is_constant`` and
        ``is_operator`` flags.  Objects that were added before are not
        declared again.

        :return: the node name used for *data*, or ``None`` when *data* is
            neither a variable, a constant nor an operator.
        """
        # Already declared: reuse the existing node.
        if data in self._declared:
            return self._declared[data]
        # is variable
        if data.is_variable:
            name = data.name
            self._declared[data] = name
            self.dot.node(name)
            if data.toward is not None:
                toward = self.add(data.toward)
                self.dot.edge(toward, name)
            return name
        # is constant
        if data.is_constant:
            name = data.symbol
            self._declared[data] = name
            self.dot.node(name)
            return name
        # is operator
        if data.is_operator:
            # Operators get a counter prefix so that equal names stay distinct.
            name = '[%d] %s' % (self._declared_count, data.name)
            self._declared_count += 1
            self._declared[data] = name
            self.dot.node(name)
            operands = [data.sub, data.obj, data.step]
            if data.args is not None:
                operands += data.args
            for operand in operands:
                if operand is not None:
                    self.dot.edge(self.add(operand), name)
            return name
        return None

    def paintEvent(self, event):
        """Paint the rendered SVG, shifted vertically by the scrollbar position."""
        self._width = self.width()
        self._height = self.height()
        self.scrollbar.setGeometry(self.getScrollWidth(), 0, 20, self._height)
        self.resize(self._renderer.defaultSize())
        # NOTE: the original called painter.restore() here without a prior
        # save(); Qt only warns about the unbalanced call and ignores it, so
        # it has been dropped.
        painter = QPainter(self)
        drawRect = QRectF(self.rect())

        if self.scrollbar.maximum() == 0:
            draw_y = 0
        else:
            # Scroll offset: the overflow height scaled by the scrollbar
            # position (0..1).
            draw_y = drawRect.height() - self._height
            draw_y *= self.scrollbar.value() / self.scrollbar.maximum()

        drawRect.setY(-draw_y)
        drawRect.setHeight(drawRect.y() + drawRect.height())
        self._renderer.render(painter, drawRect)

    def flush(self):
        """Re-render the graph to SVG and update the scrollbar range."""
        self._renderer = QSvgRenderer(self.dot.pipe())
        # Ratio of the SVG height to the visible height; no scrolling is
        # needed when the picture fits.
        max_h = self._renderer.defaultSize().height() / self._height
        if max_h <= 1:
            max_h = 0
        max_h = int(self.delta() * max_h)
        self.scrollbar.setMaximum(max_h)

    def clear(self):
        """Forget all declared objects and empty the graph."""
        self._declared_count = 1
        self._declared = dict()
        self.dot.clear()

    def getScrollWidth(self):
        """Return the width of the drawing area (widget width minus the 20 px scrollbar)."""
        return self._width - 20

    def delta(self):
        """Return the factor applied to the height ratio when sizing the scrollbar range."""
        return 3.14

Example #9

Show file

class FsmDrawer():
    """Draws a finite state machine with graphviz and shows it in a ``Window``.

    ``formalDefinition`` is expected to expose ``accept`` (one accepting state
    or an iterable of them), ``start`` and ``transitions`` (objects with
    ``start``, ``end`` and ``regex`` attributes).
    """

    # Colour of regular nodes/edges and of the highlighted transition.
    _DEFAULT_COLOR = "#fdfaf6"
    _HIGHLIGHT_COLOR = "#fb743e"

    def __init__(self, formalDefinition, label):
        """Store the definition and label, then build the initial graph."""
        self.label = label
        self.formalDefinition = formalDefinition
        self.createReference()

    def createReference(self):
        """Create a fresh graph holding only the accepting states and a new window."""
        self.fsm = Digraph("FSM", format="svg", filename="fsm.txt")
        self.fsm.attr("node",
                      shape="doublecircle",
                      color="#fdfaf6",
                      fontsize="10")  #fontsize can be resize
        self.fsm.attr(rankdir="LR", bgcolor="transparent", size="9,9!")

        try:
            for item in self.formalDefinition.accept:
                self.fsm.node(str(item), fontcolor="#fdfaf6")
        except TypeError:
            # ``accept`` is a single, non-iterable state.
            self.fsm.node(str(self.formalDefinition.accept))
        self.img = Window(self.label)

    def _drawEdge(self, fsmStart, fsmNext, fsmLabel, edgeColor):
        """Add one labelled transition whose line and label use *edgeColor*."""
        # Nodes created implicitly by the edge are plain circles.
        self.fsm.attr("node",
                      shape="circle",
                      color=self._DEFAULT_COLOR,
                      fontcolor=self._DEFAULT_COLOR)
        self.fsm.edge(fsmStart,
                      fsmNext,
                      label=fsmLabel,
                      color=edgeColor,
                      fontcolor=edgeColor)

    def drawFsm(self, fsmStart, fsmNext, fsmLabel):
        """Add a transition from *fsmStart* to *fsmNext* in the default colour."""
        self._drawEdge(fsmStart, fsmNext, fsmLabel, self._DEFAULT_COLOR)

    def drawFsmColored(self, fsmStart, fsmNext, fsmLabel):
        """Add a transition from *fsmStart* to *fsmNext* in the highlight colour."""
        self._drawEdge(fsmStart, fsmNext, fsmLabel, self._HIGHLIGHT_COLOR)

    def click(self, char, first):
        """Render the machine, highlighting transitions on *char* leaving state *first*.

        After rendering, the graph is reset through :meth:`clearReference`.
        """
        for a in self.formalDefinition.transitions:
            if str(a.regex) == char and str(a.start) == first:
                self.drawFsmColored(str(a.start), str(a.end), str(a.regex))
            else:
                self.drawFsm(str(a.start), str(a.end), str(a.regex))

        # Unnamed plaintext node with an edge pointing at the start state.
        self.fsm.attr("node", shape="plaintext", color="#fdfaf6")
        self.fsm.edge("", str(self.formalDefinition.start), color="#fdfaf6")

        self.fsm.render(view=False)

        self.clearReference()

    def clearReference(self):
        """Wait 0.8 s, clear the current graph and rebuild it from scratch."""
        time.sleep(0.8)
        self.fsm.clear()
        self.createReference()

Example #10

Show file

File: link_extractor.py Project: J0s34h/DataMining

import time

from urllib.parse import urlparse, urljoin

import treelib as treelib
from bs4 import BeautifulSoup
from fractions import Fraction

from graphviz import Digraph

import _thread

from treelib import Tree

# Module-level graph object.
main_graph = Digraph()
# Nothing has been added to the graph at this point, so this clear() starts
# from an already empty graph.
main_graph.clear()

# init the colorama module
# NOTE(review): `colorama` does not appear in the import block visible above;
# confirm it is imported before this line runs.
colorama.init()

# Foreground colour codes taken from colorama.Fore (RESET switches back to the
# default foreground colour).
GREEN = colorama.Fore.GREEN
GRAY = colorama.Fore.LIGHTBLACK_EX
RED = colorama.Fore.RED
CYAN = colorama.Fore.CYAN
BLACK = colorama.Fore.BLACK
MAGENTA = colorama.Fore.LIGHTMAGENTA_EX
RESET = colorama.Fore.RESET

# initialize the set of links (unique links)
# presumably filled by the crawler code further down the file -- TODO confirm
internal_urls = set()
external_urls = set()

Example #11

Show file

class SndpGraph():
    # Generation parameters -- be careful when changing them.

    # Fraction of the locations that are selected as end-product plants
    # (floor(num_locations * value) plants are drawn in __init__).
    FLOAT_PERCENT_OF_LOC_WITH_END_PROD = 0.5
    # Cap on the randomly drawn number of materials per plant; the validation
    # step in __init__ may still add further materials to a plant.
    INT_MAX_PRODUCTS_IN_ONE_LOCATION = 3
    # Upper bound of the random route distance (drawn from 1..value, so the
    # average is 3). Author's note: a value that is too high might lead to a
    # solution value of 0 (cost > sales).
    INT_MAX_DISTANCE = 5

    # Initial sales price. Author's note: in the initial instance it was 13;
    # might be adjusted with adjust_sales_price.
    FLOAT_INIT_SALES_PRICE = 120
    FLOAT_PLANT_COST = 2000
    FLOAT_PLANT_CAPACITY = 5000
    # min possible scenario demand = (1-FLOAT_MAX_PERCENT_DEMAND_DEFICIT) * FLOAT_PLANT_CAPACITY * number end product plants
    # max possible scenario demand = 0.9 * FLOAT_PLANT_CAPACITY * number end product plants
    FLOAT_MAX_PERCENT_DEMAND_DEFICIT = 0.5

    # Values of the `type` attribute of a product.
    STR_PRODUCT_TYPE_MATERIAL = 'STR_PRODUCT_TYPE_MATERIAL'
    STR_PRODUCT_TYPE_END_PRODUCT = 'STR_PRODUCT_TYPE_END_PRODUCT'

    # num_cpu is forced to 1 if the number of locations is below this value.
    INT_MIN_MULTITHREAD_LOCATION_LIMIT = 2000
    # Author's note: visualize() is not run if the number of locations exceeds
    # this value.
    INT_MAX_LOCATIONS_TO_VISUALIZE = 40
    DEBUG = False

    def __init__(self,
                 name,
                 num_locations,
                 num_products,
                 num_scen,
                 random_seed=None):
        """Generate a random SNDP instance.

        Creates the products, the locations, the routes towards the end
        location and the end-product plants, assigns materials to plants,
        repairs the graph so that every end-product plant can obtain every
        material, and finally generates the stochastic data.

        :param name: name of the instance, stored as ``self.name``
        :param num_locations: number of locations to create (ids start at 1);
            must be at least 2
        :param num_products: number of products to create (ids start at 1, the
            last one is the end product); must be at least 2
        :param num_scen: number of scenarios, passed on to
            ``regenerate_stochastic_data``
        :param random_seed: seed handed to ``random.seed``
        :raises ValueError: if fewer than two products or locations are requested
        """

        Timer('Core data generated').start()

        self.name = name
        self.dot_graph = None
        self.random_seed = random_seed
        random.seed(random_seed)

        # Initialize data cache
        self._data = {}
        self.sales_price = SndpGraph.FLOAT_INIT_SALES_PRICE
        self._data['PlantCost'] = SndpGraph.FLOAT_PLANT_COST
        self._data['PlantCapacity'] = SndpGraph.FLOAT_PLANT_CAPACITY
        self._data['NrOfLocations'] = 0
        self._data['NrOfProducts'] = 0
        self._data_valid_export = {
            'ScalarData': None
        }  # path to the .dat file that is actual for current data

        list_data_names = [
            'MaterialReq', 'Prob', 'Demand', 'ShipCost', 'ArcProduct', 'arc'
        ]
        self._data_txt = {}  # textual representation for .dat files
        for data_name in list_data_names:
            self._data[data_name] = {}
            self._data_txt[data_name] = ''
            self._data_valid_export[data_name] = None

        # Initialize products
        if num_products < 2:
            # A bare string cannot be raised in Python 3; raise a real exception.
            raise ValueError(
                'There should be at least two products in the SNDP problem: material and end product.'
            )
        if num_products > 40:
            print(
                'If num products > 40, the instance might be disbalanced: production too expensive and solution value 0'
            )
        self._products = {
            product_id: _Product(product_id, self)
            for product_id in range(1, num_products + 1)
        }  # +1 since in MPL indexing starts from 1
        self.get_products(
        )[-1].type = SndpGraph.STR_PRODUCT_TYPE_END_PRODUCT  # last product is end product
        max_material_req = math.floor(
            40 / (num_products) *
            2)  # in order to have moderate production costs
        self.material_requirements = [
            random.randint(1, max_material_req)
            for material in self.get_materials()
        ]  # in the end product
        self._data['MaterialReq'] = {
            i: {
                'material': i + 1,
                'value': k
            }
            for (i, k) in enumerate(self.material_requirements)
        }
        self._data_txt['MaterialReq'] += '\n'.join([
            f'{i + 1},{k}' for (i, k) in enumerate(self.material_requirements)
        ])

        # Initialize all locations
        if num_locations < 2:
            raise ValueError(
                'There should be at least two locations in the SNDP problem: market and another location.'
            )
        self._locations = {
            location_id: _Location(location_id, self)
            for location_id in range(1, num_locations + 1)
        }

        # Nodes with end product
        self._routes = {}
        plants_for_end_products = random_subset(
            self.get_plants(),
            math.floor(num_locations *
                       SndpGraph.FLOAT_PERCENT_OF_LOC_WITH_END_PROD)
        )  # set it right away for efficiency
        self._end_product_plants = set()
        for plant in plants_for_end_products:
            # end product (at least) should be produced there
            route_object = _Route(
                plant, self.get_end_location(),
                random.randint(1, SndpGraph.INT_MAX_DISTANCE))
            self.add_route(route_object)
            plant.add_product(self.get_end_product())
        end_product_plants = self.get_end_product_plants()

        # Assign materials to plants and create routes
        #num_cpu = mp.cpu_count()
        num_cpu = 1  # multiprocessing does not provide any efficiency improvements
        if num_locations < SndpGraph.INT_MIN_MULTITHREAD_LOCATION_LIMIT:
            num_cpu = 1

        if num_cpu > 1:  # multiprocessing
            # num_cpu is currently always 1, so this branch is never taken; the
            # code after the raise is kept as a sketch of the intended
            # multiprocessing path.
            raise NotImplementedError(
                'We need to fill in data cache here in the same manner as for single thread case.'
            )
            add_products = []  # will store the changes to be applied
            manager = mp.Manager()
            # normal dict will not be shared among processes but we need it to be shared
            # manager.dict() makes things much slower - maybe use Array.
            add_routes = manager.dict()
            # add_routes = {} # even if we use normal dict (makes no sense) there is no benefit in speed
            pool = mp.Pool(num_cpu)
            results = [
                pool.apply_async(self.generate_plant_data,
                                 args=(worker_id, add_routes, num_cpu))
                for worker_id in range(num_cpu)
            ]
            pool.close()
            pool.join()
            for result in [res.get() for res in results]:
                add_products += result

            # apply changes
            # since data is not shared among processes, .add_product() and .add_route() should not be called from generate_plant_data()
            for record in add_products:
                plant = self.get_location(record['plant'])
                material = self.get_product(record['material'])
                plant.add_product(material)
            for key, distance in add_routes.items():
                star_id = int(key.split('-')[0])
                start_location = self.get_location(star_id)
                end_id = int(key.split('-')[1])
                end_location = self.get_location(end_id)
                self.add_route(_Route(start_location, end_location, distance))
        else:
            # we do not use here generate_plant_data() because making .add_route() and .add_product() right away is much faster
            num_plants = len(self.get_plants())
            for plant in self.get_plants():
                if plant in end_product_plants:
                    min_materials = 0  # in potential plants none of the materials might be manufactured
                    max_materials = math.ceil(
                        len(self.get_materials()) / 4
                    )  # to avoid that all materials are manufactured on the plant site and should not be delivered
                else:
                    min_materials = 1
                    max_materials = len(self.get_materials())
                random_num_materials = min(
                    random.randint(min_materials, max_materials),
                    SndpGraph.INT_MAX_PRODUCTS_IN_ONE_LOCATION)
                if random_num_materials == 0:  # no materials produced, lets go to the next plant
                    continue

                # Define the route to (several or all) potential end product plants for every plant
                random_num_end_product_plants = random.randint(
                    1, len(end_product_plants))
                random_end_product_plants = random_subset(
                    end_product_plants, random_num_end_product_plants)
                # connect the location with the end product plants
                for end_product_plant in random_end_product_plants:
                    # we need route only if product is produced not in the potential plant locations
                    if plant.id == end_product_plant.id:
                        continue
                    # routes can be one directional, omit the route if it already exists in another direction
                    if self.get_route(end_product_plant, plant):
                        continue
                    if not self.get_route(
                            plant, end_product_plant
                    ):  # if the route does not already exist
                        self.add_route(
                            _Route(
                                plant, end_product_plant,
                                random.randint(1, SndpGraph.INT_MAX_DISTANCE)))

                # Define materials to produce
                random_materials = random_subset(
                    self.get_materials(),
                    random_num_materials)  # except the last one
                for material in random_materials:
                    plant.add_product(material)

                progress_bar('Generate data for plants', plant.id, num_plants)

        # Test if graph is valid and solve the issues
        # - check if plant with material has at least one route to potential plant: this is guaranteed during assignment of materials to plants
        # - check if every potential plant has all the materials delivered
        # it will also automatically solve the issue if a material has no plant, since such material will not be delivered to all plants
        for counter, end_product_plant in enumerate(end_product_plants, 1):
            # materials produced in the plant itself
            available_materials = [
                product for product in end_product_plant.get_products()
                if product.type == SndpGraph.STR_PRODUCT_TYPE_MATERIAL
            ]
            # and materials delivered
            connected_plants = [
                route.start for route in end_product_plant.get_inbounds()
            ]
            for connected_plant in connected_plants:
                available_materials += [
                    product for product in connected_plant.get_products()
                    if product.type == SndpGraph.STR_PRODUCT_TYPE_MATERIAL
                ]
            materials_not_delivered_to_plant = [
                material for material in self.get_materials()
                if material not in available_materials
            ]
            for material in materials_not_delivered_to_plant:
                # add material to the potential plant itself or connected plants
                if len(connected_plants) > 0:
                    random_plant = random_subset(connected_plants, 1)[0]
                else:  # produce in plant itself if no plants are connected
                    random_plant = end_product_plant
                random_plant.add_product(material)

            progress_bar('Validate data for plants', counter,
                         len(end_product_plants))

        str(Timer('Core data generated'))
        Timer('Core data generated').reset()

        # Sanity checks: the counters in the data cache must match both the
        # request and the objects actually created.
        assert (self._data['NrOfLocations'] == num_locations)
        assert (self._data['NrOfLocations'] == len(self.get_locations()))
        assert (self._data['NrOfProducts'] == num_products)
        assert (self._data['NrOfProducts'] == len(self.get_products()))

        # Stochastic data
        self._scenarios = []
        self.regenerate_stochastic_data(num_scen)

    @property
    def sales_price(self):
        return self._data['SalesPrice']

    @sales_price.setter
    def sales_price(self, value):
        self._data['SalesPrice'] = value

    @property
    def data_as_dict(self):
        result = {}
        # scalar data
        for name in [
                'NrOfLocations', 'NrOfProducts', 'NrOfScen', 'SalesPrice',
                'PlantCost', 'PlantCapacity'
        ]:  # basically we do not need to clear it because it cannot be modified:
            result[name] = self._data[name]
        # array data
        for name in [
                'MaterialReq', 'Prob', 'Demand', 'ShipCost', 'ArcProduct',
                'arc'
        ]:  # 'MaterialReq' are excluded since they cannot be modified:
            result[name] = list(self._data[name].values())

        return result

    def generate_plant_data(self, worker_id, shared_add_routes, num_cpu=0):
        '''Used in multiprocessing. Generates most of the data except the stochastic data.

        The plants are split into contiguous slices of ``plants_per_worker``
        plants, one slice per worker; the last worker additionally takes the
        remainder. Nothing is written to the graph itself: products are
        returned and routes are stored in ``shared_add_routes``.

        :param worker_id: index of the calling worker; the slicing below
            treats it as zero-based (``0 .. num_cpu - 1``).
        :param shared_add_routes: dict-like object that receives
            ``'<plant id>-<end product plant id>' -> distance`` entries.
        :param num_cpu: total number of workers; ``0`` means ``mp.cpu_count()``.
        :return: list of ``{'plant': <id>, 'material': <id>}`` dicts, empty
            when no plants are left for this worker.
        '''
        # NOTE(review): for worker_id == 0 the seed is always 1, independent
        # of self.random_seed. Left unchanged to keep generated data stable.
        random.seed(self.random_seed * worker_id + 1)  # +1 to avoid 0
        if num_cpu == 0:
            num_cpu = mp.cpu_count()

        add_products = []  # result that we will return
        all_plants = self.get_plants()
        end_product_plants = self.get_end_product_plants()

        plants_per_worker = max(math.floor(len(all_plants) / num_cpu), 1)
        # Every worker starts at its own offset. The previous
        # ``(worker_id - 1)`` offset gave workers 0 and 1 the same slice and,
        # with fewer plants than workers, left the last plant unprocessed.
        plants_start = plants_per_worker * worker_id
        if plants_start >= len(all_plants):  # we have no plants for this worker
            return []
        plants_end = plants_start + plants_per_worker
        if worker_id == num_cpu - 1:  # last worker also takes the remainder
            plants_end = len(all_plants)
        plants = all_plants[plants_start:plants_end]

        for plant in plants:
            if plant in end_product_plants:
                min_materials = 0  # in potential plants none of the materials might be manufactured
                max_materials = 1  # to avoid that all materials are manufactured on the plant site and should not be delivered
            else:
                min_materials = 1
                max_materials = len(self.get_materials())
            random_num_materials = min(
                random.randint(min_materials, max_materials),
                SndpGraph.INT_MAX_PRODUCTS_IN_ONE_LOCATION)
            if random_num_materials == 0:
                continue
            random_materials = random_subset(
                self.get_materials(),
                random_num_materials)  # except the last one
            for material in random_materials:
                # the product is only recorded here, not added to the plant
                add_products.append({
                    'plant': plant.id,
                    'material': material.id
                })

            # Define the route to (several or all) potential end product plants for every plant
            random_num_end_product_plants = random.randint(
                1, len(end_product_plants))
            random_end_product_plants = random_subset(
                end_product_plants, random_num_end_product_plants)
            # connect the location with the end product plants
            for end_product_plant in random_end_product_plants:
                # we need route only if product is produced not in the potential plant locations
                if plant.id == end_product_plant.id:
                    continue
                # routes can be one directional, omit the route if it already exists in another direction
                key_opposite = '{}-{}'.format(end_product_plant.id, plant.id)
                if self._routes.get(key_opposite):
                    continue
                key = '{}-{}'.format(plant.id, end_product_plant.id)
                if not self._routes.get(
                        key):  # if the route does not already exist
                    # the route is only recorded in the shared mapping
                    shared_add_routes[key] = random.randint(
                        1, SndpGraph.INT_MAX_DISTANCE)

            print(f'Data generated for plant {plant.id}')

        return add_products

    def regenerate_stochastic_data(self, num_scen):
        '''Drop all scenarios and sample ``num_scen`` new ones.

        Demands are distinct integers sampled between a lower bound derived
        from ``FLOAT_MAX_PERCENT_DEMAND_DEFICIT`` and 90% of the total
        capacity of the end product plants. All scenarios but the last get
        probability ``1 / num_scen``; the last one gets whatever is left so
        that the probabilities add up to 1.

        :raises ValueError: when the demand interval is narrower than
            ``num_scen``.
        '''
        timer_name = 'Stochastic data generated'
        Timer(timer_name).start()

        self._clear_stochastic_data_cache()
        plant_count = len(self.get_end_product_plants())
        lowest_demand = (1 - SndpGraph.FLOAT_MAX_PERCENT_DEMAND_DEFICIT
                         ) * SndpGraph.FLOAT_PLANT_CAPACITY * plant_count
        highest_demand = 0.9 * SndpGraph.FLOAT_PLANT_CAPACITY * plant_count
        if num_scen > (highest_demand - lowest_demand):
            raise ValueError(
                "SndpGraph.FLOAT_MAX_PERCENT_DEMAND_DEFICIT is too small for the num_scen."
            )

        self._scenarios = []
        uniform_probability = 1 / num_scen  # uniform distribution is assumed
        demands = random.sample(
            range(int(lowest_demand), int(highest_demand)), num_scen)

        # scenario ids start at 1; the last scenario is handled separately
        for scenario_id, demand in zip(range(1, num_scen), demands):
            self.add_scenario(
                _Scenario(scenario_id, uniform_probability, demand))

        remaining_probability = 1.0 - sum(
            scen.probability for scen in self.get_scenarios())
        assert (remaining_probability > 0)
        last_scenario = _Scenario(num_scen, remaining_probability,
                                  demands[num_scen - 1])
        self.add_scenario(last_scenario)

        print(Timer(timer_name))
        Timer(timer_name).reset()

        assert (self._data['NrOfScen'] == num_scen)
        assert (self._data['NrOfScen'] == len(self.get_scenarios()))

    def visualize(self, format='jpg', view=False, to_file=None):
        '''Render the graph with graphviz.

        The end location is drawn filled red, end product plants filled grey
        and every other location with a plain outline. Graphs with more than
        ``INT_MAX_LOCATIONS_TO_VISUALIZE`` locations are skipped with a
        printed message. Rendering errors are reported as a printed warning
        instead of being raised.

        :param format: output format handed to graphviz.
        :param view: forwarded to ``render``.
        :param to_file: output file name; defaults to the graph name.
        '''
        location_count = len(self.get_locations())
        if location_count > SndpGraph.INT_MAX_LOCATIONS_TO_VISUALIZE:
            print(
                f'Visualization of graph {self.name} with {len(self.get_locations())} locations will take to much time and will not be done.'
            )
            return

        if self.dot_graph is None:
            self.dot_graph = Digraph(comment=self.name)
        # start from an empty drawing every time
        self.dot_graph.clear()
        self.dot_graph.format = format

        grey_locations = self.get_end_product_plants()
        for location in self.get_locations():
            fill = None
            if location == self.get_end_location():
                fill = 'red'
            elif location in grey_locations:
                fill = 'grey'
            self.dot_graph.node(name=str(location.id),
                                label=str(location),
                                style='solid' if fill is None else 'filled',
                                color=fill)

        for route in self.get_routes():
            distance_text = str(route.distance)
            self.dot_graph.edge(str(route.start.id),
                                str(route.end.id),
                                label=distance_text,
                                len=distance_text)

        target = self.name if to_file is None else to_file
        try:
            self.dot_graph.render(target, view=view)
        except Exception as e:
            print(
                f"WARNING: Visulalization of {self.name} failed. Due to this error: {e}"
            )

    def export_mpl(self, filename: str):
        '''Write the model as ``<filename>.mpl`` plus the ``.dat`` files it links to.

        A ``.dat`` file is only rewritten when its entry in
        ``self._data_valid_export`` is ``None``; otherwise the path stored
        there is reused. The links inside the model formulation are pointed
        at the files actually used.

        :param filename: path prefix (without extension) of the output files.
        '''
        formulation = Path(
            resource_filename(__name__, 'SNDP_default.mpl')).read_text()

        # --- scalar data -------------------------------------------------
        cached_scalar = self._data_valid_export['ScalarData']
        if cached_scalar is None:
            out_filename = f'{filename}_ScalarData.dat'
            template = Path(
                resource_filename(__name__,
                                  'SNDP_default_ScalarData.dat')).read_text()
            template_lines = template.split('\n')
            for scalar_name in ('NrOfLocations', 'NrOfProducts', 'NrOfScen',
                                'SalesPrice', 'PlantCost', 'PlantCapacity'):
                # the value sits on the line right after the '!<name>' marker
                value_row = template_lines.index('!' + scalar_name) + 1
                template_lines[value_row] = str(self._data[scalar_name])
            scalar_path = Path(out_filename)
            scalar_path.write_text('\n'.join(template_lines))
            self._data_valid_export['ScalarData'] = scalar_path
        else:
            out_filename = str(cached_scalar)
        formulation = formulation.replace('SNDP_default_ScalarData.dat',
                                          str(out_filename))

        # --- array data --------------------------------------------------
        for array_name in ('ShipCost', 'ArcProduct', 'arc', 'Prob', 'Demand',
                           'MaterialReq'):
            cached_array = self._data_valid_export[array_name]
            if cached_array is None:
                out_filename = f'{filename}_{array_name}.dat'
                # any row will do: only its column names are needed
                sample_row = next(iter(self._data[array_name].values()))
                header = '!{}\n!{}\n'.format(array_name,
                                             ','.join(sample_row.keys()))
                array_path = Path(out_filename)
                array_path.write_text(header + self._data_txt[array_name])
                self._data_valid_export[array_name] = array_path
            else:
                out_filename = str(cached_array)
            formulation = formulation.replace(
                f'SNDP_default_{array_name}.dat', str(out_filename))

        Path(filename + '.mpl').write_text(formulation)

    def adjust_sales_price(self):
        '''Find the smallest value of SalesPrice
        that does not decrease the number of open plants.
        Motivation: has as small obj value as possible to avoid numerical issues.

        The graph is exported to ``<name>.mpl``, the adjustment itself is
        delegated to ``SndpModel`` and the resulting price is stored back.
        If ``SndpModel`` cannot be imported, a warning is issued and nothing
        else happens.'''

        try:
            from sndpgen.sndp_model import SndpModel
        except ImportError:
            warn(
                'optconvert is not installed, adjust_sales_price() will not be executed',
                ImportWarning)
            return

        export_name = f'{self.name}'
        self.export_mpl(export_name)
        model = SndpModel(Path(f'{self.name}.mpl'))
        model.adjust_sales_price()
        adjusted_price = model.data_as_dict['SalesPrice']
        self.sales_price = adjusted_price
        # the scalar .dat file written above now holds an outdated price
        self._data_valid_export['ScalarData'] = None

    def _clear_nodes_data_cache(self):
        self._data_valid_export['ScalarData'] = None
        for name in [
                'NrOfLocations', 'NrOfProducts'
        ]:  # basically we do not need to clear it because it cannot be modified:
            self._data[name] = 0
        for name in [
                'ShipCost', 'ArcProduct', 'arc'
        ]:  # 'MaterialReq' are excluded since they cannot be modified:
            self._data[name] = {}
            self._data_txt[name] = ''
            self._data_valid_export[name] = None

    def _clear_stochastic_data_cache(self):
        self._data_valid_export['ScalarData'] = None
        for name in ['NrOfScen']:
            self._data[name] = 0
        for name in ['Prob', 'Demand']:
            self._data[name] = {}
            self._data_txt[name] = ''
            self._data_valid_export[name] = None

    def add_route(self, route):
        '''Register ``route`` in the graph and in the data cache.

        :param route: route object with ``start``, ``end`` and ``distance``.
        :raises KeyError: when a route with the same start and end is
            already registered.
        '''
        if self.get_route(route.start, route.end):
            raise KeyError('Route already exists in the graph.')

        route._graph = self
        origin, destination = route.start, route.end
        self._routes[f'{origin.id}-{destination.id}'] = route

        # exports that depend on the set of routes are outdated now
        for export_name in ('ScalarData', 'ShipCost', 'ArcProduct', 'arc'):
            self._data_valid_export[export_name] = None
        origin.update_graph_data_cache(product=None, route=route)

        # no duplicate check needed here, it was done at the top
        cost_key = f'{origin.id},{destination.id}'
        self._data['ShipCost'][cost_key] = dict(start=origin.id,
                                                finish=destination.id,
                                                value=route.distance)
        self._data_txt['ShipCost'] += f'{cost_key},{route.distance}\n'

    def add_scenario(self, scenario):
        '''Register ``scenario`` in the graph and in the data cache.

        The duplicate check runs before anything is modified, so a rejected
        scenario leaves the graph untouched. Exports that depend on the
        scenarios are marked as outdated, the same way ``add_route`` does it
        for route data.

        :param scenario: scenario object with ``id``, ``probability`` and
            ``demand``.
        :raises KeyError: when a scenario with the same id is already
            registered.
        '''
        if scenario.id in self._data['Prob']:
            raise KeyError('Scenario already exists in the graph.')
        assert scenario.id not in self._data['Demand'], \
            'How would this happen if error obove does not raise?'

        scenario._graph = self
        self._scenarios.append(scenario)

        # data cache
        for name in ('ScalarData', 'Prob', 'Demand'):
            self._data_valid_export[name] = None
        self._data['NrOfScen'] += 1
        self._data['Prob'][scenario.id] = {
            'SCEN': scenario.id,
            'value': scenario.probability
        }
        self._data_txt['Prob'] += f'{scenario.id},{scenario.probability}\n'
        self._data['Demand'][scenario.id] = {
            'SCEN': scenario.id,
            'value': scenario.demand
        }
        self._data_txt['Demand'] += f'{scenario.id},{scenario.demand}\n'

    def get_products(self):
        '''Return all products as a new list, in the order they are stored.'''
        return [*self._products.values()]

    def get_materials(self):
        '''Return every product except the last one, which is the end product.'''
        products = self.get_products()
        return products[:-1]

    def get_product(self, id):
        '''Return the product stored under ``id``, or ``None`` when there is none.'''
        return self._products.get(id)

    def get_end_product(self):
        '''Return the last product of ``get_products()``.'''
        products = self.get_products()
        return products[-1]

    def get_locations(self):
        '''Return all locations as a new list, in the order they are stored.'''
        return [*self._locations.values()]

    def get_plants(self):
        '''Return every location except the last one, which is the end location.'''
        locations = self.get_locations()
        return locations[:-1]

    def get_end_product_plants(self):
        '''Return the stored collection of end product plants.

        The internal object itself is returned, not a copy.'''
        return self._end_product_plants

    def get_location(self, id):
        '''Return the location stored under ``id``.

        :raises KeyError: when no location with this id exists.
        '''
        location = self._locations.get(id)
        if location is None:
            # a plain string cannot be raised; use a real exception type
            raise KeyError(f'Location with {id} was not found.')
        return location

    def get_end_location(self):
        '''Return the last location of ``get_locations()``.'''
        locations = self.get_locations()
        return locations[-1]

    def get_routes(self):
        '''Return all routes as a new list, in the order they are stored.'''
        return [*self._routes.values()]

    def get_route(self, start, end):
        '''Return the route from ``start`` to ``end``, or ``None`` when there is none.

        Only the given direction is looked up.

        :raises TypeError: when ``start`` or ``end`` is not a ``_Location``.
        '''
        if not isinstance(start, _Location) or not isinstance(end, _Location):
            # a plain string cannot be raised; use a real exception type
            raise TypeError(
                'Start and end arguments should be Location objects')
        key = '{}-{}'.format(start.id, end.id)
        return self._routes.get(key)

    def get_scenarios(self):
        '''Return a shallow copy of the scenario list.'''
        return list(self._scenarios)

Example #12

Show file

 def show_m_nodes(self, file_name):
     """Draw the nodes via ``print_m_nodes`` into a new graph and open the result.

     The graph file is named ``file_name`` and placed in ``./pdf_data``.
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()
     self.print_m_nodes(father_name=None, d=graph)
     graph.view()

Example #13

Show file

# NOTE(review): the default for `start` is evaluated once, when the function is
# defined, so every call without an argument shares that same object.
def graph_gen(start=start_state(), trace=False):
  '''This creates the state graph with random variation of inflow

  Starting from `start`, states are taken from a FIFO queue and expanded
  with next_state(); every newly seen state becomes a box node and every
  transition an edge. The result is rendered as 'state_graph'.

  If `trace` is true, the graph is cleared and create_trace() is called
  (at most 101 times) until it yields a non-empty trace whose last state
  does not have inflow magnitude '+'; that trace is rendered as 'trace'
  and an explanation of every traced state is printed.

  Returns the number of states that were expanded.
  '''
  
  dot = Digraph('State Graph')
  
  change = 1
  vizited = []
  in_graph = [start]
  queue = [start]
  dot.node (state_label(start), state_description(start, 1), shape = 'box')
  
  # Breadth-first expansion of the state space.
  while len(queue):
    cur_state = queue[0]
    del queue[0]
    
    # A state may be queued several times; it is expanded only once.
    if cur_state not in vizited:
      vizited.append(cur_state)
      
      next_states = next_state(cur_state)
      queue = queue + next_states
      
      for state in next_states:
        if state not in in_graph:
          # posn is the 1-based order in which the state entered the graph
          posn = len(in_graph) + 1
          dot.node(state_label(state), state_description(state, posn), shape = 'box')
          in_graph.append(state)
        
        dot.edge(state_label(cur_state), state_label(state))
  
  dot.render('state_graph')

  if trace:
    dot.clear()
    queue = [start]
    dot, traced_states = create_trace(dot, queue)
    counter = 0
    # Retry while the trace is empty or still ends with a positive inflow.
    while (traced_states == [] or traced_states[-1]['inflow']['mag'] == '+') and counter < 100:
      queue = [start]
      dot.clear()
      dot, traced_states = create_trace(dot, queue)
      counter += 1
    if counter >= 100:
      exit("Unknown error while creating a trace, please try again.\n(This may happen many times, sorry)")
    dot.render('trace')

    # Print a textual explanation for every state of the trace.
    # NOTE(review): several checks below read traced_states[i-1] without
    # requiring i > 0; for i == 0 that index refers to the LAST traced state.
    for i, traced_state in enumerate(traced_states):
      if i == 0:
        print("State 1 (starting state, all quantities are 0):\n" + state_description(traced_state))
      elif i > 0 and traced_state['inflow'] == {'mag': '0', 'der': '0'} and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        print("\nState 1:\n" + state_description(traced_state) + "\n")
      else:
        print("\nState " + str(i+1) + ":\n" + state_description(traced_state) + "\n")

      if traced_states[i-1]['inflow']['der'] == '-' and traced_state['volume']['mag'] == 'M':
        print("Because inflow has decreased in state " + str(i) + ", but outflow is still maximum, volume decreases in state " +
        str(i+1) + ".")
      if traced_states[i-1]['volume']['der'] == '-' and traced_states[i-1]['volume']['mag'] == 'M':
        print("Because volume has decreased from the maximum in state " + str(i) + ", volume is + in state " + str(i+1) + ".")
      if i > 0 and traced_states[i-1]['inflow']['der'] == '+' and traced_state['inflow']['der'] == '0' and \
                      traced_state['inflow']['mag'] == '+':
        print("Inflow stopped increasing in state " + str(i+1) + ", but is still +, so volume remains increasing " +
              "(because of the positive influence (I+) from inflow to volume).")
      if traced_state['inflow']['der'] == '-' and traced_states[i-1]['inflow']['der'] != '-':
        print("Inflow starts decreasing in state " + str(i+1) + ".")
      if i > 0 and traced_states[i-1]['inflow']['der'] == '-' and traced_state['inflow']['mag'] == '0':
        print("Inflow reaches zero in state " + str(i+1) + " (and therefore stops decreasing).")
      if traced_state['volume'] == {'mag': 'M', 'der': '0'} and traced_states[i-1]['volume'] != {'mag': 'M', 'der': '0'}:
        print("Volume reaches the maximum in state " + str(i+1) + ", and therefore stops increasing.")
      # Look-ahead explanations: only for states that have both a
      # predecessor and a successor in the trace.
      if i < len(traced_states) - 1 and i != 0:
        for quant, val in traced_state.items():
          if quant == 'inflow' or quant == 'volume':
            if traced_state[quant]['der'] == '+' and traced_states[i+1][quant]['mag'] in '+M' and traced_state[quant]['mag'] != '+':
              print("Because " + quant + " is increasing (from " + traced_states[i][quant]['mag'] + ") in state " +
                    str(i+1) + ", " + quant + " is " + traced_states[i+1][quant]['mag'] + " in state " + str(i+2) + ".")
        if traced_states[i+1]['inflow']['mag'] == '+' and traced_states[i+1]['volume']['der'] == '+':
          print("Because inflow is + in state " + str(i+2) + ", volume is increasing in state " + str(i+2) +
                " (because of the positive influence (I+) from inflow to volume).")
      if i > 1 and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        print("Volume has reached 0 (and therefore stops decreasing).")
      if i > 1 and traced_states[i-1]['volume'] != traced_state['volume']:
        print("Height, pressure and outflow are also (" + traced_state['volume']['mag'] + ", " + traced_state['volume'][
          'der'] + ") because of the value correspondence between volume, height, pressure and outflow.")
      if i > 0 and traced_state['inflow'] == {'mag': '0', 'der': '0'} and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        print("All quantities have reached 0, thus we are at the starting state again.")

  return len(vizited)

Example #14

Show file

File: decisionTree.py Project: pandabytes/PokemonDatamining

class DecisionTree(SupervisedModel):
    ''' Decision Tree classifier. It takes a target feature as the predicted feature.
        It contains a reference to a TreeNode object after it is trained.
        Uses Gini Impurity and builds a binary tree.

        This class uses the example from here as a base https://github.com/random-forests/tutorials/blob/master/decision_tree.ipynb
    '''

    # Names of the supported strategies for splitting continuous features.
    ContinousSplitMethods = ["k-tile", "mean"]

    def __init__(self,
                 targetFeature: str,
                 continuousSplitmethod: str = "k-tile",
                 maxDepth: int = 3,
                 filePath: str = "tree"):
        ''' Create an untrained decision tree.

            @targetFeature: name of the feature to be predicted
            @continuousSplitmethod: one of DecisionTree.ContinousSplitMethods
            @maxDepth: maximum depth of the tree, must not be negative
            @filePath: file name handed to the graphviz Digraph
        '''
        if continuousSplitmethod not in DecisionTree.ContinousSplitMethods:
            message = ("Continuous split method \"" + continuousSplitmethod +
                       "\" is not supported")
            raise ValueError(message)
        if maxDepth < 0:
            raise ValueError("Max depth must be at least 0")

        super().__init__(targetFeature)
        self._trainedRootNode = None
        self._continuousSplitmethod = continuousSplitmethod
        self._maxDepth = maxDepth
        self._filePath = filePath
        # Running counter that hands out node ids while the graph is drawn
        self._nodeId = 0
        self._diGraph = Digraph("G", filename=filePath, format="png")

    @property
    def name(self) -> str:
        ''' Display name of this model: always "Decision Tree" '''
        return "Decision Tree"

    @property
    def maxDepth(self) -> int:
        ''' Get the configured maximum depth of the tree '''
        return self._maxDepth

    @maxDepth.setter
    def maxDepth(self, value: int):
        ''' '''
        if (value < 0):
            raise ValueError("Max depth must be at least 0")
        self._maxDepth = value

    @property
    def continuousSplitMethod(self) -> str:
        ''' Get the name of the split method used for continuous features '''
        return self._continuousSplitmethod

    @continuousSplitMethod.setter
    def continuousSplitMethod(self, value: str):
        ''' '''
        if (value not in DecisionTree.ContinousSplitMethods):
            raise ValueError("Continuous split method \"" + value +
                             "\" is not supported")
        self._continuousSplitmethod = value

    @property
    def numLeafNodes(self) -> int:
        ''' Get the number of leaf nodes in the tree.
            Counted by _countLeafNodes starting at the trained root node.
        '''
        return self._countLeafNodes(self._trainedRootNode)

    @property
    def depth(self) -> int:
        ''' Get the depth of the tree.
            Counted by _countTreeDepth starting at the trained root node.
        '''
        return self._countTreeDepth(self._trainedRootNode)

    def clear(self):
        ''' Clear the current state and all data of the model.
            This doesn't clear the properties of the model, however.

            The trained root node is dropped and the graphviz graph is emptied.
        '''
        self._trainedRootNode = None
        self._diGraph.clear()

    def informationGain(self, left: pd.DataFrame, right: pd.DataFrame,
                        currentImpurity: float) -> float:
        ''' Compute how much a split reduces the impurity.

            The impurity of both partitions is weighted by their share of the rows.

            @left: the left partition of the data
            @right: the right partition of the data
            @currentImpurity: impurity value of the left and right partition data combined
            @return: currentImpurity minus the weighted impurity of the two partitions
        '''
        leftSize, rightSize = len(left), len(right)
        leftWeight = leftSize / float(leftSize + rightSize)
        weightedLeft = leftWeight * self.giniImpurity(left)
        weightedRight = (1 - leftWeight) * self.giniImpurity(right)
        return currentImpurity - (weightedLeft + weightedRight)

    def giniImpurity(self, dataFrame: pd.DataFrame) -> float:
        ''' Compute the Gini Impurity of the target feature in the given data frame:
            1 minus the sum of the squared label probabilities.

            @dataFrame: data frame object
            @return: gini impurity value of the given data frame
        '''
        labelCounts = dataFrame[self._targetFeature].value_counts()
        remaining = 1
        for count in labelCounts.values:
            share = count / float(len(dataFrame))
            remaining -= share**2
        return remaining

    def partition(self, dataFrame: pd.DataFrame, feature: str,
                  value: object) -> (pd.DataFrame, pd.DataFrame):
        ''' Split the given data frame in two by the given feature and its value.
            Continuous and categorical features are handed to their own partition method.

            @dataFrame: the data frame object
            @feature: the featured that is used as the splitting feature
            @value: value of the given feature
            @return: a tuple of 2 partitions after the split; (None, None) when the
                     feature is neither continuous nor categorical
        '''
        featureType = super()._getFeatureType(dataFrame, feature)

        if featureType == FeatureType.Continuous:
            if not DecisionTree.isContinuous(value):
                raise ValueError(
                    "Numeric feature must be passed with a numeric value")
            lower, upper = self.partitionContinuous(dataFrame, feature, value)
            return lower, upper

        if featureType == FeatureType.Categorical:
            if not DecisionTree.isCategorical(value):
                raise ValueError(
                    "Categorical feature must be passed with a string or boolean value"
                )
            matching, others = self.partitionDiscreteBinary(
                dataFrame, feature, value)
            return matching, others

        return None, None

    def partitionContinuous(self, dataFrame: pd.DataFrame, feature: str,
                            value: float) -> (pd.DataFrame, pd.DataFrame):
        ''' Split the rows at a threshold of a continuous feature.

            @dataFrame: the data frame object
            @feature: the featured that is used as the splitting feature
            @value: threshold; rows below it go left, rows at or above it go right
            @return: a tuple of 2 partitions after the split
        '''
        column = dataFrame[feature]
        belowThreshold = column < value
        atOrAboveThreshold = dataFrame[feature] >= value
        return dataFrame[belowThreshold], dataFrame[atOrAboveThreshold]

    def partitionDiscrete(self, dataFrame: pd.DataFrame,
                          feature: str) -> (pd.DataFrame, pd.DataFrame):
        ''' Split the rows into one partition per distinct value of a categorical feature.

            @dataFrame: the data frame object
            @feature: the featured that is used as the splitting feature
            @return: a list with one partition for every unique value of the feature
        '''
        return [
            dataFrame[dataFrame[feature] == category]
            for category in dataFrame[feature].unique()
        ]

    def partitionDiscreteBinary(self, dataFrame: pd.DataFrame, feature: str,
                                value: str) -> (pd.DataFrame, pd.DataFrame):
        ''' Split the rows by whether a categorical feature equals the given value.

            @dataFrame: the data frame object
            @feature: the featured that is used as the splitting feature
            @value: rows whose feature equals it go left, all other rows go right
            @return: a tuple of 2 partitions after the split
        '''
        isMatch = dataFrame[feature] == value
        isOther = dataFrame[feature] != value
        return dataFrame[isMatch], dataFrame[isOther]

    def findBestFeature(
            self,
            dataFrame: pd.DataFrame,
            quantiles: list = None) -> (str, object, float):
        ''' Find the best feature to split the given data frame. Quantiles are optional and 
            are only used for continous features.

            @dataFrame: the data frame object
            @quantiles (optional): list of quantiles to test against to find the best quantile.
                                   Defaults to [0.2, 0.4, 0.6, 0.8] when omitted or None.
            @return: a tuple of the best feature to be split, its corresponding value, and the best information gain.
                     Feature and value are None when no split gives a gain above 0.
        '''
        # A fresh list for every call instead of one default list shared by all calls
        if quantiles is None:
            quantiles = [0.2, 0.4, 0.6, 0.8]

        bestGain = 0.0
        currentImpurity = self.giniImpurity(dataFrame)
        bestFeature = None
        bestFeatureValue = None
        # Every column except the target feature is a split candidate
        features = dataFrame.loc[:, dataFrame.columns != self.
                                 _targetFeature].columns.values

        for feature in features:
            featureType = super()._getFeatureType(dataFrame, feature)

            if (featureType == FeatureType.Continuous):
                infoGain, featureValue = 0.0, 0.0

                if (self._continuousSplitmethod == "k-tile"):
                    infoGain, featureValue = self._splitByKTile(
                        dataFrame, feature, quantiles)
                elif (self._continuousSplitmethod == "mean"):
                    infoGain, featureValue = self._splitByMean(
                        dataFrame, feature)

                # Store the current best values
                if (infoGain > bestGain):
                    bestGain = infoGain
                    bestFeature = feature
                    bestFeatureValue = featureValue

            elif (featureType == FeatureType.Categorical):
                for featureValue in dataFrame[feature].unique():
                    leftData, rightData = self.partition(
                        dataFrame, feature, featureValue)

                    # A split that leaves one side empty separates nothing
                    if (len(leftData) == 0 or len(rightData) == 0):
                        continue

                    infoGain = self.informationGain(leftData, rightData,
                                                    currentImpurity)
                    if (infoGain > bestGain):
                        bestGain = infoGain
                        bestFeature = feature
                        bestFeatureValue = featureValue

        return bestFeature, bestFeatureValue, bestGain

    @decor.elapsedTime
    def train(self, dataFrame: pd.DataFrame, **kwargs):
        ''' Train the decision tree with the given data frame input. Build the tree.
            Any previously trained tree and graph are cleared first.
        
            @dataFrame: the data frame object
            @kwargs: not used by this method
        '''
        self.clear()
        # Build the tree starting at depth 0
        self._trainedRootNode = self._buildTree(dataFrame, 0)

    def classify(self, dataFrame: pd.DataFrame, **kwargs):
        ''' Classify every row of the input data frame with the trained tree.

            The result is a data frame with 2 columns: Prediction holds the predicted label
            of a data point and Probability holds the probability that the prediction is drawn from.

            @dataFrame: the data frame object
        '''
        super().classify(dataFrame, **kwargs)

        predictions, probabilities = [], []
        for _, sample in dataFrame.iterrows():
            label, confidence = self._classifyOneSample(
                sample, self._trainedRootNode)
            predictions.append(label)
            probabilities.append(confidence)

        return self._createResultDataFrame(predictions, probabilities,
                                           dataFrame.index)

    def getTreeGraph(self, regenerate: bool) -> Digraph:
        ''' Get the graph object representing this decision tree.
            On request the graph is emptied and drawn again from the trained root node.
        
            @regenerate: True if we want to regenerate the graph object. False otherwise
            @return: a Digraph object from graphviz library
        '''
        if not regenerate:
            return self._diGraph

        self._diGraph.clear()
        self._nodeId = 0  # node ids start from 0 again for the new drawing
        self._generateGraph(self._trainedRootNode)
        return self._diGraph

    def _createEdgeLabel(self, branch: str, featureValue: object) -> str:
        ''' Create edge label according to the type of the feature and its value 
        
            @branch: value must be "left" or "right". Case-sensitive
            @featureValue: feature value to be displayed in the edge label
            @return: the edge label 
        '''
        if (branch != "left") and (branch != "right"):
            raise ValueError(
                "Argument branch must be either \"left\" or \"right\"")

        if (DecisionTree.isCategorical(featureValue)):
            if (branch == "left"):
                return "yes"
            else:
                return "no"

        if (DecisionTree.isContinuous(featureValue)):
            if (branch == "left"):
                return "< {0:.2f}".format(featureValue)
            else:
                return ">= {0:.2f}".format(featureValue)

        raise ValueError(
            "Feature type \"{0}\" is not str, int, bool, or float".format(
                featureValueType))

    def _generateGraph(self, node: TreeNode):
        ''' Generate the decision tree graph. Assign unique id to each node 
            starting from the root, left side, and then right side.

            The id of @node is the value of self._nodeId on entry. Ids for the
            children are allocated by advancing self._nodeId. Unless @node is
            itself a leaf, self._nodeId ends up past every id used by this
            subtree, so the caller can hand that value out next.

            @node: the root node of the (sub)tree to draw; None is ignored
        '''
        if (node is None):
            return

        left = node.left
        right = node.right
        nodeId = self._nodeId

        decisionNodeLabelFormat = "{0}\nValue: {1}"
        leafNodeLabelFormat = "Prediction: {0}\nProbability: {1:.2f}"

        def decisionNodeLabelFunc(feature, value):
            # Strings and booleans are shown verbatim; every other value is
            # treated as a number and rounded to two decimals.
            if (type(value) in (str, bool, np.bool_)):
                shownValue = value
            else:
                shownValue = round(value, 2)
            return decisionNodeLabelFormat.format(feature, shownValue)

        # If the root node is a leaf node
        if (type(node) is LeafNode):
            nodeLabel = leafNodeLabelFormat.format(node.prediction,
                                                   node.probability)
            self._addNode(LeafNode, nodeId, nodeLabel)
            return
        else:
            nodeLabel = decisionNodeLabelFunc(node.feature, node.featureValue)
            self._addNode(DecisionNode, nodeId, nodeLabel)

        if (type(left) is LeafNode and type(right) is LeafNode):
            leftLabel = leafNodeLabelFormat.format(left.prediction,
                                                   left.probability)
            rightLabel = leafNodeLabelFormat.format(right.prediction,
                                                    right.probability)

            # Get left and right node id
            leftId = self._nodeId + 1
            rightId = self._nodeId + 2
            self._nodeId += 2

            # Each leaf carries its own prediction label, not the label of
            # the decision node above it.
            self._addNode(LeafNode, leftId, leftLabel)
            self._addNode(LeafNode, rightId, rightLabel)

            self._addEdge(nodeId, leftId,
                          self._createEdgeLabel("left", node.featureValue))
            self._addEdge(nodeId, rightId,
                          self._createEdgeLabel("right", node.featureValue))

        elif (type(left) is LeafNode):
            leftLabel = leafNodeLabelFormat.format(left.prediction,
                                                   left.probability)
            rightLabel = decisionNodeLabelFunc(right.feature,
                                               right.featureValue)

            # Assign id to the left node first
            leftId = self._nodeId + 1
            self._addNode(LeafNode, leftId, leftLabel)
            self._nodeId += 1

            # Then assign id to the right node recursively. self._nodeId now
            # equals rightId, so the recursive call uses that id for @right
            # (and registers the same node/label once more).
            rightId = self._nodeId + 1
            self._addNode(DecisionNode, rightId, rightLabel)
            self._nodeId += 1
            self._generateGraph(right)

            self._addEdge(nodeId, leftId,
                          self._createEdgeLabel("left", node.featureValue))
            self._addEdge(nodeId, rightId,
                          self._createEdgeLabel("right", node.featureValue))

        elif (type(right) is LeafNode):
            leftLabel = decisionNodeLabelFunc(left.feature, left.featureValue)
            rightLabel = leafNodeLabelFormat.format(right.prediction,
                                                    right.probability)

            # Assign id to the left node first recursively
            leftId = self._nodeId + 1
            self._addNode(DecisionNode, leftId, leftLabel)
            self._nodeId += 1
            self._generateGraph(left)

            # Then assign id to the right node.
            # The recursive call above already left self._nodeId on an unused
            # id, so no extra increment is needed here.
            rightId = self._nodeId
            self._addNode(LeafNode, rightId, rightLabel)

            self._addEdge(nodeId, leftId,
                          self._createEdgeLabel("left", node.featureValue))
            self._addEdge(nodeId, rightId,
                          self._createEdgeLabel("right", node.featureValue))

        else:
            leftLabel = decisionNodeLabelFunc(left.feature, left.featureValue)
            rightLabel = decisionNodeLabelFunc(right.feature,
                                               right.featureValue)

            # Assign id to the left node first recursively
            leftId = self._nodeId + 1
            self._addNode(DecisionNode, leftId, leftLabel)
            self._nodeId += 1
            self._generateGraph(left)

            # Then assign id to the right node recursively.
            # The left recursion left self._nodeId on an unused id; the right
            # recursion picks that same value up as the id of @right.
            rightId = self._nodeId
            self._addNode(DecisionNode, rightId, rightLabel)
            self._generateGraph(right)

            self._addEdge(nodeId, leftId,
                          self._createEdgeLabel("left", node.featureValue))
            self._addEdge(nodeId, rightId,
                          self._createEdgeLabel("right", node.featureValue))

        # Step past the last id used by this subtree.
        self._nodeId += 1

    def _addEdge(self, fromId: int, toId: int, nodeLabel: str):
        ''' Draw a labelled edge between two nodes of the graph

            @fromId: id of the node the edge starts at
            @toId: id of the node the edge points to
            @nodeLabel: text shown on the edge
        '''
        # Node ids are converted to strings before being handed to the graph
        tailName, headName = str(fromId), str(toId)
        self._diGraph.edge(tailName, headName, label=nodeLabel)

    def _addNode(self, nodeType: TreeNode, nodeId: int, nodeLabel: str):
        ''' Add a node to the graph; leaf nodes are drawn in red

            @nodeType: the node class, either LeafNode or DecisionNode
            @nodeId: id of the node (converted to a string for the graph)
            @nodeLabel: text shown inside the node
            @raise ValueError: if @nodeType is neither LeafNode nor DecisionNode
        '''
        if (nodeType is not LeafNode) and (nodeType is not DecisionNode):
            raise ValueError("Invalid node type \"{0}\"".format(nodeType))

        # Only leaves get an extra styling attribute
        extraAttributes = {"color": "red"} if (nodeType is LeafNode) else {}
        self._diGraph.node(str(nodeId), nodeLabel, **extraAttributes)

    def _classifyOneSample(self, row: pd.Series,
                           node: TreeNode) -> (str, float):
        ''' Walk the tree from @node down to a leaf for a single sample

            A sample goes to the left child when its value is continuous and
            strictly smaller than the node's split value, or when it equals
            the split value; otherwise it goes to the right child.

            @row: row of a data frame
            @node: the node to start from (the root of the decision tree)
            @return: the prediction and probability stored in the leaf reached
        '''
        current = node
        while type(current) is not LeafNode:
            sampleValue = row[current.feature]
            splitValue = current.featureValue

            # Inequality is only meaningful for numeric values; everything
            # else can only match through ==
            isBelowSplit = (DecisionTree.isContinuous(sampleValue)
                            and sampleValue < splitValue)
            if isBelowSplit or (sampleValue == splitValue):
                current = current.left
            else:
                current = current.right

        return current.prediction, current.probability

    def _buildTreeThread(self, dataFrame, depth):
        ''' Build the trained decision tree using multithreading. At depth 0 this
            creates 2 working threads, one for the left and one for the right
            branch of the tree; deeper levels are built recursively inside
            those threads.

            @TODO: UNUSED AND INCOMPLETE

            @dataFrame: data frame holding the samples that reached this node
            @depth: depth of the node being built (0 for the root)
            @return: a LeafNode or DecisionNode for this subtree
        '''
        predictionCount = dataFrame[self._targetFeature].value_counts()

        def makeMajorityLeaf():
            # Leaf predicting the most frequent label, with its relative frequency
            bestLabel, bestLabelCount = max(predictionCount.items(),
                                            key=lambda x: x[1])
            bestProb = float(bestLabelCount) / sum(predictionCount)
            return LeafNode(prediction=bestLabel, probability=bestProb)

        # Stop splitting once the max depth of the tree is reached
        if (depth >= self._maxDepth):
            return makeMajorityLeaf()

        # Stop splitting if there's no more information to gain
        feature, featureValue, infoGain = self.findBestFeature(dataFrame)
        if (infoGain == 0):
            return makeMajorityLeaf()

        leftData, rightData = self.partition(dataFrame, feature, featureValue)
        if (depth == 0):
            # Start the threads asynchronously
            pool = ThreadPool(processes=2)
            try:
                t1 = pool.apply_async(self._buildTreeThread,
                                      (leftData, depth + 1))
                t2 = pool.apply_async(self._buildTreeThread,
                                      (rightData, depth + 1))
                print("waiting for threads")
                t1.wait()
                t2.wait()
                # get() re-raises any exception raised inside the worker
                leftSubTree = t1.get()
                rightSubTree = t2.get()
            finally:
                # Always release the worker threads, even if a branch failed
                pool.close()
                pool.join()
        else:
            leftSubTree = self._buildTreeThread(leftData, depth + 1)
            rightSubTree = self._buildTreeThread(rightData, depth + 1)

        return DecisionNode(leftSubTree, rightSubTree, feature, featureValue)

    def _buildTree(self, dataFrame: pd.DataFrame, depth: int) -> TreeNode:
        ''' Recursively build the decision tree for the given data frame

            @dataFrame: data frame object holding the samples of this node
            @depth: depth of the node being built. Splitting stops once this
                    reaches self._maxDepth.
            @return: the root node of the (sub)tree
        '''
        labelCounts = dataFrame[self._targetFeature].value_counts()

        def majorityLeaf() -> TreeNode:
            # Predict the most frequent label together with its share of the samples
            topLabel, topCount = max(labelCounts.items(),
                                     key=lambda pair: pair[1])
            share = float(topCount) / sum(labelCounts)
            return LeafNode(prediction=topLabel, probability=share)

        # The tree is not allowed to grow any deeper
        if (depth >= self._maxDepth):
            return majorityLeaf()

        # No split improves on the current node
        feature, featureValue, infoGain = self.findBestFeature(dataFrame)
        if (infoGain == 0):
            return majorityLeaf()

        leftData, rightData = self.partition(dataFrame, feature, featureValue)

        # Arguments are evaluated left to right, so the left branch is built first
        return DecisionNode(self._buildTree(leftData, depth + 1),
                            self._buildTree(rightData, depth + 1),
                            feature,
                            featureValue)

    def _countLeafNodes(self, node: TreeNode) -> int:
        ''' Recursively count the leaf nodes below (and including) @node

            @node: root node of the (sub)tree; None counts as zero leaves
            @return: number of leaf nodes
        '''
        if (node is None):
            return 0
        if (type(node) is LeafNode):
            return 1

        leftCount = self._countLeafNodes(node.left)
        rightCount = self._countLeafNodes(node.right)
        return leftCount + rightCount

    def _countTreeDepth(self, node: TreeNode) -> int:
        ''' Helper function for counting the tree depth 

            A missing node and a node without children both have depth 0;
            every level of children above that adds 1.

            @node: root node of the decision tree
            @return: the depth of the decision tree
        '''
        # Identity comparison: None is a singleton and must not go through
        # a user-defined __eq__
        if (node is None) or (node.left is None and node.right is None):
            return 0
        else:
            return 1 + max(self._countTreeDepth(node.left),
                           self._countTreeDepth(node.right))

    def _splitByKTile(self, dataFrame: pd.DataFrame, feature: str,
                      quantiles: [int]) -> (float, float):
        ''' Pick the best split point for a continuous feature among its quantiles

            @dataFrame: data frame object
            @feature: the feature that is used as the splitting point
            @quantiles: the quantiles to evaluate as candidate split points
            @return: tuple of the best information gain and the quantile value
                     that produced it. The value is None (and the gain 0.0)
                     when no candidate gave a positive gain.
        '''
        topGain, topValue = 0.0, None
        baseImpurity = self.giniImpurity(dataFrame)
        candidates = dataFrame[feature].quantile(quantiles, "linear")

        for candidate in candidates:
            lower, upper = self.partition(dataFrame, feature, candidate)

            # A split that leaves one side empty separates nothing
            if (min(len(lower), len(upper)) == 0):
                continue

            gain = self.informationGain(lower, upper, baseImpurity)
            if (gain > topGain):
                topGain, topValue = gain, candidate

        return topGain, topValue

    def _splitByMean(self, dataFrame: pd.DataFrame,
                     feature: str) -> (float, float):
        ''' Split a continuous feature at its mean value

            @dataFrame: data frame object
            @feature: the feature that is used as the splitting point
            @return: tuple containing the information gain of the split and the mean
        '''
        # The mean of the column is the one and only candidate split point
        splitPoint = dataFrame[feature].mean()
        baseImpurity = self.giniImpurity(dataFrame)

        lower, upper = self.partition(dataFrame, feature, splitPoint)
        return self.informationGain(lower, upper, baseImpurity), splitPoint

Example #15

Show file

class Automaton:
    """
    A Finite State Automaton (FSA), a di-graph with the following elements:
        - nodes (or states): can be initial or accepting or neither
        - edges: transitions between nodes or a self-loop edge.
        - each edge has a label from some finite alphabet

    Edges are stored in a square matrix (list of lists): ``edges[i][j]`` is the
    list of labels on the transition from the node with index ``i`` to the
    node with index ``j``. An empty list means "no edge".

    Node indices are handed out from ``node_index`` and never reused, so the
    matrix must always be larger than ``node_index``.

    We rely on graphviz and Qt to draw an image of the FSA.
    """
    def __init__(self):
        self.nodes = []
        self.node_index = 0
        self.root = None
        self.accepting_nodes = []
        self.deleted_indices = set()
        self.esize = 100
        self.edges = [[[] for _ in range(self.esize)] for _ in range(self.esize)]
        self.graph_fp = 'graphs/dfs_' + str(round(time.time()))
        self.graph = Digraph('finite_state_machine', format='png', filename=self.graph_fp)
        self.graph.attr(rankdir='LR', size='10')


    def __str__(self):
        """
        Summarise root, nodes, accepting nodes and edges (edges as
        ``(source index=>target index, "labels")``).
        """
        edge_descriptions = []
        for source in self.nodes:
            for target in self.nodes:
                labels = self.edges[source.index][target.index]
                if labels:
                    edge_descriptions.append('({}=>{}, "{}")'.format(
                        source.index,
                        target.index,
                        ','.join(str(label) for label in labels)))

        return ' Root: [{}]\n Nodes [{}]: ({})\n Final [{}]: [{}]\n Edges [{}]: [{}]'.format(
            self.root.label if self.root else 'None',
            len(self.nodes),
            ', '.join(n.label for n in self.nodes),
            len(self.accepting_nodes),
            ', '.join(n.label for n in self.accepting_nodes),
            len(edge_descriptions),
            ', '.join(edge_descriptions)
                )


    def add_node(self, is_initial=False, is_final=False, label=''):
        """
        Creates a node object and make sure that the matrix self.edges is larger
        than the highest node index in use.

        :returns: the new Node object
        """
        # Indices are never reused after a deletion, so the matrix has to be
        # sized by the next index, not by the number of live nodes.
        if self.node_index >= self.esize:
            self.expand_edges()

        node = Node( self.node_index, label, is_initial, is_final)
        self.node_index += 1
        self.nodes.append( node )

        if not self.root or is_initial:
            self.root = node
        if is_final:
            self.accepting_nodes.append( node )
        return node


    def delete_node(self, node):
        """
        Remove node from lists and keep track of its index
        (this is necessary to ignore edges of deleted nodes later).
        """
        self.deleted_indices.add( node.index )
        self.nodes.remove(node)
        if node in self.accepting_nodes:
            self.accepting_nodes.remove(node)


    def merge_nodes(self, nodes, new_label=''):
        """
        Merges a list of nodes into a new node.
        All edges to and from the merged nodes are reassigned to the new node.
        Accepting nodes that are not part of the merge stay accepting.

        :args:
            nodes       - list of Node objects
            new_label   - string, label of the new node
        :returns:
            new_node    - Node object
        """
        # Snapshot first: add_node() below appends to self.nodes, which may be
        # the very list the caller passed in.
        merged = list(nodes)

        # Keep track of the indices of the nodes to be merged, so we
        # don't try to merge the new node as well (e.g. when it's accepting node)
        merge_indices = {n.index for n in merged}
        root_was_merged = any(n is self.root for n in merged)

        # Create new Node object
        # If any of the old nodes is initial or accepting, inherit this property
        new_node = self.add_node(
                        is_initial=any(n.is_initial for n in merged),
                        is_final=any(n.is_final for n in merged),
                        label=new_label
                        )

        # Move every edge that touches a merged node over to the new node
        for n1 in list(self.nodes):
            for n2 in list(self.nodes):
                labels = self.edges[n1.index][n2.index]
                from_merged = n1.index in merge_indices
                to_merged = n2.index in merge_indices
                if not labels or not (from_merged or to_merged):
                    continue

                if from_merged and to_merged:
                    # Self-loop or edge between two merged nodes
                    self.add_edges(new_node, new_node, labels)
                elif from_merged:
                    # Edge is *from* nodes to be merged
                    self.add_edges(new_node, n2, labels)
                else:
                    # Edge is *to* nodes to be merged
                    self.add_edges(n1, new_node, labels)
                # Delete old edge
                self.edges[n1.index][n2.index] = []

        # Delete merged nodes
        for node in merged:
            self.delete_node(node)

        # add_node() already registered the new node as accepting (and as root
        # when it is initial). The root also has to move when the old root was
        # merged away without being flagged as initial.
        if new_node.is_initial or root_was_merged:
            self.root = new_node

        return new_node


    def add_edge(self, n1, n2, label):
        """Append one label to the edge n1 -> n2."""
        self.edges[n1.index][n2.index].append(label)


    def add_edges(self, n1, n2, labels):
        """
        Add several labels to the edge n1 -> n2, skipping labels it already has.
        The caller's list is never stored, so later changes to the edge do not
        leak back into it.
        """
        existing = self.edges[n1.index][n2.index]
        for label in labels:
            if label not in existing:
                existing.append(label)


    def get_edge(self, n1, n2):
        """
        Return the label list of the edge n1 -> n2 (empty when there is no
        edge), or None when the cell holds the single empty label [''].
        """
        edge = self.edges[n1.index][n2.index]
        return edge if edge != [''] else None


    def delete_edge(self, n1, n2):
        """Remove every label from the edge n1 -> n2."""
        # The matrix is a list of lists, so the two indices are applied one
        # after the other. An empty list is what the rest of the class treats
        # as "no edge".
        self.edges[n1.index][n2.index] = []


    def expand_edges(self):
        """
        Replaces the current edges matrix with one twice as large, keeping
        every existing cell at the same [source][target] position.
        """
        old_size = self.esize
        self.esize = old_size * 2
        # Widen the existing rows ...
        new_edges = [row + [[] for _ in range(old_size)] for row in self.edges]
        # ... and append fresh, fully empty rows below them
        new_edges.extend(
            [[] for _ in range(self.esize)] for _ in range(old_size))
        self.edges = new_edges


    def show(self, title='Finite State Automaton'):
        """
        Open a QT window and draw Automaton with graphviz.
        Blocks until the window is closed.
        """
        self.reset_graph()
        self.graph.render()
        app = QtWidgets.QApplication(sys.argv)
        window = QtWidgets.QWidget()
        image_label = QtWidgets.QLabel(window)
        image_label.setText("Your Finite State Automaton:")
        # graphviz stores the rendered image next to the source file
        pixmap = QtGui.QPixmap(self.graph_fp + '.png')
        image_label.setPixmap(pixmap)
        window.setGeometry(0, 0, pixmap.width()+100, pixmap.height()+50)
        image_label.move(50,20)
        window.setWindowTitle(title)
        window.show()
        app.exec_()


    def reset_graph(self):
        """
        Reconstruct a new graphviz graph from the current nodes and edges.

        Accepting nodes are drawn as double circles, the root / initial node
        is filled yellow and drawn slightly larger.
        """
        self.graph.clear()
        # clear() also drops the layout statements set in __init__
        self.graph.attr(rankdir='LR', size='10')

        # Add all nodes. Styling is passed per node: graph-wide
        # attr('node', ...) statements would stick to all following nodes.
        for node in self.nodes:
            style = {'shape': 'circle', 'style': 'filled', 'fillcolor': 'azure2'}
            if node.is_final:
                style.update(shape='doublecircle', fillcolor='lightskyblue')
            if node == self.root or node.is_initial:
                style.update(width='0.8', height='0.8', fillcolor='yellow')
            label = '<q<SUB><FONT POINT-SIZE="10">' + str(node.index) + '</FONT></SUB>>'
            self.graph.node( str(node.index), label=label, **style )

        # Add edges. Only live nodes are visited, so edges of deleted nodes
        # are skipped and no index beyond the matrix is touched.
        for source in self.nodes:
            for target in self.nodes:
                labels = self.edges[source.index][target.index]
                if labels:
                    self.graph.edge(str(source.index), str(target.index), label=''.join(labels))

Example #16

Show file

            kakari=line.split(' ')
            count=int(kakari[1])
            dst=kakari[2].replace('D','')
            chunks[count].dst=int(dst)                    #chunk.dst
            chunks[count].counter=count
        else:
            for number in range(count+1):
                if chunks[number].dst != -1:
                    chunks[chunks[number].dst].srcs.append(number)
                dg.node(''.join(chunks[number].morphs))

            for result in range(count+1):
                x="{0}{1}\t{2}{3}"
                if chunks[result].dst == -1:
                    saki=''
                else:
                    saki=''.join(chunks[chunks[result].dst].morphs)
                if saki:
                    moto=''.join(chunks[result].morphs)
                    dg.edge(moto,saki)
            if dg:
                prin+=1
                filename='nock44'+str(prin)
                dg.render(filename)
                dg.clear()

            for ketu in range(count+1):
                chunks[ketu].morphs=[]
                chunks[ketu].srcs=[]
            count=0

Example #17

Show file

File: srl_visualization.py Project: kasinxc/Visualizing-Trend-of-Key-Roles-in-News-Articles

def tree(relations, input_data_entries):
    """Build a left-to-right graphviz tree: role -> verb -> other role.

    For every role returned by ``get_role_to_relations_of_interest_mappings``
    the verbs are ranked by count (ties broken by verb text, both
    descending). A verb is drawn when its count lies inside
    ``[min_verb_count_to_draw, max_verb_count_to_draw]`` and at least one of
    its other roles has a count inside
    ``[min_verb_other_roles_count_to_draw, max_verb_other_roles_count_to_draw]``.
    Drawing stops once ``top_ranking_verbs`` verbs were drawn for the role.
    For every drawn verb, the other roles inside that second range are
    attached to it. Edge labels are the counts.

    The thresholds and the ``enable_tfidf`` / ``enable_word_embedding``
    switches are read from names defined outside this function.

    :param relations: relations handed to
        ``get_role_to_relations_of_interest_mappings``.
    :param input_data_entries: mapping whose values expose
        ``reduced_title_desc``; the cleaned text is used by the TF-IDF helpers.
    :return: the populated ``Digraph`` (PNG format).
    """
    role_to_relations_of_interest_mappings = get_role_to_relations_of_interest_mappings(
        role_of_interest, relations)

    # A freshly constructed Digraph is already empty, so no clear() is needed.
    tree_graph = Digraph(format='png')
    tree_graph.attr(rankdir='LR')

    # Punctuation-free text per entry (tfidf use)
    no_punctuation_input_title_desc = {
        ai: clean_punctuation(de.reduced_title_desc)
        for ai, de in input_data_entries.items()
    }

    for interested_role_name, relations_of_interest in role_to_relations_of_interest_mappings.items():
        verb_counts = {}
        verb_to_other_roles_mappings = {}  # currently just object
        object_to_tfidf_mappings_under_verb = {}  # verb -> obj -> tfidf use

        for relation in relations_of_interest:
            verb_counts, verb_to_other_roles_mappings = update_verb_to_other_roles_mappings(
                relation, verb_counts, verb_to_other_roles_mappings)

            if enable_tfidf:
                # TF-IDF per relation object; objects whose scores are close
                # enough get merged further down (merge_object).
                object_to_tfidf_mappings_under_verb = update_object_to_tfidf_mappings(
                    no_punctuation_input_title_desc, relation,
                    object_to_tfidf_mappings_under_verb)

        if enable_word_embedding:
            merge_verb(verb_counts, verb_to_other_roles_mappings,
                       object_to_tfidf_mappings_under_verb)

        # Highest count first; equal counts are ordered by verb text, descending
        sorted_verb_counts = sorted(verb_counts.items(),
                                    key=lambda kv: (kv[1], kv[0]),
                                    reverse=True)

        if enable_tfidf:
            verb_to_other_roles_mappings = merge_object(
                verb_to_other_roles_mappings,
                object_to_tfidf_mappings_under_verb)

        # First level: role -> verb
        drew_verbs = set()
        for verb_words, count in sorted_verb_counts:
            if not (min_verb_count_to_draw <= count <= max_verb_count_to_draw):
                continue

            # Only draw a verb that will end up with at least one child
            can_draw = any(
                min_verb_other_roles_count_to_draw <= other_role_count <= max_verb_other_roles_count_to_draw
                for other_role_count in verb_to_other_roles_mappings[verb_words].values())
            if not can_draw:
                continue

            verb_name = interested_role_name + '.' + verb_words
            tree_graph.node(interested_role_name,
                            interested_role_name,
                            color='red')
            tree_graph.node(verb_name, verb_words)
            tree_graph.edge(interested_role_name,
                            verb_name,
                            label=str(count))
            drew_verbs.add(verb_words)
            if len(drew_verbs) >= top_ranking_verbs:
                break

        # Second level: verb -> other role, for the verbs drawn above
        for verb_words, other_roles_count in verb_to_other_roles_mappings.items():
            if verb_words not in drew_verbs:
                continue
            verb_name = interested_role_name + '.' + verb_words
            for other_role_words, count in other_roles_count.items():
                if min_verb_other_roles_count_to_draw <= count <= max_verb_other_roles_count_to_draw:
                    other_role_name = verb_name + '.' + other_role_words
                    tree_graph.node(other_role_name, other_role_words)
                    tree_graph.edge(verb_name,
                                    other_role_name,
                                    label=str(count))

    return tree_graph

Example #18

Show file

class DecisionTree(SupervisedModel):
    ''' '''
    ContinuousKeyFormat = "{0}{1:.3f}"

    def __init__(self, maxDepth=3, numberFeaturesToSplit=0):
        ''' Constructor

            @maxDepth: depth at which the tree stops splitting; must not be negative
            @numberFeaturesToSplit: value handed to getRandomFeatures() when a
                                    node is split; must not be negative. None is
                                    accepted, as in the property setter.
            @raise ValueError: if one of the arguments is negative
        '''
        if (maxDepth < 0):
            raise ValueError("Max depth must be at least 0")
        # 0 is a valid value (it is the default), so the lower bound is 0
        if (numberFeaturesToSplit is not None and numberFeaturesToSplit < 0):
            raise ValueError("Number of features to split must be at least 0")

        super().__init__()
        self._maxDepth = maxDepth
        self._numberFeaturesToSplit = numberFeaturesToSplit
        self._rootNode = None
        self._diGraph = Digraph("G", format="png")
        # Only set while train() is running
        self._dataFrame = None
        self._targetSeries = None

    @property
    def maxDepth(self) -> int:
        ''' Get the depth at which buildTree() stops splitting '''
        return self._maxDepth

    @maxDepth.setter
    def maxDepth(self, value: int):
        ''' Set the maximum depth of the tree

            @value: the new maximum depth
            @raise ValueError: if @value is negative
        '''
        if (value < 0):
            raise ValueError("Max depth must be at least 0")
        self._maxDepth = value

    @property
    def numberFeaturesToSplit(self):
        ''' Get the number of features handed to getRandomFeatures() for each split '''
        return self._numberFeaturesToSplit

    @numberFeaturesToSplit.setter
    def numberFeaturesToSplit(self, value):
        ''' Set the number of features to consider for each split

            @value: the new number of features; None is accepted as well
            @raise ValueError: if @value is negative
        '''
        # 0 passes this check, so the documented lower bound is 0, not 1
        if (value is not None and value < 0):
            raise ValueError("Number of features to split must be at least 0")
        self._numberFeaturesToSplit = value

    @property
    def depth(self) -> int:
        ''' Get the depth of the tree, as computed by _countTreeDepth() from the root node '''
        return self._countTreeDepth(self._rootNode)

    @property
    def numLeafNodes(self) -> int:
        ''' Get the number of leaf nodes in the tree, as counted by _countLeafNodes() from the root node '''
        return self._countLeafNodes(self._rootNode)

    @property
    def treeStructure(self):
        ''' Get the structure of the tree as produced by _treeStructure() for the root node.

            printTreeStructure() walks the result with .items(), so it is
            presumably a nested mapping - confirm against _treeStructure().
        '''
        return self._treeStructure(self._rootNode)

    @property
    def graph(self) -> Digraph:
        ''' Get the graph object representing this decision tree.

            The internal Digraph is cleared and regenerated from the root node
            on every access; the same Digraph instance is returned each time.

            @return: a Digraph object from graphviz library
        '''
        self._diGraph.clear()
        self._generateGraph(self._rootNode)
        return self._diGraph

    @property
    def featureImportance(self) -> pd.DataFrame:
        ''' Get the features and their associated importance values.

            The values are collected by _calcFeatureImportance() starting at the
            root node, so the tree has to be trained first.

            @return: a data frame with the columns "Feature" and "Value",
                     sorted by "Value" from great to small, with a fresh
                     0..n-1 index
        '''
        featureImportances = {}
        self._calcFeatureImportance(self._rootNode, self._rootNode.sampleCount,
                                    featureImportances)

        # Convert dict to DataFrame, sorted by "Value" from great to small.
        # The items view is materialised first, so the constructor receives a
        # plain list of (feature, value) pairs.
        featureImportDf = pd.DataFrame(list(featureImportances.items()),
                                       columns=["Feature", "Value"])
        featureImportDf.sort_values("Value", inplace=True, ascending=False)
        featureImportDf.reset_index(drop=True, inplace=True)
        return featureImportDf

    @dc.elapsedTime
    def train(self, dataFrame, targetSeries, **kwargs):
        ''' Train the decision tree and store its root node

            @dataFrame: data frame holding the features, one column per feature
            @targetSeries: the target values, indexed like @dataFrame
            @kwargs: accepted but not used by this method
        '''
        self._dataFrame = dataFrame
        self._targetSeries = targetSeries

        try:
            features = list(self._dataFrame.columns)
            self._rootNode = self.buildTree(features,
                                            self._dataFrame.index.values, 0)
        finally:
            # The training data is only needed while the tree is being built.
            # Release it even when building fails, so the model does not keep
            # it alive.
            self._dataFrame = None
            self._targetSeries = None

    def classify(self, dataFrame):
        ''' Classify every row of the data frame

            @dataFrame: data frame holding the samples, one per row
            @return: a data frame with the columns "Prediction" and
                     "Probability", using the index of @dataFrame
        '''
        # One (prediction, probability) pair per row, in row order
        outcomes = [self.classifyOneSample(sample)
                    for _, sample in dataFrame.iterrows()]

        columns = {
            "Prediction": [label for label, _ in outcomes],
            "Probability": [chance for _, chance in outcomes],
        }
        return pd.DataFrame(columns, index=dataFrame.index)

    def classifyOneSample(self, sample):
        ''' Wrapper method for _classifyOneSample(). This abstracts away the root node from being the required argument.

            @sample: the sample to classify (classify() passes one data frame row)
            @return: whatever _classifyOneSample() returns for the root node;
                     classify() unpacks it as (prediction, probability)
        '''
        return self._classifyOneSample(sample, self._rootNode)

    def buildTree(self, features, indices, depth):
        ''' Recursively build the (sub)tree for the given samples

            @features: the features that may still be used for splitting
            @indices: index labels of the training samples that reached this node
            @depth: depth of the node being built
            @return: a leaf node when the maximum depth is reached or no split
                     helps, otherwise a decision node with one child per partition
        '''
        nodeFrame = self._dataFrame.loc[indices]
        nodeTargets = self._targetSeries.loc[indices]

        if (depth >= self._maxDepth):
            return self._constructLeafNode(nodeTargets)

        # Only a subset of the features is considered for this split
        candidateFeatures = self.getRandomFeatures(features,
                                                   self.numberFeaturesToSplit)
        splitFeature, splitPoint, gain = self.findBestFeature(
            candidateFeatures, indices)
        if (gain == 0 or splitFeature is None):
            return self._constructLeafNode(nodeTargets)

        decisionNode = DecisionNode(splitFeature,
                                    self.getEntropy(nodeTargets),
                                    len(nodeFrame))

        # Numeric features are split around a value, categorical ones by category
        if (self.isNumericFeature(splitFeature, nodeFrame)):
            childGroups = self.partitionContinuous(splitFeature, splitPoint,
                                                   nodeFrame)
            decisionNode.numericValue = splitPoint  # Remember where the numeric feature was split
        else:
            childGroups = self.partitionCategorical(splitFeature, nodeFrame)

        # The feature used here is no longer available further down
        remainingFeatures = [f for f in features if f != splitFeature]

        for branchKey, branchIndices in childGroups.items():
            decisionNode[branchKey] = self.buildTree(remainingFeatures,
                                                     branchIndices, depth + 1)

        return decisionNode

    def findBestFeature(self, features, indices):
        ''' Find the feature (and split value) with the highest information gain

            Numeric features are tried at their 0.2, 0.4, 0.6 and 0.8
            quantiles; categorical features are tried once, split by category.
            On ties the candidate seen first wins.

            @features: the features to evaluate
            @indices: index labels of the training samples to evaluate them on
            @return: tuple of the best feature, its split value (None for a
                     categorical feature) and the information gain. The feature
                     is None when @features is empty.
        '''
        nodeFrame = self._dataFrame.loc[indices]
        nodeTargets = self._targetSeries.loc[indices]
        nodeEntropy = self.getEntropy(self._targetSeries.loc[indices])
        nodeCount = len(nodeFrame)

        def candidateSplits():
            # Lazily yields (feature, split value, indices of the children)
            for feature in features:
                if (SupervisedModel.isNumericFeature(feature, nodeFrame)):
                    quantiles = [0.2, 0.4, 0.6, 0.8]
                    for point in nodeFrame[feature].quantile(quantiles):
                        groups = self.partitionContinuous(
                            feature, point, nodeFrame)
                        yield feature, point, groups.values()
                else:
                    groups = self.partitionCategorical(feature, nodeFrame)
                    yield feature, None, groups.values()

        topFeature, topValue, topGain = None, None, -sys.maxsize
        for feature, point, childrenIndices in candidateSplits():
            gain = self.informationGain(nodeEntropy, nodeCount, nodeTargets,
                                        childrenIndices)
            # Strict comparison keeps the earliest of equally good candidates
            if (gain > topGain):
                topFeature, topValue, topGain = feature, point, gain

        return topFeature, topValue, topGain

    def printTreeStructure(self):
        ''' Print the nested dictionary self.treeStructure, one key per line.

        Keys are printed depth-first; the indentation string starts as a single
        space and is doubled at every nesting level.
        '''
        # Explicit stack of (indentation, iterator over the entries at that level)
        pending = [(" ", iter(self.treeStructure.items()))]
        while pending:
            indent, entries = pending[-1]
            for label, subtree in entries:
                print(indent, label, sep="")
                # Descend into the subtree before continuing with the siblings
                pending.append((indent + indent, iter(subtree.items())))
                break
            else:
                # This level is exhausted: resume the parent level
                pending.pop()

    def save(self, filePath, fileFormat):
        ''' Render self.graph to a file in the given format (pdf, png, etc).

        @filePath: destination path; any extension it carries is dropped because
                   the renderer appends the format extension itself
        @fileFormat: output format passed to the renderer
        @return: absolute path of the file reported by the renderer
        '''
        baseName, _ = os.path.splitext(filePath)
        renderedPath = self.graph.render(baseName,
                                         format=fileFormat,
                                         cleanup=True)
        return os.path.abspath(renderedPath)

    @staticmethod
    def _constructContinuousKey(greatOrLessSign, value):
        ''' '''
        if (greatOrLessSign != "<" and greatOrLessSign != ">"
                and greatOrLessSign != "<=" and greatOrLessSign != ">="):
            raise ValueError(
                "Incorrect inequality sign. Must be either: <, >, <=, or >=.")

        return DecisionTree.ContinuousKeyFormat.format(greatOrLessSign, value)

    @staticmethod
    def partitionCategorical(feature, dataFrame):
        ''' '''
        return dataFrame.groupby(feature).groups

    @staticmethod
    def partitionContinuous(feature, value, dataFrame):
        ''' Split the rows of dataFrame in two around a numeric threshold.

        @feature: name of the numeric column
        @value: threshold; rows go left when feature < value, right when feature >= value
        @dataFrame: the data to partition
        @return: dict mapping the "<" / ">=" branch key to the array of row labels.
                 A side with no rows is left out of the dict.
        '''
        # Both comparisons are evaluated explicitly, so rows that satisfy neither
        # of them end up in no partition.
        sides = (
            ("<", dataFrame[dataFrame[feature] < value].index.values),
            (">=", dataFrame[dataFrame[feature] >= value].index.values),
        )

        partitions = {}
        for sign, rowLabels in sides:
            if len(rowLabels) > 0:
                branchKey = DecisionTree._constructContinuousKey(sign, value)
                partitions[branchKey] = rowLabels

        return partitions

    @staticmethod
    def informationGain(parentEntropy, parentCount, targetSeries,
                        childrenIndices):
        ''' '''
        childrenEntropy = 0
        for childIndices in childrenIndices:
            probability = len(childIndices) / parentCount
            childEntropy = DecisionTree.getEntropy(targetSeries[childIndices])
            childrenEntropy += (probability * childEntropy)
        return parentEntropy - childrenEntropy

    @staticmethod
    def getEntropy(series):
        ''' '''
        seriesCount = len(series)
        if (seriesCount == 0):
            return 0

        resultEntropy = 0
        for _, count in series.value_counts().items():
            probability = count / float(seriesCount)
            if (probability > 0):
                resultEntropy += probability * math.log(probability, 2)

        # Add 0 because we may get "-0" entropy. This is for display 0 without the - sign
        return -resultEntropy + 0

    @staticmethod
    def getGiniImpurity(series):
        ''' @TODO '''
        seriesCount = float(len(series))
        if (seriesCount == 0):
            return 0

        impurity = 0
        for _, count in series.value_counts().items():
            probability = count / seriesCount
            impurity += probability * (1 - probability)

        return impurity

    @staticmethod
    def _constructLeafNode(series):
        ''' Build a LeafNode that predicts the most frequent value of a series.

        @series: target values of the samples that reach the leaf
        @return: LeafNode built from the most frequent label, that label's share of
                 the counted values, the entropy of the series and its length
        '''
        labelCounts = series.value_counts()
        # max() keeps the first label among those sharing the highest count
        topLabel, topCount = max(labelCounts.items(),
                                 key=lambda labelAndCount: labelAndCount[1])
        return LeafNode(topLabel,
                        float(topCount) / sum(labelCounts),
                        DecisionTree.getEntropy(series),
                        len(series))

    def _generateGraph(self, node, nodeId=0):
        ''' Generate the decision tree graph. 
    
      @node: the root node of the decision tree
      @nodeId: use to help assign unique id to each node.
      @return: the most current node ID. This is only used to keep track of the most current node ID.
    '''
        if (node is None):
            return nodeId

        # If the root node is a leaf node
        if (type(node) is LeafNode):
            self._addNode(LeafNode, nodeId, str(node))
            return nodeId

        # Decision Node starts here
        self._addNode(DecisionNode, nodeId, str(node))
        childNodeId = nodeId + 1

        for featureValue, childNode in node.items():
            if (type(childNode) is LeafNode):
                self._addNode(LeafNode, childNodeId, str(childNode))
                self._addEdge(nodeId, childNodeId, featureValue)
                childNodeId += 1
            else:
                # Node ID will be updated through recursion so we need to save it simply by returning the most current node ID
                currentChildNodeId = self._generateGraph(
                    childNode, childNodeId)
                self._addEdge(nodeId, childNodeId, featureValue)
                childNodeId = currentChildNodeId

        return childNodeId

    def _addEdge(self, fromId, toId, nodeLabel):
        ''' Add a labelled edge between two node ids to self._diGraph.

        @fromId: id of the tail node (converted to str)
        @toId: id of the head node (converted to str)
        @nodeLabel: text shown on the edge
        '''
        tail, head = str(fromId), str(toId)
        self._diGraph.edge(tail, head, label=nodeLabel)

    def _addNode(self, nodeType, nodeId, nodeLabel):
        ''' Add a node to self._diGraph; leaf nodes are drawn in red.

        @nodeType: the class LeafNode or the class DecisionNode
        @nodeId: id of the node (converted to str)
        @nodeLabel: text shown in the node
        @raise ValueError: when nodeType is neither LeafNode nor DecisionNode
        '''
        if nodeType is not LeafNode and nodeType is not DecisionNode:
            raise ValueError("Invalid node type \"{0}\"".format(nodeType))

        extraAttributes = {"color": "red"} if nodeType is LeafNode else {}
        self._diGraph.node(str(nodeId), nodeLabel, **extraAttributes)

    @staticmethod
    def getRandomFeatures(features, numberFeatures):
        ''' Get m random features that we consider to split at each level of the tree 

      @features: list of features
      @numberFeatures: number of features that we want to use. 0 means use all features
    '''
        if (numberFeatures < 0):
            raise ValueError("numberFeatures must be greather or equal to 0")

        randomFeatures = None

        # If 0 then return the same features list OR
        # If the size of the features is <= than the number of features that
        # we want to split, then we simply return all features
        if (numberFeatures == 0 or len(features) <= numberFeatures):
            randomFeatures = features
        else:
            randomFeatures = np.random.choice(features,
                                              size=numberFeatures,
                                              replace=False)

        return randomFeatures

    def _classifyOneSample(self, sample, node):
        ''' Classify a single sample by walking the tree recursively from node.

        @sample: the sample; indexed by feature name
        @node: the node to start from
        @return: tuple (prediction, probability)
        '''
        if type(node) is LeafNode:
            return node.prediction, node.probability

        sampleValue = sample[node.feature]

        # A split on a continuous feature has exactly two branches: "<" and ">="
        if node.numericValue is not None:
            sign = "<" if sampleValue < node.numericValue else ">="
            branchKey = self._constructContinuousKey(sign, node.numericValue)
            return self._classifyOneSample(sample, node[branchKey])

        # Categorical feature with a value that the node has a branch for
        if sampleValue in node.keys():
            return self._classifyOneSample(sample, node[sampleValue])

        # The node has no branch for this value: classify the sample through
        # every branch and let the results vote. (No message is printed here
        # because this case can occur very often.)
        voteCounts = {}
        probabilityTotals = {}
        for branch in node.values():
            label, probability = self._classifyOneSample(sample, branch)
            voteCounts[label] = voteCounts.get(label, 0) + 1
            probabilityTotals[label] = probabilityTotals.get(label,
                                                             0) + probability

        def averageProbability(label):
            return probabilityTotals[label] / voteCounts[label]

        # Highest vote count wins; equal counts are settled by the higher average
        # probability, and a full tie keeps the label that was seen first
        winner = max(voteCounts,
                     key=lambda label:
                     (voteCounts[label], averageProbability(label)))
        return winner, averageProbability(winner)

    def _treeStructure(self, node):
        ''' '''
        if (node is None):
            return {}

        # Check if the current node only contains leaf nodes
        hasOnlyLeafNodes = len(
            [v for v in node.values() if type(v) is DecisionNode]) == 0
        if (hasOnlyLeafNodes):
            return {node.feature: {}}
        else:
            structure = {node.feature: {}}
            for i, (featureValue, childNode) in enumerate(node.items()):
                if (type(childNode) is not LeafNode):
                    childNodeFeature, childNodeChildren = list(
                        self._treeStructure(childNode).items())[0]
                    key = f"{featureValue} --> {childNodeFeature}"
                    structure[node.feature][key] = childNodeChildren
                else:
                    key = f"Leaf Node {i}"
                    structure[node.feature][key] = {}

            return structure

    def _countLeafNodes(self, node) -> int:
        ''' Helper function for counting leaf nodes 
    
      @node: root node of the decision tree node
      @return: number of leaf nodes
    '''
        if (node is None):
            return 0
        elif (type(node) is LeafNode):
            return 1
        else:
            count = 0
            for featureValue in node.keys():
                count += self._countLeafNodes(node[featureValue])
            return count

    def _countTreeDepth(self, node) -> int:
        ''' Helper function for counting the tree depth 
    
      @node: root node of the decision tree
      @return: the depth of the decision tree
    '''
        if (node is None) or (type(node) is LeafNode) or (len(node.keys())
                                                          == 0):
            return 0
        else:
            deepestDepth = 0
            for featureValue in node.keys():
                depth = 1 + self._countTreeDepth(node[featureValue])
                if (deepestDepth < depth):
                    deepestDepth = depth
            return deepestDepth

    @staticmethod
    def _calcFeatureImportance(node, totalSampleCount, featureImportances):
        ''' Accumulate the feature importance of node and its descendants recursively.

        For every DecisionNode the importance added to its feature is
          (node.sampleCount / totalSampleCount) * (node.entropy - childrenImpurity)
        where childrenImpurity is the sum of
          child.entropy * (child.sampleCount / node.sampleCount)
        over the children that are DecisionNode instances. Children of any other
        type add nothing to childrenImpurity.

        @node: root node of the (sub)tree; anything that is not a DecisionNode is ignored
        @totalSampleCount: number of samples used as the denominator of the node weight
        @featureImportances: dict feature -> importance, updated in place
        '''
        if not isinstance(node, DecisionNode):
            return

        childrenImpurity = sum(
            child.entropy * (child.sampleCount / node.sampleCount)
            for child in node.values() if isinstance(child, DecisionNode))
        nodeWeight = node.sampleCount / totalSampleCount
        importance = nodeWeight * (node.entropy - childrenImpurity)

        # A feature can be used by several nodes: add up their contributions
        featureImportances[node.feature] = featureImportances.get(
            node.feature, 0) + importance

        for child in node.values():
            DecisionTree._calcFeatureImportance(child, totalSampleCount,
                                                featureImportances)

    def __repr__(self):
        ''' '''
        s = "Decision Tree | Depth={0} | Number of Leaf Nodes={1} | Number of features to split={2}"
        return s.format(self.depth, self.numLeafNodes,
                        self.numberFeaturesToSplit)

Example #19

Show file

File: srl_visualization.py Project: kasinxc/Semantic-Role-Labeling-For-News

def tree(relations):
    """Build a left-to-right graph: role of interest -> verbs -> ARG1 phrases.

    For every role returned by ``get_role_to_relations_of_interest_mappings``
    the function counts how often each verb (role label ``'V'``) occurs and
    how often each ``'ARG1'`` phrase occurs together with that verb. It then
    draws ``role -> verb`` and ``verb -> phrase`` edges labelled with those
    counts. A verb is drawn only when its count and at least one of its
    phrase counts lie inside the module-level ``min_*`` / ``max_*``
    thresholds, and at most ``top_ranking_verbs`` verbs are drawn per role.

    :param relations: passed through to
        ``get_role_to_relations_of_interest_mappings``; each relation of
        interest is iterated as roles exposing ``label`` and ``words``.
    :return: the populated ``Digraph`` (format ``png``); nothing is rendered.
    """
    role_to_relations_of_interest_mappings = get_role_to_relations_of_interest_mappings(
        role_of_interest, relations)

    tree_graph = Digraph(format='png')
    tree_graph.clear()
    tree_graph.attr(rankdir='LR')
    for interested_role_name, relations_of_interest in role_to_relations_of_interest_mappings.items(
    ):

        print(
            UseStyle("add interested node: " + interested_role_name,
                     fore='green'))
        verb_counts = dict()
        verb_to_other_roles_mappings = dict()

        for relation in relations_of_interest:
            # Reset per relation so a relation without a 'V' role can never
            # be attributed to the verb of a previous relation.
            verb_role = None
            other_roles = list()
            for role in relation:
                if role.label == 'V':
                    verb_role = role
                elif role.label == 'ARG1':
                    other_roles.append(role.words)

            if verb_role is None:
                # Nothing to attach the ARG1 phrases to
                continue

            verb_words = verb_role.words
            verb_counts[verb_words] = verb_counts.get(verb_words, 0) + 1
            other_role_counts = verb_to_other_roles_mappings.setdefault(
                verb_words, dict())
            for other_role_words in other_roles:
                other_role_counts[other_role_words] = other_role_counts.get(
                    other_role_words, 0) + 1

        # Most frequent verbs first, so the top_ranking_verbs cap keeps them
        sorted_verb_counts = sorted(verb_counts.items(),
                                    key=lambda kv: kv[1],
                                    reverse=True)

        drew_verbs = set()
        for (verb_words, count) in sorted_verb_counts:
            if not (min_verb_count_to_draw <= count <= max_verb_count_to_draw):
                continue

            # Draw a verb only if at least one of its phrases will be drawn
            can_draw = any(
                min_verb_other_roles_count_to_draw <= other_role_count <=
                max_verb_other_roles_count_to_draw for other_role_count in
                verb_to_other_roles_mappings[verb_words].values())
            if not can_draw:
                continue

            verb_name = interested_role_name + '.' + verb_words
            tree_graph.node(interested_role_name,
                            interested_role_name,
                            color='red')
            tree_graph.node(verb_name, verb_words)
            tree_graph.edge(interested_role_name,
                            verb_name,
                            label=str(count))
            drew_verbs.add(verb_words)
            if len(drew_verbs) >= top_ranking_verbs:
                break

        for verb_words, other_roles_count in verb_to_other_roles_mappings.items(
        ):
            if verb_words not in drew_verbs:
                continue
            verb_name = interested_role_name + '.' + verb_words
            for other_role_words, count in other_roles_count.items():
                if min_verb_other_roles_count_to_draw <= count <= max_verb_other_roles_count_to_draw:
                    other_role_name = verb_name + '.' + other_role_words
                    tree_graph.node(other_role_name, other_role_words)
                    tree_graph.edge(verb_name,
                                    other_role_name,
                                    label=str(count))

    return tree_graph

Example #20

Show file

File: AFND.py Project: asdf1234Damian/Compilers

class Automata:
    """Finite automaton with epsilon transitions, built from an expression or a file.

    States live in ``self.estados`` (state id -> ``Estado``). Automata built
    from an expression use integer ids taken from the shared class counter
    ``nxtNode``; automata loaded from a file use the names found in the file.
    The combinators (``concat``, ``unir``, ``opcional``, ...) modify the
    automaton in place.

    NOTE(review): when loading from ``path`` no ``self.final`` is set, so
    methods that read it (``pertenece``, the combinators,
    ``conversion_A_Archivo``) presumably are not meant for file-loaded
    automata. Confirm against callers.
    """

    # Class-level counter so state ids keep increasing across all instances
    nxtNode = 0

    def __init__(self, exp, path=None):
        """Create the automaton for ``exp``, or load it from ``path`` if given.

        :param exp: a single symbol, a range ``a-z`` or a comma separated
            list of symbols; stored as ``self.exp``.
        :param path: optional file. Its first line is the alphabet; each
            following line is ``state t1 ... tn token`` where ``-1`` means
            "no transition" / "not a final state".
        """
        self.exp = exp
        self.G = Digraph()
        self.estados = {}  # state id -> Estado
        self.alf = set()
        # Case: build from a file
        if path:
            # Read everything up front so the handle is closed right away
            with open(path, "r") as handle:
                lineas = handle.readlines()
            # The first line of the file is the alphabet
            self.alf = lineas[0].split()
            # The initial state is the first field of the second line
            self.inicial = lineas[1].split()[0]
            # The remaining lines are the states and their transitions
            for linea in lineas[1:]:
                campos = linea.split()
                # The last field is the token; -1 means the state is not final
                terminal = campos[-1] != '-1'
                # The first field is the name of the state
                nodeName = campos[0]
                self.estados[nodeName] = Estado(terminal)
                for i, simb in enumerate(self.alf):
                    if campos[i + 1] != '-1':
                        fin = 'S' + campos[i + 1]
                        self.estados[nodeName].addTransicion(simb, simb, fin)
            return
        # Case: build from the expression, with one initial and one final state
        self.inicial = Automata.nxtNode
        self.final = Automata.nxtNode + 1
        self.estados[self.inicial] = Estado(False)
        self.estados[self.final] = Estado(True)
        Automata.nxtNode += 2
        if len(exp) == 1:
            # Basic case: a single symbol
            self.alf = {exp}
        elif '-' in exp:
            # Range: every alphanumeric character between both ends
            inicio, fin = [ord(x) for x in exp.split('-')]
            for codigo in range(inicio, fin + 1):
                if chr(codigo).isalnum():
                    self.alf.add(chr(codigo))
        else:
            # Comma separated symbols
            self.alf = set(exp.split(','))
        self.estados[self.inicial].addTransicion(exp, self.alf, self.final)

    def print(self):
        """Print every state followed by its transitions."""
        for origen, estado in self.estados.items():
            print('Origen: ', origen)
            for exp, trns in estado.transiciones.items():
                print('\t',
                      exp,
                      ':= {',
                      ','.join(trns.simbolos),
                      '} ->',
                      trns.destinos,
                      sep='')

    def plot(self, path):
        """Render the automaton as a PNG named ``path`` inside ``images``."""
        self.G.clear()
        # Change these attributes to change the size of the image
        self.G.attr(ratio='fill',
                    size='3.8,2.77',
                    dpi='300',
                    rank='same',
                    rankdir='LR')
        # 'S' is an invisible-ish entry point that marks the initial state
        self.G.edge('S', str(self.inicial))
        for origin, estado in self.estados.items():
            if estado.final:
                self.G.node(str(origin), shape='doublecircle')
            for exp, trns in estado.transiciones.items():
                for destino in trns.destinos:
                    self.G.edge(str(origin), str(destino), label=exp)
        self.G.node(
            'S',
            label=None,
            shape='point',
        )
        self.G.render(filename=path,
                      view=False,
                      directory='images',
                      cleanup=True,
                      format='png')

    def cEpsilon(self, edos):
        """Return the states reachable through epsilon transitions from ``edos``.

        ``edos`` must be a list: newly discovered states are appended to it in
        place so that they get visited too. A state discovered through an
        epsilon transition is added to the result when it is discovered and
        again when it is visited, so the result can contain duplicates.
        """
        res = []
        i = 0
        while i != len(edos):
            edo = self.estados[edos[i]]
            if EPS in edo.transiciones:
                for d in edo.transiciones[EPS].destinos:
                    if d not in edos:
                        edos.append(d)
                        res.append(d)
            res.append(edos[i])
            i += 1
        return res

    def moverA(self, edos, s):
        """Return the list of states reached from any state in ``edos`` with symbol ``s``.

        ``edos`` may be a set or a list; ids that are not states of this
        automaton are ignored.
        """
        result = set()
        for edo in list(edos):
            if edo in self.estados:
                for tr in self.estados[edo].transiciones.values():
                    if s in tr.simbolos:
                        result.update(tr.destinos)
        return list(result)

    def irA(self, edos, s):
        """Move from ``edos`` with ``s`` and take the epsilon closure of the result."""
        return self.cEpsilon(self.moverA(edos, s))

    def pertenece(self, sigma):
        """Return True when the sequence of symbols ``sigma`` is accepted."""
        # Start from the epsilon closure of the initial state so that the empty
        # sequence is accepted when a final state is reachable without input
        edos = self.cEpsilon([self.inicial])
        for s in sigma:
            edos = self.irA(edos, s)
            if not edos:
                return False
        # self.final is a single id, or a collection of ids after unirM /
        # conversion_A_Archivo
        if isinstance(self.final, int):
            return self.final in edos
        return bool(set(self.final).intersection(edos))

    def opcional(self):
        """Make the automaton optional (it also accepts without consuming input)."""
        self.exp = '(' + self.exp + ')+eps'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial state points to the old initial and to the new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(nFinal)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    def concat(self, f2):
        """Concatenate automaton ``f2`` after this one."""
        self.exp = '(' + self.exp + ')' + f2.exp
        # Copy every state of f2 together with its transitions
        self.estados.update(f2.estados)
        # Merge the alphabets
        self.alf = self.alf.union(f2.alf)
        # Fuse f2's initial state into our final state: drop the copied state
        # and move its transitions over
        self.estados.pop(f2.inicial, None)
        self.estados[self.final].transiciones.update(
            f2.estados[f2.inicial].transiciones)
        # Our old final state is no longer final; f2's final state takes over
        self.estados[self.final].final = False
        self.final = f2.final

    def unirM(self, automatas):
        """Join this automaton with ``automatas`` under one new initial state.

        Every automaton keeps its own final state; afterwards ``self.final``
        is the list of all of them.
        """
        self.exp = ''
        finales = [self.final]
        # Create the new initial state
        nInicial = Automata.nxtNode
        Automata.nxtNode += 1
        self.estados[nInicial] = Estado(False)
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.inicial = nInicial
        for a in automatas:
            self.exp += a.exp
            # Merge the symbols of every automaton
            self.alf = self.alf.union(a.alf)
            # Link the new initial state to the initial state of the automaton
            self.estados[nInicial].addEpsTrans(a.inicial)
            # Copy its states and transitions
            self.estados.update(a.estados)
            finales.append(a.final)
        self.final = finales

    def unir(self, f2):
        """Build the union (alternation) of this automaton and ``f2``."""
        # Update the expression
        self.exp = '(' + self.exp + ')' + '+' + f2.exp
        # Copy every state of f2 together with its transitions
        self.estados.update(f2.estados)
        # Merge the alphabets
        self.alf = self.alf.union(f2.alf)
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # Connect both automata to the new states
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(f2.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[f2.final].addEpsTrans(nFinal)
        # The old final states are no longer final
        self.estados[f2.final].final = False
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    def cerradura_positiva(self):
        """Apply the positive closure (one or more repetitions)."""
        self.exp = '(' + self.exp + ')^+'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial points to the old initial; the old final points back
        # to the old initial and on to the new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    def cerradura_kleene(self):
        """Apply the Kleene closure (zero or more repetitions)."""
        self.exp += '^k'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial points to the old initial and to the new final; the
        # old final points back to the old initial and on to the new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(nFinal)
        self.estados[self.final].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    def conversion_A_Archivo(self, path):
        """Write the transition table of the equivalent subset automaton to ``path``.

        The first line is the alphabet. Every following line is
        ``S<i> t1 ... tn token``: one target index per symbol (``-1`` when
        there is no transition) and a token that is ``(i + 1) * 10`` when the
        subset contains a final state, ``-1`` otherwise.

        Side effect: an integer ``self.final`` is replaced by a one-element set.
        """
        # Normalise a single final state so the intersection below works
        if isinstance(self.final, int):
            self.final = {self.final}
        with open(path, "w") as out:
            # Table of state subsets found so far and the index being expanded
            S = [self.cEpsilon([self.inicial])]
            currS = 0
            # The alphabet goes first
            out.write(' '.join(self.alf) + '\n')
            # Until the last discovered subset has been expanded
            while currS != len(S):
                si = S[currS]
                # Name of the state for this subset
                out.write('S' + str(currS) + ' ')
                # sj is the result of irA from si with each symbol
                for simb in self.alf:
                    sj = self.irA(si, simb)
                    if len(sj):
                        # Register sj if it is new, then write its index
                        if sj not in S:
                            S.append(sj)
                        out.write(str(S.index(sj)) + ' ')
                    else:
                        # No transition with this symbol
                        out.write('-1 ')
                if set(si).intersection(self.final):
                    out.write(str((currS + 1) * 10) + '\n')
                else:
                    out.write('-1\n')
                currS += 1

Example #21

Show file

File: graph_catalog.py Project: informatics-isi-edu/deriva-catalog-manage

class DerivaCatalogToGraph:
    """Build a Graphviz directed graph of the tables and foreign keys of a catalog model."""

    def __init__(self, catalog, engine='dot'):
        """
        Create an empty graph bound to a catalog.
        :param catalog: Catalog object; getCatalogModel(), get_server_uri() and catalog_id are read from it.
        :param engine: Graphviz layout engine used for the graph.
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )

        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix of the URL attached to every table node; see _chaise_uri.
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)

        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')

    def clear(self):
        """Clear the underlying graph."""
        self.graph.clear()

    def view(self):
        """Render the underlying graph and open it in a viewer."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.
        :param schemas:  List of schemas that should be included.  Use whole catalog if None.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Collapse association tables so that only edges between endpoints are used
        :return:
        """
        if schemas is None:
            # Whole catalog, minus these three schemas.
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in ['_acl_admin', 'public', 'WWW']]

        for schema in schemas:
            self.schema_to_graph(schema, skip_terms=skip_terms, schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.
        :param schema_name: Name of the schema in the model to be used.
        :param schemas: List of additional schemas to include in the graph.
        :param skip_terms: Do not add nodes for vocabulary tables, nor edges that point at them.
        :param skip_association_tables: Do not add nodes for association tables.
        :return:
        """
        # The schema being drawn always counts as included; `schemas` only adds
        # to it.  Without this, omitting `schemas` would filter out every
        # foreign-key edge, including those inside schema_name itself.
        included = [] if schemas is None else list(schemas)
        if schema_name not in included:
            included.append(schema_name)

        schema = self._model.schemas[schema_name]

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name, node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                if self._is_vocabulary_table(table):
                    if skip_terms:
                        continue
                    shape = 'ellipse'
                elif table.is_association() and skip_association_tables:
                    # Skip over current table if it is an association table and option is set.
                    print('Skipping node', node_name)
                    continue
                else:
                    shape = 'box'
                schema_graph.node(node_name, label='{}:{}'.format(schema_name, table.name),
                                  shape=shape,
                                  URL=self._chaise_uri(table))

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=included,
                                           skip_association_tables=skip_association_tables)

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=None):
        """
        Add edges for each foreign key relationship in the specified table.
        :param table: Table whose foreign keys are turned into edges.
        :param skip_terms: Do not add edges that point at vocabulary tables.
        :param skip_association_tables: Replace a two-way association table by one edge between its endpoints.
        :param schemas: Names of the schemas in the graph; a foreign-key edge is added only when both of its
                        tables belong to one of them.  With the default (None) no foreign-key edge is added.
        :return:
        """
        included = () if schemas is None else schemas

        # If table is an association table, put in an edge between the two endpoints in the relation.
        # NOTE(review): this branch requires is_association() == 2, while schema_to_graph skips the node
        # for any truthy is_association() value -- confirm the two are meant to differ.
        if table.is_association() == 2 and skip_association_tables:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            t1_name = '{}_{}'.format(t1.schema.name, t1.name)
            t2_name = '{}_{}'.format(t2.schema.name, t2.name)
            self.graph.edge(t1_name, t2_name, dir='both', color='gray')
            return

        source_name = '{}_{}'.format(table.schema.name, table.name)
        for fkey in table.foreign_keys:
            referenced_table = list(fkey.column_map.values())[0].table

            # If either end is in a schema we are skipping, do not add an edge.
            if referenced_table.schema.name not in included or table.schema.name not in included:
                continue
            # If the target is a term table, and we are not including terms, do not add an edge.
            if skip_terms and self._is_vocabulary_table(referenced_table):
                continue

            # Add an edge from the current node to the target table.
            target_name = '{}_{}'.format(referenced_table.schema.name, referenced_table.name)
            self.graph.edge(source_name, target_name)

    def save(self, filename=None, format='pdf', view=False):
        """
        Write the graph to disk.
        :param filename: Output path.  If None, the file name and directory configured on the graph are used.
        :param format: Output format.  Any value containing 'gv' saves the DOT source; anything else is rendered.
        :param view: Open the rendered file in a viewer (ignored when the DOT source is saved).
        :return:
        """
        if filename is None:
            # Passing None through lets graphviz fall back to the graph's own settings
            # instead of failing inside os.path.abspath(None).
            directory = name = None
        else:
            directory, name = os.path.split(os.path.abspath(filename))

        if 'gv' in format:
            self.graph.save(filename=name, directory=directory)
        else:
            print('dumping graph in file', name, format)
            self.graph.render(filename=name, directory=directory, view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Return the SVG representation of the graph (rich display hook)."""
        return self.graph._repr_svg_()

    @staticmethod
    def _is_vocabulary_table(t):
        """
        Tell whether a table should be drawn as a vocabulary (term) table.
        :param t: Table to test; its schema name and columns are inspected.
        :return: True if the schema name matches or the table has ID, Name, URI and Synonyms columns.
        """
        # NOTE(review): this tests whether the schema name is a substring OF 'vocabulary', so 'vocab'
        # matches but so would 'a' or an empty name.  Kept as is -- confirm the intended rule.
        if t.schema.name.lower() in 'vocabulary':
            return True
        try:
            # The lookups are what matters here: a missing column raises KeyError.
            return bool(t.columns['ID'] and t.columns['Name'] and t.columns['URI'] and t.columns['Synonyms'])
        except KeyError:
            return False

    def _chaise_uri(self, table):
        """Return the URL used as the node link for a table."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)

Example #22

Show file

File: gvtest.py Project: mwerevu/procflows

# Script excerpt: draws per-process flow graphs from the rows of `dfi`.
# `specdot`, `dfi`, `pd`, `sys` and `Digraph` come from parts of the script
# that are not shown here.

# Render the graph built so far and open it in a viewer.
specdot.view()


# NOTE: outputs are not handled yet.

# NOTE(review): assuming `sys` is the standard module, this raises SystemExit,
# so nothing below this line runs when the script executes top to bottom.
sys.exit("Stopping early")

# NOTE(review): `process` is not assigned anywhere above in this excerpt --
# confirm where it is expected to come from.
for idx, edge in dfi2.iterrows():
    #print edge['process_id'], edge['input_process_id']
    # Keep only the rows that touch the selected process, on either end.
    if edge['input_process_id'] == process or edge['process_id'] == process:
        specdot.edge(edge['input_process_id'],edge['process_id'],label=edge['instgen'])

    # These three statements are part of the loop body, so the graph is
    # printed, rendered and cleared once per row.
    print(specdot.source)
    specdot.render("".join(['./',process,'_processflow.gv']),view=True)
    specdot.clear()



# Process connection graph, specific process.
# One row per distinct (process_id, input_process_id, instgen) combination.
dfi2 = dfi.drop_duplicates(['process_id', 'input_process_id','instgen'])
specdot = Digraph(comment="Process Flow")
specdot.attr('Node',shape='box')

for process in pd.unique(dfi['process_id']):
    for idx, edge in dfi2.iterrows():
        #print edge['process_id'], edge['input_process_id']
        # Add every edge that starts or ends at the current process.
        if edge['input_process_id'] == process or edge['process_id'] == process:
            specdot.edge(edge['input_process_id'],edge['process_id'],label=edge['instgen'])

    # Printed once per process.  The graph is not cleared in the visible code,
    # so edges from earlier processes are still in it.
    print(specdot.source)

Example #23

Show file

File: ros_mrcnn.py Project: EricssonResearch/scott-eu

class ros_mask_rcnn:
    """Run Mask R-CNN on paired RGB/depth images and publish the result.

    For every image pair the annotated detection image is published on
    ``/turtlebot2i/mrcnn_out`` and a scene graph in DOT source form on
    ``/turtlebot2i/scene_graph``.
    """

    # Record label of every object node in the published scene graph.
    _SCENE_LABEL = '{%s|type: %s|distance: %.2f|orientation: %.2f|direction: %.2f|velocity: %.2f|size_x: %.2f|size_y: %.2f}'

    # (name pattern, type code) pairs tried in order by get_type.
    _TYPE_PATTERNS = (
        (r'Wall', 3),   # wall
        (r'Human', 2),  # human
        (r'robot', 1),  # robot / non-human dynamic objects
    )

    def __init__(self):
        """Load the model and set up the ROS subscribers and publishers."""
        # Load model
        config = InferenceConfig()
        config.display()

        self.model = modellib.MaskRCNN(mode="inference",
                                       model_dir=LOG_DIR,
                                       config=config)

        self.model.load_weights(MODEL_PATH, by_name=True)
        # Graph that is rendered to disk when displaying is enabled.
        self.dot = Digraph(comment='warehouse', format='svg')

        # Set topics
        self.bridge = CvBridge()
        # False until the first image pair has been processed.
        self.check = False
        self.to_display = rospy.get_param('/mrcnn/display_results', False)

        # Use ApproximateTimeSynchronizer if the depth and rgb cameras do not
        # have the same timestamp; TimeSynchronizer is enough when they do.
        self.image_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/rgb/raw_image', Image)
        self.image_depth_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/depth/raw_image', Image)
        self.ts = message_filters.ApproximateTimeSynchronizer(
            [self.image_sub, self.image_depth_sub], 10, 0.1)
        self.ts.registerCallback(self.callback)

        self.image_pub = rospy.Publisher("/turtlebot2i/mrcnn_out",
                                         Image,
                                         queue_size=1)
        self.scenegraph_pub = rospy.Publisher('/turtlebot2i/scene_graph',
                                              SceneGraph,
                                              queue_size=10)
        # Wall-clock timestamps collected by callback, one entry per frame.
        self.time_start_list = []
        self.time_sg_end_list = []
        self.time_all_end_list = []

    def get_overlap_bbox(self, rec1, rec2):
        """Tell whether two boxes intersect and how much of the first is covered.

        :param rec1: Box given as (y_min, x_min, y_max, x_max).
        :param rec2: Box in the same layout.
        :return: Tuple ``(is_overlapping, percent)`` where ``percent`` is the
            area of the intersection divided by the area of ``rec1``, times
            100.  It is 0.0 when ``rec1`` has no area.
        """
        y1min, x1min, y1max, x1max = rec1[0], rec1[1], rec1[2], rec1[3]
        y2min, x2min, y2max, x2max = rec2[0], rec2[1], rec2[2], rec2[3]

        box1 = box(x1min, y1min, x1max, y1max)
        box2 = box(x2min, y2min, x2max, y2max)
        is_overlapping = box1.intersects(box2)

        area1 = box1.area
        if not area1:
            # Degenerate first box: report no coverage instead of dividing by zero.
            return is_overlapping, 0.0
        return is_overlapping, box1.intersection(box2).area / area1 * 100

    def get_type(self, i):
        """Map an object name to its type code.

        :param i: Object name, matched from its start.
        :return: 3 for 'Wall...', 2 for 'Human...', 1 for 'robot...',
            0 (static object) for anything else.
        """
        for pattern, obj_type in self._TYPE_PATTERNS:
            if re.match(pattern, i):
                return obj_type
        return 0  # static objects

    def _init_graph(self, graph, robot_label):
        """Add the nodes and the edge that every graph starts with."""
        graph.node_attr['shape'] = 'record'
        graph.node('robot', label=robot_label)
        graph.node('warehouse', label='warehouse')
        graph.node('floor', label='{floor|size: 25*25}')
        graph.edge('warehouse', 'floor')

    def _find_support(self, index, rois, names):
        """Find the node a product is attached to.

        Returns ``(parent, percent)``: the name of the first other object
        whose box covers more than 25 percent of this one, or ``'floor'`` when
        there is none.  ``percent`` is the value from the last comparison made
        (0.0 if there was none), also in the ``'floor'`` case.
        """
        percent = 0.0
        for j, other in enumerate(names):
            if j == index:
                continue
            is_overlapping, percent = self.get_overlap_bbox(rois[index], rois[j])
            if is_overlapping and percent > 25.0:
                return other, percent
        return 'floor', percent

    def callback(self, image, depth_image):
        """Process one RGB/depth image pair.

        Runs detection on the RGB image, builds the graphs from the detections
        and the depth values under each mask, optionally renders the graph to
        ``scene_graph.gv``, and publishes the scene graph and the annotated
        image.  A CvBridgeError is printed and the frame is dropped.

        :param image: RGB image message.
        :param depth_image: Depth image message.
        """
        try:
            self.time_start_list.append(time.time())
            farClippingPlane = 3.5
            nearClippingPlane = 0.0099999
            cv_depth_image = self.bridge.imgmsg_to_cv2(depth_image,
                                                       "passthrough")
            cv_depth_image = cv2.flip(cv_depth_image, 0)

            # Linear rescale: a raw value v becomes near + v * (far - near).
            cv_depth_image = nearClippingPlane + (
                cv_depth_image * (farClippingPlane - nearClippingPlane))

            # Start from an empty graph on every frame after the first.
            if self.check:
                self.dot.clear()
            describe_start = time.time()
            cv_image = self.bridge.imgmsg_to_cv2(image, "rgb8")
            r = self.model.detect([cv_image], verbose=1)[0]

            img_out = display_instances(cv_image,
                                        r['rois'],
                                        r['masks'],
                                        r['class_ids'],
                                        class_names,
                                        r['scores'],
                                        show_window=self.to_display)

            # Name every detection '<class name>#<running index within its class>'.
            count_objects = [0] * len(class_names)
            detected_objects = []
            for i, class_id in enumerate(r['class_ids']):
                detected_objects.append(class_names[class_id] + '#' +
                                        str(count_objects[class_id]))
                count_objects[class_id] += 1
                print('Object : ', class_id, detected_objects[i],
                      r['rois'][i])

            robot_label = "turtlebot2i"
            self._init_graph(self.dot, robot_label)
            # Graph whose source is published; it is rebuilt on every frame.
            scene_dot = Digraph(comment='warehouse', format='svg')
            self._init_graph(scene_dot, robot_label)

            for i, name in enumerate(detected_objects):
                # Depth values of the pixels covered by this object's mask.
                depths = cv_depth_image[r['masks'][:, :, i]]
                min_distance = depths.min()

                # NOTE(review): 'Wall*' / 'Product*' are regexes in which '*'
                # applies to the last letter, so they also match names that
                # start with 'Wal' / 'Produc'.  Kept unchanged.
                if re.match(r'Wall*', name):
                    parent = 'warehouse'
                    node_label = name
                elif re.match(r'Product*', name):
                    parent, intersection_area = self._find_support(
                        i, r['rois'], detected_objects)
                    node_label = "%s|{Distance|Min: %.2f|Max: %.2f|Mean: %.2f}|intersection area: %.2f" % (
                        name, min_distance, depths.max(), np.mean(depths),
                        intersection_area)
                else:
                    parent = 'floor'
                    node_label = "%s|{Distance|Min: %.2f|Max: %.2f|Mean: %.2f}" % (
                        name, min_distance, depths.max(), np.mean(depths))

                self.dot.node(name, label=node_label)
                self.dot.edge(parent, name, label='on')

                # Orientation, direction, velocity and size are fixed
                # placeholder values (0, 0, 0, 1, 1).
                node_label_scene_graph = self._SCENE_LABEL % (
                    name, self.get_type(name), min_distance, 0, 0, 0, 1, 1)
                scene_dot.node(name, label=node_label_scene_graph)
                scene_dot.edge(parent, name, label='on')

            # Explicit comparison kept on purpose: the value comes from the
            # parameter server and is only honoured when it equals True.
            if self.to_display == True:
                # Open the viewer for the first frame only.
                self.dot.render('scene_graph.gv', view=not self.check)

            self.check = True

            sg_message = SceneGraph()
            sg_message.header = std_msgs.msg.Header()
            sg_message.header.stamp = rospy.Time.now()
            sg_message.sg_data = scene_dot.source
            print('Time taken to decribe: ', time.time() - describe_start)

            self.scenegraph_pub.publish(sg_message)
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(img_out, "bgr8"))

            self.time_sg_end_list.append(time.time())
            last_duration = self.time_sg_end_list[-1] - self.time_start_list[-1]
            print("ROS MRCNN last duration:", last_duration)

        except CvBridgeError as e:
            print(e)

Example #24

Show file

 def show(self, file_name):
     """Draw all topology nodes into a new graph and open it in a viewer.

     :param file_name: File name given to the graph; its directory is ./pdf_data.
     """
     graph = Digraph(directory="./pdf_data", filename=file_name)
     graph.clear()
     # Every node adds itself to the graph; it receives its own `fathers`
     # (presumably to draw the incoming edges -- see print_node).
     for topo_node in self.topology_node_list:
         topo_node.print_node(d=graph, father_nodes=topo_node.fathers)
     graph.view()