Example #1
0
def t1():
    """Build a three-node demo digraph, view it, then clear it.

    Node ``1`` (label ``a``) is the root; nodes ``2`` (``b``) and ``3``
    (``c``) are each declared and then linked from the root.
    """
    graph = Digraph()
    graph.node('1', 'a')
    # Declare each child and connect it to the root right away, so the
    # statements land in the graph in the order node, edge, node, edge.
    for ident, label in (('2', 'b'), ('3', 'c')):
        graph.node(ident, label)
        graph.edge('1', ident)
    graph.view()
    graph.clear()
Example #2
0
 def show(self, file_name):
     """Draw the weighted graph held in ``self._nodes`` and open the result.

     :param file_name: file name handed to ``Digraph``; output goes under
         ``./pdf_data``
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()
     # Draw all nodes first.
     for vertex in self._nodes:
         graph.node(name=vertex.name, label=vertex.name)
     # Then add one edge per (parent, child) pair, labelled with its weight.
     for parent in self._nodes:
         parent_name = parent.name
         for child, weight in parent.son_weight.items():
             graph.edge(tail_name=parent_name,
                        head_name=child.name,
                        label=str(weight))
     graph.view()
Example #3
0
    def render_current_generation(self, folder_name: str):
        """
        Render each tree of the current generation to its own PNG file.

        Every image gets an extra box-shaped annotation node listing the
        generation (G), fitness (F), depth (D), width (W) and node count (n).

        :param folder_name: base folder; files are written to
            ``<folder_name>/Generation <generation>``
        :return: None
        """
        graph = Digraph()
        graph.format = 'png'
        graph.directory = folder_name + "/Generation " + str(self.generation)

        # Raw DOT statement for the annotation node, filled in per tree.
        annotation = '"00_comment_00" [label="G : {0}\nF : {1}\nD : {2}\nW : {3}\nn : {4}" , shape="box" , color="white"]'

        # Files are numbered from 1 in population order.
        for number, tree in enumerate(self.population, start=1):
            graph.clear()
            graph.body.append(
                annotation.format(self.generation, tree.fitness, tree.depth,
                                  tree.width, tree.number_of_nodes_in_tree))
            graph.body.append(tree.print_graph())
            graph.render("Individual {0}".format(number))
Example #4
0
class Automata(Grafo):
    """Automaton whose states and labelled transitions are drawn with Graphviz."""

    def __init__(self):
        """Start with no states and render the (still empty) drawing once."""
        super().__init__()
        self.estados = []
        self.d = lienzo = Digraph('Automata',
                                  filename='out/process.gv',
                                  engine='sfdp',
                                  format="png")
        lienzo.attr(rankdir='LR', size='8,5')
        lienzo.render()

    def nuevo_estado(self, valor):
        """Create a state holding ``valor`` and register it."""
        self.estados.append(Estado(valor))

    def nueva_relacion(self, est1, est2, valor):
        """Link state ``est1`` to state ``est2`` with a transition ``valor``.

        Both states are looked up with ``self.buscar``; if either lookup
        returns ``None`` an error is printed and nothing is linked.
        """
        origen = self.buscar(est1, self.estados)
        destino = self.buscar(est2, self.estados)

        if origen is None or destino is None:
            print("ERROR: ALGUNO DE LO ESTADOS NO EXISTE")
            return

        origen.relacionar(valor, destino)

    def show(self):
        """Rebuild the drawing from the current states and render it."""
        lienzo = self.d
        lienzo.clear()
        lienzo.attr(rankdir='LR', size='8,5')
        # The doublecircle style is declared, but no node is emitted before
        # the style is switched to circle (final-state drawing is disabled).
        lienzo.attr('node', shape='doublecircle')
        lienzo.attr('node', shape='circle')

        # One labelled edge per outgoing arc of every state.
        for origen in self.estados:
            for arista in origen.aristas:
                lienzo.edge(origen.valor,
                            arista.vertice.valor,
                            label=arista.valor)

        lienzo.render()
Example #5
0
 def show(self, file_name=None):
     """Draw the array-backed heap as a binary tree and open the result.

     The slot at index ``i`` is linked to the slots at ``2*i`` and
     ``2*i + 1`` whenever those indices do not exceed ``self.size``.

     :param file_name: file name handed to ``Digraph``; output goes under
         ``./pdf_data``
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()

     # names[i] is the graph id of heap slot i, or None for an empty slot.
     names = []
     for item in self.heap:
         if item is not None:
             ident = str(id(item))
             graph.node(name=ident, label=str(item.value))
         else:
             ident = None
         names.append(ident)

     last_parent = self.size // 2
     for parent in range(1, last_parent + 1):
         # Left child first, then right child.
         for child in (parent * 2, parent * 2 + 1):
             if child <= self.size:
                 graph.edge(head_name=names[child],
                            tail_name=names[parent])
     graph.view()
Example #6
0
class Automato:
    def __init__(self):  # Constructor
        """Create an empty automaton: no alphabet, states or transitions."""

        # Path last passed to montaGrafo. Initialised here so the attribute
        # exists before montaGrafo runs (destroiAutomato also resets it).
        self.arquivo = None
        self.tipo = None
        self.descricao = "Autômato sem descrição"
        self.alfabeto = []
        self.alfabetoPilha = []
        # state -> {transition -> [target states]}
        self.transicoes = {}
        # word index -> states recorded while processing that symbol
        self.estadosAtivos = {}
        self.estadoInicial = None
        self.estadosFinais = []
        self.grafo = Digraph(format='svg')

    # Takes the input file path, builds the automaton's graph and saves it under the matching name
    def montaGrafo(self, arquivo):

        self.arquivo = arquivo

        self.grafo.attr(rankdir='LR')
        self.grafo.attr('edge', arrowsize="0.3")

        # Um nó invisível, utilizado para criar a seta do estado inicial
        self.grafo.node('',
                        shape='plaintext',
                        fixedsize='true',
                        height='0.1',
                        width='0.1')

        for no in self.transicoes.keys(
        ):  # Para cada estado, cria um nó no grafo

            if no in self.estadosFinais:  # Caso o estado seja final, círculo duplo

                self.grafo.attr('node', shape='doublecircle')

            else:

                self.grafo.attr('node', shape='circle')

            self.grafo.node(no)

        arestas = {}

        for no in self.transicoes.keys(
        ):  # Para cada transição, cria uma aresta no grafo

            if no == self.estadoInicial:  # Cria a seta do estado inicial

                self.grafo.edge('', no, arrowsize='0.5')

            for transicao in self.transicoes[no]:

                for no2 in self.transicoes[no][transicao]:

                    if (no + ' ' + no2) in arestas.keys():
                        arestas[no + ' ' +
                                no2] = arestas[no + ' ' +
                                               no2] + ", " + transicao

                    else:
                        arestas[no + ' ' + no2] = transicao

        for aresta in arestas.keys():

            self.grafo.edge(aresta.split(' ')[0],
                            aresta.split(' ')[1],
                            label=arestas[aresta])

        # Salva o grafo num arquivo svg, pasta "Grafos"
        self.grafo.render(filename=(arquivo.replace("Entradas", "Grafos")),
                          format='svg',
                          cleanup=True)

    # Devolve um grafo ressaltando os estados ativos de palavra ao processar indice
    def montaGrafoPassoAPasso(self, palavra, indice):
        """Return a new graph highlighting the step at position ``indice``.

        Nodes are coloured red when active and black otherwise. With
        ``indice == -1`` (before any symbol is processed) only the initial
        state is active; otherwise the active states are those in
        ``self.estadosAtivos[indice]``. Edges are coloured red when they
        carry ``palavra[indice]`` and connect states active in consecutive
        steps (or leave the initial state when ``indice == 0``).

        :param palavra: the word being processed
        :param indice: index of the symbol being processed, or -1
        :return: the freshly built ``Digraph``
        """
        grafo = Digraph()
        grafo.attr(rankdir='LR')

        # Invisible node, kept for parity with montaGrafo.
        grafo.node('',
                   shape='plaintext',
                   fixedsize='true',
                   height='0.1',
                   width='0.1')

        # Decide shape and highlight for every state.
        for no in self.transicoes:
            forma = 'doublecircle' if no in self.estadosFinais else 'circle'
            grafo.attr('node', shape=forma)

            if indice == -1:
                # Before processing starts only the initial state is active.
                ativo = no == self.estadoInicial
            else:
                ativo = no in self.estadosAtivos[indice]
            grafo.attr('node', color='red' if ativo else 'black')

            grafo.node(no)

        # Merge parallel transitions: (source, target) -> [transitions].
        # Tuple keys are used instead of a space-joined string so state
        # names that contain spaces are kept intact.
        arestas = {}
        for no in self.transicoes:
            for transicao in self.transicoes[no]:
                for no2 in self.transicoes[no][transicao]:
                    arestas.setdefault((no, no2), []).append(transicao)

        # Decide the highlight for every merged edge.
        for (no, no2), simbolos in arestas.items():
            if indice > 0:
                destacada = (palavra[indice] in simbolos
                             and no2 in self.estadosAtivos[indice]
                             and no in self.estadosAtivos[indice - 1])
            elif indice == 0:
                destacada = (palavra[indice] in simbolos
                             and no == self.estadoInicial)
            else:
                destacada = False
            grafo.attr('edge', color='red' if destacada else 'black')

            # The label is the list's text form without quotes or brackets.
            rotulo = str(simbolos).replace("'", '').replace(']', '').replace('[', '')
            grafo.edge(no, no2, label=rotulo)

        return grafo

    def destroiAutomato(self):  # Reseta todos os dados do autômato

        self.arquivo = None
        self.tipo = None
        self.descricao = "Autômato sem descrição"
        self.alfabeto.clear()
        self.alfabetoPilha.clear()
        self.transicoes.clear()
        self.estadosAtivos.clear()
        self.estadoInicial = None
        self.estadosFinais.clear()
        self.grafo.clear(keep_attrs=False)

    # Recebe uma palavra e verifica se e quais símbolos não são aceitos pelo alfabeto do autômato
    def verificaAlfabeto(self, palavra):

        caracteres = []  # Armazena os símbolos não aceitos

        for c in palavra:

            if c not in self.alfabeto and c not in caracteres:

                caracteres.append(c)

        if len(caracteres) > 0:

            return "Palavra não aceita! - Os símbolos " + str(caracteres)\
            + " não estão no alfabeto\n<< Alfabeto: " + str(self.alfabeto)

        return True

    # Recebe uma palavra a ser testada e um dicionário onde poderá ser salvo o passo-a-passo
    def testaPalavra(self, palavra, passoAPasso):

        if palavra == '':  # Caso a palavra seja vazia

            if self.estadoInicial in self.estadosFinais:
                retorno = "Palavra aceita!"

            else:
                retorno = "Palavra não aceita!"

        else:

            # Verifica se a palavra é aceita pelo alfabeto do autômato
            retorno = self.verificaAlfabeto(palavra)

        if retorno == True:  # Caso seja, testa as transições

            # Índices da palavra
            caractere = 0
            caractereAnterior = 0

            # A busca começa no estado inicial e tenta alcançar o estado final
            estadoAtual = self.estadoInicial

            # A pilha armazena duplas do tipo (estado, indice da palavra a ser processado no estado)
            fila = Queue()
            fila.put([estadoAtual, caractere])

            # Caso o usuário queira exibir o passo-a-passo, limpa o dicionário de estados,
            # pois ele será diferente para cada palavra
            if passoAPasso:

                self.estadosAtivos.clear()

            while True:

                # Se a pilha não estiver vazia, desempílha uma dupla e processa a transição a partir do estado
                if not fila.empty():

                    aux = fila.get()
                    estadoAtual = aux[0]  # Novo estado atual
                    caractere = aux[
                        1]  # Indice da palavra que armazena o caractere a ser processado

                # Se a pilha estiver vazia, significa que, ao final do processamento,
                # nenhum estado final foi atingido
                else:

                    retorno = "Palavra não aceita!"

                    break

                # Se o índice a ser processado for -1, significa que o autômato chegou
                # ao fim de um dos caminhos de processamento da palavra
                if caractere == -1:

                    # Dessa forma, se o estado atual for final, a palavra deve ser aceita
                    if estadoAtual in self.estadosFinais:

                        retorno = "Palavra aceita!"

                        break

                # Do contrário, processa a transição a prtir do estado atual
                else:

                    # Caso a transição exista na tabela de transições
                    if palavra[caractere] in self.transicoes[estadoAtual].keys(
                    ):

                        # O passo-a-passo funciona armazenando em um dicionário todos os estados
                        # ativos ao processar cada símbolo da palavra
                        if passoAPasso:

                            if caractere not in self.estadosAtivos.keys():

                                self.estadosAtivos[caractere] = []

                        # Para todos os estados atingidos a partir da transição
                        for estado in self.transicoes[estadoAtual][
                                palavra[caractere]]:

                            # Caso o caminho de processamento não tenha terminado, empilha o estado
                            # e o próximo índice da palavra a ser processado
                            if caractere + 1 != len(palavra):
                                fila.put([estado, caractere + 1])

                            # Caso o índice da palavra seja o último, fim de um caminho de processamento
                            else:
                                fila.put([estado, -1])

                            # Armazena o estado no dicionário de passo-a-passo
                            if passoAPasso and estado not in self.estadosAtivos[
                                    caractere]:
                                self.estadosAtivos[caractere].append(estado)

        return retorno

    def testaPalavraPilha(self, palavra, passoAPasso):
        """Decide whether the pushdown automaton accepts ``palavra``.

        Transitions are read as 3-item sequences: ``transicao[0]`` is the
        input symbol, ``transicao[1]`` the symbol to pop and ``transicao[2]``
        the symbol to push; ``'&'`` and ``'?'`` in the pop/push positions
        mean no pop/push is performed here.

        :param palavra: the word to test
        :param passoAPasso: when truthy, ``self.estadosAtivos`` is cleared
            and refilled with the states reached at each processed index
        :return: ``"Palavra aceita!"``, ``"Palavra não aceita!"``, or the
            message from ``verificaAlfabeto`` when the word contains symbols
            outside the alphabet

        NOTE(review): a single ``pilha`` list is shared by every queued
        branch, so pops and pushes made while exploring one branch are seen
        by the others - confirm this is intended.
        """

        if palavra == '':  # The word is empty

            if self.estadoInicial in self.estadosFinais:
                retorno = "Palavra aceita!"

            else:
                retorno = "Palavra não aceita!"

        else:

            # Check whether the word only uses symbols of the automaton's alphabet
            retorno = self.verificaAlfabeto(palavra)

        if retorno == True:  # If so, test the transitions

            # Word indices (caractereAnterior is assigned but never read in this method)
            caractere = 0
            caractereAnterior = 0

            # The search starts at the initial state and tries to reach a final state
            estadoAtual = self.estadoInicial

            # The queue holds pairs of the form (state, index of the word to process in that state)
            fila = [[estadoAtual, caractere]]

            # The automaton's stack starts empty
            pilha = []

            # Variable meant as a 'wildcard' value; never read in this method.
            # NOTE(review): `Any` is not defined in the visible part of the
            # file - presumably a project class; confirm it can be instantiated.
            ANYTHING = Any()

            # If the user wants the step-by-step output, clear the states dictionary,
            # since it is different for every word
            if passoAPasso:

                self.estadosAtivos.clear()

            while True:

                # If the queue is not empty, dequeue a pair and process the transition from that state
                if len(fila) > 0:

                    aux = fila.pop(0)
                    estadoAtual = aux[0]  # New current state
                    caractere = aux[
                        1]  # Index of the word symbol to be processed

                # If the queue is empty, processing has finished without
                # reaching any final state
                else:

                    retorno = "Palavra não aceita!"

                    break

                # An index of -1 means the automaton reached the end of one
                # of the processing paths for the word
                if caractere == -1:

                    # In that case, if the current state is final, the word is accepted
                    if estadoAtual in self.estadosFinais:

                        retorno = "Palavra aceita!"

                        break

                # Otherwise, process the transitions from the current state
                else:

                    # Process the empty transitions
                    self.transicoesVazias(fila, estadoAtual, caractere)
                    # Debug output: prints the whole queue on every step
                    print(fila)
                    #input()

                    # Process the '?' (interrogative) transitions
                    self.transicoesInterrogativas(fila, estadoAtual, caractere,
                                                  palavra, pilha)

                    # Process the regular transitions
                    for transicao in self.transicoes[estadoAtual].keys():

                        # The transition reads the current symbol of the word
                        if transicao[0] == palavra[caractere]:

                            # Step-by-step works by storing in a dictionary all states
                            # active while processing each symbol of the word
                            if passoAPasso:

                                if caractere not in self.estadosAtivos.keys():

                                    self.estadosAtivos[caractere] = []

                            # For every state reached through the transition
                            for estado in self.transicoes[estadoAtual][
                                    transicao]:

                                # Debug output: source and target state
                                print(estadoAtual, estado)

                                desempilha = transicao[1]
                                empilha = transicao[2]

                                # Remove the occurrence of the pop symbol closest
                                # to the end of the list (reverse, remove first, reverse back).
                                # NOTE(review): if remove() raises because the symbol
                                # is absent, the second reverse() is skipped and the
                                # stack is left reversed; the bare except also hides
                                # any other error.
                                try:
                                    if desempilha != '&' and desempilha != '?':
                                        pilha.reverse()
                                        pilha.remove(desempilha)
                                        pilha.reverse()

                                except:
                                    pass

                                # Runs only when the pop step raised nothing
                                else:
                                    # If the processing path has not finished, enqueue the state
                                    # and the next index of the word to be processed
                                    if caractere + 1 != len(palavra):
                                        fila.append([estado, caractere + 1])

                                    # If this was the last index of the word, the path ends here
                                    else:
                                        fila.append([estado, -1])

                                    # Store the state in the step-by-step dictionary
                                    if passoAPasso and estado not in self.estadosAtivos[
                                            caractere]:
                                        self.estadosAtivos[caractere].append(
                                            estado)

                                    # Push the transition's push symbol, if any
                                    if empilha != '&' and empilha != '?':
                                        pilha.append(empilha)

        return retorno

    def transicoesVazias(self, fila, estado, caractere):

        filaTemp = [estado]
        transVazia = ('&', '&', '&')

        while len(filaTemp) > 0:

            atual = filaTemp.pop(0)

            if transVazia in self.transicoes[atual]:

                for e in self.transicoes[atual][transVazia]:

                    filaTemp.append(e)
                    fila.append([e, caractere])

    def transicoesInterrogativas(self, fila, estado, caractere, palavra,
                                 pilha):

        for transicao in self.transicoes[estado]:

            if transicao[0] == '?':

                if transicao[1] == '?':

                    if caractere == len(palavra) - 1 and len(pilha) == 0:
                        for e in self.transicoes[estado][transicao]:
                            fila.append([e, -1])

                else:
                    if caractere == len(palavra) - 1:
                        for e in self.transicoes[estado][transicao]:
                            fila.append([e, -1])

            elif transicao[1] == '?':

                for e in self.transicoes[estado][transicao]:

                    if caractere == len(palavra) - 1:
                        fila.append([e, -1])

                    else:
                        fila.append([e, caractere + 1])

    # Imprime o dicionário de passo-a-passo
    def imprimePassoAPasso(self, palavra):

        print("<< Estado inicial: " +
              (self.estadoInicial.replace('*', '')).replace('+', ''))

        for indice in self.estadosAtivos.keys():

            print("<< Simbolo: " + palavra[indice] + " - Estados ativos: " +
                  str(self.estadosAtivos[indice]))
Example #7
0
class ros_msdn:
    def __init__(self):
        """Load the dataset and model, then wire up the ROS topics.

        Steps, in order: parse the command-line arguments, seed the random
        generators, build the Visual Genome training set and its loader,
        build the network and load the weights named by
        ``args.resume_model``, switch the network to eval mode, and finally
        subscribe to the time-synchronised RGB and depth image topics and
        create the scene-graph publisher.

        All ``print`` calls take a single parenthesised argument so this
        method parses and prints the same on Python 2 and Python 3.

        :raises Exception: if ``args.resume_model`` is empty
        """
        # To set the model name automatically
        args = parser.parse_args()
        print(args)
        args = get_model_name(args)
        print('Model name: {}'.format(args.model_name))
        self.check = True

        # To set the random seed
        random.seed(args.seed)
        torch.manual_seed(args.seed + 1)
        torch.cuda.manual_seed(args.seed + 2)

        # Left exactly as written: on Python 2 the trailing comma suppresses
        # the newline; on Python 3 it is a harmless tuple expression.
        print("Loading training params"),
        self.train_set = visual_genome('normal', 'train')
        print("Done.")

        self.train_loader = torch.utils.data.DataLoader(self.train_set,
                                                        batch_size=1,
                                                        shuffle=True,
                                                        num_workers=8,
                                                        pin_memory=True)
        end = time.time()
        # Model declaration
        self.net = Hierarchical_Descriptive_Model(
            nhidden=args.mps_feature_len,
            n_object_cats=self.train_set.num_object_classes,
            n_predicate_cats=self.train_set.num_predicate_classes,
            n_vocab=self.train_set.voc_size,
            voc_sign=self.train_set.voc_sign,
            max_word_length=self.train_set.max_size,
            MPS_iter=args.MPS_iter,
            use_language_loss=not args.disable_language_model,
            object_loss_weight=self.train_set.inverse_weight_object,
            predicate_loss_weight=self.train_set.inverse_weight_predicate,
            dropout=args.dropout,
            use_kmeans_anchors=not args.use_normal_anchors,
            gate_width=args.gate_width,
            nhidden_caption=args.nhidden_caption,
            nembedding=args.nembedding,
            rnn_type=args.rnn_type,
            rnn_droptout=args.caption_use_dropout,
            rnn_bias=args.caption_use_bias,
            use_region_reg=args.region_bbox_reg,
            use_kernel=args.use_kernel_function)

        # Dump every parameter shape and the model structure for inspection.
        params = list(self.net.parameters())
        for param in params:
            print(param.size())
        print(self.net)

        # To group up the features
        vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
            self.net)

        # Setting the state of the training model
        self.net.cuda()
        self.net.train()
        network.set_trainable(self.net, False)

        # loading model for inference
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, self.net)
        args.train_all = True
        optimizer_select = 2

        # The returned optimizer is not used here; the call is kept in case
        # get_optimizer has side effects on the network - TODO confirm.
        network.get_optimizer(args.lr, optimizer_select, args,
                              vgg_features_var, rpn_features,
                              hdn_features, language_features)

        self.net.eval()
        # Single formatted string: prints the same text on Python 2 and 3
        # (the two-argument form printed a tuple on Python 2).
        print('Model Loading time:  {}'.format(time.time() - end))

        # Set topics
        self.bridge = CvBridge()
        self.dot = Digraph(comment='warehouse', format='svg')
        self.regions_dot = Digraph(comment='regions', format='svg')

        self.image_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/rgb/raw_image', Image)
        self.image_depth_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/depth/raw_image', Image)
        # RGB and depth frames are delivered together to self.callback.
        self.ts = message_filters.TimeSynchronizer(
            [self.image_sub, self.image_depth_sub], queue_size=1)
        print('calling callback')
        self.ts.registerCallback(self.callback)
        self.scenegraph_pub = rospy.Publisher('/turtlebot2i/scene_graph',
                                              SceneGraph,
                                              queue_size=10)

    def callback(self, image, depth_image):

        try:
            print 'inside callback '
            farClippingPlane = 3.5
            nearClippingPlane = 0.0099999
            cv_depth_image = self.bridge.imgmsg_to_cv2(depth_image,
                                                       "passthrough")
            cv_depth_image = cv2.flip(cv_depth_image, 0)
            cv_depth_image = nearClippingPlane + (
                cv_depth_image * (farClippingPlane - nearClippingPlane))
            cv_image = self.bridge.imgmsg_to_cv2(image, "rgb8")

            predicates_frequency = {
                'behind': 1,
                'on': 1,
                'has': 1000000,
                'in_front_of': 1,
                'next_to': 2,
                'beside': 2,
                'with': 1,
                'attach_to': 1,
                'connected_to': 1,
                'charges': 1,
                'in_hands_of': 1
            }
            all_classes = {
                'slidingdoor': 0,
                'wall': 0,
                'shelf': 0,
                'robot': 0,
                'human': 0,
                'conveyorbelt': 0,
                'dockstation': 0,
                'product': 0,
                'floor': 0
            }
            class_names = [
                'floor', 'wall', 'shelf', 'robot', 'human', 'conveyorbelt',
                'dockstation', 'product', 'slidingdoor'
            ]
            allowed_self_relationship = {
                'slidingdoor': [],
                'wall': ['beside', 'attach_to'],
                'shelf': ['beside', 'next_to'],
                'robot': [],
                'human': ['in_front_of', 'behind'],
                'conveyorbelt': [],
                'dockstation': [],
                'product': ['beside', 'next_to'],
                'floor': []
            }

            print("Describing.....")
            if self.check == False:
                self.dot.clear()
                self.regions_dot.clear()
            im, im_info = self.train_set.get_image_info(cv_image)

            end = time.time()

            region_caption, region_list, region_pred_boxes, region_logprobs, class_pred_boxes, class_scores,\
             class_inds, subject_list, object_list, predicate_list, predicate_inds, predicate_scores = self.net.describe(im.unsqueeze(0), [im_info], top_N=[50])

            class_idx = []
            for class_ in all_classes.keys():
                class_idx.append(self.train_set.word2idx[class_])

            predicate_idx = []
            for predicate in predicates_frequency.keys():
                predicate_idx.append(self.train_set.word2idx[predicate])

            classes_name = []

            predicate_scores = predicate_scores.squeeze()[predicate_list]
            subject_scores = class_scores[subject_list].squeeze()
            object_scores = class_scores[object_list].squeeze()
            relationship_scores = predicate_scores * (subject_scores +
                                                      object_scores) / 2.0

            keep_indexes = np.where((subject_scores > 0.7)
                                    & (object_scores > 0.7)
                                    & (predicate_scores > 0.5))[0]
            keep_classes = np.where(class_scores > 0.7)[0]
            class_name_score = dict()
            for i in keep_classes:
                class_name = self.train_set._object_classes[class_inds[i]]
                score = class_scores[i]
                all_classes[class_name] += 1
                if class_name != 'floor':
                    classes_name.append(
                        str(class_name + '#' + str(all_classes[class_name])))
                    class_name_score[str(class_name + '#' +
                                         str(all_classes[class_name]))] = score
                else:
                    classes_name.append(str(class_name))
                    class_name_score[str(class_name)] = score

            #_ = draw_bbox_label_msdn(cv_image, class_pred_boxes[keep_classes], class_inds[keep_classes], class_scores[keep_classes])

            classes_name = np.array(classes_name)
            subject_scores = subject_scores[keep_indexes]
            object_scores = object_scores[keep_indexes]
            subject_list = subject_list[keep_indexes]
            object_list = object_list[keep_indexes]
            predicate_list = predicate_list[keep_indexes]
            relationship_scores = relationship_scores[keep_indexes]
            predicate_scores = predicate_scores[keep_indexes]

            # subject_inds = class_inds[subject_list]
            # object_inds = class_inds[object_list]

            subjects_name = classes_name[subject_list]
            objects_name = classes_name[object_list]
            predicate_inds = predicate_inds.squeeze()[predicate_list]

            #print (class_inds[subject_list[keep_indexes]])
            relationship_dict = dict()

            last_subject = ''
            last_predicate = ''
            temp_score_list = []
            object_ids = []

            for i in range(len(subjects_name)):

                predicate = self.train_set._predicate_classes[
                    predicate_inds[i]]
                subject = subjects_name[i]
                _object = objects_name[i]

                if subject != _object:
                    if (subject == 'floor' and predicate != 'has') or (
                            _object == 'floor' and predicate != 'on'
                    ) or (subject[:-2] == 'wall' and predicate == 'in_front_of'
                          and _object[:-2] == 'dockstation'):
                        print 'unwanted relationship', subject, '-> ', predicate, ' -> ', _object

                    elif subject == 'floor' and predicate == 'has':
                        if subject not in relationship_dict.keys():
                            relationship_dict[subject] = dict()
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif predicate not in relationship_dict[subject].keys(
                        ):
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif _object not in relationship_dict[subject][
                                predicate].keys():
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        else:
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                    elif _object in relationship_dict.keys() and predicate in relationship_dict[_object].keys()\
                             and subject in relationship_dict[_object][predicate].keys() and  predicate_scores[i] > relationship_dict[_object][predicate][subject]:

                        if subject not in relationship_dict.keys():
                            relationship_dict[subject] = dict()
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]
                        elif predicate not in relationship_dict[subject].keys(
                        ):
                            relationship_dict[subject][predicate] = dict()
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        elif _object not in relationship_dict[subject][
                                predicate].keys():
                            relationship_dict[subject][predicate][
                                _object] = predicate_scores[i]

                        del relationship_dict[_object][predicate][subject]
                        if len(relationship_dict[_object][predicate]) == 0:
                            del relationship_dict[_object][predicate]
                            if len(relationship_dict[_object]) == 0:
                                del relationship_dict[_object]

                    elif subject == last_subject:
                        if predicate == last_predicate:
                            temp_score_list.append(predicate_scores[i])
                            object_ids.append(i)
                        else:
                            if len(temp_score_list) > 1:
                                sorted_scores = np.array(
                                    temp_score_list).argsort()[::-1]
                                indx = np.array(object_ids)[sorted_scores][0]
                            else:
                                indx = object_ids[-1]
                            #print 'Saving relationship 1', subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                            relationship_dict[subject][last_predicate][
                                objects_name[indx]] = predicate_scores[indx]

                            relationship_dict[subject][predicate] = dict()
                            temp_score_list = [predicate_scores[i]]

                            if subject[:
                                       -2] == _object[:
                                                      -2] and predicate not in allowed_self_relationship[
                                                          subject[:-2]]:
                                object_ids = []
                                last_predicate = ''
                                last_subject = ''
                            else:
                                object_ids = [i]
                                last_predicate = predicate
                    else:
                        relationship_dict[subject] = dict()
                        relationship_dict[subject][predicate] = dict()
                        if last_subject != '':
                            if len(temp_score_list) > 1:
                                sorted_scores = np.array(
                                    temp_score_list).argsort()[::-1]
                                indx = np.array(object_ids)[sorted_scores][0]
                            else:
                                indx = object_ids[-1]

                            #print 'Saving relationship 2', last_subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                            relationship_dict[last_subject][last_predicate][
                                objects_name[indx]] = predicate_scores[indx]

                        if subject[:
                                   -2] == _object[:
                                                  -2] and predicate not in allowed_self_relationship[
                                                      subject[:-2]]:
                            object_ids = []
                            last_predicate = ''
                            last_subject = ''
                            temp_score_list = []
                        else:
                            last_subject = subject
                            last_predicate = predicate
                            temp_score_list = [predicate_scores[i]]
                            object_ids = [i]

            if last_subject != '':
                if len(temp_score_list) > 1:
                    sorted_scores = np.array(temp_score_list).argsort()[::-1]
                    indx = np.array(object_ids)[sorted_scores][0]
                else:
                    indx = object_ids[-1]
                #print 'Saving relationship 3', last_subject, '-> ' , last_predicate, ' -> ', objects_name[indx]

                relationship_dict[last_subject][last_predicate][
                    objects_name[indx]] = predicate_scores[indx]

            print('Time taken to decribe: ', time.time() - end)

            self.dot.node_attr['shape'] = 'record'
            robot_label = "turtlebot2i"

            #self.dot.node('robot', label=robot_label)
            self.dot.node('warehouse', label='warehouse')
            floor_label = "{floor|Score: 0.7}"
            if 'floor' in class_name_score.keys():
                floor_label = '%s|Score: %.2f' % ('floor',
                                                  class_name_score['floor'])

            self.dot.node('floor', label=floor_label)
            self.dot.edge('warehouse', 'floor')

            list_nodes = ['warehouse', 'floor']

            for subject in relationship_dict.keys():
                for predicate in relationship_dict[subject].keys():
                    for _object in relationship_dict[subject][predicate].keys(
                    ):
                        if subject not in list_nodes:
                            node_label = '%s|Score: %.2f' % (
                                subject, class_name_score[subject])
                            self.dot.node(subject, label=node_label)
                            list_nodes.append(subject)
                        if _object not in list_nodes:
                            node_label = '%s|Score: %.2f' % (
                                _object, class_name_score[_object])
                            self.dot.node(_object, label=node_label)
                            list_nodes.append(_object)
                        self.dot.edge(subject, _object, label=predicate)

                        print 'Subject : ', subject, ' Predicate: ', predicate, ' Object: ', _object, ' Score: ', relationship_dict[
                            subject][predicate][_object]
            print 'END PRINTING Relationships...'

            sorted_regions = region_logprobs.argsort()[::-1]
            regions_dict = dict()
            regions_prob_dict = dict()
            sorted_region_keys = []

            for i in sorted_regions:
                if region_logprobs[i] > -0.5:
                    region_idx = region_caption[i]
                    common = list(frozenset(region_idx) & frozenset(class_idx))
                    #print 'Common classes: ', common
                    if len(common) == 2:
                        class_1 = self.train_set.idx2word[common[0]]
                        class_2 = self.train_set.idx2word[common[1]]
                        if all_classes[class_1] != 0 and all_classes[
                                class_2] != 0:

                            key = frozenset([class_1, class_2])
                            #print key
                            if key not in regions_prob_dict.keys():
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                            elif regions_prob_dict[key] < region_logprobs[i]:
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                    elif len(common) == 1:
                        class_1 = self.train_set.idx2word[common[0]]
                        if all_classes[class_1] != 0:
                            key = frozenset([class_1])
                            #print key
                            if key not in regions_prob_dict.keys():
                                regions_prob_dict[key] = region_logprobs[i]
                                regions_dict[key] = region_caption[i]
                                sorted_region_keys.append(key)

                            elif all_classes[class_1] > 1:
                                j = 1
                                while j < all_classes[class_1]:
                                    key = frozenset([class_1 + '#' + str(j)])
                                    #print key

                                    if key not in regions_prob_dict.keys():
                                        regions_prob_dict[
                                            key] = region_logprobs[i]
                                        regions_dict[key] = region_caption[i]
                                        sorted_region_keys.append(key)

                                    elif regions_prob_dict[
                                            key] < region_logprobs[i]:
                                        regions_prob_dict[
                                            key] = region_logprobs[i]
                                        regions_dict[key] = region_caption[i]
                                        sorted_region_keys.append(key)

                                    j += 1

            self.regions_dot.node_attr['shape'] = 'record'
            captions_list = []

            for key in sorted_region_keys:
                region_idx = regions_dict[key]
                log_prob = regions_prob_dict[key]
                caption = ""
                space = ""
                for indx in region_idx:
                    word = self.train_set.idx2word[indx]
                    if word != "<unknown>" and word != "<start>" and word != "<end>":
                        caption += space + word
                        space = " "
                node_label = "%s|Log probability: %.6f" % (caption, log_prob)
                repetition_check = True
                if caption not in captions_list:
                    self.regions_dot.node(caption, label=node_label)
                    repetition_check = False

                if len(captions_list) > 0:
                    self.regions_dot.edge(captions_list[-1], caption)

                if repetition_check == False:
                    captions_list.append(caption)

                #print caption, log_prob
            self.dot.render('scene_graph.gv', view=self.check)
            self.regions_dot.render('region_graph.gv', view=self.check)

            #s = Source(self.dot, filename="scene_graph", format="png")
            #s1 = Source(self.regions_dot, filename="region_graph", format="png")
            # #if self.check == False:
            # s.view()
            # s1.view()

            if self.check == True:
                self.check = False
            print 'END PRINTING Regions...'

        except CvBridgeError as e:
            print(e)
Example #8
0
class GraphViewer(QWidget):
    """Widget that draws a graphviz ``Digraph`` as SVG next to a scrollbar.

    Graph content is accumulated with :meth:`add`, re-rendered with
    :meth:`flush` and reset with :meth:`clear`.
    """

    def __init__(self, parent):
        """Create an empty graph, its SVG renderer and the scrollbar.

        Args:
            parent: Parent widget; it also becomes the scrollbar's parent and
                has its ``wheelEvent`` replaced by this widget's handler.
        """
        super().__init__(parent)
        self._y = 0
        # Last known widget size; refreshed at the start of every paint event.
        self._width = 1
        self._height = 1

        self.dot = Digraph(format='svg', strict=True)
        # Counter used to build unique names for operator nodes.
        self._declared_count = 1
        # Maps already-added data objects to the node name they were given.
        self._declared = dict()
        self._renderer = QSvgRenderer(self.dot.pipe(), self)

        self.scrollbar = QScrollBar(self.parent())
        self.scrollbar.setRange(0, 0)
        # Wheel events received by the parent are handled by this widget.
        self.parent().wheelEvent = self.wheelEvent

    def wheelEvent(self, event):
        """Forward wheel events to the scrollbar unless they fall outside the view."""
        if event.x() > self.getScrollWidth():
            return
        if event.y() > self._height:
            return
        self.scrollbar.wheelEvent(event)

    def add(self, data):
        """Add ``data`` (and everything it references) to the graph.

        Args:
            data: Object exposing ``is_variable``, ``is_constant`` and
                ``is_operator`` flags plus the attributes read below for the
                matching kind.

        Returns:
            The node name used for ``data``, or ``None`` when ``data`` is none
            of variable, constant or operator.
        """
        # Already declared: reuse the existing node instead of adding a new one.
        if data in self._declared:
            return self._declared[data]
        # is variable
        if data.is_variable:
            name = data.name
            self._declared[data] = name
            self.dot.node(name)
            if data.toward is not None:
                toward = self.add(data.toward)
                self.dot.edge(toward, name)
            return name
        # is constant
        if data.is_constant:
            name = data.symbol
            self._declared[data] = name
            self.dot.node(name)
            return name
        # is operator
        if data.is_operator:
            # The counter prefix keeps operators with the same name distinct.
            name = '[%d] %s' % (self._declared_count, data.name)
            self._declared_count += 1
            self._declared[data] = name
            self.dot.node(name)
            args = [data.sub, data.obj, data.step]
            if data.args is not None:
                args += data.args
            args = [arg for arg in args if arg is not None]
            for arg in args:
                arg = self.add(arg)
                self.dot.edge(arg, name)
            return name
        return None

    def paintEvent(self, event):
        """Paint the rendered SVG, shifted vertically by the scrollbar position."""
        self._width = self.width()
        self._height = self.height()
        self.scrollbar.setGeometry(self.getScrollWidth(), 0, 20, self._height)
        self.resize(self._renderer.defaultSize())
        # No painter.restore() here: nothing was saved on this fresh painter,
        # so restoring would be an unbalanced save/restore.
        painter = QPainter(self)
        drawRect = QRectF(self.rect())

        if self.scrollbar.maximum() == 0:
            draw_y = 0
        else:
            # Scroll offset is the overflow height scaled by scrollbar progress.
            draw_y = drawRect.height() - self._height
            draw_y *= self.scrollbar.value() / self.scrollbar.maximum()

        drawRect.setY(-draw_y)
        drawRect.setHeight(drawRect.y() + drawRect.height())
        self._renderer.render(painter, drawRect)

    def flush(self):
        """Re-render the graph and update the scrollbar maximum accordingly."""
        self._renderer = QSvgRenderer(self.dot.pipe())
        max_h = self._renderer.defaultSize().height() / self._height
        # Content that fits into the current height needs no scrolling.
        if max_h <= 1:
            max_h = 0
        max_h = int(self.delta() * max_h)
        self.scrollbar.setMaximum(max_h)

    def clear(self):
        """Forget all declared nodes and empty the underlying graph."""
        self._declared_count = 1
        self._declared = dict()
        self.dot.clear()

    def getScrollWidth(self):
        """Return the x position of the scrollbar (widget width minus 20)."""
        return self._width - 20

    def delta(self):
        """Return the factor applied to the height ratio in :meth:`flush`."""
        return 3.14
Example #9
0
class FsmDrawer():
    """Draws the transitions of a formal FSM definition with graphviz.

    Every :meth:`click` renders the graph once and then rebuilds a fresh
    graph via :meth:`clearReference`.
    """

    def __init__(self, formalDefinition, label):
        """Store the definition and label, then build the initial graph.

        Args:
            formalDefinition: Object providing ``accept``, ``start`` and
                ``transitions`` (items with ``start``, ``end``, ``regex``).
            label: Value passed to ``Window`` when the graph is (re)created.
        """
        self.label = label
        self.formalDefinition = formalDefinition
        self.createReference()

    def createReference(self):
        """Create a new graph and declare the accepting state node(s)."""
        self.fsm = Digraph("FSM", format="svg", filename="fsm.txt")
        self.fsm.attr("node",
                      shape="doublecircle",
                      color="#fdfaf6",
                      fontsize="10")  #fontsize can be resize
        self.fsm.attr(rankdir="LR", bgcolor="transparent", size="9,9!")

        try:
            for item in self.formalDefinition.accept:
                self.fsm.node(str(item), fontcolor="#fdfaf6")
        except TypeError:
            # ``accept`` is a single, non-iterable state rather than a
            # collection; only that case is handled, so unrelated errors
            # (and KeyboardInterrupt) are no longer swallowed.
            self.fsm.node(str(self.formalDefinition.accept))
        self.img = Window(self.label)

    def drawFsm(self, fsmStart, fsmNext, fsmLabel):
        """Add a transition edge drawn in the default colour."""
        self.fsm.attr("node",
                      shape="circle",
                      color="#fdfaf6",
                      fontcolor="#fdfaf6")
        self.fsm.edge(fsmStart,
                      fsmNext,
                      label=fsmLabel,
                      color="#fdfaf6",
                      fontcolor="#fdfaf6")

    def drawFsmColored(self, fsmStart, fsmNext, fsmLabel):
        """Add a transition edge drawn in the highlight colour."""
        self.fsm.attr("node",
                      shape="circle",
                      color="#fdfaf6",
                      fontcolor="#fdfaf6")
        self.fsm.edge(fsmStart,
                      fsmNext,
                      fontcolor="#fb743e",
                      label=fsmLabel,
                      color="#fb743e")

    def click(self, char, first):
        """Render the FSM, highlighting transitions matching ``char`` from ``first``.

        Args:
            char: Compared against ``str(transition.regex)``.
            first: Compared against ``str(transition.start)``.
        """
        for a in self.formalDefinition.transitions:
            if str(a.regex) == char and str(a.start) == first:
                self.drawFsmColored(str(a.start), str(a.end), str(a.regex))
            else:
                self.drawFsm(str(a.start), str(a.end), str(a.regex))

        # Edge from an unnamed plaintext node marks the start state.
        self.fsm.attr("node", shape="plaintext", color="#fdfaf6")
        self.fsm.edge("", str(self.formalDefinition.start), color="#fdfaf6")

        self.fsm.render(view=False)

        self.clearReference()

    def clearReference(self):
        """Pause briefly, clear the current graph and build a fresh one."""
        # NOTE(review): the pause presumably lets the rendered file be
        # consumed before the graph is rebuilt - confirm.
        time.sleep(0.8)
        self.fsm.clear()
        self.createReference()
Example #10
0
import time

from urllib.parse import urlparse, urljoin

import treelib as treelib
from bs4 import BeautifulSoup
from fractions import Fraction

from graphviz import Digraph

import _thread

from treelib import Tree

# Module-level graph instance; cleared right after creation so it starts empty.
main_graph = Digraph()
main_graph.clear()

# init the colorama module
# NOTE(review): no `import colorama` is visible in this excerpt - confirm it
# is imported before this line.
colorama.init()

# Foreground colour codes taken from colorama.Fore (RESET restores the default).
GREEN = colorama.Fore.GREEN
GRAY = colorama.Fore.LIGHTBLACK_EX
RED = colorama.Fore.RED
CYAN = colorama.Fore.CYAN
BLACK = colorama.Fore.BLACK
MAGENTA = colorama.Fore.LIGHTMAGENTA_EX
RESET = colorama.Fore.RESET

# initialize the set of links (unique links)
internal_urls = set()
external_urls = set()
Example #11
0
class SndpGraph():
    # Be careful with these parameters
    FLOAT_PERCENT_OF_LOC_WITH_END_PROD = 0.5  # this amount*num locations will be number bins in the problem
    INT_MAX_PRODUCTS_IN_ONE_LOCATION = 3  # might be higher under some conditions
    INT_MAX_DISTANCE = 5  # average is 3, too high value might lead to solution value = 0 (cost > sales)

    FLOAT_INIT_SALES_PRICE = 120  # in the initial instance it was 13. Might be adjusted with adjust_sales_price
    FLOAT_PLANT_COST = 2000
    FLOAT_PLANT_CAPACITY = 5000
    # min possible scenario demand = (1-FLOAT_MAX_PERCENT_DEMAND_DEFICIT) * FLOAT_PLANT_CAPACITY * number end product plants
    # max possible scenario demand = 0.9 * FLOAT_PLANT_CAPACITY * number end product plants
    FLOAT_MAX_PERCENT_DEMAND_DEFICIT = 0.5

    STR_PRODUCT_TYPE_MATERIAL = 'STR_PRODUCT_TYPE_MATERIAL'
    STR_PRODUCT_TYPE_END_PRODUCT = 'STR_PRODUCT_TYPE_END_PRODUCT'

    INT_MIN_MULTITHREAD_LOCATION_LIMIT = 2000  # we force num_cpu to be 1 if number_locations lower this value
    INT_MAX_LOCATIONS_TO_VISUALIZE = 40  # we will not run visualize() if the number of locations exceeds this value
    DEBUG = False

    def __init__(self,
                 name,
                 num_locations,
                 num_products,
                 num_scen,
                 random_seed=None):

        Timer('Core data generated').start()

        self.name = name
        self.dot_graph = None
        self.random_seed = random_seed
        random.seed(random_seed)

        # Initialize data cache
        self._data = {}
        self.sales_price = SndpGraph.FLOAT_INIT_SALES_PRICE
        self._data['PlantCost'] = SndpGraph.FLOAT_PLANT_COST
        self._data['PlantCapacity'] = SndpGraph.FLOAT_PLANT_CAPACITY
        self._data['NrOfLocations'] = 0
        self._data['NrOfProducts'] = 0
        self._data_valid_export = {
            'ScalarData': None
        }  # path to the .dat file that is actual for current data

        list_data_names = [
            'MaterialReq', 'Prob', 'Demand', 'ShipCost', 'ArcProduct', 'arc'
        ]
        self._data_txt = {}  # textual representation for .dat files
        for name in list_data_names:
            self._data[name] = {}
            self._data_txt[name] = ''
            self._data_valid_export[name] = None

        # Initialize products
        if num_products < 2:
            raise (
                'There should be at least two products in the SNDP problem: material and end product.'
            )
        if num_products > 40:
            print(
                f'If num products > 40, the instance might be disbalanced: production too expensive and solution value 0'
            )
        self._products = {
            product_id: _Product(product_id, self)
            for product_id in range(1, num_products + 1)
        }  # +1 since in MPL indexing starts from 1
        self.get_products(
        )[-1].type = SndpGraph.STR_PRODUCT_TYPE_END_PRODUCT  # last product is end product
        max_material_req = math.floor(
            40 / (num_products) *
            2)  # in order to have moderate production costs
        self.material_requirements = [
            random.randint(1, max_material_req)
            for material in self.get_materials()
        ]  # in the end product
        self._data['MaterialReq'] = {
            i: {
                'material': i + 1,
                'value': k
            }
            for (i, k) in enumerate(self.material_requirements)
        }
        self._data_txt['MaterialReq'] += '\n'.join([
            f'{i + 1},{k}' for (i, k) in enumerate(self.material_requirements)
        ])

        # Initialize all locations
        if num_locations < 2:
            raise (
                'There should be at least two locations in the SNDP problem: market and another location.'
            )
        self._locations = {
            location_id: _Location(location_id, self)
            for location_id in range(1, num_locations + 1)
        }

        # Nodes with end product
        self._routes = {}
        plants_for_end_products = random_subset(
            self.get_plants(),
            math.floor(num_locations *
                       SndpGraph.FLOAT_PERCENT_OF_LOC_WITH_END_PROD)
        )  # set it right away for efficiency
        self._end_product_plants = set()
        for plant in plants_for_end_products:
            # end product (at least) should be produced there
            route_object = _Route(
                plant, self.get_end_location(),
                random.randint(1, SndpGraph.INT_MAX_DISTANCE))
            self.add_route(route_object)
            plant.add_product(self.get_end_product())
        end_product_plants = self.get_end_product_plants()

        # Assign materials to plants and create routes
        #num_cpu = mp.cpu_count()
        num_cpu = 1  # multiprocessing does not provide any efficiency improvements
        if num_locations < SndpGraph.INT_MIN_MULTITHREAD_LOCATION_LIMIT:
            num_cpu = 1

        if num_cpu > 1:  # multiprocessing
            raise NotImplementedError(
                'We need to fill in data cache here in the same manner as for single thread case.'
            )
            add_products = []  # will store the changes to be applied
            manager = mp.Manager()
            # normal dict will not be shared among processes but we need it to be shared
            # manager.dict() makes things much slower - maybe use Array.
            add_routes = manager.dict()
            # add_routes = {} # even if we use normal dict (makes no sense) there is no benefit in speed
            pool = mp.Pool(num_cpu)
            results = [
                pool.apply_async(self.generate_plant_data,
                                 args=(worker_id, add_routes, num_cpu))
                for worker_id in range(num_cpu)
            ]
            pool.close()
            pool.join()
            for result in [res.get() for res in results]:
                add_products += result

            # apply changes
            # since data is not shared among processes, .add_product() and .add_route() should not be called from generate_plant_data()
            for record in add_products:
                plant = self.get_location(record['plant'])
                material = self.get_product(record['material'])
                plant.add_product(material)
            for key, distance in add_routes.items():
                star_id = int(key.split('-')[0])
                start_location = self.get_location(star_id)
                end_id = int(key.split('-')[1])
                end_location = self.get_location(end_id)
                self.add_route(_Route(start_location, end_location, distance))
        else:
            # we do not use here generate_plant_data() because making .add_route() and .add_product() right away is much faster
            num_plants = len(self.get_plants())
            for plant in self.get_plants():
                if plant in end_product_plants:
                    min_materials = 0  # in potential plants none of the materials might be manufactured
                    max_materials = math.ceil(
                        len(self.get_materials()) / 4
                    )  # to avoid that all materials are manufactured on the plant site and should not be delivered
                else:
                    min_materials = 1
                    max_materials = len(self.get_materials())
                random_num_materials = min(
                    random.randint(min_materials, max_materials),
                    SndpGraph.INT_MAX_PRODUCTS_IN_ONE_LOCATION)
                if random_num_materials == 0:  # no materials produced, lets go to the next plant
                    continue

                # Define the route to (several or all) potential end product plants for every plant
                random_num_end_product_plants = random.randint(
                    1, len(end_product_plants))
                random_end_product_plants = random_subset(
                    end_product_plants, random_num_end_product_plants)
                # connect the location with the end product plants
                for end_product_plant in random_end_product_plants:
                    # we need route only if product is produced not in the potential plant locations
                    if plant.id == end_product_plant.id:
                        continue
                    # routes can be one directional, omit the route if it already exists in another direction
                    if self.get_route(end_product_plant, plant):
                        continue
                    if not self.get_route(
                            plant, end_product_plant
                    ):  # if the route does not already exist
                        self.add_route(
                            _Route(
                                plant, end_product_plant,
                                random.randint(1, SndpGraph.INT_MAX_DISTANCE)))

                # Define materials to produce
                random_materials = random_subset(
                    self.get_materials(),
                    random_num_materials)  # except the last one
                for material in random_materials:
                    plant.add_product(material)

                progress_bar('Generate data for plants', plant.id, num_plants)

        # Test if graph is valid and solve the issues
        # - check if plant with material has at least one route to potential plant: this is guaranteed during assignment of materials to plants
        # - check if every potential plant has all the materials delivered
        # it will also automatically solve the issue if a material has no plant, since such material will not be delivered to all plants
        for counter, end_product_plant in enumerate(end_product_plants, 1):
            # materials produced in the plant itself
            available_materials = [
                product for product in end_product_plant.get_products()
                if product.type == SndpGraph.STR_PRODUCT_TYPE_MATERIAL
            ]
            # and materials delivered
            connected_plants = [
                route.start for route in end_product_plant.get_inbounds()
            ]
            for connected_plant in connected_plants:
                available_materials += [
                    product for product in connected_plant.get_products()
                    if product.type == SndpGraph.STR_PRODUCT_TYPE_MATERIAL
                ]
            materials_not_delivered_to_plant = [
                material for material in self.get_materials()
                if material not in available_materials
            ]
            for material in materials_not_delivered_to_plant:
                # add material to the potential plant itself or connected plants
                if len(connected_plants) > 0:
                    random_plant = random_subset(connected_plants, 1)[0]
                else:  # produce in plant itself if no plants are connected
                    random_plant = end_product_plant
                random_plant.add_product(material)

            progress_bar('Validate data for plants', counter,
                         len(end_product_plants))

        str(Timer('Core data generated'))
        Timer('Core data generated').reset()

        assert (self._data['NrOfLocations'] == num_locations)
        assert (self._data['NrOfLocations'] == len(self.get_locations()))
        assert (self._data['NrOfProducts'] == num_products)
        assert (self._data['NrOfProducts'] == len(self.get_products()))

        # Stochastic data
        self._scenarios = []
        self.regenerate_stochastic_data(num_scen)

    @property
    def sales_price(self):
        return self._data['SalesPrice']

    @sales_price.setter
    def sales_price(self, value):
        self._data['SalesPrice'] = value

    @property
    def data_as_dict(self):
        result = {}
        # scalar data
        for name in [
                'NrOfLocations', 'NrOfProducts', 'NrOfScen', 'SalesPrice',
                'PlantCost', 'PlantCapacity'
        ]:  # basically we do not need to clear it because it cannot be modified:
            result[name] = self._data[name]
        # array data
        for name in [
                'MaterialReq', 'Prob', 'Demand', 'ShipCost', 'ArcProduct',
                'arc'
        ]:  # 'MaterialReq' are excluded since they cannot be modified:
            result[name] = list(self._data[name].values())

        return result

    def generate_plant_data(self, worker_id, shared_add_routes, num_cpu=0):
        '''Used in multiprocessing. Generates most of the data except the stochastic data.

        Works on one contiguous slice of the plants and records the changes
        instead of applying them.

        Args:
            worker_id: 0-based index of this worker (0 .. num_cpu - 1).
            shared_add_routes: Mapping filled with '<start id>-<end id>' keys
                and random distances for the routes to be created.
            num_cpu: Total number of workers; 0 means mp.cpu_count().

        Returns:
            List of {'plant': plant id, 'material': material id} records for
            the products to be added; empty when this worker has no plants.
        '''
        if self.random_seed is None:
            # No seed configured: reseed from the default entropy source
            # instead of failing on None * worker_id.
            random.seed()
        else:
            random.seed(self.random_seed * worker_id + 1)  # +1 to avoid 0
        if num_cpu == 0:
            num_cpu = mp.cpu_count()

        add_products = []  # result that we will return
        all_plants = self.get_plants()
        end_product_plants = self.get_end_product_plants()

        plants_per_worker = max(math.floor(len(all_plants) / num_cpu), 1)
        # Worker ids are 0-based, so worker k starts right after the k slices
        # of the previous workers. (Using worker_id - 1 here gave workers 0
        # and 1 the same slice.)
        plants_start = plants_per_worker * worker_id
        if plants_start >= len(all_plants):  # we have no plants for this worker
            return []
        plants_end = plants_start + plants_per_worker
        if worker_id == num_cpu - 1:  # last worker also takes the remainder
            plants_end = len(all_plants)
        plants = all_plants[plants_start:plants_end]

        for plant in plants:
            if plant in end_product_plants:
                min_materials = 0  # in potential plants none of the materials might be manufactured
                max_materials = 1  # to avoid that all materials are manufactured on the plant site and should not be delivered
            else:
                min_materials = 1
                max_materials = len(self.get_materials())
            random_num_materials = min(
                random.randint(min_materials, max_materials),
                SndpGraph.INT_MAX_PRODUCTS_IN_ONE_LOCATION)
            if random_num_materials == 0:
                continue
            random_materials = random_subset(
                self.get_materials(),
                random_num_materials)  # except the last one
            for material in random_materials:
                # Recorded rather than applied: see the docstring.
                add_products.append({
                    'plant': plant.id,
                    'material': material.id
                })

            # Define the route to (several or all) potential end product plants for every plant
            random_num_end_product_plants = random.randint(
                1, len(end_product_plants))
            random_end_product_plants = random_subset(
                end_product_plants, random_num_end_product_plants)
            # connect the location with the end product plants
            for end_product_plant in random_end_product_plants:
                # we need route only if product is produced not in the potential plant locations
                if plant.id == end_product_plant.id:
                    continue
                # routes can be one directional, omit the route if it already exists in another direction
                key_opposite = '{}-{}'.format(end_product_plant.id, plant.id)
                if self._routes.get(key_opposite):
                    continue
                key = '{}-{}'.format(plant.id, end_product_plant.id)
                if not self._routes.get(
                        key):  # if the route does not already exist
                    shared_add_routes[key] = random.randint(
                        1, SndpGraph.INT_MAX_DISTANCE)

            print(f'Data generated for plant {plant.id}')

        return add_products

    def regenerate_stochastic_data(self, num_scen):
        '''Drop the current scenarios and generate ``num_scen`` new ones.

        Each scenario gets a distinct integer demand, sampled without
        replacement from the interval between the minimal and maximal total
        demand. All scenarios but the last get probability ``1 / num_scen``;
        the last one gets the remaining probability mass.

        :param num_scen: number of scenarios to generate
        :raises ValueError: if the demand interval is narrower than ``num_scen``
        '''
        timer_name = 'Stochastic data generated'
        Timer(timer_name).start()

        self._clear_stochastic_data_cache()
        plant_count = len(self.get_end_product_plants())
        capacity = SndpGraph.FLOAT_PLANT_CAPACITY
        lowest_demand = (1 - SndpGraph.FLOAT_MAX_PERCENT_DEMAND_DEFICIT
                         ) * capacity * plant_count
        highest_demand = 0.9 * capacity * plant_count
        if num_scen > (highest_demand - lowest_demand):
            raise ValueError(
                "SndpGraph.FLOAT_MAX_PERCENT_DEMAND_DEFICIT is too small for the num_scen."
            )

        self._scenarios = []
        uniform_probability = 1 / num_scen  # uniform distribution is assumed
        demands = random.sample(
            range(int(lowest_demand), int(highest_demand)), num_scen)

        # Scenario ids start at 1; the last demand is kept for the final scenario.
        for scenario_id, demand in enumerate(demands[:-1], start=1):
            self.add_scenario(
                _Scenario(scenario_id, uniform_probability, demand))

        # The final scenario absorbs whatever probability is still unassigned.
        assigned_probability = sum(scen.probability
                                   for scen in self.get_scenarios())
        left_probability = 1.0 - assigned_probability
        assert (left_probability > 0)
        self.add_scenario(
            _Scenario(num_scen, left_probability, demands[num_scen - 1]))

        print(Timer(timer_name))
        Timer(timer_name).reset()

        scenario_count = self._data['NrOfScen']
        assert (scenario_count == num_scen)
        assert (scenario_count == len(self.get_scenarios()))

    def visualize(self, format='jpg', view=False, to_file=None):
        '''Render the graph with graphviz.

        Nothing is rendered when the graph has more locations than
        ``SndpGraph.INT_MAX_LOCATIONS_TO_VISUALIZE``. The end location is
        drawn filled red, end product plants filled grey, and every other
        location with a plain solid outline. Rendering errors are reported
        with a printed warning instead of being raised.

        :param format: output format handed to graphviz
        :param view: forwarded to ``Digraph.render``
        :param to_file: output file name; defaults to the graph name
        '''
        locations = self.get_locations()
        if len(locations) > SndpGraph.INT_MAX_LOCATIONS_TO_VISUALIZE:
            print(
                f'Visualization of graph {self.name} with {len(self.get_locations())} locations will take to much time and will not be done.'
            )
            return

        if self.dot_graph is None:
            self.dot_graph = Digraph(comment=self.name)
        graph = self.dot_graph
        # Start from an empty drawing so stale nodes/edges never survive.
        graph.clear()
        graph.format = format

        end_product_plants = self.get_end_product_plants()
        for location in locations:
            if location == self.get_end_location():
                color, style = 'red', 'filled'
            elif location in end_product_plants:
                color, style = 'grey', 'filled'
            else:
                color, style = None, 'solid'
            graph.node(name=str(location.id),
                       label=str(location),
                       style=style,
                       color=color)

        for route in self.get_routes():
            distance = str(route.distance)
            graph.edge(str(route.start.id),
                       str(route.end.id),
                       label=distance,
                       len=distance)

        target = self.name if to_file is None else to_file
        try:
            graph.render(target, view=view)
        except Exception as e:
            print(
                f"WARNING: Visulalization of {self.name} failed. Due to this error: {e}"
            )

    def export_mpl(self, filename: str):
        '''Write the model as ``<filename>.mpl`` plus the ``.dat`` files it links to.

        The packaged ``SNDP_default.mpl`` template is loaded and every
        ``SNDP_default_<item>.dat`` reference in it is replaced by the path of
        the data file for that item. A data file is only written when
        ``self._data_valid_export`` holds no path for it; otherwise the
        cached path is reused.

        :param filename: common prefix (without extension) of the written files
        '''
        template = Path(resource_filename(__name__,
                                          'SNDP_default.mpl')).read_text()

        # --- scalar data --------------------------------------------------
        cached = self._data_valid_export['ScalarData']
        if cached is None:
            scalar_path = f'{filename}_ScalarData.dat'
            default_scalars = Path(
                resource_filename(__name__,
                                  'SNDP_default_ScalarData.dat')).read_text()
            lines = default_scalars.split('\n')
            for item in ('NrOfLocations', 'NrOfProducts', 'NrOfScen',
                         'SalesPrice', 'PlantCost', 'PlantCapacity'):
                # the value sits on the line right after its '!<name>' marker
                value_row = lines.index('!' + item) + 1
                lines[value_row] = str(self._data[item])
            target = Path(scalar_path)
            target.write_text('\n'.join(lines))
            self._data_valid_export['ScalarData'] = target
        else:
            scalar_path = str(cached)
        template = template.replace('SNDP_default_ScalarData.dat',
                                    str(scalar_path))

        # --- array data ---------------------------------------------------
        for item in ('ShipCost', 'ArcProduct', 'arc', 'Prob', 'Demand',
                     'MaterialReq'):
            cached = self._data_valid_export[item]
            if cached is None:
                array_path = f'{filename}_{item}.dat'
                # any record will do: only its keys are needed for the header
                sample_record = next(iter(self._data[item].values()))
                header = '!{}\n!{}\n'.format(item,
                                             ','.join(sample_record.keys()))
                contents = header + self._data_txt[item]
                target = Path(array_path)
                target.write_text(contents)
                self._data_valid_export[item] = target
            else:
                array_path = str(cached)
            template = template.replace(f'SNDP_default_{item}.dat',
                                        str(array_path))

        Path(filename + '.mpl').write_text(template)

    def adjust_sales_price(self):
        '''Lower SalesPrice to the smallest value that keeps all plants open.

        The search itself is delegated to ``SndpModel.adjust_sales_price``:
        the graph is exported to ``<name>.mpl``, the model adjusts the price,
        and the result is copied back to ``self.sales_price``. The cached
        scalar export is invalidated afterwards. If ``sndpgen.sndp_model``
        cannot be imported, an ``ImportWarning`` is issued and nothing else
        happens.
        Motivation: keep the objective value small to avoid numerical issues.'''

        try:
            from sndpgen.sndp_model import SndpModel
        except ImportError:
            warn(
                'optconvert is not installed, adjust_sales_price() will not be executed',
                ImportWarning)
            return

        self.export_mpl(f'{self.name}')
        model_path = Path(f'{self.name}.mpl')
        sndp_model = SndpModel(model_path)
        sndp_model.adjust_sales_price()
        adjusted_price = sndp_model.data_as_dict['SalesPrice']
        self.sales_price = adjusted_price
        # the scalar data file on disk no longer matches the new price
        self._data_valid_export['ScalarData'] = None

    def _clear_nodes_data_cache(self):
        self._data_valid_export['ScalarData'] = None
        for name in [
                'NrOfLocations', 'NrOfProducts'
        ]:  # basically we do not need to clear it because it cannot be modified:
            self._data[name] = 0
        for name in [
                'ShipCost', 'ArcProduct', 'arc'
        ]:  # 'MaterialReq' are excluded since they cannot be modified:
            self._data[name] = {}
            self._data_txt[name] = ''
            self._data_valid_export[name] = None

    def _clear_stochastic_data_cache(self):
        self._data_valid_export['ScalarData'] = None
        for name in ['NrOfScen']:
            self._data[name] = 0
        for name in ['Prob', 'Demand']:
            self._data[name] = {}
            self._data_txt[name] = ''
            self._data_valid_export[name] = None

    def add_route(self, route):
        '''Register ``route`` in the graph and update the cached export data.

        :param route: route object exposing ``start``, ``end`` and ``distance``
        :raises KeyError: if a route with the same start and end already exists
        '''
        start, end = route.start, route.end
        if self.get_route(start, end):
            raise KeyError('Route already exists in the graph.')

        route._graph = self
        self._routes[f'{start.id}-{end.id}'] = route

        # Exports that depend on routes are stale now ('MaterialReq' is not).
        self._data_valid_export.update(
            dict.fromkeys(('ScalarData', 'ShipCost', 'ArcProduct', 'arc')))
        start.update_graph_data_cache(product=None, route=route)

        # Duplicates were ruled out above, so the key is new.
        ship_key = f'{start.id},{end.id}'
        self._data['ShipCost'][ship_key] = dict(start=start.id,
                                                finish=end.id,
                                                value=route.distance)
        self._data_txt['ShipCost'] += f'{ship_key},{route.distance}\n'

    def add_scenario(self, scenario):
        '''Register ``scenario`` in the graph and update the cached export data.

        The duplicate check runs before anything is modified, so a rejected
        scenario leaves the scenario list and the 'NrOfScen' counter untouched.

        :param scenario: scenario object exposing ``id``, ``probability`` and
            ``demand``
        :raises KeyError: if a scenario with the same id was already added
        '''
        scenario_id = scenario.id
        if scenario_id in self._data['Prob']:
            raise KeyError('Scenario already exists in the graph.')
        # 'Prob' and 'Demand' are always filled together, so this is an
        # internal invariant rather than a user error.
        assert scenario_id not in self._data['Demand'], \
            'How would this happen if error above does not raise?'

        scenario._graph = self
        self._scenarios.append(scenario)

        # data cache
        self._data['NrOfScen'] += 1
        self._data['Prob'][scenario_id] = {
            'SCEN': scenario_id,
            'value': scenario.probability
        }
        self._data_txt['Prob'] += f'{scenario_id},{scenario.probability}\n'
        self._data['Demand'][scenario_id] = {
            'SCEN': scenario_id,
            'value': scenario.demand
        }
        self._data_txt['Demand'] += f'{scenario_id},{scenario.demand}\n'

    def get_products(self):
        '''Return a new list with all products, in the order they are stored.'''
        return [*self._products.values()]

    def get_materials(self):
        '''Return every product except the last one, which is the end product.'''
        products = self.get_products()
        return products[:-1]

    def get_product(self, id):
        '''Return the product stored under ``id``, or ``None`` if there is none.'''
        return self._products.get(id)

    def get_end_product(self):
        '''Return the end product, i.e. the last entry of ``get_products()``.'''
        products = self.get_products()
        return products[-1]

    def get_locations(self):
        '''Return a new list with all locations, in the order they are stored.'''
        return [*self._locations.values()]

    def get_plants(self):
        '''Return every location except the last one, which is the end location.'''
        locations = self.get_locations()
        return locations[:-1]

    def get_end_product_plants(self):
        '''Return the stored collection of end product plants.

        The internal object itself is returned, not a copy.
        '''
        return self._end_product_plants

    def get_location(self, id):
        '''Return the location stored under ``id``.

        :param id: identifier of the requested location
        :raises KeyError: if no location is stored under ``id``
        '''
        location = self._locations.get(id)
        if location is None:
            # A bare string cannot be raised; use a real exception type.
            raise KeyError(f'Location with {id} was not found.')
        return location

    def get_end_location(self):
        '''Return the end location, i.e. the last entry of ``get_locations()``.'''
        locations = self.get_locations()
        return locations[-1]

    def get_routes(self):
        '''Return a new list with all routes, in the order they are stored.'''
        return [*self._routes.values()]

    def get_route(self, start, end):
        '''Return the route leading from ``start`` to ``end``, or ``None``.

        Only the given direction is looked up; a route stored as
        ``end -> start`` is not returned.

        :param start: location the route starts at
        :param end: location the route ends at
        :raises TypeError: if either argument is not a ``_Location``
        '''
        if not (isinstance(start, _Location) and isinstance(end, _Location)):
            # A bare string cannot be raised; use a real exception type.
            raise TypeError(
                'Start and end arguments should be Location objects')
        return self._routes.get('{}-{}'.format(start.id, end.id))

    def get_scenarios(self):
        '''Return a shallow copy of the scenario list.'''
        return list(self._scenarios)
Example #12
0
 def show_m_nodes(self, file_name):
     """Draw the nodes via ``print_m_nodes`` and open the rendered graph.

     The graph is written under ``./pdf_data`` using ``file_name``.
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()
     self.print_m_nodes(father_name=None, d=graph)
     graph.view()
Example #13
0
def graph_gen(start=start_state(), trace=False):
  '''This creates the state graph with random variation of inflow.

  Starting from ``start``, the states returned by ``next_state`` are explored
  breadth-first (FIFO queue). Every newly seen state becomes a box node and
  every transition an edge; the result is rendered as 'state_graph'.

  If ``trace`` is true, the graph is cleared and ``create_trace`` is called
  repeatedly until it yields a non-empty trace whose last state does not have
  inflow magnitude '+'. That trace is rendered as 'trace' and a textual
  explanation of every traced state is printed.

  Note: the default for ``start`` is evaluated once, when the function is
  defined, not on every call.

  @start: state the exploration begins with
  @trace: True to additionally create, render and narrate a trace
  @return: number of states that were expanded during the exploration
  '''
  
  dot = Digraph('State Graph')
  
  change = 1  # not used anywhere below in this function
  vizited = []        # states that have already been expanded
  in_graph = [start]  # states that already have a node in the graph
  queue = [start]     # FIFO work list of states still to expand
  dot.node (state_label(start), state_description(start, 1), shape = 'box')
  
  while len(queue):
    # pop the oldest queued state
    cur_state = queue[0]
    del queue[0]
    
    if cur_state not in vizited:
      vizited.append(cur_state)
      
      next_states = next_state(cur_state)
      queue = queue + next_states
      
      for state in next_states:
        if state not in in_graph:
          # posn is the 1-based order in which the state entered the graph
          posn = len(in_graph) + 1
          dot.node(state_label(state), state_description(state, posn), shape = 'box')
          in_graph.append(state)
        
        dot.edge(state_label(cur_state), state_label(state))
  
  dot.render('state_graph')

  if trace:
    dot.clear()
    queue = [start]
    dot, traced_states = create_trace(dot, queue)
    counter = 0
    # Retry (at most 100 times) while the trace is empty or ends with inflow magnitude '+'.
    while (traced_states == [] or traced_states[-1]['inflow']['mag'] == '+') and counter < 100:
      queue = [start]
      dot.clear()
      dot, traced_states = create_trace(dot, queue)
      counter += 1
    # NOTE(review): this also aborts when the 100th retry produced a usable trace — confirm intended.
    if counter >= 100:
      exit("Unknown error while creating a trace, please try again.\n(This may happen many times, sorry)")
    dot.render('trace')

    # Narrate the trace: print each state, then the explanations that apply to it.
    for i, traced_state in enumerate(traced_states):
      if i == 0:
        print("State 1 (starting state, all quantities are 0):\n" + state_description(traced_state))
      elif i > 0 and traced_state['inflow'] == {'mag': '0', 'der': '0'} and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        # inflow and volume are back at (0, 0): labelled as state 1 again
        print("\nState 1:\n" + state_description(traced_state) + "\n")
      else:
        print("\nState " + str(i+1) + ":\n" + state_description(traced_state) + "\n")

      # NOTE(review): several checks below read traced_states[i-1] without an
      # `i > 0` guard; for i == 0 that index is -1, i.e. the LAST traced state
      # — confirm this wrap-around is intended.
      if traced_states[i-1]['inflow']['der'] == '-' and traced_state['volume']['mag'] == 'M':
        print("Because inflow has decreased in state " + str(i) + ", but outflow is still maximum, volume decreases in state " +
        str(i+1) + ".")
      if traced_states[i-1]['volume']['der'] == '-' and traced_states[i-1]['volume']['mag'] == 'M':
        print("Because volume has decreased from the maximum in state " + str(i) + ", volume is + in state " + str(i+1) + ".")
      if i > 0 and traced_states[i-1]['inflow']['der'] == '+' and traced_state['inflow']['der'] == '0' and \
                      traced_state['inflow']['mag'] == '+':
        print("Inflow stopped increasing in state " + str(i+1) + ", but is still +, so volume remains increasing " +
              "(because of the positive influence (I+) from inflow to volume).")
      if traced_state['inflow']['der'] == '-' and traced_states[i-1]['inflow']['der'] != '-':
        print("Inflow starts decreasing in state " + str(i+1) + ".")
      if i > 0 and traced_states[i-1]['inflow']['der'] == '-' and traced_state['inflow']['mag'] == '0':
        print("Inflow reaches zero in state " + str(i+1) + " (and therefore stops decreasing).")
      if traced_state['volume'] == {'mag': 'M', 'der': '0'} and traced_states[i-1]['volume'] != {'mag': 'M', 'der': '0'}:
        print("Volume reaches the maximum in state " + str(i+1) + ", and therefore stops increasing.")
      # Look-ahead explanations: only for states that have both a predecessor and a successor.
      if i < len(traced_states) - 1 and i != 0:
        for quant, val in traced_state.items():
          if quant == 'inflow' or quant == 'volume':
            # `in '+M'` is a substring test: matches '+', 'M' (and would also match '' or '+M')
            if traced_state[quant]['der'] == '+' and traced_states[i+1][quant]['mag'] in '+M' and traced_state[quant]['mag'] != '+':
              print("Because " + quant + " is increasing (from " + traced_states[i][quant]['mag'] + ") in state " +
                    str(i+1) + ", " + quant + " is " + traced_states[i+1][quant]['mag'] + " in state " + str(i+2) + ".")
        if traced_states[i+1]['inflow']['mag'] == '+' and traced_states[i+1]['volume']['der'] == '+':
          print("Because inflow is + in state " + str(i+2) + ", volume is increasing in state " + str(i+2) +
                " (because of the positive influence (I+) from inflow to volume).")
      if i > 1 and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        print("Volume has reached 0 (and therefore stops decreasing).")
      if i > 1 and traced_states[i-1]['volume'] != traced_state['volume']:
        print("Height, pressure and outflow are also (" + traced_state['volume']['mag'] + ", " + traced_state['volume'][
          'der'] + ") because of the value correspondence between volume, height, pressure and outflow.")
      if i > 0 and traced_state['inflow'] == {'mag': '0', 'der': '0'} and traced_state['volume'] == {'mag': '0', 'der': '0'}:
        print("All quantities have reached 0, thus we are at the starting state again.")

  return len(vizited)
class DecisionTree(SupervisedModel):
    ''' Decision Tree classifiier. It takes a target feature as the predicted feature.
        It contains a reference to a TreeNode object after it is trained.
        Use Gini Impurity and build a binary tree.

        This class uses the example from here as a base https://github.com/random-forests/tutorials/blob/master/decision_tree.ipynb
    '''

    ContinousSplitMethods = ["k-tile", "mean"]

    def __init__(self,
                 targetFeature: str,
                 continuousSplitmethod: str = "k-tile",
                 maxDepth: int = 3,
                 filePath: str = "tree"):
        ''' Validate the settings and create an untrained decision tree.

            @targetFeature: name of the feature to be predicted
            @continuousSplitmethod: one of DecisionTree.ContinousSplitMethods
            @maxDepth: maximum depth of the tree, must not be negative
            @filePath: file name handed to the graphviz Digraph
        '''
        supportedMethods = DecisionTree.ContinousSplitMethods
        if continuousSplitmethod not in supportedMethods:
            raise ValueError("Continuous split method \"" +
                             continuousSplitmethod + "\" is not supported")
        if maxDepth < 0:
            raise ValueError("Max depth must be at least 0")

        super().__init__(targetFeature)
        self._trainedRootNode = None
        self._maxDepth = maxDepth
        self._continuousSplitmethod = continuousSplitmethod
        self._filePath = filePath
        # Running counter used to give every graph node a unique id
        self._nodeId = 0
        self._diGraph = Digraph("G", filename=filePath, format="png")

    @property
    def name(self) -> str:
        ''' Get the display name of this model '''
        return "Decision Tree"

    @property
    def maxDepth(self) -> int:
        ''' Get the maximum depth of the tree '''
        return self._maxDepth

    @maxDepth.setter
    def maxDepth(self, value: int):
        ''' Set the maximum depth of the tree.

            @value: the new maximum depth; a negative value raises ValueError
        '''
        if value < 0:
            raise ValueError("Max depth must be at least 0")
        self._maxDepth = value

    @property
    def continuousSplitMethod(self) -> str:
        ''' Get the method used to split continuous features '''
        return self._continuousSplitmethod

    @continuousSplitMethod.setter
    def continuousSplitMethod(self, value: str):
        ''' Set the method used to split continuous features.

            @value: one of DecisionTree.ContinousSplitMethods; anything else raises ValueError
        '''
        supportedMethods = DecisionTree.ContinousSplitMethods
        if value not in supportedMethods:
            raise ValueError("Continuous split method \"" + value +
                             "\" is not supported")
        self._continuousSplitmethod = value

    @property
    def numLeafNodes(self) -> int:
        ''' Count the leaf nodes of the trained tree, starting at its root '''
        rootNode = self._trainedRootNode
        return self._countLeafNodes(rootNode)

    @property
    def depth(self) -> int:
        ''' Measure the depth of the trained tree, starting at its root '''
        rootNode = self._trainedRootNode
        return self._countTreeDepth(rootNode)

    def clear(self):
        ''' Forget the trained tree and empty the graph object.
            The configured properties of the model are kept.
        '''
        self._diGraph.clear()
        self._trainedRootNode = None

    def informationGain(self, left: pd.DataFrame, right: pd.DataFrame,
                        currentImpurity: float) -> float:
        ''' Compute how much impurity a split removes.

            The impurity of both partitions is weighted by their share of the
            rows and subtracted from the impurity before the split.

            @left: the left partition of the data
            @right: the right partition of the data
            @currentImpurity: impurity of the left and right partition combined
            @return: the information gain of the split
        '''
        leftSize = len(left)
        leftWeight = leftSize / float(leftSize + len(right))
        weightedLeft = leftWeight * self.giniImpurity(left)
        weightedRight = (1 - leftWeight) * self.giniImpurity(right)
        return currentImpurity - (weightedLeft + weightedRight)

    def giniImpurity(self, dataFrame: pd.DataFrame) -> float:
        ''' Compute the Gini Impurity of the given data frame 
        
            @dataFrame: data frame object
            @return: gini impurity value of the given data frame
        '''
        labelCounts = dataFrame[self._targetFeature].value_counts()
        impurity = 1
        for label in labelCounts.index:
            probability = labelCounts[label] / float(len(dataFrame))
            impurity -= probability**2
        return impurity

    def partition(self, dataFrame: pd.DataFrame, feature: str,
                  value: object) -> (pd.DataFrame, pd.DataFrame):
        ''' Split a data frame in two, using the split that fits the feature type.

            Continuous features are split with partitionContinuous and
            categorical ones with partitionDiscreteBinary. For any other
            feature type both results are None.

            @dataFrame: the data frame object
            @feature: the feature that is used as the splitting feature
            @value: value of the given feature
            @return: a tuple of 2 partitions after the split
        '''
        featureType = super()._getFeatureType(dataFrame, feature)

        if featureType == FeatureType.Continuous:
            if not DecisionTree.isContinuous(value):
                raise ValueError(
                    "Numeric feature must be passed with a numeric value")
            lower, upper = self.partitionContinuous(dataFrame, feature, value)
            return lower, upper

        if featureType == FeatureType.Categorical:
            if not DecisionTree.isCategorical(value):
                raise ValueError(
                    "Categorical feature must be passed with a string or boolean value"
                )
            matching, others = self.partitionDiscreteBinary(
                dataFrame, feature, value)
            return matching, others

        return None, None

    def partitionContinuous(self, dataFrame: pd.DataFrame, feature: str,
                            value: float) -> (pd.DataFrame, pd.DataFrame):
        ''' Split a data frame at a threshold of a continuous feature.

            @dataFrame: the data frame object
            @feature: the feature that is used as the splitting feature
            @value: the threshold to split at
            @return: a tuple of the rows below the threshold and the rows at or above it
        '''
        column = dataFrame[feature]
        below = dataFrame[column < value]
        atOrAbove = dataFrame[dataFrame[feature] >= value]
        return below, atOrAbove

    def partitionDiscrete(self, dataFrame: pd.DataFrame,
                          feature: str) -> (pd.DataFrame, pd.DataFrame):
        ''' Split a data frame into one partition per distinct value of a feature.

            @dataFrame: the data frame object
            @feature: the feature that is used as the splitting feature
            @return: a list with one data frame for each unique value, in order of first appearance
        '''
        return [
            dataFrame[dataFrame[feature] == category]
            for category in dataFrame[feature].unique()
        ]

    def partitionDiscreteBinary(self, dataFrame: pd.DataFrame, feature: str,
                                value: str) -> (pd.DataFrame, pd.DataFrame):
        ''' Split a data frame into the rows that have a given value and the rest.

            @dataFrame: the data frame object
            @feature: the feature that is used as the splitting feature
            @value: the feature value to look for
            @return: a tuple of the rows equal to the value and the rows different from it
        '''
        matching = dataFrame[dataFrame[feature] == value]
        others = dataFrame[dataFrame[feature] != value]
        return matching, others

    def findBestFeature(
            self,
            dataFrame: pd.DataFrame,
            quantiles: [int] = [0.2, 0.4, 0.6, 0.8]) -> (str, object, float):
        ''' Search all non-target features for the split with the highest information gain.

            Continuous features are evaluated with the configured split method
            ("k-tile" or "mean"). Categorical features are evaluated once per
            unique value, skipping splits that leave one side empty. Only a
            gain strictly greater than the best so far replaces it.

            @dataFrame: the data frame object
            @quantiles (optional): quantiles handed to the k-tile split of continuous features
            @return: a tuple of the best feature, its split value, and the best information gain;
                     the feature and value are None when no split has a positive gain
        '''
        bestGain = 0.0
        currentImpurity = self.giniImpurity(dataFrame)
        bestFeature, bestFeatureValue = None, None
        isCandidate = dataFrame.columns != self._targetFeature
        candidates = dataFrame.loc[:, isCandidate].columns.values

        for candidate in candidates:
            candidateType = super()._getFeatureType(dataFrame, candidate)

            if candidateType == FeatureType.Continuous:
                if self._continuousSplitmethod == "k-tile":
                    gain, splitValue = self._splitByKTile(
                        dataFrame, candidate, quantiles)
                elif self._continuousSplitmethod == "mean":
                    gain, splitValue = self._splitByMean(dataFrame, candidate)
                else:
                    gain, splitValue = 0.0, 0.0

                if gain > bestGain:
                    bestGain, bestFeature, bestFeatureValue = (gain, candidate,
                                                               splitValue)
                continue

            if not (candidateType == FeatureType.Categorical):
                continue

            for splitValue in dataFrame[candidate].unique():
                matching, others = self.partition(dataFrame, candidate,
                                                  splitValue)
                # a split that leaves one side empty separates nothing
                if len(matching) == 0 or len(others) == 0:
                    continue

                gain = self.informationGain(matching, others, currentImpurity)
                if gain > bestGain:
                    bestGain, bestFeature, bestFeatureValue = (gain, candidate,
                                                               splitValue)

        return bestFeature, bestFeatureValue, bestGain

    @decor.elapsedTime
    def train(self, dataFrame: pd.DataFrame, **kwargs):
        ''' Discard any previous tree and build a new one from the given data.

            @dataFrame: the data frame object to train on
        '''
        self.clear()
        rootNode = self._buildTree(dataFrame, 0)
        self._trainedRootNode = rootNode

    def classify(self, dataFrame: pd.DataFrame, **kwargs):
        ''' Classify every row of the input data frame with the trained tree.

            The result has 2 columns: Prediction holds the predicted label of
            a data point and Probability the probability that the prediction
            is drawn from.

            @dataFrame: the data frame object
            @return: the result data frame, indexed like the input
        '''
        super().classify(dataFrame, **kwargs)
        predictions, probabilities = [], []
        for _, sample in dataFrame.iterrows():
            label, confidence = self._classifyOneSample(
                sample, self._trainedRootNode)
            predictions.append(label)
            probabilities.append(confidence)

        return self._createResultDataFrame(predictions, probabilities,
                                           dataFrame.index)

    def getTreeGraph(self, regenerate: bool) -> Digraph:
        ''' Get the graph object representing this decision tree.

            @regenerate: True to rebuild the graph from the trained tree first,
                         False to return the graph as it currently is
            @return: a Digraph object from graphviz library
        '''
        if not regenerate:
            return self._diGraph

        self._diGraph.clear()
        self._nodeId = 0  # node ids restart at the root
        self._generateGraph(self._trainedRootNode)
        return self._diGraph

    def _createEdgeLabel(self, branch: str, featureValue: object) -> str:
        ''' Create edge label according to the type of the feature and its value.

            Categorical values give "yes" (left) or "no" (right). Continuous
            values give "< v" (left) or ">= v" (right) with v formatted to two
            decimals.

            @branch: value must be "left" or "right". Case-sensitive
            @featureValue: feature value to be displayed in the edge label
            @return: the edge label
            @raise ValueError: if branch is invalid or the value is neither
                               categorical nor continuous
        '''
        if branch not in ("left", "right"):
            raise ValueError(
                "Argument branch must be either \"left\" or \"right\"")

        isLeft = (branch == "left")

        if DecisionTree.isCategorical(featureValue):
            return "yes" if isLeft else "no"

        if DecisionTree.isContinuous(featureValue):
            comparator = "<" if isLeft else ">="
            return "{0} {1:.2f}".format(comparator, featureValue)

        # Report the offending type by name; the previous code referenced an
        # undefined variable here and failed with NameError instead.
        raise ValueError(
            "Feature type \"{0}\" is not str, int, bool, or float".format(
                type(featureValue).__name__))

    def _generateGraph(self, node: TreeNode):
        ''' Generate the decision tree graph. Assign unique id to each node 
            starting from the root, left side, and then right side.

            The id of the current node is taken from self._nodeId, which is
            advanced while the children are added and once more at the end of
            every non-leaf call. Decision children are added here and then
            again by the recursive call under the same id.

            @node: the root node of the (sub)tree to draw; None is ignored
        '''
        if (node is None):
            return

        left = node.left
        right = node.right
        nodeId = self._nodeId

        leafNodeLabelFormat = "Prediction: {0}\nProbability: {1:.2f}"

        def decisionNodeLabel(feature, value):
            # str/bool values are shown verbatim, numeric ones rounded to 2 digits
            if type(value) in (str, bool, np.bool_):
                shown = value
            else:
                shown = round(value, 2)
            return "{0}\nValue: {1}".format(feature, shown)

        def leafNodeLabel(leaf):
            return leafNodeLabelFormat.format(leaf.prediction,
                                              leaf.probability)

        # If the root node is a leaf node
        if (type(node) is LeafNode):
            self._addNode(LeafNode, nodeId, leafNodeLabel(node))
            return
        else:
            nodeLabel = decisionNodeLabel(node.feature, node.featureValue)
            self._addNode(DecisionNode, nodeId, nodeLabel)

        if (type(left) is LeafNode and type(right) is LeafNode):
            leftLabel = leafNodeLabel(left)
            rightLabel = leafNodeLabel(right)

            # Get left and right node id
            leftId = self._nodeId + 1
            rightId = self._nodeId + 2
            self._nodeId += 2

            # Each leaf carries its own label (previously both leaves were
            # given the parent's decision label by mistake)
            self._addNode(LeafNode, leftId, leftLabel)
            self._addNode(LeafNode, rightId, rightLabel)

        elif (type(left) is LeafNode):
            leftLabel = leafNodeLabel(left)
            rightLabel = decisionNodeLabel(right.feature, right.featureValue)

            # Assign id to the left node first
            leftId = self._nodeId + 1
            self._addNode(LeafNode, leftId, leftLabel)
            self._nodeId += 1

            # Then assign id to the right node recursively
            rightId = self._nodeId + 1
            self._addNode(DecisionNode, rightId, rightLabel)
            self._nodeId += 1
            self._generateGraph(right)

        elif (type(right) is LeafNode):
            leftLabel = decisionNodeLabel(left.feature, left.featureValue)
            rightLabel = leafNodeLabel(right)

            # Assign id to the left node first recursively
            leftId = self._nodeId + 1
            self._addNode(DecisionNode, leftId, leftLabel)
            self._nodeId += 1
            self._generateGraph(left)

            # Then assign id to the right node
            # Don't need to add 1 after each _generateGraph call. It's handled at the end of the method
            rightId = self._nodeId
            self._addNode(LeafNode, rightId, rightLabel)

        else:
            leftLabel = decisionNodeLabel(left.feature, left.featureValue)
            rightLabel = decisionNodeLabel(right.feature, right.featureValue)

            # Assign id to the left node first recursively
            leftId = self._nodeId + 1
            self._addNode(DecisionNode, leftId, leftLabel)
            self._nodeId += 1
            self._generateGraph(left)

            # Then assign id to the right node recursively
            # Don't need to add 1 after each _generateGraph call. It's handled at the end of the method
            rightId = self._nodeId
            self._addNode(DecisionNode, rightId, rightLabel)
            self._generateGraph(right)

        # Every branch above connects the current node to both children
        self._addEdge(nodeId, leftId,
                      self._createEdgeLabel("left", node.featureValue))
        self._addEdge(nodeId, rightId,
                      self._createEdgeLabel("right", node.featureValue))

        self._nodeId += 1

    def _addEdge(self, fromId: int, toId: int, nodeLabel: str):
        ''' '''
        self._diGraph.edge(str(fromId), str(toId), label=nodeLabel)

    def _addNode(self, nodeType: TreeNode, nodeId: int, nodeLabel: str):
        ''' Add a node to the graph held in self._diGraph

            @nodeType: the node class, either LeafNode or DecisionNode
            @nodeId: id of the node; converted to a string for the graph
            @nodeLabel: text shown for the node
            @raise ValueError: if nodeType is neither LeafNode nor DecisionNode
        '''
        # Reject unknown node classes before touching the graph
        if (nodeType is not LeafNode and nodeType is not DecisionNode):
            raise ValueError("Invalid node type \"{0}\"".format(nodeType))

        nodeName = str(nodeId)
        if (nodeType is LeafNode):
            # Leaf nodes are the only ones given an explicit color
            self._diGraph.node(nodeName, nodeLabel, color="red")
        else:
            self._diGraph.node(nodeName, nodeLabel)

    def _classifyOneSample(self, row: pd.Series,
                           node: TreeNode) -> (str, float):
        ''' Classify a single sample by walking down the tree

            @row: row of a data frame
            @node: the node to start from (the root node for a full classification)
            @return: the prediction and the probability of that prediction
        '''
        # A leaf node carries the final answer
        if (type(node) is LeafNode):
            return node.prediction, node.probability

        sampleValue = row[node.feature]
        splitValue = node.featureValue

        # Continuous values go left when they are below the split value;
        # any value (continuous or not) also goes left when it equals it
        if (DecisionTree.isContinuous(sampleValue)
                and sampleValue < splitValue):
            goLeft = True
        else:
            goLeft = (sampleValue == splitValue)

        nextNode = node.left if goLeft else node.right
        return self._classifyOneSample(row, nextNode)

    def _buildTreeThread(self, dataFrame, depth):
        ''' Build the trained decision tree using multithreading. At depth 0 this
            starts 2 worker threads, one for the left branch and one for the
            right branch; deeper levels are built recursively in the calling thread.

            @dataFrame: data frame object holding the samples for this sub tree
            @depth: the current depth of the node being built
            @return: a LeafNode or a DecisionNode for this sub tree

            @TODO: UNUSED AND INCOMPLETE
        '''
        predictionCount = dataFrame[self._targetFeature].value_counts()

        def majorityLeaf():
            # Leaf predicting the most frequent label and its relative frequency
            bestLabel, bestLabelCount = max(predictionCount.items(),
                                            key=lambda x: x[1])
            bestProb = float(bestLabelCount) / sum(predictionCount)
            return LeafNode(prediction=bestLabel, probability=bestProb)

        # Stop splitting once the max depth of the tree is reached
        if (depth >= self._maxDepth):
            return majorityLeaf()

        # Stop splitting if there's no more information to gain
        feature, featureValue, infoGain = self.findBestFeature(dataFrame)
        if (infoGain == 0):
            return majorityLeaf()

        leftData, rightData = self.partition(dataFrame, feature, featureValue)
        if (depth == 0):
            # The context manager terminates the pool on exit so the worker
            # threads are not leaked; both results are collected before that.
            with ThreadPool(processes=2) as pool:
                t1 = pool.apply_async(self._buildTreeThread,
                                      (leftData, depth + 1))
                t2 = pool.apply_async(self._buildTreeThread,
                                      (rightData, depth + 1))
                print("waiting for threads")
                # get() blocks until the result is ready and re-raises any
                # exception raised inside the worker
                leftSubTree = t1.get()
                rightSubTree = t2.get()
        else:
            leftSubTree = self._buildTreeThread(leftData, depth + 1)
            rightSubTree = self._buildTreeThread(rightData, depth + 1)

        return DecisionNode(leftSubTree, rightSubTree, feature, featureValue)

    def _buildTree(self, dataFrame: pd.DataFrame, depth: int) -> TreeNode:
        ''' Recursively build the decision tree for the given data frame

            @dataFrame: data frame object holding the samples for this sub tree
            @depth: the current depth of the node being built. Splitting stops
                    once this reaches self._maxDepth.
            @return: the root node of the (sub) tree
        '''
        labelCounts = dataFrame[self._targetFeature].value_counts()

        def makeLeaf():
            # Predict the most frequent label together with its relative frequency
            topLabel, topCount = max(labelCounts.items(), key=lambda x: x[1])
            return LeafNode(prediction=topLabel,
                            probability=float(topCount) / sum(labelCounts))

        # The tree is not allowed to grow any deeper
        if (depth >= self._maxDepth):
            return makeLeaf()

        # No split improves on the current node
        feature, featureValue, infoGain = self.findBestFeature(dataFrame)
        if (infoGain == 0):
            return makeLeaf()

        leftData, rightData = self.partition(dataFrame, feature, featureValue)

        # The left branch is built before the right one
        return DecisionNode(self._buildTree(leftData, depth + 1),
                            self._buildTree(rightData, depth + 1), feature,
                            featureValue)

    def _countLeafNodes(self, node: TreeNode) -> int:
        ''' Helper function for counting leaf nodes 
        
            @node: root node of the decision tree node
            @return: number of leaf nodes
        '''
        if (node is None):
            return 0
        elif (type(node) is LeafNode):
            return 1
        else:
            return self._countLeafNodes(node.left) + self._countLeafNodes(
                node.right)

    def _countTreeDepth(self, node: TreeNode) -> int:
        ''' Helper function for counting the tree depth 
        
            @node: root node of the decision tree
            @return: the depth of the decision tree
        '''
        if (node is None) or (node.left == None and node.right == None):
            return 0
        else:
            return 1 + max(self._countTreeDepth(node.left),
                           self._countTreeDepth(node.right))

    def _splitByKTile(self, dataFrame: pd.DataFrame, feature: str,
                      quantiles: [int]) -> (float, float):
        ''' Split continuous feature by using k-tile method. 
        
            @dataFrame: data frame object
            @feature: the feature that is used as the splitting point
            @quantiles: list of quantiles use for determining the best quantile in that list
            @return: tuple containing best information gain and the best quantile value
        '''
        bestGain = 0.0
        bestQuantileValue = None
        currentImpurity = self.giniImpurity(dataFrame)
        quantileValues = dataFrame[feature].quantile(quantiles, "linear")

        # Find the best quantile value
        for quantileValue in quantileValues:
            leftData, rightData = self.partition(dataFrame, feature,
                                                 quantileValue)

            # If one of the splits has no elements, then the split is trivial
            if (len(leftData) == 0 or len(rightData) == 0):
                continue

            infoGain = self.informationGain(leftData, rightData,
                                            currentImpurity)
            if (infoGain > bestGain):
                bestGain = infoGain
                bestQuantileValue = quantileValue

        return bestGain, bestQuantileValue

    def _splitByMean(self, dataFrame: pd.DataFrame,
                     feature: str) -> (float, float):
        ''' Split continuous feature by using mean method.
        
            @dataFrame: data frame object
            @feature: the feature that is used as the splitting point
            @return: tuple containing best information gain and the mean
        '''
        # Use the the mean as the splitting point
        mean = dataFrame[feature].mean()
        currentImpurity = self.giniImpurity(dataFrame)

        leftData, rightData = self.partition(dataFrame, feature, mean)
        infoGain = self.informationGain(leftData, rightData, currentImpurity)
        return infoGain, mean
Example #15
0
class Automaton:
    """
    A Finite State Automaton (FSA), a di-graph with the following elements:
        - nodes (or states): can be initial or accepting or neither
        - edges: transitions between nodes or a self-loop edge.
        - each edge has a label from some finite alphabet

    Edges are represented by a matrix (list of lists), where each cell is a
    list itself (each element is a transition rule). The cell
    self.edges[i][j] holds the labels of the edge from the node with index i
    to the node with index j; an empty list means "no edge".

    Node indices are never reused: deleting a node only records its index in
    self.deleted_indices.

    We rely on graphviz and Qt to draw an image of the FSA.

    """
    def __init__(self):
        self.nodes = []
        # Index that will be given to the next node created by add_node
        self.node_index = 0
        self.root = None
        self.accepting_nodes = []
        self.deleted_indices = set()
        # Current width/height of the square edge matrix
        self.esize = 100
        self.edges = [ [ [] for i in range(self.esize)] for j in range(self.esize)]
        self.graph_fp = 'graphs/dfs_' + str(round(time.time()))
        self.graph = Digraph('finite_state_machine', format='png', filename=self.graph_fp)
        self.graph.attr(rankdir='LR', size='10')


    def __str__(self):
        """
        Summarize root, nodes, accepting nodes and all non-empty edges between
        nodes that have not been deleted.
        """
        live_edges = [
            (i, j, labels)
            for i, row in enumerate(self.edges) if i not in self.deleted_indices
            for j, labels in enumerate(row) if labels and j not in self.deleted_indices
        ]
        return ' Root: [{}]\n Nodes [{}]: ({})\n Final [{}]: [{}]\n Edges [{}]: [{}]'.format(
            self.root.label if self.root else 'None',
            len(self.nodes),
            ', '.join(str(n.label) for n in self.nodes),
            len(self.accepting_nodes),
            ', '.join(str(n.label) for n in self.accepting_nodes),
            len(live_edges),
            ', '.join('({}=>{}, "{}")'.format(i, j, ','.join(str(l) for l in labels))
                      for i, j, labels in live_edges)
                )


    def add_node(self, is_initial=False, is_final=False, label=''):
        """
        Creates a node object and makes sure that the matrix self.edges is
        large enough to hold the index of the new node.

        :args:
            is_initial  - bool, the node becomes the root when True
            is_final    - bool, the node is added to the accepting nodes when True
            label       - label passed on to the Node object
        :returns:
            node        - the new Node object
        """
        # Indices are never reused, so the matrix must be sized by the next
        # index rather than by the number of nodes currently alive.
        while self.node_index >= self.esize:
            self.expand_edges()

        node = Node( self.node_index, label, is_initial, is_final)
        self.node_index += 1
        self.nodes.append( node )

        if not self.root or is_initial:
            self.root = node
        if is_final:
            self.accepting_nodes.append( node )
        return node


    def delete_node(self, node):
        """
        Remove node from lists and keep track of its index
        (this is necessary to ignore edges of deleted nodes later).
        """
        self.deleted_indices.add( node.index )
        self.nodes.remove(node)
        if node in self.accepting_nodes:
            self.accepting_nodes.remove(node)


    def merge_nodes(self, nodes, new_label=''):
        """
        Merges a list of nodes into a new node.
        All edges to and from the merged nodes are reassigned to the new node.

        :args:
            nodes       - list of Node objects
            new_label   - string, label of the new node
        :returns:
            new_node    - Node object
        """

        # Keep track of the indices of the nodes to be merged, so we
        # don't try to merge the new node as well (e.g. when it's accepting node)
        merge_indices = [n.index for n in nodes]
        # Remember whether the current root is one of the merged nodes; it
        # will be deleted below and must not stay the root.
        root_is_merged = self.root is not None and self.root.index in merge_indices

        # Create new Node object
        # If any of the old nodes is initial or accepting, inherit this property
        new_node = self.add_node(
                        is_initial=any(n.is_initial for n in nodes),
                        is_final=any(n.is_final for n in nodes),
                        label=new_label
                        )

        # Merge edges from nodes and delete
        for n1 in list(self.nodes):
            for n2 in list(self.nodes):
                if self.edges[n1.index][n2.index] and (n1.index in merge_indices or n2.index in merge_indices):
                    # Edge is selfloop or edge is between nodes to be merged
                    if n1.index == n2.index or (n1.index in merge_indices and n2.index in merge_indices):
                        self.add_edges(new_node, new_node, self.edges[n1.index][n2.index])
                    # Edge is *from* nodes to be merged
                    elif n1.index in merge_indices:
                        self.add_edges(new_node, n2, self.edges[n1.index][n2.index])
                    # Edge is *to* nodes to be merged
                    elif n2.index in merge_indices:
                        self.add_edges(n1, new_node, self.edges[n1.index][n2.index])
                    # Delete old edge
                    self.edges[n1.index][n2.index] = []

        # Delete merged nodes
        for node in list(nodes):
            self.delete_node(node)
        # Update initial and final states
        if new_node.is_initial or root_is_merged:
            self.root = new_node
        if new_node.is_final:
            # NOTE(review): this drops every other accepting node, not only
            # the merged ones -- confirm that a single accepting node is intended.
            self.accepting_nodes = [new_node]

        return new_node


    def add_edge(self, n1, n2, label):
        """
        Append one label to the edge from n1 to n2.
        """
        self.edges[n1.index][n2.index].append(label)


    def add_edges(self, n1, n2, labels):
        """
        Add several labels to the edge from n1 to n2, skipping labels the
        edge already has.
        """
        if not self.edges[n1.index][n2.index]:
            # Store a copy so later changes to the caller's list (or to the
            # matrix cell it came from) cannot leak into this edge.
            self.edges[n1.index][n2.index] = list(labels)
        else:
            for label in labels:
                if label not in self.edges[n1.index][n2.index]:
                    self.edges[n1.index][n2.index].append(label)


    def get_edge(self, n1, n2):
        """
        Return the label list of the edge from n1 to n2. An edge whose only
        label is the empty string is reported as None.
        """
        edge = self.edges[n1.index][n2.index]
        return edge if edge != [''] else None


    def delete_edge(self, n1, n2):
        """
        Remove the edge from n1 to n2 by emptying its matrix cell.
        """
        # An empty list is what every other method treats as "no edge".
        self.edges[n1.index][n2.index] = []


    def expand_edges(self):
        """
        Replaces the current edges matrix with one twice as large, keeping
        every existing cell at the same [row][column] position.
        """
        self.esize = max(1, self.esize * 2)
        new_edges = [[[] for _ in range(self.esize)] for _ in range(self.esize)]
        for i, row in enumerate(self.edges):
            for j, labels in enumerate(row):
                new_edges[i][j] = labels
        self.edges = new_edges


    def show(self, title='Finite State Automaton'):
        """
        Open a QT window and draw Automaton with graphviz.
        """
        self.reset_graph()
        self.graph.render()
        App = QtWidgets.QApplication(sys.argv)
        W = QtWidgets.QWidget()
        L = QtWidgets.QLabel(W)
        L.setText("Your Finite State Automaton:")
        # The rendered image is expected next to the graph source file
        P = QtGui.QPixmap(self.graph_fp + '.png')
        L.setPixmap(P)
        W.setGeometry(0, 0, P.width()+100, P.height()+50)
        L.move(50,20)
        W.setWindowTitle(title)
        W.show()
        App.exec_()


    def reset_graph(self):
        """
        Reconstruct a new graphviz graph
        """
        self.graph.clear()
        # Add all nodes
        for node in self.nodes:
            # NOTE(review): the attributes set for the root/initial node are
            # followed by one of the two attr calls below, which set
            # fillcolor again -- confirm whether the yellow fill is meant to win.
            if node == self.root or node.is_initial:
                self.graph.attr('node',
                                width='0.8',
                                height='0.8',
                                shape='circle',
                                style='filled',
                                fillcolor='yellow' )
            if node.is_final:
                self.graph.attr( 'node',
                                shape='doublecircle',
                                style='filled',
                                fillcolor='lightskyblue' )
            else:
                self.graph.attr( 'node',
                                shape='circle',
                                style='filled',
                                fillcolor='azure2' )
            label = '<q<SUB><FONT POINT-SIZE="10">' + str(node.index) + '</FONT></SUB>>'
            self.graph.node( str(node.index), label=label )
        # Add edges
        # Only indices below node_index have ever been handed out, and
        # add_node guarantees they all fit inside the matrix.
        for i in range(self.node_index):
            if i not in self.deleted_indices:
                for j in range(self.node_index):
                    if self.edges[i][j] and j not in self.deleted_indices:
                        self.graph.edge(str(i), str(j), label=''.join(self.edges[i][j]))
Example #16
0
            kakari=line.split(' ')
            count=int(kakari[1])
            dst=kakari[2].replace('D','')
            chunks[count].dst=int(dst)                    #chunk.dst
            chunks[count].counter=count
        else:
            for number in range(count+1):
                if chunks[number].dst != -1:
                    chunks[chunks[number].dst].srcs.append(number)
                dg.node(''.join(chunks[number].morphs))

            for result in range(count+1):
                x="{0}{1}\t{2}{3}"
                if chunks[result].dst == -1:
                    saki=''
                else:
                    saki=''.join(chunks[chunks[result].dst].morphs)
                if saki:
                    moto=''.join(chunks[result].morphs)
                    dg.edge(moto,saki)
            if dg:
                prin+=1
                filename='nock44'+str(prin)
                dg.render(filename)
                dg.clear()

            for ketu in range(count+1):
                chunks[ketu].morphs=[]
                chunks[ketu].srcs=[]
            count=0
def tree(relations, input_data_entries):
    """
    Build a left-to-right graph with one branch per role of interest:
    role -> verb -> other role, where every edge is labelled with a count.

    Thresholds and feature switches (min/max counts, top_ranking_verbs,
    enable_tfidf, enable_word_embedding, role_of_interest) are read from
    module-level names.

    :param relations: relations handed to get_role_to_relations_of_interest_mappings
    :param input_data_entries: mapping whose values expose reduced_title_desc
    :return: the Digraph that was built
    """
    role_to_relations_of_interest_mappings = get_role_to_relations_of_interest_mappings(
        role_of_interest, relations)

    tree_graph = Digraph(format='png')
    tree_graph.clear()
    tree_graph.attr(rankdir='LR')

    # Punctuation-free text per entry, used for the tfidf computation
    no_punctuation_input_title_desc = {
        entry_id: clean_punctuation(entry.reduced_title_desc)
        for entry_id, entry in input_data_entries.items()
    }

    for interested_role_name, relations_of_interest in role_to_relations_of_interest_mappings.items(
    ):
        verb_counts = dict()
        verb_to_other_roles_mappings = dict()  # currently just object
        object_to_tfidf_mappings_under_verb = dict()  # verb -> obj -> tfidf

        for relation in relations_of_interest:
            verb_counts, verb_to_other_roles_mappings = update_verb_to_other_roles_mappings(
                relation, verb_counts, verb_to_other_roles_mappings)

            if enable_tfidf == True:
                object_to_tfidf_mappings_under_verb = update_object_to_tfidf_mappings(
                    no_punctuation_input_title_desc, relation,
                    object_to_tfidf_mappings_under_verb)

        if enable_word_embedding == True:
            merge_verb(verb_counts, verb_to_other_roles_mappings,
                       object_to_tfidf_mappings_under_verb)

        # Most frequent verbs first; ties are ordered by the verb itself
        sorted_verb_counts = sorted(verb_counts.items(),
                                    key=lambda kv: (kv[1], kv[0]),
                                    reverse=True)

        if enable_tfidf == True:
            verb_to_other_roles_mappings = merge_object(
                verb_to_other_roles_mappings,
                object_to_tfidf_mappings_under_verb)

        # First pass: draw role -> verb edges for the top ranking verbs
        drew_verbs = set()
        for verb_words, count in sorted_verb_counts:
            verb_name = interested_role_name + '.' + verb_words
            if not (min_verb_count_to_draw <= count <= max_verb_count_to_draw):
                continue

            # A verb is only drawn when at least one of its other roles
            # will be drawn in the second pass
            has_drawable_role = any(
                min_verb_other_roles_count_to_draw <= other_role_count <=
                max_verb_other_roles_count_to_draw for other_role_count in
                verb_to_other_roles_mappings[verb_words].values())
            if not has_drawable_role:
                continue

            tree_graph.node(interested_role_name,
                            interested_role_name,
                            color='red')
            tree_graph.node(verb_name, verb_words)
            tree_graph.edge(interested_role_name, verb_name, label=str(count))
            drew_verbs.add(verb_words)
            if len(drew_verbs) >= top_ranking_verbs:
                break

        # Second pass: draw verb -> other role edges for the verbs drawn above
        for verb_words, other_roles_count in verb_to_other_roles_mappings.items(
        ):
            if verb_words not in drew_verbs:
                continue
            verb_name = interested_role_name + '.' + verb_words
            for other_role_words, count in other_roles_count.items():
                other_role_name = verb_name + '.' + other_role_words
                if min_verb_other_roles_count_to_draw <= count <= max_verb_other_roles_count_to_draw:
                    tree_graph.node(other_role_name, other_role_words)
                    tree_graph.edge(verb_name,
                                    other_role_name,
                                    label=str(count))

    return tree_graph
Example #18
0
class DecisionTree(SupervisedModel):
    ''' '''
    ContinuousKeyFormat = "{0}{1:.3f}"

    def __init__(self, maxDepth=3, numberFeaturesToSplit=0):
        ''' Constructor '''
        if (maxDepth < 0):
            raise ValueError("Max depth must be at least 0")
        if (numberFeaturesToSplit < 0):
            raise ValueError("Number of features to split must be at least 1")

        super().__init__()
        self._maxDepth = maxDepth
        self._numberFeaturesToSplit = numberFeaturesToSplit
        self._rootNode = None
        self._diGraph = Digraph("G", format="png")
        self._dataFrame = None
        self._targetSeries = None

    @property
    def maxDepth(self) -> int:
        ''' '''
        return self._maxDepth

    @maxDepth.setter
    def maxDepth(self, value: int):
        ''' '''
        if (value < 0):
            raise ValueError("Max depth must be at least 0")
        self._maxDepth = value

    @property
    def numberFeaturesToSplit(self):
        ''' '''
        return self._numberFeaturesToSplit

    @numberFeaturesToSplit.setter
    def numberFeaturesToSplit(self, value):
        ''' '''
        if (value is not None and value < 0):
            raise ValueError("Number of features to split must be at least 1")
        self._numberFeaturesToSplit = value

    @property
    def depth(self) -> int:
        ''' Get the depth of the tree '''
        return self._countTreeDepth(self._rootNode)

    @property
    def numLeafNodes(self) -> int:
        ''' Get the number of leaf nodes in the tree '''
        return self._countLeafNodes(self._rootNode)

    @property
    def treeStructure(self):
        ''' '''
        return self._treeStructure(self._rootNode)

    @property
    def graph(self) -> Digraph:
        ''' Get the graph object representing this decision tree
        @return: a Digraph object from graphviz library
    '''
        self._diGraph.clear()
        self._generateGraph(self._rootNode)
        return self._diGraph

    @property
    def featureImportance(self) -> dict:
        ''' Return a dictionary of features and their associated importance values 
    '''
        featureImportances = {}
        self._calcFeatureImportance(self._rootNode, self._rootNode.sampleCount,
                                    featureImportances)

        # Convert dict to DataFrame, sorted by "Value" from great to small
        featureImportDf = pd.DataFrame(featureImportances.items(),
                                       columns=["Feature", "Value"])
        featureImportDf.sort_values("Value", inplace=True, ascending=False)
        featureImportDf.reset_index(drop=True, inplace=True)
        return featureImportDf

    @dc.elapsedTime
    def train(self, dataFrame, targetSeries, **kwargs):
        ''' Build the decision tree from the given training data

            @dataFrame: data frame holding the features of the training samples
            @targetSeries: target values, looked up by the index labels of dataFrame
            @kwargs: accepted but not used by this method
        '''
        # buildTree() reads the training data from these two attributes
        self._dataFrame, self._targetSeries = dataFrame, targetSeries

        allFeatures = list(self._dataFrame.columns)
        allIndices = self._dataFrame.index.values
        self._rootNode = self.buildTree(allFeatures, allIndices, 0)

        # The training data is not kept once the tree is built
        self._dataFrame = None
        self._targetSeries = None

    def classify(self, dataFrame):
        ''' '''
        predictions = []
        probabilities = []

        for _, row in dataFrame.iterrows():
            prediction, probability = self.classifyOneSample(row)
            predictions.append(prediction)
            probabilities.append(probability)

        return pd.DataFrame(
            {
                "Prediction": predictions,
                "Probability": probabilities
            },
            index=dataFrame.index)

    def classifyOneSample(self, sample):
        ''' Wrapper method for _classifyOneSample(). This abstracts away the root node from being the required argument. '''
        return self._classifyOneSample(sample, self._rootNode)

    def buildTree(self, features, indices, depth):
        ''' '''
        subDataFrame = self._dataFrame.loc[indices]
        subTargetSeries = self._targetSeries.loc[indices]

        if (depth >= self._maxDepth):
            return self._constructLeafNode(subTargetSeries)

        # Consider a subset of features to split
        subsetFeatures = self.getRandomFeatures(features,
                                                self.numberFeaturesToSplit)
        bestFeature, value, infoGain = self.findBestFeature(
            subsetFeatures, indices)
        if (infoGain == 0 or bestFeature is None):
            return self._constructLeafNode(subTargetSeries)

        # Create decision node
        entropy = self.getEntropy(subTargetSeries)
        parentNode = DecisionNode(bestFeature, entropy, len(subDataFrame))
        partitions = None

        # Partition data depending on its feature type
        if (self.isNumericFeature(bestFeature, subDataFrame)):
            partitions = self.partitionContinuous(bestFeature, value,
                                                  subDataFrame)
            parentNode.numericValue = value  # Store the splitting value for numeric feature
        else:
            partitions = self.partitionCategorical(bestFeature, subDataFrame)

        # Remove the feature that we used to split
        newFeatures = [f for f in features if f != bestFeature]

        for splitValue, childIndices in partitions.items():
            childNode = self.buildTree(newFeatures, childIndices, depth + 1)
            parentNode[splitValue] = childNode

        return parentNode

    def findBestFeature(self, features, indices):
        '''  '''
        bestInfoGain = -sys.maxsize
        bestFeature = None
        bestFeatureValue = None

        parentDataFrame = self._dataFrame.loc[indices]
        parentTargetSeries = self._targetSeries.loc[indices]
        parentEntropy = self.getEntropy(self._targetSeries.loc[indices])
        parentCount = len(parentDataFrame)

        for feature in features:
            # For continuous features, we use different quantile values
            # to determine the best split value
            if (SupervisedModel.isNumericFeature(feature, parentDataFrame)):
                quantiles = [0.2, 0.4, 0.6, 0.8]
                quantileValues = parentDataFrame[feature].quantile(quantiles)

                for q in quantileValues:
                    childrenIndices = self.partitionContinuous(
                        feature, q, parentDataFrame).values()
                    infoGain = self.informationGain(
                        parentEntropy,
                        parentCount,
                        parentTargetSeries,
                        childrenIndices,
                    )

                    if (bestInfoGain < infoGain):
                        bestInfoGain = infoGain
                        bestFeature = feature
                        bestFeatureValue = q
            else:
                childrenIndices = self.partitionCategorical(
                    feature, parentDataFrame).values()
                infoGain = self.informationGain(parentEntropy, parentCount,
                                                parentTargetSeries,
                                                childrenIndices)

                if (bestInfoGain < infoGain):
                    bestInfoGain = infoGain
                    bestFeature = feature
                    bestFeatureValue = None

        return bestFeature, bestFeatureValue, bestInfoGain

    def printTreeStructure(self):
        ''' '''
        def _printTreeStructure(tabSpace, value):
            for k, v in value.items():
                print(tabSpace, k, sep="")
                _printTreeStructure(tabSpace + tabSpace, v)

        _printTreeStructure(" ", self.treeStructure)

    def save(self, filePath, fileFormat):
        ''' Save the graph in a file in the format specified by the parameter fileFormat (pdf, png, etc) '''
        # graphviz includes the format extension after it saves the graph,
        # we need to remove the file extension from filePath
        fileName = os.path.splitext(filePath)[0]
        savedFilePath = self.graph.render(fileName,
                                          format=fileFormat,
                                          cleanup=True)
        return os.path.abspath(savedFilePath)

    @staticmethod
    def _constructContinuousKey(greatOrLessSign, value):
        ''' '''
        if (greatOrLessSign != "<" and greatOrLessSign != ">"
                and greatOrLessSign != "<=" and greatOrLessSign != ">="):
            raise ValueError(
                "Incorrect inequality sign. Must be either: <, >, <=, or >=.")

        return DecisionTree.ContinuousKeyFormat.format(greatOrLessSign, value)

    @staticmethod
    def partitionCategorical(feature, dataFrame):
        ''' '''
        return dataFrame.groupby(feature).groups

    @staticmethod
    def partitionContinuous(feature, value, dataFrame):
        ''' '''
        leftIndices = dataFrame[dataFrame[feature] < value].index.values
        rightIndices = dataFrame[dataFrame[feature] >= value].index.values

        partitions = {}
        if (len(leftIndices) > 0):
            leftKey = DecisionTree._constructContinuousKey("<", value)
            partitions[leftKey] = leftIndices

        if (len(rightIndices) > 0):
            rightKey = DecisionTree._constructContinuousKey(">=", value)
            partitions[rightKey] = rightIndices

        return partitions

    @staticmethod
    def informationGain(parentEntropy, parentCount, targetSeries,
                        childrenIndices):
        ''' '''
        childrenEntropy = 0
        for childIndices in childrenIndices:
            probability = len(childIndices) / parentCount
            childEntropy = DecisionTree.getEntropy(targetSeries[childIndices])
            childrenEntropy += (probability * childEntropy)
        return parentEntropy - childrenEntropy

    @staticmethod
    def getEntropy(series):
        ''' '''
        seriesCount = len(series)
        if (seriesCount == 0):
            return 0

        resultEntropy = 0
        for _, count in series.value_counts().items():
            probability = count / float(seriesCount)
            if (probability > 0):
                resultEntropy += probability * math.log(probability, 2)

        # Add 0 because we may get "-0" entropy. This is for display 0 without the - sign
        return -resultEntropy + 0

    @staticmethod
    def getGiniImpurity(series):
        ''' @TODO '''
        seriesCount = float(len(series))
        if (seriesCount == 0):
            return 0

        impurity = 0
        for _, count in series.value_counts().items():
            probability = count / seriesCount
            impurity += probability * (1 - probability)

        return impurity

    @staticmethod
    def _constructLeafNode(series):
        ''' Build a LeafNode that predicts the most frequent value of a series.

        @series: pandas Series with the target values that reached this leaf
        @return: LeafNode built from the most frequent label, that label's
                 relative frequency, the entropy of the series and its length
        '''
        labelCounts = series.value_counts()
        # max() keeps the first entry on ties
        topLabel, topCount = max(labelCounts.items(),
                                 key=lambda pair: pair[1])
        return LeafNode(topLabel,
                        float(topCount) / sum(labelCounts),
                        DecisionTree.getEntropy(series),
                        len(series))

    def _generateGraph(self, node, nodeId=0):
        ''' Generate the decision tree graph. 
    
      @node: the root node of the decision tree
      @nodeId: use to help assign unique id to each node.
      @return: the most current node ID. This is only used to keep track of the most current node ID.
    '''
        if (node is None):
            return nodeId

        # If the root node is a leaf node
        if (type(node) is LeafNode):
            self._addNode(LeafNode, nodeId, str(node))
            return nodeId

        # Decision Node starts here
        self._addNode(DecisionNode, nodeId, str(node))
        childNodeId = nodeId + 1

        for featureValue, childNode in node.items():
            if (type(childNode) is LeafNode):
                self._addNode(LeafNode, childNodeId, str(childNode))
                self._addEdge(nodeId, childNodeId, featureValue)
                childNodeId += 1
            else:
                # Node ID will be updated through recursion so we need to save it simply by returning the most current node ID
                currentChildNodeId = self._generateGraph(
                    childNode, childNodeId)
                self._addEdge(nodeId, childNodeId, featureValue)
                childNodeId = currentChildNodeId

        return childNodeId

    def _addEdge(self, fromId, toId, nodeLabel):
        ''' '''
        self._diGraph.edge(str(fromId), str(toId), label=nodeLabel)

    def _addNode(self, nodeType, nodeId, nodeLabel):
        ''' Add a node to the underlying graph; leaf nodes are coloured red.

        @nodeType: the class of the node, either LeafNode or DecisionNode
        @nodeId: ID of the node (converted to str)
        @nodeLabel: text shown inside the node
        @raise ValueError: when nodeType is neither of the two classes
        '''
        if (nodeType is not LeafNode and nodeType is not DecisionNode):
            raise ValueError("Invalid node type \"{0}\"".format(nodeType))

        styling = {"color": "red"} if (nodeType is LeafNode) else {}
        self._diGraph.node(str(nodeId), nodeLabel, **styling)

    @staticmethod
    def getRandomFeatures(features, numberFeatures):
        ''' Get m random features that we consider to split at each level of the tree 

      @features: list of features
      @numberFeatures: number of features that we want to use. 0 means use all features
    '''
        if (numberFeatures < 0):
            raise ValueError("numberFeatures must be greather or equal to 0")

        randomFeatures = None

        # If 0 then return the same features list OR
        # If the size of the features is <= than the number of features that
        # we want to split, then we simply return all features
        if (numberFeatures == 0 or len(features) <= numberFeatures):
            randomFeatures = features
        else:
            randomFeatures = np.random.choice(features,
                                              size=numberFeatures,
                                              replace=False)

        return randomFeatures

    def _classifyOneSample(self, sample, node):
        ''' Classify a single sample by walking the tree recursively from node.

        @sample: the sample, indexable by feature name
        @node: the node to start from (a LeafNode or a decision node)
        @return: tuple (prediction, probability)
        '''
        if (type(node) is LeafNode):
            return node.prediction, node.probability

        observed = sample[node.feature]

        # Continuous split: exactly two branches, "<" and ">=" the threshold
        if (node.numericValue is not None):
            sign = "<" if (observed < node.numericValue) else ">="
            branchKey = self._constructContinuousKey(sign, node.numericValue)
            return self._classifyOneSample(sample, node[branchKey])

        # Categorical split with a value that was seen during training
        if (observed in node.keys()):
            return self._classifyOneSample(sample, node[observed])

        # Categorical split with an unknown value: no branch matches, so every
        # branch of this node is asked for its prediction and the votes are
        # tallied. (No message is printed here because it would be very noisy.)
        tally = {}
        for sibling in node.values():
            label, prob = self._classifyOneSample(sample, sibling)
            stats = tally.setdefault(label, {"count": 0, "avgProb": 0})
            stats["count"] += 1
            stats["avgProb"] += prob

        # Turn the summed probabilities into averages per predicted label
        for stats in tally.values():
            stats["avgProb"] /= stats["count"]

        # Highest vote count wins; ties are broken by the higher average
        # probability, and remaining ties by whichever label was tallied first
        winner, winnerStats = max(
            tally.items(),
            key=lambda item: (item[1]["count"], item[1]["avgProb"]))
        return winner, winnerStats["avgProb"]

    def _treeStructure(self, node):
        ''' '''
        if (node is None):
            return {}

        # Check if the current node only contains leaf nodes
        hasOnlyLeafNodes = len(
            [v for v in node.values() if type(v) is DecisionNode]) == 0
        if (hasOnlyLeafNodes):
            return {node.feature: {}}
        else:
            structure = {node.feature: {}}
            for i, (featureValue, childNode) in enumerate(node.items()):
                if (type(childNode) is not LeafNode):
                    childNodeFeature, childNodeChildren = list(
                        self._treeStructure(childNode).items())[0]
                    key = f"{featureValue} --> {childNodeFeature}"
                    structure[node.feature][key] = childNodeChildren
                else:
                    key = f"Leaf Node {i}"
                    structure[node.feature][key] = {}

            return structure

    def _countLeafNodes(self, node) -> int:
        ''' Helper function for counting leaf nodes 
    
      @node: root node of the decision tree node
      @return: number of leaf nodes
    '''
        if (node is None):
            return 0
        elif (type(node) is LeafNode):
            return 1
        else:
            count = 0
            for featureValue in node.keys():
                count += self._countLeafNodes(node[featureValue])
            return count

    def _countTreeDepth(self, node) -> int:
        ''' Helper function for counting the tree depth 
    
      @node: root node of the decision tree
      @return: the depth of the decision tree
    '''
        if (node is None) or (type(node) is LeafNode) or (len(node.keys())
                                                          == 0):
            return 0
        else:
            deepestDepth = 0
            for featureValue in node.keys():
                depth = 1 + self._countTreeDepth(node[featureValue])
                if (deepestDepth < depth):
                    deepestDepth = depth
            return deepestDepth

    @staticmethod
    def _calcFeatureImportance(node, totalSampleCount, featureImportances):
        ''' Accumulate the feature importance of node and of all its descendants.

        For every DecisionNode the contribution
          (node.sampleCount / totalSampleCount) * (node.entropy - childrenImpurity)
        is added to featureImportances[node.feature], where childrenImpurity
        is the sum of child.entropy * (child.sampleCount / node.sampleCount)
        over the children that are DecisionNodes.

        @node: the node to start from; anything that is not a DecisionNode is ignored
        @totalSampleCount: number of samples used to weight each node
        @featureImportances: dict (feature -> importance), updated in place
        '''
        if (not isinstance(node, DecisionNode)):
            return

        # NOTE(review): only DecisionNode children contribute here; leaf
        # children are left out of the impurity sum - confirm this is intended.
        weightedChildEntropy = 0
        for child in node.values():
            if (isinstance(child, DecisionNode)):
                weightedChildEntropy += child.entropy * (
                    child.sampleCount / node.sampleCount)

        nodeWeight = node.sampleCount / totalSampleCount
        gain = nodeWeight * (node.entropy - weightedChildEntropy)

        # A feature can be used by several nodes, so contributions add up
        featureImportances[node.feature] = featureImportances.get(
            node.feature, 0) + gain

        # Recursively compute the descendant nodes' importance value
        for child in node.values():
            DecisionTree._calcFeatureImportance(child, totalSampleCount,
                                                featureImportances)

    def __repr__(self):
        ''' '''
        s = "Decision Tree | Depth={0} | Number of Leaf Nodes={1} | Number of features to split={2}"
        return s.format(self.depth, self.numLeafNodes,
                        self.numberFeaturesToSplit)
def tree(relations):
    """Build a left-to-right graph: interesting role -> verbs -> ARG1 roles.

    The relations are first grouped per interesting role by
    get_role_to_relations_of_interest_mappings(role_of_interest, relations).
    For every role, the verbs (label 'V') of its relations are counted, and for
    every verb the words of the accompanying 'ARG1' roles are counted.

    A verb is drawn when its count lies within
    [min_verb_count_to_draw, max_verb_count_to_draw] and at least one of its
    ARG1 counts lies within
    [min_verb_other_roles_count_to_draw, max_verb_other_roles_count_to_draw].
    Verbs are visited from most to least frequent and at most
    top_ranking_verbs of them are drawn per role. For each drawn verb, every
    ARG1 whose count lies within the ARG1 bounds is drawn below it.
    Edge labels carry the counts.

    All thresholds and role_of_interest are read from module-level names.

    :param relations: the relations to analyse; each relation is an iterable
        of roles exposing `label` and `words`
    :return: the populated Digraph (PNG format)
    """
    role_to_relations_of_interest_mappings = get_role_to_relations_of_interest_mappings(
        role_of_interest, relations)

    tree_graph = Digraph(format='png')
    tree_graph.clear()
    tree_graph.attr(rankdir='LR')
    for interested_role_name, relations_of_interest in role_to_relations_of_interest_mappings.items(
    ):

        print(
            UseStyle("add interested node: " + interested_role_name,
                     fore='green'))
        verb_counts = dict()
        verb_to_other_roles_mappings = dict()

        for relation in relations_of_interest:
            # Reset per relation: otherwise a relation without a 'V' role would
            # be counted under the verb of the previous relation (or raise
            # UnboundLocalError if it is the very first one).
            verb_role = None
            other_roles = list()
            for role in relation:
                if role.label == 'V':
                    verb_role = role
                elif role.label == 'ARG1':
                    other_roles.append(role.words)

            if verb_role is None:
                # Nothing to attach the ARG1 roles to
                continue

            verb_words = verb_role.words
            verb_counts[verb_words] = verb_counts.get(verb_words, 0) + 1
            other_role_counts = verb_to_other_roles_mappings.setdefault(
                verb_words, dict())
            for other_role_words in other_roles:
                other_role_counts[other_role_words] = other_role_counts.get(
                    other_role_words, 0) + 1

        # Most frequent verbs first; the sort is stable, so ties keep the order
        # in which the verbs were first seen
        sorted_verb_counts = sorted(verb_counts.items(),
                                    key=lambda kv: kv[1],
                                    reverse=True)

        drew_verbs = set()
        for (verb_words, count) in sorted_verb_counts:
            if not (min_verb_count_to_draw <= count <= max_verb_count_to_draw):
                continue

            # Only draw a verb that will have at least one ARG1 below it
            can_draw = any(
                min_verb_other_roles_count_to_draw <= other_role_count <=
                max_verb_other_roles_count_to_draw for other_role_count in
                verb_to_other_roles_mappings[verb_words].values())
            if not can_draw:
                continue

            # Node names are prefixed with the role so that the same verb under
            # two different roles gets two distinct nodes
            verb_name = interested_role_name + '.' + verb_words
            tree_graph.node(interested_role_name,
                            interested_role_name,
                            color='red')
            tree_graph.node(verb_name, verb_words)
            tree_graph.edge(interested_role_name, verb_name, label=str(count))
            drew_verbs.add(verb_words)
            if len(drew_verbs) >= top_ranking_verbs:
                break

        for verb_words, other_roles_count in verb_to_other_roles_mappings.items(
        ):
            if verb_words not in drew_verbs:
                continue
            verb_name = interested_role_name + '.' + verb_words
            for other_role_words, count in other_roles_count.items():
                if not (min_verb_other_roles_count_to_draw <= count <=
                        max_verb_other_roles_count_to_draw):
                    continue
                other_role_name = verb_name + '.' + other_role_words
                tree_graph.node(other_role_name, other_role_words)
                tree_graph.edge(verb_name, other_role_name, label=str(count))

    return tree_graph
Example #20
0
class Automata:
    # Class-level counter shared by all instances so that the state IDs of
    # automata built from expressions follow one sequence and never collide
    nxtNode = 0

    # Build an automaton.
    #   - With `path`: load it from a transition-table file (the format written
    #     by conversion_A_Archivo). State names are strings and self.final is
    #     the list of accepting state names.
    #   - Without `path`: build a two-state automaton for the basic expression
    #     `exp`: a single character, a range "a-z" (alphanumeric characters of
    #     that code-point range) or a comma-separated list of symbols. State
    #     IDs are integers and self.final is the single accepting state ID.
    def __init__(self, exp, path=None):
        self.exp = exp
        self.G = Digraph()
        self.estados = {}  # state name/ID -> Estado
        self.alf = set()
        # Case: create from file
        if path:
            # `with` guarantees the file is closed even if parsing fails
            with open(path, "r") as archivo:
                lineas = archivo.readlines()
            # The first line of the file is the alphabet
            self.alf = lineas[0].split()
            # The initial state is the first field of the second line
            self.inicial = lineas[1].split()[0]
            # Accepting states, collected so that pertenece() and
            # conversion_A_Archivo() also work on file-loaded automata
            self.final = []
            # Every line after the alphabet describes one state
            for linea in lineas[1:]:
                campos = linea.split()
                # The last field is the token; '-1' means the state is not
                # accepting and has no token
                terminal = campos[-1] != '-1'
                # The first field is the name of the state
                nodeName = campos[0]
                self.estados[nodeName] = Estado(terminal)
                if terminal:
                    self.final.append(nodeName)
                # Fields 1..len(alf): target state index per symbol, '-1' = none
                for i, simb in enumerate(self.alf):
                    if campos[i + 1] != '-1':
                        fin = 'S' + campos[i + 1]
                        self.estados[nodeName].addTransicion(simb, simb, fin)
            return
        # Case: create from the expression
        self.inicial = Automata.nxtNode
        self.final = Automata.nxtNode + 1
        self.estados[self.inicial] = Estado(False)
        self.estados[self.final] = Estado(True)
        Automata.nxtNode += 2
        # Basic case: a single symbol
        if len(exp) == 1:
            self.alf = {exp}
        else:
            # Ranges
            if exp.count('-'):
                inicio, fin = [ord(x) for x in exp.split('-')]
                for simb in range(inicio, fin + 1):
                    if chr(simb).isalnum():
                        self.alf.add(chr(simb))
            # Comma separated
            else:
                self.alf = set(exp.split(','))
        self.estados[self.inicial].addTransicion(exp, self.alf, self.final)

    # Print the states and transitions of the automaton
    def print(self):
        for origen, destino in self.estados.items():
            print('Origen: ', origen)
            for exp, trns in destino.transiciones.items():
                print('\t',
                      exp,
                      ':= {',
                      ','.join(trns.simbolos),
                      '} ->',
                      trns.destinos,
                      sep='')

    # Render the automaton as a PNG named `path` inside the "images" directory
    def plot(self, path):
        self.G.clear()
        # Change these attributes to change the size of the image
        self.G.attr(ratio='fill',
                    size='3.8,2.77',
                    dpi='300',
                    rank='same',
                    rankdir='LR')
        # 'S' is an auxiliary point node whose edge marks the initial state
        self.G.edge('S', str(self.inicial))
        for origin, estado in self.estados.items():
            if estado.final:
                self.G.node(str(origin), shape='doublecircle')
            for exp, trns in estado.transiciones.items():
                for destino in trns.destinos:
                    self.G.edge(str(origin), str(destino), label=exp)
        self.G.node(
            'S',
            label=None,
            shape='point',
        )
        self.G.render(filename=path,
                      view=False,
                      directory='images',
                      cleanup=True,
                      format='png')

    # Return the epsilon closure of `edos`: the given states plus every state
    # reachable from them through EPS transitions only. The result is a new
    # list without duplicates, in discovery order; `edos` is not modified.
    def cEpsilon(self, edos):
        cerradura = list(dict.fromkeys(edos))
        vistos = set(cerradura)
        i = 0
        # `cerradura` grows while it is being scanned, hence the index loop
        while i < len(cerradura):
            edo = self.estados[cerradura[i]]
            if EPS in edo.transiciones:
                for d in edo.transiciones[EPS].destinos:
                    if d not in vistos:
                        vistos.add(d)
                        cerradura.append(d)
            i += 1
        return cerradura

    # Return the list of states reachable from any state of `edos` (a set or
    # list) by consuming the symbol `s`. States unknown to the automaton are
    # ignored.
    def moverA(self, edos, s):
        result = set()
        for edo in edos:
            if edo in self.estados:
                for tr in self.estados[edo].transiciones.values():
                    if s in tr.simbolos:
                        result.update(tr.destinos)
        return list(result)

    # Epsilon closure of the states reached from `edos` with the symbol `s`
    def irA(self, edos, s):
        return self.cEpsilon(self.moverA(edos, s))

    # Return True when the automaton accepts the sequence of symbols `sigma`
    def pertenece(self, sigma):
        # Start from the closure of the initial state so that the empty
        # sequence is accepted when an accepting state is epsilon-reachable
        edos = self.cEpsilon([self.inicial])
        for s in sigma:
            edos = self.irA(edos, s)
            if (len(edos) == 0):
                return False
        # self.final is a single ID, or a collection after unirM / file loading
        if isinstance(self.final, int):
            return self.final in edos
        return len(set(self.final).intersection(edos)) > 0

    # Make the automaton optional: accept what it accepted, or nothing (ε)
    def opcional(self):
        self.exp = '(' + self.exp + ')' '+eps'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial points to the original initial and to the new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(nFinal)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    # Concatenate the automaton `f2` after this one
    def concat(self, f2):
        self.exp = '(' + self.exp + ')' + f2.exp
        # Copy every state of f2 together with its transitions
        self.estados.update(f2.estados)
        # Merge the alphabets
        self.alf = self.alf.union(f2.alf)
        # Fuse f2's initial state into our final state: drop the former and
        # give its transitions to the latter
        self.estados.pop(f2.inicial, None)
        self.estados[self.final].transiciones.update(
            f2.estados[f2.inicial].transiciones)
        # Our old final state stops accepting; f2's final takes over
        self.estados[self.final].final = False
        self.final = f2.final

    # Union of this automaton with several others that keeps every original
    # final state accepting; afterwards self.final is a list of state IDs
    def unirM(self, automatas):
        self.exp = ''
        finales = [self.final]
        # Create the new initial state
        nInicial = Automata.nxtNode
        Automata.nxtNode += 1
        self.estados[nInicial] = Estado(False)
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.inicial = nInicial
        for a in automatas:
            self.exp += a.exp
            # Merge the symbols of every automaton
            self.alf = self.alf.union(a.alf)
            # Link the new initial state to the other initial states
            self.estados[nInicial].addEpsTrans(a.inicial)
            # Copy the states and transitions
            self.estados.update(a.estados)
            finales.append(a.final)
        self.final = finales

    # Union of this automaton with `f2`, with a single new final state
    def unir(self, f2):
        # Update the expression
        self.exp = '(' + self.exp + ')' + '+' + f2.exp
        # Copy every state of f2 together with its transitions
        self.estados.update(f2.estados)
        # Merge the alphabets
        self.alf = self.alf.union(f2.alf)
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # Join both automata through the new states
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(f2.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[f2.final].addEpsTrans(nFinal)
        # The old final states stop accepting
        self.estados[f2.final].final = False
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    # Positive closure: one or more repetitions
    def cerradura_positiva(self):
        self.exp = '(' + self.exp + ')^+'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial points to the original initial; the original final
        # points back to the original initial and on to the new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    # Kleene closure: zero or more repetitions
    def cerradura_kleene(self):
        self.exp += '^k'
        # Create the new initial and final states
        nInicial = Automata.nxtNode
        nFinal = Automata.nxtNode + 1
        Automata.nxtNode += 2
        self.estados[nInicial] = Estado(False)
        self.estados[nFinal] = Estado(True)
        # The new initial points to the original initial and to the new final;
        # the original final points back to the original initial and on to the
        # new final
        self.estados[nInicial].addEpsTrans(self.inicial)
        self.estados[nInicial].addEpsTrans(nFinal)
        self.estados[self.final].addEpsTrans(self.inicial)
        self.estados[self.final].addEpsTrans(nFinal)
        self.estados[self.final].final = False
        # Update the initial and final states
        self.inicial = nInicial
        self.final = nFinal

    # Write the transition table obtained by subset construction to `path`.
    # Line 1 is the alphabet. Every following line is
    #   S<index> <target index or -1, one per symbol> <token or -1>
    # where the token of an accepting row is (index + 1) * 10.
    def conversion_A_Archivo(self, path):
        # Work on a local set so that self.final keeps its type; rewriting it
        # here would break later calls that use it as a key into self.estados
        if isinstance(self.final, int):
            finales = {self.final}
        else:
            finales = set(self.final)
        with open(path, "w") as file:
            # Table of discovered state sets and the index used to walk it
            S = [self.cEpsilon([self.inicial])]
            # Same sets as frozensets: membership must not depend on the order
            # in which the states of a set happen to be listed
            claves = [frozenset(S[0])]
            currS = 0
            # The alphabet goes first
            file.write(' '.join(self.alf) + '\n')
            # Until the last discovered state set has been processed
            while currS != len(S):
                si = S[currS]
                # Name of the row
                file.write('S' + str(currS) + ' ')
                # sj is the result of irA from si with each alphabet symbol
                for simb in self.alf:
                    sj = self.irA(si, simb)
                    if len(sj):
                        # Register sj if it is new, then write its index
                        clave = frozenset(sj)
                        if clave not in claves:
                            claves.append(clave)
                            S.append(sj)
                        file.write(str(claves.index(clave)) + ' ')
                    else:  # no transition with this symbol
                        file.write('-1 ')
                if set(si).intersection(finales):
                    file.write(str((currS + 1) * 10) + '\n')
                else:
                    file.write('-1\n')
                currS += 1
class DerivaCatalogToGraph:
    """Draw the tables of a catalog model and their foreign keys as a graph.

    Tables become nodes named "<schema>_<table>", grouped in one cluster per
    schema; foreign keys become edges. Every node carries a URL built from
    the catalog's server and catalog ID.
    """

    def __init__(self, catalog, engine='dot'):
        """
        :param catalog: catalog object; must provide getCatalogModel(),
            get_server_uri() and catalog_id
        :param engine: layout engine handed to Digraph
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )

        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix of every node URL; only the host part of the server URI is used
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)

        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')
        #self.graph.attr('graph', concentrate=True)

    def clear(self):
        """Clear the underlying graph."""
        self.graph.clear()

    def view(self):
        """Render the underlying graph and open it in a viewer."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.
        :param schemas:  List of schemas that should be included.  Use whole catalog
            (except '_acl_admin', 'public' and 'WWW') if None.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Collapse association tables so that only edges between endpoints are used
        :return:
        """

        if schemas is None:
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in ['_acl_admin', 'public', 'WWW']]

        for schema in schemas:
            self.schema_to_graph(schema, skip_terms=skip_terms, schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.
        :param schema_name: Name of the schema in the model to be used.
        :param schemas: Names of the schemas whose tables may be connected by
            foreign-key edges. An edge is only drawn when both of its tables
            belong to one of these schemas, so with the default (None, treated
            as empty) no foreign-key edges are drawn.
        :param skip_terms: Do not add nodes for vocabulary tables.
        :param skip_association_tables: Do not add nodes for association tables.
        :return:
        """
        # None instead of a mutable [] default; same meaning as before
        schemas = [] if schemas is None else schemas

        schema = self._model.schemas[schema_name]

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name, node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                node_label = '{}:{}'.format(schema_name, table.name)
                if DerivaCatalogToGraph._is_vocabulary_table(table):
                    if not skip_terms:
                        schema_graph.node(node_name, label=node_label,
                                          shape='ellipse',
                                          URL=self._chaise_uri(table))
                else:
                    # Skip over current table if it is an association table and option is set.
                    if not (table.is_association() and skip_association_tables):
                        schema_graph.node(node_name, label=node_label,
                                          shape='box',
                                          URL=self._chaise_uri(table))
                    else:
                        print('Skipping node', node_name)

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=schemas,
                                           skip_association_tables=skip_association_tables)
        return

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=None):
        """
        Add edges for each foreign key relationship in the specified table.
        :param table: Table whose foreign keys are drawn.
        :param skip_terms: Do not add edges that point to vocabulary tables.
        :param skip_association_tables: Replace an association table (when
            table.is_association() == 2) by one gray, bidirectional edge
            between the tables referenced by its first two foreign keys.
        :param schemas: Names of the schemas to include. A foreign-key edge is
            only added when both the table and the referenced table belong to
            one of them; None is treated as empty, so no such edge is added.
        :return:
        """
        # None instead of a mutable [] default; same meaning as before
        schemas = [] if schemas is None else schemas

        # If table is an association table, put in an edge between the two endpoints in the relation.
        if table.is_association() == 2 and skip_association_tables:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            t1_name = '{}_{}'.format(t1.schema.name, t1.name)
            t2_name = '{}_{}'.format(t2.schema.name, t2.name)
            self.graph.edge(t1_name, t2_name, dir='both', color='gray')
        else:
            for fkey in table.foreign_keys:
                referenced_table = list(fkey.column_map.values())[0].table
                table_name = '{}_{}'.format(referenced_table.schema.name, referenced_table.name)

                # If either end is in a schema we are skipping, do not add an edge.
                if (referenced_table.schema.name not in schemas or table.schema.name not in schemas):
                    continue
                # If the target is a term table, and we are not including terms, do not add an edge.
                if DerivaCatalogToGraph._is_vocabulary_table(referenced_table) and skip_terms:
                    continue

                # Add an edge from the current node to the target table.
                self.graph.edge('{}_{}'.format(table.schema.name, table.name), table_name)

        return

    def save(self, filename=None, format='pdf', view=False):
        """
        Write the graph to disk.
        :param filename: Target path. Required in practice: the default None
            makes os.path.abspath raise a TypeError.
        :param format: Output format. If it contains 'gv' the DOT source is
            saved, otherwise the graph is rendered in that format.
        :param view: Open the rendered file in a viewer (render case only).
        :return:
        """
        # Local names chosen so that the builtin dir() is not shadowed
        (directory, basename) = os.path.split(os.path.abspath(filename))
        if 'gv' in format:
            self.graph.save(filename=basename, directory=directory)
        else:
            print('dumping graph in file', basename, format)
            self.graph.render(filename=basename, directory=directory, view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Delegate to the graph's _repr_svg_."""
        return self.graph._repr_svg_()

    @staticmethod
    def _is_vocabulary_table(t):
        """
        Tell whether a table looks like a vocabulary (term) table.
        :param t: Table to test.
        :return: True when the lower-cased schema name is a substring of
            'vocabulary'; otherwise a truthy value when the table has truthy
            'ID', 'Name', 'URI' and 'Synonyms' columns, and False when one of
            those columns is missing.
        """
        # NOTE(review): this is a substring test *of* 'vocabulary', so schema
        # names such as 'vocab' match, but so would 'a' or '' - confirm intent.
        if t.schema.name.lower() in 'vocabulary':
            return True
        try:
            return t.columns['ID'] and t.columns['Name'] and t.columns['URI'] and t.columns['Synonyms']
        except KeyError:
            return False

    def _chaise_uri(self, table):
        """Return the URL of a table: the base URL followed by '<schema>:<table>'."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)
Example #22
0
# Fragment of a plotting script: `specdot`, `dfi` and `pd` are defined before
# this point, outside the visible part.
specdot.view()


# NOTE: outputs are not drawn yet!

# The script stops here; everything below this call never runs.
sys.exit("Stopping early")

# NOTE(review): unreachable draft of the per-process graph. `process` is not
# assigned in this fragment and `dfi2` is only defined further down - looks
# like an earlier version of the loop at the bottom; confirm before reviving.
for idx, edge in dfi2.iterrows():
    #print edge['process_id'], edge['input_process_id']
    # Keep only the edges that touch the selected process
    if edge['input_process_id'] == process or edge['process_id'] == process:
        specdot.edge(edge['input_process_id'],edge['process_id'],label=edge['instgen'])

    # These three statements sit inside the loop body, so they run once per row
    print(specdot.source)
    specdot.render("".join(['./',process,'_processflow.gv']),view=True)
    specdot.clear()



### Process connection graph, specific process
# One row per distinct (process, input process, instgen) combination
dfi2 = dfi.drop_duplicates(['process_id', 'input_process_id','instgen'])
specdot = Digraph(comment="Process Flow")
specdot.attr('Node',shape='box')

# For every process, add the edges that enter or leave it, then print the DOT
# source. The graph is not cleared between processes, so edges accumulate.
for process in pd.unique(dfi['process_id']):
    for idx, edge in dfi2.iterrows():
        #print edge['process_id'], edge['input_process_id']
        if edge['input_process_id'] == process or edge['process_id'] == process:
            specdot.edge(edge['input_process_id'],edge['process_id'],label=edge['instgen'])

    print(specdot.source)
Example #23
0
class ros_mask_rcnn:
    """Run Mask R-CNN on synchronized RGB/depth images and publish a scene graph.

    For every synchronized image pair, :meth:`callback` runs detection, builds
    a display graph (``self.dot``) and a scene graph, then publishes the
    annotated image and the scene graph's DOT source.
    """

    # Bounds used to rescale the incoming depth image:
    # value v becomes near + v * (far - near).
    _FAR_CLIPPING_PLANE = 3.5
    _NEAR_CLIPPING_PLANE = 0.0099999

    # A product is attached to another object when more than this percentage
    # of the product's own bounding box is covered by the other object's box.
    _MIN_SUPPORT_OVERLAP = 25.0

    # (name prefix, type code) pairs tested in order by get_type.
    # NOTE(review): 'robot' is lowercase while the other prefixes are
    # capitalized — confirm it matches the entries of class_names.
    _TYPE_BY_PREFIX = (
        ('Wall', 3),  # wall
        ('Human', 2),  # human
        ('robot', 1),  # robot / non-human dynamic objects
    )

    def __init__(self):
        """Load the model, create the display graph and wire up the ROS topics."""
        # Load model
        config = InferenceConfig()
        config.display()

        self.model = modellib.MaskRCNN(mode="inference",
                                       model_dir=LOG_DIR,
                                       config=config)

        self.model.load_weights(MODEL_PATH, by_name=True)
        self.dot = Digraph(comment='warehouse', format='svg')

        # Set topics
        self.bridge = CvBridge()
        # False until the first callback has finished; from then on the
        # display graph is cleared before it is rebuilt.
        self.check = False
        self.to_display = rospy.get_param('/mrcnn/display_results', False)

        # Use ApproximateTimeSynchronizer if the depth and rgb cameras do not
        # have the same timestamp; otherwise a TimeSynchronizer would do.
        self.image_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/rgb/raw_image', Image)
        self.image_depth_sub = message_filters.Subscriber(
            '/turtlebot2i/camera/depth/raw_image', Image)
        self.ts = message_filters.ApproximateTimeSynchronizer(
            [self.image_sub, self.image_depth_sub], 10, 0.1)
        self.ts.registerCallback(self.callback)

        self.image_pub = rospy.Publisher("/turtlebot2i/mrcnn_out",
                                         Image,
                                         queue_size=1)
        self.scenegraph_pub = rospy.Publisher('/turtlebot2i/scene_graph',
                                              SceneGraph,
                                              queue_size=10)
        # Timestamps recorded by callback (one entry per processed frame).
        self.time_start_list = []
        self.time_sg_end_list = []
        self.time_all_end_list = []

    def get_overlap_bbox(self, rec1, rec2):
        """Compare two bounding boxes.

        :param rec1: first box as (y_min, x_min, y_max, x_max).
        :param rec2: second box, same layout.
        :return: ``(is_overlapping, intersection_area)`` where
            ``intersection_area`` is the intersection expressed as a
            percentage of ``rec1``'s area (0.0 when ``rec1`` has no area).
        """
        y1min, x1min, y1max, x1max = rec1[0], rec1[1], rec1[2], rec1[3]
        y2min, x2min, y2max, x2max = rec2[0], rec2[1], rec2[2], rec2[3]

        box1 = box(x1min, y1min, x1max, y1max)
        box2 = box(x2min, y2min, x2max, y2max)
        is_overlapping = box1.intersects(box2)

        # A degenerate first box would otherwise divide by zero.
        if box1.area == 0:
            return is_overlapping, 0.0

        intersection_area = box1.intersection(box2).area / box1.area * 100
        return is_overlapping, intersection_area

    def get_type(self, i):
        """Map an object name to its type code.

        :param i: object name, e.g. ``'Wall#0'``.
        :return: 3 for walls, 2 for humans, 1 for robots, 0 for anything
            else (static objects).
        """
        for prefix, obj_type in self._TYPE_BY_PREFIX:
            if i.startswith(prefix):
                return obj_type
        return 0  # static objects

    def _scene_label(self, name, distance):
        """Return the record label used for ``name`` in the scene graph.

        Orientation, direction and velocity are reported as 0 and both sizes
        as 1; only the type and the distance vary.
        """
        return ('{%s|type: %s|distance: %.2f|orientation: %.2f|'
                'direction: %.2f|velocity: %.2f|size_x: %.2f|size_y: %.2f}' %
                (name, self.get_type(name), distance, 0, 0, 0, 1, 1))

    @staticmethod
    def _distance_label(name, distances, intersection_area=None):
        """Return the display-graph label with min/max/mean of ``distances``.

        When ``intersection_area`` is given it is appended as an extra field.
        """
        label = "%s|{Distance|Min: %.2f|Max: %.2f|Mean: %.2f}" % (
            name, np.min(distances), np.max(distances), np.mean(distances))
        if intersection_area is not None:
            label += "|intersection area: %.2f" % intersection_area
        return label

    @staticmethod
    def _add_base_nodes(graph, robot_label):
        """Add the fixed robot, warehouse and floor nodes to ``graph``."""
        graph.node_attr['shape'] = 'record'
        graph.node('robot', label=robot_label)
        graph.node('warehouse', label='warehouse')
        graph.node('floor', label='{floor|size: 25*25}')
        graph.edge('warehouse', 'floor')

    def _find_support(self, index, rois, names):
        """Find the object a product rests on.

        Returns ``(parent_name, intersection_area)``. The parent is the first
        other object whose box covers more than ``_MIN_SUPPORT_OVERLAP``
        percent of the product's box, or ``'floor'`` when there is none. In
        the floor case the area is the one computed for the last object
        compared (0.0 if there was nothing to compare against).
        """
        intersection_area = 0.0
        for j, other in enumerate(names):
            if j == index:
                continue
            is_overlapping, intersection_area = self.get_overlap_bbox(
                rois[index], rois[j])
            if is_overlapping and intersection_area > self._MIN_SUPPORT_OVERLAP:
                return other, intersection_area
        return 'floor', intersection_area

    def callback(self, image, depth_image):
        """Process one synchronized RGB/depth pair.

        Runs detection on the RGB image, rebuilds the display graph and a
        fresh scene graph from the detections, optionally renders the display
        graph, and publishes the annotated image and the scene graph source.

        :param image: RGB ``Image`` message.
        :param depth_image: depth ``Image`` message.
        """
        try:
            self.time_start_list.append(time.time())

            cv_depth_image = self.bridge.imgmsg_to_cv2(depth_image,
                                                       "passthrough")
            cv_depth_image = cv2.flip(cv_depth_image, 0)
            # assumes the raw depth values are normalized to [0, 1] so this
            # yields distances between the clipping planes — TODO confirm
            cv_depth_image = self._NEAR_CLIPPING_PLANE + (
                cv_depth_image *
                (self._FAR_CLIPPING_PLANE - self._NEAR_CLIPPING_PLANE))

            if self.check:
                self.dot.clear()
            describe_start = time.time()

            cv_image = self.bridge.imgmsg_to_cv2(image, "rgb8")
            r = self.model.detect([cv_image], verbose=1)[0]
            img_out = display_instances(cv_image,
                                        r['rois'],
                                        r['masks'],
                                        r['class_ids'],
                                        class_names,
                                        r['scores'],
                                        show_window=self.to_display)

            # Name every detection '<class name>#<running index per class>'.
            count_objects = [0] * len(class_names)
            detected_objects = []
            for i, class_id in enumerate(r['class_ids']):
                name = class_names[class_id] + '#' + str(
                    count_objects[class_id])
                count_objects[class_id] += 1
                detected_objects.append(name)
                print('Object : ', class_id, name, r['rois'][i])

            robot_label = "turtlebot2i"
            self._add_base_nodes(self.dot, robot_label)
            scene_dot = Digraph(comment='warehouse', format='svg')
            self._add_base_nodes(scene_dot, robot_label)

            for i, name in enumerate(detected_objects):
                # Depth values of the pixels covered by this object's mask.
                distances = cv_depth_image[r['masks'][:, :, i]]
                nearest = np.min(distances)

                # Plain prefix tests, consistent with get_type.
                if name.startswith('Wall'):
                    parent = 'warehouse'
                    node_label = name
                elif name.startswith('Product'):
                    parent, intersection_area = self._find_support(
                        i, r['rois'], detected_objects)
                    node_label = self._distance_label(name, distances,
                                                      intersection_area)
                else:
                    parent = 'floor'
                    node_label = self._distance_label(name, distances)

                self.dot.node(name, label=node_label)
                self.dot.edge(parent, name, label='on')
                scene_dot.node(name, label=self._scene_label(name, nearest))
                scene_dot.edge(parent, name, label='on')

            # Explicit comparison on purpose: the ROS parameter is not
            # guaranteed to be a bool, and a truthiness test would treat a
            # non-empty string as enabled.
            if self.to_display == True:  # noqa: E712
                # Only open the viewer on the first rendered frame.
                self.dot.render('scene_graph.gv', view=not self.check)
            self.check = True

            sg_message = SceneGraph()
            sg_message.header = std_msgs.msg.Header()
            sg_message.header.stamp = rospy.Time.now()
            sg_message.sg_data = scene_dot.source
            print('Time taken to decribe: ', time.time() - describe_start)

            self.scenegraph_pub.publish(sg_message)
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(img_out, "bgr8"))

            self.time_sg_end_list.append(time.time())
            last_duration = self.time_sg_end_list[-1] - self.time_start_list[-1]
            print("ROS MRCNN last duration:", last_duration)

        except CvBridgeError as e:
            print(e)
Example #24
0
 def show(self, file_name):
     """Draw the topology nodes into a graph and open it in a viewer.

     :param file_name: file name given to the Digraph; its directory is
         ``./pdf_data``.
     """
     graph = Digraph(filename=file_name, directory="./pdf_data")
     graph.clear()
     nodes = self.topology_node_list
     for current in nodes:
         # Each node draws itself; it is handed its fathers and the graph.
         current.print_node(father_nodes=current.fathers, d=graph)
     graph.view()