Ejemplo n.º 1
0
    def __init__(self):
        """Evaluation-time configuration: device, checkpoint path and sampling sizes."""
        # Prefer GPU 0 when available, otherwise fall back to CPU
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        self.model_path = 'model_save/' + 'model_name'

        # Epoch number recorded inside the checkpoint
        self.num_epochs = get_model_attribute('epoch', self.model_path, self.device)

        # Whether to generate networkx format graphs for real datasets
        self.generate_graphs = True

        # Total number of graphs to sample; batch_size must divide count evenly
        self.count = 2560
        self.batch_size = 32

        self.metric_eval_batch_size = 256

        # Specific to DFScodeRNN
        self.max_num_edges = 50

        # Specific to GraphRNN
        self.min_num_node = 0
        self.max_num_node = 40

        # Arguments the model was trained with, restored from the checkpoint
        self.train_args = get_model_attribute('saved_args', self.model_path, self.device)

        # Output directory: graphs/<fname>_<time>/<epoch>/
        self.graphs_save_path = 'graphs/'
        self.current_graphs_save_path = '{}{}_{}/{}/'.format(
            self.graphs_save_path, self.train_args.fname,
            self.train_args.time, self.num_epochs)
Ejemplo n.º 2
0
def train(args, dataloader_train, model, feature_map, dataloader_validate=None):
    """
    Train every sub-network in `model` for `args.epochs` epochs.

    :param args: training configuration (lr, milestones, gamma, epochs,
        load/save flags, logging flags, ...)
    :param dataloader_train: training set loader
    :param model: dict mapping component name -> torch module
    :param feature_map: dataset feature maps forwarded to train_epoch/test_data
    :param dataloader_validate: optional validation set loader
    """
    # One Adam optimizer per sub-network
    optimizer = {}
    for name, net in model.items():
        optimizer['optimizer_' + name] = optim.Adam(
            list(net.parameters()), lr=args.lr, weight_decay=5e-5)

    # One LR scheduler per optimizer.
    # BUG FIX: schedulers were previously assigned into the `optimizer` dict
    # ("optimizer['scheduler_' + name] = ..."), leaving `scheduler` empty, so
    # load_model/save_model/train_epoch never saw them and the learning rate
    # was never decayed. Matches the correct variant elsewhere in this file.
    scheduler = {}
    for name, net in model.items():
        scheduler['scheduler_' + name] = MultiStepLR(
            optimizer['optimizer_' + name], milestones=args.milestones, gamma=args.gamma)

    if args.load_model:
        load_model(args.load_model_path, args.device, model, optimizer, scheduler)
        print('Model loaded')

        # Resume from the epoch recorded in the checkpoint
        epoch = get_model_attribute('epoch', args.load_model_path, args.device)
    else:
        epoch = 0

    while epoch < args.epochs:
        loss = train_epoch(epoch, args, model, dataloader_train, optimizer, scheduler, feature_map)
        epoch += 1

        # logging
        if args.log_tensorboard:
            log_value('train_loss ' + args.fname, loss, epoch)
        else:
            print('Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))

        # periodic model checkpoint
        if args.save_model and epoch != 0 and epoch % args.epochs_save == 0:
            save_model(epoch, args, model, optimizer, scheduler, feature_map=feature_map)
            print('Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))

        # periodic validation
        if dataloader_validate is not None and epoch % args.epochs_validate == 0:
            loss_validate = test_data(args, model, dataloader_validate, feature_map)
            if args.log_tensorboard:
                log_value('validate_loss ' + args.fname, loss_validate, epoch)
            else:
                print('Epoch: {}/{}, validation loss: {:.6f}'.format(epoch, args.epochs, loss_validate))

    # final checkpoint after the last epoch
    save_model(epoch, args, model, optimizer, scheduler, feature_map=feature_map)
    print('Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))
Ejemplo n.º 3
0
    def update_args(self):
        """
        Return the arguments to run with.

        When resuming from a checkpoint, restore the args the model was
        trained with and patch them for the resume run; otherwise return
        this object unchanged.
        """
        if not self.load_model:
            return self

        # Restore the arguments stored alongside the checkpoint
        args = get_model_attribute(
            'saved_args', self.load_model_path, self.load_device)

        # Point the restored args at the requested device/checkpoint
        args.device = self.load_device
        args.load_model = True
        args.load_model_path = self.load_model_path
        args.epochs = self.epochs_end

        # Do not wipe logs or temp data when resuming
        args.clean_tensorboard = False
        args.clean_temp = False

        # Preprocessing artifacts already exist; skip regenerating them
        args.produce_graphs = False
        args.produce_min_dfscodes = False
        args.produce_min_dfscode_tensors = False

        return args
Ejemplo n.º 4
0
def predict_graphs(eval_args):
    """
    Sample labeled networkx graphs from a trained generative model.

    :param eval_args: ArgsEvaluate object holding model path, device,
        sample count, batch size and the saved train_args.
    :return: list of networkx.Graph with 'label' node/edge attributes.
    """
    train_args = eval_args.train_args
    feature_map = get_model_attribute(
        'feature_map', eval_args.model_path, eval_args.device)
    train_args.device = eval_args.device

    model = create_model(train_args, feature_map)
    load_model(eval_args.model_path, eval_args.device, model)

    # Inference mode for every sub-network
    for net in model.values():
        net.eval()

    node_backward = feature_map['node_backward']
    edge_backward = feature_map['edge_backward']

    graphs = []
    num_batches = eval_args.count // eval_args.batch_size
    for _ in range(num_batches):
        sampled_graphs = model['generator'](
            eval_args.batch_size, training=False)

        for sampled_graph in sampled_graphs:
            raw = sampled_graph.to_networkx(
                node_attrs=['label'], edge_attrs=['label']).to_undirected()

            # Translate integer label tensors back into the original labels
            labeled = nx.Graph()
            for node in raw.nodes():
                labeled.add_node(
                    node,
                    label=node_backward[raw.nodes[node]['label'].item() - 1])
            for u, v in raw.edges():
                labeled.add_edge(
                    u, v,
                    label=edge_backward[raw.edges[u, v]['label'].item()])

            # Keep only the largest connected component
            if len(labeled.nodes()) > 0:
                largest = max(nx.connected_components(labeled), key=len)
                labeled = labeled.subgraph(largest)

            graphs.append(labeled)

    return graphs
Ejemplo n.º 5
0
def train(args, dataloader_train, model, feature_map, dataloader_validate=None):
    """
    Train every sub-network in `model` for `args.epochs` epochs with optional
    TensorBoard logging, periodic checkpointing and validation.

    :param args: training configuration (lr, milestones, gamma, epochs,
        load/save flags, tensorboard settings, ...)
    :param dataloader_train: training set loader
    :param model: dict mapping component name -> torch module
    :param feature_map: dataset feature maps forwarded to train_epoch/test_data
    :param dataloader_validate: optional validation set loader
    """
    # One Adam optimizer per sub-network, over trainable parameters only
    optimizer = {}
    for name, net in model.items():
        optimizer['optimizer_' + name] = optim.Adam(
            filter(lambda p: p.requires_grad, net.parameters()), lr=args.lr,
            weight_decay=5e-5)

    # One LR scheduler per optimizer
    scheduler = {}
    for name, net in model.items():
        scheduler['scheduler_' + name] = MultiStepLR(
            optimizer['optimizer_' + name], milestones=args.milestones,
            gamma=args.gamma)

    if args.load_model:
        load_model(args.load_model_path, args.device,
                   model, optimizer, scheduler)
        print('Model loaded')

        # Resume from the epoch recorded in the checkpoint
        epoch = get_model_attribute('epoch', args.load_model_path, args.device)
    else:
        epoch = 0

    if args.log_tensorboard:
        writer = SummaryWriter(
            log_dir=args.tensorboard_path + args.fname + ' ' + args.time, flush_secs=5)
    else:
        writer = None

    try:
        while epoch < args.epochs:
            loss = train_epoch(
                epoch, args, model, dataloader_train, optimizer, scheduler, feature_map, writer)
            epoch += 1

            # logging
            if args.log_tensorboard:
                writer.add_scalar('{} {} Loss/train'.format(
                    args.note, args.graph_type), loss, epoch)
            else:
                print('Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))

            # periodic model checkpoint
            if args.save_model and epoch != 0 and epoch % args.epochs_save == 0:
                save_model(
                    epoch, args, model, optimizer, scheduler, feature_map=feature_map)
                print(
                    'Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))

            # periodic validation
            if dataloader_validate is not None and epoch % args.epochs_validate == 0:
                loss_validate = test_data(
                    args, model, dataloader_validate, feature_map)
                if args.log_tensorboard:
                    writer.add_scalar('{} {} Loss/validate'.format(
                        args.note, args.graph_type), loss_validate, epoch)
                else:
                    print('Epoch: {}/{}, validation loss: {:.6f}'.format(
                        epoch, args.epochs, loss_validate))

        # final checkpoint after the last epoch
        save_model(epoch, args, model, optimizer,
                   scheduler, feature_map=feature_map)
        print('Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(epoch, args.epochs, loss))
    finally:
        # BUG FIX: the SummaryWriter was never closed, which could leave the
        # last events unflushed and the event-file handle open.
        if writer is not None:
            writer.close()
Ejemplo n.º 6
0
def predict_graphs(eval_args):
    """
    Sample graphs (networkx format) from a trained DFScodeRNN model.

    Autoregressively samples DFS-code 5-tuples
    (timestamp1, timestamp2, vertex1 label, edge label, vertex2 label)
    one edge at a time, then decodes each sampled code into a graph.

    :param eval_args: ArgsEvaluate object with model_path, device, count,
        batch_size, max_num_edges and the saved train_args
    :return: list of networkx.Graph
    """
    train_args = eval_args.train_args
    feature_map = get_model_attribute('feature_map', eval_args.model_path,
                                      eval_args.device)
    train_args.device = eval_args.device

    model = create_model(train_args, feature_map)
    load_model(eval_args.model_path, eval_args.device, model)

    # Switch every sub-network to inference mode
    for _, net in model.items():
        net.eval()

    max_nodes = feature_map['max_nodes']
    # +1 reserves one extra index per vocabulary, used below as the
    # end-of-sequence token (len_node_vec - 1 / len_edge_vec - 1)
    len_node_vec, len_edge_vec = len(feature_map['node_forward']) + 1, len(
        feature_map['edge_forward']) + 1
    # One-hot input layout: [t1 | t2 | v1 label | edge label | v2 label]
    feature_len = 2 * (max_nodes + 1) + 2 * len_node_vec + len_edge_vec

    graphs = []

    for _ in range(eval_args.count // eval_args.batch_size):
        # initialize dfs_code_rnn hidden according to batch size
        model['dfs_code_rnn'].hidden = model['dfs_code_rnn'].init_hidden(
            batch_size=eval_args.batch_size)

        # First RNN input is all zeros (start-of-sequence)
        rnn_input = torch.zeros((eval_args.batch_size, 1, feature_len),
                                device=eval_args.device)
        # Sampled 5-tuple for each edge position
        pred = torch.zeros((eval_args.batch_size, eval_args.max_num_edges, 5),
                           device=eval_args.device)

        for i in range(eval_args.max_num_edges):
            rnn_output = model['dfs_code_rnn'](rnn_input)

            # Evaluating dfscode tuple
            timestamp1 = model['output_timestamp1'](rnn_output).reshape(
                eval_args.batch_size, -1)
            timestamp2 = model['output_timestamp2'](rnn_output).reshape(
                eval_args.batch_size, -1)
            vertex1 = model['output_vertex1'](rnn_output).reshape(
                eval_args.batch_size, -1)
            edge = model['output_edge'](rnn_output).reshape(
                eval_args.batch_size, -1)
            vertex2 = model['output_vertex2'](rnn_output).reshape(
                eval_args.batch_size, -1)

            if train_args.loss_type == 'BCE':
                # Heads are treated as probabilities — sample directly
                timestamp1 = Categorical(timestamp1).sample()
                timestamp2 = Categorical(timestamp2).sample()
                vertex1 = Categorical(vertex1).sample()
                edge = Categorical(edge).sample()
                vertex2 = Categorical(vertex2).sample()

            elif train_args.loss_type == 'NLL':
                # Heads are treated as logits — Categorical normalizes them
                timestamp1 = Categorical(logits=timestamp1).sample()
                timestamp2 = Categorical(logits=timestamp2).sample()
                vertex1 = Categorical(logits=vertex1).sample()
                edge = Categorical(logits=edge).sample()
                vertex2 = Categorical(logits=vertex2).sample()

            # Build the next input as the one-hot encoding of the sampled
            # tuple; offsets follow the feature layout described above
            rnn_input = torch.zeros((eval_args.batch_size, 1, feature_len),
                                    device=eval_args.device)

            rnn_input[torch.arange(eval_args.batch_size), 0, timestamp1] = 1
            rnn_input[torch.arange(eval_args.batch_size), 0,
                      max_nodes + 1 + timestamp2] = 1
            rnn_input[torch.arange(eval_args.batch_size), 0,
                      2 * max_nodes + 2 + vertex1] = 1
            rnn_input[torch.arange(eval_args.batch_size), 0,
                      2 * max_nodes + 2 + len_node_vec + edge] = 1
            rnn_input[torch.arange(eval_args.batch_size), 0, 2 * max_nodes +
                      2 + len_node_vec + len_edge_vec + vertex2] = 1

            # Record the sampled tuple for decoding after the loop
            pred[:, i, 0] = timestamp1
            pred[:, i, 1] = timestamp2
            pred[:, i, 2] = vertex1
            pred[:, i, 3] = edge
            pred[:, i, 4] = vertex2

        # Backward maps: integer index -> original label
        nb = feature_map['node_backward']
        eb = feature_map['edge_backward']
        for i in range(eval_args.batch_size):
            dfscode = []
            for j in range(eval_args.max_num_edges):
                # Stop at the first end-of-sequence token in any component
                if pred[i, j, 0] == max_nodes or pred[i, j, 1] == max_nodes \
                        or pred[i, j, 2] == len_node_vec - 1 or pred[i, j, 3] == len_edge_vec - 1 \
                        or pred[i, j, 4] == len_node_vec - 1:
                    break

                dfscode.append(
                    (int(pred[i, j, 0].data), int(pred[i, j, 1].data),
                     nb[int(pred[i, j, 2].data)], eb[int(pred[i, j, 3].data)],
                     nb[int(pred[i, j, 4].data)]))

            graph = graph_from_dfscode(dfscode)

            # Remove self loops
            graph.remove_edges_from(nx.selfloop_edges(graph))

            # Take maximum connected component
            if len(graph.nodes()):
                max_comp = max(nx.connected_components(graph), key=len)
                graph = nx.Graph(graph.subgraph(max_comp))

            graphs.append(graph)

    return graphs
Ejemplo n.º 7
0
def train(args,
          dataloader_train,
          model,
          feature_map,
          dataloader_validate=None):
    """
    Train every sub-network in `model` for `args.epochs` epochs.

    Builds per-component optimizers/schedulers via get_optimizer/get_scheduler,
    optionally resumes from a checkpoint, logs to TensorBoard and saves
    periodic plus final checkpoints.

    :param args: training configuration object
    :param dataloader_train: training set loader
    :param model: dict mapping component name -> network
    :param feature_map: dataset feature maps forwarded to train_epoch/test_data
    :param dataloader_validate: optional validation set loader
    """
    # initialize optimizer and LR scheduler per sub-network
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(model, optimizer, args)

    if args.load_model:
        load_model(args.load_model_path, args.device, model, optimizer,
                   scheduler)
        print('Model loaded')

        # Resume from the epoch recorded in the checkpoint
        epoch = get_model_attribute('epoch', args.load_model_path, args.device)
    else:
        epoch = 0

    if args.log_tensorboard:
        writer = SummaryWriter(log_dir=args.tensorboard_path + args.fname +
                               ' ' + args.time,
                               flush_secs=5)
    else:
        writer = None

    try:
        while epoch < args.epochs:
            loss, acc = train_epoch(epoch, args, model, dataloader_train,
                                    optimizer, scheduler, feature_map, writer)
            epoch += 1
            # BUG FIX: progress message said 'accuray' instead of 'accuracy'
            print('Epoch: {}/{}, train loss: {:.3f}, accuracy: {:.3f}'.format(
                epoch, args.epochs, loss, acc))

            # logging
            if args.log_tensorboard:
                writer.add_scalar(
                    '{} {} Loss/train'.format(args.note, args.graph_type), loss,
                    epoch)

            # save model checkpoint
            if args.save_model and epoch != 0 and epoch % args.epochs_save == 0:
                save_model(epoch,
                           args,
                           model,
                           optimizer,
                           scheduler,
                           feature_map=feature_map)
                print('Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(
                    epoch, args.epochs, loss))

            # periodic validation
            if dataloader_validate is not None and epoch % args.epochs_validate == 0:
                loss_validate = test_data(args, model, dataloader_validate,
                                          feature_map)
                if args.log_tensorboard:
                    writer.add_scalar(
                        '{} {} Loss/validate'.format(args.note, args.graph_type),
                        loss_validate, epoch)
                else:
                    print('Epoch: {}/{}, validation loss: {:.6f}'.format(
                        epoch, args.epochs, loss_validate))

        # final checkpoint after the last epoch
        save_model(epoch,
                   args,
                   model,
                   optimizer,
                   scheduler,
                   feature_map=feature_map)
        print('Model Saved - Epoch: {}/{}, train loss: {:.6f}'.format(
            epoch, args.epochs, loss))
    finally:
        # BUG FIX: close the SummaryWriter so buffered events are flushed
        # and the event-file handle is released.
        if writer is not None:
            writer.close()
Ejemplo n.º 8
0
def predict_graphs(eval_args):
    """
    Generate graphs (networkx format) given a trained generative graphRNN model

    Samples node labels with the node-level RNN and, for each new node, edge
    labels toward previously generated nodes with the edge-level RNN, then
    assembles labeled networkx graphs.

    :param eval_args: ArgsEvaluate object
    :return: list of networkx.Graph
    """

    train_args = eval_args.train_args
    feature_map = get_model_attribute(
        'feature_map', eval_args.model_path, eval_args.device)
    train_args.device = eval_args.device

    model = create_model(train_args, feature_map)
    load_model(eval_args.model_path, eval_args.device, model)

    # Switch every sub-network to inference mode
    for _, net in model.items():
        net.eval()

    max_num_node = eval_args.max_num_node
    len_node_vec, len_edge_vec, num_nodes_to_consider = get_attributes_len_for_graph_rnn(
        len(feature_map['node_forward']), len(feature_map['edge_forward']),
        train_args.max_prev_node, train_args.max_head_and_tail)
    # Node-level input layout: node one-hot followed by num_nodes_to_consider
    # edge one-hots (one per candidate previous node)
    feature_len = len_node_vec + num_nodes_to_consider * len_edge_vec

    graphs = []

    for _ in range(eval_args.count // eval_args.batch_size):
        model['node_level_rnn'].hidden = model['node_level_rnn'].init_hidden(
            batch_size=eval_args.batch_size)

        # [batch_size] * [num of nodes]
        x_pred_node = np.zeros(
            (eval_args.batch_size, max_num_node), dtype=np.int32)
        # [batch_size] * [num of nodes] * [num_nodes_to_consider]
        x_pred_edge = np.zeros(
            (eval_args.batch_size, max_num_node, num_nodes_to_consider), dtype=np.int32)

        node_level_input = torch.zeros(
            eval_args.batch_size, 1, feature_len, device=eval_args.device)
        # Initialize to node level start token
        node_level_input[:, 0, len_node_vec - 2] = 1
        for i in range(max_num_node):
            # [batch_size] * [1] * [hidden_size_node_level_rnn]
            node_level_output = model['node_level_rnn'](node_level_input)
            # [batch_size] * [1] * [node_feature_len]
            node_level_pred = model['output_node'](node_level_output)
            # [batch_size] * [node_feature_len] for torch.multinomial
            node_level_pred = node_level_pred.reshape(
                eval_args.batch_size, len_node_vec)
            # [batch_size]: Sampling index to set 1 in next node_level_input and x_pred_node
            # Add a small probability for each node label to avoid zeros
            node_level_pred[:, :-2] += EPS
            # Start token should not be sampled. So set it's probability to 0
            node_level_pred[:, -2] = 0
            # End token should not be sampled if i less than min_num_node
            if i < eval_args.min_num_node:
                node_level_pred[:, -1] = 0
            sample_node_level_output = torch.multinomial(
                node_level_pred, 1).reshape(-1)
            node_level_input = torch.zeros(
                eval_args.batch_size, 1, feature_len, device=eval_args.device)
            node_level_input[torch.arange(
                eval_args.batch_size), 0, sample_node_level_output] = 1

            # [batch_size] * [num of nodes]
            x_pred_node[:, i] = sample_node_level_output.cpu().data

            # [batch_size] * [1] * [hidden_size_edge_level_rnn]
            hidden_edge = model['embedding_node_to_edge'](node_level_output)

            # Zero-initialize the remaining RNN layers' hidden state
            hidden_edge_rem_layers = torch.zeros(
                train_args.num_layers -
                1, eval_args.batch_size, hidden_edge.size(2),
                device=eval_args.device)
            # [num_layers] * [batch_size] * [hidden_len]
            model['edge_level_rnn'].hidden = torch.cat(
                (hidden_edge.permute(1, 0, 2), hidden_edge_rem_layers), dim=0)

            # [batch_size] * [1] * [edge_feature_len]
            edge_level_input = torch.zeros(
                eval_args.batch_size, 1, len_edge_vec, device=eval_args.device)
            # Initialize to edge level start token
            edge_level_input[:, 0, len_edge_vec - 2] = 1
            # Node i can connect to at most min(num_nodes_to_consider, i)
            # previously generated nodes
            for j in range(min(num_nodes_to_consider, i)):
                # [batch_size] * [1] * [edge_feature_len]
                edge_level_output = model['edge_level_rnn'](edge_level_input)
                # [batch_size] * [edge_feature_len] needed for torch.multinomial
                edge_level_output = edge_level_output.reshape(
                    eval_args.batch_size, len_edge_vec)

                # [batch_size]: Sampling index to set 1 in next edge_level input and x_pred_edge
                # Add a small probability for no edge to avoid zeros
                edge_level_output[:, -3] += EPS
                # Start token and end should not be sampled. So set it's probability to 0
                edge_level_output[:, -2:] = 0
                sample_edge_level_output = torch.multinomial(
                    edge_level_output, 1).reshape(-1)
                edge_level_input = torch.zeros(
                    eval_args.batch_size, 1, len_edge_vec, device=eval_args.device)
                edge_level_input[:, 0, sample_edge_level_output] = 1

                # Setting edge feature for next node_level_input
                node_level_input[:, 0, len_node_vec + j * len_edge_vec: len_node_vec + (j + 1) * len_edge_vec] = \
                    edge_level_input[:, 0, :]

                # [batch_size] * [num of nodes] * [num_nodes_to_consider]
                x_pred_edge[:, i, j] = sample_edge_level_output.cpu().data

        # Save the batch of graphs
        for k in range(eval_args.batch_size):
            G = nx.Graph()

            for v in range(max_num_node):
                # End node token
                if x_pred_node[k, v] == len_node_vec - 1:
                    break
                elif x_pred_node[k, v] < len(feature_map['node_forward']):
                    G.add_node(
                        v, label=feature_map['node_backward'][x_pred_node[k, v]])
                else:
                    # Sampled index outside the label vocabulary — abort
                    print('Error in sampling node features')
                    exit()

            for u in range(len(G.nodes())):
                for p in range(min(num_nodes_to_consider, u)):
                    if x_pred_edge[k, u, p] < len(feature_map['edge_forward']):
                        # Map the edge slot index p back to the other endpoint v;
                        # the mapping depends on which windowing scheme was trained
                        if train_args.max_prev_node is not None:
                            v = u - p - 1
                        elif train_args.max_head_and_tail is not None:
                            if p < train_args.max_head_and_tail[1]:
                                v = u - p - 1
                            else:
                                v = p - train_args.max_head_and_tail[1]

                        G.add_edge(
                            u, v, label=feature_map['edge_backward'][x_pred_edge[k, u, p]])
                    elif x_pred_edge[k, u, p] == len(feature_map['edge_forward']):
                        # No edge
                        pass
                    else:
                        print('Error in sampling edge features')
                        exit()

            # Take maximum connected component
            if len(G.nodes()):
                max_comp = max(nx.connected_components(G), key=len)
                G = nx.Graph(G.subgraph(max_comp))

            graphs.append(G)

    return graphs