Example #1
    def __init__(self, args, search_space, action_list, submodel_manager):
        """
        Constructor for training algorithm.
        Build sub-model manager and controller.
        Build optimizer and cross entropy loss for controller.

        Args:
            args: From command line, picked up by `argparse`.
        """
        self.args = args
        self.controller_step = 0  # counter for controller
        self.cuda = args.cuda
        self.epoch = 0
        self.start_epoch = 0
        self.submodel_manager = None
        self.controller = None

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        super(RL_Selector, self).__init__(args, search_space, action_list,
                                          submodel_manager)
        self.build_model()  # build controller
        self.max_length = self.args.shared_rnn_max_length

        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = \
            controller_optimizer(self.controller.parameters(),
                                 lr=self.args.controller_lr)
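
The helper _get_optimizer is not part of this snippet; judging from how its return value is called, it maps an optimizer name from the command line to a torch.optim constructor. A minimal sketch under that assumption (the name mapping is hypothetical):

import torch

def _get_optimizer(name):
    # hypothetical helper: map an optimizer name string to a torch.optim class
    optimizers = {
        "sgd": torch.optim.SGD,
        "adam": torch.optim.Adam,
        "adagrad": torch.optim.Adagrad,
    }
    return optimizers[name.lower()]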
Example #2
    def __init__(self, args):

        self.args = args

        if hasattr(args, 'dataset') and args.dataset in ["cora", "citeseer", "pubmed"]:
            self.data = load(args)
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)
        print('the experiment config:', '\n', args)
        self.args = args
        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.loss_fn = torch.nn.BCELoss() # binary cross entropy loss
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10

        self.param_file = args.param_file
        self.shared_params = None

        self.loss_fn = torch.nn.functional.nll_loss  # overrides the BCELoss set above; NLL loss matches the log_softmax outputs in run_model
Example #3
    def __init__(self, args):

        self.args = args

        if hasattr(args, 'dataset') and args.dataset in [
                "cora", "citeseer", "pubmed"
        ]:
            self.data = load(args)
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)

        self.reward_manager = TopAverage(10)
        """
class TopAverage(object):
    def __init__(self, top_k=10):
        self.scores = []
        self.top_k = top_k

    def get_top_average(self):
        if len(self.scores) > 0:
            return np.mean(self.scores)
        else:
            return 0

    def get_average(self, score):
        if len(self.scores) > 0:
            avg = np.mean(self.scores)
        else:
            avg = 0
        # print("Top %d average: %f" % (self.top_k, avg))
        self.scores.append(score)
        self.scores.sort(reverse=True)
        self.scores = self.scores[:self.top_k]
        return avg

    def get_reward(self, score):
        reward = score - self.get_average(score)
        return np.clip(reward, -0.5, 0.5)
        """
        self.args = args
        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.loss_fn = torch.nn.BCELoss()
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10

        self.param_file = args.param_file
        self.shared_params = None

        self.loss_fn = torch.nn.functional.nll_loss  # overrides the BCELoss set above; NLL loss matches the log_softmax outputs in run_model
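
The TopAverage class quoted above keeps only the top-k scores seen so far and rewards a new score by its distance from their running average, clipped to [-0.5, 0.5]. A small usage illustration (arbitrary scores; assumes the quoted class and numpy as np are available):

rm = TopAverage(10)
print(rm.get_reward(0.70))   # first score: the stored average is still 0, so the reward clips to 0.5
print(rm.get_reward(0.60))   # below the current average of 0.70, so the reward is about -0.10
print(rm.get_top_average())  # mean of the stored top scores, about 0.65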
Example #4
class CitationGNNManager(object):

    def __init__(self, args):

        self.args = args

        if hasattr(args, 'dataset') and args.dataset in ["cora", "citeseer", "pubmed"]:
            self.data = load(args)
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)
        print('the experiment config:', '\n', args)
        self.args = args
        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.loss_fn = torch.nn.BCELoss() # binary cross entropy loss
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10

        self.param_file = args.param_file
        self.shared_params = None

        self.loss_fn = torch.nn.functional.nll_loss  # overrides the BCELoss set above; NLL loss matches the log_softmax outputs in run_model

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr, weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(model, optimizer, self.loss_fn, self.data, self.epochs,
                                                      cuda=self.args.cuda, return_best=True,
                                                      half_stop_score=max(self.reward_manager.get_top_average() * 0.7,
                                                                          0.4))
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        origin_action = actions
        actions = process_action(actions, format, self.args)
        print("train gnn structures:", actions)

        # create model
        model = self.build_gnn(actions)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr, weight_decay=self.args.weight_decay)
            model, val_acc = self.run_model(model, optimizer, self.loss_fn, self.data, self.epochs, cuda=self.args.cuda,
                                            half_stop_score=max(self.reward_manager.get_top_average() * 0.7, 0.4)
                                            # , show_info=True
                                            )
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e
        reward = self.reward_manager.get_reward(val_acc)
        # gnn structure, reward, val_acc
        # self.record_action_info(origin_action, reward, val_acc)

        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        with open(self.args.dataset + "_" + self.args.search_mode + self.args.submanager_log_file, "a") as file:
            file.write(str(origin_action))
            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        model = GraphNet(actions, self.in_feats, self.n_classes, drop_out=self.args.in_drop, multi_label=False,
                         batch_normal=False)
        return model

    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        return self.train(actions, format)

    @staticmethod
    def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5, tmp_model_file="geo_citation.pkl",
                  half_stop_score=0, return_best=False, cuda=True, need_early_stop=False, show_info=False):

        print('called CitationGNNManager.run_model')
        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")
        min_train_loss = float("inf")
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = CitationGNNManager.prepare_data(data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()
            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}".format(
                        epoch, loss.item(), np.mean(dur), train_acc, val_acc, test_acc))

                end_time = time.time()
                print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        features = torch.FloatTensor(data.features)
        print('features: ', features)
        labels = torch.LongTensor(data.labels)
        print('labels: ', labels)
        mask = torch.ByteTensor(data.train_mask)
        print('mask: ', mask)
        test_mask = torch.ByteTensor(data.test_mask)
        print('test_mask: ', test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        print('val_mask: ', val_mask)
        n_edges = data.graph.number_of_edges()
        print('n_edges: ', n_edges)
        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0

        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges
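
run_model relies on an evaluate(logits, labels, mask) helper that is not shown in this example; a later example quotes it verbatim, and it simply computes accuracy over the masked nodes:

import torch

def evaluate(output, labels, mask):
    # accuracy over the nodes selected by the mask
    _, indices = torch.max(output, dim=1)
    correct = torch.sum(indices[mask] == labels[mask])
    return correct.item() * 1.0 / mask.sum().item()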
Example #5
class RL_Selector(ModelSelector):
    """Manage the training process"""
    def __init__(self, args, search_space, action_list, submodel_manager):
        """
        Constructor for training algorithm.
        Build sub-model manager and controller.
        Build optimizer and cross entropy loss for controller.

        Args:
            args: From command line, picked up by `argparse`.
        """
        self.args = args
        self.controller_step = 0  # counter for controller
        self.cuda = args.cuda
        self.epoch = 0
        self.start_epoch = 0
        self.submodel_manager = None
        self.controller = None

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        super(RL_Selector, self).__init__(args, search_space, action_list,
                                          submodel_manager)
        self.build_model()  # build controller
        self.max_length = self.args.shared_rnn_max_length

        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = \
            controller_optimizer(self.controller.parameters(),
                                 lr=self.args.controller_lr)

    def build_model(self):
        # CALLS THIS ONE
        self.args.share_param = False
        self.args.shared_initial_step = 0
        self.controller = SimpleNASController(self.args,
                                              action_list=self.action_list,
                                              search_space=self.search_space,
                                              cuda=self.args.cuda)
        if self.cuda:
            self.controller.cuda()

    def train(self):
        """
        Each epoch consists of two phases:
        - In the first phase, the shared parameters of the sampled sub-models are trained.
        - In the second phase, the controller's parameters are trained.
        """

        for self.epoch in range(self.start_epoch, self.args.max_epoch):
            start_epoch_time = time.time()
            # 1. Training the shared parameters of the child graphnas
            self.train_shared(max_step=self.args.shared_initial_step)
            # 2. Training the controller parameters theta
            self.train_controller()

            if self.epoch % self.args.save_epoch == 0:
                self.save_model()
            end_epoch_time = time.time()
            print("epoch ", str(self.epoch), " took: ",
                  str(end_epoch_time - start_epoch_time))

        self.save_model()

    def train_shared(self, max_step=50, gnn_list=None):
        """
        Args:
            max_step: Used to run extra training steps as a warm-up.
            gnn_list: If not None, it is used instead of calling sample().

        """
        if max_step == 0:  # no train shared
            return

        print("*" * 35, "training model", "*" * 35)
        gnn_list = gnn_list if gnn_list else self.controller.sample(max_step)

        for gnn in gnn_list:
            gnn = self.form_gnn_info(gnn)
            try:
                _, val_score = \
                    self.submodel_manager.train(gnn,
                                                format=self.args.format)
                logger.info(str(gnn) + ", val_score:" + str(val_score))
            except RuntimeError as e:
                if 'CUDA' in str(e):  # usually CUDA Out of Memory
                    print(e)
                else:
                    raise e

        print("*" * 35, "training over", "*" * 35)

    def get_reward(self, gnn_list, entropies, hidden):
        """
        Compute the rewards of the sampled models on validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()
        if isinstance(gnn_list, dict):
            gnn_list = [gnn_list]
        if isinstance(gnn_list[0], list) or isinstance(gnn_list[0], dict):
            pass
        else:
            gnn_list = [gnn_list]  # when structure_list is one structure

        reward_list = []
        for gnn in gnn_list:
            gnn = self.form_gnn_info(gnn)
            val_acc, metrics = \
                self.submodel_manager.train(
                    gnn,
                    format=self.args.format)
            # Manage Hall of Fame
            if self.args.opt_metric not in metrics:
                print("Could not find optimization metric",
                      self.args.opt_metric, "in metrics dict.")
                reward = self.reward_manager.get_reward(0)
            else:
                self.hof.add(gnn, metrics[self.args.opt_metric])
                # Calculate reward in terms of the optimization metric selected
                reward = self.reward_manager.get_reward(
                    metrics[self.args.opt_metric])
            reward_list.append(reward)

        if self.args.entropy_mode == 'reward':
            rewards = reward_list + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = reward_list * np.ones_like(entropies)
        else:
            raise NotImplementedError('Unknown entropy mode: ' +
                                      str(self.args.entropy_mode))

        return rewards, hidden

    def train_controller(self):
        """
        Train the controller to find better structures.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # sample graphnas
            structure_list, log_probs, entropies = \
                self.controller.sample(with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                # CUDA Error happens, drop structure
                # and step into next iteration
                continue

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)

    def evaluate(self, gnn):
        """
        Evaluate a structure on the validation set.
        """
        self.controller.eval()
        gnn = self.form_gnn_info(gnn)
        reward, scores, metrics = \
            self.submodel_manager.train(gnn,
                                        format=self.args.format)
        logger.info("".join([
            'eval | ',
            str(gnn), ' | reward: {:8.2f}'.format(reward),
            ' | scores: {:8.2f}'.format(scores)
        ]))

    @property
    def controller_path(self):
        return "".join([
            str(self.args.dataset), "/controller_epoch",
            str(self.epoch), "_step",
            str(self.controller_step), ".pth"
        ])

    @property
    def controller_optimizer_path(self):
        return "".join([
            str(self.args.dataset), "/controller_epoch",
            str(self.epoch), "_step",
            str(self.controller_step), "_optimizer.pth"
        ])

    def get_saved_models_info(self):
        paths = glob.glob(os.path.join(self.args.dataset, '*.pth'))
        paths.sort()

        def get_numbers(items, delimiter, idx, replace_word, must_contain=''):
            return list(
                set([
                    int(name.split(delimiter)[idx].replace(replace_word, ''))
                    for name in items if must_contain in name
                ]))

        basenames = [
            os.path.basename(path.rsplit('.', 1)[0]) for path in paths
        ]
        epochs = get_numbers(basenames, '_', 1, 'epoch')
        shared_steps = get_numbers(basenames, '_', 2, 'step', 'shared')
        controller_steps = get_numbers(basenames, '_', 2, 'step', 'controller')

        epochs.sort()
        shared_steps.sort()
        controller_steps.sort()

        return epochs, shared_steps, controller_steps

    def save_model(self):

        torch.save(self.controller.state_dict(), self.controller_path)
        torch.save(self.controller_optim.state_dict(),
                   self.controller_optimizer_path)

        logger.info('[*] SAVED: ' + str(self.controller_path))

        epochs, shared_steps, controller_steps = self.get_saved_models_info()

        for epoch in epochs[:-self.args.max_save_num]:
            paths = glob.glob(
                os.path.join(self.args.dataset,
                             '*_epoch' + str(epoch) + '_*.pth'))

            for path in paths:
                utils.remove_file(path)

    def load_model(self):
        epochs, shared_steps, controller_steps = self.get_saved_models_info()

        if len(epochs) == 0:
            logger.info('[!] No checkpoint found in ' +
                        str(self.args.dataset) + '...')
            return

        self.epoch = self.start_epoch = max(epochs)
        self.controller_step = max(controller_steps)

        self.controller.load_state_dict(torch.load(self.controller_path))
        self.controller_optim.load_state_dict(
            torch.load(self.controller_optimizer_path))
        logger.info('[*] LOADED: ' + str(self.controller_path))
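
train_controller above is a REINFORCE update with a moving-average baseline and optional entropy regularization. A self-contained sketch of the gradient computation it performs (all tensor values are arbitrary placeholders, not values from the repository):

import torch

log_probs = torch.tensor([-1.2, -0.8, -1.5], requires_grad=True)  # stands in for controller.sample()
rewards = torch.tensor([0.30, 0.10, -0.20])                        # stands in for get_reward()
entropies = torch.tensor([0.9, 1.1, 0.7])
entropy_coeff, ema_decay = 1e-4, 0.95
baseline = torch.zeros_like(rewards)

baseline = ema_decay * baseline + (1 - ema_decay) * rewards  # moving-average baseline
adv = rewards - baseline                                     # advantage
loss = -(log_probs * adv).sum()                              # policy-gradient loss
loss = loss - entropy_coeff * entropies.sum()                # 'regularizer' entropy mode
loss.backward()                                              # gradients w.r.t. the log-probabilities
print(log_probs.grad)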
Example #6
class CitationGNNManager(object):
    def __init__(self, args):

        self.args = args

        if hasattr(args, 'dataset') and args.dataset in [
                "cora", "citeseer", "pubmed"
        ]:
            self.data = load(args)
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)

        self.reward_manager = TopAverage(10)
        """
class TopAverage(object):
    def __init__(self, top_k=10):
        self.scores = []
        self.top_k = top_k

    def get_top_average(self):
        if len(self.scores) > 0:
            return np.mean(self.scores)
        else:
            return 0

    def get_average(self, score):
        if len(self.scores) > 0:
            avg = np.mean(self.scores)
        else:
            avg = 0
        # print("Top %d average: %f" % (self.top_k, avg))
        self.scores.append(score)
        self.scores.sort(reverse=True)
        self.scores = self.scores[:self.top_k]
        return avg

    def get_reward(self, score):
        reward = score - self.get_average(score)
        return np.clip(reward, -0.5, 0.5)
        """
        self.args = args
        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.loss_fn = torch.nn.BCELoss()
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10

        self.param_file = args.param_file
        self.shared_params = None

        self.loss_fn = torch.nn.functional.nll_loss  # overrides the BCELoss set above; NLL loss matches the log_softmax outputs in run_model

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(
                model,
                optimizer,
                self.loss_fn,
                self.data,
                self.epochs,
                cuda=self.args.cuda,
                return_best=True,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
            """
class TopAverage(object):
    def __init__(self, top_k=10):
        self.scores = []
        self.top_k = top_k

    def get_top_average(self):
        if len(self.scores) > 0:
            return np.mean(self.scores)
        else:
            return 0
            """
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        # actions = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 4]
        # format="two"
        origin_action = actions

        # the number of classes in the task sets the output dimension of the second GNN layer;
        # e.g. Citeseer has 6 classes, so the last entry of the action list becomes 6
        actions = process_action(actions, format, self.args)
        # actions = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 6]

        print("train action:", actions)

        # create model
        # build the GNN from the sampled architecture
        model = self.build_gnn(actions)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.args.lr,
                                         weight_decay=self.args.weight_decay)

            model, val_acc = self.run_model(
                model,
                optimizer,
                self.loss_fn,
                self.data,
                self.epochs,
                cuda=self.args.cuda,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
            """
class TopAverage(object):
    def __init__(self, top_k=10):
        self.scores = []
        self.top_k = top_k

    def get_top_average(self):
        if len(self.scores) > 0:
            return np.mean(self.scores)
        else:
            return 0
            """

        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e

        reward = self.reward_manager.get_reward(val_acc)

        # the reward is positive when val_acc exceeds the mean of the historical top-10 val_acc values,
        # and it is clipped to the range [-0.5, 0.5]
        """
class TopAverage(object):
    def get_average(self, score):
        if len(self.scores) > 0:
            avg = np.mean(self.scores)
        else:
            avg = 0
        # print("Top %d average: %f" % (self.top_k, avg))
        self.scores.append(score)
        self.scores.sort(reverse=True)
        self.scores = self.scores[:self.top_k]
        return avg

    def get_reward(self, score):
        reward = score - self.get_average(score)
        return np.clip(reward, -0.5, 0.5)
        """
        self.save_param(model, update_all=(reward > 0))
        # this manager does not share parameters

        self.record_action_info(origin_action, reward, val_acc)
        # record the gnn structure, reward, and val_acc in the log file

        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        with open(
                self.args.dataset + "_" + self.args.search_mode +
                self.args.submanager_log_file, "a") as file:
            # with open(f'{self.args.dataset}_{self.args.search_mode}_{self.args.format}_manager_result.txt', "a") as file:
            file.write(str(origin_action))

            file.write(";")
            file.write(str(reward))

            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        model = GraphNet(actions,
                         self.in_feats,
                         self.n_classes,
                         drop_out=self.args.in_drop,
                         multi_label=False,
                         batch_normal=False)
        # actions = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 6]
        # self.in_feats = 3703
        # self.n_classes = 6
        # drop_out = 0.6

        return model

    # validate the model
    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        # actions = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 4]
        # format = "two"
        # with_retrain = True
        return self.train(actions, format)

    @staticmethod
    def run_model(model,
                  optimizer,
                  loss_fn,
                  data,
                  epochs,
                  early_stop=5,
                  tmp_model_file="geo_citation.pkl",
                  half_stop_score=0,
                  return_best=False,
                  cuda=True,
                  need_early_stop=False,
                  show_info=False):

        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")  # positive infinity
        min_train_loss = float("inf")  # positive infinity
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = CitationGNNManager.prepare_data(
            data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()

            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            """
def evaluate(output, labels, mask):
    _, indices = torch.max(output, dim=1)
    correct = torch.sum(indices[mask] == labels[mask])
    return correct.item() * 1.0 / mask.sum().item()
            """
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))

            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            # model_val_acc and min_train_loss are only updated when this epoch's val_loss is below the historical minimum
            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}"
                    .format(epoch, loss.item(), np.mean(dur), train_acc,
                            val_acc, test_acc))

                end_time = time.time()
                print("Each Epoch Cost Time: %f " %
                      ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    # @staticmethod
    # def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5, tmp_model_file="citation_testing_2.pkl",
    #               half_stop_score=0, return_best=False, cuda=True, need_early_stop=False):
    #
    #     early_stop_manager = EarlyStop(early_stop)
    #     # initialize graph
    #     dur = []
    #     begin_time = time.time()
    #     features, g, labels, mask, val_mask, test_mask, n_edges = CitationGNNManager.prepare_data(data, cuda)
    #     saved = False
    #     best_performance = 0
    #     for epoch in range(1, epochs + 1):
    #         should_break = False
    #         t0 = time.time()
    #
    #         model.train()
    #         logits = model(features, g)
    #         logits = F.log_softmax(logits, 1)
    #         loss = loss_fn(logits[mask], labels[mask])
    #         optimizer.zero_grad()
    #         loss.backward()
    #         optimizer.step()
    #
    #         model.eval()
    #         logits = model(features, g)
    #         logits = F.log_softmax(logits, 1)
    #         train_acc = evaluate(logits, labels, mask)
    #         train_loss = float(loss)
    #         dur.append(time.time() - t0)
    #
    #         val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
    #         val_acc = evaluate(logits, labels, val_mask)
    #         test_acc = evaluate(logits, labels, test_mask)
    #
    #         print(
    #             "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}".format(
    #                 epoch, loss.item(), np.mean(dur), train_acc, val_acc, test_acc))
    #
    #         end_time = time.time()
    #         print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
    #         # print("Test Accuracy {:.4f}".format(acc))
    #         if early_stop_manager.should_save(train_loss, train_acc, val_loss, val_acc):
    #             saved = True
    #             torch.save(model.state_dict(), tmp_model_file)
    #             if test_acc > best_performance:
    #                 best_performance = test_acc
    #         if need_early_stop and early_stop_manager.should_stop(train_loss, train_acc, val_loss, val_acc):
    #             should_break = True
    #         if should_break and epoch > 50:
    #             print("early stop")
    #             break
    #         if half_stop_score > 0 and epoch > (epochs / 2) and val_acc < half_stop_score:
    #             print("half_stop")
    #             break
    #     if saved:
    #         model.load_state_dict(torch.load(tmp_model_file))
    #     model.eval()
    #     val_acc = evaluate(model(features, g), labels, val_mask)
    #     print(evaluate(model(features, g), labels, test_mask))
    #     if return_best:
    #         return model, val_acc, best_performance
    #     else:
    #         return model, val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        mask = torch.ByteTensor(data.train_mask)
        test_mask = torch.ByteTensor(data.test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        n_edges = data.graph.number_of_edges()
        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0

        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges
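
process_action is not defined in these examples; the before/after action lists in the comments suggest that, for format="two", it rewrites the last entry (the output dimension of the second GNN layer) to the number of classes. A minimal sketch under that assumption (the real implementation may do more, e.g. validate the actions):

def process_action(actions, format, args):
    # hypothetical sketch: force the output dimension of the last GNN layer
    # to the number of classes, e.g. 6 for Citeseer
    if format == "two":
        actions = list(actions)
        actions[-1] = args.num_class
    return actions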
Example #7
class CitationGNNManager(object):
    def __init__(self, args):

        self.args = args

        if hasattr(args, 'dataset') and args.dataset in [
                "cora", "citeseer", "pubmed"
        ]:
            self.data = load(args)
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        self.args = args
        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.loss_fn = torch.nn.BCELoss()
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10

        self.param_file = args.param_file
        self.shared_params = None

        self.loss_fn = torch.nn.functional.nll_loss  # overrides the BCELoss set above; NLL loss matches the log_softmax outputs in run_model

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(
                model,
                optimizer,
                self.loss_fn,
                self.data,
                self.epochs,
                cuda=self.args.cuda,
                return_best=True,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        origin_action = actions  # ['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16]
        actions = process_action(
            actions, format, self.args
        )  # ['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 6]
        print("train action:", actions)

        # create model
        model = self.build_gnn(
            actions
        )  # -> micro_model_manager.py -> ZengManager(GeoCitationManager)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.args.lr,
                                         weight_decay=self.args.weight_decay
                                         )  # lr = 0.2  weight_decay = 0.0005
            model, val_acc = self.run_model(
                model,
                optimizer,
                self.loss_fn,
                self.data,
                self.epochs,
                cuda=self.args.cuda,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e
        reward = self.reward_manager.get_reward(val_acc)
        self.save_param(model, update_all=(reward > 0))

        self.record_action_info(origin_action, reward, val_acc)

        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        with open(
                self.args.dataset + "_" + self.args.search_mode +
                self.args.submanager_log_file, "a") as file:
            # with open(f'{self.args.dataset}_{self.args.search_mode}_{self.args.format}_manager_result.txt', "a") as file:
            file.write(str(origin_action))

            file.write(";")
            file.write(str(reward))

            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        model = GraphNet(actions,
                         self.in_feats,
                         self.n_classes,
                         drop_out=self.args.in_drop,
                         multi_label=False,
                         batch_normal=False)
        return model

    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        return self.train(actions, format)

    @staticmethod
    def run_model(model,
                  optimizer,
                  loss_fn,
                  data,
                  epochs,
                  early_stop=5,
                  tmp_model_file="geo_citation.pkl",
                  half_stop_score=0,
                  return_best=False,
                  cuda=True,
                  need_early_stop=False,
                  show_info=False):

        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")
        min_train_loss = float("inf")
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = CitationGNNManager.prepare_data(
            data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()
            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}"
                    .format(epoch, loss.item(), np.mean(dur), train_acc,
                            val_acc, test_acc))

                end_time = time.time()
                print("Each Epoch Cost Time: %f " %
                      ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    # @staticmethod
    # def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5, tmp_model_file="citation_testing_2.pkl",
    #               half_stop_score=0, return_best=False, cuda=True, need_early_stop=False):
    #
    #     early_stop_manager = EarlyStop(early_stop)
    #     # initialize graph
    #     dur = []
    #     begin_time = time.time()
    #     features, g, labels, mask, val_mask, test_mask, n_edges = CitationGNNManager.prepare_data(data, cuda)
    #     saved = False
    #     best_performance = 0
    #     for epoch in range(1, epochs + 1):
    #         should_break = False
    #         t0 = time.time()
    #
    #         model.train()
    #         logits = model(features, g)
    #         logits = F.log_softmax(logits, 1)
    #         loss = loss_fn(logits[mask], labels[mask])
    #         optimizer.zero_grad()
    #         loss.backward()
    #         optimizer.step()
    #
    #         model.eval()
    #         logits = model(features, g)
    #         logits = F.log_softmax(logits, 1)
    #         train_acc = evaluate(logits, labels, mask)
    #         train_loss = float(loss)
    #         dur.append(time.time() - t0)
    #
    #         val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
    #         val_acc = evaluate(logits, labels, val_mask)
    #         test_acc = evaluate(logits, labels, test_mask)
    #
    #         print(
    #             "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}".format(
    #                 epoch, loss.item(), np.mean(dur), train_acc, val_acc, test_acc))
    #
    #         end_time = time.time()
    #         print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
    #         # print("Test Accuracy {:.4f}".format(acc))
    #         if early_stop_manager.should_save(train_loss, train_acc, val_loss, val_acc):
    #             saved = True
    #             torch.save(model.state_dict(), tmp_model_file)
    #             if test_acc > best_performance:
    #                 best_performance = test_acc
    #         if need_early_stop and early_stop_manager.should_stop(train_loss, train_acc, val_loss, val_acc):
    #             should_break = True
    #         if should_break and epoch > 50:
    #             print("early stop")
    #             break
    #         if half_stop_score > 0 and epoch > (epochs / 2) and val_acc < half_stop_score:
    #             print("half_stop")
    #             break
    #     if saved:
    #         model.load_state_dict(torch.load(tmp_model_file))
    #     model.eval()
    #     val_acc = evaluate(model(features, g), labels, val_mask)
    #     print(evaluate(model(features, g), labels, test_mask))
    #     if return_best:
    #         return model, val_acc, best_performance
    #     else:
    #         return model, val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        mask = torch.ByteTensor(data.train_mask)
        test_mask = torch.ByteTensor(data.test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        n_edges = data.graph.number_of_edges()
        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0

        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges
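
prepare_data precomputes norm = deg^(-1/2) for every node and stores it in g.ndata['norm']. How GraphNet consumes it is not shown; the usual purpose of this quantity is the symmetric normalization D^(-1/2) A D^(-1/2) of the propagation step, illustrated here on a tiny dense graph:

import torch

# toy illustration of the normalization prepare_data precomputes (an assumption
# about how GraphNet uses g.ndata['norm']; the model itself is not shown)
A = torch.tensor([[1., 1., 0.],
                  [1., 1., 1.],
                  [0., 1., 1.]])   # adjacency with self loops already added
h = torch.randn(3, 4)              # node features
deg = A.sum(dim=1)
norm = deg.pow(-0.5)
norm[torch.isinf(norm)] = 0        # guard isolated nodes, as in prepare_data
A_hat = norm.unsqueeze(1) * A * norm.unsqueeze(0)
h_next = A_hat @ h                 # one symmetrically normalized propagation step
print(h_next.shape)                # torch.Size([3, 4])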