예제 #1
0
def main_clean():
    """Run the model on the "citeseer" dataset for a sweep of train percentages.

    Parses the command-line arguments, assembles the run configuration,
    calls ``init_seed`` with the freshly drawn seed and the cuda flag, creates
    a timestamped products directory under ``CUR_DIR/logs`` and builds the
    loggers.  One ``ModelRunner`` is then handed to ``execute_runner`` once
    for every value in ``range(5, 90, 10)``, each time with ``num_iter=10``.
    """
    args = parse_args()
    dataset = "citeseer"

    # A new seed is drawn on every invocation and recorded in the config.
    seed = random.randint(1, 1000000000)
    conf = {
        "kipf": {
            "hidden": args.hidden,
            "dropout": args.dropout,
            "lr": args.lr,
            "weight_decay": args.weight_decay,
        },
        "hidden_layers": [16],
        "multi_hidden_layers": [100, 35],
        "dropout": 0.6,
        "lr": 0.01,
        "weight_decay": 0.001,
        "dataset": dataset,
        "epochs": args.epochs,
        "cuda": args.cuda,
        "fastmode": args.fastmode,
        "seed": seed,
    }

    init_seed(conf['seed'], conf['cuda'])
    dataset_path = os.path.join(PROJ_DIR, "data", dataset)

    products_path = os.path.join(CUR_DIR, "logs", args.prefix + dataset,
                                 time.strftime("%Y_%m_%d_%H_%M_%S"))
    # exist_ok avoids the check-then-create race when two runs start within
    # the same second and resolve to the same timestamped directory.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger(
        [
            PrintLogger("IdansLogger", level=logging.DEBUG),
            FileLogger("results_%s" % conf["dataset"],
                       path=products_path,
                       level=logging.INFO),
            FileLogger("results_%s_all" % conf["dataset"],
                       path=products_path,
                       level=logging.DEBUG),
        ],
        name=None)

    data_logger = CSVLogger("results_%s" % conf["dataset"], path=products_path)
    data_logger.info("model_name", "loss", "acc", "train_p")

    runner = ModelRunner(dataset_path,
                         conf,
                         logger=logger,
                         data_logger=data_logger)

    for train_p in range(5, 90, 10):
        execute_runner(runner, logger, train_p, num_iter=10)
    logger.info("Finished")
def build_model(rand_test_indices, train_indices,traint,testt, labels ,X, adj_tr, adj_te, in_features,
                hid_features,out_features,ds_name, activation, optimizer, epochs, dropout, lr, l2_pen,
                beta, gamma, dumping_name, GS,is_nni=False):
    """Bundle the given settings into a config dict and return a ``ModelRunner``.

    Every argument except ``dumping_name``, ``GS`` and ``is_nni`` is stored
    unchanged in the configuration dictionary (``l2_pen`` under the key
    ``"weight_decay"``, ``train_indices`` under ``"train_ind"`` and
    ``rand_test_indices`` under ``"test_ind"``).  An extra ``"optim_name"``
    entry holds ``"Adam"`` when ``optimizer == optim.Adam`` and ``"SGD"``
    otherwise.

    :param dumping_name: str, used as a sub-directory of ``<cwd>/logs`` and in
        the log / CSV file names.
    :param GS: passed to ``ModelRunner`` as its second positional argument.
    :param is_nni: forwarded to ``ModelRunner``.
    :return: the constructed ``ModelRunner``.
    """
    optim_name = "Adam" if optimizer == optim.Adam else "SGD"
    conf = {
        "in_features": in_features,
        "hid_features": hid_features,
        "out_features": out_features,
        "ds_name": ds_name,
        "dropout": dropout,
        "lr": lr,
        "weight_decay": l2_pen,
        "beta": beta,
        "gamma": gamma,
        "train_ind": train_indices,
        "test_ind": rand_test_indices,
        "traint": traint,
        "testt": testt,
        "labels": labels,
        "X": X,
        "adj_tr": adj_tr,
        "adj_te": adj_te,
        "optimizer": optimizer,
        "epochs": epochs,
        "activation": activation,
        "optim_name": optim_name,
    }

    products_path = os.path.join(os.getcwd(), "logs", dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    # The timestamp has one-second resolution, so two builds started in the
    # same second share a directory; exist_ok makes the creation race-free.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_%s" % dumping_name, path=products_path, level=logging.INFO)], name=None)

    data_logger = CSVLogger("results_%s" % dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc")

    runner = ModelRunner(conf, GS, logger=logger, data_logger=data_logger, is_nni=is_nni)
    return runner
예제 #3
0
def build_model(training_data, training_adj, training_labels, eval_data, eval_adj, eval_labels,
                test_data, test_adj, test_labels, learning_hyperparams, class_weights,
                graph_params, dumping_name, is_nni=False, device=1):
    """Assemble the run configuration and loggers, then return a ``ModelRunner``.

    The training / eval / test matrices, adjacencies and labels are stored in
    the configuration as given; the remaining entries are read from attributes
    of ``learning_hyperparams``.  The same activation is repeated once per
    hidden layer plus one more time.

    :param dumping_name: sequence whose items become nested sub-directories of
        ``<cwd>/logs``; its item at index 1 is used in the log file name.
    :return: the constructed ``ModelRunner``; the products directory is passed
        to it as ``tmp_path``.
    """
    hp = learning_hyperparams
    single_activation = hp.activation
    activations = [single_activation] * (len(hp.hidden_layers) + 1)

    conf = {
        "model": hp.model,
        "hidden_layers": hp.hidden_layers,
        "dropout": hp.dropout,
        "lr": hp.learning_rate,
        "weight_decay": hp.l2_regularization,
        "training_mat": training_data,
        "training_adj": training_adj,
        "training_labels": training_labels,
        "eval_mat": eval_data,
        "eval_adj": eval_adj,
        "eval_labels": eval_labels,
        "test_mat": test_data,
        "test_adj": test_adj,
        "test_labels": test_labels,
        "optimizer": hp.optimizer,
        "epochs": hp.epochs,
        "activations": activations,
        "loss_coeffs": hp.loss_coefficients,
        "unary": hp.unary_loss_type,
        "edge_normalization": hp.edge_normalization,
    }

    # Microsecond-resolution stamp as the innermost directory name.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    products_path = os.path.join(os.getcwd(), "logs", *dumping_name, stamp)
    check_make_dir(products_path)

    console = PrintLogger("MyLogger", level=logging.DEBUG)
    results_file = FileLogger("results_" + dumping_name[1], path=products_path, level=logging.INFO)
    logger = multi_logger([console, results_file], name=None)

    return ModelRunner(
        conf,
        logger=logger,
        weights=class_weights,
        graph_params=graph_params,
        early_stop=hp.early_stop,
        is_nni=is_nni,
        tmp_path=products_path,
        device=device,
    )
예제 #4
0
def build_model(training_data, training_labels, test_data, test_labels, adjacency_matrices,
                hid_features, activation, optimizer, epochs, dropout, lr, l2_pen, temporal_pen,
                dumping_name, feature_matrices, is_nni=False):
    """Build the configuration and loggers, log the settings, return a ``ModelRunner``.

    All data and hyper-parameter arguments are stored unchanged in the
    configuration dictionary (``l2_pen`` under ``"weight_decay"``).  An
    ``"optim_name"`` entry holds ``"Adam"`` when ``optimizer == optim.Adam``
    and ``"SGD"`` otherwise.  A "STARTING with ..." line listing lr, dropout,
    l2 penalty, temporal penalty and optimizer name is logged at both INFO and
    DEBUG level before the runner is created.

    :param dumping_name: str, used as a sub-directory of ``<cwd>/logs`` and in
        the log / CSV file names.
    :param is_nni: forwarded to ``ModelRunner``.
    :return: the constructed ``ModelRunner``.
    """
    optim_name = "Adam" if optimizer == optim.Adam else "SGD"
    conf = {
        "hid_features": hid_features,
        "dropout": dropout,
        "lr": lr,
        "weight_decay": l2_pen,
        "temporal_pen": temporal_pen,
        "training_mat": training_data,
        "training_labels": training_labels,
        "test_mat": test_data,
        "test_labels": test_labels,
        "adj_matrices": adjacency_matrices,
        "optimizer": optimizer,
        "epochs": epochs,
        "feature_matrices": feature_matrices,
        "activation": activation,
        "optim_name": optim_name,
    }

    products_path = os.path.join(os.getcwd(), "logs", dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    # The timestamp has one-second resolution, so two builds started in the
    # same second share a directory; exist_ok makes the creation race-free.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_%s" % dumping_name, path=products_path, level=logging.INFO)], name=None)

    data_logger = CSVLogger("results_%s" % dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc")

    # Shared part of the start-up message.  The emitted text is kept exactly
    # as before, including the existing spelling and the two spaces after
    # "lr=" in the DEBUG variant, in case log consumers match on it.
    settings_tail = (' dropout= {:.4f} '.format(dropout)
                     + ' regulariztion_l2_pen= {:.4f} '.format(l2_pen)
                     + ' temporal_pen= {:.10f} '.format(temporal_pen)
                     + ' optimizer= %s ' % optim_name)
    logger.info('STARTING with lr= {:.4f} '.format(lr) + settings_tail)
    logger.debug('STARTING with lr=  {:.4f} '.format(lr) + settings_tail)

    runner = ModelRunner(conf, logger=logger, data_logger=data_logger, is_nni=is_nni)
    return runner
예제 #5
0
    def __init__(self, edge_path, dir_path, features, acc=True, directed=False,
                 gpu=False, device=2, verbose=True, params=None):
        """
        Set up feature calculation for a graph given as a text-like edge file.

        :param edge_path: str
        Path to the edges file (text-like, e.g. txt or csv) from which the graph is built with networkx.
        The graph must be unweighted. Vertices that are not already [0, 1, ..., n-1] are mapped to
        [0, 1, ..., n-1] and the mapping is saved.
        Each row of the file should hold "source_id,distance_id"; there must be no header row.
        :param dir_path: str
        Directory in which the feature calculations will be (or already are) stored.
        :param features: list of strings
        Names of the features to use. Any name from features_meta.py, or "additional_features".
        :param acc: bool
        Whether to run the accelerated features, when that is possible.
        :param directed: bool
        Whether the built graph is directed.
        :param gpu: bool
        Whether to use GPUs, when that is possible (i.e. a GPU exists and the CUDA matches).
        :param device: int
        When gpu is True, the index of the GPU device to calculate on. An error is returned if the index
        does not match the available GPUs.
        :param verbose: bool
        Whether to print messages indicating the phases of the calculations.
        :param params: dict, or None
        For clique detection uses: a dictionary of the graph settings
        (size, directed, clique size, edge probability). Ignored for any other use.
        """

        self._dir_path = dir_path
        self._features = features  # By their name as appears in accelerated_features_meta
        self._gpu = gpu
        self._device = device
        self._verbose = verbose

        # A logger is only created in verbose mode; otherwise it stays None.
        if verbose:
            self._logger = multi_logger(
                [
                    PrintLogger("Logger", level=logging.DEBUG),
                    FileLogger("FLogger", path=dir_path, level=logging.INFO),
                ],
                name=None)
        else:
            self._logger = None

        self._params = params
        self._load_graph(edge_path, directed)
        # acc decides whether the accelerated features are used.
        self._get_feature_meta(features, acc)

        # Placeholders, assigned only after the graph and feature meta are set up.
        self._adj_matrix = None
        self._raw_features = None
        self._other_features = None
예제 #6
0
def get_loggers(name, products_path, is_debug=True, set_titles=True):
    """Create the message logger and the CSV data logger for one run.

    The message logger combines a console logger (DEBUG level when
    ``is_debug`` is truthy, INFO otherwise) with two file loggers under
    ``products_path``: ``results_<name>`` at INFO level and
    ``results_<name>_all`` at DEBUG level.

    :param name: inserted into the log and CSV file names.
    :param products_path: directory passed to the file and CSV loggers.
    :param is_debug: selects the console logger level.
    :param set_titles: when truthy, the fixed column titles are set on the
        CSV logger.
    :return: tuple ``(logger, data_logger)``.
    """
    console_level = logging.INFO
    if is_debug:
        console_level = logging.DEBUG

    handlers = [
        PrintLogger("IdansLogger", level=console_level),
        FileLogger("results_%s" % name, path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % name, path=products_path, level=logging.DEBUG),
    ]
    logger = multi_logger(handlers, name=None)

    data_logger = CSVLogger("results_%s" % name, path=products_path)
    if set_titles:
        column_titles = ("feat_type", "year", "loss_val", "loss_test", "acc",
                         "auc_test", "train_p", "norm_adj", "epoch")
        data_logger.set_titles(*column_titles)

    for any_logger in (logger, data_logger):
        any_logger.dump_location()
    return logger, data_logger
    def fix_logger(self, dumping_name):
        """Create a console-plus-file logger that writes into a new log directory.

        The directory is
        ``<cwd>/dataset/<Dataset_name>/logs/<dumping_name>/<YYYYmmdd_HHMMSS>``,
        where ``Dataset_name`` is a name defined outside this method.

        :param dumping_name: str, used as a path component and in the log
            file name ``results_<dumping_name>``.
        :return: the object returned by ``multi_logger``.
        """
        # os.getcwd() returns the current working directory of the process
        products_path = os.path.join(os.getcwd(), 'dataset', Dataset_name, "logs", dumping_name,
                                     time.strftime("%Y%m%d_%H%M%S"))
        # exist_ok avoids the check-then-create race when the same
        # second-resolution directory is requested twice.
        os.makedirs(products_path, exist_ok=True)

        logger = multi_logger([PrintLogger("MyLogger", level=logging.DEBUG),
                               FileLogger("results_%s" % dumping_name,
                                          path=products_path, level=logging.INFO)], name=None)
        return logger
예제 #8
0
def build_model(training_data,
                training_adj,
                training_labels,
                test_data,
                test_adj,
                test_labels,
                optimizer,
                epochs,
                lr,
                l2_pen,
                class_weights,
                graph_params,
                dumping_name,
                iterations,
                is_nni=False):
    """Build the configuration and loggers and return a ``ModelRunner``.

    The training / test matrices, adjacencies and labels, together with
    ``optimizer``, ``epochs``, ``lr``, ``iterations`` and ``l2_pen`` (stored
    under ``"weight_decay"``), make up the configuration dictionary.

    :param class_weights: forwarded to ``ModelRunner`` as ``weights``.
    :param graph_params: forwarded to ``ModelRunner``.
    :param dumping_name: str, used as a sub-directory of ``<cwd>/logs`` and in
        the log / CSV file names.
    :param is_nni: forwarded to ``ModelRunner``.
    :return: the constructed ``ModelRunner``.
    """
    conf = {
        "lr": lr,
        "weight_decay": l2_pen,
        "training_mat": training_data,
        "training_adj": training_adj,
        "training_labels": training_labels,
        "test_mat": test_data,
        "test_adj": test_adj,
        "test_labels": test_labels,
        "optimizer": optimizer,
        "epochs": epochs,
        "iterations": iterations,
    }

    products_path = os.path.join(os.getcwd(), "logs", dumping_name,
                                 time.strftime("%Y%m%d_%H%M%S"))
    # The timestamp has one-second resolution, so two builds started in the
    # same second share a directory; exist_ok makes the creation race-free.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger(
        [
            PrintLogger("MyLogger", level=logging.DEBUG),
            FileLogger("results_" + dumping_name,
                       path=products_path,
                       level=logging.INFO),
        ],
        name=None)

    data_logger = CSVLogger("results_" + dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc", "auc")

    runner = ModelRunner(conf,
                         logger=logger,
                         data_logger=data_logger,
                         weights=class_weights,
                         graph_params=graph_params,
                         is_nni=is_nni)
    return runner
def calculate_gpu_one(run, level, size, p, directed, device=3):
    """Calculate and dump the GPU motif feature of one pickled graph.

    The graph is read from
    ``size{size}_p{p}_directed{directed}_runs/run_{run}/gnx.pkl`` (relative to
    the current working directory) and the feature output goes to the
    ``motifs_gpu`` directory next to it.

    :param run: run identifier, used in the ``run_<run>`` directory name.
    :param level: motif level; passed to ``nth_nodes_motif`` and used in the
        feature name ``motif<level>`` and its abbreviation ``m<level>``.
    :param size: used only to build the directory name.
    :param p: used only to build the directory name.
    :param directed: used only to build the directory name.
    :param device: GPU device index handed to ``nth_nodes_motif``.  Defaults
        to 3, the value that used to be hard-coded.
    """
    from features_infra.graph_features import GraphFeatures
    from features_infra.feature_calculators import FeatureMeta
    from features_algorithms.accelerated_graph_features.motifs import nth_nodes_motif
    from loggers import FileLogger
    feature_meta = {
        "motif" + str(level):
        FeatureMeta(nth_nodes_motif(level, gpu=True, device=device),
                    {"m" + str(level)})
    }
    head_path = os.path.join(
        "size{}_p{}_directed{}_runs".format(size, p, directed),
        "run_" + str(run))
    dump_path = os.path.join(head_path, "motifs_gpu")
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # graph files produced by trusted runs.
    # The context manager closes the file handle, which was previously leaked.
    with open(os.path.join(head_path, "gnx.pkl"), "rb") as graph_file:
        graph = pickle.load(graph_file)
    logger = FileLogger("CalculationLogger" + str(level),
                        path=dump_path,
                        level=logging.DEBUG)
    raw_feature = GraphFeatures(gnx=graph,
                                features=feature_meta,
                                dir_path=dump_path,
                                logger=logger)
    raw_feature.build(should_dump=True)
예제 #10
0
def main(product_params, args):
    """Run the model for every combination of the supplied parameter values.

    For each combination a new random seed is drawn, the numpy and torch
    generators are seeded with it, ``runner.run`` is called ``num_samples``
    (3) times, and the parameters with their results are appended as one
    pickle record to ``quant_res.pkl`` in the products directory.

    :param product_params: re-iterable collection of entries from which
        ``at(0)`` extracts the parameter name and ``at(1)`` the values to try
        (``at`` is defined outside this block; presumably an item getter --
        confirm).  It is traversed once per combination, so a one-shot
        iterator will not work.
    :param args: parsed command-line arguments; ``args.epochs`` and
        ``args.cuda`` are read here.
    """
    train_p = 50
    num_samples = 3

    config = {
        "hidden_layers": [70, 35],
        "dropout": KIPF_BASE["dropout"],
        "learning_rate": KIPF_BASE["lr"],
        "weight_decay": KIPF_BASE["weight_decay"],
        "epochs": args.epochs,
        "train_p": 0,
        "feat_type": "neighbors",
        "dataset": "firms",
        "seed": 12345678
    }

    products_path = os.path.join(PROJ_DIR, "logs", config["dataset"],
                                 time.strftime("%Y_%m_%d_%H_%M_%S"))
    # exist_ok avoids the check-then-create race when two runs start within
    # the same second and resolve to the same timestamped directory.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger(
        [
            PrintLogger("IdansLogger", level=logging.INFO),
            FileLogger("results_%s" % config["dataset"],
                       path=products_path,
                       level=logging.INFO),
            FileLogger("results_%s_all" % config["dataset"],
                       path=products_path,
                       level=logging.DEBUG),
        ],
        name=None)

    # CSV data logging is disabled here: None is passed as the data logger.
    runner = ModelRunner(DATA_PATH, args.cuda, logger, None)

    # Convert the percentage into a fraction and derive the test share.
    train_p /= 100.
    config["test_p"] = 1 - train_p
    config["train_p"] = train_p

    results_path = os.path.join(products_path, "quant_res.pkl")
    for pars in product(*map(at(1), product_params)):
        current_params = list(zip(map(at(0), product_params), pars))
        cur_seed = random.randint(1, 1000000000)
        current_params.append(("seed", cur_seed))
        config.update(current_params)

        # "seed" is always present in config (initial value plus the update
        # above), so no membership check is needed before seeding.
        np.random.seed(config["seed"])
        torch.manual_seed(config["seed"])
        if args.cuda is not None:
            torch.cuda.manual_seed(config["seed"])

        config_args = sorted(config.items(), key=at(0))
        logger.info(
            "Arguments: (train %1.2f) " +
            ", ".join("%s: %s" % (name, val) for name, val in current_params),
            train_p)
        res = [runner.run(config_args) for _ in range(num_samples)]

        # Append mode: the file accumulates one pickle record per parameter
        # combination.  The context manager closes the handle every time
        # instead of leaking one open file per iteration.
        with open(results_path, "ab") as results_file:
            pickle.dump({
                "params": current_params,
                "res": res
            }, results_file)
예제 #11
0
def build_model(training_data,
                training_adj,
                training_labels,
                eval_data,
                eval_adj,
                eval_labels,
                test_data,
                test_adj,
                test_labels,
                hidden_layers,
                activations,
                optimizer,
                epochs,
                dropout,
                lr,
                l2_pen,
                coeffs,
                unary,
                class_weights,
                graph_params,
                dumping_name,
                edge_normalization="correct",
                early_stop=True,
                is_nni=False,
                device=1):
    """Build the configuration and logger and return a ``ModelRunner``.

    The training / eval / test matrices, adjacencies and labels and the
    hyper-parameters are stored unchanged in the configuration dictionary
    (``l2_pen`` under ``"weight_decay"``, ``coeffs`` under ``"loss_coeffs"``).

    :param coeffs: loss coefficients; ``None`` is replaced by ``[1., 0., 0.]``.
    :param class_weights: forwarded to ``ModelRunner`` as ``weights``.
    :param dumping_name: sequence whose items become nested sub-directories of
        ``<cwd>/logs``; its item at index 1 is used in the log file name.
    :param edge_normalization: stored in the configuration as given.
    :param early_stop: forwarded to ``ModelRunner``.
    :param is_nni: forwarded to ``ModelRunner``.
    :param device: forwarded to ``ModelRunner``.
    :return: the constructed ``ModelRunner``; the products directory is passed
        to it as ``tmp_path``.
    """
    if coeffs is None:
        coeffs = [1., 0., 0.]
    conf = {
        "hidden_layers": hidden_layers,
        "dropout": dropout,
        "lr": lr,
        "weight_decay": l2_pen,
        "training_mat": training_data,
        "training_adj": training_adj,
        "training_labels": training_labels,
        "eval_mat": eval_data,
        "eval_adj": eval_adj,
        "eval_labels": eval_labels,
        "test_mat": test_data,
        "test_adj": test_adj,
        "test_labels": test_labels,
        "optimizer": optimizer,
        "epochs": epochs,
        "activations": activations,
        "loss_coeffs": coeffs,
        "unary": unary,
        "edge_normalization": edge_normalization,
    }

    products_path = os.path.join(os.getcwd(), "logs", *dumping_name,
                                 time.strftime("%Y%m%d_%H%M%S"))
    # The timestamp has one-second resolution, so two builds started in the
    # same second share a directory; exist_ok makes the creation race-free.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger(
        [
            PrintLogger("MyLogger", level=logging.DEBUG),
            FileLogger("results_" + dumping_name[1],
                       path=products_path,
                       level=logging.INFO),
        ],
        name=None)

    runner = ModelRunner(conf,
                         logger=logger,
                         weights=class_weights,
                         graph_params=graph_params,
                         early_stop=early_stop,
                         is_nni=is_nni,
                         tmp_path=products_path,
                         device=device)
    return runner
예제 #12
0
def main():
    """Run the "cora" experiments over adjacency normalisation, train size and feature type.

    For ``norm_adj`` in (True, False) a ``ModelRunner`` is created.  For each
    train percentage in 1, 5, 15, ..., 85 the loader's test split is redone,
    and for each feature type ("combined", "neighbors", "features") the runner
    is executed ``num_iter`` (5) times.  The results and the configuration of
    every (train percentage, norm_adj, feature type) combination are pickled
    to ``t<train_p>_n<norm_adj>_ft<index>.pkl`` under ``runner.products_path``.
    """
    args = parse_args()
    dataset = "cora"  # fixed here; args.dataset is not read

    seed = random.randint(1, 1000000000)

    conf = {
        "kipf": {
            "hidden": 16,
            "dropout": 0.5,
            "lr": 0.01,
            "weight_decay": 5e-4
        },
        "hidden_layers": [16],
        "multi_hidden_layers": [100, 20],
        "dropout": 0.6,
        "lr": 0.01,
        "weight_decay": 0.001,
        "dataset": dataset,
        "epochs": args.epochs,
        "cuda": args.cuda,
        "fastmode": args.fastmode,
        "seed": seed
    }

    init_seed(conf['seed'], conf['cuda'])
    dataset_path = os.path.join(PROJ_DIR, "data", dataset)

    products_path = os.path.join(CUR_DIR, "logs", dataset,
                                 time.strftime("%Y_%m_%d_%H_%M_%S"))
    # exist_ok avoids the check-then-create race when two runs start within
    # the same second and resolve to the same timestamped directory.
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger(
        [
            PrintLogger("IdansLogger", level=logging.INFO),
            FileLogger("results_%s" % conf["dataset"],
                       path=products_path,
                       level=logging.INFO),
            FileLogger("results_%s_all" % conf["dataset"],
                       path=products_path,
                       level=logging.DEBUG),
        ],
        name=None)

    data_logger = CSVLogger("results_%s" % conf["dataset"], path=products_path)
    data_logger.set_titles("model_name", "loss", "acc", "train_p", "norm_adj",
                           "feat_type")

    num_iter = 5
    feat_types = ["combined", "neighbors", "features"]
    for norm_adj in [True, False]:
        conf["norm_adj"] = norm_adj
        runner = ModelRunner(products_path,
                             dataset_path,
                             conf,
                             logger=logger,
                             data_logger=data_logger)

        for train_p in chain([1], range(5, 90, 10)):
            # The integer percentage is kept in conf; it is used below for
            # the output file name.
            conf["train_p"] = train_p

            train_p /= 100
            # Whatever is not used for training is split evenly between
            # validation and test.
            val_p = test_p = (1 - train_p) / 2.
            # Re-express train_p as a fraction of the train + validation part
            # (i.e. of what remains once the test share is removed).
            train_p /= (val_p + train_p)

            runner.loader.split_test(test_p)

            for ft, feat_type in enumerate(feat_types):
                conf["feat_type"] = feat_type
                results = [
                    runner.run(train_p, feat_type) for _ in range(num_iter)
                ]
                conf_path = os.path.join(
                    runner.products_path, "t%d_n%d_ft%d.pkl" % (
                        conf["train_p"],
                        norm_adj,
                        ft,
                    ))
                # The context manager closes the file; previously one handle
                # was leaked per combination.
                with open(conf_path, "wb") as out_file:
                    pickle.dump({
                        "res": results,
                        "conf": conf
                    }, out_file)

    logger.info("Finished")