Beispiel #1
0
def main_ensemble(data, num_features, num_class):
    """Train the ensemble model on the per-model predictions stored in
    "AOE_ENSEMBLE.data" and return predicted labels for the test mask.

    Args:
        data: dataset object providing split_train_valid(), mask_train,
            mask_valid, mask_test and labels y.
        num_features: number of input features for ModelEnsemble.
        num_class: number of target classes.

    Returns:
        1-D numpy array of predicted class indices for the test nodes.
    """
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # List of per-model prediction tensors written by the scheduler.
    x = load_data(file_path("AOE_ENSEMBLE.data"))
    model = ModelEnsemble(num_features=num_features, num_class=num_class)

    data.split_train_valid()
    model = model.to(device)
    mask_train, mask_valid, mask_test, y = data.mask_train, data.mask_valid, data.mask_test, data.y
    mask_train = mask_train.to(device)
    mask_valid = mask_valid.to(device)
    mask_test = mask_test.to(device)
    y = y.to(device)
    for i in range(len(x)):
        x[i] = x[i].to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.005,
                                 weight_decay=5e-4)

    # Early stopping: keep training until the validation loss has not
    # improved for 10 consecutive epochs.
    epoch = 1
    best_loss_valid = float("inf")
    best_result = None
    best_epoch = 0
    while best_epoch + 10 >= epoch:
        model.train()
        optimizer.zero_grad()
        predict = model(x)
        loss_train = nll_loss(predict[mask_train], y[mask_train])
        # .item() so the comparison below uses a plain float and does not
        # keep this epoch's autograd graph alive.
        loss_valid = nll_loss(predict[mask_valid], y[mask_valid]).item()
        loss_train.backward()
        optimizer.step()
        if loss_valid < best_loss_valid:
            best_loss_valid = loss_valid
            # Detach so the stored best predictions do not retain the graph.
            best_result = predict.detach()
            best_epoch = epoch
        epoch += 1

    return best_result[mask_test].max(1)[1].cpu().numpy().flatten()
Beispiel #2
0
from tools import file_path

# Collect the last whitespace-separated token of every line in result.txt
# that contains "Average", then print them six per row, tab separated.
result = []
with open(file_path("result.txt")) as f:  # close the handle deterministically
    for line in f:
        if 'Average' in line:
            result.append(line.split()[-1])
assert len(result) % 6 == 0
for i in range(0, len(result), 6):
    print("\t".join(result[i:i + 6]))
Beispiel #3
0
    def train_predict(data, time_budget, n_class, schema):
        """Run the full AutoGraph pipeline within ``time_budget`` seconds.

        Candidate GNN configurations are dispatched to ``max_num_parallel``
        worker subprocesses through AOE_MODEL_*.param files; results come
        back through AOE_MODEL_*.result files guarded by file locks.  The
        four configurations with the best validation accuracy are then
        blended by a small ensemble model trained here.

        Args:
            data: raw input data; converted with generate_data().
            time_budget: wall-clock seconds available for the whole run.
            n_class: number of classes (unused; data.num_class is used).
            schema: dataset schema (unused, kept for interface parity).

        Returns:
            1-D numpy array of predicted class indices for the test nodes.
        """
        start_time = time.time()
        LOGGER.info("Start!")
        LOGGER.info("time_budget: {0}".format(time_budget))

        data = generate_data(data, LOGGER)
        LOGGER.info("Num of features: {0}".format(data.num_features))
        LOGGER.info("Num of classes: {0}".format(data.num_class))
        # Search space: four identical rounds over every
        # (activation, architecture) combination.  Each entry is a distinct
        # Param instance, so every copy carries its own running/retry state
        # and the same configuration may effectively be trained several
        # times.  (This replaces a 184-line literal list with the same
        # entries in the same order.)
        params = []
        for _ in range(4):
            for activation in ("leaky_relu", "relu"):
                # Depth-1 models: GCN at every width, GAT/GAT4 from 32 up.
                for width in (16, 32, 64, 128, 256):
                    params.append(
                        Param("ModelGCN", [1, [width, width], activation]))
                    if width != 16:
                        params.append(
                            Param("ModelGAT", [1, [width, width], activation]))
                        params.append(
                            Param("ModelGAT4",
                                  [1, [width, width], activation]))
                # Deeper models are GCN only; depth d uses d + 1 layers.
                for depth in (2, 3):
                    for width in (16, 32, 64, 128, 256):
                        params.append(
                            Param("ModelGCN",
                                  [depth, [width] * (depth + 1), activation]))

        logger_killed_model_process = [True for _ in range(max_num_parallel)]
        params_running = [None for _ in range(max_num_parallel)]
        while True:
            for i in range(max_num_parallel):
                # Keep 5 seconds of head-room for the ensemble step below.
                if time.time() - start_time >= time_budget - 5:
                    break
                if not is_subprocess_alive(pid_model[i]):
                    if logger_killed_model_process[i]:
                        LOGGER.info(
                            "Model process {0} has been killed".format(i))
                        if params_running[i]:
                            # Free the configuration and consume one retry.
                            params_running[i].running = False
                            params_running[
                                i].retry = params_running[i].retry - 1
                        logger_killed_model_process[i] = False
                if os.path.exists(file_path("AOE_MODEL_{0}.result".format(i))):
                    with FileLock(file_path("AOE_MODEL_{0}.lock".format(i))):
                        temp_result = load_data(
                            file_path("AOE_MODEL_{0}.result".format(i)))
                        if temp_result.result is None:
                            # Worker produced no predictions: allow a retry.
                            params_running[i].running = False
                            params_running[
                                i].retry = params_running[i].retry - 1
                            # Single file_path() call here (the original
                            # wrapped it twice), matching the exists() check
                            # above.
                            os.remove(
                                file_path("AOE_MODEL_{0}.result".format(i)))
                            LOGGER.info("Result of Model {0} is None".format(
                                params_running[i].index))
                        else:
                            params[
                                params_running[i].index].result = temp_result
                            os.remove(
                                file_path("AOE_MODEL_{0}.result".format(i)))
                            LOGGER.info(
                                "Get result of Model {0}, {1}, {2}, {3}, {4}".
                                format(
                                    params_running[i].index,
                                    "loss_train = {0:.6f}".format(
                                        params_running[i].result.loss_train),
                                    "loss_valid = {0:.6f}".format(
                                        params_running[i].result.loss_valid),
                                    "acc_train = {0:.6f}".format(
                                        params_running[i].result.acc_train),
                                    "acc_valid = {0:.6f}".format(
                                        params_running[i].result.acc_valid)))
                # An idle worker (no pending .param, no unread .result) gets
                # the first configuration that is not running and still has
                # retries left.
                if not os.path.exists(
                        file_path("AOE_MODEL_{0}.param".format(
                            i))) and not os.path.exists(
                                file_path("AOE_MODEL_{0}.result".format(i))):
                    with FileLock(file_path("AOE_MODEL_{0}.lock".format(i))):
                        for params_index in range(len(params)):
                            if not params[params_index].running and params[
                                    params_index].retry > 0:
                                params[params_index].index = params_index
                                params[params_index].running = True
                                params[
                                    params_index].time_budget = time_budget - (
                                        time.time() - start_time)
                                params_running[i] = params[params_index]
                                save_data(
                                    file_path("AOE_MODEL_{0}.param".format(i)),
                                    params[params_index])
                                LOGGER.info(
                                    "Start Model {0}".format(params_index))
                                break
            if time.time() - start_time >= time_budget - 5:
                break
            # Stop early once every configuration has reported a result.
            if all(param.result is not None for param in params):
                break

        os.system(
            "kill -9 `ps -ef | grep AutoGraphModel.py | awk '{print $2}' `")
        LOGGER.info("Start merge the result")
        params_result = [param for param in params if param.result is not None]
        LOGGER.info("Num of result: {0}".format(len(params_result)))
        # Keep the four configurations with the highest validation accuracy
        # (ascending sort, then the last four).
        params_result.sort(key=lambda param: param.result.acc_valid)
        params_result = params_result[-4:]
        for param in params_result:
            LOGGER.info("Final Model {0} {1}".format(param.index, param.model))
        result = [item.result.result for item in params_result]

        # Train the ensemble over the selected models' predictions.
        torch.backends.cudnn.deterministic = True
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = ModelEnsemble(num_features=data.num_features,
                              num_class=data.num_class)
        data.split_train_valid()
        model = model.to(device)
        mask_train, mask_valid, mask_test, y = data.mask_train, data.mask_valid, data.mask_test, data.y
        mask_train = mask_train.to(device)
        mask_valid = mask_valid.to(device)
        mask_test = mask_test.to(device)
        y = y.to(device)
        for i in range(len(result)):
            result[i] = result[i].to(device)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=0.005,
                                     weight_decay=5e-4)

        # Early stopping: keep training until the validation loss has not
        # improved for 10 consecutive epochs.
        epoch = 1
        best_loss_valid = float("inf")
        best_result = None
        best_epoch = 0
        while best_epoch + 10 >= epoch:
            model.train()
            optimizer.zero_grad()
            predict = model(result)
            loss_train = nll_loss(predict[mask_train], y[mask_train])
            # .item() so the comparison uses a plain float and does not keep
            # this epoch's autograd graph alive.
            loss_valid = nll_loss(predict[mask_valid], y[mask_valid]).item()
            loss_train.backward()
            optimizer.step()
            if loss_valid < best_loss_valid:
                best_loss_valid = loss_valid
                # Detach so the stored predictions do not retain the graph.
                best_result = predict.detach()
                best_epoch = epoch
            epoch += 1
        LOGGER.info("Finish merge the result")
        return best_result[mask_test].max(1)[1].cpu().numpy().flatten()
Beispiel #4
0
 # Worker entry point: register this process, then poll for a parameter file
 # and run the requested ensemble model.  (This chunk is truncated at the
 # final `with` statement; its body is outside the visible source.)
 start_time = time.time()
 time_budget = float("inf")
 parser = argparse.ArgumentParser()
 parser.add_argument("--index", type=int)
 parser.add_argument("--file_param", type=str)
 parser.add_argument("--file_ready", type=str)
 parser.add_argument("--file_result", type=str)
 parser.add_argument("--file_lock", type=str)
 args = parser.parse_args()
 aoe_data = None
 if torch.cuda.is_available():
     # Touch the GPU once up front so CUDA initialization cost is paid here,
     # not inside the timed training loop.
     torch.zeros(1).cuda()
 with FileLock(args.file_lock):
     # Publish our PID so the scheduler can check this worker's liveness.
     save_data(args.file_ready, os.getpid())
 while True:
     # Lazily load the shared dataset as soon as the scheduler writes it.
     if aoe_data is None and os.path.exists(file_path("AOE.data")):
         with FileLock(file_path("AOE.ready")):
             aoe_data = load_data(file_path("AOE.data"))
     if os.path.exists(args.file_param):
         # Re-check the dataset in case it appeared after the poll above.
         if aoe_data is None and os.path.exists(file_path("AOE.data")):
             with FileLock(file_path("AOE.ready")):
                 aoe_data = load_data(file_path("AOE.data"))
         start_time = time.time()  # reset the start time
         with FileLock(args.file_lock):
             param = load_data(args.file_param)
         time_budget = param.time_budget  # reset the time budget
         # This worker only ever receives the ensemble job.
         assert param.model == "ModelEnsemble"
         result = main_ensemble(data=aoe_data,
                                num_features=param.num_features,
                                num_class=param.num_class)
         with FileLock(args.file_lock):
Beispiel #5
0
import torch
from common import get_logger
from filelock import FileLock
from torch.nn.functional import nll_loss

from AutoGraphEnsemble import ModelEnsemble
from Param import Param
from tools import file_path, save_data, load_data, generate_data, is_subprocess_alive

VERBOSITY_LEVEL = 'INFO'
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)

# Start-up cleanup: remove leftover AOE* state files and kill stale worker
# processes from a previous run.
# NOTE(review): files are collected from all subdirectories by os.walk, but
# removal goes through file_path(file), which presumably resolves against a
# single base directory — confirm AOE* files only ever live there.
for root, dirs, files in os.walk(os.path.dirname(os.path.realpath(__file__))):
    for file in files:
        if file.startswith("AOE"):
            os.remove(file_path(file))
os.system("kill -9 `ps -ef | grep AutoGraphModel.py | awk '{print $2}' `")

# Launch the pool of model-training worker subprocesses; each worker
# communicates through its own param/ready/result/lock files.
# (This chunk is truncated mid-call; the closing of the format/os.system
# call is outside the visible source.)
max_num_parallel = 4
pid_model = []
pid_ensemble = None
for k in range(max_num_parallel):
    os.system("python {0} {1} {2} {3} {4} {5} {6} &".format(
        file_path("AutoGraphModel.py"),
        "--index {0}".format(k),
        "--file_param {0}".format(file_path("AOE_MODEL_{0}.param".format(k))),
        "--file_ready {0}".format(file_path("AOE_MODEL_{0}.ready".format(k))),
        "--file_result {0}".format(file_path(
            "AOE_MODEL_{0}.result".format(k))),
        "--file_lock {0}".format(file_path("AOE_MODEL_{0}.lock".format(k))),
        "--if_kill {0}".format(1 if k != 0 else 0),