def __init__(self, device, trainData, validData, args):
        self.device = device
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData

        self.fadding_model = SimpleNet(input_size=9,
                                       output_size=12,
                                       hidden_size=args.hidden_size).to(device)
        self.fadding_model.load_state_dict(
            torch.load("model0.33/model.pkl.904"))
        self.fixed_model = SimpleNet(input_size=9,
                                     output_size=12,
                                     hidden_size=args.hidden_size).to(device)
        self.fixed_model.load_state_dict(torch.load("model0.33/model.pkl.904"))

        self.criteria = torch.nn.MSELoss()
        self.opt = torch.optim.AdamW(self.fadding_model.parameters(),
                                     lr=8e-5,
                                     weight_decay=9e-3)
        # self.scheduler = scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=200, gamma=args.step_lr)
        self.batch_size = args.batch_size
        self.model_dir = args.arch
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0
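None of the examples on this page include the SimpleNet definition itself. As a reference point, here is a minimal sketch consistent with how these trainers construct and call it (input_size/output_size/hidden_size keywords, and a forward pass that several snippets unpack as out, hiddens = model(x)); the real network may differ.

import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    # Hypothetical stand-in: a two-layer MLP that returns both the logits
    # and the list of hidden activations, matching the snippets that
    # unpack two return values from the forward pass.
    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h1 = torch.relu(self.fc1(x))
        h2 = torch.relu(self.fc2(h1))
        return self.out(h2), [h1, h2]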
Example 2
 def __init__(self,
              trainData,
              validData,
              hidden_size,
              device,
              model_dir="model"):
     self.history = {'train': [], 'valid': []}
     self.trainData = trainData
     self.validData = validData
     self.classficationA = SimpleNet(input_size=8,
                                     output_size=12,
                                     hidden_size=hidden_size).to(device)
     self.classficationB = SimpleNet(input_size=9,
                                     output_size=12,
                                     hidden_size=hidden_size).to(device)
     self.criterion = nn.CrossEntropyLoss()
     self.mse_loss = nn.MSELoss()
     self.opt_C_A = torch.optim.Adam(self.classficationA.parameters(),
                                     lr=1e-4)
     self.opt_C_B = torch.optim.Adam(self.classficationB.parameters(),
                                     lr=1e-4)
     self.device = device
     self.model_dir = model_dir
     if not os.path.exists(self.model_dir):
         os.makedirs(self.model_dir)
     self.best_val = 0.0
Example 3
 def __init__(self,
              trainData,
              validData,
              hidden_size,
              device,
              model_dir="model"):
     self.history = {'train': [], 'valid': []}
     self.trainData = trainData
     self.validData = validData
     self.generator = Generator(input_size=8,
                                output_size=1,
                                hidden_size=hidden_size).to(device)
     self.discriminator = Discriminator(input_size=1,
                                        output_size=1,
                                        hidden_size=hidden_size).to(device)
     self.classfication = SimpleNet(input_size=9,
                                    output_size=12,
                                    hidden_size=hidden_size).to(device)
     self.adversarial_loss = nn.BCEWithLogitsLoss()
     self.criterion = nn.CrossEntropyLoss()
     self.opt_G = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
     self.opt_D = torch.optim.Adam(self.discriminator.parameters(), lr=1e-4)
     self.opt_C = torch.optim.Adam(self.classfication.parameters(), lr=1e-4)
     self.device = device
     self.model_dir = model_dir
     if not os.path.exists(self.model_dir):
         os.makedirs(self.model_dir)
     self.best_val = 0.0
Example 4
 def __init__(self, device, trainData, validData, args):
     self.device = device
     self.history = {'train': [], 'valid': []}
     self.trainData = trainData
     self.validData = validData
     self.model = SimpleNet(input_size=9, output_size=12, hidden_size=args.hidden_size).to(device)
     self.criteria = torch.nn.CrossEntropyLoss()
     self.opt = torch.optim.AdamW(self.model.parameters(), lr=args.lr, weight_decay=3.3e-1)
     self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=200, gamma=args.step_lr)
     self.batch_size = args.batch_size
     self.model_dir = args.arch
     if not os.path.exists(self.model_dir):
         os.makedirs(self.model_dir)
     self.best_val = 0.0
Example 5
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleNet(model_info['input_dim'], model_info['hidden_dim'],
                      model_info['num_hidden'], model_info['output_dim'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    # prep for testing
    model.to(device).eval()

    print("Done loading model.")
    return model
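model_fn expects a model_info.pth file written during training. The matching save_model_params helper is not shown on this page; a plausible sketch, assuming the constructor arguments are kept as attributes (an assumption), is:

import os
import torch

def save_model_params(model, model_dir):
    # Hypothetical counterpart to model_fn: persist the constructor
    # arguments so the network can be rebuilt at inference time.
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        torch.save({
            'input_dim': model.input_dim,    # assumed attributes; adapt to
            'hidden_dim': model.hidden_dim,  # however SimpleNet stores its
            'num_hidden': model.num_hidden,  # hyperparameters
            'output_dim': model.output_dim,
        }, f)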
Example 6
def model_fn(model_dir):
    logger.info("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    logger.info("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleNet(model_info['input_dim'], model_info['hidden_dim'],
                      model_info['output_dim'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    return model.to(device)
Example 7
    def train(self, batch_size, epochs):
        num_classes = 10
        if self.isLogEnabled:
            print('Training MNIST!')
        if K.image_data_format() == 'channels_first':
            self.x_train = self.x_train.reshape(self.x_train.shape[0], 1,
                                                self.img_rows, self.img_cols)
            self.x_test = self.x_test.reshape(self.x_test.shape[0], 1,
                                              self.img_rows, self.img_cols)
            input_shape = (1, self.img_rows, self.img_cols)
        else:
            self.x_train = self.x_train.reshape(self.x_train.shape[0],
                                                self.img_rows, self.img_cols,
                                                1)
            self.x_test = self.x_test.reshape(self.x_test.shape[0],
                                              self.img_rows, self.img_cols, 1)
            input_shape = (self.img_rows, self.img_cols, 1)

        self.x_train = self.x_train.astype('float32')
        self.x_test = self.x_test.astype('float32')
        self.x_train /= 255
        self.x_test /= 255
        if self.isLogEnabled:
            print('x_train shape:', self.x_train.shape)
            print(self.x_train.shape[0], 'train samples')
            print(self.x_test.shape[0], 'test samples')

        # convert class vectors to binary class matrices
        self.y_train = keras.utils.to_categorical(self.y_train, num_classes)
        self.y_test = keras.utils.to_categorical(self.y_test, num_classes)

        self.model = SimpleNet.build(num_classes, input_shape)
        self.model.compile(loss=keras.losses.categorical_crossentropy,
                           optimizer=keras.optimizers.Adadelta(),
                           metrics=['accuracy'])

        self.model.fit(self.x_train,
                       self.y_train,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=1,
                       validation_data=(self.x_test, self.y_test))
        score = self.model.evaluate(self.x_test, self.y_test, verbose=0)
        if self.isLogEnabled:
            print('Test loss:', score[0])
            print('Test accuracy:', score[1])
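This Keras example relies on SimpleNet.build(num_classes, input_shape), which is not shown. A minimal sketch of such a builder, with an assumed layer stack, could look like:

from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential

class SimpleNet:
    @staticmethod
    def build(num_classes, input_shape):
        # Hypothetical builder: a small conv/pool/dense stack for
        # (rows, cols, 1) MNIST inputs.
        return Sequential([
            Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(128, activation='relu'),
            Dense(num_classes, activation='softmax'),
        ])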
Example 8
def main():
    global actor_critic, directory, weight
    num_cls = args.wave_num * args.k + 1  # all route/wavelength combinations, plus the option of selecting nothing

    if args.append_route.startswith("True"):
        channel_num = args.wave_num+args.k
    else:
        channel_num = args.wave_num

    # parse the weight argument
    if args.weight.startswith('None'):
        weight = None
    else:
        weight = args.weight

    # in CNN learning mode, the obs shape should be CHW
    assert args.mode.startswith('learning')
    # model initialization
    if args.cnn.startswith('mobilenetv2'):
        actor_critic = MobileNetV2(in_channels=channel_num, num_classes=num_cls, t=6)
    elif args.cnn.startswith('simplenet'):
        actor_critic = SimpleNet(in_channels=channel_num, num_classes=num_cls)
    elif args.cnn.startswith('simplestnet'):
        actor_critic = SimplestNet(in_channels=channel_num, num_classes=num_cls)
    elif args.cnn.startswith('alexnet'):
        actor_critic = AlexNet(in_channels=channel_num, num_classes=num_cls)
    elif args.cnn.startswith('squeezenet'):
        actor_critic = SqueezeNet(in_channels=channel_num, num_classes=num_cls, version=1.0)
    else:
        raise NotImplementedError

    times = 1  # number of repetitions
    prefix = "trained_models"
    directory = os.path.join(prefix, 'a2c', args.cnn, args.step_over)

    if args.comp.startswith("states"):
        all_states_comp()
    elif args.comp.startswith("random"):
        random_comp(times=times)
    elif args.comp.startswith("None"):
        raise ValueError("Wrong call for this script")
    else:
        raise NotImplementedError
Example 9
hidden_layer = 256  # hidden layer dimension
batch_size = 4

# load data
train_data = EmotionDataset(root_dir='./train_test', dataset='train')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
train_data_pytorch = torch.from_numpy(train_data.data).float().to(device)
train_label = train_data.labels.squeeze()

test_set = sio.loadmat(os.path.join(root_dir, 'test_data.mat'))['test_data']
test_label = (sio.loadmat(os.path.join(root_dir, 'test_label.mat'))['test_label'] + 1).squeeze()
test_data_pytorch = torch.from_numpy(test_set).float().to(device)
print("data load finished.")

# instantiate model
model = SimpleNet(310, hidden_layer, 3).to(device)


# training process
def train():
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    if DRAW:
        loss_data = []
        acc_data = []

    start = time.time()
    for ep in range(max_epoch_num):
        # train
        model.train()
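        # Hypothetical continuation (the snippet is truncated here): one
        # full-batch gradient step using the names defined above.
        optimizer.zero_grad()
        out = model(train_data_pytorch)
        target = torch.from_numpy(train_label).long().to(device)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()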
Example 10
    increases in computing performance by harnessing the power of the graphics processing unit (GPU).
    """
    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to GPU , if appropriate

    model = SimpleNet(args.input_dim, args.hidden_dim,
                      args.output_dim).to(device)  # Instantiate the model

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training

    optimizer = optim.Adam(
        model.parameters(),
        lr=args.lr)  # You can use stochastic gradient descent instead of Adam

    criterion = nn.BCELoss()  # BCELoss() returns a single value; torch.nn.CrossEntropyLoss() cannot be used here because it returns several values

    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
Example 11
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)

    test_dataset = datasets.MNIST(root='data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False) """

    train_loader = torch.utils.data.DataLoader(dataset('train'),
                                               batch_size=64,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset('test'), batch_size=64)
    # net = SimpleNet_Bin(10)
    net = SimpleNet(10)
    net.cuda()
    optimizer = optim.Adam(net.parameters(),
                           lr=1e-2,
                           weight_decay=1e-6,
                           betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_test = nn.CrossEntropyLoss(reduction='sum').cuda()

    log_path = 'logs/bin'
    writer = SummaryWriter(log_dir=log_path)

    epoch_num = 20
    lr0 = 1e-4
    for epoch in range(epoch_num):
        current_lr = lr0 / 2**int(epoch / 4)
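        # Hypothetical continuation (the snippet is truncated here): apply the
        # manually decayed rate by writing it into the optimizer's param groups.
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr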
Example 12
                        help='level of verbosity (default: 0)')

    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    model = SimpleNet(args.input_dim, args.hidden_dim, args.num_hidden,
                      args.output_dim)
    if args.verbosity:
        header('Model Architecture:')
        print(model)

    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    criterion = nn.BCELoss(reduction='mean')

    if args.verbosity:
        header('Starting model training...')
    train(model, train_loader, args.epochs, optimizer, criterion, device)

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)
Example 13
def main():
    logger = logging.getLogger(__name__)

    ## Load config file
    with open("config.json", "r") as f:
        config = json.load(f)

    ## Cleaning TensorBoard events
    clean_events(config)

    ## Load data
    data_loader = DataLoader(config)
    X_train, X_test, y_train, y_test = data_loader.get_data()

    ## Create placeholders
    X = tf.placeholder(tf.float64, [None, 13])
    # y = tf.placeholder(tf.float32, [None, 2])
    y = tf.placeholder(tf.float64, [None])

    ## Create model and outputs
    net = SimpleNet(config)
    net_output = net.forward(X)
    y_pred, log_sigma = net_output[..., 0], net_output[..., 1]
    # Track mean of log_sigma across batch of data
    tf.summary.scalar("mean_log_sigma", tf.reduce_mean(log_sigma))

    ## Define metrics based on experiment
    # Loss
    type_exp = '_'.join(config['exp_name'].split('_')[:2])
    if type_exp == 'vanilla_loss':
        loss = compute_loss(y_true=y, y_pred=y_pred)
    elif type_exp == 'loss_bnn':
        loss = compute_loss_bnn(y_true=y, y_pred=y_pred, log_sigma=log_sigma)

    # Root Mean Squared Error (RMSE)
    rmse = compute_rmse(y_true=y, y_pred=y_pred)

    ## Define optimizer
    optimizer = net.train_optimizer(loss)

    ## Merging all summaries
    merged_summary = tf.summary.merge_all()

    ## Launching the execution graph for training
    with tf.Session() as sess:
        # Initializing all variables
        sess.run(tf.global_variables_initializer())

        # Create train and test writer
        train_writer = tf.summary.FileWriter("./tensorboard/" +
                                             config["exp_name"] + "/train/")
        test_writer = tf.summary.FileWriter("./tensorboard/" +
                                            config["exp_name"] + "/test/")

        # Visualizing the Graph
        train_writer.add_graph(sess.graph)

        for epoch in range(config["trainer"]["num_epochs"]):
            for batch in range(config["trainer"]["num_iter_per_epoch"]):
                # Yield next batch of data
                batch_X, batch_y = next(
                    data_loader.get_next_batch(
                        config["trainer"]["batch_size"]))
                # Run the optimizer
                sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
                # Compute train loss and rmse
                train_loss, train_rmse = sess.run([loss, rmse],
                                                  feed_dict={
                                                      X: batch_X,
                                                      y: batch_y
                                                  })

            if (epoch % config["trainer"]["writer_step"] == 0):
                # Run the merged summary and write it to disk
                s = sess.run(merged_summary,
                             feed_dict={
                                 X: batch_X,
                                 y: batch_y
                             })

                train_writer.add_summary(s, (epoch + 1))

                # Evaluate test data
                test_loss, test_rmse = sess.run([loss, rmse],
                                                feed_dict={
                                                    X: X_test,
                                                    y: y_test
                                                })
                s = sess.run(merged_summary, feed_dict={X: X_test, y: y_test})

                test_writer.add_summary(s, (epoch + 1))

            if (epoch % config["trainer"]["display_step"] == 0):
                print("Epoch: {:03d},".format(epoch + 1), \
                         "train_loss= {:03f},".format(train_loss), \
                         "train_rmse= {:03f},".format(train_rmse), \
                         "test_loss= {:03f},".format(test_loss), \
                         "test_rmse={:03f}".format(test_rmse)
                         )

        print("Training complete")
Example 14
def main():
    """
    主程序
    :return:
    """
    num_cls = args.wave_num * args.k + 1  # 所有的路由和波长选择组合,加上啥都不选
    action_shape = 1  # action的维度,默认是1.
    num_updates = int(
        args.steps) // args.workers // args.num_steps  # 梯度一共需要更新的次数
    if args.append_route.startswith("True"):
        channel_num = args.wave_num + args.k
    else:
        channel_num = args.wave_num

    # parse the weight argument
    if args.weight.startswith('None'):
        weight = None
    else:
        weight = args.weight
    # create the actor_critic network
    if args.mode.startswith('alg'):
        # ksp(args, weight)
        return
    elif args.mode.startswith('learning'):
        # in CNN learning mode, the obs shape should be CHW
        obs_shape = (channel_num, args.img_height, args.img_width)
        if args.cnn.startswith('mobilenetv2'):
            actor_critic = MobileNetV2(in_channels=channel_num,
                                       num_classes=num_cls,
                                       t=6)
        elif args.cnn.startswith('simplenet'):
            actor_critic = SimpleNet(in_channels=channel_num,
                                     num_classes=num_cls)
        elif args.cnn.startswith('simplestnet'):
            actor_critic = SimplestNet(in_channels=channel_num,
                                       num_classes=num_cls)
        elif args.cnn.startswith('alexnet'):
            actor_critic = AlexNet(in_channels=channel_num,
                                   num_classes=num_cls)
        elif args.cnn.startswith('squeezenet'):
            actor_critic = SqueezeNet(in_channels=channel_num,
                                      num_classes=num_cls,
                                      version=1.0)
        elif args.cnn.startswith('expandsimplenet'):
            actor_critic = ExpandSimpleNet(in_channels=channel_num,
                                           num_classes=num_cls,
                                           expand_factor=args.expand_factor)
        elif args.cnn.startswith('deepersimplenet'):
            actor_critic = DeeperSimpleNet(in_channels=channel_num,
                                           num_classes=num_cls,
                                           expand_factor=args.expand_factor)
        else:
            raise NotImplementedError

        # create the optimizer
        if args.algo.startswith("a2c"):
            optimizer = optim.RMSprop(actor_critic.parameters(),
                                      lr=args.base_lr,
                                      eps=args.epsilon,
                                      alpha=args.alpha)
        elif args.algo.startswith("ppo"):
            optimizer = optim.Adam(actor_critic.parameters(),
                                   lr=args.base_lr,
                                   eps=args.epsilon)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    if args.cuda.startswith("True"):
        # if CUDA is to be used for computation
        actor_critic.cuda()
        # actor_critic = DistModule(actor_critic)

    # check whether we are in evaluation mode
    if args.evaluate:
        print("evaluate mode")
        models = {}
        times = 1
        prefix = "trained_models"
        directory = os.path.join(prefix, 'a2c', args.cnn, args.step_over)
        env = RwaGame(net_config=args.net,
                      wave_num=args.wave_num,
                      rou=args.rou,
                      miu=args.miu,
                      max_iter=args.max_iter,
                      k=args.k,
                      mode=args.mode,
                      img_width=args.img_width,
                      img_height=args.img_height,
                      weight=weight,
                      step_over=args.step_over)

        for model_file in reversed(
                sorted(os.listdir(directory),
                       key=lambda item: int(item.split('.')[0]))):
            model_file = os.path.join(directory, model_file)
            print("evaluate model {}".format(model_file))
            params = torch.load(model_file)
            actor_critic.load_state_dict(params['state_dict'])
            actor_critic.eval()

            models[params['update_i']] = {}

            print("model loading is finished")
            for t in range(times):
                total_reward, total_services, allocated_services = 0, 0, 0
                obs, reward, done, info = env.reset()
                while not done:
                    inp = Variable(torch.Tensor(obs).unsqueeze(0),
                                   volatile=True)  # disable gradient updates
                    value, action, action_log_prob = actor_critic.act(
                        inputs=inp, deterministic=True)  # deterministic decision
                    action = action.data.numpy()[0]
                    obs, reward, done, info = env.step(action=action[0])
                    total_reward += reward
                    if reward == ARRIVAL_NEWPORT or reward == ARRIVAL_NOPORT:
                        allocated_services += 1
                    if args.step_over.startswith('one_time'):
                        if info:
                            total_services += 1
                    elif args.step_over.startswith('one_service'):
                        total_services += 1
                    else:
                        raise NotImplementedError
                models[params['update_i']]['time'] = t
                models[params['update_i']]['reward'] = total_reward
                models[params['update_i']]['total_services'] = total_services
                models[params['update_i']][
                    'allocated_services'] = allocated_services
                models[params['update_i']]['bp'] = (
                    total_services - allocated_services) / total_services
        # print the simulation results
        # print("|updated model|test index|reward|bp|total services|allocated services|")
        # print("|:-----|:-----|:-----|:-----|:-----|:-----|")
        # for m in sorted(models):
            for i in range(times):
                print("|{up}|{id}|{r}|{bp:.4f}|{ts}|{als}|".format(
                    up=params['update_i'],
                    id=models[params['update_i']]['time'],
                    r=models[params['update_i']]['reward'],
                    bp=models[params['update_i']]['bp'],
                    ts=models[params['update_i']]['total_services'],
                    als=models[params['update_i']]['allocated_services']))
        return

    # create the game environments
    envs = [
        make_env(net_config=args.net,
                 wave_num=args.wave_num,
                 k=args.k,
                 mode=args.mode,
                 img_width=args.img_width,
                 img_height=args.img_height,
                 weight=weight,
                 step_over=args.step_over) for _ in range(args.workers)
    ]
    envs = SubprocEnv(envs)
    # create the container that stores and updates rollout variables while the game runs
    rollout = RolloutStorage(num_steps=args.num_steps,
                             num_processes=args.workers,
                             obs_shape=obs_shape,
                             action_shape=action_shape)
    current_obs = torch.zeros(args.workers, *obs_shape)

    observation, _, _, _ = envs.reset()
    update_current_obs(current_obs, observation, channel_num)

    rollout.observations[0].copy_(current_obs)
    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([args.workers, 1])
    final_rewards = torch.zeros([args.workers, 1])

    if args.cuda.startswith("True"):
        current_obs = current_obs.cuda()
        rollout.cuda()

    start = time.time()
    log_start = time.time()
    total_services = 0  # number of services that arrived during log_interval
    allocated_services = 0  # number of services successfully allocated during log_interval
    update_begin = 0

    # check whether we are resuming a previous training run
    if args.resume:
        pms = torch.load(args.resume)
        actor_critic.load_state_dict(pms['state_dict'])
        optimizer.load_state_dict(pms['optimizer'])
        update_begin = pms['update_i']
        print("resume process from update_i {}, with base_lr {}".format(
            update_begin, args.base_lr))

    for updata_i in range(update_begin, num_updates):
        update_start = time.time()
        for step in range(args.num_steps):
            # choose an action
            inp = Variable(rollout.observations[step], volatile=True)  # disable gradient updates
            value, action, action_log_prob = actor_critic.act(
                inputs=inp, deterministic=False)
            # print(action)
            # Squeeze the dimension and move to CPU; since no GPU is involved
            # this is effectively a no-op, kept as a hint.
            cpu_actions = action.data.squeeze(1).cpu().numpy()
            # observe the current observation and the next one
            envs.step_async(cpu_actions)
            obs, reward, done, info = envs.step_wait(
            )  # reward and done are both (n,)-shaped numpy.ndarray vectors
            #  if reward == ARRIVAL_NEWPORT_NEWPORT or reward == ARRIVAL_NOPORT_NEWPORT or reward == ARRIVAL_NOPORT_NOPORT:
            #     allocated_services += 1
            print(reward)
            for i in reward:
                if i == ARRIVAL_NEWPORT or i == ARRIVAL_NOPORT:
                    allocated_services += 1
        #  allocated_services += (reward==ARRIVAL_NEWPORT_NEWPORT or reward==ARRIVAL_NOPORT_NEWPORT or reward==ARRIVAL_NOPORT_NOPORT).any().sum()  # count the rewards that indicate successful allocation
        # TODO unresolved
            if args.step_over.startswith('one_service'):
                total_services += (info == True).sum()  # count how many service-arrival events this step contains
            # elif args.step_over.startswith('one_service'):
            #     total_services += args.workers
            else:
                raise NotImplementedError
            reward = torch.from_numpy(np.expand_dims(reward, 1)).float()
            episode_rewards += reward  # accumulate the reward
            # If a game has ended, restart the episode_rewards and final_rewards
            # accumulation, using the returned reward as the new initial value.
            masks = torch.FloatTensor([[0.0] if d else [1.0] for d in done
                                       ])  # True --> 0, False --> 1
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks
            #            if done[len(done)-1]:
            #               print('final port count at game end:', envs.get_all_edges_port())

            if args.cuda.startswith("True"):
                masks = masks.cuda()

            # Expand masks by two dimensions and multiply with current_obs: the obs
            # of finished game processes become 0, i.e. an all-black end-of-game screen.
            current_obs *= masks.unsqueeze(2).unsqueeze(2)
            update_current_obs(current_obs=current_obs,
                               obs=obs,
                               channel_num=channel_num)
            # store the results obtained in this step
            rollout.insert(step=step,
                           current_obs=current_obs,
                           action=action.data,
                           action_log_prob=action_log_prob.data,
                           value_pred=value.data,
                           reward=reward,
                           mask=masks)

        # TODO forced stop
        # envs.close()
        # return

        # Be careful not to reference variables defined in the for loop above;
        # watch the naming and use of the variables below.
        next_inp = Variable(rollout.observations[-1], volatile=True)  # disable gradient updates
        next_value = actor_critic(next_inp)[0].data  # value estimate for the next step
        rollout.compute_returns(next_value=next_value,
                                use_gae=False,
                                gamma=args.gamma,
                                tau=None)

        if args.algo.startswith('a2c'):
            # A2C gradient update below
            inps = Variable(rollout.observations[:-1].view(-1, *obs_shape))
            acts = Variable(rollout.actions.view(-1, action_shape))

            # print("a2cs's acts size is {}".format(acts.size()))
            value, action_log_probs, cls_entropy = actor_critic.evaluate_actions(
                inputs=inps, actions=acts)
            print(cls_entropy.data)
            # print("inputs' shape is {}".format(inps.size()))
            # print("value's shape is {}".format(value.size()))
            value = value.view(args.num_steps, args.workers, 1)
            # print("action_log_probs's shape is {}".format(action_log_probs.size()))
            action_log_probs = action_log_probs.view(args.num_steps,
                                                     args.workers, 1)
            # compute the loss
            advantages = Variable(rollout.returns[:-1]) - value
            value_loss = advantages.pow(2).mean()  # L2Loss or MSE Loss
            action_loss = -(Variable(advantages.data) *
                            action_log_probs).mean()
            total_loss = value_loss * args.value_loss_coef + action_loss - cls_entropy * args.entropy_coef

            optimizer.zero_grad()
            total_loss.backward()
            # gradient clipping (https://www.cnblogs.com/lindaxin/p/7998196.html)
            nn.utils.clip_grad_norm(actor_critic.parameters(),
                                    args.max_grad_norm)
            # average_gradients(actor_critic)
            optimizer.step()
        elif args.algo.startswith('ppo'):
            # PPO gradient update below
            advantages = rollout.returns[:-1] - rollout.value_preds[:-1]
            advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                             1e-5)
            for e in range(args.ppo_epoch):
                data_generator = rollout.feed_forward_generator(
                    advantages, args.num_mini_batch)

                for sample in data_generator:
                    observations_batch, actions_batch, \
                    return_batch, masks_batch, old_action_log_probs_batch, \
                    adv_targ = sample

                    # Reshape to do in a single forward pass for all steps
                    values, action_log_probs, cls_entropy = actor_critic.evaluate_actions(
                        Variable(observations_batch), Variable(actions_batch))

                    adv_targ = Variable(adv_targ)
                    ratio = torch.exp(action_log_probs -
                                      Variable(old_action_log_probs_batch))
                    surr1 = ratio * adv_targ
                    surr2 = torch.clamp(ratio, 1.0 - args.clip_param,
                                        1.0 + args.clip_param) * adv_targ
                    action_loss = -torch.min(
                        surr1,
                        surr2).mean()  # PPO's pessimistic surrogate (L^CLIP)

                    value_loss = (Variable(return_batch) -
                                  values).pow(2).mean()
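                    # NOTE (assumption, not in the original snippet): this PPO
                    # branch computes action_loss and value_loss but never steps
                    # the optimizer; a typical update here would be:
                    #   optimizer.zero_grad()
                    #   (value_loss * args.value_loss_coef + action_loss
                    #    - cls_entropy * args.entropy_coef).backward()
                    #   nn.utils.clip_grad_norm(actor_critic.parameters(),
                    #                           args.max_grad_norm)
                    #   optimizer.step()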

        # post-update bookkeeping
        rollout.after_update()
        update_time = time.time() - update_start
        print("updates {} finished, cost time {}:{}".format(
            updata_i, update_time // 60, update_time % 60))
        # print("total services is {}".format(total_services))
        # save the model
        if updata_i % args.save_interval == 0:
            save_path = os.path.join(args.save_dir, 'a2c')
            save_path = os.path.join(save_path, args.cnn)
            save_path = os.path.join(save_path, args.step_over)
            save_path = os.path.join(save_path, args.parameter)
            if os.path.exists(save_path) and os.path.isdir(save_path):
                pass
            else:
                os.makedirs(save_path)
            save_file = os.path.join(save_path, str(updata_i) + '.tar')
            save_content = {
                'update_i': updata_i,
                'state_dict': actor_critic.state_dict(),
                'optimizer': optimizer.state_dict(),
                'mean_reward': final_rewards.mean()
            }
            torch.save(save_content, save_file)

        # write the log
        if updata_i % args.log_interval == 0:
            end = time.time()
            interval = end - log_start
            remaining_seconds = (num_updates - updata_i -
                                 1) / args.log_interval * interval
            remaining_hours = int(remaining_seconds // 3600)
            remaining_minutes = int((remaining_seconds % 3600) / 60)
            total_num_steps = (updata_i + 1) * args.workers * args.num_steps
            blocked_services = total_services - allocated_services
            bp = blocked_services / total_services
            wave_port_num, total_port_num = envs.get_all_edges_port()
            wave_occ_sum, resource_utilization_rate = envs.get_resourceUtilization(
            )

            print(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, \
            entropy {:.5f}, value loss {:.5f}, policy loss {:.8f}, remaining time {}:{}, blocking rate {}/{}={}, \
                  per-wavelength port counts {}, total port count {}, bandwidth occupancy {}, resource utilization {}".format(
                    updata_i, total_num_steps,
                    int(total_num_steps / (end - start)), final_rewards.mean(),
                    final_rewards.median(), final_rewards.min(),
                    final_rewards.max(), cls_entropy.data, value_loss.data,
                    action_loss.data, remaining_hours, remaining_minutes,
                    blocked_services, total_services, bp, wave_port_num,
                    total_port_num, wave_occ_sum, resource_utilization_rate))
            # raise NotImplementedError
            total_services = 0
            allocated_services = 0
            log_start = time.time()

    envs.close()
Example 15
    def _train_pred(self, column, test_features, SEED=None):
        '''
        One training session.
        self.train_features are the features used for training.
        self.train_targets[column] are the answers to the training questions.
        self.val_features are the features used for validation.
        self.val_targets[column] is the answer used for validation.
        '''

        x_train, x_val, y_train, y_val = train_test_split(
            self.features,
            self.targets[column],
            test_size=0.1,
            shuffle=True,
            # stratify=True,
        )

        model = SimpleNet(
            self.num_hidden_layers,
            self.dropout_rate,
            len(x_train.columns),
            self.hidden_size,
            1,
        )

        model.to(self.device)
        train_dataset = TrainDataset(x_train, y_train)
        trainloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
        )

        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=self.learning_rate,
                                      weight_decay=1e-3)

        best_loss = np.inf
        i = torch.tensor(self.test_features.values, dtype=torch.float)

        for epoch in range(1, self.epochs):
            train_loss = train_fn(model, optimizer, nn.BCEWithLogitsLoss(),
                                  trainloader, self.device)
            valid_loss = valid_fn(model, nn.BCEWithLogitsLoss(), x_val, y_val,
                                  self.device)
            self.logger.info(
                'Epoch:{}, train_loss:{:.5f}, valid_loss:{:.5f}'.format(
                    epoch, train_loss, valid_loss))

            if valid_loss < best_loss:
                not_update_epoch = 0
                best_loss = valid_loss
                torch.save(model.state_dict(),
                           'best_model_{}.pth'.format(column))
            else:
                not_update_epoch += 1
            # if early_stopping_epoch == not_update_epoch:
            #     print('early stopping')
            #     torch.save(model.state_dict(), 'best_model_{}.pth'.format(column))
            #     break

        self.score += best_loss
        self.num_add += 1
        self.logger.info("column:{} validation loss {}".format(
            column, best_loss))
        gc.collect()
        y_pred = inference_fn(model, self.test_features, self.device)
        return y_pred, best_loss
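train_fn, valid_fn and inference_fn are helpers defined elsewhere. A minimal train_fn consistent with the call above, with signature and behavior as assumptions, might be:

import torch

def train_fn(model, optimizer, loss_fn, dataloader, device):
    # Hypothetical helper: one pass over the training loader,
    # returning the mean batch loss.
    model.train()
    total_loss = 0.0
    for x, y in dataloader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        loss = loss_fn(model(x).squeeze(1), y.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)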
Example 16
class Trainer():
    def __init__(self,
                 trainData,
                 validData,
                 hidden_size,
                 device,
                 model_dir="model"):
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        self.generator = Generator(input_size=8,
                                   output_size=1,
                                   hidden_size=hidden_size).to(device)
        self.discriminator = Discriminator(input_size=1,
                                           output_size=1,
                                           hidden_size=hidden_size).to(device)
        self.classfication = SimpleNet(input_size=9,
                                       output_size=12,
                                       hidden_size=hidden_size).to(device)
        self.adversarial_loss = nn.BCEWithLogitsLoss()
        self.criterion = nn.CrossEntropyLoss()
        self.opt_G = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
        self.opt_D = torch.optim.Adam(self.discriminator.parameters(), lr=1e-4)
        self.opt_C = torch.optim.Adam(self.classfication.parameters(), lr=1e-4)
        self.device = device
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        self.generator.train(training)
        self.discriminator.train(training)
        self.classfication.train(training)

        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=256,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)

        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)

        g_loss = 0
        d_loss = 0
        loss = 0
        acc = accuracy()

        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            labels = labels.to(self.device)
            batch_size = ft.shape[0]
            true = Variable(torch.FloatTensor(batch_size, 1).fill_(1.0),
                            requires_grad=False).to(self.device)  # (batch, 1)
            fake = Variable(torch.FloatTensor(batch_size, 1).fill_(0.0),
                            requires_grad=False).to(self.device)  # (batch, 1)

            # -----------------
            #  Train Generator
            # -----------------

            gen_missing = self.generator(ft.detach())
            validity = self.discriminator(gen_missing)
            batch_g_loss = self.adversarial_loss(validity, true)

            if training:
                self.opt_G.zero_grad()
                batch_g_loss.backward()
                self.opt_G.step()
            g_loss += batch_g_loss.item()

            # ---------------------
            #  Train Discriminator
            # ---------------------
            real_pred = self.discriminator(missing_ft)
            d_real_loss = self.adversarial_loss(real_pred, true)

            fake_missing = self.generator(ft.detach())
            fake_pred = self.discriminator(fake_missing)
            d_fake_loss = self.adversarial_loss(fake_pred, fake)
            batch_d_loss = (d_real_loss + d_fake_loss) / 2

            if training:
                self.opt_D.zero_grad()
                batch_d_loss.backward()
                self.opt_D.step()
            d_loss += batch_d_loss.item()

            # ------------------
            #  Train Classifier
            # ------------------

            gen_missing = self.generator(ft.detach())
            all_features = torch.cat((ft, gen_missing), dim=1)
            o_labels = self.classfication(all_features)
            batch_loss = self.criterion(o_labels, labels)
            if training:
                self.opt_C.zero_grad()
                batch_loss.backward()
                self.opt_C.step()
            loss += batch_loss.item()

            acc.update(o_labels, labels)

            trange.set_postfix(acc=acc.print_score(),
                               g_loss=g_loss / (i + 1),
                               d_loss=d_loss / (i + 1),
                               loss=loss / (i + 1))

        if training:
            self.history['train'].append({
                'acc': acc.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })
            self.save_hist()

        else:
            self.history['valid'].append({
                'acc': acc.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })
            self.save_hist()
            if self.best_val < acc.get_score():
                self.best_val = acc.get_score()
                self.save_best(epoch)

    def save_best(self, epoch):
        torch.save(
            {
                'cls': self.classfication.state_dict(),
                'generator': self.generator.state_dict(),
                'discriminator': self.discriminator.state_dict()
            }, self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
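Several trainers on this page use an accuracy helper with update, get_score and print_score methods that is never shown. A minimal sketch consistent with those calls (hypothetical):

import torch

class accuracy:
    # Hypothetical running-accuracy tracker matching acc.update(outputs, labels),
    # acc.get_score() and acc.print_score() as used above.
    def __init__(self):
        self.correct = 0
        self.total = 0

    def update(self, outputs, labels):
        preds = torch.argmax(outputs, dim=1)
        self.correct += (preds == labels).sum().item()
        self.total += labels.size(0)

    def get_score(self):
        return self.correct / max(self.total, 1)

    def print_score(self):
        return '{:.4f}'.format(self.get_score())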
Example 17
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to GPU , if appropriate
    model = SimpleNet(input_dim=args.input_dim,
                      hidden_dim=args.hidden_dim,
                      output_dim=args.output_dim)
    model.to(device)

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.BCELoss()

    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
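Note that nn.BCELoss expects probabilities in [0, 1], so this setup implicitly assumes SimpleNet ends in a sigmoid. If the network emitted raw logits instead, the fused variant would be the drop-in replacement:

import torch.nn as nn

# Only needed if SimpleNet outputs raw logits (an assumption):
# BCEWithLogitsLoss fuses sigmoid and binary cross-entropy for stability.
criterion = nn.BCEWithLogitsLoss()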
Example 18
class Trainer():
    def __init__(self, device, trainData, validData, args):
        self.device = device
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData

        self.fadding_model = SimpleNet(input_size=9,
                                       output_size=12,
                                       hidden_size=args.hidden_size).to(device)
        self.fadding_model.load_state_dict(
            torch.load("model0.33/model.pkl.904"))
        self.fixed_model = SimpleNet(input_size=9,
                                     output_size=12,
                                     hidden_size=args.hidden_size).to(device)
        self.fixed_model.load_state_dict(torch.load("model0.33/model.pkl.904"))

        self.criteria = torch.nn.MSELoss()
        self.opt = torch.optim.AdamW(self.fadding_model.parameters(),
                                     lr=8e-5,
                                     weight_decay=9e-3)
        # self.scheduler = scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=200, gamma=args.step_lr)
        self.batch_size = args.batch_size
        self.model_dir = args.arch
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        self.fadding_model.train(training)
        self.fixed_model.train(False)

        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)

        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)

        loss = 0
        acc_fadding = accuracy()
        acc_fixed = accuracy()

        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            labels = labels.to(self.device)
            missing_fadding_ft = missing_ft * (0.9**((epoch * 100)**(1 / 2)))
            missing_0_ft = missing_ft * 0

            fadding_ft = torch.cat([missing_fadding_ft, ft], dim=1)
            zero_ft = torch.cat([missing_0_ft, ft], dim=1)
            raw_ft = torch.cat([missing_ft, ft], dim=1)

            fadding_out, fadding_hiddens = self.fadding_model(fadding_ft)
            zero_out, _ = self.fadding_model(zero_ft)
            raw_out, raw_hiddens = self.fixed_model(raw_ft)

            batch_loss = 0
            for raw_hidden, fadding_hidden in zip(raw_hiddens,
                                                  fadding_hiddens):
                batch_loss += self.criteria(raw_hidden, fadding_hidden)

            batch_loss += self.criteria(raw_out, fadding_out)

            if training:
                self.opt.zero_grad()
                batch_loss.backward()
                self.opt.step()

            loss += batch_loss.item()
            acc_fadding.update(fadding_out, labels)
            acc_fixed.update(zero_out, labels)

            trange.set_postfix(loss=loss / (i + 1),
                               acc_fadding=acc_fadding.print_score(),
                               acc_fixed=acc_fixed.print_score())

        # self.scheduler.step()

        if training:
            self.history['train'].append({
                'acc-fadding': acc_fadding.get_score(),
                'acc_fixed': acc_fixed.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
        else:
            self.history['valid'].append({
                'acc-fadding': acc_fadding.get_score(),
                'acc_fixed': acc_fixed.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
            if acc_fixed.get_score() > self.best_val:
                self.best_val = acc_fixed.get_score()
                self.save_best(epoch)

    def run_iter(self, x, y):
        features = x.to(self.device)
        labels = y.to(self.device)
        # the original referenced self.model, which this class never defines;
        # self.fadding_model is the trainable network here
        o_labels, hiddens = self.fadding_model(features)
        l_loss = self.criteria(o_labels, labels)
        return o_labels, l_loss

    def save_best(self, epoch):
        torch.save(self.fadding_model.state_dict(),
                   self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
Example 19
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device {}.".format(device))

    torch.manual_seed(args.seed)

    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    ## --- Your code here --- ##

    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to GPU , if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim,
                      args.output_dim).to(device)

    ## TODO: Define an optimizer and loss function for training
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.BCELoss()

    # Trains the model (given line of code, which calls the above training function)
    train(model, train_loader, args.epochs, criterion, optimizer, device)

    ## TODO: complete in the model_info by adding three argument names, the first is given
    # Keep the keys of this dictionary as they are
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
Example 20
class Trainer():
    def __init__(self,
                 trainData,
                 validData,
                 hidden_size,
                 device,
                 model_dir="model"):
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        self.classficationA = SimpleNet(input_size=8,
                                        output_size=12,
                                        hidden_size=hidden_size).to(device)
        self.classficationB = SimpleNet(input_size=9,
                                        output_size=12,
                                        hidden_size=hidden_size).to(device)
        self.criterion = nn.CrossEntropyLoss()
        self.mse_loss = nn.MSELoss()
        self.opt_C_A = torch.optim.Adam(self.classficationA.parameters(),
                                        lr=1e-4)
        self.opt_C_B = torch.optim.Adam(self.classficationB.parameters(),
                                        lr=1e-4)
        self.device = device
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        self.classficationA.train(training)
        self.classficationB.train(training)

        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=256,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)

        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)

        mse_loss = 0
        lossA = 0
        lossB = 0
        accA = accuracy()
        accB = accuracy()

        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            all_ft = torch.cat([ft, missing_ft], dim=1)
            labels = labels.to(self.device)

            # ------------------
            #  Train ClassifierA
            # ------------------

            missing_out, missing_hidden_out = self.classficationA(ft)
            all_out, all_hidden_out = self.classficationB(all_ft)
            batch_loss = self.criterion(missing_out, labels)
            batch_mse_loss = 0
            for missing_hidden, all_hidden in zip(missing_hidden_out,
                                                  all_hidden_out):
                batch_mse_loss += self.mse_loss(missing_hidden, all_hidden)
            mse_loss += batch_mse_loss.item()

            if training:
                self.opt_C_A.zero_grad()
                (batch_mse_loss + batch_loss).backward()
                self.opt_C_A.step()
            lossA += batch_loss.item()
            accA.update(missing_out, labels)

            # ------------------
            #  Train ClassifierB
            # ------------------

            all_out, _ = self.classficationB(all_ft)
            batch_loss = self.criterion(all_out, labels)
            if training:
                self.opt_C_B.zero_grad()
                batch_loss.backward()
                self.opt_C_B.step()
            lossB += batch_loss.item()
            accB.update(all_out, labels)

            trange.set_postfix(accA=accA.print_score(),
                               accB=accB.print_score(),
                               lossA=lossA / (i + 1),
                               lossB=lossB / (i + 1),
                               mseLoss=mse_loss / (i + 1))
        if training:
            self.history['train'].append({
                'accA': accA.get_score(),
                'accB': accB.get_score(),
                'lossA': lossA / len(trange),
                'lossB': lossB / len(trange),
                'mseLoss': mse_loss / len(trange)
            })
            self.save_hist()

        else:
            self.history['valid'].append({
                'accA': accA.get_score(),
                'accB': accB.get_score(),
                'lossA': lossA / len(trange),
                'lossB': lossB / len(trange),
                'mseLoss': mse_loss / len(trange)
            })
            self.save_hist()
            if self.best_val < accA.get_score():
                self.best_val = accA.get_score()
                self.save_best(epoch)

    def save_best(self, epoch):
        torch.save(
            {
                'classficationA': self.classficationA.state_dict(),
                'classficationB': self.classficationB.state_dict(),
            }, self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
Example 21
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--arch',
                        default="model",
                        help='architecture (model_dir)')
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--do_plot', action='store_true')
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=10000, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--step_lr', default=0.5, type=float)
    parser.add_argument('--cuda', default=0, type=int)
    parser.add_argument('--ckpt',
                        type=int,
                        help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:

        dataset = pd.read_csv("../../data/train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset,
                                                test_size=0.1,
                                                random_state=73)
        feature_for_training = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        feature_for_prediction = ["F1"]

        train = preprocess_samples(train_set, feature_for_training,
                                   feature_for_prediction)
        valid = preprocess_samples(valid_set, feature_for_training,
                                   feature_for_prediction)

        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device(
            'cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        max_epoch = args.max_epoch
        trainer = Trainer(device, trainData, validData, args)

        for epoch in range(1, max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)

    if args.do_predict:

        dataset = pd.read_csv("../../data/test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        feature_for_testing = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        test = preprocess_samples(dataset, feature_for_testing)

        testData = FeatureDataset(test)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = SimpleNet(input_size=9,
                          output_size=12,
                          hidden_size=args.hidden_size)
        model.load_state_dict(
            torch.load('%s/model.pkl.%d' % (args.arch, args.ckpt)))
        model.eval()
        model.to(device)
        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc='Predict')
        prediction = []
        with torch.no_grad():  # inference only; no gradient tracking needed
            for i, (ft, _, y) in trange:
                b = ft.shape[0]
                missing_ft = torch.zeros(b, 1)  # placeholder for the unseen F1 column
                all_ft = torch.cat([missing_ft, ft], dim=1)
                o_labels, _ = model(all_ft.to(device))
                o_labels = torch.argmax(o_labels, dim=1)
                prediction.append(o_labels.cpu().numpy().tolist())

        prediction = sum(prediction, [])  # flatten the per-batch lists
        SubmitGenerator(prediction, "../../data/sampleSubmission.csv")

    if args.do_plot:
        plot_history("{file}/history.json".format(file=args.arch))
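
The excerpt stops before the usual entry point; presumably the original file closes with a guard like the following (an assumption, not shown in the source):

# Assumed entry point for the script above.
if __name__ == '__main__':
    main()  # e.g. invoked as: python train.py --do_train --hidden_size 256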
Example 22
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    ## DONE:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to the GPU, if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(
        device)  # device is GPU if available, else CPU

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## DONE: Define an optimizer and loss function for training
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    criterion = nn.BCELoss()  # binary cross entropy loss

    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
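
Note that nn.BCELoss expects probabilities in [0, 1], so the SimpleNet used by these training scripts presumably ends in a sigmoid. A hypothetical definition consistent with that usage (the real SimpleNet is not shown in this excerpt):

import torch.nn as nn

# Hypothetical SimpleNet matching the nn.BCELoss usage above: the final
# Sigmoid maps logits to probabilities, which BCELoss requires.
class SimpleNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(input_dim, hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(hidden_dim, output_dim),
                                 nn.Sigmoid())

    def forward(self, x):
        return self.net(x)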
Example 23
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        
    # get train loader
    train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above..
    
    
    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or ards.hidden_dim
    # Don't forget to move your model .to(device) to move to GPU , if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device)
    
    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training
    optimizer = (optim.Adam(model.parameters(), lr=args.lr)
                 if args.optimizer == 'Adam'
                 else optim.SGD(model.parameters(), lr=args.lr))
    criterion = nn.BCELoss()

    
    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
    
    
Example 24
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to the GPU, if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim,
                      args.output_dim).to(device)

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training
    criterion = nn.BCELoss()

    # specify optimizer (stochastic gradient descent) with the learning rate taken from args.lr
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)

    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
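
These scripts all call a _get_train_loader helper whose body is not included in the excerpt. A plausible sketch, assuming a train.csv in data_dir with the label in column 0 (the common SageMaker layout):

import os
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

# Plausible sketch of the unshown _get_train_loader helper; assumes a
# headerless train.csv with the binary label in the first column.
def _get_train_loader(batch_size, data_dir):
    data = pd.read_csv(os.path.join(data_dir, 'train.csv'), header=None)
    labels = torch.from_numpy(data.iloc[:, 0].values).float().unsqueeze(1)
    features = torch.from_numpy(data.iloc[:, 1:].values).float()
    return DataLoader(TensorDataset(features, labels),
                      batch_size=batch_size,
                      shuffle=True)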
Example 25
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        
    # get train loader
    train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above..
    
    
    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to the GPU, if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device)
    
    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    criterion = nn.BCELoss()

    
    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
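
The train() function these scripts hand off to is likewise not shown. A minimal sketch matching its call signature, written as a plain binary-classification loop (inferred from the call sites, not the original implementation):

# Minimal sketch of train(); signature inferred from the call sites above.
def train(model, train_loader, epochs, optimizer, criterion, device):
    model.train()
    for epoch in range(1, epochs + 1):
        total_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)  # BCE on sigmoid outputs
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print('Epoch: {}, Loss: {:.4f}'.format(epoch,
                                               total_loss / len(train_loader)))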
    
Example 26
                          num_workers=12)
val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=12)
test_loader = DataLoader(test_dataset,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=12)

train_data_loader = {'train': train_loader, 'val': val_loader}

#%% 2| Model - Preparation
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = SimpleNet()
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(
    model.parameters(), lr=learning_rate,
    momentum=0.9)  # Observe that all parameters are being optimized
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft, step_size=20,
    gamma=0.1)  # Decay LR by a factor of 0.1 every 20 epochs

#%% 3| Model - Training

model_trained = train_model(device=device,
                            data_loaders=train_data_loader,
Example 27
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # get train loader
    train_loader = _get_train_loader(args.batch_size,
                                     args.data_dir)  # data_dir from above..

    ## TODO:  Build the model by passing in the input params
    # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
    # Don't forget to move your model .to(device) to move to the GPU, if appropriate
    model = SimpleNet(args.input_dim, args.hidden_dim,
                      args.output_dim).to(device)

    # Given: save the parameters used to construct the model
    save_model_params(model, args.model_dir)

    ## TODO: Define an optimizer and loss function for training

    # overall, Adam gives good results here
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.BCELoss()

    # Trains the model (given line of code, which calls the above training function)
    # This function *also* saves the model state dictionary
    train(model, train_loader, args.epochs, optimizer, criterion, device)
Example 28
class Trainer():
    def __init__(self, device, trainData, validData, args):
        self.device = device
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        self.model = SimpleNet(input_size=9,
                               output_size=12,
                               hidden_size=args.hidden_size).to(device)
        self.criteria = torch.nn.CrossEntropyLoss()
        self.opt = torch.optim.AdamW(self.model.parameters(),
                                     lr=args.lr,
                                     weight_decay=3.3e-1)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt,
                                                         step_size=200,
                                                         gamma=args.step_lr)
        self.batch_size = args.batch_size
        self.model_dir = args.arch
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        self.model.train(training)

        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)

        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)

        loss = 0
        acc = accuracy()

        for i, (x, _, y) in trange:
            o_labels, batch_loss = self.run_iter(x, y)
            if training:
                self.opt.zero_grad()
                batch_loss.backward()
                self.opt.step()

            loss += batch_loss.item()
            acc.update(o_labels.cpu(), y)

            trange.set_postfix(loss=loss / (i + 1),
                               acc=acc.print_score())
        if training:
            self.scheduler.step()  # step the LR schedule once per training epoch
            self.history['train'].append({
                'acc': acc.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
        else:
            self.history['valid'].append({
                'acc': acc.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
            if acc.get_score() > self.best_val:
                self.best_val = acc.get_score()
                self.save_best(epoch)

    def run_iter(self, x, y):
        features = x.to(self.device)
        labels = y.to(self.device)
        o_labels = self.model(features)
        l_loss = self.criteria(o_labels, labels)
        return o_labels, l_loss

    def save_best(self, epoch):
        torch.save(self.model.state_dict(),
                   self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
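
A short driver for this Trainer, mirroring the earlier main() (hypothetical glue; assumes args, trainData, and validData are built exactly as in that function):

# Hypothetical driver; args/trainData/validData come from the earlier main().
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    trainer = Trainer(device, trainData, validData, args)
    for epoch in range(1, args.max_epoch + 1):
        print('Epoch: {}'.format(epoch))
        trainer.run_epoch(epoch, training=True)   # train split
        trainer.run_epoch(epoch, training=False)  # validation split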