Example #1
def init(args):
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('checkpoints/' + args.exp_name):
        os.makedirs('checkpoints/' + args.exp_name)
        os.system('cp ./mixup.yml ./checkpoints/' + args.exp_name)
    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    return io
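
The IOStream helper used throughout these examples is not shown on this page. A minimal sketch of a compatible implementation, assuming it is the usual console-plus-file logger whose cprint method both prints a line and appends it to the log file:

class IOStream:
    def __init__(self, path):
        # Open the log file in append mode; callers create the parent
        # directory beforehand, as the init() functions above do.
        self.f = open(path, 'a')

    def cprint(self, text):
        # Echo to stdout and persist the same line to the log file.
        print(text)
        self.f.write(text + '\n')
        self.f.flush()

    def close(self):
        self.f.close()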
Example #2
def init(args, configpath):
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('checkpoints/' + args.exp_name):
        os.makedirs('checkpoints/' + args.exp_name)
        os.system('cp ' + configpath + ' ./checkpoints/' + args.exp_name)
    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    return io
Example #3
def _init_(args):
    # initialize parameters
    path = 'results/' + args.exp_name + str(args.nFold)
    if not os.path.exists(path):
        os.mkdir(path)
    args.resume = path + '/checkpoint.pth.tar'
    args.best = path + '/bestmodel.pth.tar'
    args.io = IOStream(path + '/run.log')
    args.start_epoch = 0
    args.best_prec1 = 0
    args.device = device
def voting(net, testloader, device, args):
    name = '/evaluate_voting' + str(
        datetime.datetime.now().strftime('-%Y%m%d%H%M%S')) + '.log'
    io = IOStream(args.checkpoint + name)
    io.cprint(str(args))

    net.eval()
    best_acc = 0
    best_mean_acc = 0
    pointscale = PointcloudScale(scale_low=0.8,
                                 scale_high=1.18)  # set the range of scaling

    for i in range(args.NUM_PEPEAT):
        test_true = []
        test_pred = []

        for batch_idx, (data, label) in enumerate(testloader):
            data, label = data.to(device), label.to(device).squeeze()
            pred = 0
            for v in range(args.NUM_VOTE):
                new_data = data
                # batch_size = data.size()[0]
                if v > 0:
                    new_data.data = pointscale(new_data.data)
                with torch.no_grad():
                    pred += F.softmax(net(new_data.permute(0, 2, 1)),
                                      dim=1)  # sum 10 preds
            pred /= args.NUM_VOTE  # avg the preds!
            label = label.view(-1)
            pred_choice = pred.max(dim=1)[1]
            test_true.append(label.cpu().numpy())
            test_pred.append(pred_choice.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = 100. * metrics.accuracy_score(test_true, test_pred)
        test_mean_acc = 100. * metrics.balanced_accuracy_score(
            test_true, test_pred)
        if test_acc > best_acc:
            best_acc = test_acc
        if test_mean_acc > best_mean_acc:
            best_mean_acc = test_mean_acc
        outstr = 'Voting %d, test acc: %.3f, test mean acc: %.3f,  [current best(all_acc: %.3f mean_acc: %.3f)]' % \
                 (i, test_acc, test_mean_acc, best_acc, best_mean_acc)
        io.cprint(outstr)

    final_outstr = 'Final voting test acc: %.6f' % best_acc
    io.cprint(final_outstr)
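
The voting() routine above expects an args object carrying NUM_PEPEAT, NUM_VOTE and checkpoint attributes (names taken from the snippet). A hypothetical invocation, assuming net, testloader and device are built by the surrounding script:

from types import SimpleNamespace

vote_args = SimpleNamespace(NUM_PEPEAT=10,   # number of voting rounds
                            NUM_VOTE=10,     # scaled forward passes averaged per batch
                            checkpoint='checkpoints/exp')  # folder receiving the evaluate_voting-*.log file
# voting(net, testloader, device, vote_args)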
Example #5
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='dropout rate')
    parser.add_argument('--model_path',
                        type=str,
                        default='',
                        metavar='N',
                        help='Pretrained model path')
    args = parser.parse_args()
    config = Config.from_json_file('config.json')
    args.feat_dim = config.n_address

    _init_()

    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    io.cprint(str(args))

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        io.cprint('Using GPU')
        torch.cuda.manual_seed(args.seed)
    else:
        torch.manual_seed(args.seed)
        io.cprint('Using CPU')

    train(args, config, io)
Example #6
                        help='Dimension of embeddings')
    parser.add_argument('--k',
                        type=int,
                        default=20,
                        metavar='N',
                        help='Num of nearest neighbors to use')
    parser.add_argument('--model_path',
                        type=str,
                        default='',
                        metavar='N',
                        help='Pretrained model path')
    args = parser.parse_args()

    _init_()

    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    io.cprint(str(args))

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        io.cprint('Using GPU : ' + str(torch.cuda.current_device()) +
                  ' from ' + str(torch.cuda.device_count()) + ' devices')
        torch.cuda.manual_seed(args.seed)
    else:
        io.cprint('Using CPU')

    if not args.eval:
        train(args, io)
    else:
        test(args, io)
Example #7
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--manualSeed', type=int, help='manual seed')
    parser.add_argument('--batch_size', type=int, default=30)
    parser.add_argument('--epochs', type=int, default=241)
    parser.add_argument('--workers',
                        type=int,
                        default=6,
                        help='num of workers to load data for each DataLoader')
    parser.add_argument('--checkpoints_dir',
                        '-CDIR',
                        default='experiments_deco',
                        help='Folder where all experiments get stored')
    parser.add_argument(
        '--exp_name',
        '-EXP',
        default='exp',
        help='will create an exp_name folder under checkpoints_dir')
    parser.add_argument('--config',
                        '-C',
                        required=True,
                        help='path to valid configuration file')
    parser.add_argument('--parallel',
                        action='store_true',
                        help="Multi-GPU Training")
    parser.add_argument(
        '--it_test',
        type=int,
        default=10,
        help='at each it_test epoch: perform test and checkpoint')
    parser.add_argument('--restart_from',
                        default='',
                        help='restart interrupted training from checkpoint')
    parser.add_argument(
        '--class_choice',
        default=
        "Airplane,Bag,Cap,Car,Chair,Guitar,Lamp,Laptop,Motorbike,Mug,Pistol,Skateboard,Table",
        help='Classes to train on: default is 13 classes used in PF-Net')
    parser.add_argument(
        '--data_root',
        default=
        "/home/antonioa/data/shapenetcore_partanno_segmentation_benchmark_v0")

    # crop params
    parser.add_argument('--crop_point_num',
                        type=int,
                        default=512,
                        help='number of points to crop')
    parser.add_argument('--context_point_num',
                        type=int,
                        default=512,
                        help='number of points of the frame region')
    parser.add_argument('--num_holes',
                        type=int,
                        default=1,
                        help='number of crop_point_num holes')
    parser.add_argument(
        '--pool1_points',
        '-P1',
        type=int,
        default=1280,
        help=
        'points selected at pooling layer 1, we use 1280 in all experiments')
    parser.add_argument(
        '--pool2_points',
        '-P2',
        type=int,
        default=512,
        help=
        'points selected at pooling layer 2, should match crop_point_num i.e. 512'
    )
    # parser.add_argument('--fps_centroids', '-FPS', action='store_true', help='different crop logic than pfnet')
    parser.add_argument(
        '--raw_weight',
        '-RW',
        type=float,
        default=1,
        help=
        'weights the intermediate pred (frame reg.) loss; use 0 to disable regularization.'
    )

    args = parser.parse_args()
    args.fps_centroids = False

    # make experiment dirs
    args.save_dir = os.path.join(args.checkpoints_dir, args.exp_name)
    args.models_dir = os.path.join(args.save_dir, 'models')
    args.vis_dir = os.path.join(args.save_dir, 'train_visz')
    safe_make_dirs([
        args.save_dir, args.models_dir, args.vis_dir,
        os.path.join(args.save_dir, 'backup_code')
    ])

    # instantiate loggers
    io_logger = IOStream(os.path.join(args.save_dir, 'log.txt'))
    tb_logger = SummaryWriter(logdir=args.save_dir)

    return args, io_logger, tb_logger
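
safe_make_dirs is not defined in this excerpt; a minimal sketch, assuming it simply creates every directory in the given list and skips the ones that already exist:

import os

def safe_make_dirs(paths):
    # Create each directory (including parents), ignoring existing ones.
    for path in paths:
        os.makedirs(path, exist_ok=True)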
parser.add_argument('--tau',
                    type=float,
                    default=1e2,
                    help='balancing weight for loss function [default: 1e2]')
args = parser.parse_args()
args.adj_lr = {
    'steps': [int(temp) for temp in args.step],
    'decay_rates': [float(temp) for temp in args.dr]
}
args.feature_transform, args.augment = bool(args.feature_transform), bool(
    args.augment)
### Set random seed
args.seed = args.seed if args.seed > 0 else random.randint(1, 10000)
if not os.path.exists('checkpoints/' + args.lggan):
    os.mkdir('checkpoints/' + args.lggan)
io = IOStream('checkpoints/' + args.lggan + '/run.log')
io.cprint(str(args))
TAU = args.tau
ITERATION = 100

# create adversarial example path
ADV_PATH = args.adv_path
if not os.path.exists('results'): os.mkdir('results')
ADV_PATH = os.path.join('results', ADV_PATH)
if not os.path.exists(ADV_PATH): os.mkdir(ADV_PATH)
ADV_PATH = os.path.join(ADV_PATH, 'test')

NUM_CLASSES = 40


def write_h5(data, data_orig, label, label_orig, num_batches):
Example #9
    os.makedirs(os.path.join(save_dir, 'models'))
if not os.path.exists(point_netG_saving):
    os.makedirs(point_netG_saving)
if not os.path.exists(point_netD_saving):
    os.makedirs(point_netD_saving)
if not os.path.exists(os.path.join(save_dir, 'backup-code')):
    os.makedirs(os.path.join(save_dir, 'backup-code'))
if not os.path.exists(os.path.join(save_dir, "train_visz")):
    os.makedirs(os.path.join(save_dir, "train_visz"))

filename = os.path.abspath(__file__).split('/')[-1]
os.system('cp {} {}'.format(
    os.path.abspath(__file__),
    os.path.join(save_dir, 'backup-code', '{}.backup'.format(filename))))

io = IOStream(os.path.join(save_dir, 'log.txt'))
tb = SummaryWriter(logdir=save_dir)
io.cprint("PFNet training -\n num holes: %d, cropped points around each: %d" %
          (opt.num_holes, opt.crop_point_num))
io.cprint('-' * 30)
io.cprint('Arguments: ')
io.cprint(str(opt) + '\n')

USE_CUDA = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
point_netG = _netG(opt.num_scales, opt.each_scales_size, opt.point_scales_list,
                   opt.crop_point_num * opt.num_holes)
if opt.D_choose == 1:
    point_netD = _netlocalD(opt.crop_point_num * opt.num_holes)
resume_epoch = 0
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("Conv1d") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find("BatchNorm1d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


args = parse_args()
exp_dir = os.path.join(args.checkpoints_dir, args.exp_name + '_' + str(int(time.time())))
tb_dir, models_dir = osp.join(exp_dir, "tb_logs"), osp.join(exp_dir, "models")
safe_make_dirs([tb_dir, models_dir])
io = IOStream(osp.join(exp_dir, "log.txt"))
io.cprint(f"Arguments: {str(args)} \n")
tb_writer = SummaryWriter(logdir=tb_dir)
centroids = np.asarray([[1, 0, 0], [0, 0, 1], [1, 0, 1], [-1, 0, 0], [-1, 1, 0]])  # same as PFNet

if args.num_positive_samples > 2:
    criterion = SupConLoss(temperature=args.temp, base_temperature=1, contrast_mode='all')
else:
    criterion = SimCLRLoss(temperature=args.temp)

io.cprint("Contrastive learning params: ")
io.cprint(f"criterion: {str(criterion)}")
io.cprint(f"num positive samples: {args.num_positive_samples}")
io.cprint(f"centroids cropping: {str(centroids)}")

train_transforms = transforms.Compose(
Example #11
def main(opt):
    exp_dir = osp.join(opt.checkpoints_dir, opt.exp_name)
    tb_dir, models_dir = osp.join(exp_dir,
                                  "tb_logs"), osp.join(exp_dir, "models")
    safe_make_dirs([tb_dir, models_dir])
    io = IOStream(osp.join(exp_dir, "log.txt"))
    tb_logger = SummaryWriter(logdir=tb_dir)
    assert os.path.exists(opt.config), "wrong config path"
    with open(opt.config) as cf:
        config = json.load(cf)
    io.cprint(f"Arguments: {str(opt)}")
    io.cprint(f"Config: {str(config)} \n")

    if len(opt.class_choice) > 0:
        class_choice = ''.join(opt.class_choice.split()).split(
            ",")  # sanitize + split(",")
        io.cprint("Class choice: {}".format(str(class_choice)))
    else:
        class_choice = None

    train_dataset = PretextDataset(root=opt.data_root,
                                   task='denoise',
                                   class_choice=class_choice,
                                   npoints=config["num_points"],
                                   split='train',
                                   normalize=True,
                                   noise_mean=config["noise_mean"],
                                   noise_std=config["noise_std"])

    test_dataset = PretextDataset(root=opt.data_root,
                                  task='denoise',
                                  class_choice=class_choice,
                                  npoints=config["num_points"],
                                  split='test',
                                  normalize=True,
                                  noise_mean=config["noise_mean"],
                                  noise_std=config["noise_std"])

    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=opt.workers)

    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             drop_last=False,
                             num_workers=opt.workers)

    criterion = nn.MSELoss()  # loss function for denoising
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # MODEL
    model = GPDLocalFE(config)
    if opt.parallel:
        io.cprint(
            f"DataParallel training with {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)

    model = model.to(device)
    io.cprint(f'model: {str(model)}')

    # OPTIMIZER + SCHEDULER
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20,
                                                gamma=0.5)

    train_start = time.time()
    for epoch in range(opt.epochs):
        # TRAIN
        # We compute both MSE and Chamfer Distance between the cleaned pointcloud and the clean GT,
        # where cleaned = model(noised).
        # MSE is used as the loss function; Chamfer Distance is just an additional metric.
        ep_start = time.time()
        train_mse, train_cd = train_one_epoch(train_loader, model, optimizer,
                                              criterion, device)
        train_time = time.strftime("%M:%S",
                                   time.gmtime(time.time() - ep_start))
        io.cprint("Train %d, time: %s, MSE (loss): %.6f, CD (dist): %.6f" %
                  (epoch, train_time, train_mse, train_cd))
        tb_logger.add_scalar("Train/MSE_loss", train_mse, epoch)
        tb_logger.add_scalar("Train/CD_dist", train_cd, epoch)

        # TEST
        mse_test, cd_test = test(test_loader, model, criterion, device)
        io.cprint("Test %d, MSE (loss): %.6f, CD (dist): %.6f" %
                  (epoch, mse_test, cd_test))
        tb_logger.add_scalar("Test/MSE", mse_test, epoch)
        tb_logger.add_scalar("Test/CD", cd_test, epoch)

        # LR SCHEDULING
        scheduler.step()

        if epoch % 10 == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict()
                    if not opt.parallel else model.module.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, osp.join(models_dir, "local_denoise_{}.pth".format(epoch)))

    hours, rem = divmod(time.time() - train_start, 3600)
    minutes, seconds = divmod(rem, 60)
    io.cprint("Training ended in {:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
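
The checkpoint dict written every 10 epochs above can be restored with the same keys; a minimal resume sketch (the file name and epoch are illustrative):

# Rebuild model, optimizer and scheduler as in main(), then restore their state.
ckpt = torch.load(osp.join(models_dir, 'local_denoise_20.pth'), map_location=device)
target = model.module if opt.parallel else model
target.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
scheduler.load_state_dict(ckpt['scheduler_state_dict'])
start_epoch = ckpt['epoch'] + 1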
Example #12
    def __init__(self,
                 actions,
                 calculate_reward,
                 get_legal_actions,
                 transition,
                 version=0,
                 load_model=True,
                 load_memories=False,
                 best=False,
                 trainer=True,
                 memories=[]):
        create_folders()

        if memories != []:
            self.memories = memories
        else:
            self.memories = []

        self.load_model = load_model
        self.load_memories = load_memories

        self.actions = actions
        self.get_legal_actions = get_legal_actions
        self.calculate_reward = calculate_reward
        self.transition = transition

        self.best = best

        self.io = IOStream("checkpoints/run.log")

        self.cuda = False
        self.models = setup_models(self.io, load_model, self.cuda, trainer)
        self.optims = setup_optims(self.models, self.cuda)
        self.version = version

        if not best:
            if load_memories and version != "best" and memories == []:
                print("Loading Memories...")
                try:
                    self.memories = pickle.load(
                        open("checkpoints/memories.p", "rb"))
                except FileNotFoundError:
                    print("Memories not found, making new memories.")

            print("Loading History...")
            try:
                self.history = pickle.load(open("checkpoints/history.p", "rb"))
            except FileNotFoundError:
                print("Loss history not found, starting new history.")
                self.history = {
                    "readout": [],
                    "policy": [],
                    "value": [],
                    "total": []
                }

            self.best_net = MCTSnet(self.actions,
                                    self.calculate_reward,
                                    self.get_legal_actions,
                                    self.transition,
                                    self.version,
                                    self.load_model,
                                    self.load_memories,
                                    best=True,
                                    trainer=False)
Example #13
class MCTSnet:
    def __init__(self,
                 actions,
                 calculate_reward,
                 get_legal_actions,
                 transition,
                 version=0,
                 load_model=True,
                 load_memories=False,
                 best=False,
                 trainer=True,
                 memories=[]):
        create_folders()

        if memories != []:
            self.memories = memories
        else:
            self.memories = []

        self.load_model = load_model
        self.load_memories = load_memories

        self.actions = actions
        self.get_legal_actions = get_legal_actions
        self.calculate_reward = calculate_reward
        self.transition = transition

        self.best = best

        self.io = IOStream("checkpoints/run.log")

        self.cuda = False
        self.models = setup_models(self.io, load_model, self.cuda, trainer)
        self.optims = setup_optims(self.models, self.cuda)
        self.version = version

        if not best:
            if load_memories and version != "best" and memories == []:
                print("Loading Memories...")
                try:
                    self.memories = pickle.load(
                        open("checkpoints/memories.p", "rb"))
                except FileNotFoundError:
                    print("Memories not found, making new memories.")

            print("Loading History...")
            try:
                self.history = pickle.load(open("checkpoints/history.p", "rb"))
            except FileNotFoundError:
                print("Loss history not found, starting new history.")
                self.history = {
                    "readout": [],
                    "policy": [],
                    "value": [],
                    "total": []
                }

            self.best_net = MCTSnet(self.actions,
                                    self.calculate_reward,
                                    self.get_legal_actions,
                                    self.transition,
                                    self.version,
                                    self.load_model,
                                    self.load_memories,
                                    best=True,
                                    trainer=False)

    def choose_row(self):
        while True:
            try:
                inp = int(input("Pick a row, 1-7: "))
                inp -= 1
                return inp
            except Exception as e:
                print("Invalid choice.")

    def play_cpu(self, root_state, curr_player=0):
        eval_mode(self.models)

        root_state = np.array(root_state, dtype="float32")
        joint_state = [np.copy(root_state), np.copy(root_state)]
        results = dict()
        results["player_one"] = 0
        results["player_two"] = 0
        results["draw"] = 0
        np.set_printoptions(precision=3)
        # if (curr_player==0):
        #     first_player=True
        # else:
        #     first_player=False

        game_over = False
        joint = np.copy(joint_state)
        while not game_over:
            legal_actions = self.get_legal_actions(joint)
            if len(legal_actions) == 0:
                results["draw"] += 1
                break
            if curr_player == 0:
                joint_copy = np.copy(joint)
                blank = [["_" for _ in range(7)] for _ in range(6)]
                # dsp = np.array(blank, dtype=object)
                m1 = np.ma.masked_where(joint_copy[0] > 0, blank)
                np.ma.set_fill_value(m1, "O")
                m1 = m1.filled()
                m2 = np.ma.masked_where(joint_copy[1] > 0, m1)
                np.ma.set_fill_value(m2, "X")
                m2 = m2.filled()
                print(m2)

                row = self.choose_row()
                idx = legal_actions[row]
                action = self.actions[idx]
            else:
                pi, _ = self.run_simulations(joint, curr_player, 0)

                print(pi)

                pi = self.apply_temp_to_policy(pi, 0, T=0)

                idx = np.random.choice(len(self.actions), p=pi)

                action = self.actions[idx]

            joint[curr_player] = self.transition(joint[curr_player], action)
            reward, game_over = self.calculate_reward(joint)

            if game_over:
                if reward == -1:
                    results["player_two"] += 1
                elif reward == 1:
                    results["player_one"] += 1
            else:
                curr_player += 1
                curr_player = curr_player % 2

        print(results)

    def do_round(self,
                 results,
                 joint_state,
                 curr_player,
                 T=config.TAU,
                 record_memories=True):
        if record_memories:
            memories = []
        game_over = False
        joint = np.copy(joint_state)
        turn = 0
        while not game_over:
            turn += 1
            legal_actions = self.get_legal_actions(joint)
            if len(legal_actions) == 0:
                results["draw"] += 1
                break
            if curr_player == 0:
                pi, memory = self.run_simulations(joint, curr_player, turn)
            else:
                pi, memory = self.best_net.run_simulations(
                    joint, curr_player, turn)

            pre_temp_idx = np.random.choice(len(self.actions), p=pi)
            pi = self.apply_temp_to_policy(pi, turn, T)
            idx = np.random.choice(len(self.actions), p=pi)

            memory["readout"]["output"] = F.log_softmax(
                memory["readout"]["output"], dim=0)[pre_temp_idx]

            if record_memories:
                memories.extend([memory])

            action = self.actions[idx]

            joint[curr_player] = self.transition(joint[curr_player], action)
            reward, game_over = self.calculate_reward(joint)

            if game_over:
                if reward == -1:
                    results["player_two"] += 1
                elif reward == 1:
                    results["player_one"] += 1
            else:
                curr_player += 1
                curr_player = curr_player % 2
        if record_memories:
            for memory in memories:
                if memory["curr_player"] == 0:
                    memory["result"] = reward
                else:
                    memory["result"] = -1 * reward
            self.memories.extend(memories)

    def self_play(self,
                  root_state,
                  curr_player=0,
                  save_model=True,
                  T=config.TAU,
                  record_memories=True):
        # Consider separating the network evaluation from the games, since
        # the network evaluation will be through deterministic games.
        # We want a stochastic policy for self-play since it will see more states and be more robust,
        # but we need to save the best model according to the best deterministic policy,
        # since that is ultimately what we want.
        eval_mode(self.models)

        root_state = np.array(root_state, dtype="float32")
        joint_state = [np.copy(root_state), np.copy(root_state)]
        results = dict()
        results["player_one"] = 0
        results["player_two"] = 0
        results["draw"] = 0
        np.set_printoptions(precision=3)

        for _ in tqdm(range(config.EPISODES)):
            self.do_round(results,
                          joint_state,
                          curr_player,
                          T=T,
                          record_memories=record_memories)

        # results["player_one"] = 0
        # results["player_two"] = 0
        # results["draw"] = 0
        # for _ in tqdm(range(config.EVALUATION_EPISODES)):
        #     self.do_round(results, joint_state, curr_player,
        #                   T=0, record_memories=False)
        # print("Deterministic Results: ", results)
        if T == 0:
            name = "Deterministic"
        else:
            name = "Stochastic"
        print("{} Results: ".format(name), results)
        if save_model:
            if results["player_one"] > results[
                    "player_two"] * config.SCORING_THRESHOLD:
                self.save_best_model()
                self.best_net.models = setup_models(self.best_net.io,
                                                    self.best_net.load_model,
                                                    self.best_net.cuda,
                                                    trainer=False)
                self.best_net.optims = setup_optims(self.best_net.models,
                                                    self.best_net.cuda)

            elif results["player_two"] > results[
                    "player_one"] * config.SCORING_THRESHOLD:
                # load best model to training model
                self.models = setup_models(self.io,
                                           self.load_model,
                                           self.cuda,
                                           trainer=False)
                self.optims = setup_optims(self.models, self.cuda)

            # self.save_training_model()
        # self.memories = self.memories[-config.MAX_MEMORIES:]
        print("Num memories: {}".format(len(self.memories)))
        # Note, I am loading old memories from a bad version
        # It will eventually get overwritten, but it is a little inefficient to reference those
        return self.memories

    def save_best_model(self):
        self.io.cprint("Saving best model")
        for name, model in self.models.items():
            torch.save(model, "checkpoints/models/%s.t7" % (name + "_best"))

    def save_training_model(self):
        self.io.cprint("Saving training model")
        for name, model in self.models.items():
            torch.save(model,
                       "checkpoints/models/%s.t7" % (name + "_training"))

    def load_training_model(self):
        self.models = setup_models(self.io,
                                   self.load_model,
                                   self.cuda,
                                   trainer=True)
        self.optims = setup_optims(self.models, self.cuda)

    def save_memories(self):
        print("Saving Memories...")
        pickle.dump(self.memories, open("checkpoints/memories.p", "wb"))

    def plot_losses(self):
        plt.plot(self.history["readout"], "r")
        plt.plot(self.history["policy"], "m")
        plt.plot(self.history["value"], "c")
        plt.plot(self.history["total"], "y")
        plt.show()

    def run_simulations(self, joint_states, curr_player, turn):
        self.embeddings = dict()
        S = dict()
        A = dict()
        R = dict()
        H = dict()
        N = dict()
        game_over = False
        memory = {
            "curr_player": curr_player,
            "result": None,
            "policy": {
                "output": []
            },
            "readout": {
                "output": None
            },
            "value": {
                "output": None
            }
        }

        root_state = np.concatenate(
            (np.expand_dims(joint_states[0],
                            0), np.expand_dims(joint_states[1], 0),
             np.zeros(shape=np.expand_dims(joint_states[1], 0).shape) +
             curr_player),
            axis=0)

        def convert_to_pytorch_state(state):
            channel_one = cast_to_torch(state[0], self.cuda).unsqueeze(0)
            channel_two = cast_to_torch(state[1], self.cuda).unsqueeze(0)
            channel_three = cast_to_torch(state[2], self.cuda).unsqueeze(0)
            return torch.cat([channel_one, channel_two, channel_three],
                             0).unsqueeze(0)

        def get_state_mask(state, legal_actions):
            flattened = state[:2].flatten()
            flattened[legal_actions] = 1
            return flattened.reshape(state[0].shape)

        input_state = convert_to_pytorch_state(root_state)
        # The next lines overwrite the memory dict built above and drop into
        # the debugger; the early return below skips the MCTS loop that follows.
        memory = torch.tensor(root_state.shape)
        memory = 0
        set_trace()

        for _ in range(config.MCTS_SIMS + 1):
            # consider adding a probability of doing another sim and trading off number of sims
            # vs. performance, i.e. maximize performance while minimizing sims
            (exploratory_state, strongest_transition, updated_memory,
             input_state_value) = mcts(input_state, memory)

            input_state = exploratory_state
            memory = updated_memory
            # if sim < config.MCTS_SIMS:
            #     memory["strongest_transitions"].append(strongest_transition)

        #So basically I want to accumulate a bunch of moves from running the network

        legal_actions = self.get_legal_actions(root_state[:2])
        view = root_state[legal_actions]
        probas = F.softmax(view, dim=0)
        idx = np.random.choice(probas.data.numpy(), p=probas)
        log_probas = F.log_softmax(view, dim=0)
        memory["final_transition"] = strongest_transition
        memory["log_probas"] = log_probas
        memory["value"] = input_state_value
        new_state = np.copy(root_state) * get_state_mask(
            root_state, legal_actions)
        new_state[legal_actions[idx]] = 1
        return new_state

        t = 0
        #+1 sims since the first is used to expand the embedding
        for sim in range(config.MCTS_SIMS + 1):
            while True:
                try:
                    N[hashed] += 1
                except:
                    N[hashed] = 0
                    break

                legal_actions = self.get_legal_actions(S[t][:2])

                reward, game_over = self.calculate_reward(S[t][:2])

                R[t] = reward
                if len(legal_actions) == 0 or game_over:
                    game_over = True
                    break

                # consider moving the value head here and using it in the backups
                action = self.simulate(self.embeddings[hashed], S[t], sim,
                                       memory)

                A[t] = action

                new_state = self.transition(np.copy(S[t][:2][curr_player]),
                                            A[t])
                S[t + 1] = np.copy(S[t])
                S[t + 1][curr_player] = np.copy(new_state)
                t += 1
                curr_player += 1
                curr_player = curr_player % 2
                S[t][2] = curr_player
                S[t].flags.writeable = False
                hashed = hash(S[t].data.tobytes())
                S[t].flags.writeable = True

            if not game_over and len(legal_actions) > 0:
                state_one = cast_to_torch(S[t][0], self.cuda).unsqueeze(0)
                state_two = cast_to_torch(S[t][1], self.cuda).unsqueeze(0)
                state_three = cast_to_torch(S[t][2], self.cuda).unsqueeze(0)
                state = torch.cat([state_one, state_two, state_three],
                                  0).unsqueeze(0)
                self.models["emb"].eval()
                H[t] = self.embeddings[hashed] = self.models["emb"](state)

            if t > 0:
                H = self.backup(H, R, S, t, memory)
                t = 0

        self.models["readout"].eval()

        logits = self.models["readout"](H[0])

        memory["readout"]["output"] = logits

        pi = self.correct_policy(logits, joint_states, is_root=False)

        return pi, memory

    def apply_temp_to_policy(self, pi, turn, T=config.TAU):
        if turn == config.TURNS_UNTIL_TAU0 or T == 0:
            temp = np.zeros(shape=pi.shape)
            temp[np.argmax(pi)] = 1
            pi = temp
        else:
            return pi
        # T = T - ((1 / config.TURNS_UNTIL_TAU0) * (turn+1))
        # if T <= .1:
        #     T = 0
        #     temp = np.zeros(shape=pi.shape)
        #     temp[np.argmax(pi)] = 1
        #     pi = temp
        # else:
        #     pi = pi**(1 / T)
        #     pol_sum = (np.sum(pi) * 1.0)
        #     if pol_sum != 0:
        #         pi = pi / pol_sum

        return pi

    def simulate(self, emb, joint_state, sim, memory):
        emb = emb.view(1, 1, 8, 16)
        self.models["policy"].eval()
        logits, value = self.models["policy"](emb)

        if sim == 1:
            is_root = True
        else:
            is_root = False
        # might want to use uncorrected policy, idk
        pi = self.correct_policy(logits, joint_state, is_root=is_root)
        # if sim == 1:
        # I think I actually want this to be the last sim since I want the most recent
        # output from the policy net

        idx = np.random.choice(len(self.actions), p=pi)

        action = self.actions[idx]
        memory["policy"]["output"].append({
            "log_action_prob":
            F.log_softmax(logits, dim=0)[idx],
            "value":
            value,
            "is_root":
            is_root
        })

        return action

    def backup(self, H, R, S, _t, memory, is_for_inp=False):
        for t in reversed(range(_t)):
            reward = cast_to_torch([R[t]], self.cuda)
            comb_state_1 = S[t + 1][0] + S[t + 1][1]
            comb_state_2 = S[t][0] + S[t][1]
            action = comb_state_1 - comb_state_2
            action = cast_to_torch(action, self.cuda).view(-1)

            inp = torch.cat([H[t], H[t + 1], reward, action], 0)

            self.models["backup"].eval()
            H[t] = self.models["backup"](inp, H[t])

        return H

    def correct_policy(self, logits, joint_state, is_root):
        odds = np.exp(logits.data.numpy())
        policy = odds / np.sum(odds)
        if is_root:
            nu = np.random.dirichlet([config.ALPHA] * len(self.actions))
            policy = policy * (1 - config.EPSILON) + nu * config.EPSILON

        mask = np.zeros(policy.shape)
        legal_actions = self.get_legal_actions(joint_state[:2])
        mask[legal_actions] = 1
        policy = policy * mask

        pol_sum = (np.sum(policy) * 1.0)
        if pol_sum == 0:
            return policy
        else:
            return policy / pol_sum

    def zero_grad(self):
        for _, optim in self.optims.items():
            optim.zero_grad()

    def optim_step(self):
        for _, optim in self.optims.items():
            optim.step()

    # todo: update model to use CLR and stuff
    # https://github.com/fastai/fastai/blob/master/fastai/learner.py
    # def save(self):
    #     for name, model in self.models.items():
    #         torch.save(model, "checkpoints/%s.t7" % (name + "_tmp"))
    # def load(self):
    #     for name, model in self.models.items():
    #         torch.load(model, "checkpoints/%s.t7" % (name + "_tmp"))

    # def find_lr(self):
    #     self.save()
    # layer_opt = self.get_layer_opt

    def train(self, minibatches, last_loop=False):
        for e in range(config.EPOCHS):
            last_epoch = (e == (config.EPOCHS - 1))
            if e > 0:
                shuffle(minibatches)
            read_loss_data = 0
            pol_loss_data = 0
            val_loss_data = 0
            total_loss_data = 0

            for mb in minibatches:
                self.zero_grad()

                pol_loss = 0
                val_loss = 0
                read_loss = 0
                weights = [1, 1, 1]

                num_val_losses = 0
                num_pol_losses = 0
                num_read_losses = 0

                for i, memory in enumerate(mb):
                    result = memory["result"]
                    pol_trajectories = memory["policy"]["output"]

                    for action in pol_trajectories:
                        if action["is_root"]:
                            root_value = action["value"]
                            root_log_action_prob = action["log_action_prob"]
                        else:
                            pol_loss += - \
                                action["log_action_prob"] * \
                                (result - action["value"])
                            num_pol_losses += 1
                            val_loss += F.mse_loss(
                                action["value"],
                                Variable(torch.FloatTensor(np.array([result])),
                                         volatile=True))
                            num_val_losses += 1
                    val_loss += F.mse_loss(
                        root_value,
                        Variable(torch.FloatTensor(np.array([result])),
                                 volatile=True))
                    pol_loss += -root_log_action_prob * (result - root_value)
                    read_loss += - \
                        memory["readout"]["output"]*(result - root_value)
                    num_pol_losses += 1
                    num_val_losses += 1
                    num_read_losses += 1
                val_loss = val_loss / (len(mb))
                pol_loss = pol_loss / (len(mb))
                read_loss = read_loss / (len(mb))

                total_loss = (read_loss * weights[0] + pol_loss * weights[1] +
                              val_loss * weights[2])

                read_loss_data += read_loss.data.numpy()[0] * weights[0]
                pol_loss_data += pol_loss.data.numpy()[0] * weights[1]
                val_loss_data += val_loss.data.numpy()[0] * weights[2]
                total_loss_data += total_loss.data.numpy()[0]

                # if (last_epoch):
                #     total_loss.backward(retain_graph=False)
                # else:
                #     total_loss.backward(retain_graph=True)
                total_loss.backward()
                assert (root_value.grad is not None
                        and root_log_action_prob.grad is not None
                        and mb[0]["readout"]["output"].grad is not None)
                set_trace()

                # orig_params = {}
                # for name, model in self.models.items():
                #     orig_params[name] = []
                #     for parameters in model.parameters():
                #         orig_params[name].extend([np.copy(parameters.detach().data.numpy())])

                self.optim_step()
                # for name, model in self.models.items():
                #     for i, parameters in enumerate(model.parameters()):
                #         if not (orig_params[name][i] == parameters.detach().data.numpy()).all():
                #             print(name)
                #             set_trace()
                #             test = "hi"

            read_loss_data /= len(minibatches)
            pol_loss_data /= len(minibatches)
            val_loss_data /= len(minibatches)
            total_loss_data /= len(minibatches)

            if len(self.history["readout"]) == 0:
                self.history["readout"].extend([read_loss_data])
                self.history["policy"].extend([pol_loss_data])
                self.history["value"].extend([val_loss_data])
                self.history["total"].extend([total_loss_data])
                pickle.dump(self.history, open("checkpoints/history.p", "wb"))

            elif last_loop and last_epoch and len(self.history["readout"]) > 0:
                prev_readout = self.history["readout"][-1]
                prev_policy = self.history["policy"][-1]
                prev_value = self.history["value"][-1]
                prev_total = self.history["total"][-1]
                r_sign = "" if prev_readout > read_loss_data else "+"
                p_sign = "" if prev_policy > pol_loss_data else "+"
                v_sign = "" if prev_value > val_loss_data else "+"
                t_sign = "" if prev_total > total_loss_data else "+"
                r_diff = ((read_loss_data - prev_readout) / prev_readout) * 100
                p_diff = ((pol_loss_data - prev_policy) / prev_policy) * 100
                v_diff = ((val_loss_data - prev_value) / prev_value) * 100
                t_diff = ((total_loss_data - prev_total) / prev_total) * 100

                print("readout loss: {} ({}{}%)".format(
                    np.round(read_loss_data, 4), r_sign, r_diff))
                print("policy loss: {} ({}{}%)".format(
                    np.round(pol_loss_data, 4), p_sign, p_diff))
                print("value loss: {} ({}{}%)".format(
                    np.round(val_loss_data, 4), v_sign, v_diff))
                print("total loss: {} ({}{}%)".format(
                    np.round(total_loss_data, 4), t_sign, t_diff))

                self.history["readout"].extend([read_loss_data])
                self.history["policy"].extend([pol_loss_data])
                self.history["value"].extend([val_loss_data])
                self.history["total"].extend([total_loss_data])
                pickle.dump(self.history, open("checkpoints/history.p", "wb"))

    def train_memories(self):
        train_mode(self.models)
        self.io.cprint("Training memories")

        # add a test that takes the oldest memories, creates minibatches from them,
        # and runs one check to see whether the loss is changing all of the parts
        # of the network, i.e. the parameters before and after the update are different

        # https://blog.slavv.com/37-reasons-why-your-neural-network-is-not-working-4020854bd607
        # gives some hints. param update magnitudes should be 1e-3
        # if len(self.memories) > config.MIN_MEMORIES:
        # num_samples = config.NUM_SAMPLES - (config.NUM_SAMPLES%config.BATCH_SIZE)

        for i in tqdm(range(config.TRAINING_LOOPS)):
            last_loop = (i == (config.TRAINING_LOOPS - 1))
            shuffle(self.memories)
            minibatches = [self.memories[:config.BATCH_SIZE]]
            # minibatches = [
            #     data[x:x + config.BATCH_SIZE]
            #     for x in range(0, len(data), config.BATCH_SIZE)
            # ]
            self.train(minibatches, last_loop)
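
MCTSnet above is wired together from game-specific callbacks that this excerpt does not include. An illustrative construction and self-play round; every game_* name and the 6x7 board are placeholders, not part of the original code:

# Hypothetical wiring for a Connect-Four-style game; the callbacks would come
# from the game implementation, which is not shown here.
net = MCTSnet(actions=game_actions,
              calculate_reward=game_reward,
              get_legal_actions=game_legal_actions,
              transition=game_transition,
              load_model=False)
# memories = net.self_play(root_state=np.zeros((6, 7)), T=config.TAU)
# net.train_memories()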
Example #14
    bD = [sp.csr_matrix(s[0, ...]) for s in bD]
    bU = [sp.csr_matrix(s[0, ...]) for s in bU]
    with open(os.path.join(args['downsample_directory'], 'pai_matrices.pkl'), 'wb') as fp:
        pickle.dump([Adj, sizes, bD, bU], fp)
else:
    print("Loading adj Matrices ..")
    with open(os.path.join(args['downsample_directory'], 'pai_matrices.pkl'), 'rb') as fp:
        [Adj, sizes, bD, bU] = pickle.load(fp)

tD = [sparse_mx_to_torch_sparse_tensor(s) for s in bD]
tU = [sparse_mx_to_torch_sparse_tensor(s) for s in bU]

#%%
torch.manual_seed(args['seed'])
print(device)
io = IOStream(os.path.join(args['results_folder'], 'run.log'))
io.cprint(str(args))
#%%
# Building model, optimizer, and loss function

dataset_train = autoencoder_dataset(
    root_dir=args['data'],
    points_dataset='train',
    shapedata=shapedata,
    normalization=args['normalization'])

dataloader_train = DataLoader(dataset_train,
                              batch_size=args['batch_size'],
                              shuffle=args['shuffle'],
                              num_workers=args['num_workers'])