def __init__(self,
                 in_path=None,
                 num_epochs=100,
                 batch_size=50,
                 learning_rate=0.0001,
                 name=None,
                 verbose=False):
        """
        This class implements the training procedure for the autoencoders.
        :param in_path: (string) the file path indicating the location of the training data.
        :param num_epochs: (int) the number of epochs.
        :param batch_size: (int) the batch size.
        :param learning_rate: (float) the learning rate for the Adam optimizer.
        :param name: (string) the name of the model (used when saving the parameters to file)
        :param verbose: (boolean) if true, the training process is printed to console
        """
        # Select GPU when available; keep the boolean around for any legacy
        # .cuda() call sites elsewhere in the class.
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.use_cuda = torch.cuda.is_available()
        self.in_path = in_path
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.name = name

        # .to(self.device) covers both the CUDA and CPU paths uniformly,
        # replacing the previous conditional .cuda() call.
        self.net = model.AutoEncoder().to(self.device)
        self.optimizer = torch.optim.Adam(self.net.parameters(),
                                          lr=learning_rate,
                                          weight_decay=1e-5)
        self.losses = []  # loss history collected during training
        self.verbose = verbose
        self.start_epoch = 1  # first epoch index (supports checkpoint resume)
Exemple #2
0
def train_autoencoder():
    """Train the autoencoder on the model's ImageNet images.

    Runs one pass over the dataset in mini-batches inside a TF1 session,
    saves the trained variables, and returns the checkpoint path prefix.
    """
    ae = model.AutoEncoder()

    num_images = ae.imagenet_images.shape[0]
    train_op = ae.autoencoder_train_op()
    bs = ae.batch_size

    print("AUTOENCODER TRAINING...")

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        # One epoch over the data; any trailing partial batch is dropped.
        for batch_idx in range(num_images // bs):
            start = batch_idx * bs
            batch_xs = ae.imagenet_images[start:start + bs, :]
            session.run([train_op], {ae.x: batch_xs})

        ae_path_prefix = ae.saver.save(
            session,
            os.path.join(ae.args.auto_encoder_dir, "homework_2"))

    print("AUTOENCODER TRAINED")
    return ae_path_prefix
Exemple #3
0
    def __init__(self):
        """Build and compile the autoencoder and load the image/voxel data.

        Reads pickled arrays from data/images.pkl and data/voxels.pkl,
        repeats every sample 64 times along the batch axis, and reshapes
        the inputs to (-1, 3, 256, 256, 3).
        """
        self.epochs = 1000
        self.batch_size = 32

        self.img_size = (256, 256, 3)
        self.voxel_size = (256, 256, 256)

        AutoEncoder = model.AutoEncoder()
        #strategy = tf.distribute.MirroredStrategy()
        #with strategy.scope():
        self.autoencoder = AutoEncoder.build_autoencoder()
        self.autoencoder.compile(loss=tf.keras.losses.MeanSquaredError(),
                                 optimizer=tf.keras.optimizers.Adam(0.0002))

        print("Loading dataset...")
        # Use context managers so the pickle file handles are closed
        # (the previous pickle.load(open(...)) form leaked them).
        with open("data/images.pkl", "rb") as img_file:
            images = pickle.load(img_file)
        with open("data/voxels.pkl", "rb") as vox_file:
            voxels = pickle.load(vox_file)

        self.x = np.asarray(images).repeat(64, axis=0)
        self.y = np.asarray(voxels).repeat(64, axis=0)

        self.x = self.x.astype(np.float32)
        self.y = self.y.astype(np.int32)

        self.x = self.x.reshape((-1, 3, 256, 256, 3))
        self.y = np.squeeze(self.y)

        print(self.x.shape, self.y.shape)
Exemple #4
0
def loadAE(filename=None):
    """Return an AutoEncoder, restored from `filename` when it exists.

    :param filename: optional path to a torch-saved model; when missing or
        nonexistent, a freshly constructed md.AutoEncoder is returned.
    The model is moved to the GPU when the module-level HAVE_CUDA flag is set.
    """
    if filename is not None and os.path.isfile(filename):
        AE = torch.load(filename)
    else:
        AE = md.AutoEncoder()

    # Truthiness test instead of the non-idiomatic `== True` comparison.
    if HAVE_CUDA:
        AE = AE.cuda()

    return AE
Exemple #5
0
def transform_images(x=None):
    """Run one item image through the autoencoder and return its output.

    :param x: iterable of item ids; defaults to the first id bucket.
    Only the first id is processed (the loop breaks after one iteration,
    matching the original behaviour).
    """
    tt = transforms.ToTensor()
    # `is None` instead of `== None`: equality on tensors/arrays invokes
    # elementwise __eq__ and can raise or return an array.
    if x is None:
        x = get_item_id_buckets()[0]
    if os.path.isfile(os.getcwd()+"/Checkpoints/auto_encoder2"):
        AE = torch.load(os.getcwd()+"/Checkpoints/auto_encoder2")
    else:
        AE = md.AutoEncoder()
    for item_id in x:
        # print() calls are py2/py3 compatible for single arguments,
        # replacing the Python-2-only print statements.
        print(item_id)
        item_image = Image.open("../../Data/Resize_images_50/"+item_id.rstrip()+".jpg")
        item_image = ag.Variable(tt(item_image)).view(1,-1,SIDELENGTH,SIDELENGTH)
        break  # only the first item is transformed
    y = AE(item_image)
    print(item_image)
    print(y)
    return y
Exemple #6
0
def test(infile, outfile):
    """Run the autoencoder over a pickled test set and dump all predictions.

    :param infile: path to a pickle file holding the test vectors.
    :param outfile: name appended to ``args.logdir`` for the output pickle.

    Relies on the module-level ``args`` (CLI options) and ``use_gpu`` flag.
    """
    params = dict()
    params['data_dir'] = args.path_to_train_data
    params['batch_size'] = args.batch_size
    all_outputs = []

    with open(infile, 'rb') as f:
        test_data_layer = pickle.load(f)
        # The first vector's length defines the model input dimensionality.
        params['vector_dim'] = test_data_layer[0].shape[0]
        test_data_layer = input_v2.PlaySongRecDataProvider(
            test_data_layer, params)  #Use my own input data

    print("Data loaded")
    print("Vector dim: {}".format(test_data_layer.vector_dim))
    # Layer sizes: input dim followed by the hidden sizes from the CLI.
    rencoder = model.AutoEncoder(
        layer_sizes=[test_data_layer.vector_dim] +
        [int(l) for l in args.hidden_layers.split(',')],
        nl_type=args.non_linearity_type,
        is_constrained=args.constrained,
        dp_drop_prob=args.drop_prob,
        last_layer_activations=not args.skip_last_layer_nl)
    os.makedirs(args.logdir, exist_ok=True)
    model_checkpoint = args.logdir + "/model.last"
    path_to_model = Path(model_checkpoint)
    if path_to_model.is_file():
        # Restore previously trained weights when a checkpoint exists.
        print("Loading model from: {}".format(model_checkpoint))
        rencoder.load_state_dict(torch.load(model_checkpoint))
        print(rencoder)
    gpu_ids = [int(g) for g in args.gpu_ids.split(',')]
    print('Using GPUs: {}'.format(gpu_ids))
    if len(gpu_ids) > 1:
        rencoder = nn.DataParallel(rencoder, device_ids=gpu_ids)

    if use_gpu: rencoder = rencoder.cuda()

    # Inference only: eval mode, batches from the provider's test iterator.
    rencoder.eval()
    for i, mb in enumerate(
            test_data_layer.iterate_test_epoch()):  #Work on this
        inputs = Variable(mb.cuda() if use_gpu else mb)
        outputs = rencoder(inputs)
        outputs = outputs.cpu() if use_gpu else outputs
        outputs = [out.data.numpy() for out in outputs]
        all_outputs += outputs
    print('Made', str(len(all_outputs)), 'predictions.')
    with open(args.logdir + outfile, 'wb') as f:
        pickle.dump(all_outputs, f)
def main():
    """Train the recoloring autoencoder on the images under ./Images/."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    path = './Images/'
    batch_size = 1

    print("Started Training")
    train_loader = DataLoader(dl.REcolorDataset(path), batch_size)

    # Build the model, then its loss and optimizer.
    model = md.AutoEncoder().to(device)
    criterion = nn.L1Loss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    num_epochs = 50

    tr.train(model, train_loader, criterion, optimizer, num_epochs, device)

    torch.save(model, ".temp")
Exemple #8
0
# Build the test split from the shared `score` data. (Auto_Dataset,
# Train_Dataset, score, Batch_size, LR, Epoch and get_init_dim are defined
# earlier in this file, outside this excerpt.)
Test_Dataset = Auto_Dataset(score, 'test')

tot_score = Train_Dataset.get_all_data()
test_stds = Test_Dataset.get_all_data()

print(f'tot len: {len(tot_score)}')
print(f'test len: {len(test_stds)}')

Train_Loader = Data.DataLoader(
    dataset = Train_Dataset,
    batch_size = Batch_size,
    shuffle = True,
    num_workers = 2
)
# Reconstruction objective with SGD + momentum.
Loss = torch.nn.MSELoss()
auto_model = model.AutoEncoder(get_init_dim())
optimizer = torch.optim.SGD(auto_model.parameters(), lr=LR, momentum=0.8)
cos = nn.CosineSimilarity(dim=0)  # presumably used for evaluation further down -- outside this excerpt

top_list = [1, 5, 10, 15]


# Standard autoencoder loop: minimise MSE between the input batch and the
# decoder output `dec` (the model returns an (encoding, decoding) pair).
for epoch in range(Epoch):
    print(f'Epoch {epoch}')
    tot_loss = 0
    for data in Train_Loader:
        _, dec = auto_model(data['input'])
        loss = Loss(data['input'], dec)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Exemple #9
0
def main():
    """Entry point: train ('new') or resume ('resume') the autoencoder.

    The mode comes from sys.argv[1]; the remaining arguments are handled by
    the matching parse_tools parser. Training runs until opts.max_steps,
    with scheduled learning-rate changes, progress reporting and periodic
    checkpointing.
    """
    if len(sys.argv) == 1 or sys.argv[1] not in ('new', 'resume'):
        print(parse_tools.top_usage, file=stderr)
        return

    print('Command line: ', ' '.join(sys.argv), file=stderr)
    stderr.flush()

    # Consume the mode argument so the sub-parsers only see their own flags.
    mode = sys.argv[1]
    del sys.argv[1]
    if mode == 'new':
        cold_parser = parse_tools.cold_parser()
        opts = parse_tools.two_stage_parse(cold_parser)
    elif mode == 'resume':
        resume_parser = parse_tools.resume_parser()
        opts = resume_parser.parse_args()

    opts.device = None
    if not opts.disable_cuda and torch.cuda.is_available():
        opts.device = torch.device('cuda')
        print('Using GPU', file=stderr)
    else:
        opts.device = torch.device('cpu')
        print('Using CPU', file=stderr)
    stderr.flush()

    ckpt_path = util.CheckpointPath(opts.ckpt_template)
    # Map step -> learning rate for scheduled LR changes during training.
    learning_rates = dict(
        zip(opts.learning_rate_steps, opts.learning_rate_rates))

    # Construct model
    if mode == 'new':
        # Initialize model: option groups are selected by prefix.
        pre_params = parse_tools.get_prefixed_items(vars(opts), 'pre_')
        enc_params = parse_tools.get_prefixed_items(vars(opts), 'enc_')
        bn_params = parse_tools.get_prefixed_items(vars(opts), 'bn_')
        dec_params = parse_tools.get_prefixed_items(vars(opts), 'dec_')

        # Initialize data
        data_source = data.Slice(opts.index_file_prefix,
                                 opts.max_gpu_data_bytes, opts.n_batch)

        dec_params['n_speakers'] = data_source.num_speakers()

        model = ae.AutoEncoder(pre_params, enc_params, bn_params, dec_params,
                               opts.n_sam_per_slice)
        optim = torch.optim.Adam(params=model.parameters(),
                                 lr=learning_rates[0])
        state = checkpoint.State(0, model, data_source, optim)

    else:
        # Resume: model, data source and optimizer come from the checkpoint.
        state = checkpoint.State()
        state.load(opts.ckpt_file)
        state.model.set_slice_size(opts.n_sam_per_slice)
        print('Restored model, data, and optim from {}'.format(opts.ckpt_file),
              file=stderr)
        #print('Data state: {}'.format(state.data), file=stderr)
        #print('Model state: {}'.format(state.model.checksum()))
        #print('Optim state: {}'.format(state.optim_checksum()))
        stderr.flush()

    start_step = state.step

    print('Model input size: {}'.format(state.model.input_size), file=stderr)
    stderr.flush()

    # set this to zero if you want to print out a logging header in resume mode as well
    netmisc.set_print_iter(0)

    state.data.init_geometry(state.model.preprocess.rf, state.model)

    #state.data.set_geometry(opts.n_batch, state.model.input_size,
    #        state.model.output_size)
    state.to(device=opts.device)

    # Initialize optimizer
    metrics = ae.Metrics(state)
    batch_gen = state.data.batch_slice_gen_fn()

    #for p in list(state.model.encoder.parameters()):
    #    with torch.no_grad():
    #        p *= 1

    # Start training
    print('Training parameters used:', file=stderr)
    pprint(opts, stderr)

    state.init_torch_generator()

    while state.step < opts.max_steps:
        if state.step in learning_rates:
            state.update_learning_rate(learning_rates[state.step])
        # do 'pip install --upgrade scipy' if you get 'FutureWarning: ...'
        # print('in main loop')

        # For VQ-VAE bottlenecks, periodically re-seed the codebook from the
        # current encoder output distribution early in training.
        if state.step in (1, 10, 50, 100, 300,
                          500) and state.model.bn_type == 'vqvae':
            print('Reinitializing embed with current distribution',
                  file=stderr)
            stderr.flush()
            state.model.init_vq_embed(batch_gen)

        # One optimization step; metrics capture loss and diagnostics.
        metrics.update(batch_gen)
        loss = metrics.state.optim.step(metrics.loss)
        avg_peak_dist = metrics.peak_dist()
        avg_max = metrics.avg_max()
        avg_prob_target = metrics.avg_prob_target()

        # Debugging aid: per-parameter gradient statistics (disabled).
        if False:
            for n, p in list(state.model.encoder.named_parameters()):
                g = p.grad
                if g is None:
                    print('{:60s}\tNone'.format(n), file=stderr)
                else:
                    fmt = '{:s}\t{:.5f}\t{:.5f}\t{:.5f}\t{:.5f}'
                    print(fmt.format(n, g.max(), g.min(), g.mean(), g.std()),
                          file=stderr)

        # Progress reporting
        if state.step % opts.progress_interval == 0:
            current_stats = {
                'step': state.step,
                'loss': loss,
                'tprb_m': avg_prob_target,
                'pk_d_m': avg_peak_dist
            }
            #fmt = "M\t{:d}\t{:.5f}\t{:.5f}\t{:.5f}\t{:.5f}"
            #print(fmt.format(state.step, loss, avg_prob_target, avg_peak_dist,
            #    avg_max), file=stderr)
            if state.model.bn_type == 'vqvae':
                current_stats.update(state.model.objective.metrics)

            netmisc.print_metrics(current_stats, 1000000)
            stderr.flush()

        state.step += 1

        # Checkpointing (also once right after the first step of a new run)
        if ((state.step % opts.save_interval == 0 and state.step != start_step)
                or (mode == 'new' and state.step == 1)):
            ckpt_file = ckpt_path.path(state.step)
            state.save(ckpt_file)
            print('Saved checkpoint to {}'.format(ckpt_file), file=stderr)
            #print('Optim state: {}'.format(state.optim_checksum()), file=stderr)
            stderr.flush()
Exemple #10
0
    def __init__(self, mode, opts):
        """Build or restore the full training state.

        :param mode: 'new' to initialize model/data from `opts`, anything
            else to restore from opts.ckpt_file.
        :param opts: parsed command-line options.
        """
        print('Initializing model and data source...', end='', file=stderr)
        stderr.flush()
        # Map step -> learning rate for scheduled LR changes.
        self.learning_rates = dict(zip(opts.learning_rate_steps,
            opts.learning_rate_rates))
        self.opts = opts

        if mode == 'new':
            torch.manual_seed(opts.random_seed)
            # Option groups are selected by prefix (pre_, enc_, bn_, dec_).
            pre_par = parse_tools.get_prefixed_items(vars(opts), 'pre_')
            enc_par = parse_tools.get_prefixed_items(vars(opts), 'enc_')
            bn_par = parse_tools.get_prefixed_items(vars(opts), 'bn_')
            dec_par = parse_tools.get_prefixed_items(vars(opts), 'dec_')

            # Initialize data
            jprob = dec_par.pop('jitter_prob')
            dataset = data.Slice(opts.n_batch, opts.n_win_batch, jprob,
                    pre_par['sample_rate'], pre_par['mfcc_win_sz'],
                    pre_par['mfcc_hop_sz'], pre_par['n_mels'],
                    pre_par['n_mfcc'])
            dataset.load_data(opts.dat_file)
            dec_par['n_speakers'] = dataset.num_speakers()
            model = ae.AutoEncoder(pre_par, enc_par, bn_par, dec_par,
                    dataset.num_mel_chan(), training=True)
            # Model and dataset each need the other for final setup.
            model.post_init(dataset)
            dataset.post_init(model)
            optim = torch.optim.Adam(params=model.parameters(), lr=self.learning_rates[0])
            self.state = checkpoint.State(0, model, dataset, optim)
            self.start_step = self.state.step

        else:
            # Resume training from a checkpoint file.
            self.state = checkpoint.State()
            self.state.load(opts.ckpt_file, opts.dat_file)
            self.start_step = self.state.step
            # print('Restored model, data, and optim from {}'.format(opts.ckpt_file), file=stderr)
            #print('Data state: {}'.format(state.data), file=stderr)
            #print('Model state: {}'.format(state.model.checksum()))
            #print('Optim state: {}'.format(state.optim_checksum()))
            stderr.flush()

        self.ckpt_path = util.CheckpointPath(self.opts.ckpt_template)
        self.quant = None
        self.target = None
        self.softmax = torch.nn.Softmax(1) # input to this is (B, Q, N)

        # GPU path uses the plain data loader; otherwise assume a TPU and
        # go through torch_xla's parallel loader and optimizer step.
        if self.opts.hwtype == 'GPU':
            self.device = torch.device('cuda')
            self.data_loader = self.state.data_loader
            self.data_loader.set_target_device(self.device)
            self.optim_step_fn = (lambda: self.state.optim.step(self.loss_fn))
            self.data_iter = GPULoaderIter(iter(self.data_loader))
        else:
            import torch_xla.core.xla_model as xm
            import torch_xla.distributed.parallel_loader as pl
            self.device = xm.xla_device()
            self.data_loader = pl.ParallelLoader(self.state.data_loader, [self.device])
            self.data_iter = TPULoaderIter(self.data_loader, self.device)
            self.optim_step_fn = (lambda : xm.optimizer_step(self.state.optim,
                    optimizer_args={'closure': self.loss_fn}))

        self.state.init_torch_generator()
        print('Done.', file=stderr)
        stderr.flush()
Exemple #11
0
def main():
    """Train the playlist-embedding autoencoder end to end.

    Loads playlist embeddings from a fixed pickle path, builds the model
    from the module-level ``args``, optionally restores a checkpoint,
    trains for args.num_epochs with the selected optimizer, and saves the
    final weights to ``args.logdir + "/model.last.last"``.
    """
    #logger = Logger(args.logdir)
    params = dict()
    params['data_dir'] = args.path_to_train_data
    params['batch_size'] = args.batch_size

    with open('v2/annoy-dim-256-tree-100-mincount-10/playlist_embeddings.pkl',
              'rb') as f:
        train_data_layer = pickle.load(f)
        train_data_layer = list(train_data_layer.values())
        # The first embedding's length defines the model input dimensionality.
        params['vector_dim'] = train_data_layer[0].shape[0]
        train_data_layer = input_v2.PlaySongRecDataProvider(
            train_data_layer, params)  #Use my own input data

    print("Data loaded")
    print("Vector dim: {}".format(train_data_layer.vector_dim))
    '''
    print("Loading eval data")
    #eval_params = copy.deepcopy(params)
    # must set eval batch size to 1 to make sure no examples are missed
    with open('test-1000k.msgpack', 'rb') as f:
        eval_data_layer = msgpack.load(f)
        eval_data_layer = input_v2.PlaySongRecDataProvider(eval_data_layer, params, test=True)    #Use my own input data
    #eval_params['data_dir'] = args.path_to_eval_data
    eval_data_layer.src_data = train_data_layer.data
    '''
    # Layer sizes: input dim followed by the hidden sizes from the CLI.
    rencoder = model.AutoEncoder(
        layer_sizes=[train_data_layer.vector_dim] +
        [int(l) for l in args.hidden_layers.split(',')],
        nl_type=args.non_linearity_type,
        is_constrained=args.constrained,
        dp_drop_prob=args.drop_prob,
        last_layer_activations=not args.skip_last_layer_nl)
    os.makedirs(args.logdir, exist_ok=True)
    model_checkpoint = args.logdir + "/model.last"
    path_to_model = Path(model_checkpoint)
    if path_to_model.is_file():
        # Warm-start from the previous checkpoint when one exists.
        print("Loading model from: {}".format(model_checkpoint))
        rencoder.load_state_dict(torch.load(model_checkpoint))

    print('######################################################')
    print('######################################################')
    print('############# AutoEncoder Model: #####################')
    print(rencoder)
    print('######################################################')
    print('######################################################')

    gpu_ids = [int(g) for g in args.gpu_ids.split(',')]
    print('Using GPUs: {}'.format(gpu_ids))
    if len(gpu_ids) > 1:
        rencoder = nn.DataParallel(rencoder, device_ids=gpu_ids)

    if use_gpu: rencoder = rencoder.cuda()

    # Optimizer selection from the CLI; only "momentum" gets an LR schedule.
    if args.optimizer == "adam":
        optimizer = optim.Adam(rencoder.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optimizer == "adagrad":
        optimizer = optim.Adagrad(rencoder.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
    elif args.optimizer == "momentum":
        optimizer = optim.SGD(rencoder.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=args.weight_decay)
        scheduler = MultiStepLR(optimizer,
                                milestones=[24, 36, 48, 66, 72],
                                gamma=0.5)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(rencoder.parameters(),
                                  lr=args.lr,
                                  momentum=0.9,
                                  weight_decay=args.weight_decay)
    else:
        raise ValueError('Unknown optimizer kind')

    t_loss = 0.0
    t_loss_denom = 0.0
    global_step = 0

    # Optional denoising: dropout applied to inputs during augmentation.
    if args.noise_prob > 0.0:
        dp = nn.Dropout(p=args.noise_prob)

    for epoch in range(args.num_epochs):
        print('Doing epoch {} of {}'.format(epoch, args.num_epochs))
        e_start_time = time.time()
        rencoder.train()
        total_epoch_loss = 0.0
        denom = 0.0
        if args.optimizer == "momentum":
            scheduler.step()
        for i, mb in enumerate(
                train_data_layer.iterate_one_epoch()):  #Work on this
            inputs = Variable(mb.cuda() if use_gpu else mb)
            optimizer.zero_grad()
            outputs = rencoder(inputs)
            # Cosine-similarity loss, normalised by the number of ratings.
            loss, num_ratings = model.CosLoss(
                outputs, inputs,
                torch.FloatTensor(np.ones((1, len(mb)))).cuda())
            loss = loss / num_ratings
            loss.backward()
            optimizer.step()
            global_step += 1
            t_loss += loss.item()
            t_loss_denom += 1

            if i % args.summary_frequency == 0:
                print('Batch [%d, %5d] RMSE: %.7f' %
                      (epoch, i, t_loss / t_loss_denom))
                #logger.scalar_summary("Training_RMSE", sqrt(t_loss/t_loss_denom), global_step)
                t_loss = 0
                t_loss_denom = 0.0
                #log_var_and_grad_summaries(logger, rencoder.encode_w, global_step, "Encode_W")
                #log_var_and_grad_summaries(logger, rencoder.encode_b, global_step, "Encode_b")
                if not rencoder.is_constrained:
                    #log_var_and_grad_summaries(logger, rencoder.decode_w, global_step, "Decode_W")
                    pass
                #log_var_and_grad_summaries(logger, rencoder.decode_b, global_step, "Decode_b")

            total_epoch_loss += loss.item()
            denom += 1

            #if args.aug_step > 0 and i % args.aug_step == 0 and i > 0:
            if args.aug_step > 0:
                # Magic data augmentation trick happen here: re-feed the
                # model's own (optionally noised) outputs with an MSE loss.
                # NOTE(review): Variable(..., volatile=True) is a no-op in
                # modern PyTorch -- confirm the intended torch version.
                for t in range(args.aug_step):
                    inputs = Variable(outputs.data, volatile=True)
                    if args.noise_prob > 0.0:
                        inputs = dp(inputs)
                    optimizer.zero_grad()
                    outputs = rencoder(inputs)
                    loss, num_ratings = model.MSEloss(outputs, inputs)
                    loss = loss / num_ratings
                    loss.backward()
                    optimizer.step()
        if use_gpu:
            torch.cuda.empty_cache()
        e_end_time = time.time()
        print(
            'Total epoch {} finished in {} seconds with TRAINING RMSE loss: {}'
            .format(epoch, e_end_time - e_start_time,
                    total_epoch_loss / denom))
        #logger.scalar_summary("Training_RMSE_per_epoch", sqrt(total_epoch_loss/denom), epoch)
        #logger.scalar_summary("Epoch_time", e_end_time - e_start_time, epoch)
        '''
        if epoch % 5 == 0 or epoch == (args.num_epochs - 1):
            #Test     
            dcg_score, ndcg_score, prec_score = evaluate(rencoder, eval_data_layer)
            print('Epoch: {} : TESTING LOSS:: DCG: {}, NDCG: {}, HR: {}'.format(epoch, dcg_score, ndcg_score, prec_score))
            print("Saving model to {}".format(model_checkpoint + ".epoch_"+str(epoch)))
            torch.save(rencoder.state_dict(), model_checkpoint + ".epoch_"+str(epoch))
        '''

    print("Saving model to {}".format(model_checkpoint + ".last"))
    torch.save(rencoder.state_dict(), model_checkpoint + ".last")
Exemple #12
0
    # convert word to word_id
    util.apply_vocabs(in_file, wid_file, voc_file)

    datas = []
    with open(wid_file) as f:
        for l in f:
            xs = l.split()
            #tmpdata = xp.zeros(voc_size, dtype=xp.float32)
            tmpdata = [0 for _ in range(voc_size)]
            for x in xs:
                tmpdata[int(x)] += 1
            for x in xs:
                datas.append((tmpdata, x))

    # model setup
    mdl = model.AutoEncoder(voc_size, topic_size, hidden_size)
    # optimizer set up
    opt = chainer.optimizers.Adam()
    opt.setup(mdl)
    # use gpu
    chainer.cuda.get_device(gpu).use()
    mdl.to_gpu()

    for i in range(10):
        all_loss = 0
        #batches = create_batch(datas)
        batches = datas
        start = time.time()
        for b in batches:
            mdl.zerograds()
            loss = mdl.forward([b[0]], [b[1]])
Exemple #13
0
def main():
    """Entry point: train ('new') or resume ('resume') the autoencoder.

    The mode comes from sys.argv[1]; remaining arguments are handled by the
    matching parse_tools parser. Trains until opts.max_steps with scheduled
    learning-rate changes and periodic checkpointing.
    """
    if len(sys.argv) == 1 or sys.argv[1] not in ('new', 'resume'):
        print(parse_tools.top_usage, file=stderr)
        return

    # Consume the mode argument so the sub-parsers only see their own flags.
    mode = sys.argv[1]
    del sys.argv[1]
    if mode == 'new':
        opts = parse_tools.two_stage_parse(parse_tools.cold)
    elif mode == 'resume':
        opts = parse_tools.resume.parse_args()  

    opts.device = None
    if not opts.disable_cuda and torch.cuda.is_available():
        opts.device = torch.device('cuda')
    else:
        opts.device = torch.device('cpu') 

    ckpt_path = util.CheckpointPath(opts.ckpt_template)

    # Construct model
    if mode == 'new':
        # Initialize model: option groups are selected by prefix.
        pre_params = parse_tools.get_prefixed_items(vars(opts), 'pre_')
        enc_params = parse_tools.get_prefixed_items(vars(opts), 'enc_')
        bn_params = parse_tools.get_prefixed_items(vars(opts), 'bn_')
        dec_params = parse_tools.get_prefixed_items(vars(opts), 'dec_')

        # Initialize data
        sample_catalog = D.parse_sample_catalog(opts.sam_file)
        data = D.WavSlices(sample_catalog, pre_params['sample_rate'],
                opts.frac_permutation_use, opts.requested_wav_buf_sz)
        dec_params['n_speakers'] = data.num_speakers()

        #with torch.autograd.set_detect_anomaly(True):
        model = ae.AutoEncoder(pre_params, enc_params, bn_params, dec_params)
        print('Initializing model parameters', file=stderr)
        model.initialize_weights()

        # Construct overall state
        state = checkpoint.State(0, model, data)

    else:
        # Resume: model and data come from the checkpoint file.
        state = checkpoint.State()
        state.load(opts.ckpt_file)
        print('Restored model and data from {}'.format(opts.ckpt_file), file=stderr)

    state.model.set_geometry(opts.n_sam_per_slice)

    state.data.set_geometry(opts.n_batch, state.model.input_size,
            state.model.output_size)

    state.model.to(device=opts.device)

    #total_bytes = 0
    #for name, par in model.named_parameters():
    #    n_bytes = par.data.nelement() * par.data.element_size()
    #    total_bytes += n_bytes
    #    print(name, type(par.data), par.size(), n_bytes)
    #print('total_bytes: ', total_bytes)

    # Initialize optimizer
    model_params = state.model.parameters()
    metrics = ae.Metrics(state.model, None)
    batch_gen = state.data.batch_slice_gen_fn()

    #loss_fcn = state.model.loss_factory(state.data.batch_slice_gen_fn())

    # Start training
    print('Starting training...', file=stderr)
    print("Step\tLoss\tAvgProbTarget\tPeakDist\tAvgMax", file=stderr)
    stderr.flush()

    # Map step -> learning rate; on resume, pick the schedule entry at or
    # below the restored step as the current rate.
    learning_rates = dict(zip(opts.learning_rate_steps, opts.learning_rate_rates))
    start_step = state.step
    if start_step not in learning_rates:
        ref_step = util.greatest_lower_bound(opts.learning_rate_steps, start_step)
        metrics.optim = torch.optim.Adam(params=model_params,
                lr=learning_rates[ref_step])

    while state.step < opts.max_steps:
        # A new optimizer is built at every scheduled LR change step.
        if state.step in learning_rates:
            metrics.optim = torch.optim.Adam(params=model_params,
                    lr=learning_rates[state.step])
        # do 'pip install --upgrade scipy' if you get 'FutureWarning: ...'
        metrics.update(batch_gen)
        loss = metrics.optim.step(metrics.loss)
        avg_peak_dist = metrics.peak_dist()
        avg_max = metrics.avg_max()
        avg_prob_target = metrics.avg_prob_target()

        # Progress reporting
        if state.step % opts.progress_interval == 0:
            fmt = "{}\t{:.5f}\t{:.5f}\t{:.5f}\t{:.5f}"
            print(fmt.format(state.step, loss, avg_prob_target, avg_peak_dist,
                avg_max), file=stderr)
            stderr.flush()

        # Checkpointing
        if state.step % opts.save_interval == 0 and state.step != start_step:
            ckpt_file = ckpt_path.path(state.step)
            state.save(ckpt_file)
            print('Saved checkpoint to {}'.format(ckpt_file), file=stderr)

        state.step += 1
Exemple #14
0
import sys

import numpy as np
import torch
from torch import optim
from torch.utils.data import DataLoader, Dataset

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import model

# NOTE: `import numpy as np` and `import torch` were missing here even
# though both names are used below -- added to fix the NameError.

use_gpu = torch.cuda.is_available()

# Load the image array given on the command line; reorder HWC -> CHW and
# rescale pixel values from [0, 255] to [-1, 1].
data_set = np.load(str(sys.argv[1]))
data = np.transpose(data_set, (0, 3, 1, 2)) / 255. * 2 - 1
data = torch.Tensor(data)

auto_encoder = model.AutoEncoder()
auto_encoder.load_state_dict(torch.load('./ae2.pth'))
if use_gpu:
  auto_encoder.cuda()
  data = data.cuda()

test_loader = DataLoader(data, batch_size=60, shuffle=False)

latents = []
reconstructs = []

# Collect latent codes and reconstructions for the whole dataset.
for x in test_loader:
  latent, reconstruct = auto_encoder(x)
  latents.append(latent.cpu().detach().numpy())
  reconstructs.append(reconstruct.cpu().detach().numpy())
Exemple #15
0
import numpy as np

import midiHandler
import model

if __name__ == "__main__":
    # Load piano-roll features; labels are unused here (all valid [1]).
    features, labels = midiHandler.load_data()

    # Reshape to (samples, dim2, dim1, 1): swap the two data axes in the
    # target shape and add a trailing channel dimension.
    n, a, b = features.shape
    features = np.reshape(features, (n, b, a, 1))
    print(features.shape)

    net = model.AutoEncoder()
    net.train(features[0:2], epochs=20)

    # Round-trip the first sample through the encoder/decoder.
    encoded = net.encode(features[0:1])
    decoded = net.decode(encoded)

    # Reconstruction error on the first sample.
    mse = np.square(np.subtract(features[0:1], decoded)).mean()
    print(mse)
    print(decoded.shape)
    decoded = np.squeeze(decoded)

    decoded = midiHandler.clip_velocity(decoded, act=0.9)
    midiHandler.single_save_roll(decoded)
    print(decoded.shape)
Exemple #16
0
def main(_):
    """Train a TransE knowledge-graph model with an autoencoder-initialized
    entity embedding layer, then evaluate the best checkpoint on the test set.

    :param _: unused positional argument supplied by absl.app.run.

    All configuration is read from module-level FLAGS. Side effects: writes
    TensorBoard summaries and saves/loads checkpoints via `storage`.
    """
    # Fix RNG state and force deterministic cuDNN kernels for reproducibility.
    torch.random.manual_seed(FLAGS.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    path = FLAGS.dataset_path
    train_path = os.path.join(path, "train.txt")
    validation_path = os.path.join(path, "valid.txt")
    test_path = os.path.join(path, "test.txt")

    # Build entity/relation -> integer-id mappings.
    # FIX: the original compared strings with `is`, which tests object
    # identity, not equality — it only ever matched by accident of CPython
    # string interning (and is a SyntaxWarning on Python 3.8+). Use `==`.
    if FLAGS.en_reln_mapping == 'file':
        entity2id, relation2id = data.create_mappings_for_WN9(path)
    elif FLAGS.en_reln_mapping == 'wn9':
        entity2id, relation2id = data.create_mappings(train_path, 'WN9')
    elif FLAGS.en_reln_mapping == 'fb15k':
        entity2id, relation2id = data.create_mappings(train_path, 'FB15K')
    else:
        entity2id, relation2id = data.create_mappings(train_path, 'WN9')

    batch_size = FLAGS.batch_size
    vector_length = FLAGS.vector_length
    margin = FLAGS.margin
    norm = FLAGS.norm
    learning_rate = FLAGS.lr
    epochs = FLAGS.epochs
    device = torch.device('cuda') if FLAGS.use_gpu else torch.device('cpu')

    # Data loaders. collate_fn merges raw samples into batched tensors
    # (the raw data is not batch-ready as loaded).
    train_set = data.WN9Dataset(train_path, entity2id, relation2id)
    train_generator = torch_data.DataLoader(train_set,
                                            batch_size=batch_size,
                                            collate_fn=collate_fn)
    validation_set = data.WN9Dataset(validation_path, entity2id, relation2id)
    validation_generator = torch_data.DataLoader(
        validation_set,
        batch_size=FLAGS.validation_batch_size,
        collate_fn=collate_fn)
    test_set = data.WN9Dataset(test_path, entity2id, relation2id)
    test_generator = torch_data.DataLoader(
        test_set,
        batch_size=FLAGS.validation_batch_size,
        collate_fn=collate_fn)

    # The autoencoder supplies initial weights for the entity embedding layer.
    autoencoder = model_definition.AutoEncoder(
        entity2id,
        retrain_text_layer=FLAGS.retrain_text_layer,
        hidden_dimension=vector_length)
    model = model_definition.TransE(
        entity_count=len(entity2id),
        relation_count=len(relation2id),
        dim=vector_length,
        margin=margin,
        beta=FLAGS.beta,
        device=device,
        norm=norm,
        autoencoder=autoencoder)  # type: torch.nn.Module
    model = model.to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    summary_writer = tensorboard.SummaryWriter(
        log_dir=FLAGS.tensorboard_log_dir)
    start_epoch_id = 1
    step = 0
    best_score = 0.0

    # Optionally resume training from a checkpoint.
    if FLAGS.checkpoint_path:
        start_epoch_id, step, best_score = storage.load_checkpoint(
            FLAGS.checkpoint_path, model, optimizer)

    print(model)

    # With --test_only the epoch range below is empty, so we skip straight
    # to evaluating the best checkpoint on the test set.
    if FLAGS.test_only:
        epochs = -1
        start_epoch_id = 0

    # Training loop
    for epoch_id in range(start_epoch_id, epochs + 1):
        print("Starting epoch: ", epoch_id)
        model.train()

        for local_heads, local_relations, local_tails in train_generator:
            local_heads, local_relations, local_tails = (
                local_heads.to(device), local_relations.to(device),
                local_tails.to(device))

            positive_triples = torch.stack(
                (local_heads, local_relations, local_tails), dim=1)

            # Negative sampling: corrupt each positive triple by replacing
            # either its head (mask value 1) or its tail (mask value 0)
            # with a uniformly random entity.
            head_or_tail = torch.randint(high=2,
                                         size=local_heads.size(),
                                         device=device)
            random_entities = torch.randint(high=len(entity2id),
                                            size=local_heads.size(),
                                            device=device)
            broken_heads = torch.where(head_or_tail == 1, random_entities,
                                       local_heads)
            broken_tails = torch.where(head_or_tail == 0, random_entities,
                                       local_tails)
            negative_triples = torch.stack(
                (broken_heads, local_relations, broken_tails), dim=1)

            optimizer.zero_grad()

            # Model returns (loss, positive distances, negative distances).
            loss, pd, nd = model(positive_triples, negative_triples)
            loss.backward()

            summary_writer.add_scalar('Loss/train',
                                      loss.data.cpu().numpy(),
                                      global_step=step)
            summary_writer.add_scalar('Distance/positive',
                                      pd.sum().data.cpu().numpy(),
                                      global_step=step)
            summary_writer.add_scalar('Distance/negative',
                                      nd.sum().data.cpu().numpy(),
                                      global_step=step)

            optimizer.step()
            step += 1

        # Periodic validation; keep only the checkpoint with the best Hits@10.
        if epoch_id % FLAGS.validation_freq == 0:
            model.eval()
            _, _, hits_at_10, _ = test(model=model,
                                       data_generator=validation_generator,
                                       entities_count=len(entity2id),
                                       device=device,
                                       summary_writer=summary_writer,
                                       epoch_id=epoch_id,
                                       metric_suffix="val")
            score = hits_at_10
            print(score)
            if score > best_score:
                best_score = score
                storage.save_checkpoint(model, optimizer, epoch_id, step,
                                        best_score)

    # Testing the best checkpoint on the test dataset.
    # NOTE(review): the path is hardcoded — presumably storage.save_checkpoint
    # writes to "checkpoint.tar"; verify against the storage module.
    storage.load_checkpoint("checkpoint.tar", model, optimizer)
    best_model = model.to(device)
    best_model.eval()
    scores = test(model=best_model,
                  data_generator=test_generator,
                  entities_count=len(entity2id),
                  device=device,
                  summary_writer=summary_writer,
                  epoch_id=1,
                  metric_suffix="test")
    print("Test scores: ", scores)
Exemple #17
0
# Resume-from-checkpoint setup: each component (image model, user embeddings,
# autoencoder, optimizer) is restored from ./Checkpoints/ when a saved file
# exists, otherwise constructed fresh.
# NOTE(review): torch.load on whole pickled objects is unsafe on untrusted
# files and brittle across code versions — prefer state_dicts; confirm the
# checkpoints are always produced by this same codebase.

# Load the model if available
if os.path.isfile(os.getcwd() + "/Checkpoints/img_model"):
    img_model = torch.load(os.getcwd() + "/Checkpoints/img_model")
else:
    img_model = md.ExtractImageVectors(EMBEDDING_DIM)
# Load user vectors
if os.path.isfile(os.getcwd() + "/Checkpoints/user_vts"):
    user_vts = torch.load(os.getcwd() + "/Checkpoints/user_vts")
else:
    # One learnable embedding row per known user.
    user_vts = nn.Embedding(len(users_to_ix), EMBEDDING_DIM)  #,max_norm = 1.0)

# Load AutoEncoder
if os.path.isfile(os.getcwd() + "/Checkpoints/auto_encoder"):
    AE = torch.load(os.getcwd() + "/Checkpoints/auto_encoder")
else:
    AE = md.AutoEncoder()

# Restore the optimizer so training resumes with its accumulated state;
# otherwise start Adam fresh on the autoencoder's parameters.
if os.path.isfile(os.getcwd() + "/Checkpoints/optm"):
    optimizer = torch.load(os.getcwd() + "/Checkpoints/optm")
else:
    optimizer = optim.Adam(AE.parameters(), lr=0.001)

start_time = time.time()
iterind = 0  # iteration counter
t_loss = 0   # running loss accumulator
# Open-ended training loop (continues beyond this chunk).
while 1:
    iterind += 1
    # print len(data)
    # print len(itemsbin)
    # Selecting a data bucket
    i_b_no = random.randint(0, len(itemsbin) - 1)
Exemple #18
0
# Dropout is only meaningful together with data augmentation; disable it otherwise.
args.dropout = args.dropout if args.augmentation else 0.

# Training data: paired masked/raw images listed in wuhanlists.txt.
train_data = MyData_Autoencoder.MyDataset(maskroot=args.maskpicroot,
                                          rawroot=args.rawpicroot,
                                          datatxt='wuhanlists.txt',
                                          transform=None)
data_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
#test_data=MyData_Autoencoder.MyDataset(maskroot=args.maskpicroot,rawroot=args.rawpicroot,datatxt='test.txt', transform=None)
#test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=True)
print(len(data_loader))

# ##############################################################################
# Build model
# ##############################################################################

autoencoder = model.AutoEncoder(args)
# FIX: idiomatic truthiness test instead of `== True`.
if args.Train:
    # Fresh training run: initialize weights from scratch.
    autoencoder._initialize_weights()
else:
    # Otherwise restore previously saved weights.
    autoencoder.load_state_dict(torch.load(args.save)['model'])
# FIX: SFCNET was only assigned inside `if use_cuda:`, so building the
# optimizer below raised NameError on CPU-only machines. Assign it on
# both paths.
SFCNET = autoencoder.cuda() if use_cuda else autoencoder
optimizer = torch.optim.Adam(SFCNET.parameters(), lr=args.lr)

criterion = torch.nn.MSELoss()
testtransform = transforms.ToTensor()
# ##############################################################################
# Training
# ##############################################################################