def main(_):
    """Entry point: prepare output directories, configure TensorFlow, then
    train, run, or load the autoencoder depending on the flags."""
    pp.pprint(flags.FLAGS.__flags)

    # Make sure every output directory exists before anything is written.
    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir, FLAGS.output_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Cap GPU memory usage and allow TF to fall back to CPU placement.
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9),
        device_count={'GPU': 1},
        allow_soft_placement=True,
        # log_device_placement=True,
    )
    config.device_filters.append('/gpu:0')
    config.device_filters.append('/cpu:0')

    with tf.Session(config=config) as sess:
        autoencoder = Autoencoder(
            sess,
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            noise=FLAGS.noise,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
        )
        if FLAGS.is_train:
            autoencoder.train(FLAGS)
        elif FLAGS.is_run:
            autoencoder.run(FLAGS)
        else:
            autoencoder.load(FLAGS.checkpoint_dir)
# Scale pixel values to [0, 1] and restore the (N, 64, 64, 3) image shape.
trainX = np.asarray(trainX).astype("float64") / 255.0
testX = np.asarray(testX).astype("float64") / 255.0
trainX = np.reshape(trainX, (len(trainX), 64, 64, 3))
testX = np.reshape(testX, (len(testX), 64, 64, 3))

# Noise (for denoising training): add Gaussian noise, then clip back to [0, 1].
trainNoise = np.random.normal(loc=0.5, scale=0.5, size=trainX.shape)
testNoise = np.random.normal(loc=0.5, scale=0.5, size=testX.shape)
trainXNoisy = np.clip(trainX + trainNoise, 0, 1)
testXNoisy = np.clip(testX + testNoise, 0, 1)

print("[INFO] building autoencoder...")
opt = 'adadelta'
autoencoder = Autoencoder().build(IMAGE_HEIGHT, IMAGE_WIDTH, 3)
autoencoder.compile(loss="mse", optimizer=opt, metrics=["accuracy"])
autoencoder.summary()

# Train to reconstruct the clean images from their noisy counterparts.
H = autoencoder.fit(trainXNoisy, trainX,
                    validation_data=(testXNoisy, testX),
                    epochs=EPOCHS,
                    batch_size=BS)

# Plot the loss curves over all epochs.
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.title("Training Loss and Accuracy")
class DataLoader:
    """Load, tokenize and vectorize labelled text datasets.

    Builds a token-to-label co-occurrence table ("TCoL") over the training
    split, turns each sample into a concatenation of per-token label
    distributions, and compresses that vector with an (optionally
    variational) autoencoder.
    """

    def __init__(self, tokenizer, max_len, use_vae=False, batch_size=64, ae_epochs=20):
        self._train_set = []
        self._dev_set = []
        self._test_set = []
        self.use_vae = use_vae
        self.batch_size = batch_size
        self.ae_latent_dim = max_len  # latent dim equal to max len
        self.ae_epochs = ae_epochs
        self.train_steps = 0
        self.max_len = max_len
        self.tokenizer = tokenizer
        # token -> {label_id -> co-occurrence count}
        self.tcol_info = defaultdict(dict)
        # token -> normalized label-distribution row vector (filled by set_tcol)
        self.tcol = {}
        self.label2idx = {}
        self.token2cnt = defaultdict(int)
        self.pad = '<pad>'
        self.unk = '<unk>'
        self.autoencoder = None

    def init_autoencoder(self):
        """Lazily construct and compile the (variational) autoencoder."""
        if self.autoencoder is None:
            if self.use_vae:
                self.autoencoder = VariationalAutoencoder(
                    latent_dim=self.ae_latent_dim,
                    epochs=self.ae_epochs,
                    batch_size=self.batch_size)
            else:
                self.autoencoder = Autoencoder(latent_dim=self.ae_latent_dim,
                                               epochs=self.ae_epochs,
                                               batch_size=self.batch_size)
            # Input width = one label-distribution vector per (padded) token.
            self.autoencoder._compile(self.label_size * self.max_len)

    def save_vocab(self, save_path):
        """Pickle the vocabulary/TCoL state to *save_path*."""
        with open(save_path, 'wb') as writer:
            pickle.dump(
                {
                    'tcol_info': self.tcol_info,
                    'tcol': self.tcol,
                    'label2idx': self.label2idx,
                    'token2cnt': self.token2cnt
                }, writer)

    def load_vocab(self, save_path):
        """Restore the state previously written by save_vocab."""
        with open(save_path, 'rb') as reader:
            obj = pickle.load(reader)
            for key, val in obj.items():
                setattr(self, key, val)

    def save_autoencoder(self, save_path):
        # The wrapper exposes its underlying Keras model as `.autoencoder`.
        self.autoencoder.autoencoder.save_weights(save_path)

    def load_autoencoder(self, save_path):
        self.init_autoencoder()
        self.autoencoder.autoencoder.load_weights(save_path)

    def set_train(self, train_path):
        """set train dataset"""
        self._train_set = self._read_data(train_path, build_vocab=True)

    def set_dev(self, dev_path):
        """set dev dataset"""
        self._dev_set = self._read_data(dev_path)

    def set_test(self, test_path):
        """set test dataset"""
        self._test_set = self._read_data(test_path)

    @property
    def train_set(self):
        return self._train_set

    @property
    def dev_set(self):
        return self._dev_set

    @property
    def test_set(self):
        return self._test_set

    @property
    def label_size(self):
        return len(self.label2idx)

    def save_dataset(self, setname, fpath):
        """Write one split ('train'/'dev'/'test') to *fpath* as JSON lines."""
        if setname == 'train':
            dataset = self.train_set
        elif setname == 'dev':
            dataset = self.dev_set
        elif setname == 'test':
            dataset = self.test_set
        else:
            raise ValueError(f'not support set {setname}')
        with open(fpath, 'w') as writer:
            for data in dataset:
                writer.writelines(json.dumps(data, ensure_ascii=False) + "\n")

    def load_dataset(self, setname, fpath):
        """Load one split from a JSON-lines file written by save_dataset."""
        if setname not in ['train', 'dev', 'test']:
            raise ValueError(f'not support set {setname}')
        dataset = []
        with open(fpath, 'r') as reader:
            for line in reader:
                dataset.append(json.loads(line.strip()))
        if setname == 'train':
            self._train_set = dataset
        elif setname == 'dev':
            self._dev_set = dataset
        elif setname == 'test':
            self._test_set = dataset

    def add_tcol_info(self, token, label):
        """ add TCoL """
        # Count one more co-occurrence of (token, label).
        if label not in self.tcol_info[token]:
            self.tcol_info[token][label] = 1
        else:
            self.tcol_info[token][label] += 1

    def set_tcol(self):
        """ set TCoL """
        # Reserved ids 0/1 (pad/unk) get all-zero label distributions.
        self.tcol[0] = np.array([0] * self.label_size)  # pad
        self.tcol[1] = np.array([0] * self.label_size)  # unk
        self.tcol[0] = np.reshape(self.tcol[0], (1, -1))
        self.tcol[1] = np.reshape(self.tcol[1], (1, -1))
        for token, label_dict in self.tcol_info.items():
            vector = [0] * self.label_size
            for label_id, cnt in label_dict.items():
                # Relative frequency of each label among this token's occurrences.
                vector[label_id] = cnt / self.token2cnt[token]
            vector = np.array(vector)
            self.tcol[token] = np.reshape(vector, (1, -1))

    def parse_tcol_ids(self, data, build_vocab=False):
        """Attach an encoder-compressed TCoL vector ('tcol_ids') to each sample.

        When *build_vocab* is true (training split) this also freezes the
        TCoL table and fits the autoencoder on the resulting matrix.
        """
        if self.use_vae:
            # Truncate so the sample count is a multiple of batch_size —
            # presumably the VAE requires full batches; TODO confirm.
            print("batch alignment...")
            print("previous data size:", len(data))
            keep_size = len(data) // self.batch_size
            data = data[:keep_size * self.batch_size]
            print("alignment data size:", len(data))
        if build_vocab:
            print("set tcol....")
            self.set_tcol()
            print("token size:", len(self.tcol))
            print("done to set tcol...")
        tcol_vectors = []
        for obj in data:
            # Pad token ids to max_len, then concatenate each token's label
            # distribution; unseen tokens fall back to the unk vector (id 1).
            padded = [0] * (self.max_len - len(obj['token_ids']))
            token_ids = obj['token_ids'] + padded
            tcol_vector = np.concatenate([
                self.tcol.get(token, self.tcol[1])
                for token in token_ids[:self.max_len]
            ])
            tcol_vector = np.reshape(tcol_vector, (1, -1))
            tcol_vectors.append(tcol_vector)
        print("train vae...")
        if len(tcol_vectors) > 1:
            X = np.concatenate(tcol_vectors)
        else:
            X = tcol_vectors[0]
        if build_vocab:
            self.init_autoencoder()
            self.autoencoder.fit(X)
        # Compress every sample's TCoL matrix to its latent representation.
        X = self.autoencoder.encoder.predict(X, batch_size=self.batch_size)
        # decomposite
        assert len(X) == len(data)
        for x, obj in zip(X, data):
            obj['tcol_ids'] = x.tolist()
        return data

    def _read_data(self, fpath, build_vocab=False):
        """Read a JSON-lines file, tokenize it, and build vocab/TCoL stats.

        NOTE(review): indentation reconstructed from a collapsed source —
        tokenization is assumed to run for every split while only the label
        registration is gated on build_vocab; confirm against the original.
        """
        data = []
        with open(fpath, "r", encoding="utf-8") as reader:
            for line in reader:
                obj = json.loads(line)
                obj['text'] = clean_str(obj['text'])
                if build_vocab:
                    if obj['label'] not in self.label2idx:
                        self.label2idx[obj['label']] = len(self.label2idx)
                tokenized = self.tokenizer.encode(obj['text'])
                token_ids, segment_ids = tokenized.ids, tokenized.segment_ids
                for token in token_ids:
                    self.token2cnt[token] += 1
                    self.add_tcol_info(token, self.label2idx[obj['label']])
                data.append({
                    'token_ids': token_ids,
                    'segment_ids': segment_ids,
                    'label_id': self.label2idx[obj['label']]
                })
        data = self.parse_tcol_ids(data, build_vocab=build_vocab)
        return data
# NOTE(review): this snippet references names (args, folder, prefix,
# total_latent_lst, device, ...) defined before this chunk.
# Persist the collected latent vectors.  The label embedded in the file name
# records which class was evaluated: 0 for the negative class, 1 otherwise.
save_label = 0 if args.eval_negative else 1
save_path = folder + '{}_{}.pkl'.format(prefix, save_label)
with open(save_path, 'wb') as f:
    pickle.dump(total_latent_lst, f)
print("Save latent in ", save_path)  # fixed typo: "laten" -> "latent"

if __name__ == '__main__':
    preparation()
    # Transformer-based autoencoder plus an attribute classifier on its latent.
    ae_model = Autoencoder(d_model=args.transformer_model_size,
                           d_ff=args.transformer_ff_size,
                           nlayers=args.num_layers_AE,
                           args=args,
                           device=device)
    dis_model = Attr_Classifier(latent_size=args.latent_size,
                                output_size=args.label_size)
    if args.load_prev:
        try:
            ae_model.load_state_dict(torch.load(args.current_save_path / 'ae_model_params.pkl'))
            dis_model.load_state_dict(torch.load(args.current_save_path / 'dis_model_params.pkl'))
        except Exception:
            print("Cannot find model pkl! You need to train the model first")
            exit(1)
    if args.training:
        train_iters(ae_model, dis_model)
transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])), batch_size=args.batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(datasets.MNIST( '../data', train=False, transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])), batch_size=args.batch_size, shuffle=True) autoencoder = Autoencoder(args.input_size, args.output_size, args.hidden_size, bn=False) optimizer = optim.Adam(autoencoder.parameters(), lr=0.0002) criterion = nn.BCELoss() hmc = Hamiltonian(autoencoder.decoder, args.output_size, 0.1, args.num_steps_in_leap, args.num_samples) if args.cuda: autoencoder = autoencoder.cuda() for epoch in range(1, args.epochs + 1): autoencoder.train() train_loss = 0 mnist_data = list(iter(train_loader)) for batch_idx in range(0, 1000): data = torch.FloatTensor(mnist_data[batch_idx][0])
def main():
    """Train a 3-D autoencoder on matrix data and plot the training history."""
    parser = argparse.ArgumentParser(
        description=
        'py, train_data_txt, train_data_ture_txt, validation_data_txt, outdir')
    parser.add_argument('--train_data_txt', '-i1', default='', help='train data list')
    parser.add_argument('--train_ground_truth_txt', '-i2', default='', help='train ground truth list')
    parser.add_argument('--validation_data_txt', '-i3', default='', help='validation data list')
    parser.add_argument('--validation_ground_truth_txt', '-i4', default='', help='validation ground truth list')
    parser.add_argument('--outdir', '-i5', default='', help='outdir')
    args = parser.parse_args()

    # check folder
    if not (os.path.exists(args.outdir)):
        os.mkdir(args.outdir)

    # define
    batch_size = 3
    epoch = 2500

    # load train data (trailing channel axis added for the network input)
    train_data = io.load_matrix_data(args.train_data_txt, 'float32')
    train_data = np.expand_dims(train_data, axis=4)

    # load train ground truth
    train_truth = io.load_matrix_data(args.train_ground_truth_txt, 'float32')
    train_truth = np.expand_dims(train_truth, axis=4)

    # load validation data
    val_data = io.load_matrix_data(args.validation_data_txt, 'float32')
    val_data = np.expand_dims(val_data, axis=4)

    # load validation ground truth
    val_truth = io.load_matrix_data(args.validation_ground_truth_txt, 'float32')
    val_truth = np.expand_dims(val_truth, axis=4)

    print(' number of training: {}'.format(len(train_data)))
    print('size of traning: {}'.format(train_data.shape))
    print(' number of validation: {}'.format(len(val_data)))
    print('size of validation: {}'.format(val_data.shape))

    # Spatial dimensions of the volumes drive the network construction.
    image_size = []
    image_size.extend([
        list(train_data.shape)[1],
        list(train_data.shape)[2],
        list(train_data.shape)[3]
    ])

    # set network
    network = Autoencoder(*image_size)
    model = network.model()
    model.summary()
    model.compile(optimizer='Nadam',
                  loss=losses.mean_squared_error,
                  metrics=['mse'])

    # set data_set (batch_iter yields (steps_per_epoch, generator))
    train_steps, train_data = batch_iter(train_data, train_truth, batch_size)
    valid_steps, val_data = batch_iter(val_data, val_truth, batch_size)

    # fit network; a checkpoint is written each epoch with val loss in the name
    model_checkpoint = ModelCheckpoint(os.path.join(
        args.outdir, 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'),
                                       verbose=1)
    history = model.fit_generator(train_data,
                                  steps_per_epoch=train_steps,
                                  epochs=epoch,
                                  validation_data=val_data,
                                  validation_steps=valid_steps,
                                  verbose=1,
                                  callbacks=[model_checkpoint])
    plot_history(history, args.outdir)
def __init__(self, path):
    """Build the autoencoder and restore its weights from the checkpoint at *path*."""
    self.model = Autoencoder().float()
    state = load_checkpoint(path)
    self.model.load_state_dict(state['model_state'])
    # Switch to inference mode (disables dropout/batch-norm updates).
    self.model.eval()
data_transformed = myIsomap(n_components=d).fit_transform(dist_mfd) elif method == 'LLE': data_transformed = myLocallyLinearEmbedding( n_components=d, eigen_solver='auto').fit_transform(adj_mfd, data) elif method == 'LTSA': data_transformed = myLocallyLinearEmbedding( n_components=d, eigen_solver='auto', method='ltsa').fit_transform(adj_mfd, data) elif method == 'AE': data_torch = torch.from_numpy(data).float() AE = Autoencoder(D, d) loss_fn = nn.MSELoss() optimizer = torch.optim.Adam(AE.parameters(), lr=0.1) epochs = 1000 for i in range(epochs): optimizer.zero_grad() output = AE(data_torch) loss = loss_fn(output, data_torch) loss.backward() optimizer.step() if i % 100 == 0: print('Epoch {}: {:.4f}'.format(i, loss))
def main():
    """Train a weight-decayed autoencoder on MNIST and visualize its filters."""
    parser = argparse.ArgumentParser(description="train autoencoder")
    parser.add_argument('--data_dir', type=str, default="MNIST_data/", help='directory of input dataset')
    parser.add_argument('--n_input', type=int, default=28 * 28, help='number of input nodes')
    parser.add_argument('--n_hidden', type=int, default=49, help='number of hidden nodes')
    parser.add_argument('--weight_coef', type=float, default=0.1, help='coefficient for weight decay term')
    parser.add_argument('--iter_max', type=int, default=100000, help='maximum number of iterations')
    parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate for gradient-like method')
    parser.add_argument('--batch_size', type=int, default=50, help='size of mini batch')
    parser.add_argument('--display_step', type=int, default=1000, help='how often show learning state')
    args = parser.parse_args()

    mnist = input_data.read_data_sets(args.data_dir)
    inputs = tf.placeholder(tf.float32, [None, args.n_input])
    encoder = Autoencoder(inputs, n_input=args.n_input, n_hidden=args.n_hidden)
    # Objective: reconstruction error plus L2 weight decay.
    cost = (encoder.reconst_error() + args.weight_coef * encoder.weight_decay())
    optimizer = tf.train.AdamOptimizer(args.learning_rate).minimize(cost)

    with tf.Session() as sess:
        # NOTE(review): tf.initialize_all_variables is deprecated in favor of
        # tf.global_variables_initializer; kept for the old TF this targets.
        sess.run(tf.initialize_all_variables())
        # range() instead of the Python-2-only xrange(); works on 2 and 3.
        for i in range(args.iter_max):
            batch = mnist.train.next_batch(args.batch_size)[0]
            sess.run(optimizer, feed_dict={inputs: batch})
            if i % args.display_step == 0:
                loss = sess.run(cost, feed_dict={inputs: batch})
                # print() with a pre-formatted string is valid on Python 2 and
                # 3 (the original `print "..."` statement is a SyntaxError on 3).
                print("step %5d, cost %f" % (i, loss))
        weights = sess.run(encoder.fc1w)

        # Arrange the learned hidden-unit filters on a square grid.
        len_ = int(args.n_hidden**0.5)
        assert len_**2 == args.n_hidden
        plt.figure()
        for i in range(args.n_hidden):
            plt.subplot(len_, len_, i + 1)
            plt.imshow(weights[:, i].reshape(28, 28), cmap='gray')
            plt.axis('off')
        plt.show()
import torch
import glob
import os
import argparse
from model import Autoencoder

# Convert a (possibly GPU-trained) checkpoint into a CPU state dict in place.
model = Autoencoder(2319)
# map_location='cpu' lets this script run on machines without CUDA even when
# the checkpoint was saved from GPU tensors (torch.load would otherwise try
# to deserialize onto the original CUDA device and fail).
model.load_state_dict(torch.load('model.pt', map_location='cpu'))
model = model.to('cpu')
torch.save(model.state_dict(), 'model.pt')
recon_x = recon_x.cpu() w2 = float(self.weight_swd)*sliced_wasserstein_distance(z, self.distribution_fn, self.num_projections, self.p) w2 = w2.cuda() loss = l1+bce+w2 return {'loss': loss, 'bce': bce, 'l1': l1, 'w2': w2, 'encode': z, 'decode': recon_x} mnist = torch.utils.data.DataLoader(datasets.MNIST("./mnist/", train=True, download=True, transform=transforms.Compose([ transforms.ToTensor() ])), batch_size=128, shuffle=True) cudnn.benchmark = True ae = Autoencoder().cuda() print(ae) optimizer = torch.optim.Adam(ae.parameters()) total_epoch = 50 trainer = SAE(ae, optimizer, random_uniform, num_projections=25) ae.train() for epoch in range(total_epoch): for index, (img, label) in enumerate(mnist): img = img.cuda() #img = img.expand(img.data.shape[0], 3, 28, 28) batch_result = trainer.train(img)
def predict():
    """Run the trained 3-D autoencoder on a test set and write EUDT/label volumes."""
    parser = argparse.ArgumentParser(
        description='py, test_data_list, name_list, outdir')
    parser.add_argument('--test_data_list', '-i1', default='', help='test data')
    parser.add_argument('--name_list', '-i2', default='', help='name list')
    parser.add_argument('--model', '-i3', default='', help='model')
    parser.add_argument('--outdir', '-i4', default='', help='outdir')
    args = parser.parse_args()

    if not (os.path.exists(args.outdir)):
        os.mkdir(args.outdir)

    # load name_list (one whitespace-split entry per non-empty line)
    name_list = []
    with open(args.name_list) as paths_file:
        for line in paths_file:
            line = line.split()
            if not line:
                continue
            name_list.append(line[:])
    print('number of test data : {}'.format(len(name_list)))

    # Load test volumes and add a trailing channel axis.
    test_data = io.load_matrix_data(args.test_data_list, 'float32')
    test_data = np.expand_dims(test_data, axis=4)
    print(test_data.shape)

    # Spatial dimensions drive the network construction.
    image_size = []
    image_size.extend([
        list(test_data.shape)[1],
        list(test_data.shape)[2],
        list(test_data.shape)[3]
    ])
    print(image_size)

    # set network
    network = Autoencoder(*image_size)
    model = network.model()
    model.load_weights(args.model)
    preds = model.predict(test_data, 1)  # batch size 1
    preds = preds[:, :, :, :, 0]  # drop the channel axis
    print(preds.shape)

    for i in range(preds.shape[0]):
        # EUDT: write the raw predicted distance volume
        eudt_image = sitk.GetImageFromArray(preds[i])
        eudt_image.SetSpacing([1, 1, 1])
        eudt_image.SetOrigin([0, 0, 0])
        # label: threshold at 0 (inside where prediction <= 0)
        label = np.where(preds[i] > 0, 0, 1)
        label_image = sitk.GetImageFromArray(label)
        label_image.SetSpacing([1, 1, 1])
        label_image.SetOrigin([0, 0, 0])
        io.write_mhd_and_raw(
            eudt_image,
            '{}.mhd'.format(os.path.join(args.outdir, 'EUDT', *name_list[i])))
        io.write_mhd_and_raw(
            label_image,
            '{}.mhd'.format(os.path.join(args.outdir, 'label', *name_list[i])))
img_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) train_dataset = MNIST(data_dir, train=True, download=True, transform=img_transform) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) test_dataset = MNIST(data_dir, train=False, download=True, transform=img_transform) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True) model = Autoencoder() if use_gpu: model.cuda() criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) loss_list = [] test_loss_list = [] for epoch in range(num_epochs + 1): # train train_loss = 0 num_iters = 0
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--epochs', help='Number of epochs to train for', type=int, default=3)
    parser.add_argument('-l', '--sample-length', help='Number of values in sample', type=int, default=500)
    parser.add_argument('-i', '--input-data-path', help='Where the .wav files to train are', type=str, default="input_data/")
    parser.add_argument('-o', '--output-save-path', help='Where to save the trained model', type=str, default="saved_models/model.pth")
    args = parser.parse_args()

    # Build an autoencoder sized to the sample length and train it on the .wav data.
    model = Autoencoder(args.sample_length)
    model = train(model, args.input_data_path, num_epochs=args.epochs, sample_length=args.sample_length)
    # NOTE(review): this pickles the entire module object rather than just the
    # state dict, so loading requires the identical class definition on the path.
    torch.save(model, args.output_save_path)
from torchvision.utils import save_image from model import Autoencoder from visualize import * # hyperparameters num_epochs = 100 batch_size = 128 lr = 1e-3 # get images from MNIST database dataset = MNIST('../data', transform=transforms.ToTensor(), download=True) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) # create autoencoder and optimizer for it autoencoder = Autoencoder() optimizer = optim.Adam(autoencoder.parameters(), lr=lr) # start training for epoch in range(num_epochs): # minibatch optimization with Adam for data in dataloader: img, _ = data # change the images to be 1D img = img.view(img.size(0), -1) # get output from network out = autoencoder(img)
# Load the MNIST test split (the train split is loaded before this chunk).
x_test, y_test = load_mnist(is_train=False, flatten=True)
train_cnt = int(x_train.size(0) * config.train_ratio)
valid_cnt = x_train.size(0) - train_cnt

# Shuffle dataset (Train - Valid)
index = torch.randperm(x_train.size(0))
x_train, x_valid = torch.index_select(x_train, dim=0, index=index).split([train_cnt, valid_cnt], dim=0)
y_train, y_valid = torch.index_select(y_train, dim=0, index=index).split([train_cnt, valid_cnt], dim=0)

print("Train: ", x_train.shape, y_train.shape)
print("Valid: ", x_valid.shape, y_valid.shape)
print("Test: ", x_test.shape, y_test.shape)

# Model Object & Optimizer, Criterion Settings
model = Autoencoder(btl_size=config.btl_size)
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()

# Model Train
trainer = Trainer(model, optimizer, criterion)
# Because this is an encoder-decoder (autoencoder), inputs double as targets,
# so only the x tensors are used.
trainer.train((x_train, x_train), (x_valid, x_valid), config)

# Model Test: reconstruct one randomly chosen test image.
with torch.no_grad():
    import random
    idx = int(random.random() * x_test.size(0))
    recon = model(x_test[idx].view(1, -1)).squeeze()
args = parser.parse_args() os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu with open(args.topology, "r") as topology: num_filters = tuple(map(int, topology.readline()[1:-1].split(', '))) files = glob.glob(os.path.join(args.file_dir, "*.png")) input_shape = (None, None, 3) # Reconstruct model from saved weights model = Autoencoder(input_shape=input_shape, num_filters=num_filters) model = model.build() print(model.summary()) model.load_weights(args.weights) model.compile(optimizer="adam", loss="MSE", metrics=["accuracy"]) # Generate time stamp for unique id of the result time_stamp = "{date:%Y-%m-%d-%H-%M-%S}".format(date=datetime.datetime.now()) # Pass images to network for file, i in zip(files, range(len(files))): inp_img = cv2.imread(file) / 255 inp_img = np.expand_dims(inp_img, axis=0)
random.shuffle(files) files = dataloader.split_files(files, train=0.8, valid=0.2) train_ds = dataloader.load_and_patch(files[0], "fit", args.patch_shape, args.n_patches, args.batch_size, args.prefetch, args.num_parallel_calls, shuffle=None, repeat=True) valid_ds = dataloader.load_and_patch(files[1], "fit", args.patch_shape, args.n_patches, args.batch_size, args.prefetch, args.num_parallel_calls, shuffle=None, repeat=True) test_ds, test_gt = dataloader.load_and_patch(test_files, "inf", num_parallel_calls=args.num_parallel_calls, batch_size=8) input_shape = (None, None, 3) model = Autoencoder(input_shape=input_shape, num_filters=num_filters) model = model.build() print(model.summary()) if args.train_continue: model.load_weights(args.weights_path) # Train the model model.compile(optimizer=optimizer, loss="MSE", metrics=['accuracy']) history = model.fit(train_ds, steps_per_epoch=500, epochs=args.n_epochs, validation_data=valid_ds, validation_steps=250,
from model import Autoencoder

# Instantiate the model with its boolean constructor flag enabled and draw a
# sample.  NOTE(review): the meaning of `True` is not visible here — confirm
# against model.Autoencoder's signature.
model = Autoencoder(True)
model.sample()
class Main(Frame):
    """Top-level application frame: a chess-board UI wired to an autoencoder
    ("coder") for position encoding and similarity search, plus optional
    lichess game browsing."""

    def __init__(
        self,
        master,
        color_palette,
        home_path,
        pieces_name,
        pieces_path,
        arrows_path,
        images_sizes,
        selected_piece,
        pieces_padding,
        header_height=60,
        option_width=100,
        message_width=600,
        message_height=200,
        find_options_width=600,
        find_options_height=400,
    ):
        Frame.__init__(self, master, bg=color_palette[0])
        master.rowconfigure(0, weight=1)
        master.columnconfigure(0, weight=1)
        self.main = self
        self.coder_set = False
        self.home_path = home_path
        self.current_pages = 0
        # One FEN per page; start from the standard initial position.
        self.pages_fens = ["rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"]
        self.pieces_name = pieces_name
        self.pieces_path = pieces_path
        self.arrows_path = arrows_path
        self.images_sizes = images_sizes
        self.selected_piece = selected_piece
        self.pieces_padding = pieces_padding
        self.color_palette = color_palette
        # entering: the user is editing a position; follow_fen: keep the
        # current page's FEN in sync with the displayed board.
        self.entering = True
        self.follow_fen = True
        self.fen_placement = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR"
        self.fen_player = "w"
        self.fen_castling = "KQkq"
        self.header_height = header_height
        self.option_width = option_width
        self._create_widgets(message_width, message_height, find_options_width, find_options_height)
        self.bind("<Configure>", self._resize)
        self.winfo_toplevel().minsize(600, 600)
        self.display_fen()
        self.coder = None
        self.games = None
        self.store_games = None
        self.lichess_set = False
        self.coder_launcher = None
        self.set_fen("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
        self.set_coder(settings.CODER_PATH)

    def _create_widgets(self, message_width, message_height, find_options_width, find_options_height):
        """Instantiate all child widgets and lay out the main grid."""
        self.board_box = BoardBox(self)
        self.option_box = Options(self, self.option_width)
        self.header = Header(self, header_height=self.header_height)
        self.pgn_box = PGNOptions(self, self.option_width)
        self.tensor_message = TensorDisplayer(self, message_width, message_height)
        self.find_option = FindOptions(self, find_options_width, find_options_height)
        self.lichess_set_option = LichessSetOptions(self, find_options_width, find_options_height)
        self.board_box.grid(row=1, column=0, sticky=N + S + E + W)
        self.option_box.grid(row=1, column=1, sticky=N + S + E + W)
        self.header.grid(row=0, column=0, columnspan=2, sticky=N + S + E + W)
        self.rowconfigure(1, weight=1)
        self.columnconfigure(0, weight=1)

    def show_lichess_options(self):
        """Show the lichess-account dialog and hide the find dialog."""
        self.lichess_set_option.place(relx=0.5, rely=0.5, anchor=CENTER)
        self.find_option.place_forget()

    def set_lichess(self, name, n_games):
        """Download up to *n_games* games for lichess user *name* and switch
        the side panel into PGN-browsing mode."""
        try:
            with urlopen("https://lichess.org/api/games/user/{}?max={}&perfType=ultraBullet,bullet,blitz,rapid,classical,correspondence".format(name, n_games)) as pgn:
                self.games = pgn_games(pgn, n_games)
            self.lichess_set_option.place_forget()
            self.header.coder_label["text"] = "Account set"
            self.follow_fen = True
            self.entering = False
            self.lichess_set = True
            self.set_fen()
            self.option_box.grid_forget()
            self.pgn_box.grid(row=1, column=1, sticky=N + S + E + W)
            self.pgn_box.set_game_number()
        except Exception:
            # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
            # still propagate instead of being reported as a lichess failure.
            self.header.display_fen("Lichess Account couldn't be set", "", "")

    def _resize(self, event):
        """Modify padding when window is resized."""
        w, h = event.width, event.height
        self.rowconfigure(1, weight=h - self.header_height)
        self.columnconfigure(0, weight=w - self.option_width)

    def display_fen(self):
        """Push the current FEN fields to the header; record the FEN for the
        current page when follow mode is on."""
        self.header.display_fen(self.fen_placement, self.fen_player, self.fen_castling)
        if self.follow_fen:
            self.pages_fens[self.current_pages] = " ".join(
                [self.fen_placement, self.fen_player, self.fen_castling, "- 0 0"]
            )

    def set_fen(self, fen=None):
        """Set the displayed position from *fen*, or from the current game
        when browsing (entering mode off)."""
        if not self.entering:
            # Browsing a loaded game: take the FEN from the game's board.
            fen = self.games.board.fen()
            split_fen = fen.split()
            self.fen_placement = split_fen[0]
            self.fen_player = split_fen[1]
            self.fen_castling = split_fen[2]
            self.board_box.board.set_board(self.fen_placement)
            if self.follow_fen:
                self.pages_fens[self.current_pages] = fen
            self.display_fen()
            return
        try:
            # Round-trip through chess.Board to validate/normalize the FEN.
            a = chess.Board(fen)
            fen = a.fen()
            del a
            split_fen = fen.split()
            self.fen_placement = split_fen[0]
            self.fen_player = split_fen[1]
            self.fen_castling = split_fen[2]
            self.option_box.set_option(self.fen_player, self.fen_castling)
            self.board_box.board.set_board(self.fen_placement)
            self.pages_fens[self.current_pages] = fen
        except ValueError:
            self.header.display_fen("Incorrect fen", "", "")

    def set_coder(self, filename):
        """Load the autoencoder checkpoint at *filename*; return True on success."""
        try:
            self.coder = Autoencoder(settings.BOARD_SHAPE, settings.LATENT_SIZE).to(
                settings.DEVICE
            )
            self.coder.load_state_dict(torch.load(filename, map_location=settings.DEVICE))
            self.coder = self.coder.coder  # keep only the encoder part
            self.coder.eval()
            self.coder_launcher = Inference(
                settings.DEVICE,
                self.coder,
            )
            self.coder_set = True
            self.header.coder_label["text"] = "Coder Set"
            return True
        except Exception:
            # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
            # are not swallowed; any model-loading failure still returns False.
            return False

    def show_find_option(self):
        """Open the find-similar dialog (requires a loaded coder)."""
        if self.coder_set:
            self.find_option.place(relx=0.5, rely=0.5, anchor=CENTER)
            self.lichess_set_option.place_forget()
        else:
            self.header.coder_label["text"] = "Set Coder first"

    def run_coder(self, number, comparison):
        """Encode the current position, display the tensor, and switch the
        panel to the *number* most similar games under *comparison*."""
        if self.coder_set:
            if self.lichess_set:
                self.store_games = self.games
            output = str(
                self.coder_launcher.predict([self.pages_fens[self.current_pages]])
            )
            self.find_option.place_forget()
            self.display_tensor(output)
            self.games = find_similar(self.pages_fens[self.current_pages], number, similarity_functions[comparison])
            self.entering = False
            self.follow_fen = False
            self.set_fen()
            self.option_box.grid_forget()
            self.pgn_box.grid(row=1, column=1, sticky=N + S + E + W)
            self.pgn_box.set_game_number()

    def exit_pgn_options(self):
        """Leave PGN browsing and restore the manual-entry options panel."""
        if self.lichess_set:
            if self.follow_fen:
                self.lichess_set = False
            else:
                # Return from similarity results to the stored lichess games.
                self.games = self.store_games
                self.follow_fen = True
                self.set_fen()
                self.option_box.grid_forget()
                self.pgn_box.set_game_number()
                return
        self.pgn_box.grid_forget()
        self.option_box.grid(row=1, column=1, sticky=N + S + E + W)
        self.entering = True
        self.follow_fen = True
        self.set_fen(self.pages_fens[self.current_pages])

    def display_tensor(self, message):
        """Show *message* in the tensor overlay."""
        self.tensor_message.set_message(message)
        self.tensor_message.place(relx=0.5, rely=0.5, anchor=CENTER)

    def stop_display_tensor(self):
        """Hide the tensor overlay."""
        self.tensor_message.place_forget()
# Online run. Use dataset provided by training notebook. else: logger.info('Running in online mode...') experiment = run.experiment workspace = experiment.workspace # Prepare the datasets. dataset_train, dataset_validate, dataset_anomaly = create_datasets( workspace, experiment, run, offline_run, CONFIG) # Create the model. model = Autoencoder(family=CONFIG.MODEL_FAMILY, input_shape=(CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH, CONFIG.IMAGE_TARGET_DEPTH), filters=CONFIG.FILTERS, latent_dim=CONFIG.LATENT_DIM, size=CONFIG.MODEL_SIZE) #model.summary() # Make sure that output path exists. outputs_path = "outputs" if not os.path.exists(outputs_path): os.mkdir(outputs_path) # TODO Make some checkpoints work. #best_model_path = str(DATA_DIR / f'outputs/{MODEL_CKPT_FILENAME}') #checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( # filepath=best_model_path, # monitor="val_loss",
# # test_mtx=np.array(test_mtx).T # # print(test_mtx.shape) # # plt.figure(dpi=500) # # librosa.display.specshow(test_mtx) # # plt.savefig('./test_mtx.pdf') # # # org # # file_name = Info.test_path+Info.test_arr[0] # # org = np.load(Info.test_path+Info.test_arr[0]) # # org = librosa.amplitude_to_db(org, ref=1.0) # # plt.figure(dpi=500) # # librosa.display.specshow(org) # # plt.savefig('./org.pdf') # ######################################Test End################################## model = Autoencoder().cpu() distance = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(),weight_decay=1e-5) num_epochs = Info.num_epochs print(model) ####################################Training#################################### print("Start training {} epochs.".format(num_epochs)) loss_arr=[] embed_mtx=[] decode_mtx=[] for epoch in range(num_epochs): for data in train_loader: data = Variable(data).cpu() _, output = model(data)
batch_size=batch_size, chunk_size=1, shuffle=shuffle) stream = data_io.threaded(stream, queue_size=5) return stream if __name__ == "__main__": directory = sys.argv[1] filenames = [directory + "/%05d_batched.pkl.gz" % i for i in range(9000)] # print(filenames) train_count = int(len(filenames) * 0.9) train_filenames = filenames[:train_count] valid_filenames = filenames[train_count:] model = Autoencoder(0, 1) # valid_data = torch.from_numpy(signal_data_valid).cuda()[:, None, :] for p in model.parameters(): if p.dim() > 1: torch.nn.init.xavier_uniform_(p) # model = torch.load('model.pt') model = model.cuda() parameters = model.parameters() optimizer = optim.Adam(parameters, lr=1e-3) # , weight_decay=1e-6) # optimizer = optim.SGD(parameters, lr=0.05, momentum=0.999) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True,
class Searcher:
    """This class wraps the searcher, creating a search space and giving
    recommendations.

    The search space is a collection of encoder feature vectors (one per
    audio file, averaged over the file's spectrogram windows) plus metadata;
    recommendations are nearest neighbours of an input sample's feature
    vector in a PCA projection of that space.
    """

    def __init__(self):
        self.data = []
        self.space_data = []
        self.space_meta = []
        self.files = []
        self.collector = DataCollector(conf.dir_path)
        self.ae = Autoencoder()
        self.ae.load_model(conf.model_path)
        # Encoder sub-model: the trained autoencoder truncated at the
        # configured bottleneck layer.
        self.encoder = Model(self.ae.model.input,
                             self.ae.model.layers[conf.encoder_output_layer].output)

    def get_feature_vector(self, spec_data):
        """Generate a feature vector from provided spectrogram data using the
        trained encoder model.

        :param spec_data: spectrogram data
        :return: A feature vector with 1024 dimensions.
        """
        # Convert power spectrogram to dB and rescale [-80, 0] dB -> [0, 1],
        # matching the autoencoder's training normalisation.
        spec_data = (librosa.power_to_db(spec_data, ref=np.max) + 80) / 80
        spec_data = np.reshape(spec_data, (1,) + self.ae.shape)
        return self.encoder.predict(spec_data)

    def draw_features(self, spec_data, recon_data, feature_vector):
        """Draw the data passed in the parameters; any argument may be None
        to skip that plot.

        :param spec_data: original mel spectrogram (flattened or 2-D, 128x256)
        :param recon_data: reconstructed spectrogram, scaled to [0, 1]
        :param feature_vector: encoder output, reshaped to a 16x64 image
        """
        if spec_data is not None:
            plt.figure()
            plt.axis('off')
            librosa.display.specshow(spec_data.reshape(128, 256),
                                     y_axis='mel', fmax=8000, x_axis='time')
            plt.show()
        if recon_data is not None:
            plt.figure()
            plt.axis('off')
            # Undo the [0, 1] normalisation back to dB for display.
            back_scaled = recon_data * 80 - 80
            librosa.display.specshow(back_scaled.reshape(128, 256),
                                     y_axis='mel', fmax=8000, x_axis='time')
            plt.show()
        if feature_vector is not None:
            plt.figure()
            plt.axis('off')
            plt.imshow(feature_vector.reshape(16, 8 * 8))
            plt.show()

    def create_search_space(self):
        """Create the search space for the recommendation.

        Uses data from the DataCollector class. From this data feature
        vectors are computed (averaged per file when a file has several
        spectrogram windows). The space and its metadata are also saved
        on disk.
        """
        self.data = self.collector.collect_training_data()
        # Group the collected windows by source file.
        result = collections.defaultdict(list)
        for d in self.data:
            result[d['filename']].append(d)
        result_list = list(result.values())
        for entry in result_list:
            d = entry[0]
            feature_vectors = []
            if len(entry) > 1:
                # Average the feature vectors of all windows of this file.
                for part in entry:
                    vector = self.get_feature_vector(part['data'])
                    feature_vectors.append(vector)
                vec = np.average(np.array(feature_vectors), axis=0)[0]
            else:
                vec = self.get_feature_vector(entry[0]['data'])
                vec = np.reshape(vec, (8, 16, 8))
            # Replace raw data by its index into space_data; normalise to max 1.
            d['data'] = len(self.space_data)
            self.space_data.append(vec / np.max(vec))
            if d['filename'] not in self.files:
                self.files.append(d['filename'])
            self.space_meta.append(d)
        self.space_data = np.asarray(self.space_data)
        print('Writing search space files.')
        np.save(conf.dir_path + conf.space_file, self.space_data)
        with open(conf.dir_path + conf.space_meta_file, 'w') as f:
            json.dump(self.space_meta, f)

    def load_space(self):
        """Load a saved feature vector file, rebuilding the search space
        from scratch when the files are missing.
        """
        try:
            self.space_data = np.load(conf.dir_path + conf.space_file)
            with open(conf.dir_path + conf.space_meta_file, 'r') as f:
                self.space_meta = json.load(f)
        except FileNotFoundError:
            print('Required files not found, running data operations.')
            self.create_search_space()

    def compute_tsne(self, draw=False):
        """Create a 2D feature space and can plot an image of this space.

        :param draw: Draws pyplot when true.
        """
        tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
        shape = (self.space_data.shape[1] * self.space_data.shape[2]
                 * self.space_data.shape[3])
        data_reshaped = np.reshape(self.space_data,
                                   (self.space_data.shape[0], shape))
        feat_cols = ['value' + str(i) for i in range(data_reshaped.shape[1])]
        df = pd.DataFrame(data_reshaped, columns=feat_cols)
        df['label'] = [d['label'] for d in self.space_meta]
        print('Fitting tsne.')
        tsne_results = tsne.fit_transform(df[feat_cols].values)
        np.save(conf.dir_path + conf.tsne_space, tsne_results)
        if draw:
            df['tsne-2d-one'] = tsne_results[:, 0]
            df['tsne-2d-two'] = tsne_results[:, 1]
            plt.figure(figsize=(16, 10))
            sns.scatterplot(
                x='tsne-2d-one', y='tsne-2d-two',
                hue='label',
                palette=sns.color_palette("hls", len(conf.labels)),
                data=df,
                legend="full",
                alpha=0.3
            )
            plt.show()

    def compute_pca(self, draw=False):
        """Create a 2D feature space and can plot an image of this space.

        :param draw: Draws pyplot when true.
        """
        pca = PCA(n_components=2)
        shape = (self.space_data.shape[1] * self.space_data.shape[2]
                 * self.space_data.shape[3])
        data_reshaped = np.reshape(self.space_data,
                                   (self.space_data.shape[0], shape))
        feat_cols = ['value' + str(i) for i in range(data_reshaped.shape[1])]
        df = pd.DataFrame(data_reshaped, columns=feat_cols)
        df['label'] = [d['label'] for d in self.space_meta]
        pca_result = pca.fit_transform(df[feat_cols].values)
        np.save(conf.dir_path + conf.pca_space, pca_result)
        if draw:
            df['pca-one'] = pca_result[:, 0]
            df['pca-two'] = pca_result[:, 1]
            plt.figure(figsize=(16, 10))
            sns.scatterplot(
                x="pca-one", y="pca-two",
                hue="label",
                palette=sns.color_palette("hls", len(conf.labels)),
                data=df,
                legend="full",
                alpha=0.3
            )
            plt.show()

    def get_recommendation(self, filename):
        """Search the recommended audio files for the provided audio sample.

        From the input a spectrogram is extracted and the feature vector is
        computed. A kd-tree over a PCA projection of the search space is
        utilized to find five neighbours of the input's feature vector.

        :param filename: Audio sample to find recommendations for.
        :return: The filenames of the (up to) five closest samples, or
            ``None`` when the input file is too short.
        """
        results = []
        feature_vectors = []
        file_path = conf.dir_path + filename
        # Cast defensively: a float window length would make range() raise.
        # NOTE(review): assumes load_duration * sample_rate is integral.
        amount_of_samples = int(conf.load_duration * conf.sample_rate)
        audio, rate = librosa.load(file_path, sr=conf.sample_rate,
                                   res_type='kaiser_fast')
        if audio.size < amount_of_samples:
            print('File is too small.')  # fixed typo: was 'to small'
            return
        # Split into full-length windows; a trailing partial window is
        # skipped (the original deleted it after the fact — same effect).
        for i in range(0, audio.size, amount_of_samples):
            split = audio[i:i + amount_of_samples]
            if split.size < amount_of_samples:
                continue
            data = self.collector.extract_mel_spectrogram(split)
            # Trim the last 3 frames to match the encoder's input width.
            data = data[:, :-3]
            results.append(data)
        if len(results) > 1:
            for part in results:
                feature_vectors.append(self.get_feature_vector(part))
            vec = np.average(np.array(feature_vectors), axis=0)[0]
        else:
            vec = self.get_feature_vector(results[0])
        vec = np.reshape(vec, 8 * 16 * 8)
        vec = vec / np.max(vec)
        # self.draw_features(None, None, vec)
        pca = PCA(n_components=64)
        shape = (self.space_data.shape[1] * self.space_data.shape[2]
                 * self.space_data.shape[3])
        data_reshaped = np.reshape(self.space_data,
                                   (self.space_data.shape[0], shape))
        # Project the query together with the space so both share PCA axes.
        data_reshaped = np.append(data_reshaped, [vec], axis=0)
        pca_result = pca.fit_transform(data_reshaped)
        query_vec = pca_result[-1]  # renamed: 'input' shadowed the builtin
        pca_result = pca_result[:-1]
        kd_tree = cKDTree(pca_result, leafsize=100)
        d, index = kd_tree.query(query_vec, k=5, distance_upper_bound=6)
        print('Input was ' + filename)
        print('Your recommended files are:')
        recommendations = []
        for i in index:
            # cKDTree.query reports a missing neighbour (none within the
            # distance bound) with index == n; skip those instead of
            # raising IndexError on space_meta.
            if i >= len(self.space_meta):
                continue
            name = self.space_meta[i]['filename']
            print(name)
            recommendations.append(name)
        # Bug fix: the docstring promised the filenames but the method
        # previously returned None unconditionally.
        return recommendations