Example #1
 def run_PCA(self, num_dim, percent_variability):
     # get covariance matrix (uses compact trick)
     N = self.data_matrix.shape[0]
     data_matrix_2d = self.data_matrix.reshape(
         self.data_matrix.shape[0],
         -1).T  # flatten data instances and transpose
     mean = np.mean(data_matrix_2d, axis=1)
     centered_data_matrix_2d = (data_matrix_2d.T - mean).T
     trick_cov_matrix = np.dot(
         centered_data_matrix_2d.T,
         centered_data_matrix_2d) * 1.0 / np.sqrt(N - 1)
     # get eigenvectors and eigenvalues
     eigen_values, eigen_vectors = np.linalg.eigh(trick_cov_matrix)
     eigen_vectors = np.dot(centered_data_matrix_2d, eigen_vectors)
     for i in range(N):
         eigen_vectors[:, i] = eigen_vectors[:, i] / np.linalg.norm(
             eigen_vectors[:, i])
     eigen_values = np.flip(eigen_values)
     eigen_vectors = np.flip(eigen_vectors, 1)
     # get num PCA components
     cumDst = np.cumsum(eigen_values) / np.sum(eigen_values)
     if num_dim == 0:
         num_dim = np.where(cumDst > float(percent_variability))[0][0] + 1
     W = eigen_vectors[:, :num_dim]
     PCA_scores = np.matmul(centered_data_matrix_2d.T, W)
     sw_message(f"The PCA modes of particles being retained : {num_dim}")
     sw_message(f"Variablity preserved: {str(float(cumDst[num_dim-1]))}")
     self.num_dim = num_dim
     self.PCA_scores = PCA_scores
     self.eigen_vectors = eigen_vectors
     self.eigen_values = eigen_values
     return num_dim
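
A note on the covariance shortcut above: for a tall matrix X (d x N with d >> N), eigenvectors of the small N x N Gram matrix X^T X lift to eigenvectors of the large d x d matrix X X^T via v -> Xv, since (X X^T)(Xv) = X(X^T X v) = lambda (Xv). The following standalone numpy sketch (not part of the class above) verifies that relationship:

import numpy as np

# Verify the "compact trick": eigenvectors of X^T X, lifted by X, are
# eigenvectors of X X^T with the same eigenvalues.
rng = np.random.default_rng(0)
d, N = 500, 10
X = rng.standard_normal((d, N))

small_vals, small_vecs = np.linalg.eigh(X.T @ X)   # cheap N x N eigenproblem
lifted = X @ small_vecs                            # lift to d dimensions
lifted /= np.linalg.norm(lifted, axis=0)           # unit-normalize columns

full = X @ X.T
for i in range(N):
    residual = full @ lifted[:, i] - small_vals[i] * lifted[:, i]
    assert np.allclose(residual, 0.0, atol=1e-6)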
Example #2
def visualizeAugmentation(data_csv, viz_type='splom', show=True):
    if viz_type == 'splom':
        Visualize.splom(data_csv)
    elif viz_type == 'violin':
        Visualize.violin(data_csv, show)
    else:
        sw_message("Error visualization type unrecognized.")
Example #3
	def fit(self, embedded_matrix, mixture_num):
		sw_message("Fitting Gaussian mixture model...")
		self.embedded_matrix = embedded_matrix
		if mixture_num == 0:
			mixture_num = self.selectClusterNum()
		self.GMM = GaussianMixture(mixture_num, covariance_type='full', random_state=0)
		self.GMM.fit(self.embedded_matrix)
		sw_message("Gaussian mixture model converged: " + str(self.GMM.converged_))
Example #4
	def selectClusterNum(self):
		n_components = np.arange(1, self.embedded_matrix.shape[1])
		models = [GaussianMixture(n, covariance_type='full', random_state=0).fit(self.embedded_matrix) for n in n_components]
		bic_min_index = np.argmin(np.array([m.bic(self.embedded_matrix) for m in models]))
		aic_min_index = np.argmin(np.array([m.aic(self.embedded_matrix) for m in models]))
		avg_index = int((bic_min_index + aic_min_index) / 2)
		mixture_num = n_components[avg_index]
		sw_message("Using " + str(mixture_num) + " components.")
		return mixture_num
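
The BIC/AIC compromise above can be tried in isolation. A minimal standalone sketch, assuming scikit-learn's GaussianMixture (as used in these examples), on synthetic two-cluster data:

import numpy as np
from sklearn.mixture import GaussianMixture

# Standalone sketch of the BIC/AIC selection heuristic on synthetic data.
rng = np.random.default_rng(0)
data = np.vstack([rng.normal(0.0, 1.0, (100, 3)),
                  rng.normal(5.0, 1.0, (100, 3))])

n_components = np.arange(1, 7)
models = [GaussianMixture(n, covariance_type='full', random_state=0).fit(data)
          for n in n_components]
bic_idx = int(np.argmin([m.bic(data) for m in models]))
aic_idx = int(np.argmin([m.aic(data) for m in models]))
# Average the two minimizers, as selectClusterNum does above.
print("Selected components:", n_components[(bic_idx + aic_idx) // 2])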
Example #5
def get_test_loader(loader_dir, test_img_list, down_factor=1, down_dir=None):
    sw_message("Creating test torch loader:")
    # get data
    image_paths = []
    scores = []
    models = []
    test_names = []
    for index in range(len(test_img_list)):
        image_path = test_img_list[index]
        # add name
        prefix = get_prefix(image_path)
        test_names.append(prefix)
        image_paths.append(image_path)
        # add label placeholders
        scores.append([1])
        models.append([1])
    images = get_images(loader_dir, image_paths, down_factor, down_dir)
    test_data = DeepSSMdataset(images, scores, models)
    # Save test names to file so they can be looked up at prediction time
    with open(loader_dir + 'test_names.txt', 'w+') as name_file:
        name_file.write(str(test_names))
    sw_message("Test names saved to: " + loader_dir + "test_names.txt")
    # Make loader
    sw_message("Creating and saving test dataloader...")
    testloader = DataLoader(test_data,
                            batch_size=1,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=torch.cuda.is_available())
    test_path = loader_dir + 'test'
    torch.save(testloader, test_path)
    sw_message("Test loader complete.\n")
    return test_path, test_names
Example #6
	def fit(self, embedded_matrix):
		sw_message("Fitting KDE...")
		self.embedded_matrix = embedded_matrix
		# get sigma squared
		nearest_neighbor_dists = []
		cov = np.cov(embedded_matrix.T)
		for i in embedded_matrix:
			smallest = np.inf
			for j in embedded_matrix:
				dist = Mdist(i,j,cov)
				if dist < smallest and dist != 0:
					smallest = dist
			nearest_neighbor_dists.append(smallest)
		self.sigma_squared = np.mean(np.array(nearest_neighbor_dists))/embedded_matrix.shape[1]
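
Mdist is not defined in these examples; it presumably computes a Mahalanobis-style distance under the sample covariance cov. A hypothetical sketch of such a helper (an assumption, not the original implementation):

import numpy as np

# Hypothetical Mdist-style helper: Mahalanobis distance between two vectors
# under a shared covariance matrix.
def mahalanobis_dist(u, v, cov):
    diff = np.asarray(u, dtype=float) - np.asarray(v, dtype=float)
    # pinv tolerates a near-singular covariance (few samples, many dimensions)
    return float(np.sqrt(diff @ np.linalg.pinv(cov) @ diff))

rng = np.random.default_rng(0)
pts = rng.standard_normal((20, 4))
print(mahalanobis_dist(pts[0], pts[1], np.cov(pts.T)))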
Example #7
def compute_pvalues_for_group_difference_data(group_0_data,
                                              group_1_data,
                                              permutations=100):
    number_of_particles = group_0_data.shape[0]
    group_0_size = group_0_data.shape[-1]
    group_1_size = group_1_data.shape[-1]
    subset_size = min(group_0_size, group_1_size)

    pvalues_matrix = np.zeros((number_of_particles, permutations))
    idx = 0
    for p in range(permutations):
        sw_progress(float(idx) / permutations)
        idx = idx + 1
        if sw_check_abort():
            sw_message("Aborted")
            return

        group_0_index = np.random.choice(group_0_size,
                                         subset_size,
                                         replace=False)
        group_1_index = np.random.choice(group_1_size,
                                         subset_size,
                                         replace=False)

        group_0_subset = group_0_data[:, :, group_0_index]
        group_1_subset = group_1_data[:, :, group_1_index]

        for particle_id in range(number_of_particles):
            x = group_0_subset[particle_id, :, :].T
            y = group_1_subset[particle_id, :, :].T

            stats, f, pv, s = hotelling_t2(x, y)
            pvalues_matrix[particle_id, p] = pv
    corrected_pvalue_matrix = np.zeros((number_of_particles, 1))
    for particle_id in range(number_of_particles):
        r, pval = multi.fdrcorrection(pvalues_matrix[particle_id, :],
                                      alpha=0.05)
        corrected_pvalue_matrix[particle_id, 0] = np.mean(pval)
    return corrected_pvalue_matrix
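
The multi.fdrcorrection call above matches the Benjamini-Hochberg correction in statsmodels. Assuming multi refers to statsmodels.stats.multitest, a minimal sketch of that step on its own:

import numpy as np
from statsmodels.stats import multitest

# Benjamini-Hochberg FDR correction on a small vector of p-values.
pvalues = np.array([0.001, 0.02, 0.04, 0.30, 0.75])
rejected, corrected = multitest.fdrcorrection(pvalues, alpha=0.05)
print(rejected)    # which hypotheses survive at FDR 0.05
print(corrected)   # adjusted p-values (averaged per particle above)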
Example #8
def get_all_train_data(loader_dir, data_csv, down_factor, down_dir):
    # get all data and targets
    sw_message("Reading all data...")
    image_paths = []
    scores = []
    models = []
    prefixes = []
    with open(data_csv, newline='') as csvfile:
        datareader = csv.reader(csvfile)
        index = 0
        for row in datareader:
            image_path = row[0]
            model_path = row[1]
            pca_scores = row[2:]
            # add name
            prefix = get_prefix(image_path)
            # data error check
            if prefix not in get_prefix(model_path):
                print("Error: Images and models mismatched in csv.")
                print(index)
                print(prefix)
                print(get_prefix(model_path))
                exit()
            prefixes.append(prefix)
            # add image path
            image_paths.append(image_path)
            # add score (un-normalized)
            pca_scores = [float(i) for i in pca_scores]
            scores.append(pca_scores)
            # add model
            mdl = get_particles(model_path)
            models.append(mdl)
            index += 1
    images = get_images(loader_dir, image_paths, down_factor, down_dir)
    scores = whiten_PCA_scores(scores, loader_dir)
    return images, scores, models, prefixes
Example #9
def get_train_val_loaders(loader_dir,
                          data_csv,
                          batch_size=1,
                          down_factor=1,
                          down_dir=None,
                          train_split=0.80):
    sw_message("Creating training and validation torch loaders:")
    if not os.path.exists(loader_dir):
        os.makedirs(loader_dir)
    images, scores, models, prefixes = get_all_train_data(
        loader_dir, data_csv, down_factor, down_dir)
    images, scores, models, prefixes = shuffle_data(images, scores, models,
                                                    prefixes)
    # split into train and validation (e.g. 80% vs 20%)
    cut = int(len(images) * train_split)
    sw_message("Turning to tensors...")
    train_data = DeepSSMdataset(images[:cut], scores[:cut], models[:cut])
    sw_message(str(len(train_data)) + ' in training set')
    val_data = DeepSSMdataset(images[cut:], scores[cut:], models[cut:])
    sw_message(str(len(val_data)) + ' in validation set')

    sw_message("Saving data loaders...")
    trainloader = DataLoader(train_data,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=8,
                             pin_memory=torch.cuda.is_available())
    train_path = loader_dir + 'train'
    torch.save(trainloader, train_path)

    validationloader = DataLoader(val_data,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=8,
                                  pin_memory=torch.cuda.is_available())
    val_path = loader_dir + 'validation'
    torch.save(validationloader, val_path)
    sw_message("Training and validation loaders complete.\n")
    return train_path, val_path
Example #10
def shuffle_data(images, scores, models, prefixes):
    sw_message("Shuffling.")
    c = list(zip(images, scores, models, prefixes))
    random.shuffle(c)
    images, scores, models, prefixes = zip(*c)
    return images, scores, models, prefixes
Example #11
def test(config_file):
    with open(config_file) as json_file:
        parameters = json.load(json_file)
    model_dir = parameters["paths"]["out_dir"] + parameters["model_name"] + '/'
    pred_dir = model_dir + 'predictions/'
    if not os.path.exists(pred_dir):
        os.makedirs(pred_dir)
    if parameters["use_best_model"]:
        model_path = model_dir + 'best_model.torch'
    else:
        model_path = model_dir + 'final_model.torch'
    if parameters["fine_tune"]["enabled"]:
        model_path_ft = model_path.replace(".torch", "_ft.torch")
    else:
        model_path_ft = model_path
    loader_dir = parameters["paths"]["loader_dir"]

    # load the loaders
    sw_message("Loading test data loader...")
    test_loader = torch.load(loader_dir + "test")
    print("Done.\n")
    # initializations
    sw_message("Loading trained model...")
    model_pca = model.DeepSSMNet(config_file)
    model_pca.load_state_dict(torch.load(model_path))
    device = model_pca.device
    model_pca.to(device)
    model_pca.eval()
    model_ft = model.DeepSSMNet(config_file)
    model_ft.load_state_dict(torch.load(model_path_ft))
    model_ft.to(device)
    model_ft.eval()

    # Get test names
    test_names_file = loader_dir + 'test_names.txt'
    f = open(test_names_file, 'r')
    test_names_string = f.read()
    f.close()
    test_names_string = test_names_string.replace("[", "").replace(
        "]", "").replace("'", "").replace(" ", "")
    test_names = test_names_string.split(",")
    print("Done.\n")
    sw_message("Predicting for test images...")
    index = 0
    pred_scores = []
    predPath_ft = pred_dir + '/FT_Predictions'
    if not os.path.exists(predPath_ft):
        os.makedirs(predPath_ft)
    predPath_pca = pred_dir + '/PCA_Predictions'
    if not os.path.exists(predPath_pca):
        os.makedirs(predPath_pca)

    for img, pca, mdl in test_loader:
        if sw_check_abort():
            sw_message("Aborted")
            return
        sw_message(f"Predicting {index+1}/{len(test_loader)}")
        sw_progress((index + 1) / len(test_loader))

        img = img.to(device)
        [pred, pred_mdl_pca] = model_pca(img)
        [pred, pred_mdl_ft] = model_ft(img)
        pred_scores.append(pred.cpu().data.numpy()[0])
        nmpred = predPath_pca + '/predicted_pca_' + test_names[
            index] + '.particles'
        np.savetxt(nmpred, pred_mdl_pca.squeeze().detach().cpu().numpy())
        nmpred = predPath_ft + '/predicted_ft_' + test_names[
            index] + '.particles'
        np.savetxt(nmpred, pred_mdl_ft.squeeze().detach().cpu().numpy())
        index += 1
        print("Predicted test " + str(index) + ".")
    print("Done.\n")
    return pred_dir
Example #12
def supervised_train(config_file):
	with open(config_file) as json_file: 
		parameters = json.load(json_file)
	model_dir = parameters['paths']['out_dir'] + parameters['model_name'] + '/'
	if not os.path.exists(model_dir):
		os.makedirs(model_dir)
	loader_dir = parameters['paths']['loader_dir']
	aug_dir = parameters['paths']['aug_dir']
	num_epochs = parameters['trainer']['epochs']
	learning_rate = parameters['trainer']['learning_rate']
	eval_freq = parameters['trainer']['val_freq']
	decay_lr = parameters['trainer']['decay_lr']
	fine_tune = parameters['fine_tune']['enabled']
	loss_func = getattr(losses, parameters["loss"]["function"])
	# load the loaders
	train_loader_path = loader_dir + "train"
	validation_loader_path = loader_dir + "validation"
	print("Loading data loaders...")
	train_loader = torch.load(train_loader_path)
	val_loader = torch.load(validation_loader_path)
	print("Done.")
	# initializations
	num_pca = train_loader.dataset.pca_target[0].shape[0]
	num_corr = train_loader.dataset.mdl_target[0].shape[0]
	print("Defining net...")
	net = model.DeepSSMNet(config_file)
	device = net.device
	net.to(device)
	# initialize model weights
	net.apply(weight_init(module=nn.Conv2d, initf=nn.init.xavier_normal_))	
	net.apply(weight_init(module=nn.Linear, initf=nn.init.xavier_normal_))

	# these lines are for the fine tuning layer initialization
	whiten_mean = np.load(loader_dir + '/mean_PCA.npy')
	whiten_std = np.load(loader_dir + '/std_PCA.npy')
	orig_mean = np.loadtxt(aug_dir + '/PCA_Particle_Info/mean.particles')
	orig_pc = np.zeros([num_pca, num_corr*3])
	for i in range(num_pca):
		temp = np.loadtxt(aug_dir + '/PCA_Particle_Info/pcamode' + str(i) + '.particles')
		orig_pc[i, :] = temp.flatten()
	
	bias = torch.from_numpy(orig_mean.flatten()).to(device) # load the mean here
	weight = torch.from_numpy(orig_pc.T).to(device) # load the PCA vectors here
	net.decoder.fc_fine.bias.data.copy_(bias)
	net.decoder.fc_fine.weight.data.copy_(weight)

	# define optimizer
	# for the initial steps set the gradient of the final layer to be zero
	for param in net.decoder.fc_fine.parameters():
		param.requires_grad = False
	
	train_params = net.parameters()
	opt = torch.optim.Adam(train_params, learning_rate)
	opt.zero_grad()
	scheduler = StepLR(opt, step_size=1, gamma=0.99)
	print("Done.")
	# train
	print("Beginning training on device = " + device + '\n')
	# Initialize logger
	logger = open(model_dir + "train_log.csv", "w+", buffering=1)
	log_print(logger, ["Epoch", "LR", "Train_Err", "Train_Rel_Err", "Val_Err", "Val_Rel_Err", "Sec"])
	# Initialize training plot
	train_plot = plt.figure()
	axe = train_plot.add_subplot(111)
	axe.set_title('DeepSSM Training')
	sp_train, = axe.plot([],[],label='Training',ms=10,color='b',marker='o',ls='')
	sp_val, = axe.plot([],[],label='Validation',ms=10,color='r',marker='o',ls='')
	axe.set_xlabel('Epochs')
	axe.set_xlim(0,num_epochs+1)
	axe.set_ylabel('PCA MSE')
	axe.legend()
	train_plot.savefig(model_dir + "training_plot.png", dpi=300)
	# initialize
	epochs = []
	plot_train_losses = []
	plot_val_losses = []
	t0 = time.time()
	best_val_rel_error = np.inf
	for e in range(1, num_epochs + 1):
		if sw_check_abort():
			sw_message("Aborted")
			return
		sw_message(f"Epoch {e}/{num_epochs}")
		sw_progress(e / (num_epochs+1))

		torch.cuda.empty_cache()
		# train
		net.train()
		train_losses = []
		train_rel_losses = []
		pred_particles = []
		true_particles = []
		for img, pca, mdl in train_loader:
			opt.zero_grad()
			img = img.to(device)
			pca = pca.to(device)
			[pred_pca, pred_mdl] = net(img)
			loss = loss_func(pred_pca, pca)
			loss.backward()
			opt.step()
			train_losses.append(loss.item())
			train_rel_loss = loss_func(pred_pca, pca) / loss_func(pred_pca*0, pca)
			train_rel_losses.append(train_rel_loss.item())
			pred_particles.extend(pred_mdl.detach().cpu().numpy())
			true_particles.extend(mdl.detach().cpu().numpy())
		train_viz.write_examples(np.array(pred_particles), np.array(true_particles), model_dir + "examples/train_")
		# test validation
		pred_particles = []
		true_particles = []
		if ((e % eval_freq) == 0 or e == 1):
			net.eval()
			val_losses = []
			val_rel_losses = []
			for img, pca, mdl in val_loader:
				opt.zero_grad()
				img = img.to(device)
				pca = pca.to(device)
				[pred_pca, pred_mdl] = net(img)
				v_loss = loss_func(pred_pca, pca)
				val_losses.append(v_loss.item())
				val_rel_loss = loss_func(pred_pca, pca) / loss_func(pred_pca*0, pca)
				val_rel_losses.append(val_rel_loss.item())
				pred_particles.extend(pred_mdl.detach().cpu().numpy())
				true_particles.extend(mdl.detach().cpu().numpy())
			train_viz.write_examples(np.array(pred_particles), np.array(true_particles), model_dir + "examples/validation_")
			# log
			train_mr_MSE = np.mean(np.sqrt(train_losses))
			val_mr_MSE = np.mean(np.sqrt(val_losses))
			train_rel_err = np.mean(train_rel_losses)
			val_rel_err =  np.mean(val_rel_losses)
			log_print(logger, [e, scheduler.get_lr()[0], train_mr_MSE, train_rel_err, val_mr_MSE, val_rel_err, time.time()-t0])
			# plot
			epochs.append(e)
			plot_train_losses.append(train_mr_MSE)
			plot_val_losses.append(val_mr_MSE)
			sp_train.set_data(epochs, plot_train_losses)
			sp_val.set_data(epochs, plot_val_losses)
			axe.set_ylim(0,max(max(plot_train_losses), max(plot_val_losses))+3)
			train_plot.canvas.draw()
			train_plot.savefig(model_dir + "training_plot.png")
			# save
			if val_rel_err < best_val_rel_error:
				best_val_rel_error = val_rel_err
				best_epoch = e
				torch.save(net.state_dict(), os.path.join(model_dir, 'best_model.torch'))
			t0 = time.time()
		if decay_lr:
			scheduler.step()
	logger.close()

	torch.save(net.state_dict(), os.path.join(model_dir, 'final_model.torch'))
	parameters['best_model_epochs'] = best_epoch
	with open(config_file, "w") as json_file:
		json.dump(parameters, json_file, indent=2) 
	print("Training complete, model saved. Best model after epoch " + str(best_epoch))

	# now commence the fine tuning model if present on best model
	if fine_tune:
		print("Beginning fine tuning training step on device = ", device)
		net = model.DeepSSMNet(config_file)
		if parameters["use_best_model"]:
			model_path = os.path.join(model_dir, 'best_model.torch')
		else:
			model_path = os.path.join(model_dir, 'final_model.torch')
		net.load_state_dict(torch.load(model_path))
		net.to(device)
		logger = open(model_dir + "train_log_ft.csv", "w+")
		log_print(logger, ["Epoch", "Train_Err_mdl", "Train_Rel_Err_mdl", "Val_Err_mdl", "Val_Rel_Err_mdl", "Sec"])
		ft_epochs = parameters['fine_tune']['epochs']
		learning_rate = parameters['fine_tune']['learning_rate']
		eval_freq = parameters['fine_tune']['val_freq']
		decay_lr = parameters['fine_tune']['decay_lr']
		loss_func = getattr(losses, parameters['fine_tune']["loss"])
		# free the last params
		for param in net.decoder.fc_fine.parameters():
			param.requires_grad=True
		# train on the corr loss
		best_ft_val_rel_error = np.inf
		for e in range(1, ft_epochs + 1):
			if sw_check_abort():
				sw_message("Aborted")
				return
			sw_message(f"Fine Tuning, Epoch {e}/{ft_epochs}")
			sw_progress(e / (ft_epochs+1))

			torch.cuda.empty_cache()
			# train
			net.train()
			train_losses = []
			train_rel_losses = []
			pred_particles = []
			true_particles = []
			for img, pca, mdl in train_loader:
				opt.zero_grad()
				img = img.to(device)
				mdl = mdl.to(device)
				[pred_pca, pred_mdl] = net(img)
				loss = torch.mean((pred_mdl - mdl)**2)
				loss.backward()
				opt.step()
				train_losses.append(loss.item())
				train_rel_loss = F.mse_loss(pred_mdl, mdl) / F.mse_loss(pred_mdl*0, mdl)
				train_rel_losses.append(train_rel_loss.item())
				pred_particles.extend(pred_mdl.detach().cpu().numpy())
				true_particles.extend(mdl.detach().cpu().numpy())
			train_viz.write_examples(np.array(pred_particles), np.array(true_particles), model_dir + "examples/train_")
			# test validation
			pred_particles = []
			true_particles = []
			if ((e % eval_freq) == 0 or e == 1):
				net.eval()
				val_losses = []
				val_rel_losses = []
				for img, pca, mdl in val_loader:
					opt.zero_grad()
					img = img.to(device)
					mdl = mdl.to(device)
					[pred_pca, pred_mdl] = net(img)
					v_loss = torch.mean((pred_mdl - mdl)**2)
					val_losses.append(v_loss.item())
					val_rel_loss = (F.mse_loss(pred_mdl, mdl) / F.mse_loss(pred_mdl*0, mdl)).item()
					val_rel_losses.append(val_rel_loss)
					if val_rel_loss < best_ft_val_rel_error:
						best_ft_val_rel_error = val_rel_loss
						best_ft_epoch = e
						torch.save(net.state_dict(), os.path.join(model_dir, 'best_model_ft.torch'))
					pred_particles.extend(pred_mdl.detach().cpu().numpy())
					true_particles.extend(mdl.detach().cpu().numpy())
				train_viz.write_examples(np.array(pred_particles), np.array(true_particles), model_dir + "examples/validation_")
				# log
				train_mr_MSE = np.mean(np.sqrt(train_losses))
				val_mr_MSE = np.mean(np.sqrt(val_losses))
				train_rel_err = np.mean(train_rel_losses)
				val_rel_err =  np.mean(val_rel_losses)
				log_print(logger, [e, train_mr_MSE, train_rel_err, val_mr_MSE, val_rel_err, time.time()-t0])
				t0 = time.time()
		
		logger.close()
		torch.save(net.state_dict(), os.path.join(model_dir, 'final_model_ft.torch'))
		parameters['best_ft_model_epochs'] = best_ft_epoch
		with open(config_file, "w") as json_file:
			json.dump(parameters, json_file, indent=2) 
		print("Fine tuning complete, model saved. Best model after epoch " + str(best_ft_epoch))
Example #13
def runDataAugmentation(out_dir, img_list, local_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1, world_point_list=None):
    sw_message("Running point based data augmentation.")
    num_dim = DataAugmentation.point_based_aug(out_dir, img_list, local_point_list, num_samples, num_dim, percent_variability, sampler_type, mixture_num, processes, world_point_list)
    sw_message("Done.")
    return num_dim
Example #14
def point_based_aug(out_dir, orig_img_list, orig_point_list, num_samples, num_dim=0, percent_variability=0.95, sampler_type="kde", mixture_num=0, processes=1, orig_world_point_list=None):
	# Get Embedder
	point_matrix = Utils.create_data_matrix(orig_point_list)
	if orig_world_point_list is not None:
		world_point_matrix = Utils.create_data_matrix(orig_world_point_list)
		world_get_local = [Utils.estimate_homogeneous_similar_transform(
			x=p.reshape((-1,3)).T,
			y=point_matrix[i].reshape((-1,3)).T,
		) for i,p in enumerate(world_point_matrix)]
		world_get_local_info = {
			'world_get_local_list':[
				{
					'homogeneous_transformation': m.tolist(),
					'origi_world_point': orig_world_point_list[i],
					'origi_local_point': orig_point_list[i],
				} for i, m in enumerate(world_get_local)
			],
			'generated_particle__world_get_local_index':[],
		}
		PointEmbedder = Embedder.PCA_Embbeder(world_point_matrix, num_dim, percent_variability)
	else:
		PointEmbedder = Embedder.PCA_Embbeder(point_matrix, num_dim, percent_variability)
	num_dim = PointEmbedder.num_dim
	PointEmbedder.write_PCA(out_dir + "PCA_Particle_Info/", "particles") # write PCA info for DeepSSM testing
	embedded_matrix = PointEmbedder.getEmbeddedMatrix()
	# Get sampler
	if sampler_type == "gaussian":
		PointSampler = Sampler.Gaussian_Sampler()
		PointSampler.fit(embedded_matrix) 
	elif sampler_type == "mixture":
		PointSampler = Sampler.Mixture_Sampler()
		PointSampler.fit(embedded_matrix, mixture_num) 
	elif sampler_type == "kde":
		PointSampler = Sampler.KDE_Sampler()
		PointSampler.fit(embedded_matrix) 
	else:
		sw_message("Error sampler_type unrecognized.")
		sw_message("Gaussian, mixture, and KDE currently supported.")
		return 0
	
	# Initialize output folders and lists
	gen_point_dir = out_dir + "Generated-Particles/"
	if not os.path.exists(gen_point_dir):
		os.makedirs(gen_point_dir)
	gen_image_dir = out_dir + "Generated-Images/"
	if not os.path.exists(gen_image_dir):
		os.makedirs(gen_image_dir)
	gen_embeddings = []
	gen_points_paths = []
	gen_image_paths = []
	if processes != 1:
		generate_image_params_list = []
	# Sample to generate new examples
	for index in range(1, num_samples+1):
		if sw_check_abort():
			sw_message("Aborted")
			return 0
		sw_message("Generating " +str(index)+'/'+str(num_samples))
		sw_progress(index / (num_samples+1))
		name = 'Generated_sample_' + Utils.pad_index(index)
		# Generate embedding
		sampled_embedding, base_index = PointSampler.sample()
		gen_embeddings.append(sampled_embedding)
		# Generate particles
		if orig_world_point_list is not None:
			p = PointEmbedder.project(sampled_embedding)
			i = int(np.nanargmin(np.linalg.norm(p-world_point_matrix, axis=(1,2))))
			# TODO add Randomness in the transformation from world to local particles
			gen_points = (world_get_local[i] @ Utils.get_homogeneous_coordinates(p.reshape((-1,3)).T))[:3,:].T
			world_get_local_info['generated_particle__world_get_local_index'].append(i)
		else:
			gen_points = PointEmbedder.project(sampled_embedding)
		gen_points_path = gen_point_dir + name + ".particles"
		np.savetxt(gen_points_path, gen_points)
		gen_points_paths.append(gen_points_path)
		# Generate image
		base_image_path = orig_img_list[base_index]
		base_particles_path = orig_point_list[base_index]
		if processes==1:
			gen_image_path = Utils.generate_image(out_dir, gen_points_path, base_image_path, base_particles_path)
			gen_image_paths.append(gen_image_path)
		else:
			generate_image_params_list.append({
				'out_dir':out_dir,
				'gen_points_path':gen_points_path,
				'base_image_path':base_image_path,
				'base_particles_path':base_particles_path,
			})
	if orig_world_point_list is not None:
		# write world to local transformation information for generated particles
		with open(out_dir + '/world_get_local_info.json', 'w') as f:
			json.dump(world_get_local_info, f)
	if processes!=1:
		with mtps.Pool(processes=processes) as p:
			gen_image_paths = p.map(generate_image, generate_image_params_list)
	csv_file = out_dir + "TotalData.csv"
	Utils.make_CSV(csv_file, orig_img_list, orig_point_list, embedded_matrix, gen_image_paths, gen_points_paths, gen_embeddings)
	return num_dim
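
The world-to-local step above multiplies a 4 x 4 homogeneous transform by homogeneous point coordinates. Assuming Utils.get_homogeneous_coordinates simply appends a row of ones to a 3 x M point set, the operation reduces to the following sketch:

import numpy as np

# Apply a 4 x 4 homogeneous similarity transform to a 3 x M point set.
def to_homogeneous(points_3xM):
    return np.vstack([points_3xM, np.ones((1, points_3xM.shape[1]))])

T = np.eye(4)
T[:3, 3] = [1.0, 2.0, 3.0]                 # toy transform: pure translation

points = np.array([[0.0, 1.0],
                   [0.0, 1.0],
                   [0.0, 1.0]])            # 3 x 2 point set
transformed = (T @ to_homogeneous(points))[:3, :].T   # back to M x 3
print(transformed)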
Example #15
	def fit(self, embedded_matrix):
		sw_message("Fitting Gaussian distribution...")
		self.embedded_matrix = embedded_matrix
		self.mean = np.mean(embedded_matrix, axis=0)
		self.cov = np.cov(embedded_matrix, rowvar=0)
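
The fit above only stores the mean and covariance of the embedding; the matching sample step is not shown in these examples. A minimal sketch of how such a Gaussian sampler could draw a new embedding (an assumption about the unshown method):

import numpy as np

# Fit a Gaussian to an embedding matrix and draw one synthetic sample.
rng = np.random.default_rng(0)
embedded_matrix = rng.standard_normal((50, 4))      # 50 shapes, 4 PCA modes
mean = np.mean(embedded_matrix, axis=0)
cov = np.cov(embedded_matrix, rowvar=0)
new_embedding = rng.multivariate_normal(mean, cov)
print(new_embedding.shape)                          # (4,)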