def merge_result_files(result_files, output):
    metadata = {}
    merged_data = []
    statistics = {}
    what = None

    for entry in result_files:
        with open(entry) as jsonfile:
            try:
                data = json.load(jsonfile)
            except json.JSONDecodeError:
                eprint('File {} was not json parsable'.format(entry))
                continue

        if 'what' in data:
            if not what:
                what = data['what']
            elif data['what'] != what:
                raise RuntimeError('Merged files of different type')

        metadata.update(data['metadata'])
        merged_data.extend(data['data'])

        if 'statistics' in data:
            statistics = merge_statistics(statistics, data['statistics'])

    output_dict = {'metadata': metadata, 'data': merged_data}

    if what:
        output_dict['what'] = what

    json.dump(output_dict, output)
    output.flush()
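# Hedged usage sketch for merge_result_files. The file names below are
# hypothetical; the only assumption is the {'metadata', 'data', 'what'} layout
# handled above. Relies on the module-level json import.
def _example_merge_result_files():
    files = ['run_a.json', 'run_b.json']  # hypothetical result documents
    with open('merged.json', 'w') as out:
        merge_result_files(files, out)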
def lie_vectors_of_registrations(json_data,
                                 key='result',
                                 left_multiply=np.identity(4),
                                 right_multiply=np.identity(4)):
    """
    Output the Lie algebra vectors of a json registration dataset.

    :param json_data: A full registration dataset.
    :param key: The key of the matrix to evaluate, inside the registration result.
    :param left_multiply: A transform applied on the left of every result before conversion.
    :param right_multiply: A transform applied on the right of every result before conversion.
    :returns: A Nx6 numpy matrix containing the Lie algebra representation of the results.
    """
    lie_results = np.empty((len(json_data['data']), 6))

    for i, registration in enumerate(json_data['data']):
        m = np.array(registration[key])
        res = np.dot(left_multiply, np.dot(m, right_multiply))

        try:
            lie_results[i, :] = se3_log(res)
        except RuntimeError:
            lie_results[i, :] = np.zeros(6)
            eprint('Warning: failed conversion to lie algebra of matrix {}'.format(m))

    return lie_results
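# Hedged usage sketch: load a results document produced by this toolchain
# (path is hypothetical) and inspect the translation part of the Lie vectors.
def _example_lie_vectors():
    with open('registration_results.json') as f:
        results = json.load(f)

    vectors = lie_vectors_of_registrations(results)  # Nx6 se(3) vectors
    eprint('Mean translation magnitude: {}'.format(
        np.linalg.norm(vectors[:, 0:3], axis=1).mean()))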
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('database', type=str,
                        help='Location of the registration result database to use.')
    parser.add_argument('dataset', type=str)
    parser.add_argument('reading', type=int)
    parser.add_argument('reference', type=int)
    parser.add_argument('-c', '--config', type=str,
                        help='Path to a json file containing the descriptor configuration.')
    parser.add_argument('-r', '--rotation', type=float, default=0.0,
                        help='Rotation around z to apply to the pointcloud')
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.database)
    pair = db.get_registration_pair(args.dataset, args.reading, args.reference)
    pair.rotation_around_z = args.rotation

    if args.config:
        with open(args.config) as f:
            descriptor = descriptor_factory(json.load(f))
    else:
        config = {'mask': {'name': 'grid'}, 'algo': {'name': 'normals_histogram'}}
        descriptor = descriptor_factory(config)

    descriptor_compute_start = time.time()
    computed_descriptor = descriptor.compute(pair)
    eprint('Descriptor took {} seconds'.format(time.time() - descriptor_compute_start))

    print(computed_descriptor)
    print(descriptor.labels())
def raw_centered_clustering(dataset, radius, n=12, seed=np.zeros(6),
                            n_seed_init=100, seed_selector='greedy', logging=False):
    """
    Run the external centered_clustering binary on a dataset.

    :arg dataset: The dataset to cluster (as a numpy matrix).
    :arg radius: The radius in which we have to have enough neighbours to be a core point.
    :arg n: The number of points that have to be within the radius to be a core point.
    :returns: The indices of the points that are inside the central cluster, as a list.
    """
    string_of_seed = ','.join(map(str, seed.tolist()))

    command = 'centered_clustering -n_seed_init {} -seed_selector {} -k {} -radius {} -seed {} {}'.format(
        n_seed_init, seed_selector, n, radius, string_of_seed,
        '--pointcloud_log' if logging else '--nopointcloud_log')
    eprint(command)

    response = subprocess.run(command,
                              input=json.dumps(dataset.tolist()),
                              stdout=subprocess.PIPE,
                              shell=True,
                              universal_newlines=True)

    return json.loads(response.stdout)
def run_one_clustering_thread(radius, k, registration_data, rescale_data=False,
                              n_seed_init=100, seed_selector='localized'):
    eprint('Clustering with radius {}'.format(radius))
    var_translation = float(registration_data['metadata']['var_translation'])
    lie_vectors = positions_of_registration_data(registration_data)
    ground_truth = np.array(registration_data['metadata']['ground_truth'])

    algo = CenteredClusteringAlgorithm(radius, k, n_seed_init)
    algo.rescale = rescale_data
    algo.n_seed_init = n_seed_init
    algo.seed_selector = seed_selector

    # Wrapper around lieroy.se3.log so it can be shipped to worker processes.
    se3_log = parallel.FunctionWrapper('log', 'lieroy.se3')

    clustering = algo.cluster(lie_vectors, seed=se3_log(ground_truth))
    clustering_with_distribution = compute_distribution(registration_data, clustering)

    eprint('Done clustering with radius {}'.format(radius))
    return clustering_with_distribution
def generate_descriptor_worker(dataset, i, output_dir):
    pointcloud = dataset.points_of_cloud(i)

    filename = 'descriptor_{}_{}.json'.format(dataset.name, i)
    eprint('Generating descriptor for {}'.format(filename))

    descriptor = generate_descriptor(pointcloud)

    with (output_dir / filename).open('w') as output_file:
        json.dump(descriptor, output_file)
def distance_mean_ground_truth(pair, distribution_algo):
    eprint(pair)
    ground_truth = pair.ground_truth()
    distribution = distribution_algo.compute(pair)

    mean = np.array(distribution['mean'])
    delta = np.linalg.inv(ground_truth) @ mean

    return np.linalg.norm(se3.log(delta))
def unprocess(self, m):
    # Rebuild full 6x6 covariance matrices from their upper-triangular factor vectors.
    covariances = np.empty((len(m), 6, 6))

    for i, v in enumerate(m):
        up = to_upper_triangular(v)
        covariances[i] = np.dot(up, up.T)
        eprint(covariances[i])

    return covariances
def _dets(self, xs, ys):
    # Diagnostic: report the predicted covariance whose determinant is closest to zero.
    ys_predicted = self.model(xs)
    covariances_predicted = ys_predicted.view(len(ys_predicted), 6, 6)

    dets = torch.Tensor(len(covariances_predicted))
    for i, cov in enumerate(covariances_predicted):
        dets[i] = torch.det(cov)

    worst_cov = torch.argmin(torch.abs(dets))
    eprint(dets[worst_cov])
    eprint(covariances_predicted[worst_cov])
def compute(self, pointcloud):
    command_string = 'grid_pointcloud_separator -spanx {} -spany {} -spanz {} -nx {} -ny {} -nz {}'.format(
        self.spanx, self.spany, self.spanz, self.nx, self.ny, self.nz)
    eprint(command_string)

    response = subprocess.check_output(command_string,
                                       universal_newlines=True,
                                       shell=True,
                                       input=json.dumps(pointcloud))

    return json.loads(response)
def process(self, covariances):
    # Vectorize each 6x6 covariance as the 21 entries of its upper-triangular
    # Cholesky factor, falling back to the nearest positive-definite matrix
    # when the decomposition fails.
    vectors = np.empty((len(covariances), 21))

    for i, cov in enumerate(covariances):
        try:
            L = np.linalg.cholesky(cov)
        except np.linalg.LinAlgError:
            m = nearestPD(cov)
            L = np.linalg.cholesky(m)

        vectors[i] = upper_triangular_to_vector(L.T)
        eprint(vectors[i])

    return vectors
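# Hedged, self-contained sketch of the Cholesky vectorization that process()
# and unprocess() implement. The to_upper_triangular / upper_triangular_to_vector /
# nearestPD helpers are defined elsewhere; this sketch only illustrates the idea
# with plain numpy and reconstructs with U.T @ U, which is one of the two
# possible packing conventions.
def _example_cholesky_vectorization():
    import numpy as np

    cov = np.diag([1.0, 2.0, 3.0, 0.1, 0.1, 0.1])  # toy SPD covariance
    U = np.linalg.cholesky(cov).T                  # upper-triangular factor
    iu = np.triu_indices(6)
    vector = U[iu]                                 # the 21 free entries

    U_back = np.zeros((6, 6))
    U_back[iu] = vector                            # unpack the 21 entries
    cov_back = U_back.T @ U_back                   # recover the covariance

    assert np.allclose(cov, cov_back)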
def cluster(self, dataset, seed=None):
    distances = np.linalg.norm(dataset - seed, axis=1)
    eprint('Distances shape: {}'.format(distances.shape))

    percentile = np.percentile(distances, self.quantile)
    cluster = np.where(distances < percentile)[0]
    eprint('Removing all points more than {} away'.format(percentile))

    return {
        'clustering': [cluster.tolist()],
        'n_clusters': 1,
        'outliers': inverse_of_cluster(cluster, len(dataset)).tolist(),
        'outlier_ratio': 1.0 - (len(cluster) / len(dataset)),
    }
def compute_one_summary_line(registration_pair, covariance_algo):
    eprint(registration_pair)
    covariance = covariance_algo.compute(registration_pair)

    d = {
        'dataset': registration_pair.dataset,
        'reading': registration_pair.reading,
        'reference': registration_pair.reference,
        'condition_number': np.linalg.cond(covariance),
        'trace': np.trace(covariance),
    }

    eprint('%s done' % str(registration_pair))
    return d
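# Hedged usage sketch: collect summary lines into a CSV. The pair list and
# covariance_algo are assumed to come from the surrounding pipeline; only the
# dict keys produced above are relied upon, and the output path is hypothetical.
def _example_summary_csv(pairs, covariance_algo, path='summary.csv'):
    import csv

    fieldnames = ['dataset', 'reading', 'reference', 'condition_number', 'trace']
    with open(path, 'w') as f:
        writer = csv.DictWriter(f, fieldnames)
        writer.writeheader()
        for pair in pairs:
            writer.writerow(compute_one_summary_line(pair, covariance_algo))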
def compute_overlapping_region(self, radius):
    reading = self.points_of_reading()
    reference = self.points_of_reference()
    t = self.transform()

    input_dict = {
        'reading': reading.tolist(),
        'reference': reference.tolist(),
        't': t.tolist()
    }

    cmd_string = 'overlapping_region -radius {} -mask'.format(radius)
    eprint(cmd_string)

    response = run_subprocess(cmd_string, json.dumps(input_dict))
    return json.loads(response)
def apply_mask_cli():
    parser = argparse.ArgumentParser(
        description='Apply a point selection mask on a pair of pointclouds.')
    parser.add_argument('database', type=str,
                        help='Location of the registration result database to use.')
    parser.add_argument('dataset', type=str)
    parser.add_argument('reading', type=int)
    parser.add_argument('reference', type=int)
    parser.add_argument('--output', type=str, default='.',
                        help='Output directory of the visualization.')
    parser.add_argument('--radius', type=float, default=0.1,
                        help='For the overlap mask generator, the max distance between points for them to be neighbors.')
    parser.add_argument('--range', type=float, default=0.0,
                        help='For the angle mask generator, the range of angles accepted.')
    parser.add_argument('--offset', type=float, default=0.0,
                        help='For the angle mask generator, the offset of angles accepted.')
    parser.add_argument('-c', '--config', type=str,
                        help='Path to a json config for the mask')
    parser.add_argument('-r', '--rotation', type=float, default=0.0,
                        help='Rotation around the z axis to apply to the cloud pair before computing the descriptor, in radians.')
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.database)
    pair = db.get_registration_pair(args.dataset, args.reading, args.reference)
    pair.rotation_around_z = args.rotation

    reading = pair.points_of_reading()
    reference = pair.points_of_reference()

    with open(args.config) as f:
        config = json.load(f)

    print(config)
    mask_generator = mask_factory(config)
    reading_masks, reference_masks = mask_generator.compute(pair)

    eprint('Transform of pair: ')
    eprint(pair.transform())

    pointcloud_to_vtk(reference, args.output + '/reference')
    pointcloud_to_vtk(transform_points(reading, pair.transform()), args.output + '/reading')

    for i in range(len(reading_masks)):
        if reference_masks[i].any():
            pointcloud_to_vtk(reference[reference_masks[i]],
                              args.output + '/' + '{}_reference_{}'.format(repr(mask_generator), i))

        if reading_masks[i].any():
            transformed_masked_reading = transform_points(reading[reading_masks[i]], pair.transform())
            pointcloud_to_vtk(transformed_masked_reading,
                              args.output + '/' + '{}_reading_{}'.format(repr(mask_generator), i))
def json_cat_cli():
    parser = argparse.ArgumentParser(
        description='Merge json documents into a json list.')
    parser.add_argument('inputs', type=str, nargs='+',
                        help='The files to concatenate')
    args = parser.parse_args()

    json_documents = []
    for f in args.inputs:
        with open(f) as jsonfile:
            try:
                json_documents.append(json.load(jsonfile))
            except json.JSONDecodeError:
                eprint('Problem merging file {}'.format(f))

    json.dump(json_documents, sys.stdout)
def import_pointclouds(self, pointcloud_dataset, use_odometry=False):
    def generate_transform():
        if use_odometry:
            algo = IcpAlgorithm()
            initial_estimate = pointcloud_dataset.odometry_estimate(self.reading, self.reference)
            transform, _ = compute_icp(
                self.database.reading_pcd(self.dataset, self.reading),
                self.database.reference_pcd(self.dataset, self.reference),
                initial_estimate, algo)
        else:
            transform = pointcloud_dataset.ground_truth(self.reading, self.reference)

        return transform

    eprint('Generating transform')
    self.cache.get_or_generate('transform', generate_transform)
    eprint('Transform generated for {}'.format(repr(self)))
def error_landscape_of_pair(pair, icp_algo, nx=100, ny=100, s=0.02, nicp=100, axis1=0, axis2=1):
    reading_fifo = random_fifo('.qpc')
    reference_fifo = random_fifo('.qpc')
    config_fifo = random_fifo('.yaml')

    cmd_string = ('recov_icp_error_landscape -reading {} -reference {} -ground_truth {}'
                  ' -config {} -nx {} -ny {} -nicp {}'
                  ' -icp_output {} -delta {} -axis1 {} -axis2 {} -center').format(
                      reading_fifo, reference_fifo,
                      shlex.quote(json.dumps(pair.ground_truth().tolist())),
                      config_fifo, nx, ny, nicp,
                      '/tmp/toto.json', s, axis1, axis2)
    eprint(cmd_string)

    proc = subprocess.Popen(cmd_string,
                            shell=True,
                            stdin=None,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)

    pointcloud_to_qpc_file(pair.points_of_reading(), reading_fifo)
    pointcloud_to_qpc_file(pair.points_of_reference(), reference_fifo)

    with open(config_fifo, 'w') as f:
        yaml.dump(icp_algo.config_dict(), f)

    response = proc.stdout.read()

    os.unlink(reading_fifo)
    os.unlink(reference_fifo)
    os.unlink(config_fifo)

    return json.loads(response)
def find_central_cluster(lie_registrations, clustering, ground_truth):
    """
    Find the cluster that lies closest to the origin of the Lie algebra.

    :arg lie_registrations: The registration results, as a Nx6 matrix of Lie algebra vectors.
    :arg clustering: A list of lists representing the point indices of every cluster.
    :returns: The central cluster itself (as a list of indices).
    """
    eprint(len(clustering))

    if len(clustering) == 1:
        if len(clustering[0]) == 0:
            raise RuntimeError('Empty central cluster')
        eprint('Returning early')
        return clustering[0]

    cluster_distances = list(
        map(lambda x: distance_of_cluster(lie_registrations, x), clustering))
    eprint('Clustering distances: {}'.format(cluster_distances))

    if cluster_distances:
        best_cluster = clustering[np.argmin(cluster_distances)]
    else:
        best_cluster = []

    return best_cluster
def index_of_closest_to_ground_truth(dataset):
    """
    Find the index of the point in the dataset that is the closest to the ground truth.

    :arg dataset: The registration dataset as a facet.
    """
    gt = np.array(dataset['metadata']['ground_truth'])
    inv_of_gt = np.linalg.inv(gt)

    id_of_min = None
    min_distance = np.inf
    for i, registration in enumerate(dataset['data']):
        reg = np.array(registration['result'])
        distance_to_gt = np.linalg.norm(se3_log(np.dot(inv_of_gt, reg)))

        if distance_to_gt < min_distance:
            id_of_min = i
            min_distance = distance_to_gt

    eprint('Min distance to ground truth: {}'.format(min_distance))
    return id_of_min
def cluster(self, dataset, seed=None):
    # Avoid a mutable default argument: the seed is rescaled in place below.
    if seed is None:
        seed = np.zeros(6)

    if self.rescale:
        # Rescale translation and rotation separately so that they don't crush one another.
        radius_translation = englobing_radius(dataset[:, 0:3], 90.0)
        radius_rotation = englobing_radius(dataset[:, 3:6], 90.0)

        if radius_translation <= 1e-9:
            radius_translation = 1.0
        if radius_rotation <= 1e-9:
            radius_rotation = 1.0

        dataset[:, 0:3] = dataset[:, 0:3] / radius_translation
        dataset[:, 3:6] = dataset[:, 3:6] / radius_rotation
        seed[0:3] = seed[0:3] / radius_translation
        seed[3:6] = seed[3:6] / radius_rotation

    center_cluster = raw_centered_clustering(dataset,
                                             self.radius,
                                             self.k,
                                             seed,
                                             self.n_seed_init,
                                             seed_selector=self.seed_selector,
                                             logging=self.logging)

    clustering_row = {
        'clustering': [center_cluster],
        'n_clusters': 1,
        'radius': self.radius,
        'n': self.k,
        'outliers': inverse_of_cluster(center_cluster, len(dataset)).tolist(),
        'outlier_ratio': 1.0 - (len(center_cluster) / len(dataset)),
    }

    eprint('{} radius'.format(self.radius))
    eprint('{} outliers'.format(len(clustering_row['outliers'])))
    eprint('{} inliers'.format(len(center_cluster)))

    return clustering_row
def generate_one_example(registration_pair, descriptor, covariance_algo,
                         descriptor_only=False, rotation=0.0):
    registration_pair.rotation_around_z = rotation
    eprint(registration_pair)

    descriptor_start = time.time()
    computed_descriptor = descriptor.compute(registration_pair)
    eprint('Descriptor took {} seconds'.format(time.time() - descriptor_start))

    if not descriptor_only:
        covariance = covariance_algo.compute(registration_pair)
    else:
        covariance = None

    eprint('Example took {} seconds'.format(time.time() - descriptor_start))

    return (computed_descriptor, np.array(covariance))
def prediction_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', type=str,
                        help='Path to the dataset used to train the model')
    parser.add_argument('model', type=str, help='Path to the trained model')
    parser.add_argument('output', type=str, help='Where to output the vtk files')
    parser.add_argument('--registration-database',
                        help='Fetch the pointclouds to give some context to the generated covariances.')
    parser.add_argument('--filter', type=str, default='',
                        help='Locations to filter out during the query')
    args = parser.parse_args()

    print('Loading dataset...')
    with open(args.dataset) as f:
        dataset = json.load(f)
    print('Done')

    filtering_re = re.compile(args.filter)

    model = model_from_file(args.model, 'cello')
    eprint(model)

    xs = np.array(dataset['data']['xs'])
    pairs = dataset['data']['pairs']

    selection = np.ones(len(pairs), dtype=bool)
    for i, pair in enumerate(pairs):
        if args.filter and filtering_re.match(pair['dataset']):
            selection[i] = False

    eprint(len(selection))
    eprint(selection.sum())

    xs = xs[selection]

    ys_predicted = model.predict(xs)
    np.save(args.output + '/predictions.npy', ys_predicted)

    db = RegistrationPairDatabase(args.registration_database)

    parallel_starmap_progressbar(
        generate_one_prediction,
        [(i, ys_predicted[i], dataset['data']['pairs'][i], db, args.output)
         for i in range(len(ys_predicted))])
def import_files_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('--files', nargs='*', type=str, help='The files to import')
    parser.add_argument('--root', type=str,
                        help='Location of the registration result database')
    parser.add_argument('--pointcloud_root', type=str,
                        help='Location of the point clouds designated by the pairs')
    parser.add_argument('--pointcloud_dataset_type', type=str, default='ethz',
                        help='The type of pointcloud dataset we import pointclouds from')
    parser.add_argument('--pointcloud_only', action='store_true',
                        help='Only do the pointcloud importation')
    parser.add_argument('-j', '--n-cores', default=8, type=int)
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.root)

    if not args.pointcloud_only:
        for registration_file in args.files:
            print(registration_file)
            db.import_file(registration_file)

    pointcloud_root = pathlib.Path(args.pointcloud_root)

    # Collect, per dataset, the indices of the readings and references we need.
    readings = {}
    references = {}
    for pair in db.registration_pairs():
        readings.setdefault(pair.dataset, set()).add(pair.reading)
        references.setdefault(pair.dataset, set()).add(pair.reference)

    with concurrent.futures.ProcessPoolExecutor(max_workers=args.n_cores) as executor:
        fs = []
        progress_bar = tqdm.tqdm(total=5 * len(db.registration_pairs()), file=sys.stdout)

        # Import every reading and reference point cloud of every dataset.
        for dataset_name in readings:
            dataset = create_registration_dataset(args.pointcloud_dataset_type,
                                                  pointcloud_root / dataset_name)

            for reading in readings[dataset_name]:
                future = executor.submit(import_reading, dataset_name, reading, dataset, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

            for reference in references[dataset_name]:
                future = executor.submit(import_reference, dataset_name, reference, dataset, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

        concurrent.futures.wait(fs)

        # Compute the derived data of every reading and reference.
        fs = []
        for dataset_name in readings:
            for reading in readings[dataset_name]:
                eprint('{}: {}'.format(dataset_name, reading))
                future = executor.submit(compute_data_reading, dataset_name, reading, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

            for reference in references[dataset_name]:
                eprint('{}: {}'.format(dataset_name, reference))
                future = executor.submit(compute_data_reference, dataset_name, reference, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

        concurrent.futures.wait(fs)

        # Attach the point clouds to every registration pair.
        fs = []
        for pair in db.registration_pairs():
            pointcloud_dataset = create_registration_dataset(args.pointcloud_dataset_type,
                                                             pointcloud_root / pair.dataset)
            future = executor.submit(import_pointclouds_of_one_pair, pair, pointcloud_dataset)
            future.add_done_callback(lambda _: progress_bar.update())
            fs.append(future)

        concurrent.futures.wait(fs)
def _fit(self, predictors_validation, covariances_validation):
    """
    Given a validation set, train weights theta that optimize the validation
    error of the model.
    """
    predictors_validation_cuda = predictors_validation.cuda()

    self.theta = self.create_metric_weights()

    selector = sklearn.model_selection.RepeatedKFold(n_splits=5, n_repeats=10)
    optimizer = optim.SGD([self.theta], lr=self.learning_rate)
    # optimizer = optim.Adam([self.theta], lr=self.learning_rate)

    validation_losses = []
    validation_stds = []
    optimization_losses = []
    optimization_stds = []
    validation_errors_log = []
    optimization_errors_log = []
    kll_errors_log = []
    kll_validation_losses = []
    kll_validation_stds = []

    epoch = 0
    keep_going = True
    best_loss = np.inf
    best_model = []
    n_epoch_without_improvement = 0
    n_epoch_without_min_delta = 0

    while ((epoch < self.n_iterations or self.n_iterations == 0)
           and keep_going
           and n_epoch_without_improvement < self.patience
           and n_epoch_without_min_delta < self.patience):
        self.logger.debug('Starting epoch %d' % epoch)
        epoch_start = time.time()

        optimizer.zero_grad()
        losses = Variable(torch.zeros(len(self.model_predictors)))
        optimization_loss = 0.0

        metric_matrix = self.theta_to_metric_matrix(self.theta)

        perms = torch.randperm(len(self.model_predictors))
        identity = torch.Tensor(np.identity(6))
        for i in perms:
            distances = self._compute_distances_cuda(self.model_predictors_cuda,
                                                     metric_matrix.cuda(),
                                                     self.model_predictors[i].cuda())
            prediction = self._prediction_from_distances_cuda(self.model_covariances_cuda,
                                                              distances).cpu()

            # Regularize the prediction so its determinant and inverse stay finite.
            regur_prediction = prediction + identity * 1e-12
            det_pred = torch.det(regur_prediction)
            log_det = torch.log(det_pred + 1e-18)

            loss_A = log_det
            loss_B = torch.trace(torch.mm(torch.inverse(regur_prediction),
                                          self.model_covariances[i]))

            distances_cpu = distances.cpu()
            nonzero_distances = torch.gather(distances_cpu, 0,
                                             torch.nonzero(distances_cpu).squeeze())
            regularization_term = torch.sum(torch.log(nonzero_distances))

            loss_of_pair = (1 - self.alpha) * (loss_A + loss_B) + self.alpha * regularization_term
            optimization_loss += loss_of_pair
            losses[i] = loss_of_pair

            if i % 4 == 0:
                self.logger.debug('Backprop')
                optimization_loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                optimization_loss = 0.0
                metric_matrix = self.theta_to_metric_matrix(self.theta)

        predictions = self._predict(self.model_predictors_cuda)
        self.logger.debug('Predictions have size %d kb' % sys.getsizeof(predictions))

        indiv_optimization_errors = self._validation_errors(predictions,
                                                            self.model_covariances.data)
        self.logger.debug('Indiv optimization errors have size %d kb' %
                          sys.getsizeof(indiv_optimization_errors))

        optimization_score = torch.mean(indiv_optimization_errors)
        optimization_errors_log.append(indiv_optimization_errors.numpy().tolist())
        optimization_losses.append(optimization_score.item())
        optimization_stds.append(torch.std(indiv_optimization_errors).item())

        metric_matrix = self.theta_to_metric_matrix(self.theta)

        if self.queries_have_neighbor_cuda(self.model_predictors_cuda,
                                           metric_matrix.cuda(),
                                           predictors_validation_cuda):
            predictions = self._predict(predictors_validation_cuda)
            validation_errors = self._validation_errors(predictions,
                                                        covariances_validation.data)
            validation_score = torch.mean(validation_errors)

            klls = self._kll(predictions, covariances_validation.data)
            kll_errors_log.append(klls.numpy().tolist())
            kll_validation_losses.append(torch.mean(klls).numpy().item())
            kll_validation_stds.append(torch.std(klls).numpy().item())

            eprint('-- Validation of epoch %d --' % epoch)

            if validation_score < best_loss:
                eprint('** New best model! **')
                n_epoch_without_improvement = 0
                best_loss = validation_score
                best_model = self.export_model()
            else:
                n_epoch_without_improvement += 1

            eprint('Avg Optim Loss: {:.5E}'.format(optimization_score))
            eprint('Validation score: {:.5E}'.format(validation_score))
            eprint()

            if epoch > 0:
                eprint('Optim. delta: {:.5E}'.format(
                    optimization_losses[-2] - optimization_losses[-1]))
                eprint('Validation delta: {:.5E}'.format(
                    validation_losses[-1] - validation_score))
                eprint()

            eprint('Validation std: {:.5E}'.format(validation_errors.std()))
            eprint('Validation kll: {:.5E}'.format(klls.mean()))
            eprint('Validation kll std: {:.5E}'.format(klls.std()))
            eprint('N epoch without improvement: %d' % n_epoch_without_improvement)
            eprint('N epoch without min delta: %d' % n_epoch_without_min_delta)
            eprint()

            validation_errors_log.append(validation_errors.numpy().tolist())
            validation_losses.append(validation_score.numpy().tolist())
            validation_stds.append(torch.std(validation_errors).numpy().tolist())
        else:
            keep_going = False
            eprint('Stopping because elements in the validation dataset have no neighbors.')

        if (epoch > 0) and (validation_losses[-1] - validation_losses[-2] > -1.0 * self.min_delta):
            n_epoch_without_min_delta += 1
        else:
            n_epoch_without_min_delta = 0

        eprint('Epoch took {} seconds'.format(time.time() - epoch_start))
        epoch = epoch + 1

    return {
        'best_loss': float(best_loss),
        'metadata': self.metadata(),
        'model': best_model,
        'optimization_errors': optimization_errors_log,
        'optimization_loss': optimization_losses,
        'optimization_std': optimization_stds,
        'validation_errors': validation_errors_log,
        'validation_loss': validation_losses,
        'validation_std': validation_stds,
        'kll_validation': kll_validation_losses,
        'kll_std': kll_validation_stds,
        'kll_errors': kll_errors_log,
        'what': 'model learning',
    }
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm', type=str)
    parser.add_argument('output', type=str, help='Where to save the learning run')
    parser.add_argument('-lr', '--learning_rate', type=float, default=1e-7)
    parser.add_argument('-a', '--alpha', type=float, default=1e-6)
    parser.add_argument('-b', '--beta', type=float, default=1e3)
    parser.add_argument('-n', '--n_iterations', type=int, default=0,
                        help='Maximum number of iterations. 0 means no maximum and wait for convergence.')
    parser.add_argument('-pa', '--patience', type=int, default=20,
                        help='N of iterations without improvement before ending training.')
    parser.add_argument('-cv', '--cross-validate', type=str, default='',
                        help='Name of the dataset to use as a validation set')
    parser.add_argument('-wd', '--weight-decay', type=float, default=1e-10,
                        help='For the MLP, set the weight decay parameter.')
    parser.add_argument('--filter', type=str, default='',
                        help='Filter out datasets from the learning.')
    parser.add_argument('--preprocessing', '-p', type=str, default='identity',
                        help='Name of the preprocessing algorithm to use.')
    parser.add_argument('-md', '--min-delta', type=float, default=1e-4,
                        help='Minimum gain on the validation loss before the learning stops.')
    parser.add_argument('--validate-on-end', action='store_true',
                        help='Train on the first part of the dataset, validate on the second part.')
    parser.add_argument('-d', '--debug', action='store_true', help='Verbose debug')
    args = parser.parse_args()

    logging.basicConfig(stream=sys.stderr)
    logger = logging.getLogger()
    if args.debug:
        logger.setLevel(logging.DEBUG)

    logger.info('Loading document')
    input_document = json.load(sys.stdin)
    logger.info('Done loading document')

    if args.filter:
        regex = re.compile(args.filter)
        mask = filter_dataset(input_document, regex)
    else:
        mask = np.ones(len(input_document['data']['pairs']), dtype=bool)

    print(np.sum(mask))

    train_indices = []
    validation_indices = []
    if args.cross_validate:
        train_indices, validation_indices = train_test_split_cross_validate(
            input_document, mask, args.cross_validate)
    elif args.validate_on_end:
        train_indices, validation_indices = train_test_split_validate_on_end(
            input_document, mask)
    else:
        train_indices, validation_indices = train_test_split(input_document, mask)

    eprint('Training set size: {}. Validation set size: {}'.format(
        len(train_indices), len(validation_indices)))

    predictors = np.array(input_document['data']['xs'])
    covariances = np.array(input_document['data']['ys'])

    model = model_factory(args.algorithm)
    model.learning_rate = args.learning_rate
    model.alpha = args.alpha
    model.beta = args.beta
    model.n_iterations = args.n_iterations
    model.weight_decay = args.weight_decay
    model.min_delta = args.min_delta
    model.patience = args.patience

    preprocessing_algo = preprocessing_factory(args.preprocessing)
    model.preprocessing = preprocessing_algo

    if train_indices and validation_indices:
        learning_run = model.fit(predictors, covariances,
                                 train_set=train_indices,
                                 test_set=validation_indices)
    else:
        learning_run = model.fit(predictors, covariances)

    learning_run['metadata']['descriptor_config'] = input_document['metadata']['descriptor_config']
    if args.cross_validate:
        learning_run['metadata']['cross_validation'] = args.cross_validate

    model_path = args.output + '.model'
    model.save_model(model_path)
    learning_run['model'] = os.getcwd() + '/' + model_path

    # Sanity check: make sure every entry of the learning run is json serializable.
    for key in learning_run:
        print(key)
        _ = json.dumps(learning_run[key])

    with open(args.output + '.json', 'w') as f:
        json.dump(learning_run, f)
def generate_examples_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', type=str, default='dataset.json',
                        help='Where to store the examples')
    parser.add_argument('--input', type=str, default='.', required=True,
                        help='Where the registration results are stored')
    parser.add_argument('--exclude', type=str, default='',
                        help='Regex of names of datasets to exclude')
    parser.add_argument('-j', '--n_cores', type=int, default=8,
                        help='N of cores to use for the computation')
    parser.add_argument('-c', '--config', type=str,
                        help='Path to a json config for the descriptor.')
    parser.add_argument('--descriptor-only', action='store_true',
                        help='Generate only the descriptor.')
    parser.add_argument('--rotations', '-r', nargs='+', type=float, default=[0.0])
    parser.add_argument('--max-mean-gt-distance', type=float, default=0.3)
    args = parser.parse_args()

    np.set_printoptions(linewidth=120)

    db = RegistrationPairDatabase(args.input, args.exclude)
    registration_pairs = db.registration_pairs()

    output_path = pathlib.Path(args.output)

    clustering_algorithm = CenteredClusteringAlgorithm(radius=1.0, k=16, n_seed_init=32)
    clustering_algorithm.seed_selector = 'localized'
    clustering_algorithm.rescale = True
    clustering_algorithm = RegistrationPairClusteringAdapter(clustering_algorithm)

    distribution_algorithm = FixedCenterSamplingDistributionAlgorithm(clustering_algorithm)
    covariance_algo = DistributionAlgorithmToCovarianceAlgorithm(distribution_algorithm)

    with open(args.config) as f:
        descriptor_config = json.load(f)

    descriptor = descriptor_factory(descriptor_config)
    eprint('Using descriptor: {}'.format(repr(descriptor)))
    eprint('Generating with rotations: {}'.format(args.rotations))

    examples = []
    pairs = []
    for x in registration_pairs:
        examples.extend([(x, descriptor, covariance_algo, args.descriptor_only, r)
                         for r in args.rotations])
        pairs.extend([{
            'dataset': x.dataset,
            'reading': x.reading,
            'reference': x.reference,
            'rotation': r
        } for r in args.rotations])

    random.shuffle(examples)

    results = parallel_starmap_progressbar(generate_one_example, examples,
                                           n_cores=args.n_cores)
    # results = [generate_one_example(*x) for x in examples]

    xs = []
    ys = []
    for p in results:
        x, y = p
        xs.append(x.tolist())
        if not args.descriptor_only:
            ys.append(y.tolist())

    output_dict = {
        'metadata': {
            'what': 'learning_dataset',
            'date': str(datetime.datetime.today()),
            'descriptor': str(descriptor),
            'covariance_algo': str(covariance_algo),
            'descriptor_labels': descriptor.labels(),
            'descriptor_config': descriptor_config,
            'filter': args.exclude
        },
        'statistics': {
            'n_examples': len(xs)
        },
        'data': {
            'pairs': pairs,
            'xs': xs,
        }
    }

    if not args.descriptor_only:
        output_dict['data']['ys'] = ys

    with open(args.output, 'w') as dataset_file:
        json.dump(output_dict, dataset_file)
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', type=str,
                        help='Path to the dataset used to train the model')
    parser.add_argument('learningrun', type=str, help='Path to the learning run')
    parser.add_argument('model', type=str, help='Path to the trained model')
    parser.add_argument('output', type=str, help='Where to output the vtk files')
    args = parser.parse_args()

    print('Loading dataset...')
    with open(args.dataset) as f:
        dataset = json.load(f)
    print('Done')

    print('Loading model...')
    with open(args.learningrun) as f:
        learning_run = json.load(f)
    print('Done')

    model = model_from_file(args.model, learning_run['metadata']['algorithm'])

    xs = np.array(dataset['data']['xs'])
    ys = np.array(dataset['data']['ys'])

    xs_validation = xs[learning_run['validation_set']]
    ys_validation = ys[learning_run['validation_set']]

    pred_begin = time.time()
    ys_predicted = model.predict(xs_validation)
    print((time.time() - pred_begin) / len(xs_validation))  # average time per prediction

    errors = frobenius(ys_validation - ys_predicted)

    klls = []
    for i in range(len(ys_validation)):
        klls.append(kullback_leibler(ys_validation[i], ys_predicted[i]))

    print(np.mean(errors))
    print(np.mean(np.array(klls)))

    with open(args.output + '/summary.csv', 'w') as summary_file:
        writer = csv.DictWriter(summary_file,
                                ['location', 'reading', 'reference', 'loss',
                                 'kullback_leibler', 'predicted_trace', 'reference_trace'])
        writer.writeheader()

        for i in range(len(ys_predicted)):
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_validation[i][0:3, 0:3],
                                          args.output + '/translation_validation_' + str(i).zfill(4))
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_predicted[i][0:3, 0:3],
                                          args.output + '/translation_predicted_' + str(i).zfill(4))
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_validation[i][3:6, 3:6],
                                          args.output + '/rotation_validation_' + str(i).zfill(4))
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_predicted[i][3:6, 3:6],
                                          args.output + '/rotation_predicted_' + str(i).zfill(4))

            eprint(learning_run['validation_set'][i])
            index_of_example = learning_run['validation_set'][i]

            writer.writerow({
                'location': dataset['data']['pairs'][index_of_example]['dataset'],
                'reading': dataset['data']['pairs'][index_of_example]['reading'],
                'reference': dataset['data']['pairs'][index_of_example]['reference'],
                'loss': errors[i],
                'kullback_leibler': klls[i],
                'predicted_trace': np.trace(ys_predicted[i]),
                'reference_trace': np.trace(ys_validation[i])
            })
def _fit(self, xs_train, ys_train, xs_test, ys_test):
    self.model = torch.nn.Sequential(
        torch.nn.Linear(len(xs_train[0]), self.hidden_sizes[0]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[0]),
        torch.nn.Linear(self.hidden_sizes[0], self.hidden_sizes[1]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[1]),
        torch.nn.Linear(self.hidden_sizes[1], self.hidden_sizes[2]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[2]),
        torch.nn.Linear(self.hidden_sizes[2], self.hidden_sizes[3]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[3]),
        torch.nn.Linear(self.hidden_sizes[3], self.hidden_sizes[4]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[4]),
        torch.nn.Linear(self.hidden_sizes[4], self.hidden_sizes[5]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[5]),
        torch.nn.Linear(self.hidden_sizes[5], self.hidden_sizes[6]),
        torch.nn.Hardtanh(),
        torch.nn.Dropout(0.1),
        torch.nn.BatchNorm1d(self.hidden_sizes[6]),
        torch.nn.Linear(self.hidden_sizes[6], 36)).to(self.device)

    self.best_loss = float('inf')
    n_iter_without_improvement = 0
    epoch = 0

    train_losses = []
    test_losses = []
    train_stds = []
    test_stds = []
    train_errors_log = []
    test_errors_log = []

    optimizer = torch.optim.Adam(self.model.parameters(),
                                 lr=self.learning_rate,
                                 weight_decay=self.weight_decay)

    while n_iter_without_improvement < self.patience and (
            epoch < self.n_iterations or self.n_iterations == 0):
        loss = self._validate(xs_train, ys_train)

        # Zero the gradients before backpropagation, then step the optimizer.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        test_loss = self._validate(xs_test, ys_test)

        if test_loss < self.best_loss:
            self.best_loss = test_loss
            n_iter_without_improvement = 0
        else:
            n_iter_without_improvement += 1

        if epoch % self.logging_rate == 0:
            test_errors = self._validation_errors(xs_test, ys_test)
            test_errors_log.append(test_errors.data.cpu().numpy().tolist())

            train_errors = self._validation_errors(xs_train, ys_train)
            train_errors_log.append(train_errors.data.cpu().numpy().tolist())

            train_losses.append(loss.data.cpu().numpy().item())
            test_losses.append(test_loss.data.cpu().numpy().item())

            train_stds.append(train_errors.std().data.cpu().numpy().item())
            test_stds.append(test_errors.std().data.cpu().numpy().item())

            eprint('Train Loss: {:.8E}'.format(loss.data))
            eprint('Test loss: {:.8E}'.format(test_loss.data))
            eprint('{} iterations without improvement (out of {})'.format(
                n_iter_without_improvement, self.patience))
            eprint()

        epoch += 1

    return {
        'best_loss': self.best_loss.cpu().detach().numpy().item(),
        'metadata': {
            'algorithm': 'mlp',
            'learning_rate': self.learning_rate,
            'logging_rate': self.logging_rate,
            'n_iterations': self.n_iterations,
            'patience': self.patience,
        },
        'optimization_errors': train_errors_log,
        'optimization_loss': train_losses,
        'validation_errors': test_errors_log,
        'validation_loss': test_losses,
        'validation_std': test_stds,
        'optimization_std': train_stds,
        'what': 'model learning',
    }
def accept_raw_file(self, filename):
    p = pathlib.Path(filename)
    eprint(self.directory_of_pair)

    dest = str(self.directory_of_pair / 'raw' / p.name)
    eprint('{} to {}'.format(p, dest))

    os.rename(str(p), dest)