def merge_result_files(result_files, output):
    metadata = {}
    merged_data = []
    statistics = {}
    what = None

    for entry in result_files:
        with open(entry) as jsonfile:
            try:
                data = json.load(jsonfile)
            except json.JSONDecodeError:
                eprint('File {} was not json parsable'.format(entry))
                continue

        if 'what' in data:
            if not what:
                what = data['what']
            else:
                if data['what'] != what:
                    raise RuntimeError('Merged files of different type')

        metadata.update(data['metadata'])
        merged_data.extend(data['data'])

        if 'statistics' in data:
            statistics = merge_statistics(statistics, data['statistics'])

    output_dict = {'metadata': metadata, 'data': merged_data}

    if what:
        output_dict['what'] = what

    json.dump(output_dict, output)
    output.flush()
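
# A minimal usage sketch (an assumption, not part of the original module): merge two
# hypothetical result files into a single document written to stdout. It relies on
# merge_result_files above and the module-level json import.
def merge_results_to_stdout(paths=('run_a.json', 'run_b.json')):
    import sys
    merge_result_files(list(paths), sys.stdout)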
# Example 2
def lie_vectors_of_registrations(json_data,
                                 key='result',
                                 left_multiply=np.identity(4),
                                 right_multiply=np.identity(4)):
    """
    Outputs the lie vectors of a json registration dataset.

    :param json_data: A full registration dataset.
    :param key: The key of the matrix to evaluate, inside the registration result.
    :param left_multiply: A transform applied on the left of each result matrix before conversion.
    :param right_multiply: A transform applied on the right of each result matrix before conversion.
    :returns: A Nx6 numpy matrix containing the lie algebra representation of the results.
    """
    lie_results = np.empty((len(json_data['data']), 6))
    for i, registration in enumerate(json_data['data']):
        m = np.array(registration[key])

        res = np.dot(left_multiply, np.dot(m, right_multiply))

        try:
            lie_results[i, :] = se3_log(res)
        except RuntimeError:
            lie_results[i, :] = np.zeros(6)
            eprint('Warning: failed conversion to lie algebra of matrix {}'.
                   format(m))

    return lie_results
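
# Hedged usage sketch (an assumption, not from the original source): express every
# registration result relative to the ground truth of the dataset by left-multiplying
# with the inverse of the ground truth transform before taking the lie algebra.
def lie_vectors_relative_to_ground_truth(json_data):
    ground_truth = np.array(json_data['metadata']['ground_truth'])
    return lie_vectors_of_registrations(json_data,
                                        left_multiply=np.linalg.inv(ground_truth))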
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('database', type=str, help='Location of the registration result database to use.')
    parser.add_argument('dataset', type=str)
    parser.add_argument('reading', type=int)
    parser.add_argument('reference', type=int)
    parser.add_argument('-c', '--config', type=str, help='Path to a json file containing the descriptor configuration.')
    parser.add_argument('-r', '--rotation', type=float, help='Rotation around z to apply to the pointcloud', default=0.0)
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.database)
    pair = db.get_registration_pair(args.dataset, args.reading, args.reference)
    pair.rotation_around_z = args.rotation

    if args.config:
        with open(args.config) as f:
            descriptor = descriptor_factory(json.load(f))
    else:
        config = {'mask': {'name': 'grid'},
                  'algo': {'name': 'normals_histogram'}}
        descriptor = descriptor_factory(config)

    descriptor_compute_start = time.time()
    computed_descriptor = descriptor.compute(pair)
    eprint('Descriptor took {} seconds'.format(time.time() - descriptor_compute_start))

    print(computed_descriptor)
    print(descriptor.labels())
# Example 4
def raw_centered_clustering(dataset,
                            radius,
                            n=12,
                            seed=np.zeros(6),
                            n_seed_init=100,
                            seed_selector='greedy',
                            logging=False):
    """
    :arg dataset: The dataset to cluster (as a numpy matrix).
    :arg radius: The radius in which we have to have enough neighbours to be a core point.
    :arg n: The number of points that have to be within the radius to be a core point.
    :returns: The indices of the points that are inside the central cluster as a list.
    """
    strings_of_seed = list(map(str, seed.tolist()))
    string_of_seed = ','.join(strings_of_seed)

    command = 'centered_clustering -n_seed_init {} -seed_selector {} -k {} -radius {} -seed {} {}'.format(
        n_seed_init, seed_selector, n, radius, string_of_seed,
        ('--pointcloud_log' if logging else '--nopointcloud_log'))

    eprint(command)

    response = subprocess.run(command,
                              input=json.dumps(dataset.tolist()),
                              stdout=subprocess.PIPE,
                              shell=True,
                              universal_newlines=True)

    return json.loads(response.stdout)
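
# Hedged usage sketch (an assumption, not from the original source): cluster the lie
# algebra representation of a registration dataset around the origin. It assumes the
# centered_clustering executable is on the PATH, as raw_centered_clustering requires.
def central_cluster_of_dataset(json_data, radius=0.2, k=12):
    lie_vectors = lie_vectors_of_registrations(json_data)
    return raw_centered_clustering(lie_vectors, radius, n=k)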
def run_one_clustering_thread(radius,
                              k,
                              registration_data,
                              rescale_data=False,
                              n_seed_init=100,
                              seed_selector='localized'):
    eprint('Clustering with radius {}'.format(radius))

    var_translation = float(registration_data['metadata']['var_translation'])

    lie_vectors = positions_of_registration_data(registration_data)

    ground_truth = np.array(registration_data['metadata']['ground_truth'])

    algo = CenteredClusteringAlgorithm(radius, k, n_seed_init)
    algo.rescale = rescale_data
    algo.n_seed_init = n_seed_init
    algo.seed_selector = seed_selector

    # The clustering seed is the lie algebra representation of the ground truth.
    se3_log_fn = parallel.FunctionWrapper('log', 'lieroy.se3')
    clustering = algo.cluster(lie_vectors, seed=se3_log_fn(ground_truth))

    clustering_with_distribution = compute_distribution(
        registration_data, clustering)

    eprint('Done clustering with radius {} '.format(radius))

    return clustering_with_distribution
def generate_descriptor_worker(dataset, i, output_dir):
    pointcloud = dataset.points_of_cloud(i)

    filename = 'descriptor_{}_{}.json'.format(dataset.name, i)
    eprint('Generating descriptor for {}'.format(filename))
    descriptor = generate_descriptor(pointcloud)

    with (output_dir / filename).open('w') as output_file:
        json.dump(descriptor, output_file)
def distance_mean_ground_truth(pair, distribution_algo):
    eprint(pair)
    ground_truth = pair.ground_truth()
    distribution = distribution_algo.compute(pair)
    mean = np.array(distribution['mean'])

    delta = np.linalg.inv(ground_truth) @ mean

    return np.linalg.norm(se3.log(delta))
# Example 8
    def unprocess(self, m):
        covariances = np.empty((len(m), 6, 6))

        for i, v in enumerate(m):
            up = to_upper_triangular(v)
            covariances[i] = np.dot(up, up.T)

            eprint(covariances[i])

        return covariances
# Example 9
    def _dets(self, xs, ys):
        ys_predicted = self.model(xs)
        covariances_predicted = ys_predicted.view(len(ys_predicted), 6, 6)

        dets = torch.Tensor(len(covariances_predicted))
        for i, cov in enumerate(covariances_predicted):
            dets[i] = torch.det(cov)

        worst_cov = torch.argmin(torch.abs(dets))
        eprint(dets[worst_cov])
        eprint(covariances_predicted[worst_cov])
# Example 10
    def compute(self, pointcloud):
        command_string = 'grid_pointcloud_separator -spanx {} -spany {} -spanz {} -nx {} -ny {} -nz {}'.format(
            self.spanx, self.spany, self.spanz, self.nx, self.ny, self.nz)
        eprint(command_string)

        response = subprocess.check_output(command_string,
                                           universal_newlines=True,
                                           shell=True,
                                           input=json.dumps(pointcloud))

        return json.loads(response)
# Example 11
    def process(self, covariances):
        vectors = np.empty((len(covariances), 21))
        for i, cov in enumerate(covariances):
            try:
                L = np.linalg.cholesky(cov)
            except np.linalg.LinAlgError:
                m = nearestPD(cov)
                L = np.linalg.cholesky(m)
            vectors[i] = upper_triangular_to_vector(L.T)
            eprint(vectors[i])

        return vectors
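
# Illustration (an assumption, not part of the original module) of the encoding used by
# process/unprocess above: a 6x6 covariance is factored as L L^T and the upper triangle
# of L^T is flattened into a 21-dimensional vector. The exact flattening order of the
# module's upper_triangular_to_vector/to_upper_triangular helpers may differ.
def cholesky_vector_roundtrip(cov):
    L = np.linalg.cholesky(cov)            # cov = L @ L.T
    vector = L.T[np.triu_indices(6)]       # the 21 upper-triangular entries of L.T
    rebuilt = np.zeros((6, 6))
    rebuilt[np.triu_indices(6)] = vector   # rebuild the upper-triangular factor
    return rebuilt.T @ rebuilt             # equals cov for positive definite input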
# Example 12
    def cluster(self, dataset, seed=None):
        if seed is None:
            # Default to clustering around the origin when no seed is given.
            seed = np.zeros(dataset.shape[1])
        distances = np.linalg.norm(dataset - seed, axis=1)
        eprint('Distances shape: {}'.format(distances.shape))

        percentile = np.percentile(distances, self.quantile)
        cluster = np.where(distances < percentile)[0]
        eprint('Removing all points more than {} away'.format(percentile))

        return {
            'clustering': [cluster.tolist()],
            'n_clusters': 1,
            'outliers': inverse_of_cluster(cluster, len(dataset)).tolist(),
            'outlier_ratio': 1.0 - (len(cluster) / len(dataset)),
        }
def compute_one_summary_line(registration_pair, covariance_algo):
    eprint(registration_pair)
    covariance = covariance_algo.compute(registration_pair)

    d = {
        'dataset': registration_pair.dataset,
        'reading': registration_pair.reading,
        'reference': registration_pair.reference,
        'condition_number': np.linalg.cond(covariance),
        'trace': np.trace(covariance),
    }

    eprint('%s done' % str(registration_pair))

    return d
# Example 14
        def compute_overlapping_region(radius):
            reading = self.points_of_reading()
            reference = self.points_of_reference()
            t = self.transform()

            input_dict = {
                'reading': reading.tolist(),
                'reference': reference.tolist(),
                't': t.tolist()
            }

            cmd_string = 'overlapping_region -radius {} -mask'.format(radius)
            eprint(cmd_string)
            response = run_subprocess(cmd_string, json.dumps(input_dict))

            return json.loads(response)
def apply_mask_cli():
    parser = argparse.ArgumentParser(description='Apply a point selection mask on a pair of pointclouds.')
    parser.add_argument('database', type=str, help='Location of the registration result database to use.')
    parser.add_argument('dataset', type=str)
    parser.add_argument('reading', type=int)
    parser.add_argument('reference', type=int)
    parser.add_argument('--output', type=str, default='.', help='Output directory of the visualization.')
    parser.add_argument('--radius', type=float, default=0.1, help='For the overlap mask generator, the max distance between points for them to be neighbors.')
    parser.add_argument('--range', type=float, help='For the angle mask generator, the range of angles accepted.', default=0.0)
    parser.add_argument('--offset', type=float, help='For the angle mask generator, the offset of angles accepted.', default=0.0)
    parser.add_argument('-c', '--config', type=str, help='Path to a json config for the mask')
    parser.add_argument('-r', '--rotation', type=float, help='Rotation around the z axis to apply to the cloud pair before computing the descriptor, in radians.', default=0.0)
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.database)
    pair = db.get_registration_pair(args.dataset, args.reading, args.reference)

    pair.rotation_around_z = args.rotation

    reading = pair.points_of_reading()
    reference = pair.points_of_reference()

    with open(args.config) as f:
        config = json.load(f)
        print(config)
        mask_generator = mask_factory(config)

    reading_masks, reference_masks = mask_generator.compute(pair)


    eprint('Transform of pair: ')
    eprint(pair.transform())


    pointcloud_to_vtk(reference, args.output + '/reference')
    pointcloud_to_vtk(transform_points(reading, pair.transform()), args.output + '/reading')


    for i in range(len(reading_masks)):
        if reference_masks[i].any():
            pointcloud_to_vtk(reference[reference_masks[i]], args.output + '/' + '{}_reference_{}'.format(mask_generator.__repr__(), i))

        if reading_masks[i].any():
            transformed_masked_reading = transform_points(reading[reading_masks[i]], pair.transform())

            pointcloud_to_vtk(transformed_masked_reading, args.output + '/' + '{}_reading_{}'.format(mask_generator.__repr__(), i))
def json_cat_cli():
    parser = argparse.ArgumentParser(
        description='Merge json documents into a json list.')
    parser.add_argument('inputs',
                        type=str,
                        nargs='+',
                        help='The files to concatenate')
    args = parser.parse_args()

    json_documents = []
    for f in args.inputs:
        with open(f) as jsonfile:
            try:
                json_document = json.load(jsonfile)
                json_documents.append(json_document)
            except json.JSONDecodeError:
                eprint('Problem merging file {}'.format(f))

    json.dump(json_documents, sys.stdout)
# Example 17
    def import_pointclouds(self, pointcloud_dataset, use_odometry=False):
        def generate_transform():
            if use_odometry:
                algo = IcpAlgorithm()
                initial_estimate = pointcloud_dataset.odometry_estimate(
                    self.reading, self.reference)
                transform, _ = compute_icp(
                    self.database.reading_pcd(self.dataset, self.reading),
                    self.database.reference_pcd(self.dataset, self.reference),
                    initial_estimate, algo)
            else:
                transform = pointcloud_dataset.ground_truth(
                    self.reading, self.reference)

            return transform

        eprint('Generating transform')
        self.cache.get_or_generate('transform', generate_transform)
        eprint('Transform generated for {}'.format(repr(self)))
def error_landscape_of_pair(pair,
                            icp_algo,
                            nx=100,
                            ny=100,
                            s=0.02,
                            nicp=100,
                            axis1=0,
                            axis2=1):
    reading_fifo = random_fifo('.qpc')
    reference_fifo = random_fifo('.qpc')
    config_fifo = random_fifo('.yaml')

    cmd_string = (
        'recov_icp_error_landscape -reading {} -reference {} -ground_truth {}'
        ' -config {} -nx {} -ny {} -nicp {}'
        ' -icp_output {} -delta {} -axis1 {} -axis2 {} -center').format(
            reading_fifo, reference_fifo,
            shlex.quote(json.dumps(pair.ground_truth().tolist())), config_fifo,
            nx, ny, nicp, '/tmp/toto.json', s, axis1, axis2)

    eprint(cmd_string)

    proc = subprocess.Popen(cmd_string,
                            shell=True,
                            stdin=None,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)

    pointcloud_to_qpc_file(pair.points_of_reading(), reading_fifo)
    pointcloud_to_qpc_file(pair.points_of_reference(), reference_fifo)

    with open(config_fifo, 'w') as f:
        yaml.dump(icp_algo.config_dict(), f)

    response = proc.stdout.read()

    os.unlink(reading_fifo)
    os.unlink(reference_fifo)
    os.unlink(config_fifo)

    return json.loads(response)
# Example 19
def find_central_cluster(lie_registrations, clustering, ground_truth):
    """
    :arg dataset: A dataset as a facet.
    :arg clustering: A list of lists reprensenting the points indices
    :returns: The cluster itself (as a list of indices).
    """

    eprint(len(clustering))
    if len(clustering) == 1:
        if len(clustering[0]) == 0:
            raise RuntimeError('Empty central cluster')

        eprint('Returning early')
        return clustering[0]

    cluster_distances = list(
        map(lambda x: distance_of_cluster(lie_registrations, x), clustering))
    eprint('Clustering distances: {}'.format(cluster_distances))

    if cluster_distances:
        best_cluster = clustering[np.argmin(cluster_distances)]
    else:
        best_cluster = []

    return best_cluster
# Example 20
def index_of_closest_to_ground_truth(dataset):
    """
    Find the index of the point in the dataset that is the closest to ground truth.
    :arg dataset: The registration dataset as a facet.
    """
    gt = np.array(dataset['metadata']['ground_truth'])
    inv_of_gt = np.linalg.inv(gt)

    id_of_min = None
    min_distance = np.inf
    for i, registration in enumerate(dataset['data']):
        print(registration)
        reg = np.array(registration['result'])
        distance_to_gt = np.linalg.norm(se3_log(np.dot(inv_of_gt, reg)))

        if distance_to_gt < min_distance:
            id_of_min = i
            min_distance = distance_to_gt

    eprint('Min distance to ground truth: {}'.format(min_distance))

    return id_of_min
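
# Hedged usage sketch (an assumption, not from the original source): fetch the
# registration result that landed closest to the ground truth of a dataset.
def best_registration_of_dataset(json_data):
    i = index_of_closest_to_ground_truth(json_data)
    return np.array(json_data['data'][i]['result'])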
# Example 21
    def cluster(self, dataset, seed=None):
        # Avoid a mutable default argument: a default seed array would otherwise be
        # shared across calls and rescaled in place below.
        if seed is None:
            seed = np.zeros(6)

        if self.rescale:
            # Rescale translation and rotation separately so that they don't crush one another.
            radius_translation = englobing_radius(dataset[:, 0:3], 90.0)
            radius_rotation = englobing_radius(dataset[:, 3:6], 90.0)

            if radius_translation <= 1e-9:
                radius_translation = 1.0

            if radius_rotation <= 1e-9:
                radius_rotation = 1.0

            dataset[:, 0:3] = dataset[:, 0:3] / radius_translation
            dataset[:, 3:6] = dataset[:, 3:6] / radius_rotation

            seed[0:3] = seed[0:3] / radius_translation
            seed[3:6] = seed[3:6] / radius_rotation

        center_cluster = raw_centered_clustering(
            dataset,
            self.radius,
            self.k,
            seed,
            self.n_seed_init,
            seed_selector=self.seed_selector,
            logging=self.logging)

        clustering_row = {
            'clustering': [center_cluster],
            'n_clusters': 1,
            'radius': self.radius,
            'n': self.k,
            'outliers': inverse_of_cluster(center_cluster,
                                           len(dataset)).tolist(),
            'outlier_ratio': 1.0 - (len(center_cluster) / len(dataset)),
        }

        eprint('{} radius'.format(self.radius))
        eprint('{} outliers'.format(len(clustering_row['outliers'])))
        eprint('{} inliers'.format(len(center_cluster)))

        return clustering_row
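
# Hedged usage sketch (an assumption, not from the original source): configure the
# clustering algorithm and run it on the lie vectors of a registration dataset, seeded
# on the lie algebra representation of the ground truth (se3_log is used the same way
# elsewhere in this listing).
def clustering_of_dataset(json_data, radius=0.2, k=12):
    lie_vectors = lie_vectors_of_registrations(json_data)
    ground_truth = np.array(json_data['metadata']['ground_truth'])
    algo = CenteredClusteringAlgorithm(radius, k, n_seed_init=32)
    algo.rescale = True
    algo.seed_selector = 'localized'
    return algo.cluster(lie_vectors, seed=se3_log(ground_truth))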
def generate_one_example(registration_pair,
                         descriptor,
                         covariance_algo,
                         descriptor_only=False,
                         rotation=0.0):
    registration_pair.rotation_around_z = rotation

    eprint(registration_pair)
    descriptor_start = time.time()
    computed_descriptor = descriptor.compute(registration_pair)
    eprint('Descriptor took {} seconds'.format(time.time() - descriptor_start))

    if not descriptor_only:
        covariance = covariance_algo.compute(registration_pair)
    else:
        covariance = None

    eprint('Example took {} seconds'.format(time.time() - descriptor_start))

    return (computed_descriptor, np.array(covariance))
def prediction_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help='Path to the dataset used to train the model', type=str)
    parser.add_argument('model', help='Path to the trained model', type=str)
    parser.add_argument('output', help='Where to output the vtk files', type=str)
    parser.add_argument('--registration-database', help='Fetch the pointclouds to give some context to the generated covariances.')
    parser.add_argument('--filter', help='Locations to filter during the query', type=str, default='')
    args = parser.parse_args()

    print('Loading dataset...')
    with open(args.dataset) as f:
        dataset = json.load(f)
    print('Done')

    filtering_re = re.compile(args.filter)


    model = model_from_file(args.model, 'cello')

    eprint(model)

    xs = np.array(dataset['data']['xs'])

    pairs = dataset['data']['pairs']
    selection = np.ones(len(pairs), dtype=bool)
    for i, pair in enumerate(pairs):
        if filtering_re.match(pair['dataset']) and args.filter:
            selection[i] = 0

    eprint(len(selection))
    eprint(selection.sum())

    xs = xs[selection]

    ys_predicted = model.predict(xs)
    np.save(args.output + '/predictions.npy', ys_predicted)

    db = RegistrationPairDatabase(args.registration_database)

    parallel_starmap_progressbar(generate_one_prediction, [(i, ys_predicted[i], dataset['data']['pairs'][i], db, args.output) for i in range(len(ys_predicted))])
def import_files_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('--files',
                        nargs='*',
                        type=str,
                        help='The files to import')
    parser.add_argument('--root',
                        help='Location of the registration result database',
                        type=str)
    parser.add_argument(
        '--pointcloud_root',
        help='Location of the point clouds designated by the pairs',
        type=str)
    parser.add_argument(
        '--pointcloud_dataset_type',
        help='The type of pointcloud dataset we import pointclouds from',
        type=str,
        default='ethz')
    parser.add_argument('--pointcloud_only',
                        help='Only do the pointcloud importation',
                        action='store_true')
    parser.add_argument('-j', '--n-cores', default=8, type=int)
    args = parser.parse_args()

    db = RegistrationPairDatabase(args.root)

    if not args.pointcloud_only:
        for registration_file in args.files:
            print(registration_file)
            pair_id = db.import_file(registration_file)

    pointcloud_root = pathlib.Path(args.pointcloud_root)

    readings = {}
    references = {}
    for pair in db.registration_pairs():
        if pair.dataset not in readings:
            readings[pair.dataset] = set([pair.reading])
        else:
            readings[pair.dataset].add(pair.reading)

        if pair.dataset not in references:
            references[pair.dataset] = set([pair.reference])
        else:
            references[pair.dataset].add(pair.reference)

    with concurrent.futures.ProcessPoolExecutor(
            max_workers=args.n_cores) as executor:
        fs = []
        progress_bar = tqdm.tqdm(total=5 * len(db.registration_pairs()),
                                 file=sys.stdout)
        for dataset_name in readings:
            dataset = create_registration_dataset(
                args.pointcloud_dataset_type, pointcloud_root / dataset_name)

            for reading in readings[dataset_name]:
                future = executor.submit(import_reading, dataset_name, reading,
                                         dataset, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

            for reference in references[dataset_name]:
                future = executor.submit(import_reference, dataset_name,
                                         reference, dataset, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

        concurrent.futures.wait(fs)

        fs = []
        for dataset_name in readings:
            for reading in readings[dataset_name]:
                eprint('{}: {}'.format(dataset_name, reading))
                future = executor.submit(compute_data_reading, dataset_name,
                                         reading, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

            for reference in references[dataset_name]:
                eprint('{}: {}'.format(dataset_name, reference))
                future = executor.submit(compute_data_reference, dataset_name,
                                         reference, db)
                future.add_done_callback(lambda _: progress_bar.update())
                fs.append(future)

        concurrent.futures.wait(fs)

        fs = []
        for pair in db.registration_pairs():
            pointcloud_dataset = create_registration_dataset(
                args.pointcloud_dataset_type, pointcloud_root / pair.dataset)

            future = executor.submit(import_pointclouds_of_one_pair, pair,
                                     pointcloud_dataset)
            future.add_done_callback(lambda _: progress_bar.update())
            fs.append(future)

        concurrent.futures.wait(fs)
    def _fit(self, predictors_validation, covariances_validation):
        """
        Given a validation set, train weights theta that optimize the validation error of the model.
        """

        predictors_validation_cuda = predictors_validation.cuda()

        self.theta = self.create_metric_weights()

        selector = sklearn.model_selection.RepeatedKFold(n_splits=5,
                                                         n_repeats=10)

        optimizer = optim.SGD([self.theta], lr=self.learning_rate)
        # optimizer = optim.Adam([self.theta], lr=self.learning_rate)

        validation_losses = []
        validation_stds = []
        optimization_losses = []
        optimization_stds = []
        validation_errors_log = []
        optimization_errors_log = []

        kll_errors_log = []
        kll_validation_losses = []
        kll_validation_stds = []

        epoch = 0
        keep_going = True

        best_loss = np.inf
        best_model = []
        n_epoch_without_improvement = 0
        n_epoch_without_min_delta = 0

        while (
                epoch < self.n_iterations or self.n_iterations == 0
        ) and keep_going and n_epoch_without_improvement < self.patience and n_epoch_without_min_delta < self.patience:
            self.logger.debug('Starting epoch %d' % epoch)

            epoch_start = time.time()
            optimizer.zero_grad()

            losses = Variable(torch.zeros(len(self.model_predictors)))
            optimization_loss = 0.0
            metric_matrix = self.theta_to_metric_matrix(self.theta)

            perms = torch.randperm(len(self.model_predictors))
            identity = torch.Tensor(np.identity(6))
            for i in perms:
                distances = self._compute_distances_cuda(
                    self.model_predictors_cuda, metric_matrix.cuda(),
                    self.model_predictors[i].cuda())
                # eprint('Distances')
                # eprint(distances)
                # eprint('Sum of distances')
                # eprint(distances.sum())
                # eprint('N of weights larger than almost nothing')
                # eprint((self.distances_to_weights(distances) > 1e-20).sum())
                # eprint('Sum of weights')
                # eprint(self.distances_to_weights(distances).sum())
                prediction = self._prediction_from_distances_cuda(
                    self.model_covariances_cuda, distances).cpu()
                # prediction = self._prediction_from_distances_cu(self.model_covariances, distances)

                # eprint('Prediction')
                # eprint(prediction)
                # eprint('Det')
                # eprint(torch.det(prediction + identity * 1e-9))

                regur_prediction = prediction + identity * 1e-12

                det_pred = torch.det(regur_prediction)
                log_det = torch.log(det_pred + 1e-18)

                loss_A = log_det
                # loss_B = torch.norm(torch.mm(torch.inverse(prediction), self.model_covariances[i]) - identity)

                loss_B = torch.trace(
                    torch.mm(torch.inverse(regur_prediction),
                             self.model_covariances[i]))

                distances_cpu = distances.cpu()
                nonzero_distances = torch.gather(
                    distances_cpu, 0,
                    torch.nonzero(distances_cpu).squeeze())
                regularization_term = torch.sum(torch.log(nonzero_distances))

                loss_of_pair = (1 - self.alpha) * (
                    loss_A + loss_B) + self.alpha * regularization_term
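                # Added annotation: loss_A + loss_B is a Gaussian negative-log-likelihood
                # style term, log det(Sigma_pred) + trace(Sigma_pred^-1 @ Sigma_i), where
                # Sigma_i is the sampled covariance of pair i; it is blended with a
                # regularization term on the log of the nonzero distances, weighted by alpha.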

                optimization_loss += loss_of_pair
                losses[i] = loss_of_pair

                # eprint('Index: {}'.format(i))
                # eprint('loss_A: {}'.format(loss_A))
                # eprint('loss_B: {}'.format(loss_B))
                # eprint('Regur: {}'.format(self.alpha * regularization_term))
                # eprint(torch.inverse(regur_prediction))

                if i % 4 == 0:
                    self.logger.debug('Backprop')
                    optimization_loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                    optimization_loss = 0.0
                    metric_matrix = self.theta_to_metric_matrix(self.theta)

            predictions = self._predict(self.model_predictors_cuda)
            self.logger.debug('Predictions have size %d kb' %
                              sys.getsizeof(predictions))

            indiv_optimization_errors = self._validation_errors(
                predictions, self.model_covariances.data)
            self.logger.debug('Indiv optimization errors have size %d kb' %
                              sys.getsizeof(indiv_optimization_errors))

            optimization_score = torch.mean(indiv_optimization_errors)
            optimization_errors_log.append(
                indiv_optimization_errors.numpy().tolist())
            optimization_losses.append(optimization_score.item())
            optimization_stds.append(
                torch.std(indiv_optimization_errors).item())

            metric_matrix = self.theta_to_metric_matrix(self.theta)

            if self.queries_have_neighbor_cuda(self.model_predictors_cuda,
                                               metric_matrix.cuda(),
                                               predictors_validation_cuda):
                predictions = self._predict(predictors_validation_cuda)
                validation_errors = self._validation_errors(
                    predictions, covariances_validation.data)
                validation_score = torch.mean(validation_errors)

                klls = self._kll(predictions, covariances_validation.data)
                kll_errors_log.append(klls.numpy().tolist())
                kll_validation_losses.append(torch.mean(klls).numpy().item())
                kll_validation_stds.append(torch.std(klls).numpy().item())

                eprint('-- Validation of epoch %d --' % epoch)
                if validation_score < best_loss:
                    eprint('** New best model! **')
                    n_epoch_without_improvement = 0
                    best_loss = validation_score
                    best_model = self.export_model()
                else:
                    n_epoch_without_improvement += 1

                eprint('Avg Optim Loss:     {:.5E}'.format(optimization_score))
                eprint('Validation score:   {:.5E}'.format(validation_score))
                eprint()
                if epoch > 0:
                    eprint('Optim. delta:       {:.5E}'.format(
                        optimization_losses[-2] - optimization_losses[-1]))
                    eprint('Validation delta:   {:.5E}'.format(
                        validation_losses[-1] - validation_score))
                    eprint()
                eprint('Validation std:     {:.5E}'.format(
                    validation_errors.std()))
                eprint('Validation kll:     {:.5E}'.format(klls.mean()))
                eprint('Validation kll std: {:.5E}'.format(klls.std()))
                eprint('N epoch without improvement: %d' %
                       n_epoch_without_improvement)
                eprint('N epoch without min delta:   %d' %
                       n_epoch_without_min_delta)
                eprint()

                validation_errors_log.append(
                    validation_errors.numpy().tolist())
                validation_losses.append(validation_score.numpy().tolist())
                validation_stds.append(
                    torch.std(validation_errors).numpy().tolist())
            else:
                keep_going = False
                eprint(
                    'Stopping because elements in the validation dataset have no neighbors.'
                )

            if (epoch > 0) and (validation_losses[-1] - validation_losses[-2] >
                                -1.0 * self.min_delta):
                n_epoch_without_min_delta += 1
            else:
                n_epoch_without_min_delta = 0

            eprint('Epoch took {} seconds'.format(time.time() - epoch_start))
            epoch = epoch + 1

        return {
            'best_loss': float(best_loss),
            'metadata': self.metadata(),
            'model': best_model,
            'optimization_errors': optimization_errors_log,
            'optimization_loss': optimization_losses,
            'optimization_std': optimization_stds,
            'validation_errors': validation_errors_log,
            'validation_loss': validation_losses,
            'validation_std': validation_stds,
            'kll_validation': kll_validation_losses,
            'kll_std': kll_validation_stds,
            'kll_errors': kll_errors_log,
            'what': 'model learning',
        }
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm', type=str)
    parser.add_argument('output', type=str, help='Where to save the learning run')
    parser.add_argument('-lr', '--learning_rate', type=float, default=1e-7)
    parser.add_argument('-a', '--alpha', type=float, default=1e-6)
    parser.add_argument('-b', '--beta', type=float, default=1e3)
    parser.add_argument('-n', '--n_iterations', type=int, default=0, help='Maximum number of iterations. 0 means no maximum and wait for convergence.')
    parser.add_argument('-pa', '--patience', type=int, default=20, help='N of iterations without improvement before ending training.')
    parser.add_argument('-cv', '--cross-validate', type=str, help='Name of the dataset to use as a validation set', default='')
    parser.add_argument('-wd', '--weight-decay', type=float, default=1e-10, help='For the MLP, set the weight decay parameter.')
    parser.add_argument('--filter', type=str, help='Filter out datasets from the learning.', default='')
    parser.add_argument('--preprocessing', '-p', type=str, help='Name of the preprocessing algorithm to use.', default='identity')
    parser.add_argument('-md', '--min-delta', type=float, help='Minimum gain on the validation loss before the learning stops.', default=1e-4)
    parser.add_argument('--validate-on-end', action='store_true', help='Train on the first part of the dataset, validate on the second part.')
    parser.add_argument('-d', '--debug', action='store_true', help='Verbose debug')
    args = parser.parse_args()

    logging.basicConfig(stream=sys.stderr)
    logger = logging.getLogger()

    if args.debug:
        logger.setLevel(logging.DEBUG)

    logger.info('Loading document')
    input_document = json.load(sys.stdin)
    logger.info('Done loading document')

    if args.filter:
        regex = re.compile(args.filter)
        mask = filter_dataset(input_document, regex)
    else:
        mask = np.ones(len(input_document['data']['pairs']), dtype=bool)

    print(np.sum(mask))


    train_indices = []
    validation_indices = []
    if args.cross_validate:
        train_indices, validation_indices = train_test_split_cross_validate(input_document, mask, args.cross_validate)
    elif args.validate_on_end:
        train_indices, validation_indices = train_test_split_validate_on_end(input_document, mask)
    else:
        train_indices, validation_indices = train_test_split(input_document, mask)

    eprint('Training set size: {}. Validation set size: {}'.format(len(train_indices), len(validation_indices)))

    predictors = np.array(input_document['data']['xs'])
    covariances = np.array(input_document['data']['ys'])


    model = model_factory(args.algorithm)

    model.learning_rate = args.learning_rate
    model.alpha = args.alpha
    model.beta = args.beta
    model.n_iterations = args.n_iterations
    model.weight_decay = args.weight_decay
    model.min_delta = args.min_delta
    model.patience = args.patience

    preprocessing_algo = preprocessing_factory(args.preprocessing)
    model.preprocessing = preprocessing_algo

    if train_indices and validation_indices:
        learning_run = model.fit(predictors, covariances, train_set=train_indices, test_set=validation_indices)
    else:
        learning_run = model.fit(predictors, covariances)

    learning_run['metadata']['descriptor_config'] = input_document['metadata']['descriptor_config']

    if args.cross_validate:
        learning_run['metadata']['cross_validation'] = args.cross_validate

    model_path = args.output + '.model'
    model.save_model(model_path)
    learning_run['model'] = os.getcwd() + '/' + model_path


    # Sanity check: print each key of the learning run and make sure its value is
    # JSON serializable before writing the output file.
    for key in learning_run:
        print(key)
        _ = json.dumps(learning_run[key])

    with open(args.output + '.json', 'w') as f:
        json.dump(learning_run, f)
def generate_examples_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('--output',
                        type=str,
                        help='Where to store the examples',
                        default='dataset.json')
    parser.add_argument('--input',
                        type=str,
                        help='Where the registration results are stored',
                        default='.',
                        required=True)
    parser.add_argument('--exclude',
                        type=str,
                        help='Regex of names of datasets to exclude',
                        default='')
    parser.add_argument('-j',
                        '--n_cores',
                        type=int,
                        help='N of cores to use for the computation',
                        default=8)
    parser.add_argument('-c',
                        '--config',
                        type=str,
                        help='Path to a json config for the descriptor.')
    parser.add_argument('--descriptor-only',
                        action='store_true',
                        help='Generate only the descriptor.')
    parser.add_argument('--rotations',
                        '-r',
                        nargs='+',
                        type=float,
                        default=[0.0])
    parser.add_argument('--max-mean-gt-distance', type=float, default=0.3)
    args = parser.parse_args()

    np.set_printoptions(linewidth=120)

    db = RegistrationPairDatabase(args.input, args.exclude)
    registration_pairs = db.registration_pairs()

    output_path = pathlib.Path(args.output)

    clustering_algorithm = CenteredClusteringAlgorithm(radius=1.0,
                                                       k=16,
                                                       n_seed_init=32)
    clustering_algorithm.seed_selector = 'localized'
    clustering_algorithm.rescale = True

    clustering_algorithm = RegistrationPairClusteringAdapter(
        clustering_algorithm)

    distribution_algorithm = FixedCenterSamplingDistributionAlgorithm(
        clustering_algorithm)
    covariance_algo = DistributionAlgorithmToCovarianceAlgorithm(
        distribution_algorithm)

    with open(args.config) as f:
        descriptor_config = json.load(f)

    descriptor = descriptor_factory(descriptor_config)

    eprint('Using descriptor: {}'.format(repr(descriptor)))
    eprint('Generating with rotations: {}'.format(args.rotations))

    examples = []
    pairs = []
    for x in registration_pairs:
        examples.extend([(x, descriptor, covariance_algo, args.descriptor_only,
                          r) for r in args.rotations])
        pairs.extend([{
            'dataset': x.dataset,
            'reading': x.reading,
            'reference': x.reference,
            'rotation': r
        } for r in args.rotations])

    random.shuffle(examples)

    results = parallel_starmap_progressbar(generate_one_example,
                                           examples,
                                           n_cores=args.n_cores)

    # results = [generate_one_example(*x) for x in examples]

    xs = []
    ys = []
    for p in results:
        x, y = p
        xs.append(x.tolist())
        if not args.descriptor_only:
            ys.append(y.tolist())

    output_dict = {
        'metadata': {
            'what': 'learning_dataset',
            'date': str(datetime.datetime.today()),
            'descriptor': str(descriptor),
            'covariance_algo': str(covariance_algo),
            'descriptor_labels': descriptor.labels(),
            'descriptor_config': descriptor_config,
            'filter': args.exclude
        },
        'statistics': {
            'n_examples': len(xs)
        },
        'data': {
            'pairs': pairs,
            'xs': xs,
        }
    }

    if not args.descriptor_only:
        output_dict['data']['ys'] = ys

    with open(args.output, 'w') as dataset_file:
        json.dump(output_dict, dataset_file)
def cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help='Path to the dataset used to train the model', type=str)
    parser.add_argument('learningrun', help='Path to the learning run', type=str)
    parser.add_argument('model', help='Path to the trained model', type=str)
    parser.add_argument('output', help='Where to output the vtk files', type=str)
    args = parser.parse_args()

    print('Loading dataset...')
    with open(args.dataset) as f:
        dataset = json.load(f)
    print('Done')

    print('Loading model...')
    with open(args.learningrun) as f:
        learning_run = json.load(f)
    print('Done')


    model = model_from_file(args.model, learning_run['metadata']['algorithm'])

    xs = np.array(dataset['data']['xs'])
    ys = np.array(dataset['data']['ys'])

    xs_validation = xs[learning_run['validation_set']]
    ys_validation = ys[learning_run['validation_set']]

    pred_begin = time.time()
    ys_predicted = model.predict(xs_validation)
    print('Prediction time per example: {}'.format((time.time() - pred_begin) / len(xs_validation)))

    errors = frobenius(ys_validation - ys_predicted)

    klls = []
    for i in range(len(ys_validation)):
        kll_left = kullback_leibler(ys_validation[i], ys_predicted[i])
        klls.append(kll_left)

    print(np.mean(errors))
    print(np.mean(np.array(klls)))

    with open(args.output + '/summary.csv', 'w') as summary_file:
        writer = csv.DictWriter(summary_file, ['location', 'reading', 'reference', 'loss', 'kullback_leibler', 'predicted_trace', 'reference_trace'])
        writer.writeheader()

        for i in range(len(ys_predicted)):
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_validation[i][0:3,0:3], args.output + '/translation_validation_' + str(i).zfill(4))
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_predicted[i][0:3,0:3], args.output + '/translation_predicted_' + str(i).zfill(4))


            distribution_to_vtk_ellipsoid(np.zeros(3), ys_validation[i][3:6,3:6], args.output + '/rotation_validation_' + str(i).zfill(4))
            distribution_to_vtk_ellipsoid(np.zeros(3), ys_predicted[i][3:6,3:6], args.output + '/rotation_predicted_' + str(i).zfill(4))

            eprint(learning_run['validation_set'][i])
            index_of_example = learning_run['validation_set'][i]
            writer.writerow({
                'location': dataset['data']['pairs'][index_of_example]['dataset'],
                'reading': dataset['data']['pairs'][index_of_example]['reading'],
                'reference': dataset['data']['pairs'][index_of_example]['reference'],
                'loss': errors[i],
                'kullback_leibler': klls[i],
                'predicted_trace': np.trace(ys_predicted[i]),
                'reference_trace': np.trace(ys_validation[i])
            })
# Example 29
    def _fit(self, xs_train, ys_train, xs_test, ys_test):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(len(xs_train[0]), self.hidden_sizes[0]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[0]),
            torch.nn.Linear(self.hidden_sizes[0], self.hidden_sizes[1]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[1]),
            torch.nn.Linear(self.hidden_sizes[1], self.hidden_sizes[2]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[2]),
            torch.nn.Linear(self.hidden_sizes[2], self.hidden_sizes[3]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[3]),
            torch.nn.Linear(self.hidden_sizes[3], self.hidden_sizes[4]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[4]),
            torch.nn.Linear(self.hidden_sizes[4], self.hidden_sizes[5]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[5]),
            torch.nn.Linear(self.hidden_sizes[5], self.hidden_sizes[6]),
            torch.nn.Hardtanh(), torch.nn.Dropout(0.1),
            torch.nn.BatchNorm1d(self.hidden_sizes[6]),
            torch.nn.Linear(self.hidden_sizes[6], 36)).to(self.device)
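        # Added annotation: the network maps each descriptor to 36 outputs, which are
        # reshaped downstream into a 6x6 matrix (see the _dets helper earlier in this
        # listing, which calls ys_predicted.view(len(ys_predicted), 6, 6)).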

        self.best_loss = float('inf')
        n_iter_without_improvement = 0
        epoch = 0
        train_losses = []
        test_losses = []

        train_stds = []
        test_stds = []

        train_errors_log = []
        test_errors_log = []

        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=self.learning_rate,
                                     weight_decay=self.weight_decay)

        while n_iter_without_improvement < self.patience and (
                epoch < self.n_iterations or self.n_iterations == 0):

            loss = self._validate(xs_train, ys_train)

            # Clear stale gradients, backpropagate, then take an optimizer step.
            # (Zeroing gradients between backward() and step() would discard them.)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # with torch.no_grad():
            #     for param in self.model.parameters():
            #         param.data -= self.learning_rate * param.grad

            test_loss = self._validate(xs_test, ys_test)

            if test_loss < self.best_loss:
                self.best_loss = test_loss
                n_iter_without_improvement = 0
            else:
                n_iter_without_improvement += 1

            if epoch % self.logging_rate == 0:
                test_errors = self._validation_errors(xs_test, ys_test)

                test_errors_log.append(test_errors.data.cpu().numpy().tolist())

                train_errors = self._validation_errors(xs_train, ys_train)

                train_errors_log.append(
                    train_errors.data.cpu().numpy().tolist())

                train_losses.append(loss.data.cpu().numpy().item())
                test_losses.append(test_loss.data.cpu().numpy().item())
                train_stds.append(train_errors.std().data.cpu().numpy().item())
                test_stds.append(test_errors.std().data.cpu().numpy().item())

                eprint('Train Loss: {:.8E}'.format(loss.data))
                eprint('Test loss:  {:.8E}'.format(test_loss.data))
                # self._dets(xs_test, ys_test)
                eprint('{} iterations without improvement (out of {})'.format(
                    n_iter_without_improvement, self.patience))
                eprint()

            epoch += 1

        return {
            'best_loss': self.best_loss.cpu().detach().numpy().item(),
            'metadata': {
                'algorithm': 'mlp',
                'learning_rate': self.learning_rate,
                'logging_rate': self.logging_rate,
                'n_iterations': self.n_iterations,
                'patience': self.patience,
            },
            'optimization_errors': train_errors_log,
            'optimization_loss': train_losses,
            'validation_errors': test_errors_log,
            'validation_loss': test_losses,
            'validation_std': test_stds,
            'optimization_std': train_stds,
            'what': 'model learning',
        }
# Example 30
    def accept_raw_file(self, filename):
        p = pathlib.Path(filename)
        eprint(self.directory_of_pair)
        dest = str(self.directory_of_pair / 'raw' / p.name)
        eprint('{} to {}'.format(p, dest))
        os.rename(str(p), dest)