Example #1
0
    def do_task(self, task):
        """
        Submit a task back to the task manager for execution.
        Simply getting the task is not enough, you need to pass it here for the task to be run.

        Will check if an identical task already exists, and only save the task if it does not.
        :param task: The task object to run, must be an instance of Task returned by the above get methods
        :return: void
        """
        # Only accept tasks that are not yet saved (no identifier) and not yet started.
        if not (isinstance(task, batch_analysis.task.Task)
                and task.identifier is None and task.is_unstarted):
            return
        existing_query = {}
        # Each different task type has a different set of properties that identify it.
        if isinstance(task, import_dataset_task.ImportDatasetTask):
            existing_query['module_name'] = task.module_name
            existing_query['path'] = task.path
            existing_query['additional_args'] = task.additional_args
            # BUGFIX: keep the flattened query. The return value was previously
            # discarded here (unlike the GenerateDatasetTask branch below), so
            # nested additional_args would fail to match stored documents if
            # the helper returns a new dict rather than mutating in place.
            existing_query = dh.query_to_dot_notation(existing_query)
        elif isinstance(task, generate_dataset_task.GenerateDatasetTask):
            existing_query['controller_id'] = task.controller_id
            existing_query['simulator_id'] = task.simulator_id
            # deepcopy so that flattening cannot modify the task's own config
            existing_query['simulator_config'] = copy.deepcopy(
                task.simulator_config)
            existing_query['repeat'] = task.repeat
            existing_query = dh.query_to_dot_notation(existing_query)
        elif isinstance(task, train_system_task.TrainSystemTask):
            existing_query['trainer_id'] = task.trainer
            existing_query['trainee_id'] = task.trainee
        elif isinstance(task, run_system_task.RunSystemTask):
            existing_query['system_id'] = task.system
            existing_query['image_source_id'] = task.image_source
        elif isinstance(task, benchmark_task.BenchmarkTrialTask):
            existing_query['trial_result_id'] = task.trial_result
            existing_query['benchmark_id'] = task.benchmark
        elif isinstance(task, compare_trials_task.CompareTrialTask):
            existing_query['trial_result1_id'] = task.trial_result1
            existing_query['trial_result2_id'] = task.trial_result2
            existing_query['comparison_id'] = task.comparison
        elif isinstance(task,
                        compare_benchmarks_task.CompareBenchmarksTask):
            existing_query['benchmark_result1_id'] = task.benchmark_result1
            existing_query['benchmark_result2_id'] = task.benchmark_result2
            existing_query['comparison_id'] = task.comparison

        # Make sure none of this task already exists before saving.
        # An empty query means the task type was not recognised; never save those.
        if existing_query != {} and self._collection.find(
                existing_query).limit(1).count() == 0:
            task.save_updates(self._collection)
 def test_flatten_array_recursive(self):
     """Lists of dicts nested inside other lists are flattened recursively."""
     document = {
         'a': [
             {
                 'b': 1.12,
                 'd': [
                     {'a': 1.1411, 'b': 1.1412},
                     {'b': 1.1422},
                     {'a': 1.1431, 'b': 1.1432},
                     {'c': 1.1443},
                 ],
             },
             {'a': 1.21, 'd': 1.22},
         ]
     }
     expected = {
         'a.0.b': 1.12,
         'a.0.d.0.a': 1.1411,
         'a.0.d.0.b': 1.1412,
         'a.0.d.1.b': 1.1422,
         'a.0.d.2.a': 1.1431,
         'a.0.d.2.b': 1.1432,
         'a.0.d.3.c': 1.1443,
         'a.1.a': 1.21,
         'a.1.d': 1.22,
     }
     flattened = dh.query_to_dot_notation(document, flatten_arrays=True)
     self.assertEqual(flattened, expected)
 def test_flatten_array(self):
     """Each dict element of a list becomes keys prefixed with its index."""
     document = {
         'a': [
             {'b': 1.12},
             {'a': 1.21, 'b': 1.22},
             {'c': 1.33},
         ]
     }
     flattened = dh.query_to_dot_notation(document, flatten_arrays=True)
     expected = {'a.0.b': 1.12, 'a.1.a': 1.21, 'a.1.b': 1.22, 'a.2.c': 1.33}
     self.assertEqual(flattened, expected)
Example #4
0
def save_image(db_client, image):
    """
    Save an image to the database.
    First checks if the image already exists,
    and does not insert if it does.
    :param db_client: A database client object to use to save the image.
    :param image: An image entity or image object to be saved to the database
    :return: the id of the image in the database, or None if the argument is not an image
    """
    if not isinstance(image, ImageEntity):
        if isinstance(image, core.image.Image):
            image = image_to_entity(image)
        else:
            # Not an image type we know how to save
            return None
    existing_query = image.serialize()

    # Don't look at the GridFS links when determining if the image exists, only use metadata.
    delete_keys = [
        'data', 'depth_data', 'ground_truth_depth_data', 'labels_data',
        'world_normals_data'
    ]
    for key in delete_keys:
        # Remove the mono key and the stereo left_/right_ variants, if present.
        for prefix in ('', 'left_', 'right_'):
            existing_query.pop(prefix + key, None)
    # BUGFIX: keep the flattened query. The return value was previously
    # discarded, so if the helper returns a new dict rather than mutating in
    # place, the existence check below would run against the unflattened query.
    existing_query = db_help.query_to_dot_notation(existing_query,
                                                   flatten_arrays=True)

    existing = db_client.image_collection.find_one(existing_query,
                                                   {'_id': True})
    if existing is None:
        image.save_image_data(db_client)
        # Need to serialize again so we can store the newly created data ids.
        return db_client.image_collection.insert(image.serialize())
    else:
        # An identical image already exists, use that.
        return existing['_id']
Example #5
0
 def get_generate_dataset_task(self,
                               controller_id,
                               simulator_id,
                               simulator_config,
                               repeat=0,
                               num_cpus=1,
                               num_gpus=0,
                               memory_requirements='3GB',
                               expected_duration='1:00:00'):
     """
     Get a task to generate a synthetic dataset.
     Generate dataset tasks are unique to particular combinations of controller, simulator and config,
     so that the same controller can generate different datasets with the same simulator.
     This is further enabled by the repeat parameter.
     Most of the parameters are resources requirements passed to the job system.
     :param controller_id: The id of the controller to use
     :param simulator_id: The id of the simulator to use
     :param simulator_config: configuration parameters passed to the simulator at run time.
     :param repeat: The repeat of this trial, so we can run the same system more than once.
     :param num_cpus: The number of CPUs required for the job. Default 1.
     :param num_gpus: The number of GPUs required for the job. Default 0.
     :param memory_requirements: The memory required for this job. Default 3 GB.
     :param expected_duration: The expected time this job will take. Default 1 hour.
     :return: A GenerateDatasetTask containing the task state.
     """
     # Look for an existing task with the same identifying properties first.
     existing = self._collection.find_one(
         dh.query_to_dot_notation({
             'controller_id': controller_id,
             'simulator_id': simulator_id,
             # deepcopy so that flattening cannot modify the caller's config
             'simulator_config': copy.deepcopy(simulator_config),
             'repeat': repeat
         }))
     if existing is not None:
         return self._db_client.deserialize_entity(existing)
     else:
         return generate_dataset_task.GenerateDatasetTask(
             controller_id=controller_id,
             simulator_id=simulator_id,
             simulator_config=simulator_config,
             repeat=repeat,
             num_cpus=num_cpus,
             num_gpus=num_gpus,
             memory_requirements=memory_requirements,
             expected_duration=expected_duration)
Example #6
0
 def get_import_dataset_task(self,
                             module_name,
                             path,
                             additional_args=None,
                             num_cpus=1,
                             num_gpus=0,
                             memory_requirements='3GB',
                             expected_duration='1:00:00'):
     """
     Get a task to import a dataset.
     Most of the parameters are resources requirements passed to the job system.
     :param module_name: The name of the python module to use to do the import as a string.
     It must have a function 'import_dataset', taking a directory and the database client
     :param path: The root file or directory describing the dataset to import
     :param additional_args: Extra arguments used to identify and configure the import.
     Default None, which is treated as an empty dict.
     :param num_cpus: The number of CPUs required for the job. Default 1.
     :param num_gpus: The number of GPUs required for the job. Default 0.
     :param memory_requirements: The memory required for this job. Default 3 GB.
     :param expected_duration: The expected time this job will take. Default 1 hour.
     :return: An ImportDatasetTask containing the task state.
     """
     if additional_args is None:
         additional_args = {}
     # Look for an existing task with the same identifying properties first.
     existing = self._collection.find_one(
         dh.query_to_dot_notation({
             'module_name': module_name,
             'path': path,
             # deepcopy so that flattening cannot modify the caller's args
             'additional_args': copy.deepcopy(additional_args)
         }))
     if existing is not None:
         return self._db_client.deserialize_entity(existing)
     else:
         return import_dataset_task.ImportDatasetTask(
             module_name=module_name,
             path=path,
             additional_args=additional_args,
             num_cpus=num_cpus,
             num_gpus=num_gpus,
             memory_requirements=memory_requirements,
             expected_duration=expected_duration)
 def test_flatten_array_does_not_flatten_arrays_that_are_not_arrays_of_dicts(self):
     """Tuples of scalars are indexed per element rather than merged like dicts."""
     document = {
         'a': (11, 12, 13),
         'b': [
             {
                 'b': 1.12,
                 'd': [
                     {'a': 1.1411, 'b': (1.14121, 1.14122)},
                     {'b': 1.1422},
                     {'a': (1.14311,), 'b': 1.1432},
                     {'c': 1.1443},
                 ],
             },
             {'a': 1.21, 'd': 1.22},
         ]
     }
     expected = {
         'a.0': 11,
         'a.1': 12,
         'a.2': 13,
         'b.0.b': 1.12,
         'b.0.d.0.a': 1.1411,
         'b.0.d.0.b.0': 1.14121,
         'b.0.d.0.b.1': 1.14122,
         'b.0.d.1.b': 1.1422,
         'b.0.d.2.a.0': 1.14311,
         'b.0.d.2.b': 1.1432,
         'b.0.d.3.c': 1.1443,
         'b.1.a': 1.21,
         'b.1.d': 1.22,
     }
     flattened = dh.query_to_dot_notation(document, flatten_arrays=True)
     self.assertEqual(expected, flattened)
 def test_combines_keys(self):
     """A single level of nesting collapses into one dotted key."""
     self.assertEqual(dh.query_to_dot_notation({'a': {'b': 1}}), {'a.b': 1})
 def test_big(self):
     """Flattening a large, deeply nested document yields one dotted key per leaf."""
     document = {
         'a': 1,
         'b': 2,
         'c': {
             'a': 3.1,
             'b': {'a': 3.21, 'b': 3.22},
             'c': {'a': 3.31, 'b': 3.32, 'c': 3.33},
             'd': {'a': 3.41, 'b': {'a': 3.421}},
         },
         'd': {
             'a': {
                 'a': {
                     'a': {'a': {'a': 4.11111}},
                     'b': {
                         'a': {
                             'a': {'a': {'a': 4.1121111}},
                             'b': 4.11212,
                         },
                     },
                 },
             },
         },
         'e': 5,
         'f': {'a': 6.1, 'b': {'a': 6.21}},
     }
     expected = {
         'a': 1,
         'b': 2,
         'c.a': 3.1,
         'c.b.a': 3.21,
         'c.b.b': 3.22,
         'c.c.a': 3.31,
         'c.c.b': 3.32,
         'c.c.c': 3.33,
         'c.d.a': 3.41,
         'c.d.b.a': 3.421,
         'd.a.a.a.a.a': 4.11111,
         'd.a.a.b.a.a.a.a': 4.1121111,
         'd.a.a.b.a.b': 4.11212,
         'e': 5,
         'f.a': 6.1,
         'f.b.a': 6.21,
     }
     self.assertEqual(dh.query_to_dot_notation(document), expected)
 def test_works_recursively(self):
     """Arbitrarily deep nesting collapses into a single dotted path."""
     deep = {'a': {'b': {'c': {'d': 1}}}}
     self.assertEqual(dh.query_to_dot_notation(deep), {'a.b.c.d': 1})
 def test_handles_nested_and_non_nested_keys(self):
     """Top-level scalars pass through while nested dicts are dotted."""
     flattened = dh.query_to_dot_notation({'a': 1, 'b': {'a': 2.1}})
     self.assertEqual({'a': 1, 'b.a': 2.1}, flattened)
 def test_passes_through_non_nested_keys(self):
     """A flat document is returned unchanged."""
     self.assertEqual(dh.query_to_dot_notation({'a': 1}), {'a': 1})
Example #13
0
def compare_results(benchmark,
                    database_client,
                    config=None,
                    trained_state_id=None):
    """
    Compare trials run on maximum-quality reference datasets against trials of
    the same system on other-quality versions of the same worlds, saving each
    new benchmark comparison result to the database.
    :param benchmark: The trial comparison benchmark to use, must be a TrialComparison
    :param database_client: The database client, must be a DatabaseClient
    :param config: Optional configuration dict; copied so the caller's dict is not mutated
    :param trained_state_id: If given, only reference trials with this trained state are used
    :return: None
    """
    if (not isinstance(benchmark, core.trial_comparison.TrialComparison) or
            not isinstance(database_client, database.client.DatabaseClient)):
        return

    if config is None:
        config = {}
    else:
        # Copy so that applying defaults does not mutate the caller's dict
        config = dict(config)
    config = du.defaults(config, {})

    # Get all the reference datasets, ones that have maximum quality
    reference_dataset_ids = database_client.dataset_collection.find(
        {
            'material_properties.RoughnessQuality': 1,
            'material_properties.BaseMipMapBias': 0,
            'material_properties.NormalQuality': 1,
            'geometry_properties.Forced LOD level': 0
        }, {'_id': True})
    reference_dataset_ids = [result['_id'] for result in reference_dataset_ids]

    # Get the reference trial results, as IDs so the cursor doesn't expire
    reference_trial_ids_query = du.defaults(
        benchmark.get_benchmark_requirements(), {
            'success': True,
            'dataset': {
                '$in': reference_dataset_ids
            }
        })
    if trained_state_id is not None:
        reference_trial_ids_query['trained_state'] = trained_state_id
    # BUGFIX: use the query built above. Previously an identical query was
    # rebuilt inline WITHOUT the trained_state filter, so the trained_state_id
    # parameter was silently ignored.
    reference_trial_ids = database_client.trials_collection.find(
        reference_trial_ids_query, {'_id': True})
    reference_trial_ids = [result['_id'] for result in reference_trial_ids]

    # For each reference trial result
    for ref_trial_id in reference_trial_ids:
        s_temp = database_client.trials_collection.find_one(
            {'_id': ref_trial_id})
        reference_trial = database_client.deserialize_entity(s_temp)

        s_temp = database_client.dataset_collection.find_one(
            {'_id': reference_trial.image_source_id})
        reference_dataset = database_client.deserialize_entity(s_temp)
        reference_dataset_images = reference_dataset.load_images(
            database_client)

        # Find all dataset ids with the same world details, but different quality settings
        comparison_query = dbutil.query_to_dot_notation({
            'world_name':
            reference_dataset.world_name,
            'world_information':
            copy.deepcopy(reference_dataset.world_information)
        })
        comparison_dataset_ids = database_client.dataset_collection.find(
            comparison_query, {'_id': True})
        comparison_dataset_ids = [
            result['_id'] for result in comparison_dataset_ids
        ]

        # Existing comparisons against this reference, so we can skip them
        existing_compared_trials = database_client.results_collection.find(
            {
                'benchmark': benchmark.identifier,
                'reference': reference_trial.identifier
            }, {
                'trial_result': True,
                '_id': False
            })
        existing_compared_trials = [
            val['trial_result'] for val in existing_compared_trials
        ]

        # Find all trials on these comparison datasets by the same system as the reference trial
        s_comparison_trials = database_client.trials_collection.find(
            du.defaults(
                benchmark.get_benchmark_requirements(), {
                    '_id': {
                        '$ne': ref_trial_id,
                        '$nin': existing_compared_trials
                    },
                    'dataset': {
                        '$in': comparison_dataset_ids
                    },
                    'system': reference_trial.system_id,
                    'trained_state': reference_trial.trained_state_id
                }))

        for s_comparison_trial in s_comparison_trials:
            comparison_trial = database_client.deserialize_entity(
                s_comparison_trial)
            # Double-check this pair has not already been benchmarked
            existing_count = database_client.results_collection.find({
                'benchmark':
                benchmark.identifier,
                'trial_result':
                comparison_trial.identifier,
                'reference':
                reference_trial.identifier
            }).count()
            if existing_count <= 0:
                benchmark_result = benchmark.compare_trial_results(
                    comparison_trial, reference_trial,
                    reference_dataset_images)
                database_client.results_collection.insert(
                    benchmark_result.serialize())