Example #1
    def add_run(self, run: Run):
        """Add Run to the dataset

        The input Run URI is created by this method

        Parameters
        ----------
        run
            Run to add

        """
        run.md_uri = self.service.add_run_processeddataset(run.metadata,
                                                           self.md_uri)
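For context, a minimal usage sketch (the metadata field names mirror Examples #4 and #5 below; the concrete values are hypothetical):

    run = Run()
    run.metadata.process_name = 'threshold'                # hypothetical name
    run.metadata.process_uri = '/processes/threshold.xml'  # hypothetical URI
    processed_dataset.add_run(run)  # fills run.md_uri via the dataset service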
Example #2
    def test_write_run(self):
        run = Run(self.tst_run_file)
        run.metadata = create_run_metadata()
        run.write()
        self.assertTrue(
            filecmp.cmp(self.tst_run_file, self.ref_run_file, shallow=False))
Example #3
    def test_read_run(self):
        run_read = Run(self.ref_run_file)
        run_ref_metadata = create_run_metadata()
        self.assertEqual(run_read.metadata.serialize(),
                         run_ref_metadata.serialize())
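Both tests rely on a create_run_metadata fixture that is not shown on this page. A plausible sketch, inferred from the metadata fields used in Examples #4 and #5 (all concrete values are invented):

    def create_run_metadata():
        # build a metadata object that matches the reference run file
        metadata = Run().metadata
        metadata.process_name = 'svdeconv'                # hypothetical
        metadata.process_uri = '/processes/svdeconv.xml'  # hypothetical
        # RunInputContainer(name, dataset, query, origin_output_name)
        metadata.inputs.append(
            RunInputContainer('image', 'data', 'name=celegans', ''))
        # RunParameterContainer(name, value)
        metadata.parameters.append(RunParameterContainer('sigma', '3'))
        return metadata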
Example #4
    def run_merged(self):
        """Run the process that merge txt number inputs

        This is the main function that run the process on the experiment data

        Raises
        ------
        RunnerExecError

        """
        for observer in self._observers:
            observer.notify({'progress': 0, 'message': 'start'})

        # 1- Query all the input data and verify that the sizes
        # are equal; if not, raise an exception
        input_data, data_count = self._query_inputs()

        # 2- Create the ProcessedDataSet
        processed_dataset = self.experiment.create_processed_dataset(
            self._output_dataset
        )

        # 3- Create run
        run = Run()
        run.metadata.process_name = self.process.metadata.fullname()
        run.metadata.process_uri = self.process.uri
        for t in range(len(self._inputs_names)):
            run.metadata.inputs.append(
                RunInputContainer(
                    self._inputs_names[t],
                    self._inputs_datasets[t],
                    self._inputs_query[t],
                    self._inputs_origin_output_name[t],
                )
            )
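        # _process_params is a flat [name1, value1, name2, value2, ...] list,
        # hence the step of 2 below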
        for i in range(0, len(self._process_params), 2):
            run.metadata.parameters.append(
                RunParameterContainer(
                    self._process_params[i], self._process_params[i + 1]
                )
            )

        processed_dataset.add_run(run)

        # 4- merge Inputs
        inputs_values = [[] for _ in self._inputs_names]

        for n in range(len(self._inputs_names)):
            for i in range(data_count):
                data_info = RawData(input_data[n][i])
                if data_info.metadata.format == "numbercsv":
                    with open(data_info.metadata.uri, 'r') as file:
                        value = file.read().replace('\n', '').replace(' ', '')
                        inputs_values[n].append(value)
                else:
                    raise RunnerExecError(
                        'run_merged can only use the numbercsv data type')

        # 5- save data in tmp files in the processed dataset dir
        tmp_inputs_files = [None] * len(self._inputs_names)
        processed_data_dir = processed_dataset.md_uri.replace(
            "processeddataset.md.json", ""
        )
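        # md_uri is assumed to be a local path ending with
        # 'processeddataset.md.json'; stripping it yields the dataset directory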
        for n in range(len(self._inputs_names)):
            tmp_inputs_files[n] = os.path.join(
                processed_data_dir, self._inputs_names[n] + '.csv'
            )
            with open(tmp_inputs_files[n], 'w') as f:
                # write the values as a single comma-separated line
                f.write(",".join(str(v) for v in inputs_values[n]))

        # 6- create input metadata for output .md.json
        inputs_metadata = []
        for n in range(len(tmp_inputs_files)):
            inp_metadata = ProcessedDataInputContainer()
            inp_metadata.name = self._inputs_names[n]
            inp_metadata.uri = tmp_inputs_files[n]
            inp_metadata.type = 'txt'
            inputs_metadata.append(inp_metadata)

        # 7- run process on generated files
        args = []

        # 7.1- inputs
        for n in range(len(self._inputs_names)):
            args.append(self._inputs_names[n])
            args.append(tmp_inputs_files[n])

        # 7.2- params
        for param in self._process_params:
            args.append(param)

        # 7.3- outputs
        for output in self.process.metadata.outputs:
            extension = '.' + FormatsAccess.instance().get(output.type).extension

            # args
            args.append(output.name)
            output_file_name = output.name
            args.append(os.path.join(processed_data_dir,
                                     output_file_name + extension))

            # output metadata
            processed_data = ProcessedData()
            processed_data.metadata.name = output.name
            processed_data.metadata.author = \
                ConfigAccess.instance().get('user')['name']
            processed_data.metadata.date = format_date('now')
            processed_data.metadata.format = output.type

            processed_data.metadata.run_uri = run.md_uri
            processed_data.metadata.inputs = inputs_metadata

            processed_data.metadata.output = {
                'name': output.name,
                'label': output.description,
            }
            # save the metadata and create its md_uri and uri
            processed_dataset.create_data(processed_data)

        # 8- exec
        runner = Runner(self.process)
        runner.exec(*args)

        # 9- notify observers
        for observer in self._observers:
            observer.notify({'progress': 100, 'message': 'done'})
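The observers registered on the runner only need a notify method that accepts the notification dict; a minimal sketch of such an observer (the class name is an assumption):

    class PrintProgressObserver:
        """Minimal observer: notify() receives dicts with
        'progress' and 'message' keys."""

        def notify(self, notification):
            print('[{}%] {}'.format(notification['progress'],
                                    notification['message']))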
Example #5
    def run_sequence(self):
        """Run the process in a sequence

        This is the main function that runs the process on the experiment data

        Raises
        ------
        RunnerExecError

        """
        # 1- Query all the input data and verify that the sizes
        # are equal; if not, raise an exception
        input_data, data_count = self._query_inputs()

        # 2- Create the ProcessedDataSet
        processed_dataset = self.experiment.create_processed_dataset(
            self._output_dataset
        )

        # 3- Create run
        run = Run()
        run.metadata.process_name = self.process.metadata.fullname()
        run.metadata.process_uri = self.process.uri
        for t in range(len(self._inputs_names)):
            run.metadata.inputs.append(
                RunInputContainer(
                    self._inputs_names[t],
                    self._inputs_datasets[t],
                    self._inputs_query[t],
                    self._inputs_origin_output_name[t],
                )
            )
        for i in range(0, len(self._process_params), 2):
            run.metadata.parameters.append(
                RunParameterContainer(
                    self._process_params[i], self._process_params[i + 1]
                )
            )

        processed_dataset.add_run(run)

        # 4- loop over the input data
        for i in range(data_count):

            data_info_zero = RawData(input_data[0][i].uri())

            # 4.0- notify observers
            for observer in self._observers:
                observer.notify({
                    'progress': int(100 * i / data_count),
                    'message': 'Process ' + data_info_zero.metadata.name,
                })

            # 4.1- Parse IO
            args = []
            # get the input arguments
            inputs_metadata = []

            for n in range(len(self._inputs_names)):
                args.append(self._inputs_names[n])
                data_info = RawData(input_data[n][i].uri())
                # input data can be a ProcessedData, but
                # we only read the common metadata
                args.append(data_info.metadata.uri)

                inp_metadata = ProcessedDataInputContainer()
                inp_metadata.name = self._inputs_names[n]
                inp_metadata.uri = input_data[n][i].uri()
                inp_metadata.type = data_info.metadata.type
                inputs_metadata.append(inp_metadata)

            # get the params arguments
            for param in self._process_params:
                args.append(param)

            # setup outputs
            for output in self.process.metadata.outputs:

                # output metadata
                processed_data = ProcessedData()
                processed_data.metadata.name = (
                    data_info_zero.metadata.name + "_" + output.name
                )
                processed_data.metadata.author = \
                    ConfigAccess.instance().get('user')['name']
                processed_data.metadata.date = format_date('now')
                processed_data.metadata.format = output.type

                processed_data.metadata.run_uri = run.md_uri
                processed_data.metadata.inputs = inputs_metadata

                processed_data.metadata.output = {
                    'name': output.name,
                    'label': output.description,
                }

                # save the metadata and create its md_uri and uri
                processed_dataset.create_data(processed_data)

                # args
                args.append(output.name)
                args.append(processed_data.metadata.uri)

            # 4.2- exec
            runner = Runner(self.process)
            # print("args = ", args)
            runner.exec(*args)

        # 5- notify observers
        for observer in self._observers:
            observer.notify({'progress': 100, 'message': 'done'})
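Note the difference between the two runners: run_merged concatenates all the queried values of each input into one temporary CSV file and executes the process a single time, whereas run_sequence executes the process once per queried data item and creates one set of ProcessedData outputs per item.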