def save_outputs(self, run):
    output_path = os.path.join(run.full_sandbox_path, 'output')
    upload_path = os.path.join(run.full_sandbox_path, 'upload')
    os.mkdir(upload_path)
    for argument in run.app.arguments.filter(type=ContainerArgument.OUTPUT):
        argument_path = os.path.join(output_path, argument.name)
        dataset_name = self.build_dataset_name(run, argument.name)
        new_argument_path = os.path.join(upload_path, dataset_name)
        try:
            os.rename(argument_path, new_argument_path)
            dataset = Dataset.create_dataset(new_argument_path,
                                             name=dataset_name,
                                             user=run.user)
            dataset.copy_permissions(run)
            run.datasets.create(dataset=dataset, argument=argument)
        except (OSError, IOError) as ex:
            # A missing output file is tolerated; any other error is fatal.
            if ex.errno != errno.ENOENT:
                raise

    logs_path = os.path.join(run.full_sandbox_path, 'logs')
    for file_name, log_type in (('stdout.txt', ContainerLog.STDOUT),
                                ('stderr.txt', ContainerLog.STDERR)):
        run.load_log(os.path.join(logs_path, file_name), log_type)
    run.set_md5()
    run.state = (ContainerRun.COMPLETE
                 if run.return_code == 0
                 else ContainerRun.FAILED)
    run.end_time = timezone.now()

@classmethod
def _save_output_directory_argument(cls,
                                    run: ContainerRun,
                                    argument: ContainerArgument,
                                    output_path: str,
                                    upload_path: str) -> None:
    output_path = pathlib.Path(output_path).absolute()
    dirarg_path = output_path / argument.name
    for dirpath, _, filenames in os.walk(dirarg_path):
        dirpath = pathlib.Path(dirpath)
        for filename in filenames:
            datafile_path: pathlib.Path = (dirpath / filename).absolute()
            dataset_filename = cls._build_directory_file_name(
                run.id, output_path, datafile_path)
            destination_path = os.path.join(upload_path, dataset_filename)
            dataset_name = cls._build_directory_dataset_name(
                run.id, output_path, datafile_path)
            try:
                os.rename(datafile_path, destination_path)
                dataset = Dataset.create_dataset(
                    destination_path,
                    name=dataset_name,
                    user=run.user,
                )
                dataset.copy_permissions(run)
                run.datasets.create(dataset=dataset, argument=argument)
            except (OSError, IOError) as ex:
                # As above, a missing file is skipped; anything else re-raises.
                if ex.errno != errno.ENOENT:
                    raise

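# The two _build_directory_* helpers are referenced above but not shown here.
# The bodies below are a minimal sketch, assuming names are derived from the
# file's path relative to the output directory; they are hypothetical, not the
# project's confirmed implementations.
@classmethod
def _build_directory_file_name(cls, run_id, output_path, datafile_path):
    # Hypothetical: flatten the relative path into a single upload file name,
    # prefixed with the run id to keep names unique.
    relative = datafile_path.relative_to(output_path)
    return '{}_{}'.format(run_id, '_'.join(relative.parts))

@classmethod
def _build_directory_dataset_name(cls, run_id, output_path, datafile_path):
    # Hypothetical: a human-readable dataset name that keeps the directory
    # structure visible.
    relative = datafile_path.relative_to(output_path)
    return 'run{}_{}'.format(run_id, relative.as_posix())
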
def create(self, validated_data):
    """
    Create a Dataset object from deserialized and validated data.
    """
    # The default behaviour for keep_file depends on the mode of creation.
    keep_file = True
    file_path = validated_data.get("external_path", "")
    efd = validated_data.get("externalfiledirectory", None)
    # Both or neither are specified (this is enforced in serializer validation).
    if file_path:
        file_path = os.path.join(efd.path, file_path)
        keep_file = False  # don't retain a copy by default
    # Override the default if specified.
    keep_file = validated_data.get("save_in_db", keep_file)

    dataset = Dataset.create_dataset(
        is_uploaded=True,  # Assume serializer is only used for uploads.
        file_path=file_path,
        user=self.context["request"].user,
        users_allowed=validated_data["users_allowed"],
        groups_allowed=validated_data["groups_allowed"],
        keep_file=keep_file,
        name=validated_data["name"],
        description=validated_data.get("description"),
        file_source=None,
        check=True,
        # dataset_file should be freshly opened so the cursor is at the start.
        file_handle=validated_data.get("dataset_file", None),
        externalfiledirectory=efd)
    return dataset

def create_datasets(self, user):
    """
    Creates the Datasets and the corresponding SymbolicDatasets in the same
    order as cleaned_data["dataset_file"].

    Will still save successful Datasets to the database even if some of the
    Datasets fail to create.

    :return: None and a list of the created Dataset objects in the same order
        as cleaned_data["dataset_file"]. If a particular Dataset failed to
        create, then the list element contains a dict that can be used to
        inform the user about the file.
    """
    results = []
    for file_size, uploaded_file in self.cleaned_data['dataset_file']:
        # Note that uploaded_file should be seek'd to the beginning. It was
        # presumably just opened so that should be OK, but if this ever
        # changes we will have to fix this.
        dataset = error_str = auto_name = None
        try:
            # TODO: use correct unique constraints
            name_prefix = ""
            if self.cleaned_data["name_prefix"]:
                name_prefix = self.cleaned_data["name_prefix"] + "_"
            auto_name = (name_prefix + uploaded_file.name + "_" +
                         datetime.now().strftime('%Y%m%d%H%M%S%f'))
            if self.cleaned_data["description"]:
                auto_description = self.cleaned_data["description"]
            else:
                auto_description = "Bulk Uploaded File " + uploaded_file.name

            dataset = Dataset.create_dataset(is_uploaded=True,
                                             file_path=None,
                                             user=user,
                                             keep_file=True,
                                             name=auto_name,
                                             description=auto_description,
                                             file_source=None,
                                             check=True,
                                             file_handle=uploaded_file)
            dataset.grant_from_json(self.cleaned_data["permissions"])
        except Exception as e:
            error_str = str(e)
            LOGGER.exception(
                "Error while creating Dataset for file with original file name=" +
                str(uploaded_file.name) +
                " and autogenerated Dataset name = " + str(auto_name))

        if dataset and error_str is None:
            results.append(dataset)
        elif error_str and dataset is None:
            results.append({"name": uploaded_file.name,
                            "errstr": error_str,
                            "size": file_size})
        else:
            raise ValueError(
                "Invalid situation. Must either have a dataset or an error; "
                "cannot have both or neither.")
    return None, results

def setUp(self):
    super(RawTests, self).setUp()
    self.addTypeEqualityFunc(str, self.assertMultiLineEqual)
    self.pipeline_raw = tools.make_first_pipeline(
        "raw noop",
        "a pipeline to do nothing to raw data",
        self.user_bob)
    tools.create_linear_pipeline(self.pipeline_raw,
                                 [self.method_noop_raw],
                                 "raw_in",
                                 "raw_out")
    self.pipeline_raw.create_outputs()

    self.dataset_raw = Dataset.create_dataset(
        "/usr/share/dict/words",
        user=self.user_bob,
        cdt=None,
        keep_file=True,
        name="raw",
        description="some raw data")

def build(self):
    user = User.objects.first()
    assert user is not None
    input_path = os.path.abspath(
        os.path.join(
            __file__,
            '../../../../../samplecode/singularity/host_input/example_names.csv'))
    family = ContainerFamily.objects.create(name='fixture family', user=user)
    container_path = os.path.abspath(
        os.path.join(
            __file__,
            '../../../../../samplecode/singularity/python2-alpine-trimmed.simg'))
    with open(container_path, "rb") as f:
        container_md5 = compute_md5(f)
    container = family.containers.create(tag='vFixture',
                                         user=user,
                                         file='Containers/kive-default.simg',
                                         md5=container_md5)
    app = container.apps.create()
    arg1 = app.arguments.create(type=ContainerArgument.INPUT,
                                name='names_csv',
                                position=1)
    app.arguments.create(type=ContainerArgument.OUTPUT,
                         name='greetings_csv',
                         position=2)
    dataset = Dataset.create_dataset(input_path, name='names.csv', user=user)
    run = app.runs.create(name='fixture run', user=user)
    run.sandbox_path = ""  # blank this out as it won't be accessible in testing anyway
    run.slurm_job_id = None  # this also would cause tests to fail on a fresh system
    run.save(schedule=False)  # scheduling would overwrite sandbox_path
    run.datasets.create(argument=arg1, dataset=dataset)

    upload_path = os.path.join(settings.MEDIA_ROOT, Container.UPLOAD_DIR)
    readme_path = os.path.join(upload_path, 'README.md')
    os.makedirs(upload_path)
    with open(readme_path, 'w') as f:
        f.write('Just a placeholder to create the folder for containers.')

def _save_output_argument(self,
                          run: ContainerRun,
                          argument: ContainerArgument,
                          output_path: str,
                          upload_path: str):
    argument_path = os.path.join(output_path, argument.name)
    dataset_name = self.build_dataset_name(run, argument.name)
    new_argument_path = os.path.join(upload_path, dataset_name)
    try:
        os.rename(argument_path, new_argument_path)
        dataset = Dataset.create_dataset(new_argument_path,
                                         name=dataset_name,
                                         user=run.user)
        dataset.copy_permissions(run)
        run.datasets.create(dataset=dataset, argument=argument)
    except (OSError, IOError) as ex:
        if ex.errno != errno.ENOENT:
            raise

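# This helper extracts the per-argument body of the save_outputs loop shown
# earlier. A minimal sketch (not the project's confirmed code) of how that
# loop could delegate to it; log loading, MD5, and state updates would stay
# as in the earlier version.
def save_outputs(self, run):
    output_path = os.path.join(run.full_sandbox_path, 'output')
    upload_path = os.path.join(run.full_sandbox_path, 'upload')
    os.mkdir(upload_path)
    for argument in run.app.arguments.filter(type=ContainerArgument.OUTPUT):
        self._save_output_argument(run, argument, output_path, upload_path)
    # ... log loading, MD5, and state updates as in the earlier version ...
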
def create(self, validated_data):
    """
    Create a Dataset object from deserialized and validated data.
    """
    cdt = None
    if "structure" in validated_data:
        cdt = validated_data["structure"].get("compounddatatype", None)

    # The default behaviour for keep_file depends on the mode of creation.
    keep_file = True
    file_path = validated_data.get("external_path", "")
    efd = validated_data.get("externalfiledirectory", None)
    # Both or neither are specified (this is enforced in serializer validation).
    if file_path:
        file_path = os.path.join(efd.path, file_path)
        keep_file = False  # don't retain a copy by default
    # Override the default if specified.
    keep_file = validated_data.get("save_in_db", keep_file)

    dataset = Dataset.create_dataset(
        is_uploaded=True,  # Assume serializer is only used for uploads.
        file_path=file_path,
        user=self.context["request"].user,
        users_allowed=validated_data["users_allowed"],
        groups_allowed=validated_data["groups_allowed"],
        cdt=cdt,
        keep_file=keep_file,
        name=validated_data["name"],
        description=validated_data.get("description"),
        file_source=None,
        check=True,
        # dataset_file should be freshly opened so the cursor is at the start.
        file_handle=validated_data.get("dataset_file", None),
        externalfiledirectory=efd)
    return dataset

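# For orientation: an illustrative validated_data payload, with keys taken
# from what create() reads above. The values, and the efd/cdt objects, are
# placeholders (assumptions), not confirmed fixtures from the source.
validated_data = {
    "name": "example.csv",
    "description": "An externally stored CSV",
    "users_allowed": [],
    "groups_allowed": [],
    "external_path": "subdir/example.csv",
    "externalfiledirectory": efd,  # an external-file-directory model instance
    "save_in_db": False,  # overrides the keep_file default
    "structure": {"compounddatatype": cdt},  # optional; absent means raw
}
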
def create_datasets(self, user):
    """
    Creates the Datasets and the corresponding SymbolicDatasets in the same
    order as cleaned_data["dataset_file"].

    Will still save successful Datasets to the database even if some of the
    Datasets fail to create.

    :return: the CDT object and a list of the created Dataset objects in the
        same order as cleaned_data["dataset_file"]. If a particular Dataset
        failed to create, then the list element contains a dict that can be
        used to inform the user about the file.
    """
    compound_datatype_obj = None
    if self.cleaned_data['compound_datatype'] != CompoundDatatype.RAW_ID:
        compound_datatype_obj = CompoundDatatype.objects.get(
            pk=self.cleaned_data['compound_datatype'])

    results = []
    for file_size, uploaded_file in self.cleaned_data['dataset_file']:
        # Note that uploaded_file should be seek'd to the beginning. It was
        # presumably just opened so that should be OK, but if this ever
        # changes we will have to fix this.
        dataset = error_str = auto_name = None
        try:
            # TODO: use correct unique constraints
            name_prefix = ""
            if self.cleaned_data["name_prefix"]:
                name_prefix = self.cleaned_data["name_prefix"] + "_"
            auto_name = (name_prefix + uploaded_file.name + "_" +
                         datetime.now().strftime('%Y%m%d%H%M%S%f'))
            if self.cleaned_data["description"]:
                auto_description = self.cleaned_data["description"]
            else:
                auto_description = "Bulk Uploaded File " + uploaded_file.name

            dataset = Dataset.create_dataset(
                is_uploaded=True,
                file_path=None,
                user=user,
                cdt=compound_datatype_obj,
                keep_file=True,
                name=auto_name,
                description=auto_description,
                file_source=None,
                check=True,
                file_handle=uploaded_file)
            dataset.grant_from_json(self.cleaned_data["permissions"])
        except Exception as e:
            error_str = str(e)
            LOGGER.exception(
                "Error while creating Dataset for file with original file name=" +
                str(uploaded_file.name) +
                " and autogenerated Dataset name = " + str(auto_name))

        if dataset and error_str is None:
            results.append(dataset)
        elif error_str and dataset is None:
            results.append({"name": uploaded_file.name,
                            "errstr": error_str,
                            "size": file_size})
        else:
            raise ValueError(
                "Invalid situation. Must either have a dataset or an error; "
                "cannot have both or neither.")
    return compound_datatype_obj, results

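# A hypothetical caller (an assumption, not from the source): each element of
# results is either a Dataset or an error dict, so a Django view with `form`
# and `request` in scope might split them like this.
from django.contrib import messages

cdt, results = form.create_datasets(request.user)
succeeded = [r for r in results if isinstance(r, Dataset)]
failed = [r for r in results if isinstance(r, dict)]
for failure in failed:
    messages.error(
        request,
        'Upload of {name} ({size} bytes) failed: {errstr}'.format(**failure))
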
prototype_CDT = CompoundDatatype.objects.get(pk=CDTs.PROTOTYPE_PK)

# A dummy Datatype with a prototype.
with tempfile.TemporaryFile() as f:
    f.write("""example,valid
True,True
true,False
y,False
n,False
False,False
false,false""")
    f.seek(0)
    proto_SD = Dataset.create_dataset(
        file_path=None,
        user=kive_user(),
        cdt=CompoundDatatype.objects.get(pk=CDTs.PROTOTYPE_PK),
        name="AlwaysTruePrototype",
        description="Prototype for dummy Datatype",
        file_handle=f)

always_true = Datatype(user=kive_user(),
                       name="Python True",
                       description="True in python",
                       proto_SD=proto_SD)
always_true.save()
always_true.restricts.add(Datatype.objects.get(pk=datatypes.BOOL_PK))
always_true.basic_constraints.create(ruletype=BasicConstraint.REGEXP,
                                     rule="True")

from django.core.files import File
from django.contrib.auth.models import User

import metadata.models
from librarian.models import Dataset
import method.models
import kive.testing_utils as tools

# This comes from the initial_user fixture.
kive_user = User.objects.get(pk=1)

test_fasta = Dataset.create_dataset(
    file_path="../samplecode/step_0_raw.fasta",
    user=kive_user,
    cdt=None,
    keep_file=True,
    name="TestFASTA",
    description="Toy FASTA file for testing pipelines")

# Set up a test Pipeline.
resource = method.models.CodeResource(name="Fasta2CSV",
                                      description="FASTA converter script",
                                      filename="Fasta2CSV.py")
resource.clean()
resource.save()

with open("../samplecode/fasta2csv.py", "rb") as f:
    revision = method.models.CodeResourceRevision(
        coderesource=resource,
        revision_name="v1",
        revision_desc="First version",
        content_file=File(f))
    revision.clean()