Example #1
def set_meta_with_tool_provided(dataset_instance, file_dict, set_meta_kwds,
                                datatypes_registry, max_metadata_value_size):
    # This method is somewhat odd, in that we set the metadata attributes from tool,
    # then call set_meta, then set metadata attributes from tool again.
    # This is intentional due to interplay of overwrite kwd, the fact that some metadata
    # parameters may rely on the values of others, and that we are accepting the
    # values provided by the tool as Truth.
    extension = dataset_instance.extension
    if extension == "_sniff_":
        try:
            from galaxy.datatypes import sniff
            extension = sniff.handle_uploaded_dataset_file(
                dataset_instance.dataset.external_filename, datatypes_registry)
            # We need to both set the extension so it is available to set_meta
            # and record it in the metadata so it can be reloaded on the server
            # side and the model updated (see MetadataCollection.{from,to}_JSON_dict)
            dataset_instance.extension = extension
            # Set special metadata property that will reload this on server side.
            setattr(dataset_instance.metadata, "__extension__", extension)
        except Exception:
            log.exception("Problem sniffing datatype.")

    for metadata_name, metadata_value in file_dict.get('metadata', {}).items():
        setattr(dataset_instance.metadata, metadata_name, metadata_value)
    dataset_instance.datatype.set_meta(dataset_instance, **set_meta_kwds)
    for metadata_name, metadata_value in file_dict.get('metadata', {}).items():
        setattr(dataset_instance.metadata, metadata_name, metadata_value)

    if max_metadata_value_size:
        for k, v in list(dataset_instance.metadata.items()):
            if total_size(v) > max_metadata_value_size:
                log.info("Key %s too large for metadata, discarding" % k)
                dataset_instance.metadata.remove_key(k)
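The helper applies the tool-provided metadata both before and after set_meta because some metadata parameters are derived from others and the tool's values are treated as authoritative. A minimal sketch of that ordering, using hypothetical stub classes rather than Galaxy's real model objects:

class _StubMetadata:
    pass


class _StubDatatype:
    def set_meta(self, dataset_instance, **kwds):
        # Pretend the datatype re-derives 'columns' by inspecting the file,
        # overwriting whatever the tool reported.
        dataset_instance.metadata.columns = 99


class _StubDatasetInstance:
    def __init__(self):
        self.metadata = _StubMetadata()
        self.datatype = _StubDatatype()


tool_metadata = {"columns": 3}
ds = _StubDatasetInstance()
# First pass: make tool values visible to set_meta, since some parameters
# may be computed from others.
for name, value in tool_metadata.items():
    setattr(ds.metadata, name, value)
ds.datatype.set_meta(ds)
# Second pass: re-assert the tool values so they win over anything set_meta changed.
for name, value in tool_metadata.items():
    setattr(ds.metadata, name, value)
assert ds.metadata.columns == 3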
Example #2
def set_metadata():
    # locate galaxy_root for loading datatypes
    galaxy_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
    galaxy.datatypes.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    datatypes_registry = galaxy.datatypes.registry.Registry()
    datatypes_registry.load_datatypes(root_dir=galaxy_root, config=datatypes_config)
    galaxy.model.set_datatypes_registry(datatypes_registry)

    job_metadata = sys.argv.pop(1)
    existing_job_metadata_dict = {}
    new_job_metadata_dict = {}
    if job_metadata != "None" and os.path.exists(job_metadata):
        for line in open(job_metadata, "r"):
            try:
                line = stringify_dictionary_keys(json.loads(line))
                if line["type"] == "dataset":
                    existing_job_metadata_dict[line["dataset_id"]] = line
                elif line["type"] == "new_primary_dataset":
                    new_job_metadata_dict[line["filename"]] = line
            except Exception:
                continue

    for filenames in sys.argv[1:]:
        fields = filenames.split(",")
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        # Need to be careful with the way that these parameters are populated from the filename splitting,
        # because if a job is running when the server is updated, any existing external metadata command-lines
        # will not have info about the newly added override_metadata file
        if fields:
            override_metadata = fields.pop(0)
        else:
            override_metadata = None
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "dataset_%s_files" % (dataset.dataset.id))
            )
            dataset.dataset.external_extra_files_path = files_path
            if dataset.dataset.id in existing_job_metadata_dict:
                dataset.extension = existing_job_metadata_dict[dataset.dataset.id].get("ext", dataset.extension)
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            if override_metadata:
                override_metadata = json.load(open(override_metadata))
                for metadata_name, metadata_file_override in override_metadata:
                    if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(metadata_file_override):
                        metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                            metadata_file_override
                        )
                    setattr(dataset.metadata, metadata_name, metadata_file_override)
            file_dict = existing_job_metadata_dict.get(dataset.dataset.id, {})
            set_meta_with_tool_provided(dataset, file_dict, set_meta_kwds, datatypes_registry)
            if max_metadata_value_size:
                for k, v in dataset.metadata.items():
                    if total_size(v) > max_metadata_value_size:
                        log.info("Key %s too large for metadata, discarding" % k)
                        dataset.metadata.remove_key(k)
            dataset.metadata.to_JSON_dict(filename_out)  # write out results of set_meta
            json.dump(
                (True, "Metadata has been set successfully"), open(filename_results_code, "wb+")
            )  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, str(e)), open(filename_results_code, "wb+"))  # setting metadata has failed somehow
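This version parses all of its work from positional arguments: the metadata size cap is sniffed off the end of argv, the datatypes config and the job metadata file are then popped, and every remaining argument is a comma-joined group of per-dataset filenames. A hedged sketch of how a caller might assemble that argument vector (the paths below are illustrative placeholders, not real Galaxy filenames):

import sys

per_dataset_fields = ",".join([
    "metadata_in_1",        # filename_in: pickled DatasetInstance
    "metadata_kwds_1",      # filename_kwds: JSON dict of set_meta keyword arguments
    "metadata_out_1",       # filename_out: where the metadata JSON dict is written
    "metadata_results_1",   # filename_results_code: (success, message) JSON pair
    "dataset_1.dat",        # dataset_filename_override: path to the dataset file
    "metadata_override_1",  # optional: JSON list of (name, value) overrides
])

sys.argv = [
    "set_metadata.py",      # script name
    "registry.xml",         # datatypes_config, popped first
    "galaxy.json",          # job_metadata, popped second
    per_dataset_fields,     # one comma-joined group per dataset
    "5242880",              # optional trailing max_metadata_value_size, sniffed off the end
]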
Example #3
def set_metadata():
    # locate galaxy_root for loading datatypes
    galaxy_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                     os.pardir))
    import galaxy.model
    galaxy.model.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(
        os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    if not os.path.exists(datatypes_config):
        # This path should exist, except for jobs that started running on release 17.05, where a global
        # datatypes_config (instead of a datatypes_config per job) was used. For a while release 17.05
        # would remove the global datatypes config on shutdown and toolbox reload, which would lead to
        # failed metadata jobs. To remedy this we scan jobs at startup for missing registry.xml files,
        # and if we detect such a job we write out the current registry.xml file.
        datatypes_config = os.path.join(tool_job_working_directory,
                                        "registry.xml")
        if not os.path.exists(datatypes_config):
            print(
                "Metadata setting failed because registry.xml could not be found. You may retry setting metadata."
            )
            sys.exit(1)
    import galaxy.datatypes.registry
    datatypes_registry = galaxy.datatypes.registry.Registry()
    datatypes_registry.load_datatypes(root_dir=galaxy_root,
                                      config=datatypes_config)
    galaxy.model.set_datatypes_registry(datatypes_registry)

    job_metadata = sys.argv.pop(1)
    existing_job_metadata_dict = {}
    new_job_metadata_dict = {}
    if job_metadata != "None" and os.path.exists(job_metadata):
        for line in open(job_metadata, 'r'):
            try:
                line = stringify_dictionary_keys(json.loads(line))
                if line['type'] == 'dataset':
                    existing_job_metadata_dict[line['dataset_id']] = line
                elif line['type'] == 'new_primary_dataset':
                    new_job_metadata_dict[line['filename']] = line
            except Exception:
                continue

    for filenames in sys.argv[1:]:
        fields = filenames.split(',')
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        # Need to be careful with the way that these parameters are populated from the filename splitting,
        # because if a job is running when the server is updated, any existing external metadata command-lines
        # will not have info about the newly added override_metadata file
        if fields:
            override_metadata = fields.pop(0)
        else:
            override_metadata = None
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in,
                                        'rb'))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory,
                             "dataset_%s_files" % (dataset.dataset.id)))
            dataset.dataset.external_extra_files_path = files_path
            if dataset.dataset.id in existing_job_metadata_dict:
                dataset.extension = existing_job_metadata_dict[
                    dataset.dataset.id].get('ext', dataset.extension)
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            if override_metadata:
                override_metadata = json.load(open(override_metadata))
                for metadata_name, metadata_file_override in override_metadata:
                    if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(
                            metadata_file_override):
                        metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                            metadata_file_override)
                    setattr(dataset.metadata, metadata_name,
                            metadata_file_override)
            file_dict = existing_job_metadata_dict.get(dataset.dataset.id, {})
            set_meta_with_tool_provided(dataset, file_dict, set_meta_kwds,
                                        datatypes_registry)
            if max_metadata_value_size:
                for k, v in list(dataset.metadata.items()):
                    if total_size(v) > max_metadata_value_size:
                        log.info("Key %s too large for metadata, discarding" %
                                 k)
                        dataset.metadata.remove_key(k)
            dataset.metadata.to_JSON_dict(
                filename_out)  # write out results of set_meta
            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, str(e)),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    for i, (filename, file_dict) in enumerate(new_job_metadata_dict.items(),
                                              start=1):
        new_dataset_filename = os.path.join(tool_job_working_directory,
                                            "working", file_dict['filename'])
        new_dataset = galaxy.model.Dataset(
            id=-i, external_filename=new_dataset_filename)
        extra_files = file_dict.get('extra_files', None)
        if extra_files is not None:
            new_dataset._extra_files_path = os.path.join(
                tool_job_working_directory, "working", extra_files)
        new_dataset.state = new_dataset.states.OK
        new_dataset_instance = galaxy.model.HistoryDatasetAssociation(
            id=-i, dataset=new_dataset, extension=file_dict.get('ext', 'data'))
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry)
        file_dict['metadata'] = json.loads(
            new_dataset_instance.metadata.to_JSON_dict()
        )  # storing metadata in external form, need to turn back into dict, then later jsonify
    if existing_job_metadata_dict or new_job_metadata_dict:
        with open(job_metadata, 'wt') as job_metadata_fh:
            for value in list(existing_job_metadata_dict.values()) + list(
                    new_job_metadata_dict.values()):
                job_metadata_fh.write("%s\n" % (json.dumps(value)))

    clear_mappers()
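The job metadata file consumed and rewritten above is a JSON-lines file: one object per line, dispatched on its "type" field. A short sketch of that format using only the stdlib json module; the concrete values are invented for illustration:

import json

records = [
    {"type": "dataset", "dataset_id": 42, "ext": "tabular"},
    {"type": "new_primary_dataset", "filename": "primary_42_report_visible_txt", "ext": "txt"},
]

with open("job_metadata_example.json", "wt") as fh:
    for record in records:
        fh.write("%s\n" % json.dumps(record))

# Reading mirrors the loop above: parse each line and dispatch on "type".
existing_job_metadata_dict = {}
new_job_metadata_dict = {}
with open("job_metadata_example.json") as fh:
    for line in fh:
        record = json.loads(line)
        if record["type"] == "dataset":
            existing_job_metadata_dict[record["dataset_id"]] = record
        elif record["type"] == "new_primary_dataset":
            new_job_metadata_dict[record["filename"]] = record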
Example #4
def set_metadata():
    # locate galaxy_root for loading datatypes
    galaxy_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                     os.pardir))
    galaxy.datatypes.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(
        os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    datatypes_registry = galaxy.datatypes.registry.Registry()
    datatypes_registry.load_datatypes(root_dir=galaxy_root,
                                      config=datatypes_config)
    galaxy.model.set_datatypes_registry(datatypes_registry)

    job_metadata = sys.argv.pop(1)
    existing_job_metadata_dict = {}
    new_job_metadata_dict = {}
    if job_metadata != "None" and os.path.exists(job_metadata):
        for line in open(job_metadata, 'r'):
            try:
                line = stringify_dictionary_keys(json.loads(line))
                if line['type'] == 'dataset':
                    existing_job_metadata_dict[line['dataset_id']] = line
                elif line['type'] == 'new_primary_dataset':
                    new_job_metadata_dict[line['filename']] = line
            except Exception:
                continue

    for filenames in sys.argv[1:]:
        fields = filenames.split(',')
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        # Need to be careful with the way that these parameters are populated from the filename splitting,
        # because if a job is running when the server is updated, any existing external metadata command-lines
        # will not have info about the newly added override_metadata file
        if fields:
            override_metadata = fields.pop(0)
        else:
            override_metadata = None
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory,
                             "dataset_%s_files" % (dataset.dataset.id)))
            dataset.dataset.external_extra_files_path = files_path
            if dataset.dataset.id in existing_job_metadata_dict:
                dataset.extension = existing_job_metadata_dict[
                    dataset.dataset.id].get('ext', dataset.extension)
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            if override_metadata:
                override_metadata = json.load(open(override_metadata))
                for metadata_name, metadata_file_override in override_metadata:
                    if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(
                            metadata_file_override):
                        metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                            metadata_file_override)
                    setattr(dataset.metadata, metadata_name,
                            metadata_file_override)
            file_dict = existing_job_metadata_dict.get(dataset.dataset.id, {})
            set_meta_with_tool_provided(dataset, file_dict, set_meta_kwds,
                                        datatypes_registry)
            if max_metadata_value_size:
                for k, v in dataset.metadata.items():
                    if total_size(v) > max_metadata_value_size:
                        log.info("Key %s too large for metadata, discarding" %
                                 k)
                        dataset.metadata.remove_key(k)
            dataset.metadata.to_JSON_dict(
                filename_out)  # write out results of set_meta
            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wb+'))  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, str(e)),
                      open(filename_results_code,
                           'wb+'))  # setting metadata has failed somehow
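Each dataset's outcome is reported through its results-code file as a JSON pair of (success flag, message). A hypothetical reader for that file is sketched below; Galaxy's actual server-side check lives elsewhere, so this only illustrates the format written by the try/except blocks above:

import json


def read_results_code(path):
    """Return (success, message) as written to filename_results_code above."""
    with open(path) as fh:
        success, message = json.load(fh)
    return bool(success), message

# Usage (the path is a placeholder):
# ok, msg = read_results_code("metadata_results_1")
# if not ok:
#     raise RuntimeError("Setting metadata failed: %s" % msg)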
Example #5
def set_metadata():
    # locate galaxy_root for loading datatypes
    galaxy_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
    galaxy.datatypes.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    if not os.path.exists(datatypes_config):
        # This path should exist, except for jobs that started running on release 17.05, where a global
        # datatypes_config (instead of a datatypes_config per job) was used. For a while release 17.05
        # would remove the global datatypes config on shutdown and toolbox reload, which would lead to
        # failed metadata jobs. To remedy this we scan jobs at startup for missing registry.xml files,
        # and if we detect such a job we write out the current registry.xml file.
        datatypes_config = os.path.join(tool_job_working_directory, "registry.xml")
        if not os.path.exists(datatypes_config):
            print("Metadata setting failed because registry.xml could not be found. You may retry setting metadata.")
            sys.exit(1)
    datatypes_registry = galaxy.datatypes.registry.Registry()
    datatypes_registry.load_datatypes(root_dir=galaxy_root, config=datatypes_config)
    galaxy.model.set_datatypes_registry(datatypes_registry)

    job_metadata = sys.argv.pop(1)
    existing_job_metadata_dict = {}
    new_job_metadata_dict = {}
    if job_metadata != "None" and os.path.exists(job_metadata):
        for line in open(job_metadata, 'r'):
            try:
                line = stringify_dictionary_keys(json.loads(line))
                if line['type'] == 'dataset':
                    existing_job_metadata_dict[line['dataset_id']] = line
                elif line['type'] == 'new_primary_dataset':
                    new_job_metadata_dict[line['filename']] = line
            except Exception:
                continue

    for filenames in sys.argv[1:]:
        fields = filenames.split(',')
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        # Need to be careful with the way that these parameters are populated from the filename splitting,
        # because if a job is running when the server is updated, any existing external metadata command-lines
        # will not have info about the newly added override_metadata file
        if fields:
            override_metadata = fields.pop(0)
        else:
            override_metadata = None
        set_meta_kwds = stringify_dictionary_keys(json.load(open(filename_kwds)))  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            files_path = os.path.abspath(os.path.join(tool_job_working_directory, "dataset_%s_files" % (dataset.dataset.id)))
            dataset.dataset.external_extra_files_path = files_path
            if dataset.dataset.id in existing_job_metadata_dict:
                dataset.extension = existing_job_metadata_dict[dataset.dataset.id].get('ext', dataset.extension)
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            if override_metadata:
                override_metadata = json.load(open(override_metadata))
                for metadata_name, metadata_file_override in override_metadata:
                    if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(metadata_file_override):
                        metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(metadata_file_override)
                    setattr(dataset.metadata, metadata_name, metadata_file_override)
            file_dict = existing_job_metadata_dict.get(dataset.dataset.id, {})
            set_meta_with_tool_provided(dataset, file_dict, set_meta_kwds, datatypes_registry)
            if max_metadata_value_size:
                for k, v in list(dataset.metadata.items()):
                    if total_size(v) > max_metadata_value_size:
                        log.info("Key %s too large for metadata, discarding" % k)
                        dataset.metadata.remove_key(k)
            dataset.metadata.to_JSON_dict(filename_out)  # write out results of set_meta
            json.dump((True, 'Metadata has been set successfully'), open(filename_results_code, 'wb+'))  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, str(e)), open(filename_results_code, 'wb+'))  # setting metadata has failed somehow

    for i, (filename, file_dict) in enumerate(new_job_metadata_dict.items(), start=1):
        new_dataset_filename = os.path.join(tool_job_working_directory, "working", file_dict['filename'])
        new_dataset = galaxy.model.Dataset(id=-i, external_filename=new_dataset_filename)
        extra_files = file_dict.get('extra_files', None)
        if extra_files is not None:
            new_dataset._extra_files_path = os.path.join(tool_job_working_directory, "working", extra_files)
        new_dataset.state = new_dataset.states.OK
        new_dataset_instance = galaxy.model.HistoryDatasetAssociation(id=-i, dataset=new_dataset, extension=file_dict.get('ext', 'data'))
        set_meta_with_tool_provided(new_dataset_instance, file_dict, set_meta_kwds, datatypes_registry)
        file_dict['metadata'] = json.loads(new_dataset_instance.metadata.to_JSON_dict())  # storing metadata in external form, need to turn back into dict, then later jsonify
    if existing_job_metadata_dict or new_job_metadata_dict:
        with open(job_metadata, 'wb') as job_metadata_fh:
            for value in list(existing_job_metadata_dict.values()) + list(new_job_metadata_dict.values()):
                job_metadata_fh.write("%s\n" % (json.dumps(value)))

    clear_mappers()
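Because max_metadata_value_size was added after jobs using the older command line already existed, every version above sniffs it as an optional trailing argument: the last argv element is treated as the size cap only if it parses as an integer, and 0 means no cap is enforced. The same pattern in isolation, with made-up argument values:

def pop_max_metadata_value_size(argv):
    """Return (max_size, remaining_argv); 0 means no size cap is enforced."""
    try:
        max_size = int(argv[-1])
        return max_size, argv[:-1]
    except ValueError:
        # The last argument is a filename group, not a size, so leave argv alone.
        return 0, argv


max_size, args = pop_max_metadata_value_size(
    ["set_metadata.py", "registry.xml", "galaxy.json", "in,kwds,out,results,data", "1048576"])
assert max_size == 1048576 and args[-1] == "in,kwds,out,results,data"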