Example #1
0
    def RegisterArtifact(self,
                         artifact_rdfvalue,
                         source="datastore",
                         overwrite_if_exists=False,
                         overwrite_system_artifacts=False):
        """Adds an artifact definition to the in-memory registry.

        Args:
          artifact_rdfvalue: The artifact to register. Its `name` is used as the
            registry key; its `loaded_from` and `error_message` fields are
            overwritten by this call.
          source: Origin label stored in `loaded_from`. Already registered
            entries whose label does not start with "datastore:" are treated as
            system artifacts.
          overwrite_if_exists: Allow replacing an already registered artifact
            with the same name.
          overwrite_system_artifacts: Additionally allow replacing a registered
            system artifact. Only consulted when `overwrite_if_exists` is set.

        Raises:
          rdf_artifacts.ArtifactDefinitionError: If the name is already taken
            and the flags above do not permit replacing the existing entry.
        """
        # NOTE(review): the default `source` ("datastore") lacks the trailing
        # colon that the system-artifact check below looks for - confirm that
        # callers always pass an explicit "datastore:..." label.
        name = artifact_rdfvalue.name

        if name in self._artifacts:
            if not overwrite_if_exists:
                raise rdf_artifacts.ArtifactDefinitionError(
                    name,
                    "artifact already exists and `overwrite_if_exists` is unset")

            if not overwrite_system_artifacts:
                existing = self._artifacts[name]
                came_from_datastore = existing.loaded_from.startswith(
                    "datastore:")
                if not came_from_datastore:
                    # The registered definition was loaded from somewhere other
                    # than the datastore (e.g. a file), so it must be kept.
                    raise rdf_artifacts.ArtifactDefinitionError(
                        name, "system artifact cannot be overwritten")

        # Record the origin (useful when debugging) and drop any error left
        # over from a previous validation before storing the definition.
        artifact_rdfvalue.loaded_from = source
        artifact_rdfvalue.error_message = None
        self._artifacts[name] = artifact_rdfvalue
Example #2
0
    def ArtifactsFromYaml(self, yaml_content):
        """Parses YAML (or JSON) content into a list of artifacts.

        Args:
          yaml_content: YAML content holding one or more documents, each
            describing one artifact, or a single document that is a list of such
            descriptions.

        Returns:
          A list of `rdf_artifacts.Artifact` objects, in document order.

        Raises:
          rdf_artifacts.ArtifactDefinitionError: If a document cannot be turned
            into an artifact. Processing stops at the first such document.
        """
        raw_list = list(yaml.safe_load_all(yaml_content))

        # TODO(hanuszczak): I am very sceptical about that "doing the right thing"
        # below. What are the real use cases?

        # Try to do the right thing with json/yaml formatted as a list: a single
        # document that is itself a list is treated as the list of definitions.
        if len(raw_list) == 1 and isinstance(raw_list[0], list):
            raw_list = raw_list[0]

        # Convert json into artifact and validate.
        valid_artifacts = []
        for artifact_dict in raw_list:
            # In this case we are feeding parameters directly from potentially
            # untrusted yaml/json to our RDFValue class. However, safe_load ensures
            # these are all primitive types as long as there is no other
            # deserialization involved, and we are passing these into protobuf
            # primitive types.
            try:
                artifact_value = rdf_artifacts.Artifact(**artifact_dict)
                valid_artifacts.append(artifact_value)
            except (TypeError, AttributeError, type_info.TypeValueError) as e:
                # A document that is not a mapping (e.g. an empty YAML document,
                # which parses to None, or a bare scalar) has no `get`. Report it
                # without a name instead of failing inside this handler.
                name = (artifact_dict.get("name")
                        if hasattr(artifact_dict, "get") else None)
                raise rdf_artifacts.ArtifactDefinitionError(
                    name, "invalid definition", cause=e)

        return valid_artifacts
Example #3
0
    def ArtifactsFromYaml(self, yaml_content):
        """Parses YAML (or JSON) content into a list of artifacts.

        Keys listed in `DEPRECATED_ARTIFACT_FIELDS` are stripped from each
        definition before the artifact is constructed.

        Args:
          yaml_content: YAML content holding one or more documents, each
            describing one artifact, or a single document that is a list of such
            descriptions.

        Returns:
          A list of `rdf_artifacts.Artifact` objects, in document order.

        Raises:
          rdf_artifacts.ArtifactDefinitionError: If a document cannot be turned
            into an artifact. Processing stops at the first such document.
        """
        raw_list = yaml.ParseMany(yaml_content)

        # TODO(hanuszczak): I am very sceptical about that "doing the right thing"
        # below. What are the real use cases?

        # Try to do the right thing with json/yaml formatted as a list: a single
        # document that is itself a list is treated as the list of definitions.
        if (isinstance(raw_list, list) and len(raw_list) == 1
                and isinstance(raw_list[0], list)):
            raw_list = raw_list[0]

        # Convert json into artifact and validate.
        valid_artifacts = []
        for artifact_dict in raw_list:
            # In this case we are feeding parameters directly from potentially
            # untrusted yaml/json to our RDFValue class. However, safe_load ensures
            # these are all primitive types as long as there is no other
            # deserialization involved, and we are passing these into protobuf
            # primitive types.
            try:
                # Old artifacts might still use deprecated fields, so we have to
                # ignore such. Here, we simply delete keys from the dictionary as
                # otherwise the RDF value constructor would raise on unknown
                # fields. This happens inside the `try` so that a document that is
                # not a mapping is reported as an invalid definition as well.
                for field in DEPRECATED_ARTIFACT_FIELDS:
                    artifact_dict.pop(field, None)

                artifact_value = rdf_artifacts.Artifact(**artifact_dict)
                valid_artifacts.append(artifact_value)
            except (TypeError, AttributeError, type_info.TypeValueError) as e:
                # A document that is not a mapping (e.g. an empty YAML document,
                # which parses to None, or a bare scalar) has no `get`. Report it
                # without a name instead of failing inside this handler.
                name = (artifact_dict.get("name")
                        if hasattr(artifact_dict, "get") else None)
                raise rdf_artifacts.ArtifactDefinitionError(
                    name, "invalid definition", cause=e)

        return valid_artifacts
Example #4
0
    def _LoadArtifactsFromDatastore(self):
        """Load artifacts from the data store.

        Reads every stored artifact, registers it with `RegisterArtifact` (using
        the "datastore:" source label and allowing overwrites of existing
        entries), and then validates the artifacts that were registered.

        Stored artifacts that are rejected because they would overwrite a system
        artifact are deleted from the data store, the registry is marked dirty
        and an error is raised; validation is not reached in that case.

        Raises:
          rdf_artifacts.ArtifactDefinitionError: If registration of an artifact
            fails for a reason other than shadowing a system artifact, or after
            shadowing artifacts have been deleted from the data store.
        """
        # Artifacts that were registered successfully and still need validation.
        loaded_artifacts = []

        # TODO(hanuszczak): Why do we have to remove anything? If some artifact
        # tries to shadow system artifact shouldn't we just ignore them and perhaps
        # issue some warning instead? The datastore being loaded should be read-only
        # during upload.

        # A collection of artifacts that shadow system artifacts and need
        # to be deleted from the data store.
        to_delete = []

        artifact_list = []

        # Two storage backends are supported: the relational database returns
        # all artifacts in one call, otherwise every configured datastore
        # collection is read and concatenated.
        if data_store.RelationalDBEnabled():
            artifact_list = data_store.REL_DB.ReadAllArtifacts()
        else:
            for artifact_coll_urn in self._sources.GetDatastores():
                artifact_list.extend(ArtifactCollection(artifact_coll_urn))

        for artifact_value in artifact_list:
            try:
                # `overwrite_system_artifacts` keeps its default (False), so an
                # attempt to replace a system artifact raises and is handled below.
                self.RegisterArtifact(artifact_value,
                                      source="datastore:",
                                      overwrite_if_exists=True)
                loaded_artifacts.append(artifact_value)
            except rdf_artifacts.ArtifactDefinitionError as e:
                # TODO(hanuszczak): String matching on exception message is rarely
                # a good idea. Instead this should be refactored to some exception
                # class and then handled separately.
                if "system artifact" in str(e):
                    to_delete.append(artifact_value.name)
                else:
                    raise

        if to_delete:
            DeleteArtifactsFromDatastore(to_delete, reload_artifacts=False)
            self._dirty = True

            # TODO(hanuszczak): This is connected to the previous TODO comment. Why
            # do we throw exception at this point? Why do we delete something and then
            # abort the whole upload procedure by throwing an exception?
            detail = "system artifacts were shadowed and had to be deleted"
            # Note that the list of names (not a single name) is passed as the
            # first argument here.
            raise rdf_artifacts.ArtifactDefinitionError(to_delete, detail)

        # Once all artifacts are loaded we can validate.
        # The pass is repeated until a full sweep produces no failure; presumably
        # the validity of one artifact can depend on others - TODO confirm.
        revalidate = True
        while revalidate:
            revalidate = False
            # Iterate over a copy because failing artifacts are removed from the
            # list inside the loop.
            for artifact_obj in loaded_artifacts[:]:
                try:
                    Validate(artifact_obj)
                except rdf_artifacts.ArtifactDefinitionError as e:
                    # Validation failures are logged and recorded on the artifact
                    # rather than propagated; the artifact is only dropped from
                    # the local list of artifacts still to be validated.
                    logging.error("Artifact %s did not validate: %s",
                                  artifact_obj.name, e)
                    artifact_obj.error_message = utils.SmartStr(e)
                    loaded_artifacts.remove(artifact_obj)
                    revalidate = True