Example 1
 def load_data(self, data_folder):
     # there's one vcf file there, let's get it
     input_file = glob.glob(os.path.join(data_folder, "*.vcf"))
     if len(input_file) != 1:
         raise uploader.ResourceError(
             "Expecting only one VCF file, got: %s" % input_file)
     input_file = input_file.pop()
     self.logger.info("Load data from file '%s'" % input_file)
     return load_data(input_file)
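
This load_data snippet is a partial method body from an uploader class: it assumes glob, os, and the biothings uploader module (for ResourceError) are imported at module level, and that a separate module-level load_data() function does the actual VCF parsing. A minimal, dependency-free sketch of the same "exactly one matching file" check (the helper name and the use of ValueError are assumptions of this sketch, not part of the original code):

import glob
import os


def find_single_file(data_folder, pattern="*.vcf"):
    # Return the single file in data_folder matching pattern; this mirrors
    # the ResourceError check above, but raises ValueError to stay
    # dependency-free in this sketch.
    matches = glob.glob(os.path.join(data_folder, pattern))
    if len(matches) != 1:
        raise ValueError("Expecting only one VCF file, got: %s" % matches)
    return matches[0]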
Example 2
 def load_data(self, data_folder):
     files = glob.glob(
         os.path.join(data_folder, "exomes", self.__class__.GLOB_PATTERN))
     self.logger.info(
         "Looking for input file matching: %s" %
         os.path.join(data_folder, "exomes", self.__class__.GLOB_PATTERN))
     if len(files) != 1:
         raise uploader.ResourceError(
             "Expecting only one VCF file, got: %s" % files)
     input_file = files.pop()
     # the VCF is expected to come with its tabix index right next to it
     tbi_file = "%s%s" % (input_file, self.__class__.tbi_suffix)
     assert os.path.exists(tbi_file), tbi_file
     self.logger.info("Load data from file '%s'" % input_file)
     res = load_data_exomes(input_file)
     return res
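
Example 2 relies on two class attributes, GLOB_PATTERN and tbi_suffix, defined on the uploader subclass itself, and asserts that the input VCF is accompanied by its tabix index. A hypothetical declaration of such a subclass (the class name, source name, and attribute values are placeholders; the base class is assumed to come from the same uploader module used above):

class ExomesVCFUploader(uploader.BaseSourceUploader):  # hypothetical subclass
    name = "exomes_vcf"           # placeholder source name
    GLOB_PATTERN = "*.vcf.gz"     # placeholder pattern used to locate the input VCF
    tbi_suffix = ".tbi"           # suffix of the tabix index expected next to the VCF

    def load_data(self, data_folder):
        ...  # body as shown in Example 2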
Example 3
    def restore_snapshot(self, build_meta, job_manager, **kwargs):
        idxr = self.target_backend.target_esidxer
        # first check if snapshot repo exists
        repo_name, repo_settings = list(
            build_meta["metadata"]["repository"].items())[0]
        # do we need to enrich with some credentials? (they are part of the repo creation JSON settings)
        if repo_settings.get("type") == "s3" and \
                btconfig.STANDALONE_AWS_CREDENTIALS.get("AWS_ACCESS_KEY_ID"):
            repo_settings["settings"]["access_key"] = \
                btconfig.STANDALONE_AWS_CREDENTIALS["AWS_ACCESS_KEY_ID"]
            repo_settings["settings"]["secret_key"] = \
                btconfig.STANDALONE_AWS_CREDENTIALS["AWS_SECRET_ACCESS_KEY"]
            repo_settings["settings"]["readonly"] = True
        try:
            repo = idxr.get_repository(repo_name)
            # ok it exists, check if settings are the same
            if repo[repo_name] != repo_settings:
                # different, raise an exception so it's handled in the except block
                self.logger.info(
                    "Repository '%s' was found but settings are different, it needs to be created again"
                    % repo_name)
                self.logger.debug("Existing setting: %s" % repo[repo_name])
                self.logger.debug("Required (new) setting: %s" % repo_settings)
                raise IndexerException
        except IndexerException:
            # ok, it doesn't exist, let's try to create it
            try:
                repo = idxr.create_repository(repo_name, repo_settings)
            except IndexerException as e:
                if repo_settings["settings"].get("url"):
                    raise uploader.ResourceError(
                        "Could not create snapshot repository. Check elasticsearch.yml configuration "
                        "file, you should have a line like this: "
                        'repositories.url.allowed_urls: "%s*" '
                        "allowing snapshot to be restored from this URL. Error was: %s"
                        % (repo_settings["settings"]["url"], e))
                else:
                    # try to create repo without key/secret, assuming it's already configured in ES keystore
                    if repo_settings["settings"].get("access_key"):
                        repo_settings["settings"].pop("access_key")
                        repo_settings["settings"].pop("secret_key")
                        try:
                            repo = idxr.create_repository(
                                repo_name, repo_settings)
                        except IndexerException as e:
                            raise uploader.ResourceError(
                                "Could not create snapshot repository, even assuming "
                                "credentials configured in keystore: %s" % e)
                    else:
                        raise uploader.ResourceError(
                            "Could not create snapshot repository: %s" % e)

        # repository is now ready, let's trigger the restore
        snapshot_name = build_meta["metadata"]["snapshot_name"]
        pinfo = self.get_pinfo()
        pinfo["step"] = "restore"
        pinfo["description"] = snapshot_name

        def get_status_info():
            try:
                res = idxr.get_restore_status(idxr._index)
                return res
            except Exception as e:
                # something went wrong, report it as a failure
                return {"status": "FAILED %s" % e}

        def restore_launched(f):
            try:
                self.logger.info("Restore launched: %s" % f.result())
            except Exception as e:
                self.logger.error("Error while launching restore: %s" % e)
                raise e

        self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                         (snapshot_name, idxr._index, idxr.es_host))
        job = yield from job_manager.defer_to_thread(
            pinfo,
            partial(idxr.restore,
                    repo_name,
                    snapshot_name,
                    idxr._index,
                    purge=self.__class__.AUTO_PURGE_INDEX))
        job.add_done_callback(restore_launched)
        yield from job
        while True:
            status_info = get_status_info()
            status = status_info["status"]
            self.logger.info("Recovery status for index '%s': %s" %
                             (idxr._index, status_info))
            if status in ["INIT", "IN_PROGRESS"]:
                yield from asyncio.sleep(
                    getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
            else:
                if status == "DONE":
                    self.logger.info(
                        "Snapshot '%s' successfully restored to index '%s' (host: '%s')"
                        % (snapshot_name, idxr._index, idxr.es_host),
                        extra={"notify": True})
                else:
                    e = uploader.ResourceError(
                        "Failed to restore snapshot '%s' on index '%s', status: %s"
                        % (snapshot_name, idxr._index, status))
                    self.logger.error(e)
                    raise e
                break
        # return current number of docs in index
        return self.target_backend.count()
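
Under the hood, registering a snapshot repository and triggering a restore boil down to two Elasticsearch REST calls: PUT _snapshot/<repo_name> with the repository settings as the JSON body, and POST _snapshot/<repo_name>/<snapshot_name>/_restore. A rough sketch of the equivalent requests, assuming a local cluster and a read-only S3 repository (host, bucket, repository, snapshot, and index names are all placeholders):

import requests

es_host = "http://localhost:9200"        # placeholder host
repo_name = "demo_repository"            # placeholder repository name
repo_settings = {
    "type": "s3",
    "settings": {
        "bucket": "my-snapshot-bucket",  # placeholder bucket
        "readonly": True,                # restore-only repository
    },
}

# register (or update) the snapshot repository
resp = requests.put("%s/_snapshot/%s" % (es_host, repo_name), json=repo_settings)
resp.raise_for_status()

# trigger a restore of a given snapshot into the cluster
snapshot_name = "demo_snapshot"          # placeholder snapshot name
resp = requests.post(
    "%s/_snapshot/%s/%s/_restore" % (es_host, repo_name, snapshot_name),
    json={"indices": "demo_index"},      # placeholder index name
)
resp.raise_for_status()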
Example 4
    def restore_snapshot(self, build_meta, job_manager, **kwargs):
        self.logger.debug("Restoring snapshot...")
        idxr = self.target_backend.target_esidxer
        es_host = idxr.es_host
        self.logger.debug("Got ES Host: %s", es_host)
        repo_name, repo_settings = self.get_snapshot_repository_config(
            build_meta)
        self.logger.debug("Got repo name: %s", repo_name)
        self.logger.debug("With settings: %s", repo_settings)
        # pull authentication settings from config
        auth = btconfig.STANDALONE_CONFIG.get(self.name, {}).get(
            'auth', btconfig.STANDALONE_CONFIG['_default'].get('auth')
        )
        if auth:
            self.logger.debug("Obtained Auth settings, using them.")
        else:
            self.logger.debug("No Auth settings found")

        # all restore repos should be r/o
        repo_settings["settings"]["readonly"] = True

        # populate additional settings
        additional_settings = btconfig.STANDALONE_CONFIG.get(self.name, {}).get(
            'repo_settings', btconfig.STANDALONE_CONFIG['_default'].get('repo_settings')
        )
        if additional_settings:
            self.logger.debug("Adding additional settings: %s", additional_settings)
            repo_settings['settings'].update(additional_settings)

        if 'client' not in repo_settings['settings']:
            self.logger.warning(
                "\"client\" not set in repository settings. The 'default' "
                "client will be used."
            )
            self.logger.warning(
                "Make sure keys are in the Elasticsearch keystore. "
                "If you are trying to work with EOL versions of "
                "Elasticsearch, or if you intentionally enabled "
                "allow_insecure_settings, set \"access_key\", \"secret_key\","
                " and potentially \"region\" in additional 'repo_settings'."
            )

        # first check if snapshot repo exists
        self.logger.info("Getting current repository settings")
        existing_repo_settings = self._get_repository(es_host, repo_name, auth)
        if existing_repo_settings:
            if existing_repo_settings[repo_name] != repo_settings:
                # TODO update comparison logic
                self.logger.info(
                    f"Repository '{repo_name}' was found but settings are different, "
                    "it may need to be created again"
                )
                self.logger.debug("Existing setting: %s", existing_repo_settings[repo_name])
                self.logger.debug("Required (new) setting: %s", repo_settings)
            else:
                self.logger.info("Repo exists with correct settings")
        else:
            # ok, it doesn't exist let's try to create it
            self.logger.info("Repo does not exist")
            try:
                self.logger.info("Creating repo...")
                self._create_repository(es_host, repo_name, repo_settings, auth)
            except Exception as e:
                self.logger.info("Creation failed: %s", e)
                if 'url' in repo_settings["settings"]:
                    raise uploader.ResourceError(
                        "Could not create snapshot repository. Check elasticsearch.yml configuration "
                        "file, you should have a line like this: "
                        'repositories.url.allowed_urls: "%s*" '
                        "allowing snapshot to be restored from this URL. Error was: %s"
                        % (repo_settings["settings"]["url"], e))
                else:
                    raise uploader.ResourceError("Could not create snapshot repository: %s" % e)

        # repository is now ready, let's trigger the restore
        snapshot_name = build_meta["metadata"]["snapshot_name"]
        pinfo = self.get_pinfo()
        pinfo["step"] = "restore"
        pinfo["description"] = snapshot_name

        def get_status_info():
            try:
                res = idxr.get_restore_status(idxr._index)
                return res
            except Exception as e:
                # something went wrong, report it as a failure
                return {"status": "FAILED %s" % e}

        def restore_launched(f):
            try:
                self.logger.info("Restore launched: %s" % f.result())
            except Exception as e:
                self.logger.error("Error while launching restore: %s" % e)
                raise e

        self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                         (snapshot_name, idxr._index, idxr.es_host))
        job = yield from job_manager.defer_to_thread(
            pinfo,
            partial(idxr.restore,
                    repo_name,
                    snapshot_name,
                    idxr._index,
                    purge=self.__class__.AUTO_PURGE_INDEX))
        job.add_done_callback(restore_launched)
        yield from job
        while True:
            status_info = get_status_info()
            status = status_info["status"]
            self.logger.info("Recovery status for index '%s': %s" %
                             (idxr._index, status_info))
            if status in ["INIT", "IN_PROGRESS"]:
                yield from asyncio.sleep(
                    getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
            else:
                if status == "DONE":
                    self.logger.info("Snapshot '%s' successfully restored to index '%s' (host: '%s')" %
                                     (snapshot_name, idxr._index, idxr.es_host), extra={"notify": True})
                else:
                    e = uploader.ResourceError("Failed to restore snapshot '%s' on index '%s', status: %s" %
                                               (snapshot_name, idxr._index, status))
                    self.logger.error(e)
                    raise e
                break
        # return current number of docs in index
        return self.target_backend.count()
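
Both restore_snapshot examples use the pre-Python 3.5 "yield from" coroutine style for deferring work and sleeping between status checks. The same monitoring loop can be sketched with modern async/await syntax; get_restore_status and the delay value are stand-ins for idxr.get_restore_status() and the MONITOR_SNAPSHOT_DELAY setting used above:

import asyncio


async def wait_for_restore(get_restore_status, delay=60):
    # Poll the restore status until it leaves INIT/IN_PROGRESS.
    # get_restore_status is assumed to be a callable returning a dict with a
    # "status" key, like get_status_info() in the examples above.
    while True:
        status_info = get_restore_status()
        status = status_info["status"]
        if status in ("INIT", "IN_PROGRESS"):
            await asyncio.sleep(delay)
        elif status == "DONE":
            return status_info
        else:
            raise RuntimeError("Restore failed with status: %s" % status)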