Example #1
0
 def register_loader(self):
     """Persist this plugin's loader type in the data-plugin registry.

     Upserts the registry document keyed by ``self.plugin_name`` so the
     loader type survives hub restarts.
     """
     registry = get_data_plugin()
     loader_info = {"plugin.loader": self.loader.loader_type}
     registry.update({"_id": self.plugin_name},
                     {"$set": loader_info},
                     upsert=True)
Example #2
0
 def unregister_url(self, url=None, name=None):
     """
     Unregister a data plugin, either by its source ``url`` or its
     registered ``name`` (one of the two must be given).

     Removes the registry document, deletes the downloaded plugin code
     (so the 'local' assistant won't auto-register it again) and pops
     the plugin from the dumper/uploader/data-plugin managers.

     Raises:
         ValueError: when neither url nor name is passed.
         AssistantException: when the plugin isn't registered.
     """
     dp = get_data_plugin()
     if url:
         url = url.strip()
         doc = dp.find_one({"plugin.url": url})
     elif name:
         doc = dp.find_one({"_id": name})
         # BUG FIX: doc can be None for an unknown name; fetch the url
         # lazily so we reach the AssistantException below instead of
         # crashing with a TypeError on None subscripting
         url = doc and doc["plugin"]["url"]
     else:
         raise ValueError("Specify 'url' or 'name'")
     if not doc:
         raise AssistantException(
             "Plugin is not registered (url=%s, name=%s)" % (url, name))
     # should be only one but just in case
     dp.remove({"_id": doc["_id"]})
     # delete plugin code so it won't be auto-register
     # by 'local' plugin assistant (issue studio #7)
     if doc.get("download", {}).get("data_folder"):
         codefolder = doc["download"]["data_folder"]
         self.logger.info("Delete plugin source code in '%s'" % codefolder)
         rmdashfr(codefolder)
     assistant = self.submit(url)
     try:
         self.data_plugin_manager.register.pop(assistant.plugin_name)
     except KeyError:
         raise AssistantException("Plugin '%s' is not registered" % url)
     # dumper/uploader may legitimately be absent: pop with default
     self.dumper_manager.register.pop(assistant.plugin_name, None)
     self.uploader_manager.register.pop(assistant.plugin_name, None)
Example #3
0
 def get_plugin_obj(self):
     """
     Return the registry document for this plugin.

     Raises:
         LoaderException: when the plugin isn't registered yet or its
         data hasn't been downloaded (no ``download.data_folder``).
     """
     dp = get_data_plugin()
     plugin = dp.find_one({"_id": self.plugin_name})
     # BUG FIX: find_one() returns None for an unknown plugin; previously
     # this crashed with AttributeError on .get() instead of raising the
     # intended LoaderException
     if not plugin or not plugin.get("download", {}).get("data_folder"):
         raise LoaderException(
             "Can't find data_folder, not available yet ?")
     return plugin
 def register_url(self, url):
     """
     Register a data plugin from `url`.

     Finds an assistant able to handle the URL, stores plugin info in
     the data-plugin registry, then schedules the download job; once the
     job completes, the plugin's manifest is loaded via a done-callback.
     Returns the scheduled job.

     Raises AssistantException if the URL is already registered or no
     assistant can handle it.
     """
     url = url.strip()
     dp = get_data_plugin()
     if dp.find_one({"plugin.url":url}):
         raise AssistantException("Plugin '%s' already registered" % url)
     assistant = self.submit(url)
     if assistant:
         # register plugin info
         dp.update({"_id":assistant.plugin_name},
                 {"$set":{"plugin":{"url":url,"type":assistant.plugin_type,"active":True}}},
                 upsert=True)
         assistant.handle()
         # load() schedules the download; we expect a single job back
         job = self.data_plugin_manager.load(assistant.plugin_name)
         assert len(job) == 1, "Expecting one job, got: %s" % job
         job = job.pop()
         def loaded(f):
             # runs when the download job finishes; only then is the
             # manifest available on disk
             try:
                 res = f.result()
                 self.logger.debug("Plugin '%s' loaded, now loading manifest" % assistant.plugin_name)
                 assistant.load_manifest()
             except Exception as e:
                 self.logger.exception("Unable to load plugin '%s': %s" % (assistant.plugin_name,e))
         job.add_done_callback(loaded)
         return job
     else:
         raise AssistantException("Could not find any assistant able to handle URL '%s'" % url)
Example #5
0
    def export_mapping(self, plugin_name, folder):
        """
        Export the ES mapping for `plugin_name` as a ``get_mapping()``
        classmethod appended to ``folder``/upload.py.

        The mapping is taken, in priority order, from:
        1. the plugin manifest (custom mapping: nothing exported here, it
           ships with the uploader code) — status "warning", origin "custom"
        2. src_master (registered mapping) — origin "registered"
        3. a previous inspection job stored in src_dump — origin "inspection"

        Returns a dict ``{"mapping": {status, file, message, origin}}``
        describing what happened.
        """
        res = {
            "mapping": {
                "status": None,
                "file": None,
                "message": None,
                "origin": None
            }
        }
        # first check if plugin defines a custom mapping in manifest
        # if that's the case, we don't need to export mapping there
        # as it'll be exported with "uploader" code
        plugindoc = get_data_plugin().find_one({"_id": plugin_name})
        assert plugindoc, "Can't find plugin named '%s'" % plugin_name
        plugin_folder = plugindoc.get("download", {}).get("data_folder")
        assert plugin_folder, "Can't find plugin folder for '%s'" % plugin_name
        try:
            # FIX: use a context manager so the manifest file handle is
            # closed (previously open() leaked the descriptor)
            with open(os.path.join(plugin_folder, "manifest.json")) as mfin:
                manifest = json.load(mfin)
            if "mapping" in manifest.get("uploader", {}):
                res["mapping"][
                    "message"] = "Custom mapping included in uploader export"
                res["mapping"]["status"] = "warning"
                res["mapping"]["origin"] = "custom"
                return res
        except Exception as e:
            # best-effort: a broken manifest only means we fall through to
            # the registered/inspected mapping sources below
            self.logger.error("Can't read manifest while exporting code: %s" %
                              e)
        # try to export mapping from src_master (official)
        doc = get_src_master().find_one({"_id": plugin_name})
        if doc:
            mapping = doc.get("mapping")
            res["mapping"]["origin"] = "registered"
        else:
            # fall back to the mapping generated by a previous inspection job
            doc = get_src_dump().find_one({"_id": plugin_name})
            mapping = doc and doc.get("inspect", {}).get("jobs", {}).get(plugin_name, {}).get("inspect", {}).\
                get("results", {}).get("mapping")
            res["mapping"]["origin"] = "inspection"
        if not mapping:
            res["mapping"]["origin"] = None
            res["mapping"]["status"] = "warning"
            res["mapping"][
                "message"] = "Can't find registered or generated (inspection) mapping"
            return res
        # append a get_mapping() classmethod to the generated upload.py,
        # pretty-printed and yapf-formatted
        ufile = os.path.join(folder, "upload.py")
        strmap, _ = yapf_api.FormatCode(pprint.pformat(mapping))
        with open(ufile, "a") as fout:
            fout.write("""
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix="    " * 2))

        res["mapping"]["file"] = ufile
        res["mapping"]["status"] = "ok"

        return res
    def load(self, autodiscover=True):
        """
        Load plugins registered in internal Hub database and generate/register
        dumpers & uploaders accordingly.
        If autodiscover is True, also search DATA_PLUGIN_FOLDER for existing
        plugin directories not registered yet in the database, and register
        them automatically.
        """
        plugin_dirs = []
        if autodiscover:
            try:
                plugin_dirs = os.listdir(btconfig.DATA_PLUGIN_FOLDER)
            except FileNotFoundError as e:
                raise AssistantException("Invalid DATA_PLUGIN_FOLDER: %s" % e)
        dp = get_data_plugin()
        cur = dp.find()
        for plugin in cur:
            # remove plugins from folder list if already register
            if plugin_dirs and plugin["_id"] in plugin_dirs:
                plugin_dirs.remove(plugin["_id"])
            try:
                self.load_plugin(plugin)
            except Exception as e:
                self.logger.warning("Couldn't load plugin '%s': %s" %
                                    (plugin["_id"], e))
                continue

        # some still unregistered ? (note: list always empty if autodiscover=False)
        if plugin_dirs:
            for pdir in plugin_dirs:
                fulldir = os.path.join(btconfig.DATA_PLUGIN_FOLDER, pdir)
                # basic sanity check to make sure it's plugin
                try:
                    # read the manifest with a context manager so the file
                    # handle doesn't leak
                    has_manifest = "manifest.json" in os.listdir(fulldir)
                    if has_manifest:
                        with open(os.path.join(fulldir, "manifest.json")) as fin:
                            has_manifest = bool(json.load(fin))
                    if has_manifest:
                        self.logger.info(
                            "Found unregistered plugin '%s', auto-register it"
                            % pdir)
                        self.register_url("local://%s" %
                                          pdir.strip().strip("/"))
                    else:
                        # BUG FIX: this warning used to live in the
                        # try/except's `else` clause, so it fired after every
                        # *successful* registration too; it belongs to the
                        # "no valid manifest" case only
                        self.logger.warning(
                            "Directory '%s' doesn't contain a plugin, skip it" %
                            pdir)
                except Exception as e:
                    self.logger.exception(
                        "Couldn't auto-register plugin '%s': %s" % (pdir, e))
                    continue
Example #7
0
    def register_url(self, url):
        """
        Register a data plugin from `url`.

        Finds an assistant able to handle the URL, stores plugin info in
        the data-plugin registry, then schedules the plugin download; once
        the download job completes, the plugin code itself is loaded via a
        done-callback. Returns the scheduled job, or None if the URL is
        already registered.

        Raises AssistantException when no assistant can handle the URL.
        """
        url = url.strip()
        dp = get_data_plugin()
        if dp.find_one({"plugin.url": url}):
            self.logger.info("Plugin '%s' already registered" % url)
            return
        assistant = self.submit(url)
        self.logger.info(
            "For data-plugin URL '%s', selected assistant is: %s" %
            (url, assistant))
        if assistant:
            # register plugin info
            # if a github url was used, by default, we assume it's a manifest-based plugin
            # (we can't know until we have a look at the content). So assistant will have
            # manifest-based loader. If it fails, another assistant with advanced loader will
            # be used to try again.
            dp.update({"_id": assistant.plugin_name}, {
                "$set": {
                    "plugin": {
                        "url": url,
                        "type": assistant.plugin_type,
                        "active": True
                    }
                }
            },
                      upsert=True)
            assistant.handle()
            # load() schedules the download; a single job is expected back
            job = self.data_plugin_manager.load(assistant.plugin_name)
            assert len(job) == 1, "Expecting one job, got: %s" % job
            job = job.pop()

            def loaded(f):
                # runs when the download job finishes; only then is the
                # plugin code available on disk to be loaded
                try:
                    _ = f.result()
                    self.logger.debug(
                        "Plugin '%s' downloaded, now loading manifest" %
                        assistant.plugin_name)
                    assistant.loader.load_plugin()
                except Exception as e:
                    self.logger.exception(
                        "Unable to download plugin '%s': %s" %
                        (assistant.plugin_name, e))

            job.add_done_callback(loaded)
            return job
        else:
            raise AssistantException(
                "Could not find any assistant able to handle URL '%s'" % url)
 def unregister_url(self, url):
     """
     Unregister the data plugin registered under `url`: remove its
     registry document, delete its downloaded code and pop it from the
     dumper/uploader/data-plugin managers.

     Raises:
         AssistantException: when the plugin isn't registered.
     """
     url = url.strip()
     dp = get_data_plugin()
     doc = dp.find_one({"plugin.url": url})
     # BUG FIX: doc is None for an unknown url; previously this crashed
     # below with AttributeError on doc.get() — raise the proper error
     if not doc:
         raise AssistantException("Plugin '%s' is not registered" % url)
     # should be only one but just in case
     dp.remove({"plugin.url": url})
     # delete plugin code so it won't be auto-register
     # by 'local' plugin assistant (issue studio #7)
     if doc.get("download", {}).get("data_folder"):
         codefolder = doc["download"]["data_folder"]
         self.logger.info("Delete plugin source code in '%s'" % codefolder)
         rmdashfr(codefolder)
     assistant = self.submit(url)
     try:
         self.data_plugin_manager.register.pop(assistant.plugin_name)
     except KeyError:
         raise AssistantException("Plugin '%s' is not registered" % url)
     # dumper/uploader may legitimately be absent: pop with default
     self.dumper_manager.register.pop(assistant.plugin_name, None)
     self.uploader_manager.register.pop(assistant.plugin_name, None)
 def load_manifest(self):
     """
     Load and interpret this plugin's manifest.json from its downloaded
     data folder. Silently returns (with a warning) when the plugin data
     isn't available yet; invalidates the plugin when the folder or the
     manifest is missing or broken.
     """
     dp = get_data_plugin()
     p = dp.find_one({"_id":self.plugin_name})
     # BUG FIX: p is None for an unregistered plugin; treat it the same
     # as "data not downloaded yet" instead of raising AttributeError
     if not p or not p.get("download",{}).get("data_folder"):
         # not yet available
         self.logger.warning("Can't find data_folder, not available yet ?")
         return
     df = p["download"]["data_folder"]
     if not os.path.exists(df):
         self.invalidate_plugin("Missing plugin folder '%s'" % df)
         return
     mf = os.path.join(df,"manifest.json")
     if not os.path.exists(mf):
         self.logger.info("No manifest found for plugin: %s" % p["plugin"]["url"])
         self.invalidate_plugin("No manifest found")
         return
     try:
         # context manager so the manifest file handle is closed
         with open(mf) as fin:
             manifest = json.load(fin)
         self.logger.debug("Loading manifest: %s" % pprint.pformat(manifest))
         self.interpret_manifest(manifest)
     except Exception as e:
         self.invalidate_plugin("Error loading manifest: %s" % str(e))
Example #10
0
 def get_sources(self, id=None, debug=False, detailed=False):
     """
     Return information about registered sources, merged from the dump,
     upload and data-plugin managers.

     If `id` is given, return a single source dict enriched with
     `__metadata__`; otherwise return a list of source dicts. `debug`
     returns raw dumper info instead of the summarized form; `detailed`
     is forwarded to sumup_source().

     Raises ValueError when `id` is passed but unknown.
     """
     dm = self.dump_manager
     um = self.upload_manager
     dpm = self.data_plugin_manager
     ids = set()
     if id and id in dm.register:
         ids.add(id)
     elif id and id in um.register:
         ids.add(id)
     elif id and id in dpm.register:
         ids.add(id)
     else:
         # either no id passed, or doesn't exist
         if id and not len(ids):
             raise ValueError("Source %s doesn't exist" % repr(id))
         ids = set(dm.register)
         ids.update(um.register)
         ids.update(dpm.register)
     sources = {}
     bydsrcs = {}
     byusrcs = {}
     bydpsrcs = {}
     plugins = get_data_plugin().find()
     # index each manager's source docs by _id (side-effect comprehensions)
     [bydsrcs.setdefault(src["_id"], src) for src in dm.source_info() if dm]
     [byusrcs.setdefault(src["_id"], src) for src in um.source_info() if um]
     [bydpsrcs.setdefault(src["_id"], src) for src in plugins]
     for _id in ids:
         # start with dumper info
         if dm:
             src = bydsrcs.get(_id)
             if src:
                 if debug:
                     sources[src["name"]] = src
                 else:
                     sources[src["name"]] = self.sumup_source(src, detailed)
         # complete with uploader info
         if um:
             src = byusrcs.get(_id)
             if src:
                 # collection-only source don't have dumpers and only exist in
                 # the uploader manager
                 if not src["_id"] in sources:
                     sources[src["_id"]] = self.sumup_source(src, detailed)
                 if src.get("upload"):
                     for subname in src["upload"].get("jobs", {}):
                         # NOTE(review): keyed by src["name"] here but by
                         # src["_id"] above — assumes they match; the
                         # except below catches the mismatch case
                         try:
                             sources[src["name"]].setdefault(
                                 "upload",
                                 {"sources": {}})["sources"].setdefault(
                                     subname, {})
                             sources[src["name"]]["upload"]["sources"][
                                 subname]["uploader"] = src["upload"][
                                     "jobs"][subname].get("uploader")
                         except Exception as e:
                             logging.error("Source is invalid: %s\n%s" %
                                           (e, pformat(src)))
         # deal with plugin info if any
         if dpm:
             src = bydpsrcs.get(_id)
             if src:
                 assert len(
                     dpm[_id]
                 ) == 1, "Expected only one uploader, got: %s" % dpm[_id]
                 klass = dpm[_id][0]
                 src.pop("_id")
                 if hasattr(klass, "data_plugin_error"):
                     src["error"] = klass.data_plugin_error
                 sources.setdefault(_id, {"data_plugin": {}})
                 if src.get("download", {}).get("err"):
                     src["download"]["error"] = src["download"].pop("err")
                 sources[_id]["data_plugin"] = src
                 sources[_id]["_id"] = _id
                 sources[_id]["name"] = _id
     if id:
         src = list(sources.values()).pop()
         # enrich with metadata (uploader > dumper)
         ks = []
         if dm:
             try:
                 ks.extend(dm.register[id])
             except KeyError:
                 pass
         if um:
             try:
                 ks.extend(um.register[id])
             except KeyError:
                 pass
         for upk in ks:
             # name either from uploader or dumper
             name = getattr(upk, "name", None) or upk.SRC_NAME
             if getattr(upk, "__metadata__", {}).get("src_meta"):
                 src.setdefault("__metadata__", {}).setdefault(name, {})
                 src["__metadata__"][name] = upk.__metadata__["src_meta"]
         # simplify as needed (if only one source in metadata, remove source key level,
         # or if licenses are the same amongst sources, keep one copy)
         if len(src.get("__metadata__", {})) == 1:
             src["__metadata__"] = list(src["__metadata__"].values()).pop()
         elif len(src.get("__metadata__", {})) > 1:
             metas = list(src["__metadata__"].values())
             simplified = [metas.pop()]
             same = True
             while metas:
                 m = metas.pop()
                 # each entry is compared against the first popped one only,
                 # which is sufficient for an all-equal check
                 if not m in simplified:
                     same = False
                     break
             if same:
                 # we consume all of them, ie. they're all equals
                 src["__metadata__"] = list(
                     src["__metadata__"].values()).pop()
             else:
                 # convert to a list of dict (so it's easier to detect if one or more
                 # licenses just by checking if type is dict (one) or array (more))
                 metas = src.pop("__metadata__")
                 src["__metadata__"] = []
                 for m in metas:
                     src["__metadata__"].append({m: metas[m]})
         return src
     else:
         return list(sources.values())
Example #11
0
def migrate_0dot1_to_0dot2():
    """
    mongodb src_dump/data_plugin changed:
        1. "data_folder" and "release" under "download"
        2. "data_folder" and "release" in upload.jobs[subsrc] taken from "download"
        3. no more "err" under "upload"
        4. no more "status" under "upload"
        5. "pending_to_upload" is now "pending": ["upload"]
    """
    src_dump = get_src_dump()
    data_plugin = get_data_plugin()
    for srccol in [src_dump, data_plugin]:
        logging.info("Converting collection %s" % srccol)
        srcs = [src for src in srccol.find()]
        for src in srcs:
            logging.info("\tConverting '%s'" % src["_id"])
            # BUG FIX: reset the dirty flag per *document*; it used to be
            # set once per collection, so after the first converted doc
            # every following doc was saved (and logged as converted)
            # even when untouched
            wasdue = False
            # 1.
            for field in ["data_folder", "release"]:
                if field in src:
                    logging.debug(
                        "\t\t%s: found '%s' in document, moving under 'download'"
                        % (src["_id"], field))
                    try:
                        src["download"][field] = src.pop(field)
                        wasdue = True
                    except KeyError as e:
                        logging.warning(
                            "\t\t%s: no such field '%s' found, skip it (error: %s)"
                            % (src["_id"], field, e))
            # 2.
            for subsrc_name in src.get("upload", {}).get("jobs", {}):
                for field in ["data_folder", "release"]:
                    if field not in src["upload"]["jobs"][subsrc_name]:
                        logging.debug(
                            "\t\t%s: no '%s' found in upload jobs, taking it from 'download' (or from root keys)"
                            % (src["_id"], field))
                        try:
                            src["upload"]["jobs"][subsrc_name][field] = src[
                                "download"][field]
                            wasdue = True
                        except KeyError:
                            # fall back to a root-level key of the document
                            try:
                                src["upload"]["jobs"][subsrc_name][
                                    field] = src[field]
                                wasdue = True
                            except KeyError:
                                logging.warning(
                                    "\t\t%s: no such field '%s' found, skip it"
                                    % (src["_id"], field))
            # 3. & 4.
            for field in ["err", "status"]:
                if field in src.get("upload", {}):
                    logging.debug("\t\t%s: removing '%s' key from 'upload'" %
                                  (src["_id"], field))
                    src["upload"].pop(field)
                    wasdue = True
            # 5.
            if "pending_to_upload" in src:
                logging.debug(
                    "\t%s: found 'pending_to_upload' field, moving to 'pending' list"
                    % src["_id"])
                src.pop("pending_to_upload")
                wasdue = True
                if "upload" not in src.get("pending", []):
                    src.setdefault("pending", []).append("upload")
            if wasdue:
                logging.info("\tFinishing converting document for '%s'" %
                             src["_id"])
                srccol.save(src)
            else:
                logging.info("\tDocument for '%s' already converted" %
                             src["_id"])
Example #12
0
 def prepare_src_dump(self):
     """Bind the data-plugin registry as this dumper's src_dump collection
     and fetch this source's document (empty dict when not registered yet)."""
     self.src_dump = get_data_plugin()
     existing = self.src_dump.find_one({'_id': self.src_name})
     self.src_doc = existing if existing else {}