def register_loader(self):
    dp = get_data_plugin()
    dp.update({"_id": self.plugin_name},
              {"$set": {"plugin.loader": self.loader.loader_type}},
              upsert=True)
def unregister_url(self, url=None, name=None):
    dp = get_data_plugin()
    if url:
        url = url.strip()
        doc = dp.find_one({"plugin.url": url})
    elif name:
        doc = dp.find_one({"_id": name})
    else:
        raise ValueError("Specify 'url' or 'name'")
    if not doc:
        raise AssistantException(
            "Plugin is not registered (url=%s, name=%s)" % (url, name))
    url = doc["plugin"]["url"]
    # should be only one but just in case
    dp.remove({"_id": doc["_id"]})
    # delete plugin code so it won't be auto-registered
    # by 'local' plugin assistant (issue studio #7)
    if doc.get("download", {}).get("data_folder"):
        codefolder = doc["download"]["data_folder"]
        self.logger.info("Deleting plugin source code in '%s'" % codefolder)
        rmdashfr(codefolder)
    assistant = self.submit(url)
    try:
        self.data_plugin_manager.register.pop(assistant.plugin_name)
    except KeyError:
        raise AssistantException("Plugin '%s' is not registered" % url)
    self.dumper_manager.register.pop(assistant.plugin_name, None)
    self.uploader_manager.register.pop(assistant.plugin_name, None)
def get_plugin_obj(self):
    dp = get_data_plugin()
    plugin = dp.find_one({"_id": self.plugin_name})
    if not plugin or not plugin.get("download", {}).get("data_folder"):
        raise LoaderException("Can't find data_folder, not available yet?")
    return plugin
def register_url(self, url):
    url = url.strip()
    dp = get_data_plugin()
    if dp.find_one({"plugin.url": url}):
        raise AssistantException("Plugin '%s' already registered" % url)
    assistant = self.submit(url)
    if assistant:
        # register plugin info
        dp.update({"_id": assistant.plugin_name},
                  {"$set": {"plugin": {"url": url,
                                       "type": assistant.plugin_type,
                                       "active": True}}},
                  upsert=True)
        assistant.handle()
        job = self.data_plugin_manager.load(assistant.plugin_name)
        assert len(job) == 1, "Expecting one job, got: %s" % job
        job = job.pop()

        def loaded(f):
            try:
                _ = f.result()
                self.logger.debug("Plugin '%s' loaded, now loading manifest" %
                                  assistant.plugin_name)
                assistant.load_manifest()
            except Exception as e:
                self.logger.exception("Unable to load plugin '%s': %s" %
                                      (assistant.plugin_name, e))

        job.add_done_callback(loaded)
        return job
    else:
        raise AssistantException(
            "Could not find any assistant able to handle URL '%s'" % url)
def export_mapping(self, plugin_name, folder):
    res = {
        "mapping": {
            "status": None,
            "file": None,
            "message": None,
            "origin": None
        }
    }
    # first check if the plugin defines a custom mapping in its manifest.
    # If that's the case, we don't need to export the mapping here,
    # as it'll be exported along with the "uploader" code
    plugindoc = get_data_plugin().find_one({"_id": plugin_name})
    assert plugindoc, "Can't find plugin named '%s'" % plugin_name
    plugin_folder = plugindoc.get("download", {}).get("data_folder")
    assert plugin_folder, "Can't find plugin folder for '%s'" % plugin_name
    try:
        manifest = json.load(open(os.path.join(plugin_folder, "manifest.json")))
        if "mapping" in manifest.get("uploader", {}):
            res["mapping"]["message"] = "Custom mapping included in uploader export"
            res["mapping"]["status"] = "warning"
            res["mapping"]["origin"] = "custom"
            return res
    except Exception as e:
        self.logger.error("Can't read manifest while exporting code: %s" % e)
    # try to export mapping from src_master (official), then from src_dump (inspection)
    doc = get_src_master().find_one({"_id": plugin_name})
    if doc:
        mapping = doc.get("mapping")
        res["mapping"]["origin"] = "registered"
    else:
        doc = get_src_dump().find_one({"_id": plugin_name})
        mapping = doc and doc.get("inspect", {}).get("jobs", {}).get(plugin_name, {}).\
            get("inspect", {}).get("results", {}).get("mapping")
        res["mapping"]["origin"] = "inspection"
    if not mapping:
        res["mapping"]["origin"] = None
        res["mapping"]["status"] = "warning"
        res["mapping"]["message"] = "Can't find registered or generated (inspection) mapping"
        return res
    else:
        ufile = os.path.join(folder, "upload.py")
        strmap, _ = yapf_api.FormatCode(pprint.pformat(mapping))
        with open(ufile, "a") as fout:
            fout.write("""
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix=" " * 2))
        res["mapping"]["file"] = ufile
        res["mapping"]["status"] = "ok"
        return res
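# Illustrative sketch (not part of the original module): this mimics how
# export_mapping() renders a mapping dict into the get_mapping() classmethod
# appended to upload.py, assuming pprint/textwrap are imported as in this
# module and skipping the yapf formatting pass. The helper name and the
# example mapping are hypothetical.
def _preview_exported_mapping(mapping):
    strmap = pprint.pformat(mapping)
    return """
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix=" " * 2)

# e.g. print(_preview_exported_mapping({"gene": {"type": "keyword"}}))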
def load(self, autodiscover=True):
    """
    Load plugins registered in the internal Hub database and generate/register
    dumpers & uploaders accordingly.
    If autodiscover is True, also search DATA_PLUGIN_FOLDER for existing plugin
    directories not yet registered in the database, and register them
    automatically (see the layout sketch after this function).
    """
    plugin_dirs = []
    if autodiscover:
        try:
            plugin_dirs = os.listdir(btconfig.DATA_PLUGIN_FOLDER)
        except FileNotFoundError as e:
            raise AssistantException("Invalid DATA_PLUGIN_FOLDER: %s" % e)
    dp = get_data_plugin()
    cur = dp.find()
    for plugin in cur:
        # remove plugin from the folder list if it's already registered
        if plugin_dirs and plugin["_id"] in plugin_dirs:
            plugin_dirs.remove(plugin["_id"])
        try:
            self.load_plugin(plugin)
        except Exception as e:
            self.logger.warning("Couldn't load plugin '%s': %s" %
                                (plugin["_id"], e))
            continue
    # any still unregistered? (note: list is always empty if autodiscover=False)
    if plugin_dirs:
        for pdir in plugin_dirs:
            fulldir = os.path.join(btconfig.DATA_PLUGIN_FOLDER, pdir)
            # basic sanity check to make sure it's a plugin
            try:
                if "manifest.json" in os.listdir(fulldir) and \
                        json.load(open(os.path.join(fulldir, "manifest.json"))):
                    self.logger.info(
                        "Found unregistered plugin '%s', auto-registering it" % pdir)
                    self.register_url("local://%s" % pdir.strip().strip("/"))
                else:
                    self.logger.warning(
                        "Directory '%s' doesn't contain a plugin, skipping it" % pdir)
                    continue
            except Exception as e:
                self.logger.exception(
                    "Couldn't auto-register plugin '%s': %s" % (pdir, e))
                continue
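# Illustrative sketch (not part of the original module): for autodiscovery to
# pick up a plugin, a directory under btconfig.DATA_PLUGIN_FOLDER only needs
# to contain a parseable manifest.json; it is then registered through
# register_url() as "local://<dirname>". Folder and file names below are
# hypothetical.
#
#   <DATA_PLUGIN_FOLDER>/
#       myplugin/
#           manifest.json   # presence + valid JSON triggers auto-registration
#           parser.py       # plugin code, referenced from the manifest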
def register_url(self, url):
    url = url.strip()
    dp = get_data_plugin()
    if dp.find_one({"plugin.url": url}):
        self.logger.info("Plugin '%s' already registered" % url)
        return
    assistant = self.submit(url)
    self.logger.info("For data-plugin URL '%s', selected assistant is: %s" %
                     (url, assistant))
    if assistant:
        # register plugin info
        # If a github URL was used, by default we assume it's a manifest-based
        # plugin (we can't know until we have a look at the content), so the
        # assistant will have a manifest-based loader. If it fails, another
        # assistant with an advanced loader will be used to try again.
        dp.update({"_id": assistant.plugin_name},
                  {"$set": {
                      "plugin": {
                          "url": url,
                          "type": assistant.plugin_type,
                          "active": True
                      }
                  }},
                  upsert=True)
        assistant.handle()
        job = self.data_plugin_manager.load(assistant.plugin_name)
        assert len(job) == 1, "Expecting one job, got: %s" % job
        job = job.pop()

        def loaded(f):
            try:
                _ = f.result()
                self.logger.debug("Plugin '%s' downloaded, now loading manifest" %
                                  assistant.plugin_name)
                assistant.loader.load_plugin()
            except Exception as e:
                self.logger.exception("Unable to download plugin '%s': %s" %
                                      (assistant.plugin_name, e))

        job.add_done_callback(loaded)
        return job
    else:
        raise AssistantException(
            "Could not find any assistant able to handle URL '%s'" % url)
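# Illustrative usage sketch (not part of the original module): registering a
# plugin URL returns the download job (a future); once it completes, the
# done-callback asks the assistant's loader to load the plugin code/manifest.
# The manager instance and URL below are hypothetical.
#
#   job = assistant_manager.register_url("https://github.com/org/myplugin.git")
#   # ... when `job` completes, dumper/uploader classes are generated and
#   # registered for the plugin by the assistant's loader.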
def unregister_url(self, url):
    url = url.strip()
    dp = get_data_plugin()
    doc = dp.find_one({"plugin.url": url})
    if not doc:
        raise AssistantException("Plugin '%s' is not registered" % url)
    # should be only one but just in case
    dp.remove({"plugin.url": url})
    # delete plugin code so it won't be auto-registered
    # by 'local' plugin assistant (issue studio #7)
    if doc.get("download", {}).get("data_folder"):
        codefolder = doc["download"]["data_folder"]
        self.logger.info("Deleting plugin source code in '%s'" % codefolder)
        rmdashfr(codefolder)
    assistant = self.submit(url)
    try:
        self.data_plugin_manager.register.pop(assistant.plugin_name)
    except KeyError:
        raise AssistantException("Plugin '%s' is not registered" % url)
    self.dumper_manager.register.pop(assistant.plugin_name, None)
    self.uploader_manager.register.pop(assistant.plugin_name, None)
def load_manifest(self):
    dp = get_data_plugin()
    p = dp.find_one({"_id": self.plugin_name})
    if not p.get("download", {}).get("data_folder"):
        # not yet available
        self.logger.warning("Can't find data_folder, not available yet?")
        return
    df = p["download"]["data_folder"]
    if os.path.exists(df):
        mf = os.path.join(df, "manifest.json")
        if os.path.exists(mf):
            try:
                manifest = json.load(open(mf))
                self.logger.debug("Loading manifest: %s" % pprint.pformat(manifest))
                self.interpret_manifest(manifest)
            except Exception as e:
                self.invalidate_plugin("Error loading manifest: %s" % str(e))
        else:
            self.logger.info("No manifest found for plugin: %s" % p["plugin"]["url"])
            self.invalidate_plugin("No manifest found")
    else:
        self.invalidate_plugin("Missing plugin folder '%s'" % df)
def get_sources(self, id=None, debug=False, detailed=False):
    dm = self.dump_manager
    um = self.upload_manager
    dpm = self.data_plugin_manager
    ids = set()
    if id and id in dm.register:
        ids.add(id)
    elif id and id in um.register:
        ids.add(id)
    elif id and id in dpm.register:
        ids.add(id)
    else:
        # either no id was passed, or it doesn't exist
        if id and not len(ids):
            raise ValueError("Source %s doesn't exist" % repr(id))
        ids = set(dm.register)
        ids.update(um.register)
        ids.update(dpm.register)
    sources = {}
    bydsrcs = {}
    byusrcs = {}
    bydpsrcs = {}
    plugins = get_data_plugin().find()
    if dm:
        for src in dm.source_info():
            bydsrcs.setdefault(src["_id"], src)
    if um:
        for src in um.source_info():
            byusrcs.setdefault(src["_id"], src)
    for src in plugins:
        bydpsrcs.setdefault(src["_id"], src)
    for _id in ids:
        # start with dumper info
        if dm:
            src = bydsrcs.get(_id)
            if src:
                if debug:
                    sources[src["name"]] = src
                else:
                    sources[src["name"]] = self.sumup_source(src, detailed)
        # complete with uploader info
        if um:
            src = byusrcs.get(_id)
            if src:
                # collection-only sources don't have dumpers and only exist in
                # the uploader manager
                if src["_id"] not in sources:
                    sources[src["_id"]] = self.sumup_source(src, detailed)
                if src.get("upload"):
                    for subname in src["upload"].get("jobs", {}):
                        try:
                            sources[src["name"]].setdefault(
                                "upload", {"sources": {}})["sources"].setdefault(subname, {})
                            sources[src["name"]]["upload"]["sources"][subname]["uploader"] = \
                                src["upload"]["jobs"][subname].get("uploader")
                        except Exception as e:
                            logging.error("Source is invalid: %s\n%s" % (e, pformat(src)))
        # deal with plugin info if any
        if dpm:
            src = bydpsrcs.get(_id)
            if src:
                assert len(dpm[_id]) == 1, \
                    "Expected only one uploader, got: %s" % dpm[_id]
                klass = dpm[_id][0]
                src.pop("_id")
                if hasattr(klass, "data_plugin_error"):
                    src["error"] = klass.data_plugin_error
                sources.setdefault(_id, {"data_plugin": {}})
                if src.get("download", {}).get("err"):
                    src["download"]["error"] = src["download"].pop("err")
                sources[_id]["data_plugin"] = src
                sources[_id]["_id"] = _id
                sources[_id]["name"] = _id
    if id:
        src = list(sources.values()).pop()
        # enrich with metadata (uploader > dumper)
        ks = []
        if dm:
            try:
                ks.extend(dm.register[id])
            except KeyError:
                pass
        if um:
            try:
                ks.extend(um.register[id])
            except KeyError:
                pass
        for upk in ks:
            # name comes either from the uploader or the dumper
            name = getattr(upk, "name", None) or upk.SRC_NAME
            if getattr(upk, "__metadata__", {}).get("src_meta"):
                src.setdefault("__metadata__", {}).setdefault(name, {})
                src["__metadata__"][name] = upk.__metadata__["src_meta"]
        # simplify as needed (if there's only one source in metadata, remove the
        # source key level; if licenses are the same amongst sources, keep one copy)
        if len(src.get("__metadata__", {})) == 1:
            src["__metadata__"] = list(src["__metadata__"].values()).pop()
        elif len(src.get("__metadata__", {})) > 1:
            metas = list(src["__metadata__"].values())
            simplified = [metas.pop()]
            same = True
            while metas:
                m = metas.pop()
                if m not in simplified:
                    same = False
                    break
            if same:
                # we consumed all of them, i.e. they're all equal
                src["__metadata__"] = list(src["__metadata__"].values()).pop()
            else:
                # convert to a list of dicts (so it's easier to detect whether there
                # is one license or more just by checking if the type is a dict (one)
                # or a list (more))
                metas = src.pop("__metadata__")
                src["__metadata__"] = []
                for m in metas:
                    src["__metadata__"].append({m: metas[m]})
        return src
    else:
        return list(sources.values())
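# Illustrative sketch (not part of the original module): rough shape of one
# entry returned by get_sources(id="mysrc"), based on the keys set above. The
# per-source summary fields come from sumup_source() and from the plugin
# document, so actual content may differ; all values below are hypothetical.
#
#   {"_id": "mysrc", "name": "mysrc",
#    "upload": {"sources": {"mysrc": {"uploader": "MysrcUploader"}}},
#    "data_plugin": {"plugin": {"url": "local://mysrc", "active": True},
#                    "download": {"data_folder": "/data/mysrc"}},
#    "__metadata__": {...}}  # src_meta from uploader/dumper classes, if any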
def migrate_0dot1_to_0dot2():
    """
    mongodb src_dump/data_plugin documents changed:
    1. "data_folder" and "release" moved under "download"
    2. "data_folder" and "release" in upload.jobs[subsrc] taken from "download"
    3. no more "err" under "upload"
    4. no more "status" under "upload"
    5. "pending_to_upload" is now "pending": ["upload"]
    (see the before/after sketch following this function)
    """
    src_dump = get_src_dump()
    data_plugin = get_data_plugin()
    for srccol in [src_dump, data_plugin]:
        logging.info("Converting collection %s" % srccol)
        srcs = [src for src in srccol.find()]
        for src in srcs:
            logging.info("\tConverting '%s'" % src["_id"])
            wasdue = False
            # 1.
            for field in ["data_folder", "release"]:
                if field in src:
                    logging.debug(
                        "\t\t%s: found '%s' in document, moving under 'download'" %
                        (src["_id"], field))
                    try:
                        src["download"][field] = src.pop(field)
                        wasdue = True
                    except KeyError as e:
                        logging.warning(
                            "\t\t%s: no such field '%s' found, skipping it (error: %s)" %
                            (src["_id"], field, e))
            # 2.
            for subsrc_name in src.get("upload", {}).get("jobs", {}):
                for field in ["data_folder", "release"]:
                    if field not in src["upload"]["jobs"][subsrc_name]:
                        logging.debug(
                            "\t\t%s: no '%s' found in upload jobs, taking it from 'download' (or from root keys)" %
                            (src["_id"], field))
                        try:
                            src["upload"]["jobs"][subsrc_name][field] = src["download"][field]
                            wasdue = True
                        except KeyError:
                            try:
                                src["upload"]["jobs"][subsrc_name][field] = src[field]
                                wasdue = True
                            except KeyError:
                                logging.warning(
                                    "\t\t%s: no such field '%s' found, skipping it" %
                                    (src["_id"], field))
            # 3. & 4.
            for field in ["err", "status"]:
                if field in src.get("upload", {}):
                    logging.debug("\t\t%s: removing '%s' key from 'upload'" %
                                  (src["_id"], field))
                    src["upload"].pop(field)
                    wasdue = True
            # 5.
            if "pending_to_upload" in src:
                logging.debug(
                    "\t%s: found 'pending_to_upload' field, moving to 'pending' list" %
                    src["_id"])
                src.pop("pending_to_upload")
                wasdue = True
                if "upload" not in src.get("pending", []):
                    src.setdefault("pending", []).append("upload")
            if wasdue:
                logging.info("\tFinished converting document for '%s'" % src["_id"])
                srccol.save(src)
            else:
                logging.info("\tDocument for '%s' already converted" % src["_id"])
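# Illustrative sketch (not part of the original module): shape of a src_dump
# document before and after migrate_0dot1_to_0dot2(), following the numbered
# changes in the docstring above. Source name and field values are hypothetical.
#
# before: {"_id": "mysrc", "data_folder": "/data/mysrc", "release": "2019-01",
#          "download": {"status": "success"},
#          "upload": {"status": "success", "jobs": {"mysrc": {}}},
#          "pending_to_upload": True}
# after:  {"_id": "mysrc",
#          "download": {"status": "success",
#                       "data_folder": "/data/mysrc", "release": "2019-01"},
#          "upload": {"jobs": {"mysrc": {"data_folder": "/data/mysrc",
#                                        "release": "2019-01"}}},
#          "pending": ["upload"]}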
def prepare_src_dump(self):
    # data-plugin source documents live in the data_plugin collection,
    # which plays the role of src_dump for plugin dumpers
    self.src_dump = get_data_plugin()
    self.src_doc = self.src_dump.find_one({'_id': self.src_name}) or {}