def load_broadinstitute_exac(data_folder):
    t0 = time.time()
    exacs = load_broadinstitute_exac_all(data_folder)
    for k, v in load_broadinstitute_exac_nontcga(data_folder).items():
        try:
            exacs[k]["exac"]["nontcga"] = v["exac"]["nontcga"]
        except KeyError:
            exacs[k] = v
    for k, v in load_broadinstitute_exac_nonpsych(data_folder).items():
        try:
            exacs[k]["exac"]["nonpsych"] = v["exac"]["nonpsych"]
        except KeyError:
            exacs[k] = v

    logging.info("Convert transcript ID to EntrezID")
    from ..ensembl.parser import EnsemblParser
    from biothings.utils.hub_db import get_src_dump
    ensembl_doc = get_src_dump().find_one({"_id": "ensembl"}) or {}
    ensembl_dir = ensembl_doc.get("data_folder")
    assert ensembl_dir, "Can't find Ensembl data directory (used for id conversion)"
    ensembl_parser = EnsemblParser(ensembl_dir)
    ensembl_parser._load_ensembl2entrez_li()
    ensembl2entrez = list2dict(ensembl_parser.ensembl2entrez_li, 0, alwayslist=True)
    for line in tabfile_feeder(os.path.join(ensembl_dir, "gene_ensembl__translation__main.txt")):
        _, ensid, transid, _ = line
        if transid in exacs:
            data = exacs.pop(transid)  # pop so no-match means no data in the end
            for entrezid in ensembl2entrez.get(ensid, [ensid]):
                exacs[entrezid] = data
    return exacs

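# Hedged sketch (not part of the parser above): a toy, self-contained illustration of the
# transcript-to-Entrez remapping step, using made-up IDs and in-memory dicts instead of the
# real ExAC/Ensembl files. It only demonstrates the pop/reassign logic.
def _demo_transcript_to_entrez():
    exacs = {"ENST0001": {"exac": {"af": 0.01}}}      # keyed by Ensembl transcript ID
    ensembl2entrez = {"ENSG0001": ["1017"]}           # Ensembl gene ID -> Entrez gene ID(s)
    rows = [("x", "ENSG0001", "ENST0001", "x")]       # (_, ensembl gene, transcript, _)
    for _, ensid, transid, _ in rows:
        if transid in exacs:
            data = exacs.pop(transid)                 # pop so unmatched transcripts are dropped
            for entrezid in ensembl2entrez.get(ensid, [ensid]):
                exacs[entrezid] = data
    assert exacs == {"1017": {"exac": {"af": 0.01}}}
    return exacs
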
def source_info(self, source=None):
    src_dump = get_src_dump()
    src_ids = list(self.register.keys())
    if source:
        if source in src_ids:
            src_ids = [source]
        else:
            return None
    res = []
    for _id in src_ids:
        src = src_dump.find_one({"_id": _id}) or {}
        assert len(self.register[_id]) == 1, \
            "Found more than one dumper for source '%s': %s" % (_id, self.register[_id])
        dumper = self.register[_id][0]
        src.setdefault("download", {})
        src["download"]["dumper"] = {
            "name": "%s.%s" % (inspect.getmodule(dumper).__name__, dumper.__name__),
            "bases": ["%s.%s" % (inspect.getmodule(k).__name__, k.__name__)
                      for k in dumper.__bases__],
            "manual": issubclass(dumper, ManualDumper),
        }
        src["name"] = _id
        src["_id"] = _id
        res.append(src)
    if source:
        return res.pop()
    else:
        return res

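# Hedged sketch: the shape of one element returned by source_info() above, shown as a plain
# dict with made-up values. Only the keys set by the method itself ("download.dumper",
# "name", "_id") are guaranteed; anything else comes from the existing src_dump document.
_example_dumper_source_info = {
    "_id": "clinvar",
    "name": "clinvar",
    "download": {
        "dumper": {
            "name": "hub.dataload.sources.clinvar.dumper.ClinvarDumper",  # hypothetical class
            "bases": ["biothings.hub.dataload.dumper.FTPDumper"],         # hypothetical base
            "manual": False,
        }
    },
}
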
def main(confirm=True):
    src_dump = get_src_dump()
    ensembl_doc = src_dump.find_one({"_id": "ensembl"}) or {}
    ENSEMBL_DATA_FOLDER = ensembl_doc.get("data_folder")
    assert ENSEMBL_DATA_FOLDER, "Can't find Ensembl data folder"
    entrez_doc = src_dump.find_one({"_id": "entrez"}) or {}
    ENTREZ_DATA_FOLDER = entrez_doc.get("data_folder")
    assert ENTREZ_DATA_FOLDER, "Can't find Entrez data folder"
    gene_ensembl_1_xref_dm_file = os.path.join(
        ENSEMBL_DATA_FOLDER, "gene_ensembl__xref_entrezgene__dm.txt")
    gene_ensembl_2_main_file = os.path.join(
        ENSEMBL_DATA_FOLDER, "gene_ensembl__gene__main.txt")
    gene2ensembl_file = os.path.join(ENTREZ_DATA_FOLDER, "gene2ensembl.gz")
    gene_main_file = os.path.join(ENTREZ_DATA_FOLDER, "gene_info.gz")
    outfile = os.path.join(ENSEMBL_DATA_FOLDER, "gene_ensembl__gene__extra.txt")

    multi_mapping_dict, total_ensembl_IDs = find_multiple_mappings_from_entrezgene_file(
        gene_ensembl_1_xref_dm_file)
    ensembl_dict = create_ensembl_gene_id_dict(gene_ensembl_2_main_file, multi_mapping_dict)
    ensembl_dict, ensembl_match_count = find_ncbi_ids_from_gene2ensembl(
        ensembl_dict, gene2ensembl_file)
    ncbi_id_symbols = find_ncbi_symbols(gene_main_file, ensembl_dict)
    mapping_generator = merge_mapping(ensembl_dict, ncbi_id_symbols, add_source=False)
    total_mapped = write_mapping_file(mapping_generator, outfile, confirm=confirm)
    run_stats(total_ensembl_IDs, ensembl_dict, ensembl_match_count, total_mapped)

def prepare_src_dump(self):
    src_dump = get_src_dump()
    # just populate/initiate an src_dump record (b/c no dump before) if needed
    self.src_doc = src_dump.find_one({'_id': self.main_source})
    if not self.src_doc:
        src_dump.save({"_id": self.main_source})
        self.src_doc = src_dump.find_one({'_id': self.main_source})
    return src_dump

def export_mapping(self, plugin_name, folder):
    res = {
        "mapping": {
            "status": None,
            "file": None,
            "message": None,
            "origin": None
        }
    }
    # first check if plugin defines a custom mapping in manifest;
    # if that's the case, we don't need to export mapping there
    # as it'll be exported with "uploader" code
    plugindoc = get_data_plugin().find_one({"_id": plugin_name})
    assert plugindoc, "Can't find plugin named '%s'" % plugin_name
    plugin_folder = plugindoc.get("download", {}).get("data_folder")
    assert plugin_folder, "Can't find plugin folder for '%s'" % plugin_name
    try:
        manifest = json.load(open(os.path.join(plugin_folder, "manifest.json")))
        if "mapping" in manifest.get("uploader", {}):
            res["mapping"]["message"] = "Custom mapping included in uploader export"
            res["mapping"]["status"] = "warning"
            res["mapping"]["origin"] = "custom"
            return res
    except Exception as e:
        self.logger.error("Can't read manifest while exporting code: %s" % e)
    # try to export mapping from src_master (official)
    doc = get_src_master().find_one({"_id": plugin_name})
    if doc:
        mapping = doc.get("mapping")
        res["mapping"]["origin"] = "registered"
    else:
        doc = get_src_dump().find_one({"_id": plugin_name})
        mapping = doc and doc.get("inspect", {}).get("jobs", {}).get(plugin_name, {}).\
            get("inspect", {}).get("results", {}).get("mapping")
        res["mapping"]["origin"] = "inspection"
    if not mapping:
        res["mapping"]["origin"] = None
        res["mapping"]["status"] = "warning"
        res["mapping"]["message"] = "Can't find registered or generated (inspection) mapping"
        return res
    else:
        ufile = os.path.join(folder, "upload.py")
        strmap, _ = yapf_api.FormatCode(pprint.pformat(mapping))
        with open(ufile, "a") as fout:
            fout.write("""
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix=" " * 2))
        res["mapping"]["file"] = ufile
        res["mapping"]["status"] = "ok"
        return res

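# Hedged sketch of the kind of fragment export_mapping() appends to upload.py, using only
# the stdlib (pprint/textwrap) and a toy mapping; the real code additionally runs the
# pretty-printed mapping through yapf before embedding it.
import pprint
import textwrap

def _demo_mapping_fragment():
    mapping = {"gene": {"properties": {"symbol": {"type": "keyword"}}}}
    strmap = pprint.pformat(mapping)
    fragment = """
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix=" " * 2)
    return fragment  # this text is appended to the exported upload.py
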
def clean_stale_status(self):
    src_dump = get_src_dump()
    srcs = src_dump.find()
    for src in srcs:
        jobs = src.get("inspect", {}).get("jobs", {})
        for subsrc in jobs:
            if jobs[subsrc].get("status") == "inspecting":
                logging.warning(
                    "Found stale datasource '%s', marking inspect status as 'canceled'"
                    % src["_id"])
                jobs[subsrc]["status"] = "canceled"
        src_dump.replace_one({"_id": src["_id"]}, src)

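# Hedged sketch: a made-up src_dump document showing what clean_stale_status() above
# rewrites. A sub-source job left in "inspecting" (e.g. after a hub crash) is simply
# flipped to "canceled"; nothing else in the document is touched.
def _demo_clean_stale_inspect():
    src = {"_id": "clinvar", "inspect": {"jobs": {"clinvar_hg38": {"status": "inspecting"}}}}
    jobs = src.get("inspect", {}).get("jobs", {})
    for subsrc in jobs:
        if jobs[subsrc].get("status") == "inspecting":
            jobs[subsrc]["status"] = "canceled"
    assert src["inspect"]["jobs"]["clinvar_hg38"]["status"] == "canceled"
    return src
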
def __init__(self, source_list, dump_manager, upload_manager, data_plugin_manager):
    self._orig_source_list = source_list
    self.source_list = None
    self.dump_manager = dump_manager
    self.upload_manager = upload_manager
    self.data_plugin_manager = data_plugin_manager
    self.reload()
    self.src_master = get_src_master()
    self.src_dump = get_src_dump()
    # honoring BaseSourceManager interface (gloups...)
    self.register = {}

def create_todump_list(self, force=False):
    uni_doc = get_src_dump().find_one({"_id": UniprotDumper.SRC_NAME}) or {}
    if uni_doc:
        remotefile = os.path.join(uni_doc["download"]["data_folder"],
                                  self.__class__.UNIPROT_FILE)
        if not os.path.exists(remotefile):
            self.logger.warning("File '%s' doesn't exist (yet?)" % self.__class__.UNIPROT_FILE)
            return
        self.release = uni_doc["download"]["release"]
        localfile = os.path.join(self.current_data_folder, self.__class__.UNIPROT_FILE)
        if force or not os.path.exists(localfile) or self.remote_is_better(remotefile, localfile):
            self.to_dump.append({"remote": remotefile, "local": localfile})
    else:
        self.logger.error("Dependent uniprot datasource has not been loaded (no src_dump doc)")

def load(self, aslist=False):
    '''
    loading ncbi "homologene.data" file,
    adding "homologene" field in gene doc
    '''
    from biothings.utils.hub_db import get_src_dump
    homo_d = tab2dict(self.datafile, (2, 1), 0, header=0)
    entrez_doc = get_src_dump().find_one({"_id": "entrez"}) or {}
    entrez_dir = entrez_doc.get("data_folder")
    assert entrez_dir, "Can't find Entrez data directory"
    DATAFILE = os.path.join(entrez_dir, 'gene_history.gz')
    assert os.path.exists(DATAFILE), "gene_history.gz is missing (entrez_dir: %s)" % entrez_dir
    retired2gene = tab2dict(DATAFILE, (1, 2), 1, alwayslist=0,
                            includefn=lambda ld: ld[1] != '-')
    for id in list(homo_d.keys()):
        homo_d[retired2gene.get(id, id)] = homo_d[id]

    with open(self.datafile) as df:
        homologene_d = {}
        doc_li = []
        print()
        geneid_d = get_geneid_d(entrez_dir, self.species_li,
                                load_cache=False, save_cache=False, only_for=homo_d)
        for line in df:
            ld = line.strip().split('\t')
            hm_id, tax_id, geneid = [int(x) for x in ld[:3]]
            if (self.taxid_set is None or tax_id in self.taxid_set) and geneid in geneid_d:
                # for selected species only, and also ignore geneids that do not match
                # any existing gene doc. In case the original geneid is retired, it is
                # replaced with the new one, if available.
                geneid = geneid_d[geneid]
                genes = homologene_d.get(hm_id, [])
                genes.append((tax_id, geneid))
                homologene_d[hm_id] = genes
                doc_li.append(dict(_id=str(geneid), taxid=tax_id,
                                   homologene={'id': hm_id}))

    for i, gdoc in enumerate(doc_li):
        gdoc['homologene']['genes'] = self._sorted_homologenes(
            set(homologene_d[gdoc['homologene']['id']]))
        doc_li[i] = gdoc

    if aslist:
        return doc_li
    else:
        gene_d = dict([(d['_id'], d) for d in doc_li])
        return gene_d

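# Hedged sketch: the shape of one gene document produced by load() above, with made-up IDs.
# "homologene.genes" holds (taxid, geneid) pairs for all genes sharing the same HomoloGene
# cluster id.
_example_homologene_doc = {
    "_id": "1017",
    "taxid": 9606,
    "homologene": {
        "id": 74409,                               # hypothetical HomoloGene cluster id
        "genes": [(9606, 1017), (10090, 12567)],   # (taxid, entrez geneid) pairs
    },
}
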
def create_todump_list(self, force=False):
    self.from_src = get_src_dump().find_one({"_id": self.__class__.FROM_SOURCE["name"]})
    self.to_src = get_src_dump().find_one({"_id": self.__class__.TO_SOURCE["name"]})
    from_folder = self.from_src.get("download", {}).get("data_folder")
    to_folder = self.to_src.get("download", {}).get("data_folder")
    assert from_folder, "Couldn't find folder for source %s (tried '%s')" % (self.from_src, from_folder)
    assert to_folder, "Couldn't find folder for source %s (tried '%s')" % (self.to_src, to_folder)
    self.set_release()  # so we can generate new_data_folder
    for attr, folder in [("FROM_SOURCE", from_folder), ("TO_SOURCE", to_folder)]:
        files = getattr(self.__class__, attr, {}).get("files")
        assert files, "No files specified in %s" % attr
        for one_file in files:
            remote_file = os.path.join(folder, one_file)
            assert os.path.exists(remote_file), \
                "Remote file '%s' doesn't exist in %s" % (remote_file, attr)
            new_localfile = os.path.join(self.new_data_folder, one_file)
            current_localfile = os.path.join(self.current_data_folder, one_file)
            try:
                remote_better = self.remote_is_better(remote_file, current_localfile)
            except FileNotFoundError:
                # no local file, we want the remote
                remote_better = True
            if force or current_localfile is None or remote_better:
                self.to_dump.append({"remote": remote_file, "local": new_localfile})

def clean_stale_status(self):
    src_dump = get_src_dump()
    srcs = src_dump.find()
    for src in srcs:
        jobs = src.get("upload", {}).get("jobs", {})
        dirty = False
        for subsrc in jobs:
            if jobs[subsrc].get("status") == "uploading":
                logging.warning(
                    "Found stale datasource '%s', marking upload status as 'canceled'"
                    % src["_id"])
                jobs[subsrc]["status"] = "canceled"
                dirty = True
        if dirty:
            src_dump.replace_one({"_id": src["_id"]}, src)

def load_data(self, data_folder):
    """
    Loads gene data from NCBI's gene2refseq.gz file.
    Parses it based on genomic position data and refseq status, using the list of
    taxids from get_ref_microbe_taxids() as a lookup table.
    :return:
    """
    refsrc = get_src_dump().find_one({"_id": "ref_microbe_taxids"})
    assert refsrc, "ref_microbe_taxids dump not found"
    taxids_file = os.path.join(refsrc["download"]["data_folder"], "ref_microbe_taxids.pyobj")
    datafile = os.path.join(data_folder, 'gene2refseq.gz')
    taxids = loadobj(taxids_file)
    taxid_set = set(taxids)

    def _includefn(ld):
        return ld[0] in taxid_set  # match taxid from taxid_set

    cols_included = [0, 1, 7, 9, 10, 11]  # 0-based col idx
    gene2genomic_pos_li = tab2list(datafile, cols_included, header=1, includefn=_includefn)
    count = 0
    last_id = None
    for gene in gene2genomic_pos_li:
        count += 1
        strand = 1 if gene[5] == '+' else -1
        _id = gene[1]
        mgi_dict = {
            '_id': _id,
            'genomic_pos': {
                'entrezgene': _id,
                'start': int(gene[3]),
                'end': int(gene[4]),
                'chr': gene[2],
                'strand': strand
            }
        }
        if _id != last_id:  # rows with dup _id will be skipped
            yield mgi_dict
        last_id = _id

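# Hedged sketch: given one already column-filtered gene2refseq row with made-up values,
# this is the document load_data() above yields. After tab2list() the selected columns are
# (taxid, geneid, genomic accession, start, end, orientation); the accession is stored
# under 'chr'.
def _demo_genomic_pos_doc():
    gene = ("511145", "944742", "NC_000913.3", "2725423", "2726640", "+")  # hypothetical row
    strand = 1 if gene[5] == '+' else -1
    return {
        "_id": gene[1],
        "genomic_pos": {
            "entrezgene": gene[1],
            "start": int(gene[3]),
            "end": int(gene[4]),
            "chr": gene[2],
            "strand": strand,
        },
    }
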
def source_info(self, source=None):
    src_dump = get_src_dump()
    src_ids = list(self.register.keys())
    if source:
        if source in src_ids:
            src_ids = [source]
        else:
            return None
    res = []
    cur = src_dump.find({"_id": {"$in": src_ids}})
    bysrcs = {}
    [bysrcs.setdefault(src["_id"], src) for src in cur]
    for _id in src_ids:
        src = bysrcs.get(_id, {})
        uploaders = self.register[_id]
        src.setdefault("upload", {})
        for uploader in uploaders:
            upl = {
                "name": "%s.%s" % (inspect.getmodule(uploader).__name__, uploader.__name__),
                "bases": ["%s.%s" % (inspect.getmodule(k).__name__, k.__name__)
                          for k in uploader.__bases__],
                "dummy": issubclass(uploader, DummySourceUploader),
            }
            src["upload"].setdefault("jobs", {}).setdefault(uploader.name, {})
            src["upload"]["jobs"][uploader.name]["uploader"] = upl
        src["name"] = _id
        src["_id"] = _id
        res.append(src)
    if source:
        if res:
            return res.pop()
        else:
            # no information, just return what was passed to honor return type
            # + minimal information
            return {"name": source, "_id": source}
    else:
        return res

def export_mapping(self, plugin_name, folder):
    res = {
        "mapping": {
            "status": None,
            "file": None,
            "message": None,
            "origin": None
        }
    }
    # first try to export mapping from src_master (official)
    doc = get_src_master().find_one({"_id": plugin_name})
    if doc:
        mapping = doc.get("mapping")
        res["mapping"]["origin"] = "registered"
    else:
        doc = get_src_dump().find_one({"_id": plugin_name})
        mapping = doc and doc.get("inspect", {}).get("jobs", {}).get(plugin_name, {}).\
            get("inspect", {}).get("results", {}).get("mapping")
        res["mapping"]["origin"] = "inspection"
    if not mapping:
        res["mapping"]["origin"] = None
        res["mapping"]["status"] = "warning"
        res["mapping"]["message"] = "Can't find registered or generated (inspection) mapping"
        return res
    else:
        ufile = os.path.join(folder, "upload.py")
        strmap, _ = yapf_api.FormatCode(pprint.pformat(mapping))
        with open(ufile, "a") as fout:
            fout.write("""
    @classmethod
    def get_mapping(klass):
        return %s\n""" % textwrap.indent(strmap, prefix=" " * 2))
        res["mapping"]["file"] = ufile
        res["mapping"]["status"] = "ok"
        return res

def prepare_src_dump(self):
    # Mongo side
    self.src_dump = get_src_dump()
    self.src_doc = self.src_dump.find_one({'_id': self.src_name}) or {}

def set_pending_to_upload(src_name):
    src_dump = get_src_dump()
    src_dump.update({"_id": src_name}, {"$addToSet": {"pending": "upload"}})

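# Hedged sketch: what the "$addToSet" update above does to the src_dump document, emulated
# in memory with a toy dict. Calling it twice is idempotent: "upload" is only queued once
# in the "pending" list.
def _demo_set_pending_to_upload():
    doc = {"_id": "clinvar"}
    for _ in range(2):
        pending = doc.setdefault("pending", [])
        if "upload" not in pending:   # $addToSet: append only if missing
            pending.append("upload")
    assert doc == {"_id": "clinvar", "pending": ["upload"]}
    return doc
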
def migrate_0dot1_to_0dot2():
    """
    mongodb src_dump/data_plugin changed:
    1. "data_folder" and "release" under "download"
    2. "data_folder" and "release" in upload.jobs[subsrc] taken from "download"
    3. no more "err" under "upload"
    4. no more "status" under "upload"
    5. "pending_to_upload" is now "pending": ["upload"]
    """
    src_dump = get_src_dump()
    data_plugin = get_data_plugin()
    for srccol in [src_dump, data_plugin]:
        logging.info("Converting collection %s" % srccol)
        srcs = [src for src in srccol.find()]
        for src in srcs:
            logging.info("\tConverting '%s'" % src["_id"])
            wasdue = False  # track whether this document needed conversion
            # 1.
            for field in ["data_folder", "release"]:
                if field in src:
                    logging.debug(
                        "\t\t%s: found '%s' in document, moving under 'download'"
                        % (src["_id"], field))
                    try:
                        src["download"][field] = src.pop(field)
                        wasdue = True
                    except KeyError as e:
                        logging.warning(
                            "\t\t%s: no such field '%s' found, skip it (error: %s)"
                            % (src["_id"], field, e))
            # 2.
            for subsrc_name in src.get("upload", {}).get("jobs", {}):
                for field in ["data_folder", "release"]:
                    if field not in src["upload"]["jobs"][subsrc_name]:
                        logging.debug(
                            "\t\t%s: no '%s' found in upload jobs, taking it from 'download' (or from root keys)"
                            % (src["_id"], field))
                        try:
                            src["upload"]["jobs"][subsrc_name][field] = src["download"][field]
                            wasdue = True
                        except KeyError:
                            try:
                                src["upload"]["jobs"][subsrc_name][field] = src[field]
                                wasdue = True
                            except KeyError:
                                logging.warning(
                                    "\t\t%s: no such field '%s' found, skip it"
                                    % (src["_id"], field))
            # 3. & 4.
            for field in ["err", "status"]:
                if field in src.get("upload", {}):
                    logging.debug("\t\t%s: removing '%s' key from 'upload'"
                                  % (src["_id"], field))
                    src["upload"].pop(field)
                    wasdue = True
            # 5.
            if "pending_to_upload" in src:
                logging.debug(
                    "\t%s: found 'pending_to_upload' field, moving to 'pending' list"
                    % src["_id"])
                src.pop("pending_to_upload")
                wasdue = True
                if "upload" not in src.get("pending", []):
                    src.setdefault("pending", []).append("upload")
            if wasdue:
                logging.info("\tFinishing converting document for '%s'" % src["_id"])
                srccol.save(src)
            else:
                logging.info("\tDocument for '%s' already converted" % src["_id"])

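# Hedged sketch: a made-up src_dump document before and after the migration above,
# illustrating the intended end state described by steps 1-5 of the docstring (fields moved
# under "download", copied into upload jobs, "err"/"status" dropped, "pending_to_upload"
# replaced by a "pending" list). Values and paths are hypothetical.
_example_doc_0dot1 = {
    "_id": "clinvar",
    "data_folder": "/data/clinvar/20190101",   # hypothetical path
    "release": "20190101",
    "upload": {"status": "success", "jobs": {"clinvar_hg38": {}}},
    "pending_to_upload": True,
}
_example_doc_0dot2 = {
    "_id": "clinvar",
    "download": {"data_folder": "/data/clinvar/20190101", "release": "20190101"},
    "upload": {"jobs": {"clinvar_hg38": {"data_folder": "/data/clinvar/20190101",
                                         "release": "20190101"}}},
    "pending": ["upload"],
}
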
def prepare_src_dump(self): """Sync with src_dump collection, collection information (src_doc) Return src_dump collection""" src_dump = get_src_dump() self.src_doc = src_dump.find_one({'_id': self.main_source}) or {} return src_dump
def inspect(self, data_provider, mode="type", batch_size=10000, limit=None, sample=None, **kwargs):
    """
    Inspect given data provider:
    - backend definition, see bt.hub.databuild.create_backend for supported formats,
      eg. "merged_collection" or ("src", "clinvar")
    - or callable yielding documents
    Mode:
    - "type": will inspect and report type map found in data (internal/non-standard format)
    - "mapping": will inspect and return a map compatible for later ElasticSearch mapping
      generation (see bt.utils.es.generate_es_mapping)
    - "stats": will inspect and report types + different counts found in data, giving a
      detailed overview of the volumetry of each field and sub-field
    - "jsonschema": same as "type" but result is formatted as json-schema standard
    - limit: when set to an integer, will inspect only x documents.
    - sample: combined with limit, for each document, if random.random() <= sample (float),
      the document is inspected. This option allows to inspect only a sample of data.
    """
    # /!\ attention: this piece of code is critical and not easy to understand...
    # Depending on the source of data to inspect, this method will create an
    # uploader or a builder. These objects don't behave the same while they
    # pass through pickle: an uploader needs to be unprepare()d so it can be
    # pickled (remove some db connections, sockets), while a builder must *not* be
    # unprepare()d because it would reset the underlying target_name (the actual
    # target collection). Also, the way results and statuses are registered is
    # different for uploaders and builders...
    # So, there are lots of "if", be careful if you want to modify that code.
    data_provider_type = None  # where to register results (if possible to do so)
    registerer_obj = None  # who should register results
    t0 = time.time()
    started_at = datetime.now().astimezone()
    self.logger.info("Inspecting data with mode %s and data_provider %s"
                     % (repr(mode), repr(data_provider)))
    if callable(data_provider):
        raise NotImplementedError("data_provider as callable untested...")
    else:
        if data_provider[0] == "src":
            data_provider_type = "source"
            # find src_dump doc
            # is it a full source name (dot notation)?
            fullname = get_source_fullname(data_provider[1])
            if fullname:
                # it's a dot-notation
                src_name = fullname.split(".")[0]
            else:
                # no subsource, full source name is the passed name
                src_name = data_provider[1]
                fullname = src_name
            doc = get_src_dump().find_one({"_id": src_name})  # query by main source
            if not doc:
                raise InspectorError("Can't find document associated to '%s'" % src_name)
            # get an uploader instance (used to get the data if type is "uploader"
            # but also used to update status of the datasource via register_status())
            ups = self.upload_manager[fullname]  # potentially using dot notation
            if len(ups) > 1:
                # recursively call inspect(), collect and return corresponding tasks
                self.logger.debug(
                    "Multiple uploaders found, running inspector for each of them: %s" % ups)
                res = []
                for up in ups:
                    r = self.inspect((data_provider[0], "%s" % up.name),
                                     mode=mode, batch_size=batch_size,
                                     limit=limit, sample=sample, **kwargs)
                    res.append(r)
                return res
            assert len(ups) == 1, \
                "More than one uploader found for '%s', not supported (yet), " \
                "use main_source.source notation" % data_provider[1]
            # create uploader
            registerer_obj = self.upload_manager.create_instance(ups[0])
            backend_provider = data_provider
        else:
            try:
                data_provider_type = "build"
                registerer_obj = self.build_manager.get_builder(data_provider)
                backend_provider = data_provider
            except Exception as e:
                raise InspectorError("Unable to create backend from '%s': %s"
                                     % (repr(data_provider), e))

    got_error = None
    try:
        @asyncio.coroutine
        def do():
            yield from asyncio.sleep(0.0)
            nonlocal mode
            pinfo = {
                "category": INSPECTOR_CATEGORY,
                "source": "%s" % repr(data_provider),
                "step": "",
                "description": ""
            }
            # register begin of inspection (differs slightly depending on type)
            if data_provider_type == "source":
                registerer_obj.register_status("inspecting", subkey="inspect")
            elif data_provider_type == "build":
                registerer_obj.register_status("inspecting", transient=True, init=True,
                                               job={"step": "inspect"})
            self.logger.info("Running inspector on %s (type:%s,data_provider:%s)"
                             % (repr(data_provider), data_provider_type, backend_provider))
            if sample is not None:
                self.logger.info("Sample set to %s, inspect only a subset of data", sample)
            if limit is None:
                self.logger.info("Inspecting all the documents")
            else:
                nonlocal batch_size
                # adjust batch_size so we inspect only "limit" docs if batch is smaller than the limit
                if batch_size > limit:
                    batch_size = limit
                self.logger.info("Inspecting only %s documents", limit)
            # make it pickleable
            if data_provider_type == "source":
                # because registerer_obj is also used to fetch data, it has to be unprepare()d for pickling
                registerer_obj.unprepare()
            else:
                # NOTE: do not unprepare() the builder, we'd lose the target name
                # (it'd be randomly generated again) and we wouldn't be able to register results
                pass

            cnt = 0
            doccnt = 0
            jobs = []
            # normalize mode param and prepare global results
            if type(mode) == str:
                mode = [mode]
            converters, mode = btinspect.get_converters(mode)
            inspected = {}
            for m in mode:
                inspected.setdefault(m, {})
            backend = create_backend(backend_provider).target_collection
            for ids in id_feeder(backend, batch_size=batch_size):
                if sample is not None:
                    if random.random() > sample:
                        continue
                cnt += 1
                doccnt += batch_size
                if limit and doccnt > limit:
                    break
                pinfo["description"] = "batch #%s" % cnt

                def batch_inspected(bnum, i, f):
                    nonlocal inspected
                    nonlocal got_error
                    nonlocal mode
                    try:
                        res = f.result()
                        for m in mode:
                            inspected[m] = btinspect.merge_record(inspected[m], res[m], m)
                    except Exception as e:
                        got_error = e
                        self.logger.error("Error while inspecting data from batch #%s: %s"
                                          % (bnum, e))
                        raise

                # we want to generate intermediate mapping so we can merge
                # all maps later and then generate the ES mapping from there
                pre_mapping = "mapping" in mode
                self.logger.info("Creating inspect worker for batch #%s" % cnt)
                job = yield from self.job_manager.defer_to_process(
                    pinfo,
                    partial(inspect_data, backend_provider, ids,
                            mode=mode, pre_mapping=pre_mapping, **kwargs))
                job.add_done_callback(partial(batch_inspected, cnt, ids))
                jobs.append(job)

            yield from asyncio.gather(*jobs)

            # compute metadata (they were skipped before)
            for m in mode:
                if m == "mapping":
                    try:
                        inspected["mapping"] = es.generate_es_mapping(inspected["mapping"])
                        # metadata for mapping only once generated
                        inspected = btinspect.compute_metadata(inspected, m)
                    except es.MappingError as e:
                        inspected["mapping"] = {
                            "pre-mapping": inspected["mapping"],
                            "errors": e.args[1]
                        }
                else:
                    inspected = btinspect.compute_metadata(inspected, m)
            # just potential converters
            btinspect.run_converters(inspected, converters)

            def fully_inspected(res):
                nonlocal got_error
                try:
                    res = btinspect.stringify_inspect_doc(res)
                    _map = {"results": res}
                    _map["data_provider"] = repr(data_provider)
                    _map["started_at"] = started_at
                    _map["duration"] = timesofar(t0)

                    # when inspecting with "stats" mode, we can get huge numbers but mongo
                    # can't store more than 2^64, make sure to get rid of big nums there
                    def clean_big_nums(k, v):
                        # TODO: same with float/double? seems mongo handles more there?
                        if isinstance(v, int) and v > 2**64:
                            return k, math.nan
                        else:
                            return k, v

                    dict_traverse(_map, clean_big_nums)
                    # register end of inspection (differs slightly depending on type)
                    if "mapping" in mode and "errors" in res["mapping"] \
                            and "pre-mapping" in res["mapping"]:
                        registerer_obj.register_status("failed", subkey="inspect", inspect=_map)
                        got_error = InspectorError(res["mapping"]["errors"])
                    else:
                        if data_provider_type == "source":
                            registerer_obj.register_status("success", subkey="inspect",
                                                           inspect=_map)
                        elif data_provider_type == "build":
                            registerer_obj.register_status("success", job={"step": "inspect"},
                                                           build={"inspect": _map})
                except Exception as e:
                    self.logger.exception("Error while inspecting data: %s" % e)
                    got_error = e
                    if data_provider_type == "source":
                        registerer_obj.register_status("failed", subkey="inspect", err=repr(e))
                    elif data_provider_type == "build":
                        registerer_obj.register_status("failed", job={"err": repr(e)})

            fully_inspected(inspected)
            if data_provider_type is None:
                return
            if got_error:
                raise got_error

        task = asyncio.ensure_future(do())
        return task
    except Exception as e:
        self.logger.error("Error while inspecting '%s': %s" % (repr(data_provider), e))
        raise

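# Hedged sketch: the metadata dict registered under the source's (or build's) "inspect" key
# once inspection succeeds, with made-up values. "results" holds the per-mode output
# (type map, ES mapping, or stats), stringified before storage.
_example_inspect_map = {
    "results": {"mapping": {"gene": {"properties": {"symbol": {"type": "keyword"}}}}},
    "data_provider": "('src', 'clinvar')",
    "started_at": "2021-01-01 00:00:00+00:00",  # hypothetical timestamp
    "duration": "12.3s",
}
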
def poll(self, state, func):
    super(UploaderManager, self).poll(state, func, col=get_src_dump())