def retrieve(mod_name, oid_list, opts=None, lock=False):
    """Run a module over ``oid_list`` and hand back what it produced.

    Args:
        mod_name: Name of the module to run.
        oid_list: The oid(s) to run the module over; normalised with
            ``cleanup_oid_list`` before use.
        opts: Optional dict of module options; a falsy value is replaced
            by an empty dict.
        lock: Forwarded to ``single_retrieve`` / ``multi_retrieve``.

    Returns:
        ``None`` if the module is unknown or the initial option check
        fails.  For "extractors" and "source" modules, the result of
        ``single_retrieve`` (exactly one oid) or ``multi_retrieve``
        (otherwise), or ``None`` if full option validation fails on the
        multiprocessing path.  For every other module type, the stored
        result when a single oid is already in the datastore, ``False``
        if full option validation fails, and otherwise the result of
        ``single_call_module`` or ``mp.multi_mapreduce``.

    Raises:
        Anything raised while dispatching is re-raised after
        ``datastore.cleanup()`` has been called.
    """
    logger.debug("retrieve %s %s", mod_name, oid_list)
    opts = opts or {}

    # Resolve the module and normalise the inputs.
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return None
    oid_list = cleanup_oid_list(mod_name, oid_list)

    # Validate only mangle options here; full validation is deferred until
    # it is known that the module actually has to be called.
    if not options.validate_opts(mod_name, opts, True):
        logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
        return None

    try:
        # Analyzers are never fanned out, even when multiprocessing is on.
        in_process = not config.multiproc_on or mod_type in ("analyzers",)
        per_oid_module = mod_type in ("extractors", "source")

        if per_oid_module:
            if len(oid_list) == 1:
                return single_retrieve(mod_name, oid_list[0], opts, lock)
            if in_process:
                return multi_retrieve(mod_name, oid_list, opts, lock)

            # Multiprocessing: compute whatever is missing in parallel,
            # then collect the results for the full list.
            missing = [oid for oid in oid_list
                       if not exists(mod_name, oid, opts)]
            if missing and not options.validate_opts(mod_name, opts):
                logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
                return None
            worker = initialized_modules[mod_name].process
            mp.multi_map(worker, missing, opts, True)
            return multi_retrieve(mod_name, oid_list, opts, lock)

        # Remaining module types (analyzers, map reducers): a single oid
        # that is already stored is answered straight from the datastore.
        if len(oid_list) == 1 and datastore.exists(mod_name, oid_list[0], opts):
            return datastore.retrieve(mod_name, oid_list[0], opts)

        if not options.validate_opts(mod_name, opts):
            logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
            return False

        if in_process:
            return single_call_module(mod_type, mod_name, oid_list, opts)

        # Map Reducer module with multiprocessing on.
        jobid = get_cid_from_oid_list(oid_list)
        mapper = initialized_modules[mod_name].mapper
        reducer = initialized_modules[mod_name].reducer
        return mp.multi_mapreduce(mapper, reducer, oid_list, opts, jobid)
    except:
        # Deliberately bare: the datastore is cleaned up on *any* failure
        # (including interrupts) and the exception is then propagated.
        datastore.cleanup()
        raise
def exists(mod_name, oid, opts=None):
    """Report whether the datastore holds a result for this module/oid/opts.

    Args:
        mod_name: Name of the module whose result is being looked up.
        oid: The oid to look up.
        opts: Optional dict of module options.  When omitted, a fresh
            empty dict is created for this call, so nothing the option
            validator does to it can carry over into later calls.

    Returns:
        ``False`` if the options fail the ``only_mangle`` validation or
        if ``datastore.exists`` raises ``TypeError``; otherwise whatever
        ``datastore.exists`` returns.
    """
    # A ``{}`` default would be one dict shared by every call that omits
    # ``opts``; use a None sentinel and build a new dict each time.
    if opts is None:
        opts = {}

    if not options.validate_opts(mod_name, opts, only_mangle=True):
        return False

    try:
        return datastore.exists(mod_name, oid, opts)
    except TypeError:
        # A TypeError from the lookup is treated as "not present".
        return False
def single_retrieve(mod_name, oid, opts, lock):
    """Fetch the result for one oid, computing it first if it is not stored.

    Args:
        mod_name: Name of the module.
        oid: The single oid to retrieve.
        opts: Dict of module options.
        lock: When truthy the result is read with
            ``datastore.retrieve_lock``; otherwise ``datastore.retrieve``.

    Returns:
        ``None`` if the result is not stored yet and the options fail
        validation; otherwise the value returned by the datastore read.
    """
    already_stored = datastore.exists(mod_name, oid, opts)
    if not already_stored:
        # The module has to run, so the options need full validation.
        options_ok = options.validate_opts(mod_name, opts)
        if not options_ok:
            logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
            return None
        process(mod_name, oid, opts)

    fetch = datastore.retrieve_lock if lock else datastore.retrieve
    return fetch(mod_name, oid, opts)
def process(mod_name, oid_list, opts=None, force=False):
    """Call a module over ``oid_list`` without returning its results.

    Args:
        mod_name: Name of the module to run.
        oid_list: The oid(s) to process; normalised with
            ``cleanup_oid_list`` before use.
        opts: Optional dict of module options; a falsy value is replaced
            by an empty dict.
        force: When truthy every oid is processed, and the check for an
            existing result is skipped entirely.

    Returns:
        ``False`` if the module is unknown or the options fail
        validation.  ``True`` if there is nothing left to process.  When
        the work runs in-process, ``True`` only if every
        ``single_call_module`` call returned a truthy value.  When the
        work is fanned out, the return value of ``mp.multi_map``.

    Raises:
        otypes.UnrecognizedModule: On the multiprocessing path, if the
            module type is neither extractor/source nor map reducer.
        Any exception raised while processing is re-raised after
        ``datastore.cleanup()`` has been called.
    """
    logger.debug("process %s %s", mod_name, oid_list)
    if not opts:
        opts = {}

    # Clean up and validate inputs
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return False
    oid_list = cleanup_oid_list(mod_name, oid_list)
    if not options.validate_opts(mod_name, opts):
        logger.error("Failed to validate opts for %s : %s", mod_name, opts)
        return False

    try:
        # Prune analysis that already exists.  ``force`` is tested first so
        # that a forced run does not pay for an existence lookup per oid
        # whose answer would be ignored anyway.
        pending = [oid for oid in oid_list
                   if force or not exists(mod_name, oid, opts)]
        if not pending:
            # Everything was already processed
            return True

        per_oid_module = mod_type in ("extractors", "source")
        run_in_process = (len(pending) == 1
                          or not config.multiproc_on
                          or mod_type in ("analyzers",))

        if run_in_process:
            if per_oid_module:
                # Extractors and sources are invoked once per oid.
                all_ok = True
                p = progress.progress(len(pending))
                for oid in pending:
                    if not single_call_module(mod_type, mod_name, oid, opts):
                        all_ok = False
                    p.tick()
                return all_ok

            # Analyzers and map reducers get the whole list in one call.
            # Their return value is not kept; it is only reduced to
            # True/False (a None result therefore reports False).
            return bool(single_call_module(mod_type, mod_name, pending, opts))

        # Multiprocessing is on and not an analysis module
        if per_oid_module:
            func = initialized_modules[mod_name].process
        elif mod_type in ("map_reducers",):
            func = initialized_modules[mod_name].mapper
        else:
            raise otypes.UnrecognizedModule("Attempt to call module not of known type.")
        return mp.multi_map(func, pending, opts, True)
    except:
        # Deliberately bare: the datastore is cleaned up on *any* failure
        # (including interrupts) and the exception is then propagated.
        datastore.cleanup()
        raise