Example #1
def retrieve(mod_name, oid_list, opts=None, lock=False):
    """ Returns the results of calling a module over an oid_list.
    """
    logger.debug("retrieve %s %s", mod_name, oid_list)
    if not opts: opts = {}

    # Clean up and validate inputs
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return None
    oid_list = cleanup_oid_list(mod_name, oid_list)
    # Validate only mangle options unless we have to actually call the module
    if not options.validate_opts(mod_name, opts, True):
        logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
        return None
    try:
        if not config.multiproc_on or mod_type in ["analyzers"]:
            if mod_type in ["extractors", "source"]:
                if len(oid_list) == 1:
                    return single_retrieve(mod_name, oid_list[0], opts, lock)
                else:
                    return multi_retrieve(mod_name, oid_list, opts, lock)
            else:
                if len(oid_list) == 1:
                    if datastore.exists(mod_name, oid_list[0], opts):
                        return datastore.retrieve(mod_name, oid_list[0], opts)
                if not options.validate_opts(mod_name, opts):
                    logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
                    return False
                return single_call_module(mod_type, mod_name, oid_list, opts)
        else:   # Multiprocessing is on and not an analysis module
            if mod_type in ["extractors", "source"]:
                if len(oid_list) == 1:
                    return single_retrieve(mod_name, oid_list[0], opts, lock)
                else:
                    new_list = []
                    for oid in oid_list:
                        if not exists(mod_name, oid, opts):
                            new_list.append(oid)
                    if new_list and not options.validate_opts(mod_name, opts):
                        logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
                        return None
                    func = initialized_modules[mod_name].process
                    mp.multi_map(func, new_list, opts, True)
                    return multi_retrieve(mod_name, oid_list, opts, lock)
            else:  # Map Reducer module
                if len(oid_list) == 1:
                    if datastore.exists(mod_name, oid_list[0], opts):
                        return datastore.retrieve(mod_name, oid_list[0], opts)
                if not options.validate_opts(mod_name, opts):
                    logger.warning("Failed to validate opts for %s : %s", mod_name, opts)
                    return False
                jobid = get_cid_from_oid_list(oid_list)
                map_func = initialized_modules[mod_name].mapper
                reduce_func = initialized_modules[mod_name].reducer
                return mp.multi_mapreduce(map_func, reduce_func, oid_list, opts, jobid)
    except:
        # Clean up the datastore before re-raising the exception
        datastore.cleanup()
        raise
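
A minimal usage sketch for retrieve, assuming a hypothetical extractor module named "byte_histogram" and a couple of illustrative sample paths; neither name is part of the code above.

# Hypothetical caller: fetch (and lazily compute) results for a batch of oids
oids, _ = import_files(["/samples/a.bin", "/samples/b.bin"])  # paths are illustrative
results = retrieve("byte_histogram", oids)
if results is None:
    print("retrieve failed: unknown module or invalid opts")
else:
    # For multiple oids the extractor path returns results keyed by oid (assumption)
    for oid, histogram in results.items():
        print(oid, histogram)
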
Example #2
def process(mod_name, oid_list, opts=None, force=False):
    """ Calls a module over an oid_list without returning results.
    """
    logger.debug("process %s %s", mod_name, oid_list)
    if not opts: opts = {}
    # Clean up and validate inputs
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return False 
    oid_list = cleanup_oid_list(mod_name, oid_list)
    if not options.validate_opts(mod_name, opts):
        logger.error("Failed to validate opts for %s : %s", mod_name, opts)
        return False
    try:
        # Prune analysis that already exists
        new_list = []
        for oid in oid_list:
            if not exists(mod_name, oid, opts) or force:
                new_list.append(oid)
        if len(new_list) == 0:  # Everything was already processed
            return True
        # Process the oid_list        
        if len(new_list) == 1 or not config.multiproc_on or mod_type in ["analyzers"]:
            ret_val = True
            if mod_type in ["extractors", "source"]:
                p = progress.progress(len(new_list))
                for oid in new_list:
                    if not single_call_module(mod_type, mod_name, oid, opts):
                        ret_val = False
                    p.tick()
                return ret_val
            else:
                # Don't keep the return value of analyzers and map_reducers; return False if they return None
                if not single_call_module(mod_type, mod_name, new_list, opts):
                    ret_val = False
                return ret_val
            
        else:  # Multiprocessing is on and not an analysis module
            if mod_type in ["extractors", "source"]:
                func = initialized_modules[mod_name].process
            elif mod_type in ["map_reducers"]:
                func = initialized_modules[mod_name].mapper
            else:
                raise otypes.UnrecognizedModule("Attempt to call module not of known type.")
            return mp.multi_map(func, new_list, opts, True)
    except:
        # Clean up the datastore before re-raising the exception
        datastore.cleanup()
        raise
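
A minimal usage sketch for process, again assuming the hypothetical "byte_histogram" extractor module; process stores results in the datastore without returning them, so a later retrieve picks them up.

# Hypothetical caller: compute and store results up front, then read them back
if process("byte_histogram", oids, opts={}, force=False):
    results = retrieve("byte_histogram", oids)
else:
    print("processing failed for at least one oid")
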
Example #3
def import_files(files_list):
    """ Imports a list of files, returning the oids of the imported files and
        the number of files that were new.
    """
    if not isinstance(files_list, list):
        logger.error("files must be of type list.")
        return None, 0
    try:
        new_file_count = 0
        oids = []
        p = progress.progress(len(files_list))
        for file_location in files_list:
            oid, new_file = import_file(file_location)
            p.tick()
            if oid:
                oids.append(oid)
                if new_file:
                    new_file_count += 1
    except:
        # Clean up the datastore before re-raising the exception
        datastore.cleanup()
        raise

    oids = list(set(oids))  # remove duplicate oids
    return oids, new_file_count
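
A minimal usage sketch for import_files, assuming a local directory of sample files; the directory path is illustrative.

import os

sample_dir = "/samples"  # assumed location of files to import
paths = [os.path.join(sample_dir, name) for name in os.listdir(sample_dir)]
oids, new_count = import_files(paths)
print("imported %d files, %d of them new" % (len(oids), new_count))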