Exemple #1
0
 def fmt_metadata(metadata):
     '''
     Escape the dictionary so that it is suitable for boto.

     Two passes through `Util.escape_dict` are made:

     1. a value pass (``escape_values = True``) with a formatter that
        turns ``None`` into the placeholder ``'N/A'`` and returns every
        other value unchanged,
     2. a key pass (``escape_keys = True``) with a formatter that replaces
        each run of whitespace with a single underscore.

     Parameters
     ----------
     metadata : dict

     Returns
     -------
     The result of the second `Util.escape_dict` call
     (presumably a dict -- confirm against `Util.escape_dict`).
     '''
     def fmt(v):
         # placeholder for missing values
         return 'N/A' if v is None else v

     metadata = Util.escape_dict(metadata,
                                 fmt,
                                 escape_keys = False,
                                 escape_values = True)

     # raw string: "\s" is not a valid string escape and must reach `re` untouched
     metadata = Util.escape_dict(metadata,
                                 lambda v: re.sub(r"\s+", "_", v),
                                 escape_keys = True,
                                 escape_values = False)

     return metadata
Exemple #2
0
    def fmt_metadata(metadata):
        '''
        Escape the dictionary so that it is suitable for boto.

        Two passes through `Util.escape_dict` are made:

        1. a value pass (``escape_values=True``) with a formatter that
           turns ``None`` into the placeholder ``'N/A'`` and returns every
           other value unchanged,
        2. a key pass (``escape_keys=True``) with a formatter that replaces
           each run of whitespace with a single underscore.

        Parameters
        ----------
        metadata : dict

        Returns
        -------
        The result of the second `Util.escape_dict` call
        (presumably a dict -- confirm against `Util.escape_dict`).
        '''
        def fmt(v):
            # placeholder for missing values
            return 'N/A' if v is None else v

        metadata = Util.escape_dict(metadata,
                                    fmt,
                                    escape_keys=False,
                                    escape_values=True)

        # raw string: "\s" is not a valid string escape and must reach `re` untouched
        metadata = Util.escape_dict(metadata,
                                    lambda v: re.sub(r"\s+", "_", v),
                                    escape_keys=True,
                                    escape_values=False)

        return metadata
Exemple #3
0
    def get_apk(self, _hash, apk = None, **kwargs):
        '''
        Get the `EAndroApk` from `_hash`.

        Parameters
        ----------
        _hash : str
            Hash of the .apk (sha256)
        apk : Apk
            Carries metainformation needed to build the whole path to the element in S3.

        Raises
        ------
        S3StorageLoadException

        Returns
        -------
        EAndroApk
            Apk constructed from raw data and meta infos.
        '''
        # use to hold apk meta infos
        #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag)
        try:
            apk_raw = BytesIO()
            if apk is None:
                raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = RuntimeError("No APK metainformation given!")), None, sys.exc_info()[2]
            _id = Util.get_apk_path_incl_filename(apk)
            self.bucket_get(_id).get_contents_to_file(apk_raw)
            apk_raw.seek(0)
            eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw = True)
            return eandro_apk 
        except (BotoClientError, S3ResponseError) as e:
            raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = e), None, sys.exc_info()[2]
Exemple #4
0
    def get_apk_sub_path(self, apk):
        '''
        Build the sub path under which the `apk` is stored.

        The layout is:
        ...
        |-> package
          |-> version
            |-> sha256

        Parameters
        ----------
        apk: Apk
            Its `package_name`, `version_name` and `hash` are read.

        Returns
        -------
        str: path
            The value returned by `StorageUtil.get_apk_path`.

        Raises
        ------
        CouldNotOpenApk
            If the APK could not be opened
        '''
        # reading `apk.hash` last: if the hash is calculated from the file,
        # this can raise CouldNotOpenApk
        pkg, version, digest = apk.package_name, apk.version_name, apk.hash
        return StorageUtil.get_apk_path(pkg, version, digest)
Exemple #5
0
 def get_s3_id(apk):
     '''
     Compute the id of `apk` in the S3 storage.

     Delegates to `Util.get_apk_path_incl_filename`.

     Parameters
     ----------
     apk : Apk

     Returns
     -------
     str
         The id for the apk in the S3 storage
     '''
     s3_id = Util.get_apk_path_incl_filename(apk)
     return s3_id
Exemple #6
0
 def get_s3_id(apk):
     '''
     Compute the id of `apk` in the S3 storage.

     Delegates to `Util.get_apk_path_incl_filename`.

     Parameters
     ----------
     apk : Apk

     Returns
     -------
     str
         The id for the apk in the S3 storage
     '''
     s3_id = Util.get_apk_path_incl_filename(apk)
     return s3_id
Exemple #7
0
def escape_keys(_dict):
    ''' Escape the keys of `_dict` so that it can be inserted into mongodb.

    Runs `Util.escape_dict` with `escape_key` as the escape function,
    key escaping enabled and value escaping disabled.

    The escaping is documented as not being in-place (a deepcopy of the
    `dict` is made); that behaviour lives in `Util.escape_dict`.

    Parameters
    ----------
    _dict : dict

    Returns
    -------
    dict
    '''
    escaped = Util.escape_dict(_dict, escape_key, escape_keys = True, escape_values = False)
    return escaped
Exemple #8
0
    def get_apk_res_path_all_args(self, package_name, version_name, _hash):
        '''
        Build the path under which results for an apk are stored.

        The path consists of `self.store_root_dir`, `self.APK_RES_DIRNAME`
        and the apk sub path from `StorageUtil.get_apk_path`, joined in that order.

        Parameters
        ----------
        package_name : str
            Package name of the apk.
            Unique apk identifier (at least in the store)
        version_name : str
            Version name
        _hash : str
            The hash of the apk.

        Returns
        -------
        str: path
        '''
        path_parts = (
            self.store_root_dir,
            self.APK_RES_DIRNAME,
            StorageUtil.get_apk_path(package_name, version_name, _hash),
        )
        return join(*path_parts)
Exemple #9
0
def action_delete_apks_import(storage, delete_apk = False, hashes = None, package_names = None, tags = None, select_whole_db = False):
    ''' Delete entries from the import storage (database and/or file system).

    The apks to delete are looked up with `action_query_import_db` and each
    one is handed to `storage.delete_entry_for_apk`.
    A `StorageException` is logged as a warning and not re-raised.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    delete_apk : boolean, optional (default is False)
        Passed on to `storage.delete_entry_for_apk`.
    hashes : iterable<str>, optional (default is None)
    package_names : iterable<str>, optional (default is None)
    tags : iterable<str>, optional (default is None)
    select_whole_db : boolean, optional (default is False)
        If true, select the whole import database! Be careful!
        Allows the call without any of `hashes`, `package_names` and `tags`.
        NOTE(review): the three filters are still passed to the query when
        this flag is set -- confirm whether they are meant to be ignored.

    Raises
    ------
    ValueError
        If none of `hashes`, `package_names`, `tags` is given
        and `select_whole_db` is false.
    '''
    try:
        # never touch the whole database unless explicitly requested
        if not (select_whole_db or len(Util.filter_not_none((hashes, package_names, tags))) > 0):
            raise ValueError('''Neither hashes nor package names nor tags specified!
             If you wan't do use the whole database, set `select_whole_db` to true.
             ''')

        selected = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)

        # use list, otherwise we have duplicates due to the generator
        for entry in list(selected):
            if not delete_apk:
                clilog.info("Will delete %s from database: %s ", entry.short_description(), storage.import_db_storage)
            else:
                clilog.info("Will delete from database and file system: \n%s ", entry.detailed_description())
            storage.delete_entry_for_apk(entry, delete_apk)
    except StorageException as err:
        log.warn(err)
Exemple #10
0
    def get_apk(self, _hash, apk=None, **kwargs):
        '''
        Get the `EAndroApk` from `_hash`.

        Parameters
        ----------
        _hash : str
            Hash of the .apk (sha256)
        apk : Apk
            Carries metainformation needed to build the whole path to the element in S3.

        Raises
        ------
        S3StorageLoadException

        Returns
        -------
        EAndroApk
            Apk constructed from raw data and meta infos.
        '''
        # use to hold apk meta infos
        #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag)
        try:
            apk_raw = BytesIO()
            if apk is None:
                raise S3StorageLoadException(
                    self,
                    content="Apk:%s" % apk.short_description(),
                    caused_by=RuntimeError("No APK metainformation given!"
                                           )), None, sys.exc_info()[2]
            _id = Util.get_apk_path_incl_filename(apk)
            self.bucket_get(_id).get_contents_to_file(apk_raw)
            apk_raw.seek(0)
            eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw=True)
            return eandro_apk
        except (BotoClientError, S3ResponseError) as e:
            raise S3StorageLoadException(self,
                                         content="Apk:%s" %
                                         apk.short_description(),
                                         caused_by=e), None, sys.exc_info()[2]
Exemple #11
0
def action_import_apks(storage, apk_paths,
                       copy_apk = False, copy_to_mongodb = False,
                       update = False, tag = None,
                       # shared memory
                       cnt_imported_apks = None, total_apk_count = None, import_finished = None,
                       # concurrent settings
                       concurrency = None
                       ):

    ''' Import the apks from the `apk_paths` and create the file system structure
    where the results will be kept, specified by `storage`.

    The paths are resolved to single apk files, split into `concurrency`
    parts and each part is imported by its own process.
    The function blocks until all processes have been joined.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    apk_paths : iterable<str>
        The apk files and/or directories.
    copy_apk : bool
        Import the apk file to the `import_dir` (copy it).
    copy_to_mongodb : bool, optional (default is False)
        Also import into MongoDB. Useful for the distributed analysis.
    update : bool
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Some tag
    cnt_imported_apks : multiprocessing.Value<int>, optional (default is None)
        If given, use for progress updating.
        If not given, a counter is created internally.
    total_apk_count : multiprocessing.Value<int>, optional (default is None)
        If given, use for total count of apks.
    import_finished : multiprocessing.Value<byte>, optional (default is None)
        If given, use to signal that import has been completed (set to 1).
    concurrency : int, optional (default is number of cpus)
        Number of processes to use for the import.
    '''
    from androlyze.loader.ApkImporter import ApkImporter

    # get single paths to apks so we get the correct total count of apks
    clilog.info("looking for apks in given paths ... ")
    apk_paths = ApkImporter.get_apks_from_list_or_dir(apk_paths)

    if total_apk_count is not None:
        # may be time consuming for recursive lookup
        apk_paths, total_apk_count.value = Util.count_iterable_n_clone(apk_paths)

    # create count if not given
    if cnt_imported_apks is None:
        cnt_imported_apks = Value('i', 0, lock = RLock())

    # set concurrency
    if concurrency is None:
        concurrency = cpu_count()
    log.warn("Using %d processes", concurrency)

    clilog.info("Storage dir is %s" % storage.fs_storage.store_root_dir)
    if copy_apk:
        clilog.info("Copying APKs to %s ..." % storage.fs_storage.store_root_dir)

    def import_apks(apk_paths):
        ''' Import one share of the apk paths; runs inside a worker process. '''
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk = copy_apk, copy_to_mongodb = copy_to_mongodb,
                                                update = update, tag = tag):

            clilog.info("imported %s", apk.short_description())

            # the shared counter always exists here (created above if not given)
            with cnt_imported_apks.get_lock():
                cnt_imported_apks.value += 1

    pool = []

    # don't convert generator to list if only 1 process wanted
    apk_paths = [apk_paths] if concurrency == 1 else Util.split_n_uniform_distri(list(apk_paths), concurrency)

    # start parallel import
    # multiprocessing's pool causes pickle errors
    for i in range(concurrency):
        p = Process(target = import_apks, args = (apk_paths[i], ))
        log.debug("starting process %s", p)
        pool.append(p)
        p.start()

    for proc in pool:
        proc.join()
        # log the process that has actually been joined, after the join returned
        log.debug("joined on process %s", proc)

    apks_imported = cnt_imported_apks.value != 0
    # show some message that no APK has been imported
    if not apks_imported:
        log.warn("No .apk file has been imported! This means no .apk file has been found or they already have been imported.")
    else:
        clilog.info("done")

    # because not all apks may be importable, we cannot use the count as the signal that the import is done
    if import_finished is not None:
        import_finished.value = 1

    clilog.info("Imported %d apks", cnt_imported_apks.value)
Exemple #12
0
from androlyze.storage import Util
from androlyze.storage.exception import StorageException
from androlyze.storage.resultdb import MongoUtil
from androlyze.util import Util
from androlyze.Constants import *

# Nothing is exported via `from <this module> import *`.
__all__ = []
# Module metadata. The PROJECT_* names are not defined in this file;
# presumably they come from the star import of androlyze.Constants -- TODO confirm.
__author__ = u"Nils Tobias Schmidt, Lars Baumgärtner"
__copyright__ = PROJECT_COPYRIGHT
__license__ = PROJECT_LICENSE
__version__ = PROJECT_VERSION
__email__ = "{schmidt89,lbaumgaertner}@informatik.uni-marburg.de"

# Best-effort bootstrap of androguard at import time:
# any exception raised below is logged as an error and otherwise ignored,
# so importing this module does not fail if androguard cannot be loaded.
# NOTE(review): `Util` is imported from both androlyze.storage and androlyze.util;
# the later import wins, so the call below uses androlyze.util.Util.
try:
    # load androguard
    Util.set_androguard_path(settings.singleton)
    # import namespace of androguards androlyze.py module
    # (the file is looked up in the directory configured under
    # SECTION_ANDROGUARD / KEY_ANDROGUARD_PATH and loaded under the module name "androlyze")
    imp.load_source("androlyze", "%s/androlyze.py" % settings.singleton[(SECTION_ANDROGUARD, KEY_ANDROGUARD_PATH)])
    from androlyze import *
except Exception as e:
    log.error(e)

############################################################
#---Import
############################################################

def action_import_apks(storage, apk_paths,
                       copy_apk = False, copy_to_mongodb = False,
                       update = False, tag = None,
                       # shared memory
                       cnt_imported_apks = None, total_apk_count = None, import_finished = None,