def create_idx(coll):
    ''' Create the result indexes on a single collection.

    One single-key index with direction -1 is created per attribute listed below.
    The key names are built via `MongoUtil.get_attr_str` with `gridfs = False`.

    Parameters
    ----------
    coll : object
        The collection to index. Has to offer `create_index(key_list)`,
        as a pymongo collection does.
    '''
    # NOTE(review): the two apk meta attributes are prefixed with RESOBJ_SCRIPT_META
    # exactly like the script meta ones. This is kept as found, please confirm it is intended.
    indexed_attrs = (
        # apk meta
        RESOBJ_APK_META_PACKAGE_NAME,
        RESOBJ_APK_META_BUILD_DATE,
        # script meta
        RESOBJ_SCRIPT_META_ANALYSIS_DATE,
        RESOBJ_SCRIPT_META_NAME,
    )

    for attr in indexed_attrs:
        key = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, attr, gridfs = False)
        # `ensure_index` is deprecated since pymongo 3.0 and removed in 4.0,
        # `create_index` is available in all versions and is idempotent
        coll.create_index([(key, -1)])
Example #2
0
    def action_query(self, hashes, package_names, tags, yes):
        '''
        Query either the import or the result database and output what has been found.

        Which database is queried is decided by `self.args.query_dst`.
        Any other destination than the two known subcommands results in a no-op.

        Parameters
        ----------
        hashes, package_names, tags
            Forwarded to `androlyze.action_query_import_db`.
            Only used for the import database query.
        yes
            If true, the check before dumping the whole result db is circumvented.

        Raises
        ------
        CLIError
            If the result db shall be queried, but neither a criteria nor the --all switch has been given.
        '''
        cli_args = self.args
        arg_parser = self.parser

        # the destination comes from the argparser
        destination = cli_args.query_dst

        if destination == SUBCOMMAND_QUERY_IMPORT:
            lines = androlyze.action_query_import_db(self.storage, cli_args.query_import_cmd, hashes, package_names, tags)
            clilog.info('\n'.join(lines))
            return

        if destination == SUBCOMMAND_QUERY_RESULT:
            kwargs = CLIUtil.get_result_db_filter_args_from_argparser(cli_args)
            if cli_args.show_id:
                kwargs["remove_id_field"] = not cli_args.show_id

            # an explicit distinct key wins over listing the script names
            distinct_key = cli_args.distinct
            if distinct_key is None and cli_args.list_ran_scripts:
                distinct_key = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_NAME, cli_args.non_document)

            # has to be determined before the basic query options get merged in
            criteria_given = bool(len(kwargs) != 0 or cli_args.latest or cli_args.count or distinct_key is not None)
            dump_all = cli_args.all
            raw = cli_args.raw

            # merge the basic result query options
            kwargs.update(CLIUtil.get_basic_result_query_options(cli_args))

            kwargs.update({
                'include_fields': cli_args.include_fields,
                'exclude_fields': cli_args.exclude_fields,
                'non_document_raw': raw,
                'distinct_key': distinct_key,
            })

            if not (criteria_given or dump_all):
                raise CLIError('Not enough arguments supplied!\nIf you want to dump the whole db, use the --all switch!', arg_parser)

            query_result = cli_check_n_exec(
                androlyze.action_query_result_db,
                prompt_prefix='Will print whole results db!',
                circumvent_check=criteria_given or yes,
                args=(self.storage, CLIUtil.get_checks_from_cli(cli_args)),
                kwargs=kwargs)

            # output the results
            print_query_result_db(
                query_result,
                distict_generator=distinct_key is not None,
                count=cli_args.count,
                raw=raw,
                interactive = not cli_args.not_interactive)
Example #3
0
def action_query_result_db(storage, checks = None, **kwargs):
    '''
    Get results from the database.

    Parameters
    ----------
    storage : ResultsStorageInterface
        The store to use.
    checks : dict, optional (default is None)
        Dictionary describing the checks to perform on some values.
        None or an empty dict means that no checks are done.
        Will be passed to :py:meth:`.MongoUtil.build_checks_filter` (as keyword arguments)
        checks_non_empty_list : iterable<str>, optional (default is ())
            Check the keys against a non empty list.
        checks_empty_list : iterable<str>, optional (default is ())
            Check the keys against an empty list.
        checks_true : iterable<str>, optional (default is ())
            Check if the values of the given keys are true.
        checks_false : iterable<str>, optional (default is ())
            Check if the values of the given keys are false.
        checks_not_null : iterable<str>, optional (default is ())
            Check if the values of the given keys are not null (python None).
        checks_null : iterable<str>, optional (default is ())
            Check if the values of the given keys are null (python None).
        conjunction : str, optional (default is 'or')
            Choose between 'or' and 'and'.
            Specifies how to link the checks together.

    Other Parameters
    ----------------
    include_fields : list<str>, optional (default is [])
        List of fields to include in the result.
        Mutually exclusive with `exclude_fields`.
    exclude_fields : list<str>, optional (default is [])
        List of fields to exclude from the result.
        Mutually exclusive with `include_fields`.

    where : dict, optional (default is {})
        A filter.
        The supplied dict is copied and never modified.
    remove_id_field : bool, optional (default is True)
        Will remove the `_id` field by default.

    distinct_key : str, optional (default is None)
        If given, list the distinct values for the `distinct_key`.
    list_ran_scripts: bool, optional (default is False)
        List all scripts that have been run on the given selection.
        Normally you want to supply the `package_name`.
        Overrides `distinct_key`.

    sort : bool, optional (default is True)
        If true sort by analysis date.
    latest : bool, optional (default is False)
        Get the result of the latest script run.
        Will only return one result.
    n : int, optional (default is None)
        Number of results to return.
        None means no limit.

    non_document : bool, optional (default is False)
        Get custom data from mongodb's gridfs.
    non_document_raw : bool, optional (default is False)
        Get the raw data from the database. Otherwise meta infos will be returned.
        Only interesting if `non_document`.

    package_name : str, optional (default is None)
    apk_hash : str, optional (default is None)
    version_name : str, optional (default is None)
    tag : str, optional (default is None)

    script_hash : str, optional (default is None)
    script_name : str, optional (default is None)
    script_version : str, optional (default is None)

    Notes
    -----
    If any of the other parameters is None it won't be used for filtering.

    Returns
    -------
    gridfs.grid_file.GridOutCursor
        If non_document and non_document_raw.
    pymongo.cursor.Cursor
        Otherwise

    Raises
    ------
    DatabaseLoadException

    Examples
    --------
    >>> import androlyze
    ... from androlyze.storage.resultdb.ResultDatabaseStorage import ResultDatabaseStorage
    ... from androlyze.model.script.ScriptUtil import dict2json
    ... storage = ResultDatabaseStorage('127.0.0.1', 27017)
    ... res = androlyze.action_query_result_db(storage, n = 2, script_name = "ChainedApkInfos", include_fields = ["apkinfo.components.activities"])
    ... for r in res:
    ...     # get dict
    ...     # print r
    ...     # get json
    ...     print dict2json(r)
    {
    "apkinfo": {
        "components": {
            "activities": {
                "all": [
                    "cn.wps.impress.test.selfvalidate.lmj.TestServiceActivity",
    ...
    '''
    # build check filter dict if some checks are given which shall be done on some attributes
    checks_filter = MongoUtil.build_checks_filter(**checks) if checks else {}

    # Merge into a fresh dict: the storage extends the `where` dict in place,
    # so neither a shared default nor the caller's dict may be handed over.
    where = dict(kwargs.get('where') or {})
    where.update(checks_filter)
    kwargs['where'] = where

    non_document = kwargs.get("non_document", False)
    if kwargs.get("list_ran_scripts", False):
        # listing the scripts means: distinct values of the script name
        kwargs['distinct_key'] = MongoUtil.get_attr_str(StaticResultKeys.RESOBJ_SCRIPT_META, StaticResultKeys.RESOBJ_SCRIPT_META_NAME, non_document)

    return storage.get_results(**kwargs)
    def get_results(self,
                    include_fields = None, exclude_fields = None,
                    where = None, distinct_key = None,
                    n = None, sort = True, latest = False,
                    non_document = False, non_document_raw = False,
                    remove_id_field = True,
                    **kwargs):
        ''' See doc of :py:meth:`.ResultStorageInterface.get_results` '''

        if include_fields is not None and exclude_fields is not None:
            raise ValueError("include_fields and exclude_fields are mutually exclusive!")

        if include_fields is None:
            include_fields = []
        if exclude_fields is None:
            exclude_fields = []
        if where is None:
            where = {}

        # latest means enable sorting and only return one result
        if latest:
            sort = True
            n = 1

        # create projection dict
        fields = [(p, 0) for p in exclude_fields] + [(p, 1) for p in include_fields]

        if remove_id_field:
            # we don't want the id field
            fields += [(RESOBJ_ID, 0)]

        select = dict(fields)

        # no projection criteria given, disable!
            # because empty dict means only id
        if not select:
            select = None

        where.update(self.create_where_clause(kwargs, from_gridfs = non_document))

        try:
            res_cursor = None
            # get appropriate collection
            coll = self.__get_collection(gridfs_files_coll = non_document and not non_document_raw,
                                         gridfs_obj = non_document and non_document_raw)

            # pymongo 3.0 removed the as_class option in the collection.find method
            # this is the fix
            find_kwargs = {}
            if int(pymongo.version[0]) < 3:
                find_kwargs['as_class'] = OrderedDict
                
            # grid fs
            if non_document:
                if non_document_raw:
                    log.debug("mongodb query: find(%s) on gridfs", where)
                    res_cursor = coll.find(where)
                else:
                    # using the gridfs files collection directly enables us projection an attributes
                    log.debug("mongodb query: find(%s, %s) ", where, select)
                    res_cursor = coll.find(where, select, **find_kwargs)

            # normal collection
            else:
                res_cursor = coll.find(where, select, **find_kwargs)
                log.debug("mongodb query: find(%s, %s) ", where, select)


            # enable sorting if wanted
            if sort:
                # construct sorting criteria structure, structure is different if using gridfs
                sort_crit = [(
                  MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_ANALYSIS_DATE, gridfs=non_document)
                  , -1)]
                res_cursor = res_cursor.sort(sort_crit)

            # limit results if wanted
            if n is not None:
                res_cursor = res_cursor.limit(n)

            # generator that abstracts if normal collection or is gridfs
            if non_document:
                if non_document_raw:
                    return res_cursor

            if distinct_key is not None:
                res_cursor = res_cursor.distinct(distinct_key)

            return res_cursor

        except PyMongoError as e:
            raise DatabaseLoadException(self, "find(%s, %s)", where, select, caused_by = e), None, sys.exc_info()[2]