def create_idx(coll):
    ''' Create the descending single-field indexes on a single collection.

    One index is created for each of the apk package name, the apk build date,
    the script analysis date and the script name.

    Parameters
    ----------
    coll : object
        The collection to index. It has to provide a `create_index` method
        (presumably a `pymongo.collection.Collection` -- verify against caller).
    '''
    # NOTE(review): the apk meta keys are nested below RESOBJ_SCRIPT_META,
    # exactly as before. This looks like it might be meant to be the apk meta
    # parent key -- confirm against the result document layout.
    indexed_keys = (
        # apk meta
        RESOBJ_APK_META_PACKAGE_NAME,
        RESOBJ_APK_META_BUILD_DATE,
        # script meta
        RESOBJ_SCRIPT_META_ANALYSIS_DATE,
        RESOBJ_SCRIPT_META_NAME,
    )

    for key in indexed_keys:
        attr = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, key, gridfs = False)
        # `create_index` rather than `ensure_index`: the latter is deprecated
        # since pymongo 3.0 and no longer exists in pymongo 4.
        # Creating an already existing index is a no-op on the server.
        coll.create_index([(attr, -1)])
def action_query(self, hashes, package_names, tags, yes):
    ''' Query the import or the result database and output what was found.

    Which database is queried is taken from `self.args.query_dst`.

    Parameters
    ----------
    hashes
        Forwarded to `androlyze.action_query_import_db` (import db only).
    package_names
        Forwarded to `androlyze.action_query_import_db` (import db only).
    tags
        Forwarded to `androlyze.action_query_import_db` (import db only).
    yes
        If true, the confirmation check before dumping the whole result
        database is circumvented.

    Raises
    ------
    CLIError
        If the result database shall be queried, but neither a filter
        nor the "all" switch has been supplied.
    '''
    args = self.args
    parser = self.parser

    # the argparser tells us which database is the target
    target_db = args.query_dst

    if target_db == SUBCOMMAND_QUERY_IMPORT:
        lines = androlyze.action_query_import_db(self.storage, args.query_import_cmd, hashes, package_names, tags)
        clilog.info('\n'.join(lines))
        return

    if target_db != SUBCOMMAND_QUERY_RESULT:
        # unknown destination, nothing to do
        return

    kwargs = CLIUtil.get_result_db_filter_args_from_argparser(args)
    if args.show_id:
        # id shall be shown, so do not strip it
        kwargs["remove_id_field"] = False

    # an explicit distinct key wins over the "list ran scripts" shortcut,
    # which is nothing else than the distinct values of the script name
    if args.distinct is not None:
        distinct_key = args.distinct
    elif args.list_ran_scripts:
        distinct_key = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_NAME, args.non_document)
    else:
        distinct_key = None

    # has to be evaluated before the basic options are merged into kwargs
    no_args_supplied = not (kwargs or args.latest or args.count or distinct_key is not None)
    whole_db = args.all
    raw = args.raw

    # update with basic result query options
    kwargs.update(CLIUtil.get_basic_result_query_options(args))
    kwargs.update(
        include_fields=args.include_fields,
        exclude_fields=args.exclude_fields,
        non_document_raw=raw,
        distinct_key=distinct_key,
    )

    if no_args_supplied and not whole_db:
        raise CLIError('Not enough arguments supplied!\nIf you want to dump the whole db, use the --all switch!', parser)

    # only ask for confirmation if really the whole db would be printed
    skip_prompt = not no_args_supplied or yes
    query_args = (self.storage, CLIUtil.get_checks_from_cli(args))
    res = cli_check_n_exec(
        androlyze.action_query_result_db,
        prompt_prefix='Will print whole results db!',
        circumvent_check=skip_prompt,
        args=query_args,
        kwargs=kwargs)

    # log results
    print_query_result_db(res,
                          distict_generator=distinct_key is not None,
                          count=args.count,
                          raw=raw,
                          interactive=not args.not_interactive)
def action_query_result_db(storage, checks = None, **kwargs):
    '''
    Get results from the database.

    Parameters
    ----------
    storage : ResultsStorageInterface
        The store to use.
    checks : dict, optional (default is None)
        Dictionary describing the checks to perform on some values.
        None or an empty dict means no checks.
        Will be passed to :py:meth:`.MongoUtil.build_checks_filter` (as keyword arguments)
        checks_non_empty_list : iterable<str>, optional (default is ())
            Check the keys against a non empty list.
        checks_empty_list : iterable<str>, optional (default is ())
            Check the keys against an empty list.
        checks_true : iterable<str>, optional (default is ())
            Check if the values of the given keys are true.
        checks_false : iterable<str>, optional (default is ())
            Check if the values of the given keys are false.
        checks_not_null : iterable<str>, optional (default is ())
            Check if the values of the given keys are not null (python None).
        checks_null : iterable<str>, optional (default is ())
            Check if the values of the given keys are null (python None).
        conjunction : str, optional (default is 'or')
            Choose between 'or' and 'and'.
            Specifies how to link the checks together.

    Other Parameters
    ----------------
    include_fields : list<str>, optional (default is [])
        List of fields to include in the result.
        Mutually exclusive with `exclude_fields`.
    exclude_fields : list<str>, optional (default is [])
        List of fields to exclude from the result.
        Mutually exclusive with `include_fields`.

    where : dict, optional (default is {})
        A filter. The passed dict is not modified, the checks are merged into a copy.
    remove_id_field : bool, optional (default is True)
        Will remove the `_id` field by default.

    distinct_key : str, optional (default is None)
        If given, list the distinct values for the `distinct_key`.
    list_ran_scripts: bool, optional (default is False)
        List all scripts that have been run on the given selection.
        Normally you want to supply the `package_name`.
        Overrides `distinct_key`.

    sort : bool, optional (default is True)
        If true sort by analysis date.
    latest : bool, optional (default is False)
        Get the result of the latest script run.
        Will only return one result.
    n : int, optional (default is None)
        Number of results to return.
        None means no limit.

    non_document : bool, optional (default is False)
        Get custom data from mongodb's gridfs.
    non_document_raw : bool, optional (default is False)
        Get the raw data from the database. Otherwise meta infos will be returned.
        Only interesting if `non_document`.

    package_name : str, optional (default is None)
    apk_hash : str, optional (default is None)
    version_name : str, optional (default is None)
    tag : str, optional (default is None)

    script_hash : str, optional (default is None)
    script_name : str, optional (default is None)
    script_version : str, optional (default is None)

    Notes
    -----
    If any of the other parameters is None it won't be used for filtering.

    Returns
    -------
    gridfs.grid_file.GridOutCursor
        If non_document and non_document_raw.
    pymongo.cursor.Cursor
        Otherwise

    Raises
    ------
    DatabaseLoadException

    Examples
    --------
    >>> import androlyzelab
    ... from androlyze.storage.resultdb.ResultDatabaseStorage import ResultDatabaseStorage
    ... from androlyze.model.script.ScriptUtil import dict2json
    ... storage = ResultDatabaseStorage('127.0.0.1', 27017)
    ... res = androlyze.action_query_result_db(storage, n = 2, script_name = "ChainedApkInfos", include_fields = ["apkinfo.components.activities"])
    ... for r in res:
    ...     # get dict
    ...     # print r
    ...     # get json
    ...     print dict2json(r)
    {
    "apkinfo": {
        "components": {
            "activities": {
                "all": [
                    "cn.wps.impress.test.selfvalidate.lmj.TestServiceActivity",
                    ...
    '''
    # build check filter dict if some checks are given which shall be done on some attributes
    checks_filter = MongoUtil.build_checks_filter(**checks) if checks else {}

    # Always hand a fresh dict to the storage: the storage extends the filter
    # in place, hence neither the caller's `where` nor any object shared
    # between calls may be passed on, otherwise filters leak into later queries.
    where = kwargs.get('where')
    merged_where = {} if where is None else dict(where)
    merged_where.update(checks_filter)
    kwargs['where'] = merged_where

    non_document = kwargs.get("non_document", False)
    if kwargs.get("list_ran_scripts", False):
        # listing the ran scripts means the distinct values of the script name
        kwargs['distinct_key'] = MongoUtil.get_attr_str(StaticResultKeys.RESOBJ_SCRIPT_META, StaticResultKeys.RESOBJ_SCRIPT_META_NAME, non_document)

    return storage.get_results(**kwargs)
def get_results(self, include_fields = None, exclude_fields = None, where = None, distinct_key = None, n = None, sort = True, latest = False, non_document = False, non_document_raw = False, remove_id_field = True, **kwargs): ''' See doc of :py:meth:`.ResultStorageInterface.get_results` ''' if include_fields is not None and exclude_fields is not None: raise ValueError("include_fields and exclude_fields are mutually exclusive!") if include_fields is None: include_fields = [] if exclude_fields is None: exclude_fields = [] if where is None: where = {} # latest means enable sorting and only return one result if latest: sort = True n = 1 # create projection dict fields = [(p, 0) for p in exclude_fields] + [(p, 1) for p in include_fields] if remove_id_field: # we don't want the id field fields += [(RESOBJ_ID, 0)] select = dict(fields) # no projection criteria given, disable! # because empty dict means only id if not select: select = None where.update(self.create_where_clause(kwargs, from_gridfs = non_document)) try: res_cursor = None # get appropriate collection coll = self.__get_collection(gridfs_files_coll = non_document and not non_document_raw, gridfs_obj = non_document and non_document_raw) # pymongo 3.0 removed the as_class option in the collection.find method # this is the fix find_kwargs = {} if int(pymongo.version[0]) < 3: find_kwargs['as_class'] = OrderedDict # grid fs if non_document: if non_document_raw: log.debug("mongodb query: find(%s) on gridfs", where) res_cursor = coll.find(where) else: # using the gridfs files collection directly enables us projection an attributes log.debug("mongodb query: find(%s, %s) ", where, select) res_cursor = coll.find(where, select, **find_kwargs) # normal collection else: res_cursor = coll.find(where, select, **find_kwargs) log.debug("mongodb query: find(%s, %s) ", where, select) # enable sorting if wanted if sort: # construct sorting criteria structure, structure is different if using gridfs sort_crit = [( 
MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_ANALYSIS_DATE, gridfs=non_document) , -1)] res_cursor = res_cursor.sort(sort_crit) # limit results if wanted if n is not None: res_cursor = res_cursor.limit(n) # generator that abstracts if normal collection or is gridfs if non_document: if non_document_raw: return res_cursor if distinct_key is not None: res_cursor = res_cursor.distinct(distinct_key) return res_cursor except PyMongoError as e: raise DatabaseLoadException(self, "find(%s, %s)", where, select, caused_by = e), None, sys.exc_info()[2]