def get_file_info(param, param_type='id', db_location=None, standalone=False):
    """Fetch a single file_info row, looked up by id or by path.

    Args:
        param: the id or path value to look up (must be truthy).
        param_type: 'id' or 'path' - which column to match against.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (row_dict, None) on success - the row is augmented with
        'last_modify_time_str' and 'size_human_readable' when the source
        columns are present - or (None, error_string) on failure.
    """
    result = None
    try:
        if not param:
            raise Exception('Unspecified param')
        if param_type not in ['id', 'path']:
            raise Exception('Invalid param type')
        # NOTE(review): string-formatted SQL is injection-prone if `param`
        # ever carries untrusted input; switch to parameterized queries if
        # the db layer supports them (PEP 249 style placeholders).
        if param_type == 'id':
            query = 'select * from file_info where id="%s"' % param
        elif param_type == 'path':
            query = 'select * from file_info where path="%s"' % param
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        result, err = db.get_single_row(db_location, query)
        if err:
            raise Exception(err)
        if 'last_modify_time' in result:
            tm_str, err = datetime_utils.convert_from_epoch(
                result['last_modify_time'], return_format='str',
                str_format='%c', to='local')
            if err:
                raise Exception(err)
            result['last_modify_time_str'] = tm_str
        if 'size' in result:
            result['size_human_readable'] = filesize.get_naturalsize(result['size'])
    except Exception as e:
        # Bug fix: original message was copy-pasted from get_duplicate_sets.
        return None, 'Error retrieving file info : %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `result, err = ...` unpacking convention.
    return result, None
def get_file_info_query_results(query_type, result_count=20, scan_configuration_id=None,
                                db_location=None, standalone=False, param1=None):
    """Run one of a fixed set of canned reports over the file_info table.

    Args:
        query_type: one of 'largest_files', 'oldest_files', 'newest_files',
            'extension_counts', 'duplicate_sets', 'duplicate_files'.
        result_count: row limit for the largest/oldest/newest reports.
        scan_configuration_id: when set, restricts the report to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().
        param1: the checksum, required for 'duplicate_files'.

    Returns:
        (rows, None) on success - rows gain 'last_modify_time_str' and
        'size_human_readable' where applicable, and 'duplicate_sets' is
        filtered down to checksums that occur at least twice - or
        (None, error_string) on failure.  An unknown query_type surfaces
        as an error (NameError on the unbound `query`).
    """
    results = None
    try:
        # NOTE(review): string-formatted SQL is injection-prone if `param1`
        # carries untrusted input; prefer parameterized queries if the db
        # layer supports them.
        if scan_configuration_id:
            if query_type == 'largest_files':
                query = 'select * from file_info where scan_configuration_id = "%d" order by size desc limit %d' % (scan_configuration_id, result_count)
            elif query_type == 'oldest_files':
                query = 'select * from file_info where scan_configuration_id = "%d" order by last_modify_time limit %d' % (scan_configuration_id, result_count)
            elif query_type == 'newest_files':
                query = 'select * from file_info where scan_configuration_id = "%d" order by last_modify_time desc limit %d' % (scan_configuration_id, result_count)
            elif query_type == 'extension_counts':
                query = 'select extension, count(*) as count from file_info where scan_configuration_id = "%d" group by extension having (count(*) > 0 and id != 0) order by count desc' % scan_configuration_id
            elif query_type == 'duplicate_sets':
                query = 'select checksum, size, count(checksum) as dup_count from file_info where scan_configuration_id="%d" and checksum is not null group by checksum, size order by size desc, dup_count desc;' % scan_configuration_id
            elif query_type == 'duplicate_files':
                if not param1:
                    raise Exception('Invalid request')
                query = 'select * from file_info where scan_configuration_id="%d" and checksum ="%s";' % (scan_configuration_id, param1)
        else:
            if query_type == 'largest_files':
                query = 'select * from file_info order by size desc limit %d' % (result_count)
            elif query_type == 'oldest_files':
                query = 'select * from file_info order by last_modify_time limit %d' % (result_count)
            elif query_type == 'newest_files':
                query = 'select * from file_info order by last_modify_time desc limit %d' % (result_count)
            elif query_type == 'extension_counts':
                query = 'select extension, count(*) as count from file_info group by extension having (count(*) > 0 and id != 0) order by count desc'
            elif query_type == 'duplicate_sets':
                query = 'select checksum, size, count(checksum) as dup_count from file_info where checksum is not null group by checksum, size order by size desc, dup_count desc;'
            elif query_type == 'duplicate_files':
                if not param1:
                    raise Exception('Invalid request')
                query = 'select * from file_info where checksum ="%s";' % (param1)
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        tmp_results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
        if query_type == 'duplicate_sets':
            # Only checksums shared by two or more files are true duplicates.
            results = [result for result in tmp_results if result['dup_count'] >= 2]
        else:
            results = tmp_results
        for result in results:
            if 'last_modify_time' in result:
                tm_str, err = datetime_utils.convert_from_epoch(
                    result['last_modify_time'], return_format='str',
                    str_format='%c', to='local')
                if err:
                    raise Exception(err)
                result['last_modify_time_str'] = tm_str
            if 'size' in result:
                result['size_human_readable'] = filesize.get_naturalsize(result['size'])
    except Exception as e:
        return None, 'Error retrieving general query results: %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `results, err = ...` unpacking convention.
    return results, None
def get_extension_counts(scan_configuration_id=None, db_location=None, standalone=False):
    """Count files per extension, most common first.

    Args:
        scan_configuration_id: when set, restricts counting to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (rows, None) on success - each row has 'extension' and 'count' -
        or (None, error_string) on failure.
    """
    results = None
    try:
        if scan_configuration_id:
            query = 'select extension, count(*) as count from file_info where scan_configuration_id = "%d" group by extension having (count(*) > 0 and id != 0) order by count desc' % scan_configuration_id
        else:
            query = 'select extension, count(*) as count from file_info group by extension having (count(*) > 0 and id != 0) order by count desc'
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
    except Exception as e:
        return None, 'Error retrieving extension counts: %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `results, err = ...` unpacking convention.
    return results, None
def get_unique_extensions(scan_configuration_id=None, db_location=None, standalone=False):
    """List the distinct file extensions present in file_info.

    Args:
        scan_configuration_id: when set, restricts the query to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (extensions, None) on success - a list of extension strings, or
        None when the query matched nothing - or (None, error_string)
        on failure.
    """
    extensions = None
    try:
        if scan_configuration_id:
            query = 'select distinct(extension) from file_info where scan_configuration_id = "%d" ' % scan_configuration_id
        else:
            query = 'select distinct(extension) from file_info'
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
        if results:
            extensions = [result['extension'] for result in results]
    except Exception as e:
        return None, 'Error retrieving unique extensions: %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `extensions, err = ...` unpacking convention.
    return extensions, None
def find_files(file_name_pattern, scan_configuration_id=None, db_location=None, standalone=False):
    """Find file_info rows whose path matches a glob-style name pattern.

    Args:
        file_name_pattern: pattern where '*' is a wildcard; it is translated
            to SQL LIKE syntax and anchored with a leading '%' so it matches
            the end of the stored path.
        scan_configuration_id: when set, restricts matching to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (rows, None) on success or (None, error_string) on failure.
    """
    results = None
    try:
        fnp = file_name_pattern.replace('*', '%')
        # NOTE(review): string-formatted SQL is injection-prone if the
        # pattern carries untrusted input; prefer parameterized queries
        # if the db layer supports them.
        if scan_configuration_id:
            query = 'select * from file_info where scan_configuration_id = %d and path like "%%%s"' % (scan_configuration_id, fnp)
        else:
            query = 'select * from file_info where path like "%%%s"' % (fnp)
        # Removed stray debug `print query` left over from development.
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
    except Exception as e:
        return None, 'Error finding files : %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `results, err = ...` unpacking convention.
    return results, None
def get_duplicate_sets(scan_configuration_id=None, db_location=None, standalone=False):
    """List (checksum, size) groups that contain duplicate files.

    Args:
        scan_configuration_id: when set, restricts grouping to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (rows, None) on success - each row has 'checksum', 'size' and
        'dup_count', and only groups with dup_count >= 2 are kept - or
        (None, error_string) on failure.
    """
    results = None
    try:
        if scan_configuration_id:
            query = 'select checksum, size, count(checksum) as dup_count from file_info where scan_configuration_id="%d" and checksum is not null group by checksum, size order by size desc, dup_count desc;' % scan_configuration_id
        else:
            query = 'select checksum, size, count(checksum) as dup_count from file_info where checksum is not null group by checksum, size order by size desc, dup_count desc;'
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
        # Bug fix: the original called results.remove() while iterating
        # `results`, which skips the element after every removal and so can
        # leave singleton (dup_count < 2) rows in the output.  Filter into
        # a new list instead.
        results = [result for result in results if result['dup_count'] >= 2]
    except Exception as e:
        return None, 'Error retrieving duplicate file sets : %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `results, err = ...` unpacking convention.
    return results, None
def get_files_by_extension(extension, scan_configuration_id=None, db_location=None, standalone=False):
    """List file_info rows with a given extension, largest first.

    Args:
        extension: the extension value to match.
        scan_configuration_id: when set, restricts matching to one scan.
        db_location: path to the db; discovered via scan_utils when None.
        standalone: passed through to scan_utils.get_db_location().

    Returns:
        (rows, None) on success - rows gain 'last_modify_time_str' and
        'size_human_readable' when the source columns are present - or
        (None, error_string) on failure.
    """
    results = None
    try:
        # NOTE(review): string-formatted SQL is injection-prone if
        # `extension` carries untrusted input; prefer parameterized
        # queries if the db layer supports them.
        if scan_configuration_id:
            query = 'select * from file_info where scan_configuration_id = %d and extension="%s" order by size desc' % (scan_configuration_id, extension)
        else:
            query = 'select * from file_info where extension="%s" order by size desc' % extension
        if not db_location:
            db_location, err = scan_utils.get_db_location(standalone)
            if err:
                raise Exception(err)
        results, err = db.get_multiple_rows(db_location, query)
        if err:
            raise Exception(err)
        for result in results:
            if 'last_modify_time' in result:
                tm_str, err = datetime_utils.convert_from_epoch(
                    result['last_modify_time'], return_format='str',
                    str_format='%c', to='local')
                if err:
                    raise Exception(err)
                result['last_modify_time_str'] = tm_str
            if 'size' in result:
                result['size_human_readable'] = filesize.get_naturalsize(result['size'])
    except Exception as e:
        return None, 'Error retrieving files by extension : %s' % str(e)
    # Bug fix: the original fell off the end on success and implicitly
    # returned None, breaking the `results, err = ...` unpacking convention.
    return results, None