class DBtool: ''' This class contains the core functions for interacting with the database and processing the files from directories specified during initialization. ''' def __init__(self, dbpath, dir_list=None, file_regex=None, preview_path=None, watch_directory=None, init_sql_path=None, video_cmd=None, zip_cmd=None): self.dir_list = dir_list self.file_regex = file_regex self.preview_path = preview_path self.preview_creation_threads = 4 self.watch_directory = watch_directory self.video_cmd = video_cmd self.zip_cmd = zip_cmd self.q = queue.Queue() self.work_cv = threading.Condition() self.sql_runner = SQLRunner(dbpath, self.work_cv, self.q) self.sql_runner.daemon = True self.sql_runner.start() self.preview_creator = PreviewCreator() self.update_running = False # There probably is a somewhat rare bug happening here, which causes the # execution of queries lock down. def run_sql(self, sql, params=None, commit=True): if not self.sql_runner.is_alive(): return cond = threading.Condition() res = list() job = [sql, params, res, cond, commit] with cond: with self.work_cv: self.q.put(job) self.work_cv.notify() cond.wait() return job[2] # TODO: proper error reporting def add_to_playlist(self, file_id_list, pl_name): '''Add files to a playlist. Upon completion returns a list of id's that were inserted. Arguments: file_id_list -- list of id's of files to be added to the playlist pl_name -- name of the playlist''' self.run_sql('INSERT OR IGNORE Into Playlist Values(NULL, ?)', (pl_name,), True) inserted = list() for i, e in enumerate(file_id_list): commit = False if i == len(file_id_list)-1: commit = True query = ('''INSERT INTO FilePlaylist (file_id, playlist_id) SELECT ?, Playlist.id FROM Playlist WHERE Playlist.name=?''') res = self.run_sql(query, (e, pl_name), commit) if res != None: inserted.append(e) return inserted def change_paths(self, id_path_pairs): '''Change paths of files in the database. id_path_pairs should be a list of dicts of format {'id':file_id, 'old_path':old_path, 'new_path':new_path}''' res = list() for p in id_path_pairs: file_id = int(p['id']) destpath = p['new_path'] u_res = self.run_sql('UPDATE File SET Name=? WHERE id=?', (destpath, file_id)) if u_res: res.append({'id':p['id'], 'path':'new_path'}) else: logging.error('Error updating file name withd id %d to %s', file_id, destpath) return res def create_preview(self, file_id, path=None, outpath=None, force=False): if not path: entry = self.run_sql('''SELECT File.name,Preview.path FROM File LEFT JOIN Preview ON File.id=Preview.file_id WHERE File.id=?''', (file_id,)) path = entry[0][0] if entry[0][1] and not force: logging.info('Preview already exists for file with id %s. Skipping overwrite', file_id) return entry[0][1] if path.find('.zip') != -1: return fname = os.path.basename(path) m = hashlib.md5() m.update(bytes(fname, 'utf-8')) outpath = os.path.join(self.preview_path, '{0}.{1}'.format(m.hexdigest(), 'mp4')) res = self.preview_creator.create_preview(path, outpath) print("PREVIEW CREATED", res) if res: self.run_sql('INSERT OR IGNORE INTO Preview VALUES(?, ?)', (file_id, outpath), False) self.run_sql('UPDATE Preview SET Path=? WHERE file_id=?', (outpath, file_id), True) return res def delete_files(self, files_ids): '''Permanently delete files from hard disk and all data associated to them from the database Arguments: files_ids -- list of dicts of format {'id':file_id, 'name':file_name} ''' res = list() for i in files_ids: p = (i,) path = None try: path = self.run_sql('SELECT name FROM File WHERE id=?', (i,), False)[0][0] except: pass self.run_sql('DELETE FROM File WHERE id=?', p, False) self.run_sql('DELETE FROM Viewcount WHERE file_id=?', p, False) self.run_sql('DELETE FROM Preview WHERE file_id=?', p, False) self.run_sql('DELETE FROM FilePlaylist WHERE file_id=?', p, True) try: os.remove(path) except: print('Error when attempting to remove file "{}"'.format(path)) res.append(i) return res def __get_join_type(self, join_str): allowed_types = ['LEFT JOIN', 'JOIN', 'RIGHT JOIN', 'FULL JOIN'] if join_str in allowed_types: return join_str return 'JOIN' def search_files(self, file_terms=None, file_terms_not=None, view_count_min=None, view_count_max=None, size_min=None, size_max=None, added_min=None, added_max=None, playlist=None, playlist_not=None): query_params = list() query_str = "" clause_list = list() if file_terms: clause_list.extend(['File.name LIKE ?' for f in file_terms]) query_params.extend(['%{}%'.format(f) for f in file_terms]) if file_terms_not: clause_list.extend(['File.name NOT LIKE ?' for f in file_terms_not]) query_params.extend(['%{}%'.format(f) for f in file_terms_not]) if view_count_min: clause_list.append('Viewcount.views > ?') query_params.append(view_count_min) if view_count_max: clause_list.append('Viewcount.views < ?') query_params.append(view_count_max) if size_min: clause_list.append('File.fsize > ?') query_params.append(size_min) if size_max: clause_list.append('File.fsize < ?') query_params.append(size_max) if added_min: clause_list.append('File.added > ?') query_params.append(added_min) if added_max: clause_list.append('File.added < ?') query_params.append(added_max) if not any([playlist, playlist_not]): query_str = ('''SELECT File.id,File.name,File.fsize,Preview.path,Viewcount.views FROM File LEFT JOIN Preview ON File.id=Preview.file_id LEFT JOIN Viewcount ON File.id=Viewcount.file_id WHERE {} '''.format(' AND '.join(clause_list))) else: playlist_q = list() playlist_params = list() if playlist: playlist_q.append('''File.id IN (SELECT FilePlaylist.file_id FROM Playlist,FilePlaylist WHERE Playlist.id=FilePlaylist.playlist_id AND Playlist.name=?)''') playlist_params.append(playlist) if playlist_not: playlist_q.append('''File.id NOT IN (SELECT FilePlaylist.file_id FROM Playlist,FilePlaylist WHERE Playlist.id=FilePlaylist.playlist_id AND Playlist.name=?)''') playlist_params.append(playlist_not) query_params = playlist_params + query_params query_str = ('''SELECT File.id,File.name,File.fsize,Preview.path,Viewcount.views FROM File LEFT JOIN Preview ON File.id=Preview.file_id LEFT JOIN Viewcount ON File.id=Viewcount.file_id WHERE {}'''.format(' AND '.join(playlist_q))) print("DEBUG QUERY LEN, PLAYLIST_Q_LEN", len(query_params), len(playlist_q)) if len(query_params)-len(playlist_q) > 0: query_str += ' AND {}'.format(' AND '.join(clause_list)) res = self.run_sql(query_str, query_params) return res def insert_files(self, flist): ''' Insert files to the database. Files should be passed as a list of 3-tuples of format (path, size_bytes, date_added)''' commit = False for i, e in enumerate(flist): if i == len(flist)-1: commit = True res_q = self.run_sql('INSERT INTO File Values(NULL, ?, ?, datetime(?))', (e[0], e[1], e[2],), commit) if res_q != None: vc = self.run_sql('''INSERT INTO Viewcount (file_id, views) SELECT id, 0 FROM File WHERE File.name=?''', (e[0],), commit) def open_file(self, fileId): res = self.run_sql('SELECT Name FROM File where id=?', (fileId,)) for row in res: # TODO: Fix hard-coding and open the file based on mime-type logging.debug('Opening file %s', row[0]) if row[0].lower().find('.zip') != -1: subprocess.Popen([self.zip_cmd, row[0]], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) else: subprocess.Popen([self.video_cmd, row[0]], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) break self.run_sql('UPDATE Viewcount SET views=views+1 WHERE file_id=?',(fileId,)) self.run_sql('INSERT OR IGNORE INTO Viewcount Values(?, ?)', (fileId, 1)) # TODO: perform this using batch insertion def update_database(self): '''Add new files to the database''' if self.update_running: return None self.update_running = True res = dict() res['inserted_bytes'] = 0 res['inserted_files'] = 0 for d in self.dir_list: print('Updating from "{0}"'.format(d)) dw = DirWalker(d, self.file_regex) dw.walk() for i, e in enumerate(dw.get_path_list()): commit = False if i == len(dw.get_path_list())-1: commit = True res_q = self.run_sql('INSERT INTO File Values(NULL, ?, ?, datetime(?))', (e[0], e[1], e[2]), commit) if res_q != None: self.run_sql('''INSERT OR IGNORE INTO Viewcount (file_id, views) SELECT id, 0 FROM File WHERE File.name=?''', (e[0],), False) res['inserted_bytes'] += e[1] res['inserted_files'] += 1 self.update_running = False return res def clean_preview_entries(self): '''Remove paths to nonexisting preview files''' res = [x for x in self.conn.execute('SELECT * From Preview')] for e in res: if not os.path.exists(e[1]): self.conn.execute('DELETE FROM Preview WHERE fileId=?', (e[0], )) logging.debug('Clearing preview %d %s', e[0], e[1]) self.conn.commit() def get_disk_files(self): '''Recursively get all files from directories specified in the list self.dir_list returns a dictionary in the format: {file_name:file_size_bytes}''' path_entries = dict() for d in self.dir_list: print('Scanning files from directory "{}"'.format(d)) dw = DirWalker(d, self.file_regex) dw.walk() path_entries.update({ x[0]:x[1] for x in dw.get_path_list()}) return path_entries def get_disk_files2(self): '''Recursively get all files from directories specified in the list self.dir_list. Returns a list of tuples in the format: [(path1,size1), (path2,size2), ... ]''' print(self.dir_list) for d in self.dir_list: dw = DirWalker(d, self.file_regex) dw.walk() return dw.get_path_list() def get_duplicates(self): '''Return list of database entries of files that are similar in size but differ in path name''' get_all_q = 'SELECT id, name, fsize FROM File ORDER BY fsize' db_entries = [x for x in self.conn.execute(get_all_q)] record = False res = dict() key = None for i in range(0, len(db_entries)-2): e1 = db_entries[i] e2 = db_entries[i+1] diff = abs(e1[2]-e2[2]) if diff < 100 and e1[1] != e2[1]: record = True if not key: key = e1 else: record = False key = None if record: if key not in res: res[key] = list() res[key].append(e2) return res def get_stats(self): stat_q = 'SELECT fileId, Name, SUM(fsize), COUNT(fileId) FROM File' res = [x for x in self.conn.execute(stat_q)] return {'file_sizes': res[0][2], 'file_count': res[0][3]} def find_missing_entries(self): '''Return missing entries from database and find files that are similar in size from hard disk.. Upon completion returns a dict in the format: {db_entry: [(path1, size1), (path2, size2) ...]}''' logging.info('Scanning directories...') path_entries = self.get_disk_files() db_entries = self.run_sql('SELECT id, fsize, name FROM File') missing_entries = list() for d in db_entries: if not d[2] in path_entries: missing_entries.append(d) else: del path_entries[d[2]] def size_check(*args): return args[1] == args[0] path_entries = [(x, path_entries[x]) for x in path_entries] res = get_dupes_bsearch(missing_entries, path_entries, is_dupe=size_check, cmpr_func=lambda x: x[1], get_attr=lambda x: x[1]) return res def repair(self): if self.update_running: return {} self.update_running = True missing = self.find_missing_entries() self.update_running = False return missing def mark_as_viewed(self, file_id_list): ''' Set the view counts of files, with id's given in file_id_list to 1. If a file already has a view count greater than 1, then that view count is let unchanged. Returns the list of id's that were marked. NOTICE: currently the files are correctly marked, but UPDATE statement is considered incorrectly to fail, because it doesn't return anything upon succesful execution. ''' if len(file_id_list) == 0: return list() ids = tuple([int(i) for i in file_id_list]) query = 'SELECT file_id FROM Viewcount WHERE file_id IN ({}) AND views=0' query = query.format(','.join(['?' for i in file_id_list])) files = [x[0] for x in self.run_sql(query, ids)] update = 'UPDATE Viewcount SET views=1 WHERE file_id IN ({})' update = update.format(','.join(['?' for i in files])) res = self.run_sql(update, tuple(files)) return files def get_playlist(self, name): '''Get all files from a playlist''' query = ('''SELECT File.id, File.name, File.fsize, Preview.path, Viewcount.views From File LEFT JOIN Preview ON File.id=Preview.file_id LEFT JOIN Viewcount ON File.id=Viewcount.file_id WHERE File.id in (SELECT FilePlaylist.file_id FROM FilePlaylist,Playlist WHERE Playlist.id=FilePlaylist.playlist_id AND Playlist.name=?) ''') res = self.run_sql(query, (name,)) return res