def file_duplicate_no(self, name: str, path: str,
                      remote_id: str) -> (int, DatabaseMedia):
    """
    Work out the duplicate number to use for a remote file.

    First looks in SyncFiles for a row whose RemoteId matches
    ``remote_id``. When one exists its stored DuplicateNo is reused.
    Otherwise the highest DuplicateNo already recorded for the same
    Path / OrigFileName pair is looked up and the next number is handed
    out, so the caller can build a unique local filename suffix.

    Args:
        name: original file name, matched against OrigFileName.
        path: folder path, matched against Path.
        remote_id: remote identifier, matched against RemoteId.

    Returns:
        A 2-tuple ``(duplicate_no, media)``:
        * existing entry: its DuplicateNo and the row converted with
          ``GooglePhotosRow(...).to_media()``
        * no entry but other rows share path and name:
          ``MAX(DuplicateNo) + 1`` and ``None``
        * nothing matches at all: ``0`` and ``None``
    """
    lookup_sql = "SELECT {0} FROM SyncFiles WHERE RemoteId = ?; ".format(
        GooglePhotosRow.columns)
    self.cur.execute(lookup_sql, (remote_id,))
    existing = self.cur.fetchone()

    if existing:
        # already indexed: keep whatever duplicate no. was assigned before
        return existing['DuplicateNo'], GooglePhotosRow(existing).to_media()

    self.cur.execute(
        "SELECT MAX(DuplicateNo) FROM SyncFiles "
        "WHERE Path = ? AND OrigFileName = ?;", (path, name))
    highest = self.cur.fetchone()[0]

    if highest is None:
        # MAX() over zero rows is NULL: nothing else uses this path/name
        return 0, None

    # other entries share this path/name: take the next free number
    return highest + 1, None
def get_extra_meta(self):
    """
    Fill in extra metadata for indexed items that have none yet.

    Fetches rows via ``get_rows_by_search(GooglePhotosRow, uid='ISNULL')``
    and, for each one whose file exists under the root folder, reads the
    file through ``LocalFilesMedia`` and writes its uid, create date and
    size back to the index. ``self._db.store()`` is called after every
    2000th updated item.

    NOTE(review): rows updated after the last multiple of 2000 are not
    stored by this method - presumably a later store() elsewhere picks
    them up; TODO confirm.
    """
    log.warning('updating index with extra metadata for comparison '
                '(may take some time) ...')

    updated = 0
    pending = self._db.get_rows_by_search(GooglePhotosRow, uid='ISNULL')

    for item in pending:
        file_path = self._root_folder / item.relative_path

        if not file_path.exists():
            # NOTE(review): this branch is reached when the file is absent
            # on disk, although the message says "already scanned" - confirm
            # the intended wording.
            log.debug('skipping metadata (already scanned) on %s',
                      file_path)
            continue

        local_file = LocalFilesMedia(file_path)
        updated += 1
        log.info('updating metadata %d on %s', updated, file_path)
        item.update_extra_meta(
            local_file.uid, local_file.create_date, local_file.size)

        # the media item is passed straight to from_media (duck typing)
        # noinspection PyTypeChecker
        self._db.put_row(GooglePhotosRow.from_media(item), update=True)

        # store in batches rather than after every single row
        if updated % 2000 == 0:
            self._db.store()

    log.warning('updating index with extra metadata complete')
def get_extra_paths(self):
    """
    Generate the paths produced by the "extra files" queries.

    Runs ``Queries.pre_extra_files`` then ``Queries.extra_files`` on the
    second cursor and reads the result in batches of
    ``LocalData.BLOCK_SIZE`` rows. Each row is converted with
    ``GooglePhotosRow(...).to_media()`` and the path
    ``relative_path.parent / filename`` is yielded for it.

    Yields:
        one path per result row, in the order the cursor returns them.
    """
    cursor = self.cur2
    cursor.execute(Queries.pre_extra_files)
    cursor.execute(Queries.extra_files)

    # pull rows a block at a time; an empty batch means the cursor is done
    batch = cursor.fetchmany(LocalData.BLOCK_SIZE)
    while batch:
        for row in batch:
            media = GooglePhotosRow(row).to_media()
            yield media.relative_path.parent / media.filename
        batch = cursor.fetchmany(LocalData.BLOCK_SIZE)
def write_media_index(self, media: GooglePhotosMedia, update: bool = True):
    """
    Write a media item to the index and track the newest create date.

    Args:
        media: the item to record; converted with
            ``GooglePhotosRow.from_media`` before being handed to
            ``self._db.put_row``.
        update: passed through to ``put_row`` unchanged.

    Side effects:
        ``self.latest_download`` is advanced to ``media.create_date`` when
        that date is strictly later than the current value.
    """
    row = GooglePhotosRow.from_media(media)
    self._db.put_row(row, update)

    created = media.create_date
    if created > self.latest_download:
        self.latest_download = created