def _migrate_resource(self, contribution, revision, resource, created_dt, ignored_checksums):
    """Create a ``PaperFile`` for a legacy resource and add it to the DB session.

    The file location, size and MD5 come from ``self._get_local_file_info``;
    the content type is guessed from ``resource.fileName`` and falls back to
    ``application/octet-stream``.

    :param contribution: stored on the new file as ``_contribution``
    :param revision: stored on the new file as ``paper_revision``; may be
                     falsy, in which case the warning reports ``None``
    :param resource: legacy resource; only ``fileName`` is read here
    :param created_dt: creation timestamp passed through to ``PaperFile``
    :param ignored_checksums: set that receives the MD5 of a skipped duplicate
    :return: the new ``PaperFile``, or ``None`` when the file has no storage
             path or its MD5 is already registered in ``self.file_checksums``
    """
    backend, file_id, file_size, digest = self._get_local_file_info(
        resource, force_access=True)
    guessed_type = mimetypes.guess_type(resource.fileName)[0]
    mime_type = guessed_type or 'application/octet-stream'

    # Without a storage path there is nothing to attach.
    if not file_id:
        self.print_error("%[red!]File not accessible [{}]".format(
            convert_to_unicode(resource.fileName)))
        return

    migrated_file = PaperFile(filename=resource.fileName,
                              content_type=mime_type,
                              size=file_size,
                              md5=digest,
                              storage_backend=backend,
                              storage_file_id=file_id,
                              created_dt=created_dt)

    # check whether the same file has been uploaded to a subsequent revision
    if not digest:
        # No checksum: report it, but still migrate the file below.
        self.print_error(
            "%[red!]File not accessible; can't MD5 it [{}]".format(
                convert_to_unicode(migrated_file.filename)))
    else:
        # The map is updated before the duplicate check, so it always points
        # at the most recently seen file for this checksum.
        self.checksum_map[digest] = migrated_file
        known_revision = self.file_checksums.get(digest)
        if known_revision:
            ignored_checksums.add(digest)
            display_name = convert_to_unicode(resource.fileName)
            revision_id = revision.id if revision else None
            self.print_warning(
                '%[yellow!]File {} (rev. {}) already in revision {}'.format(
                    display_name, revision_id, known_revision.id))
            return
        self.file_checksums[digest] = revision

    migrated_file._contribution = contribution
    migrated_file.paper_revision = revision
    db.session.add(migrated_file)
    return migrated_file
def _migrate_resource(self, old_contrib, contribution, revision, resource, created_dt, ignored_checksums):
    """Create a ``PaperFile`` for a legacy resource and add it to the DB session.

    The file location and size come from ``self._get_local_file_info``; the
    content type is guessed from ``resource.fileName`` and falls back to
    ``application/octet-stream``. The file content is read to compute a CRC32
    checksum used for duplicate detection.

    :param old_contrib: not used by this method; kept so the signature stays
                        compatible with existing callers
    :param contribution: stored on the new file as ``_contribution``
    :param revision: passed to ``PaperFile`` as ``paper_revision``; may be
                     falsy, in which case the warning reports ``None``
    :param resource: legacy resource; only ``fileName`` is read here
    :param created_dt: creation timestamp passed through to ``PaperFile``
    :param ignored_checksums: set that receives the checksum of a skipped
                              duplicate
    :return: the new ``PaperFile``, or ``None`` when its checksum is already
             registered in ``self.file_checksums``
    """
    storage_backend, storage_path, size = self._get_local_file_info(resource)
    content_type = (mimetypes.guess_type(resource.fileName)[0]
                    or 'application/octet-stream')

    # NOTE(review): paper_revision is assigned here, i.e. before the duplicate
    # check below, so a skipped duplicate is still linked to the revision
    # object - confirm this is intended.
    paper_file = PaperFile(filename=resource.fileName,
                           content_type=content_type,
                           size=size,
                           storage_backend=storage_backend,
                           storage_file_id=storage_path,
                           created_dt=created_dt,
                           paper_revision=revision)

    # check whether the same file has been uploaded to a subsequent revision
    try:
        with paper_file.open() as f:
            checksum = crc32(f.read())
        # The map is updated before the duplicate check, so it always points
        # at the most recently seen file for this checksum.
        self.checksum_map[checksum] = paper_file
        collision = self.file_checksums.get(checksum)
        if collision:
            ignored_checksums.add(checksum)
            # Use ``fileName`` (the attribute read everywhere else in this
            # method) and convert it to unicode, as the error message below
            # does; ``resource.filename`` would raise AttributeError here.
            self.print_warning(
                '%[yellow!]File {} (rev. {}) already in revision {}'.format(
                    convert_to_unicode(resource.fileName),
                    revision.id if revision else None,
                    collision.id))
            return
        else:
            self.file_checksums[checksum] = revision
    except (RuntimeError, StorageError):
        # Unreadable file: report it, but still migrate the record below.
        self.print_error(
            "%[red!]File not accessible; can't CRC it [{}]".format(
                convert_to_unicode(paper_file.filename)))

    paper_file._contribution = contribution
    db.session.add(paper_file)
    return paper_file