def fix_file_checksum(self, dry_run, **kwargs):
    """Compute and store the checksum of uploads that do not have one yet.

    Candidates are uploads whose ``checksum`` is null and that either have
    an ``audio_file`` or a ``source`` starting with ``file://``. The number
    of candidates is always reported on stdout; when ``dry_run`` is truthy
    nothing else happens.

    Otherwise candidates are processed in batches of
    ``kwargs["batch_size"]``. An upload whose file cannot be hashed is
    reported on stderr and excluded from the batch update.
    """
    self.stdout.write("Fixing missing checksums...")

    has_local_file = Q(audio_file__isnull=False) | Q(source__startswith="file://")
    candidates = models.Upload.objects.filter(Q(checksum=None) & has_local_file)
    total = candidates.count()
    self.stdout.write(
        "[checksum] {} entries found with missing values".format(total)
    )
    if dry_run:
        return

    # Only the columns needed to locate the file are loaded.
    slim_candidates = candidates.only("id", "audio_file", "source")
    position = 0
    for batch in common_utils.chunk_queryset(slim_candidates, kwargs["batch_size"]):
        hashed = []
        # ``position`` keeps counting across batches: each batch resumes
        # right after the last index reached by the previous one.
        for position, upload in enumerate(batch, start=position + 1):
            self.stdout.write(
                "[checksum] {}/{} fixing file #{}".format(position, total, upload.pk)
            )
            try:
                upload.checksum = common_utils.get_file_hash(upload.get_audio_file())
            except Exception as exc:
                # Best effort: one unreadable file must not abort the run.
                self.stderr.write(
                    "[checksum] error with file #{}: {}".format(upload.pk, str(exc))
                )
            else:
                hashed.append(upload)

        # One write per batch, limited to the uploads hashed successfully.
        models.Upload.objects.bulk_update(hashed, ["checksum"])
def test_get_file_hash(tmpfile, settings):
    """Hashing ``b"hello"`` with sha256 configured gives a ``sha256:`` digest."""
    settings.HASHING_ALGORITHM = "sha256"
    tmpfile.write(b"hello")

    actual = utils.get_file_hash(tmpfile)

    # Reference value obtained with: echo -n "hello" | sha256sum
    expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
    assert actual == expected
def check_upload(stdout, upload):
    """Check an upload's file against its stored checksum.

    * If the file cannot be opened (``FileNotFoundError`` or
      ``PermissionError``), the upload is deleted and the result of
      ``upload.delete()`` is returned.
    * If the file hash equals ``upload.checksum``, nothing happens.
    * Otherwise the track metadata is refreshed from the file and the new
      checksum is saved, provided the track is attributed to the actor
      owning the upload's library and the metadata validates.

    Progress messages are written to ``stdout``. Every path except the
    deletion one returns ``None``.
    """
    try:
        handle = upload.get_audio_file()
    except (FileNotFoundError, PermissionError):
        notice = " Removing file #{} missing from disk at {}".format(
            upload.pk, upload.source
        )
        stdout.write(notice)
        return upload.delete()

    current_checksum = common_utils.get_file_hash(handle)
    if upload.checksum == current_checksum:
        # File untouched since the checksum was recorded.
        return

    stdout.write(
        " File #{} at {} was modified, updating metadata…".format(
            upload.pk, upload.source
        )
    )

    if upload.library.actor_id != upload.track.attributed_to_id:
        stdout.write(" Cannot update track metadata, track belongs to someone else")
        return

    tracks = models.Track.objects.select_related("artist", "album__artist")
    track = tracks.get(pk=upload.track_id)
    try:
        tasks.update_track_metadata(upload.get_metadata(), track)
    except serializers.ValidationError as exc:
        # The stored checksum is kept when the new metadata is rejected.
        stdout.write(" Invalid metadata: {}".format(exc))
        return

    upload.checksum = current_checksum
    upload.save(update_fields=["checksum"])
def test_audio_track_checksum(name, factories):
    """An upload created from a data file stores that file's hash as checksum."""
    fixture_path = os.path.join(DATA_DIR, name)
    upload = factories["music.Upload"](
        audio_file__from_path=fixture_path,
        mimetype=None,
    )

    # Hash the same file independently of the upload.
    with open(fixture_path, "rb") as fixture:
        reference_hash = common_utils.get_file_hash(fixture)

    assert upload.checksum == reference_hash
def handle_modified(event, stdout, library, in_place, **kwargs):
    """React to a modified file at ``event["path"]``.

    The file is hashed and compared with the library's finished uploads:

    1. If a finished upload already has that checksum, nothing is done.
    2. Otherwise, when ``in_place`` is truthy and a finished in-place upload
       has ``file://<path>`` as source, its track metadata is refreshed and
       the new checksum saved. This is refused when the track is attributed
       to an actor other than ``library.actor``. Either way the function
       returns without importing anything.
    3. Otherwise the file is imported as a new upload using the
       ``reference``, ``replace``, ``dispatch_outbox`` and ``broadcast``
       entries of ``kwargs``.

    Progress messages are written to ``stdout``.
    """
    finished_uploads = library.uploads.filter(import_status="finished")
    path = event["path"]
    with open(path, "rb") as handle:
        checksum = common_utils.get_file_hash(handle)

    if finished_uploads.filter(checksum=checksum).first():
        # found an existing file with same checksum, nothing to do
        stdout.write(" File already imported and metadata is up-to-date")
        return

    target = None
    if in_place:
        same_source = finished_uploads.in_place().filter(
            source="file://{}".format(path)
        )
        target = same_source.select_related(
            "track__attributed_to",
            "track__artist",
            "track__album__artist",
        ).first()

    if target:
        owner = target.track.attributed_to
        if owner and owner != library.actor:
            stdout.write(
                " Cannot update track metadata, track belongs to someone else"
            )
            return

        stdout.write(
            " Updating existing file #{} with new metadata…".format(target.pk)
        )
        audio_metadata = target.get_metadata()
        try:
            tasks.update_track_metadata(audio_metadata, target.track)
        except serializers.ValidationError as exc:
            stdout.write(" Invalid metadata: {}".format(exc))
        else:
            target.checksum = checksum
            target.save(update_fields=["checksum"])
        # Whatever the outcome, the file is known: do not import it again.
        return

    stdout.write(" Launching import for new file")
    create_upload(
        path=path,
        reference=kwargs["reference"],
        library=library,
        async_=False,
        replace=kwargs["replace"],
        in_place=in_place,
        dispatch_outbox=kwargs["dispatch_outbox"],
        broadcast=kwargs["broadcast"],
    )
def test_fix_uploads_checksum(factories, mocker):
    """``fix_file_checksum`` fills null checksums and keeps existing ones."""

    def force_checksum(upload, value):
        # ``QuerySet.update()`` writes the column directly, without going
        # through the model's ``save()``.
        upload.__class__.objects.filter(pk=upload.pk).update(checksum=value)

    already_hashed = factories["music.Upload"]()
    missing_hash = factories["music.Upload"]()
    force_checksum(already_hashed, "test")
    force_checksum(missing_hash, None)

    command = fix_uploads.Command()
    command.fix_file_checksum(dry_run=False, batch_size=100)

    already_hashed.refresh_from_db()
    missing_hash.refresh_from_db()

    # not updated
    assert already_hashed.checksum == "test"
    # updated
    assert missing_hash.checksum == common_utils.get_file_hash(
        missing_hash.audio_file
    )
def test_handle_modified_skips_existing_checksum(tmpfile, factories, mocker):
    """No upload is added when a finished upload already has the file's hash."""
    tmpfile.write(b"hello")
    library = factories["music.Library"]()
    known_hash = common_utils.get_file_hash(tmpfile)
    factories["music.Upload"](
        checksum=known_hash,
        library=library,
        import_status="finished",
    )

    import_files.handle_modified(
        event={"path": tmpfile.name},
        stdout=mocker.Mock(),
        library=library,
        in_place=True,
    )

    # Still only the upload created above.
    assert library.uploads.count() == 1
def save(self, **kwargs):
    """Fill in derived fields that are still empty, then save.

    Before delegating to the parent ``save()``:

    * ``mimetype`` is guessed from the audio file, or from the name of a
      ``file://`` source when there is no audio file;
    * ``size`` is taken from the audio file;
    * ``checksum`` is computed from the audio file when one can be obtained
      (a ``FileNotFoundError`` while fetching it is ignored);
    * ``fid`` is generated for an unsaved object without one, when the
      library's actor has a user.

    Returns whatever the parent ``save()`` returns.
    """
    if not self.mimetype:
        if self.audio_file:
            self.mimetype = utils.guess_mimetype(self.audio_file)
        elif self.source and self.source.startswith("file://"):
            self.mimetype = utils.guess_mimetype_from_name(self.source)

    if self.audio_file and not self.size:
        self.size = self.audio_file.size

    if not self.checksum:
        audio_file = None
        try:
            audio_file = self.get_audio_file()
        except FileNotFoundError:
            # File is gone from disk: leave the checksum empty.
            pass
        if audio_file:
            self.checksum = common_utils.get_file_hash(audio_file)

    lacks_identity = not (self.pk or self.fid)
    if lacks_identity and self.library.actor.get_user():
        self.fid = self.get_federation_id()

    return super().save(**kwargs)