def testMigrateReclaim(self):
    """Run ``migratefiles reclaim 11`` as a dry run and then for real.

    Three 6-character datafiles are created.  Both runs are expected to
    report only the first two of them (6 bytes each) followed by a
    12 byte total.
    """
    def reclaim(**extra):
        # Invoke the command at verbosity 2 and return everything it wrote
        # to stdout.  A SystemExit raised by the command is ignored.
        captured = StringIO()
        try:
            call_command('migratefiles', 'reclaim', '11',
                         stdout=captured, verbosity=2, **extra)
        except SystemExit:
            pass
        captured.seek(0)
        return captured.read()

    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    first = generate_datafile(None, dataset, "Hi mum")
    second = generate_datafile(None, dataset, "Hi mum")
    # A third datafile exists but is not expected in either report.
    generate_datafile(None, dataset, "Hi mum")

    # Dry run
    dry_output = reclaim(dryRun=True)
    self.assertEquals(dry_output,
                      'Would have migrated %s / %s saving 6 bytes\n'
                      'Would have migrated %s / %s saving 6 bytes\n'
                      'Would have reclaimed 12 bytes\n' %
                      (first.url, first.id, second.url, second.id))

    # Real run
    real_output = reclaim()
    self.assertEquals(real_output,
                      'Migrating %s / %s saving 6 bytes\n'
                      'Migrating %s / %s saving 6 bytes\n'
                      'Reclaimed 12 bytes\n' %
                      (first.url, first.id, second.url, second.id))
def testMigrateExperiment(self):
    """Migrate, then restore, every datafile of one experiment.

    Both subcommands are run at verbosity 2 and must print one line per
    datafile, in creation order.
    """
    dataset = generate_dataset()
    experiment = generate_experiment([dataset], [self.dummy_user])
    files = [
        generate_datafile(None, dataset, "Hi mum"),
        generate_datafile(None, dataset, "Hi mum"),
        generate_datafile(None, dataset, "Hi mum"),
    ]

    def run_on_experiment(subcommand):
        # Run the subcommand against the experiment and return its stdout.
        # A SystemExit raised by the command is ignored.
        captured = StringIO()
        try:
            call_command('migratefiles', subcommand, 'experiment',
                         experiment.id, verbosity=2, stdout=captured)
        except SystemExit:
            pass
        captured.seek(0)
        return captured.read()

    migrated = run_on_experiment('migrate')
    self.assertEquals(migrated,
                      'Migrated datafile %s\n'
                      'Migrated datafile %s\n'
                      'Migrated datafile %s\n' %
                      (files[0].id, files[1].id, files[2].id))

    restored = run_on_experiment('restore')
    self.assertEquals(restored,
                      'Restored datafile %s\n'
                      'Restored datafile %s\n'
                      'Restored datafile %s\n' %
                      (files[0].id, files[1].id, files[2].id))
def testMirrorDatafile(self):
    """Check the output of ``migratefiles mirror datafile``.

    The dry run (verbosity 1) must report what it would have done; the
    real run (verbosity 2) must report the mirrored datafile.
    """
    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    datafile = generate_datafile(None, dataset, "Hi grandpa")

    def mirror(**options):
        # Run the mirror subcommand for the datafile and return its stdout.
        # A SystemExit raised by the command is ignored.
        captured = StringIO()
        try:
            call_command('migratefiles', 'mirror', 'datafile', datafile.id,
                         stdout=captured, **options)
        except SystemExit:
            pass
        captured.seek(0)
        return captured.read()

    # Dry run ...
    dry_output = mirror(verbosity=1, dryRun=True)
    self.assertEquals(dry_output,
                      'Would have mirrored datafile %s\n' % datafile.id)

    # Do it
    real_output = mirror(verbosity=2)
    self.assertEquals(real_output,
                      'Mirrored datafile %s\n' % datafile.id)
def testErrors(self):
    """Check the stderr messages produced for three invalid invocations.

    Covers an unknown datafile id, an unknown destination name, and
    ``--dest`` combined with the restore subcommand.
    """
    def stderr_of(*args, **options):
        # Run migratefiles with the given arguments and return whatever it
        # wrote to stderr.  A SystemExit raised by the command is ignored.
        captured = StringIO()
        try:
            call_command('migratefiles', *args, stderr=captured, **options)
        except SystemExit:
            pass
        captured.seek(0)
        return captured.read()

    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    datafile = generate_datafile(None, dataset, "Hi mum")

    # Datafile id that does not exist
    message = stderr_of('migrate', 'datafile', 999)
    self.assertEquals(message,
                      'Datafile 999 does not exist\n'
                      'Error: No Datafiles selected\n')

    # Destination name that is not configured
    message = stderr_of('migrate', 'datafile', datafile.id, dest='nowhere')
    self.assertEquals(message,
                      'Error: Destination nowhere not known\n')

    # --dest is rejected for restore
    message = stderr_of('restore', 'datafile', datafile.id, dest='test')
    self.assertEquals(message,
                      'Error: The --dest option cannot '
                      'be used with the restore subcommand\n')
def testMigrationNoHashes(self):
    """Migrate a datafile while the test server's '?metadata' query is off.

    The local file must exist before the migration and be gone afterwards.
    """
    # Tweak the server to turn off the '?metadata' query
    self.server.server.allowQuery = False

    destination = Destination.get_destination('test')
    datafile = generate_datafile("1/2/3", self.dataset, "Hi mum")

    verified = datafile.verify(allowEmptyChecksums=True)
    self.assertEquals(verified, True)
    datafile.save()

    local_path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(local_path))

    migrate_datafile(datafile, destination)
    self.assertFalse(os.path.exists(local_path))
def testMigrateStoreWithSpaces(self):
    """Migrate and restore two datafiles whose file names contain a space.

    Each file's local path must exist before migration, disappear once the
    file has been migrated, and reappear after it has been restored.
    """
    dest = Destination.get_destination('test')
    datafile = generate_datafile('1/1/Hi Mum', self.dataset, "Hi mum")
    datafile2 = generate_datafile('1/1/Hi Dad', self.dataset, "Hi dad")

    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))
    # Fixed: this was taken from `datafile`, so the second file's local
    # path was never actually checked by the assertions below.
    path2 = datafile2.get_absolute_filepath()
    self.assertTrue(os.path.exists(path2))

    # Migrate them
    migrate_datafile(datafile, dest)
    self.assertFalse(os.path.exists(path))
    migrate_datafile(datafile2, dest)
    self.assertFalse(os.path.exists(path2))

    # Bring them back
    restore_datafile(datafile)
    self.assertTrue(os.path.exists(path))
    restore_datafile(datafile2)
    self.assertTrue(os.path.exists(path2))
def testScore(self):
    """Check the listing printed by ``migratefiles score``.

    Three 6-character datafiles are created.  The expected output has one
    line per datafile with the same score and a running ``total_size`` of
    6, 12 and 18.
    """
    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    first = generate_datafile(None, dataset, "Hi mum")
    second = generate_datafile(None, dataset, "Hi mum")
    third = generate_datafile(None, dataset, "Hi mum")

    captured = StringIO()
    try:
        call_command('migratefiles', 'score', stdout=captured)
    except SystemExit:
        # The command may exit explicitly; only its output matters here.
        pass
    captured.seek(0)
    listing = captured.read()

    expected = ('datafile %s / %s, size = 6, '
                'score = 0.778151250384, total_size = 6\n'
                'datafile %s / %s, size = 6, '
                'score = 0.778151250384, total_size = 12\n'
                'datafile %s / %s, size = 6, '
                'score = 0.778151250384, total_size = 18\n' %
                (first.url, first.id,
                 second.url, second.id,
                 third.url, third.id))
    self.assertEquals(listing, expected)
def testMirror(self):
    """Copy a datafile to the destination with ``noUpdate=True``.

    Before the call the remote URL must answer 404.  Afterwards the
    re-fetched datafile must still be local, and the remote copy must be
    9 bytes long (the length of "Hi granny").
    """
    dest = Destination.get_destination('test')
    datafile = generate_datafile(None, self.dataset, "Hi granny")
    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))

    url = dest.provider.generate_url(datafile)
    try:
        dest.provider.get_length(url)
    except HTTPError as e:
        # Only a 404 is the expected outcome; anything else is a real error.
        if e.code != 404:
            raise
    else:
        # Fixed: the original called the undefined name `assertFail()`,
        # which raised NameError rather than failing the test cleanly.
        self.fail('Remote copy of %s exists before mirroring' % url)

    self.assertTrue(migrate_datafile(datafile, dest, noUpdate=True))

    # Re-read the record so the check reflects what was saved.
    datafile = Dataset_File.objects.get(id=datafile.id)
    self.assertTrue(datafile.is_local())
    self.assertEqual(dest.provider.get_length(url), 9)
def _setup(self):
    """Create the users, experiments, datasets and datafiles used as fixtures.

    Objects are created in the same order as their numbering, and each is
    stored on ``self`` (``user1``..``user2``, ``exp1``..``exp4``,
    ``ds1``..``ds4``, ``df1``..``df8``).
    """
    # Users
    joe = generate_user('joe', 2)
    fred = generate_user('fred', 1)
    self.user1, self.user2 = joe, fred

    # Experiments: exp1 has both users, the rest have only joe
    self.exp1 = generate_experiment(users=[joe, fred])
    self.exp2 = generate_experiment(users=[joe])
    self.exp3 = generate_experiment(users=[joe])
    self.exp4 = generate_experiment(users=[joe])

    # Datasets: ds2 belongs to two experiments
    self.ds1 = generate_dataset(experiments=[self.exp1])
    self.ds2 = generate_dataset(experiments=[self.exp1, self.exp2])
    self.ds3 = generate_dataset(experiments=[self.exp3])
    self.ds4 = generate_dataset(experiments=[self.exp4])

    # Datafiles: (attribute, path or URL, dataset, keyword arguments),
    # created strictly in this order.
    datafile_specs = [
        ('df1', '1/2/1', self.ds1, {'size': 100}),
        ('df2', '1/2/2', self.ds1, {'size': 100, 'verified': False}),
        ('df3', 'http://foo.com/1/2/3', self.ds1, {'size': 1000}),
        ('df4', '1/2/4', self.ds2, {'size': 1000}),
        ('df5', '1/2/5', self.ds2, {'size': 10000}),
        ('df6', '1/2/6', self.ds3, {'size': 100000}),
        ('df7', '1/2/7', self.ds4, {'size': 0}),
        ('df8', '1/2/8', self.ds4, {'size': -1}),
    ]
    for attribute, location, dataset, options in datafile_specs:
        setattr(self, attribute,
                generate_datafile(location, dataset, **options))
def testMigrateRestore(self):
    """Migrate and restore one datafile, with and without ``noRemove``.

    Steps checked, in order:

    * migrating a datafile generated with ``verify=False`` raises
      MigrationError;
    * after ``verify()`` and ``save()``, migration succeeds and the local
      file is gone;
    * restoring brings the local file back and the remote URL answers 404;
    * with ``noRemove=True``, both migrate and restore leave the local file
      in place and the remote copy at 6 bytes.
    """
    dest = Destination.get_destination('test')
    datafile = generate_datafile(None, self.dataset, "Hi mum",
                                 verify=False)

    # Attempt to migrate without datafile hashes ... should
    # fail because we can't verify.
    with self.assertRaises(MigrationError):
        migrate_datafile(datafile, dest)

    # Verify sets hashes ...
    self.assertEqual(datafile.verify(allowEmptyChecksums=True), True)
    datafile.save()
    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))
    self.assertTrue(migrate_datafile(datafile, dest))
    self.assertFalse(os.path.exists(path))

    # Bring it back
    url = datafile.url
    self.assertTrue(restore_datafile(datafile))
    self.assertTrue(os.path.exists(path))

    # Check it was deleted remotely
    try:
        dest.provider.get_length(url)
    except HTTPError as e:
        # Only a 404 is the expected outcome; anything else is a real error.
        if e.code != 404:
            raise
    else:
        # Fixed: the original called the undefined name `assertFail()`,
        # which raised NameError rather than failing the test cleanly.
        self.fail('Remote copy of %s still exists after restore' % url)

    # Refresh the datafile object because it is now stale ...
    datafile = Dataset_File.objects.get(id=datafile.id)

    # Repeat the process with 'noRemove'
    self.assertTrue(migrate_datafile(datafile, dest, noRemove=True))
    self.assertTrue(os.path.exists(path))
    self.assertEqual(dest.provider.get_length(url), 6)
    self.assertTrue(restore_datafile(datafile, noRemove=True))
    self.assertTrue(os.path.exists(path))
    self.assertEqual(dest.provider.get_length(url), 6)
def do_provider(self, dest):
    """Exercise the provider of *dest* through put / get / metadata / remove.

    A 6-character datafile is stored at ``base_url + '1/2/3'``.  For each
    accessor the test checks the stored file, a URL outside the destination
    (expects MigrationProviderError) and a URL inside the destination that
    was never stored (expects HTTPError).  A provider whose metadata call
    raises NotImplementedError skips the metadata checks.
    """
    provider = dest.provider
    base_url = dest.base_url
    outside_url = 'http://foo/data/1/2/4'
    absent_url = base_url + '1/2/4'

    datafile = generate_datafile("1/2/3", self.dataset, "Hi mum")
    self.assertEquals(datafile.verify(allowEmptyChecksums=True), True)

    url = provider.generate_url(datafile)
    self.assertEquals(url, base_url + '1/2/3')
    provider.put_file(datafile, url)

    # Contents
    self.assertEqual(provider.get_file(url), "Hi mum")
    with self.assertRaises(MigrationProviderError):
        provider.get_file(outside_url)
    with self.assertRaises(HTTPError):
        provider.get_file(absent_url)

    # Length
    self.assertEqual(provider.get_length(url), 6)
    with self.assertRaises(MigrationProviderError):
        provider.get_length(outside_url)
    with self.assertRaises(HTTPError):
        provider.get_length(absent_url)

    # Metadata (optional for a provider)
    expected_metadata = {
        'sha512sum': '2274cc8c16503e3d182ffaa835c543b' +
                     'ce278bc8fc971f3bf38b94b4d9db44cd89c8f36d4006e' +
                     '5abea29bc05f7f0ea662cb4b0e805e56bbce97f00f94e' +
                     'a6e6498',
        'md5sum': '3b6b51114c3d0ad347e20b8e79765951',
        'length': 6,
    }
    try:
        self.assertEqual(provider.get_metadata(url), expected_metadata)
        with self.assertRaises(MigrationProviderError):
            # NOTE(review): single slash after 'http:' is as in the
            # original; confirm whether that is intentional.
            provider.get_metadata('http:/foo/data/1/2/4')
        with self.assertRaises(HTTPError):
            provider.get_metadata(absent_url)
    except NotImplementedError:
        pass

    # Removal: a second remove of the same URL must fail
    provider.remove_file(url)
    with self.assertRaises(MigrationProviderError):
        provider.get_length(outside_url)
    with self.assertRaises(HTTPError):
        provider.remove_file(url)
def testMigrateConfig(self):
    """Check the error messages produced for broken migration settings.

    Two cases are covered: an empty ``DEFAULT_MIGRATION_DESTINATION`` and
    an empty ``MIGRATION_DESTINATIONS`` list.  Each setting is restored
    afterwards, whether or not the assertion passes.
    """
    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    datafile = generate_datafile(None, dataset, "Hi mum")

    def migrate_stderr():
        # Try to migrate the datafile and return whatever was written to
        # stderr.  A SystemExit raised by the command is ignored.
        err = StringIO()
        try:
            call_command('migratefiles', 'migrate', 'datafile',
                         datafile.id, stderr=err)
        except SystemExit:
            pass
        err.seek(0)
        return err.read()

    # Read the old value before entering `try`, so that `finally` can never
    # refer to an unbound name.
    saved = settings.DEFAULT_MIGRATION_DESTINATION
    try:
        settings.DEFAULT_MIGRATION_DESTINATION = ''
        self.assertEqual(migrate_stderr(),
                         'Error: No default destination configured\n')
    finally:
        settings.DEFAULT_MIGRATION_DESTINATION = saved

    saved = settings.MIGRATION_DESTINATIONS
    try:
        settings.MIGRATION_DESTINATIONS = []
        Destination.clear_destinations_cache()
        self.assertEqual(migrate_stderr(),
                         'Error: Migration error: No destinations '
                         'have been configured\n')
    finally:
        settings.MIGRATION_DESTINATIONS = saved
        # Drop anything cached while the list was empty, so later tests
        # see the restored destinations.
        Destination.clear_destinations_cache()
def testMigrateDatafile(self):
    """Walk ``migratefiles migrate|restore datafile`` through its main cases.

    In order: refusal to migrate an unverified datafile, a dry run, a
    verbose migration of one file, a quiet migration of two more, a silent
    no-op on an already-migrated file, a verbose restore, a silent no-op on
    an already-local file, a ``noRemove`` migration, and a final restore
    that must land each file at a new local path.
    """
    def run(stream, *args, **options):
        # Run migratefiles with a fresh buffer attached as `stream`
        # ('stdout' or 'stderr') and return what was written to it.
        # A SystemExit raised by the command is ignored.
        captured = StringIO()
        options[stream] = captured
        try:
            call_command('migratefiles', *args, **options)
        except SystemExit:
            pass
        captured.seek(0)
        return captured.read()

    dataset = generate_dataset()
    generate_experiment([dataset], [self.dummy_user])
    datafile = generate_datafile(None, dataset, "Hi mum",
                                 verify=False, verified=False)
    datafile2 = generate_datafile(None, dataset, "Hi mum")
    datafile3 = generate_datafile(None, dataset, "Hi mum")

    # An unverified datafile is rejected
    errors = run('stderr', 'migrate', 'datafile', datafile.id)
    self.assertEquals(errors,
                      'Migration failed for datafile %s : '
                      'Only verified datafiles can be migrated '
                      'to this destination\n' % datafile.id)

    self.assertEquals(datafile.verify(allowEmptyChecksums=True), True)
    datafile.save()

    # (Paths should all be kosher now ...)
    path = datafile.get_absolute_filepath()
    path2 = datafile2.get_absolute_filepath()
    path3 = datafile3.get_absolute_filepath()
    all_paths = [path, path2, path3]
    for p in all_paths:
        self.assertTrue(os.path.exists(p))

    # Dry run ...
    output = run('stdout', 'migrate', 'datafile', datafile.id,
                 verbosity=1, dryRun=True)
    self.assertEquals(output,
                      'Would have migrated datafile %s\n' % datafile.id)
    for p in all_paths:
        self.assertTrue(os.path.exists(p))

    # Real run, verbose (migrates 1)
    output = run('stdout', 'migrate', 'datafile', datafile.id,
                 verbosity=2)
    self.assertEquals(output, 'Migrated datafile %s\n' % datafile.id)
    for p in all_paths:
        # Only the first file's local copy should have disappeared.
        self.assertTrue(os.path.exists(p) == (p != path))

    # Real run, normal (migrates 2 & 3)
    output = run('stdout', 'migrate', 'datafile',
                 datafile2.id, datafile3.id, verbosity=1)
    self.assertEquals(output, '')
    for p in all_paths:
        self.assertFalse(os.path.exists(p))

    # Cannot migrate a file that is not local (now)
    errors = run('stderr', 'migrate', 'datafile', datafile.id,
                 verbosity=2)
    self.assertEquals(errors, '')  # Should "fail" silently

    # Real restore, verbose (restores 1, 2 & 3)
    output = run('stdout', 'restore', 'datafile',
                 datafile.id, datafile2.id, datafile3.id, verbosity=2)
    self.assertEquals(output,
                      'Restored datafile %s\n'
                      'Restored datafile %s\n'
                      'Restored datafile %s\n' %
                      (datafile.id, datafile2.id, datafile3.id))
    for p in all_paths:
        self.assertTrue(os.path.exists(p))

    # Cannot restore files that are (now) local
    output = run('stdout', 'restore', 'datafile', datafile.id,
                 verbosity=2)
    self.assertEquals(output, '')  # Fail quietly ... not remote

    # Now try migrating with 'no remove'
    output = run('stdout', 'migrate', 'datafile',
                 datafile.id, datafile2.id, datafile3.id,
                 noRemove=True, verbosity=2)
    self.assertEquals(output,
                      'Migrated datafile %s\n'
                      'Migrated datafile %s\n'
                      'Migrated datafile %s\n' %
                      (datafile.id, datafile2.id, datafile3.id))
    for p in all_paths:
        self.assertTrue(os.path.exists(p))

    # When we bring them back now, the local pathnames should change
    # because the staging code won't clobber an existing file.
    output = run('stdout', 'restore', 'datafile',
                 datafile.id, datafile2.id, datafile3.id, verbosity=2)
    self.assertEquals(output,
                      'Restored datafile %s\n'
                      'Restored datafile %s\n'
                      'Restored datafile %s\n' %
                      (datafile.id, datafile2.id, datafile3.id))
    originals = [(path, datafile), (path2, datafile2), (path3, datafile3)]
    for old_path, stale in originals:
        fresh = Dataset_File.objects.get(id=stale.id)
        new_path = fresh.get_absolute_filepath()
        self.assertTrue(os.path.exists(old_path))
        self.assertTrue(os.path.exists(new_path))
        self.assertNotEqual(old_path, new_path)
        self.assertNotEqual(stale.get_absolute_filepath(), new_path)