Example #1
0
    def testMigrateReclaim(self):
        """Check the 'reclaim' subcommand in dry-run and real modes.

        Three datafiles are created and 'reclaim 11' is requested.  The
        expected output reports the first two datafiles (6 bytes each)
        and a total of 12 bytes, for both the dry run and the real run.
        """
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi mum")
        datafile2 = generate_datafile(None, dataset, "Hi mum")
        # A third datafile exists but is not expected in the output.
        generate_datafile(None, dataset, "Hi mum")

        def run_reclaim(**options):
            """Run 'migratefiles reclaim 11' and return its stdout text."""
            out = StringIO()
            try:
                call_command('migratefiles', 'reclaim', '11',
                             stdout=out, verbosity=2, **options)
            except SystemExit:
                # An exit from the command is tolerated; only the captured
                # output is checked.
                pass
            out.seek(0)
            return out.read()

        reported = (datafile.url, datafile.id,
                    datafile2.url, datafile2.id)

        self.assertEqual(run_reclaim(dryRun=True),
                         'Would have migrated %s / %s saving 6 bytes\n'
                         'Would have migrated %s / %s saving 6 bytes\n'
                         'Would have reclaimed 12 bytes\n' % reported)

        self.assertEqual(run_reclaim(),
                         'Migrating %s / %s saving 6 bytes\n'
                         'Migrating %s / %s saving 6 bytes\n'
                         'Reclaimed 12 bytes\n' % reported)
Example #2
0
    def testMigrateExperiment(self):
        """Migrate and then restore every datafile of one experiment.

        Both runs use verbosity 2 and are expected to print one line per
        datafile, in creation order.
        """
        dataset = generate_dataset()
        experiment = generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi mum")
        datafile2 = generate_datafile(None, dataset, "Hi mum")
        datafile3 = generate_datafile(None, dataset, "Hi mum")
        ids = (datafile.id, datafile2.id, datafile3.id)

        def run_on_experiment(subcommand):
            """Run the subcommand on the experiment; return stdout text."""
            out = StringIO()
            try:
                call_command('migratefiles', subcommand, 'experiment',
                             experiment.id,
                             verbosity=2, stdout=out)
            except SystemExit:
                # An exit from the command is tolerated; only the captured
                # output is checked.
                pass
            out.seek(0)
            return out.read()

        self.assertEqual(run_on_experiment('migrate'),
                         'Migrated datafile %s\n'
                         'Migrated datafile %s\n'
                         'Migrated datafile %s\n' % ids)

        self.assertEqual(run_on_experiment('restore'),
                         'Restored datafile %s\n'
                         'Restored datafile %s\n'
                         'Restored datafile %s\n' % ids)
Example #3
0
    def testMirrorDatafile(self):
        """Check the 'mirror datafile' subcommand, dry run then real run."""
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi grandpa")

        def run_mirror(**options):
            """Run 'migratefiles mirror datafile <id>'; return stdout text."""
            out = StringIO()
            try:
                call_command('migratefiles', 'mirror', 'datafile',
                             datafile.id, stdout=out, **options)
            except SystemExit:
                # An exit from the command is tolerated; only the captured
                # output is checked.
                pass
            out.seek(0)
            return out.read()

        # Dry run ...
        self.assertEqual(run_mirror(verbosity=1, dryRun=True),
                         'Would have mirrored datafile %s\n' % datafile.id)

        # Do it
        self.assertEqual(run_mirror(verbosity=2),
                         'Mirrored datafile %s\n' % datafile.id)
Example #4
0
    def testErrors(self):
        """Check the error messages 'migratefiles' writes to stderr.

        Covers an unknown datafile id, an unknown destination name, and
        using --dest together with the restore subcommand.
        """
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi mum")

        def stderr_of(*args, **options):
            """Run 'migratefiles' with the arguments; return stderr text."""
            err = StringIO()
            try:
                call_command('migratefiles', *args, stderr=err, **options)
            except SystemExit:
                # These invocations are expected to fail; only the message
                # written to stderr is checked.
                pass
            err.seek(0)
            return err.read()

        self.assertEqual(stderr_of('migrate', 'datafile', 999),
                         'Datafile 999 does not exist\n'
                         'Error: No Datafiles selected\n')

        self.assertEqual(stderr_of('migrate', 'datafile', datafile.id,
                                   dest='nowhere'),
                         'Error: Destination nowhere not known\n')

        self.assertEqual(stderr_of('restore', 'datafile', datafile.id,
                                   dest='test'),
                         'Error: The --dest option cannot '
                         'be used with the restore subcommand\n')
Example #5
0
 def testMigrationNoHashes(self):
     """Migrate a datafile while the test server's query support is off.

     After a successful verify and save, migrating the datafile to the
     'test' destination must remove the local copy.
     """
     # Tweak the server to turn off the '?metadata' query
     self.server.server.allowQuery = False

     dest = Destination.get_destination('test')
     datafile = generate_datafile("1/2/3", self.dataset, "Hi mum")
     self.assertEqual(datafile.verify(allowEmptyChecksums=True), True)
     datafile.save()
     path = datafile.get_absolute_filepath()
     self.assertTrue(os.path.exists(path))
     migrate_datafile(datafile, dest)
     self.assertFalse(os.path.exists(path))
Example #6
0
    def testMigrateStoreWithSpaces(self):
        """Migrate and restore two datafiles whose names contain spaces.

        Each file must disappear locally after migration and reappear at
        the same path after restoration.
        """
        dest = Destination.get_destination('test')

        datafile = generate_datafile('1/1/Hi Mum', self.dataset, "Hi mum")
        datafile2 = generate_datafile('1/1/Hi Dad', self.dataset, "Hi dad")

        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        # Must come from datafile2: taking it from datafile would make every
        # path2 check below re-test the first file only.
        path2 = datafile2.get_absolute_filepath()
        self.assertTrue(os.path.exists(path2))

        # Migrate them
        migrate_datafile(datafile, dest)
        self.assertFalse(os.path.exists(path))
        migrate_datafile(datafile2, dest)
        self.assertFalse(os.path.exists(path2))

        # Bring them back
        restore_datafile(datafile)
        self.assertTrue(os.path.exists(path))
        restore_datafile(datafile2)
        self.assertTrue(os.path.exists(path2))
Example #7
0
    def testScore(self):
        """Check the per-datafile listing printed by the 'score' subcommand.

        Three datafiles are created; the expected output has one line per
        datafile with size 6, an identical score and a running total_size.
        """
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi mum")
        datafile2 = generate_datafile(None, dataset, "Hi mum")
        datafile3 = generate_datafile(None, dataset, "Hi mum")

        out = StringIO()
        try:
            call_command('migratefiles', 'score', stdout=out)
        except SystemExit:
            # An exit from the command is tolerated; only the captured
            # output is checked.
            pass
        out.seek(0)

        # NOTE(review): the expected score text has 12 significant digits,
        # so it depends on how the command formats floats -- confirm when
        # changing interpreter versions.
        self.assertEqual(out.read(),
                         'datafile %s / %s, size = 6, '
                         'score = 0.778151250384, total_size = 6\n'
                         'datafile %s / %s, size = 6, '
                         'score = 0.778151250384, total_size = 12\n'
                         'datafile %s / %s, size = 6, '
                         'score = 0.778151250384, total_size = 18\n' %
                         (datafile.url, datafile.id,
                          datafile2.url, datafile2.id,
                          datafile3.url, datafile3.id))
Example #8
0
    def testMirror(self):
        """Mirror a datafile with noUpdate=True and check both copies exist.

        Before migrating, the destination must answer 404 for the file's
        URL.  Afterwards the datafile is still local and the destination
        reports a length of 9.
        """
        dest = Destination.get_destination('test')
        datafile = generate_datafile(None, self.dataset, "Hi granny")
        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        url = dest.provider.generate_url(datafile)

        # The file must not be at the destination yet.
        try:
            dest.provider.get_length(url)
        except HTTPError as e:
            if e.code != 404:
                # Any other HTTP error is unexpected; re-raise it with its
                # original traceback.
                raise
        else:
            self.fail('Expected a 404 for %s before mirroring' % url)

        self.assertTrue(migrate_datafile(datafile, dest, noUpdate=True))
        # Reload the datafile from the database before checking its state.
        datafile = Dataset_File.objects.get(id=datafile.id)
        self.assertTrue(datafile.is_local())
        self.assertEqual(dest.provider.get_length(url), 9)
Example #9
0
 def _setup(self):
     """Create the fixture objects and store them as attributes on self.

     Builds two users (user1, user2), four experiments (exp1..exp4), four
     datasets (ds1..ds4) and eight datafiles (df1..df8), in that order.
     """
     self.user1, self.user2 = generate_user('joe', 2), generate_user('fred', 1)

     # exp1 is shared by both users; exp2..exp4 belong to user1 only.
     self.exp1 = generate_experiment(users=[self.user1, self.user2])
     self.exp2, self.exp3, self.exp4 = [
         generate_experiment(users=[self.user1]) for _ in range(3)]

     # ds2 is the only dataset attached to more than one experiment.
     self.ds1 = generate_dataset(experiments=[self.exp1])
     self.ds2 = generate_dataset(experiments=[self.exp1, self.exp2])
     self.ds3 = generate_dataset(experiments=[self.exp3])
     self.ds4 = generate_dataset(experiments=[self.exp4])

     # (attribute, url, dataset, extra keyword arguments), in creation order.
     datafile_specs = (
         ('df1', '1/2/1', self.ds1, {'size': 100}),
         ('df2', '1/2/2', self.ds1, {'size': 100, 'verified': False}),
         ('df3', 'http://foo.com/1/2/3', self.ds1, {'size': 1000}),
         ('df4', '1/2/4', self.ds2, {'size': 1000}),
         ('df5', '1/2/5', self.ds2, {'size': 10000}),
         ('df6', '1/2/6', self.ds3, {'size': 100000}),
         ('df7', '1/2/7', self.ds4, {'size': 0}),
         ('df8', '1/2/8', self.ds4, {'size': -1}),
     )
     for attr, url, dataset, extra in datafile_specs:
         setattr(self, attr, generate_datafile(url, dataset, **extra))
Example #10
0
    def testMigrateRestore(self):
        """Migrate and restore one datafile, with and without 'noRemove'.

        Migration must be refused until the datafile has been verified.
        A plain migrate/restore moves the file away and back and removes
        the remote copy.  With noRemove=True both copies are kept.
        """
        dest = Destination.get_destination('test')

        datafile = generate_datafile(None, self.dataset, "Hi mum",
                                     verify=False)

        # Attempt to migrate without datafile hashes ... should
        # fail because we can't verify.
        with self.assertRaises(MigrationError):
            migrate_datafile(datafile, dest)

        # Verify sets hashes ...
        self.assertEqual(datafile.verify(allowEmptyChecksums=True), True)
        datafile.save()
        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        self.assertTrue(migrate_datafile(datafile, dest))
        self.assertFalse(os.path.exists(path))

        # Bring it back
        url = datafile.url
        self.assertTrue(restore_datafile(datafile))
        self.assertTrue(os.path.exists(path))
        # Check it was deleted remotely
        try:
            dest.provider.get_length(url)
        except HTTPError as e:
            if e.code != 404:
                # Any other HTTP error is unexpected; re-raise it with its
                # original traceback.
                raise
        else:
            self.fail('Expected a 404 for %s after restoring' % url)

        # Refresh the datafile object because it is now stale ...
        datafile = Dataset_File.objects.get(id=datafile.id)

        # Repeat the process with 'noRemove'
        self.assertTrue(migrate_datafile(datafile, dest, noRemove=True))
        self.assertTrue(os.path.exists(path))
        self.assertEqual(dest.provider.get_length(url), 6)
        self.assertTrue(restore_datafile(datafile, noRemove=True))
        self.assertTrue(os.path.exists(path))
        self.assertEqual(dest.provider.get_length(url), 6)
Example #11
0
    def do_provider(self, dest):
        """Run the shared put/get/length/metadata/remove checks on a provider.

        dest -- destination object; its ``provider`` and ``base_url``
                attributes are exercised.

        A provider whose get_metadata raises NotImplementedError skips the
        metadata checks.
        """
        provider = dest.provider
        base_url = dest.base_url
        datafile = generate_datafile("1/2/3", self.dataset, "Hi mum")
        self.assertEqual(datafile.verify(allowEmptyChecksums=True), True)
        url = provider.generate_url(datafile)
        self.assertEqual(url, base_url + '1/2/3')
        provider.put_file(datafile, url)

        # A URL outside the destination is a provider error; a URL inside
        # the destination that was never stored is an HTTP error.
        self.assertEqual(provider.get_file(url), "Hi mum")
        with self.assertRaises(MigrationProviderError):
            provider.get_file('http://foo/data/1/2/4')
        with self.assertRaises(HTTPError):
            provider.get_file(base_url + '1/2/4')

        self.assertEqual(provider.get_length(url), 6)
        with self.assertRaises(MigrationProviderError):
            provider.get_length('http://foo/data/1/2/4')
        with self.assertRaises(HTTPError):
            provider.get_length(base_url + '1/2/4')

        try:
            self.assertEqual(provider.get_metadata(url),
                             {'sha512sum' : '2274cc8c16503e3d182ffaa835c543b' +
                              'ce278bc8fc971f3bf38b94b4d9db44cd89c8f36d4006e' +
                              '5abea29bc05f7f0ea662cb4b0e805e56bbce97f00f94e' +
                              'a6e6498',
                              'md5sum' : '3b6b51114c3d0ad347e20b8e79765951',
                              'length' : 6})
            # NOTE(review): this URL has a single slash after 'http:',
            # unlike the other foreign URLs here -- confirm it is intended.
            with self.assertRaises(MigrationProviderError):
                provider.get_metadata('http:/foo/data/1/2/4')
            # Kept at the same level as the check above: nested inside it,
            # this check would sit after the raising call and never run.
            with self.assertRaises(HTTPError):
                provider.get_metadata(base_url + '1/2/4')
        except NotImplementedError:
            # Metadata queries are optional for a provider.
            pass

        provider.remove_file(url)
        # NOTE(review): this repeats the get_length check from above; it may
        # have been meant to call remove_file on the foreign URL -- confirm.
        with self.assertRaises(MigrationProviderError):
            provider.get_length('http://foo/data/1/2/4')
        # Removing the same file a second time must fail.
        with self.assertRaises(HTTPError):
            provider.remove_file(url)
Example #12
0
    def testMigrateConfig(self):
        """Check the errors reported for missing migration configuration.

        Covers an empty DEFAULT_MIGRATION_DESTINATION and an empty
        MIGRATION_DESTINATIONS list.  Each setting is restored afterwards.
        """
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset, "Hi mum")

        def migrate_stderr():
            """Run 'migrate datafile <id>' and return its stderr text."""
            err = StringIO()
            try:
                call_command('migratefiles', 'migrate', 'datafile',
                             datafile.id, stderr=err)
            except SystemExit:
                # The command is expected to fail; only the message written
                # to stderr is checked.
                pass
            err.seek(0)
            return err.read()

        # Read the old value before entering the try block, so the finally
        # clause never refers to an unbound name.
        saved = settings.DEFAULT_MIGRATION_DESTINATION
        try:
            settings.DEFAULT_MIGRATION_DESTINATION = ''
            self.assertEqual(migrate_stderr(),
                             'Error: No default destination configured\n')
        finally:
            settings.DEFAULT_MIGRATION_DESTINATION = saved

        saved = settings.MIGRATION_DESTINATIONS
        try:
            settings.MIGRATION_DESTINATIONS = []
            Destination.clear_destinations_cache()
            self.assertEqual(migrate_stderr(),
                             'Error: Migration error: No destinations '
                             'have been configured\n')
        finally:
            settings.MIGRATION_DESTINATIONS = saved
            # Clear the cache again so that nothing cached while the list
            # was empty outlives this test.
            Destination.clear_destinations_cache()
Example #13
0
    def testMigrateDatafile(self):
        """Exercise 'migratefiles migrate|restore datafile' end to end.

        Steps, in order: refusal to migrate an unverified datafile, a dry
        run, a verbose migrate of one file, a normal migrate of the other
        two, a repeated migrate and a repeated restore that both produce
        no output, and a 'noRemove' migrate followed by a restore that
        must land on new local paths.
        """
        dataset = generate_dataset()
        # Return value is not needed; the call is kept for its side effects.
        generate_experiment([dataset], [self.dummy_user])
        datafile = generate_datafile(None, dataset,
                                     "Hi mum", verify=False, verified=False)
        datafile2 = generate_datafile(None, dataset, "Hi mum")
        datafile3 = generate_datafile(None, dataset, "Hi mum")
        all_ids = (datafile.id, datafile2.id, datafile3.id)

        def capture(stream, *args, **options):
            """Run 'migratefiles' and return the text written to a stream.

            stream -- 'stdout' or 'stderr'; only that stream is captured.
            """
            buf = StringIO()
            options[stream] = buf
            try:
                call_command('migratefiles', *args, **options)
            except SystemExit:
                # An exit from the command is tolerated; only the captured
                # output is checked.
                pass
            buf.seek(0)
            return buf.read()

        self.assertEqual(capture('stderr', 'migrate', 'datafile',
                                 datafile.id),
                         'Migration failed for datafile %s : '
                         'Only verified datafiles can be migrated '
                         'to this destination\n' % datafile.id)

        self.assertEqual(datafile.verify(allowEmptyChecksums=True), True)
        datafile.save()

        # (Paths should all be kosher now ...)
        path = datafile.get_absolute_filepath()
        path2 = datafile2.get_absolute_filepath()
        path3 = datafile3.get_absolute_filepath()
        paths = [path, path2, path3]
        for p in paths:
            self.assertTrue(os.path.exists(p))

        # Dry run ...
        self.assertEqual(capture('stdout', 'migrate', 'datafile',
                                 datafile.id, verbosity=1, dryRun=True),
                         'Would have migrated datafile %s\n' % datafile.id)
        for p in paths:
            self.assertTrue(os.path.exists(p))

        # Real run, verbose (migrates 1)
        self.assertEqual(capture('stdout', 'migrate', 'datafile',
                                 datafile.id, verbosity=2),
                         'Migrated datafile %s\n' % datafile.id)
        for p in paths:
            # Only the first file should be gone at this point.
            self.assertTrue(os.path.exists(p) == (p != path))

        # Real run, normal (migrates 2 & 3)
        self.assertEqual(capture('stdout', 'migrate', 'datafile',
                                 datafile2.id, datafile3.id, verbosity=1),
                         '')
        for p in paths:
            self.assertFalse(os.path.exists(p))

        # Cannot migrate a file that is not local (now)
        self.assertEqual(capture('stderr', 'migrate', 'datafile',
                                 datafile.id, verbosity=2),
                         '')  # Should "fail" silently

        # Real restore, verbose (restores 1, 2 & 3)
        self.assertEqual(capture('stdout', 'restore', 'datafile',
                                 datafile.id, datafile2.id, datafile3.id,
                                 verbosity=2),
                         'Restored datafile %s\n'
                         'Restored datafile %s\n'
                         'Restored datafile %s\n' % all_ids)
        for p in paths:
            self.assertTrue(os.path.exists(p))

        # Cannot restore files that are (now) local
        self.assertEqual(capture('stdout', 'restore', 'datafile',
                                 datafile.id, verbosity=2),
                         '')  # Fail quietly ... not remote

        # Now try migrating with 'no remove'
        self.assertEqual(capture('stdout', 'migrate', 'datafile',
                                 datafile.id, datafile2.id, datafile3.id,
                                 noRemove=True, verbosity=2),
                         'Migrated datafile %s\n'
                         'Migrated datafile %s\n'
                         'Migrated datafile %s\n' % all_ids)
        for p in paths:
            self.assertTrue(os.path.exists(p))

        # When we bring them back now, the local pathnames should change
        # because the staging code won't clobber an existing file.
        self.assertEqual(capture('stdout', 'restore', 'datafile',
                                 datafile.id, datafile2.id, datafile3.id,
                                 verbosity=2),
                         'Restored datafile %s\n'
                         'Restored datafile %s\n'
                         'Restored datafile %s\n' % all_ids)
        for p, d in [(path, datafile), (path2, datafile2),
                     (path3, datafile3)]:
            # Reload from the database to see the path after the restore.
            dd = Dataset_File.objects.get(id=d.id)
            self.assertTrue(os.path.exists(p))
            self.assertTrue(os.path.exists(dd.get_absolute_filepath()))
            self.assertNotEqual(p, dd.get_absolute_filepath())
            self.assertNotEqual(d.get_absolute_filepath(),
                                dd.get_absolute_filepath())