Exemple #1
0
    def testMigrateStoreWithSpaces(self):
        """Migrate two replicas whose names contain spaces, then restore them.

        Each datafile is checked against its own absolute file path: the
        path must exist before migration, must be gone after migrating the
        replica to the 'test' location, and must exist again after migrating
        the preferred replica back to the 'local' location.
        """
        dest = Location.get_location('test')
        local = Location.get_location('local')

        datafile, replica = generate_datafile('1/1/Hi Mum', self.dataset,
                                              "Hi mum")
        datafile2, replica2 = generate_datafile('1/1/Hi Dad', self.dataset,
                                                "Hi dad")

        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        # Take the path from the second datafile; reading it from the first
        # one again would leave the second file completely unchecked.
        path2 = datafile2.get_absolute_filepath()
        self.assertTrue(os.path.exists(path2))

        # Migrate them
        migrate_replica(replica, dest)
        self.assertFalse(os.path.exists(path))
        migrate_replica(replica2, dest)
        self.assertFalse(os.path.exists(path2))

        # Bring them back
        migrate_replica(datafile.get_preferred_replica(), local)
        self.assertTrue(os.path.exists(path))
        migrate_replica(datafile2.get_preferred_replica(), local)
        self.assertTrue(os.path.exists(path2))
Exemple #2
0
 def setUp(self):
     """Create user 'fred', an experiment and a dataset, then start a
     SimpleHttpTestServer kept on self.server."""
     fred = generate_user('fred')
     self.user = fred
     Location.force_initialize()
     experiment = generate_experiment(users=[fred])
     self.experiment = experiment
     self.dataset = generate_dataset(experiments=[experiment])
     http_server = SimpleHttpTestServer()
     self.server = http_server
     http_server.start()
Exemple #3
0
 def testReplicaVerify(self):
     """Exercise Replica.verify() with and without datafile checksums.

     settings.REQUIRE_DATAFILE_CHECKSUMS is modified during the test and
     restored to its saved value in the finally clause.
     """
     from django.conf import settings
     saved = settings.REQUIRE_DATAFILE_CHECKSUMS
     try:
         Location.get_location('test')
         datafile, replica = generate_datafile("1/2/3", self.dataset,
                                               "Hi mum")
         # With checksums required and present, verification must pass.
         settings.REQUIRE_DATAFILE_CHECKSUMS = True
         self.assertTrue(replica.verify(), 'Replica.verify() failed.')
         # Blank out both checksums: verification must now fail, both by
         # the global setting and by the explicit keyword argument.
         replica.datafile.sha512sum = ''
         replica.datafile.md5sum = ''
         self.assertFalse(
             replica.verify(),
             'Replica.verify() succeeded despite no checksum '
             '(settings.REQUIRE_DATAFILE_CHECKSUMS=True).')
         self.assertFalse(
             replica.verify(allowEmptyChecksums=False),
             'Replica.verify() succeeded despite no checksum '
             '(allowEmptyChecksums=False)')
         # With checksums no longer required, missing checksums are accepted.
         # NOTE(review): these assignments go through `datafile`, the ones
         # above through `replica.datafile` - confirm both refer to the
         # same object.
         settings.REQUIRE_DATAFILE_CHECKSUMS = False
         datafile.sha512sum = None
         datafile.md5sum = None
         self.assertTrue(
             replica.verify(allowEmptyChecksums=True),
             'Replica.verify() failed wrongly '
             '(allowEmptyChecksums=True)')
         # Reset again before the final check; presumably guards against
         # verify() having filled the checksums in - TODO confirm.
         datafile.sha512sum = None
         datafile.md5sum = None
         self.assertTrue(replica.verify(),
                         'Replica.verify() failed wrongly')
     finally:
         settings.REQUIRE_DATAFILE_CHECKSUMS = saved
Exemple #4
0
 def test_location(self):
     """Check the default location, the 'staging' location and the
     total number of Location rows."""
     from tardis.tardis_portal.models import Location
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(Location.get_default_location().name, 'local')
     self.assertEqual(Location.get_location('staging').name, 'staging')
     # count() lets the database count instead of loading every row.
     self.assertEqual(Location.objects.count(), 6)
Exemple #5
0
 def test_location(self):
     """Verify the names of the default and 'staging' locations and that
     six Location rows exist."""
     from tardis.tardis_portal.models import Location
     # assertEqual is the supported spelling; assertEquals is a deprecated
     # alias.
     self.assertEqual(Location.get_default_location().name, 'local')
     self.assertEqual(Location.get_location('staging').name, 'staging')
     # Count in the database rather than materialising the whole queryset.
     self.assertEqual(Location.objects.count(), 6)
Exemple #6
0
    def setUp(self):
        """Create a user, a file in the staging test area and a replica for it.

        The staging_hook post_save handler is disconnected from Replica
        first.  A temporary directory is created under
        settings.GET_FULL_STAGING_PATH_TEST, a small file is written into it,
        and an experiment, dataset, datafile and replica pointing at that
        file are saved.  The replica is kept on self.replica.
        """
        from tardis.tardis_portal import models
        from tempfile import mkdtemp, mkstemp
        from django.conf import settings
        from os import path
        import os

        # Disconnect post_save signal
        from django.db.models.signals import post_save
        from tardis.tardis_portal.models import \
            staging_hook, Replica, Location
        post_save.disconnect(staging_hook, sender=Replica)

        from django.contrib.auth.models import User
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        try:
            os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
        except OSError:
            # Best effort: the directory usually exists already.
            pass
        self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)

        # mkstemp creates the file atomically and hands back an open
        # descriptor, avoiding the name race of the deprecated mktemp().
        fd, self.file = mkstemp(dir=self.temp)
        content = 'test file'
        with os.fdopen(fd, "w+b") as f:
            f.write(content)

        Location.force_initialize()

        # make datafile
        exp = models.Experiment(title='test exp1',
                                institution_name='monash',
                                created_by=self.user)
        exp.save()

        # make dataset
        dataset = models.Dataset(description="dataset description...")
        dataset.save()
        dataset.experiments.add(exp)
        dataset.save()

        # create datafile
        df = models.Dataset_File(dataset=dataset, size=len(content),
                                 filename=path.basename(self.file),
                                 md5sum='f20d9f2072bbeb6691c0f9c5099b01f3')
        df.save()

        # create replica
        base_url = 'file://' + settings.GET_FULL_STAGING_PATH_TEST
        location = Location.load_location({
            'name': 'staging-test-yyy', 'url': base_url, 'type': 'external',
            'priority': 10, 'transfer_provider': 'local'})
        replica = models.Replica(datafile=df, url='file://'+self.file,
                                 protocol="staging", location=location)
        replica.verify()
        replica.save()
        self.replica = replica
Exemple #7
0
 def setUp(self):
     """Prepare user 'fred', an experiment and a dataset, and start a
     SimpleHttpTestServer stored on self.server."""
     owner = generate_user('fred')
     self.user = owner
     Location.force_initialize()
     exp = generate_experiment(users=[owner])
     self.experiment = exp
     self.dataset = generate_dataset(experiments=[exp])
     server = SimpleHttpTestServer()
     self.server = server
     server.start()
    def testMigrateStoreWithSpaces(self):
        """Round-trip two replicas with spaces in their names between the
        'local' and 'test' locations.

        For each datafile its own absolute file path must exist initially,
        disappear once the replica has been migrated to 'test', and exist
        again after the preferred replica is migrated back to 'local'.
        """
        dest = Location.get_location('test')
        local = Location.get_location('local')

        datafile, replica = generate_datafile('1/1/Hi Mum', self.dataset,
                                              "Hi mum")
        datafile2, replica2 = generate_datafile('1/1/Hi Dad', self.dataset,
                                                "Hi dad")

        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        # The second path must come from datafile2; using datafile here
        # would only re-check the first file.
        path2 = datafile2.get_absolute_filepath()
        self.assertTrue(os.path.exists(path2))

        # Migrate them
        migrate_replica(replica, dest)
        self.assertFalse(os.path.exists(path))
        migrate_replica(replica2, dest)
        self.assertFalse(os.path.exists(path2))

        # Bring them back
        migrate_replica(datafile.get_preferred_replica(), local)
        self.assertTrue(os.path.exists(path))
        migrate_replica(datafile2.get_preferred_replica(), local)
        self.assertTrue(os.path.exists(path2))
 def testReplicaVerify(self):
     """Check Replica.verify() against present, empty and missing checksums.

     The test changes settings.REQUIRE_DATAFILE_CHECKSUMS and puts the
     saved value back in the finally clause.
     """
     from django.conf import settings
     saved = settings.REQUIRE_DATAFILE_CHECKSUMS
     try:
         Location.get_location('test')
         datafile, replica = generate_datafile("1/2/3", self.dataset,
                                               "Hi mum")
         # Checksums required and available: verify() must succeed.
         settings.REQUIRE_DATAFILE_CHECKSUMS = True
         self.assertTrue(replica.verify(), 'Replica.verify() failed.')
         # Empty checksums must be rejected while they are required.
         replica.datafile.sha512sum = ''
         replica.datafile.md5sum = ''
         self.assertFalse(
             replica.verify(),
             'Replica.verify() succeeded despite no checksum '
             '(settings.REQUIRE_DATAFILE_CHECKSUMS=True).')
         self.assertFalse(replica.verify(allowEmptyChecksums=False),
                          'Replica.verify() succeeded despite no checksum '
                          '(allowEmptyChecksums=False)')
         # Once checksums are optional, None checksums must be accepted.
         # NOTE(review): `datafile` is used here but `replica.datafile`
         # above - confirm they are the same object.
         settings.REQUIRE_DATAFILE_CHECKSUMS = False
         datafile.sha512sum = None
         datafile.md5sum = None
         self.assertTrue(replica.verify(allowEmptyChecksums=True),
                         'Replica.verify() failed wrongly '
                         '(allowEmptyChecksums=True)')
         # Cleared a second time before the last check; presumably in case
         # verify() repopulated them - TODO confirm.
         datafile.sha512sum = None
         datafile.md5sum = None
         self.assertTrue(replica.verify(),
                         'Replica.verify() failed wrongly')
     finally:
         settings.REQUIRE_DATAFILE_CHECKSUMS = saved
    def setUp(self):
        """Create an owner, an owned experiment, a dataset and two datafiles.

        Each datafile is backed by a file from the 'fixtures' directory next
        to this module and gets a replica in the 'test-grabber' location.
        The dataset and the reloaded datafiles are stored on self.
        """
        # Create test owner without enough details
        owner = User.objects.create_user('testuser',
                                         '*****@*****.**',
                                         'password')
        UserProfile(user=owner, isDjangoAccount=True).save()

        Location.force_initialize()

        # Create test experiment and make user the owner of it
        exp = Experiment(title='Text Experiment',
                         institution_name='Test Uni',
                         created_by=owner)
        exp.save()
        ObjectACL(
            pluginId='django_user',
            entityId=str(owner.id),
            content_object=exp,
            canRead=True,
            canWrite=True,
            canDelete=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED).save()

        ds = Dataset(description='dataset description...')
        ds.save()
        ds.experiments.add(exp)
        ds.save()

        def create_datafile(filename):
            # Register one fixture file as a datafile plus a file:// replica.
            fixture = path.join(path.dirname(__file__), 'fixtures',
                                filename)
            size, sha512sum = get_size_and_sha512sum(fixture)

            df = Dataset_File(dataset=ds,
                              filename=path.basename(fixture),
                              size=size,
                              sha512sum=sha512sum)
            df.save()

            location_spec = {
                'name': 'test-grabber',
                'url': 'file://' + path.abspath(path.dirname(fixture)),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local',
            }
            rep = Replica(datafile=df,
                          url='file://'+path.abspath(fixture),
                          protocol='file',
                          location=Location.load_location(location_spec))
            rep.verify()
            rep.save()
            # Re-read from the database rather than returning the local copy.
            return Dataset_File.objects.get(pk=df.pk)

        self.dataset = ds
        self.datafiles = [
            create_datafile(name)
            for name in ('data_grabber_test1.admin', 'testfile.txt')
        ]
Exemple #11
0
    def setUp(self):
        """Create a user, an owned experiment, a dataset and a test file.

        Directories for the experiment and dataset are created under
        settings.FILE_STORE_PATH when missing.  A file named 'testfile.txt'
        is written into a fresh temporary directory; its size is stored in
        self.f1_size and a read handle to it is left open on self.f1.
        """
        from os import path, mkdir
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        # Model.save() returns None, so build the profile first and save it
        # separately; chaining the calls would store None on self.
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()

        self.test_dir = mkdtemp()

        Location.force_initialize()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        acl.save()

        self.dataset = \
            Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.experiment_path = path.join(
            settings.FILE_STORE_PATH,
            str(self.dataset.get_first_experiment().id))

        self.dataset_path = path.join(self.experiment_path,
                                      str(self.dataset.id))

        if not path.exists(self.experiment_path):
            mkdir(self.experiment_path)
        if not path.exists(self.dataset_path):
            mkdir(self.dataset_path)

        # write test file

        self.filename = 'testfile.txt'
        test_file_path = path.join(self.test_dir, self.filename)

        # The context manager closes the write handle even if write() fails.
        with open(test_file_path, 'w') as out:
            out.write('Test file 1')

        self.f1_size = path.getsize(test_file_path)

        # Deliberately left open for the tests; presumably closed in
        # tearDown - TODO confirm.
        self.f1 = open(test_file_path, 'r')
Exemple #12
0
    def setUp(self):
        """Create an owner, an owned experiment, a dataset and two datafiles.

        The datafiles are built from fixtures 'jeol_sem_test1.txt' and
        'jeol_sem_test2.txt', each with a replica in the 'test-jeol'
        location.  The dataset and reloaded datafiles are stored on self.
        """
        # Create test owner without enough details
        owner = User.objects.create_user('testuser', '*****@*****.**',
                                         'password')
        UserProfile(user=owner, isDjangoAccount=True).save()

        Location.force_initialize()

        # Create test experiment and make user the owner of it
        exp = Experiment(title='Text Experiment',
                         institution_name='Test Uni',
                         created_by=owner)
        exp.save()
        ObjectACL(
            pluginId='django_user',
            entityId=str(owner.id),
            content_object=exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        ).save()

        ds = Dataset(description='dataset description...')
        ds.save()
        ds.experiments.add(exp)
        ds.save()

        def create_datafile(index):
            # Register fixture number `index` as a datafile with a replica.
            fixture = path.join(path.dirname(__file__), 'fixtures',
                                'jeol_sem_test%d.txt' % index)
            size, sha512sum = get_size_and_sha512sum(fixture)

            df = Dataset_File(dataset=ds,
                              filename=path.basename(fixture),
                              size=size,
                              sha512sum=sha512sum)
            df.save()

            fixture_dir_url = 'file://' + path.abspath(path.dirname(fixture))
            loc = Location.load_location({
                'name': 'test-jeol',
                'url': fixture_dir_url,
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'
            })
            rep = Replica(datafile=df,
                          url='file://' + path.abspath(fixture),
                          protocol='file',
                          location=loc)
            rep.verify()
            rep.save()
            # Hand back a fresh copy loaded from the database.
            return Dataset_File.objects.get(pk=df.pk)

        self.dataset = ds
        datafiles = []
        for index in (1, 2):
            datafiles.append(create_datafile(index))
        self.datafiles = datafiles
Exemple #13
0
 def setUp(self):
     """Create the test user and call Location.force_initialize()."""
     from django.contrib.auth.models import User
     from tardis.tardis_portal.models import Location
     # (username, email, password)
     credentials = ('******', '', 'secret')
     self.user = User.objects.create_user(*credentials)
     Location.force_initialize()
Exemple #14
0
 def setUp(self):
     """Create user 'fred', a titled experiment with a url, and a dataset."""
     fred = generate_user('fred')
     self.user = fred
     Location.force_initialize()
     experiment_options = {
         'users': [fred],
         'title': 'Meanwhile, down in the archives ...',
         'url': 'http://example.com/something',
     }
     experiment = generate_experiment(**experiment_options)
     self.experiment = experiment
     self.dataset = generate_dataset(experiments=[experiment])
Exemple #15
0
 def setUp(self):
     """Register the test user, then call Location.force_initialize()."""
     from django.contrib.auth.models import User
     from tardis.tardis_portal.models import Location
     # Positional arguments are username, email and password.
     self.user = User.objects.create_user('******', '', 'secret')
     Location.force_initialize()
Exemple #16
0
    def setUp(self):
        """Set up a user, an owned experiment, a dataset and a test file.

        Experiment and dataset directories are created below
        settings.FILE_STORE_PATH if they do not exist.  'testfile.txt' is
        written into a new temporary directory; self.f1_size holds its size
        and self.f1 is an open read handle to it.
        """
        from os import path, mkdir
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        # save() returns None; keep the instance itself on self and save it
        # in a second step so self.userProfile is a real profile.
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()

        self.test_dir = mkdtemp()

        Location.force_initialize()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        acl.save()

        self.dataset = \
            Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.experiment_path = path.join(
            settings.FILE_STORE_PATH,
            str(self.dataset.get_first_experiment().id))

        self.dataset_path = path.join(self.experiment_path,
                                      str(self.dataset.id))

        if not path.exists(self.experiment_path):
            mkdir(self.experiment_path)
        if not path.exists(self.dataset_path):
            mkdir(self.dataset_path)

        # write test file

        self.filename = 'testfile.txt'
        test_file_path = path.join(self.test_dir, self.filename)

        # Using "with" guarantees the write handle is closed on any error.
        with open(test_file_path, 'w') as out:
            out.write('Test file 1')

        self.f1_size = path.getsize(test_file_path)

        # Kept open for the tests to read; presumably closed in tearDown -
        # TODO confirm.
        self.f1 = open(test_file_path, 'r')
Exemple #17
0
def _infer_location(path):
    """Return the Location for *path*.

    A path without a URL scheme maps to the default location; any other
    path is looked up by URL.  Raises Exception when the lookup yields a
    falsy result.
    """
    has_scheme = urlparse.urlparse(path).scheme != ''
    if has_scheme:
        location = Location.get_location_for_url(path)
    else:
        location = Location.get_default_location()
    if not location:
        raise Exception('Cannot infer a location for %s' % path)
    return location
Exemple #18
0
def _infer_location(path):
    """Work out which Location *path* belongs to.

    Scheme-less paths use the default location, everything else is resolved
    through Location.get_location_for_url().  An Exception is raised if no
    location results.
    """
    scheme = urlparse.urlparse(path).scheme
    found = (Location.get_default_location() if scheme == ''
             else Location.get_location_for_url(path))
    if found:
        return found
    raise Exception('Cannot infer a location for %s' % path)
Exemple #19
0
    def testRemoveExperimentData(self):
        """Check remove_experiment_data() with unshared and shared datasets.

        Phase one: nothing is shared, so the preferred replica must be
        replaced by one at the archive location and the original file must
        be gone.  Phase two: the first dataset also belongs to a second
        experiment, so its replica and file must survive, while the file of
        self.replica2 must be removed.  Row counts must stay the same across
        the call in both phases.
        """
        # First with no sharing
        self._build()
        archive_location = Location.get_location('archtest')
        try:
            # Snapshot the row counts so they can be compared after the call.
            nos_experiments = Experiment.objects.count()
            nos_datasets = Dataset.objects.count()
            nos_datafiles = Dataset_File.objects.count()
            nos_replicas = Replica.objects.count()
            self.assertTrue(exists(self.replica.get_absolute_filepath()))
            remove_experiment_data(self.experiment, 
                                   'http://example.com/some.tar.gz',
                                   archive_location)
            self.assertEquals(nos_experiments, Experiment.objects.count())
            self.assertEquals(nos_datasets, Dataset.objects.count())
            self.assertEquals(nos_datafiles, Dataset_File.objects.count())
            self.assertEquals(nos_replicas, Replica.objects.count())
            # The preferred replica is now a different row that points into
            # the archive.
            new_replica = self.datafile.get_preferred_replica()
            self.assertTrue(self.replica.id != new_replica.id)
            self.assertFalse(new_replica.stay_remote)
            self.assertTrue(new_replica.verified)
            self.assertEqual(self.replica.protocol, new_replica.protocol)
            self.assertEqual(archive_location.id, new_replica.location.id)
            self.assertEqual('http://example.com/some.tar.gz#1/1/1',
                             new_replica.url)
            self.assertFalse(exists(self.replica.get_absolute_filepath()))
        finally:
            self._clear()

        # (Check that the deletes cascaded ... )
        self.assertEquals(0, Dataset_File.objects.count())
        self.assertEquals(0, Replica.objects.count())
        
        # Repeat, but with the first dataset in 2 experiments.
        self._build()
        self.dataset.experiments.add(self.experiment2)
        archive_location = Location.get_location('archtest')
        try:
            nos_experiments = Experiment.objects.count()
            nos_datasets = Dataset.objects.count()
            nos_datafiles = Dataset_File.objects.count()
            nos_replicas = Replica.objects.count()
            self.assertTrue(exists(self.replica.get_absolute_filepath()))
            remove_experiment_data(self.experiment, 
                                   'http://example.com/some.tar.gz',
                                   archive_location)
            self.assertEquals(nos_experiments, Experiment.objects.count())
            self.assertEquals(nos_datasets, Dataset.objects.count())
            self.assertEquals(nos_datafiles, Dataset_File.objects.count())
            self.assertEquals(nos_replicas, Replica.objects.count())
            # The shared dataset's replica is untouched and its file remains.
            new_replica = self.datafile.get_preferred_replica()
            self.assertTrue(self.replica.id == new_replica.id)
            self.assertTrue(exists(self.replica.get_absolute_filepath()))
            self.assertFalse(exists(self.replica2.get_absolute_filepath()))
        finally:
            self._clear()
    def setUpClass(cls):
        """Switch into the atom_test directory, start a TestWebServer and
        load three external test locations.

        The previous working directory is remembered on cls.priorcwd and the
        server on cls.server.
        """
        cls.priorcwd = os.getcwd()
        os.chdir(os.path.dirname(__file__)+'/atom_test')
        web_server = TestWebServer()
        cls.server = web_server
        web_server.start()

        Location.force_initialize()

        local_files = path.realpath(path.join(path.dirname(__file__),
                                              'atom_test', 'files'))
        # (name, transfer provider, url) for every location to load, in order.
        location_specs = (
            ('test-atom', 'http', 'http://localhost:4272/files/'),
            ('test-atom2', 'http',
             'http://mydatagrabber.cmm.uq.edu.au/files'),
            ('test-atom3', 'local', 'file://' + local_files),
        )
        for name, provider, url in location_specs:
            Location.load_location({
                'name': name,
                'transfer_provider': provider,
                'url': url,
                'type': 'external',
                'priority': 10})
Exemple #21
0
    def setUpClass(cls):
        """Enter the atom_test directory, start a TestWebServer and register
        the 'test-atom', 'test-atom2' and 'test-atom3' locations.

        cls.priorcwd keeps the original working directory; cls.server keeps
        the started server.
        """
        def external(name, provider, url):
            # Settings shared by all three locations are filled in here.
            return {
                'name': name,
                'transfer_provider': provider,
                'url': url,
                'type': 'external',
                'priority': 10
            }

        cls.priorcwd = os.getcwd()
        os.chdir(os.path.dirname(__file__) + '/atom_test')
        cls.server = TestWebServer()
        cls.server.start()

        Location.force_initialize()
        Location.load_location(
            external('test-atom', 'http', 'http://localhost:4272/files/'))
        Location.load_location(
            external('test-atom2', 'http',
                     'http://mydatagrabber.cmm.uq.edu.au/files'))

        files_dir = path.realpath(
            path.join(path.dirname(__file__), 'atom_test', 'files'))
        Location.load_location(
            external('test-atom3', 'local', 'file://' + files_dir))
Exemple #22
0
 def testScoring(self):
     """Check MigrationScorer results against the self._setup() fixture.

     Covers the individual datafile, user, experiment and dataset scores,
     the combined score_datafile() value, and the (datafile, replica, score)
     lists returned per dataset, per experiment and for all datafiles.
     """
     self._setup()
     scorer = MigrationScorer(Location.get_location('local').id)
     # Individual score components.
     self.assertEquals(2.0, scorer.datafile_score(self.df1))
     self.assertEquals(2, get_user_priority(self.user1))
     self.assertEquals(1, get_user_priority(self.user2))
     self.assertEquals(1.0, scorer.user_score(self.user1))
     self.assertEquals(2.0, scorer.user_score(self.user2))
     self.assertEquals(2.0, scorer.experiment_score(self.exp1))
     self.assertEquals(2.0, scorer.dataset_score(self.df1.dataset))
     # Combined score for a single datafile.
     self.assertEquals(4.0, scorer.score_datafile(self.df1))
     # Score lists; the expected lists are in descending score order.
     self.assertEquals([(self.df1, self.rep1, 4.0)], 
                       scorer.score_datafiles_in_dataset(self.ds1))
     self.assertEquals([(self.df5, self.rep5, 8.0), 
                        (self.df4, self.rep4, 6.0), 
                        (self.df1, self.rep1, 4.0)],
                       scorer.score_datafiles_in_experiment(self.exp1))
     self.assertEquals([(self.df5, self.rep5, 8.0), 
                        (self.df4, self.rep4, 6.0)],
                       scorer.score_datafiles_in_experiment(self.exp2))
     self.assertEquals([(self.df6, self.rep6, 5.0)],
                       scorer.score_datafiles_in_experiment(self.exp3))
     self.assertEquals([(self.df5, self.rep5, 8.0), 
                        (self.df4, self.rep4, 6.0), 
                        (self.df6, self.rep6, 5.0), 
                        (self.df1, self.rep1, 4.0), 
                        (self.df7, self.rep7, 0.0), 
                        (self.df8, self.rep8, 0.0)],
                       scorer.score_all_datafiles())
     self.assertEquals([(self.df7, self.rep7, 0.0), 
                        (self.df8, self.rep8, 0.0)], 
                       scorer.score_datafiles_in_dataset(self.ds4))
Exemple #23
0
    def testScoringWithTimes(self):
        """Check how datafile_score() changes as the replica's file ages.

        The score is expected to be 0.0 before the replica points at a real
        file, 2.0 once it points at a freshly written temporary file, 3.0
        after the file's times are set back by just over one day, and 5.0
        after they are set back by just over two days.
        """
        self._setup()
        scorer = MigrationScorer(
            Location.get_location('local').id, {
                'user_priority_weighting': [5.0, 2.0, 1.0, 0.5, 0.2],
                'file_size_weighting': 1.0,
                'file_access_weighting': 1.0,
                'file_age_weighting': 1.0,
                'file_size_threshold': 0,
                'file_access_threshold': 0,
                'file_age_threshold': 1})

        self.assertEqual(0.0, scorer.datafile_score(self.df1))

        # The context manager closes (and thereby removes) the temporary
        # file even when one of the assertions below fails.
        with tempfile.NamedTemporaryFile(dir=settings.FILE_STORE_PATH) as f:
            f.write("Hi Mom!!\n")
            rep = Replica.objects.get(pk=self.rep1.pk)
            rep.url = f.name
            rep.save()

            self.assertEqual(2.0, scorer.datafile_score(self.df1))

            # Just over one day old (the extra 300 seconds are a margin).
            older = time.time() - (60 * 60 * 24 + 300)
            os.utime(f.name, (older, older))

            self.assertEqual(3.0, scorer.datafile_score(self.df1))

            # Just over two days old.
            older = time.time() - (60 * 60 * 24 * 2 + 300)
            os.utime(f.name, (older, older))

            self.assertEqual(5.0, scorer.datafile_score(self.df1))
Exemple #24
0
def stage_replica(replica):
    """Verify an external replica and, unless it stays remote, store it locally.

    The replica is verified into a temporary upload file.  On failure False
    is returned.  On success, if replica.stay_remote is false, the file is
    written into the datafile's dataset and the replica row is updated to
    the default location with the new url, verified=True and an empty
    protocol; True is returned.

    Raises ValueError when the replica's location type is not 'external'.
    """
    from django.core.files.uploadedfile import TemporaryUploadedFile
    from tardis.tardis_portal.models import Replica, Location
    if replica.location.type != 'external':
        raise ValueError('Only external replicas can be staged')

    subdir = None
    if getattr(settings, "DEEP_DATASET_STORAGE", False):
        # url[7:] skips a 7-character prefix; presumably 'file://' - TODO
        # confirm.  The first path component below SYNC_TEMP_PATH is dropped.
        relative = path.relpath(replica.url[7:], settings.SYNC_TEMP_PATH)
        components = relative.split(os.sep)[1:]
        subdir = path.dirname(path.join(*components))

    with TemporaryUploadedFile(replica.datafile.filename, None, None,
                               None) as tf:
        if not replica.verify(tempfile=tf.file):
            return False
        if not replica.stay_remote:
            tf.file.flush()
            datafile = replica.datafile
            local_url = write_uploaded_file_to_dataset(datafile.dataset, tf,
                                                       subdir=subdir)
            updated_fields = {
                'datafile': datafile,
                'url': local_url,
                'location': Location.get_default_location(),
                'verified': True,
                'protocol': '',
            }
            Replica.objects.filter(id=replica.id).update(**updated_fields)
        return True
Exemple #25
0
def remove_experiment_data(exp, archive_url, archive_location):
    """Remove the online replicas that belong to *exp* alone.

    Only datasets attached to exactly one experiment are processed.  For
    each of their datafiles, every replica in an 'online' location has its
    file removed through the location's provider, and the replica rows are
    deleted afterwards.  When archive_url is truthy, a verified replica at
    archive_location is saved for each removed replica; its url is
    archive_url followed by '#' and the quoted
    '<experiment id>/<dataset id>/<filename>' path.
    """
    for dataset in Dataset.objects.filter(experiments=exp):
        if dataset.experiments.count() != 1:
            # Shared with another experiment: leave its data alone.
            continue
        for datafile in Dataset_File.objects.filter(dataset=dataset):
            online = Replica.objects.filter(datafile=datafile,
                                            location__type='online')
            if online.count() <= 0:
                continue
            for replica in online:
                location = Location.get_location(replica.location.name)
                location.provider.remove_file(replica)
                if not archive_url:
                    continue
                # NOTE(review): the replacement copies its fields from the
                # first online replica, not from the one just removed -
                # confirm this is intended.
                first = online[0]
                archive_path = '%s/%s/%s' % (
                    exp.id, dataset.id, datafile.filename)
                offline_url = '%s#%s' % (archive_url, quote(archive_path))
                replacement = Replica(datafile=first.datafile,
                                      url=offline_url,
                                      protocol=first.protocol,
                                      verified=True,
                                      stay_remote=False,
                                      location=archive_location)
                replacement.save()
            online.delete()
Exemple #26
0
 def testScoring(self):
     """Verify MigrationScorer output for the fixture from self._setup().

     Asserts the separate datafile, user, experiment and dataset scores,
     the combined score_datafile() result, and the (datafile, replica,
     score) lists for a dataset, for experiments and for all datafiles.
     """
     self._setup()
     scorer = MigrationScorer(Location.get_location('local').id)
     # Separate score components.
     self.assertEquals(2.0, scorer.datafile_score(self.df1))
     self.assertEquals(2, get_user_priority(self.user1))
     self.assertEquals(1, get_user_priority(self.user2))
     self.assertEquals(1.0, scorer.user_score(self.user1))
     self.assertEquals(2.0, scorer.user_score(self.user2))
     self.assertEquals(2.0, scorer.experiment_score(self.exp1))
     self.assertEquals(2.0, scorer.dataset_score(self.df1.dataset))
     # Combined score of one datafile.
     self.assertEquals(4.0, scorer.score_datafile(self.df1))
     # Score lists; expected entries are listed from highest to lowest score.
     self.assertEquals([(self.df1, self.rep1, 4.0)],
                       scorer.score_datafiles_in_dataset(self.ds1))
     self.assertEquals([(self.df5, self.rep5, 8.0),
                        (self.df4, self.rep4, 6.0),
                        (self.df1, self.rep1, 4.0)],
                       scorer.score_datafiles_in_experiment(self.exp1))
     self.assertEquals([(self.df5, self.rep5, 8.0),
                        (self.df4, self.rep4, 6.0)],
                       scorer.score_datafiles_in_experiment(self.exp2))
     self.assertEquals([(self.df6, self.rep6, 5.0)],
                       scorer.score_datafiles_in_experiment(self.exp3))
     self.assertEquals([(self.df5, self.rep5, 8.0),
                        (self.df4, self.rep4, 6.0),
                        (self.df6, self.rep6, 5.0),
                        (self.df1, self.rep1, 4.0),
                        (self.df7, self.rep7, 0.0),
                        (self.df8, self.rep8, 0.0)],
                       scorer.score_all_datafiles())
     self.assertEquals([(self.df7, self.rep7, 0.0),
                        (self.df8, self.rep8, 0.0)],
                       scorer.score_datafiles_in_dataset(self.ds4))
Exemple #27
0
    def testScoringWithTimes(self):
        """Verify datafile_score() as the replica's backing file gets older.

        Expected scores: 0.0 before the replica refers to an existing file,
        2.0 once it refers to a newly written temporary file, 3.0 when the
        file's times are moved back slightly more than a day, and 5.0 when
        they are moved back slightly more than two days.
        """
        self._setup()
        scorer = MigrationScorer(
            Location.get_location('local').id, {
                'user_priority_weighting': [5.0, 2.0, 1.0, 0.5, 0.2],
                'file_size_weighting': 1.0,
                'file_access_weighting': 1.0,
                'file_age_weighting': 1.0,
                'file_size_threshold': 0,
                'file_access_threshold': 0,
                'file_age_threshold': 1
            })

        self.assertEqual(0.0, scorer.datafile_score(self.df1))

        # "with" makes sure the temporary file is closed, and so removed,
        # even if an assertion inside the block fails.
        with tempfile.NamedTemporaryFile(dir=settings.FILE_STORE_PATH) as f:
            f.write("Hi Mom!!\n")
            rep = Replica.objects.get(pk=self.rep1.pk)
            rep.url = f.name
            rep.save()

            self.assertEqual(2.0, scorer.datafile_score(self.df1))

            # A little over one day in the past (300 seconds of margin).
            older = time.time() - (60 * 60 * 24 + 300)
            os.utime(f.name, (older, older))

            self.assertEqual(3.0, scorer.datafile_score(self.df1))

            # A little over two days in the past.
            older = time.time() - (60 * 60 * 24 * 2 + 300)
            os.utime(f.name, (older, older))

            self.assertEqual(5.0, scorer.datafile_score(self.df1))
Exemple #28
0
    def testLocalFile(self):
        """Check that verify_files() marks an unverified local replica as
        verified.

        A datafile with 1024 random bytes is stored with a replica in the
        default location; the replica's verified flag is cleared before
        verify_files() is run.
        """
        payload = urandom(1024)
        upload = ContentFile(payload, 'background_task_testfile')

        def stored_replica(df):
            # Always re-read the replica from the database.
            return Replica.objects.get(datafile=df)

        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = upload.name
        datafile.size = len(payload)
        datafile.sha512sum = hashlib.sha512(payload).hexdigest()
        datafile.save()

        stored_url = write_uploaded_file_to_dataset(self.dataset, upload)
        replica = Replica(datafile=datafile,
                          url=stored_url,
                          location=Location.get_default_location())
        replica.save()

        # undo auto-verify:
        replica.verified = False
        replica.save(update_fields=['verified'])

        # Check that it's not currently verified
        expect(stored_replica(datafile).verified).to_be(False)

        # Check it verifies
        verify_files()
        expect(stored_replica(datafile).verified).to_be(True)
Exemple #29
0
def stage_replica(replica):
    """
    Verify an external replica and, unless it must stay remote, re-home it.

    The replica is verified against a temporary upload file.  On success,
    and when ``replica.stay_remote`` is false, the temporary file is
    written into the replica's dataset and the replica row is updated to
    point at the default location (verified, empty protocol).

    :param replica: the replica to stage; its location type must be
        'external'
    :returns: True if verification succeeded, False otherwise
    :raises ValueError: if the replica's location is not external
    """
    from django.core.files.uploadedfile import TemporaryUploadedFile
    from tardis.tardis_portal.models import Replica, Location

    if replica.location.type != 'external':
        raise ValueError('Only external replicas can be staged')

    subdir = None
    if getattr(settings, "DEEP_DATASET_STORAGE", False):
        # NOTE(review): the [7:] slice presumably strips a 'file://'
        # prefix from the URL - confirm against the callers.
        relurl = path.relpath(replica.url[7:], settings.SYNC_TEMP_PATH)
        # Drop the first path component and keep the remaining directory
        components = relurl.split(os.sep)[1:]
        subdir = path.dirname(path.join(*components))

    with TemporaryUploadedFile(replica.datafile.filename,
                               None, None, None) as tf:
        if not replica.verify(tempfile=tf.file):
            return False
        if replica.stay_remote:
            # Verified, but the file is to be left where it is
            return True

        tf.file.flush()
        local_url = write_uploaded_file_to_dataset(
            replica.datafile.dataset, tf, subdir=subdir)
        Replica.objects.filter(id=replica.id).update(
            datafile=replica.datafile,
            url=local_url,
            location=Location.get_default_location(),
            verified=True,
            protocol='')
        return True
Exemple #30
0
        def create_datafile(index):
            """
            Register fixture file ``jeol_sem_test<index>.txt`` as a datafile
            with an external 'test-jeol' replica, and return the datafile
            as re-read from the database.
            """
            fixture_dir = path.join(path.dirname(__file__), 'fixtures')
            testfile = path.join(fixture_dir, 'jeol_sem_test%d.txt' % index)

            size, sha512sum = get_size_and_sha512sum(testfile)

            datafile = Dataset_File(dataset=dataset,
                                    filename=path.basename(testfile),
                                    size=size,
                                    sha512sum=sha512sum)
            datafile.save()

            # The location covers the directory that holds the fixture
            location_settings = {
                'name': 'test-jeol',
                'url': 'file://' + path.abspath(path.dirname(testfile)),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local',
            }
            location = Location.load_location(location_settings)

            replica = Replica(datafile=datafile,
                              url='file://' + path.abspath(testfile),
                              protocol='file',
                              location=location)
            replica.verify()
            replica.save()
            return Dataset_File.objects.get(pk=datafile.pk)
    def process_enclosure(self, dataset, enclosure):
        """
        Create a datafile and replica for one feed enclosure.

        The optional ``mime``, ``length`` and ``hash`` attributes of the
        enclosure are copied onto the datafile when present.  The replica
        is only created when the enclosure URL matches a registered
        location; otherwise an error is logged and nothing more is done.

        :param dataset: the dataset the new datafile belongs to
        :param enclosure: the enclosure; must have an ``href`` attribute
        """
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict(s.partition(':')[::2]
                            for s in enclosure.hash.split())
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except (AttributeError, KeyError):
            # Either there is no hash attribute at all, or it lists other
            # algorithms only; in both cases go on without a checksum.
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url,
                          location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
    def process_enclosure(self, dataset, enclosure):
        """
        Create a datafile and replica for one feed enclosure.

        The optional ``mime``, ``length`` and ``hash`` attributes of the
        enclosure are copied onto the datafile when present.  The replica
        is only created when the enclosure URL matches a registered
        location; otherwise an error is logged and nothing more is done.

        :param dataset: the dataset the new datafile belongs to
        :param enclosure: the enclosure; must have an ``href`` attribute
        """
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict(s.partition(':')[::2]
                            for s in enclosure.hash.split())
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except (AttributeError, KeyError):
            # Either there is no hash attribute at all, or it lists other
            # algorithms only; in both cases go on without a checksum.
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url, location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
Exemple #33
0
    def testMigrateRestore(self):
        """
        Migrate a replica to the 'test' location and back to 'local',
        first with removal of the source copy and then with 'noRemove'.
        """
        dest = Location.get_location('test')
        local = Location.get_location('local')
        datafile, replica = generate_datafile(None,
                                              self.dataset,
                                              "Hi mum",
                                              verify=False,
                                              verify_checksums_req=True)

        # Attempt to migrate without datafile hashes ... should
        # fail because we can't verify.
        with self.assertRaises(MigrationError):
            migrate_replica(replica, dest)

        # Verify sets hashes ...
        self.assertEquals(replica.verify(allowEmptyChecksums=True), True)
        # Re-read the replica from the database before migrating it
        replica = Replica.objects.get(pk=replica.pk)
        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        self.assertTrue(migrate_replica(replica, dest))
        # The local file is expected to be gone after the migration
        self.assertFalse(os.path.exists(path))

        # Bring it back
        new_replica = datafile.get_preferred_replica()
        url = new_replica.url
        self.assertTrue(migrate_replica(new_replica, local))
        self.assertTrue(os.path.exists(path))
        # Check it was deleted remotely
        with self.assertRaises(TransferError):
            dest.provider.get_length(new_replica)

        # Refresh the datafile object because it is now stale ...
        datafile = Dataset_File.objects.get(id=datafile.id)
        replica = datafile.get_preferred_replica()

        # Repeat the process with 'noRemove'
        self.assertTrue(migrate_replica(replica, dest, noRemove=True))
        new_replica = datafile.get_preferred_replica()
        # With 'noRemove' the local file must survive the migration
        self.assertTrue(os.path.exists(path))
        # 6 matches the length of the "Hi mum" content passed above
        self.assertEquals(dest.provider.get_length(new_replica), 6)
        migrate_replica(new_replica, local, noRemove=True)
        newpath = datafile.get_absolute_filepath()
        replica = datafile.get_preferred_replica()
        # Both the old and the newly restored local file must exist, at
        # different paths, and the remote copy must still be there.
        self.assertTrue(os.path.exists(path))
        self.assertTrue(os.path.exists(newpath))
        self.assertNotEqual(path, newpath)
        self.assertEquals(dest.provider.get_length(new_replica), 6)
Exemple #34
0
    def testMigrateRestore(self):
        """
        Migrate a replica to the 'test' location and back to 'local',
        first with removal of the source copy and then with 'noRemove'.
        """
        dest = Location.get_location('test')
        local = Location.get_location('local')
        datafile, replica = generate_datafile(None, self.dataset, "Hi mum",
                                              verify=False,
                                              verify_checksums_req=True)

        # Attempt to migrate without datafile hashes ... should
        # fail because we can't verify.
        with self.assertRaises(MigrationError):
            migrate_replica(replica, dest)

        # Verify sets hashes ...
        self.assertEquals(replica.verify(allowEmptyChecksums=True), True)
        # Re-read the replica from the database before migrating it
        replica = Replica.objects.get(pk=replica.pk)
        path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(path))
        self.assertTrue(migrate_replica(replica, dest))
        # The local file is expected to be gone after the migration
        self.assertFalse(os.path.exists(path))

        # Bring it back
        new_replica = datafile.get_preferred_replica()
        url = new_replica.url
        self.assertTrue(migrate_replica(new_replica, local))
        self.assertTrue(os.path.exists(path))
        # Check it was deleted remotely
        with self.assertRaises(TransferError):
            dest.provider.get_length(new_replica)

        # Refresh the datafile object because it is now stale ...
        datafile = Dataset_File.objects.get(id=datafile.id)
        replica = datafile.get_preferred_replica()

        # Repeat the process with 'noRemove'
        self.assertTrue(migrate_replica(replica, dest, noRemove=True))
        new_replica = datafile.get_preferred_replica()
        # With 'noRemove' the local file must survive the migration
        self.assertTrue(os.path.exists(path))
        # 6 matches the length of the "Hi mum" content passed above
        self.assertEquals(dest.provider.get_length(new_replica), 6)
        migrate_replica(new_replica, local, noRemove=True)
        newpath = datafile.get_absolute_filepath()
        replica = datafile.get_preferred_replica()
        # Both the old and the newly restored local file must exist, at
        # different paths, and the remote copy must still be there.
        self.assertTrue(os.path.exists(path))
        self.assertTrue(os.path.exists(newpath))
        self.assertNotEqual(path, newpath)
        self.assertEquals(dest.provider.get_length(new_replica), 6)
    def setUp(self):
        """
        Create a test user with a staging directory, an experiment owned
        by that user and a dataset attached to the experiment.
        """
        # Create test owner without enough details
        username = 'testuser'
        email = '*****@*****.**'
        password = 'password'
        user = User.objects.create_user(username, email, password)
        profile = UserProfile(user=user, isDjangoAccount=True)
        profile.save()

        # Need UserAuthentication
        authentication = UserAuthentication(userProfile=profile,
                                            username=username,
                                            authenticationMethod='localdb')
        authentication.save()

        # Create staging dir
        from os import path, makedirs
        staging_dir = path.join(settings.STAGING_PATH, username)
        if not path.exists(staging_dir):
            makedirs(staging_dir)
        # Ensure that staging dir is set up properly
        expect(get_full_staging_path(username)).to_be_truthy()

        Location.force_initialize()

        # Create test experiment and make user the owner of it
        experiment = Experiment(title='Text Experiment',
                                institution_name='Test Uni',
                                created_by=user)
        experiment.save()
        owner_acl = ExperimentACL(
            pluginId=django_user,
            entityId=str(user.id),
            experiment=experiment,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ExperimentACL.OWNER_OWNED,
        )
        owner_acl.save()

        # The dataset must be saved once before experiments can be added
        dataset = Dataset(description='dataset description...')
        dataset.save()
        dataset.experiments.add(experiment)
        dataset.save()
        self.dataset = dataset

        self.username = username
        self.password = password
 def do_ext_provider(self, loc_name):
     """
     Run ``do_provider`` against the named location if its server is up.

     :param loc_name: name of the location to look up
     """
     # This test requires an external test server configured as per the
     # 'loc_name' location.  We skip the test if the server doesn't
     # respond.
     loc = Location.get_location(loc_name)
     if loc.provider.alive():
         self.do_provider(loc)
     else:
         # A single parenthesised argument prints identically with the
         # Python 2 print statement and the Python 3 print function.
         print("SKIPPING TEST - %s server on %s is not responding\n"
               % (loc_name, loc.url))
    def testArchiveExperiment(self):
        """Exercise the 'archive' command: two dry runs, then two archives."""
        dataset = generate_dataset()
        experiment = generate_experiment([dataset], [self.dummy_user])
        datafile, _ = generate_datafile(None, dataset, "Hi grandpa")
        archtest = Location.get_location('archtest')

        def run_archive(*args, **options):
            # Run the command at verbosity 1 and hand back everything it
            # wrote to stdout; a SystemExit from the command is tolerated.
            captured = StringIO()
            try:
                call_command('archive', *args,
                             verbosity=1, stdout=captured, **options)
            except SystemExit:
                pass
            captured.seek(0)
            return captured.read()

        # Dry run ...
        self.assertEquals(
            run_archive(experiment.id, dryRun=True),
            'Would have archived experiment %s\n' % experiment.id)

        # Dry run ... all
        self.assertEquals(
            run_archive(all=True, dryRun=True),
            'Would have archived experiment %s\n' % experiment.id)

        # Do one ... to file
        self.assertEquals(
            run_archive(experiment.id, directory='/tmp'),
            'Archived experiment %s to /tmp/%s-archive.tar.gz\n'
            'Archived 1 experiments with 0 errors\n'
            % (experiment.id, experiment.id))

        # Do one ... to archtest
        self.assertEquals(
            run_archive(experiment.id, location='archtest'),
            'Archived experiment %s to %s%s-archive.tar.gz\n'
            'Archived 1 experiments with 0 errors\n'
            % (experiment.id, archtest.provider.base_url, experiment.id))
Exemple #38
0
 def do_ext_provider(self, loc_name):
     """
     Run ``do_provider`` against the named location if its server is up.

     :param loc_name: name of the location to look up
     """
     # This test requires an external test server configured as per the
     # 'loc_name' location.  We skip the test if the server doesn't
     # respond.
     loc = Location.get_location(loc_name)
     if loc.provider.alive():
         self.do_provider(loc)
     else:
         # A single parenthesised argument prints identically with the
         # Python 2 print statement and the Python 3 print function.
         print('SKIPPING TEST - %s server on %s is not responding\n'
               % (loc_name, loc.url))
Exemple #39
0
    def setUp(self):
        """
        Create a test user with a staging directory, an experiment owned
        by that user and a dataset attached to the experiment.
        """
        # Create test owner without enough details
        username = 'testuser'
        email = '*****@*****.**'
        password = 'password'
        user = User.objects.create_user(username, email, password)
        profile = UserProfile(user=user, isDjangoAccount=True)
        profile.save()

        # Need UserAuthentication
        authentication = UserAuthentication(userProfile=profile,
                                            username=username,
                                            authenticationMethod='localdb')
        authentication.save()

        # Create staging dir
        from os import path, makedirs
        staging_dir = path.join(settings.STAGING_PATH, username)
        if not path.exists(staging_dir):
            makedirs(staging_dir)
        # Ensure that staging dir is set up properly
        expect(get_full_staging_path(username)).to_be_truthy()

        Location.force_initialize()

        # Create test experiment and make user the owner of it
        experiment = Experiment(title='Text Experiment',
                                institution_name='Test Uni',
                                created_by=user)
        experiment.save()
        owner_acl = ObjectACL(pluginId=django_user,
                              entityId=str(user.id),
                              content_object=experiment,
                              canRead=True,
                              isOwner=True,
                              aclOwnershipType=ObjectACL.OWNER_OWNED)
        owner_acl.save()

        # The dataset must be saved once before experiments can be added
        dataset = Dataset(description='dataset description...')
        dataset.save()
        dataset.experiments.add(experiment)
        dataset.save()
        self.dataset = dataset

        self.username = username
        self.password = password
Exemple #40
0
 def _get_destination(self, destName, default):
     """
     Resolve a destination name, falling back to ``default``, to a location.

     :raises CommandError: if neither name is given, or the chosen name
         does not resolve to a location
     """
     name = destName or default
     if not name:
         raise CommandError("No default destination configured")
     dest = Location.get_location(name)
     if dest:
         return dest
     raise CommandError("Destination %s not known" % name)
Exemple #41
0
 def _build(dataset, filename, url, protocol):
     """
     Save a datafile in ``dataset`` plus one replica at the default
     location, and return the datafile.
     """
     from tardis.tardis_portal.models import (
         Dataset_File, Replica, Location)

     datafile = Dataset_File(dataset=dataset, filename=filename)
     datafile.save()

     default_location = Location.get_default_location()
     Replica(datafile=datafile,
             url=url,
             protocol=protocol,
             location=default_location).save()
     return datafile
Exemple #42
0
 def _build(dataset, filename, url, protocol):
     """
     Save a datafile in ``dataset`` plus one replica at the default
     location, and return the datafile.
     """
     from tardis.tardis_portal.models import (
         Dataset_File, Replica, Location)

     datafile = Dataset_File(dataset=dataset, filename=filename)
     datafile.save()

     default_location = Location.get_default_location()
     Replica(datafile=datafile,
             url=url,
             protocol=protocol,
             location=default_location).save()
     return datafile
Exemple #43
0
def _create_datafile():
    """
    Build a user, a fully public experiment, a dataset and a datafile
    holding a TIFF rendering of ImageMagick's built-in rose image.

    :returns: the saved datafile
    """
    user = User.objects.create_user('testuser', '*****@*****.**', 'pwd')
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    experiment = Experiment.objects.create(
        title="IIIF Test",
        created_by=user,
        public_access=Experiment.PUBLIC_ACCESS_FULL)
    experiment.save()

    # Give the user every permission on the experiment
    owner_acl = ObjectACL(content_object=experiment,
                          pluginId='django_user',
                          entityId=str(user.id),
                          isOwner=True,
                          canRead=True,
                          canWrite=True,
                          canDelete=True,
                          aclOwnershipType=ObjectACL.OWNER_OWNED)
    owner_acl.save()

    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Render the sample image into a temporary upload file
    upload = TemporaryUploadedFile('iiif_stored_file', None, None, None)
    with Image(filename='magick:rose') as img:
        img.format = 'tiff'
        img.save(file=upload.file)
        upload.file.flush()

    # Create new Datafile
    image_size = os.path.getsize(upload.file.name)
    datafile = Dataset_File(dataset=dataset,
                            size=image_size,
                            filename='iiif_named_file')
    stored_url = write_uploaded_file_to_dataset(dataset, upload)
    replica = Replica(datafile=datafile,
                      url=stored_url,
                      location=Location.get_default_location())
    # Verification happens before the datafile is saved
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    # Re-assign the datafile now that it has been saved
    replica.datafile = datafile
    replica.save()
    return datafile
Exemple #44
0
def _create_datafile():
    """
    Build a user, a fully public experiment, a dataset and a datafile
    holding a TIFF rendering of ImageMagick's built-in rose image.

    :returns: the saved datafile
    """
    user = User.objects.create_user("testuser", "*****@*****.**", "pwd")
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    experiment = Experiment.objects.create(
        title="IIIF Test",
        created_by=user,
        public_access=Experiment.PUBLIC_ACCESS_FULL,
    )
    experiment.save()

    # Give the user every permission on the experiment
    owner_acl = ObjectACL(
        content_object=experiment,
        pluginId="django_user",
        entityId=str(user.id),
        isOwner=True,
        canRead=True,
        canWrite=True,
        canDelete=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    owner_acl.save()

    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Render the sample image into a temporary upload file
    upload = TemporaryUploadedFile("iiif_stored_file", None, None, None)
    with Image(filename="magick:rose") as img:
        img.format = "tiff"
        img.save(file=upload.file)
        upload.file.flush()

    # Create new Datafile
    image_size = os.path.getsize(upload.file.name)
    datafile = Dataset_File(
        dataset=dataset,
        size=image_size,
        filename="iiif_named_file",
    )
    stored_url = write_uploaded_file_to_dataset(dataset, upload)
    replica = Replica(
        datafile=datafile,
        url=stored_url,
        location=Location.get_default_location(),
    )
    # Verification happens before the datafile is saved
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    # Re-assign the datafile now that it has been saved
    replica.datafile = datafile
    replica.save()
    return datafile
Exemple #45
0
    def testMigrationNoHashes(self):
        """Migrate a replica to 'test' with the '?metadata' query disabled."""
        # Tweak the server to turn off the '?metadata' query
        self.server.server.allowQuery = False

        destination = Location.get_location('test')
        datafile, replica = generate_datafile("1/2/3", self.dataset, "Hi mum")
        verified = replica.verify(allowEmptyChecksums=True)
        self.assertEquals(verified, True)

        local_path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(local_path))
        migrate_replica(replica, destination)
        # The local copy must be gone once the replica has been migrated
        self.assertFalse(os.path.exists(local_path))
Exemple #46
0
    def testMigrationNoHashes(self):
        """Migrate a replica to 'test' with the '?metadata' query disabled."""
        # Tweak the server to turn off the '?metadata' query
        self.server.server.allowQuery = False

        destination = Location.get_location('test')
        datafile, replica = generate_datafile("1/2/3", self.dataset, "Hi mum")
        verified = replica.verify(allowEmptyChecksums=True)
        self.assertEquals(verified, True)

        local_path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(local_path))
        migrate_replica(replica, destination)
        # The local copy must be gone once the replica has been migrated
        self.assertFalse(os.path.exists(local_path))
Exemple #47
0
 def _build_datafile(self,
                     testfile,
                     filename,
                     dataset,
                     url,
                     protocol='',
                     checksum=None,
                     size=None,
                     mimetype=''):
     """
     Create and save a datafile with one verified-on-save replica.

     :param testfile: file whose size and SHA-512 sum provide the defaults
     :param filename: name of the datafile; also used as the name of a
         location created on the fly
     :param dataset: dataset the datafile belongs to
     :param url: replica URL; a URL without a scheme selects the 'local'
         location
     :param protocol: protocol stored on the replica
     :param checksum: SHA-512 sum to record instead of the computed one
     :param size: size to record instead of the measured one
     :param mimetype: mimetype stored on the datafile
     :returns: the datafile as re-read from the database
     """
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(
         dataset=dataset,
         filename=filename,
         mimetype=mimetype,
         # An explicit size (including 0) overrides the measured one
         size=str(filesize if size is None else size),
         sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
             # No registered location matches: register an external one
             # rooted at the URL's parent
             location = Location.load_location({
                 'name': filename,
                 'url': urlparse.urljoin(url, '.'),
                 'type': 'external',
                 'priority': 10,
                 'transfer_provider': 'local'
             })
     replica = Replica(datafile=datafile,
                       protocol=protocol,
                       url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)
    def testProviderInstantiation(self):
        """
        Check the provider class, base URL and 401 handler registration
        of the 'test', 'test2' and 'test3' locations.
        """

        def provider_of(name):
            return Location.get_location(name).provider

        http_provider = provider_of("test")
        for expected_type in (TransferProvider, SimpleHttpTransfer):
            self.assertIsInstance(http_provider, expected_type)
        self.assertEqual(http_provider.base_url,
                         "http://127.0.0.1:4272/data/")

        # 'test2': no 401 entry among the opener's http error handlers
        dav_provider = provider_of("test2")
        for expected_type in (TransferProvider, WebDAVTransfer):
            self.assertIsInstance(dav_provider, expected_type)
        self.assertFalse(401 in dav_provider.opener.handle_error["http"])
        self.assertEqual(dav_provider.base_url, "http://127.0.0.1/data2/")

        # 'test3': a 401 entry must be present
        auth_provider = provider_of("test3")
        for expected_type in (TransferProvider, WebDAVTransfer):
            self.assertIsInstance(auth_provider, expected_type)
        self.assertTrue(401 in auth_provider.opener.handle_error["http"])
        self.assertEqual(auth_provider.base_url, "http://127.0.0.1/data3/")
def _create_test_dataset(nosDatafiles):
    """
    Create a dataset holding ``nosDatafiles`` placeholder datafiles, each
    with one replica at the default location, and return the dataset.
    """
    dataset = Dataset(description='happy snaps of plumage')
    dataset.save()
    for _ in range(nosDatafiles):
        datafile = Dataset_File(dataset=dataset, size='21',
                                sha512sum='bogus')
        datafile.save()
        # Every replica gets its own URL via _next_id()
        replica_url = 'http://planet-python.org/' + str(_next_id())
        replica = Replica(datafile=datafile,
                          url=replica_url,
                          location=Location.get_default_location())
        replica.save()
    dataset.save()
    return dataset
Exemple #50
0
    def testProviderInstantiation(self):
        '''
        Check the provider class, base URL and 401 handler registration
        of the 'test', 'test2' and 'test3' locations.
        '''

        def provider_of(name):
            return Location.get_location(name).provider

        http_provider = provider_of('test')
        for expected_type in (TransferProvider, SimpleHttpTransfer):
            self.assertIsInstance(http_provider, expected_type)
        self.assertEqual(http_provider.base_url,
                         'http://127.0.0.1:4272/data/')

        # 'test2': no 401 entry among the opener's http error handlers
        dav_provider = provider_of('test2')
        for expected_type in (TransferProvider, WebDAVTransfer):
            self.assertIsInstance(dav_provider, expected_type)
        self.assertFalse(401 in dav_provider.opener.handle_error['http'])
        self.assertEqual(dav_provider.base_url, 'http://127.0.0.1/data2/')

        # 'test3': a 401 entry must be present
        auth_provider = provider_of('test3')
        for expected_type in (TransferProvider, WebDAVTransfer):
            self.assertIsInstance(auth_provider, expected_type)
        self.assertTrue(401 in auth_provider.opener.handle_error['http'])
        self.assertEqual(auth_provider.base_url, 'http://127.0.0.1/data3/')
Exemple #51
0
def _create_test_dataset(nosDatafiles):
    """
    Create a dataset holding ``nosDatafiles`` placeholder datafiles, each
    with one replica at the default location, and return the dataset.
    """
    dataset = Dataset(description='happy snaps of plumage')
    dataset.save()
    for _ in range(nosDatafiles):
        datafile = Dataset_File(dataset=dataset, size='21',
                                sha512sum='bogus')
        datafile.save()
        # Every replica gets its own URL via _next_id()
        replica_url = 'http://planet-python.org/' + str(_next_id())
        replica = Replica(datafile=datafile,
                          url=replica_url,
                          location=Location.get_default_location())
        replica.save()
    dataset.save()
    return dataset
Exemple #52
0
    def setUp(self):
        """
        Create the fixtures used by the tests: the 'local' location, a
        user with profile, an experiment owned by that user, a dataset
        in the experiment, and a datafile with one unverified replica.
        """
        Location.force_initialize()
        self.location = Location.get_location('local')

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        # Keep the profile object itself: save() returns nothing, so
        # chaining it onto the constructor would store None here.
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        # The dataset must be saved once before experiments can be added
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()
        self.replica = Replica(datafile=self.dataset_file,
                               url="http://foo",
                               location=self.location,
                               verified=False)
        self.replica.save()
Exemple #53
0
 def _get_destination(self, destName, default):
     """
     Resolve a destination name, falling back to ``default``, to a location.

     :raises CommandError: if neither name is given, if the chosen name
         does not resolve to a location, or if the lookup raises a
         MigrationError
     """
     name = destName or default
     if not name:
         raise CommandError("No default destination configured")
     try:
         dest = Location.get_location(name)
         if dest:
             return dest
         raise CommandError("Destination %s not known" % name)
     except MigrationError as e:
         # Report lookup failures through the command's own error type
         raise CommandError("Migration error: %s" % e.args[0])
Exemple #54
0
 def _get_destination(self, destName, default):
     """
     Resolve a destination name, falling back to ``default``, to a location.

     :raises CommandError: if neither name is given, if the chosen name
         does not resolve to a location, or if the lookup raises a
         MigrationError
     """
     name = destName or default
     if not name:
         raise CommandError("No default destination configured")
     try:
         dest = Location.get_location(name)
         if dest:
             return dest
         raise CommandError("Destination %s not known" % name)
     except MigrationError as e:
         # Report lookup failures through the command's own error type
         raise CommandError("Migration error: %s" % e.args[0])
Exemple #55
0
 def _build_datafile(self, testfile, filename, dataset, url,
                     protocol='', checksum=None, size=None, mimetype=''):
     """
     Create and save a datafile with one verified-on-save replica.

     :param testfile: file whose size and SHA-512 sum provide the defaults
     :param filename: name of the datafile; also used as the name of a
         location created on the fly
     :param dataset: dataset the datafile belongs to
     :param url: replica URL; a URL without a scheme selects the 'local'
         location
     :param protocol: protocol stored on the replica
     :param checksum: SHA-512 sum to record instead of the computed one
     :param size: size to record instead of the measured one
     :param mimetype: mimetype stored on the datafile
     :returns: the datafile as re-read from the database
     """
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(dataset=dataset, filename=filename,
                             mimetype=mimetype,
                             # An explicit size (including 0) overrides
                             # the measured one
                             size=str(filesize if size is None else size),
                             sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
             # No registered location matches: register an external one
             # rooted at the URL's parent
             location = Location.load_location({
                 'name': filename, 'url': urlparse.urljoin(url, '.'),
                 'type': 'external',
                 'priority': 10, 'transfer_provider': 'local'})
     replica = Replica(datafile=datafile, protocol=protocol, url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)
Exemple #56
0
 def _setup(self):
     """
     Build the shared fixtures: two users, four experiments, four
     datasets and eight datafile/replica pairs of varying size.
     """
     Location.force_initialize()
     # NOTE(review): the second argument is presumably a user priority -
     # confirm against generate_user().
     self.user1 = generate_user('joe', 2)
     self.user2 = generate_user('fred', 1)
     # exp1 is shared by both users; exp2..exp4 belong to user1 only
     self.exp1 = generate_experiment(users=[self.user1, self.user2])
     self.exp2 = generate_experiment(users=[self.user1])
     self.exp3 = generate_experiment(users=[self.user1])
     self.exp4 = generate_experiment(users=[self.user1])
     # ds2 is the only dataset attached to two experiments
     self.ds1 = generate_dataset(experiments=[self.exp1])
     self.ds2 = generate_dataset(experiments=[self.exp1, self.exp2])
     self.ds3 = generate_dataset(experiments=[self.exp3])
     self.ds4 = generate_dataset(experiments=[self.exp4])
     self.df1, self.rep1 = generate_datafile('1/2/1', self.ds1, size=100)
     # df2 is the only datafile created with verified=False
     self.df2, self.rep2 = generate_datafile('1/2/2',
                                             self.ds1,
                                             size=100,
                                             verified=False)
     # df3 is the only datafile created from a full http URL
     self.df3, self.rep3 = generate_datafile(
         'http://127.0.0.1:4272/data/1/2/3', self.ds1, size=1000)
     self.df4, self.rep4 = generate_datafile('1/2/4', self.ds2, size=1000)
     self.df5, self.rep5 = generate_datafile('1/2/5', self.ds2, size=10000)
     self.df6, self.rep6 = generate_datafile('1/2/6', self.ds3, size=100000)
     # Edge cases: a zero size and a negative size
     self.df7, self.rep7 = generate_datafile('1/2/7', self.ds4, size=0)
     self.df8, self.rep8 = generate_datafile('1/2/8', self.ds4, size=-1)
Exemple #57
0
def create_staging_datafile(filepath, username, dataset_id):
    """
    Register a staged file as a datafile of the given dataset, with a
    replica at the 'staging' location.

    :param filepath: path of the staged file
    :param username: user whose staging area holds the file
    :param dataset_id: id of the dataset that receives the datafile
    """
    from tardis.tardis_portal.models import (
        Dataset_File, Dataset, Replica, Location)

    dataset = Dataset.objects.get(id=dataset_id)
    url, size = get_staging_url_and_size(username, filepath)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(filepath),
                            size=size)
    staging_location = Location.get_location('staging')
    replica = Replica(datafile=datafile,
                      protocol='staging',
                      url=url,
                      location=staging_location)
    # Verification happens before the datafile is saved
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    # Re-assign the datafile now that it has been saved
    replica.datafile = datafile
    replica.save()
Exemple #58
0
    def testMirror(self):
        """Mirror a replica to 'test' and check both copies then exist."""
        destination = Location.get_location('test')
        datafile, replica = generate_datafile(None, self.dataset, "Hi granny")
        local_path = datafile.get_absolute_filepath()
        self.assertTrue(os.path.exists(local_path))

        # An unsaved replica used only to ask the provider about the
        # default URL at the 'test' location
        probe = Replica()
        probe.datafile = datafile
        probe.location = Location.objects.get(name='test')
        probe.url = probe.generate_default_url()

        # The provider cannot report a length before mirroring
        with self.assertRaises(TransferError):
            destination.provider.get_length(probe)

        mirrored = migrate_replica(replica, destination, mirror=True)
        self.assertTrue(mirrored)

        # The datafile is still local and the provider now reports the
        # 9 characters of "Hi granny"
        datafile = Dataset_File.objects.get(id=datafile.id)
        self.assertTrue(datafile.is_local())
        self.assertEquals(destination.provider.get_length(probe), 9)
Exemple #59
0
def fpupload(request, dataset_id):
    """
    Uploads all files picked by filepicker to the dataset

    Every POST field value is split on commas and each piece is handed to
    ``FilepickerFile``.  Pieces for which ``FilepickerFile`` raises
    ``ValueError`` are skipped; every other piece is fetched, written to
    the dataset and recorded as a datafile with a replica at the default
    location.

    :param request: a HTTP Request instance
    :type request: :class:`django.http.HttpRequest`
    :param dataset_id: the dataset_id
    :type dataset_id: integer
    :returns: a response whose body is the JSON document
        ``{"result": true}``; it is returned for every request method,
        including non-POST requests where nothing is uploaded
    :rtype: HttpResponse
    """

    dataset = Dataset.objects.get(id=dataset_id)
    logger.debug('called fpupload')

    if request.method == 'POST':
        logger.debug('got POST')
        # Only the field values are needed; each may carry several
        # comma-separated urls.
        for val in request.POST.values():
            for url in val.split(","):
                try:
                    fp = FilepickerFile(url)
                except ValueError:
                    # Best effort: skip this value, but leave a trace
                    # instead of discarding it silently.
                    logger.debug('fpupload: skipping value %r', url)
                    continue

                picked_file = fp.get_file()
                filepath = write_uploaded_file_to_dataset(
                    dataset, picked_file)
                datafile = Dataset_File(dataset=dataset,
                                        filename=picked_file.name,
                                        size=picked_file.size)
                replica = Replica(datafile=datafile,
                                  url=filepath,
                                  protocol='',
                                  location=Location.get_default_location())
                replica.verify(allowEmptyChecksums=True)
                # Save the datafile first, then re-attach it before saving
                # the replica.
                datafile.save()
                replica.datafile = datafile
                replica.save()

    return HttpResponse(json.dumps({"result": True}))
Exemple #60
0
def migrate_replica(replica, location, noRemove=False, mirror=False):
    """
    Migrate the replica to a different storage location.  The overall
    effect will be that the datafile will be stored at the new location and
    removed from the current location, and the datafile metadata will be
    updated to reflect this.

    All of the work happens inside ``transaction.commit_on_success()``.

    :param replica: the replica to migrate; it is re-read by primary key
        with ``select_for_update()`` before anything else is done
    :param location: the destination location
    :param noRemove: if true, the source replica record is still deleted
        but the file is not removed from the source location
    :param mirror: if true, only make sure a replica exists at the
        destination; the source replica and its file are left untouched
    :returns: when ``mirror`` is true, True if a new destination replica
        was created and False if one already existed; otherwise True
    :raises MigrationError: if the source replica is not verified or the
        destination provider has ``trust_length`` set; if a destination
        replica already exists, ``mirror`` is false and it fails
        ``check_file_transferred``; or if the generated destination url
        equals the new replica's url
    """

    from tardis.tardis_portal.models import Replica, Location

    with transaction.commit_on_success():
        # Re-read the replica through select_for_update() and resolve the
        # source location by name.
        replica = Replica.objects.select_for_update().get(pk=replica.pk)
        source = Location.get_location(replica.location.name)

        # NOTE(review): as written this rejects every migration to a
        # destination whose provider has trust_length set, whether or not
        # the replica is verified.  The message suggests the intent may
        # have been narrower - confirm.
        if not replica.verified or location.provider.trust_length:
            raise MigrationError('Only verified datafiles can be migrated' \
                                     ' to this destination')

        filename = replica.get_absolute_filepath()
        try:
            # Reuse a replica of this datafile that already exists at the
            # destination, if there is one.
            newreplica = Replica.objects.get(datafile=replica.datafile,
                                             location=location)
            created_replica = False
            # We've most likely mirrored this file previously.  But if
            # we are about to delete the source Replica, we need to check
            # that the target Replica still verifies.
            if not mirror and not check_file_transferred(newreplica, location):
                raise MigrationError('Previously mirrored / migrated Replica' \
                                         ' no longer verifies locally!')
        except Replica.DoesNotExist:
            # No replica at the destination yet: build one, transfer the
            # file and check the transfer.
            newreplica = Replica()
            newreplica.location = location
            newreplica.datafile = replica.datafile
            newreplica.protocol = ''
            newreplica.stay_remote = location != Location.get_default_location(
            )
            newreplica.verified = False
            url = location.provider.generate_url(newreplica)

            # NOTE(review): newreplica.url has not been assigned at this
            # point, so this compares the generated url with the new
            # object's initial url.  Presumably the check was meant to use
            # the source replica's url, and the comment below was probably
            # meant to read "should not get here" - confirm.
            if newreplica.url == url:
                # We should get here ...
                raise MigrationError('Cannot migrate a replica to its' \
                                         ' current location')
            newreplica.url = url
            location.provider.put_file(replica, newreplica)
            verified = False
            try:
                verified = check_file_transferred(newreplica, location)
            except:
                # Whatever went wrong, remove the file that was just put at
                # the destination, then re-raise the original exception.
                # FIXME - should we always do this?
                location.provider.remove_file(newreplica)
                raise

            # NOTE(review): if check_file_transferred returns a false value
            # instead of raising, the new replica is saved unverified and,
            # when not mirroring, the source is still deleted below -
            # confirm that it can only return True or raise.
            newreplica.verified = verified
            newreplica.save()
            logger.info('Transferred file %s for replica %s' %
                        (filename, replica.id))
            created_replica = True

        if mirror:
            # Mirroring stops here: the source replica and file are kept.
            return created_replica

        # The source replica record is deleted first; the file itself is
        # removed from the source location only when noRemove is false.
        # FIXME - do this more reliably ...
        replica.delete()
        if not noRemove:
            source.provider.remove_file(replica)
            logger.info('Removed local file %s for replica %s' %
                        (filename, replica.id))
        return True