Example #1
        def create_datafile(index):
            testfile = path.join(path.dirname(__file__), 'fixtures',
                                 'jeol_sem_test%d.txt' % index)

            size, sha512sum = get_size_and_sha512sum(testfile)

            datafile = Dataset_File(dataset=dataset,
                                    filename=path.basename(testfile),
                                    size=size,
                                    sha512sum=sha512sum)
            datafile.save()
            base_url = 'file://' + path.abspath(path.dirname(testfile))
            location = Location.load_location({
                'name': 'test-jeol',
                'url': base_url,
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'
            })
            replica = Replica(datafile=datafile,
                              url='file://' + path.abspath(testfile),
                              protocol='file',
                              location=location)
            replica.verify()
            replica.save()
            return Dataset_File.objects.get(pk=datafile.pk)
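Several of the examples above and below rely on a get_size_and_sha512sum() helper that is never shown. As a rough sketch of what such a helper plausibly does (the chunked reading is an assumption, not the project's actual code):

import hashlib
import os

def get_size_and_sha512sum(filepath):
    # Hypothetical helper: return (size_in_bytes, hex_sha512) for a file.
    # Reads in chunks so large files need not fit in memory.
    digest = hashlib.sha512()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return os.path.getsize(filepath), digest.hexdigest()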
Example #2
 def clone(cls, oldInstance, newDescription, username):
     newInstance = cls(description=newDescription,
                       experiment_id=oldInstance.dataset.experiment.id)
     for param in oldInstance.parameters:
         if param.name.name not in cls.doNotCopyParams:
             if param.name.isNumeric():
                 value = param.numerical_value
             else:
                 value = param.string_value
             newInstance.new_param(param.name.name, value)
     import shutil
     import os
     for filename in oldInstance.get_params("uploaded_file", value=True):
         if not filename.endswith(".jobfile"):
             thisfile = Dataset_File.objects.get(
                 dataset=oldInstance.dataset,
                 filename=filename)
             shutil.copy(thisfile.get_absolute_filepath(),
                         get_full_staging_path(username))
             newfileurl = os.path.join(get_full_staging_path(username),
                                       filename)
             newDatafile = Dataset_File(
                 dataset=newInstance.dataset,
                 url=newfileurl,
                 protocol="staging",
                 mimetype=thisfile.mimetype,
                 )
             newDatafile.save()
     return newInstance
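get_full_staging_path(username) is also used without being shown. Judging from usage it resolves a per-user staging directory; a minimal sketch, assuming a settings.STAGING_PATH base directory (the setting name is an assumption):

import os
from django.conf import settings

def get_full_staging_path(username):
    # Hypothetical sketch: each user gets a staging directory named
    # after them under an assumed settings.STAGING_PATH base.
    return os.path.join(settings.STAGING_PATH, username)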
Example #3
def add_datafile_to_dataset(dataset, filepath, size):
    """
    Adds datafile metadata to a dataset

    :param dataset: dataset who's directory to be written to
    :type dataset: :class:`tardis.tardis_portal.models.Dataset`
    :param filepath: The full os path to the file
    :type filepath: string
    :param size: The file size in bytes
    :type size: string
    :rtype: The new datafile object
    """
    from tardis.tardis_portal.models import Dataset_File

    experiment_path = path.join(settings.FILE_STORE_PATH,
                                str(dataset.experiment.id))

    dataset_path = path.join(experiment_path, str(dataset.id))
    urlpath = 'tardis:/' + filepath[len(dataset_path):]
    filename = urlpath.rpartition('/')[2]

    datafile = Dataset_File(dataset=dataset,
                            filename=filename,
                            url=urlpath,
                            size=size,
                            protocol='tardis')
    datafile.save()

    return datafile
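Note that the 'tardis:/' + filepath[len(dataset_path):] slice only yields a sensible URL when filepath really sits below the dataset directory, so callers are expected to write the file there first. A hypothetical usage sketch (the wrapper and file name are invented for illustration):

from os import path
from django.conf import settings

def register_csv(dataset):
    # Hypothetical usage: the file has already been written into the
    # dataset's directory under FILE_STORE_PATH.
    filepath = path.join(settings.FILE_STORE_PATH,
                         str(dataset.experiment.id), str(dataset.id),
                         'observations.csv')
    return add_datafile_to_dataset(dataset, filepath,
                                   path.getsize(filepath))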
Example #4
    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public=True)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public=False)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset(experiment=self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset(experiment=self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename = 'testfile.txt'
        self.dest1 = abspath(
            join(settings.FILE_STORE_PATH,
                 '%s/%s/' % (self.experiment1.id, self.dataset1.id)))
        self.dest2 = abspath(
            join(settings.FILE_STORE_PATH,
                 '%s/%s/' % (self.experiment2.id, self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename))
        f = open(testfile1, 'w')
        f.write("Hello World!\n")
        f.close()

        testfile2 = abspath(join(self.dest2, filename))
        f = open(testfile2, 'w')
        f.write("Hello World!\n")
        f.close()

        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename,
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file1.save()

        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename),
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file2.save()
Example #5
def add_datafile_to_dataset(dataset, filepath, size):
    """
    Adds datafile metadata to a dataset

    :param dataset: dataset who's directory to be written to
    :type dataset: :class:`tardis.tardis_portal.models.Dataset`
    :param filepath: The full os path to the file
    :type filepath: string
    :param size: The file size in bytes
    :type size: string
    :rtype: The new datafile object
    """

    experiment_path = path.join(settings.FILE_STORE_PATH,
                                str(dataset.experiment.id))

    dataset_path = path.join(experiment_path, str(dataset.id))
    urlpath = 'file:/' + filepath[len(experiment_path):]
    filename = urlpath.rpartition('/')[2]

    datafile = Dataset_File(dataset=dataset, filename=filename,
                            url=urlpath, size=size, protocol='')
    datafile.save()

    return datafile
Example #6
 def _make_dataset(self, exp, filenames):
     dataset = Dataset(experiment=exp)
     dataset.save()
     for filename in filenames:
         df = Dataset_File(dataset=dataset, size=41, protocol='file')
         df.filename = filename
         df.url = 'file://' + path.join(path.dirname(__file__), 'data', df.filename)
         df.save()
Example #7
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(nosDatafiles):
        df_ = Dataset_File(dataset=ds_,
                           url='http://planet-python.org/' + str(_next_id()))
        df_.save()
    ds_.save()
    return ds_
Example #8
    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public=True)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public=False)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset(experiment=self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset(experiment=self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename = 'testfile.txt'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH, '%s'
                                  % self.experiment1.id))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH, '%s'
                                  % self.experiment2.id))
        if not exists(self.dest1):
            mkdir(self.dest1)
        if not exists(self.dest2):
            mkdir(self.dest2)

        testfile1 = abspath(join(self.dest1, filename))
        f = open(testfile1, 'w')
        f.write("Hello World!\n")
        f.close()

        testfile2 = abspath(join(self.dest2, filename))
        f = open(testfile2, 'w')
        f.write("Hello World!\n")
        f.close()

        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename,
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file1.save()

        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename),
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file2.save()
Example #9
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(0, nosDatafiles):
        df_ = Dataset_File(dataset=ds_,
                           url='http://planet-python.org/' + str(_next_id()))
        df_.save()
    ds_.save()
    return ds_
Example #10
 def _build(dataset, filename, url, protocol):
     from tardis.tardis_portal.models import \
         Dataset_File, Replica, Location
     datafile = Dataset_File(dataset=dataset, filename=filename)
     datafile.save()
     replica = Replica(datafile=datafile, url=url,
                       protocol=protocol,
                       location=Location.get_default_location())
     replica.save()
     return datafile
Example #11
 def _build(dataset, filename, url, protocol):
     from tardis.tardis_portal.models import \
         Dataset_File, Replica, Location
     datafile = Dataset_File(dataset=dataset, filename=filename)
     datafile.save()
     replica = Replica(datafile=datafile, url=url, 
                       protocol=protocol,
                       location=Location.get_default_location())
     replica.save()
     return datafile
Example #12
def add_staged_file_to_dataset(rel_filepath, dataset_id, username,
                               mimetype="application/octet-stream"):
    """
    add file in user's staging path to a dataset
    may be replaced by main code functions.
    quick and dirty hack to get it working
    """
    originfilepath = os.path.join(get_full_staging_path(username), rel_filepath)
    dataset = Dataset.objects.get(pk=dataset_id)
    newDatafile = Dataset_File()
    newDatafile.dataset = dataset
    newDatafile.size = os.path.getsize(originfilepath)
    newDatafile.protocol = "tardis"
    newDatafile.mimetype = mimetype
    file_dir = "/" + str(dataset.experiment.id) + "/" + str(dataset.id) + "/"
    file_path = file_dir + rel_filepath
    prelim_full_file_path = settings.FILE_STORE_PATH + file_path
    full_file_path = duplicate_file_check_rename(prelim_full_file_path)
    newDatafile.filename = os.path.basename(full_file_path)
    newDatafile.url = "%s://%s" % (newDatafile.protocol,
                                   full_file_path[
            len(settings.FILE_STORE_PATH) + len(file_dir):])
    if not os.path.exists(os.path.dirname(full_file_path)):
        os.makedirs(os.path.dirname(full_file_path))
    shutil.move(originfilepath, full_file_path)
    newDatafile.save()
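duplicate_file_check_rename() is not shown either. From its use it must return a destination path that does not collide with an existing file; one plausible sketch (the numbering scheme is an assumption):

import os

def duplicate_file_check_rename(file_path):
    # Hypothetical sketch: if file_path is free, use it; otherwise insert
    # a counter before the extension until an unused name is found,
    # e.g. "data.txt" -> "data_1.txt" -> "data_2.txt".
    if not os.path.exists(file_path):
        return file_path
    root, ext = os.path.splitext(file_path)
    counter = 1
    while os.path.exists('%s_%d%s' % (root, counter, ext)):
        counter += 1
    return '%s_%d%s' % (root, counter, ext)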
Example #13
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(nosDatafiles):
        df_ = Dataset_File(dataset=ds_, size='21', sha512sum='bogus')
        df_.save()
        rep_ = Replica(datafile=df_,
                       url='http://planet-python.org/' + str(_next_id()),
                       location=Location.get_default_location())
        rep_.save()
    ds_.save()
    return ds_
Example #14
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(0, nosDatafiles):
        df_ = Dataset_File(dataset=ds_, size='21', sha512sum='bogus')
        df_.save()
        rep_ = Replica(datafile=df_,
                       url='http://planet-python.org/' + str(_next_id()),
                       location=Location.get_default_location())
        rep_.save()
    ds_.save()
    return ds_
Example #15
    def testRemoteFile(self):
        content = urandom(1024)
        with NamedTemporaryFile() as f:
            # Create new Datafile
            datafile = Dataset_File(dataset=self.dataset)
            datafile.filename = 'background_task_testfile'
            datafile.size = len(content)
            datafile.sha512sum = hashlib.sha512(content).hexdigest()
            datafile.url = 'file://' + path.abspath(f.name)
            datafile.save()

            def get_datafile(datafile):
                return Dataset_File.objects.get(id=datafile.id)

            # Check that it won't verify as it stands
            expect(get_datafile(datafile).verified).to_be(False)
            verify_files()
            expect(get_datafile(datafile).verified).to_be(False)
            expect(get_datafile(datafile).is_local()).to_be(False)

            # Fill in the content
            f.write(content)
            f.flush()

            # Check it now verifies
            verify_files()
            expect(get_datafile(datafile).verified).to_be(True)
            expect(get_datafile(datafile).is_local()).to_be(True)
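This test works because NamedTemporaryFile() starts out empty, so the stored SHA-512 (computed over content) cannot match until the bytes are written and flushed. A quick standalone check of that premise:

import hashlib
from os import urandom

content = urandom(1024)
# The empty file's digest can never equal the digest of 1024 random bytes,
# so verification must fail until the content is actually written.
assert hashlib.sha512(b'').hexdigest() != hashlib.sha512(content).hexdigest()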
Example #16
def create_staging_datafile(filepath, username, dataset_id):
    dataset = Dataset.objects.get(id=dataset_id)

    url, size = get_staging_url_and_size(username, filepath)
    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(filepath),
                            size=size)
    replica = Replica(datafile=datafile,
                      protocol='staging',
                      url=url,
                      location=Location.get_location('staging'))
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
Example #17
def create_staging_datafile(filepath, username, dataset_id):
    from tardis.tardis_portal.models import Dataset_File, Dataset, Replica, \
        Location
    dataset = Dataset.objects.get(id=dataset_id)

    url, size = get_staging_url_and_size(username, filepath)
    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(filepath),
                            size=size)
    replica = Replica(datafile=datafile,
                      protocol='staging',
                      url=url,
                      location=Location.get_location('staging'))
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
Example #18
    def testRemoteFile(self):
        content = urandom(1024)
        with NamedTemporaryFile() as f:
            # Create new Datafile
            datafile = Dataset_File(dataset=self.dataset)
            datafile.filename = 'background_task_testfile'
            datafile.size = len(content)
            datafile.sha512sum = hashlib.sha512(content).hexdigest()
            datafile.url = 'file://' + path.abspath(f.name)
            datafile.save()

            def get_datafile(datafile):
                return Dataset_File.objects.get(id=datafile.id)

            # Check that it won't verify as it stands
            expect(get_datafile(datafile).verified).to_be(False)
            verify_files()
            expect(get_datafile(datafile).verified).to_be(False)
            expect(get_datafile(datafile).is_local()).to_be(False)

            # Fill in the content
            f.write(content)
            f.flush()

            # Check it now verifies
            verify_files()
            expect(get_datafile(datafile).verified).to_be(True)
            expect(get_datafile(datafile).is_local()).to_be(True)
Example #19
    def process_enclosure(self, dataset, enclosure):
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            hash = enclosure.hash
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict([s.partition(':')[::2] for s in hash.split()])
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except AttributeError:
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url, location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
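The one-liner that parses the enclosure hash is terse: s.partition(':') returns a (head, sep, tail) tuple and [::2] keeps just (head, tail), giving (algorithm, digest) pairs. A standalone illustration with a made-up hash string:

# Hypothetical enclosure hash attribute with two algorithm:digest pairs
hash_attr = 'md5:d41d8cd98f00b204e9800998ecf8427e sha-512:abc123'
hashdict = dict([s.partition(':')[::2] for s in hash_attr.split()])
assert hashdict['sha-512'] == 'abc123'
assert hashdict['md5'] == 'd41d8cd98f00b204e9800998ecf8427e'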
Example #20
def add_staged_file_to_dataset(rel_filepath, dataset_id, username,
                               mimetype="application/octet-stream"):
    """
    add file in user's staging path to a dataset
    may be replaced by main code functions.
    quick and dirty hack to get it working
    """
    originfilepath = os.path.join(get_full_staging_path(username),
                                  rel_filepath)
    dataset = Dataset.objects.get(pk=dataset_id)

    newDatafile = Dataset_File(
        dataset=dataset,
        url=originfilepath,
        protocol="staging",
        mimetype=mimetype,
        )
    newDatafile.save()
Example #21
    def test_hrmc_filter(self):
        """
           Make an experiment, lood up grexp file and check
           dataset schema missing, then loadup grfinal and check dataset schema
           created
        """
        user = _create_test_user()
        license = _create_license()
        exp = _create_test_experiment(user, license)
        ds = Dataset(description='happy snaps of plumage')
        ds.save()
        _create_test_dataset(ds, exp.id, {
            "output.dat": 'hello',
            "grexp.dat": '2 5\n6 15\n'
        })
        ds.experiments.add(exp)
        ds.save()

        sch = Schema(namespace=self.HRMCSCHEMA,
                     name="hrmc_views",
                     type=Schema.DATASET)
        sch.save()

        param = ParameterName(schema=sch,
                              name="plot",
                              full_name="scatterplot",
                              units="image",
                              data_type=ParameterName.FILENAME)
        param.save()

        param_sets = get_param_sets(ds)
        self.assertEquals(list(param_sets), [])

        _create_test_dataset(ds, exp.id, {'grfinal21.dat': "1 3\n5 14\n"})

        df2 = Dataset_File(dataset=ds, url='path/grfinal21.dat')
        df2.save()

        h = hrmc.HRMCOutput('HRMC', self.HRMCSCHEMA)
        h(sender=Dataset_File, instance=df2)

        param_sets = get_param_sets(ds)
        self.assertEquals([x.schema.namespace for x in param_sets],
                          [self.HRMCSCHEMA])
Example #22
def _create_datafile():
    user = User.objects.create_user('testuser', '*****@*****.**', 'pwd')
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    full_access = Experiment.PUBLIC_ACCESS_FULL
    experiment = Experiment.objects.create(title="IIIF Test",
                                           created_by=user,
                                           public_access=full_access)
    experiment.save()
    ObjectACL(content_object=experiment,
              pluginId='django_user',
              entityId=str(user.id),
              isOwner=True,
              canRead=True,
              canWrite=True,
              canDelete=True,
              aclOwnershipType=ObjectACL.OWNER_OWNED).save()
    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Create new Datafile
    tempfile = TemporaryUploadedFile('iiif_stored_file', None, None, None)
    with Image(filename='magick:rose') as img:
        img.format = 'tiff'
        img.save(file=tempfile.file)
        tempfile.file.flush()
    datafile = Dataset_File(dataset=dataset,
                            size=os.path.getsize(tempfile.file.name),
                            filename='iiif_named_file')
    replica = Replica(datafile=datafile,
                      url=write_uploaded_file_to_dataset(dataset, tempfile),
                      location=Location.get_default_location())
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
    return datafile
Example #23
def _create_datafile():
    user = User.objects.create_user("testuser", "*****@*****.**", "pwd")
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    full_access = Experiment.PUBLIC_ACCESS_FULL
    experiment = Experiment.objects.create(title="IIIF Test", created_by=user, public_access=full_access)
    experiment.save()
    ObjectACL(
        content_object=experiment,
        pluginId="django_user",
        entityId=str(user.id),
        isOwner=True,
        canRead=True,
        canWrite=True,
        canDelete=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    ).save()
    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Create new Datafile
    tempfile = TemporaryUploadedFile("iiif_stored_file", None, None, None)
    with Image(filename="magick:rose") as img:
        img.format = "tiff"
        img.save(file=tempfile.file)
        tempfile.file.flush()
    datafile = Dataset_File(dataset=dataset, size=os.path.getsize(tempfile.file.name), filename="iiif_named_file")
    replica = Replica(
        datafile=datafile,
        url=write_uploaded_file_to_dataset(dataset, tempfile),
        location=Location.get_default_location(),
    )
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
    return datafile
Example #24
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user).save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()
        self.dfps = DatafileParameterSet(dataset_file=self.dataset_file,
                                         schema=self.testschema)
        self.dfps.save()
Example #25
def fpupload(request, dataset_id):
    """
    Uploads all files picked by filepicker to the dataset

    :param request: a HTTP Request instance
    :type request: :class:`django.http.HttpRequest`
    :param dataset_id: the dataset_id
    :type dataset_id: integer
    :returns: boolean true if successful
    :rtype: bool
    """

    dataset = Dataset.objects.get(id=dataset_id)
    logger.debug('called fpupload')

    if request.method == 'POST':
        logger.debug('got POST')
        for key, val in request.POST.items():
            splits = val.split(",")
            for url in splits:
                try:
                    fp = FilepickerFile(url)
                except ValueError:
                    pass
                else:
                    picked_file = fp.get_file()
                    filepath = write_uploaded_file_to_dataset(dataset,
                                                              picked_file)
                    datafile = Dataset_File(dataset=dataset,
                                            filename=picked_file.name,
                                            size=picked_file.size)
                    replica = Replica(datafile=datafile,
                                      url=filepath,
                                      protocol='',
                                      location=Location.get_default_location())
                    replica.verify(allowEmptyChecksums=True)
                    datafile.save()
                    replica.datafile = datafile
                    replica.save()

    return HttpResponse(json.dumps({"result": True}))
Example #26
def fpupload(request, dataset_id):
    """
    Uploads all files picked by filepicker to the dataset

    :param request: a HTTP Request instance
    :type request: :class:`django.http.HttpRequest`
    :param dataset_id: the dataset_id
    :type dataset_id: integer
    :returns: boolean true if successful
    :rtype: bool
    """

    dataset = Dataset.objects.get(id=dataset_id)
    logger.debug('called fpupload')

    if request.method == 'POST':
        logger.debug('got POST')
        for key, val in request.POST.items():
            splits = val.split(",")
            for url in splits:
                try:
                    fp = FilepickerFile(url)
                except ValueError:
                    pass
                else:
                    picked_file = fp.get_file()
                    filepath = write_uploaded_file_to_dataset(
                        dataset, picked_file)
                    datafile = Dataset_File(dataset=dataset,
                                            filename=picked_file.name,
                                            size=picked_file.size)
                    replica = Replica(datafile=datafile,
                                      url=filepath,
                                      protocol='',
                                      location=Location.get_default_location())
                    replica.verify(allowEmptyChecksums=True)
                    datafile.save()
                    replica.datafile = datafile
                    replica.save()

    return HttpResponse(json.dumps({"result": True}))
Example #27
 def _build_datafile(self,
                     testfile,
                     filename,
                     dataset,
                     url,
                     protocol='',
                     checksum=None,
                     size=None,
                     mimetype=''):
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(
         dataset=dataset,
         filename=filename,
         mimetype=mimetype,
          size=str(size if size is not None else filesize),
         sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
              location = Location.load_location({
                  'name': filename,
                  'url': urlparse.urljoin(url, '.'),
                  'type': 'external',
                  'priority': 10,
                  'transfer_provider': 'local'
              })
     replica = Replica(datafile=datafile,
                       protocol=protocol,
                       url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)
Example #28
        def create_datafile(file_path):
            testfile = path.join(path.dirname(__file__), 'fixtures', file_path)

            size, sha512sum = get_size_and_sha512sum(testfile)

            datafile = Dataset_File(dataset=dataset,
                                    filename=path.basename(testfile),
                                    size=size,
                                    sha512sum=sha512sum)
            datafile.save()
            base_url = 'file://' + path.abspath(path.dirname(testfile))
            location = Location.load_location({
                'name': 'test-flexstation', 'url': base_url, 'type': 'external',
                'priority': 10, 'transfer_provider': 'local'})
            replica = Replica(datafile=datafile,
                              url='file://' + path.abspath(testfile),
                              protocol='file',
                              location=location)
            replica.verify()
            replica.save()
            return Dataset_File.objects.get(pk=datafile.pk)
Example #29
    def test_hrmc_filter(self):
        """
           Make an experiment, lood up grexp file and check
           dataset schema missing, then loadup grfinal and check dataset schema
           created
        """
        user = _create_test_user()
        license = _create_license()
        exp = _create_test_experiment(user, license)
        ds = Dataset(description='happy snaps of plumage')
        ds.save()
        _create_test_dataset(ds, exp.id,
            {"output.dat": 'hello', "grexp.dat": '2 5\n6 15\n'})
        ds.experiments.add(exp)
        ds.save()

        sch = Schema(namespace=self.HRMCSCHEMA,
            name="hrmc_views", type=Schema.DATASET)
        sch.save()

        param = ParameterName(schema=sch, name="plot",
            full_name="scatterplot", units="image",
            data_type=ParameterName.FILENAME
            )
        param.save()

        param_sets = get_param_sets(ds)
        self.assertEquals(list(param_sets), [])

        _create_test_dataset(ds, exp.id, {'grfinal21.dat': "1 3\n5 14\n"})

        df2 = Dataset_File(dataset=ds, url='path/grfinal21.dat')
        df2.save()

        h = hrmc.HRMCOutput('HRMC', self.HRMCSCHEMA)
        h(sender=Dataset_File, instance=df2)

        param_sets = get_param_sets(ds)
        self.assertEquals([x.schema.namespace for x in param_sets],
            [self.HRMCSCHEMA])
Example #30
    def testLocalFile(self):
        content = urandom(1024)
        cf = ContentFile(content, 'background_task_testfile')

        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = cf.name
        datafile.size = len(content)
        datafile.sha512sum = hashlib.sha512(content).hexdigest()
        datafile.save()
        replica = Replica(datafile=datafile,
                          url=write_uploaded_file_to_dataset(self.dataset, cf),
                          location=Location.get_default_location())
        replica.save()

        def get_replica(datafile):
            return Replica.objects.get(datafile=datafile)

        # undo auto-verify:
        replica.verified = False
        replica.save(update_fields=['verified'])

        # Check that it's not currently verified
        expect(get_replica(datafile).verified).to_be(False)

        # Check it verifies
        verify_files()
        expect(get_replica(datafile).verified).to_be(True)
Example #31
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        Location.force_initialize()
        self.location = Location.get_location('local')

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user).save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()
        self.replica = Replica(datafile=self.dataset_file,
                               url="http://foo",
                               location=self.location,
                               verified=False)
        self.replica.save()
Example #32
    def process_enclosure(self, dataset, enclosure):
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            hash = enclosure.hash
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict([s.partition(':')[::2] for s in hash.split()])
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except AttributeError:
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url,
                          location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
Example #33
 def _build_datafile(self, testfile, filename, dataset, url, 
                     protocol='', checksum=None, size=None, mimetype=''):
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(dataset=dataset, filename=filename,
                             mimetype=mimetype,
                              size=str(size if size is not None else filesize),
                             sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
             location = Location.load_location({
                 'name': filename, 'url': urlparse.urljoin(url, '.'), 
                 'type': 'external', 
                 'priority': 10, 'transfer_provider': 'local'})
     replica = Replica(datafile=datafile, protocol=protocol, url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)
Example #34
 def _make_dataset(self, exp, filenames):
     dataset = Dataset(experiment=exp)
     dataset.save()
     for filename in filenames:
         df = Dataset_File(dataset=dataset, size=41, protocol='file')
         df.filename = filename
         df.url = 'file://' + path.join(path.dirname(__file__), 'data',
                                        df.filename)
         df.save()
Example #35
        def create_datafile(index):
            testfile = path.join(path.dirname(__file__), 'fixtures',
                                 'middleware_test%d.txt' % index)

            size, sha512sum = get_size_and_sha512sum(testfile)

            datafile = Dataset_File(dataset=dataset,
                                    filename=path.basename(testfile),
                                    size=size,
                                    sha512sum=sha512sum)
            datafile.save()
            base_url = 'file://' + path.abspath(path.dirname(testfile))
            location = Location.load_location({
                'name': 'test-middleware', 'url': base_url, 'type': 'external',
                'priority': 10, 'transfer_provider': 'local'})
            replica = Replica(datafile=datafile,
                              url='file://'+path.abspath(testfile),
                              protocol='file',
                              location=location)
            replica.save()
            if index != 1:
                replica.verified = False
                replica.save(update_fields=['verified'])
            return Dataset_File.objects.get(pk=datafile.pk)
Example #36
def _create_test_dataset(ds, exp_id, fnames):
    for fname, contents in fnames.items():
        dest = os.path.abspath(
            os.path.join(settings.FILE_STORE_PATH, '%s/%s/' % (exp_id, ds.id)))
        if not os.path.exists(dest):
            os.makedirs(dest)
        testfile = os.path.abspath(os.path.join(dest, fname))
        with open(testfile, "w+b") as f:
            f.write(contents)

        size, sha512sum = get_size_and_sha512sum(testfile)
        dataset_file = Dataset_File(dataset=ds,
                                    filename=fname,
                                    protocol='',
                                    size=size,
                                    sha512sum=sha512sum,
                                    url='%d/%d/%s' % (exp_id, ds.id, fname))
        dataset_file.verify()
        dataset_file.save()
    return ds
Example #37
    def testLocalFile(self):
        content = urandom(1024)
        cf = ContentFile(content, 'background_task_testfile')

        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = cf.name
        datafile.size = len(content)
        datafile.sha512sum = hashlib.sha512(content).hexdigest()
        datafile.url = write_uploaded_file_to_dataset(self.dataset, cf)
        datafile.save()

        def get_datafile(datafile):
            return Dataset_File.objects.get(id=datafile.id)

        # Check that it's not currently verified
        expect(get_datafile(datafile).verified).to_be(False)

        # Check it verifies
        verify_files()
        expect(get_datafile(datafile).verified).to_be(True)
Example #38
        def create_datafile(index):
            testfile = path.join(path.dirname(__file__), 'fixtures',
                                 'jeol_sem_test%d.txt' % index)

            size, sha512sum = get_size_and_sha512sum(testfile)

            datafile = Dataset_File(dataset=dataset,
                                    filename=path.basename(testfile),
                                    url='file://'+path.abspath(testfile),
                                    protocol='file',
                                    size=size,
                                    sha512sum=sha512sum)
            datafile.verify()
            datafile.save()
            return datafile
Example #39
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user).save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash', created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42, filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()
        self.dfps = DatafileParameterSet(dataset_file=self.dataset_file,
                                         schema=self.testschema)
        self.dfps.save()
Example #40
    def testLocalFile(self):
        content = urandom(1024)
        cf = ContentFile(content, 'background_task_testfile')

        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = cf.name
        datafile.size = len(content)
        datafile.sha512sum = hashlib.sha512(content).hexdigest()
        datafile.url = write_uploaded_file_to_dataset(self.dataset, cf)
        datafile.save()

        def get_datafile(datafile):
            return Dataset_File.objects.get(id=datafile.id)

        # Check that it's not currently verified
        expect(get_datafile(datafile).verified).to_be(False)

        # Check it verifies
        verify_files()
        expect(get_datafile(datafile).verified).to_be(True)
Example #41
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        Location.force_initialize()
        self.location = Location.get_location('local')

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user).save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash', created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42, filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()
        self.replica = Replica(datafile=self.dataset_file,
                               url="http://foo",
                               location=self.location,
                               verified=False)
        self.replica.save()
Example #42
    def testRemoteFile(self):
        content = urandom(1024)
        with NamedTemporaryFile() as f:
            # Create new Datafile
            datafile = Dataset_File(dataset=self.dataset)
            datafile.filename = 'background_task_testfile'
            datafile.size = len(content)
            datafile.sha512sum = hashlib.sha512(content).hexdigest()
            datafile.save()
            url = 'file://' + path.abspath(f.name)
            base_url = 'file://' + path.dirname(path.abspath(f.name))
            location = self._get_or_create_local_location(
                'test-staging-xxx', base_url, 'external', 10)
            replica = Replica(datafile=datafile, location=location, url=url)
            replica.save()

            def get_replica(replica):
                try:
                    return Replica.objects.get(id=replica.id)
                except Replica.DoesNotExist:
                    return None

            def get_new_replica(datafile):
                location = Location.get_default_location()
                return Replica.objects.get(datafile=datafile.id,
                                           location=location)

            # Check that it won't verify as it stands
            expect(get_replica(replica).verified).to_be(False)
            verify_files()
            expect(get_replica(replica).verified).to_be(False)
            expect(get_replica(replica).is_local()).to_be(False)

            # Fill in the content
            f.write(content)
            f.flush()

            # Check it now verifies
            verify_files()
            expect(get_replica(replica).id).to_be(
                get_new_replica(datafile).id)
            expect(get_new_replica(datafile).verified).to_be(True)
            expect(get_new_replica(datafile).is_local()).to_be(True)
Example #43
def _create_test_dataset(ds, exp_id, fnames):
    for fname, contents in fnames.items():
        dest = os.path.abspath(
            os.path.join(settings.FILE_STORE_PATH, '%s/%s/' % (exp_id, ds.id)))
        if not os.path.exists(dest):
            os.makedirs(dest)
        testfile = os.path.abspath(os.path.join(dest, fname))
        with open(testfile, "w+b") as f:
            f.write(contents)

        size, sha512sum = get_size_and_sha512sum(testfile)
        dataset_file = Dataset_File(dataset=ds,
                                    filename=fname,
                                    protocol='',
                                    size=size,
                                    sha512sum=sha512sum,
                                    url='%d/%d/%s' % (exp_id, ds.id, fname))
        dataset_file.verify()
        dataset_file.save()
    return ds
Example #44
 def process_enclosure(self, dataset, enclosure):
     filename = getattr(enclosure, 'title', basename(enclosure.href))
     datafile = Dataset_File(url=enclosure.href,
                             filename=filename,
                             dataset=dataset)
     datafile.protocol = enclosure.href.partition('://')[0]
     try:
         datafile.mimetype = enclosure.mime
     except AttributeError:
         pass
     try:
         datafile.size = enclosure.length
     except AttributeError:
         pass
     try:
         hash = enclosure.hash
         # Split on white space, then ':' to get tuples to feed into dict
         hashdict = dict([s.partition(':')[::2] for s in hash.split()])
         # Set SHA-512 sum
         datafile.sha512sum = hashdict['sha-512']
     except AttributeError:
         pass
     datafile.save()
     self.make_local_copy(datafile)
Example #45
class DownloadTestCase(TestCase):

    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public_access=Experiment.PUBLIC_ACCESS_FULL)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public_access=Experiment.PUBLIC_ACCESS_NONE)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset()
        self.dataset1.save()
        self.dataset1.experiments.add(self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset()
        self.dataset2.save()
        self.dataset2.experiments.add(self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename1 = 'testfile.txt'
        filename2 = 'testfile.tiff'
        self.dest1 = abspath(
            join(settings.FILE_STORE_PATH,
                 '%s/%s/' % (self.experiment1.id, self.dataset1.id)))
        self.dest2 = abspath(
            join(settings.FILE_STORE_PATH,
                 '%s/%s/' % (self.experiment2.id, self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename1))
        f = open(testfile1, 'w')
        f.write("Hello World!\n")
        f.close()

        testfile2 = abspath(join(self.dest2, filename2))
        _generate_test_image(testfile2)

        size, sha512sum = get_size_and_sha512sum(testfile1)
        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename1,
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s'
                                              % (self.experiment1.id,
                                                 self.dataset1.id,
                                                 filename1))
        self.dataset_file1.verify()
        self.dataset_file1.save()

        size, sha512sum = get_size_and_sha512sum(testfile2)
        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename2),
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s'
                                            % (self.experiment2.id,
                                               self.dataset2.id,
                                               filename2))
        self.dataset_file2.verify()
        self.dataset_file2.save()

    def tearDown(self):
        self.user.delete()
        self.experiment1.delete()
        self.experiment2.delete()
        rmtree(self.dest1)
        rmtree(self.dest2)

    def testView(self):
        client = Client()

        # check view of file1
        response = client.get('/datafile/view/%i/' % self.dataset_file1.id)

        self.assertEqual(response['Content-Disposition'],
                         'inline; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # check view of file2
        response = client.get('/datafile/view/%i/' % self.dataset_file2.id)
        # Should be forbidden
        self.assertEqual(response.status_code, 403)

        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_FULL
        self.experiment2.save()
        # check view of file2 again
        response = client.get('/datafile/view/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 200)

        # The following behaviour relies on ImageMagick
        if IMAGEMAGICK_AVAILABLE:
            # file2 should have a ".png" filename
            self.assertEqual(response['Content-Disposition'],
                             'inline; filename="%s"'
                             % (self.dataset_file2.filename+'.png'))
            # file2 should be a PNG
            self.assertEqual(response['Content-Type'], 'image/png')
            png_signature = "\x89PNG\r\n\x1a\n"
            self.assertEqual(response.content[0:8], png_signature)
        else:
            # file2 should have a ".tiff" filename
            self.assertEqual(response['Content-Disposition'],
                             'inline; filename="%s"'
                             % (self.dataset_file2.filename))
            # file2 should be a TIFF
            self.assertEqual(response['Content-Type'], 'image/tiff')
            tiff_signature = "II\x2a\x00"
            self.assertEqual(response.content[0:4], tiff_signature)

    def _check_tar_file(self, content, rootdir, datafiles):
        # It should be a zip file
        with NamedTemporaryFile('w') as tempfile:
            tempfile.write(content)
            tempfile.flush()
            with open(tempfile.name, 'r') as zipread:
                # It should be a zip file (all of which start with "PK")
                expect(zipread.read(2)).to_equal('PK')
            expect(is_zipfile(tempfile.name)).to_be_truthy()
            with ZipFile(tempfile.name, 'r') as zf:
                expect(len(zf.namelist())).to_equal(len(datafiles))
                for df in datafiles:
                    filename = join(rootdir, str(df.dataset.id), df.filename)
                    expect(filename in zf.namelist()).to_be_truthy()

    def _check_zip_file(self, content, rootdir, datafiles):
        # It should be a zip file
        with NamedTemporaryFile('w') as tempfile:
            tempfile.write(content)
            tempfile.flush()
            with open(tempfile.name, 'r') as zipread:
                # It should be a zip file (all of which start with "PK")
                expect(zipread.read(2)).to_equal('PK')
            expect(is_zipfile(tempfile.name)).to_be_truthy()
            zf = ZipFile(tempfile.name, 'r')
            expect(len(zf.namelist())).to_equal(len(datafiles))
            for df in datafiles:
                filename = join(rootdir, str(df.dataset.id), df.filename)
                expect(filename in zf.namelist()).to_be_truthy()
            zf.close()

    def testDownload(self):
        client = Client()

        # check download for experiment1
        response = client.get('/download/experiment/%i/zip/' % self.experiment1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="experiment%s-complete.zip"'
                         % self.experiment1.id)
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(response.content, str(self.experiment1.id),
                             reduce(lambda x, y: x + y,
                                    [ds.dataset_file_set.all() \
                                     for ds in self.experiment1.datasets.all()]))

        # check download of file1
        response = client.get('/download/datafile/%i/' % self.dataset_file1.id)

        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # requesting file2 should be forbidden...
        response = client.get('/download/datafile/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 403)

        # check dataset1 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [self.dataset1.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(response.content, 'datasets',
                             self.dataset1.dataset_file_set.all())

        # check dataset2 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [self.dataset2.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 403)

        # check datafile1 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [],
                                'datafile': [self.dataset_file1.id]})
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(response.content, 'datasets', [self.dataset_file1])

        # check datafile2 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [],
                                'datafile': [self.dataset_file2.id]})
        self.assertEqual(response.status_code, 403)

        # Check datafile2 download with the second experiment set to "metadata only"
        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_METADATA
        self.experiment2.save()
        response = client.get('/download/datafile/%i/' % self.dataset_file2.id)
        # Metadata-only means "no file access"!
        self.assertEqual(response.status_code, 403)

        # Check datafile2 download with the second experiment set to public
        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_FULL
        self.experiment2.save()
        response = client.get('/download/datafile/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 200)
        # This should be a TIFF (little-endian TIFFs start with "II\x2a\x00")
        self.assertEqual(response['Content-Type'], 'image/tiff')
        self.assertEqual(response.content[0:4], "II\x2a\x00")


    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)
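        # (size, md5sum and mimetype are derived from the physical file when
        # the datafile is saved/verified)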

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = abspath(join(dirname(__file__),
                                '../static/images/ands-logo-hi-res.jpg'))

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        size, sha512sum = get_size_and_sha512sum(filename)
        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            size=str(size),
                            sha512sum=sha512sum,
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.verify()
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file',
                            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            size=str(0),
                            # Empty string always has the same hash
                            sha512sum='cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, '')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #46
0
class DownloadTestCase(TestCase):

    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public=True)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public=False)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset(experiment=self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset(experiment=self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename = 'testfile.txt'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH, '%s/%s/'
                                  % (self.experiment1.id,
                                  self.dataset1.id)))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                                '%s/%s/'
                                  % (self.experiment2.id,
                                  self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename))
        with open(testfile1, 'w') as f:
            f.write("Hello World!\n")

        testfile2 = abspath(join(self.dest2, filename))
        with open(testfile2, 'w') as f:
            f.write("Hello World!\n")

        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename,
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file1.save()

        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename),
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file2.save()

    def tearDown(self):
        self.user.delete()
        self.experiment1.delete()
        self.experiment2.delete()
        rmtree(self.dest1)
        rmtree(self.dest2)

    def testDownload(self):
        client = Client()

        # check download for experiment1
        response = client.get('/download/experiment/%i/zip/' % self.experiment1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="experiment%s-complete.zip"'
                         % self.experiment1.id)
        self.assertEqual(response.status_code, 200)

        # check download of file1
        response = client.get('/download/datafile/%i/' % self.dataset_file1.id)

        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # requesting file2 should be forbidden...
        response = client.get('/download/datafile/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 403)

        # check dataset1 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [self.dataset1.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 200)

        # check dataset2 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [self.dataset2.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 403)

        # check datafile1 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [],
                                'datafile': [self.dataset_file1.id]})
        self.assertEqual(response.status_code, 200)

        # check datafile2 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [],
                                'datafile': [self.dataset_file2.id]})
        self.assertEqual(response.status_code, 403)

    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = join(abspath(dirname(__file__)),
                        '../static/images/ands-logo-hi-res.jpg')

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file',
                            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            size=str(0),
                            md5sum='md5sum')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, 'md5sum')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #47
0
def _make_data_file(dataset, filename, content):
    # Write the content to a temporary file, then register it against the
    # dataset as a staging-protocol datafile.
    f = mktemp()
    with open(f, "w+b") as tmp:
        tmp.write(content)
    df = Dataset_File()
    df.dataset = dataset
    df.filename = filename
    df.url = 'file://' + f
    df.protocol = "staging"
    df.size = len(content)
    df.verify(allowEmptyChecksums=True)
    df.save()
Example #48
0
class DownloadTestCase(TestCase):

    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public=True)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public=False)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset(experiment=self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset(experiment=self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename = 'testfile.txt'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH, '%s/%s/'
                                  % (self.experiment1.id,
                                  self.dataset1.id)))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                                '%s/%s/'
                                  % (self.experiment2.id,
                                  self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename))
        with open(testfile1, 'w') as f:
            f.write("Hello World!\n")

        testfile2 = abspath(join(self.dest2, filename))
        with open(testfile2, 'w') as f:
            f.write("Hello World!\n")

        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename,
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file1.save()

        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename),
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file2.save()

    def tearDown(self):
        self.user.delete()
        self.experiment1.delete()
        self.experiment2.delete()
        rmtree(self.dest1)
        rmtree(self.dest2)

    def testDownload(self):
        client = Client()

        # check download for experiment1
        response = client.get('/download/experiment/%i/zip/' % self.experiment1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="experiment%s-complete.zip"'
                         % self.experiment1.id)
        self.assertEqual(response.status_code, 200)

        # check download of file1
        response = client.get('/download/datafile/%i/' % self.dataset_file1.id)

        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # requesting file2 should be forbidden...
        response = client.get('/download/datafile/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 403)

        # check dataset1 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [self.dataset1.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 200)

        # check dataset2 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [self.dataset2.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 403)

        # check datafile1 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [],
                                'datafile': [self.dataset_file1.id]})
        self.assertEqual(response.status_code, 200)

        # check datafile2 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [],
                                'datafile': [self.dataset_file2.id]})
        self.assertEqual(response.status_code, 403)

    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a pdf file
        filename = join(abspath(dirname(__file__)),
                        '../static/downloads/DatasetDepositionGuide.pdf')

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'application/pdf')
        self.assertEqual(pdf1.size, str(1008475))
        self.assertEqual(pdf1.md5sum, '9192b3d3e0056412b1d21d3e33562eba')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file',
                            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            size=str(0),
                            md5sum='md5sum')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, 'md5sum')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #49
0
    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a pdf file
        filename = join(abspath(dirname(__file__)),
                        '../static/downloads/DatasetDepositionGuide.pdf')

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'application/pdf')
        self.assertEqual(pdf1.size, str(1008475))
        self.assertEqual(pdf1.md5sum, '9192b3d3e0056412b1d21d3e33562eba')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file',
                            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            size=str(0),
                            md5sum='md5sum')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, 'md5sum')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #50
0
class ContextualViewTest(TestCase):
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()
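        # attach an (empty) parameter set so the datafile has metadata under
        # the hidden test schema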
        self.dfps = DatafileParameterSet(dataset_file=self.dataset_file,
                                         schema=self.testschema)
        self.dfps.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.dataset_file.delete()
        self.testschema.delete()
        self.dfps.delete()
        self.acl.delete()

    def testDetailsDisplay(self):
        """
        test display of view for an existing schema and no display for an undefined one.
        """
        from tardis.tardis_portal.views import display_datafile_details
        request = flexmock(user=self.user, groups=[("testgroup", flexmock())])
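        # flexmock stands in for an authenticated HttpRequest with group info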
        with self.settings(DATAFILE_VIEWS=[(
                "http://test.com/test/schema",
                "/test/url"), ("http://does.not.exist", "/false/url")]):
            response = display_datafile_details(
                request, dataset_file_id=self.dataset_file.id)
            self.assertEqual(response.status_code, 200)
            self.assertTrue("/ajax/parameters/" in response.content)
            self.assertTrue("/test/url" in response.content)
            self.assertFalse("/false/url" in response.content)
Example #51
0
    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = join(abspath(dirname(__file__)),
                        '../static/images/ands-logo-hi-res.jpg')

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(
            dataset=dataset,
            filename=basename(filename),
            url='file://%s' % filename,
            protocol='file',
            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
            size=str(0),
            md5sum='md5sum')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(
                pdf2.mimetype,
                'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, 'md5sum')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #52
0
    def testDatasetFile(self):

        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = abspath(join(dirname(__file__),
                                '../static/images/ands-logo-hi-res.jpg'))

        dataset = Dataset.objects.get(pk=self.dataset1.id)

        size, sha512sum = get_size_and_sha512sum(filename)
        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            size=str(size),
                            sha512sum=sha512sum,
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.verify()
        pdf1.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file',
                            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            size=str(0),
                            # Empty string always has the same hash
                            sha512sum='cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
        pdf2.save()
        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, '')

        pdf2.mimetype = ''
        pdf2.save()

        try:
            from magic import Magic
        except ImportError:
            # XXX Test disabled because lib magic can't be loaded
            pass
        else:
            self.assertEqual(pdf2.mimetype, 'application/pdf')
Example #53
0
def addfiles(request):

    import os
    from os.path import basename
    from os import path
    from tardis.tardis_portal.models import Dataset_File
    import itertools
    from tardis.hpctardis.metadata import process_all_experiments
    from tardis.hpctardis.metadata import process_experimentX

    # user stays None unless credentials are supplied and authentication
    # succeeds
    user = None
    if 'username' in request.POST and \
            'password' in request.POST:
        authMethod = request.POST['authMethod']

        user = auth_service.authenticate(authMethod=authMethod,
                                         request=request)

    if user:
        eid = request.POST['eid']
        desc = request.POST['desc']
        folder = request.POST['folder']
        eid = int(eid)

        auth_key = settings.DEFAULT_AUTH
        try:
            exp = Experiment.objects.get(pk=eid)
            author = exp.created_by
        except Experiment.DoesNotExist:
            logger.exception(
                'Experiment for eid %i in addfiles does not exist' % eid)
            return HttpResponse("Experiment Not Found")

        current_user = str(user)
        created_user = str(author)

        if current_user == created_user:
            staging = path.join(settings.STAGING_PATH, str(user), str(eid),
                                str(folder))
            filelist = []
            ds_desc = {}
            for root, dirs, files in os.walk(staging):
                for named in files:
                    filelist.append(named)

            next = str(filelist)
            ds_desc[desc] = filelist
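            # a single dataset description, mapped to every file found under
            # the staging directory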

            # TODO: if needed for security, metadata from the folder could be
            # extracted to check the folder name

            for d, df in ds_desc.items():
                dataset = models.Dataset(description=d, experiment=exp)
                dataset.save()
                for f in df:
                    logger.debug('f = %s' % f)
                    filepath = path.join(staging, f)
                    size = path.getsize(filepath)
                    filename = path.basename(filepath)

                    datafile = Dataset_File(dataset=dataset,
                                            filename=filename,
                                            url=filepath,
                                            size=size,
                                            protocol='staging')
                    datafile.save()

            next = next + ' File path :' + staging

            process_experimentX(exp)

            next = next + ' The Author is : ' + str(
                author) + ',' + ' The User is : ' + str(user)
            return HttpResponse(next)
        else:
            next = 'Only the author of the experiment can add files (from Tardis)'
            return HttpResponse(next)
    else:
        return HttpResponse("UnSuccessful")
Example #54
0
    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public_access=Experiment.PUBLIC_ACCESS_FULL)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public_access=Experiment.PUBLIC_ACCESS_NONE)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset()
        self.dataset1.save()
        self.dataset1.experiments.add(self.experiment1)
        self.dataset1.save()


        # dataset2 belongs to experiment2
        self.dataset2 = Dataset()
        self.dataset2.save()
        self.dataset2.experiments.add(self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename1 = 'testfile.txt'
        filename2 = 'testfile.tiff'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH, '%s/%s/'
                                  % (self.experiment1.id,
                                  self.dataset1.id)))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                                '%s/%s/'
                                  % (self.experiment2.id,
                                  self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename1))
        with open(testfile1, 'w') as f:
            f.write("Hello World!\n")

        testfile2 = abspath(join(self.dest2, filename2))
        _generate_test_image(testfile2)
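        # (_generate_test_image is assumed to write a small TIFF file here)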

        size, sha512sum = get_size_and_sha512sum(testfile1)
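        # register the file with its real size and checksum so that verify()
        # below succeeds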
        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename1,
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s'
                                              % (self.experiment1.id,
                                                 self.dataset1.id,
                                                 filename1))
        self.dataset_file1.verify()
        self.dataset_file1.save()

        size, sha512sum = get_size_and_sha512sum(testfile2)
        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename2),
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s'
                                            % (self.experiment2.id,
                                               self.dataset2.id,
                                               filename2))
        self.dataset_file2.verify()
        self.dataset_file2.save()
Example #55
0
def addfiles(request):

    import os
    from os.path import basename
    from os import path
    from tardis.tardis_portal.models import Dataset_File
    import itertools
    from tardis.apps.hpctardis.metadata import process_all_experiments
    from tardis.apps.hpctardis.metadata import process_experimentX

    if "username" in request.POST and "password" in request.POST:
        authMethod = request.POST["authMethod"]

        user = auth_service.authenticate(authMethod=authMethod, request=request)

    if user:
        eid = request.POST["eid"]
        desc = request.POST["desc"]
        folder = request.POST["folder"]
        eid = int(eid)

        auth_key = settings.DEFAULT_AUTH
        try:
            exp = Experiment.objects.get(pk=eid)
            author = exp.created_by
        except Experiment.DoesNotExist:
            logger.exception("Experiment for eid %i in addfiles does not exist" % eid)
            return HttpResponse("Experiment Not Found")

        current_user = str(user)
        created_user = str(author)

        if current_user == created_user:
            staging = path.join(settings.STAGING_PATH, str(user), str(eid), str(folder))
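            # files are staged under STAGING_PATH/<user>/<experiment id>/<folder>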
            filelist = []
            ds_desc = {}
            for root, dirs, files in os.walk(staging):
                for named in files:
                    filelist.append(named)

            next = str(filelist)
            ds_desc[desc] = filelist

            # TODO: if needed for security, metadata from the folder could be
            # extracted to check the folder name

            for d, df in ds_desc.items():
                dataset = models.Dataset(description=d, experiment=exp)
                dataset.save()
                for f in df:
                    logger.debug("f = %s" % f)
                    filepath = path.join(staging, f)
                    size = path.getsize(filepath)
                    filename = path.basename(filepath)

                    datafile = Dataset_File(
                        dataset=dataset, filename=filename, url=filepath, size=size, protocol="staging"
                    )
                    datafile.save()

            next = next + " File path :" + staging

            process_experimentX(exp)

            next = next + " The Author is : " + str(author) + "," + " The User is : " + str(user)
            return HttpResponse(next)
        else:
            next = "Only the author of the experiment can add files (from Tardis)"
            return HttpResponse(next)
    else:
        return HttpResponse("UnSuccessful")
Example #56
0
def generate_datafile(path, dataset, content=None, size=-1,
                      verify=True, verified=True, verify_checksums_req=False):
    '''Generates a datafile AND a replica to hold its contents'''
    from tardis.tardis_portal.models import Dataset_File, Replica, Location

    saved = settings.REQUIRE_DATAFILE_CHECKSUMS
    settings.REQUIRE_DATAFILE_CHECKSUMS = False
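    # mandatory checksums are switched off while the datafile is created;
    # the original setting is restored in the finally block below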
    try:
        datafile = Dataset_File()
        if content:
            datafile.size = str(len(content))
        else:
            datafile.size = str(size)
        # Normally we use any old string for the datafile path, but some
        # tests require the path to be the same as what 'staging' would use
        if path is None:
            datafile.dataset_id = dataset.id
            datafile.save()
            path = "%s/%s/%s" % (dataset.get_first_experiment().id,
                                 dataset.id, datafile.id)

        filepath = os.path.normpath(settings.FILE_STORE_PATH + '/' + path)
        if content:
            try:
                os.makedirs(os.path.dirname(filepath))
                os.remove(filepath)
            except OSError:
                # the directory may already exist, or the file may be absent
                pass
            with open(filepath, 'wb+') as gen_file:
                gen_file.write(content)
        datafile.mimetype = "application/unspecified"
        datafile.filename = os.path.basename(filepath)
        datafile.dataset_id = dataset.id
        datafile.save()
        settings.REQUIRE_DATAFILE_CHECKSUMS = verify_checksums_req
        location = _infer_location(path)
        replica = Replica(datafile=datafile, url=path, protocol='',
                          location=location)
        if verify and content:
            if not replica.verify():
                raise RuntimeError('verify failed!?!')
        replica.save()
        replica.verified = verified
        replica.save(update_fields=['verified'])  # force no verification
        return (datafile, replica)
    finally:
        settings.REQUIRE_DATAFILE_CHECKSUMS = saved
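
# A minimal usage sketch (hypothetical names; assumes an existing Dataset `ds`
# whose first experiment is set):
#     datafile, replica = generate_datafile(None, ds, content='Hello World!\n')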
Example #57
0
    def process_enclosure(self, dataset, enclosure):
        '''
        Examines one "enclosure" from an entry, representing a datafile.
        Determines whether to process it, and if so, starts the transfer.
        '''
        # TODO tjdett: This method needs a clean-up, as it's doing many more
        # things than was originally intended. It now contains more code about
        # deciding whether to process the enclosure than about actually
        # processing it; that decision, or the influencing factors, should be
        # refactored into separate methods. Python's built-in time deltas and
        # Django's time formatting functions would both clean this code up
        # considerably.
        
        def _get_enclosure_url(enclosure):
            ''' Optionally manipulate datafile URL, eg: http://foo.edu/bar.txt -> file:////fooserver/bar.txt'''
            if IngestOptions.USE_LOCAL_TRANSFERS:
                return enclosure.href.replace(IngestOptions.URL_BASE_TO_REPLACE, IngestOptions.LOCAL_SOURCE_PATH)
            else:
                return enclosure.href

        filename = getattr(enclosure, 'title', basename(enclosure.href))
        # check if we were provided a full path, and hence a subdirectory for the file 
        if (IngestOptions.DATAFILE_DIRECTORY_DEPTH >= 1 and
                    getattr(enclosure, "path", "") != "" and
                    enclosure.path.split("/")[IngestOptions.DATAFILE_DIRECTORY_DEPTH:] != ""):
            filename = "/".join(enclosure.path.split("/")[IngestOptions.DATAFILE_DIRECTORY_DEPTH:])
                
        datafiles = dataset.dataset_file_set.filter(filename=filename)
        def fromunix1000(tstr):
            # timestamps in the feed are milliseconds since the Unix epoch
            return datetime.datetime.utcfromtimestamp(float(tstr) / 1000)
        if datafiles.count() > 0:
            datafile = datafiles[0]
            from django.db.models import Max
            newest = datafiles.aggregate(
                Max('modification_time'))['modification_time__max']
            if not newest:
                # We already have this file and it carries no time/date to
                # compare against, so skip it.
                return
            
            def total_seconds(td): # exists on datetime.timedelta in Python 2.7
                return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
            timediff = total_seconds(fromunix1000(enclosure.modified) - newest)

            if timediff == 0:
                return # We have this file already, same time/date.
            elif timediff < 0:
                logging.getLogger(__name__).warn("Skipping datafile. File to ingest '{0}' is {1} *older* than stored file. Are the system clocks correct?".
                                                format(enclosure.href, self.human_time(-timediff)))
                return
            else:
                if not IngestOptions.ALLOW_UPDATING_DATAFILES:
                    logging.getLogger(__name__).warn("Skipping datafile. ALLOW_UPDATING_DATAFILES is disabled, and '{0}' is {1}newer than stored file.".
                                                format(enclosure.href, self.human_time(timediff)))
                    return
                logging.getLogger(__name__).info("Ingesting updated datafile. File to ingest '{0}' is {1} newer than stored file. This will create an additional copy.".
                                                 format(enclosure.href, self.human_time(timediff)))
                if IngestOptions.HIDE_REPLACED_DATAFILES:
                    # Mark all older versions of the file as hidden. (!)
                    try:
                        from tardis.microtardis.models import Dataset_Hidden
                        Dataset_Hidden.objects.filter(datafile__dataset=dataset).update(hidden=True)
                    except ImportError:
                        logging.getLogger(__name__).warn(
                            ("The MicroTardis app must be installed in order to use "
                             "the HIDE_REPLACED_DATAFILES option. Existing version "
                             "of datafile {0} will not be hidden.").format(
                                 datafile.filename))
                  
        else: # no local copy already.
            logging.getLogger(__name__).info("Ingesting datafile: '{0}'".format(enclosure.href))


        # Create a record and start transferring.
        datafile = Dataset_File(dataset=dataset,
                                url=_get_enclosure_url(enclosure), 
                                filename=filename,
                                created_time=fromunix1000(enclosure.created),
                                modification_time=fromunix1000(enclosure.modified))
        datafile.protocol = enclosure.href.partition('://')[0]
        
        datafile.mimetype = getattr(enclosure, "mime", datafile.mimetype)
        datafile.size = getattr(enclosure, "length", datafile.size)

        try:
            hash = enclosure.hash
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict([s.partition(':')[::2] for s in hash.split()])
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except AttributeError:
            pass
        datafile.save()
Example #58
0
class ViewTemplateContextsTest(TestCase):
    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        Location.force_initialize()
        self.location = Location.get_location('local')

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()
        self.replica = Replica(datafile=self.dataset_file,
                               url="http://foo",
                               location=self.location,
                               verified=False)
        self.replica.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.dataset_file.delete()
        self.acl.delete()

    def testExperimentView(self):
        """
        test some template context parameters for an experiment view
        """
        from tardis.tardis_portal.views import view_experiment
        from tardis.tardis_portal.shortcuts import render_response_index
        from django.http import HttpRequest
        from django.template import Context
        import sys

        # Default behavior
        views_module = flexmock(sys.modules['tardis.tardis_portal.views'])
        request = HttpRequest()
        request.user = self.user
        request.groups = []
        context = {
            'organization': ['test', 'test2'],
            'default_organization': 'test',
            'default_format': 'tar',
            'protocol': [['tgz', '/download/experiment/1/tgz/'],
                         ['tar', '/download/experiment/1/tar/']]
        }
        views_module.should_call('render_response_index'). \
            with_args(_AnyMatcher(), "tardis_portal/view_experiment.html",
                      _ContextMatcher(context))
        response = view_experiment(request, experiment_id=self.exp.id)
        self.assertEqual(response.status_code, 200)

        # Behavior with USER_AGENT_SENSING enabled and a request.user_agent
        saved_setting = getattr(settings, "USER_AGENT_SENSING", None)
        try:
            setattr(settings, "USER_AGENT_SENSING", True)
            request = HttpRequest()
            request.user = self.user
            request.groups = []
            mock_agent = _MiniMock(os=_MiniMock(family="Macintosh"))
            setattr(request, 'user_agent', mock_agent)
            context = {
                'organization': ['classic', 'test', 'test2'],
                'default_organization': 'classic',
                'default_format': 'tar',
                'protocol': [['tar', '/download/experiment/1/tar/']]
            }
            views_module.should_call('render_response_index'). \
                with_args(_AnyMatcher(), "tardis_portal/view_experiment.html",
                          _ContextMatcher(context))
            response = view_experiment(request, experiment_id=self.exp.id)
            self.assertEqual(response.status_code, 200)
        finally:
            if saved_setting is not None:
                setattr(settings, "USER_AGENT_SENSING", saved_setting)
            else:
                delattr(settings, "USER_AGENT_SENSING")

    def testDatasetView(self):
        """
        test some context parameters for a dataset view
        """
        from tardis.tardis_portal.views import view_dataset
        from tardis.tardis_portal.shortcuts import render_response_index
        from django.http import HttpRequest
        from django.template import Context
        import sys

        views_module = flexmock(sys.modules['tardis.tardis_portal.views'])
        request = HttpRequest()
        request.user = self.user
        request.groups = []
        context = {'default_organization': 'test', 'default_format': 'tar'}
        views_module.should_call('render_response_index'). \
            with_args(_AnyMatcher(), "tardis_portal/view_dataset.html",
                      _ContextMatcher(context))
        response = view_dataset(request, dataset_id=self.dataset.id)
        self.assertEqual(response.status_code, 200)

        # Behavior with USER_AGENT_SENSING enabled and a request.user_agent
        saved_setting = getattr(settings, "USER_AGENT_SENSING", None)
        try:
            setattr(settings, "USER_AGENT_SENSING", True)
            request = HttpRequest()
            request.user = self.user
            request.groups = []
            mock_agent = _MiniMock(os=_MiniMock(family="Macintosh"))
            setattr(request, 'user_agent', mock_agent)
            context = {
                'default_organization': 'classic',
                'default_format': 'tar'
            }
            views_module.should_call('render_response_index'). \
                with_args(_AnyMatcher(), "tardis_portal/view_dataset.html",
                          _ContextMatcher(context))
            response = view_dataset(request, dataset_id=self.dataset.id)
            self.assertEqual(response.status_code, 200)
        finally:
            if saved_setting is not None:
                setattr(settings, "USER_AGENT_SENSING", saved_setting)
            else:
                delattr(settings, "USER_AGENT_SENSING")