def low_memory_update_chunk_hash(self, list_data_to_hash):
    self["chunk_hash"] = low_memory_chunk_hash(list_data_to_hash)
    self.save()
Example #2
    def get_creation_arguments(cls, params, file_object):
        errors = []

        # ensure required parameters are present; we don't allow falsey contents.
        for key in PipelineUpload.REQUIREDS:
            if not params.get(key, None):
                errors.append('missing required parameter: "%s"' % key)

        # exit early here so the code below can assume all required parameters are present
        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        # validate study_id
        study_id_object_id = ObjectId(params["study_id"])
        if not Studies(_id=study_id_object_id):
            errors.append('encountered invalid study_id: "%s"' % params["study_id"])

        # validate file_name
        if len(params['file_name']) > 256:
            errors.append(
                "encountered invalid file_name, file_names cannot be more than 256 characters"
            )

        if PipelineUploads.count(file_name=params['file_name']):
            errors.append('a file with the name "%s" already exists' %
                          params['file_name'])

        try:
            tags = json.loads(params["tags"])
            if not isinstance(tags, list):
                # must be json list, can't be json dict, number, or string.
                raise ValueError()
            if not tags:
                errors.append(
                    "you must provide at least one tag for your file.")
            tags = [str(_) for _ in tags]
        except ValueError:
            errors.append(
                "could not parse tags, ensure that your uploaded list of tags is a json compatible array."
            )

        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        creation_time = datetime.utcnow()
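        # hash the full file contents, then rewind so the data can be read again later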
        file_hash = low_memory_chunk_hash(file_object.read())
        file_object.seek(0)

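        # S3 key layout: <PIPELINE_FOLDER>/<study_id>/<file_name>/<ISO timestamp>/<32-char random suffix>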
        s3_path = "%s/%s/%s/%s/%s" % (
            PIPELINE_FOLDER,
            params["study_id"],
            params["file_name"],
            creation_time.isoformat(),
            ''.join(
                random.choice(string.ascii_letters + string.digits)
                for i in range(32)),
            # todo: file_name?
        )

        return {
            "creation_time": creation_time,
            "s3_path": s3_path,
            "study_id": study_id_object_id,
            "tags": tags,
            "file_name": params["file_name"],
            "file_hash": file_hash,
        }
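For orientation, here is a minimal sketch of how a caller might use this method, assuming it is exposed as a classmethod on PipelineUpload (as the PipelineUpload.REQUIREDS reference above suggests); the parameter values, file name, and error handling shown are illustrative, not part of the original example.

# hypothetical caller; all values here are illustrative
params = {
    "study_id": "5f2b9c8e4f1a2b3c4d5e6f70",  # assumed 24-character hex ObjectId string
    "file_name": "sensor_data_week_1.csv",
    "tags": '["accelerometer", "week1"]',    # tags are expected as a JSON-encoded array
}
with open("sensor_data_week_1.csv", "rb") as file_object:
    try:
        creation_args = PipelineUpload.get_creation_arguments(params, file_object)
    except InvalidUploadParameterError as error:
        print(error)  # one line per failed validation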