Ejemplo n.º 1
0
    def register_chunked_data(cls,
                              data_type,
                              time_bin,
                              chunk_path,
                              file_contents,
                              study_id,
                              participant_id,
                              survey_id=None):
        """Create a registry row for one chunk of uploaded data.

        ``time_bin`` arrives as an integer bin index; it is scaled by
        CHUNK_TIMESLICE_QUANTUM and stored as a timezone-aware UTC datetime.

        Raises UnchunkableDataTypeError when ``data_type`` is not chunkable.
        """
        if data_type not in CHUNKABLE_FILES:
            raise UnchunkableDataTypeError

        digest = chunk_hash(file_contents).decode()

        # Convert the bin index to an unambiguous UTC-aware datetime.
        epoch_seconds = int(time_bin) * CHUNK_TIMESLICE_QUANTUM
        aware_time_bin = timezone.make_aware(
            datetime.utcfromtimestamp(epoch_seconds), timezone.utc)

        cls.objects.create(
            is_chunkable=True,
            chunk_path=chunk_path,
            chunk_hash=digest,
            data_type=data_type,
            time_bin=aware_time_bin,
            study_id=study_id,
            participant_id=participant_id,
            survey_id=survey_id,
            file_size=len(file_contents),
        )
Ejemplo n.º 2
0
    def register_chunked_data(cls,
                              data_type,
                              time_bin,
                              chunk_path,
                              file_contents,
                              study_id,
                              participant_id,
                              survey_id=None):
        """Create a registry row for one chunk of uploaded data.

        ``time_bin`` arrives as an integer bin index; it is scaled by
        CHUNK_TIMESLICE_QUANTUM and stored as a timezone-aware UTC datetime.
        (A naive ``datetime.fromtimestamp`` depends on the server's local
        timezone and is ambiguous when Django stores it as UTC.)

        Raises UnchunkableDataTypeError when ``data_type`` is not chunkable.
        """
        if data_type not in CHUNKABLE_FILES:
            raise UnchunkableDataTypeError

        time_bin = int(time_bin) * CHUNK_TIMESLICE_QUANTUM
        # Decode so the hash is stored as text, consistent with the other
        # registration paths in this module.
        chunk_hash_str = chunk_hash(file_contents).decode()

        cls.objects.create(
            is_chunkable=True,
            chunk_path=chunk_path,
            chunk_hash=chunk_hash_str,
            data_type=data_type,
            # UTC-aware, not naive local time.
            time_bin=timezone.make_aware(datetime.utcfromtimestamp(time_bin),
                                         timezone.utc),
            study_id=study_id,
            participant_id=participant_id,
            survey_id=survey_id,
            # Record the chunk size alongside the hash.
            file_size=len(file_contents),
        )
    def register_chunked_data(cls,
                              data_type,
                              time_bin,
                              chunk_path,
                              file_contents,
                              study_id,
                              participant_id,
                              survey_id=None):
        """Record one chunk of uploaded data in the registry.

        ``time_bin`` arrives as an integer bin index; it is scaled by
        CHUNK_TIMESLICE_QUANTUM and stored as a timezone-aware UTC datetime.
        A naive ``datetime.fromtimestamp(time_bin)`` was used previously: on
        the server — but not necessarily in development environments — it
        matches ``datetime.utcfromtimestamp``, and Django stores naive
        datetimes as UTC anyway (this project is configured for the New York
        timezone, so this should be generalizable).  An explicitly UTC-aware
        value is unambiguous and yields generally identical timestamps.

        Raises UnchunkableDataTypeError when ``data_type`` is not chunkable.
        """
        if data_type not in CHUNKABLE_FILES:
            raise UnchunkableDataTypeError

        digest = chunk_hash(file_contents).decode()

        epoch_seconds = int(time_bin) * CHUNK_TIMESLICE_QUANTUM
        aware_time_bin = timezone.make_aware(
            datetime.utcfromtimestamp(epoch_seconds), timezone.utc)

        cls.objects.create(
            is_chunkable=True,
            chunk_path=chunk_path,
            chunk_hash=digest,
            data_type=data_type,
            time_bin=aware_time_bin,
            study_id=study_id,
            participant_id=participant_id,
            survey_id=survey_id,
            file_size=len(file_contents),
        )
Ejemplo n.º 4
0
    def add_new_chunk(cls,
                      study_id,
                      user_id,
                      data_type,
                      s3_file_path,
                      time_bin,
                      file_contents=None,
                      survey_id=None):
        """Insert a ChunkRegistry entry for a newly uploaded file.

        For chunkable data types ``time_bin`` is an integer bin index scaled
        by CHUNK_TIMESLICE_QUANTUM; otherwise it is assumed to already be an
        epoch timestamp.  The hash is only computed for chunkable types.
        """
        is_chunkable = data_type in CHUNKABLE_FILES
        if is_chunkable:
            time_bin = int(time_bin) * CHUNK_TIMESLICE_QUANTUM

        # Store a UTC-aware datetime; naive datetime.fromtimestamp() depends
        # on the server's local timezone (consistent with the registration
        # paths elsewhere in this module).
        aware_time_bin = timezone.make_aware(
            datetime.utcfromtimestamp(time_bin), timezone.utc)

        ChunkRegistry.create(
            {
                "study_id": study_id,
                "user_id": user_id,
                "data_type": data_type,
                "chunk_path": s3_file_path,
                "chunk_hash":
                chunk_hash(file_contents) if is_chunkable else None,
                "time_bin": aware_time_bin,
                "is_chunkable": is_chunkable,
                # the survey_id field is only used by the timings file.
                "survey_id": survey_id
            },
            random_id=True)
Ejemplo n.º 5
0
 def update_chunk_hash(self, data_to_hash):
     """Recompute the stored chunk hash from *data_to_hash* and persist it."""
     new_hash = chunk_hash(data_to_hash)
     self["chunk_hash"] = new_hash
     self.save()
    def get_creation_arguments(cls, params, file_object):
        """Validate upload parameters and build creation kwargs for a pipeline file.

        Returns a ``(creation_arguments, tags)`` pair.  Raises
        InvalidUploadParameterError listing every validation failure found.
        """
        errors = []

        # ensure required are present, we don't allow falsey contents.
        for field in PipelineUpload.REQUIREDS:
            if not params.get(field, None):
                errors.append('missing required parameter: "%s"' % field)

        # if we escape here early we can simplify the code that requires all parameters later
        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        # Resolve the study with a single query.  The previous code used
        # .get(), which raises DoesNotExist for an unknown object_id instead
        # of returning a falsy value, so the "invalid study_id" error branch
        # was unreachable — and it queried the database twice.
        study = Study.objects.filter(object_id=params["study_id"]).first()
        if study is None:
            errors.append('encountered invalid study_id: "%s"' %
                          params["study_id"])

        if len(params['file_name']) > 256:
            errors.append(
                "encountered invalid file_name, file_names cannot be more than 256 characters"
            )

        # .exists() avoids counting every matching row just to test presence.
        if cls.objects.filter(file_name=params['file_name']).exists():
            errors.append('a file with the name "%s" already exists' %
                          params['file_name'])

        try:
            tags = json.loads(params["tags"])
            if not isinstance(tags, list):
                # must be json list, can't be json dict, number, or string.
                raise ValueError()
            if not tags:
                errors.append(
                    "you must provide at least one tag for your file.")
            tags = [str(tag) for tag in tags]
        except ValueError:
            tags = None
            errors.append(
                "could not parse tags, ensure that your uploaded list of tags is a json compatible array."
            )

        # Raises before study.id is touched below, so a missing study can
        # never cause an AttributeError.
        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        created_on = timezone.now()
        file_hash = chunk_hash(file_object.read())
        file_object.seek(0)  # rewind so the caller can re-read the contents

        # Random suffix prevents collisions between same-named uploads.
        random_suffix = ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32))
        s3_path = "%s/%s/%s/%s/%s" % (
            PIPELINE_FOLDER,
            params["study_id"],
            params["file_name"],
            created_on.isoformat(),
            random_suffix,
            # todo: file_name?
        )

        creation_arguments = {
            "created_on": created_on,
            "s3_path": s3_path,
            "study_id": study.id,
            "file_name": params["file_name"],
            "file_hash": file_hash,
        }

        return creation_arguments, tags
Ejemplo n.º 7
0
 def update_chunk_hash(self, data_to_hash):
     """Recompute this chunk's hash from *data_to_hash* and persist the row."""
     digest = chunk_hash(data_to_hash).decode()
     self.chunk_hash = digest
     self.save()