@classmethod
def register_chunked_data(cls, data_type, time_bin, chunk_path, file_contents, study_id,
                          participant_id, survey_id=None):
    if data_type not in CHUNKABLE_FILES:
        raise UnchunkableDataTypeError

    time_bin = int(time_bin) * CHUNK_TIMESLICE_QUANTUM
    chunk_hash_str = chunk_hash(file_contents)

    cls.objects.create(
        is_chunkable=True,
        chunk_path=chunk_path,
        chunk_hash=chunk_hash_str,
        data_type=data_type,
        time_bin=datetime.fromtimestamp(time_bin),
        study_id=study_id,
        participant_id=participant_id,
        survey_id=survey_id,
    )
@classmethod
def register_chunked_data(cls, data_type, time_bin, chunk_path, file_contents, study_id,
                          participant_id, survey_id=None):
    if data_type not in CHUNKABLE_FILES:
        raise UnchunkableDataTypeError

    chunk_hash_str = chunk_hash(file_contents).decode()
    time_bin = int(time_bin) * CHUNK_TIMESLICE_QUANTUM
    # The previous time_bin form was this:
    #     datetime.fromtimestamp(time_bin)
    # On the server, but not necessarily in development environments, datetime.fromtimestamp(0)
    # provides the same date and time as datetime.utcfromtimestamp(0).
    # timezone.make_aware(datetime.utcfromtimestamp(0), timezone.utc) creates a timezone-aware
    # datetime that is unambiguous in UTC and generally yields identical timestamps.
    # Django's behavior (at least on this project, but this project is set to the New York
    # timezone, so it should be generalizable) is to add UTC as the timezone when storing a
    # naive datetime in the database.
    time_bin = timezone.make_aware(datetime.utcfromtimestamp(time_bin), timezone.utc)

    cls.objects.create(
        is_chunkable=True,
        chunk_path=chunk_path,
        chunk_hash=chunk_hash_str,
        data_type=data_type,
        time_bin=time_bin,
        study_id=study_id,
        participant_id=participant_id,
        survey_id=survey_id,
        file_size=len(file_contents),
    )
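# A minimal sketch (a hypothetical helper, not part of the model) illustrating the comment
# above: the naive datetime.fromtimestamp conversion depends on the machine's local timezone,
# while make_aware(datetime.utcfromtimestamp(...), timezone.utc) is unambiguous. Assumes only
# the stdlib datetime module and django.utils.timezone.
from datetime import datetime

from django.utils import timezone


def _demo_time_bin_conversion(epoch_seconds=0):
    naive_local = datetime.fromtimestamp(epoch_seconds)       # machine-timezone dependent
    naive_utc = datetime.utcfromtimestamp(epoch_seconds)      # UTC wall-clock, but still naive
    aware_utc = timezone.make_aware(naive_utc, timezone.utc)  # unambiguous; safe to store
    # On a UTC-configured server naive_local == naive_utc; on a developer machine in another
    # timezone they differ, which is why the aware form is used above.
    return naive_local, naive_utc, aware_utc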
@classmethod
def add_new_chunk(cls, study_id, user_id, data_type, s3_file_path, time_bin,
                  file_contents=None, survey_id=None):
    is_chunkable = data_type in CHUNKABLE_FILES
    if is_chunkable:
        time_bin = int(time_bin) * CHUNK_TIMESLICE_QUANTUM

    ChunkRegistry.create(
        {
            "study_id": study_id,
            "user_id": user_id,
            "data_type": data_type,
            "chunk_path": s3_file_path,
            "chunk_hash": chunk_hash(file_contents) if is_chunkable else None,
            "time_bin": datetime.fromtimestamp(time_bin),
            "is_chunkable": is_chunkable,
            # the survey_id field is only used by the timings file.
            "survey_id": survey_id,
        },
        random_id=True,
    )
def update_chunk_hash(self, data_to_hash):
    self["chunk_hash"] = chunk_hash(data_to_hash)
    self.save()
@classmethod
def get_creation_arguments(cls, params, file_object):
    errors = []

    # ensure required parameters are present; we don't allow falsey contents.
    for field in PipelineUpload.REQUIREDS:
        if not params.get(field, None):
            errors.append('missing required parameter: "%s"' % field)
    # if we escape here early we can simplify the code that requires all parameters later
    if errors:
        raise InvalidUploadParameterError("\n".join(errors))

    # validate study_id (fetch once; Study.objects.get would raise DoesNotExist on a miss)
    study_id_object_id = params["study_id"]
    study = Study.objects.filter(object_id=study_id_object_id).first()
    if study is None:
        errors.append('encountered invalid study_id: "%s"' % study_id_object_id)
    else:
        study_id = study.id

    if len(params['file_name']) > 256:
        errors.append(
            "encountered invalid file_name, file_names cannot be more than 256 characters"
        )

    if cls.objects.filter(file_name=params['file_name']).exists():
        errors.append('a file with the name "%s" already exists' % params['file_name'])

    try:
        tags = json.loads(params["tags"])
        # must be a json list; can't be a json dict, number, or string.
        if not isinstance(tags, list):
            raise ValueError()
        if not tags:
            errors.append("you must provide at least one tag for your file.")
        tags = [str(tag) for tag in tags]
    except ValueError:
        tags = None
        errors.append(
            "could not parse tags, ensure that your uploaded list of tags is a json compatible array."
        )

    if errors:
        raise InvalidUploadParameterError("\n".join(errors))

    created_on = timezone.now()
    file_hash = chunk_hash(file_object.read())
    file_object.seek(0)

    s3_path = "%s/%s/%s/%s/%s" % (
        PIPELINE_FOLDER,
        params["study_id"],
        params["file_name"],
        created_on.isoformat(),
        ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(32)),  # todo: file_name?
    )

    creation_arguments = {
        "created_on": created_on,
        "s3_path": s3_path,
        "study_id": study_id,
        "file_name": params["file_name"],
        "file_hash": file_hash,
    }

    return creation_arguments, tags
def update_chunk_hash(self, data_to_hash):
    self.chunk_hash = chunk_hash(data_to_hash).decode()
    self.save()
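# chunk_hash itself is not defined in this section. Below is a minimal sketch of a compatible
# helper, assuming (as the .decode() calls above suggest) that it returns the base64-encoded
# bytes of an MD5 digest; the project's real implementation may differ.
from base64 import b64encode
from hashlib import md5


def chunk_hash(data):
    # returns bytes, which is why callers storing it in a text field call .decode() first
    return b64encode(md5(data).digest())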