def create_new_participant():
    """
    Create a participant in the requested study and provision their S3 state.

    Reads ``study_id`` from the request, creates the participant (which yields a patient id
    and a password), applies the study's fields and interventions, uploads an empty marker
    file and a client key pair to S3, and repopulates the participant's scheduled survey
    events. The new credentials are flashed to the user.

    Returns:
        A redirect to the ``/view_study/<study_id>`` page.
    """
    study_id = request.values['study_id']
    patient_id, password = Participant.create_with_password(study_id=study_id)
    participant = Participant.objects.get(patient_id=patient_id)
    study = Study.objects.get(id=study_id)
    add_fields_and_interventions(participant, study)

    # The study row is already loaded, so reuse its object_id rather than issuing a
    # second query for the same value.
    study_object_id = study.object_id

    # Create an empty file on S3 indicating that this user exists
    s3_upload(patient_id, b"", study_object_id)
    create_client_key_pair(patient_id, study_object_id)
    repopulate_all_survey_scheduled_events(study, participant)

    response_string = 'Created a new patient\npatient_id: {:s}\npassword: {:s}'.format(
        patient_id, password)
    flash(response_string, 'success')
    return redirect('/view_study/{:s}'.format(study_id))
def data_pipeline_upload():
    """
    Store an uploaded pipeline file on S3 and record it, with its tags, in the database.

    Returns:
        A 400 response listing every request parameter that is not in
        VALID_PIPELINE_POST_PARAMS, a 400 response carrying the error text when
        PipelineUpload.get_creation_arguments raises InvalidUploadParameterError,
        or a 200 "SUCCESS" response once the file and its records are saved.
    """
    # block extra keys
    unknown_key_messages = [
        'encountered invalid parameter: "%s"' % param
        for param in request.values.keys()
        if param not in VALID_PIPELINE_POST_PARAMS
    ]
    if unknown_key_messages:
        return Response("\n".join(unknown_key_messages), 400)

    # everything needed to build the database record comes from get_creation_arguments
    try:
        creation_args, tags = PipelineUpload.get_creation_arguments(
            request.values, request.files['file'])
    except InvalidUploadParameterError as e:
        return Response(str(e), 400)

    s3_path = creation_args['s3_path']
    file_contents = request.files['file'].read()
    study_object_id = Study.objects.get(id=creation_args['study_id']).object_id
    s3_upload(s3_path, file_contents, study_object_id, raw_path=True)

    new_object_id = PipelineUpload.generate_objectid_string('object_id')
    pipeline_upload = PipelineUpload(object_id=new_object_id, **creation_args)
    pipeline_upload.save()

    for tag in tags:
        PipelineUploadTags(pipeline_upload=pipeline_upload, tag=tag).save()

    return Response("SUCCESS", status=200)
def batch_upload(upload):
    """
    Upload one chunk to S3 and update or create its chunk registry entry.

    Takes a single tuple so that it can be used with a map-style call.

    Args:
        upload: a 4-tuple of (chunk, chunk_path, new_contents, study_id). ``chunk`` is
            either a ChunkRegistry (its hash is updated) or a dict with the keys
            study_id, user_id, data_type, chunk_path, time_bin and survey_id (a new
            chunk is added). ``new_contents`` is decoded with the "zip" codec first.

    Returns:
        A dict with the keys 'exception' and 'traceback'; both are None on success.
        Any Exception raised while processing is caught and reported through this
        dict (the exception object and its formatted traceback) instead of propagating.
    """
    ret = {'exception': None, 'traceback': None}
    try:
        if len(upload) != 4:
            # upload should have length 4; printing it helps debugging when it doesn't
            print(upload)
        chunk, chunk_path, new_contents, study_id = upload
        del upload
        new_contents = new_contents.decode("zip")
        s3_upload(chunk_path, new_contents, study_id, raw_path=True)
        if isinstance(chunk, ChunkRegistry):
            chunk.low_memory_update_chunk_hash(new_contents)
        else:
            ChunkRegistry.add_new_chunk(
                chunk['study_id'],
                chunk['user_id'],
                chunk['data_type'],
                chunk['chunk_path'],
                chunk['time_bin'],
                file_contents=new_contents,  # unlikely to be huge.
                survey_id=chunk['survey_id'],
            )
    except Exception as e:
        # format_exc formats the exception currently being handled; its first
        # parameter is a frame limit, so the exception must not be passed to it.
        ret['traceback'] = format_exc()
        ret['exception'] = e
    return ret
def create_client_key_pair(patient_id, study_id):
    """
    Generate a key pair for a participant and upload both keys to S3.

    The keys are written, private key first, to the 'private' and 'public' paths given by
    construct_s3_key_paths(study_id, patient_id). Nothing is returned.
    """
    public_key, private_key = encryption.generate_key_pairing()
    s3_paths = construct_s3_key_paths(study_id, patient_id)
    for key_name, key_material in (('private', private_key), ('public', public_key)):
        s3_upload(s3_paths[key_name], key_material, study_id, raw_path=True)
def do_upload(file_paths_and_contents, data_type=None, forcibly_overwrite=False):
    """
    Upload survey answers or audio recordings to S3 and queue them for processing.

    Args:
        file_paths_and_contents: dict mapping a five-part, slash separated path
            (study id / user id / ignored / survey id / ignored) to a
            (contents, timestamp) pair. ``timestamp`` must provide ``timetuple()``;
            entries whose timestamp does not are reported and skipped.
        data_type: "survey_answers" or "audio"; selects the data stream folder and the
            file extension of the S3 path.
        forcibly_overwrite: when false, files that already exist on S3 are skipped.

    Raises:
        Exception: if data_type is None, or a timestamp does not render as 13 characters.
        KeyError: if data_type is not one of the supported values.
    """
    if data_type is None:
        raise Exception("DATA TYPE!")
    upload_stream_map = {
        "survey_answers": ("surveyAnswers", "csv"),
        "audio": ("voiceRecording", "mp4"),
    }
    data_stream_string, file_extension = upload_stream_map[data_type]

    for timings_path, (contents, timestamp) in file_paths_and_contents.items():
        study_id_string, user_id, _, survey_id, _ = timings_path.split("/")
        try:
            # whole seconds followed by "000": a millisecond-resolution timestamp string
            timestamp_string = str(int(mktime(timestamp.timetuple()))) + "000"
        except AttributeError:
            print("PROBLEM WITH TIMESTAMP FROM: %s" % timings_path)
            continue
        if len(timestamp_string) != 13:
            raise Exception("LOL! No.")

        study_obj_id = Study(ObjectId(study_id_string))._id
        s3_file_path = "%s/%s/%s/%s/%s.%s" % (
            study_id_string, user_id, data_stream_string, survey_id,
            timestamp_string, file_extension)

        if s3_list_files(s3_file_path):
            print("ALREADY_EXISTS: %s, %s" % (timings_path, s3_file_path))
            if not forcibly_overwrite:
                continue
        else:
            # two spaces keep the output identical to the old `print "yay!: ", path`
            print("yay!:  %s" % s3_file_path)

        contents = contents.encode("utf8")  # maybe make this unicode-16?
        s3_upload(s3_file_path, contents, study_obj_id, raw_path=True)
        FileToProcess.append_file_for_processing(s3_file_path, study_obj_id, user_id)
def test_s3_upload(self):
    """ Round-trip a small payload through s3_upload and s3_retrieve for a saved test study. """
    study = Study(
        object_id='0vsvxgyx5skpI0ndOSAk1Duf',
        encryption_key='aabbccddefggiijjkklmnoppqqrrsstt',
        name='TEST_STUDY_FOR_TESTS',
    )
    study.save()

    s3_path = "test_file_for_tests.txt"
    payload = "THIS IS TEST DATA"
    s3_upload(s3_path, payload, study.object_id)
    retrieved = s3_retrieve(s3_path, study.object_id)

    # what comes back from S3 must equal what was uploaded
    self.assertEqual(retrieved, payload)
def batch_upload(upload: Tuple[dict, str, bytes, str]):
    """
    Upload one chunk to S3 and update or create its ChunkRegistry entry.

    Takes a single tuple so that it can be used with a map-style call.

    Args:
        upload: a 4-tuple of (chunk, chunk_path, new_contents, study_object_id).
            ``chunk`` is either a ChunkRegistry (updated in place) or a dict with the keys
            user_id, survey_id, data_type, time_bin and chunk_path (a new entry is
            registered). ``new_contents`` is zip-compressed bytes.

    Returns:
        A dict with the keys 'exception' and 'traceback'; both are None on success.
        Any Exception raised while processing is caught, printed, and reported through
        this dict (the exception object and ``sys.exc_info()``) instead of propagating.
    """
    ret = {'exception': None, 'traceback': None}
    try:
        if len(upload) != 4:
            # upload should have length 4; this is for debugging if it doesn't
            print(upload)
        chunk, chunk_path, new_contents, study_object_id = upload
        del upload

        # a path containing b' is the repr of a bytes object, not a real path
        if "b'" in chunk_path:
            raise Exception(chunk_path)

        # Decompress once and use the result everywhere: the uploaded file, the recorded
        # file size and the chunk hash must all describe the same (decompressed) bytes.
        new_contents = codecs.decode(new_contents, "zip")
        s3_upload(chunk_path, new_contents, study_object_id, raw_path=True)
        print("data uploaded!", chunk_path)

        if isinstance(chunk, ChunkRegistry):
            # If the contents are being appended to an existing ChunkRegistry object
            chunk.file_size = len(new_contents)
            chunk.update_chunk_hash(new_contents)
        else:
            # If a new ChunkRegistry object is being created
            # Convert the ID's used in the S3 file names into primary keys for making
            # ChunkRegistry FKs
            participant_pk, study_pk = Participant.objects.filter(
                patient_id=chunk['user_id']).values_list('pk', 'study_id').get()
            if chunk['survey_id']:
                survey_pk = Survey.objects.filter(
                    object_id=chunk['survey_id']).values_list('pk', flat=True).get()
            else:
                survey_pk = None
            ChunkRegistry.register_chunked_data(
                chunk['data_type'],
                chunk['time_bin'],
                chunk['chunk_path'],
                new_contents,  # unlikely to be huge
                study_pk,
                participant_pk,
                survey_pk,
            )
    # it broke. print stacktrace for debugging
    except Exception as e:
        traceback.print_exc()
        ret['traceback'] = sys.exc_info()
        ret['exception'] = e
    return ret
def data_pipeline_upload():
    """
    Authenticate a researcher, check their access to the study, then store an uploaded
    pipeline file on S3 and record it (with its tags) in the database.

    Aborts with 403 for an unknown access key, an incorrect secret key, or a study the
    researcher is not attached to, and with 404 for an unknown study. Returns a 400
    response for unexpected request parameters or an InvalidUploadParameterError, and a
    200 "SUCCESS" response otherwise.
    """
    # case: invalid access credentials
    access_key = request.values["access_key"]
    access_secret = request.values["secret_key"]
    key_matches = Researcher.objects.filter(access_key_id=access_key)
    if not key_matches.exists():
        return abort(403)  # access key DNE
    researcher = Researcher.objects.get(access_key_id=access_key)
    if not researcher.validate_access_credentials(access_secret):
        return abort(403)  # incorrect secret key

    # case: invalid study
    study_id = request.values["study_id"]
    study_matches = Study.objects.filter(object_id=study_id)
    if not study_matches.exists():
        return abort(404)
    study_obj = Study.objects.get(object_id=study_id)

    # case: study not authorized for user
    researcher_on_study = study_obj.get_researchers().filter(id=researcher.id)
    if not researcher_on_study.exists():
        return abort(403)

    # block extra keys
    unknown_key_messages = [
        'encountered invalid parameter: "%s"' % param
        for param in request.values.iterkeys()
        if param not in VALID_PIPELINE_POST_PARAMS
    ]
    if unknown_key_messages:
        return Response("\n".join(unknown_key_messages), 400)

    try:
        creation_args, tags = PipelineUpload.get_creation_arguments(
            request.values, request.files['file'])
    except InvalidUploadParameterError as e:
        return Response(e.message, 400)

    s3_path = creation_args['s3_path']
    file_contents = request.files['file'].read()
    study_object_id = Study.objects.get(id=creation_args['study_id']).object_id
    s3_upload(s3_path, file_contents, study_object_id, raw_path=True)

    new_object_id = PipelineUpload.generate_objectid_string('object_id')
    pipeline_upload = PipelineUpload(object_id=new_object_id, **creation_args)
    pipeline_upload.save()

    for tag in tags:
        PipelineUploadTags(pipeline_upload=pipeline_upload, tag=tag).save()

    return Response("SUCCESS", status=200)
def csv_generator(study_id, number_of_new_patients):
    """
    Create participants one at a time and stream their credentials as CSV text.

    For each new participant an empty marker file and a client key pair are uploaded to
    S3. After every participant the generator yields whatever CSV text has accumulated
    (the header row arrives with the first participant) and then empties the buffer.
    """
    stream = StreamingBytesIO()
    csv_writer = writer(stream)
    csv_writer.writerow(['Patient ID', "Registration password"])

    object_id_query = Study.objects.filter(pk=study_id).values_list('object_id', flat=True)
    study_object_id = object_id_query.get()

    for _ in xrange(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)

        # Creates an empty file on s3 indicating that this user exists
        s3_upload(patient_id, "", study_object_id)
        create_client_key_pair(patient_id, study_object_id)

        csv_writer.writerow([patient_id, password])
        yield stream.getvalue()
        stream.empty()
def participant_credential_generator(study_id, number_of_new_patients, desired_filename):
    """
    Create participants one at a time and stream their credentials as CSV text.

    For each new participant an empty marker file is uploaded to the participant's raw
    data path on S3. After every participant the generator yields whatever CSV text has
    accumulated (the header row arrives with the first participant) and then empties
    the buffer.

    Args:
        study_id: primary key of the study the participants are created in.
        number_of_new_patients: how many participants to create.
        desired_filename: not used by this function; kept so existing callers keep working.
    """
    si = StreamingBytesIO()
    filewriter = writer(si)
    filewriter.writerow(['Patient ID', "Registration password"])

    # Only the object_id is needed; the study name used to be fetched here too but was
    # never read, costing a query for nothing.
    study_object_id = Study.objects.filter(pk=study_id).values_list('object_id', flat=True).get()

    for _ in xrange(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)

        # Creates an empty file on s3 indicating that this user exists
        s3_upload(
            construct_s3_raw_data_path(study_object_id, patient_id),
            "",
            study_object_id,
            raw_path=True,
        )

        filewriter.writerow([patient_id, password])
        yield si.getvalue()
        si.empty()
def create_fake_mp4(number=10):
    """
    Upload `number` copies of the local file "thing" to S3 as fake voice recordings for
    the hard-coded participant h6fflp, and queue each one for processing.

    File names count upwards from 1000000000.
    """
    study_object_id = "55d3826297013e3a1c9b8c3e"
    participant = Participant.objects.get(patient_id='h6fflp')

    for offset in range(number):
        with open("thing", "r") as source:
            file_path = "55d3826297013e3a1c9b8c3e/h6fflp/voiceRecording/%s.mp4" % (
                1000000000 + offset)
            contents = source.read()
            s3_upload(file_path, contents, study_object_id, raw_path=True)
            FileToProcess.append_file_for_processing(
                file_path, study_object_id, participant_id=participant)
def create_new_patient():
    """
    Create a participant in the requested study and mark them as existing on S3.

    Reads ``study_id`` from the request, creates the participant (which yields a patient
    id and a password), uploads an empty file to the participant's raw data path,
    flashes the new credentials and redirects to the ``/view_study/<study_id>`` page.
    """
    study_id = request.values['study_id']
    patient_id, password = Participant.create_with_password(study_id=study_id)

    # Create an empty file on S3 indicating that this user exists
    object_id_query = Study.objects.filter(pk=study_id).values_list('object_id', flat=True)
    study_object_id = object_id_query.get()
    marker_path = construct_s3_raw_data_path(study_object_id, patient_id)
    s3_upload(marker_path, "", study_object_id, raw_path=True)

    credentials_message = 'Created a new patient\npatient_id: {:s}\npassword: {:s}'.format(
        patient_id, password)
    flash(credentials_message, 'success')
    return redirect('/view_study/{:s}'.format(study_id))
def participant_csv_generator(study_id, number_of_new_patients):
    """
    Create participants one at a time and stream their credentials as CSV text.

    For each new participant the study's fields and interventions are applied, an empty
    marker file and a client key pair are uploaded to S3, and the participant's
    scheduled survey events are repopulated. After every participant the generator
    yields whatever CSV text has accumulated (the header row arrives with the first
    participant) and then empties the buffer.

    Args:
        study_id: primary key of the study the participants are created in.
        number_of_new_patients: how many participants to create.
    """
    # Loaded once and shared by every iteration; it used to be re-queried per participant.
    study = Study.objects.get(pk=study_id)
    si = StreamingStringsIO()
    filewriter = writer(si)
    filewriter.writerow(['Patient ID', "Registration password"])

    for _ in range(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)
        participant = Participant.objects.get(patient_id=patient_id)
        add_fields_and_interventions(participant, study)

        # Creates an empty file on s3 indicating that this user exists
        s3_upload(patient_id, b"", study.object_id)
        create_client_key_pair(patient_id, study.object_id)
        repopulate_all_survey_scheduled_events(study, participant)

        filewriter.writerow([patient_id, password])
        yield si.getvalue()
        si.empty()
def batch_upload(upload: Tuple[ChunkRegistry or dict, str, bytes, str]):
    """
    Upload one chunk to S3 and update or create its ChunkRegistry entry.

    Takes a single tuple of (chunk, chunk_path, new_contents, study_object_id) so that it
    can be used with a map-style call. Returns a dict with the keys 'exception' and
    'traceback', both None on success. On failure they are filled in and the exception
    is re-raised inside the error sentry context so that it is reported from there.
    """
    ret = {'exception': None, 'traceback': None}

    with make_error_sentry(sentry_type=SentryTypes.data_processing):
        try:
            chunk, chunk_path, contents, study_object_id = upload
            del upload
            contents = decompress(contents)

            # a path containing b' is the repr of a bytes object, not a real path
            if "b'" in chunk_path:
                raise Exception(chunk_path)

            s3_upload(chunk_path, contents, study_object_id, raw_path=True)

            if not isinstance(chunk, ChunkRegistry):
                # a plain dict describes a chunk that has no registry entry yet
                ChunkRegistry.register_chunked_data(**chunk, file_contents=contents)
            else:
                # an existing registry entry is being updated with the new contents
                chunk.file_size = len(contents)
                chunk.update_chunk(contents)

        except Exception as e:
            # it broke. print the stack trace for debugging, record the failure, then
            # re-raise so the error sentry sees the real error with its real stack trace
            traceback.print_exc()
            ret['exception'] = e
            ret['traceback'] = sys.exc_info()
            raise

    return ret
def batch_upload(upload):
    """
    Upload one chunk to S3 and update or create its ChunkRegistry entry.

    Takes a single tuple so that it can be used with a map-style call.

    Args:
        upload: a 4-tuple of (chunk, chunk_path, new_contents, study_object_id).
            ``chunk`` is either a ChunkRegistry (its hash is updated) or a dict with the
            keys user_id, survey_id, data_type, time_bin and chunk_path (a new entry is
            registered). ``new_contents`` is decoded with the "zip" codec first.

    Returns:
        A dict with the keys 'exception' and 'traceback'; both are None on success.
        Any Exception raised while processing is caught and reported through this
        dict (the exception object and its formatted traceback) instead of propagating.
    """
    ret = {'exception': None, 'traceback': None}
    try:
        if len(upload) != 4:
            # upload should have length 4; this is for debugging if it doesn't
            print(upload)
        chunk, chunk_path, new_contents, study_object_id = upload
        del upload
        new_contents = new_contents.decode("zip")
        s3_upload(chunk_path, new_contents, study_object_id, raw_path=True)
        print("data uploaded!", chunk_path)

        if isinstance(chunk, ChunkRegistry):
            # If the contents are being appended to an existing ChunkRegistry object
            chunk.low_memory_update_chunk_hash(new_contents)
        else:
            # If a new ChunkRegistry object is being created
            # Convert the ID's used in the S3 file names into primary keys for making
            # ChunkRegistry FKs
            participant_pk, study_pk = Participant.objects.filter(
                patient_id=chunk['user_id']).values_list('pk', 'study_id').get()
            if chunk['survey_id']:
                survey_pk = Survey.objects.filter(
                    object_id=chunk['survey_id']).values_list('pk', flat=True).get()
            else:
                survey_pk = None
            ChunkRegistry.register_chunked_data(
                chunk['data_type'],
                chunk['time_bin'],
                chunk['chunk_path'],
                new_contents,  # unlikely to be huge
                study_pk,
                participant_pk,
                survey_pk,
            )
    except Exception as e:
        # format_exc formats the exception currently being handled; its first
        # parameter is a frame limit, so the exception must not be passed to it.
        ret['traceback'] = format_exc()
        ret['exception'] = e
    return ret
def create_new_patient():
    """
    Create a participant in the requested study and provision their S3 state.

    Reads ``study_id`` from the request, creates the participant (which yields a patient
    id and a password), uploads an empty marker file and a client key pair to S3 and
    flashes the new credentials. Returns the result of make_QR for the new credentials,
    using the timezone stored in the session.
    """
    study_id = request.values['study_id']
    patient_id, password = Participant.create_with_password(study_id=study_id)

    # Create an empty file on S3 indicating that this user exists
    object_id_query = Study.objects.filter(pk=study_id).values_list('object_id', flat=True)
    study_object_id = object_id_query.get()
    s3_upload(patient_id, "", study_object_id)
    create_client_key_pair(patient_id, study_object_id)

    credentials_message = 'Created a new participant with patient_id: %s , password: %s' % (
        patient_id, password)
    flash(credentials_message, 'success')
    return make_QR(study_id, patient_id, password, timezone=session["timezone"])
def upload(OS_API=""):
    """
    Entry point for device uploads of data files (GPS, accelerometer, audio, power state,
    call and text logs, survey responses, debugging files); accepted files go to S3.

    Response codes matter because the Beiwe app deletes its local copy of a file when it
    receives a 200, and otherwise keeps the file and tries again later:
      * 200 - the file was stored, the file was empty, the file (or its key) could not be
        decrypted, or the file is an "rList"/crash-log file that is simply discarded.
        Undecryptable files get a 200 on purpose so the device stops re-uploading them.
      * 400 - the file name is missing or has an invalid extension; the failure is
        reported through sentry.
      * 500 - something went wrong server side.

    Request format: an http post to [domain name]/upload with the security parameters
    (see user_authentication), the encrypted and Base64 encoded file contents in a
    parameter named "file", and the file name in a parameter named "file_name".

    OS_API is not referenced in the body.
    """
    # These corner cases are handled first because they require no database access.
    # Crash logs are from truly ancient versions of the android codebase.
    file_name = request.values['file_name']
    if file_name.startswith("rList") or "crashlog" in file_name.lower():
        return render_template('blank.html'), 200

    patient_id = request.values['patient_id']
    user = get_session_participant()

    # Where the file lives differs by platform:
    #   Android sends it as a standard form post parameter (request.values),
    #   iOS sends it as a multipart upload (request.files),
    #   and if neither is present the post body is treated as the file
    #   (no client currently does this, it is only here for completeness).
    if "file" in request.files:
        payload = request.files['file']
    elif "file" in request.values:
        payload = request.values['file']
    else:
        payload = request.data

    # normalize the payload to bytes; bytes input (not current behavior on any app)
    # passes through untouched
    if isinstance(payload, FileStorage):
        payload = payload.read()
    elif isinstance(payload, str):
        payload = payload.encode()
    elif not isinstance(payload, bytes):
        raise TypeError("uploaded_file was a %s" % type(payload))

    client_private_key = get_client_private_key(patient_id, user.study.object_id)
    try:
        payload = decrypt_device_file(patient_id, payload, client_private_key, user)
    except HandledError:
        # when decrypting fails, regardless of why, the decryption code is relied on to
        # log it correctly; 200 OK gets the device to delete the file. Logging here was
        # turned off because it hasn't been valuable since 2015.
        return render_template('blank.html'), 200
    except DecryptionKeyInvalidError:
        # when the decryption key is invalid the file is lost. Nothing we can do.
        # record the event, send the device a 200 so it can clear out the file.
        operating_system = "ios" if "ios" in request.path.lower() else "android"
        tags = {
            "participant": patient_id,
            "operating system": operating_system,
            "DecryptionKeyError id": str(DecryptionKeyError.objects.last().id),
            "file_name": file_name,
        }
        make_sentry_client('eb', tags).captureMessage("DecryptionKeyInvalidError")
        return render_template('blank.html'), 200

    s3_file_location = file_name.replace("_", "/")

    # if uploaded data a) actually exists, b) is validly named and typed...
    if payload and file_name and contains_valid_extension(file_name):
        s3_upload(s3_file_location, payload, user.study.object_id)
        FileToProcess.append_file_for_processing(
            s3_file_location, user.study.object_id, participant=user
        )
        UploadTracking.objects.create(
            file_path=s3_file_location,
            file_size=len(payload),
            timestamp=timezone.now(),
            participant=user,
        )
        return render_template('blank.html'), 200

    # everything below is a failure of some kind
    error_message = "".join(("an upload has failed ", patient_id, ", ", file_name, ", "))

    if not payload:
        # it appears that occasionally the app creates some spurious files
        # with a name like "rList-org.beiwe.app.LoadingActivity"
        error_message += "there was no/an empty file, returning 200 OK so device deletes bad file."
        log_error(Exception("upload error"), error_message)
        return render_template('blank.html'), 200

    if not file_name:
        error_message += "there was no provided file name, this is an app error."
    elif file_name and not contains_valid_extension(file_name):
        error_message += "contains an invalid extension, it was interpreted as "
        error_message += grab_file_extension(file_name)
    else:
        error_message += "AN UNKNOWN ERROR OCCURRED."

    sentry_tags = {"upload_error": "upload error", "user_id": patient_id}
    make_sentry_client('eb', sentry_tags).captureMessage(error_message)
    return abort(400)
def create_fake_mp4(number=10):
    """
    Upload `number` copies of the local file "thing" to S3 as fake voice recordings for
    the hard-coded user h6fflp, and queue each one for processing.

    File names count upwards from 1000000000.
    """
    for offset in range(number):
        with open("thing", "r") as source:
            file_path = "55d3826297013e3a1c9b8c3e/h6fflp/voiceRecording/%s.mp4" % (
                1000000000 + offset)
            contents = source.read()
            s3_upload(file_path, contents, ObjectId("55d3826297013e3a1c9b8c3e"), raw_path=True)
            FileToProcess.append_file_for_processing(
                file_path, ObjectId("55d3826297013e3a1c9b8c3e"), "h6fflp")
def upload(OS_API=""):
    """
    Entry point for device uploads of data files (GPS, accelerometer, audio, power state,
    call and text logs, survey responses, debugging files); accepted files go to S3.

    Response codes matter because the Beiwe app deletes its local copy of a file when it
    receives a 200, and otherwise keeps the file and tries again later:
      * 200 - the file was stored, the file was empty, the file (or its key) could not be
        decrypted, the participant is not registered, no file name was sent, or the file
        is an "rList-" file or a crash log (crash logs are forwarded as android error
        reports). Undecryptable files get a 200 on purpose so the device stops
        re-uploading them.
      * 400 - the file name has an invalid extension; the failure is reported through
        sentry.
      * 500 - something went wrong server side.

    Request format: an http post to [domain name]/upload with the security parameters
    (see user_authentication), the encrypted and Base64 encoded file contents in a
    parameter named "file", and the file name in a parameter named "file_name".

    OS_API is not referenced in the body. Progress is printed at each step.
    """
    patient_id = request.values['patient_id']
    user = Participant.objects.get(patient_id=patient_id)

    # An unregistered participant (empty device id) has the upload rejected, and the
    # mobile app is told to delete the file so it will not be resent.
    if user.device_id == '':
        error_message = "an upload has failed " + patient_id
        error_message += ". Participant is not registered, returning 200 OK so device deletes bad file."
        log_error(Exception("upload error"), error_message)
        return render_template('blank.html'), 200

    # Where the file lives differs by platform:
    #   Android sends it as a standard form post parameter (request.values),
    #   iOS sends it as a multipart upload (request.files),
    #   and if neither is present the post body is treated as the file
    #   (no client currently does this, it is only here for completeness).
    print('finding file contents')
    if "file" in request.files:
        payload = request.files['file']
    elif "file" in request.values:
        payload = request.values['file']
    else:
        payload = request.data

    if isinstance(payload, FileStorage):
        print('reading file contents')
        payload = payload.read()

    print('finding file name')
    file_name_present = 'file_name' in request.values and request.values['file_name']
    if not file_name_present:
        error_message = "an upload has failed " + patient_id
        error_message += ". Request did not include a file_name."
        log_error(Exception("upload error"), error_message)
        return render_template('blank.html'), 200
    file_name = request.values['file_name']
    print('filename is {0}'.format(file_name))

    if "crashlog" in file_name.lower():
        send_android_error_report(patient_id, payload)
        return render_template('blank.html'), 200

    if file_name.startswith("rList-"):
        return render_template('blank.html'), 200

    print('retrieving private key')
    client_private_key = get_client_private_key(patient_id, user.study.object_id)

    print('decrypting device file')
    try:
        payload = decrypt_device_file(patient_id, payload, client_private_key, user)
    except HandledError as e:
        # when decrypting fails, regardless of why, the decryption code is relied on to
        # log it correctly; 200 OK gets the device to delete the file. These errors
        # should not produce emails, so log_error is used explicitly.
        print("the following error was handled:")
        log_error(e, "%s; %s; %s" % (patient_id, file_name, e.message))
        return render_template('blank.html'), 200
    except DecryptionKeyInvalidError:
        # This is what the decryption failure mode SHOULD be, but the decryption bug is
        # still being identified.
        operating_system = "ios" if "ios" in request.path.lower() else "android"
        tags = {
            "participant": patient_id,
            "operating system": operating_system,
            "DecryptionKeyError id": str(DecryptionKeyError.objects.last().id),
            "file_name": file_name,
        }
        make_sentry_client('eb', tags).captureMessage("DecryptionKeyInvalidError")
        return render_template('blank.html'), 200

    # if uploaded data a) actually exists, b) is validly named and typed...
    if payload and file_name and contains_valid_extension(file_name):
        print('constructing rawdata filename')
        raw_data_filename = construct_s3_raw_data_path(
            user.study.object_id, file_name.replace("_", "/"))
        print('rawdata filename {0}'.format(raw_data_filename))
        s3_upload(raw_data_filename, payload, user.study.object_id, raw_path=True)

        print('file uploaded to s3, now adding to FTP')
        FileToProcess.append_file_for_processing(
            raw_data_filename, user.study.object_id, participant=user)

        print('next is update upload tracking database')
        UploadTracking.objects.create(
            file_path=raw_data_filename,
            file_size=len(payload),
            timestamp=timezone.now(),
            participant=user,
        )

        print('next is update received data stats database')
        ReceivedDataStats.update_statistics(
            file_path=raw_data_filename,
            file_size=len(payload),
            timestamp=timezone.now(),
            participant=user,
        )

        print('finished processing the upload')
        return render_template('blank.html'), 200

    # everything below is a failure of some kind
    error_message = "".join(("an upload has failed ", patient_id, ", ", file_name, ", "))

    if not payload:
        # it appears that occasionally the app creates some spurious files
        # with a name like "rList-org.beiwe.app.LoadingActivity"
        error_message += "there was no/an empty file, returning 200 OK so device deletes bad file."
        log_error(Exception("upload error"), error_message)
        return render_template('blank.html'), 200

    if not file_name:
        error_message += "there was no provided file name, this is an app error."
    elif file_name and not contains_valid_extension(file_name):
        error_message += "contains an invalid extension, it was interpretted as "
        error_message += grab_file_extension(file_name)
    else:
        error_message += "AN UNKNOWN ERROR OCCURRED."

    sentry_tags = {"upload_error": "upload error", "user_id": patient_id}
    make_sentry_client('eb', sentry_tags).captureMessage(error_message)
    return abort(400)
def save(file_name, uploaded_file):
    """
    Decrypt an uploaded device file and store it on S3, keeping a raw copy on any failure.

    ``patient_id`` and ``user`` are not parameters; they are read from the surrounding
    scope.

    Crash logs are forwarded as android error reports and "rList-" files are ignored;
    both answer 200. If decryption fails in any way the raw upload is stored unencrypted.
    After a successful decryption the participant's upload info is updated for
    CHECKABLE_FILES (failures there are only logged), the decrypted file is stored, and
    when there were non-fatal decryption errors a raw copy is stored as well. Files
    that are empty, unnamed or have an invalid extension are stored raw and, except for
    empty files, reported through sentry.

    Args:
        file_name: underscore separated file name; underscores become "/" in the S3 path.
        uploaded_file: the encrypted file contents as received from the device.

    Returns:
        A (rendered blank template, status) pair. The status is 200 when the S3 upload
        reported success, so the phone can free its storage, and 500 otherwise, so the
        phone keeps the file and retries later.
    """
    uploaded_file0 = uploaded_file
    error_count = 0

    if "crashlog" in file_name.lower():
        send_android_error_report(patient_id, uploaded_file)
        return render_template('blank.html'), 200

    # it appears that occasionally the app creates some spurious files with a name like
    # "rList-org.beiwe.app.LoadingActivity"
    if file_name[:6] == "rList-":
        return render_template('blank.html'), 200

    # every upload of this file, raw or decrypted, goes to the same S3 path
    s3_path = file_name.replace("_", "/")

    # test whether can decrypt successfully
    # if cannot decrypt, save the raw file, return OK:200 to free up phone storage
    # if cannot save to S3 bucket, return Error:500 to postpone upload & keep the file
    # on the phone
    client_private_key = get_client_private_key(patient_id, user.study.object_id)
    try:
        uploaded_file, error_count = decrypt_device_file(
            patient_id, uploaded_file, client_private_key, user)
    except HandledError as e:
        canUpload = s3_upload(s3_path, uploaded_file, user.study.object_id, encrypt=False)
        print("The following upload error was handled:")
        log_error(e, "%s; %s; %s" % (patient_id, file_name, e.message))
        return render_template('blank.html'), 200 if canUpload else 500
    except OurBase64Error:
        canUpload = s3_upload(s3_path, uploaded_file, user.study.object_id, encrypt=False)
        print(
            "### decryption error: patient_id=%s, file_name=%s, file_size=%s"
            % (patient_id, file_name, len(uploaded_file)))
        return render_template('blank.html'), 200 if canUpload else 500
    except Exception:
        # Any other decryption failure still saves the raw file. This was a bare
        # `except:`, which also intercepted SystemExit and KeyboardInterrupt; those
        # now propagate.
        canUpload = s3_upload(s3_path, uploaded_file, user.study.object_id, encrypt=False)
        return render_template('blank.html'), 200 if canUpload else 500

    # set upload info
    file_basename = file_name.split('_')[-2]
    if file_basename in CHECKABLE_FILES:
        try:
            upload_info = user.get_upload_info()
            update_upload_info(
                file_basename,
                upload_info,
                uploaded_file.strip().splitlines()[1:],  # everything after the first line
                2 if file_basename == 'callLog' else 0)
            user.set_upload_info(upload_info)
        except Exception as e:
            # best effort: a failure here must not block storing the file
            log_error(
                e,
                "Failed to update upload info: patient_id=%s; file_name=%s; msg=%s"
                % (patient_id, file_name, e.message))

    # if uploaded data a) actually exists, B) is validly named and typed...
    if uploaded_file and file_name and contains_valid_extension(file_name):
        canUpload = s3_upload(s3_path, uploaded_file, user.study.object_id)
        user.set_upload_time()
        # for files with non-fatal decryption errors, save another raw copy
        if canUpload and error_count > 0:
            canUpload = s3_upload(s3_path, uploaded_file0, user.study.object_id, encrypt=False)
        return render_template('blank.html'), 200 if canUpload else 500
    else:
        error_message = "an upload has failed " + patient_id + ", " + file_name + ", "
        canUpload = s3_upload(s3_path, uploaded_file, user.study.object_id, encrypt=False)
        user.set_upload_time()
        if not uploaded_file:
            error_message += "there was an empty file, returning 200 OK so device deletes bad file."
            log_error(Exception("upload error"), error_message)
            return render_template('blank.html'), 200 if canUpload else 500
        elif not file_name:
            error_message += "there was no provided file name, this is an app error."
        elif not contains_valid_extension(file_name):
            error_message += "contains an invalid extension, it was interpretted as "
            error_message += grab_file_extension(file_name)
        else:
            error_message += "AN UNKNOWN ERROR OCCURRED."

        tags = {"upload_error": "upload error", "user_id": patient_id}
        sentry_client = make_sentry_client('eb', tags)
        sentry_client.captureMessage(error_message)
        return render_template('blank.html'), 200 if canUpload else 500
def upload(OS_API=""):
    """ Entry point for device file uploads (GPS, Accelerometer, Audio, PowerState, Calls Log,
    Texts Log, Survey Response, and debugging files); accepted files are stored on s3.

    Response codes:
    200 - the app is supposed to delete its copy of the file. Sent when the file was handled
        successfully, when the file was empty, and when the file did not decrypt properly.
        Incorrectly encrypted files (and Android "rList" files, generated under unknown
        circumstances) were seen in production; the cause of the encryption errors could not
        be tracked down, so a 200 is still sent for them in order to get them deleted.
    400 - something is wrong with the uploaded file or its parameters. The app should keep
        the file and try to upload it again at some point.
    500 - something is wrong server side. The app should keep the file and try again later.

    Request format:
    An http post request to [domain name]/upload, including the security parameters (see
    user_authentication for documentation). The file contents, encrypted (see encryption
    specification) and converted to Base64 encoded text, go in a request parameter entitled
    "file". The file name goes in a request parameter entitled "file_name".
    """

    def ok_response():
        return render_template('blank.html'), 200

    patient_id = request.values['patient_id']
    user = Participant.objects.get(patient_id=patient_id)

    # Slightly different sources for iOS vs Android:
    #   iOS sends the file as a multipart upload (request.files),
    #   Android sends the file data as a standard form post parameter (request.values),
    #   otherwise the post "body" is taken to be the file (not currently used by any client,
    #   only here for completeness).
    if "file" in request.files:
        payload = request.files['file']
    elif "file" in request.values:
        payload = request.values['file']
    else:
        payload = request.data
    uploaded_file = payload.read() if isinstance(payload, FileStorage) else payload

    file_name = request.values['file_name']

    if "crashlog" in file_name.lower():
        send_android_error_report(patient_id, uploaded_file)
        return ok_response()

    if file_name.startswith("rList-"):
        return ok_response()

    client_private_key = get_client_private_key(patient_id, user.study.object_id)
    try:
        uploaded_file = decrypt_device_file(patient_id, uploaded_file, client_private_key, user)
    except HandledError as e:
        # When decrypting fails, regardless of why, the decryption code is relied on to log it
        # correctly; 200 OK gets the device to delete the file. No emails are wanted for these
        # errors, hence the explicit log_error.
        print("the following error was handled:")
        log_error(e, "%s; %s; %s" % (patient_id, file_name, e.message))
        return ok_response()
    except OurBase64Error:
        if IS_STAGING:
            for debug_line in ("decryption problems" + "#" * 200, patient_id, file_name, uploaded_file):
                print(debug_line)
        raise

    # if uploaded data a) actually exists, B) is validly named and typed...
    if uploaded_file and file_name and contains_valid_extension(file_name):
        s3_path = file_name.replace("_", "/")
        study_object_id = user.study.object_id
        s3_upload(s3_path, uploaded_file, study_object_id)
        FileToProcess.append_file_for_processing(s3_path, study_object_id, participant=user)
        UploadTracking.objects.create(
            file_path=s3_path,
            file_size=len(uploaded_file),
            timestamp=timezone.now(),
            participant=user,
        )
        return ok_response()

    error_message = "an upload has failed " + patient_id + ", " + file_name + ", "

    if not uploaded_file:
        # it appears that occasionally the app creates some spurious files
        # with a name like "rList-org.beiwe.app.LoadingActivity"
        error_message += "there was no/an empty file, returning 200 OK so device deletes bad file."
        log_error(Exception("upload error"), error_message)
        return ok_response()

    if not file_name:
        error_message += "there was no provided file name, this is an app error."
    elif file_name and not contains_valid_extension(file_name):
        error_message += "contains an invalid extension, it was interpretted as "
        error_message += grab_file_extension(file_name)
    else:
        error_message += "AN UNKNOWN ERROR OCCURRED."

    sentry_client = make_sentry_client('eb', {"upload_error": "upload error", "user_id": patient_id})
    sentry_client.captureMessage(error_message)
    return abort(400)
def register_user(OS_API=""):
    """ Registers the requesting device for a participant and returns its encryption key.

    Checks that no other device is registered to this patient id; if none is (or it is this
    same device), registers the device, uploads a csv of the device identifiers to s3, and
    sets the new password. Check the documentation in user_authentication to ensure you have
    provided the proper credentials.

    Required request parameters: patient_id, phone_number, device_id, new_password.
    Optional request parameters (recorded as "none" when absent): device_os, os_version,
    product, brand, hardware_id, manufacturer, model, beiwe_version, bluetooth_id.

    :param OS_API: the device type of the requesting client, compared against and stored as
        the participant's os_type.
    :return: (json string with 'client_public_key' and 'device_settings', 200) on success;
        aborts with 405 when a different device is already registered, and with 400 when
        the participant was previously registered with a different device type.
    """
    # CASE: If the id and password combination do not match, the decorator returns a 403 error.

    # The following parameter values are required.
    patient_id = request.values['patient_id']
    phone_number = request.values['phone_number']
    device_id = request.values['device_id']

    # These values may not be returned by earlier versions of the beiwe app.
    device_os = request.values.get('device_os', "none")
    os_version = request.values.get('os_version', "none")
    product = request.values.get("product", "none")
    brand = request.values.get("brand", "none")
    hardware_id = request.values.get("hardware_id", "none")
    manufacturer = request.values.get("manufacturer", "none")
    model = request.values.get("model", "none")
    beiwe_version = request.values.get("beiwe_version", "none")
    # This value may not be returned by later versions of the beiwe app.
    mac_address = request.values.get('bluetooth_id', "none")

    user = Participant.objects.get(patient_id=patient_id)
    study_id = user.study.object_id

    if user.device_id and user.device_id != device_id:
        # CASE: this patient has a registered a device already and it does not match this
        # device. They need to contact the study and unregister their other device. The
        # device will receive a 405 error and should alert the user accordingly.
        # Provided a user does not completely reset their device (which resets the device's
        # unique identifier) the user CAN reregister an existing device, the unlock key they
        # need to enter at registration is their old password.
        # KG: 405 is good for IOS and Android, no need to check OS_API
        return abort(405)

    if user.os_type and user.os_type != OS_API:
        # CASE: this patient has registered, but the user was previously registered with a
        # different device type. To keep the CSV munging code sane and data consistent (don't
        # cross the iOS and Android data streams!) we disallow it.
        return abort(400)

    # Read the last required parameter BEFORE any side effect, so that a request missing it
    # is rejected without leaving a half-registered device behind.
    new_password = request.values['new_password']

    # At this point the device has been checked for validity and will be registered
    # successfully. Any errors after this point will be server errors and return 500 codes.
    # The final return will be the encryption key associated with this user.

    # Upload the user's various identifiers.
    unix_time = str(calendar.timegm(time.gmtime()))
    file_name = patient_id + '/identifiers_' + unix_time + ".csv"

    # Construct a manual csv of the device attributes.
    # NOTE(review): values are inserted without quoting or escaping, so a comma inside any
    # device-supplied value would shift the columns of this row.
    file_contents = (DEVICE_IDENTIFIERS_HEADER + "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" %
                     (patient_id, mac_address, phone_number, device_id, device_os,
                      os_version, product, brand, hardware_id, manufacturer, model,
                      beiwe_version))
    s3_upload(file_name, file_contents, study_id)
    FileToProcess.append_file_for_processing(file_name, study_id, participant=user)

    # set up device.
    user.set_device(device_id)
    user.set_os_type(OS_API)
    user.set_password(new_password)

    device_settings = user.study.device_settings.as_native_python()
    device_settings.pop('_id', None)
    return_obj = {
        'client_public_key': get_client_public_key_string(patient_id, study_id),
        'device_settings': device_settings,
    }
    return json.dumps(return_obj), 200
def upload(OS_API=""):
    """ Entry point to upload GPS, Accelerometer, Audio, PowerState, Calls Log, Texts Log,
    Survey Response, and debugging files to s3.

    Behavior:
    The Beiwe app is supposed to delete the uploaded file if it receives an html 200 response.
    The API returns a 200 response when the file has A) been successfully handled, B) the file
    it has been sent is empty, C) the file did not decrypt properly. We encountered problems
    in production with incorrectly encrypted files (as well as Android generating "rList"
    files under unknown circumstances) and the app then uploads them.

    When a file is undecryptable (this was tracked to a scenario where the device could not
    create/write an AES encryption key) we send a 200 response to stop that device attempting
    to re-upload the data. In the event of a single line being undecryptable (can happen due
    to io errors on the device) we drop only that line (and store the erroring line in an
    attempt to track it down).

    A 500 response tells the app not to delete the file and to try uploading it again at some
    point; in this function it is sent when a concurrent upload of the same file path is
    detected.

    Request format:
    send an http post request to [domain name]/upload, remember to include security
    parameters (see user_authentication for documentation). Provide the contents of the file,
    encrypted (see encryption specification) and properly converted to Base64 encoded text,
    as a request parameter entitled "file". Provide the file name in a request parameter
    entitled "file_name".

    :return: (rendered 'blank.html', 200), or an abort(500) for a concurrent duplicate upload.
    """
    # Handle these corner cases first because they require no database input.
    # Crash logs are from truly ancient versions of the android codebase
    # rList are randomly generated by android
    # PersistedInstallation files come from firebase.
    # todo: stop uploading junk files in the app by putting our files into a folder.
    file_name = request.values.get("file_name", None)
    if (
        not file_name
        or file_name.startswith(("rList", "PersistedInstallation"))
        or not contains_valid_extension(file_name)
    ):
        return render_template('blank.html'), 200

    s3_file_location = file_name.replace("_", "/")
    participant = get_session_participant()

    if participant.unregistered:
        # "Unregistered" participants are blocked from uploading further data.
        # If the participant is unregistered, throw away the data file, but
        # return a 200 "OK" status to the phone so the phone decides it can
        # safely delete the file.
        return render_template('blank.html'), 200

    study_object_id = participant.study.object_id

    # block duplicate FTPs. Testing the upload history is too complex
    if FileToProcess.test_file_path_exists(s3_file_location, study_object_id):
        return render_template('blank.html'), 200

    uploaded_file = get_uploaded_file()
    try:
        uploaded_file = decrypt_device_file(uploaded_file, participant)
    except HandledError:
        return render_template('blank.html'), 200
    except DecryptionKeyInvalidError:
        # when the decryption key is invalid the file is lost. Nothing we can do.
        # record the event, send the device a 200 so it can clear out the file.
        if REPORT_DECRYPTION_KEY_ERRORS:
            # Guard against an empty table so that reporting itself cannot turn this
            # deliberate 200 into a server error.
            last_key_error = DecryptionKeyError.objects.last()
            tags = {
                "participant": participant.patient_id,
                "operating system": "ios" if "ios" in request.path.lower() else "android",
                "DecryptionKeyError id":
                    str(last_key_error.id) if last_key_error is not None else "unknown",
                "file_name": file_name,
                "bug_report": DECRYPTION_KEY_ADDITIONAL_MESSAGE,
            }
            sentry_client = make_sentry_client(SentryTypes.elastic_beanstalk, tags)
            sentry_client.captureMessage(DECRYPTION_KEY_ERROR_MESSAGE)
        return render_template('blank.html'), 200

    # The file name and its extension were validated at the top, so the only remaining
    # question is whether there is any data. If the file turns out to be empty the device
    # may delete it, we simply do not care.
    if not uploaded_file:
        return render_template('blank.html'), 200

    s3_upload(s3_file_location, uploaded_file, study_object_id)

    # race condition: multiple _concurrent_ uploads with same file path. Behavior without
    # try-except is correct, but we don't care about reporting it. Just send the device a 500
    # error so it skips the file, the followup attempt receives 200 code and deletes the file.
    try:
        FileToProcess.append_file_for_processing(
            s3_file_location, study_object_id, participant=participant
        )
    except ValidationError as e:
        # Real error is a second validation inside e.error_dict["s3_file_path"].
        # Ew; just test for this string instead...
        if S3_FILE_PATH_UNIQUE_CONSTRAINT_ERROR in str(e):
            # this tells the device to just move on to the next file, try again later.
            return abort(500)
        raise

    UploadTracking.objects.create(
        file_path=s3_file_location,
        file_size=len(uploaded_file),
        timestamp=timezone.now(),
        participant=participant,
    )
    return render_template('blank.html'), 200
def register_user(OS_API=""):
    """ Registers the requesting device for the session participant and returns its setup data.

    Checks that no other device is registered to this participant; if none is (or it is this
    same device), uploads a csv of the device identifiers to s3, records the device and OS
    type, sets the new password, and refreshes the participant's survey schedules. Check the
    documentation in user_authentication to ensure you have provided the proper credentials.

    Required request parameters: patient_id, phone_number, device_id, new_password.
    Optional request parameters (recorded as "none" when absent): device_os, os_version,
    product, brand, hardware_id, manufacturer, model, beiwe_version, bluetooth_id.

    :param OS_API: the device type of the requesting client, compared against and stored as
        the participant's os_type ('IOS' and 'ANDROID' select which firebase credentials
        are returned).
    :return: (json string, 200) on success, with keys client_public_key, device_settings,
        ios_plist, android_firebase_json, study_name and study_id; aborts with 405 when a
        different device is already registered, and with 400 when the participant was
        previously registered with a different device type.
    """
    # CASE: If the id and password combination do not match, the decorator returns a 403 error.

    # The following parameter values are required.
    patient_id = request.values['patient_id']
    phone_number = request.values['phone_number']
    device_id = request.values['device_id']

    # These values may not be returned by earlier versions of the beiwe app.
    device_os = request.values.get('device_os', "none")
    os_version = request.values.get('os_version', "none")
    product = request.values.get("product", "none")
    brand = request.values.get("brand", "none")
    hardware_id = request.values.get("hardware_id", "none")
    manufacturer = request.values.get("manufacturer", "none")
    model = request.values.get("model", "none")
    beiwe_version = request.values.get("beiwe_version", "none")
    # This value may not be returned by later versions of the beiwe app.
    mac_address = request.values.get('bluetooth_id', "none")

    participant = get_session_participant()

    if participant.device_id and participant.device_id != device_id:
        # CASE: this patient has a registered a device already and it does not match this
        # device. They need to contact the study and unregister their other device. The
        # device will receive a 405 error and should alert the user accordingly.
        # Provided a user does not completely reset their device (which resets the device's
        # unique identifier) the user CAN reregister an existing device, the unlock key they
        # need to enter at registration is their old password.
        # KG: 405 is good for IOS and Android, no need to check OS_API
        return abort(405)

    if participant.os_type and participant.os_type != OS_API:
        # CASE: this patient has registered, but the user was previously registered with a
        # different device type. To keep the CSV munging code sane and data consistent (don't
        # cross the iOS and Android data streams!) we disallow it.
        return abort(400)

    # Read the last required parameter BEFORE any side effect, so that a request missing it
    # is rejected without an identifiers file having been uploaded and queued.
    new_password = request.values['new_password']

    # At this point the device has been checked for validity and will be registered
    # successfully. Any errors after this point will be server errors and return 500 codes.
    # The final return will be the encryption key associated with this user.
    study = participant.study
    study_object_id = study.object_id

    # Upload the user's various identifiers.
    unix_time = str(calendar.timegm(time.gmtime()))
    file_name = patient_id + '/identifiers_' + unix_time + ".csv"

    # Construct a manual csv of the device attributes.
    # NOTE(review): values are inserted without quoting or escaping, so a comma inside any
    # device-supplied value would shift the columns of this row.
    file_contents = (DEVICE_IDENTIFIERS_HEADER + "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" %
                     (patient_id, mac_address, phone_number, device_id, device_os,
                      os_version, product, brand, hardware_id, manufacturer, model,
                      beiwe_version)).encode()
    s3_upload(file_name, file_contents, study_object_id)
    FileToProcess.append_file_for_processing(file_name, study_object_id, participant=participant)

    # set up device.
    participant.device_id = device_id
    participant.os_type = OS_API
    participant.set_password(new_password)  # set password saves the model
    device_settings = study.device_settings.as_unpacked_native_python()
    device_settings.pop('_id', None)

    # set up FCM files; only the credentials matching the participant's OS are loaded.
    firebase_plist_data = None
    firebase_json_data = None
    if participant.os_type == 'IOS':
        ios_credentials = FileAsText.objects.filter(tag=IOS_FIREBASE_CREDENTIALS).first()
        if ios_credentials:
            firebase_plist_data = plistlib.loads(ios_credentials.text.encode())
    elif participant.os_type == 'ANDROID':
        android_credentials = FileAsText.objects.filter(tag=ANDROID_FIREBASE_CREDENTIALS).first()
        if android_credentials:
            firebase_json_data = json.loads(android_credentials.text)

    # ensure the survey schedules are updated for this participant.
    repopulate_all_survey_scheduled_events(study, participant)

    return_obj = {
        'client_public_key': get_client_public_key_string(patient_id, study_object_id),
        'device_settings': device_settings,
        'ios_plist': firebase_plist_data,
        'android_firebase_json': firebase_json_data,
        'study_name': study.name,
        'study_id': study_object_id,
    }
    return json.dumps(return_obj), 200