import datetime
import json
import sys
import traceback
import uuid
from typing import Any, Dict

from flask import abort, jsonify, request
from werkzeug.exceptions import Forbidden

# Repo-local helpers and models (assert_user_has, parse_paging_token,
# get_analysis_page, Approval, UploadMetadataFields, app, etc.) are assumed
# to be imported from sibling modules of this package.


def get_analysis(user, token_info, paging_token, page_size):
    assert_user_has("search", token_info)
    default_token = {"page_size": page_size or 100, "offset": 0}
    token = parse_paging_token(paging_token) or default_token
    # If user has 'own-institution' clearance, pass an implicit filter to the query
    institution_filter = (
        token_info["institution"]
        if token_info["sofi-data-clearance"] == "own-institution"
        else False
    )
    items = get_analysis_page(
        token.get("query", {}),
        token["page_size"],
        token["offset"],
        authorized_columns(token_info),
        institution_filter,
    )
    count = get_analysis_count(token.get("query", {}))
    # Only hand out a next-page token if this page was full.
    new_token = (
        None
        if len(items) < token["page_size"]
        else render_paging_token(
            token["page_size"],
            token.get("query", {}),
            token["offset"] + token["page_size"],
        )
    )
    response = {
        "items": items,
        "paging_token": new_token,
        "total_count": count,
        "approval_matrix": {},
    }
    audit_query(token_info, items)
    return jsonify(response)

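# --- Illustrative sketch (assumption, not part of this module) ---------------
# parse_paging_token / render_paging_token are defined elsewhere in the repo.
# Judging by how get_analysis uses them, the token is assumed to be an opaque,
# URL-safe blob carrying page_size, query, and offset; a minimal sketch:
import base64

def _sketch_render_paging_token(page_size, query, offset):
    payload = json.dumps({"page_size": page_size, "query": query, "offset": offset})
    return base64.urlsafe_b64encode(payload.encode("utf-8")).decode("ascii")

def _sketch_parse_paging_token(token):
    # Returns None for a missing token so the caller can fall back to defaults.
    if not token:
        return None
    return json.loads(base64.urlsafe_b64decode(token.encode("ascii")))
# ------------------------------------------------------------------------------
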
def bulk_metadata(user, token_info, path, metadata_tsv):
    assert_user_has("approve", token_info)
    metadata_list = validate_metadata_tsv(metadata_tsv)
    for m in metadata_list:
        assert_authorized_to_edit(token_info, m)
    errors = []
    sequence_names = []
    for m in metadata_list:
        sequence_names.extend(m["sequence_filename"].split())
    # Strip surrounding whitespace first, then any quoting, so a payload like
    # ' "/some/dir"\n' reduces to /some/dir.
    trimmed_path = path.read().decode("utf-8").strip().strip('"')
    existing_sequences, missing_sequences = check_bulk_isolate_exists(
        trimmed_path, sequence_names
    )
    if existing_sequences:
        try:
            metadata = [UploadMetadataFields.from_dict(m) for m in metadata_list]
            upload_metadata_list(metadata)
        except Exception as e:
            errors.append(f"Error: {str(e)}")
    errors.extend(
        f"Missing {filename} in directory" for filename in missing_sequences
    )
    return upload_response_helper(errors)

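# Illustrative note (assumption): each row returned by validate_metadata_tsv
# is assumed to be a dict whose "sequence_filename" value may name several
# space-separated read files, which is why bulk_metadata splits it, e.g.:
#   {"isolate_id": "iso1",
#    "sequence_filename": "iso1_R1.fastq.gz iso1_R2.fastq.gz"}
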
def create_approval(user, token_info, body: ApprovalRequest):
    assert_user_has("approve", token_info)
    appr = Approval()
    appr.matrix = body.matrix
    appr.approver = user
    appr.timestamp = datetime.datetime.now()
    appr.status = "submitted"
    appr.id = str(uuid.uuid4())
    # Set approval dates on approved sequences before sending them for
    # approval, so the timestamps can be transferred to upstream metadata
    # services, if needed.
    analysis_timestamp_updates = {}
    for seq in body.matrix:
        fields = body.matrix[seq]
        # Start a fresh update per sequence so timestamps don't leak between
        # sequences.
        seq_update = {}
        # Find dates that were already approved, for the incremental approval
        # case.
        existing_matrix = get_approval_matrix(seq)
        existing_matrix.update(fields)
        time_fields = find_approved_categories(fields)
        # When approving date_epi, automatically generate the timestamp.
        if fields.get("date_epi", False) and "date_epi" not in time_fields:
            time_fields.append("date_epi")
        for f in time_fields:
            seq_update[f] = appr.timestamp
        analysis_timestamp_updates[seq] = seq_update
    update_analysis(analysis_timestamp_updates)
    errors_tuple = handle_approvals(appr, token_info["institution"])
    errors = []
    analysis_timestamp_reverts = {}
    for error_seq_id, error in errors_tuple:
        time_fields = find_approved_categories(appr.matrix[error_seq_id])
        # Revert every timestamp set above, not just the last field.
        analysis_timestamp_reverts[error_seq_id] = {f: None for f in time_fields}
        del appr.matrix[error_seq_id]
        errors.append(error)
    # If any sequences errored out on the metadata service, revert their
    # approval timestamps.
    update_analysis(analysis_timestamp_reverts)
    # Insert the approval only after the matrix has been pruned of failed
    # sequences.
    res = insert_approval(token_info["email"], appr)
    return (
        jsonify({"success": appr.to_dict(), "error": errors})
        if res is not None
        else abort(400)
    )

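# --- Illustrative sketch (assumption, not part of this module) ---------------
# find_approved_categories is defined elsewhere in the repo. From its use in
# create_approval it is assumed to return the names of the fields marked
# approved (truthy) in a per-sequence field map:
def _sketch_find_approved_categories(fields):
    return [name for name, approved in fields.items() if approved]

# e.g. _sketch_find_approved_categories({"st": True, "qc": False}) == ["st"]
# (field names here are hypothetical), so only approved categories receive an
# approval timestamp above.
# ------------------------------------------------------------------------------
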
def single_upload(user, token_info, metadata, _files):
    assert_user_has("approve", token_info)
    base_metadata: UploadMetadataFields = UploadMetadataFields.from_dict(
        json.loads(metadata.read())
    )
    assert_authorized_to_edit(token_info, base_metadata.to_dict())
    try:
        files = request.files.getlist("files")
        upload_isolate(base_metadata, files, token_info["institution"])
        return upload_response_helper()
    except Exception as e:
        # str(e) is safe even for exceptions raised without args.
        return upload_response_helper([str(e)])

def extract_data_from_pi(user, token_info, identifier_type=None, identifier=None):
    assert_user_has("gdpr.manage", token_info)
    document = personal_data_from_identifier(identifier_type, identifier)
    res = personal_data_to_text(document)
    mail = token_info["email"]
    # TODO: should we log the identifier here? would be PII itself
    app.logger.info(
        f"[GDPR Audit]: User -{mail}- extracted personally identifiable "
        f"information via {identifier_type} {identifier}"
    )
    return PersonalData(data=res)

def multi_upload(user, token_info, metadata_tsv, _files):
    assert_user_has("approve", token_info)
    # Files aren't properly routed from connexion; use these files instead:
    files = request.files.getlist("files")
    errors = []
    try:
        metadata_list = validate_metadata_tsv(metadata_tsv)
        metadata_map = {item["sequence_filename"]: item for item in metadata_list}
        filenames = [x.filename for x in files]
        for key, metadata in metadata_map.items():
            split_filenames = key.split()
            # Every file named in this TSV row must be among the uploads.
            if not all(f in filenames for f in split_filenames):
                errors.append(f"Could not find files {key} among the uploaded files.")
                continue
            files_for_metadata = [f for f in files if f.filename in split_filenames]
            try:
                if metadata:
                    base_metadata = UploadMetadataFields.from_dict(metadata)
                    current_errors = validate_metadata(
                        base_metadata, files_for_metadata
                    )
                    if authorized_to_edit(token_info, base_metadata):
                        if not current_errors:
                            upload_isolate(
                                base_metadata,
                                files_for_metadata,
                                token_info["institution"],
                            )
                        else:
                            errors.extend(current_errors)
                    else:
                        errors.append(
                            f"You are not authorized to edit isolate "
                            f"-{base_metadata.isolate_id}-"
                        )
            except Exception as e:
                print(traceback.format_exc(), file=sys.stderr)
                errors.append(f"Error with files: {key}, {str(e)}")
                continue
    except Exception as e:
        print(e, file=sys.stderr)
    return upload_response_helper(errors)

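# Illustrative client call (the endpoint path and form-field names are
# assumptions based on this handler's parameters, not confirmed by the repo):
#   curl -F "metadata_tsv=@isolates.tsv" \
#        -F "files=@iso1_R1.fastq.gz" -F "files=@iso1_R2.fastq.gz" \
#        https://<host>/<multi-upload-endpoint>
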
def get_sequence_by_id(user, token_info, sequence_id):
    assert_user_has("search", token_info)
    row = get_single_analysis(sequence_id)
    if row is None:
        abort(404)
    if (
        token_info["sofi-data-clearance"] == "own-institution"
        and token_info["institution"] != row["institution"]
    ):
        abort(404)
    allowed_cols = authorized_columns(token_info)
    # Only return columns user is allowed to see
    for key in list(row.keys()):
        if key not in allowed_cols:
            del row[key]
    return jsonify(row)

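# --- Illustrative sketch (assumption, not part of this module) ---------------
# authorized_columns is defined elsewhere in the repo. From its use here and
# in get_analysis it is assumed to map the caller's token claims to the set of
# column names that caller may see; the claim handling and column names below
# are hypothetical:
def _sketch_authorized_columns(token_info):
    visible = {"sequence_id", "institution", "date_sample"}  # hypothetical
    if token_info.get("sofi-data-clearance") == "all":  # hypothetical value
        visible.add("cross_institution_notes")  # hypothetical column
    return visible
# ------------------------------------------------------------------------------
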
def submit_changes(
    user, token_info: Dict[str, str], body: Dict[str, Any]
) -> Dict[str, Dict[str, Any]]:
    assert_user_has("approve", token_info)
    updates = list(body.keys())
    allowed_cols = authorized_columns(token_info)
    for identifier in updates:
        row = get_single_analysis(identifier)
        # Make sure user is allowed to modify this row
        assert_authorized_to_edit(token_info, row)
        for col in body[identifier].keys():
            # Make sure user is allowed to modify that column
            if col not in allowed_cols:
                raise Forbidden(f"You are not authorized to edit column -{col}-")
    # TODO: Verify that none of these cells are already approved
    update_analysis(body)
    res = dict()
    for u in updates:
        res[u] = get_single_analysis(u)
    return jsonify(res)

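# Illustrative request body for submit_changes (shape inferred from the code):
# {
#     "<sequence_id>": {"<column>": "<new value>", ...},
#     ...
# }
# The handler re-reads each row after update_analysis and returns the fresh
# rows keyed by sequence id.
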
def forget_pii(user, token_info, identifier_type, identifier):
    assert_user_has("gdpr.manage", token_info)
    res, ids = forget_user_data(identifier_type, identifier)
    audit_gdpr_forget(token_info, ids)
    return res

def cancel_approval(user, token_info, approval_id: str):
    assert_user_has("approve", token_info)
    res = revoke_approval(token_info["email"], approval_id)
    return None if res.modified_count > 0 else abort(404)

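# --- Illustrative sketch (assumption, not part of this module) ---------------
# revoke_approval is defined elsewhere; res.modified_count suggests it returns
# a pymongo UpdateResult, so a minimal sketch might look like this (the
# collection, filter keys, and status value are assumptions):
def _sketch_revoke_approval(approvals_collection, approver_email, approval_id):
    return approvals_collection.update_one(
        {"id": approval_id, "approver": approver_email},
        {"$set": {"status": "cancelled"}},
    )
# ------------------------------------------------------------------------------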