}

MODEL_USER_FIELDS = MAIN_USER_FIELDS + list(BOOL_USER_FIELDS.keys())

# Fields computed at serialization time rather than read directly off the User
# model. Each lambda accepts **kwargs so callers may pass precomputed values
# (is_anvil, analyst_users, pm_users) and avoid redundant permission lookups.
COMPUTED_USER_FIELDS = {
    'is_anvil': lambda user, is_anvil=None, **kwargs: is_anvil_authenticated(user) if is_anvil is None else is_anvil,
    'display_name': lambda user, **kwargs: user.get_full_name(),
    'is_analyst': lambda user, analyst_users=None, **kwargs: user in analyst_users if analyst_users is not None else user_is_analyst(user),
    'is_data_manager': lambda user, **kwargs: user_is_data_manager(user),
    'is_pm': lambda user, pm_users=None, **kwargs: user in pm_users if pm_users is not None else user_is_pm(user),
}

DEFAULT_USER = {_to_camel_case(field): '' for field in MAIN_USER_FIELDS}
DEFAULT_USER.update({_to_camel_case(field): val for field, val in BOOL_USER_FIELDS.items()})
DEFAULT_USER.update({_to_camel_case(field): False for field in COMPUTED_USER_FIELDS.keys()})
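# --- Illustrative sketch, not part of the original module ---
# A minimal, hypothetical example of evaluating COMPUTED_USER_FIELDS for a
# single user. Because every lambda accepts **kwargs, one keyword set can be
# passed uniformly; analyst_users and pm_users are assumed here to be
# prefetched collections of privileged users:
#
#   computed = {
#       _to_camel_case(field): get_value(
#           user, analyst_users=analyst_users, pm_users=pm_users)
#       for field, get_value in COMPUTED_USER_FIELDS.items()
#   }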


def _get_json_for_user(user, is_anvil=None, fields=None, analyst_users=None, pm_users=None):
    """Returns JSON representation of the given User object

    Args:
def parse_pedigree_table(parsed_file, filename, user, project=None):
    """Validates and parses pedigree information from a .fam, .tsv, or Excel file.

    Args:
        parsed_file (array): The parsed output from the raw file.
        filename (string): The original filename - used to determine the file format based on the suffix.
        user (User): (optional) Django User object
        project (Project): (optional) Django Project object

    Return:
        A 3-tuple that contains:
            (
                json_records (list): list of dictionaries, with each dictionary containing info about
                    one of the individuals in the input data
                errors (list): list of error message strings
                warnings (list): list of warning message strings
            )
    """
    json_records = []
    errors = []
    warnings = []
    is_merged_pedigree_sample_manifest = False

    # parse rows from file
    try:
        rows = [row for row in parsed_file[1:] if row and not (row[0] or '').startswith('#')]

        header_string = str(parsed_file[0])
        is_datstat_upload = 'DATSTAT' in header_string
        is_merged_pedigree_sample_manifest = "do not modify" in header_string.lower() and "Broad" in header_string
        if is_merged_pedigree_sample_manifest:
            if not user_is_pm(user):
                raise ValueError('Unsupported file format')
            # the merged pedigree/sample manifest has 3 header rows, so use the known header and skip the next 2 rows.
            headers = rows[:2]
            rows = rows[2:]

            # validate manifest_header_row1
            expected_header_columns = MergedPedigreeSampleManifestConstants.MERGED_PEDIGREE_SAMPLE_MANIFEST_COLUMN_NAMES
            expected_header_1_columns = expected_header_columns[:4] + ["Alias", "Alias"] + expected_header_columns[6:]

            expected = expected_header_1_columns
            actual = headers[0]
            if expected == actual:
                expected = expected_header_columns[4:6]
                actual = headers[1][4:6]

            unexpected_header_columns = '|'.join(difflib.unified_diff(expected, actual)).split('\n')[3:]
            if unexpected_header_columns:
                raise ValueError("Expected vs. actual header columns: {}".format("\t".join(unexpected_header_columns)))

            header = expected_header_columns
        else:
            if _is_header_row(header_string):
                header_row = parsed_file[0]
            else:
                header_row = next(
                    (row for row in parsed_file[1:] if row[0].startswith('#') and _is_header_row(','.join(row))),
                    ['family_id', 'individual_id', 'paternal_id', 'maternal_id', 'sex', 'affected'])
            header = [(field or '').strip('#') for field in header_row]

        for i, row in enumerate(rows):
            if len(row) != len(header):
                raise ValueError("Row {} contains {} columns: {}, while header contains {}: {}".format(
                    i + 1, len(row), ', '.join(row), len(header), ', '.join(header)))

        rows = [dict(zip(header, row)) for row in rows]
    except Exception as e:
        errors.append("Error while parsing file: %(filename)s. %(e)s" % locals())
        return json_records, errors, warnings

    # convert to json and validate
    try:
        if is_merged_pedigree_sample_manifest:
            logger.info("Parsing merged pedigree-sample-manifest file")
            rows, sample_manifest_rows, kit_id = _parse_merged_pedigree_sample_manifest_format(rows)
        elif is_datstat_upload:
            logger.info("Parsing datstat export file")
            rows = _parse_datstat_export_format(rows)
        else:
            logger.info("Parsing regular pedigree file")

        json_records = _convert_fam_file_rows_to_json(rows)
    except Exception as e:
        errors.append("Error while converting %(filename)s rows to json: %(e)s" % locals())
        return json_records, errors, warnings

    errors, warnings = validate_fam_file_records(json_records)

    if not errors and is_merged_pedigree_sample_manifest:
        _send_sample_manifest(sample_manifest_rows, kit_id, filename, parsed_file, user, project)

    return json_records, errors, warnings
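# --- Illustrative usage sketch, not part of the original module ---
# A minimal example of calling parse_pedigree_table with an already-tokenized
# file. The rows, filename, and `user` below are hypothetical; the header row
# matches the fallback header used above when no header row is detected.
#
#   parsed_file = [
#       ['family_id', 'individual_id', 'paternal_id', 'maternal_id', 'sex', 'affected'],
#       ['FAM1', 'IND1', '', '', 'M', 'A'],
#   ]
#   json_records, errors, warnings = parse_pedigree_table(parsed_file, 'pedigree.tsv', user)
#   if errors:
#       pass  # surface parsing/validation errors to the caller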
from seqr.models import Individual, IgvSample
from seqr.utils.file_utils import file_iter, does_file_exist
from seqr.views.utils.file_utils import save_uploaded_file
from seqr.views.utils.json_to_orm_utils import get_or_create_model_from_json
from seqr.views.utils.json_utils import create_json_response
from seqr.views.utils.orm_to_json_utils import get_json_for_sample
from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \
    user_is_data_manager, user_is_pm
from settings import API_LOGIN_REQUIRED_URL

import logging

logger = logging.getLogger(__name__)

pm_or_data_manager_required = user_passes_test(
    lambda user: user_is_data_manager(user) or user_is_pm(user),
    login_url=API_LOGIN_REQUIRED_URL)


@pm_or_data_manager_required
def receive_igv_table_handler(request, project_guid):
    project = get_project_and_check_permissions(project_guid, request.user, can_edit=True)
    info = []

    def _process_alignment_records(rows, **kwargs):
        invalid_row = next((row for row in rows if not 2 <= len(row) <= 3), None)
        if invalid_row:
            raise ValueError("Must contain 2 or 3 columns: " +