Example #1
def receive_igv_table_handler(request, project_guid):
    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                can_edit=True)
    info = []

    def _process_alignment_records(rows, **kwargs):
        invalid_row = next((row for row in rows if len(row) != 2), None)
        if invalid_row:
            raise ValueError("Must contain 2 columns: " +
                             ', '.join(invalid_row))
        return {row[0]: row[1] for row in rows}

    try:
        uploaded_file_id, filename, individual_dataset_mapping = save_uploaded_file(
            request, process_records=_process_alignment_records)

        matched_individuals = Individual.objects.filter(
            family__project=project,
            individual_id__in=individual_dataset_mapping.keys())
        unmatched_individuals = set(individual_dataset_mapping.keys()) - {
            i.individual_id
            for i in matched_individuals
        }
        if len(unmatched_individuals) > 0:
            raise Exception(
                'The following Individual IDs do not exist: {}'.format(
                    ", ".join(unmatched_individuals)))

        info.append('Parsed {} rows from {}'.format(
            len(individual_dataset_mapping), filename))

        existing_samples = IgvSample.objects.select_related('individual').filter(
            individual__in=matched_individuals)
        unchanged_individual_ids = {
            s.individual.individual_id for s in existing_samples
            if individual_dataset_mapping[s.individual.individual_id] == s.file_path
        }
        if unchanged_individual_ids:
            info.append('No change detected for {} individuals'.format(
                len(unchanged_individual_ids)))

        updates_by_individual_guid = {
            i.guid: individual_dataset_mapping[i.individual_id]
            for i in matched_individuals
            if i.individual_id not in unchanged_individual_ids
        }

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [str(e)]}, status=400)

    response = {
        'updatesByIndividualGuid': updates_by_individual_guid,
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'info': info,
    }
    return create_json_response(response)
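
The two-column table this handler expects maps each individual ID to a single alignment file path. A minimal sketch with hypothetical IDs and paths, showing what the callback receives and returns:

# Hypothetical rows, as save_uploaded_file would pass them to the callback:
example_rows = [
    ['NA12878', '/data/alignments/NA12878.cram'],
    ['NA12891', '/data/alignments/NA12891.cram'],
]
# _process_alignment_records(example_rows) returns:
# {'NA12878': '/data/alignments/NA12878.cram',
#  'NA12891': '/data/alignments/NA12891.cram'}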
Example #2
def receive_hpo_table_handler(request, project_guid):
    """Handler for bulk update of hpo terms. This handler parses the records, but doesn't save them in the database.
    Instead, it saves them to a temporary file and sends a 'uploadedFileId' representing this file back to the client.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    def process_records(json_records, filename=''):
        records, errors, warnings = _process_hpo_records(json_records, filename, project)
        if errors:
            raise ErrorsWarningsException(errors, warnings)
        return records, warnings

    try:
        uploaded_file_id, _, (json_records, warnings) = save_uploaded_file(request, process_records=process_records)
    except ErrorsWarningsException as e:
        return create_json_response({'errors': e.errors, 'warnings': e.warnings}, status=400, reason=e.errors)
    except Exception as e:
        return create_json_response({'errors': [str(e)], 'warnings': []}, status=400, reason=str(e))

    response = {
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': warnings,
        'info': ['{} individuals will be updated'.format(len(json_records))],
    }
    return create_json_response(response)
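
The ErrorsWarningsException raised and caught in these handlers is not shown in the examples; a minimal sketch consistent with how it is used here, with the constructor signature an assumption:

class ErrorsWarningsException(Exception):
    # Assumed shape: carries parallel lists of error and warning strings so
    # the except clauses above can serialize both into the JSON response.
    def __init__(self, errors, warnings=None):
        super().__init__(str(errors))
        self.errors = errors
        self.warnings = warnings or []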
Example #3
def receive_individuals_table_handler(request, project_guid):
    """Handler for the initial upload of an Excel or .tsv table of individuals. This handler
    parses the records, but doesn't save them in the database. Instead, it saves them to
    a temporary file and sends an 'uploadedFileId' representing this file back to the client. If/when the
    client then wants to 'apply' this table, it can send the uploadedFileId to the
    save_individuals_table(..) handler to actually save the data in the database.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    def parse_file(filename, stream):
        pedigree_records, errors, warnings = parse_pedigree_table(filename, stream, user=request.user, project=project)
        if errors:
            raise ErrorsWarningsException(errors, warnings)
        return pedigree_records

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(request, parse_file)
    except ErrorsWarningsException as e:
        return create_json_response({'errors': e.errors, 'warnings': e.warnings}, status=400, reason=e.errors)
    except Exception as e:
        return create_json_response({'errors': [str(e)], 'warnings': []}, status=400, reason=str(e))

    # send back some stats
    num_families = len(set(r['familyId'] for r in json_records))
    num_individuals = len(set(r['individualId'] for r in json_records))
    num_families_to_create = len([
        family_id for family_id in set(r['familyId'] for r in json_records)
        if not Family.objects.filter(family_id=family_id, project=project)])

    num_individuals_to_create = len(set(
        r['individualId'] for r in json_records
        if not Individual.objects.filter(
            individual_id=r['individualId'],
            family__family_id=r['familyId'],
            family__project=project)))

    info = [
        "{num_families} families, {num_individuals} individuals parsed from {filename}".format(
            num_families=num_families, num_individuals=num_individuals, filename=filename
        ),
        "%d new families, %d new individuals will be added to the project" % (num_families_to_create, num_individuals_to_create),
        "%d existing individuals will be updated" % (num_individuals - num_individuals_to_create),
    ]

    response = {
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': info,
    }
    logger.info(response)
    return create_json_response(response)
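
As the docstring notes, uploading is only phase one of a two-phase flow. A hedged client-side sketch with hypothetical endpoint paths (seqr's actual URL patterns may differ):

import requests

BASE = 'https://seqr.example.org/api/project/R0001_demo'  # hypothetical host and project

# Phase 1: upload the table; nothing is written to the database yet.
with open('individuals.tsv', 'rb') as f:
    upload = requests.post(BASE + '/upload_individuals_table', files={'file': f}).json()

# Phase 2: apply the parsed records by sending the id back to the
# save_individuals_table(..) handler mentioned in the docstring.
requests.post(BASE + '/save_individuals_table/' + upload['uploadedFileId'])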
Example #4
def receive_hpo_table_handler(request, project_guid):
    """Handler for bulk update of hpo terms. This handler parses the records, but doesn't save them in the database.
    Instead, it saves them to a temporary file and sends a 'uploadedFileId' representing this file back to the client.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    try:
        uploaded_file_id, _, json_records = save_uploaded_file(
            request, process_records=_process_hpo_records)
    except Exception as e:
        return create_json_response(
            {'errors': [str(e)], 'warnings': []},
            status=400, reason=str(e))

    updates_by_individual_guid = {}
    missing_individuals = []
    unchanged_individuals = []
    all_hpo_terms = set()
    for record in json_records:
        family_id = record.get(FAMILY_ID_COLUMN, None)
        individual_id = record.get(INDIVIDUAL_ID_COLUMN)
        individual_q = Individual.objects.filter(
            individual_id__in=[
                individual_id, '{}_{}'.format(family_id, individual_id)
            ],
            family__project=project,
        )
        if family_id:
            individual_q = individual_q.filter(family__family_id=family_id)
        individual = individual_q.first()
        if individual:
            features = record.get(FEATURES_COLUMN) or []
            if individual.phenotips_data and features and \
                    _feature_set(features) == _feature_set(json.loads(individual.phenotips_data).get('features', [])):
                unchanged_individuals.append(individual_id)
            else:
                all_hpo_terms.update([feature['id'] for feature in features])
                updates_by_individual_guid[individual.guid] = features
        else:
            missing_individuals.append(individual_id)

    if not updates_by_individual_guid:
        return create_json_response({
            'errors': ['Unable to find individuals to update for any of the {total} parsed individuals.{missing}{unchanged}'.format(
                total=len(missing_individuals) + len(unchanged_individuals),
                missing=' No matching ids found for {} individuals'.format(len(missing_individuals)) if missing_individuals else '',
                unchanged=' No changes detected for {} individuals'.format(len(unchanged_individuals)) if unchanged_individuals else '',
            )],
            'warnings': []
        }, status=400, reason='Unable to find any matching individuals')

    info = ['{} individuals will be updated'.format(len(updates_by_individual_guid))]
    warnings = []
    if missing_individuals:
        warnings.append('Unable to find matching ids for {} individuals. The following entries will not be updated: {}'.format(
            len(missing_individuals), ', '.join(missing_individuals)))
    if unchanged_individuals:
        warnings.append('No changes detected for {} individuals. The following entries will not be updated: {}'.format(
            len(unchanged_individuals), ', '.join(unchanged_individuals)))

    hpo_terms = {hpo.hpo_id: hpo for hpo in HumanPhenotypeOntology.objects.filter(hpo_id__in=all_hpo_terms)}
    invalid_hpo_terms = set()
    for features in updates_by_individual_guid.values():
        for feature in features:
            hpo_data = hpo_terms.get(feature['id'])
            if hpo_data:
                feature['category'] = hpo_data.category_id
                feature['label'] = hpo_data.name
            else:
                invalid_hpo_terms.add(feature['id'])
    if invalid_hpo_terms:
        warnings.append("The following HPO terms were not found in seqr's HPO data, and while they will be added they may be incorrect: {}".format(
            ', '.join(invalid_hpo_terms)))

    response = {
        'updatesByIndividualGuid': updates_by_individual_guid,
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': warnings,
        'info': info,
    }
    return create_json_response(response)
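
The _feature_set helper used for change detection is not defined in these examples; a plausible sketch, assuming a record counts as unchanged when it carries exactly the same set of HPO term ids:

def _feature_set(features):
    # Hypothetical helper: compare feature lists by their HPO ids only,
    # ignoring order and any other per-feature attributes.
    return {feature['id'] for feature in features}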
Example #5
def receive_igv_table_handler(request, project_guid):
    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                can_edit=True)
    info = []

    def _process_alignment_records(rows, **kwargs):
        invalid_row = next((row for row in rows if not 2 <= len(row) <= 3), None)
        if invalid_row:
            raise ValueError("Must contain 2 or 3 columns: " +
                             ', '.join(invalid_row))
        parsed_records = defaultdict(list)
        for row in rows:
            parsed_records[row[0]].append({
                'filePath': row[1],
                'sampleId': row[2] if len(row) > 2 else None,
            })
        return parsed_records

    try:
        uploaded_file_id, filename, individual_dataset_mapping = save_uploaded_file(
            request, process_records=_process_alignment_records)

        matched_individuals = Individual.objects.filter(
            family__project=project,
            individual_id__in=individual_dataset_mapping.keys())
        unmatched_individuals = set(individual_dataset_mapping.keys()) - {
            i.individual_id
            for i in matched_individuals
        }
        if len(unmatched_individuals) > 0:
            raise Exception(
                'The following Individual IDs do not exist: {}'.format(
                    ", ".join(unmatched_individuals)))

        info.append('Parsed {} rows in {} individuals from {}'.format(
            sum([len(rows) for rows in individual_dataset_mapping.values()]),
            len(individual_dataset_mapping), filename))

        existing_sample_files = defaultdict(set)
        for sample in IgvSample.objects.select_related('individual').filter(
                individual__in=matched_individuals):
            existing_sample_files[sample.individual.individual_id].add(
                sample.file_path)

        unchanged_rows = set()
        for individual_id, updates in individual_dataset_mapping.items():
            unchanged_rows.update([
                (individual_id, update['filePath']) for update in updates
                if update['filePath'] in existing_sample_files[individual_id]
            ])

        if unchanged_rows:
            info.append('No change detected for {} rows'.format(
                len(unchanged_rows)))

        all_updates = []
        for i in matched_individuals:
            all_updates += [
                dict(individualGuid=i.guid, **update)
                for update in individual_dataset_mapping[i.individual_id]
                if (i.individual_id, update['filePath']) not in unchanged_rows
            ]

    except Exception as e:
        return create_json_response({'errors': [str(e)]}, status=400)

    response = {
        'updates': all_updates,
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'info': info,
    }
    return create_json_response(response)
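
Compared with Example #1, the parser now accepts an optional third sampleId column and groups multiple files per individual. Hypothetical input showing both row shapes and the grouped result:

rows = [
    ['NA12878', '/data/NA12878.cram', 'SM-12878'],
    ['NA12878', '/data/NA12878.rna.bam'],  # no sampleId column
]
# _process_alignment_records(rows) returns:
# {'NA12878': [
#     {'filePath': '/data/NA12878.cram', 'sampleId': 'SM-12878'},
#     {'filePath': '/data/NA12878.rna.bam', 'sampleId': None},
# ]}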
Example #6
def receive_individuals_table_handler(request, project_guid):
    """Handler for the initial upload of an Excel or .tsv table of individuals. This handler
    parses the records, but doesn't save them in the database. Instead, it saves them to
    a temporary file and sends an 'uploadedFileId' representing this file back to the client. If/when the
    client then wants to 'apply' this table, it can send the uploadedFileId to the
    save_individuals_table(..) handler to actually save the data in the database.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    def process_records(json_records, filename='ped_file'):
        pedigree_records, errors, warnings = parse_pedigree_table(json_records, filename, user=request.user, project=project)
        if errors:
            raise ErrorsWarningsException(errors, warnings)
        return pedigree_records

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(request, process_records=process_records)
    except ErrorsWarningsException as e:
        return create_json_response({'errors': e.errors, 'warnings': e.warnings}, status=400, reason=e.errors)
    except Exception as e:
        return create_json_response({'errors': [str(e)], 'warnings': []}, status=400, reason=str(e))

    # send back some stats
    individual_ids_by_family = defaultdict(list)
    for r in json_records:
        if r.get(JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN):
            individual_ids_by_family[r[JsonConstants.FAMILY_ID_COLUMN]].append(
                (r[JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN], True)
            )
        else:
            individual_ids_by_family[r[JsonConstants.FAMILY_ID_COLUMN]].append(
                (r[JsonConstants.INDIVIDUAL_ID_COLUMN], False)
            )

    num_individuals = sum([len(indiv_ids) for indiv_ids in individual_ids_by_family.values()])
    num_existing_individuals = 0
    missing_prev_ids = []
    for family_id, indiv_ids in individual_ids_by_family.items():
        existing_individuals = {i.individual_id for i in Individual.objects.filter(
            individual_id__in=[indiv_id for (indiv_id, _) in indiv_ids], family__family_id=family_id, family__project=project
        ).only('individual_id')}
        num_existing_individuals += len(existing_individuals)
        missing_prev_ids += [indiv_id for (indiv_id, is_previous) in indiv_ids if is_previous and indiv_id not in existing_individuals]
    num_individuals_to_create = num_individuals - num_existing_individuals
    if missing_prev_ids:
        return create_json_response(
            {'errors': [
                'Could not find individuals with the following previous IDs: {}'.format(', '.join(missing_prev_ids))
            ], 'warnings': []},
            status=400, reason='Invalid input')

    family_ids = set(r[JsonConstants.FAMILY_ID_COLUMN] for r in json_records)
    num_families = len(family_ids)
    num_existing_families = Family.objects.filter(family_id__in=family_ids, project=project).count()
    num_families_to_create = num_families - num_existing_families

    info = [
        "{num_families} families, {num_individuals} individuals parsed from {filename}".format(
            num_families=num_families, num_individuals=num_individuals, filename=filename
        ),
        "{} new families, {} new individuals will be added to the project".format(num_families_to_create, num_individuals_to_create),
        "{} existing individuals will be updated".format(num_existing_individuals),
    ]

    response = {
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': info,
    }
    logger.info(response)
    return create_json_response(response)
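
The (id, is_previous) pairs built above drive both the update count and the missing-previous-ID check. Hypothetical records illustrating the grouping, assuming the JsonConstants columns serialize as 'familyId', 'individualId', and 'previousIndividualId':

json_records = [
    {'familyId': 'F001', 'individualId': 'NA12878-v2', 'previousIndividualId': 'NA12878'},
    {'familyId': 'F001', 'individualId': 'NA12891'},
]
# individual_ids_by_family becomes {'F001': [('NA12878', True), ('NA12891', False)]}.
# 'NA12878' is looked up as an existing individual (is_previous=True); if it is
# not found in the project, it is reported via missing_prev_ids.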
Example #7
def receive_alignment_table_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
        }

        Response body - will contain the following structure:

    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)
    info = []

    def _process_alignment_records(rows, **kwargs):
        invalid_row = next((row for row in rows if len(row) != 2), None)
        if invalid_row:
            raise ValueError("Must contain 2 columns: " + ', '.join(invalid_row))
        return {row[0]: row[1] for row in rows}

    try:
        uploaded_file_id, filename, individual_dataset_mapping = save_uploaded_file(request, process_records=_process_alignment_records)

        matched_individuals = Individual.objects.filter(family__project=project, individual_id__in=individual_dataset_mapping.keys())
        unmatched_individuals = set(individual_dataset_mapping.keys()) - {i.individual_id for i in matched_individuals}
        if len(unmatched_individuals) > 0:
            raise Exception('The following Individual IDs do not exist: {}'.format(", ".join(unmatched_individuals)))

        info.append('Parsed {} rows from {}'.format(len(individual_dataset_mapping), filename))

        existing_samples = Sample.objects.select_related('individual').filter(
            individual__in=matched_individuals,
            dataset_type=Sample.DATASET_TYPE_READ_ALIGNMENTS,
            is_active=True
        )
        unchanged_individual_ids = {s.individual.individual_id for s in existing_samples
                                    if individual_dataset_mapping[s.individual.individual_id] == s.dataset_file_path}
        if unchanged_individual_ids:
            info.append('No change detected for {} individuals'.format(len(unchanged_individual_ids)))

        updates_by_individual_guid = {i.guid: individual_dataset_mapping[i.individual_id] for i in matched_individuals
                                      if i.individual_id not in unchanged_individual_ids}

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [str(e)]}, status=400)

    response = {
        'updatesByIndividualGuid': updates_by_individual_guid,
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'info': info,
    }
    return create_json_response(response)
Example #8
def receive_families_table_handler(request, project_guid):
    """Handler for the initial upload of an Excel or .tsv table of families. This handler
    parses the records, but doesn't save them in the database. Instead, it saves them to
    a temporary file and sends an 'uploadedFileId' representing this file back to the client.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    def _process_records(records, filename=''):
        column_map = {}
        for i, field in enumerate(records[0]):
            key = field.lower()
            if 'family' in key:
                if 'prev' in key:
                    column_map[PREVIOUS_FAMILY_ID_FIELD] = i
                else:
                    column_map[FAMILY_ID_FIELD] = i
            elif 'display' in key:
                column_map['displayName'] = i
            elif 'description' in key:
                column_map['description'] = i
            elif 'phenotype' in key:
                column_map['codedPhenotype'] = i
        if FAMILY_ID_FIELD not in column_map:
            raise ValueError('Invalid header, missing family id column')

        return [{
            column: row[index] if isinstance(index, int) else next(
                (row[i] for i in index if row[i]), None)
            for column, index in column_map.items()
        } for row in records[1:]]

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(
            request, process_records=_process_records)
    except Exception as e:
        return create_json_response(
            {'errors': [str(e)], 'warnings': []},
            status=400, reason=str(e))

    prev_fam_ids = {
        r[PREVIOUS_FAMILY_ID_FIELD]
        for r in json_records if r.get(PREVIOUS_FAMILY_ID_FIELD)
    }
    existing_prev_fam_ids = {
        f.family_id
        for f in Family.objects.filter(family_id__in=prev_fam_ids,
                                       project=project).only('family_id')
    }
    if len(prev_fam_ids) != len(existing_prev_fam_ids):
        missing_prev_ids = [
            family_id for family_id in prev_fam_ids
            if family_id not in existing_prev_fam_ids
        ]
        return create_json_response(
            {'errors': [
                'Could not find families with the following previous IDs: {}'.format(', '.join(missing_prev_ids))
            ], 'warnings': []},
            status=400, reason='Invalid input')

    fam_ids = {
        r[FAMILY_ID_FIELD]
        for r in json_records if not r.get(PREVIOUS_FAMILY_ID_FIELD)
    }
    num_families_to_update = len(prev_fam_ids) + Family.objects.filter(
        family_id__in=fam_ids, project=project).count()

    num_families = len(json_records)
    num_families_to_create = num_families - num_families_to_update

    info = [
        "{num_families} families parsed from {filename}".format(num_families=num_families, filename=filename),
        "{} new families will be added, {} existing families will be updated".format(num_families_to_create, num_families_to_update),
    ]

    return create_json_response({
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': info,
    })
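
The header scan in _process_records matches on substrings, so exact column wording and order are flexible. A hypothetical table, assuming FAMILY_ID_FIELD == 'familyId' and PREVIOUS_FAMILY_ID_FIELD == 'previousFamilyId':

records = [
    ['Family ID', 'Previous Family ID', 'Display Name', 'Description'],
    ['F001', 'OLD-F001', 'Family One', 'Trio with affected proband'],
]
# The header scan yields:
# column_map == {'familyId': 0, 'previousFamilyId': 1, 'displayName': 2, 'description': 3}
# and each data row becomes:
# {'familyId': 'F001', 'previousFamilyId': 'OLD-F001',
#  'displayName': 'Family One', 'description': 'Trio with affected proband'}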
Example #9
def receive_hpo_table_handler(request, project_guid):
    """Handler for bulk update of hpo terms. This handler parses the records, but doesn't save them in the database.
    Instead, it saves them to a temporary file and sends a 'uploadedFileId' representing this file back to the client.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    try:
        uploaded_file_id, _, json_records = save_uploaded_file(request, process_records=_process_hpo_records)
    except Exception as e:
        return create_json_response({'errors': [str(e)], 'warnings': []}, status=400, reason=str(e))

    updates_by_individual_guid = {}
    missing_individuals = []
    unchanged_individuals = []
    all_hpo_terms = set()
    for record in json_records:
        family_id = record.get(FAMILY_ID_COLUMN, None)
        individual_id = record.get(INDIVIDUAL_ID_COLUMN)
        individual_q = Individual.objects.filter(
            individual_id__in=[individual_id, '{}_{}'.format(family_id, individual_id)],
            family__project=project,
        )
        if family_id:
            individual_q = individual_q.filter(family__family_id=family_id)
        individual = individual_q.first()
        if individual:
            features = record.get(FEATURES_COLUMN) or []
            if individual.phenotips_data and features and \
                    _feature_set(features) == _feature_set(json.loads(individual.phenotips_data).get('features', [])):
                unchanged_individuals.append(individual_id)
            else:
                all_hpo_terms.update([feature['id'] for feature in features])
                updates_by_individual_guid[individual.guid] = features
        else:
            missing_individuals.append(individual_id)

    if not updates_by_individual_guid:
        return create_json_response({
            'errors': ['Unable to find individuals to update for any of the {total} parsed individuals.{missing}{unchanged}'.format(
                total=len(missing_individuals) + len(unchanged_individuals),
                missing=' No matching ids found for {} individuals'.format(len(missing_individuals)) if missing_individuals else '',
                unchanged=' No changes detected for {} individuals'.format(len(unchanged_individuals)) if unchanged_individuals else '',
            )],
            'warnings': []
        }, status=400, reason='Unable to find any matching individuals')

    hpo_terms = {hpo.hpo_id: hpo for hpo in HumanPhenotypeOntology.objects.filter(hpo_id__in=all_hpo_terms)}
    invalid_hpo_terms = set()
    for features in updates_by_individual_guid.values():
        for feature in features:
            hpo_data = hpo_terms.get(feature['id'])
            if hpo_data:
                feature['category'] = hpo_data.category_id
                feature['label'] = hpo_data.name
            else:
                invalid_hpo_terms.add(feature['id'])
    if invalid_hpo_terms:
        return create_json_response({
            'errors': [
                "The following HPO terms were not found in seqr's HPO data: {}".format(', '.join(invalid_hpo_terms))
            ],
            'warnings': []
        }, status=400, reason='Invalid HPO terms')

    info = ['{} individuals will be updated'.format(len(updates_by_individual_guid))]
    warnings = []
    if missing_individuals:
        warnings.append(
            'Unable to find matching ids for {} individuals. The following entries will not be updated: {}'.format(
                len(missing_individuals), ', '.join(missing_individuals)
            ))
    if unchanged_individuals:
        warnings.append(
            'No changes detected for {} individuals. The following entries will not be updated: {}'.format(
                len(unchanged_individuals), ', '.join(unchanged_individuals)
            ))

    response = {
        'updatesByIndividualGuid': updates_by_individual_guid,
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': warnings,
        'info': info,
    }
    return create_json_response(response)
Example #10
def receive_families_table_handler(request, project_guid):
    """Handler for the initial upload of an Excel or .tsv table of families. This handler
    parses the records, but doesn't save them in the database. Instead, it saves them to
    a temporary file and sends an 'uploadedFileId' representing this file back to the client.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    """

    project = get_project_and_check_permissions(project_guid, request.user)

    def _process_records(records, filename=''):
        column_map = {}
        for i, field in enumerate(records[0]):
            key = field.lower()
            if 'family' in key:
                if 'prev' in key:
                    column_map[PREVIOUS_FAMILY_ID_FIELD] = i
                else:
                    column_map[FAMILY_ID_FIELD] = i
            elif 'display' in key:
                column_map['displayName'] = i
            elif 'description' in key:
                column_map['description'] = i
            elif 'phenotype' in key:
                column_map['codedPhenotype'] = i
        if FAMILY_ID_FIELD not in column_map:
            raise ValueError('Invalid header, missing family id column')

        return [{column: row[index] if isinstance(index, int) else next((row[i] for i in index if row[i]), None)
                for column, index in column_map.items()} for row in records[1:]]

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(request, process_records=_process_records)
    except Exception as e:
        return create_json_response({'errors': [str(e)], 'warnings': []}, status=400, reason=str(e))

    prev_fam_ids = {r[PREVIOUS_FAMILY_ID_FIELD] for r in json_records if r.get(PREVIOUS_FAMILY_ID_FIELD)}
    existing_prev_fam_ids = {f.family_id for f in Family.objects.filter(family_id__in=prev_fam_ids, project=project).only('family_id')}
    if len(prev_fam_ids) != len(existing_prev_fam_ids):
        missing_prev_ids = [family_id for family_id in prev_fam_ids if family_id not in existing_prev_fam_ids]
        return create_json_response(
            {'errors': [
                'Could not find families with the following previous IDs: {}'.format(', '.join(missing_prev_ids))
            ], 'warnings': []},
            status=400, reason='Invalid input')

    fam_ids = {r[FAMILY_ID_FIELD] for r in json_records if not r.get(PREVIOUS_FAMILY_ID_FIELD)}
    num_families_to_update = len(prev_fam_ids) + Family.objects.filter(family_id__in=fam_ids, project=project).count()

    num_families = len(json_records)
    num_families_to_create = num_families - num_families_to_update

    info = [
        "{num_families} families parsed from {filename}".format(num_families=num_families, filename=filename),
        "{} new families will be added, {} existing families will be updated".format(num_families_to_create, num_families_to_update),
    ]

    return create_json_response({
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': info,
    })