Exemple #1
0
    def test_temp_file_upload(self):
        """Check the temp file upload endpoint: input validation, one-time loading and parsed output."""
        upload_url = reverse(save_temp_file)
        self.check_require_login(upload_url)

        # Posting two files at once is rejected
        two_files = {
            'f': SimpleUploadedFile("test_data.tsv", TSV_DATA),
            'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA),
        }
        resp = self.client.post(upload_url, two_files)
        self.assertEqual(resp.status_code, 400)
        self.assertDictEqual(resp.json(), {'errors': ['Received 2 files instead of 1']})

        # A file with an unsupported extension is rejected
        bad_extension = {'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA)}
        resp = self.client.post(upload_url, bad_extension)
        self.assertEqual(resp.status_code, 400)
        self.assertDictEqual(resp.json(), {'errors': ['Unexpected file type: test_data.foo']})

        # A single valid file is accepted and reported back with an id
        resp = self.client.post(upload_url, {'f': SimpleUploadedFile("test_data.tsv", TSV_DATA)})
        self.assertEqual(resp.status_code, 200)
        upload_info = resp.json()
        self.assertDictEqual(upload_info, {
            'info': ['Parsed 3 rows from test_data.tsv'],
            'uploadedFileId': mock.ANY,
        })

        # The uploaded file can be loaded by its id ...
        file_id = upload_info['uploadedFileId']
        self.assertListEqual(load_uploaded_file(file_id), PARSED_DATA)
        # ... but only once: a second load must fail because the file is gone
        with self.assertRaises(IOError):
            load_uploaded_file(file_id)

        # Build an Excel workbook that carries the same table as the other fixtures
        workbook = xl.Workbook()
        sheet = workbook[workbook.sheetnames[0]]
        sheet_rows = (
            ['Family ID', 'Individual ID', 'Notes'],
            [1, 'NA19675', 'An affected individual, additional metadata'],
            [0, 'NA19678'],
            [''],  # for testing trimming trailing empty rows
        )
        for row_number, values in enumerate(sheet_rows, start=1):
            for column, value in zip('ABC', values):
                sheet['{}{}'.format(column, row_number)] = value

        with NamedTemporaryFile() as workbook_file:
            workbook.save(workbook_file)
            workbook_file.seek(0)
            xlsx_data = workbook_file.read()

        # Upload every supported format and ask for the parsed data to be returned
        parsed_data_url = '{}?parsedData=true'.format(upload_url)
        expected_payload = {
            'parsedData': PARSED_DATA,
            'uploadedFileId': mock.ANY,
        }
        for ext, data in TEST_DATA_TYPES.items():
            content = xlsx_data if ext in ('xls', 'xlsx') else data
            upload = SimpleUploadedFile("test_data.{}".format(ext), content)
            resp = self.client.post(parsed_data_url, {'f': upload})
            self.assertEqual(resp.status_code, 200)
            self.assertDictEqual(resp.json(), expected_payload)
Exemple #2
0
    def test_temp_file_upload(self):
        """Check the temp file upload endpoint: input validation, one-time loading and parsed output."""
        upload_url = reverse(save_temp_file)
        self.check_require_login(upload_url)

        # Posting two files at once is rejected
        two_files = {
            'f': SimpleUploadedFile("test_data.tsv", TSV_DATA),
            'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA),
        }
        resp = self.client.post(upload_url, two_files)
        self.assertEqual(resp.status_code, 400)
        self.assertDictEqual(resp.json(), {'errors': ['Received 2 files instead of 1']})

        # A file with an unsupported extension is rejected
        bad_extension = {'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA)}
        resp = self.client.post(upload_url, bad_extension)
        self.assertEqual(resp.status_code, 400)
        self.assertDictEqual(resp.json(), {'errors': ['Unexpected file type: test_data.foo']})

        # A single valid file is accepted and reported back with an id
        resp = self.client.post(upload_url, {'f': SimpleUploadedFile("test_data.tsv", TSV_DATA)})
        self.assertEqual(resp.status_code, 200)
        upload_info = resp.json()
        self.assertDictEqual(upload_info, {
            'info': ['Parsed 3 rows from test_data.tsv'],
            'uploadedFileId': mock.ANY,
        })

        # The uploaded file can be loaded by its id ...
        file_id = upload_info['uploadedFileId']
        self.assertListEqual(load_uploaded_file(file_id), PARSED_DATA)
        # ... but only once: a second load must fail because the file is gone
        with self.assertRaises(IOError):
            load_uploaded_file(file_id)

        # Upload every extension in TEST_DATA_TYPES and ask for the parsed data to be returned.
        # The 'xls' and 'xlsx' extensions are sent with self.xlsx_data instead of the mapped data.
        parsed_data_url = '{}?parsedData=true'.format(upload_url)
        expected_payload = {
            'parsedData': PARSED_DATA,
            'uploadedFileId': mock.ANY,
        }
        for ext, data in TEST_DATA_TYPES.items():
            content = self.xlsx_data if ext in ('xls', 'xlsx') else data
            upload = SimpleUploadedFile("test_data.{}".format(ext), content)
            resp = self.client.post(parsed_data_url, {'f': upload})
            self.assertEqual(resp.status_code, 200)
            self.assertDictEqual(resp.json(), expected_payload)
Exemple #3
0
def save_individuals_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests that apply a previously uploaded Individual table to a project.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): id of the uploaded file, passed to load_uploaded_file(..)

    Returns:
        The response built by create_json_response(..) from a dict with two keys:
        'individualsByGuid' and 'familiesByGuid', each mapping a guid to the JSON of an updated record.
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    records = load_uploaded_file(upload_file_id)
    changed_families, changed_individuals = add_or_update_individuals_and_families(
        project, individual_records=records, user=request.user)

    response_json = {}

    # Index the JSON of the updated individuals by their guid
    individuals_json = _get_json_for_individuals(changed_individuals, request.user, add_sample_guids_field=True)
    response_json['individualsByGuid'] = {entry['individualGuid']: entry for entry in individuals_json}

    # Index the JSON of the updated families by their guid
    families_json = _get_json_for_families(changed_families, request.user, add_individual_guids_field=True)
    response_json['familiesByGuid'] = {entry['familyGuid']: entry for entry in families_json}

    return create_json_response(response_json)
Exemple #4
0
def save_hpo_table_handler(request, project_guid, upload_file_id):
    """
    Handler for 'save' requests to apply HPO terms tables previously uploaded through receive_hpo_table_handler

    Each uploaded record is matched to an Individual of the project by guid. The individual's
    `features` and `absent_features` are replaced by the record's present / absent HPO terms,
    and the individual is saved. The response maps each individual's guid to its JSON.
    """
    def _as_features(terms):
        # Wrap every term id in the {'id': ...} structure stored on the Individual
        return [{'id': term} for term in terms]

    project = get_project_and_check_permissions(project_guid, request.user)

    # The loaded value is unpacked as a pair; only its first element (the records) is used here
    records, _ = load_uploaded_file(upload_file_id)

    guids = [record[INDIVIDUAL_GUID_COLUMN] for record in records]
    matching_individuals = Individual.objects.filter(family__project=project, guid__in=guids)
    individuals_by_guid = {individual.guid: individual for individual in matching_individuals}

    for record in records:
        individual = individuals_by_guid[record[INDIVIDUAL_GUID_COLUMN]]
        individual.features = _as_features(record[HPO_TERMS_PRESENT_COLUMN])
        individual.absent_features = _as_features(record[HPO_TERMS_ABSENT_COLUMN])
        individual.save()

    individuals_json = _get_json_for_individuals(
        individuals_by_guid.values(), user=request.user, add_hpo_details=True,
    )
    return create_json_response({
        'individualsByGuid': {entry['individualGuid']: entry for entry in individuals_json},
    })
Exemple #5
0
def save_individuals_metadata_table_handler(request, project_guid,
                                            upload_file_id):
    """
    Handler for 'save' requests to apply individual metadata tables previously uploaded through
    receive_individuals_metadata_handler

    Each uploaded record is matched to an Individual of the project by guid, and the individual is
    updated with the record's values for the keys listed in INDIVIDUAL_METADATA_FIELDS. The response
    maps each individual's guid to its JSON.
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    # The loaded value is unpacked as a pair; only its first element (the records) is used here
    records, _ = load_uploaded_file(upload_file_id)

    guids = [record[INDIVIDUAL_GUID_COL] for record in records]
    matching_individuals = Individual.objects.filter(
        family__project=project, guid__in=guids)
    individuals_by_guid = {
        individual.guid: individual for individual in matching_individuals
    }

    for record in records:
        # Only pass on the metadata fields that are actually present in this record
        metadata = {
            field: record[field]
            for field in INDIVIDUAL_METADATA_FIELDS.keys() if field in record
        }
        individual = individuals_by_guid[record[INDIVIDUAL_GUID_COL]]
        update_model_from_json(individual, metadata, user=request.user)

    individuals_json = _get_json_for_individuals(
        list(individuals_by_guid.values()),
        user=request.user,
        add_hpo_details=True,
    )
    return create_json_response({
        'individualsByGuid': {
            entry['individualGuid']: entry for entry in individuals_json
        },
    })
Exemple #6
0
def _load_mapping_file(mapping_file_id):
    if not mapping_file_id:
        return {}

    id_mapping = {}
    for line in load_uploaded_file(mapping_file_id):
        if len(line) != 2:
            raise ValueError("Must contain 2 columns: " + ', '.join(line))
        id_mapping[line[0]] = line[1]
    return id_mapping
Exemple #7
0
def edit_families_handler(request, project_guid):
    """Edit one or more Family records.

    The families to edit are read from the JSON request body: from the uploaded file
    referenced by 'uploadedFileId' when that key is set, otherwise from the 'families' key.

    Args:
        request (object): Django request object
        project_guid (string): GUID of project that contains these families.

    Returns:
        A JSON response with the updated families under 'familiesByGuid', or a 400
        response when no families were specified.
    """
    project = get_project_and_check_pm_permissions(project_guid, request.user)

    def _resolve_family(fields):
        # Look the family up by guid, then by its previous family id, else get or create it by family id
        family_guid = fields.get('familyGuid')
        if family_guid:
            return Family.objects.get(project=project, guid=family_guid)

        previous_family_id = fields.get(PREVIOUS_FAMILY_ID_FIELD)
        if previous_family_id:
            return Family.objects.get(project=project,
                                      family_id=previous_family_id)

        family, _ = get_or_create_model_from_json(
            Family,
            {'project': project, 'family_id': fields[FAMILY_ID_FIELD]},
            update_json=None,
            user=request.user)
        return family

    request_json = json.loads(request.body)

    uploaded_file_id = request_json.get('uploadedFileId')
    if uploaded_file_id:
        modified_families = load_uploaded_file(uploaded_file_id)
    else:
        modified_families = request_json.get('families')

    if modified_families is None:
        return create_json_response(
            {}, status=400, reason="'families' not specified")

    updated_families = []
    for fields in modified_families:
        family = _resolve_family(fields)
        update_family_from_json(
            family, fields, user=request.user, allow_unknown_keys=True)
        updated_families.append(family)

    families_by_guid = {
        family.guid: _get_json_for_family(
            family, request.user, add_individual_guids_field=True)
        for family in updated_families
    }
    return create_json_response({'familiesByGuid': families_by_guid})
Exemple #8
0
def _load_mapping_file(mapping_file_id, mapping_file_path):
    id_mapping = {}
    file_content = []
    if mapping_file_id:
        file_content = load_uploaded_file(mapping_file_id)
    elif mapping_file_path:
        file_content = parse_file(mapping_file_path,
                                  file_iter(mapping_file_path))
    for line in file_content:
        if len(line) != 2:
            raise ValueError("Must contain 2 columns: " + ', '.join(line))
        id_mapping[line[0]] = line[1]
    return id_mapping
Exemple #9
0
def edit_families_handler(request, project_guid):
    """Edit one or more Family records.

    The families to edit are read from the JSON request body: from the uploaded file
    referenced by 'uploadedFileId' when that key is set, otherwise from the 'families' key.

    Args:
        request (object): Django request object
        project_guid (string): GUID of project that contains these families.

    Returns:
        A JSON response with the updated families under 'familiesByGuid', or a 400
        response when no families were specified.
    """
    # Check edit permission before touching the request payload, so that a caller without
    # access to the project cannot trigger loading of an uploaded file.
    # NOTE(review): loading an uploaded file presumably consumes it - confirm against load_uploaded_file.
    project = get_project_and_check_permissions(project_guid, request.user, CAN_EDIT)

    request_json = json.loads(request.body)

    uploaded_file_id = request_json.get('uploadedFileId')
    if uploaded_file_id:
        modified_families = load_uploaded_file(uploaded_file_id)
    else:
        modified_families = request_json.get('families')
    if modified_families is None:
        return create_json_response(
            {}, status=400, reason="'families' not specified")

    updated_families = []
    for fields in modified_families:
        # Look the family up by guid, then by its previous family id, else get or create it by family id
        if fields.get('familyGuid'):
            family = Family.objects.get(project=project, guid=fields['familyGuid'])
        elif fields.get(PREVIOUS_FAMILY_ID_FIELD):
            family = Family.objects.get(project=project, family_id=fields[PREVIOUS_FAMILY_ID_FIELD])
        else:
            family, _ = get_or_create_seqr_model(Family, project=project, family_id=fields[FAMILY_ID_FIELD])

        update_family_from_json(family, fields, user=request.user, allow_unknown_keys=True)
        updated_families.append(family)

    updated_families_by_guid = {
        'familiesByGuid': {
            family.guid: _get_json_for_family(family, request.user, add_individual_guids_field=True)
            for family in updated_families
        }
    }

    return create_json_response(updated_families_by_guid)
def create_project_from_workspace(request, namespace, name):
    """
    Create a project when a cooperator requests to load data from an AnVIL workspace.

    :param request: Django request object
    :param namespace: The namespace (or the billing account) of the workspace
    :param name: The name of the workspace. It is also used as the project name
    :return a JSON response with the guid of the new project under 'projectGuid', or a 400 response
        describing why the project could not be created

    """
    def _error_response(message):
        # All single-message failures share the same 400 response shape
        return create_json_response({'error': message}, status=400, reason=message)

    # Validate that the current user has logged in through google and has sufficient permissions
    workspace_meta = check_workspace_perm(
        request.user, CAN_EDIT, namespace, name, can_share=True, meta_fields=['workspace.bucketName'])

    # Refuse to create a second project for the same workspace
    existing_projects = Project.objects.filter(workspace_namespace=namespace, workspace_name=name)
    if existing_projects:
        return _error_response(
            'Project "{}" for workspace "{}/{}" exists.'.format(existing_projects.first().name, namespace, name))

    # Validate all the user inputs from the post body
    request_json = json.loads(request.body)

    required_fields = ['genomeVersion', 'uploadedFileId', 'dataPath']
    missing_fields = [field for field in required_fields if not request_json.get(field)]
    if missing_fields:
        return _error_response('Field(s) "{}" are required'.format(', '.join(missing_fields)))

    if not request_json.get('agreeSeqrAccess'):
        return _error_response('Must agree to grant seqr access to the data in the associated workspace.')

    # Add the seqr service account to the corresponding AnVIL workspace
    if add_service_account(request.user, namespace, name):
        _wait_for_service_account_access(request.user, namespace, name)

    # Validate the data path
    bucket = workspace_meta['workspace']['bucketName'].rstrip('/')
    relative_path = request_json['dataPath'].lstrip('/')
    data_path = 'gs://{bucket}/{path}'.format(bucket=bucket, path=relative_path)
    if not does_file_exist(data_path):
        return _error_response('Data file or path {} is not found.'.format(request_json['dataPath']))

    # Parse families/individuals in the uploaded pedigree file
    json_records = load_uploaded_file(request_json['uploadedFileId'])
    pedigree_records, errors, ped_warnings = parse_pedigree_table(
        json_records, 'uploaded pedigree file', user=request.user)
    # Warnings are treated like errors here: either kind aborts the project creation
    errors += ped_warnings
    if errors:
        return create_json_response({'errors': errors}, status=400)

    # Create a new Project in seqr
    project = create_model_from_json(
        Project,
        {
            'name': name,
            'genome_version': request_json['genomeVersion'],
            'description': request_json.get('description', ''),
            'workspace_namespace': namespace,
            'workspace_name': name,
        },
        user=request.user,
    )

    # add families and individuals according to the uploaded individual records
    _, updated_individuals = add_or_update_individuals_and_families(
        project, individual_records=pedigree_records, user=request.user)

    # Send an email to all seqr data managers; a failure to send is logged and does not fail the request
    try:
        _send_load_data_email(project, updated_individuals, data_path, request.user)
    except Exception as ee:
        logger.error('Exception while sending email to user {}. {}'.format(request.user, str(ee)))

    return create_json_response({'projectGuid': project.guid})
Exemple #11
0
def load_uploaded_mapping_file(mapping_file_id):
    """Load the uploaded file with the given id and pass its content to _load_mapping_file."""
    return _load_mapping_file(load_uploaded_file(mapping_file_id))
Exemple #12
0
def load_uploaded_mapping_file(mapping_file_id):
    """Load the uploaded file with the given id and pass its content to _load_mapping_file."""
    return _load_mapping_file(load_uploaded_file(mapping_file_id))