Beispiel #1
0
    def test_export_table(self):
        """Exercise export_table for the tsv, Excel and unsupported formats."""
        header = ['column1', 'column2']
        rows = [['row1_v1', 'row1_v2'], ['row2_v1', 'row2_v2']]

        # tsv: every line (header first) is tab-joined and newline-terminated
        tsv_response = export_table('test_file', header, rows, file_format='tsv')
        expected_tsv = ''.join(
            '\t'.join(line) + '\n' for line in [header] + rows)

        self.assertEqual(tsv_response.status_code, 200)
        self.assertEqual(tsv_response.get('content-disposition'),
                         'attachment; filename="test_file.tsv"')
        self.assertEqual(tsv_response.content, expected_tsv)

        # Excel: requesting 'xls' yields an .xlsx attachment that is parsed back
        xls_response = export_table('test_file', header, rows, file_format='xls')
        self.assertEqual(xls_response.status_code, 200)
        self.assertEqual(xls_response.get('content-disposition'),
                         'attachment; filename="test_file.xlsx"')
        worksheet = load_workbook(StringIO(xls_response.content)).active

        def column_values(letter):
            # Values of every cell in the given worksheet column, top to bottom.
            return [cell.value for cell in worksheet[letter]]

        # header cells are expected to come back title-cased
        self.assertListEqual(column_values('A'),
                             ['Column1', 'row1_v1', 'row2_v1'])
        self.assertListEqual(column_values('B'),
                             ['Column2', 'row1_v2', 'row2_v2'])
        # the column after the last header must be empty
        self.assertEqual(column_values('C'), [None, None, None])

        # unknown format: the error message has to mention "format"
        with self.assertRaisesRegexp(ValueError, '.*format.*'):
            export_table(
                'test_file', header, rows, file_format='unknown_format')
Beispiel #2
0
def export_projects_table_handler(request):
    """Export the projects returned for the requesting user as a table.

    The output format is read from the ``file_format`` query parameter and
    defaults to ``'tsv'``. Each project becomes one row; rows are ordered by
    project name, falling back to the deprecated project id when the name is
    empty.

    Args:
        request: the incoming request; ``request.user`` and ``request.GET``
            are read.

    Returns:
        The response produced by ``export_table``, or a JSON response with
        status 400 carrying the error message when ``export_table`` raises
        ``ValueError``.
    """
    file_format = request.GET.get('file_format', 'tsv')

    projects_by_guid = _get_projects_json(request.user)
    project_categories_by_guid = _retrieve_project_categories_by_guid(
        projects_by_guid.keys())

    # Analysis statuses shown as columns; the 'S' status is left out.
    status_columns = [
        (status_key, status_label)
        for status_key, status_label in Family.ANALYSIS_STATUS_CHOICES
        if status_key != 'S'
    ]

    header = [
        'Project',
        'Description',
        'Categories',
        'Created Date',
        'Families',
        'Individuals',
        'Tagged Variants',
        'WES Samples',
        'WGS Samples',
        'RNA Samples',
    ] + [status_label for _, status_label in status_columns]

    def display_name(project_json):
        # Name shown in the first column and used as the sort key.
        return project_json.get('name') or project_json.get(
            'deprecatedProjectId')

    rows = []
    for project in sorted(projects_by_guid.values(), key=display_name):
        category_names = [
            project_categories_by_guid[guid]['name']
            for guid in project.get('projectCategoryGuids')
        ]
        sample_type_counts = project.get('sampleTypeCounts', {})
        status_counts = project.get('analysisStatusCounts', {})

        rows.append([
            display_name(project),
            project.get('description'),
            ', '.join(category_names),
            project.get('createdDate'),
            project.get('numFamilies'),
            project.get('numIndividuals'),
            project.get('numVariantTags'),
            sample_type_counts.get(Sample.SAMPLE_TYPE_WES, 0),
            sample_type_counts.get(Sample.SAMPLE_TYPE_WGS, 0),
            sample_type_counts.get(Sample.SAMPLE_TYPE_RNA, 0),
        ] + [status_counts.get(status_key, 0) for status_key, _ in status_columns])

    try:
        return export_table('projects', header, rows, file_format)
    except ValueError as e:
        # Report the problem to the client instead of failing the request.
        return create_json_response({'error': str(e)}, status=400)
Beispiel #3
0
def export_variants_handler(request, search_hash):
    """Export every variant of a stored search as a downloadable table.

    Each variant becomes one row made of three sections:

    1. the fields described by ``VARIANT_EXPORT_DATA``;
    2. one group of ``VARIANT_FAMILY_EXPORT_DATA`` fields (family id, tags,
       notes) per family, padded to the largest family count of any variant;
    3. one ``sampleId:numAlt:gq:ab`` cell per genotype, padded with empty
       strings to the largest genotype count of any variant.

    Args:
        request: the incoming request; ``request.user`` is used for the
            permission check and ``request.GET['file_format']`` (default
            ``'tsv'``) selects the output format.
        search_hash: hash identifying the ``VariantSearchResults`` record.

    Returns:
        The response produced by ``export_table``.

    Note:
        Exceptions from the lookup, the permission check and ``export_table``
        are not handled here and propagate to the caller.
    """
    results_model = VariantSearchResults.objects.get(search_hash=search_hash)

    _check_results_permission(results_model, request.user)

    families = results_model.families.all()
    family_ids_by_guid = {family.guid: family.family_id for family in families}

    variants, _ = get_es_variants(results_model, page=1, load_all=True)
    variants = _flatten_variants(variants)

    # Named to avoid shadowing the stdlib ``json`` module inside this function.
    saved_variants_json, variants_to_saved_variants = _get_saved_variants(variants, families)

    # ``or [0]`` keeps max() from raising ValueError when the search returned
    # no variants, so an empty search exports a header-only table.
    max_families_per_variant = max([len(variant['familyGuids']) for variant in variants] or [0])
    max_samples_per_variant = max([len(variant['genotypes']) for variant in variants] or [0])

    rows = []
    for variant in variants:
        row = [_get_field_value(variant, config) for config in VARIANT_EXPORT_DATA]

        family_guids = variant['familyGuids']
        saved_variant_guids_by_family = variants_to_saved_variants.get(variant['variantId'], {})
        for i in range(max_families_per_variant):
            # Pad with an empty guid so every row has the same number of columns.
            family_guid = family_guids[i] if i < len(family_guids) else ''
            variant_guid = saved_variant_guids_by_family.get(family_guid, '')
            family_tags = {
                'family_id': family_ids_by_guid.get(family_guid),
                'tags': [
                    tag for tag in saved_variants_json['variantTagsByGuid'].values()
                    if variant_guid in tag['variantGuids']
                ],
                'notes': [
                    note for note in saved_variants_json['variantNotesByGuid'].values()
                    if variant_guid in note['variantGuids']
                ],
            }
            row += [_get_field_value(family_tags, config) for config in VARIANT_FAMILY_EXPORT_DATA]

        # Materialize the values: a Python 3 dict view cannot be indexed.
        genotypes = list(variant['genotypes'].values())
        for i in range(max_samples_per_variant):
            if i < len(genotypes):
                row.append('{sampleId}:{numAlt}:{gq}:{ab}'.format(**genotypes[i]))
            else:
                row.append('')
        rows.append(row)

    header = [config['header'] for config in VARIANT_EXPORT_DATA]
    for i in range(max_families_per_variant):
        header += ['{}_{}'.format(config['header'], i+1) for config in VARIANT_FAMILY_EXPORT_DATA]
    header += ['sample_{}:num_alt_alleles:gq:ab'.format(i+1) for i in range(max_samples_per_variant)]

    file_format = request.GET.get('file_format', 'tsv')

    # Headers are passed through as-is (no title-casing).
    return export_table('search_results_{}'.format(search_hash), header, rows, file_format, titlecase_header=False)
    def test_export_table(self):
        """Exercise export_table with non-ASCII data and with invalid input."""
        header = ['column1', 'column2']
        rows = [['row1_v1\xe2', 'row1_v2'], ['row2_v1', 'row2_v2']]

        # tsv: tab-joined, newline-terminated lines, compared as UTF-8 bytes
        tsv_response = export_table('test_file', header, rows, file_format='tsv')
        expected_tsv = ''.join(
            '\t'.join(line) + '\n' for line in [header] + rows).encode('utf-8')

        self.assertEqual(tsv_response.status_code, 200)
        self.assertEqual(tsv_response.get('content-disposition'),
                         'attachment; filename="test_file.tsv"')
        self.assertEqual(tsv_response.content, expected_tsv)

        # Excel: requesting 'xls' yields an .xlsx attachment that is parsed back
        xls_response = export_table('test_file', header, rows, file_format='xls')
        self.assertEqual(xls_response.status_code, 200)
        self.assertEqual(xls_response.get('content-disposition'),
                         'attachment; filename="test_file.xlsx"')
        worksheet = load_workbook(BytesIO(xls_response.content)).active

        def column_values(letter):
            # Values of every cell in the given worksheet column, top to bottom.
            return [cell.value for cell in worksheet[letter]]

        # header cells are expected to come back title-cased
        self.assertListEqual(column_values('A'),
                             ['Column1', 'row1_v1\xe2', 'row2_v1'])
        self.assertListEqual(column_values('B'),
                             ['Column2', 'row1_v2', 'row2_v2'])
        # the column after the last header must be empty
        self.assertEqual(column_values('C'), [None, None, None])

        # invalid input: unsupported file format
        with self.assertRaises(ValueError) as format_error:
            export_table(
                'test_file', header, rows, file_format='unknown_format')
        self.assertEqual(str(format_error.exception),
                         'Invalid file_format: unknown_format')

        # invalid input: header has fewer columns than the rows
        with self.assertRaises(ValueError) as length_error:
            export_table('test_file', ['column1'], rows)
        self.assertEqual(
            str(length_error.exception),
            'len(header) != len(row): 1 != 2\ncolumn1\nrow1_v1\xe2,row1_v2')