예제 #1
0
def setup(path):
    """Import the VCF at ``path`` over a single chr1 interval on GRCh38.

    The interval is parsed from the string ``'chr1:START-END'`` and
    evaluated to a Python value before being handed to ``hl.import_vcfs``
    as a one-element interval list.

    Returns the first (and only requested) element of the import result.
    """
    chr1_interval = hl.eval(
        hl.parse_locus_interval('chr1:START-END', reference_genome='GRCh38'))
    imported = hl.import_vcfs([path], [chr1_interval], reference_genome='GRCh38')
    return imported[0]
예제 #2
0
파일: drive_combiner.py 프로젝트: zscu/hail
def h(paths, sample_names, tmp_path, json, header, out_path, i):
    """Import, transform and combine one batch of inputs.

    Each dataset returned by ``hl.import_vcfs`` is passed through
    ``comb.transform_one``; the results are grouped with
    ``chunks(..., MAX_COMBINE_NUMBER)`` and every group is merged with
    ``comb.combine_gvcfs``.

    When ``len(paths) <= MAX_COMBINE_NUMBER`` the first merged dataset is
    written straight to ``out_path`` and an empty list is returned.
    Otherwise all merged datasets are written under ``tmp_path + f'{i}/'``
    and the list of per-dataset ``.mt`` paths built for that directory is
    returned.
    """
    imported = hl.import_vcfs(paths, json, array_elements_required=False,
                              _external_header=header,
                              _external_sample_ids=sample_names)
    transformed = [comb.transform_one(dataset) for dataset in imported]
    merged = [comb.combine_gvcfs(group)
              for group in chunks(transformed, MAX_COMBINE_NUMBER)]

    # Few enough inputs that no intermediate round is needed: write the
    # result directly.
    if len(paths) <= MAX_COMBINE_NUMBER:
        merged[0].write(out_path, overwrite=True)
        return []

    round_dir = tmp_path + f'{i}/'
    # File names are the dataset index, zero-padded to the width of the count
    # (presumably mirroring what write_matrix_tables produces -- confirm).
    width = len(str(len(merged)))
    hl.experimental.write_matrix_tables(merged, round_dir, overwrite=True)
    return [round_dir + str(index).zfill(width) + '.mt'
            for index in range(len(merged))]
예제 #3
0
def run_combiner(sample_list,
                 intervals,
                 out_path,
                 tmp_path,
                 summary_path=None,
                 overwrite=False):
    """Combine the inputs in ``sample_list`` into one dataset at ``out_path``.

    The inputs are imported with ``hl.import_vcfs`` over ``intervals``,
    passed through ``comb.transform_one``, grouped with
    ``chunks(..., MAX_COMBINER_LENGTH)`` and merged per group with
    ``comb.combine_gvcfs``.  While more than one merged dataset remains,
    the current datasets are written to a numbered sub-directory of a
    freshly generated temporary directory, read back, and merged again.
    The single remaining dataset is written to ``out_path``.

    If ``summary_path`` is not ``None``, the written result is read back and
    the rows of ``comb.summarize`` of it are written to ``summary_path``.

    ``overwrite`` is forwarded to the final and summary writes; temporary
    writes always overwrite.
    """
    import gc
    # make the temp path a directory, no matter what
    scratch_root = tmp_path + f'/combiner-temporary/{uuid.uuid4()}/'

    imported = hl.import_vcfs(sample_list, intervals,
                              array_elements_required=False)
    transformed = [comb.transform_one(dataset) for dataset in imported]
    combined = [comb.combine_gvcfs(group)
                for group in chunks(transformed, MAX_COMBINER_LENGTH)]

    if len(combined) != 1:
        hl.utils.java.info(f'Writing combiner temporary files to: {scratch_root}')
        round_number = 0
        while len(combined) > 1:
            round_dir = scratch_root + f'{round_number}/'
            width = len(str(len(combined)))
            hl.experimental.write_matrix_tables(combined, round_dir,
                                                overwrite=True)
            round_paths = [round_dir + str(index).zfill(width) + '.mt'
                           for index in range(len(combined))]
            round_number += 1
            reloaded = [hl.read_matrix_table(p) for p in round_paths]
            combined = [comb.combine_gvcfs(group)
                        for group in chunks(reloaded, MAX_COMBINER_LENGTH)]
            gc.collect()  # need to try to free memory on the master

    combined[0].write(out_path, overwrite=overwrite)

    if summary_path is not None:
        final_mt = hl.read_matrix_table(out_path)
        summary = comb.summarize(final_mt).rows()
        summary.write(summary_path, overwrite=overwrite)
예제 #4
0
파일: run_combiner.py 프로젝트: jigold/hail
def run_combiner(sample_list, json, out_path, tmp_path, summary_path=None, overwrite=False):
    """Merge every input in ``sample_list`` and write the result to ``out_path``.

    ``json`` is passed as the second argument to ``hl.import_vcfs``.  Each
    imported dataset goes through ``comb.transform_one``; datasets are then
    grouped by ``chunks(..., MAX_COMBINER_LENGTH)`` and each group is merged
    with ``comb.combine_gvcfs``.  As long as more than one merged dataset is
    left, they are written to a numbered directory below a unique temporary
    directory, read back and merged again.

    When ``summary_path`` is given, the final dataset is re-read from
    ``out_path`` and the rows of ``comb.summarize`` are written there.
    """
    import gc
    # make the temp path a directory, no matter what
    work_dir = tmp_path + f'/combiner-temporary/{uuid.uuid4()}/'

    transformed = [
        comb.transform_one(dataset)
        for dataset in hl.import_vcfs(sample_list, json, array_elements_required=False)
    ]
    combined = [comb.combine_gvcfs(batch) for batch in chunks(transformed, MAX_COMBINER_LENGTH)]

    if len(combined) != 1:
        hl.utils.java.info(f'Writing combiner temporary files to: {work_dir}')
        level = 0
        while len(combined) > 1:
            level_dir = work_dir + f'{level}/'
            digits = len(str(len(combined)))
            hl.experimental.write_matrix_tables(combined, level_dir, overwrite=True)
            written = [level_dir + str(k).zfill(digits) + '.mt' for k in range(len(combined))]
            level += 1
            reread = [hl.read_matrix_table(location) for location in written]
            combined = [comb.combine_gvcfs(batch) for batch in chunks(reread, MAX_COMBINER_LENGTH)]
            gc.collect()  # need to try to free memory on the master

    combined[0].write(out_path, overwrite=overwrite)

    if summary_path is not None:
        result = hl.read_matrix_table(out_path)
        comb.summarize(result).rows().write(summary_path, overwrite=overwrite)
예제 #5
0
            }
        },
        'end': {
            'locus': {
                'contig': 'chr20',
                'position': 19776611
            }
        },
        'includeStart': True,
        'includeEnd': True
    },
    {
        'start': {
            'locus': {
                'contig': 'chr20',
                'position': 19776612
            }
        },
        'end': {
            'locus': {
                'contig': 'chr20',
                'position': 21144633
            }
        },
        'includeStart': True,
        'includeEnd': True
    },
]
# Serialize the interval list to a JSON string and pass it, together with the
# input paths, to import_vcfs.  (`parts` and `gvcfs` are defined earlier in
# the example, outside this fragment.)
parts_str = json.dumps(parts)
vcfs = hl.import_vcfs(gvcfs, parts_str)
예제 #6
0
import json
import hail as hl

# Inputs to import.
gvcfs = [
    'gs://hail-ci/gvcfs/HG00096.g.vcf.gz',
    'gs://hail-ci/gvcfs/HG00268.g.vcf.gz',
]
hl.init(default_reference='GRCh38')

# Inclusive (start, end) positions on chr20, one pair per interval.
_chr20_bounds = [
    (17821257, 18708366),
    (18708367, 19776611),
    (19776612, 21144633),
]
# Key order is kept as start/end/includeStart/includeEnd so the serialized
# JSON string is unchanged.
parts = [
    {
        'start': {'locus': {'contig': 'chr20', 'position': first}},
        'end': {'locus': {'contig': 'chr20', 'position': last}},
        'includeStart': True,
        'includeEnd': True,
    }
    for first, last in _chr20_bounds
]

# import_vcfs is given the intervals as a JSON string here.
parts_str = json.dumps(parts)
vcfs = hl.import_vcfs(gvcfs, parts_str)
예제 #7
0
import hail as hl

# Inputs to import.
gvcfs = [
    'gs://hail-common/test-resources/HG00096.g.vcf.gz',
    'gs://hail-common/test-resources/HG00268.g.vcf.gz',
]
hl.init(default_reference='GRCh38')

# Inclusive (start, end) positions on chr20, one pair per interval.
_chr20_bounds = [
    (17821257, 18708366),
    (18708367, 19776611),
    (19776612, 21144633),
]
parts_json = [
    {
        'start': {'locus': {'contig': 'chr20', 'position': first}},
        'end': {'locus': {'contig': 'chr20', 'position': last}},
        'includeStart': True,
        'includeEnd': True,
    }
    for first, last in _chr20_bounds
]

# Convert the JSON-style dicts into values of an
# array<interval<struct{locus: locus<GRCh38>}>> type before importing.
_locus_key_type = hl.tstruct(locus=hl.tlocus('GRCh38'))
_interval_array_type = hl.tarray(hl.tinterval(_locus_key_type))
parts = _interval_array_type._convert_from_json(parts_json)

# Force a row count on every imported dataset.
for mt in hl.import_vcfs(gvcfs, parts):
    mt._force_count_rows()