Beispiel #1
0
def test_retrieve_run_report():
    """Exercise enasearch.retrieve_run_report with default and explicit fields."""
    accession = "SRX017289"

    # Without an explicit field list the returned report text is expected to
    # contain this sample title.
    default_report = enasearch.retrieve_run_report(
        accession=accession, fields=None, file=None)
    assert "small RNAs_wild type" in default_report

    # With an explicit field list, the first line of the report is split on
    # tabs and compared against the fields that were requested.
    exp_fields = ["run_accession", "fastq_ftp", "fastq_md5", "fastq_bytes"]
    requested = ",".join(exp_fields)
    selected_report = enasearch.retrieve_run_report(
        accession=accession, fields=requested, file=None)
    first_line = selected_report.split("\n")[0]
    header = first_line.split("\t")
    # NOTE(review): `cmp` is not a Python 3 builtin, and the Python 2 builtin
    # returns 0 (falsy) for equal lists. Presumably this is a list-comparison
    # helper defined elsewhere in the test module -- confirm it is truthy on
    # a match before relying on this assertion.
    assert cmp(header, exp_fields)
Beispiel #2
0
def get_fastq_urls(accessions: List[str],
                   fields: List[str] = None) -> Dict[str, Dict]:
    """
    Collect FASTQ records for ENA (or SRA) accessions into a single dict.

    For every accession a run report is requested through
    ``enasearch.retrieve_run_report``, passed through ``flatten_fastq_table``
    and then ``parse_fastq_table(..., key_by='fastq_ftp')``. The per-accession
    results are merged with ``dict.update``, so a key seen again for a later
    accession replaces the earlier entry.

    :param accessions: Accessions to look up, e.g. Run (SRR*),
                       Experiment (SRX*) or Project (PRJ*) identifiers.
    :type accessions: List[str]
    :param fields: Report columns to request. When ``None`` a built-in default
                   set of run, sample and FASTQ columns is used.
    :type fields: List[str]
    :return: Merged output of ``parse_fastq_table``; presumably keyed by
             FASTQ FTP URL given ``key_by='fastq_ftp'`` (verify against that
             helper).
    :rtype: Dict[str, Dict]
    """
    columns = fields
    if columns is None:
        columns = [
            'run_accession',
            'experiment_accession',
            'study_accession',
            'sample_accession',
            'secondary_sample_accession',
            'instrument_platform',
            'library_strategy',
            'read_count',
            'fastq_ftp',
            'fastq_md5',
            'fastq_bytes',
        ]

    merged = {}
    # The report request raises HTTPError on status_code 500
    # (eg ENA is temporarily down); the error is not handled here.
    for acc in accessions:
        report = enasearch.retrieve_run_report(accession=acc,
                                               fields=','.join(columns))
        merged.update(
            parse_fastq_table(flatten_fastq_table(report), key_by='fastq_ftp'))

    return merged
Beispiel #3
0
def retrieve_run_report(accession, fields, file):
    """Fetch a run report and display it when no output file is given.

    ``fields`` is joined into a comma-separated string (or replaced by
    ``None`` when empty) and a falsy ``file`` is normalised to ``None``
    before both are forwarded to ``enasearch.retrieve_run_report``.
    """
    field_spec = ",".join(fields) if fields else None
    target = file if file else None
    report = enasearch.retrieve_run_report(
        accession=accession,
        fields=field_spec,
        file=target,
    )
    # Only hand the report to print_display when no file was requested.
    if target is None:
        print_display(report, 'report')
Beispiel #4
0
def retrieve_run_report(accession, fields, file):
    """Retrieve a run report from ENA.

    The report is requested through ``enasearch.retrieve_run_report``. When
    no file is given, the returned report is passed to ``print_display``;
    otherwise the file argument is forwarded to the retrieval call and
    nothing is displayed here.
    """
    # Normalise empty inputs to None; a non-empty field collection becomes
    # a single comma-separated string.
    if fields:
        fields = ",".join(fields)
    else:
        fields = None
    if not file:
        file = None

    report = enasearch.retrieve_run_report(accession=accession,
                                           fields=fields,
                                           file=file)
    if file is not None:
        return
    print_display(report, 'report')
Beispiel #5
0
def get_run_table(accessions: List[str],
                  fields: List[str] = None) -> Dict[str, Dict]:
    """
    Build a dict of per-run metadata records for ENA (or SRA) accessions.

    For every accession a run report is requested through
    ``enasearch.retrieve_run_report`` and parsed with
    ``parse_fastq_table(..., key_by='run_accession')``; the report is parsed
    as returned, without a flattening step. The per-accession results are
    merged with ``dict.update``. Afterwards each record with a truthy
    ``'fastq_ftp'`` entry has that entry replaced by a list of
    single-key dicts labelled ``R1``, ``R2``, ... in order.

    :param accessions: Accessions to look up, e.g. Run (SRR*),
                       Experiment (SRX*) or Project (PRJ*) identifiers.
    :type accessions: List[str]
    :param fields: Report columns to request. When ``None`` a built-in default
                   set of run, library, sample and FASTQ columns is used.
    :type fields: List[str]
    :return: Merged output of ``parse_fastq_table``; presumably keyed by run
             accession given ``key_by='run_accession'`` (verify against that
             helper).
    :rtype: Dict[str, Dict]
    """
    columns = fields
    if columns is None:
        columns = [
            'run_accession',
            'experiment_accession',
            'study_accession',
            'sample_accession',
            'secondary_sample_accession',
            'instrument_platform',
            'instrument_model',
            'library_strategy',
            'library_source',
            'library_layout',
            'library_selection',
            'library_name',
            'broker_name',
            'study_alias',
            'experiment_alias',
            'sample_alias',
            'run_alias',
            'read_count',
            'base_count',
            'fastq_ftp',
            'fastq_md5',
            'fastq_bytes',
            'center_name',
        ]

    by_run = {}
    # The report request raises HTTPError on status_code 500
    # (eg ENA is temporarily down); the error is not handled here.
    for acc in accessions:
        report = enasearch.retrieve_run_report(accession=acc,
                                               fields=','.join(columns))
        by_run.update(parse_fastq_table(report, key_by='run_accession'))

    # Turn each record's FTP urls into a list of dicts like
    # [{'R1': 'ftp://bla_1.fastq.gz'}, {'R2': 'ftp://bla_2.fastq.gz'}]
    # NOTE(review): assumes 'fastq_ftp' is already a sequence of URLs here
    # (not one delimited string) -- confirm against parse_fastq_table.
    for record in by_run.values():
        ftp_urls = record.get('fastq_ftp', False)
        if not ftp_urls:
            continue
        record['fastq_ftp'] = [
            {'R%s' % str(position): url}
            for position, url in enumerate(ftp_urls, start=1)
        ]
    return by_run