Beispiel #1
0
def test_retrieve_data():
    """Check ``enasearch.retrieve_data`` for several ids and display formats.

    Five calls are made, each followed by an assertion on the returned value:
    an XML view of the ERA000010-ERA000020 range, a FASTA subsequence of
    A00145, the expanded and the header text views of AL513382, and the XML
    view of PRJEB2772.

    NOTE(review): presumably needs access to the live ENA service - confirm.
    """
    # Every optional argument is always passed explicitly; each case below
    # only names the ones that differ from None.
    all_none = dict.fromkeys(("download", "file", "offset", "length",
                              "subseq_range", "expanded", "header"))

    def fetch(ids, display, **overrides):
        """Call retrieve_data with the all-None defaults plus overrides."""
        return enasearch.retrieve_data(ids=ids, display=display,
                                       **{**all_none, **overrides})

    # Paging window shared by every call except the first one.
    window = {"offset": 0, "length": 100000}

    # Case 1: XML view of a range of ERA accessions.
    era_xml = fetch("ERA000010-ERA000020", "xml")
    assert "ROOT" in era_xml

    # Case 2: FASTA view of a subsequence; the result is iterated for ids.
    records = fetch("A00145", "fasta", subseq_range="3-63", **window)
    pprint([record.id for record in records])
    assert 'ENA|A00145|A00145.1' in [record.id for record in records]

    # Case 3: expanded text view.
    expanded_text = fetch("AL513382", "text", expanded="true", **window)
    pprint(expanded_text)
    assert ("AL513382" in expanded_text
            and len(expanded_text.split("\n")) >= 200000)

    # Case 4: header text view.
    header_text = fetch("AL513382", "text", header="true", **window)
    pprint(header_text)
    assert "AL513382" in header_text and len(header_text.split("\n")) >= 745

    # Case 5: XML view of a project accession.
    project_xml = fetch("PRJEB2772", "xml", **window)
    pprint(project_xml)
    assert "ROOT" in project_xml
Beispiel #2
0
def retrieve_data(ids, display, download, file, offset, length, subseq_range,
                  expanded, header):
    """Retrieve ENA data (other than taxon) and optionally display it.

    The given ids are joined with commas and handed to
    ``enasearch.retrieve_data`` together with the formatting options.
    Falsy values of ``download``, ``file``, ``offset``, ``length`` and
    ``subseq_range`` are forwarded as ``None``; ``expanded`` and ``header``
    are forwarded as plain booleans.

    When no ``file`` is given, the retrieved data is passed to
    ``print_display`` along with the chosen ``display``. Nothing is returned.
    """
    data = enasearch.retrieve_data(
        ids=",".join(ids),
        display=display,
        # "x or None" collapses every falsy value (e.g. "", 0, False) to None.
        download=download or None,
        file=file or None,
        offset=offset or None,
        length=length or None,
        subseq_range=subseq_range or None,
        expanded=bool(expanded),
        header=bool(header),
    )
    # Only display the result when no output file was requested.
    if not file:
        print_display(data, display)
Beispiel #3
0
def retrieve_data(ids, display, download, file, offset, length, subseq_range,
                  expanded, header):
    """Retrieve ENA data (other than taxon and project).

    Joins ``ids`` with commas, normalises the optional arguments (falsy
    values become ``None``; ``expanded`` and ``header`` become booleans) and
    calls ``enasearch.retrieve_data``. The result is passed to
    ``print_display`` unless a ``file`` was given. Nothing is returned.
    """
    # Treat every falsy optional value as "not provided".
    if not download:
        download = None
    if not file:
        file = None
    if not offset:
        offset = None
    if not length:
        length = None
    if not subseq_range:
        subseq_range = None

    joined_ids = ",".join(ids)
    data = enasearch.retrieve_data(ids=joined_ids,
                                   display=display,
                                   download=download,
                                   file=file,
                                   offset=offset,
                                   length=length,
                                   subseq_range=subseq_range,
                                   expanded=bool(expanded),
                                   header=bool(header))

    # A file target suppresses the on-screen display.
    if file is not None:
        return
    print_display(data, display)
Beispiel #4
0
def sample2assembly(sample_id):
    """Return the first 'assembly' entry reported for ``sample_id``.

    Queries ``ena_sample`` with ``result='assembly'``. On an HTTP 200
    response the body is parsed as JSON and its first element is returned;
    for any other status code an empty string is returned.
    """
    response = ena_sample(sample_id, result='assembly')
    if response.status_code == 200:
        entries = json.loads(response.text)
        return entries[0]
    return ''


# Module-level example calls for the accession CABMLH010000000.
# First a free-text search restricted to the 'wgs_set' result type, shown as XML.
data = enasearch.search_data(free_text_search=True,
                             query="CABMLH010000000",
                             result='wgs_set',
                             display='xml')
# Then a direct retrieval of the same accession as HTML. This rebinds `data`,
# so the search result above is discarded without being used here.
data = enasearch.retrieve_data(ids="CABMLH010000000", display="html")


def get_returnable_fields(result):
    """Fetch the returnable fields of an ENA result type.

    Sends a GET request to the ENA portal ``returnFields`` endpoint for the
    given ``result`` and returns the response body parsed as JSON.
    """
    url = f"https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=json&result={result}"
    return json.loads(requests.get(url).text)


# Keep only the 'columnId' of each field returned for the 'wgs_set' result.
returnable_fields = [
    field['columnId'] for field in get_returnable_fields('wgs_set')
]


def file_report(sample_id):
Beispiel #5
0
def search_ena_accessions(accessions: List[str]):
    """Retrieve the given accessions from ENA in XML display.

    The accessions are joined into a single comma-separated string and
    passed to ``enasearch.retrieve_data``; its result is returned unchanged.
    """
    return enasearch.retrieve_data(ids=','.join(accessions), display="xml")