Example #1
0
def print_acc2gi(accessions):
    """Print GIs corresponding to the given accession numbers.

    Args:
        accessions: Iterable of accession strings. It is copied into a list
            because it is traversed more than once: once to build the search
            term and again to filter every result line.

    For every matching summary line, one line is printed in the form
    '<accession>  ->  <gi>', with the accession right-aligned in 18 columns.
    """
    # A one-shot iterator would be exhausted after building the term, making
    # the any() filter below reject every line.
    accessions = list(accessions)
    term = ' OR '.join(a + '[accn]' for a in accessions)
    for line in entrez.on_search(db='nucleotide', term=term, tool='summary'):
        if 'Name="Extra"' not in line:
            continue
        if not any(a in line for a in accessions):
            continue
        # Presumably the Extra item looks like 'gi|<gi>|<db>|<accession>|...'
        # (inferred from the field positions used here) -- TODO confirm.
        fields = line.split('|', 4)
        if len(fields) < 5:
            # Too few '|' separators to hold both values; skip the line
            # instead of failing on the unpacking.
            continue
        gi, acc = fields[1], fields[3]
        print('%18s  ->  %s' % (acc, gi))
Example #2
0
def main():
    """Print the run accession found for the SRA identifier given on the CLI.

    Parses the '-s/--sra' option, requests the 'summary' of that term in the
    'sra' database and, for each returned line containing 'Name="Runs"',
    prints the first acc="..." value found in it.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # The identifier is the search term, so running without it makes no sense:
    # let argparse report the problem instead of searching for None.
    parser.add_argument('-s', '--sra', metavar='SRAid', required=True,
                        help='SRA identifier')
    args = parser.parse_args()

    # Raw string so that \w reaches the regex engine untouched; compiled once
    # outside the loop.
    acc_pattern = re.compile(r'acc="(?P<acc>\w+[0-9]+)"')
    for line in entrez.on_search(db='sra', term=args.sra, tool='summary'):
        if 'Name="Runs"' not in line:
            continue
        match = acc_pattern.search(line)
        if match is None:
            # A "Runs" item without any acc="..." attribute; nothing to print.
            continue
        print(match.group('acc'))
Example #3
0
def print_acc2gi(accessions):
    """Print GIs corresponding to the given accession numbers.

    Args:
        accessions: Iterable of accession strings. It is copied into a list
            because it is traversed more than once: once to build the search
            term and again to filter every result line.

    For every matching summary line, one line is printed in the form
    '<accession>  ->  <gi>', with the accession right-aligned in 18 columns.
    Lines in which either value cannot be located are skipped.
    """
    # A one-shot iterator would be exhausted after building the term, making
    # the any() filter below reject every line.
    accessions = list(accessions)
    term = ' OR '.join(a + '[accn]' for a in accessions)

    # Raw strings keep the regex escapes (\|, \w, \.) intact; the patterns are
    # compiled once instead of being looked up on every line.
    gi_pattern = re.compile(r'gi\|([0-9]+)\|')
    acc_pattern = re.compile(r'(?:emb|gb|ref|dbj)\|(?P<acc>\w+\.[0-9]+)\|')

    for line in entrez.on_search(db='nucleotide', term=term, tool='summary'):
        if 'Name="Extra"' not in line:
            continue
        if not any(a in line for a in accessions):
            continue
        gi_match = gi_pattern.search(line)
        acc_match = acc_pattern.search(line)
        if gi_match is None or acc_match is None:
            # The line mentions an accession but lacks a 'gi|...|' or a
            # versioned '<db>|<accession>|' field; skip it rather than fail.
            continue
        print('%18s  ->  %s' % (acc_match.group('acc'), gi_match.group(1)))
Example #4
0
def print_acc2gi(accessions):
    """Print GIs corresponding to the given accession numbers.

    Args:
        accessions: Iterable of accession strings. It is copied into a list
            because it is traversed more than once: once to build the search
            term and again to filter every result line.

    For every matching summary line, one line is printed in the form
    '<accession>  ->  <gi>', with the accession right-aligned in 18 columns.
    Lines in which either value cannot be located are skipped.
    """
    # A one-shot iterator would be exhausted after building the term, making
    # the any() filter below reject every line.
    accessions = list(accessions)
    term = ' OR '.join(a + '[accn]' for a in accessions)

    # Raw strings keep the regex escapes (\|, \w, \.) intact; the patterns are
    # compiled once instead of being looked up on every line.
    gi_pattern = re.compile(r'gi\|([0-9]+)\|')
    acc_pattern = re.compile(r'(?:emb|gb|ref|dbj)\|(?P<acc>\w+\.[0-9]+)\|')

    for line in entrez.on_search(db='nucleotide', term=term, tool='summary'):
        if 'Name="Extra"' not in line:
            continue
        if not any(a in line for a in accessions):
            continue
        gi_match = gi_pattern.search(line)
        acc_match = acc_pattern.search(line)
        if gi_match is None or acc_match is None:
            # The line mentions an accession but lacks a 'gi|...|' or a
            # versioned '<db>|<accession>|' field; skip it rather than fail.
            continue
        print('%18s  ->  %s' % (acc_match.group('acc'), gi_match.group(1)))
Example #5
0
def application_3():
    """Fetch a large result set and store it on disk.

    Runs a 'fetch' for all chimpanzee mRNA records of the nucleotide database
    (>50,000 sequences), requesting FASTA, and writes every returned line,
    newline-terminated, to the file chimp.fna.
    """
    search_options = {
        'db': 'nucleotide',
        'term': 'chimpanzee[orgn] AND biomol mrna[prop]',
        'tool': 'fetch',
        'rettype': 'fasta',
    }
    with open('chimp.fna', 'w') as fasta_file:
        # The search is started only once the output file is open, and lines
        # are written as they are produced.
        fasta_file.writelines(
            text + '\n' for text in entrez.on_search(**search_options))
Example #6
0
def application_3():
    """Sample Application 3: Retrieving large datasets

    Fetches all chimpanzee mRNA sequences (>50,000 sequences) in FASTA format,
    saves them line by line to chimp.fna and then reports where the results
    were written.
    """
    search_options = dict(db='nucleotide',
                          term='chimpanzee[orgn] AND biomol mrna[prop]',
                          tool='fetch',
                          rettype='fasta')
    with open('chimp.fna', 'w') as fasta_file:
        # The search is started only once the output file is open, and lines
        # are written as they are produced.
        fasta_file.writelines(
            text + '\n' for text in ez.on_search(**search_options))
    print('The results are in file chimp.fna.')
Example #7
0
def application_2():
    """Turn accession numbers into sequence data.

    Takes a fixed list of accession numbers, combines them into a single
    '[accn]' query joined with OR, and prints each line of the FASTA data
    fetched for it.
    """
    # Input: the accessions, given as one comma-delimited string.
    accession_list = 'NM_009417,NM_000547,NM_001003009,NM_019353'.split(',')
    tagged = [accession + '[accn]' for accession in accession_list]

    # db2='protein' is passed through to on_search as-is; its exact effect is
    # defined by that function.
    search_options = {
        'db': 'nucleotide',
        'term': ' OR '.join(tagged),
        'tool': 'fetch',
        'db2': 'protein',
        'rettype': 'fasta',
    }

    # Output: the FASTA data, one printed line per returned line.
    for fasta_line in entrez.on_search(**search_options):
        print(fasta_line)
Example #8
0
def application_2():
    """Sample Application 2: Converting accession numbers to data

    Takes a fixed list of accession numbers, combines them into a single
    '[accn]' query joined with OR, and prints each line of the FASTA data
    fetched for it from the nuccore database.
    """
    # Input: the accessions, given as one comma-delimited string.
    accession_list = 'NM_009417,NM_000547,NM_001003009,NM_019353'.split(',')
    tagged = [accession + '[accn]' for accession in accession_list]

    search_options = dict(db='nuccore',
                          term=' OR '.join(tagged),
                          tool='fetch',
                          rettype='fasta')

    # Output: the FASTA data, one printed line per returned line.
    for fasta_line in ez.on_search(**search_options):
        print(fasta_line)