Ejemplo n.º 1
0
def seq_batch_query():
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    list_file = input('list file name:\n')
    with open(list_file, 'r') as In:
        organism_list = In.read().split(sep='\n')
    cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
    for organism in organism_list:
        cur.execute('INSERT INTO tasklist (Name) VALUES (?);', (organism, ))
    cur.execute(
        'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head',
        (organism))
    result = cur.fetchall()
    cur.execute('DROP TABLE tasklist;')
    cur.close()
    con.close()
    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        filename = i[2]
        sequence = MutableSeq(i[5])
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, filename, sequence]
        query_result.append(record)
    for i in query_result:
        with open(''.join(['./out/', i[1], '.fasta']), 'a') as Fileout:
            Fileout.write('>%s\n%s\n' % (i[0], i[2]))
            # rps12 may have larger than 50k fragments,  here to filter it
    rps12 = SeqIO.parse('./out/rps12.fasta', 'fasta')
    rps12short = list()
    for item in rps12:
        if len(item.seq) < 4000:
            rps12short.append(item)
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
Ejemplo n.º 2
0
def seq_batch_query():
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    list_file = input('list file name:\n')
    with open(list_file, 'r') as In:
        organism_list = In.read().split(sep='\n')
    cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
    for organism in organism_list:
        cur.execute('INSERT INTO tasklist (Name) VALUES (?);', (organism,))
    cur.execute(
        'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head',
        (organism))
    result = cur.fetchall()
    cur.execute('DROP TABLE tasklist;')
    cur.close()
    con.close()
    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        filename = i[2]
        sequence = MutableSeq(i[5])
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, filename, sequence]
        query_result.append(record)
    for i in query_result:
        with open(''.join(['./out/', i[1], '.fasta']), 'a') as Fileout:
            Fileout.write('>%s\n%s\n' % (i[0], i[2]))
            # rps12 may have larger than 50k fragments,  here to filter it
    rps12 = SeqIO.parse('./out/rps12.fasta', 'fasta')
    rps12short = list()
    for item in rps12:
        if len(item.seq) < 4000:
            rps12short.append(item)
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
Ejemplo n.º 3
0
def seq_query():
    """Sequence query function,  to be continued.
    """
    query_type = input('1.Specific fragment\n'
                       '2.Specific Organism\n'
                       '3.Specific gene\n'
                       '4.All\n'
                       '5.All cds\n')
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
            ('%' + gene + '%', frag_type, organism))
        result = cur.fetchall()
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n'
        )
        if frag_type == 'fragments':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ?  ORDER BY Head',
                (organism, ))
        else:
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                ('%' + organism + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
            ('%' + gene + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '4':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon'
        )
        result = cur.fetchall()
    elif query_type == '5':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = "cds" ORDER BY Taxon'
        )
        result = cur.fetchall()

    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        sequence = MutableSeq(i[5])
        gene = i[2]
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, gene, sequence]
        query_result.append(record)

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for i in query_result:
            file_name = ''.join(
                ['output', '/', i[1].replace('/', ''), '.fasta'])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for i in query_result:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))

    cur.close()
    con.close()
    print('Done.\n')
Ejemplo n.º 4
0
def seq_query():
    """Sequence query function,  to be continued.
    """
    query_type = input(
        '1.Specific fragment\n'
        '2.Specific Organism\n'
        '3.Specific gene\n'
        '4.All\n'
        '5.All cds\n'
    )
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
            ('%' + gene + '%', frag_type, organism))
        result = cur.fetchall()
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n')
        if frag_type == 'fragments':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ?  ORDER BY Head',
                (organism,))
        else:
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                ('%' + organism + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
            ('%' + gene + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '4':
        cur.execute('SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon')
        result = cur.fetchall()
    elif query_type == '5':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = "cds" ORDER BY Taxon')
        result = cur.fetchall()

    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        sequence = MutableSeq(i[5])
        gene = i[2]
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, gene, sequence]
        query_result.append(record)

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for i in query_result:
            file_name = ''.join([
                'output',
                '/',
                i[1].replace('/', ''),
                '.fasta'
            ])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for i in query_result:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))

    cur.close()
    con.close()
    print('Done.\n')