def seq_batch_query():
    """Export sequences for every organism named in a user-supplied list file.

    Prompts for the path of a text file holding one organism name per line,
    selects every row of table ``main`` in ``./data/DB`` whose ``Organism``
    equals one of those names (ordered by ``Head``), and appends each
    sequence as a FASTA record to ``./out/<Name>.fasta``.  Rows whose
    ``Strand`` equals the text ``'-1'`` are reverse-complemented first.
    Afterwards, if ``./out/rps12.fasta`` exists, its records shorter than
    4000 bases are written to ``./out/rps12short.fasta``.

    Raises:
        OSError: if the list file cannot be read or an output file cannot
            be written.
        sqlite3.Error: if the database query fails.
    """
    import os

    list_file = input('list file name:\n')
    with open(list_file, 'r') as list_handle:
        # Drop blank lines (a trailing newline used to yield an empty name).
        organism_list = [
            line.strip() for line in list_handle if line.strip()
        ]

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        # A TEMP table is private to this connection and vanishes when the
        # connection closes, so nothing is left behind in the database file
        # even if an error occurs half-way.
        cur.execute('CREATE TEMP TABLE tasklist (Name TEXT);')
        cur.executemany(
            'INSERT INTO temp.tasklist (Name) VALUES (?);',
            ((name,) for name in organism_list))
        # The statement has no placeholders, so no parameters are passed.
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence '
            'FROM main '
            'WHERE Organism IN (SELECT Name FROM temp.tasklist) '
            'ORDER BY Head')
        result = cur.fetchall()
        cur.close()
    finally:
        con.close()

    # Group the FASTA entries per output file so each file is opened once;
    # dict insertion order keeps the records in query order.
    entries_by_file = {}
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        # NOTE(review): Strand is compared as text; confirm the column
        # really stores the string '-1'.
        if row[4] == '-1':
            # Depending on the Biopython release, reverse_complement() on a
            # MutableSeq either works in place (returning None or the same
            # object) or returns a new object.  Keep whichever object holds
            # the reverse complement.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        # '/' cannot appear in a file name; strip it as seq_query does.
        filename = row[2].replace('/', '')
        entries_by_file.setdefault(filename, []).append(
            '>%s\n%s\n' % (title, sequence))

    os.makedirs('./out', exist_ok=True)
    for filename, entries in entries_by_file.items():
        with open(''.join(['./out/', filename, '.fasta']), 'a') as fileout:
            fileout.writelines(entries)

    # rps12 may contain fragments larger than 50k; keep only the short ones.
    rps12_path = './out/rps12.fasta'
    if os.path.exists(rps12_path):
        rps12short = [
            item for item in SeqIO.parse(rps12_path, 'fasta')
            if len(item.seq) < 4000
        ]
        SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
def seq_batch_query():
    """Export sequences for every organism named in a user-supplied list file.

    Prompts for the path of a text file holding one organism name per line,
    selects every row of table ``main`` in ``./data/DB`` whose ``Organism``
    equals one of those names (ordered by ``Head``), and appends each
    sequence as a FASTA record to ``./out/<Name>.fasta``.  Rows whose
    ``Strand`` equals the text ``'-1'`` are reverse-complemented first.
    Afterwards, if ``./out/rps12.fasta`` exists, its records shorter than
    4000 bases are written to ``./out/rps12short.fasta``.

    Raises:
        OSError: if the list file cannot be read or an output file cannot
            be written.
        sqlite3.Error: if the database query fails.
    """
    import os

    list_file = input('list file name:\n')
    with open(list_file, 'r') as list_handle:
        # Drop blank lines (a trailing newline used to yield an empty name).
        organism_list = [
            line.strip() for line in list_handle if line.strip()
        ]

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        # A TEMP table is private to this connection and vanishes when the
        # connection closes, so nothing is left behind in the database file
        # even if an error occurs half-way.
        cur.execute('CREATE TEMP TABLE tasklist (Name TEXT);')
        cur.executemany(
            'INSERT INTO temp.tasklist (Name) VALUES (?);',
            ((name,) for name in organism_list))
        # The statement has no placeholders, so no parameters are passed.
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence '
            'FROM main '
            'WHERE Organism IN (SELECT Name FROM temp.tasklist) '
            'ORDER BY Head')
        result = cur.fetchall()
        cur.close()
    finally:
        con.close()

    # Group the FASTA entries per output file so each file is opened once;
    # dict insertion order keeps the records in query order.
    entries_by_file = {}
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        # NOTE(review): Strand is compared as text; confirm the column
        # really stores the string '-1'.
        if row[4] == '-1':
            # Depending on the Biopython release, reverse_complement() on a
            # MutableSeq either works in place (returning None or the same
            # object) or returns a new object.  Keep whichever object holds
            # the reverse complement.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        # '/' cannot appear in a file name; strip it as seq_query does.
        filename = row[2].replace('/', '')
        entries_by_file.setdefault(filename, []).append(
            '>%s\n%s\n' % (title, sequence))

    os.makedirs('./out', exist_ok=True)
    for filename, entries in entries_by_file.items():
        with open(''.join(['./out/', filename, '.fasta']), 'a') as fileout:
            fileout.writelines(entries)

    # rps12 may contain fragments larger than 50k; keep only the short ones.
    rps12_path = './out/rps12.fasta'
    if os.path.exists(rps12_path):
        rps12short = [
            item for item in SeqIO.parse(rps12_path, 'fasta')
            if len(item.seq) < 4000
        ]
        SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
def seq_query():
    """Interactively query sequences from ``./data/DB`` and write them as FASTA.

    The user picks one of five query types (a specific fragment, one
    organism, one gene, everything, or all cds) and answers the follow-up
    prompts.  Matching rows of table ``main`` are turned into FASTA records
    titled ``Taxon|Organism|Name|Type``; rows whose ``Strand`` equals the
    text ``'-1'`` are reverse-complemented first.

    With "organize" answered ``y`` the records are appended to
    ``output/<Name>.fasta`` (one file per fragment name, ``/`` removed from
    the name); otherwise the user is asked for a file name and all records
    are written to ``<name>.fasta``, overwriting it.

    Raises:
        ValueError: if the query type is not one of ``'1'`` to ``'5'``.
    """
    query_type = input('1.Specific fragment\n'
                       '2.Specific Organism\n'
                       '3.Specific gene\n'
                       '4.All\n'
                       '5.All cds\n')
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')

    # Head is only needed for ordering, so it is not selected.
    select = ('SELECT Taxon, Organism, Name, Type, Strand, Sequence '
              'FROM main')
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        sql = select + ' WHERE Name LIKE ? AND Type = ? AND Organism=?'
        params = ('%' + gene + '%', frag_type, organism)
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n'
        )
        if frag_type == 'fragments':
            # "fragments" means every row of exactly this organism.
            sql = select + ' WHERE Organism = ? ORDER BY Head'
            params = (organism, )
        else:
            sql = (select +
                   ' WHERE Organism LIKE ? AND Type = ? ORDER BY Head')
            params = ('%' + organism + '%', frag_type)
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        sql = select + ' WHERE Name LIKE ? AND Type = ? ORDER BY Taxon'
        params = ('%' + gene + '%', frag_type)
    elif query_type == '4':
        sql = select + ' ORDER BY Taxon'
        params = ()
    else:
        # Bind 'cds' as a value: a double-quoted "cds" is an identifier in
        # SQL and only works as a string through a legacy SQLite fallback.
        sql = select + ' WHERE Type = ? ORDER BY Taxon'
        params = ('cds', )

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        cur.execute(sql, params)
        result = cur.fetchall()
        cur.close()
    finally:
        # Release the database even when the query fails.
        con.close()

    query_result = []
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        # NOTE(review): Strand is compared as text; confirm the column
        # really stores the string '-1'.
        if row[4] == '-1':
            # Depending on the Biopython release, reverse_complement() on a
            # MutableSeq either works in place (returning None or the same
            # object) or returns a new object.  Keep whichever object holds
            # the reverse complement.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        query_result.append((row[2], '>%s\n%s\n' % (title, sequence)))

    if organize.strip().lower() == 'y':
        if not exists('output'):
            makedirs('output')
        # Group entries per file so each file is opened only once; dict
        # insertion order keeps the records in query order.
        entries_by_file = {}
        for gene_name, entry in query_result:
            file_name = ''.join(
                ['output', '/', gene_name.replace('/', ''), '.fasta'])
            entries_by_file.setdefault(file_name, []).append(entry)
        for file_name, entries in entries_by_file.items():
            with open(file_name, 'a') as output_file:
                output_file.writelines(entries)
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            output_file.writelines(entry for _, entry in query_result)
    print('Done.\n')
def seq_query():
    """Interactively query sequences from ``./data/DB`` and write them as FASTA.

    The user picks one of five query types (a specific fragment, one
    organism, one gene, everything, or all cds) and answers the follow-up
    prompts.  Matching rows of table ``main`` are turned into FASTA records
    titled ``Taxon|Organism|Name|Type``; rows whose ``Strand`` equals the
    text ``'-1'`` are reverse-complemented first.

    With "organize" answered ``y`` the records are appended to
    ``output/<Name>.fasta`` (one file per fragment name, ``/`` removed from
    the name); otherwise the user is asked for a file name and all records
    are written to ``<name>.fasta``, overwriting it.

    Raises:
        ValueError: if the query type is not one of ``'1'`` to ``'5'``.
    """
    query_type = input(
        '1.Specific fragment\n'
        '2.Specific Organism\n'
        '3.Specific gene\n'
        '4.All\n'
        '5.All cds\n'
    )
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')

    # Head is only needed for ordering, so it is not selected.
    select = ('SELECT Taxon, Organism, Name, Type, Strand, Sequence '
              'FROM main')
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        sql = select + ' WHERE Name LIKE ? AND Type = ? AND Organism=?'
        params = ('%' + gene + '%', frag_type, organism)
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n')
        if frag_type == 'fragments':
            # "fragments" means every row of exactly this organism.
            sql = select + ' WHERE Organism = ? ORDER BY Head'
            params = (organism,)
        else:
            sql = (select +
                   ' WHERE Organism LIKE ? AND Type = ? ORDER BY Head')
            params = ('%' + organism + '%', frag_type)
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        sql = select + ' WHERE Name LIKE ? AND Type = ? ORDER BY Taxon'
        params = ('%' + gene + '%', frag_type)
    elif query_type == '4':
        sql = select + ' ORDER BY Taxon'
        params = ()
    else:
        # Bind 'cds' as a value: a double-quoted "cds" is an identifier in
        # SQL and only works as a string through a legacy SQLite fallback.
        sql = select + ' WHERE Type = ? ORDER BY Taxon'
        params = ('cds',)

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        cur.execute(sql, params)
        result = cur.fetchall()
        cur.close()
    finally:
        # Release the database even when the query fails.
        con.close()

    query_result = []
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        # NOTE(review): Strand is compared as text; confirm the column
        # really stores the string '-1'.
        if row[4] == '-1':
            # Depending on the Biopython release, reverse_complement() on a
            # MutableSeq either works in place (returning None or the same
            # object) or returns a new object.  Keep whichever object holds
            # the reverse complement.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        query_result.append((row[2], '>%s\n%s\n' % (title, sequence)))

    if organize.strip().lower() == 'y':
        if not exists('output'):
            makedirs('output')
        # Group entries per file so each file is opened only once; dict
        # insertion order keeps the records in query order.
        entries_by_file = {}
        for gene_name, entry in query_result:
            file_name = ''.join([
                'output', '/', gene_name.replace('/', ''), '.fasta'
            ])
            entries_by_file.setdefault(file_name, []).append(entry)
        for file_name, entries in entries_by_file.items():
            with open(file_name, 'a') as output_file:
                output_file.writelines(entries)
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            output_file.writelines(entry for _, entry in query_result)
    print('Done.\n')