def load_one_file(args, cxn, file_name, ends, seq_end_clamp=''):
    """Read the records of one fasta/fastq file and store them in the database.

    Each record's title is stripped and split into a sequence name and a
    sequence end by ``blast.parse_fasta_title``. The resulting
    ``(name, end, sequence)`` rows are handed to
    ``db.insert_sequences_batch`` whenever ``db.BATCH_SIZE`` rows have
    accumulated, and whatever remains is written once the file is exhausted.
    """
    log.info('Loading "{}" into sqlite database'.format(file_name))

    read_records = get_parser(args, file_name)

    with util.open_file(args, file_name) as handle:
        pending = []

        for record in read_records(handle):
            header = record[0].strip()
            sequence = record[1]
            name, end = blast.parse_fasta_title(header, ends, seq_end_clamp)
            pending.append((name, end, sequence))

            if len(pending) < db.BATCH_SIZE:
                continue

            db.insert_sequences_batch(cxn, pending)
            pending = []

        # The last write is unconditional, so it may be given an empty list.
        db.insert_sequences_batch(cxn, pending)
def test_parse_fasta_title_10():
    """It returns the first word and an empty end for a single-ended title."""
    title, ends, clamp = 'title after', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
def test_parse_fasta_title_09():
    """It picks up a space separated end when the ends are mixed."""
    title, ends, clamp = 'title 2 after', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
def test_parse_fasta_title_07():
    """It leaves the end empty when a mixed-ends title carries no end."""
    title, ends, clamp = 'title', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
def test_parse_fasta_title_06():
    """It splits the end off a title that uses a dot as the delimiter."""
    title, ends, clamp = 'title.1 after', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
def test_parse_fasta_title_05():
    """It splits the end off a title that uses an underscore as the delimiter."""
    title, ends, clamp = 'title_1', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
def test_parse_fasta_title_04():
    """It splits the end off a title that uses a slash as the delimiter."""
    parsed = blast.parse_fasta_title('title/2 after', 'end_2', '2')
    name, end = parsed
    assert name == 'title'
    assert end == '2'
def test_parse_fasta_title_01():
    """It returns an empty name and an empty end when every input is empty."""
    empty = ''
    name, end = blast.parse_fasta_title(empty, empty, empty)
    assert name == ''
    assert end == ''
def test_parse_fasta_title_12():
    """It ignores a space separated end when the ends are single."""
    # NOTE(review): another test with this same name is visible in this
    # source and expects seq_name == 'title 2' for the same input; confirm
    # which expectation matches the current parse_fasta_title.
    parsed = blast.parse_fasta_title('title 2 words', 'single_ends', '')
    name, end = parsed
    assert name == 'title'
    assert end == ''
def test_parse_fasta_title_10():
    """It keeps only the first word and reports no end for single ends."""
    parsed = blast.parse_fasta_title('title after', 'single_ends', '')
    name, end = parsed
    assert name == 'title'
    assert end == ''
def test_parse_fasta_title_09():
    """It reads the end from the second word of a mixed-ends title."""
    parsed = blast.parse_fasta_title('title 2 after', 'mixed_ends', '')
    name, end = parsed
    assert name == 'title'
    assert end == '2'
def test_parse_fasta_title_07():
    """It reports no end for a one-word title with mixed ends."""
    parsed = blast.parse_fasta_title('title', 'mixed_ends', '')
    name, end = parsed
    assert name == 'title'
    assert end == ''
def test_parse_fasta_title_06():
    """It separates name and end when they are joined by a dot."""
    parsed = blast.parse_fasta_title('title.1 after', 'end_1', '1')
    name, end = parsed
    assert name == 'title'
    assert end == '1'
def test_parse_fasta_title_05():
    """It separates name and end when they are joined by an underscore."""
    parsed = blast.parse_fasta_title('title_1', 'end_1', '1')
    name, end = parsed
    assert name == 'title'
    assert end == '1'
def test_parse_fasta_title_12():
    """It keeps a space separated end in the name when the ends are single."""
    # NOTE(review): another test with this same name is visible in this
    # source and expects seq_name == 'title' for the same input; confirm
    # which expectation matches the current parse_fasta_title.
    title, ends, clamp = 'title 2 words', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title 2'
    assert end == ''
def test_parse_fasta_title_03():
    """It keeps a digit that belongs to the name when an end follows it."""
    title, ends, clamp = 'title1/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title1'
    assert end == '2'
def test_parse_fasta_title_04():
    """It separates name and end when they are joined by a slash."""
    title, ends, clamp = 'title/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
def test_parse_fasta_title_03():
    """It does not mistake a trailing 1 in the name for the sequence end."""
    parsed = blast.parse_fasta_title('title1/2 after', 'end_2', '2')
    name, end = parsed
    assert name == 'title1'
    assert end == '2'