Esempio n. 1
0
def load_one_file(args, cxn, file_name, ends, seq_end_clamp=''):
    """Read one fasta/fastq file and store its records in the database.

    Each parsed record becomes a (name, end, sequence) row. Rows are
    handed to db.insert_sequences_batch in groups of db.BATCH_SIZE, and
    whatever remains after the last full group is written at the end.
    """
    message = 'Loading "{}" into sqlite database'.format(file_name)
    log.info(message)

    read_records = get_parser(args, file_name)

    with util.open_file(args, file_name) as handle:
        pending = []

        for record in read_records(handle):
            header = record[0].strip()
            sequence = record[1]
            name, end = blast.parse_fasta_title(header, ends, seq_end_clamp)
            pending.append((name, end, sequence))

            if len(pending) < db.BATCH_SIZE:
                continue

            db.insert_sequences_batch(cxn, pending)
            pending = []

        # Write the rows left over after the last full batch.
        db.insert_sequences_batch(cxn, pending)
Esempio n. 2
0
def load_one_file(args, cxn, file_name, ends, seq_end_clamp=''):
    """Read one fasta/fastq file and store its records in the database.

    Each parsed record becomes a (name, end, sequence) row. Rows are
    handed to db.insert_sequences_batch in groups of db.BATCH_SIZE, and
    whatever remains after the last full group is written at the end.
    """
    message = 'Loading "{}" into sqlite database'.format(file_name)
    log.info(message)

    read_records = get_parser(args, file_name)

    with util.open_file(args, file_name) as handle:
        pending = []

        for record in read_records(handle):
            header = record[0].strip()
            sequence = record[1]
            name, end = blast.parse_fasta_title(header, ends, seq_end_clamp)
            pending.append((name, end, sequence))

            if len(pending) < db.BATCH_SIZE:
                continue

            db.insert_sequences_batch(cxn, pending)
            pending = []

        # Write the rows left over after the last full batch.
        db.insert_sequences_batch(cxn, pending)
Esempio n. 3
0
def test_parse_fasta_title_10():
    """Single-ends mode: 'title after' yields name 'title' and an empty end."""
    title, ends, clamp = 'title after', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
Esempio n. 4
0
def test_parse_fasta_title_09():
    """Mixed-ends mode: a space-separated '2' is reported as the end."""
    title, ends, clamp = 'title 2 after', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
Esempio n. 5
0
def test_parse_fasta_title_07():
    """Mixed-ends mode: a bare 'title' gives that name and an empty end."""
    title, ends, clamp = 'title', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
Esempio n. 6
0
def test_parse_fasta_title_06():
    """A '.1' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title.1 after', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
Esempio n. 7
0
def test_parse_fasta_title_05():
    """A '_1' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title_1', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
Esempio n. 8
0
def test_parse_fasta_title_04():
    """A '/2' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
Esempio n. 9
0
def test_parse_fasta_title_01():
    """All-empty arguments produce an empty name and an empty end."""
    title, ends, clamp = '', '', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == ''
    assert end == ''
Esempio n. 10
0
def test_parse_fasta_title_12():
    """Single-ends mode: 'title 2 words' yields name 'title', empty end."""
    title, ends, clamp = 'title 2 words', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
Esempio n. 11
0
def test_parse_fasta_title_10():
    """Single-ends mode: 'title after' yields name 'title' and an empty end."""
    title, ends, clamp = 'title after', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
Esempio n. 12
0
def test_parse_fasta_title_09():
    """Mixed-ends mode: a space-separated '2' is reported as the end."""
    title, ends, clamp = 'title 2 after', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
Esempio n. 13
0
def test_parse_fasta_title_07():
    """Mixed-ends mode: a bare 'title' gives that name and an empty end."""
    title, ends, clamp = 'title', 'mixed_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == ''
Esempio n. 14
0
def test_parse_fasta_title_06():
    """A '.1' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title.1 after', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
Esempio n. 15
0
def test_parse_fasta_title_05():
    """A '_1' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title_1', 'end_1', '1'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '1'
Esempio n. 16
0
def test_parse_fasta_title_01():
    """All-empty arguments produce an empty name and an empty end."""
    title, ends, clamp = '', '', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == ''
    assert end == ''
Esempio n. 17
0
def test_parse_fasta_title_12():
    """Single-ends mode: 'title 2 words' yields name 'title 2', empty end."""
    title, ends, clamp = 'title 2 words', 'single_ends', ''
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title 2'
    assert end == ''
Esempio n. 18
0
def test_parse_fasta_title_03():
    """A name ending in a digit ('title1') survives when '/2' is split off."""
    title, ends, clamp = 'title1/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title1'
    assert end == '2'
Esempio n. 19
0
def test_parse_fasta_title_04():
    """A '/2' suffix on the title is split off as the sequence end."""
    title, ends, clamp = 'title/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title'
    assert end == '2'
Esempio n. 20
0
def test_parse_fasta_title_03():
    """A name ending in a digit ('title1') survives when '/2' is split off."""
    title, ends, clamp = 'title1/2 after', 'end_2', '2'
    name, end = blast.parse_fasta_title(title, ends, clamp)
    assert name == 'title1'
    assert end == '2'