コード例 #1
0
def test_get_strains(mocker, capsys):
    """Exercise helper.get_strains against mocked os.listdir results.

    Cases covered, in order:
    - an empty listing and a listing with no matching file both print a
      "found no chromosome sequence files" message to stdout
    - a listing containing only 'missing_chr.fa' raises AssertionError
    - complete listings for one strain, two strains in one directory, and
      strains spread over two directories return (strain, directory) pairs
    """
    # ensure params are correct
    mocker.patch('align.align_helpers.gp.fasta_suffix',
                 '.fa')
    mocker.patch('align.align_helpers.gp.chrms',
                 ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX',
                  'X', 'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI'])

    mock_dir = mocker.patch('os.listdir')

    # directory with no files at all
    mock_dir.return_value = []
    helper.get_strains(['mock'])
    assert "found no chromosome sequence files in mock" in\
        capsys.readouterr().out

    # directory whose only entry is not a chromosome sequence file
    mock_dir.return_value = ['nothing']
    helper.get_strains(['invalid'])
    assert "found no chromosome sequence files in invalid" in\
        capsys.readouterr().out

    # keep the mock setup outside the raises block so only the call under
    # test can satisfy the expected AssertionError
    mock_dir.return_value = ['missing_chr.fa']
    with pytest.raises(AssertionError) as e:
        helper.get_strains(['noStrains'])

    # check the message of the raised exception itself rather than the
    # string form of pytest's ExceptionInfo wrapper
    assert "some strains in noStrains are missing" in str(e.value)

    # one strain with a file for every chromosome
    mock_dir.return_value = ['strain_chr{}.fa'.format(chrm)
                             for chrm in helper.gp.chrms]
    strains = helper.get_strains(['one_strain'])
    assert strains == [('strain', 'one_strain')]

    # two strains sharing one directory
    mock_dir.return_value = ['strain{}_chr{}.fa'.format(strain, chrm)
                             for chrm in helper.gp.chrms
                             for strain in (1, 2)]
    strains = helper.get_strains(['two/strains'])
    assert strains == [('strain1', 'two/strains'),
                       ('strain2', 'two/strains')]

    # two directories: side_effect yields one listing per os.listdir call;
    # the expected result lists 'strain' first even though its directory
    # is passed second
    mock_dir.side_effect = [['strain{}_chr{}.fa'.format(strain, chrm)
                             for chrm in helper.gp.chrms
                             for strain in (1, 2)],
                            ['strain_chr{}.fa'.format(chrm)
                             for chrm in helper.gp.chrms]]
    strains = helper.get_strains(['two_strains', 'one_strain'])
    assert strains == [('strain', 'one_strain'),
                       ('strain1', 'two_strains'),
                       ('strain2', 'two_strains')]
コード例 #2
0
def test_get_strains(mocker, capsys):
    """Exercise helper.get_strains against mocked os.listdir results.

    Cases covered, in order:
    - an empty listing and a listing with no matching file both print a
      "found no chromosome sequence files" message to stdout
    - a listing containing only 'missing_chr.fa' raises AssertionError
    - complete listings for one strain, two strains in one directory, and
      strains spread over two directories return (strain, directory) pairs
    """
    # ensure params are correct
    mocker.patch('align.align_helpers.gp.fasta_suffix', '.fa')
    mocker.patch('align.align_helpers.gp.chrms', [
        'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', 'XI',
        'XII', 'XIII', 'XIV', 'XV', 'XVI'
    ])

    mock_dir = mocker.patch('os.listdir')

    # directory with no files at all
    mock_dir.return_value = []
    helper.get_strains(['mock'])
    assert "found no chromosome sequence files in mock" in\
        capsys.readouterr().out

    # directory whose only entry is not a chromosome sequence file
    mock_dir.return_value = ['nothing']
    helper.get_strains(['invalid'])
    assert "found no chromosome sequence files in invalid" in\
        capsys.readouterr().out

    # keep the mock setup outside the raises block so only the call under
    # test can satisfy the expected AssertionError
    mock_dir.return_value = ['missing_chr.fa']
    with pytest.raises(AssertionError) as e:
        helper.get_strains(['noStrains'])

    # check the message of the raised exception itself rather than the
    # string form of pytest's ExceptionInfo wrapper
    assert "some strains in noStrains are missing" in str(e.value)

    # one strain with a file for every chromosome
    mock_dir.return_value = [
        'strain_chr{}.fa'.format(chrm) for chrm in helper.gp.chrms
    ]
    strains = helper.get_strains(['one_strain'])
    assert strains == [('strain', 'one_strain')]

    # two strains sharing one directory
    mock_dir.return_value = [
        'strain{}_chr{}.fa'.format(strain, chrm) for chrm in helper.gp.chrms
        for strain in (1, 2)
    ]
    strains = helper.get_strains(['two/strains'])
    assert strains == [('strain1', 'two/strains'), ('strain2', 'two/strains')]

    # two directories: side_effect yields one listing per os.listdir call;
    # the expected result lists 'strain' first even though its directory
    # is passed second
    mock_dir.side_effect = [[
        'strain{}_chr{}.fa'.format(strain, chrm) for chrm in helper.gp.chrms
        for strain in (1, 2)
    ], ['strain_chr{}.fa'.format(chrm) for chrm in helper.gp.chrms]]
    strains = helper.get_strains(['two_strains', 'one_strain'])
    assert strains == [('strain', 'one_strain'), ('strain1', 'two_strains'),
                       ('strain2', 'two_strains')]
コード例 #3
0
def read_setup_args(fn):
    """Read a space-delimited setup file and build the setup dictionary.

    Each line of the file is split on single spaces; the first field is
    the key and the remaining fields are its list of values.

    Args:
        fn: path of the setup file to read.

    Returns:
        dict with the keys:
        - 'references': list of values from the 'references' line
        - 'reference_directories': dict pairing each reference with the
          value at the same position on the 'reference_directories' line
        - 'alignments_directory': first value of that line
        - 'strain_dirs': result of align_helpers.get_strains called on
          the 'test_strain_directories' values

    Raises:
        KeyError: if a required key is absent from the file.
        IndexError: if the 'alignments_directory' line has no value.
    """

    x = {}

    # the context manager closes the file even if parsing raises
    with open(fn, 'r') as f:
        for line in f:
            # strip only the newline, so a final line that lacks one does
            # not lose its last character
            fields = line.rstrip('\n').split(' ')
            x[fields[0]] = fields[1:]

    d = {}
    d['references'] = x['references']
    d['reference_directories'] = \
        dict(zip(x['references'], x['reference_directories']))
    d['alignments_directory'] = x['alignments_directory'][0]

    d['strain_dirs'] = \
        align_helpers.get_strains(x['test_strain_directories'])

    return d
コード例 #4
0
def read_setup_args(fn):
    """Parse a space-delimited setup file into the setup dictionary.

    Every line is split on single spaces: the first field becomes the key
    and the remaining fields become that key's list of values.

    Args:
        fn: path of the setup file to read.

    Returns:
        dict with the keys:
        - 'references': list of values from the 'references' line
        - 'reference_directories': dict pairing each reference with the
          value at the same position on the 'reference_directories' line
        - 'alignments_directory': first value of that line
        - 'strain_dirs': result of align_helpers.get_strains called on
          the 'test_strain_directories' values

    Raises:
        KeyError: if a required key is absent from the file.
        IndexError: if the 'alignments_directory' line has no value.
    """

    x = {}

    # 'with' guarantees the handle is closed even when a line fails to parse
    with open(fn, 'r') as f:
        for line in f:
            # remove just the trailing newline; slicing off the last
            # character would corrupt an unterminated final line
            fields = line.rstrip('\n').split(' ')
            x[fields[0]] = fields[1:]

    d = {}
    d['references'] = x['references']
    d['reference_directories'] = \
        dict(zip(x['references'], x['reference_directories']))
    d['alignments_directory'] = x['alignments_directory'][0]

    d['strain_dirs'] = \
        align_helpers.get_strains(x['test_strain_directories'])

    return d
コード例 #5
0
            d = diffs_per_site(seqs[keys[i]].lower(), seqs[keys[j]].lower())
            if d != 'NA':
                num += d
                den += 1
    if den == 0:
        return 'NA'
    return float(num) / den


# read in shared regions
# (read_table_rows returns two values; only the first is kept here and it
# is indexed below by region number, then by column name)
shared_regions, _ = \
    read_table.read_table_rows('shared_introgression_nonsingleton_list.txt',
                               '\t')

# read in strain dirs information
s = align_helpers.get_strains(align_helpers.flatten(gp.non_ref_dirs.values()))
# get_strains yields pairs, so dict() maps the first element of each pair
# to the second (presumably strain name -> directory; confirm against
# get_strains)
strain_dirs = dict(s)

# for each shared region:
# - calculate fraction of sites that are polymorphic among introgressed strains
# - for each introgressed strain, calculate:
#   - number of unique variants among introgressed strains (or all strains?)
f = open('shared_introgression_nonsingleton_polymorphism.txt', 'w')
# header row of the tab-delimited output file
f.write('region_number\tchromosome\tstart\tend\tpi\t'
        'frac_poly\tnum_poly\tnum_total\tnum_strains\tstrain_list\n')
# process regions one chromosome at a time
for chrm in gp.chrms:

    chrom_seqs = {}
    for region_number in shared_regions.keys():
        # skip regions that lie on a different chromosome
        if shared_regions[region_number]['chromosome'] != chrm:
            continue
コード例 #6
0
import os
from align.align_helpers import get_strains, flatten
import global_params as gp

# get all non-reference strains of cerevisiae and paradoxus
s = get_strains(flatten(gp.non_ref_dirs.values()))

gp_dir = '../'
# names of files already present in the alignments directory; stays empty
# unless gp.resume_alignment is set, in which case nothing is treated as
# already aligned
a = []
if gp.resume_alignment:
    a = os.listdir(gp_dir + gp.alignments_dir)

# need to add this on the start of each command because os.system()
# creates a new shell instance every time
cmd_string_start = 'export MUGSY_INSTALL=' + gp.mugsy_install_path + '; '
cmd_string_start += 'export PATH=$PATH:$MUGSY_INSTALL:$MUGSY_INSTALL/mapping; '
cmd_string_start += 'export PERL5LIB=$MUGSY_INSTALL/perllibs; '

# alignment file names start with the reference names joined by '_'
ref_prefix = '_'.join(gp.alignment_ref_order) + '_'
# reference directories, in the same order as gp.alignment_ref_order
ref_dirs = [gp.ref_dir[ref] for ref in gp.alignment_ref_order]

# s is iterated as (strain, d) pairs
for strain, d in s:
    print(strain)

    # every command starts from the shared environment-export prefix
    cmd_string = cmd_string_start

    # NOTE(review): this iterates over a single-element list, so only the
    # last chromosome in gp.chrms is processed -- confirm this is intended
    for chrm in [gp.chrms[-1]]:
        align_fn = ref_prefix + strain + '_chr' + chrm + gp.alignment_suffix
        # if we don't already have an alignment for this strain/chromosome,
        # then make one
        if align_fn not in a:
# strain
# one independent dict per chromosome; each maps a strain to its list of
# (start, end) integer pairs
regions_by_chrm_and_strain = {chrm: {} for chrm in gp.chrms}

# path of the tab-delimited introgressed-blocks summary table for this tag
fn_regions = ''.join([
    gp.analysis_out_dir_absolute, tag, '/',
    'introgressed_blocks_filtered_par_', tag, '_summary_plus.txt'])
d, labels = read_table.read_table_rows(fn_regions, '\t')

# file every region under its chromosome and strain, creating the
# per-strain list the first time a strain is seen on a chromosome
for region in d:
    chrm = d[region]['chromosome']
    strain = d[region]['strain']
    regions_by_chrm_and_strain[chrm].setdefault(strain, []).append(
        (int(d[region]['start']), int(d[region]['end'])))
# read in all strains
strain_dirs = align_helpers.get_strains(gp.non_ref_dirs[gp.master_ref])
num_strains = len(strain_dirs)

# read in genes in reference sequence into dictionary keyed by
# chromosome
# each value is a list of (int, int) tuples built from the second and
# third tab-separated fields of every line (presumably gene start and
# end coordinates -- confirm against the *_genes.txt format)
ref_genes = {}
for chrm in gp.chrms:
    ref_genes[chrm] = []
    # one genes file per chromosome for the master reference
    f = open(
        gp.analysis_out_dir_absolute + gp.master_ref + '_chr' + chrm +
        '_genes.txt', 'r')
    line = f.readline()
    # readline() returns '' only at end of file
    while line != '':
        # drop the last character (expected to be the newline) and split
        # the line into tab-separated fields
        line = line[:-1].split('\t')
        ref_genes[chrm].append((int(line[1]), int(line[2])))
        line = f.readline()