def test_flatten():
    """Check helper.flatten against a table of nested-list inputs.

    Each case pairs a list of lists with the flat list expected back.
    The expectations show that element order follows the input and that
    repeated values are kept.
    """
    cases = (
        ([], []),                    # nothing to flatten
        ([[]], []),                  # one empty sublist
        ([[1]], [1]),                # one singleton sublist
        ([[2], [1]], [2, 1]),        # order of the input, not sorted
        ([[1, 2], [1]], [1, 2, 1]),  # duplicates survive
    )
    for nested, expected in cases:
        assert helper.flatten(nested) == expected
Ejemplo n.º 2
0
            d = diffs_per_site(seqs[keys[i]].lower(), seqs[keys[j]].lower())
            if d != 'NA':
                num += d
                den += 1
    if den == 0:
        return 'NA'
    return float(num) / den


# Read in the shared introgressed regions from a tab-separated table.
# read_table_rows returns two values; only the first is kept. The loop
# further down indexes it as shared_regions[region_number]['chromosome'].
shared_regions, _ = \
    read_table.read_table_rows('shared_introgression_nonsingleton_list.txt',
                               '\t')

# Read in strain dirs information.
# get_strains() must yield key/value pairs, because the result is fed
# straight into dict() -- presumably (strain, directory) pairs; verify
# against align_helpers.
s = align_helpers.get_strains(align_helpers.flatten(gp.non_ref_dirs.values()))
strain_dirs = dict(s)

# for each shared region:
# - calculate fraction of sites that are polymorphic among introgressed strains
# - for each introgressed strain, calculate:
#   - number of unique variants among introgressed strains (or all strains?)
#
# Output is a tab-separated file; mode 'w' truncates any existing file and
# the header row below names its ten columns.
# NOTE(review): f is not closed anywhere in this part of the script --
# confirm it is closed once all regions have been written.
f = open('shared_introgression_nonsingleton_polymorphism.txt', 'w')
f.write('region_number\tchromosome\tstart\tend\tpi\t'
        'frac_poly\tnum_poly\tnum_total\tnum_strains\tstrain_list\n')
for chrm in gp.chrms:

    chrom_seqs = {}
    for region_number in shared_regions.keys():
        if shared_regions[region_number]['chromosome'] != chrm:
            continue
def test_flatten():
    """Check helper.flatten against a table of nested-list inputs.

    Each case pairs a list of lists with the flat list expected back.
    The expectations show that element order follows the input and that
    repeated values are kept.
    """
    cases = (
        ([], []),                    # nothing to flatten
        ([[]], []),                  # one empty sublist
        ([[1]], [1]),                # one singleton sublist
        ([[2], [1]], [2, 1]),        # order of the input, not sorted
        ([[1, 2], [1]], [1, 2, 1]),  # duplicates survive
    )
    for nested, expected in cases:
        assert helper.flatten(nested) == expected
Ejemplo n.º 4
0
import os
from align.align_helpers import get_strains, flatten
import global_params as gp

# All non-reference strains of cerevisiae and paradoxus, as produced by
# get_strains() from the flattened values of gp.non_ref_dirs.
s = get_strains(flatten(gp.non_ref_dirs.values()))

gp_dir = '../'
# When resuming, remember what is already in the alignments directory so
# existing alignments can be recognised by file name; otherwise start empty.
a = os.listdir(gp_dir + gp.alignments_dir) if gp.resume_alignment else []

# os.system() creates a new shell instance every time, so the Mugsy
# environment exports have to be prepended to each command string.
cmd_string_start = ''.join((
    'export MUGSY_INSTALL=' + gp.mugsy_install_path + '; ',
    'export PATH=$PATH:$MUGSY_INSTALL:$MUGSY_INSTALL/mapping; ',
    'export PERL5LIB=$MUGSY_INSTALL/perllibs; ',
))

# Reference names joined with underscores (plus a trailing underscore),
# and the reference directories in the same order.
ref_prefix = '_'.join(gp.alignment_ref_order) + '_'
ref_dirs = [gp.ref_dir[ref] for ref in gp.alignment_ref_order]

for strain, d in s:
    print(strain)

    cmd_string = cmd_string_start

    for chrm in [gp.chrms[-1]]:
        align_fn = ref_prefix + strain + '_chr' + chrm + gp.alignment_suffix
        # if we don't already have an alignment for this strain/chromosome,
        # then make one
        if align_fn not in a:
Ejemplo n.º 5
0

# Alternative region on chromosome II, kept for reference (disabled):
# region_start = 787000
# region_end = 794000
# chrm = 'II'

# Active region on chromosome IV, widened by 100 positions on each side.
region_start = 917571 - 100
region_end = 921647 + 100
chrm = 'IV'
# Both endpoints are counted, hence the + 1.
region_length = region_end - region_start + 1

# ======
# get strains
# ======

# Result of get_strains() over the flattened values of gp.non_ref_dirs;
# only its length is used here.
strain_dirs = align_helpers.get_strains(
    align_helpers.flatten(gp.non_ref_dirs.values()))
num_strains = len(strain_dirs)


# ======
# loop through all strains, getting appropriate sequence
# ======

# master reference and other reference seqs
# The master reference is the first entry of gp.alignment_ref_order; its
# fasta path is <ref_dir><ref_fn_prefix>_chr<chrm><fasta_suffix>.
master_ref = gp.alignment_ref_order[0]
master_fn = gp.ref_dir[master_ref] + gp.ref_fn_prefix[master_ref] + '_chr' + \
            chrm + gp.fasta_suffix
# Take element [1][0] of the read_fasta() result -- presumably the first
# sequence in the file; verify against read_fasta. The slice includes
# region_end (matching region_length) and the result is lower-cased.
# NOTE(review): the slice uses region_start as a 0-based index -- confirm
# the coordinates above are 0-based too.
master_seq = read_fasta.read_fasta(master_fn)[1][0][
    region_start:region_end+1].lower()