Ejemplo n.º 1
0
def test_two_insertions_in_one_position():
    """Two insertions listed at position 16129 are parsed as two sites."""
    text = '16129.1A 16129.2A'

    site_count = len(str2sites(text))
    assert site_count == 2

    first_position = str2sites(text)[0].position
    assert 16129 == first_position

    # Either insertion may come first, so accept both insert indices.
    first_insert = str2sites(text)[0].insert
    assert first_insert in (1, 2)
Ejemplo n.º 2
0
def test_two_insertions_in_plus_format():
    """'16129+AA' is parsed into two sites."""
    parsed = str2sites('16129+AA')
    site_count = len(parsed)
    assert site_count == 2
Ejemplo n.º 3
0
def test_two_insertions_in_plus_format_add16k():
    """'129+AA' with the second argument set to True is parsed into two sites."""
    parsed = str2sites('129+AA', True)
    site_count = len(parsed)
    assert site_count == 2
Ejemplo n.º 4
0
        # Parse the HVR2 column of the current row.  A MtconvertError is not
        # propagated; it is recorded together with the current line number.
        # NOTE(review): `if hvr2:` is also false for column index 0 -- confirm
        # whether a 0-based column 0 should be accepted here.
        # NOTE(review): `except X, e` is Python 2-only syntax; `except X as e`
        # is accepted by Python 2.6+ and Python 3.
        if hvr2:
            try:
                polys = str2sites(l[hvr2])
                polymorphisms += polys
            except MtconvertError, e:
                errors.append( (line_number, e) )

        # Individually typed sites: column sites[i] of the row holds the value
        # for position sites_on_rCRS[i].  Only the plain bases A/G/C/T are
        # used; any other cell content is skipped.
        if sites:
            for i in range(0,len(sites)):
                site_index = sites[i]
                position   = sites_on_rCRS[i]
                value      = l[site_index].strip().upper()
                if value in ('A','G','C','T'):
                    try:
                        # NOTE(review): the hvr2 branch above extends
                        # `polymorphisms` with the str2sites() result, while
                        # this branch appends the result as one element --
                        # confirm which of the two is intended.
                        poly = str2sites('%d%s' % (position,value))
                        polymorphisms.append(poly)
                        # The typed position is added to this sample's segments.
                        this_sample_segments.append(position)
                    except MtconvertError, e:
                        errors.append( (line_number, e) )

        # Build the coverage object from the segments collected for this row.
        coverage = Coverage(*this_sample_segments)

        ###################################################
        # Extract haplogroup
        ###################################################

        # `haplogroup` is tested with `is not False`, so column 0 is accepted.
        hap = None
        if haplogroup is not False:
            hap = l[haplogroup].strip()
Ejemplo n.º 5
0
def test_transition_without_value_add16k():
    """'129' with the second argument True gives position 16129, value 'C'."""
    text = '129'

    position = str2sites(text, True)[0].position
    assert 16129 == position

    value = str2sites(text, True)[0].value
    assert 'C' == value
Ejemplo n.º 6
0
def test_commas_spaces_and_semicolons():
    """Entries separated by commas, spaces and semicolons parse in order."""
    text = '16293G, 16311C; 16129'

    first_value = str2sites(text)[0].value
    assert 'G' == first_value

    second_value = str2sites(text)[1].value
    assert 'C' == second_value
Ejemplo n.º 7
0
def test_single_substitution_add16k():
    """'129A' with add16k=True gives position 16129 and value 'A'."""
    text = '129A'

    position = str2sites(text, add16k=True)[0].position
    assert 16129 == position

    value = str2sites(text, add16k=True)[0].value
    assert 'A' == value
Ejemplo n.º 8
0
def test_deletion_in_del_and_value_format():
    """'16129delA' is parsed as a site whose value is '-'."""
    first_site = str2sites('16129delA')[0]
    assert '-' == first_site.value
Ejemplo n.º 9
0
def test_insertion_in_standard_format():
    """'16129.1A' is parsed as a site whose insert index is 1."""
    first_site = str2sites('16129.1A')[0]
    assert 1 == first_site.insert
Ejemplo n.º 10
0
def test_deletion_in_standard_format():
    """'16129d' is parsed as a site whose value is '-'."""
    first_site = str2sites('16129d')[0]
    assert '-' == first_site.value
Ejemplo n.º 11
0
def test_deletion_in_standard_format_add16k():
    """'129d' with the second argument True gives a site with value '-'."""
    first_site = str2sites('129d', True)[0]
    assert '-' == first_site.value
Ejemplo n.º 12
0
def test_deletion_in_minus_format_add16k():
    """'129-' with the second argument True gives a site with value '-'."""
    first_site = str2sites('129-', True)[0]
    assert '-' == first_site.value
Ejemplo n.º 13
0
def test_deletion_in_minus_format():
    """'16129-' is parsed as a site whose value is '-'."""
    first_site = str2sites('16129-')[0]
    assert '-' == first_site.value
Ejemplo n.º 14
0
def test_empty_string():
    """An empty input string is parsed into an empty list."""
    parsed = str2sites('')
    assert [] == parsed
Ejemplo n.º 15
0
def test_two_insertions_in_one_position_add16k():
    """Two insertions at 129 (second argument True) give two sites at 16129."""
    text = '129.1A 129.2A'

    site_count = len(str2sites(text, True))
    assert site_count == 2

    first_position = str2sites(text, True)[0].position
    assert 16129 == first_position

    # Either insertion may come first, so accept both insert indices.
    first_insert = str2sites(text, True)[0].insert
    assert first_insert in (1, 2)
Ejemplo n.º 16
0
def test_insertion_in_standard_format_add16k():
    """'129.1A' with the second argument True gives insert index 1."""
    first_site = str2sites('129.1A', True)[0]
    assert 1 == first_site.insert
Ejemplo n.º 17
0
def test_spaces():
    """Two space-separated entries parse into two sites, in input order."""
    text = '16293G 16311C'

    site_count = len(str2sites(text))
    assert site_count == 2

    first_value = str2sites(text)[0].value
    assert 'G' == first_value

    second_value = str2sites(text)[1].value
    assert 'C' == second_value
Ejemplo n.º 18
0
def test_single_substitution():
    """'16129A' gives a site at position 16129 with value 'A'."""
    text = '16129A'

    position = str2sites(text)[0].position
    assert 16129 == position

    value = str2sites(text)[0].value
    assert 'A' == value
Ejemplo n.º 19
0
def test_out_of_order_insertions():
    """Insertions listed as .2 before .1 still parse into two sites."""
    parsed = str2sites('1.2A 1.1A')
    site_count = len(parsed)
    assert site_count == 2
Ejemplo n.º 20
0
def test_insertion_in_plus_format():
    """'16129+A' is parsed as a site whose insert index is 1."""
    first_site = str2sites('16129+A')[0]
    assert 1 == first_site.insert
Ejemplo n.º 21
0
def test_10_insertions():
    """Ten insertions at position 1 (.1 through .10) parse into ten sites."""
    ten_insertions = '1.1A 1.2A 1.3A 1.4A 1.5A 1.6A 1.7A 1.8A 1.9A 1.10A'
    parsed = str2sites(ten_insertions)
    assert len(parsed) == 10
Ejemplo n.º 22
0
def test_insertion_in_plus_format_add16k():
    """'129+A' with the second argument True gives insert index 1."""
    first_site = str2sites('129+A', True)[0]
    assert 1 == first_site.insert
Ejemplo n.º 23
0
def load_csv(file,
             header         = 1,     # number of rows to skip for header info
             hvr1           = False, # column number if present
             hvr1_covers    = [16024,16365],
             add16k         = True,  # add 16000 to every hvr1 site?
             hvr2           = False, # column number if present
             hvr2_covers    = [73,340],
             rflps          = False, # column number if present
             rflp_format    = False, # ? how to implement this ?
             sites          = False, # column number(s) if present
             sites_on_rCRS  = [], # matched entry or list for sites columns
             haplogroup     = False, # column number if present
             sample_id      = False, # column number if present
             sample_id_sep  = ',',   # what separates multiple ids?
             haplotype_id   = False, # column number if present
             pop_with_n     = False, # are N's arranged by population?
             n              = False, # column number(s) if present
             population     = False, # column number or name or if pop_with_n is True,
                                     # names to go with N columns
             doi            = False,
             pmid           = False,
             ):
    """Load mitochondrial haplotype definitions from csv file.

    Column numbers are to be provided in python-standard 0-based counting,
    so 0 is a valid column; ``False`` (the default) marks a column as absent.

    The whole file is read up front and closed again.  Every row after the
    first ``header`` rows is then scanned for polymorphisms: the ``hvr1``
    column is parsed with ``str2sites(..., add16k=add16k)`` and the ``hvr2``
    column with ``str2sites(...)``.  A ``MtconvertError`` raised while
    parsing is not propagated; it is recorded in the local ``errors`` list
    as a ``(line_number, exception)`` pair, as is a length mismatch between
    ``sites`` and ``sites_on_rCRS`` (recorded with line number 0).

    When ``population`` is ``False`` it is replaced by ``"Unknown"``.
    """
    import sys  # local import: only needed to choose the file mode below

    def _has_column(column):
        # Column 0 is a real column under 0-based counting; only falsy
        # non-zero values (False, None, empty) mean "not present".
        return bool(column) or (column == 0 and column is not False)

    if population is False:
        population = "Unknown"

    errors = []
    line_number = header

    if sites and len(sites) != len(sites_on_rCRS):
        errors.append((0, MtconvertError("When sites are included, sites_on_rCRS must match")))

    # base coverage is given by hvr1 and hvr2 covers
    segments = [x for x in (hvr1_covers, hvr2_covers) if x]

    # every sample needs an id,
    #  if it is given in the file, we use that
    #  otherwise create one in the format 's#'
    if sample_id is not False:
        def sample_generator(line):
            for sid in line[sample_id].split(sample_id_sep):
                yield (sid, population)
    else:
        def sample_generator(line):
            # assumes a module-level integer counter `s` exists -- it is not
            # created here, only incremented.
            global s
            if pop_with_n:
                # one count column per population name, matched by index
                pop_names = []
                for i, count_column in enumerate(n):
                    pop_names += [population[i]] * num(line[count_column])
            elif _has_column(n):
                pop_names = [population] * num(line[n])
            else:
                pop_names = [population]
            for pop in pop_names:
                s += 1
                yield ("s%d" % s, pop)

    # start the reader
    # Python 2 needs 'rU' for universal newlines; Python 3 removed the 'U'
    # flag (an error from 3.11 on) and the csv module asks for newline=''.
    if sys.version_info[0] < 3:
        handle = open(file, 'rU')
    else:
        handle = open(file, 'r', newline='')
    # Read every row eagerly so the file can be closed before processing.
    with handle:
        reader = csv.reader(handle)
        rows = list(reader)

    samples = []
    for l in rows[header:]:

        line_number += 1

        ###################################################
        # Read in polymorphism data
        ###################################################

        polymorphisms = []

        # per-row copy, so additions for one sample do not leak into the next
        this_sample_segments = list(segments)

        if _has_column(hvr1):
            try:
                polys = str2sites(l[hvr1], add16k=add16k)
                polymorphisms += polys
            except MtconvertError as e:
                errors.append( (line_number, e) )

        if _has_column(hvr2):
            try:
                polys = str2sites(l[hvr2])
                polymorphisms += polys
            except MtconvertError as e:
                errors.append( (line_number, e) )
Ejemplo n.º 24
0
def test_transition_without_value():
    """'16129' given without a base yields position 16129 and value 'C'."""
    text = '16129'

    position = str2sites(text)[0].position
    assert 16129 == position

    value = str2sites(text)[0].value
    assert 'C' == value