Beispiel #1
0
def procress_emit(qinput, qoutput, lock, pileup_prefix, parser_parameters, n,
                  p_neutral, f_sel1, f_sel2, ancestral):
    """Worker loop computing emission values for blocks of pileup lines.

    Tasks are read from ``qinput`` until the ``'STOP'`` sentinel is received.
    Each task is an ``[offset, line_count]`` pair: the worker seeks to
    ``offset`` in its own pileup handle and reads ``line_count`` lines.  Every
    valid site whose info field 5 is not ``"N"`` and whose three emission
    values are all strictly positive is recorded in an ``A`` container
    (``list_chro``, ``list_pos``, ``list_E``).  Non-empty containers are put
    on ``qoutput``.

    Parameters:
        qinput: task queue yielding ``[offset, line_count]`` or ``'STOP'``.
        qoutput: result queue receiving ``A`` instances.
        lock: accepted for signature compatibility; not used by this worker.
        pileup_prefix: passed to ``pp.openPileup``.
        parser_parameters: indexable; items 0-4 are quality encoding, minimum
            quality, minimum count, minimum coverage and maximum coverage.
        n, p_neutral, f_sel1, f_sel2: forwarded to ``prob_cond_true_freq`` /
            ``comp_emit_BF_3_SEG``.
        ancestral: ``"provided"``, ``"unknown"``, or anything else (reference
            parser).
    """
    qualityEncoding = parser_parameters[0]
    minQual = parser_parameters[1]
    minCount = parser_parameters[2]
    minCoverage = parser_parameters[3]
    maxCoverage = parser_parameters[4]

    # creation of the parser object
    if ancestral == "provided":
        parser = pp.Pileup_parser_provided(qualityEncoding, minQual, minCount,
                                           minCoverage, maxCoverage)
    elif ancestral == "unknown":
        parser = pp.Pileup_parser_folded(qualityEncoding, minQual, minCount,
                                         minCoverage, maxCoverage)
    else:
        parser = pp.Pileup_parser_ref(qualityEncoding, minQual, minCount,
                                      minCoverage, maxCoverage)
    f = pp.Format()

    pileup = pp.openPileup(pileup_prefix, 'r')
    try:
        for item in iter(qinput.get, 'STOP'):
            pileup.seek(item[0])
            lines = [pileup.readline() for _ in range(item[1])]

            p = A()
            for line in lines:
                parsed = parser.get_pileup_parser(line)
                if parsed['valid'] != 1:
                    continue
                # Split the info record once instead of once per field.
                fields = f.format('info', parsed).split()
                if fields[5] == "N":
                    continue
                unfolded = int(fields[7])
                votemp = np.fromstring(f.format('freq', parsed),
                                       dtype=int,
                                       sep=' ')
                SE = np.fromstring(f.format('qual', parsed),
                                   dtype=float,
                                   sep=' ')
                # presumably Phred-scaled qualities -> error probabilities
                SEtemp = 10**(-SE / 10)
                pseg = prob_cond_true_freq(n, votemp, SEtemp, unfolded)
                # Same computation with all observed counts set to zero.
                zero_counts = np.zeros(len(votemp))
                p0 = prob_cond_true_freq(n, zero_counts, SEtemp, unfolded)
                E = comp_emit_BF_3_SEG(n, pseg, p0, p_neutral, f_sel1,
                                       f_sel2, unfolded)
                if E[0] > 0 and E[1] > 0 and E[2] > 0:
                    p.list_chro.append(fields[0])
                    p.list_pos.append(int(fields[1]))
                    p.list_E.append(E)

            # in case that all the lines parsed are not valid
            if len(p.list_E) != 0:
                qoutput.put(p)
    finally:
        # Release the file handle even if parsing or a computation raises.
        pileup.close()
Beispiel #2
0
def process_probCond(qinput, qoutput, lock, pileup_prefix, parser_parameters,
                     ratio, n, ancestral):
    """Worker loop computing conditional probabilities on sub-sampled sites.

    Tasks are read from ``qinput`` until the ``'STOP'`` sentinel is received.
    Each task is an ``[offset, line_count]`` pair; the lines are read from the
    pileup while holding ``lock``.  Each valid site is kept when a Bernoulli
    draw with probability ``1 / ratio`` equals 1.  For kept sites the result
    of ``prob_cond_true_freq`` is appended to the block's list when its sum is
    strictly positive.  Non-empty lists are put on ``qoutput``.

    Parameters:
        qinput: task queue yielding ``[offset, line_count]`` or ``'STOP'``.
        qoutput: result queue receiving lists of probability vectors.
        lock: lock held while seeking/reading the pileup.
        pileup_prefix: passed to ``pp.openPileup``.
        parser_parameters: indexable; items 0-4 are quality encoding, minimum
            quality, minimum count, minimum coverage and maximum coverage.
        ratio: a site is sampled with probability ``1 / ratio``.
        n: forwarded to ``prob_cond_true_freq``.
        ancestral: ``"provided"``, ``"unknown"``, or anything else (reference
            parser).
    """
    pileup = pp.openPileup(pileup_prefix, 'r')
    try:
        qualityEncoding = parser_parameters[0]
        minQual = parser_parameters[1]
        minCount = parser_parameters[2]
        minCoverage = parser_parameters[3]
        maxCoverage = parser_parameters[4]

        # creation of the parser object
        if ancestral == "provided":
            parser = pp.Pileup_parser_provided(qualityEncoding, minQual,
                                               minCount, minCoverage,
                                               maxCoverage)
        elif ancestral == "unknown":
            parser = pp.Pileup_parser_folded(qualityEncoding, minQual,
                                             minCount, minCoverage,
                                             maxCoverage)
        else:
            parser = pp.Pileup_parser_ref(qualityEncoding, minQual, minCount,
                                          minCoverage, maxCoverage)
        f = pp.Format()

        for item in iter(qinput.get, 'STOP'):
            # ``with`` guarantees the lock is released even if the read
            # fails, so a faulty worker cannot block the others forever.
            with lock:
                pileup.seek(item[0])
                lines = [pileup.readline() for _ in range(item[1])]

            p_list = []
            for line in lines:
                parsed = parser.get_pileup_parser(line)
                if parsed['valid'] != 1:
                    continue
                if bernoulli.rvs(1. / ratio) != 1:
                    continue
                info = f.format('info', parsed)
                unfolded = int(info.split()[7])
                SE = np.fromstring(f.format('qual', parsed),
                                   dtype=float,
                                   sep=' ')
                votemp = np.fromstring(f.format('freq', parsed),
                                       dtype=int,
                                       sep=' ')
                # presumably Phred-scaled qualities -> error probabilities
                SEtemp = 10**(-SE / 10)
                p = prob_cond_true_freq(n, votemp, SEtemp, unfolded)
                if np.sum(p) > 0:
                    p_list.append(p)

            # in case that all the lines parsed are not valid
            if len(p_list) != 0:
                qoutput.put(p_list)
    finally:
        # Release the file handle even if parsing or a computation raises.
        pileup.close()
Beispiel #3
0
            'Error : --k option can\'t be used with the --estim option without the --pred option'
        )
        sys.exit()
    #...
    if spectrum_file and theta:
        print(
            'ERROR : --theta option can\'t be used with the --spectrum-file option.'
        )
        sys.exit()

    if not ancestral == "reference" and not ancestral == "provided" and not ancestral == "unknown":
        print('ERROR : invalid argument for -a')
        sys.exit()

    #pileup file loading
    pileup = openPileup(pileup_prefix, 'r', True)
    #...

    # management of the region if it is specified with the --region option
    if region:
        chro = region.split(':')
        #if only the chromosome is specified in the region ex: --region 2L
        if len(chro) == 1:
            chro = chro[0]
            prefix += '_' + chro
            # region's parameters: 'start' and 'end' are set to None
            region = [chro, start, end]
        #chromosome, start and end are specified in the region ex: --region 2L:1000..2000
        else:
            zone = chro[1].split('..')
            #...
Beispiel #4
0
def comp_emit_seg_direct(parser_parameters, region, nProcess, n, prefix,
                         p_neutral, pileup_prefix, ancestral):
    """Compute emission values in parallel and write ``<prefix>.segemit``.

    The pileup is scanned once to build a table of file offsets, one per
    block of 10000 lines (optionally restricted to ``region``).  ``nProcess``
    ``procress_emit`` workers consume ``[offset, line_count]`` tasks and send
    back result containers, which are sorted with ``quickSort`` and written
    as ``chro pos E0 E1 E2`` lines.

    Parameters:
        parser_parameters: forwarded to the workers.
        region: falsy for the whole file, otherwise ``[chro, start, end]``;
            a falsy ``start`` selects the whole chromosome.
        nProcess: number of worker processes.
        n, p_neutral: forwarded to ``proba_nielsen`` and to the workers.
        prefix: output file prefix.
        pileup_prefix: passed to ``pp.openPileup``.
        ancestral: forwarded to the workers.

    Exits through ``sys.exit()`` after printing an error when the requested
    chromosome or interval cannot be found.
    """
    # Local import: queue.Empty is only needed to drain the result queue.
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    emit = open(prefix + '.segemit', 'w')
    lock = Lock()
    task_queue = Queue()
    done_queue = Queue()
    block = 10000
    pileup = pp.openPileup(pileup_prefix, 'rb')

    # computation of cond prob for segsites
    if region:
        chro = region[0]
        start = region[1]
        end = region[2]
        # Advance to the first line of the requested chromosome.  A line that
        # cannot be split is treated as the end of the file (this also covers
        # an empty pileup, which used to raise an IndexError).
        while True:
            offset_default = pileup.tell()
            split_pileup = pileup.readline().split()
            if not split_pileup:
                print('ERROR : chro %s not found' % (chro))
                sys.exit()
            if split_pileup[0] == chro:
                break

        if start:
            a = int(split_pileup[1])
            b = split_pileup[0]
            if a > end:
                print('ERROR : interval\'s positions not found.')
                sys.exit()
            while a < start and b == chro:
                offset_default = pileup.tell()
                split_pileup = pileup.readline().split()
                if len(split_pileup) < 2:
                    # End of file: exit, otherwise this loop never finishes
                    # because ``a`` and ``b`` would keep their old values.
                    print('ERROR : interval\'s positions not found.')
                    sys.exit()
                a = int(split_pileup[1])
                b = split_pileup[0]
            if b != chro:
                print(b)
                print('ERROR : interval\'s positions not found.')
                sys.exit()

        # Count the lines of the region, recording an offset every `block`
        # lines.
        offset_table = [offset_default]
        nbLine = 0
        while split_pileup[0] == chro:
            if start and int(split_pileup[1]) > end:
                break
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            split_pileup = pileup.readline().split()
            if not split_pileup:
                break
    else:
        offset_table = [0]
        nbLine = 0
        pileup_line = pileup.readline()
        # an empty read means the end of the file
        while pileup_line:
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            pileup_line = pileup.readline()
    pileup.close()

    coeff1 = 0.7
    coeff2 = 0.2
    f_sel1 = proba_nielsen(n, p_neutral, coeff1)
    print('f_sel1 loaded')
    f_sel2 = proba_nielsen(n, p_neutral, coeff2)
    print('f_sel2 loaded')

    # Keep the Process handles so worker liveness can be checked below.
    workers = []
    for _ in range(nProcess):
        worker = Process(target=procress_emit,
                         args=(task_queue, done_queue, lock, pileup_prefix,
                               parser_parameters, n, p_neutral, f_sel1,
                               f_sel2, ancestral))
        worker.start()
        workers.append(worker)

    # for each offset except the last one
    for offset in offset_table[:-1]:
        task_queue.put([offset, block])

    # management of the last line_block
    if nbLine % block != 0:
        task_queue.put([offset_table[-1], nbLine % block])
    del offset_table
    del f_sel1
    del f_sel2

    for _ in range(nProcess):
        task_queue.put('STOP')

    # Drain the result queue until every worker has exited and one further
    # read attempt comes back empty.  qsize() is only approximate (and not
    # implemented on some platforms), so it is not used to decide when the
    # results are complete.
    mat = []
    workers_running = True
    while True:
        try:
            mat.append(done_queue.get(True, 0.1))
        except Empty:
            if not workers_running:
                break
            workers_running = any(w.is_alive() for w in workers)
    for worker in workers:
        worker.join()

    if len(mat) > 1:
        quickSort(mat)

    try:
        for item in mat:
            for i in range(len(item.list_pos)):
                emit.write(item.list_chro[i] + ' ' + str(item.list_pos[i]) +
                           ' ' + str(item.list_E[i][0]) + ' ' +
                           str(item.list_E[i][1]) + ' ' +
                           str(item.list_E[i][2]) + '\n')
    finally:
        emit.close()
Beispiel #5
0
def prob_cond(parser_parameters, region, theta, nProcess, ratio, n, prefix,
              pileup_prefix, ancestral):
    """Estimate the spectrum in parallel and save it to ``<prefix>.spectrum``.

    The pileup is scanned once to build a table of file offsets, one per
    block of 10000 lines (optionally restricted to ``region``).  ``nProcess``
    ``process_probCond`` workers consume ``[offset, line_count]`` tasks and
    send back lists of probability vectors, which are concatenated, passed to
    ``comp_spectrum`` and saved with ``np.savetxt``.

    Parameters:
        parser_parameters: forwarded to the workers.
        region: falsy for the whole file, otherwise ``[chro, start, end]``;
            a falsy ``start`` selects the whole chromosome.
        theta: forwarded to ``comp_spectrum``.
        nProcess: number of worker processes.
        ratio: forwarded to the workers.
        n: forwarded to the workers and to ``comp_spectrum``.
        prefix: output file prefix.
        pileup_prefix: passed to ``pp.openPileup``.
        ancestral: forwarded to the workers and to ``comp_spectrum``.

    Returns:
        The value returned by ``comp_spectrum``.

    Exits through ``sys.exit()`` after printing an error when the requested
    chromosome or interval cannot be found.
    """
    # Local import: queue.Empty is only needed to drain the result queue.
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    lock = Lock()
    task_queue = Queue()
    done_queue = Queue()
    block = 10000
    pileup = pp.openPileup(pileup_prefix, 'rb')

    if region:
        chro = region[0]
        start = region[1]
        end = region[2]
        # Advance to the first line of the requested chromosome.  A line that
        # cannot be split is treated as the end of the file (this also covers
        # an empty pileup, which used to raise an IndexError).
        while True:
            offset_default = pileup.tell()
            split_pileup = pileup.readline().split()
            if not split_pileup:
                print('ERROR : chro %s not found' % (chro))
                sys.exit()
            if split_pileup[0] == chro:
                break

        if start:
            a = int(split_pileup[1])
            b = split_pileup[0]
            if a > end:
                print('ERROR : interval\'s positions not found.')
                sys.exit()
            while a < start and b == chro:
                offset_default = pileup.tell()
                split_pileup = pileup.readline().split()
                if len(split_pileup) < 2:
                    # End of file: exit, otherwise this loop never finishes
                    # because ``a`` and ``b`` would keep their old values.
                    print('ERROR : interval\'s positions not found.')
                    sys.exit()
                a = int(split_pileup[1])
                b = split_pileup[0]
            if b != chro:
                print('ERROR : interval\'s positions not found.')
                sys.exit()

        # Count the lines of the region, recording an offset every `block`
        # lines.
        offset_table = [offset_default]
        nbLine = 0
        while split_pileup[0] == chro:
            if start and int(split_pileup[1]) > end:
                break
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            split_pileup = pileup.readline().split()
            if not split_pileup:
                break
    else:
        offset_table = [0]
        nbLine = 0
        pileup_line = pileup.readline()
        # an empty read means the end of the file
        while pileup_line:
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            pileup_line = pileup.readline()
    pileup.close()

    # for each offset except the last one
    for offset in offset_table[:-1]:
        task_queue.put([offset, block])

    # management of the last line_block
    if nbLine % block != 0:
        task_queue.put([offset_table[-1], nbLine % block])

    del offset_table

    for _ in range(nProcess):
        task_queue.put('STOP')

    # Keep the Process handles so worker liveness can be checked below.
    workers = []
    for _ in range(nProcess):
        worker = Process(target=process_probCond,
                         args=(task_queue, done_queue, lock, pileup_prefix,
                               parser_parameters, ratio, n, ancestral))
        worker.start()
        workers.append(worker)

    # Drain the result queue until every worker has exited and one further
    # read attempt comes back empty.  qsize() is only approximate (and not
    # implemented on some platforms), so it is not used to decide when the
    # results are complete.
    p_neutral = []
    workers_running = True
    while True:
        try:
            p_neutral += done_queue.get(True, 0.1)
        except Empty:
            if not workers_running:
                break
            workers_running = any(w.is_alive() for w in workers)
    for worker in workers:
        worker.join()

    p_neutral = np.array(p_neutral)
    p_neutral = comp_spectrum(p_neutral, n, theta, ancestral)
    np.savetxt(prefix + '.spectrum',
               np.array([p_neutral]),
               delimiter=' ',
               fmt='%.6e')
    return p_neutral
Beispiel #6
0
def process_estim(qinput, qoutput, lock, parser_parameters, pileup_prefix, n,
                  p_neutral, ancestral):
    """Worker loop estimating a count per valid site for blocks of lines.

    Tasks are read from ``qinput`` until the ``'STOP'`` sentinel is received.
    Each task is an ``[offset, line_count]`` pair; the lines are read from the
    pileup while holding ``lock``.  For every valid site the worker records,
    in an ``A`` container, the chromosome, position, ancestral and derived
    fields (info fields 0, 1, 4, 5) and the argmax of the prior multiplied by
    ``prob_cond_true_freq(n, votemp, SEtemp, 1)``.  When info field 7 is not
    1, the prior is ``p_neutral + p_neutral[::-1]`` and an estimate above
    ``n / 2`` is replaced by ``n - estimate`` with the ancestral and derived
    fields swapped.  Non-empty containers are put on ``qoutput``.

    Parameters:
        qinput: task queue yielding ``[offset, line_count]`` or ``'STOP'``.
        qoutput: result queue receiving ``A`` instances.
        lock: lock held while seeking/reading the pileup.
        parser_parameters: indexable; items 0-4 are quality encoding, minimum
            quality, minimum count, minimum coverage and maximum coverage.
        pileup_prefix: passed to ``pp.openPileup``.
        n: forwarded to ``prob_cond_true_freq``.
        p_neutral: prior; must support ``[::-1]`` and element-wise ``+``/``*``.
        ancestral: ``"provided"``, ``"unknown"``, or anything else (reference
            parser).
    """
    print('process starts')

    pileup = pp.openPileup(pileup_prefix, 'r')
    try:
        qualityEncoding = parser_parameters[0]
        minQual = parser_parameters[1]
        minCount = parser_parameters[2]
        minCoverage = parser_parameters[3]
        maxCoverage = parser_parameters[4]

        # creation of the parser object
        if ancestral == "provided":
            parser = pp.Pileup_parser_provided(qualityEncoding, minQual,
                                               minCount, minCoverage,
                                               maxCoverage)
        elif ancestral == "unknown":
            parser = pp.Pileup_parser_folded(qualityEncoding, minQual,
                                             minCount, minCoverage,
                                             maxCoverage)
        else:
            parser = pp.Pileup_parser_ref(qualityEncoding, minQual, minCount,
                                          minCoverage, maxCoverage)
        f = pp.Format()

        for item in iter(qinput.get, 'STOP'):
            # ``with`` guarantees the lock is released even if the read
            # fails, so a faulty worker cannot block the others forever.
            with lock:
                pileup.seek(item[0])
                lines = [pileup.readline() for _ in range(item[1])]

            estim_tab = A()
            for line in lines:
                parsed = parser.get_pileup_parser(line)
                if parsed['valid'] != 1:
                    continue
                # Split the info record once instead of once per field.
                fields = f.format('info', parsed).split()
                unfolded = int(fields[7])
                SE = np.fromstring(f.format('qual', parsed),
                                   dtype=float,
                                   sep=' ')
                votemp = np.fromstring(f.format('freq', parsed),
                                       dtype=int,
                                       sep=' ')
                # presumably Phred-scaled qualities -> error probabilities
                SEtemp = 10**(-SE / 10)
                # Both cases use the same call (last argument fixed to 1).
                likelihood = prob_cond_true_freq(n, votemp, SEtemp, 1)
                anc = fields[4]
                der = fields[5]
                if unfolded == 1:
                    u = np.argmax(p_neutral * likelihood)
                else:
                    u = np.argmax((p_neutral + p_neutral[::-1]) * likelihood)
                    if u > (n / 2):
                        # Report the smaller count and swap the two alleles.
                        u = n - u
                        anc, der = der, anc
                estim_tab.list_chro.append(fields[0])
                estim_tab.list_pos.append(int(fields[1]))
                estim_tab.list_anc.append(anc)
                estim_tab.list_der.append(der)
                estim_tab.list_u.append(u)

            # in case that all the lines parsed are not valid
            if len(estim_tab.list_u) != 0:
                qoutput.put(estim_tab)
        print('process stops')
    finally:
        # Release the file handle even if parsing or a computation raises.
        pileup.close()
Beispiel #7
0
def estimation(parser_parameters, region, nProcess, n, prefix, p_neutral,
               pileup_prefix, ancestral):
    """Run the per-site estimation in parallel and write ``<prefix>.estim``.

    The pileup is scanned once to build a table of file offsets, one per
    block of 10000 lines (optionally restricted to ``region``).  ``nProcess``
    ``process_estim`` workers consume ``[offset, line_count]`` tasks and send
    back result containers, which are sorted with ``quickSort`` and written
    as ``chro pos anc der u`` lines.

    Parameters:
        parser_parameters: forwarded to the workers.
        region: falsy for the whole file, otherwise ``[chro, start, end]``;
            a falsy ``start`` selects the whole chromosome.
        nProcess: number of worker processes.
        n, p_neutral: forwarded to the workers.
        prefix: output file prefix.
        pileup_prefix: passed to ``pp.openPileup``.
        ancestral: forwarded to the workers.

    Exits through ``sys.exit()`` after printing an error when the requested
    chromosome or interval cannot be found.
    """
    # Local import: queue.Empty is only needed to drain the result queue.
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    lock = Lock()
    task_queue = Queue()
    done_queue = Queue()
    block = 10000
    pileup = pp.openPileup(pileup_prefix, 'r')

    if region:
        chro = region[0]
        start = region[1]
        end = region[2]
        # Advance to the first line of the requested chromosome.  A line that
        # cannot be split is treated as the end of the file (this also covers
        # an empty pileup, which used to raise an IndexError).
        while True:
            offset_default = pileup.tell()
            split_pileup = pileup.readline().split()
            if not split_pileup:
                print('ERROR : chro %s not found' % (chro))
                sys.exit()
            if split_pileup[0] == chro:
                break

        if start:
            a = int(split_pileup[1])
            b = split_pileup[0]
            if a > end:
                print('ERROR : interval\'s positions not found.')
                sys.exit()
            while a < start and b == chro:
                offset_default = pileup.tell()
                split_pileup = pileup.readline().split()
                if len(split_pileup) < 2:
                    # End of file: exit, otherwise this loop never finishes
                    # because ``a`` and ``b`` would keep their old values.
                    print('ERROR : interval\'s positions not found.')
                    sys.exit()
                a = int(split_pileup[1])
                b = split_pileup[0]
            if b != chro:
                print('ERROR : interval\'s positions not found.')
                sys.exit()

        # Count the lines of the region, recording an offset every `block`
        # lines.
        offset_table = [offset_default]
        nbLine = 0
        while split_pileup[0] == chro:
            if start and int(split_pileup[1]) > end:
                break
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            split_pileup = pileup.readline().split()
            if not split_pileup:
                break
    else:
        offset_table = [0]
        nbLine = 0
        pileup_line = pileup.readline()
        # an empty read means the end of the file
        while pileup_line:
            nbLine += 1
            if nbLine % block == 0:
                offset_table.append(pileup.tell())
            pileup_line = pileup.readline()
    # The index is complete; the workers open their own handles.
    pileup.close()

    # for each offset except the last one
    for offset in offset_table[:-1]:
        task_queue.put([offset, block])

    # management of the last line_block
    if nbLine % block != 0:
        task_queue.put([offset_table[-1], nbLine % block])

    for _ in range(nProcess):
        task_queue.put('STOP')

    # Keep the Process handles so worker liveness can be checked below.
    workers = []
    for _ in range(nProcess):
        worker = Process(target=process_estim,
                         args=(task_queue, done_queue, lock,
                               parser_parameters, pileup_prefix, n,
                               p_neutral, ancestral))
        worker.start()
        workers.append(worker)

    # Drain the result queue until every worker has exited and one further
    # read attempt comes back empty.  qsize() is only approximate (and not
    # implemented on some platforms), so it is not used to decide when the
    # results are complete.
    estim = []
    workers_running = True
    while True:
        try:
            estim.append(done_queue.get(True, 0.1))
        except Empty:
            if not workers_running:
                break
            workers_running = any(w.is_alive() for w in workers)
    for worker in workers:
        worker.join()

    if len(estim) > 1:
        quickSort(estim)

    fic_estim = open(prefix + '.estim', 'w')
    try:
        for item in estim:
            for i in range(len(item.list_pos)):
                fic_estim.write(item.list_chro[i] + ' ' +
                                str(item.list_pos[i]) + ' ' +
                                item.list_anc[i] + ' ' + item.list_der[i] +
                                ' ' + str(item.list_u[i]) + '\n')
    finally:
        fic_estim.close()