Beispiel #1
0
def get_consensus_with_trim( c_input ):
    """Compute a consensus for one seed from end-trimmed aligned reads.

    Every read after the seed is aligned to the seed with ``get_alignment``.
    A read is kept when its alignment score is above 1000 and the aligned
    span on the read (``e1 - s1``) is longer than 500; the span is then
    clipped by ``trim_size`` at both ends.  Kept fragments are ordered
    longest first, capped at ``max_n_read``, and passed (seed first) to
    ``falcon.generate_consensus``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``seqs[0]`` is the seed and
            ``seqs[1:]`` are the reads aligned against it.  ``config`` is
            the 8-tuple ``(min_cov, K, local_match_count_window,
            local_match_count_threshold, max_n_read, min_idt,
            edge_tolerance, trim_size)``.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    (min_cov, K, local_match_count_window, local_match_count_threshold,
     max_n_read, min_idt, edge_tolerance, trim_size) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the offsets into `seq`, the score and the status are needed.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            if e1 <= s1:
                # Trimming consumed the whole aligned span.  Slicing with an
                # inverted (possibly negative) range would yield an empty or
                # unrelated fragment, so drop the read instead.
                continue
            candidates.append( (e1 - s1, seq[s1:e1]) )

    # Longest trimmed fragment first; the sort is stable for equal lengths.
    candidates.sort(key = lambda x: -x[0])
    # The cap applies to the supporting reads only; the seed is added on top.
    trim_seqs = [seed] + [x[1] for x in candidates[:max_n_read]]

    seqs_ptr = (c_char_p * len(trim_seqs))()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus( seqs_ptr, len(trim_seqs), min_cov, K,
                                                    local_match_count_window,
                                                    local_match_count_threshold, min_idt )
    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    falcon.free_consensus_data( consensus_data_ptr )
    del seqs_ptr
    return consensus, seed_id
Beispiel #2
0
def get_consensus_without_trim( c_input ):
    """Compute a consensus for one seed from its reads, without trimming.

    When more than ``max_n_read`` sequences are supplied, the set is first
    reduced with ``get_longest_reads(seqs, max_n_read, max_cov_aln,
    sort=True)``.  The remaining sequences are passed unchanged to
    ``falcon.generate_consensus``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is the 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are not used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     _edge_tolerance, _trim_size, _min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus( seqs_ptr, n_seqs, min_cov, K, min_idt )

    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    falcon.free_consensus_data( consensus_data_ptr )
    del seqs_ptr
    return consensus, seed_id
Beispiel #3
0
def get_consensus_without_trim( c_input ):
    """Compute a consensus for one seed from its reads, without trimming.

    At most the first ``max_n_read`` sequences are used; they are passed
    unchanged to ``falcon.generate_consensus`` together with the local
    match-count settings.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is the 8-tuple
            ``(min_cov, K, local_match_count_window,
            local_match_count_threshold, max_n_read, min_idt,
            edge_tolerance, trim_size)``; ``edge_tolerance`` and
            ``trim_size`` are not used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, local_match_count_window, local_match_count_threshold,
     max_n_read, min_idt, _edge_tolerance, _trim_size) = config

    if len(seqs) > max_n_read:
        # Keep the leading entries only; no re-ordering is done here.
        seqs = seqs[:max_n_read]

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus( seqs_ptr, n_seqs, min_cov, K,
                                                    local_match_count_window,
                                                    local_match_count_threshold, min_idt )

    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    falcon.free_consensus_data( consensus_data_ptr )
    del seqs_ptr
    return consensus, seed_id
Beispiel #4
0
def get_consensus_without_trim(c_input):
    """Compute a consensus for one seed from its reads, without trimming.

    When more than ``max_n_read`` sequences are supplied, the set is first
    reduced with ``get_longest_reads(seqs, max_n_read, max_cov_aln,
    sort=True)``.  The remaining sequences are passed unchanged to
    ``falcon.generate_consensus``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is the 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are not used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     _edge_tolerance, _trim_size, _min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(seqs_ptr, n_seqs,
                                                   min_cov, K, min_idt)

    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    return consensus, seed_id
Beispiel #5
0
def get_consensus_without_trim(c_input):
    """Compute a consensus for one seed from its reads, without trimming.

    When more than ``max_n_read`` sequences are supplied, the set is first
    reduced with ``get_longest_reads(seqs, max_n_read, max_cov_aln,
    sort=True)``.  The remaining sequences are passed unchanged to
    ``falcon.generate_consensus``.  Start, free and finish are reported at
    debug level on ``LOG``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is the 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are not used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.

    Raises:
        AssertionError: if the native call returns a null result pointer
            (only when assertions are enabled).
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_without_trim(len(seqs)=={}, seed_id={})'.format(
        len(seqs), seed_id))
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     _edge_tolerance, _trim_size, _min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, n_seqs, min_cov, K, min_idt)
    assert consensus_data_ptr

    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    LOG.debug(' Freeing1')
    falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_without_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
Beispiel #6
0
def get_consensus_without_trim(c_input):
    """Compute a consensus for one seed from its reads, without trimming.

    When more than ``max_n_read`` sequences are supplied, the set is first
    reduced with ``get_longest_reads(seqs, max_n_read, max_cov_aln,
    sort=True)``.  The remaining sequences are passed unchanged to
    ``falcon.generate_consensus``.  Start, free and finish are reported at
    debug level on ``LOG``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is the 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are not used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_without_trim(len(seqs)=={}, seed_id={})'.
              format(len(seqs), seed_id))
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     _edge_tolerance, _trim_size, _min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(seqs_ptr, n_seqs,
                                                   min_cov, K, min_idt)

    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    LOG.debug(' Freeing1')
    falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(
        ' Finishing get_consensus_without_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
Beispiel #7
0
def get_consensus_with_trim(c_input):
    """Compute a consensus for one seed from end-trimmed aligned reads.

    Every read after the seed is aligned to the seed with ``get_alignment``.
    A read is kept when its alignment score is above 1000 and the aligned
    span on the read (``e1 - s1``) is longer than 500; the span is then
    clipped by ``trim_size`` at both ends.  Kept fragments are ordered
    longest first behind the seed.  If more than ``max_n_read`` fragments
    remain, the list is reduced with ``get_longest_reads(..., sort=False)``
    before being passed to ``falcon.generate_consensus``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``seqs[0]`` is the seed and
            ``seqs[1:]`` are the reads aligned against it.  ``config`` is
            the 8-tuple ``(min_cov, K, max_n_read, min_idt, edge_tolerance,
            trim_size, min_cov_aln, max_cov_aln)``; ``min_cov_aln`` is not
            used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.
    """
    seqs, seed_id, config = c_input
    LOG.debug(
        'Starting get_consensus_with_trim(len(seqs)=={}, seed_id={})'.format(
            len(seqs), seed_id))
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, _min_cov_aln, max_cov_aln) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the offsets into `seq`, the score and the status are needed.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            if e1 <= s1:
                # Trimming consumed the whole aligned span.  Slicing with an
                # inverted (possibly negative) range would yield an empty or
                # unrelated fragment, so drop the read instead.
                continue
            candidates.append((e1 - s1, seq[s1:e1]))

    # Longest trimmed fragment first; the sort is stable for equal lengths.
    candidates.sort(key=lambda x: -x[0])
    trim_seqs = [seed] + [x[1] for x in candidates]

    # Count the supporting reads only (the seed is excluded), without
    # copying the list just to measure it.
    if len(trim_seqs) - 1 > max_n_read:
        # Already ordered longest-first above, so do not sort again.
        trim_seqs = get_longest_reads(trim_seqs,
                                      max_n_read,
                                      max_cov_aln,
                                      sort=False)

    n_seqs = len(trim_seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus(seqs_ptr, n_seqs,
                                                   min_cov, K, min_idt)
    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    LOG.debug(' Freeing2')
    falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_with_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
Beispiel #8
0
def get_consensus_core(seqs, min_cov, K, min_idt, allow_external_mapping):
    """Run the native consensus step over ``seqs`` and return it as text.

    Each entry's ``seq`` attribute is ASCII-encoded and handed to the
    ``falcon`` library.  If ``allow_external_mapping`` is truthy and every
    entry reports ``is_mapped``, the entries' ``qstart``/``qend``/
    ``tstart``/``tend`` coordinates are supplied through
    ``falcon.generate_consensus_from_mapping``; otherwise
    ``falcon.generate_consensus`` is called and no coordinates are passed.

    Args:
        seqs: sequence records exposing ``seq`` and ``is_mapped`` (plus
            ``qstart``, ``qend``, ``tstart``, ``tend`` on the mapped path).
        min_cov, K, min_idt: forwarded unchanged to the native call.
        allow_external_mapping: enables the externally-mapped path.

    Returns:
        The consensus decoded as ASCII, or ``''`` when the native call
        returns a null result.
    """
    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = [bytes(record.seq, encoding='ascii') for record in seqs]

    # External coordinates are usable only if the caller allows them and no
    # record lacks a mapping.
    use_external_mapping = bool(allow_external_mapping) and all(
        record.is_mapped for record in seqs)

    if use_external_mapping:
        LOG.info('Using external mapping coordinates from input.')
        aln_ranges_ptr = (POINTER(falcon_kit.AlnRange) * n_seqs)()
        for idx, record in enumerate(seqs):
            aln_ranges_ptr[idx] = pointer(falcon_kit.AlnRange(
                record.qstart, record.qend, record.tstart, record.tend,
                (record.qend - record.qstart)))
        consensus_data_ptr = falcon.generate_consensus_from_mapping(
            seqs_ptr, aln_ranges_ptr, n_seqs, min_cov, K, min_idt)
        del aln_ranges_ptr
    else:
        LOG.info('Internally mapping the sequences.')
        consensus_data_ptr = falcon.generate_consensus(
            seqs_ptr, n_seqs, min_cov, K, min_idt)

    del seqs_ptr

    # A null result yields an empty consensus rather than an error.
    if not consensus_data_ptr:
        return ''

    # Copy the sequence into Python memory before freeing the native result.
    raw_consensus = string_at(consensus_data_ptr[0].sequence)
    LOG.debug(' Freeing')
    falcon.free_consensus_data(consensus_data_ptr)
    return raw_consensus.decode('ascii')
Beispiel #9
0
def get_consensus_with_trim(c_input):
    """Compute a consensus for one seed from end-trimmed aligned reads.

    Every read after the seed is aligned to the seed with ``get_alignment``.
    A read is kept when its alignment score is above 1000 and the aligned
    span on the read (``e1 - s1``) is longer than 500; the span is then
    clipped by ``trim_size`` at both ends.  Kept fragments are ordered
    longest first behind the seed.  If more than ``max_n_read`` fragments
    remain, the list is reduced with ``get_longest_reads(..., sort=False)``
    before being passed to ``falcon.generate_consensus``.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``seqs[0]`` is the seed and
            ``seqs[1:]`` are the reads aligned against it.  ``config`` is
            the 8-tuple ``(min_cov, K, max_n_read, min_idt, edge_tolerance,
            trim_size, min_cov_aln, max_cov_aln)``; ``min_cov_aln`` is not
            used by this function.

    Returns:
        ``(consensus, seed_id)``; ``consensus`` is the sequence string
        copied out of the native result before that result is freed.

    Raises:
        AssertionError: if the native call returns a null result pointer
            (only when assertions are enabled).
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_with_trim(len(seqs)=={}, seed_id={})'.format(
        len(seqs), seed_id))
    # The unpack also enforces that config has exactly eight entries.
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, _min_cov_aln, max_cov_aln) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the offsets into `seq`, the score and the status are needed.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            if e1 <= s1:
                # Trimming consumed the whole aligned span.  Slicing with an
                # inverted (possibly negative) range would yield an empty or
                # unrelated fragment, so drop the read instead.
                continue
            candidates.append((e1 - s1, seq[s1:e1]))

    # Longest trimmed fragment first; the sort is stable for equal lengths.
    candidates.sort(key=lambda x: -x[0])
    trim_seqs = [seed] + [x[1] for x in candidates]

    # Count the supporting reads only (the seed is excluded), without
    # copying the list just to measure it.
    if len(trim_seqs) - 1 > max_n_read:
        # Already ordered longest-first above, so do not sort again.
        trim_seqs = get_longest_reads(
            trim_seqs, max_n_read, max_cov_aln, sort=False)

    n_seqs = len(trim_seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, n_seqs, min_cov, K, min_idt)
    assert consensus_data_ptr
    # string_at returns a Python-owned copy, so it stays valid after the
    # native buffer is released below.
    consensus = string_at(consensus_data_ptr[0].sequence)
    LOG.debug(' Freeing2')
    falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_with_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id