Beispiel #1
0
def read_contigs_file(open_contigs_file, start_position=False, taxonomy_info=True):
    """Read a FASTA contigs file into a list of DNA objects.

    The file is expected to be one generated by the generate_contigs script.

    :open_contigs_file - Source of the FASTA data, passed straight to
        SeqIO.parse (e.g. an open file handle)
    :start_position - If true (and taxonomy_info is true), also copy the
        parsed "start_position" entry onto contig.start_position
    :taxonomy_info - If true, every record description is parsed with
        parse_contig_description and the resulting "genome", "family",
        "genus", "species" and "contig_id" entries are stored on the contig.
        If false, the record id is used both as the DNA id and as contig_id.

    Returns a list with one DNA object per FASTA record, in file order.
    """
    contigs = []
    # Iterate over the parser directly instead of first collecting every
    # record in a list; the records are only needed one at a time.
    for record in SeqIO.parse(open_contigs_file, "fasta"):
        if taxonomy_info:
            info = parse_contig_description(
                record.description, start_position=start_position)
            contig = DNA(id=info["genome"], seq=str(record.seq))
            if start_position:
                contig.start_position = info["start_position"]
            contig.family = info["family"]
            contig.genus = info["genus"]
            contig.species = info["species"]
            contig.contig_id = info["contig_id"]
        else:
            # No taxonomy encoded in the header: fall back to the record id.
            contig = DNA(id=record.id, seq=str(record.seq))
            contig.contig_id = record.id
        contigs.append(contig)

    return contigs
def sample_contig(genome, x_st, contig_id, start_position=False):
    """ Generates a contig from genome genome

    A contig length is drawn with randint from x_st.contig_min_length and
    x_st.contig_max_length, then a start offset is drawn so that the whole
    contig lies inside genome.full_seq. When x_st.debug_mode is set the
    contig always starts at offset 0 instead of a random one.

    :genome - DNA object (its id and full_seq attributes are read)
    :x_st - SampleSetting object (contig_min_length, contig_max_length and
        debug_mode are read)
    :contig_id - The unique id given to this contig
    :start_position - If true, store the chosen start offset on the
        returned contig as contig.start_position

    Returns a new DNA object with id genome.id + " contig".

    Raises ValueError if, outside debug mode, the drawn contig length is
    larger than the genome."""
    length = randint(x_st.contig_min_length, x_st.contig_max_length)
    genome_length = len(genome.full_seq)
    if x_st.debug_mode:
        # NOTE(review): no length check here, so a genome shorter than
        # `length` presumably yields a shorter contig via slicing -- confirm
        # that this is acceptable for debug runs.
        start = 0
    else:
        if length > genome_length:
            # randint would fail on the empty range anyway; raise the same
            # exception type with a message that names the actual problem.
            raise ValueError(
                "cannot sample a contig of length %d from genome %r "
                "of length %d" % (length, genome.id, genome_length))
        start = randint(0, genome_length - length)
    end = start + length
    contig = DNA(id=genome.id + " contig", seq=genome.full_seq[start:end])
    contig.contig_id = contig_id
    if start_position:
        contig.start_position = start
    return contig