Exemple #1
0
def loadSeqAndChain(seq_file,
                    k,
                    suppress_save=False,
                    mc_file=None,
                    retain_n=False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain
    from the (possibly trimmed) sequence and save it to the file for the
    next use (skip the save if suppressed).

    Parameters:
    * seq_file: The FASTA sequence file (read with SeqIO.read).
    * k: The order of the Markov chain; also part of the derived cache name.
    * suppress_save: Boolean.  If true, a newly built chain is not saved.
    * mc_file: The cache file to use.  If None, it is derived from seq_file
      by replacing a trailing ".fa"/".fasta" with ".pmc<k>", or by appending
      ".pmc<k>" when seq_file has neither extension.
    * retain_n: If false, the longest prefix and suffix consisting of
      characters not in ``bases`` are cut from the sequence.

    Return: A tuple:
    1. The sequence (trimmed unless retain_n is true).
    2. The Markov chain list.
    3. The number of characters removed from the front (0 if retain_n).

    NOTE(review): the cache appears to be pickle-based (see
    markov_gen.pickle_markov_list) -- only point mc_file at trusted files.
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    # Cut out the maximal prefix and suffix of ambiguity codes.
    if not retain_n:
        seq_len = len(template_seq)
        # Both scans are bounded so that an empty sequence, or one without
        # any character in `bases`, trims to "" instead of raising IndexError.
        start = 0
        while start < seq_len and template_seq[start] not in bases:
            start += 1
        finish = seq_len
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1
        coord_adjust = start
        template_seq = template_seq[start:finish]
    else:
        coord_adjust = 0

    if mc_file is None:
        mc_file, n_subs = re.subn(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if n_subs == 0:
            # Without a FASTA extension the substitution is a no-op and the
            # cache name would equal seq_file itself; append the suffix so
            # the sequence file is never mistaken for (or replaced by) the cache.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, coord_adjust
def loadSeqAndChain(seq_file, k, suppress_save = False, mc_file = None, retain_n = False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain
    and save it to the file for the next use (skip the save if suppressed).

    Parameters:
    * seq_file: The sequence file (FASTA, read with SeqIO.read).
    * k: The order of the markov chain.
    * suppress_save: Boolean.  If true, don't save the generated MC file.
    * mc_file: The name of the mc_file to use.  If None, it is derived from
      seq_file: a trailing ".fa"/".fasta" is replaced by ".pmc<k>", and
      ".pmc<k>" is appended when seq_file has neither extension.
    * retain_n: If false, we locate the largest possible prefix and suffix of
      characters that are not in ``bases`` and report them through start/finish.

    Return: A tuple:
    1. The chromosome sequence, exactly as read (it is not sliced here).
    2. The markov chain.
    3. Where we will start in the template sequence (in case a prefix is skipped).
    4. Where we will end in the template sequence (exclusive; in case a suffix is skipped).

    NOTE(review): the cache appears to be pickle-based (see
    markov_gen.pickle_markov_list) -- only point mc_file at trusted files.
    """

    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    # Find the maximal prefix and suffix of ambiguity codes.
    start, finish = 0, len(template_seq)
    if not retain_n:   # Narrow [start, finish) to the first real base at each end.
        seq_len = finish
        # Bounded scans: an empty sequence, or one with no character in `bases`,
        # yields start == finish == len(template_seq) rather than an IndexError.
        while start < seq_len and template_seq[start] not in bases: start += 1
        while finish > start and template_seq[finish-1] not in bases: finish -= 1

    if mc_file is None:
        mc_file, n_subs = re.subn(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if n_subs == 0:
            # No FASTA extension to swap: the derived name would be seq_file
            # itself, so the sequence file would be read back as the cache.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, start, finish
def loadSeqAndChain(seq_file, k, suppress_save = False, mc_file = None, retain_n = False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain
    from the (possibly trimmed) sequence and save it to the file for the
    next use (skip the save if suppressed).

    Parameters:
    * seq_file: The FASTA sequence file (read with SeqIO.read).
    * k: Passed to markov_gen.MarkovArray as the chain order and used in the
      derived cache file name.
    * suppress_save: Boolean.  If true, a newly built chain is not saved.
    * mc_file: The cache file to use.  If None, derive it from seq_file: a
      trailing ".fa"/".fasta" becomes ".pmc<k>"; otherwise ".pmc<k>" is appended.
    * retain_n: If false, strip the longest prefix and suffix made up of
      characters that are not in ``bases``.

    Return: A tuple:
    1. The sequence (trimmed unless retain_n is true).
    2. The Markov chain list.
    3. The offset of the returned sequence within the original one
       (0 if retain_n is true).

    NOTE(review): the cache appears to be pickle-based (see
    markov_gen.pickle_markov_list) -- only point mc_file at trusted files.
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    # Cut out the maximal prefix and suffix of ambiguity codes.
    if not retain_n:
        seq_len = len(template_seq)
        # The length guards keep both scans inside the string, so an empty or
        # base-free sequence becomes "" instead of raising IndexError.
        start = 0
        while start < seq_len and template_seq[start] not in bases: start += 1
        finish = seq_len
        while finish > start and template_seq[finish-1] not in bases: finish -= 1
        coord_adjust = start
        template_seq = template_seq[start:finish]
    else:
        coord_adjust = 0

    if mc_file is None:
        mc_file, n_subs = re.subn(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if n_subs == 0:
            # Nothing was replaced, so the derived name would collide with
            # seq_file; append the suffix to keep the cache a separate file.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, coord_adjust
def loadSeqAndChain(seq_file,
                    k,
                    suppress_save=False,
                    mc_file=None,
                    retain_n=False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain
    and save it to the file for the next use (skip the save if suppressed).

    Parameters:
    * seq_file: The sequence file (FASTA, read with SeqIO.read).
    * k: The order of the markov chain.
    * suppress_save: Boolean.  If true, don't save the generated MC file.
    * mc_file: The name of the mc_file to use.  If None, it is derived from
      seq_file: a trailing ".fa"/".fasta" is replaced by ".pmc<k>", and
      ".pmc<k>" is appended when seq_file has neither extension.
    * retain_n: If false, we locate the largest possible prefix and suffix of
      characters that are not in ``bases`` and report them through start/finish.

    Return: A tuple:
    1. The chromosome sequence, exactly as read (it is not sliced here).
    2. The markov chain.
    3. Where we will start in the template sequence (in case a prefix is skipped).
    4. Where we will end in the template sequence (exclusive; in case a suffix is skipped).

    NOTE(review): the cache appears to be pickle-based (see
    markov_gen.pickle_markov_list) -- only point mc_file at trusted files.
    """

    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    # Find the maximal prefix and suffix of ambiguity codes.
    seq_len = len(template_seq)
    start, finish = 0, seq_len
    if not retain_n:  # Narrow [start, finish) to the first real base at each end.
        # Bounded scans: an empty sequence, or one with no character in
        # `bases`, yields start == finish == seq_len rather than an IndexError.
        while start < seq_len and template_seq[start] not in bases:
            start += 1
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1

    if mc_file is None:
        mc_file, n_subs = re.subn(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if n_subs == 0:
            # No FASTA extension to swap: the derived name would be seq_file
            # itself, so the sequence file would be read back as the cache.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, start, finish