Example #1
0
def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yields Module object(s) built from a motif_block and sequence_map.

        - motif_block is list of lines resulting from calling get_motif_blocks
        - sequence_map is the mapping between Gibbs sequence numbering and
        sequence id from fasta file.
        - truncate_len, when truthy, is the number of leading characters of
        each sequence id to keep; None (or 0) leaves the id untouched.

    This is a generator: nothing is parsed until it is first iterated.
    Grouping by motif number is currently disabled, so a single Module
    (stored under key "1", with ID = motif id + "1") is yielded.
    """
    # Motif id is the last whitespace-separated token of the first line.
    motif_id = motif_block[0].strip().split()[-1]

    # Per-instance records, the motif p-value, and the alphabet guessed
    # from those records.
    motif_list = get_motif_sequences(motif_block)
    motif_p = get_motif_p_value(motif_block)
    alphabet = guess_alphabet(motif_list)

    # NOTE: splitting into one Module per motif number is DISABLED FOR NOW.
    # The original plan was to key modules on field 4 of each record
    # (presumably the motif number -- confirm against get_motif_sequences);
    # until then everything goes into the single module "1".
    module_keys = ["1"]

    # Create Module object(s), one per key.
    all_modules = {}
    for key in module_keys:
        cur_mod = Module({}, Alphabet=alphabet)
        cur_mod.Pvalue = motif_p
        cur_mod.ID = motif_id + key
        all_modules[key] = cur_mod

    for motif in motif_list:
        # Translate the Gibbs sequence number into the sequence id.
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]

        start = motif[1]
        seq = motif[2]
        sig = motif[3]
        # Must match an entry of module_keys while grouping is disabled.
        motif_num = "1"

        # The instance spans [start, start + len(seq)) on its sequence.
        location = Location(seq_id, start, start + len(seq))
        mod_instance = ModuleInstance(seq, location, sig)
        # Instances are keyed by (sequence id, start) inside their Module.
        all_modules[motif_num][(seq_id, start)] = mod_instance

    for gmod in all_modules.values():
        yield gmod
Example #2
0
def extractModuleData(module_data, alphabet):
    """Creates Module object given module_data list.

        - Only works on 1 module at a time: only pass in data from one module.
        - module_data[0] holds the general information lines (only its first
          line is parsed), module_data[1] is handed to getConsensusSequence,
          and module_data[2] holds the instance lines. Elements past the
          third are ignored.
        - alphabet is passed through to the Module constructor.

    Returns the populated Module. Each ModuleInstance is stored under the
    key (sequence id, start), where start is the parsed start column
    minus 1 (presumably a 1-based to 0-based conversion -- confirm against
    the input format).
    """
    # Create Module object
    meme_module = Module({}, Alphabet=alphabet)

    # Only keep first 3 elements of the list
    module_data = module_data[:3]

    # Module general information lives on the first line of module_data[0].
    general_dict = getModuleGeneralInfo(module_data[0][0])
    # Get Multilevel Consensus Sequence
    meme_module.ConsensusSequence = getConsensusSequence(module_data[1])
    # Pull out desired values from dict
    module_length = int(general_dict["width"])
    meme_module.Llr = int(general_dict["llr"])
    meme_module.Evalue = float(general_dict["E-value"])
    meme_module.ID = general_dict["MOTIF"]

    # Instance rows: skip the first 4 and last 2 lines of module_data[2]
    # (presumably table header/footer -- confirm against the input format)
    # and split every remaining row into whitespace-separated fields before
    # any instance is built.
    instance_rows = [line.split() for line in module_data[2][4:-2]]

    # Create a ModuleInstance object and add it to Module for each instance
    for fields in instance_rows:
        seq_id = fields[0]
        start = int(fields[1]) - 1
        p_value = float(fields[2])
        sequence = fields[4]
        # Every instance of the module has the same width.
        location = Location(seq_id, start, start + module_length)
        meme_module[(seq_id, start)] = ModuleInstance(
            sequence, location, p_value)

    return meme_module