예제 #1
0
 def test_init(self):
     """A Module built from a single instance exposes the expected state.

     Only ``data`` is passed to the constructor; the test then checks the
     Template, Alphabet, Pvalue and Evalue attributes as well as the
     keys/values of the resulting Module.
     """
     first = self.modules_no_e[0]
     # The instance is keyed by (sequence id, start position).
     key = (first.Location.SeqId, first.Location.Start)
     module = Module(data={key: first})

     self.assertEqual(module.Template, None)
     self.assertEqual(module.Alphabet, ASCII.Alphabet)
     self.assertEqual(module.Pvalue, None)
     self.assertEqual(module.Evalue, None)

     self.assertEqual(module.keys(), [("seq0", 1)])
     expected_instance = ModuleInstance(
         self.sequences[0], self.locations[0], self.Pvalues[0]
     )
     self.assertEqual(module.values(), [expected_instance])
예제 #2
0
 def test_init(self):
     """Module construction from one keyed instance yields expected state.

     The Module receives a single-entry dict keyed by the instance's
     (SeqId, Start); attributes not supplied are compared against None and
     the alphabet against ASCII.Alphabet.
     """
     instance = self.modules_no_e[0]
     location = instance.Location
     data = {(location.SeqId, location.Start): instance}
     module = Module(data=data)

     self.assertEqual(module.Template, None)
     self.assertEqual(module.Alphabet, ASCII.Alphabet)
     self.assertEqual(module.Pvalue, None)
     self.assertEqual(module.Evalue, None)

     expected_keys = [('seq0', 1)]
     self.assertEqual(module.keys(), expected_keys)
     expected_values = [
         ModuleInstance(self.sequences[0], self.locations[0], self.Pvalues[0]),
     ]
     self.assertEqual(module.values(), expected_values)
예제 #3
0
 def __call__(self, word_length):
     """Index every k-word of the alignment as a Module and rank the words.

     Sets two attributes on self (nothing is returned):
         - ModuleDict is {k-word string: Module}, where each Module holds
         the instances of that word keyed by (sequence key, start offset).
         - ModuleOrder is the list of k-words sorted in descending order of
         (len(Module.Names), word).

     word_length is the length of the words to extract.
     """
     self.ModuleDict = {}
     modules = self.ModuleDict
     for seq_key, seq in self.Alignment.items():
         # Slide a window of word_length across the sequence; the last
         # valid start offset is len(seq) - word_length.
         last_start = len(seq) - word_length
         for start in range(last_start + 1):
             stop = start + word_length
             word = seq[start:stop].tostring()
             instance = ModuleInstance(word, Location(seq_key, start, stop))
             if word not in modules:
                 # First occurrence of this word: start a new Module.
                 modules[word] = Module(
                     {(seq_key, start): instance}, MolType=self.MolType
                 )
             else:
                 modules[word][(seq_key, start)] = instance
     # (count, word) pairs are unique because words are dict keys, so a
     # single descending sort gives a fully determined order.
     ranked = sorted(
         [(len(mod.Names), word) for word, mod in modules.items()],
         reverse=True,
     )
     self.ModuleOrder = [word for _, word in ranked]
예제 #4
0
 def test_cmp(self):
     """Sorting a list of Modules should give the expected order.

     One single-instance Module is built per fixture entry, each with its
     own Pvalue and Evalue. After sorting, the Modules are compared
     position by position against the expected (Pvalue, Evalue) pairs,
     which are listed in ascending P-value order.
     """
     expected = [
         (3e-10, 0.9),
         (0.002, 0.02),
         (0.00201, 0.900001),
         (0.0094, 0.09),
         (0.1, 0.006),
         (0.6, 0.0000003),
         (0.6, 0.0200000001),
     ]
     fixtures = zip(self.modules_no_e, self.Pvalues, self.Evalues)
     modules = [
         Module(
             {(instance.Location.SeqId, instance.Location.Start): instance},
             Pvalue=pvalue,
             Evalue=evalue,
         )
         for instance, pvalue, evalue in fixtures
     ]
     modules.sort()
     for ans, (p, e) in zip(modules, expected):
         self.assertEqual(ans.Pvalue, p)
         self.assertEqual(ans.Evalue, e)
예제 #5
0
def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yields the Module object built from a motif_block and sequence_map.

    This is a generator: it yields exactly one Module, so callers must
    iterate over the result (or call next() on it).

        - motif_block is list of lines resulting from calling get_motif_blocks
        - sequence_map is the mapping between Gibbs sequence numbering and
        sequence id from fasta file.
        - truncate_len, when truthy, is the number of leading characters
        kept from each sequence id.

    The yielded Module has Pvalue set to the motif p-value and ID set to
    the motif id followed by "1".
    """
    # The motif id is the last whitespace-separated token of the first line.
    motif_id = motif_block[0].strip().split()[-1]

    motif_list = get_motif_sequences(motif_block)
    motif_p = get_motif_p_value(motif_block)
    # Guess alphabet from motif sequences
    alphabet = guess_alphabet(motif_list)

    # Suffix appended to the motif id to form the Module ID.
    module_key = "1"

    instances = {}
    for motif in motif_list:
        # motif[0] is looked up in sequence_map to obtain the sequence id.
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]

        start = motif[1]
        seq = motif[2]
        sig = motif[3]

        location = Location(seq_id, start, start + len(seq))
        # Instances are keyed by (sequence id, start position).
        instances[(seq_id, start)] = ModuleInstance(seq, location, sig)

    gibbs_mod = Module(instances, MolType=alphabet)
    gibbs_mod.Pvalue = motif_p
    gibbs_mod.ID = motif_id + module_key
    yield gibbs_mod
예제 #6
0
def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yields the Module object built from a motif_block and sequence_map.

    This function is a generator that yields a single Module; iterate over
    the result (or call next() on it) to obtain it.

        - motif_block is list of lines resulting from calling get_motif_blocks
        - sequence_map is the mapping between Gibbs sequence numbering and
        sequence id from fasta file.
        - truncate_len, when truthy, is the number of leading characters
        kept from each sequence id.

    The yielded Module carries the motif p-value as Pvalue and the motif id
    followed by "1" as ID.
    """
    # Get motif id: last whitespace-separated token of the first line.
    motif_id = motif_block[0].strip().split()[-1]

    # Get motif_list
    motif_list = get_motif_sequences(motif_block)
    # Get motif p-value
    motif_p = get_motif_p_value(motif_block)
    # Guess alphabet from motif sequences
    alphabet = guess_alphabet(motif_list)

    # Suffix appended to the motif id to form the Module ID.
    module_key = "1"

    gibbs_module = {}
    for motif in motif_list:
        # motif[0] is looked up in sequence_map to obtain the sequence id.
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]

        start = motif[1]
        seq = motif[2]
        sig = motif[3]

        # Each instance is stored under its (sequence id, start) key.
        cur_key = (seq_id, start)
        location = Location(seq_id, start, start + len(seq))
        gibbs_module[cur_key] = ModuleInstance(seq, location, sig)

    gibbs_mod = Module(gibbs_module, MolType=alphabet)
    gibbs_mod.Pvalue = motif_p
    gibbs_mod.ID = motif_id + module_key
    yield gibbs_mod
예제 #7
0
파일: meme.py 프로젝트: chungtseng/pycogent
def extractModuleData(module_data, alphabet, remap_dict):
    """Creates Module object given module_data list.

        - Only works on 1 module at a time: only pass in data from one module.
        - module_data: only its first three elements are used: [0] general
        information lines (first line only), [1] data handed to
        getConsensusSequence, [2] the lines holding the instances.
        - alphabet: passed to Module as MolType.
        - remap_dict: maps the label found in each instance line to the
        sequence id used for the Location and the Module key.

    Returns the Module with ConsensusSequence, Llr, Evalue and ID set.
    """
    #Only keep first 3 elements of the list
    module_data = module_data[:3]

    #Get Module general information: module_data[0]
    #Only need to keep first line
    general_dict = getModuleGeneralInfo(module_data[0][0])
    module_length = int(general_dict['width'])

    #Get ModuleInstances: module_data[2]
    #The first four and last two lines of the section are skipped; each
    #remaining line is split into whitespace-separated fields. A
    #comprehension is used instead of an xrange index loop so the code runs
    #on both Python 2 and Python 3.
    instance_data = [line.split() for line in module_data[2][4:-2]]

    #Create a ModuleInstance object and add it to the dict for each instance
    instances = {}
    for instance in instance_data:
        seqId = remap_dict[instance[0]]
        #Reported start minus one (presumably 1-based to 0-based; confirm).
        start = int(instance[1]) - 1
        Pvalue = float(instance[2])
        sequence = instance[4]
        #Create Location object for ModuleInstance
        location = Location(seqId, start, start + module_length)
        #Key each instance by (sequence id, start)
        instances[(seqId, start)] = ModuleInstance(sequence, location, Pvalue)

    meme_module = Module(instances, MolType=alphabet)
    #Get Multilevel Consensus Sequence
    meme_module.ConsensusSequence = getConsensusSequence(module_data[1])
    #Pull out desired values from dict
    meme_module.Llr = int(general_dict['llr'])
    meme_module.Evalue = float(general_dict['E-value'])
    meme_module.ID = general_dict['MOTIF']

    return meme_module
예제 #8
0
 def test_init(self):
     """A Motif built from one Module exposes it via Modules; Info is None."""
     # (sequence id, start, end) for each 'guc' instance.
     spans = (('a', 3, 5), ('b', 3, 5), ('c', 8, 10))
     instances = {}
     for seq_id, start, end in spans:
         instances[(seq_id, start)] = ModuleInstance(
             'guc', Location(seq_id, start, end))
     module = Module(instances)

     motif = Motif(module)
     self.assertEqual(motif.Modules, [module])
     self.assertEqual(motif.Info, None)
예제 #9
0
 def test_init(self):
     """MotifResults should store its modules, motifs, results and parameters.

     Each entry of the parameters dict is expected to be readable as an
     attribute of the MotifResults object.
     """
     # (sequence id, start, end) for each 'guc' instance.
     spans = (('a', 3, 5), ('b', 3, 5), ('c', 8, 10))
     instances = {}
     for seq_id, start, end in spans:
         instances[(seq_id, start)] = ModuleInstance(
             'guc', Location(seq_id, start, end))
     module = Module(instances)
     motif = Motif([module])

     results = {'key1': 'value1', 'key2': 'value2'}
     parameters = {'parameter1': 1, 'parameter2': 2}
     motif_results = MotifResults([module], [motif], results, parameters)

     self.assertEqual(motif_results.Modules, [module])
     self.assertEqual(motif_results.Motifs, [motif])
     self.assertEqual(motif_results.Results, results)
     self.assertEqual(motif_results.parameter1, 1)
     self.assertEqual(motif_results.parameter2, 2)
예제 #10
0
def extractModuleData(module_data, alphabet, remap_dict):
    """Build a Module from the data of a single module.

        - Pass in data from one module only.
        - Only the first three elements of module_data are used: [0] general
        information lines (first line only), [1] data handed to
        getConsensusSequence, [2] the lines holding the instances.
        - alphabet is passed to Module as MolType.
        - remap_dict maps the label in each instance line to the sequence
        id used for the Location and the Module key.

    Returns the Module with ConsensusSequence, Llr, Evalue and ID set.
    """
    general_lines, consensus_data, instance_lines = (
        module_data[0], module_data[1], module_data[2])

    # Only the first general-information line is needed.
    general_dict = getModuleGeneralInfo(general_lines[0])
    width = int(general_dict['width'])

    # Skip the first four and last two lines of the instance section, then
    # split every remaining line into whitespace-separated fields.
    rows = [line.split() for line in instance_lines[4:-2]]

    instances = {}
    for fields in rows:
        seq_id = remap_dict[fields[0]]
        # Reported start minus one (presumably 1-based to 0-based; confirm).
        begin = int(fields[1]) - 1
        p_value = float(fields[2])
        location = Location(seq_id, begin, begin + width)
        # Instances are keyed by (sequence id, start).
        instances[(seq_id, begin)] = ModuleInstance(
            fields[4], location, p_value)

    module = Module(instances, MolType=alphabet)
    module.ConsensusSequence = getConsensusSequence(consensus_data)
    module.Llr = int(general_dict['llr'])
    module.Evalue = float(general_dict['E-value'])
    module.ID = general_dict['MOTIF']
    return module
예제 #11
0
    def setUp(self):
        """SetUp for MotifFormatter class tests.

        Builds the fixtures used by the tests:
            - sequences, locations, Pvalues, Evalues: parallel lists with
            seven entries each.
            - modules_no_e: one ModuleInstance per (sequence, location,
            P-value) triple.
            - module_with_template: a Module holding every instance of
            modules_no_e keyed by (SeqId, Start), with Template 'accgucg'
            and ID '1'.
            - modules_with_ids: four Modules with IDs '1' to '4'.
            - motifs_with_ids: one Motif per Module in modules_with_ids.
            - motif_results: MotifResults wrapping both of those lists.
            - color_map: style strings keyed by '1' to '4'.
            - color_map_rgb: RGB tuples keyed by 'color_1' to 'color_4'.
        """
        self.sequences = [
            'accucua',
            'caucguu',
            'accucua',
            'cgacucg',
            'cgaucag',
            'cuguacc',
            'cgcauca',
        ]
        self.locations = [
            Location('seq0', 1, 3),
            Location('seq1', 2, 3),
            Location('seq1', 1, 5),
            Location('seq1', 5, 3),
            Location('seq2', 3, 54),
            Location('seq2', 54, 2),
            Location('seq3', 4, 0),
        ]
        self.Pvalues = [
            .1,
            .002,
            .0000000003,
            .6,
            .0094,
            .6,
            .00201,
        ]
        self.Evalues = [
            .006,
            .02,
            .9,
            .0200000001,
            .09,
            .0000003,
            .900001,
        ]
        # zip replaces the Python 2-only xrange index loop; all three lists
        # have seven entries, so every triple is still used.
        self.modules_no_e = [
            ModuleInstance(sequence, location, pvalue)
            for sequence, location, pvalue in zip(
                self.sequences, self.locations, self.Pvalues)
        ]

        # Every instance is keyed by its (SeqId, Start) pair.
        self.module_with_template = Module(
            {
                (instance.Location.SeqId, instance.Location.Start): instance
                for instance in self.modules_no_e
            },
            Template='accgucg', ID='1'
        )

        self.modules_with_ids = [
            Module({
                ('a', 3): ModuleInstance('guc', Location('a', 3, 5)),
                ('b', 3): ModuleInstance('guc', Location('b', 3, 5)),
                ('c', 8): ModuleInstance('guc', Location('c', 8, 10)),
            }, ID='1'),
            Module({
                ('a', 7): ModuleInstance('cca', Location('a', 7, 9)),
                ('b', 7): ModuleInstance('cca', Location('b', 7, 9)),
                ('c', 11): ModuleInstance('cca', Location('c', 11, 13)),
            }, ID='2'),
            Module({
                ('a', 10): ModuleInstance('gca', Location('a', 10, 12)),
                ('b', 10): ModuleInstance('gca', Location('b', 10, 12)),
                # NOTE(review): the end (12) precedes the start (14) here,
                # unlike the sibling entries; left unchanged because
                # expected test output may depend on it.
                ('c', 14): ModuleInstance('gca', Location('c', 14, 12)),
            }, ID='3'),
            Module({
                ('a', 13): ModuleInstance('ggg', Location('a', 13, 15)),
                ('b', 13): ModuleInstance('ggg', Location('b', 13, 15)),
                ('c', 18): ModuleInstance('ggg', Location('c', 18, 20)),
            }, ID='4'),
        ]
        # A list (not a lazy map object) so the motifs can be iterated more
        # than once on Python 3 as well as Python 2.
        self.motifs_with_ids = [
            Motif(module) for module in self.modules_with_ids
        ]
        self.motif_results = MotifResults(
            Modules=self.modules_with_ids,
            Motifs=self.motifs_with_ids)

        self.color_map = {
            '1': """background-color: #0000FF; ; font-family: 'Courier New', Courier""",
            '2': """background-color: #FFFF00; ; font-family: 'Courier New', Courier""",
            '3': """background-color: #00FFFF; ; font-family: 'Courier New', Courier""",
            '4': """background-color: #FF00FF; ; font-family: 'Courier New', Courier""",
        }
        self.color_map_rgb = {
            'color_1': (0.0, 0.0, 1.0),
            'color_2': (1.0, 1.0, 0.0),
            'color_3': (0.0, 1.0, 1.0),
            'color_4': (1.0, 0.0, 1.0),
        }
예제 #12
0
    def setUp(self):
        """SetUp for Module class tests.

        Builds the fixtures used by the tests:
            - sequences, locations, Pvalues, Evalues: parallel lists with
            seven entries each.
            - modules_no_e: one ModuleInstance per (sequence, location,
            P-value) triple.
            - modules_p_and_e: the same instances built with the matching
            E-value as an additional argument.
            - module_no_template: a Module holding every instance of
            modules_no_e keyed by (SeqId, Start).
            - module_with_template: the same content, built with Template
            'accgucg'.
        """
        self.sequences = [
            'accucua',
            'caucguu',
            'accucua',
            'cgacucg',
            'cgaucag',
            'cuguacc',
            'cgcauca',
        ]
        self.locations = [
            Location('seq0', 1, 3),
            Location('seq1', 2, 3),
            Location('seq1', 1, 5),
            Location('seq1', 5, 3),
            Location('seq2', 3, 54),
            Location('seq2', 54, 2),
            Location('seq3', 4, 0),
        ]
        self.Pvalues = [
            .1,
            .002,
            .0000000003,
            .6,
            .0094,
            .6,
            .00201,
        ]
        self.Evalues = [
            .006,
            .02,
            .9,
            .0200000001,
            .09,
            .0000003,
            .900001,
        ]
        # zip replaces the Python 2-only xrange index loops; all four lists
        # have seven entries, so every fixture row is still used.
        self.modules_no_e = [
            ModuleInstance(sequence, location, pvalue)
            for sequence, location, pvalue in zip(
                self.sequences, self.locations, self.Pvalues)
        ]
        self.modules_p_and_e = [
            ModuleInstance(sequence, location, pvalue, evalue)
            for sequence, location, pvalue, evalue in zip(
                self.sequences, self.locations, self.Pvalues, self.Evalues)
        ]

        # Each Module gets its own dict, keyed by (SeqId, Start).
        self.module_no_template = Module(
            {
                (instance.Location.SeqId, instance.Location.Start): instance
                for instance in self.modules_no_e
            }
        )

        self.module_with_template = Module(
            {
                (instance.Location.SeqId, instance.Location.Start): instance
                for instance in self.modules_no_e
            },
            Template='accgucg'
        )