# Evaluate mosaic models over a grid of (Markov order, number of mosaics)
# and plot one curve per order.
logging.basicConfig(level=logging.INFO)
max_mosaics = 15
max_order = 5
me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)

# `me` is sliced into max_order consecutive runs of max_mosaics entries;
# element [1] of each entry is plotted on the x axis and element [2] on the y axis.
for i in range(max_order):
    start = i * max_mosaics
    e = me[start:start + max_mosaics]
    plot([x[1] for x in e], [x[2] for x in e])

xlabel('# mosaics')
ylabel('LL')
title('Evaluation of mosaic models of various Markov orders')
savefig('mosaic-evaluation.png', format='PNG')

# NOTE(review): bare raise with no exception being handled in view - it looks
# like a deliberate stop after the plot is saved; confirm before relying on
# the statements below being executed.
raise

# load our sequences
sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

# build our model
model_by_states = create_mosaic_model(
    num_mosaics=1,
    p_transition=0.,
    alphabet_size=4,
    order=2,
    dirichlet_prior_strength=10.,
)
model = hmm.as_model(model_by_states)
print(model.B)

# convert our sequences to the correct order
sequences_order_n = [model.converter.to_order_n(s) for s in sequences]
logging.basicConfig(level=logging.INFO) max_mosaics = 15 max_order = 5 me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order) for i in range(max_order): e = me[i * max_mosaics:(i + 1) * max_mosaics] plot([x[1] for x in e], [x[2] for x in e]) xlabel('# mosaics') ylabel('LL') title('Evaluation of mosaic models of various Markov orders') savefig('mosaic-evaluation.png', format='PNG') raise # load our sequences sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671')) # build our model model_by_states = create_mosaic_model(num_mosaics=1, p_transition=0., alphabet_size=4, order=2, dirichlet_prior_strength=10.) model = hmm.as_model(model_by_states) print model.B # convert our sequences to the correct order sequences_order_n = [model.converter.to_order_n(s) for s in sequences] #from IPython.Debugger import Pdb; Pdb().set_trace() def callback(LL):
#
# Copyright John Reid 2008
#

"""
Code to generate negative test sequences for those fragments in the test harness.
"""

from gapped_pssms.data import fasta_file_for_fragment, test_set_fragments
import sys


def sequences_from_fasta(fasta):
    """Yield each sequence in a FASTA file as a string with flanking 'n'/'N' removed.

    fasta: path of the FASTA file to read.

    The file is parsed by corebio's FASTA reader using its DNA alphabet. This
    is a generator: the file is opened when iteration starts and is closed as
    soon as the last sequence has been yielded.
    """
    # corebio.seq is imported explicitly because dna_alphabet lives there;
    # importing only the fasta_io submodule does not guarantee it is loaded.
    import corebio.seq
    import corebio.seq_io.fasta_io

    with open(fasta, "r") as handle:
        for seq in corebio.seq_io.fasta_io.iterseq(handle, corebio.seq.dna_alphabet):
            yield str(seq).strip("nN")


# For every test fragment, run the generator script once with a command line
# asking for as many sequences as the fragment has (-n), each as long as the
# fragment's longest sequence (-l).
for fragment in test_set_fragments:
    seqs = list(sequences_from_fasta(fasta_file_for_fragment(fragment)))
    if not seqs:
        # max() below would fail with an unhelpful ValueError on an empty set.
        sys.stderr.write("No sequences found for fragment %s; skipping\n" % fragment)
        continue
    seq_length = max(len(s) for s in seqs)
    num_seqs = len(seqs)

    # Build the argument list directly rather than formatting one string and
    # splitting it, so a fragment name containing whitespace stays one
    # argument. The raw string keeps the Windows-style backslashes literal.
    sys.argv = [
        "generate_negative_test_sequences.py",
        "-m", r"..\..\Python\%s-bg-model.pickle" % fragment,
        "-n", "%d" % num_seqs,
        "-l", "%d" % seq_length,
        "-o", "negative-%s.fa" % fragment,
    ]
    # Python 2 builtin: executes the script in this module's namespace, where
    # it sees the sys.argv assigned above.
    execfile("generate_negative_test_sequences.py")
# option_parser = OptionParser() add_algorithm_options(option_parser) logging.info('Command line: %s', ' '.join(sys.argv)) options, args = option_parser.parse_args() log_filename = os.path.join(options.output_dir, '%s.log' % options.tag) logging.getLogger('').addHandler(logging.FileHandler(log_filename)) logging.info('Writing log to %s', log_filename) for option in option_parser.option_list: if option.dest: logging.info('%s: %s (%s)', option.dest, str(getattr(options, option.dest)), option.help) #inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))] inputs = [(fragment, fasta_file_for_fragment(fragment)) for fragment in test_set_fragments] # for each input sequence for seq_tag, fasta_file in inputs: # add a file handler to log for this test set file_handler = logging.FileHandler( os.path.join(options.output_dir, '%s.log' % seq_tag)) logging.getLogger('').addHandler(file_handler) try: sequences = convert_fasta_sequences(fasta_file) #sequences = [s[:200] for s in sequences[:10]] # set up the options for this test set options.tag = seq_tag options.bg_model_filename = "%s-bg-model.pickle" % seq_tag
# Parse the options # option_parser = OptionParser() add_algorithm_options(option_parser) logging.info('Command line: %s', ' '.join(sys.argv)) options, args = option_parser.parse_args() log_filename = os.path.join(options.output_dir, '%s.log' % options.tag) logging.getLogger('').addHandler(logging.FileHandler(log_filename)) logging.info('Writing log to %s', log_filename) for option in option_parser.option_list: if option.dest: logging.info('%s: %s (%s)', option.dest, str(getattr(options, option.dest)), option.help) #inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))] inputs = [(fragment, fasta_file_for_fragment(fragment)) for fragment in test_set_fragments] # for each input sequence for seq_tag, fasta_file in inputs: # add a file handler to log for this test set file_handler = logging.FileHandler(os.path.join(options.output_dir, '%s.log' % seq_tag)) logging.getLogger('').addHandler(file_handler) try: sequences = convert_fasta_sequences(fasta_file) #sequences = [s[:200] for s in sequences[:10]] # set up the options for this test set options.tag = seq_tag options.bg_model_filename = "%s-bg-model.pickle" % seq_tag # Run the algorithm