logging.basicConfig(level=logging.INFO) max_mosaics = 15 max_order = 5 me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order) for i in range(max_order): e = me[i * max_mosaics:(i + 1) * max_mosaics] plot([x[1] for x in e], [x[2] for x in e]) xlabel('# mosaics') ylabel('LL') title('Evaluation of mosaic models of various Markov orders') savefig('mosaic-evaluation.png', format='PNG') raise # load our sequences sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671')) # build our model model_by_states = create_mosaic_model(num_mosaics=1, p_transition=0., alphabet_size=4, order=2, dirichlet_prior_strength=10.) model = hmm.as_model(model_by_states) print model.B # convert our sequences to the correct order sequences_order_n = [model.converter.to_order_n(s) for s in sequences] #from IPython.Debugger import Pdb; Pdb().set_trace() def callback(LL):
# Evaluate mosaic models over a grid of Markov orders and mosaic counts, then
# plot one curve per order and save the figure.
logging.basicConfig(level=logging.INFO)
max_mosaics = 15
max_order = 5
me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)
for i in range(max_order):
    # Take the i-th run of max_mosaics consecutive results
    # (presumably one run per Markov order - confirm against evaluate_mosaics).
    e = me[i * max_mosaics:(i + 1) * max_mosaics]
    plot([x[1] for x in e], [x[2] for x in e])
xlabel('# mosaics')
ylabel('LL')
title('Evaluation of mosaic models of various Markov orders')
savefig('mosaic-evaluation.png', format='PNG')

# Stop the script here on purpose. A bare `raise` has no active exception to
# re-raise at this point, so it only halted the script by failing with an
# unrelated error; raise an explicit exception instead. Everything below is
# unreachable until this line is removed.
raise RuntimeError('Stopping')

# load our sequences
sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

# build our model
model_by_states = create_mosaic_model(
    num_mosaics=1,
    p_transition=0.,
    alphabet_size=4,
    order=2,
    dirichlet_prior_strength=10.
)
model = hmm.as_model(model_by_states)
print(model.B)

# convert our sequences to the correct order
sequences_order_n = [model.converter.to_order_n(s) for s in sequences]
print reduce_sequence(N.arange(10), 10) print reduce_sequence(N.arange(10), 9) print reduce_sequence(N.arange(10), 6) print reduce_sequence(N.arange(10), 5) print reduce_sequence(N.arange(10), 3) print reduce_sequence(N.arange(10), 2) print reduce_sequence(N.arange(10), 1) raise RuntimeError('Stopping') K = 8 model_file = 'bg-model.pickle' fasta = '/home/john/Data/GappedPssms/apr-2009/T00759trimRM.fa' bg_model = cPickle.load(open(model_file)) sequences = convert_fasta_sequences(fasta) converted_seqs = [bg_model.converter.to_order_n(s) for s in sequences] def k_mer_log_likelihoods_new(alpha, c): result = N.empty(len(c)-K+1) alpha_sum = alpha.sum(axis=1) for i in xrange(len(c)-K+1): if 0 == i: result[i] = alpha_sum[K] / c[:K+1].prod() else: result[i] = alpha_sum[i+K-1] / alpha_sum[i-1] / c[i:i+K].prod() return N.log(result) def calculate_k_mer_scores(bg_model, converted_seqs, K):
# Log every parsed option together with its current value and help text.
for option in option_parser.option_list:
    if not option.dest:
        # Options without a destination have no value to report.
        continue
    logging.info('%s: %s (%s)', option.dest, str(getattr(options, option.dest)), option.help)

#inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))]
inputs = [(fragment, fasta_file_for_fragment(fragment)) for fragment in test_set_fragments]

# Run the algorithm once per (tag, fasta file) pair, each with its own log file.
for seq_tag, fasta_file in inputs:
    # Attach a per-run log file to the root logger for the duration of the run.
    run_log_path = os.path.join(options.output_dir, '%s.log' % seq_tag)
    file_handler = logging.FileHandler(run_log_path)
    root_logger = logging.getLogger('')
    root_logger.addHandler(file_handler)
    try:
        sequences = convert_fasta_sequences(fasta_file)
        #sequences = [s[:200] for s in sequences[:10]]

        # Point the shared options at this run's tag and background model file.
        options.tag = seq_tag
        options.bg_model_filename = "%s-bg-model.pickle" % seq_tag

        # Run the algorithm
        algorithm = SingleGapAlgorithm(options)
        algorithm(sequences)
    finally:
        # Always detach and close the handler, even if the run failed.
        root_logger.removeHandler(file_handler)
        file_handler.close()
"-f", "--fasta", dest="fasta", help="The fasta file containing the sequences to run.") add_algorithm_options(option_parser) options, args = option_parser.parse_args() if not options.output_dir: raise ValueError('No output directory specified') if not os.path.exists(options.output_dir): os.makedirs(options.output_dir) log_filename = os.path.join(options.output_dir, '%s.log' % options.tag) file_handler = logging.FileHandler(log_filename) file_handler.setFormatter( logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")) logging.getLogger('').addHandler(file_handler) logging.info('Writing log to %s', log_filename) logging.info('Command line: %s', ' '.join(sys.argv)) for option in option_parser.option_list: if option.dest: logging.info('%32s: %s', option.dest, str(getattr(options, option.dest))) logging.info('Reading sequences from: %s' % options.fasta) sequences = convert_fasta_sequences(options.fasta) #sequences = [s[:200] for s in sequences[:10]] # Run the algorithm algorithm = SingleGapAlgorithm(options) algorithm(sequences)
"-f", "--fasta", dest="fasta", help="The fasta file containing the sequences to run." ) add_algorithm_options(option_parser) options, args = option_parser.parse_args() if not options.output_dir: raise ValueError('No output directory specified') if not os.path.exists(options.output_dir): os.makedirs(options.output_dir) log_filename = os.path.join(options.output_dir, '%s.log' % options.tag) file_handler = logging.FileHandler(log_filename) file_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")) logging.getLogger('').addHandler(file_handler) logging.info('Writing log to %s', log_filename) logging.info('Command line: %s', ' '.join(sys.argv)) for option in option_parser.option_list: if option.dest: logging.info('%32s: %s', option.dest, str(getattr(options, option.dest))) logging.info('Reading sequences from: %s' % options.fasta) sequences = convert_fasta_sequences(options.fasta) #sequences = [s[:200] for s in sequences[:10]] # Run the algorithm algorithm = SingleGapAlgorithm(options) algorithm(sequences)
# NOTE(review): presumably the last statement of an option-logging loop whose
# header lies outside this excerpt - confirm its indentation against the file.
logging.info('%32s: %s', option.dest, str(getattr(options, option.dest)))

#
# For each data set
#
for fragment in test_set_fragments:
    # Cross-fold indices are 1-based and run up to cross_folds inclusive.
    for i in xrange(1, cross_folds+1):
        train_fasta, test_fasta = fasta_files_for_fragment_cross_fold(fragment, i)
        logging.info('Training data set: %s', train_fasta)
        #log.info('Test data set: %s', test_fasta)

        # Attach a per-fold log file to the root logger for the duration of the run.
        seq_tag = '%s-%d' % (fragment, i)
        fold_log_path = os.path.join(options.output_dir, '%s.log' % seq_tag)
        file_handler = logging.FileHandler(fold_log_path)
        root_logger = logging.getLogger('')
        root_logger.addHandler(file_handler)
        try:
            sequences = convert_fasta_sequences(train_fasta)
            #sequences = [s[:200] for s in sequences[:100]]

            # Point the shared options at this fold's tag and background model file.
            options.tag = seq_tag
            options.bg_model_filename = "%s-bg-model.pickle" % seq_tag

            # Run the algorithm
            algorithm = SingleGapAlgorithm(options)
            algorithm(sequences)
        finally:
            # Always detach and close the handler, even if the run failed.
            root_logger.removeHandler(file_handler)
            file_handler.close()