Esempio n. 1
0
    logging.basicConfig(level=logging.INFO)

    max_mosaics = 15
    max_order = 5
    me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)
    for i in range(max_order):
        e = me[i * max_mosaics:(i + 1) * max_mosaics]
        plot([x[1] for x in e], [x[2] for x in e])
    xlabel('# mosaics')
    ylabel('LL')
    title('Evaluation of mosaic models of various Markov orders')
    savefig('mosaic-evaluation.png', format='PNG')
    raise

    # load our sequences
    sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

    # build our model
    model_by_states = create_mosaic_model(num_mosaics=1,
                                          p_transition=0.,
                                          alphabet_size=4,
                                          order=2,
                                          dirichlet_prior_strength=10.)
    model = hmm.as_model(model_by_states)
    print model.B

    # convert our sequences to the correct order
    sequences_order_n = [model.converter.to_order_n(s) for s in sequences]

    #from IPython.Debugger import Pdb; Pdb().set_trace()
    def callback(LL):
Esempio n. 2
0
    logging.basicConfig(level=logging.INFO)

    max_mosaics = 15
    max_order = 5
    me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)
    for i in range(max_order):
        e=me[i*max_mosaics:(i+1)*max_mosaics]
        plot([x[1] for x in e], [x[2] for x in e])
    xlabel('# mosaics')
    ylabel('LL')
    title('Evaluation of mosaic models of various Markov orders')
    savefig('mosaic-evaluation.png', format='PNG')
    raise

    # load our sequences
    sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

    # build our model
    model_by_states = create_mosaic_model(
      num_mosaics=1,
      p_transition=0.,
      alphabet_size=4,
      order=2,
      dirichlet_prior_strength=10.
    )
    model = hmm.as_model(model_by_states)
    print model.B

    # convert our sequences to the correct order
    sequences_order_n = [model.converter.to_order_n(s) for s in sequences]
Esempio n. 3
0
# Ad-hoc check: print the result of reduce_sequence on a fresh 10-element
# range for a series of second arguments.
for reduce_arg in (10, 9, 6, 5, 3, 2, 1):
    print(reduce_sequence(N.arange(10), reduce_arg))

# NOTE(review): unconditional stop - nothing below this line runs while this
# statement is in place.
raise RuntimeError('Stopping')

K = 8
model_file = 'bg-model.pickle'
fasta = '/home/john/Data/GappedPssms/apr-2009/T00759trimRM.fa'

# Load the pickled background model; the with-block closes the file handle
# as soon as loading is done instead of leaving it open.
with open(model_file) as model_stream:
    bg_model = cPickle.load(model_stream)

# Read the sequences and convert each one with the background model's
# converter.
sequences = convert_fasta_sequences(fasta)
converted_seqs = [bg_model.converter.to_order_n(s) for s in sequences]


def k_mer_log_likelihoods_new(alpha, c, k=None):
    """Return the log score of every length-k window over the positions of c.

    With ``alpha_sum`` the row sums of ``alpha``, the score of the window
    starting at position ``i`` is::

        alpha_sum[i + k - 1] / alpha_sum[i - 1] / prod(c[i:i + k])

    For the first window (``i == 0``) there is no preceding position, so the
    ``alpha_sum[i - 1]`` divisor is left out.

    NOTE(review): alpha and c look like the scaled forward variables and the
    scaling factors of an HMM forward pass - confirm against the caller.

    :param alpha: 2-d array; it is summed over axis 1 and indexed by position.
    :param c: 1-d array with one entry per position.
    :param k: Window length. Defaults to the module-level ``K``.
    :returns: Array of ``len(c) - k + 1`` log scores; empty when c is shorter
        than k.
    """
    if k is None:
        k = K
    num_windows = len(c) - k + 1
    if num_windows <= 0:
        # No complete window fits into the sequence.
        return N.empty(0)

    alpha_sum = alpha.sum(axis=1)
    result = N.empty(num_windows)
    for i in range(num_windows):
        window_scale = c[i:i + k].prod()
        if i == 0:
            # Same indices as the general case below with i = 0. The previous
            # code used alpha_sum[k] and c[:k + 1] here, i.e. one position
            # too many, which also indexed past the last position whenever
            # len(c) == k.
            result[i] = alpha_sum[k - 1] / window_scale
        else:
            result[i] = alpha_sum[i + k - 1] / alpha_sum[i - 1] / window_scale
    return N.log(result)


def calculate_k_mer_scores(bg_model, converted_seqs, K):
Esempio n. 4
0
# Log the current value and the help text of every option that is stored
# under a destination name.
for option in option_parser.option_list:
    if not option.dest:
        continue
    logging.info(
        '%s: %s (%s)',
        option.dest,
        str(getattr(options, option.dest)),
        option.help,
    )

# One (tag, fasta file) pair per test set fragment.
# Alternative single synthetic input, kept for reference:
#inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))]
inputs = [
    (fragment, fasta_file_for_fragment(fragment))
    for fragment in test_set_fragments
]

for seq_tag, fasta_file in inputs:
    # While this input is processed, root-logger output is also written to
    # <output_dir>/<seq_tag>.log.
    log_path = os.path.join(options.output_dir, '%s.log' % seq_tag)
    file_handler = logging.FileHandler(log_path)
    root_logger = logging.getLogger('')
    root_logger.addHandler(file_handler)
    try:
        sequences = convert_fasta_sequences(fasta_file)
        # Debugging aid: uncomment to run on a small subset.
        #sequences = [s[:200] for s in sequences[:10]]

        # Options that are specific to this input.
        options.tag = seq_tag
        options.bg_model_filename = "%s-bg-model.pickle" % seq_tag

        # Build the algorithm from the options and apply it to the sequences.
        algorithm = SingleGapAlgorithm(options)
        algorithm(sequences)

    finally:
        # Detach and close the per-input log file whether or not the run
        # succeeded.
        root_logger.removeHandler(file_handler)
        file_handler.close()
Esempio n. 5
0
    "-f",
    "--fasta",
    dest="fasta",
    help="The fasta file containing the sequences to run.")
add_algorithm_options(option_parser)
options, args = option_parser.parse_args()

# The log file is written into the output directory, so one must be given
# and it must exist before the file handler is created.
if not options.output_dir:
    raise ValueError('No output directory specified')
try:
    # Create unconditionally rather than testing os.path.exists() first: the
    # directory could be created by someone else between test and creation.
    os.makedirs(options.output_dir)
except OSError:
    # An already existing directory is fine; anything else is a real error.
    if not os.path.isdir(options.output_dir):
        raise

# Send a timestamped copy of all root-logger output to <output_dir>/<tag>.log.
log_filename = os.path.join(options.output_dir, '%s.log' % options.tag)
file_handler = logging.FileHandler(log_filename)
file_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
logging.getLogger('').addHandler(file_handler)
logging.info('Writing log to %s', log_filename)
logging.info('Command line: %s', ' '.join(sys.argv))

# Log the value of every option that is stored under a destination name.
for option in option_parser.option_list:
    if option.dest:
        logging.info('%32s: %s', option.dest,
                     str(getattr(options, option.dest)))

# The file name is passed as a logging argument, like in every other logging
# call of this script, instead of being %-formatted into the message first.
logging.info('Reading sequences from: %s', options.fasta)
sequences = convert_fasta_sequences(options.fasta)
# Debugging aid: uncomment to run on a small subset.
#sequences = [s[:200] for s in sequences[:10]]

# Build the algorithm from the options and apply it to the sequences.
algorithm = SingleGapAlgorithm(options)
algorithm(sequences)
Esempio n. 6
0
  "-f",
  "--fasta",
  dest="fasta",
  help="The fasta file containing the sequences to run."
)
add_algorithm_options(option_parser)
options, args = option_parser.parse_args()

# An output directory is mandatory because the log file is placed in it.
if not options.output_dir:
    raise ValueError('No output directory specified')

# Make sure the directory exists. Attempting the creation and tolerating an
# existing directory avoids the gap between an existence check and the
# creation in which another process could create the directory.
try:
    os.makedirs(options.output_dir)
except OSError:
    if not os.path.isdir(options.output_dir):
        raise

# Attach a file handler with a timestamped format to the root logger.
log_filename = os.path.join(options.output_dir, '%s.log' % options.tag)
file_handler = logging.FileHandler(log_filename)
file_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
logging.getLogger('').addHandler(file_handler)
logging.info('Writing log to %s', log_filename)
logging.info('Command line: %s', ' '.join(sys.argv))

# Record the value of each option that has a destination name.
for option in option_parser.option_list:
    if option.dest:
        logging.info('%32s: %s', option.dest, str(getattr(options, option.dest)))

# Pass the file name as a logging argument (as the other logging calls here
# do) rather than formatting it into the message up front.
logging.info('Reading sequences from: %s', options.fasta)
sequences = convert_fasta_sequences(options.fasta)
# Debugging aid: uncomment to run on a small subset.
#sequences = [s[:200] for s in sequences[:10]]


# Build the algorithm from the options and apply it to the sequences.
algorithm = SingleGapAlgorithm(options)
algorithm(sequences)
Esempio n. 7
0
        logging.info('%32s: %s', option.dest, str(getattr(options, option.dest)))


#
# Run the algorithm on the training file of every cross fold of every test
# set fragment. Folds are numbered from 1 to cross_folds inclusive.
#
for fragment in test_set_fragments:
    for fold in xrange(1, cross_folds + 1):
        train_fasta, test_fasta = fasta_files_for_fragment_cross_fold(
            fragment, fold)
        logging.info('Training data set: %s', train_fasta)
        # The test file is neither logged nor used in this loop.

        # While this fold is processed, root-logger output is also written
        # to <output_dir>/<fragment>-<fold>.log.
        seq_tag = '%s-%d' % (fragment, fold)
        log_path = os.path.join(options.output_dir, '%s.log' % seq_tag)
        file_handler = logging.FileHandler(log_path)
        root_logger = logging.getLogger('')
        root_logger.addHandler(file_handler)
        try:
            sequences = convert_fasta_sequences(train_fasta)
            # Debugging aid: uncomment to run on a small subset.
            #sequences = [s[:200] for s in sequences[:100]]

            # Options that are specific to this fold.
            options.tag = seq_tag
            options.bg_model_filename = "%s-bg-model.pickle" % seq_tag

            # Build the algorithm from the options and apply it to the
            # training sequences.
            algorithm = SingleGapAlgorithm(options)
            algorithm(sequences)

        finally:
            # Detach and close the per-fold log file whether or not the run
            # succeeded.
            root_logger.removeHandler(file_handler)
            file_handler.close()