Esempio n. 1
0
def show_pssm_score(pssm_filename):
    """Print the scores of every model parsed from *pssm_filename*.

    One line is printed per model with three ``%6g``-formatted columns: the
    squared first-order entropy score, the information content score, and
    the geometric mean of those two values.

    :param pssm_filename: Path of the file handed to
        ``parse_gapped_format.parse_models``.
    """
    from parse_gapped_format import parse_models

    # Keep the file open for the whole loop (parse_models may read lazily)
    # and guarantee it is closed afterwards, even if scoring raises.
    with open(pssm_filename) as pssm_file:
        for model in parse_models(pssm_file):
            emissions = _calculate_emissions(model)
            # The entropy score is squared before it is reported and combined.
            first_order_entropy_score = calculate_first_order_entropy_score(emissions) ** 2
            information_content_score = calculate_information_content_score(emissions)
            overall_score = geometric_mean((first_order_entropy_score, information_content_score))
            # Single parenthesised argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%6g %6g %6g' % (first_order_entropy_score, information_content_score, overall_score))
Esempio n. 2
0
def show_pssm_score(pssm_filename):
    """Report three scores for each model found in *pssm_filename*.

    For every model returned by ``parse_models`` a line is printed holding,
    in ``%6g`` format, the squared first-order entropy score, the
    information content score and the geometric mean of the two.

    :param pssm_filename: Path of the file to open and pass to
        ``parse_gapped_format.parse_models``.
    """
    from parse_gapped_format import parse_models

    # The context manager closes the handle once all models are scored;
    # iteration stays inside it in case parse_models reads lazily.
    with open(pssm_filename) as handle:
        for model in parse_models(handle):
            emissions = _calculate_emissions(model)
            entropy = calculate_first_order_entropy_score(emissions)
            # The entropy score enters the report and the mean squared.
            first_order_entropy_score = entropy ** 2
            information_content_score = calculate_information_content_score(
                emissions)
            scores = (first_order_entropy_score, information_content_score)
            overall_score = geometric_mean(scores)
            # One parenthesised argument keeps the output identical on
            # Python 2 while remaining valid Python 3 syntax.
            print('%6g %6g %6g' % (first_order_entropy_score,
                                   information_content_score, overall_score))
Esempio n. 3
0
def calculate_emissions(model):
    """Copy a model's emission rows into a dense ``(model.N, model.M)`` array.

    ``model.emissions[i]`` is read as an indexable entry whose item 0 is the
    row index and whose item 1 is the row of values to store.

    :param model: Object exposing ``N``, ``M`` and ``emissions``.
    :returns: A ``numpy`` array of shape ``(model.N, model.M)``.
    :raises AssertionError: If an entry's stored index differs from its
        position in ``model.emissions``.
    """
    emissions = numpy.zeros((model.N, model.M))
    # range() instead of the Python-2-only xrange(): same iteration, and the
    # function now also runs on Python 3.
    for i in range(model.N):
        entry = model.emissions[i]
        # Entries must be stored in index order.
        assert entry[0] == i, 'emission entry %d is labelled %r' % (i, entry[0])
        emissions[i] = entry[1]
    return emissions


# Every parsed model must have model.M equal to this value (asserted below).
M = 4
for fragment in test_set_fragments:
    for cross_fold in range(1, 6):
        logging.info('%s %d', fragment, cross_fold)

        # Collect one (overall score, pssm file) pair per parsed model.
        results = []
        pattern = 'typical-pssms/%s-%d-*.pssm' % (fragment, cross_fold)
        for pssm_file in glob.glob(pattern):
            # Score the models while the file is open; the handle is closed
            # as soon as this file is done instead of being leaked.
            with open(pssm_file) as pssm_handle:
                models = parse_models(pssm_handle)
                for model in models:
                    assert model.M == M
                    emissions = calculate_emissions(model)
                    first_order_entropy_score = calculate_first_order_entropy_score(emissions)
                    information_content_score = calculate_information_content_score(emissions)
                    overall_score = geometric_mean((first_order_entropy_score, information_content_score))
                    results.append((overall_score, pssm_file))

        # Highest score first (ties fall back to comparing the file names).
        results.sort(reverse=True)
        files = []
        # `scored_file` rather than `file`, which would shadow the Python 2 builtin.
        for i, (score, scored_file) in enumerate(results):
            # Copy the .png that sits next to each .pssm under a rank-numbered name.
            src = scored_file.replace('.pssm', '.png')
            dest = 'typical-pssms/rescored/%s-%d-%03d.png' % (fragment, cross_fold, i)
            files.append(dest)
            shutil.copy(src, dest)
Esempio n. 4
0
    print >> sys.stderr, 'USAGE: %s <method> <pssm-file> <fragment> <fold> [<background>]' % sys.argv[
        0]
    sys.exit(-1)

#
# Set up the test harness
#
harness = TestHarness(options)

#
# Build the model
#
logging.info('Parsing: %s', pssm_file)
if options.glam2_format:
    # The handle stays open until the model has been built (the parser may
    # read lazily) and is closed afterwards instead of being leaked.
    with open(pssm_file) as glam2_file:
        output = GLAM2Output.parse(glam2_file)
        freqs, gaps = output.freqs_and_gaps()
        model = build_hmm_model(freqs, gaps)
else:
    # list() consumes the parser completely, so the file can be closed
    # before the models are inspected.
    with open(pssm_file) as gapped_file:
        semi_parsed_models = list(parse_models(gapped_file))
    if len(semi_parsed_models) > 1:
        sys.stderr.write('For the moment we can only handle one model at a time.\n')
        sys.exit(-1)
    parsed = semi_parsed_models[0]
    logging.info(str(parsed))
    model, traits = build_hmm_from_semi_parsed(parsed)

#
# Run the model
#
harness.run_method_on_dataset(dataset, method, model)
Esempio n. 5
0
TAAAAAGGTCTATGACTTATCAAATTTCAATAAGCTGACTGTTAGCAGTATTAAAAAATATTAAATATGCTAACANNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATACATAAAGGGAATAGGCAGAGTTCACAGATT
AATATTTCTTACCTCTACAATAAGAAGAAATACCTTGTTCTATGAGCAGCTGCCATACTTTCAGACATGTTTCTGACTTT
TAGATAATTAACAAATCCTCTGAAGAAAAGGAGCAGGCCTGAGAAGGTTGAAATAATATGGATATACTATGTTTTTATAC
AGAAAAGGGCAAGATAAATTTAAAGTAGACAATTATAAACANNNNNNNNNNNNNNNNNGGA""".replace('\n', '')

def convert_seq(seq):
    """Return *seq* encoded as a ``numpy`` array of corebio ordinals.

    The sequence is wrapped in a ``corebio.seq.Seq`` built with
    ``corebio.seq.reduced_nucleic_alphabet``; the result of its ``ords()``
    is converted with ``numpy.array``.
    """
    alphabet = corebio.seq.reduced_nucleic_alphabet
    wrapped = corebio.seq.Seq(seq, alphabet=alphabet)
    ordinals = wrapped.ords()
    return numpy.array(ordinals)

# Encode both sequences and preprocess them for the hmm module.
old_pp = hmm.preprocess_sequence(convert_seq(old_seq))
new_pp = hmm.preprocess_sequence(convert_seq(new_seq))

#meme_dir = '/home/reid/Analysis/GappedPssms/MEME/x-validate'
#pssm_file = os.path.join(meme_dir, 'T00671-1.pssm')
pssm_file = '/home/john/Analysis/GappedPssms/MEME/x-validate/vm-T00671-motif-h2-v9-x1.pssm'
# list() consumes the parser completely, so the handle can be closed right
# away instead of being left open for the rest of the script.
with open(pssm_file) as pssm_handle:
    semi_parsed_models = list(parse_models(pssm_handle))
if len(semi_parsed_models) > 1:
    sys.stderr.write('For the moment we can only handle one model at a time.\n')
    sys.exit(-1)
parsed = semi_parsed_models[0]
logging.info(str(parsed))
model, traits = build_hmm_from_semi_parsed(parsed)
classifier = make_classifier(model)

def test_seq(seq):
    """Encode *seq* with ``convert_seq`` and return the classifier's result for it."""
    encoded = convert_seq(seq)
    return classifier(encoded)

print 'Old sequence (without Ns):', classifier(old_pp)
print 'New sequence (with    Ns):', classifier(new_pp)

LL, alpha, beta, c = model.forward_backward(new_pp)