def show_pssm_score(pssm_filename):
    """Print the quality scores of every model found in a PSSM file.

    For each model yielded by ``parse_models`` one line is written to
    standard output holding three ``%6g``-formatted numbers:

    1. the first order entropy score, squared;
    2. the information content score;
    3. the geometric mean of the two values above.

    :param pssm_filename: Path of the PSSM file to read.
    """
    from parse_gapped_format import parse_models

    # The file stays open for the whole loop because parse_models may read
    # from the handle lazily; the ``with`` block guarantees it is closed
    # afterwards (the handle used to be left for the garbage collector).
    with open(pssm_filename) as pssm_file:
        for model in parse_models(pssm_file):
            emissions = _calculate_emissions(model)

            first_order_entropy_score = calculate_first_order_entropy_score(
                emissions)
            # The squared value is what gets reported and what feeds the
            # overall score.
            first_order_entropy_score **= 2

            information_content_score = calculate_information_content_score(
                emissions)

            overall_score = geometric_mean(
                (first_order_entropy_score, information_content_score))

            # A single parenthesised expression prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('%6g %6g %6g' % (
                first_order_entropy_score,
                information_content_score,
                overall_score))
def show_pssm_score(pssm_filename):
    """Report first order entropy, information content and overall scores.

    Every model parsed from ``pssm_filename`` produces one output line with
    three ``%6g``-formatted values: the squared first order entropy score,
    the information content score, and the geometric mean of those two.

    :param pssm_filename: Path of the PSSM file to parse.
    """
    from parse_gapped_format import parse_models

    # Use a context manager so the file handle is always released; the loop
    # sits inside it in case parse_models consumes the file lazily.
    with open(pssm_filename) as pssm_file:
        for model in parse_models(pssm_file):
            emissions = _calculate_emissions(model)

            # The entropy score is squared before it is printed or combined.
            first_order_entropy_score = calculate_first_order_entropy_score(
                emissions)
            first_order_entropy_score **= 2

            information_content_score = calculate_information_content_score(
                emissions)

            overall_score = geometric_mean(
                (first_order_entropy_score, information_content_score))

            # Parenthesised single expression: same output on Python 2,
            # and still valid syntax on Python 3.
            print('%6g %6g %6g' % (
                first_order_entropy_score,
                information_content_score,
                overall_score))
def calculate_emissions(model):
    """Collect a model's emission rows into one dense array.

    :param model: Object exposing ``N``, ``M`` and ``emissions``, where
        ``emissions[i]`` is indexable as ``(index, row)``.
    :returns: ``numpy`` array of shape ``(model.N, model.M)`` whose i'th row
        is ``model.emissions[i][1]``.
    :raises AssertionError: If the index stored with a row differs from its
        position in ``model.emissions``.
    """
    emissions = numpy.zeros((model.N, model.M))
    for i in xrange(model.N):
        # Each entry carries its own index; it must match its position.
        assert model.emissions[i][0] == i
        emissions[i] = model.emissions[i][1]
    return emissions


# Every model scored below is required to have this value of ``M``.
M = 4

# For each fragment and cross-validation fold (1 to 5): score every PSSM,
# rank them best first and copy the matching images to rank-numbered names.
for fragment in test_set_fragments:
    for cross_fold in xrange(1, 6):
        logging.info('%s %d', fragment, cross_fold)

        # (overall score, PSSM path) for every model in this fragment/fold.
        results = list()
        pssm_pattern = 'typical-pssms/%s-%d-*.pssm' % (fragment, cross_fold)
        for pssm_file in glob.glob(pssm_pattern):
            # Close each PSSM file once its models have been consumed; the
            # loop is inside the ``with`` in case parse_models reads lazily.
            with open(pssm_file) as pssm_handle:
                for model in parse_models(pssm_handle):
                    assert model.M == M
                    emissions = calculate_emissions(model)
                    first_order_entropy_score = \
                        calculate_first_order_entropy_score(emissions)
                    information_content_score = \
                        calculate_information_content_score(emissions)
                    overall_score = geometric_mean(
                        (first_order_entropy_score, information_content_score))
                    results.append((overall_score, pssm_file))

        # Highest score first; equal scores fall back to comparing the paths.
        results.sort(reverse=True)

        # Destination paths in rank order.
        # NOTE(review): not read in the visible code - presumably used by
        # later code in this script; confirm before removing.
        files = []
        # ``pssm_path`` replaces the old loop variable ``file``, which
        # shadowed the builtin of the same name.
        for i, (score, pssm_path) in enumerate(results):
            src = pssm_path.replace('.pssm', '.png')
            dest = 'typical-pssms/rescored/%s-%d-%03d.png' % (
                fragment, cross_fold, i)
            files.append(dest)
            shutil.copy(src, dest)
# Fragment of a command-line driver; its beginning lies outside this chunk.
# Visible steps, in order:
#   1. print the usage message to stderr and exit with status -1 (presumably
#      the body of an argument check that is not visible here - TODO confirm);
#   2. construct a TestHarness from `options`;
#   3. build an HMM from `pssm_file`: through GLAM2Output and build_hmm_model
#      when options.glam2_format is set, otherwise through parse_models and
#      build_hmm_from_semi_parsed (exiting with -1 if more than one model was
#      parsed);
#   4. run `method` with that model on `dataset` through the harness.
# NOTE(review): neither open(pssm_file) call is closed explicitly, and an
# empty parse_models() result would raise IndexError at semi_parsed_models[0].
print >> sys.stderr, 'USAGE: %s <method> <pssm-file> <fragment> <fold> [<background>]' % sys.argv[ 0] sys.exit(-1) # # Set up the test harness # harness = TestHarness(options) # # Build the model # logging.info('Parsing: %s' % pssm_file) if options.glam2_format: output = GLAM2Output.parse(open(pssm_file)) freqs, gaps = output.freqs_and_gaps() model = build_hmm_model(freqs, gaps) else: semi_parsed_models = list(parse_models(open(pssm_file))) if len(semi_parsed_models) > 1: print >> sys.stderr, 'For the moment we can only handle one model at a time.' sys.exit(-1) parsed = semi_parsed_models[0] logging.info(str(parsed)) model, traits = build_hmm_from_semi_parsed(parsed) # # Run the model # harness.run_method_on_dataset(dataset, method, model)
TAAAAAGGTCTATGACTTATCAAATTTCAATAAGCTGACTGTTAGCAGTATTAAAAAATATTAAATATGCTAACANNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATACATAAAGGGAATAGGCAGAGTTCACAGATT AATATTTCTTACCTCTACAATAAGAAGAAATACCTTGTTCTATGAGCAGCTGCCATACTTTCAGACATGTTTCTGACTTT TAGATAATTAACAAATCCTCTGAAGAAAAGGAGCAGGCCTGAGAAGGTTGAAATAATATGGATATACTATGTTTTTATAC AGAAAAGGGCAAGATAAATTTAAAGTAGACAATTATAAACANNNNNNNNNNNNNNNNNGGA""".replace('\n', '') def convert_seq(seq): return numpy.array(corebio.seq.Seq(seq, alphabet=corebio.seq.reduced_nucleic_alphabet).ords()) old_pp = hmm.preprocess_sequence(convert_seq(old_seq)) new_pp = hmm.preprocess_sequence(convert_seq(new_seq)) #meme_dir = '/home/reid/Analysis/GappedPssms/MEME/x-validate' #pssm_file = os.path.join(meme_dir, 'T00671-1.pssm') pssm_file = '/home/john/Analysis/GappedPssms/MEME/x-validate/vm-T00671-motif-h2-v9-x1.pssm' semi_parsed_models = list(parse_models(open(pssm_file))) if len(semi_parsed_models) > 1: print >> sys.stderr, 'For the moment we can only handle one model at a time.' sys.exit(-1) parsed = semi_parsed_models[0] logging.info(str(parsed)) model, traits = build_hmm_from_semi_parsed(parsed) classifier = make_classifier(model) def test_seq(seq): return classifier(convert_seq(seq)) print 'Old sequence (without Ns):', classifier(old_pp) print 'New sequence (with Ns):', classifier(new_pp) LL, alpha, beta, c = model.forward_backward(new_pp)