Beispiel #1
0
 def test_with_cluster_map(self):
     """Analyze with an explicit cluster map and check the positive calls.

     Expects two clusters and two cluster alignments, and expects the
     'hm_pos' value of a call to be truthy only for sequences 'seq1.3'
     and 'seq2.3'.
     """
     alignment_set = AlignmentSet(self.seqs, cluster_map=self.cluster_map)
     self.assertEqual(len(alignment_set.clusters), 2)
     self.assertEqual(len(alignment_set.cluster_alns), 2)

     expected_positive = ['seq1.3', 'seq2.3']
     for outcome in alignment_set.multiple_context_analysis([old_pattern]):
         call = outcome['call']
         flagged = call['hm_pos']
         # Pick the assertion that matches the expectation for this sequence.
         if call['sequence'] in expected_positive:
             verify = self.assertTrue
         else:
             verify = self.assertFalse
         verify(flagged)
Beispiel #2
0
 def test_with_cluster_map(self):
     """Check calls produced when sequences are grouped by a cluster map.

     The alignment set must expose two clusters and two cluster
     alignments; only 'seq1.3' and 'seq2.3' may carry a truthy 'hm_pos'.
     """
     clustered = AlignmentSet(self.seqs, cluster_map=self.cluster_map)
     self.assertEqual(len(clustered.clusters), 2)
     self.assertEqual(len(clustered.cluster_alns), 2)

     positives = ['seq1.3', 'seq2.3']
     for item in clustered.multiple_context_analysis([old_pattern]):
         is_positive = item['call']['hm_pos']
         name = item['call']['sequence']
         if name not in positives:
             self.assertFalse(is_positive)
             continue
         self.assertTrue(is_positive)
Beispiel #3
0
 def test_without_cluster_map(self):
     """Analyze without a cluster map and check the positive calls.

     Expects a single cluster whose alignment is stored under the key
     'all', and expects 'hm_pos' to be truthy only for 'seq2.3'.
     """
     alignment_set = AlignmentSet(self.seqs)
     self.assertEqual(len(alignment_set.clusters), 1)
     self.assertEqual(len(alignment_set.cluster_alns), 1)
     self.assertIn('all', alignment_set.cluster_alns)

     for outcome in alignment_set.multiple_context_analysis([old_pattern]):
         call = outcome['call']
         flagged = call['hm_pos']
         # Only seq2.3 is expected to be called positive.
         if call['sequence'] == 'seq2.3':
             verify = self.assertTrue
         else:
             verify = self.assertFalse
         verify(flagged)
Beispiel #4
0
 def test_without_cluster_map(self):
     """Check calls produced when no cluster map is supplied.

     The alignment set must expose exactly one cluster and one cluster
     alignment keyed 'all'; only 'seq2.3' may carry a truthy 'hm_pos'.
     """
     unclustered = AlignmentSet(self.seqs)
     self.assertEqual(len(unclustered.clusters), 1)
     self.assertEqual(len(unclustered.cluster_alns), 1)
     self.assertIn('all', unclustered.cluster_alns)

     for item in unclustered.multiple_context_analysis([old_pattern]):
         is_positive = item['call']['hm_pos']
         name = item['call']['sequence']
         if name != 'seq2.3':
             self.assertFalse(is_positive)
             continue
         self.assertTrue(is_positive)
Beispiel #5
0
 def test_with_reference_seqs(self):
     """Analyze with a cluster map plus explicit reference sequences.

     References for 'cluster1' and 'cluster2' are parsed from an inline
     FASTA string.  Expects two clusters, and expects 'hm_pos' to be
     truthy only for 'seq2.1', 'seq2.2' and 'seq2.3'.
     """
     references = helpers.parse_fasta("""
     >cluster1
     AAAAAAAAAAAAAAAAACCC
     >cluster2
     CCTTGGCCGGTTGGCCGCCC
     """)
     alignment_set = AlignmentSet(
         self.seqs,
         cluster_map=self.cluster_map,
         reference_sequences=references)
     self.assertEqual(len(alignment_set.clusters), 2)

     # The expected names do not depend on the result, so build them once.
     expected_positive = ['seq2.{}'.format(n) for n in [1, 2, 3]]
     for outcome in alignment_set.multiple_context_analysis([old_pattern]):
         call = outcome['call']
         flagged = call['hm_pos']
         if call['sequence'] in expected_positive:
             verify = self.assertTrue
         else:
             verify = self.assertFalse
         verify(flagged)
Beispiel #6
0
 def test_with_reference_seqs(self):
     """Check calls produced when reference sequences are supplied.

     The inline FASTA defines references named 'cluster1' and 'cluster2'.
     The alignment set must expose two clusters; only 'seq2.1', 'seq2.2'
     and 'seq2.3' may carry a truthy 'hm_pos'.
     """
     parsed_refs = helpers.parse_fasta("""
     >cluster1
     AAAAAAAAAAAAAAAAACCC
     >cluster2
     CCTTGGCCGGTTGGCCGCCC
     """)
     with_refs = AlignmentSet(self.seqs, cluster_map=self.cluster_map,
                              reference_sequences=parsed_refs)
     self.assertEqual(len(with_refs.clusters), 2)

     positives = ['seq2.{}'.format(idx) for idx in [1, 2, 3]]
     for item in with_refs.multiple_context_analysis([old_pattern]):
         is_positive = item['call']['hm_pos']
         name = item['call']['sequence']
         if name not in positives:
             self.assertFalse(is_positive)
             continue
         self.assertTrue(is_positive)
Beispiel #7
0
def analyze(args):
    """Run the hypermutation analysis described by ``args`` and write the results.

    Steps, in order:

    1. Read the FASTA alignment from ``args.alignment`` and look up the
       mutation patterns named in ``args.patterns``.
    2. Build an ``AlignmentSet`` (optionally with a cluster map loaded from
       ``args.cluster_map`` and reference sequences from
       ``args.reference_sequences``) and start the analysis.
    3. If ``args.cluster_threshold`` is set, run
       ``args.cluster_iterations - 1`` rounds of: collect the mutated
       columns from the latest analysis, re-cluster the alignment with
       those columns split out, and re-analyze with the new clusters.  The
       last clustering is written to ``<prefix>.clst.csv``.
    4. Optionally drop into an interactive console (``args.interactive``).
    5. Write the final analysis via ``write_analysis`` and, if
       ``args.write_references`` is set, the reference sequences.

    ``args.alignment`` and (when given) ``args.cluster_map`` must be open
    file-like objects; both are closed before returning, including when an
    error is raised part-way through.

    Args:
        args: Namespace-like object carrying the options read above
            (presumably the parsed command line -- confirm against caller).
    """
    import logging
    logging.captureWarnings(True)
    try:
        # Fetch sequence records and analysis patterns
        seq_records = SeqIO.to_dict(SeqIO.parse(args.alignment, 'fasta'))
        patterns = [mut_pattern.patterns[p] for p in args.patterns]
        pattern_names = [p.name for p in patterns]
        prefix = path.join(args.out_dir, args.prefix)
        analysis_settings = dict(rpr_cutoff=args.rpr_cutoff,
                                 significance_level=args.significance_level,
                                 quants=args.quants,
                                 pos_quants_only=args.pos_quants_only,
                                 caller=args.caller,
                                 prior=args.prior,
                                 cdfs=args.cdfs,
                                 quadr_maxiter=args.quadr_maxiter,
                                 optim_maxiter=args.optim_maxiter)

        # Need to think about how best to fork things here; for instance,
        # might make sense to let the user specify the initial clusters for
        # whatever reason... However, specifying the reference sequences
        # shouldn't make any sense there
        if args.reference_sequences:
            reference_sequences = SeqIO.to_dict(
                SeqIO.parse(args.reference_sequences, 'fasta'))
        else:
            reference_sequences = None

        # This lets the cluster map be optional, so that this script can be
        # used for naive hm filtering/analysis
        cluster_map = load_cluster_map(
            args.cluster_map,
            cluster_col=args.cluster_col) if args.cluster_map else None
        alignments = AlignmentSet(seq_records,
                                  cluster_map,
                                  consensus_threshold=args.consensus_threshold,
                                  reference_sequences=reference_sequences)

        # Create the analysis generator
        analysis = alignments.multiple_context_analysis(patterns,
                                                        **analysis_settings)

        if args.cluster_threshold:
            # Stays None when cluster_iterations <= 1, i.e. when the loop
            # body never runs and there is no clustering to write out.
            clustering = None
            for hm_it in range(args.cluster_iterations - 1):
                # Single pre-formatted argument: prints the same text under
                # both Python 2 and Python 3.
                print(" ..On hm/cluster iteration %s" % hm_it)
                # Grab the HM columns from the most recent analysis and
                # split out the pos sites
                hm_columns = []
                for result in analysis:
                    hm_columns += result['call']['mut_columns']
                hm_neg_aln = Alignment(
                    seq_records.values()).split_hypermuts(hm_columns).hm_neg_aln
                # Cluster with the specified settings
                clustering = alnclst.Clustering(hm_neg_aln,
                                                args.cluster_threshold,
                                                args.consensus_threshold)
                clustering = clustering.recenter(args.recentering_iterations)
                clustering.merge_small_clusters(args.min_per_cluster)
                cluster_map = parse_clusters(clustering.mapping_iterator(),
                                             cluster_key=0,
                                             sequence_key=1)
                # Create the Alignment set
                clustered_alignment = AlignmentSet(
                    seq_records,
                    cluster_map,
                    consensus_threshold=args.consensus_threshold)
                analysis = clustered_alignment.multiple_context_analysis(
                    patterns, **analysis_settings)
            # write out the final clusters; the context manager guarantees
            # the output is flushed and the handle released
            if clustering is not None:
                with open(prefix + '.clst.csv', 'w') as clusterout_handle:
                    clustering.write(clusterout_handle)

        if args.interactive:
            # Expose this function's local variables (by their names here)
            # plus a few project objects in the interactive console.
            local = copy.copy(locals())
            import hyperfreq
            local.update(
                dict(hyperfreq=hyperfreq,
                     Alignment=Alignment,
                     AlignmentSet=AlignmentSet,
                     mut_pattern=mut_pattern,
                     write_analysis=write_analysis))
            code.interact(local=local)

        # Write the final analysis to file
        write_analysis(analysis,
                       prefix,
                       pattern_names,
                       args.quants,
                       args.cdfs,
                       call_only=args.call_only)
        if args.write_references:
            write_reference_seqs(alignments, prefix)
    finally:
        # Closing files, also when something above raised
        args.alignment.close()
        if args.cluster_map:
            args.cluster_map.close()
Beispiel #8
0
def analyze(args):
    """Analyze the alignment in ``args.alignment`` for hypermutation and write the output files.

    The function reads the FASTA alignment, resolves the patterns named in ``args.patterns``, builds an
    ``AlignmentSet`` (using ``args.cluster_map`` and ``args.reference_sequences`` when given) and runs
    ``multiple_context_analysis`` on it.  When ``args.cluster_threshold`` is set it then performs
    ``args.cluster_iterations - 1`` rounds of re-clustering on the alignment with the mutated columns
    split out, re-analyzing after each round, and writes the last clustering to ``<prefix>.clst.csv``.
    Finally the analysis is written with ``write_analysis`` and, if ``args.write_references`` is set,
    the reference sequences are written too.  ``args.interactive`` opens a console before writing.

    ``args.alignment`` and (when given) ``args.cluster_map`` must be open file-like objects; they are
    closed before this function returns, whether it succeeds or raises.

    Args:
        args: Namespace-like object holding the options read above (presumably the parsed command
            line -- confirm against caller).
    """
    import logging; logging.captureWarnings(True)
    try:
        # Fetch sequence records and analysis patterns
        seq_records = SeqIO.to_dict(SeqIO.parse(args.alignment, 'fasta'))
        patterns = [mut_pattern.patterns[p] for p in args.patterns]
        pattern_names = [p.name for p in patterns]
        prefix = path.join(args.out_dir, args.prefix)
        analysis_settings = dict(
                rpr_cutoff=args.rpr_cutoff, significance_level=args.significance_level, quants=args.quants,
                pos_quants_only=args.pos_quants_only, caller=args.caller, prior=args.prior, cdfs=args.cdfs,
                quadr_maxiter=args.quadr_maxiter, optim_maxiter=args.optim_maxiter)

        # Need to think about how best to fork things here; for instance, might make sense to let the user
        # specify the initial clusters for whatever reason... However, specifying the reference sequences
        # shouldn't make any sense there
        if args.reference_sequences:
            reference_sequences = SeqIO.to_dict(SeqIO.parse(args.reference_sequences, 'fasta'))
        else:
            reference_sequences = None

        # This lets the cluster map be optional, so that this script can be used
        # for naive hm filtering/analysis
        cluster_map = load_cluster_map(args.cluster_map, cluster_col=args.cluster_col) if args.cluster_map else None
        alignments = AlignmentSet(seq_records, cluster_map, consensus_threshold=args.consensus_threshold,
                reference_sequences=reference_sequences)

        # Create the analysis generator
        analysis = alignments.multiple_context_analysis(patterns, **analysis_settings)

        if args.cluster_threshold:
            # Remains None if the loop below runs zero times (cluster_iterations <= 1); in that case
            # there is no clustering to write and the output file is skipped.
            clustering = None
            for hm_it in range(args.cluster_iterations - 1):
                # One pre-formatted argument so the output is the same under Python 2 and Python 3.
                print(" ..On hm/cluster iteration %s" % hm_it)
                # Grab the HM columns from the most recent analysis and split out the pos sites
                hm_columns = []
                for result in analysis:
                    hm_columns += result['call']['mut_columns']
                hm_neg_aln = Alignment(seq_records.values()).split_hypermuts(hm_columns).hm_neg_aln
                # Cluster with the specified settings
                clustering = alnclst.Clustering(hm_neg_aln, args.cluster_threshold,
                        args.consensus_threshold)
                clustering = clustering.recenter(args.recentering_iterations)
                clustering.merge_small_clusters(args.min_per_cluster)
                cluster_map = parse_clusters(clustering.mapping_iterator(), cluster_key=0, sequence_key=1)
                # Create the Alignment set
                clustered_alignment = AlignmentSet(seq_records, cluster_map,
                        consensus_threshold=args.consensus_threshold)
                analysis = clustered_alignment.multiple_context_analysis(patterns, **analysis_settings)
            # write out the final clusters; ``with`` closes (and flushes) the handle for us
            if clustering is not None:
                with open(prefix + '.clst.csv', 'w') as clusterout_handle:
                    clustering.write(clusterout_handle)

        if args.interactive:
            # Hand this function's locals (under their names here) plus a few project objects to the console.
            local = copy.copy(locals())
            import hyperfreq
            local.update(dict(hyperfreq=hyperfreq,
                Alignment=Alignment,
                AlignmentSet=AlignmentSet,
                mut_pattern=mut_pattern,
                write_analysis=write_analysis))
            code.interact(local=local)

        # Write the final analysis to file
        write_analysis(analysis, prefix, pattern_names, args.quants, args.cdfs, call_only=args.call_only)
        if args.write_references:
            write_reference_seqs(alignments, prefix)
    finally:
        # Closing files, even if an earlier step failed
        args.alignment.close()
        if args.cluster_map:
            args.cluster_map.close()