def cache_parameters(self):
        """Infer parameters from the input and write them under args.parameter_dir.

        First runs the Waterer with write_parameters=True, writing to
        <parameter_dir>/sw (plots to <plotdir>/sw), and writes HMMs from its
        best matches. Then, args.baum_welch_iterations times, runs the hmm with
        'viterbi' and count_parameters=True, reading the parameters written by
        the previous step and writing to <parameter_dir>/hmm (plots to
        <plotdir>/hmm). When more than one iteration is requested, '-<i>' is
        appended to both directory names.

        Requires args.n_sets == 1 and a non-None args.plotdir (checked with
        assert). Unless args.no_clean is set, args.workdir is removed at the end.
        """
        args = self.args
        assert args.n_sets == 1  # er, could do it for n > 1, but I'd want to think through a few things first
        assert args.plotdir is not None

        # initial parameter estimate from the waterer
        sw_dir = args.parameter_dir + '/sw'
        sw_plotdir = args.plotdir + '/sw'
        waterer = Waterer(args, self.input_info, self.reco_info, self.germline_seqs,
                          parameter_dir=sw_dir, write_parameters=True, plotdir=sw_plotdir)
        waterer.run()
        self.write_hmms(sw_dir, waterer.info['all_best_matches'])

        # each hmm iteration starts from the parameters the previous step wrote
        n_iterations = args.baum_welch_iterations
        previous_dir = sw_dir
        for iteration in range(n_iterations):
            # only tag the directories with the iteration number if there's more than one iteration
            suffix = '-' + str(iteration) if n_iterations > 1 else ''
            out_dir = args.parameter_dir + '/hmm' + suffix
            plot_dir = args.plotdir + '/hmm' + suffix
            self.run_hmm('viterbi', waterer.info, parameter_in_dir=previous_dir, parameter_out_dir=out_dir,
                         hmm_type='k=1', count_parameters=True, plotdir=plot_dir)
            self.write_hmms(out_dir, waterer.info['all_best_matches'])
            previous_dir = out_dir

        if args.no_clean:
            return
        os.rmdir(args.workdir)  # os.rmdir only removes an empty directory
    def run_algorithm(self, algorithm):
        """Run the Waterer and then the hmm with <algorithm>, using existing parameters.

        Parameters are read from args.parameter_dir, which must already exist;
        nothing is written back by the Waterer (write_parameters=False). The hmm
        is run with hmm_type 'k=nsets', counting parameters only if
        args.plot_parameters is set.

        Raises:
            Exception: if args.parameter_dir does not exist.

        Unless args.no_clean is set, args.workdir is removed at the end.
        """
        args = self.args
        parameter_dir = args.parameter_dir
        if not os.path.exists(parameter_dir):
            raise Exception('ERROR ' + parameter_dir + ' d.n.e')

        waterer = Waterer(
            args,
            self.input_info,
            self.reco_info,
            self.germline_seqs,
            parameter_dir=args.parameter_dir,
            write_parameters=False,
        )
        waterer.run()

        self.run_hmm(
            algorithm,
            waterer.info,
            parameter_in_dir=args.parameter_dir,
            hmm_type='k=nsets',
            count_parameters=args.plot_parameters,
            plotdir=args.plotdir,
        )

        # self.clean(waterer)
        if args.no_clean:
            return
        os.rmdir(args.workdir)  # os.rmdir only removes an empty directory
    def run_algorithm(self, algorithm):
        """Run <algorithm> in the hmm on the Waterer's output, with cached parameters.

        args.parameter_dir has to exist already (an Exception is raised if it
        doesn't). The Waterer reads parameters from it without writing any, and
        its info is then passed to run_hmm with hmm_type 'k=nsets'. Parameter
        counting is controlled by args.plot_parameters, and plots go to
        args.plotdir. The work directory is removed afterwards unless
        args.no_clean is set.
        """
        if not os.path.exists(self.args.parameter_dir):
            raise Exception('ERROR ' + self.args.parameter_dir + ' d.n.e')

        waterer = Waterer(self.args, self.input_info, self.reco_info, self.germline_seqs,
                          parameter_dir=self.args.parameter_dir, write_parameters=False)
        waterer.run()

        # gather the hmm options only once the waterer has finished
        hmm_options = dict(
            parameter_in_dir=self.args.parameter_dir,
            hmm_type='k=nsets',
            count_parameters=self.args.plot_parameters,
            plotdir=self.args.plotdir,
        )
        self.run_hmm(algorithm, waterer.info, **hmm_options)

        # self.clean(waterer)
        keep_workdir = self.args.no_clean
        if not keep_workdir:
            os.rmdir(self.args.workdir)  # fails if the directory is not empty
    def partition(self):
        """Cluster the input sequences using parameters from args.parameter_dir.

        Steps, in order:
          1. run the Waterer (smith-waterman) without writing parameters
          2. hamming preclustering (cdr3 length preclustering is currently switched off)
          3. a 'forward' hmm run on the hamming preclusters with hmm_type 'k=2'
             and make_clusters=True
          4. a second 'forward' hmm run that takes the result of step 3 as its
             preclusters, with hmm_type 'k=preclusters' and prefix 'k-'

        args.parameter_dir must exist (checked with assert). Unless
        args.no_clean is set, args.workdir is removed at the end.
        """
        args = self.args
        assert os.path.exists(args.parameter_dir)

        # run smith-waterman
        waterer = Waterer(args, self.input_info, self.reco_info, self.germline_seqs,
                          parameter_dir=args.parameter_dir, write_parameters=False)
        waterer.run()

        # cdr3 length partitioning
        # Not preclustering on cdr3 length for the moment: cdr3 length can't be inferred accurately for
        # some sequences, so we'd need a way to pass query seqs to the clusterer with several possible
        # cdr3 lengths (and there's no known way to do that yet).
        use_cdr3_preclusters = False
        cdr3_length_clusters = self.cdr3_length_precluster(waterer) if use_cdr3_preclusters else None

        hamming_clusters = self.hamming_precluster(cdr3_length_clusters)
        # stripped_clusters = self.run_hmm('forward', waterer.info, self.args.parameter_dir, preclusters=hamming_clusters, stripped=True)
        hmm_clusters = self.run_hmm('forward', waterer.info, args.parameter_dir,
                                    preclusters=hamming_clusters, hmm_type='k=2', make_clusters=True)

        # second pass: the clusters from the first pass are now the preclusters
        self.run_hmm('forward', waterer.info, args.parameter_dir,
                     preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)
        # self.run_hmm('viterbi', waterer.info, self.args.parameter_dir, preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)

        # self.clean(waterer)
        if args.no_clean:
            return
        os.rmdir(args.workdir)  # os.rmdir only removes an empty directory
    def cache_parameters(self):
        """Write parameters (and hmms) to args.parameter_dir, first from the Waterer and then from the hmm.

        The Waterer writes to <parameter_dir>/sw and plots to <plotdir>/sw.
        After that the hmm is run with 'viterbi' args.baum_welch_iterations
        times, each run reading the directory written by the step before it and
        writing to <parameter_dir>/hmm with plots in <plotdir>/hmm (both get a
        '-<iteration>' suffix if there is more than one iteration). HMMs are
        written after the Waterer and after every hmm run.

        Asserts that args.n_sets is 1 and that args.plotdir is set. The work
        directory is removed at the end unless args.no_clean is set.
        """
        assert self.args.n_sets == 1  # er, could do it for n > 1, but I'd want to think through a few things first
        assert self.args.plotdir is not None

        sw_parameter_dir = self.args.parameter_dir + '/sw'
        waterer = Waterer(
            self.args,
            self.input_info,
            self.reco_info,
            self.germline_seqs,
            parameter_dir=sw_parameter_dir,
            write_parameters=True,
            plotdir=self.args.plotdir + '/sw',
        )
        waterer.run()
        self.write_hmms(sw_parameter_dir, waterer.info['all_best_matches'])

        n_iter = self.args.baum_welch_iterations
        # directory names are only tagged with the iteration number when iterating more than once
        tags = ['-' + str(ibw) if n_iter > 1 else '' for ibw in range(n_iter)]

        input_dir = sw_parameter_dir
        for tag in tags:
            output_dir = self.args.parameter_dir + '/hmm' + tag
            plot_dir = self.args.plotdir + '/hmm' + tag
            self.run_hmm(
                'viterbi',
                waterer.info,
                parameter_in_dir=input_dir,
                parameter_out_dir=output_dir,
                hmm_type='k=1',
                count_parameters=True,
                plotdir=plot_dir,
            )
            self.write_hmms(output_dir, waterer.info['all_best_matches'])
            input_dir = output_dir  # the next iteration starts from what this one wrote

        keep_workdir = self.args.no_clean
        if not keep_workdir:
            os.rmdir(self.args.workdir)  # fails if the directory is not empty
    def partition(self):
        """Partition the input: Waterer, hamming preclustering, then two 'forward' hmm passes.

        Parameters are read from args.parameter_dir, which is asserted to exist.
        The first hmm pass (hmm_type 'k=2', make_clusters=True) runs on the
        hamming preclusters; what it returns is handed to the second pass
        (hmm_type 'k=preclusters', prefix 'k-', make_clusters=False) as its
        preclusters. The work directory is removed at the end unless
        args.no_clean is set.
        """
        assert os.path.exists(self.args.parameter_dir)

        # run smith-waterman
        waterer = Waterer(
            self.args,
            self.input_info,
            self.reco_info,
            self.germline_seqs,
            parameter_dir=self.args.parameter_dir,
            write_parameters=False,
        )
        waterer.run()

        # cdr3 length partitioning
        # Switched off for the moment -- cdr3 length can't be accurately inferred in some sequences, so
        # query seqs would have to be passed to the clusterer with several possible cdr3 lengths (and
        # it isn't known how to do that!)
        precluster_on_cdr3_length = False
        if precluster_on_cdr3_length:
            cdr3_length_clusters = self.cdr3_length_precluster(waterer)
        else:
            cdr3_length_clusters = None

        hamming_clusters = self.hamming_precluster(cdr3_length_clusters)
        # stripped_clusters = self.run_hmm('forward', waterer.info, self.args.parameter_dir, preclusters=hamming_clusters, stripped=True)
        first_pass = dict(preclusters=hamming_clusters, hmm_type='k=2', make_clusters=True)
        hmm_clusters = self.run_hmm('forward', waterer.info, self.args.parameter_dir, **first_pass)

        # the first pass's clusters become the second pass's preclusters
        second_pass = dict(preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)
        self.run_hmm('forward', waterer.info, self.args.parameter_dir, **second_pass)
        # self.run_hmm('viterbi', waterer.info, self.args.parameter_dir, preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)

        # self.clean(waterer)
        keep_workdir = self.args.no_clean
        if not keep_workdir:
            os.rmdir(self.args.workdir)  # fails if the directory is not empty