コード例 #1
0
ファイル: chromosome.py プロジェクト: Tong-Chen/tadbit
    def find_tad(self, experiments, weights=None, name=None, n_cpus=1,
                 verbose=True, max_tad_size="max", heuristic=True,
                 batch_mode=False, **kwargs):
        """
        Run :func:`pytadbit.tadbit.tadbit` on one or several experiments in
        order to locate the boundaries of Topologically Associated Domains.

        :param experiments: a single item or a list of items; each item is
           either an Experiment object or a value that
           ``self.get_experiment`` is able to resolve into one. If a false
           value is passed (None, empty list...), ``self.experiments`` is
           used instead
        :param None weights: accepted for compatibility, not used by this
           function
        :param None name: name of the experiment created in batch mode
           ('batch' when not set). Whenever this name starts with 'batch',
           the names of the experiments are appended to it, in alphabetical
           order and separated by '_' (e.g.: 'batch_exp1_exp2')
        :param 1 n_cpus: number of CPUs, forwarded as is to tadbit
        :param True verbose: forwarded as is to tadbit
        :param max max_tad_size: maximum size of a TAD, forwarded as is to
           tadbit
        :param True heuristic: tadbit is called with its ``no_heuristic``
           option set to the opposite of this value
        :param False batch_mode: if True, the first Hi-C matrix of every
           experiment is handed to one single tadbit call, and the outcome
           is stored in a new experiment that is added through
           ``self.add_experiment``. If False, tadbit is run on each
           experiment separately and each experiment loads its own result
        :param kwargs: extra keyword arguments forwarded to tadbit and, in
           batch mode, also to the Experiment constructor. In batch mode the
           'silent' key (False if absent) is passed to ``filter_columns``

        :returns: None

        :raises Exception: if batch_mode is set with less than two
           experiments, or if the batched experiments do not all share the
           same resolution
        """
        experiments = experiments or self.experiments
        if not isinstance(experiments, list):
            experiments = [experiments]
        # whatever is not yet an Experiment goes through self.get_experiment
        targets = [item if isinstance(item, Experiment)
                   else self.get_experiment(item)
                   for item in experiments]
        if batch_mode and len(targets) <= 1:
            raise Exception('ERROR: batch_mode implies that more than one '
                            'experiment is passed')

        if not batch_mode:
            # one independent tadbit run per experiment
            for xpr in targets:
                result = tadbit(
                    xpr.hic_data,
                    remove=tuple(1 if idx in xpr._zeros else 0
                                 for idx in xrange(xpr.size)),
                    n_cpus=n_cpus, verbose=verbose,
                    max_tad_size=max_tad_size,
                    no_heuristic=not heuristic, **kwargs)
                xpr.load_tad_def(result)
                self._get_forbidden_region(xpr)
            return

        # batch mode: all matrices are analyzed together
        label = name or 'batch'
        matrices = []
        resolution = targets[0].resolution
        for xpr in sorted(targets, key=lambda exp: exp.name):
            if xpr.resolution != resolution:
                raise Exception('All Experiments must have the same '
                                'resolution\n')
            matrices.append(xpr.hic_data[0])
            if label.startswith('batch'):
                label += '_' + xpr.name
        size = targets[0].size
        # the columns to skip are the ones flagged in the sum of all the
        # experiments, once filter_columns has been run on that sum
        summed = reduce(lambda acc, exp: acc.__add__(exp, silent=True),
                        targets)
        summed.filter_columns(silent=kwargs.get('silent', False))
        mask = tuple(1 if idx in summed._zeros else 0
                     for idx in xrange(size))
        result = tadbit(matrices,
                        remove=mask,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        batch = Experiment(label, resolution, hic_data=matrices,
                           tad_def=result, **kwargs)
        # the new experiment only keeps the columns that are flagged in
        # every single one of the input experiments
        shared = targets[0]._zeros
        for other in targets[1:]:
            shared = {key: None for key in
                      set(shared.keys()).intersection(other._zeros.keys())}
        batch._zeros = shared
        self.add_experiment(batch)
コード例 #2
0
    def find_tad(self, experiments, name=None, n_cpus=1,
                 verbose=True, max_tad_size="max", heuristic=True,
                 batch_mode=False, **kwargs):
        """
        Run :func:`pytadbit.tadbit.tadbit` on one or several experiments in
        order to locate the boundaries of Topologically Associated Domains.

        :param experiments: a single item or a list of items; each item is
           either an Experiment object or a value that
           ``self.get_experiment`` is able to resolve into one. If a false
           value is passed (None, empty list...), ``self.experiments`` is
           used instead
        :param None name: name of the experiment created in batch mode
           ('batch' when not set). Whenever this name starts with 'batch',
           the names of the experiments are appended to it, in alphabetical
           order and separated by '_' (e.g.: 'batch_exp1_exp2')
        :param 1 n_cpus: number of CPUs, forwarded as is to tadbit
        :param True verbose: forwarded as is to tadbit
        :param max max_tad_size: maximum size of a TAD, forwarded as is to
           tadbit
        :param True heuristic: tadbit is called with its ``no_heuristic``
           option set to the opposite of this value
        :param False batch_mode: if True, the first Hi-C matrix of every
           experiment is handed to one single tadbit call, and the outcome
           is stored in a new experiment that is added through
           ``self.add_experiment``. If False, tadbit is run on each
           experiment separately and each experiment loads its own result
        :param kwargs: extra keyword arguments forwarded to tadbit and, in
           batch mode, also to the Experiment constructor. In batch mode the
           'silent' key (False if absent) is passed to ``filter_columns``

        :returns: None

        :raises Exception: if batch_mode is set with less than two
           experiments, or if the batched experiments do not all share the
           same resolution
        """
        experiments = experiments or self.experiments
        if not isinstance(experiments, list):
            experiments = [experiments]
        # whatever is not yet an Experiment goes through self.get_experiment
        targets = [item if isinstance(item, Experiment)
                   else self.get_experiment(item)
                   for item in experiments]
        if batch_mode and len(targets) <= 1:
            raise Exception('ERROR: batch_mode implies that more than one '
                            'experiment is passed')

        if not batch_mode:
            # one independent tadbit run per experiment
            for xpr in targets:
                result = tadbit(
                    xpr.hic_data,
                    remove=tuple(1 if idx in xpr._zeros else 0
                                 for idx in range(xpr.size)),
                    n_cpus=n_cpus, verbose=verbose,
                    max_tad_size=max_tad_size,
                    no_heuristic=not heuristic, **kwargs)
                xpr.load_tad_def(result)
                self._get_forbidden_region(xpr)
            return

        # batch mode: all matrices are analyzed together
        label = name or 'batch'
        matrices = []
        resolution = targets[0].resolution
        for xpr in sorted(targets, key=lambda exp: exp.name):
            if xpr.resolution != resolution:
                raise Exception('All Experiments must have the same '
                                'resolution\n')
            matrices.append(xpr.hic_data[0])
            if label.startswith('batch'):
                label += '_' + xpr.name
        size = targets[0].size
        # the columns to skip are the ones flagged in the sum of all the
        # experiments, once filter_columns has been run on that sum
        summed = reduce(lambda acc, exp: acc.__add__(exp, silent=True),
                        targets)
        summed.filter_columns(silent=kwargs.get('silent', False))
        mask = tuple(1 if idx in summed._zeros else 0
                     for idx in range(size))
        result = tadbit(matrices,
                        remove=mask,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        batch = Experiment(label, resolution, hic_data=matrices,
                           tad_def=result, **kwargs)
        # the new experiment only keeps the columns that are flagged in
        # every single one of the input experiments
        shared = targets[0]._zeros
        for other in targets[1:]:
            shared = {key: None for key in
                      set(shared.keys()).intersection(other._zeros.keys())}
        batch._zeros = shared
        self.add_experiment(batch)