def find_tad(self, experiments, weights=None, name=None, n_cpus=1,
             verbose=True, max_tad_size="max", heuristic=True,
             batch_mode=False, **kwargs):
    """
    Call the :func:`pytadbit.tadbit.tadbit` function to calculate the
    position of Topologically Associated Domain boundaries.

    :param experiments: a single experiment or a list of experiments. Each
       item is either an ``Experiment`` object or a value that
       ``self.get_experiment`` can resolve into one. If empty or None,
       ``self.experiments`` is used instead
    :param None weights: accepted for backward compatibility; this method
       does not use it
    :param None name: name given to the experiment created in batch mode.
       Defaults to 'batch'. Whenever the name starts with 'batch', the names
       of the experiments (sorted by name) are appended to it, separated by
       underscores
    :param 1 n_cpus: The number of CPUs to allocate to TADbit. If
       n_cpus='max' the total number of CPUs will be used
    :param True verbose: forwarded as is to ``tadbit``
    :param max max_tad_size: an integer defining the maximum size of a TAD.
       Default ('max') defines it as the number of rows/columns
    :param True heuristic: whether to use or not some heuristics (``tadbit``
       is called with ``no_heuristic=not heuristic``)
    :param False batch_mode: if True, all the experiments will be
       concatenated into one for the search of TADs. The resulting TADs
       found are stored under the name 'batch' plus a concatenation of the
       experiment names passed (e.g.: if experiments=['exp1', 'exp2'], the
       name would be: 'batch_exp1_exp2').
    :param kwargs: extra keyword arguments forwarded to ``tadbit`` and, in
       batch mode, also to the ``Experiment`` constructor. In batch mode the
       'silent' entry (default False) is additionally passed to
       ``filter_columns``

    :raises Exception: if batch_mode is set with fewer than two experiments,
       or if the experiments of a batch do not share the same resolution

    :returns: None. In batch mode the new experiment is registered through
       ``self.add_experiment``; otherwise each experiment receives its
       result through ``load_tad_def``
    """
    # functools.reduce is available on Python 2.6+ and is the only spelling
    # that exists on Python 3, where reduce is no longer a builtin.
    from functools import reduce

    experiments = experiments or self.experiments
    if not isinstance(experiments, list):
        experiments = [experiments]
    xprs = [xpr if isinstance(xpr, Experiment) else self.get_experiment(xpr)
            for xpr in experiments]
    # NOTE(review): the code below reads each experiment's `_zeros`;
    # presumably experiments must have been filtered beforehand -- confirm.
    if batch_mode and len(xprs) <= 1:
        raise Exception('ERROR: batch_mode implies that more than one ' +
                        'experiment is passed')

    if batch_mode:
        if not name:
            name = 'batch'
        # Appending to a name that starts with 'batch' never changes its
        # prefix, so the check can be made once, before the loop.
        append_names = name.startswith('batch')
        resolution = xprs[0].resolution
        matrix = []
        for xpr in sorted(xprs, key=lambda x: x.name):
            if xpr.resolution != resolution:
                raise Exception('All Experiments must have the same ' +
                                'resolution\n')
            matrix.append(xpr.hic_data[0])
            if append_names:
                name += '_' + xpr.name
        siz = xprs[0].size
        # Sum all experiments and filter the sum: the indices it reports in
        # `_zeros` are the ones flagged (1) in the mask given to tadbit.
        tmp = reduce(lambda x, y: x.__add__(y, silent=True), xprs)
        tmp.filter_columns(silent=kwargs.get('silent', False))
        merged_zeros = tmp._zeros
        remove = tuple(1 if i in merged_zeros else 0 for i in range(siz))
        result = tadbit(matrix,
                        remove=remove,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        xpr = Experiment(name, resolution, hic_data=matrix,
                         tad_def=result, **kwargs)
        # The batch experiment keeps only the `_zeros` keys shared by every
        # input experiment (values are set to None).
        shared = set(xprs[0]._zeros)
        for other in xprs[1:]:
            shared.intersection_update(other._zeros)
        xpr._zeros = dict.fromkeys(shared)
        self.add_experiment(xpr)
        return

    for xpr in xprs:
        zeros = xpr._zeros
        remove = tuple(1 if i in zeros else 0 for i in range(xpr.size))
        result = tadbit(xpr.hic_data,
                        remove=remove,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        xpr.load_tad_def(result)
        self._get_forbidden_region(xpr)
def find_tad(self, experiments, name=None, n_cpus=1, verbose=True,
             max_tad_size="max", heuristic=True, batch_mode=False,
             **kwargs):
    """
    Call the :func:`pytadbit.tadbit.tadbit` function to calculate the
    position of Topologically Associated Domain boundaries.

    :param experiments: a single experiment or a list of experiments. Each
       item is either an ``Experiment`` object or a value that
       ``self.get_experiment`` can resolve into one. If empty or None,
       ``self.experiments`` is used instead
    :param None name: name given to the experiment created in batch mode.
       Defaults to 'batch'. Whenever the name starts with 'batch', the names
       of the experiments (sorted by name) are appended to it, separated by
       underscores
    :param 1 n_cpus: The number of CPUs to allocate to TADbit. If
       n_cpus='max' the total number of CPUs will be used
    :param True verbose: forwarded as is to ``tadbit``
    :param max max_tad_size: an integer defining the maximum size of a TAD.
       Default ('max') defines it as the number of rows/columns
    :param True heuristic: whether to use or not some heuristics (``tadbit``
       is called with ``no_heuristic=not heuristic``)
    :param False batch_mode: if True, all the experiments will be
       concatenated into one for the search of TADs. The resulting TADs
       found are stored under the name 'batch' plus a concatenation of the
       experiment names passed (e.g.: if experiments=['exp1', 'exp2'], the
       name would be: 'batch_exp1_exp2').
    :param kwargs: extra keyword arguments forwarded to ``tadbit`` and, in
       batch mode, also to the ``Experiment`` constructor. In batch mode the
       'silent' entry (default False) is additionally passed to
       ``filter_columns``

    :raises Exception: if batch_mode is set with fewer than two experiments,
       or if the experiments of a batch do not share the same resolution

    :returns: None. In batch mode the new experiment is registered through
       ``self.add_experiment``; otherwise each experiment receives its
       result through ``load_tad_def``
    """
    # reduce is not a builtin on Python 3; importing it here keeps the
    # method working regardless of what the module imports at the top.
    from functools import reduce

    experiments = experiments or self.experiments
    if not isinstance(experiments, list):
        experiments = [experiments]
    xprs = [xpr if isinstance(xpr, Experiment) else self.get_experiment(xpr)
            for xpr in experiments]
    # NOTE(review): the code below reads each experiment's `_zeros`;
    # presumably experiments must have been filtered beforehand -- confirm.
    if batch_mode and len(xprs) <= 1:
        raise Exception('ERROR: batch_mode implies that more than one ' +
                        'experiment is passed')

    if batch_mode:
        if not name:
            name = 'batch'
        # Appending to a name that starts with 'batch' never changes its
        # prefix, so the check can be made once, before the loop.
        append_names = name.startswith('batch')
        resolution = xprs[0].resolution
        matrix = []
        for xpr in sorted(xprs, key=lambda x: x.name):
            if xpr.resolution != resolution:
                raise Exception('All Experiments must have the same ' +
                                'resolution\n')
            matrix.append(xpr.hic_data[0])
            if append_names:
                name += '_' + xpr.name
        siz = xprs[0].size
        # Sum all experiments and filter the sum: the indices it reports in
        # `_zeros` are the ones flagged (1) in the mask given to tadbit.
        tmp = reduce(lambda x, y: x.__add__(y, silent=True), xprs)
        tmp.filter_columns(silent=kwargs.get('silent', False))
        merged_zeros = tmp._zeros
        remove = tuple(1 if i in merged_zeros else 0 for i in range(siz))
        result = tadbit(matrix,
                        remove=remove,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        xpr = Experiment(name, resolution, hic_data=matrix,
                         tad_def=result, **kwargs)
        # The batch experiment keeps only the `_zeros` keys shared by every
        # input experiment (values are set to None).
        shared = set(xprs[0]._zeros)
        for other in xprs[1:]:
            shared.intersection_update(other._zeros)
        xpr._zeros = dict.fromkeys(shared)
        self.add_experiment(xpr)
        return

    for xpr in xprs:
        zeros = xpr._zeros
        remove = tuple(1 if i in zeros else 0 for i in range(xpr.size))
        result = tadbit(xpr.hic_data,
                        remove=remove,
                        n_cpus=n_cpus, verbose=verbose,
                        max_tad_size=max_tad_size,
                        no_heuristic=not heuristic, **kwargs)
        xpr.load_tad_def(result)
        self._get_forbidden_region(xpr)