def test_19_matrix_manip(self):
    # Checks that a Hi-C map written by hic_map can be read back unchanged by
    # read_matrix, then checks insert_sizes, correlate_matrices and
    # eig_correlate_matrices against stored reference values.
    # Reads "lala-map~", which this test does not create (presumably left by
    # an earlier test -- confirm against the test ordering).
    if ONLY and "19" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    try:
        hic_data1 = load_hic_data_from_reads("lala-map~", resolution=10000)
        hic_map(hic_data1, savedata="lala-map.tsv~", savefig="lala.pdf")
        hic_map(hic_data1, by_chrom="intra", savedata="lala-maps~",
                savefig="lalalo~")
        hic_map(hic_data1, by_chrom="inter", savedata="lala-maps~",
                savefig="lalala~")
        # slowest part of the all test:
        hic_data2 = read_matrix("lala-map.tsv~", resolution=10000)
        self.assertEqual(hic_data1, hic_data2)
        # Disabled check, kept for its reference values:
        # vals = plot_distance_vs_interactions(hic_data1)
        # self.assertEqual([round(i, 2) if str(i)!="nan" else 0.0 for i in
        #                   reduce(lambda x, y: x + y, vals)],
        #                  [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])
        a, b = insert_sizes("lala-map~")
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix(PATH + "/20Kb/chrT/chrT_A.tsv",
                                resolution=20000)
        hic_data2 = read_matrix(PATH + "/20Kb/chrT/chrT_B.tsv",
                                resolution=20000)
        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [
            0.755, 0.729, 0.804, 0.761, 0.789,
            0.776, 0.828, 0.757, 0.797, 0.832
        ])

        ecorr = eig_correlate_matrices(hic_data1, hic_data2,
                                       savefig='lala3.pdf')
        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [
            0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
            0.321, 0.999, 0.01, 0.006, 0.0, 0.007,
            0.451, 0.012, 0.996, 0.031, 0.013, 0.004,
            0.002, 0.006, 0.029, 0.974, 0.076, 0.03,
            0.219, 0.013, 0.031, 0.08, 0.974, 0.018,
            0.028, 0.004, 0.0, 0.028, 0.034, 0.89
        ])
    finally:
        # Remove the temporary artefacts even when an assertion above fails,
        # so a failing run leaves the same state behind as a passing one.
        system("rm -rf lala*")
    if CHKTIME:
        # Single pre-formatted string: prints "19 <seconds>" under both the
        # Python 2 print statement and the Python 3 print function.
        print("19 %s" % (time() - t0))
def test_19_matrix_manip(self):
    # Checks that a Hi-C map written by hic_map can be read back unchanged by
    # read_matrix, then checks plot_distance_vs_interactions, insert_sizes,
    # correlate_matrices and eig_correlate_matrices against stored reference
    # values.
    # Reads 'lala-map~', which this test does not create (presumably left by
    # an earlier test -- confirm against the test ordering).
    if ONLY and ONLY != '19':
        return
    if CHKTIME:
        t0 = time()
    try:
        hic_data1 = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(hic_data1, savedata='lala-map.tsv~', savefig='lala.pdf~')
        hic_map(hic_data1, by_chrom='intra', savedata='lala-maps~',
                savefig='lalalo~')
        hic_map(hic_data1, by_chrom='inter', savedata='lala-maps~',
                savefig='lalala~')
        # slowest part of the all test:
        hic_data2 = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(hic_data1, hic_data2)

        vals = plot_distance_vs_interactions(hic_data1)
        # The returned sequences are concatenated; NaN entries are compared
        # as 0.0 because NaN never equals itself.
        self.assertEqual([
            round(i, 2) if str(i) != 'nan' else 0.0
            for i in reduce(lambda x, y: x + y, vals)
        ], [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])

        a, b = insert_sizes('lala-map~')
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        hic_data2 = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)
        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [
            0.755, 0.729, 0.804, 0.761, 0.789,
            0.776, 0.828, 0.757, 0.797, 0.832
        ])

        ecorr = eig_correlate_matrices(hic_data1, hic_data2)
        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [
            0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
            0.321, 0.999, 0.01, 0.006, 0.0, 0.007,
            0.451, 0.012, 0.996, 0.031, 0.013, 0.004,
            0.002, 0.006, 0.029, 0.974, 0.076, 0.03,
            0.219, 0.013, 0.031, 0.08, 0.974, 0.018,
            0.028, 0.004, 0.0, 0.028, 0.034, 0.89
        ])
    finally:
        # Remove the temporary artefacts even when an assertion above fails,
        # so a failing run leaves the same state behind as a passing one.
        system('rm -rf lala*')
    if CHKTIME:
        # Single pre-formatted string: prints "19 <seconds>" under both the
        # Python 2 print statement and the Python 3 print function.
        print('19 %s' % (time() - t0))
def tadbit(x, n_cpus=None, verbose=True, max_tad_size="auto",
           no_heuristic=False, get_weights=False):
    """
    The tadbit algorithm works on raw chromosome interaction count data.
    Not only is normalization not necessary, it is also not recommended
    since the data is assumed to be discrete counts.

    Tadbit is a breakpoint detection algorithm that returns the optimal
    segmentation of the chromosome under BIC-penalized likelihood. The
    model assumes that counts have a Poisson distribution and that the
    expected value of the counts decreases like a power-law with the
    linear distance on the chromosome. This expected value of the counts
    at position (i,j) is corrected by the counts at diagonal positions
    (i,i) and (j,j). This normalizes for different restriction enzyme
    site densities and 'mappability' of the reads in case a bin contains
    repeated regions.

    :param x: A square matrix of interaction counts in hi-C data or a list
       of such matrices for replicated experiments. The counts must be
       evenly sampled and not normalized. x might be either a list of
       list, a path to a file or a file handler (it is handed to
       read_matrix as is)
    :param None n_cpus: The number of CPUs to allocate to tadbit. A false
       value (None, 0) is forwarded to the wrapper as 0; the original
       documentation states that the default is then the total number of
       CPUs minus 1 (handled inside the wrapper, not visible here)
    :param True verbose: forwarded to the wrapper as 0/1
    :param auto max_tad_size: an integer defining maximum size of TAD.
       Default (auto) defines it to the number of rows/columns.
    :param False no_heuristic: whether to use or not some heuristics
       (forwarded to the wrapper as 0/1)
    :param False get_weights: either to return the weights corresponding
       to the Hi-C count (weights are a normalization dependent of the
       count of each columns).

    :returns: a dictionary with the keys 'start', 'end' and 'score', each a
       list with one entry per detected domain; the 'score' of the last
       domain is always None. When get_weights is true, a tuple
       (dictionary, weights) is returned, weights being the value handed
       back by the wrapper.
    """
    nums, size = read_matrix(x)
    # Compare by value: ``is "auto"`` only matched when the caller's string
    # happened to be the very same (interned) object.
    if max_tad_size == "auto":
        max_tad_size = size
    _, nbks, passages, _, _, bkpts, weights = \
        _tadbit_wrapper(nums,               # list of lists representing matrices
                        size,               # size of one row/column
                        len(nums),          # number of matrices
                        n_cpus or 0,        # number of threads
                        int(verbose),       # verbose 0/1
                        max_tad_size,       # max_tad_size
                        int(no_heuristic)   # heuristic 0/1
                        )

    # bkpts is a flat array; the flags of the selected segmentation start at
    # offset nbks * size.
    breaks = [i for i in xrange(size) if bkpts[i + nbks * size] == 1]
    scores = [p for p in passages if p > 0]

    result = {'start': [], 'end': [], 'score': []}
    # One domain more than there are breaks: the last one runs to the end of
    # the matrix and has no score.
    for brk in xrange(len(breaks) + 1):
        result['start'].append((breaks[brk - 1] + 1) if brk > 0 else 0)
        result['end'].append(breaks[brk] if brk < len(breaks) else size - 1)
        result['score'].append(scores[brk] if brk < len(breaks) else None)

    if get_weights:
        return result, weights
    return result
def test_19_matrix_manip(self):
    # Checks that a Hi-C map written by hic_map can be read back unchanged by
    # read_matrix, then checks plot_distance_vs_interactions, insert_sizes,
    # correlate_matrices and eig_correlate_matrices against stored reference
    # values.
    # Reads 'lala-map~', which this test does not create (presumably left by
    # an earlier test -- confirm against the test ordering).
    if ONLY and ONLY != '19':
        return
    if CHKTIME:
        t0 = time()
    try:
        hic_data1 = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(hic_data1, savedata='lala-map.tsv~', savefig='lala.pdf~')
        hic_map(hic_data1, by_chrom='intra', savedata='lala-maps~',
                savefig='lalalo~')
        hic_map(hic_data1, by_chrom='inter', savedata='lala-maps~',
                savefig='lalala~')
        # slowest part of the all test:
        hic_data2 = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(hic_data1, hic_data2)

        vals = plot_distance_vs_interactions(hic_data1)
        # The returned sequences are concatenated; NaN entries are compared
        # as 0.0 because NaN never equals itself.
        self.assertEqual(
            [round(i, 2) if str(i) != 'nan' else 0.0
             for i in reduce(lambda x, y: x + y, vals)],
            [-1.74, 4.2, 0.52, 1.82, -0.44, 0.0, -0.5, 2.95, 0.0])

        a, b = insert_sizes('lala-map~')
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        hic_data2 = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)
        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [0.755, 0.729, 0.804, 0.761, 0.789,
                                0.776, 0.828, 0.757, 0.797, 0.832])

        ecorr = eig_correlate_matrices(hic_data1, hic_data2)
        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
                                 0.321, 0.999, 0.01, 0.006, 0.0, 0.007,
                                 0.451, 0.012, 0.996, 0.031, 0.013, 0.004,
                                 0.002, 0.006, 0.029, 0.974, 0.076, 0.03,
                                 0.219, 0.013, 0.031, 0.08, 0.974, 0.018,
                                 0.028, 0.004, 0.0, 0.028, 0.034, 0.89])
    finally:
        # Remove the temporary artefacts even when an assertion above fails,
        # so a failing run leaves the same state behind as a passing one.
        system('rm -rf lala*')
    if CHKTIME:
        # Single pre-formatted string: prints "19 <seconds>" under both the
        # Python 2 print statement and the Python 3 print function.
        print('19 %s' % (time() - t0))
def load_hic_matrix_data(self, norm=True):
    """
    Read a Hi-C adjacency matrix from ``self.parsed_reads_dir`` with
    read_matrix (at ``self.resolution``) and store it in ``self.hic_data``.

    :param True norm: selects the file to load: ``adjlist_map.tsv`` when it
       equals True, ``adjlist_map_norm.tsv`` otherwise
    """
    # NOTE(review): norm=True picks the file WITHOUT the "_norm" suffix (the
    # original comment called it "the data pre-normalized"). This may be
    # intended or inverted -- confirm with the code that writes these files.
    # The explicit ``== True`` is kept on purpose: truthy values that do not
    # compare equal to True still select the "_norm" file.
    file_name = ('/adjlist_map.tsv' if norm == True
                 else '/adjlist_map_norm.tsv')
    self.hic_data = read_matrix(self.parsed_reads_dir + file_name,
                                resolution=self.resolution)
def load_experiment(self, hic_data, parser=None, resolution=None,
                    filter_columns=True):
    """
    Load Hi-C data into this object and bring it to the wanted resolution.

    The data is read with read_matrix; the result is stored in
    ``self.hic_data`` and ``self.size``, then ``set_resolution`` is called
    with ``keep_original=False``.

    :param hic_data: whether a file or a list of lists corresponding to
       the Hi-C data
    :param None parser: a parser function that returns a tuple of lists
       representing the data matrix and the length of a row/column.
       With the file example.tsv:

       ::

         chrT_001	chrT_002	chrT_003	chrT_004
         chrT_001	629	164	88	105
         chrT_002	86	612	175	110
         chrT_003	159	216	437	105
         chrT_004	100	111	146	278

       the output of parser('example.tsv') would be:
       ``[([629, 86, 159, 100, 164, 612, 216, 111, 88, 175, 437, 146, 105,
       110, 105, 278]), 4]``
    :param None resolution: resolution of the experiment in the file; it
       will be adjusted to the resolution of the experiment. When not
       given (or false), ``self.resolution`` is used, i.e. the file is
       expected to be at the resolution of this object already
    :param True filter_columns: filter the columns with unexpectedly high
       content of low values; when true, ``self._zeros`` is replaced by the
       output of hic_filtering_for_modelling
    """
    self.hic_data, self.size = read_matrix(hic_data, parser=parser)
    self.set_resolution(resolution or self.resolution, keep_original=False)
    if not filter_columns:
        return
    self._zeros = hic_filtering_for_modelling(self.get_hic_matrix())
def add_experiment(self, f_name, name, force=False):
    """
    Read a Hi-C matrix with read_matrix and register it in
    ``self.experiments`` under ``name``.

    :param f_name: input handed to read_matrix
    :param name: key of the experiment in ``self.experiments``
    :param False force: allow replacing Hi-C data already stored under
       ``name``

    :raises Exception: when ``name`` already holds Hi-C data and ``force``
       is false (the input has already been read at that point)
    """
    nums, size = read_matrix(f_name)
    if name not in self.experiments:
        # First time this name is seen: create the full record.
        self.experiments[name] = {"hi-c": nums,
                                  "size": size,
                                  "tads": None,
                                  "brks": None,
                                  "wght": None}
        return
    known = self.experiments[name]
    if "hi-c" in known and not force:
        raise Exception(
            "Hi-C data already loaded under the name: {}. "
            "Force loading or use an other name.\n".format(name))
    # Only the matrix and its size are replaced; other keys are untouched.
    known["hi-c"] = nums
    known["size"] = size
def load_hic_data(self, hic_data, parser=None, wanted_resolution=None,
                  data_resolution=None, filter_columns=True):
    """
    Load Hi-C data into this object and bring it to the wanted resolution.

    The data is read with read_matrix and stored in ``self.hic_data`` and
    ``self.size``. ``self._ori_resolution`` and ``self.resolution`` are both
    set to the resolution of the data, then ``set_resolution`` is called
    with ``keep_original=False``.

    :param hic_data: whether a file or a list of lists corresponding to
       the Hi-C data
    :param None parser: a parser function that returns a tuple of lists
       representing the data matrix and the length of a row/column.
       With the file example.tsv:

       ::

         chrT_001	chrT_002	chrT_003	chrT_004
         chrT_001	629	164	88	105
         chrT_002	86	612	175	110
         chrT_003	159	216	437	105
         chrT_004	100	111	146	278

       the output of parser('example.tsv') would be:
       ``[([629, 86, 159, 100, 164, 612, 216, 111, 88, 175, 437, 146, 105,
       110, 105, 278]), 4]``
    :param None wanted_resolution: resolution passed to ``set_resolution``;
       when not given (or false) the resolution of the data is used
    :param None data_resolution: resolution of the data being loaded; when
       not given (or false) the current ``self._ori_resolution`` is kept
    :param True filter_columns: filter the columns with unexpectedly high
       content of low values; when true, ``self._zeros`` is replaced by the
       output of hic_filtering_for_modelling
    """
    self.hic_data, self.size = read_matrix(hic_data, parser=parser)
    loaded_resolution = data_resolution or self._ori_resolution
    self._ori_resolution = loaded_resolution
    self.resolution = loaded_resolution
    self.set_resolution(wanted_resolution or self.resolution,
                        keep_original=False)
    if not filter_columns:
        return
    self._zeros = hic_filtering_for_modelling(self.get_hic_matrix())
def load_experiment(self, handler, parser=None, resolution=None):
    """
    Load Hi-C data into this object and bring it to the wanted resolution.

    The data is read with read_matrix and stored in ``self.hic_data`` and
    ``self.size``; ``set_resolution`` is then called with
    ``keep_original=False`` and ``self._zeros`` is recomputed with
    hic_filtering_for_modelling.

    :param handler: input handed to read_matrix as is
    :param None parser: a parser function that returns a tuple of lists
       representing the data matrix, and the length of a row/column, with
       this file example.tsv:

       ::

         chrT_001	chrT_002	chrT_003	chrT_004
         chrT_001	629	164	88	105
         chrT_002	86	612	175	110
         chrT_003	159	216	437	105
         chrT_004	100	111	146	278

       the output of parser('example.tsv') might be:
       ``[([629, 86, 159, 100, 164, 612, 216, 111, 88, 175, 437, 146, 105,
       110, 105, 278]), 4]``
    :param None resolution: resolution of the experiment in the file, it
       will be adjusted to the resolution of the experiment. When not given
       (or false), ``self.resolution`` is used, i.e. the file is expected
       to be at the resolution of this object already.
    """
    self.hic_data, self.size = read_matrix(handler, parser=parser)
    self.set_resolution(resolution or self.resolution, keep_original=False)
    self._zeros = hic_filtering_for_modelling(self.get_hic_matrix())
def generate_tads(self, chrom):
    """
    Run TADbit on the intra-chromosomal matrix of ``chrom`` and write the
    TAD borders to a file.

    The matrix is read from
    ``<parsed_reads_dir>/adjlist_map_<chrom>-<chrom>_<resolution>.tsv`` and
    the borders are written to
    ``<library_dir><library>_<resolution>_<chrom>-<chrom>_tads.tsv``
    (``library_dir`` is concatenated without a separator).

    :param chrom: chromosome identifier, converted with ``str`` to build
       the experiment and file names
    """
    from pytadbit import Chromosome

    chrom_pair = str(chrom) + "-" + str(chrom)
    expt_name = self.library + "_" + str(self.resolution) + "_" + chrom_pair
    matrix_path = (self.parsed_reads_dir + '/adjlist_map_' + chrom_pair
                   + '_' + str(self.resolution) + '.tsv')

    chr_hic_data = read_matrix(matrix_path, resolution=int(self.resolution))

    my_chrom = Chromosome(name=expt_name, centromere_search=True)
    my_chrom.add_experiment(expt_name,
                            hic_data=chr_hic_data,
                            resolution=int(self.resolution))

    # Run core TADbit function to find TADs on each expt.
    # For the current dataset required 61GB of RAM
    my_chrom.find_tad(expt_name, n_cpus=15)

    experiment = my_chrom.experiments[expt_name]
    borders_path = self.library_dir + expt_name + '_tads.tsv'
    experiment.write_tad_borders(savedata=borders_path)
def tadbit(x, remove=None, n_cpus=1, verbose=True, max_tad_size="max",
           no_heuristic=0, use_topdom=False, topdom_window=5, **kwargs):
    """
    The TADbit algorithm works on raw chromosome interaction count data.
    The normalization is neither necessary nor recommended,
    since the data is assumed to be discrete counts.

    TADbit is a breakpoint detection algorithm that returns the optimal
    segmentation of the chromosome under BIC-penalized likelihood. The
    model assumes that counts have a Poisson distribution and that the
    expected value of the counts decreases like a power-law with the
    linear distance on the chromosome. This expected value of the counts
    at position (i,j) is corrected by the counts at diagonal positions
    (i,i) and (j,j). This normalizes for different restriction enzyme
    site densities and 'mappability' of the reads in case a bin contains
    repeated regions.

    :param x: a square matrix of interaction counts in the HI-C data or a
       list of such matrices for replicated experiments. The counts must be
       evenly sampled and not normalized. x might be either a list of list,
       a path to a file or a file handler (it is handed to read_matrix with
       ``one=False``)
    :param None remove: a python list of lists of booleans mapping
       positively columns to remove (if None, or empty, only columns with a
       0 in the diagonal of the first matrix will be removed)
    :param 1 n_cpus: The number of CPUs to allocate to TADbit. If
       n_cpus='max' the total number of CPUs will be used (forwarded to the
       wrapper as 0)
    :param True verbose: forwarded to the wrapper as 0/1
    :param max max_tad_size: an integer defining maximum size of TAD.
       'max' or 'auto' defines it as the number of rows/columns
    :param 0 no_heuristic: whether to use or not some heuristics (forwarded
       to the wrapper as an integer)
    :param False use_topdom: whether to use TopDom algorithm to find tads
       or not (http://www.ncbi.nlm.nih.gov/pubmed/26704975,
       http://zhoulab.usc.edu/TopDom/). In that mode only the first matrix
       and topdom_window are used
    :param 5 topdom_window: the window size for topdom algorithm
    :param kwargs: only 'ntads' is read; it is forwarded to the wrapper as
       ntads + 1 (0 when absent)

    :returns: a dictionary with the keys 'start', 'end' and 'score' (plus
       'tag' with TopDom), each a list with one entry per segment. Without
       TopDom the 'score' of the last segment is always None.
    """
    nums = list(read_matrix(x, one=False))

    if use_topdom:
        result = {'start': [], 'end': [], 'score': [], 'tag': []}
        ret = TopDom(nums[0], window_size=topdom_window)
        for key in sorted(ret):
            segment = ret[key]
            result['tag'].append(segment['tag'])
            result['start'].append(segment['start'])
            result['end'].append(segment['end'])
            # only segments tagged as domains keep their own score
            result['score'].append(
                segment['score'] if segment['tag'] == 'domain' else 0)
        # NOTE(review): raises on an empty TopDom result (max of an empty
        # list) and when the highest score is 0 (division by zero).
        max_score = max(result['score'])
        result['score'] = [1 - int((score / max_score) * 10)
                           for score in result['score']]
        return result

    size = len(nums[0])
    nums = [num.get_as_tuple() for num in nums]
    if not remove:
        # if not given just remove columns with zero in diagonal
        remove = tuple(0 if nums[0][i * size + i] else 1
                       for i in xrange(size))
    if n_cpus == 'max':
        n_cpus = 0
    if max_tad_size in ["max", "auto"]:
        max_tad_size = size
    _, nbks, passages, _, _, bkpts = \
        _tadbit_wrapper(nums,                         # list of lists of Hi-C data
                        remove,                       # list of columns marking filtered
                        size,                         # size of one row/column
                        len(nums),                    # number of matrices
                        n_cpus,                       # number of threads
                        int(verbose),                 # verbose 0/1
                        max_tad_size,                 # max_tad_size
                        kwargs.get('ntads', -1) + 1,  # ntads + 1
                        int(no_heuristic),            # heuristic 0/1
                        )

    # bkpts is a flat array; the flags of the selected segmentation start at
    # offset nbks * size.
    breaks = [i for i in xrange(size) if bkpts[i + nbks * size] == 1]
    scores = [p for p in passages if p > 0]

    # One segment more than there are breaks: the last one runs to the end
    # of the matrix and has no score.
    return {
        'start': [0] + [brk + 1 for brk in breaks],
        'end': breaks + [size - 1],
        'score': [scores[idx] for idx in xrange(len(breaks))] + [None],
    }
def tadbit(x, remove=None, n_cpus=1, verbose=True, max_tad_size="max",
           no_heuristic=0, use_topdom=False, topdom_window=5, **kwargs):
    """
    The TADbit algorithm works on raw chromosome interaction count data.
    The normalization is neither necessary nor recommended,
    since the data is assumed to be discrete counts.

    TADbit is a breakpoint detection algorithm that returns the optimal
    segmentation of the chromosome under BIC-penalized likelihood. The
    model assumes that counts have a Poisson distribution and that the
    expected value of the counts decreases like a power-law with the
    linear distance on the chromosome. This expected value of the counts
    at position (i,j) is corrected by the counts at diagonal positions
    (i,i) and (j,j). This normalizes for different restriction enzyme
    site densities and 'mappability' of the reads in case a bin contains
    repeated regions.

    :param x: a square matrix of interaction counts in the HI-C data or a
       list of such matrices for replicated experiments. The counts must be
       evenly sampled and not normalized. x might be either a list of list,
       a path to a file or a file handler (it is handed to read_matrix with
       ``one=False``)
    :param None remove: a python list of lists of booleans mapping
       positively columns to remove (if None, or empty, only columns with a
       0 in the diagonal of the first matrix will be removed)
    :param 1 n_cpus: The number of CPUs to allocate to TADbit. If
       n_cpus='max' the total number of CPUs will be used (forwarded to the
       wrapper as 0)
    :param True verbose: forwarded to the wrapper as 0/1
    :param max max_tad_size: an integer defining maximum size of TAD.
       'max' or 'auto' defines it as the number of rows/columns
    :param 0 no_heuristic: whether to use or not some heuristics (forwarded
       to the wrapper as an integer)
    :param False use_topdom: whether to use TopDom algorithm to find tads
       or not (http://www.ncbi.nlm.nih.gov/pubmed/26704975,
       http://zhoulab.usc.edu/TopDom/). In that mode only the first matrix
       and topdom_window are used
    :param 5 topdom_window: the window size for topdom algorithm
    :param kwargs: only 'ntads' is read; it is forwarded to the wrapper as
       ntads + 1 (0 when absent)

    :returns: a dictionary with the keys 'start', 'end' and 'score' (plus
       'tag' with TopDom), each a list with one entry per segment. Without
       TopDom the 'score' of the last segment is always None.
    """
    matrices = list(read_matrix(x, one=False))

    if use_topdom:
        tags, starts, ends, raw_scores = [], [], [], []
        ret = TopDom(matrices[0], window_size=topdom_window)
        for key in sorted(ret):
            item = ret[key]
            tags.append(item['tag'])
            starts.append(item['start'])
            ends.append(item['end'])
            # only segments tagged as domains keep their own score
            if item['tag'] == 'domain':
                raw_scores.append(item['score'])
            else:
                raw_scores.append(0)
        # NOTE(review): raises on an empty TopDom result (max of an empty
        # list) and when the highest score is 0 (division by zero).
        max_score = max(raw_scores)
        scaled = [1 - int((value / max_score) * 10) for value in raw_scores]
        return {'start': starts, 'end': ends, 'score': scaled, 'tag': tags}

    size = len(matrices[0])
    nums = [matrix.get_as_tuple() for matrix in matrices]
    if not remove:
        # if not given just remove columns with zero in diagonal
        first = nums[0]
        remove = tuple(0 if first[i * size + i] else 1
                       for i in xrange(size))
    if n_cpus == 'max':
        n_cpus = 0
    if max_tad_size in ["max", "auto"]:
        max_tad_size = size
    wrapped = _tadbit_wrapper(nums,                         # list of lists of Hi-C data
                              remove,                       # list of columns marking filtered
                              size,                         # size of one row/column
                              len(nums),                    # number of matrices
                              n_cpus,                       # number of threads
                              int(verbose),                 # verbose 0/1
                              max_tad_size,                 # max_tad_size
                              kwargs.get('ntads', -1) + 1,  # ntads + 1
                              int(no_heuristic),            # heuristic 0/1
                              )
    _, nbks, passages, _, _, bkpts = wrapped

    # bkpts is a flat array; the flags of the selected segmentation start at
    # offset nbks * size.
    offset = nbks * size
    breaks = [i for i in xrange(size) if bkpts[i + offset] == 1]
    scores = [p for p in passages if p > 0]

    # One segment more than there are breaks: the last one runs to the end
    # of the matrix and has no score.
    result = {'start': [0], 'end': [], 'score': []}
    for idx, brk in enumerate(breaks):
        result['end'].append(brk)
        result['score'].append(scores[idx])
        result['start'].append(brk + 1)
    result['end'].append(size - 1)
    result['score'].append(None)
    return result
def tadbit(x, n_cpus=1, verbose=True, max_tad_size="max",
           no_heuristic=False, get_weights=False, use_visibility=False):
    """
    The TADBit algorithm works on raw chromosome interaction count data.
    The normalization is neither necessary nor recommended,
    since the data is assumed to be discrete counts.

    TADBit is a breakpoint detection algorithm that returns the optimal
    segmentation of the chromosome under BIC-penalized likelihood. The
    model assumes that counts have a Poisson distribution and that the
    expected value of the counts decreases like a power-law with the
    linear distance on the chromosome. This expected value of the counts
    at position (i,j) is corrected by the counts at diagonal positions
    (i,i) and (j,j). This normalizes for different restriction enzyme
    site densities and 'mappability' of the reads in case a bin contains
    repeated regions.

    :param x: a square matrix of interaction counts in the HI-C data or a
       list of such matrices for replicated experiments. The counts must be
       evenly sampled and not normalized. x might be either a list of list,
       a path to a file or a file handler (it is handed to read_matrix as
       is)
    :param 1 n_cpus: The number of CPUs to allocate to TADBit. If
       n_cpus='max' the total number of CPUs will be used (forwarded to the
       wrapper as 0)
    :param True verbose: forwarded to the wrapper as 0/1
    :param max max_tad_size: an integer defining maximum size of TAD.
       'max' (the default) or 'auto' defines it as the number of
       rows/columns
    :param False no_heuristic: whether to use or not some heuristics
       (forwarded to the wrapper as 0/1)
    :param False get_weights: either to return the weights corresponding
       to the Hi-C count (weights are a normalization dependent of the
       count of each columns)
    :param False use_visibility: forwarded to the wrapper as 0/1

    :returns: a dictionary with the keys 'start', 'end' and 'score', each a
       list with one entry per detected domain; the 'score' of the last
       domain is always None. When get_weights is true, a tuple
       (dictionary, tadbit_weights) is returned, where tadbit_weights holds,
       per matrix, each count divided by the matching wrapper weight (0.0
       where that weight is zero).
    """
    nums, size = read_matrix(x)
    n_cpus = n_cpus if n_cpus != 'max' else 0
    # Compare by value, and resolve the default "max" as well: the previous
    # ``is "auto"`` identity test left "max" unresolved, so a default call
    # handed the string itself to the wrapper.
    if max_tad_size in ("max", "auto"):
        max_tad_size = size
    _, nbks, passages, _, _, bkpts, weights = \
        _tadbit_wrapper(nums,                 # list of lists representing matrices
                        size,                 # size of one row/column
                        len(nums),            # number of matrices
                        n_cpus,               # number of threads
                        int(verbose),         # verbose 0/1
                        max_tad_size,         # max_tad_size
                        int(no_heuristic),    # heuristic 0/1
                        int(use_visibility)   # TODO: remove this
                        )

    # bkpts is a flat array; the flags of the selected segmentation start at
    # offset nbks * size.
    breaks = [i for i in xrange(size) if bkpts[i + nbks * size] == 1]
    scores = [p for p in passages if p > 0]

    result = {'start': [], 'end': [], 'score': []}
    # One domain more than there are breaks: the last one runs to the end of
    # the matrix and has no score.
    for brk in xrange(len(breaks) + 1):
        result['start'].append((breaks[brk - 1] + 1) if brk > 0 else 0)
        result['end'].append(breaks[brk] if brk < len(breaks) else size - 1)
        result['score'].append(scores[brk] if brk < len(breaks) else None)

    if get_weights:
        # in tadbit we are not using directly weights, but the
        # multiplication by the real value
        tadbit_weights = [[i / j if j else 0.0
                           for i, j in zip(nums[k], weights[k])]
                          for k in xrange(len(nums))]
        return result, tadbit_weights
    return result