def _concordance_counts_by_axis(r1, r2, groups, num_groups, axis):
    '''Calculate #concordances, #discordances by axis (0=SNP, 1=sample).

    r1 and r2 are compared element-wise; groups[axis] supplies the group key
    that each compared entry is summed under.

    Returns the tuple (has_data, c, d, concordant, discordant):
      has_data   -- second value returned by util.sum_by_group; used here to
                    index the length-num_groups arrays c and d
      c, d       -- float arrays of length num_groups holding the concordant /
                    discordant counts at has_data and 0 everywhere else
      concordant, discordant -- the raw per-group sums from util.sum_by_group
    '''
    keys = groups[axis]
    concordant, has_data = util.sum_by_group((r1 == r2).astype(np.byte), keys)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), keys)

    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype on every NumPy version.
    c = np.zeros((num_groups,), dtype=float)
    c[has_data] = concordant
    d = np.zeros((num_groups,), dtype=float)
    d[has_data] = discordant
    return has_data, c, d, concordant, discordant
def concordance(g1, g2, data_filter=CALLED, samples=None):
    '''Calculate the concordance (fraction of filled entries that agree)
    between two genotype data arrays g1, g2.

    If samples is not None, both arrays are first restricted to those indices
    along their second axis. The arrays are then recoded with recode_single
    using data_filter, and agreements / disagreements are summed per group
    key groups[0] (presumably the SNP index -- confirm against recode_single).

    Returns a float array of length max(group key) + 1. Entries for keys that
    have data hold concordant / (concordant + discordant); all other entries
    are 0. Only the rate array is returned, not the indices with data.
    '''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), groups)

    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype on every NumPy version.
    c = np.zeros((max(groups) + 1,), dtype=float)
    # 1.0 * forces floating-point division even under Python 2 semantics.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
def concordance(g1, g2, data_filter=im.imputation.istat.CALLED, samples=None):
    '''Calculate the concordance (fraction of filled entries that agree)
    between two genotype data arrays g1, g2.

    If samples is not None, both arrays are first restricted to those indices
    along their second axis. The arrays are then recoded with recode_single
    using data_filter, and agreements / disagreements are summed per group
    key groups[0] (presumably the SNP index -- confirm against recode_single).

    Returns a float array of length max(group key) + 1. Entries for keys that
    have data hold concordant / (concordant + discordant); all other entries
    are 0. Only the rate array is returned, not the indices with data.
    '''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), groups)

    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype on every NumPy version.
    c = np.zeros((max(groups) + 1,), dtype=float)
    # 1.0 * forces floating-point division even under Python 2 semantics.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
def _genotype_call_rate(self, axis, data_filter=CALLED, samples=None, snps=None):
    '''Return the call rate by axis (0=SNP, 1=sample).

    The data is restricted via self._restrict_arrays(data_filter, samples,
    snps). Entries are then counted per group key called[axis] and divided by
    num_groups[1 - axis], the size of the opposite axis.

    Returns a float array of length num_groups[axis]; positions that
    util.sum_by_group does not report as having data stay 0.
    '''
    rg, _, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
    # Summing an array of ones per group key yields the number of entries in each group.
    num_called, has_data = util.sum_by_group(np.ones_like(rg).astype(np.byte), called[axis])

    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype on every NumPy version.
    call_rate = np.zeros((num_groups[axis],), dtype=float)
    call_rate[has_data] = (1.0 * num_called) / num_groups[1 - axis]
    return call_rate
def _call_counts_by_axis(self, axis, data_filter=CALLED, samples=None, snps=None):
    '''Return the call counts (called count, all count) by axis (0=SNP, 1=sample).

    The data is restricted via self._restrict_arrays(data_filter, samples,
    snps) and entries are counted per group key called[axis].

    Returns the tuple (num_called, num_all):
      num_called -- per-group entry counts as returned by util.sum_by_group
                    (presumably one entry per group that has data, with no
                    zero-filling for absent groups -- confirm against
                    util.sum_by_group)
      num_all    -- num_groups[1 - axis], the size of the opposite axis
    '''
    _, ri, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
    # Summing an array of ones per group key yields the number of entries in each group.
    # A call-rate array used to be built here but was never returned, so it is
    # no longer computed.
    num_called, _ = util.sum_by_group(np.ones_like(ri).astype(np.byte), called[axis])
    num_all = num_groups[1 - axis]
    return num_called, num_all
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the segment file for (sample, PATERNAL), sums segment lengths
    (column 3 - column 2) per haplotype key 2 * column 4 + column 5, and keeps
    the num keys with the largest totals, in descending order.

    Returns the column-wise concatenation of: the summed lengths divided by
    total, the haplotype keys, and the result of
    im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each key's
    sample id (key // 2).

    NOTE(review): PATERNAL, total, ped and im are not defined in this function
    and must be supplied by the enclosing module scope.
    '''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' % (sample, PATERNAL), dtype=int)
    # Hash haps columns 4 and 5 into a single integer key.
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2], 2 * s[:, 4] + s[:, 5])
    j = np.argsort(values)[::-1][:num]
    # Floor division recovers the integer sample id from the key; plain "/"
    # is true division on Python 3 and would yield fractional ids for odd keys.
    relatives = groups[j] // 2
    return np.concatenate(
        (values[j][np.newaxis].transpose() / total,
         groups[j][np.newaxis].transpose(),
         np.array([im.pt.lowest_common_ancestor(ped.graph, sample, relative) for relative in relatives])),
        axis=1)
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the segment file for (sample, PATERNAL), sums segment lengths
    (column 3 - column 2) per haplotype key 2 * column 4 + column 5, and keeps
    the num keys with the largest totals, in descending order.

    Returns the column-wise concatenation of: the summed lengths divided by
    total, the haplotype keys, and the result of
    im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each key's
    sample id (key // 2).

    NOTE(review): PATERNAL, total, ped and im are not defined in this function
    and must be supplied by the enclosing module scope.
    '''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' % (sample, PATERNAL), dtype=int)
    # Hash haps columns 4 and 5 into a single integer key.
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2], 2 * s[:, 4] + s[:, 5])
    j = np.argsort(values)[::-1][:num]
    # Floor division recovers the integer sample id from the key; plain "/"
    # is true division on Python 3 and would yield fractional ids for odd keys.
    relatives = groups[j] // 2
    ancestors = np.array([
        im.pt.lowest_common_ancestor(ped.graph, sample, relative)
        for relative in relatives
    ])
    return np.concatenate(
        (values[j][np.newaxis].transpose() / total,
         groups[j][np.newaxis].transpose(),
         ancestors),
        axis=1)