Ejemplo n.º 1
0
 def _concordance_counts_by_axis(r1, r2, groups, num_groups, axis):
     '''Calculate #concordances, #discordances by axis (0=SNP, 1=sample).

     r1 and r2 are compared element-wise; the agreement and disagreement
     indicators are summed per group with util.sum_by_group, using
     groups[axis] as the group labels.

     Returns the tuple (has_data, c, d, concordant, discordant) where:
     - has_data is the second value returned by util.sum_by_group; it is used
       here to index the groups that received a sum.
     - c, d are float arrays of length num_groups holding the concordant /
       discordant sums at has_data and zero everywhere else.
     - concordant, discordant are the raw per-group sums.
     '''
     labels = groups[axis]
     concordant, has_data = util.sum_by_group((r1 == r2).astype(np.byte), labels)
     # The group keys of the second call are discarded; presumably they equal
     # has_data since both calls use the same labels -- TODO confirm.
     discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), labels)
     # Builtin float replaces the np.float alias, which was removed in NumPy 1.24.
     c = np.zeros((num_groups,), dtype=float)
     c[has_data] = concordant
     d = np.zeros((num_groups,), dtype=float)
     d[has_data] = discordant
     return has_data, c, d, concordant, discordant
Ejemplo n.º 2
0
def concordance(g1, g2, data_filter=CALLED, samples=None):
    '''Calculate the concordance (fraction of compared entries that agree)
    between two genotype data arrays g1, g2.

    If samples is given, both arrays are first restricted to those samples
    along their second axis. The arrays are then recoded with recode_single
    using data_filter, and agreements/disagreements are summed per group
    using the first group-label array returned by recode_single.

    Returns a float array of length max(group label) + 1. Entries for groups
    that have data hold concordant / (concordant + discordant); all other
    entries are 0.
    '''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), groups)
    # Builtin float replaces the np.float alias, which was removed in NumPy 1.24.
    c = np.zeros((np.max(groups) + 1,), dtype=float)
    # 1.0 * forces true division regardless of the interpreter's '/' semantics.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
Ejemplo n.º 3
0
def concordance(g1, g2, data_filter=im.imputation.istat.CALLED, samples=None):
    '''Calculate the concordance (fraction of compared entries that agree)
    between two genotype data arrays g1, g2.

    If samples is given, both arrays are first restricted to those samples
    along their second axis. The arrays are then recoded with recode_single
    using data_filter, and agreements/disagreements are summed per group
    using the first group-label array returned by recode_single.

    Returns a float array of length max(group label) + 1. Entries for groups
    that have data hold concordant / (concordant + discordant); all other
    entries are 0.
    '''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), groups)
    # Builtin float replaces the np.float alias, which was removed in NumPy 1.24.
    c = np.zeros((np.max(groups) + 1,), dtype=float)
    # 1.0 * forces true division regardless of the interpreter's '/' semantics.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
Ejemplo n.º 4
0
 def _genotype_call_rate(self, axis, data_filter=CALLED, samples=None, snps=None):
     '''Return the call rate by axis (0=SNP, 1=sample).

     The arrays are restricted with self._restrict_arrays(data_filter,
     samples, snps). One unit per entry of its first result is summed by
     group over called[axis], and each sum is divided by the size of the
     other axis (num_groups[1 - axis]).

     Returns a float array of length num_groups[axis]; groups that did not
     receive a sum keep a call rate of 0.
     '''
     rg, _, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
     num_called, has_data = util.sum_by_group(np.ones_like(rg).astype(np.byte), called[axis])
     # Builtin float replaces the np.float alias, which was removed in NumPy 1.24.
     call_rate = np.zeros((num_groups[axis],), dtype=float)
     call_rate[has_data] = (1.0 * num_called) / num_groups[1 - axis]
     return call_rate
Ejemplo n.º 5
0
 def _call_counts_by_axis(self, axis, data_filter=CALLED, samples=None, snps=None):
     '''Return the call counts (called count, all count) by axis (0=SNP, 1=sample).

     The arrays are restricted with self._restrict_arrays(data_filter,
     samples, snps). One unit per entry of its second result is summed by
     group over called[axis].

     Returns the tuple (num_called, num_all):
     - num_called: the per-group sums returned by util.sum_by_group (only
       for the groups that function reports, not a dense array).
     - num_all: num_groups[1 - axis], the size of the other axis.
     '''
     _, ri, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
     # The group keys returned alongside the sums are not needed by callers.
     num_called, _ = util.sum_by_group(np.ones_like(ri).astype(np.byte), called[axis])
     num_all = num_groups[1 - axis]
     return num_called, num_all
Ejemplo n.º 6
0
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the integer segment table for (sample, PATERNAL) from the
    validation output directory, sums column 3 minus column 2 (presumably
    segment stop - start; confirm against the file format) per relative
    haplotype, and keeps the num largest totals.

    Returns the column-wise concatenation of: the summed value divided by
    total, the hashed haplotype key (2 * column 4 + column 5), and the result
    of im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each
    relative.
    '''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' % (sample, PATERNAL), dtype=int)
    # Hash haps columns 4 and 5 into a single integer key: 2 * col4 + col5.
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2], 2 * s[:, 4] + s[:, 5])
    # Indices of the num largest values, in descending order.
    j = np.argsort(values)[::-1][:num]
    # Undo the hash with floor division so the ids stay integers; '/' yields
    # fractional ids under true division (Python 3 or __future__ division).
    relatives = groups[j] // 2
    # NOTE(review): `total` is not defined in this function; it must come from
    # the enclosing module scope -- confirm it exists there.
    return np.concatenate((values[j][np.newaxis].transpose() / total,
                           groups[j][np.newaxis].transpose(),
                           np.array([im.pt.lowest_common_ancestor(ped.graph, sample, relative) for relative in relatives])),
                          axis=1)
Ejemplo n.º 7
0
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the integer segment table for (sample, PATERNAL) from the
    validation output directory, sums column 3 minus column 2 (presumably
    segment stop - start; confirm against the file format) per relative
    haplotype, and keeps the num largest totals.

    Returns the column-wise concatenation of: the summed value divided by
    total, the hashed haplotype key (2 * column 4 + column 5), and the result
    of im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each
    relative.
    '''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' %
                   (sample, PATERNAL),
                   dtype=int)
    # Hash haps columns 4 and 5 into a single integer key: 2 * col4 + col5.
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2],
                                       2 * s[:, 4] + s[:, 5])
    # Indices of the num largest values, in descending order.
    j = np.argsort(values)[::-1][:num]
    # Undo the hash with floor division so the ids stay integers; '/' yields
    # fractional ids under true division (Python 3 or __future__ division).
    relatives = groups[j] // 2
    # NOTE(review): `total` is not defined in this function; it must come from
    # the enclosing module scope -- confirm it exists there.
    return np.concatenate(
        (values[j][np.newaxis].transpose() / total,
         groups[j][np.newaxis].transpose(),
         np.array([
             im.pt.lowest_common_ancestor(ped.graph, sample, relative)
             for relative in relatives
         ])),
        axis=1)