def smallestDistance(n1, n2):
    """Return the smallest relation distance between n1 and n2.

    Returns 0 when the nodes are identical and None when n2 does not
    appear in n1's relation map.  This is inefficient but the hope is
    that it will be amortized by the cache in relationMap.
    """
    if n1 == n2:
        return 0
    # Hoist the relationMap call: the original looked it up twice for
    # the same node (membership test + indexing).
    relmap = common.relationMap(n1)
    if n2 not in relmap:
        return None
    relation = relmap[n2]
    # Relation keys are (h1, h2) height pairs; the sum of heights is the
    # path length of that relation.
    return min(abs(h1 + h2) for h1, h2 in relation.keys())
def makePopulation(size=100000, generations=10, public_prob=0.002, public_gens=4):
    """Build a synthetic population tree.

    Only the youngest `public_gens` generations are alive; each of those
    nodes is public with probability `public_prob`.  Returns the pair
    (tree, publicnodes) where publicnodes is the set of public nodes.
    """
    tree = mate.makeTree(size, generations)
    ancestors = tree[:-public_gens]
    recent = tree[-public_gens:]
    # Older generations: dead, never public.
    for person in ancestors | pFlatten:
        person.ispublic = False
        person.isalive = False
    # Recent generations: alive, public with probability public_prob.
    for person in recent | pFlatten:
        person.ispublic = random.random() < public_prob
        person.isalive = True
    publicnodes = recent | pFlatten | Filter(lambda n: n.ispublic) | pSet
    # Warm up the relationMap cache for every public node up front.
    for person in status.wrap(publicnodes):
        common.relationMap(person)
    return tree, publicnodes
def makePopulation(size=100000, generations=10, public_prob=0.002, public_gens=4):
    # NOTE(review): duplicate of the makePopulation definition earlier in
    # this file; being defined later, this one shadows the earlier copy.
    """Build a population tree where only the last `public_gens`
    generations are alive, each public with probability `public_prob`.

    Returns (tree, publicnodes)."""
    tree = mate.makeTree(size, generations)
    # Older generations: dead and never public.
    for node in tree[:-public_gens] | pFlatten:
        node.ispublic = False
        node.isalive = False
    # Youngest generations: alive, randomly public.
    for node in tree[-public_gens:] | pFlatten:
        node.ispublic = random.random() < public_prob
        node.isalive = True
    publicnodes = tree[-public_gens:] | pFlatten | Filter(
        lambda n: n.ispublic) | pSet
    for node in status.wrap(publicnodes):  # warm up cache
        common.relationMap(node)
    return tree, publicnodes
def analyzeScores(node, scores, ibdsamples, publicnodes):
    """Summarize how identifiable `node` is among the scored candidates.

    Returns a JStruct of statistics about the candidates whose total
    score is at least as high as the true node's own score.
    """
    result = utils.JStruct()
    threshold = sum(scores[node])
    highscores = dict((cand, s) for cand, s in scores.items()
                      if sum(s) >= threshold)
    result.num_cands = len(highscores)
    # Collapse candidates into full-sibling groups (shared mom and dad).
    siblinggroups = set(frozenset(cand.dad.children & cand.mom.children)
                        for cand in highscores)
    result.num_groups_unfiltered = len(siblinggroups)
    # One representative per group, preferring the true node when present,
    # shuffled by utils.permute.
    reps = []
    for group in siblinggroups:
        reps.append(node if node in group else list(group)[0])
    groupreps = utils.permute(reps)
    groupreps = [r for r in groupreps
                 if nonmatchScore(node, r, publicnodes, scores, ibdsamples) > 0.1]
    result.num_groups = len(groupreps)
    #FIXME: inefficient
    #result.distances = [smallestDistance(node, rep) for rep in groupreps]
    try:
        result.generations = [r.generation for r in groupreps]
    except AttributeError:
        pass
    result.num_relatives = sum(1 for r in common.relationMap(node)
                               if r.ispublic)
    result.num_matching_relatives = len(scores[node])
    result.cand_num_rel_hist = [len(scores[r]) for r in groupreps] | pHist()
    return result
def aggregateScores(node):
    """Aggregate per-relative match scores for `node`.

    For every public relative, draw an i.b.d. sample from the convolved
    relation density and accumulate each possible victim's individual
    scores.  Raises SiblingError when a relation with total height < 2
    is encountered.

    Returns (scores, ibdsamples): scores maps candidate -> list of score
    components (combined later as an L-p norm); ibdsamples maps
    relative -> sampled i.b.d. index.
    """
    relmap = common.relationMap(node)
    scores = {}
    ibdsamples = {}
    # (Removed unused locals `totallen` and the dead `h1, h2` unpacking
    # that the original computed but never read.)
    for relative, relation in relmap.iteritems():
        if not relative.ispublic:
            continue
        # Relations closer than total height 2 are too close to model.
        if min(map(sum, relation.keys())) < 2:
            raise SiblingError
        ibdsample = ibdsamples[relative] = \
            sampleFromPdfVector(cached.convolvedDensity(relation))
        if ibdsample == 0:
            continue  # no shared i.b.d.; this relative contributes nothing
        for k, s in individualScores(node, relative, relation,
                                     ibdsample).iteritems():
            # total score is L-p norm (p = 0.5 per component here)
            scores.setdefault(k, []).append(s ** 0.5)
    return scores, ibdsamples
def individualScores(victim, relative, relation, sample):
    """Score every possible victim reachable from `relative`.

    `sample` is specified as an array index into the convolved density
    vectors.  Returns a dict mapping candidate -> score normalized by
    the maximum score over all candidates.
    """
    global _node, _rel
    pair = relation.keys()[0]
    nodeheight = pair[0] - pair[1]
    relmap = common.relationMap(relative, min(8, 9 - nodeheight))
    if victim not in relmap:
        # Stash the offending pair in module globals for post-mortem
        # debugging, then fail loudly.
        _node = victim
        _rel = relative
        assert False
    scores = {}
    # Loop variable renamed (was `relation`, shadowing the parameter).
    for possiblevictim, candrelation in relmap.iteritems():
        h1, h2 = candrelation.keys()[0]
        # Skip very unbalanced relations and ones that are too close.
        if abs(h1 - h2) > 3 or min(map(sum, candrelation.keys())) < 2:
            continue
        pdfvector = cached.convolvedDensity(candrelation)
        # Out-of-range sample index means zero density.
        scores[possiblevictim] = pdfvector[sample] if sample < len(pdfvector) else 0
    # (Removed unused `meanscore` computation.)
    maxscore = max(scores.itervalues())
    return dict((k, val / maxscore) for k, val in scores.iteritems())
def analyzeScores(node, scores, ibdsamples, publicnodes):
    """Summarize how well `node` hides among equally-high-scoring candidates.

    NOTE(review): this duplicates the analyzeScores definition earlier in
    the file; being defined later, this copy shadows it."""
    result = utils.JStruct()
    # Candidates scoring at least as well as the true node.
    highscores = dict((k, s) for k, s in scores.items()
                      if sum(s) >= sum(scores[node]))
    result.num_cands = len(highscores)
    # Collapse candidates that are full siblings (same dad and mom).
    siblinggroups = set(frozenset(k.dad.children & k.mom.children)
                        for k in highscores)
    # One representative per group (the true node when present), shuffled.
    groupreps = utils.permute(
        [node if node in group else list(group)[0] for group in siblinggroups])
    # Keep only plausible representatives (Python 2: filter returns a list).
    groupreps = filter(lambda r: nonmatchScore(node, r, publicnodes, scores,
                                               ibdsamples) > 0.1, groupreps)
    result.num_groups_unfiltered = len(siblinggroups)
    result.num_groups = len(groupreps)
    #FIXME: inefficient
    #result.distances = [smallestDistance(node, rep) for rep in groupreps]
    try:
        result.generations = map(lambda n:n.generation, groupreps)
    except AttributeError:
        pass
    result.num_relatives = sum(1 for r in common.relationMap(node)
                               if r.ispublic)
    result.num_matching_relatives = len(scores[node])
    result.cand_num_rel_hist = map(lambda n: len(scores[n]), groupreps) | pHist()
    return result
def aggregateScores(node):
    """Aggregate per-relative match scores for `node`.

    NOTE(review): duplicate of the aggregateScores definition earlier in
    the file; being later, this copy takes effect.

    For each public relative, samples an i.b.d. value from the convolved
    relation density and accumulates individual candidate scores.
    Raises SiblingError for relations with total height < 2.
    Returns (scores, ibdsamples).
    """
    relmap = common.relationMap(node)
    scores = {}
    ibdsamples = {}
    # (Removed the unused `totallen` count and the dead `h1, h2`
    # unpacking present in the original.)
    for relative, relation in relmap.iteritems():
        if not relative.ispublic:
            continue
        # Reject relations closer than total height 2 (too close to model).
        if min(map(sum, relation.keys())) < 2:
            raise SiblingError
        ibdsample = ibdsamples[relative] = \
            sampleFromPdfVector(cached.convolvedDensity(relation))
        if ibdsample == 0:
            continue  # zero i.b.d. sample contributes no score
        for k, s in individualScores(node, relative, relation,
                                     ibdsample).iteritems():
            # total score is L-p norm
            scores.setdefault(k, []).append(s**0.5)
    return scores, ibdsamples
def individualScores(victim, relative, relation, sample):
    """Score every possible victim reachable from `relative`.

    NOTE(review): duplicate of the individualScores definition earlier in
    the file; being later, this copy takes effect.

    `sample` is specified as an array index.  Returns a dict mapping
    candidate -> score normalized by the maximum score.
    """
    global _node, _rel
    pair = relation.keys()[0]
    nodeheight = pair[0] - pair[1]
    relmap = common.relationMap(relative, min(8, 9 - nodeheight))
    if victim not in relmap:
        # Save the failing pair in module globals for debugging before
        # asserting.
        _node = victim
        _rel = relative
        assert False
    scores = {}
    # Loop variable renamed from `relation`, which shadowed the parameter.
    for possiblevictim, candrelation in relmap.iteritems():
        h1, h2 = candrelation.keys()[0]
        if abs(h1 - h2) > 3 or min(map(sum, candrelation.keys())) < 2:
            continue
        pdfvector = cached.convolvedDensity(candrelation)
        scores[possiblevictim] = pdfvector[sample] if sample < len(
            pdfvector) else 0
    # (Removed the unused `meanscore` computation.)
    maxscore = max(scores.itervalues())
    return dict((k, val / maxscore) for k, val in scores.iteritems())
def zeroProb(node):
    """prob that i.b.d. of node and cand is zero"""
    # NOTE(review): `cand` is neither a parameter nor a local here — it
    # must be a module-level name assigned elsewhere; confirm that, or
    # pass it explicitly as a parameter.
    relation = common.relationMap(node).get(cand)
    # No relation at all means i.b.d. is certainly zero (probability 1.0).
    return cached.convolvedDensity(relation)[0] if relation else 1.0