def get_json(self, params, id):
    """Compute statistics about the people related to the person `id`.

    :param params: request parameters; not read by this method.
    :param id: identifier of the decujus; converted with ``int()``.
    :return: a dict with the keys ``total_ancestors``, ``total_father``,
       ``total_mother``, ``total_persons``, ``ranges`` (one
       ``[generation, min_year, max_year, legend]`` entry per generation),
       ``ages`` (one ``[age, males, females, unknown]`` entry per 5-year
       bucket), ``decujus`` and ``decujus_name``.
    """
    id = int(id)
    graph.update_if_needed()

    # ??? The stats includes persons "Unknown" that were created during a
    # gedcom import for the purpose of preserving families. Will be fixed
    # when we store children differently (for instance in a group)

    distance = dict()
    decujus = graph.node_from_id(id)

    allpeople = graph.people_in_tree(id=decujus.main_id, distance=distance)
    persons = extended_personas(
        nodes=allpeople, styles=None,
        event_types=event_types_for_pedigree, graph=graph)

    f = graph.fathers(decujus.main_id)
    fathers = graph.people_in_tree(
        id=f[0], maxdepthDescendants=0) if f else []
    m = graph.mothers(decujus.main_id)
    mothers = graph.people_in_tree(
        id=m[0], maxdepthDescendants=0) if m else []

    cal = CalendarGregorian()

    generations = dict()  # list of persons for each generation
    for a in allpeople:
        generations.setdefault(distance[a], []).append(a)

    ranges = []

    for index in sorted(generations.keys()):
        births = None
        deaths = None
        gen_range = [index + 1, "?", "?", ""]  # gen, min, max, legend
        for p in generations[index]:
            p = persons[p.main_id]
            if p.birth and p.birth.Date:
                if births is None or p.birth.Date < births:
                    births = p.birth.Date
                    year = p.birth.Date.year(cal)
                    if year is not None:
                        gen_range[1] = year

            if p.death and p.death.Date:
                if deaths is None or p.death.Date > deaths:
                    deaths = p.death.Date
                    year = p.death.Date.year(cal)
                    if year is not None:
                        gen_range[2] = year

        # The legend is built before the postprocessing below, so it shows
        # "?" for dates that are not known from the data.
        if index >= 0:
            gen_range[3] = "Gen. %02d (%d / %d) %s - %s" \
                % (index + 1, len(generations[index]), 2 ** (index + 1),
                   gen_range[1], gen_range[2])
        else:
            gen_range[3] = "Desc. %02d (%d) %s - %s" \
                % (-index, len(generations[index]),
                   gen_range[1], gen_range[2])

        # Postprocess the ranges:
        #   generation n's earliest date has to be at least 15 years before
        #     its children's earliest date (can't have children before that)
        #   generation n's latest date (death) has to be after the children's
        #     generation earliest date (first birth)
        if ranges:
            children_min = ranges[-1][1]
            # The previous generation's minimum may itself still be "?";
            # arithmetic or ordering against that placeholder would raise.
            if children_min != "?":
                if gen_range[1] == "?":
                    gen_range[1] = children_min - 15
                if gen_range[2] == "?" or gen_range[2] < children_min:
                    gen_range[2] = children_min
        if gen_range[2] == '?':
            gen_range[2] = datetime.datetime.now().year

        ranges.append(gen_range)

    # date_range, males, females, unknown
    ages = [[a, 0, 0, 0] for a in range(0, 120, 5)]
    sex_column = {"M": 1, "F": 2}

    for p in persons.values():
        if p.birth and p.birth.Date and p.death and p.death.Date:
            age = p.death.Date.years_since(p.birth.Date)
            if age is not None:
                # Clamp so that ages of 120 and more go into the last bucket
                # (instead of raising IndexError) and negative ages (death
                # recorded before birth) go into the first one.
                bucket = min(max(int(age / 5), 0), len(ages) - 1)
                ages[bucket][sex_column.get(p.sex, 3)] += 1

    return {
        "total_ancestors": len(allpeople),
        "total_father": len(fathers),
        "total_mother": len(mothers),
        "total_persons": len(graph),
        "ranges": ranges,
        "ages": ages,
        "decujus": decujus.main_id,
        "decujus_name": "%s %s" % (
            persons[decujus.main_id].given_name,
            persons[decujus.main_id].surname)
    }
def find_candidate():
    """Find all candidate personas for a merge

    Persons are ordered by the start of their (estimated) lifespan, and a
    person is only compared with those whose lifespan has not ended when
    this one starts. The number of comparisons and of possible merges is
    printed; nothing is returned.
    """
    p1 = 7843  # Emmanuel Briot
    p2 = 1     # Emmanuel Briot
    p3 = 3052  # Marie HOUTEVILLE
    p4 = 3335  # Thomine Levesque
    p5 = 1311
    p6 = 7842

    persons = extended_personas(
        graph=global_graph,
        nodes={global_graph.node_from_id(pid)
               for pid in (p1, p2, p3, p4, p5, p6)},
        styles=None, query_groups=False)

    # The scores are not used; the calls are kept as in the original code.
    for first, second in [(p1, p2), (p2, p1), (p3, p4), (p5, p6), (p6, p5)]:
        compare(persons[first], persons[second])

    # Get all persons from the database with a guess at their lifespan.
    # If we know the birth date, lifespan starts there, otherwise it starts
    # some years before the first event
    # Likewise for death date.
    # This results is potentially over-optimistic lifespans, but still reduces
    # the number of comparisons to do.
    # The following query (and its processing) might take a while on big
    # databases, but we'll need access to the whole information for persons
    # anyway, so we might as well query everything from the start)
    #    number of persons: 9171
    #    number of queries:6  total queries time:0.26s  total time:26.21s

    persons = extended_personas(
        nodes=None, styles=None, graph=global_graph, query_groups=False)

    # A temporary structure ordered by the first date in lifespan
    births = []

    delta = datetime.timedelta(days=maximum_lifespan * 365)

    for order, p in enumerate(persons.values()):
        birth = death = None
        if p.birth is not None:
            birth = p.birth.date_sort
        if p.death is not None:
            death = p.death.date_sort

        if birth is None or death is None:
            h = [a.event.date_sort
                 for a in p.all_events.values()
                 if a.event.date_sort is not None]
            if h:
                h.sort()
                birth = birth or h[0] - delta
                death = death or h[-1] + delta

        # If birth is None, that means there are no events, and we don't
        # really want to merge that person then.
        if birth:
            p.max_lifespan = death
            # `order` breaks ties between equal dates, so that the heap
            # never has to compare two person objects with each other.
            heapq.heappush(births, (birth, order, p))

    # Now we traverse the list and only compare persons that were alive at
    # the same time (otherwise we assume they cannot be merged)
    # ??? We can save time by not comparing when we have already
    # decided in the past they can't be the same

    alive = []  # Each person alive at the given date
    comparisons = 0
    same = 0

    while births:
        date, _, person = heapq.heappop(births)

        # Rebuild the list rather than removing from it while iterating,
        # which used to skip the element following each removal.
        alive = [a for a in alive if a.max_lifespan >= date]

        # Lifespans starting before 1970 are not compared at all.
        if date.year >= 1970:
            for a in alive:
                # print "Compare %s and %s" % (person.name, a.name)
                comparisons += 1
                score = compare(a, person)
                if score >= 150:
                    print(
                        "%d Might be the same: %d %s and %d %s, score=%d %d"
                        % (date.year, person.id, person.name,
                           a.id, a.name, score, compare(person, a)))
                    same += 1

        alive.append(person)

    # Maximum comparisons (n^2) would be:  83_302_129
    # Actual comparisons with this algo:   14_635_289
    print("Number of comparisons: ", comparisons)
    print("Possible merges: ", same)
def __get_json_sosa_tree(graph, id, max_levels, style_rules,
                         last_descendant_known=-1,
                         maxdepthDescendants=1, last_gen_known=-1):
    """
    Build the JSON description of the pedigree of the person `id`.

    :param graph: the graph used to look up nodes, parents and children.
    :param id: identifier of the decujus.
    :param max_levels: the number of ancestor generations to include.
    :param style_rules: passed on to `Styles`.
    :param last_descendant_known: descendants whose distance is below this
        value are left out.
    :param last_gen_known: is the number of the last generation for which the
        client already has data, and thus do not need to be sent again. -1
        to retrieve all.
    :param maxdepthDescendants:
        The number of generations for which we compute the children.
    :return: the result of `to_json` on a dict with the keys
        ``generations``, ``descendants``, ``decujus`` and ``styles``.
    """

    decujus = graph.node_from_id(id)

    styles = Styles(style_rules, graph, decujus=decujus.main_id)

    distance = dict()
    ancestors = graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=max_levels - 1,
        maxdepthDescendants=0, distance=distance)
    ancestors = [a for a in ancestors if distance[a] >= last_gen_known]

    descendants = graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=0,
        distance=distance, maxdepthDescendants=maxdepthDescendants)
    descendants = [
        a for a in descendants
        if a != decujus and distance[a] >= last_descendant_known]

    persons = {}

    all_person_nodes = set(ancestors).union(descendants)
    if all_person_nodes:
        persons = extended_personas(
            all_person_nodes, styles,
            event_types=event_types_for_pedigree, graph=graph)

    def add_parents(p):
        # Set p.generation and, recursively, p.parents for the known parents
        p.generation = distance[graph.node_from_id(p.id)]
        if p.generation >= max_levels:
            return

        fathers = graph.fathers(p.id)
        mothers = graph.mothers(p.id)
        p.parents = [
            None if not fathers else persons.get(fathers[0].main_id, None),
            None if not mothers else persons.get(mothers[0].main_id, None)]

        for pa in p.parents:
            if pa:
                add_parents(pa)

    def birth_sort_key(entry):
        # Entries without a birth date come first. The leading boolean keeps
        # None from being ordered against a date, which Python 3 rejects.
        persona = entry[0]
        date = persona.birth.Date if persona and persona.birth else None
        return (date is not None, date)

    def add_children(p, gen):
        # Set p.children (ordered by birth date), recursively down to
        # maxdepthDescendants generations
        p.children = []
        by_birth = [
            (persons[node.main_id] if node.main_id in persons else None, node)
            for node in graph.children(p.id)]
        by_birth.sort(key=birth_sort_key)
        for c in by_birth:
            if c[0]:
                c[0].generation = -gen  # distance[c[1]]
                p.children.append(c[0])
                if gen < maxdepthDescendants:
                    add_children(c[0], gen + 1)

    main = persons[decujus.main_id]
    add_parents(main)
    add_children(main, gen=1)

    # We will however return a simpler version of the information computed
    # above (which includes all known events for the persons)

    show_age = False

    def person_to_json_for_pedigree(obj):
        # Custom encoder given to to_json: only handles Persona instances
        # and returns None for anything else.
        if isinstance(obj, models.Persona):
            d = obj.death
            if show_age and obj.birth:
                if d:
                    if d.Date:
                        d.Date += " (age %s)" % (
                            str(d.Date.years_since(obj.birth.Date)), )
                else:
                    # The key used to be the bare name `Date`, which is not
                    # defined in this function.
                    d = {'Date': " (age %s)" % (
                        str(DateRange.today().years_since(obj.birth.Date)), )}

            return {
                'id': obj.id,
                'givn': obj.given_name,
                'surn': obj.surname,
                'sex': obj.sex,
                'generation': obj.generation,
                'parents': obj.parents if hasattr(obj, 'parents') else None,
                'children':
                    obj.children if hasattr(obj, 'children') else None,
                'style': obj.styles,
                'birth': obj.birth,
                'marriage': obj.marriage,
                'death': d}

    return to_json(
        obj={'generations': max_levels,
             'descendants': maxdepthDescendants,
             'decujus': main,
             'styles': styles.all_styles()},
        custom=person_to_json_for_pedigree)
def view(request, decujus=1):
    """Display the statistics for a given person

    :param request: the HTTP request, used for the template context.
    :param decujus: identifier of the person; converted with ``int()``.
    :return: the 'geneaprove/firsttime.html' page when the graph is empty,
       otherwise an ``HttpResponse`` holding the statistics as JSON.
    """

    decujus = int(decujus)

    graph.update_if_needed()
    if len(graph) == 0:
        return render_to_response(
            'geneaprove/firsttime.html',
            context_instance=RequestContext(request))

    # ??? The stats includes persons "Unknown" that were created during a
    # gedcom import for the purpose of preserving families. Will be fixed
    # when we store children differently (for instance in a group)

    distance = dict()
    ancestors = graph.people_in_tree(id=decujus, distance=distance)
    persons = extended_personas(
        nodes=ancestors, styles=None,
        event_types=event_types_for_pedigree, graph=graph)

    f = graph.fathers(decujus)
    fathers = graph.people_in_tree(
        id=f[0], maxdepthDescendants=0) if f else []
    m = graph.mothers(decujus)
    mothers = graph.people_in_tree(
        id=m[0], maxdepthDescendants=0) if m else []

    cal = CalendarGregorian()

    generations = dict()  # list of persons for each generation
    for a in ancestors:
        generations.setdefault(distance[a], []).append(a)

    ranges = []

    for index in sorted(generations.keys()):
        births = None
        deaths = None
        gen_range = [index + 1, "?", "?", ""]  # gen, min, max, legend
        for p in generations[index]:
            p = persons[p.main_id]
            if p.birth and p.birth.Date:
                if births is None or p.birth.Date < births:
                    births = p.birth.Date
                    year = p.birth.Date.year(cal)
                    if year is not None:
                        gen_range[1] = year

            if p.death and p.death.Date:
                if deaths is None or p.death.Date > deaths:
                    deaths = p.death.Date
                    year = p.death.Date.year(cal)
                    if year is not None:
                        gen_range[2] = year

        # The legend is built before the postprocessing below, so it shows
        # "?" for dates that are not known from the data.
        gen_range[3] = "Gen. %02d (%d / %d) (%s - %s)" \
            % (index + 1, len(generations[index]), 2 ** (index + 1),
               gen_range[1], gen_range[2])

        # Postprocess the ranges:
        #   generation n's earliest date has to be at least 15 years before
        #     its children's earliest date (can't have children before that)
        #   generation n's latest date (death) has to be after the children's
        #     generation earliest date (first birth)
        if ranges:
            children_min = ranges[-1][1]
            # The previous generation's minimum may itself still be "?";
            # arithmetic or ordering against that placeholder would raise.
            if children_min != "?":
                if gen_range[1] == "?":
                    gen_range[1] = children_min - 15
                if gen_range[2] == "?" or gen_range[2] < children_min:
                    gen_range[2] = children_min
        if gen_range[2] == '?':
            gen_range[2] = datetime.datetime.now().year

        ranges.append(gen_range)

    # date_range, males, females, unknown
    ages = [[a, 0, 0, 0] for a in range(0, 120, 5)]
    sex_column = {"M": 1, "F": 2}

    for p in persons.values():
        if p.birth and p.birth.Date and p.death and p.death.Date:
            age = p.death.Date.years_since(p.birth.Date)
            if age is not None:
                # Clamp so that ages of 120 and more go into the last bucket
                # (instead of raising IndexError) and negative ages (death
                # recorded before birth) go into the first one.
                bucket = min(max(int(age / 5), 0), len(ages) - 1)
                ages[bucket][sex_column.get(p.sex, 3)] += 1

    data = {
        "total_ancestors": len(ancestors),
        "total_father": len(fathers),
        "total_mother": len(mothers),
        "total_persons": len(graph),
        "ranges": ranges,
        "ages": ages,
        "decujus": decujus,
        "decujus_name": "%s %s" % (
            persons[decujus].given_name, persons[decujus].surname)
    }

    return HttpResponse(to_json(data), content_type='application/json')
def get_sosa_tree(graph, id, max_levels, style_rules,
                  last_descendant_known=-1,
                  maxdepthDescendants=1, last_gen_known=-1):
    """
    Compute the sosa tree and the descendants of the person `id`.

    :param graph: the graph used to look up nodes, parents and children.
    :param id: identifier of the decujus.
    :param max_levels: the number of ancestor generations to include.
    :param style_rules: passed on to `Styles`.
    :param last_descendant_known: descendants whose distance is below this
        value are left out.
    :param last_gen_known: is the number of the last generation for which the
        client already has data, and thus do not need to be sent again. -1
        to retrieve all.
    :param maxdepthDescendants:
        The number of generations for which we compute the children.
    :return: a dict with the keys ``generations``, ``descendants``,
        ``persons``, ``sosa``, ``children``, ``marriage`` and ``styles``.
    """

    decujus = graph.node_from_id(id)

    styles = Styles(style_rules, graph, decujus=decujus.main_id)

    distance = dict()
    ancestors = graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=max_levels - 1,
        maxdepthDescendants=0, distance=distance)
    ancestors = [a for a in ancestors if distance[a] >= last_gen_known]

    descendants = graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=0,
        distance=distance, maxdepthDescendants=maxdepthDescendants)
    # Filter the decujus out instead of calling remove(), which raises when
    # the decujus is not part of the result.
    descendants = [
        a for a in descendants
        if a != decujus and distance[a] >= last_descendant_known]

    sosa_tree = dict()
    marriage = dict()
    children = {}

    persons = {}
    all_person_nodes = set(ancestors).union(descendants)
    if all_person_nodes:
        persons = extended_personas(
            all_person_nodes, styles,
            event_types=event_types_for_pedigree, graph=graph)

    def build_sosa_tree(sosa_tree, marriage, sosa, id):
        # A person might not be in 'persons', and yet its parent be there,
        # in case we have filtered out earlier generations.
        if id in persons:
            sosa_tree[sosa] = id
            persons[id].generation = distance[graph.node_from_id(id)]
            if persons[id].marriage:
                marriage[sosa] = persons[id].marriage

        # The father gets sosa number 2n and the mother 2n + 1
        fathers = graph.fathers(id)
        if fathers:
            build_sosa_tree(
                sosa_tree, marriage, sosa * 2, fathers[0].main_id)
        mothers = graph.mothers(id)
        if mothers:
            build_sosa_tree(
                sosa_tree, marriage, sosa * 2 + 1, mothers[0].main_id)

    def birth_sort_key(entry):
        # Entries without a birth date come first. The leading boolean keeps
        # None from being ordered against a date, which Python 3 rejects.
        persona = entry[0]
        date = persona.birth.Date if persona and persona.birth else None
        return (date is not None, date)

    def build_children_tree(children, id, gen):
        if id in persons:
            children[id] = []

        by_birth = [
            (persons[node.main_id] if node.main_id in persons else None, node)
            for node in graph.children(id)]
        by_birth.sort(key=birth_sort_key)
        for p in by_birth:
            if p[0]:
                p[0].generation = -distance[p[1]]
                if id in persons:
                    children[id].append(p[0].id)
                if gen < maxdepthDescendants:
                    build_children_tree(children, id=p[0].id, gen=gen + 1)

    build_sosa_tree(sosa_tree, marriage, 1, decujus.main_id)
    build_children_tree(children, id=decujus.main_id, gen=1)

    return {'generations': max_levels,
            'descendants': maxdepthDescendants,
            'persons': persons,     # All persons indexed by id
            'sosa': sosa_tree,      # sosa_number -> person_id
            'children': children,   # personId -> [children_id*]
            'marriage': marriage,   # sosa_number -> marriage info
            'styles': styles.all_styles()}
def get_json(self, params, id):
    """Compute the pedigree (ancestors and descendants) of the person `id`.

    :param params: request parameters. The keys read are ``gens``,
       ``desc_known``, ``descendant_gens``, ``gens_known`` and
       ``year_only``.
    :param id: identifier of the decujus.
    :return: a dict with the keys ``generations``, ``descendants``,
       ``decujus`` and ``styles``.

    As a side effect, sets ``self.year_only``.
    """

    # ??? Should lock until the view has been generated
    graph.update_if_needed()

    max_levels = int(params.get("gens", 5))
    last_descendant_known = int(params.get("desc_known", -1))

    # The number of generations for which we compute the children.
    maxdepthDescendants = int(params.get("descendant_gens", 1))

    # the number of the last generation for which the client already has
    # data, and thus do not need to be sent again. -1 to retrieve all.
    last_gen_known = int(params.get("gens_known", -1))

    # Whether to show full dates or only the year
    self.year_only = params.get('year_only', '') == 'true'

    decujus = graph.node_from_id(id)

    styles = Styles(style_rules, graph, decujus=decujus.main_id)

    distance = dict()
    people = graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=max_levels - 1,
        maxdepthDescendants=maxdepthDescendants, distance=distance)
    ancestors = [a for a in people
                 if distance[a] >= 0 and distance[a] >= last_gen_known]
    descendants = [a for a in people
                   if a != decujus and distance[a] < 0
                   and distance[a] <= -last_descendant_known]

    persons = {}
    all_person_nodes = set(ancestors).union(descendants)
    if all_person_nodes:
        persons = extended_personas(
            all_person_nodes, styles,
            event_types=event_types_for_pedigree, graph=graph)

    def add_parents(p):
        # Set p.generation and, recursively, p.parents for the known parents
        p.generation = distance[graph.node_from_id(p.id)]
        if p.generation >= max_levels:
            return

        fathers = graph.fathers(p.id)
        mothers = graph.mothers(p.id)
        p.parents = [
            None if not fathers else persons.get(fathers[0].main_id, None),
            None if not mothers else persons.get(mothers[0].main_id, None)]

        for pa in p.parents:
            if pa:
                add_parents(pa)

    def birth_sort_key(entry):
        # Entries without a birth date come first. The leading boolean keeps
        # None from being ordered against a date, which Python 3 rejects.
        persona = entry[0]
        date = persona.birth.Date if persona and persona.birth else None
        return (date is not None, date)

    def add_children(p, gen):
        # Set p.children (ordered by birth date), recursively down to
        # maxdepthDescendants generations
        p.children = []
        by_birth = [
            (persons[node.main_id] if node.main_id in persons else None, node)
            for node in graph.children(p.id)]
        by_birth.sort(key=birth_sort_key)
        for c in by_birth:
            if c[0]:
                c[0].generation = -gen  # distance[c[1]]
                p.children.append(c[0])
                if gen < maxdepthDescendants:
                    add_children(c[0], gen + 1)

    main = persons[decujus.main_id]
    add_parents(main)
    add_children(main, gen=1)

    return {'generations': max_levels,
            'descendants': maxdepthDescendants,
            'decujus': main,
            'styles': styles.all_styles()}
def find_candidate(graph):
    """Find all candidate personas for a merge

    Persons are ordered by the start of their (estimated) lifespan, and a
    person is only compared with those whose lifespan has not ended when
    this one starts. The number of comparisons and of possible merges is
    printed; nothing is returned.

    :param graph: the graph given to `extended_personas` and used to look
       up nodes.
    """
    p1 = 7843  # Emmanuel Briot
    p2 = 1     # Emmanuel Briot
    p3 = 3052  # Marie HOUTEVILLE
    p4 = 3335  # Thomine Levesque
    p5 = 1311
    p6 = 7842

    # This call used to pass `same=same`, which read the local counter
    # `same` (assigned further down) before its assignment and therefore
    # always raised UnboundLocalError. It now passes the graph, like the
    # second call below.
    persons = extended_personas(
        nodes={graph.node_from_id(pid)
               for pid in (p1, p2, p3, p4, p5, p6)},
        styles=None, graph=graph, query_groups=False)

    # Single pre-formatted strings print the same text whether `print` is
    # the Python 2 statement or the Python 3 function.
    for first, second in [(p1, p2), (p2, p1), (p3, p4), (p5, p6), (p6, p5)]:
        print("%s %s" % (persons[first].name, persons[second].name))
        score = compare(persons[first], persons[second])
        print(" =>  %s" % (score, ))

    # Get all persons from the database with a guess at their lifespan.
    # If we know the birth date, lifespan starts there, otherwise it starts
    # some years before the first event
    # Likewise for death date.
    # This results is potentially over-optimistic lifespans, but still reduces
    # the number of comparisons to do.
    # The following query (and its processing) might take a while on big
    # databases, but we'll need access to the whole information for persons
    # anyway, so we might as well query everything from the start)
    #    number of persons: 9171
    #    number of queries:6  total queries time:0.26s  total time:26.21s

    persons = extended_personas(
        nodes=None, styles=None, graph=graph, query_groups=False)

    # A temporary structure ordered by the first date in lifespan
    births = []

    delta = datetime.timedelta(days=maximum_lifespan * 365)

    for order, p in enumerate(persons.values()):
        birth = death = None
        if p.birth is not None:
            birth = p.birth.date_sort
        if p.death is not None:
            death = p.death.date_sort

        if birth is None or death is None:
            h = [a.event.date_sort
                 for a in p.all_events.values()
                 if a.event.date_sort is not None]
            if h:
                h.sort()
                birth = birth or h[0] - delta
                death = death or h[-1] + delta

        # If birth is None, that means there are no events, and we don't
        # really want to merge that person then.
        if birth:
            p.max_lifespan = death
            # `order` breaks ties between equal dates, so that the heap
            # never has to compare two person objects with each other.
            heapq.heappush(births, (birth, order, p))

    # Now we traverse the list and only compare persons that were alive at
    # the same time (otherwise we assume they cannot be merged)
    # ??? We can save time by not comparing when we have already
    # decided in the past they can't be the same

    alive = []  # Each person alive at the given date
    comparisons = 0
    same = 0

    while births:
        date, _, person = heapq.heappop(births)

        # Rebuild the list rather than removing from it while iterating,
        # which used to skip the element following each removal.
        alive = [a for a in alive if a.max_lifespan >= date]

        # Lifespans starting before 1970 are not compared at all.
        if date.year >= 1970:
            for a in alive:
                # print "Compare %s and %s" % (person.name, a.name)
                comparisons += 1
                score = compare(a, person)
                if score >= 150:
                    print(
                        "%d Might be the same: %d %s and %d %s, score=%d %d"
                        % (date.year, person.id, person.name,
                           a.id, a.name, score, compare(person, a)))
                    same += 1

        alive.append(person)

    # Maximum comparisons (n^2) would be:  83_302_129
    # Actual comparisons with this algo:   14_635_289
    print("Number of comparisons:  %s" % (comparisons, ))
    print("Possible merges:  %s" % (same, ))
def view(request, decujus=1):
    """Display the statistics for a given person

    :param request: the HTTP request, used for the template context.
    :param decujus: identifier of the person; converted with ``int()``.
    :return: the 'geneaprove/firsttime.html' page when the graph is empty,
       otherwise an ``HttpResponse`` holding the statistics as JSON.
    """

    decujus = int(decujus)

    graph.update_if_needed()
    if len(graph) == 0:
        return render_to_response(
            'geneaprove/firsttime.html',
            context_instance=RequestContext(request))

    # ??? The stats includes persons "Unknown" that were created during a
    # gedcom import for the purpose of preserving families. Will be fixed
    # when we store children differently (for instance in a group)

    distance = dict()
    ancestors = graph.people_in_tree(id=decujus, distance=distance)
    persons = extended_personas(
        nodes=ancestors, styles=None,
        event_types=event_types_for_pedigree, graph=graph)

    f = graph.fathers(decujus)
    fathers = graph.people_in_tree(
        id=f[0], maxdepthDescendants=0) if f else []
    m = graph.mothers(decujus)
    mothers = graph.people_in_tree(
        id=m[0], maxdepthDescendants=0) if m else []

    cal = CalendarGregorian()

    generations = dict()  # list of persons for each generation
    for a in ancestors:
        generations.setdefault(distance[a], []).append(a)

    ranges = []

    for index in sorted(generations.keys()):
        births = None
        deaths = None
        gen_range = [index + 1, "?", "?", ""]  # gen, min, max, legend
        for p in generations[index]:
            p = persons[p.main_id]
            if p.birth and p.birth.Date:
                if births is None or p.birth.Date < births:
                    births = p.birth.Date
                    year = p.birth.Date.year(cal)
                    if year is not None:
                        gen_range[1] = year

            if p.death and p.death.Date:
                if deaths is None or p.death.Date > deaths:
                    deaths = p.death.Date
                    year = p.death.Date.year(cal)
                    if year is not None:
                        gen_range[2] = year

        # The legend is built before the postprocessing below, so it shows
        # "?" for dates that are not known from the data.
        gen_range[3] = "Generation %02d (%d out of %d) (%s - %s)" \
            % (index + 1, len(generations[index]), 2 ** (index + 1),
               gen_range[1], gen_range[2])

        # Postprocess the ranges:
        #   generation n's earliest date has to be at least 15 years before
        #     its children's earliest date (can't have children before that)
        #   generation n's latest date (death) has to be after the children's
        #     generation earliest date (first birth)
        if ranges:
            children_min = ranges[-1][1]
            # The previous generation's minimum may itself still be "?";
            # arithmetic or ordering against that placeholder would raise.
            if children_min != "?":
                if gen_range[1] == "?":
                    gen_range[1] = children_min - 15
                if gen_range[2] == "?" or gen_range[2] < children_min:
                    gen_range[2] = children_min
        if gen_range[2] == '?':
            gen_range[2] = datetime.datetime.now().year

        ranges.append(gen_range)

    # date_range, males, females, unknown
    ages = [[a, 0, 0, 0] for a in range(0, 120, 5)]
    sex_column = {"M": 1, "F": 2}

    for p in persons.values():
        if p.birth and p.birth.Date and p.death and p.death.Date:
            age = p.death.Date.years_since(p.birth.Date)
            if age is not None:
                # Clamp so that ages of 120 and more go into the last bucket
                # (instead of raising IndexError) and negative ages (death
                # recorded before birth) go into the first one.
                bucket = min(max(int(age / 5), 0), len(ages) - 1)
                ages[bucket][sex_column.get(p.sex, 3)] += 1

    data = {
        "total_ancestors": len(ancestors),
        "total_father": len(fathers),
        "total_mother": len(mothers),
        "total_persons": len(graph),
        "ranges": ranges,
        "ages": ages,
        "decujus": decujus,
        "decujus_name": "%s %s" % (
            persons[decujus].given_name, persons[decujus].surname)
    }

    return HttpResponse(to_json(data), content_type='application/json')
def get_json(self, params, id):
    """Compute the pedigree layout (ancestors and descendants) of `id`.

    :param params: request parameters. The keys read are ``gens``,
       ``desc_known``, ``descendant_gens``, ``gens_known`` and
       ``year_only``.
    :param id: identifier of the decujus.
    :return: a dict with the keys ``generations``, ``descendants``,
       ``decujus``, ``persons``, ``layout`` and ``styles``.

    As a side effect, sets ``self.year_only``.
    """

    def int_param(name, default):
        # Read one integer parameter, with a default when it is missing
        return int(params.get(name, default))

    # ??? Should lock until the view has been generated
    global_graph.update_if_needed()

    max_levels = int_param("gens", 5)
    last_descendant_known = int_param("desc_known", -1)

    # The number of generations for which we compute the children.
    maxdepthDescendants = int_param("descendant_gens", 1)

    # the number of the last generation for which the client already has
    # data, and thus do not need to be sent again. -1 to retrieve all.
    last_gen_known = int_param("gens_known", -1)

    self.year_only = params.get('year_only', '') == 'true'

    decujus = global_graph.node_from_id(id)

    # The styles are still built, but not used afterwards
    Styles(style_rules(), global_graph, decujus=decujus.main_id)
    styles = None  # disabled for now

    distance = dict()
    people = global_graph.people_in_tree(
        id=decujus.main_id, maxdepthAncestors=max_levels - 1,
        maxdepthDescendants=maxdepthDescendants, distance=distance)

    # Split the tree into the ancestors (distance >= 0) and the descendants
    # (distance < 0) that still need to be sent
    ancestors = []
    descendants = []
    for node in people:
        dist = distance[node]
        if dist >= 0:
            if dist >= last_gen_known:
                ancestors.append(node)
        elif node != decujus and dist <= -last_descendant_known:
            descendants.append(node)

    persons = {}
    all_person_nodes = set(ancestors) | set(descendants)
    if all_person_nodes:
        persons = extended_personas(
            all_person_nodes, styles,
            event_types=event_types_for_pedigree(), graph=global_graph)

    def add_parents(p):
        # Set p.generation and p.parents (ids), recursively for the parents
        # that are part of 'persons'
        p.generation = distance[global_graph.node_from_id(p.id)]
        if p.generation >= max_levels:
            return

        fathers = global_graph.fathers(p.id)
        mothers = global_graph.mothers(p.id)
        p.parents = [
            fathers[0].main_id if fathers else None,
            mothers[0].main_id if mothers else None]

        for found in (fathers, mothers):
            if found and found[0].main_id in persons:
                add_parents(persons[found[0].main_id])

    def add_children(p, gen):
        # Set p.children (ids), recursively down to maxdepthDescendants
        p.children = []
        known = [
            persons[node.main_id] if node.main_id in persons else None
            for node in global_graph.children(p.id)]
        for child in known:
            if not child:
                continue
            child.generation = -gen  # distance[c[1]]
            p.children.append(child.id)
            if gen < maxdepthDescendants:
                add_children(child, gen + 1)

    main = persons[decujus.main_id]
    add_parents(main)
    add_children(main, gen=1)

    layout = {
        p.id: {'children': getattr(p, 'children', None),
               'parents': getattr(p, 'parents', None)}
        for p in persons.values()}

    result = {'generations': max_levels,
              'descendants': maxdepthDescendants,
              'decujus': decujus.main_id,
              'persons': list(persons.values()),
              'layout': layout,
              'styles': styles.all_styles() if styles is not None else None}
    return result