def main():
    """Build the family graphs and write the synthesized train/test sets.

    Every person of every family component is run through
    ``mark_problems``.  People whose ``survived`` attribute is not ``None``
    are synthesized against the training set; the remaining people are
    synthesized against the test set.
    """
    train = TitanicDataSet.get_train()
    test = TitanicDataSet.get_test()

    people = []
    for family in construct_family_components(train, test):
        for person in family.nodes:
            people.append(mark_problems(person, family))

    synthesize(
        'train',
        [person for person in people if person.survived is not None],
        train,
    )
    synthesize(
        'test',
        [person for person in people if person.survived is None],
        test,
    )
def plot_troubled_families():
    """Show the graphs that couldn't be broken down into families.

    The components are ordered by their number of nodes, and those flagged
    ``difficult_parent_child`` are handed to ``generate_graph`` together
    with the file name ``'trouble.png'``.
    """
    train = TitanicDataSet.get_train()
    test = TitanicDataSet.get_test()

    components = list(construct_family_components(train, test))
    components.sort(key=lambda component: len(component.nodes))

    troubled = [
        component for component in components
        if component.difficult_parent_child
    ]
    generate_graph('trouble.png', troubled)
def plot_class(fignum, name, pclass, y_max):
    """Plot the families found within one passenger class and save the figure.

    Args:
        fignum: matplotlib figure number to draw into; the figure is
            cleared before plotting.
        name: prefix of the plot title.  ``'Class Families'`` is appended
            directly, without a separator.
        pclass: value compared against ``ds.pclass`` to select passengers.
            The image is written to ``'<pclass + 1>_class_families.png'``.
        y_max: upper limit of the y axis.
    """
    def splice(ds):
        # Restrict a data set to the rows of the requested class.
        return ds.splice(ds.pclass == pclass)

    train = splice(TitanicDataSet.get_train())
    test = splice(TitanicDataSet.get_test())

    families = construct_family_components(train, test)
    families = sorted(families, key=lambda f: len(f.nodes))
    families = [f for f in families if not f.difficult_parent_child]

    frames = []
    for f in families:
        nuclear_families, _extra_nodes, extra_edges = find_nuclear_families(f)
        for nf in nuclear_families:
            frames.append(FamilyFrame(nf.mother, nf.father, nf.children))
        for e in extra_edges:
            if e.definitive_spouse:
                a, b = e.a, e.b
                # Put the partner whose sex is not 0 first.
                # NOTE(review): presumably CoupleFrame expects a fixed
                # ordering of the two partners -- confirm the sex encoding.
                if a.a.sex == 0:
                    a, b = b, a
                frames.append(CoupleFrame(a, b))

    # Couples sort ahead of family frames; ties are broken by member count.
    frames.sort(key=lambda f: (not isinstance(f, CoupleFrame), f.n_members))
    for f in frames:
        f.setup()
        f.scale(1.1)

    fp = FramePlacer(11, 0.5, [0.5, 0.2])
    fp.place_frames(frames)

    plt.figure(fignum)
    plt.clf()
    for m, c, ps in fp.collect_points():
        plt.plot(ps[:, 0], ps[:, 1], linestyle='None',
                 marker=m, color=c, ms=9)

    lines = LineCollection(fp.collect_lines(), colors='k',
                           linestyles='solid')
    plt.gca().add_collection(lines)
    plt.title(name + 'Class Families')

    def legend_entry(text, **kwds):
        # Plotted at (-1, -1), which lies outside the axis limits set
        # below, so only the legend entry is visible.
        plt.plot([-1, -1], [-1, -1], label=text, **kwds)

    legend_entry('Female', marker='D', linestyle='None',
                 markerfacecolor='white', color='k')
    legend_entry('Male', marker='o', linestyle='None',
                 markerfacecolor='white', color='k')
    legend_entry('Survived', marker='s', linestyle='None',
                 markerfacecolor=(0, 1, 0), markeredgecolor='white')
    legend_entry('Died', marker='s', linestyle='None',
                 markerfacecolor='r', markeredgecolor='white')
    # Label spelling corrected (was 'Unkown').
    legend_entry('Unknown', marker='s', linestyle='None',
                 markerfacecolor='k', markeredgecolor='white')
    plt.legend(loc='upper left', numpoints=1, frameon=False, ncol=3)

    plt.xlim(0, 12)
    plt.ylim(-0.1, y_max)
    plt.xticks([])
    plt.yticks([])
    plt.draw()

    # Save before show(): when show() blocks, the figure may already be
    # closed by the time it returns and savefig would write an empty image.
    plt.savefig('%d_class_families.png' % (pclass + 1,),
                bbox_inches='tight', pad_inches=0.1)
    plt.show()
def construct_family_components(train=None, test=None, tune=True):
    '''Entry point for finding relationships.

    Returns a list of graph components (graphlib.Component) where the
    nodes are individuals (Person) and edges are relationships
    (RelationEdge).

    Args:
        train: training data set; when None, ``TitanicDataSet.get_train()``
            is loaded at call time.
        test: test data set; when None, ``TitanicDataSet.get_test()`` is
            loaded at call time.
        tune: when true, each family is passed through
            ``tune_family_relations`` before being returned.
    '''
    # Defaults are resolved per call.  Putting the loader calls in the
    # signature would run them once, when the function is defined (i.e. on
    # import), and share that single result between all calls.
    if train is None:
        train = TitanicDataSet.get_train()
    if test is None:
        test = TitanicDataSet.get_test()

    lnb = LastNameBuilder()
    add_last_names(lnb, train)
    add_last_names(lnb, test)
    last_name_graph = lnb.get_graph()

    return [
        tune_family_relations(f) if tune else f
        for c in last_name_graph.components
        for f in build_relations(c)
    ]
def construct_family_components(train=None, test=None, tune=True):
    '''Entry point for finding relationships.

    Last names from both data sets are fed to a ``LastNameBuilder``; every
    component of the resulting last-name graph is expanded with
    ``build_relations``.

    Args:
        train: training data set.  ``None`` (the default) loads
            ``TitanicDataSet.get_train()`` when the function is called.
        test: test data set.  ``None`` (the default) loads
            ``TitanicDataSet.get_test()`` when the function is called.
        tune: if true, apply ``tune_family_relations`` to every family.

    Returns:
        A list of graph components (graphlib.Component) where the nodes
        are individuals (Person) and edges are relationships
        (RelationEdge).
    '''
    # Default argument expressions are evaluated once at definition time;
    # loading lazily avoids reading the data on import and avoids sharing
    # one data set object between unrelated calls.
    train = TitanicDataSet.get_train() if train is None else train
    test = TitanicDataSet.get_test() if test is None else test

    lnb = LastNameBuilder()
    for data_set in (train, test):
        add_last_names(lnb, data_set)
    last_name_graph = lnb.get_graph()

    return [
        tune_family_relations(f) if tune else f
        for c in last_name_graph.components
        for f in build_relations(c)
    ]
def main():
    """Display what is left of each family once nuclear families are removed.

    For every family component, ``find_nuclear_families`` is run, the
    component is torn down, and a new ``Component`` is built from the extra
    nodes and edges.  These are collected into a batch; once a batch holds
    more than 40 nodes it is passed to ``display_graph`` with a running
    index and a new batch is started.
    """
    train = TitanicDataSet.get_train()
    test = TitanicDataSet.get_test()
    components = construct_family_components(train, test)

    batch = []
    graph_index = 0
    for component in components:
        # Single-node components are included; no size filter is applied.
        _nuclear, leftover_nodes, leftover_edges = \
            find_nuclear_families(component)
        component.tear_down()
        batch.append(Component(leftover_nodes, leftover_edges))

        batched_nodes = sum(len(item.nodes) for item in batch)
        if batched_nodes > 40:
            display_graph(graph_index, batch)
            graph_index += 1
            batch = []

    # Flush whatever did not reach the threshold.
    if batch:
        display_graph(graph_index, batch)
def main():
    """Display all multi-person family components in batches.

    Components are ordered by node count and single-node components are
    dropped.  Before a component is added, the current batch is passed to
    ``display_graph`` (with a running index) if it already holds more than
    ``MAX_FILE_NODES`` nodes.
    """
    train = TitanicDataSet.get_train()
    test = TitanicDataSet.get_test()
    components = sorted(
        construct_family_components(train, test),
        key=lambda component: len(component.nodes),
    )

    batch = []
    graph_index = 0
    for component in components:
        if len(component.nodes) != 1:
            # No further filtering (e.g. on unknown ages or on
            # difficult_parent_child) is applied here.
            batched_nodes = sum(len(item.nodes) for item in batch)
            if batched_nodes > MAX_FILE_NODES:
                display_graph(graph_index, batch)
                graph_index += 1
                batch = []
            batch.append(component)

    # Flush the final, possibly partial, batch.
    if batch:
        display_graph(graph_index, batch)
def synthesize(name, people, original_ds):
    """Build a synthesized data set for *people* and pickle it to disk.

    The people are re-ordered to match the row order of ``original_ds``
    (matched on name).  Each row consists of the person's base attributes
    passed through ``coere_attribute`` followed by the values computed by
    the callables in ``synthesized_attributes``.  The resulting
    ``TitanicDataSet`` is stored in the module-level ``ds`` and written to
    ``data/synthesized/<name>.p`` with pickle protocol 2.

    Args:
        name: base name of the output file (without directory/extension).
        people: the individuals to export; must be non-empty and have the
            same length as ``original_ds``.
        original_ds: data set whose ``name`` column defines the row order.

    Raises:
        AssertionError: if the lengths differ or the names do not line up
            after sorting.
        ValueError: if a person's name does not occur in ``original_ds``.
    """
    global ds

    # Correct the order of individuals so rows line up with original_ds.
    assert len(people) == len(original_ds)
    name_orders = list(original_ds.name)
    people = sorted(people, key=lambda p: name_orders.index(p.a.name))
    assert all(p.a.name == expected
               for p, expected in zip(people, original_ds.name))

    base_keys = people[0].a._fields
    synthesized_keys, calculates = zip(*synthesized_attributes)
    keys = base_keys + synthesized_keys

    # Explicit lists rather than map(): a lazy map object cannot be
    # concatenated with a list.
    rows = [
        [coere_attribute(value) for value in p.a] + [c(p) for c in calculates]
        for p in people
    ]
    cols = [np.array(col) for col in zip(*rows)]

    ds = TitanicDataSet(keys, cols, people[0].survived is not None)

    # Pickle protocol 2 is a binary format, so the file must be opened in
    # binary mode; text mode can corrupt the stream or be rejected outright.
    with open('data/synthesized/%s.p' % (name, ), 'wb') as fp:
        pickle.dump(ds, fp, 2)
def _collect_class_frames(families):
    """Return the FamilyFrame/CoupleFrame objects for the given families."""
    frames = []
    for family in families:
        nuclear_families, _extra_nodes, extra_edges = \
            find_nuclear_families(family)
        for nf in nuclear_families:
            frames.append(FamilyFrame(nf.mother, nf.father, nf.children))
        for edge in extra_edges:
            if not edge.definitive_spouse:
                continue
            a, b = edge.a, edge.b
            # Put the partner whose sex is not 0 first.
            # NOTE(review): presumably CoupleFrame expects a fixed ordering
            # of the two partners -- confirm the sex encoding.
            if a.a.sex == 0:
                a, b = b, a
            frames.append(CoupleFrame(a, b))
    return frames


def _add_class_legend_entries():
    """Add the marker/colour legend entries to the current axes."""
    def entry(text, **kwds):
        # Plotted at (-1, -1); plot_class sets axis limits that exclude
        # this point, so only the legend entry shows up.
        plt.plot([-1, -1], [-1, -1], label=text, **kwds)

    entry('Female', marker='D', linestyle='None',
          markerfacecolor='white', color='k')
    entry('Male', marker='o', linestyle='None',
          markerfacecolor='white', color='k')
    entry('Survived', marker='s', linestyle='None',
          markerfacecolor=(0, 1, 0), markeredgecolor='white')
    entry('Died', marker='s', linestyle='None',
          markerfacecolor='r', markeredgecolor='white')
    # Label spelling corrected (was 'Unkown').
    entry('Unknown', marker='s', linestyle='None',
          markerfacecolor='k', markeredgecolor='white')


def plot_class(fignum, name, pclass, y_max):
    """Plot the families found within one passenger class and save the figure.

    Args:
        fignum: matplotlib figure number to draw into; the figure is
            cleared before plotting.
        name: prefix of the plot title.  ``'Class Families'`` is appended
            directly, without a separator.
        pclass: value compared against ``ds.pclass`` to select passengers.
            The image is written to ``'<pclass + 1>_class_families.png'``.
        y_max: upper limit of the y axis.
    """
    def splice(ds):
        # Restrict a data set to the rows of the requested class.
        return ds.splice(ds.pclass == pclass)

    train = splice(TitanicDataSet.get_train())
    test = splice(TitanicDataSet.get_test())

    families = construct_family_components(train, test)
    families = sorted(families, key=lambda f: len(f.nodes))
    families = [f for f in families if not f.difficult_parent_child]

    frames = _collect_class_frames(families)
    # Couples sort ahead of family frames; ties are broken by member count.
    frames.sort(key=lambda f: (not isinstance(f, CoupleFrame), f.n_members))
    for frame in frames:
        frame.setup()
        frame.scale(1.1)

    fp = FramePlacer(11, 0.5, [0.5, 0.2])
    fp.place_frames(frames)

    plt.figure(fignum)
    plt.clf()
    for m, c, ps in fp.collect_points():
        plt.plot(ps[:, 0], ps[:, 1], linestyle='None',
                 marker=m, color=c, ms=9)

    lines = LineCollection(fp.collect_lines(), colors='k',
                           linestyles='solid')
    plt.gca().add_collection(lines)
    plt.title(name + 'Class Families')

    _add_class_legend_entries()
    plt.legend(loc='upper left', numpoints=1, frameon=False, ncol=3)

    plt.xlim(0, 12)
    plt.ylim(-0.1, y_max)
    plt.xticks([])
    plt.yticks([])
    plt.draw()

    # Save before show(): when show() blocks, the figure may already be
    # closed by the time it returns and savefig would write an empty image.
    plt.savefig('%d_class_families.png' % (pclass + 1, ),
                bbox_inches='tight', pad_inches=0.1)
    plt.show()