def top_10_decision_path(crosses, crossname):
    """Write a summary table and decision-path images for each symbol set.

    For every entry of ``crosses`` the test frames of ``each[crossname]`` are
    concatenated and passed to ``ana.select2(confer.score1, confer.score2,
    df_test, 1, 10)``. One pipe-delimited table row per entry is written to
    ``out_file``; then, for every selected row, a decision path is exported
    as ``<root>/data/cross/top_10_decision_path-<symsetname>-<i>.dot`` and
    rendered to ``.png`` with pydot. Finally the image links are appended to
    ``out_file``.

    Args:
        crosses: Iterable of mappings. Each must provide ``"symsetname"`` and
            the key ``crossname``.
        crossname: Key selecting, in each mapping, a sequence of objects that
            expose ``df_test`` and (at least the first one) ``classifier``.

    Table columns:
        glo_l1 / glo_l2: ``ana.accurate`` over the full test frame for
            ``confer.score1`` / ``confer.score2``.
        sel_l1 / sel_l2: the same over the selected frame.
        select_len: number of selected rows.
        min / max: ``pred`` of the last / first selected row (0 when nothing
            was selected).
            NOTE(review): the labels assume ``ana.select2`` returns rows
            ordered by ``pred`` descending -- confirm.
    """
    print('|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
    print('|------|------|------|------|------|----------|---|---|', file=out_file)

    # (symsetname, number of images to link), in report order.
    rendered = []
    for each in crosses:
        cross = each[crossname]
        df_test = pd.concat([c.df_test for c in cross])
        df_select, _, _ = ana.select2(confer.score1, confer.score2, df_test, 1, 10)

        # Pull plain scalars out of the frame: handing a one-element Series
        # to '%.2f' relies on implicit float(Series), which pandas deprecates.
        if len(df_select) > 0:
            pred_last = df_select["pred"].iloc[-1]
            pred_first = df_select["pred"].iloc[0]
        else:
            pred_last = pred_first = 0

        print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' % (
            each["symsetname"],
            ana.accurate(df_test, confer.score1),
            ana.accurate(df_select, confer.score1),
            ana.accurate(df_test, confer.score2),
            ana.accurate(df_select, confer.score2),
            len(df_select),
            pred_last,
            pred_first), file=out_file)

        feat_names = base.get_feat_names(df_select)
        np_feat = df_select[feat_names].values
        classifier = cross[0].classifier
        for i, x in enumerate(np_feat):
            print(x)
            dot_file = os.path.join(
                root, "data", "cross",
                'top_10_decision_path-%s-%d' % (each["symsetname"], i))
            decision_path.export_decision_path2(
                classifier, x, dot_file + ".dot", feature_names=feat_names)
            # Imported lazily so pydot is only required when there is
            # actually something to render.
            import pydot
            (graph,) = pydot.graph_from_dot_file(dot_file + ".dot")
            graph.write_png(dot_file + ".png")

        # Link only images produced above (at most the 10 the report shows),
        # so a short selection does not leave dangling image links.
        rendered.append((each["symsetname"], min(len(np_feat), 10)))

    for symsetname, count in rendered:
        for i in range(count):
            dot_file = os.path.join(
                root, "data", "cross",
                'top_10_decision_path-%s-%d' % (symsetname, i))
            print("![](%s.png)" % (dot_file), file=out_file)
def accurate(crosses, crossname):
    """Write an accuracy table to ``out_file`` for several selection thresholds.

    For each threshold in ``[2000, 1000, 500, 200, 100]`` and each entry of
    ``crosses``, the test frames of ``each[crossname]`` are concatenated,
    sorted by ``pred`` descending, and passed to ``ana.select2(confer.score1,
    confer.score2, df_test, 2, threshold)``. One pipe-delimited row is
    written per (threshold, entry) pair.

    Args:
        crosses: Iterable of mappings. Each must provide ``"symsetname"`` and
            the key ``crossname``. It is iterated once per threshold.
        crossname: Key selecting, in each mapping, a sequence of objects that
            expose ``df_test``.

    Table columns:
        glo_l1 / glo_l2: ``ana.accurate`` over the full test frame for
            ``confer.score1`` / ``confer.score2``.
        sel_l1 / sel_l2: the same over the selected frame.
        select_len: number of selected rows.
        min / max: ``pred`` of the last / first selected row (0 when nothing
            was selected).
            NOTE(review): the labels assume ``ana.select2`` keeps the
            descending ``pred`` order of its input -- confirm.
    """
    print('\n\n|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
    print('|------|------|------|------|------|----------|---|---|', file=out_file)

    for threshold in [2000, 1000, 500, 200, 100]:
        for each in crosses:
            cross = each[crossname]
            df_test = pd.concat([c.df_test for c in cross])
            df_test.sort_values("pred", ascending=False, inplace=True)
            df_select, _, _ = ana.select2(
                confer.score1, confer.score2, df_test, 2, threshold)

            # Pull plain scalars out of the frame: handing a one-element
            # Series to '%.2f' relies on implicit float(Series), which
            # pandas deprecates.
            if len(df_select) > 0:
                pred_last = df_select["pred"].iloc[-1]
                pred_first = df_select["pred"].iloc[0]
            else:
                pred_last = pred_first = 0

            print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' % (
                each["symsetname"],
                ana.accurate(df_test, confer.score1),
                ana.accurate(df_select, confer.score1),
                ana.accurate(df_test, confer.score2),
                ana.accurate(df_select, confer.score2),
                len(df_select),
                pred_last,
                pred_first), file=out_file)
def accurate(crosses, crossname):
    """Write an accuracy table to ``out_file`` for several selection thresholds.

    For each threshold in ``[2000, 1000, 500, 200, 100]`` and each entry of
    ``crosses``, the test frames of ``each[crossname]`` are concatenated,
    sorted by ``pred`` descending, and passed to ``ana.select2(confer.score1,
    confer.score2, df_test, 2, threshold)``. One pipe-delimited row is
    written per (threshold, entry) pair.

    Args:
        crosses: Iterable of mappings. Each must provide ``"symsetname"`` and
            the key ``crossname``. It is iterated once per threshold.
        crossname: Key selecting, in each mapping, a sequence of objects that
            expose ``df_test``.

    Table columns:
        glo_l1 / glo_l2: ``ana.accurate`` over the full test frame for
            ``confer.score1`` / ``confer.score2``.
        sel_l1 / sel_l2: the same over the selected frame.
        select_len: number of selected rows.
        min / max: ``pred`` of the last / first selected row (0 when nothing
            was selected).
            NOTE(review): the labels assume ``ana.select2`` keeps the
            descending ``pred`` order of its input -- confirm.
    """
    print('\n\n|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
    print('|------|------|------|------|------|----------|---|---|', file=out_file)

    for threshold in [2000, 1000, 500, 200, 100]:
        for each in crosses:
            cross = each[crossname]
            df_test = pd.concat([c.df_test for c in cross])
            df_test.sort_values("pred", ascending=False, inplace=True)
            df_select, _, _ = ana.select2(
                confer.score1, confer.score2, df_test, 2, threshold)

            # Pull plain scalars out of the frame: handing a one-element
            # Series to '%.2f' relies on implicit float(Series), which
            # pandas deprecates.
            if len(df_select) > 0:
                pred_last = df_select["pred"].iloc[-1]
                pred_first = df_select["pred"].iloc[0]
            else:
                pred_last = pred_first = 0

            print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' % (
                each["symsetname"],
                ana.accurate(df_test, confer.score1),
                ana.accurate(df_select, confer.score1),
                ana.accurate(df_test, confer.score2),
                ana.accurate(df_select, confer.score2),
                len(df_select),
                pred_last,
                pred_first), file=out_file)
def top_10_decision_path(crosses, crossname):
    """Write a summary table and decision-path images for each symbol set.

    For every entry of ``crosses`` the test frames of ``each[crossname]`` are
    concatenated and passed to ``ana.select2(confer.score1, confer.score2,
    df_test, 1, 10)``. One pipe-delimited table row per entry is written to
    ``out_file``; then, for every selected row, a decision path is exported
    as ``<root>/data/cross/top_10_decision_path-<symsetname>-<i>.dot`` and
    rendered to ``.png`` with pydot. Finally the image links are appended to
    ``out_file``.

    Args:
        crosses: Iterable of mappings. Each must provide ``"symsetname"`` and
            the key ``crossname``.
        crossname: Key selecting, in each mapping, a sequence of objects that
            expose ``df_test`` and (at least the first one) ``classifier``.

    Table columns:
        glo_l1 / glo_l2: ``ana.accurate`` over the full test frame for
            ``confer.score1`` / ``confer.score2``.
        sel_l1 / sel_l2: the same over the selected frame.
        select_len: number of selected rows.
        min / max: ``pred`` of the last / first selected row (0 when nothing
            was selected).
            NOTE(review): the labels assume ``ana.select2`` returns rows
            ordered by ``pred`` descending -- confirm.
    """
    print('|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
    print('|------|------|------|------|------|----------|---|---|', file=out_file)

    # (symsetname, number of images to link), in report order.
    rendered = []
    for each in crosses:
        cross = each[crossname]
        df_test = pd.concat([c.df_test for c in cross])
        df_select, _, _ = ana.select2(confer.score1, confer.score2, df_test, 1, 10)

        # Pull plain scalars out of the frame: handing a one-element Series
        # to '%.2f' relies on implicit float(Series), which pandas deprecates.
        if len(df_select) > 0:
            pred_last = df_select["pred"].iloc[-1]
            pred_first = df_select["pred"].iloc[0]
        else:
            pred_last = pred_first = 0

        print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' % (
            each["symsetname"],
            ana.accurate(df_test, confer.score1),
            ana.accurate(df_select, confer.score1),
            ana.accurate(df_test, confer.score2),
            ana.accurate(df_select, confer.score2),
            len(df_select),
            pred_last,
            pred_first), file=out_file)

        feat_names = base.get_feat_names(df_select)
        np_feat = df_select[feat_names].values
        classifier = cross[0].classifier
        for i, x in enumerate(np_feat):
            print(x)
            dot_file = os.path.join(
                root, "data", "cross",
                'top_10_decision_path-%s-%d' % (each["symsetname"], i))
            decision_path.export_decision_path2(
                classifier, x, dot_file + ".dot", feature_names=feat_names)
            # Imported lazily so pydot is only required when there is
            # actually something to render.
            import pydot
            (graph,) = pydot.graph_from_dot_file(dot_file + ".dot")
            graph.write_png(dot_file + ".png")

        # Link only images produced above (at most the 10 the report shows),
        # so a short selection does not leave dangling image links.
        rendered.append((each["symsetname"], min(len(np_feat), 10)))

    for symsetname, count in rendered:
        for i in range(count):
            dot_file = os.path.join(
                root, "data", "cross",
                'top_10_decision_path-%s-%d' % (symsetname, i))
            print("![](%s.png)" % (dot_file), file=out_file)
#!/usr/bin/env python2.7