Ejemplo n.º 1
0
 def top_10_decision_path(crosses, crossname):
     """Write a selection summary table and decision-path images to the report.

     For every entry of ``crosses`` the test frames of ``entry[crossname]``
     are concatenated, a subset is picked with ``ana.select2(..., 1, 10)``
     and one markdown table row is printed to the module-level ``out_file``.
     For every selected row the decision path through the first fold's
     classifier is exported as ``.dot`` and rendered to ``.png`` under
     ``<root>/data/cross``.  After the table, markdown image links to the
     rendered files are appended to ``out_file``.

     Args:
         crosses: iterable of mappings; each must provide ``"symsetname"``
             and ``crossname`` keys.
         crossname: key whose value is a sequence of fold objects exposing
             ``df_test``; the first one must also expose ``classifier``.
     """
     # Imported once per call instead of once per rendered row.
     import pydot

     # At most this many images per symbol set are linked in the report.
     max_links = 10

     print('|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|',
           file=out_file)
     print('|------|------|------|------|------|----------|---|---|',
           file=out_file)

     rendered = []
     for each in crosses:
         cross = each[crossname]
         df_test = pd.concat([c.df_test for c in cross])
         df_select, _df_year, _df_month = ana.select2(
             confer.score1, confer.score2, df_test, 1, 10)

         # Pull plain scalars out of the frame: formatting a one-element
         # Series with %.2f relies on implicit float(Series), which newer
         # pandas releases deprecate.
         # NOTE(review): "min" is the last row and "max" the first row, which
         # presumes df_select is ordered by descending "pred" - confirm
         # against ana.select2.
         if len(df_select) > 0:
             pred_last = df_select["pred"].iloc[-1]
             pred_first = df_select["pred"].iloc[0]
         else:
             pred_last = pred_first = 0

         print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' %
               (each["symsetname"], ana.accurate(df_test, confer.score1),
                ana.accurate(df_select, confer.score1),
                ana.accurate(df_test, confer.score2),
                ana.accurate(df_select, confer.score2), len(df_select),
                pred_last, pred_first),
               file=out_file)

         # Feature names are the same for every selected row; look them up once.
         feat_names = base.get_feat_names(df_select)
         np_feat = df_select[feat_names].values
         classifier = cross[0].classifier
         for i, x in enumerate(np_feat):
             print(x)
             dot_file = os.path.join(
                 root, "data", "cross",
                 'top_10_decision_path-%s-%d' % (each["symsetname"], i))
             decision_path.export_decision_path2(
                 classifier, x, dot_file + ".dot", feature_names=feat_names)
             (graph, ) = pydot.graph_from_dot_file(dot_file + ".dot")
             graph.write_png(dot_file + ".png")
             if i < max_links:
                 rendered.append(dot_file)

     # Link only images produced by this run; unconditionally emitting ten
     # links per symbol set pointed at missing or stale files whenever fewer
     # rows were selected.
     for dot_file in rendered:
         print("![](%s.png)" % (dot_file), file=out_file)
Ejemplo n.º 2
0
 def accurate(crosses, crossname):
     """Append an accuracy table for several selection thresholds to ``out_file``.

     For each threshold in ``[2000, 1000, 500, 200, 100]`` and each entry of
     ``crosses``, the test frames of ``entry[crossname]`` are concatenated,
     sorted by descending ``"pred"`` and passed to
     ``ana.select2(..., 2, threshold)``.  One markdown row is printed per
     (threshold, entry) pair with ``ana.accurate`` for both configured scores
     on the full and the selected frame, the selection size, and the ``pred``
     of the last ("min") and first ("max") selected row.

     Args:
         crosses: iterable of mappings; each must provide ``"symsetname"``
             and ``crossname`` keys.
         crossname: key whose value is an iterable of objects exposing
             ``df_test``.
     """
     print('\n\n|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
     print('|------|------|------|------|------|----------|---|---|', file=out_file)
     for threshold in [2000, 1000, 500, 200, 100]:
         for each in crosses:
             cross = each[crossname]
             df_test = pd.concat([c.df_test for c in cross])
             df_test.sort_values("pred", ascending=False, inplace=True)
             df_select, _df_year, _df_month = ana.select2(
                 confer.score1, confer.score2, df_test, 2, threshold)

             # Extract scalars explicitly: '%.2f' % one_element_series depends
             # on implicit float(Series), deprecated in newer pandas releases.
             # NOTE(review): last row = "min" / first row = "max" presumes
             # ana.select2 keeps the descending "pred" order - confirm.
             if len(df_select) > 0:
                 pred_min = df_select["pred"].iloc[-1]
                 pred_max = df_select["pred"].iloc[0]
             else:
                 pred_min = pred_max = 0

             print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|'
                   % (
                       each["symsetname"],
                       ana.accurate(df_test, confer.score1),
                       ana.accurate(df_select, confer.score1),
                       ana.accurate(df_test, confer.score2),
                       ana.accurate(df_select, confer.score2),
                       len(df_select),
                       pred_min,
                       pred_max), file=out_file)
Ejemplo n.º 3
0
 def accurate(crosses, crossname):
     """Print a markdown accuracy table for a range of selection thresholds.

     Iterates the thresholds 2000, 1000, 500, 200 and 100; for each one and
     for each entry of ``crosses`` it concatenates the ``df_test`` frames found
     under ``entry[crossname]``, sorts them by ``"pred"`` in descending order,
     selects a subset via ``ana.select2(confer.score1, confer.score2, df_test,
     2, threshold)`` and writes one table row to the module-level ``out_file``.

     Row columns: symbol-set name, ``ana.accurate`` on the full and on the
     selected frame for ``confer.score1`` and ``confer.score2``, number of
     selected rows, then the ``pred`` value of the last and of the first
     selected row (``0`` for both when nothing was selected).

     Args:
         crosses: iterable of mappings providing ``"symsetname"`` and
             ``crossname`` keys.
         crossname: key under which each mapping stores its fold objects
             (each exposing ``df_test``).
     """
     print('\n\n|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|',
           file=out_file)
     print('|------|------|------|------|------|----------|---|---|',
           file=out_file)
     for threshold in [2000, 1000, 500, 200, 100]:
         for each in crosses:
             cross = each[crossname]
             df_test = pd.concat([c.df_test for c in cross])
             df_test.sort_values("pred", ascending=False, inplace=True)
             df_select, _df_year, _df_month = ana.select2(
                 confer.score1, confer.score2, df_test, 2, threshold)

             # Format real scalars rather than one-element Series; implicit
             # float(Series) conversion is deprecated in newer pandas.
             has_rows = len(df_select) > 0
             # NOTE(review): treating the last row as the minimum assumes the
             # selection is still sorted by descending "pred" - verify.
             lowest = df_select["pred"].iloc[-1] if has_rows else 0
             highest = df_select["pred"].iloc[0] if has_rows else 0

             print(
                 '|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|' %
                 (each["symsetname"], ana.accurate(df_test, confer.score1),
                  ana.accurate(df_select, confer.score1),
                  ana.accurate(df_test, confer.score2),
                  ana.accurate(df_select, confer.score2), len(df_select),
                  lowest, highest),
                 file=out_file)
Ejemplo n.º 4
0
 def top_10_decision_path(crosses, crossname):
     """Summarise the selection per symbol set and render its decision paths.

     For each entry of ``crosses``:

     * concatenates the ``df_test`` frames stored under ``entry[crossname]``;
     * selects rows with ``ana.select2(confer.score1, confer.score2, df_test,
       1, 10)``;
     * prints one markdown table row to the module-level ``out_file``;
     * exports, for every selected row, the decision path through
       ``entry[crossname][0].classifier`` to a ``.dot`` file and converts it
       to ``.png`` (both under ``<root>/data/cross``).

     Once all entries are processed, markdown image links for the rendered
     PNG files are written to ``out_file`` (at most ten per symbol set).

     Args:
         crosses: iterable of mappings providing ``"symsetname"`` and
             ``crossname`` keys.
         crossname: key whose value is a sequence of fold objects with a
             ``df_test`` attribute; the first also needs ``classifier``.
     """
     # One import per call; the original re-executed it for every row.
     import pydot

     link_limit = 10  # images linked per symbol set, matching the old range(10)

     print('|symset|glo_l1|sel_l1|glo_l2|sel_l2|select_len|min|max|', file=out_file)
     print('|------|------|------|------|------|----------|---|---|', file=out_file)

     image_stems = []
     for each in crosses:
         cross = each[crossname]
         df_test = pd.concat([c.df_test for c in cross])
         df_select, _df_year, _df_month = ana.select2(confer.score1, confer.score2, df_test,
                                                      1, 10)

         # Use scalar values for the %.2f fields; passing a one-element Series
         # depends on implicit float(Series), deprecated in newer pandas.
         # NOTE(review): "min" = last row and "max" = first row presumes the
         # selection is ordered by descending "pred" - confirm in ana.select2.
         has_rows = len(df_select) > 0
         pred_tail = df_select["pred"].iloc[-1] if has_rows else 0
         pred_head = df_select["pred"].iloc[0] if has_rows else 0

         print('|%s|%.2f|%.2f|%.2f|%.2f|%d|%.2f|%.2f|'
               % (
                   each["symsetname"],
                   ana.accurate(df_test, confer.score1),
                   ana.accurate(df_select, confer.score1),
                   ana.accurate(df_test, confer.score2),
                   ana.accurate(df_select, confer.score2),
                   len(df_select),
                   pred_tail,
                   pred_head), file=out_file)

         # Loop-invariant: resolve the feature column names once per symbol set.
         feat_names = base.get_feat_names(df_select)
         np_feat = df_select[feat_names].values
         classifier = cross[0].classifier
         for i, x in enumerate(np_feat):
             print(x)
             dot_file = os.path.join(root, "data", "cross",
                                     'top_10_decision_path-%s-%d'
                                     % (each["symsetname"], i))
             decision_path.export_decision_path2(classifier, x, dot_file + ".dot",
                                                 feature_names=feat_names)
             (graph,) = pydot.graph_from_dot_file(dot_file + ".dot")
             graph.write_png(dot_file + ".png")
             if i < link_limit:
                 image_stems.append(dot_file)

     # Only reference images that were actually written above; a fixed
     # range(10) produced dangling links when fewer rows were selected.
     for dot_file in image_stems:
         print("![](%s.png)" % (dot_file), file=out_file)
Ejemplo n.º 5
0
#!/usr/bin/env python2.7