# NOTE(review): the line breaks and indentation of this fragment have been
# lost -- every statement below sits on one physical line -- so the nesting
# described here is a reading of the statement order, not something the text
# proves.  Restore the layout from the original file before running it.
# The syntax is Python 2 (print statement, file() builtin).
#
# What the statements do, in order:
#   * Guarded by options.has_class: create evals_dict0 and store under
#     'Grant.Status' the values of evals_dict['Grant.Status'] converted
#     with int().
#   * Guarded by DO_COMPOUND_RULES: print len(compound_rules) and num_rules,
#     then for each of the first num_rules compound rules collect the
#     attribute named by each (attr, _, _) triple, build one row of
#     data_dict[attr][instance] values per instance in range(num_instances),
#     and store a 1/0 list (truthiness of evaluate_compound_rule(compound,
#     row)) in evals_dict0 under compound_rule_to_string(compound).
#   * `else:` (presumably paired with DO_COMPOUND_RULES -- TODO confirm): for
#     each of the first num_rules entries of sorted_keys, unpack the rule as
#     (attr, _, _), evaluate it with evaluate_rule against every value in
#     data_dict[attr], and store the 1/0 list under rule_to_string(rule).
#   * evals_dict0 and evals_header are passed to analyse_evals_dict.
#   * csv.writeCsvDict(knn_file_csv, evals_dict, evals_header) is called.
#     NOTE(review): this passes evals_dict, not the evals_dict0 built above --
#     confirm that is intended.  writeCsvDict is not a function of the
#     standard-library csv module, so `csv` is presumably a project-local
#     module here -- verify against the file's imports.
#   * The trailing `if False:` section is disabled.  As written it would
#     build CSV text with the header
#     'Grant.Application.ID,Grant.Status,Success' and one row of
#     k, r['prob1'], r['predicted1'] per key of `results` in sorted order,
#     and write it to filename + '.csv' through file(...).write(...) without
#     an explicit close.
# The loop index `i` from both enumerate() calls is not used in the visible
# statements.
if options.has_class: evals_dict0 = {} evals_dict0['Grant.Status'] = [int(s) for s in evals_dict['Grant.Status']] if DO_COMPOUND_RULES: print 'compound_rules:', len(compound_rules) print 'num_rules:', num_rules for i, compound in enumerate(compound_rules[:num_rules]): attrs = [attr for (attr, _, _) in compound] val_rows = [[data_dict[attr][instance] for attr in attrs] for instance in range(num_instances)] evals = [1 if evaluate_compound_rule(compound, vals) else 0 for vals in val_rows] evals_dict0[compound_rule_to_string(compound)] = evals else: for i, rule in enumerate(sorted_keys[:num_rules]): attr, _, _ = rule vals = data_dict[attr] evals = [1 if evaluate_rule(rule, val) else 0 for val in vals] evals_dict0[rule_to_string(rule)] = evals analyse_evals_dict(evals_dict0, evals_header) csv.writeCsvDict(knn_file_csv, evals_dict, evals_header) if False: out_filename = filename + '.csv' out_lines = ['Grant.Application.ID,Grant.Status,Success'] for k in sorted(results.keys()): r = results[k] out_lines.append(','.join([str(x) for x in [k, r['prob1'], r['predicted1']]])) out_data = '\n'.join(out_lines) file(out_filename, 'wt').write(out_data)
} if False: histo = getHistogram(sales_mo_filtered, measured_keys, stats, 20000) exit() if False: keys = [k+':'+t for k in measured_keys for t in ['level','number']] columns = [histo[k][i] for k in measured_keys for i in [0,1]] print len(columns), [len(c) for c in columns] histo_cols = dict(zip(keys, columns)) if False: histo_cols = {} for k in measured_keys: for i in [0,1]: ck = k + ' ' + ['level','number'][i] histo_cols[ck] = [histo[k][n][i] for n in range(len(histo[k]))] csv.writeCsvDict(sales_mo_histo, histo_cols) if False: print zip(h1, h2) print [x1 == x2 for x1,x2 in zip(h1,h2)] print '======================================' if False: groups = {} for h in h1: parts = h.split('_') if len(parts) >= 2: groups[parts[0]] = [] else: groups[h] = [h] for h in h1: