Example #1
0
def gen_samples(x_indexes, cust_num_protol_nums, feats, labels):
    '''
    Convert raw feature/label records into a ``Samples`` collection.

    Args:
        x_indexes: passed unchanged to the ``Samples`` constructor
        cust_num_protol_nums: lookup indexed by customer number; the value
            found is handed to ``Sample`` (presumably the customer's
            protocol numbers)
        feats: iterable of records; item 0 is the customer number and
            item 1 is the ``X`` value of the sample
        labels: iterable of records paired positionally with ``feats``;
            item 1 is the ``y`` value of the sample

    Returns:
        The ``Samples`` instance holding one ``Sample`` per (feat, label) pair.
    '''
    collection = Samples(x_indexes)
    # zip() stops at the shorter of the two inputs, so unpaired records are ignored.
    for feat_row, label_row in zip(feats, labels):
        customer = feat_row[0]
        record = Sample(
            customer,
            cust_num_protol_nums[customer],
            feat_row[1],
            label_row[1],
        )
        collection.append(record)
    return collection
Example #2
0
def analyze_samples(trn_samples, tst_samples, n, filename):
    '''
    Analyze the risk points of customers predicted as overdue and write them as XML.

    For every test sample whose predicted label equals ``cf.OVERDUE``, each of
    the selected features is compared with the mean and standard deviation of
    that feature over the training samples labelled ``cf.NON_OVERDUE``. Per
    customer, the features are listed by how many standard deviations the
    value lies from that mean, largest first.

    Args:
        trn_samples (Samples): training samples
        tst_samples (Samples): test samples
        n (int): number of most important features to report
        filename (str): path of the result file (XML, written as UTF-8)
    '''
    import io  # local import: io.open accepts ``encoding`` on Python 2 and 3

    x_importances = get_x_importances(trn_samples)
    # Take the n most important features: sort by importance, highest first.
    # NOTE(review): assumes a larger importance value means "more important";
    # confirm against get_x_importances.
    top_n_xs = sorted(x_importances.keys(), key=lambda item: x_importances[item], reverse=True)[: n]

    x_indexes = trn_samples.get_x_indexes()

    # Test customers predicted as overdue.
    tst_pos_samples = Samples(x_indexes, [sample for sample in tst_samples if sample.get_y_pred() == cf.OVERDUE])
    # Training customers labelled as non-overdue: the baseline population.
    trn_neg_samples = Samples(x_indexes, [sample for sample in trn_samples if sample.get_y() == cf.NON_OVERDUE])
    trn_neg_avg_X = trn_neg_samples.get_avg_X()
    trn_neg_std_X = trn_neg_samples.get_std_X()

    # Build the whole document before touching the output file, so a failure
    # while analyzing does not leave a truncated result file behind.
    d = {'samples': {'sample': []}}
    for sample in tst_pos_samples:
        s = {}
        s['cust_num'] = sample.get_cust_num()
        s['protol_nums'] = {'protol_num': sample.get_protol_nums()}
        s['y_pred'] = sample.get_y_pred()
        X = sample.get_X()
        ratios = []
        for name in top_n_xs:
            index = x_indexes[name]
            x = X[index]
            avg_x = trn_neg_avg_X[index]
            std_x = trn_neg_std_X[index]
            deviation = abs(x - avg_x)
            if std_x == 0:
                # Constant feature in the baseline: avoid dividing by zero.
                # Any difference from the baseline is then infinitely unusual.
                ratio = 0.0 if deviation == 0 else float('inf')
            else:
                ratio = deviation / std_x
            ratios.append([name, x, avg_x, std_x, ratio])
        # Most deviating feature first.
        ratios.sort(key=lambda item: item[4], reverse=True)
        s['X'] = {'x': []}
        for name, x, avg_x, std_x, ratio in ratios:
            # Decode only byte strings; text names are used as they are.
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            s['X']['x'].append({'ratio': str(ratio), 'val': str(x), 'avg_val': str(avg_x), 'std_val': str(std_x), '@name': name})
        d['samples']['sample'].append(s)

    xml = xmltodict.unparse(d, pretty=True)
    if isinstance(xml, bytes):
        xml = xml.decode('utf-8')
    # Text mode with an explicit encoding: the file is UTF-8 regardless of
    # the platform default, on both Python 2 and Python 3.
    with io.open(filename, 'w', encoding='utf-8') as outfile:
        outfile.write(xml)