Example #1
def capital_gains_lift(source):
    """
    Computes capital gains lift in top income percentages over time chart
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% income share-including capital gains",
         "Top 10% income share"),
        ("Top 5% income share-including capital gains", "Top 5% income share"),
        ("Top 1% income share-including capital gains", "Top 1% income share"),
        ("Top 0.5% income share-including capital gains",
         "Top 0.5% income share"),
        ("Top 0.1% income share-including capital gains",
         "Top 0.1% income share"),
        ("Top 0.05% income share-including capital gains",
         "Top 0.05% income share"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[1] for col in columns),
                         title="U.S. Capital Gains Income Lift",
                         ylabel="Percentage Difference")
Example #2
def percent_income_share(source):
    """Create Income Share chart"""
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(gds.getDataSetUsingCSV(source))
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Percentage Income Share",
                         ylabel="Percentage")
Example #3
def mean_normalized_percent_income_share(source):
    """Create mean-normalized Income Share chart"""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(dataset)
    return pld.linechart(
        [md.normalize(gds.timeseries(source, col)) for col in columns],
        labels=columns,
        title="Mean Normalized U.S. Percentage Income Share",
        ylabel="Percentage")
Example #4
def generate_report_for_selected_countries(source):
    # Select countries to include
    include = ("United States", "France", "Italy", "Germany", "South Africa",
               "New Zealand")
    # Get dataset from CSV
    data = list(gds.getDataSetUsingDictReader(source, include))
    # Deduplicate and sort so the year axis is deterministic and chronological
    years = sorted(set(gds.extract_years(data)))
    # Generate context
    context = {
        'title': "Average Income per Family, %i - %i" % (min(years), max(years)),
        'years': json.dumps(years),
        'countries': [v[0] for v in data],
        'series': json.dumps(list(gds.extract_series(data, years))),
    }
    # Write HTML with template
    gr.write(context)
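`gr.write` renders the context dict into the HTML report. A plausible sketch using Jinja2; the template directory, template name, and output path are hypothetical, not the project's actual layout:

from jinja2 import Environment, FileSystemLoader

def write(context, template_name="report.html", out_path="report.out.html"):
    """Render the report template with the given context (hypothetical paths)."""
    env = Environment(loader=FileSystemLoader("templates"))
    html = env.get_template(template_name).render(**context)
    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(html)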
Example #5
def average_incomes(source):
    """
    Compares percentage average incomes
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% average income",
        "Top 5% average income",
        "Top 1% average income",
        "Top 0.5% average income",
        "Top 0.1% average income",
        "Top 0.05% average income",
    )
    source = list(dataset)
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Average Income",
                         ylabel="2008 US Dollars")
Example #6
def income_composition(source):
    """
    Compares income composition
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income composition-Wages, salaries and pensions",
        "Top 10% income composition-Dividends",
        "Top 10% income composition-Interest Income",
        "Top 10% income composition-Rents",
        "Top 10% income composition-Entrepreneurial income",
    )
    source = list(dataset)
    labels = ("Salary", "Dividends", "Interest", "Rent", "Business")
    return pld.stackedarea([gds.timeseries(source, col) for col in columns],
                           labels=labels,
                           title="U.S. Top 10% Income Composition",
                           ylabel="Percentage")
Example #7
def average_top_income_lift(source):
    """
    Compares top percentage avg income over total avg
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% average income", "Top 0.1% average income"),
        ("Top 5% average income", "Top 0.1% average income"),
        ("Top 1% average income", "Top 0.1% average income"),
        ("Top 0.5% average income", "Top 0.1% average income"),
        ("Top 0.1% average income", "Top 0.1% average income"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[0] for col in columns),
                         title="U.S. Income Disparity",
                         ylabel="2008 US Dollars")
Example #8
def main(argv):
    try:
        if argv[0] == "--help":
            help()
        elif argv[0] == "--retrain":
            model_type = argv[1]
            pad = int(argv[2])
            csv_path = GetDataSet.get_csv("./dataset/image/data",
                                          "./dataset/image/label", pad)
            if model_type == "DecisionTree":
                GetModel.decisiontree(csv_path)
            elif model_type == "RandomForest":
                GetModel.randomforest(csv_path)
        elif argv[0] == "--evaluate":
            model_path = argv[1]
            GetModel.evaluate(model_path)
        elif argv[0] == "--predict":
            model_path = argv[1]
            image_path = argv[2]
            Predict.predict(image_path, model_path)
        else:
            help()
    except IndexError:
        # Manual indexing into argv raises IndexError when arguments are
        # missing; getopt is never actually used here.
        help()
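A minimal entry-point sketch for this dispatcher; `main` expects the script name already stripped, since it reads the flag from `argv[0]` (the script and file names below are illustrative):

import sys

if __name__ == "__main__":
    # e.g.  python run.py --retrain DecisionTree 2
    #       python run.py --predict model.pkl sample.png
    main(sys.argv[1:])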
Example #9
def train():
    """Train the two-input network for 5000 steps, recording accuracy, loss,
    and AUC into the module-level metric lists every 10 steps."""
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node],
                        name='FP-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node],
                        name='Des-input')
    keeprate = tf.placeholder(tf.float32, name='keeprate')
    label = tf.placeholder(tf.float32, [None, Inference.outputNode],
                           name='Label-input')
    one = tf.ones_like(label, name='ones')
    zero = tf.zeros_like(label, name='zeros')
    t = Inference.fakerinference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)
    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='MeanCrossEntropy')
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'),
                                         name='Regularizer')

    train_step = tf.train.AdamOptimizer().minimize(loss,
                                                   global_step,
                                                   name='TrainStep')
    # Evaluation ops: accuracy and AUC on the thresholded predictions.
    correct_prediction = tf.equal(tf.argmax(y, 1),
                                  tf.argmax(label, 1),
                                  name='CorrectNum')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='Acc')
    auc_value, auc_op = tf.metrics.auc(label, y, name="Auc")
    traindata, trainlabel, traindesc = GetDataSet.getNextBatch(True, False)
    testdata, testlabel, testdesc = GetDataSet.getNextBatch(False, False)
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("logs/", sess.graph)
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(5000):

            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={
                         x1: x1s,
                         x2: x2s,
                         label: labels,
                         keeprate: 0.75
                     })

            if i % 10 == 0:
                print(i)
                a, b = sess.run([accuracy, loss],
                                feed_dict={
                                    x1: traindata,
                                    x2: traindesc,
                                    label: trainlabel,
                                    keeprate: 1
                                })
                trainacc.append(a)
                trainloss.append(b)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={
                                       x1: testdata,
                                       x2: testdesc,
                                       label: testlabel,
                                       keeprate: 1
                                   })
                testacc.append(a)
                testloss.append(b)
                auc.append(
                    sess.run(auc_value,
                             feed_dict={
                                 x1: testdata,
                                 x2: testdesc,
                                 label: testlabel,
                                 keeprate: 1
                             }))
                print(auc[-1])
        writer.close()
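Examples 9-13 use the TensorFlow 1.x graph API (`tf.placeholder`, `tf.Session`, `tf.metrics.auc`). To run them under TensorFlow 2, the standard compatibility shim is:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # restores graph-mode placeholders and sessions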
Example #10

# Module-level accumulators filled in by train() during evaluation.
trainloss = []
trainacc = []
testloss = []
testacc = []
auc = []

# Load one project's dataset, train, then dump the collected metrics as CSVs.
projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects\project0-0\FP'
GetDataSet.getDataSet(projectDir)
train()
projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects'
GetDataSet.save(trainacc, projectDir + '\\trainacc.csv', 1)
GetDataSet.save(trainloss, projectDir + '\\trainloss.csv', 1)
GetDataSet.save(testacc, projectDir + '\\testacc.csv', 1)
GetDataSet.save(testloss, projectDir + '\\testloss.csv', 1)
GetDataSet.save(auc, projectDir + '\\auc.csv', 1)
Example #11
def train():
    """Train the network, printing metrics every 200 steps and tracking test
    samples that are persistently misclassified late in training."""
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node],
                        name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node],
                        name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode],
                           name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)
    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)
    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))
    # LearningRateBase / LearningRateDecay are module-level constants defined
    # elsewhere in the project; note that learning_rate is computed here but
    # never passed to the optimizer below, which uses Adam's default rate.
    learning_rate = tf.train.exponential_decay(LearningRateBase, global_step,
                                               30, LearningRateDecay)

    train_step = tf.train.AdamOptimizer().minimize(loss, global_step)

    #    train_step = tf.train.GradientDescentOptimizer(0.9).minimize(loss,global_step)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    auc_value, auc_op = tf.metrics.auc(tf.argmax(label, 1), tf.argmax(y, 1))

    # Per-sample misclassification counters; trainfakername is never used below.
    trainfakername = [0 for i in range(len(GetDataSet.trainDataSet))]
    testfakername = [0 for i in range(len(GetDataSet.testDataSet))]
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(5000):
            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={
                         x1: x1s,
                         x2: x2s,
                         label: labels,
                         keeprate: 0.9
                     })
            if i % 200 == 0:
                x1s, labels, x2s = GetDataSet.getNextBatch()
                a, b = sess.run([accuracy, loss],
                                feed_dict={
                                    x1: x1s,
                                    x2: x2s,
                                    label: labels,
                                    keeprate: 1
                                })
                print("train:%g\ntrainloss:%g" % (a, b))
                x1s, labels, x2s = GetDataSet.getNextBatch(False, False)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={
                                       x1: x1s,
                                       x2: x2s,
                                       label: labels,
                                       keeprate: 1
                                   })
                print("test:%g\ntestloss:%g" % (a, b))
                print("auc:%g" % (sess.run(auc_value,
                                           feed_dict={
                                               x1: x1s,
                                               x2: x2s,
                                               label: labels,
                                               keeprate: 1
                                           })))
            # Accuracy before this change: 0.6674
            # Over the final ~100 evaluation passes, count how often each test
            # sample is misclassified.
            if i > 4000 and i % 10 == 0:
                x1s, labels, x2s = GetDataSet.getNextBatch(False, False)
                predy = sess.run(correct_prediction,
                                 feed_dict={
                                     x1: x1s,
                                     x2: x2s,
                                     label: labels,
                                     keeprate: 1
                                 })
                for index2 in range(len(predy)):
                    if not predy[index2]:
                        testfakername[index2] += 1
                #print(testfakername)
                #print('end')
    # Report test samples misclassified in more than 98 of those ~100 passes.
    for i in range(len(testfakername)):
        if testfakername[i] > 98:
            print(GetDataSet.testName[i])
Example #12
import tensorflow as tf
import GetDataSet
import Inference

result = []
# Dataset directory; alternate suffixes are kept as commented-out fragments.
projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects\temp'
#temp'
#project0-0\FP'
GetDataSet.getDataSet(projectDir)


def train():
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node],
                        name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node],
                        name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode],
                           name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)
    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)
    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))
    # LearningRateBase / LearningRateDecay are module-level constants defined
    # elsewhere in the project; this snippet is truncated after this line.
    learning_rate = tf.train.exponential_decay(LearningRateBase, global_step,
                                               30, LearningRateDecay)
Example #13
def train():
    """Variant of the loop in Example #9: 6000 steps, no summary writer."""
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node],
                        name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node],
                        name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode],
                           name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)
    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)
    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))

    train_step = tf.train.AdamOptimizer().minimize(loss, global_step)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    auc_value, auc_op = tf.metrics.auc(tf.argmax(label, 1), tf.argmax(y, 1))
    traindata, trainlabel, traindesc = GetDataSet.getNextBatch(True, False)
    testdata, testlabel, testdesc = GetDataSet.getNextBatch(False, False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(6000):

            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={
                         x1: x1s,
                         x2: x2s,
                         label: labels,
                         keeprate: 0.75
                     })

            if i % 10 == 0:
                print(i)
                a, b = sess.run([accuracy, loss],
                                feed_dict={
                                    x1: traindata,
                                    x2: traindesc,
                                    label: trainlabel,
                                    keeprate: 1
                                })
                trainacc.append(a)
                trainloss.append(b)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={
                                       x1: testdata,
                                       x2: testdesc,
                                       label: testlabel,
                                       keeprate: 1
                                   })
                testacc.append(a)
                testloss.append(b)
                auc.append(
                    sess.run(auc_value,
                             feed_dict={
                                 x1: testdata,
                                 x2: testdesc,
                                 label: testlabel,
                                 keeprate: 1
                             }))