def capital_gains_lift(source):
    """Compute the capital gains lift in top income share percentages over time (chart)."""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% income share-including capital gains", "Top 10% income share"),
        ("Top 5% income share-including capital gains", "Top 5% income share"),
        ("Top 1% income share-including capital gains", "Top 1% income share"),
        ("Top 0.5% income share-including capital gains", "Top 0.5% income share"),
        ("Top 0.1% income share-including capital gains", "Top 0.1% income share"),
        ("Top 0.05% income share-including capital gains", "Top 0.05% income share"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=[col[1] for col in columns],
                         title="U.S. Capital Gains Income Lift",
                         ylabel="Percentage Difference")
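
# Clarifying sketch (not part of the original module): md.delta is assumed here
# to take the two aligned timeseries returned by gds.timeseries and produce
# their element-wise difference, which is what turns the "including capital
# gains" series into a lift over the plain income-share series. The assumed
# (year, value) pair format and the helper below are hypothetical.
def _delta_sketch(series_a, series_b):
    """Hypothetical element-wise difference of two (year, value) timeseries."""
    return [(year, value_a - value_b)
            for (year, value_a), (_, value_b) in zip(series_a, series_b)]
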
def percent_income_share(source):
    """Create Income Share chart"""
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(gds.getDataSetUsingCSV(source))
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Percentage Income Share",
                         ylabel="Percentage")
def mean_normalized_percent_income_share(source):
    """Create the mean-normalized Income Share chart."""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(dataset)
    return pld.linechart(
        [md.normalize(gds.timeseries(source, col)) for col in columns],
        labels=columns,
        title="Mean Normalized U.S. Percentage Income Share",
        ylabel="Percentage")
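
# Clarifying sketch (assumption): md.normalize is taken to mean-normalize a
# timeseries, i.e. express each value relative to the series mean so the
# different percentile groups share one scale. The real formula lives in the
# md module; a hypothetical plain-Python version, again assuming (year, value)
# pairs, could look like this.
def _normalize_sketch(series):
    """Hypothetical mean normalization of a (year, value) timeseries."""
    values = [value for _, value in series]
    mean = sum(values) / len(values)
    return [(year, value / mean) for year, value in series]
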
def generate_report_for_selected_countries(source):
    # Select countries to include
    include = ("United States", "France", "Italy",
               "Germany", "South Africa", "New Zealand")

    # Get dataset from CSV
    data = list(gds.getDataSetUsingDictReader(source, include))
    years = set(gds.extract_years(data))

    # Generate context
    context = {
        'title': "Average Income per Family, %i - %i" % (min(years), max(years)),
        'years': json.dumps(list(years)),
        'countries': [v[0] for v in data],
        'series': json.dumps(list(gds.extract_series(data, years))),
    }

    # Write HTML with template
    gr.write(context)
def average_incomes(source):
    """Compares percentage average incomes"""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% average income",
        "Top 5% average income",
        "Top 1% average income",
        "Top 0.5% average income",
        "Top 0.1% average income",
        "Top 0.05% average income",
    )
    source = list(dataset)
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Average Income",
                         ylabel="2008 US Dollars")
def income_composition(source):
    """Compares income composition"""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income composition-Wages, salaries and pensions",
        "Top 10% income composition-Dividends",
        "Top 10% income composition-Interest Income",
        "Top 10% income composition-Rents",
        "Top 10% income composition-Entrepreneurial income",
    )
    source = list(dataset)
    labels = ("Salary", "Dividends", "Interest", "Rent", "Business")
    return pld.stackedarea([gds.timeseries(source, col) for col in columns],
                           labels=labels,
                           title="U.S. Top 10% Income Composition",
                           ylabel="Percentage")
def average_top_income_lift(source):
    """Compares top percentage avg income over total avg"""
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% average income", "Top 0.1% average income"),
        ("Top 5% average income", "Top 0.1% average income"),
        ("Top 1% average income", "Top 0.1% average income"),
        ("Top 0.5% average income", "Top 0.1% average income"),
        ("Top 0.1% average income", "Top 0.1% average income"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=[col[0] for col in columns],
                         title="U.S. Income Disparity",
                         ylabel="2008 US Dollars")
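
# Usage sketch (illustrative, not part of the original module): how the chart
# builders above might be driven end to end. "data/income_dist.csv" is a
# hypothetical path; each function returns whatever chart object the project's
# pld plotting helper produces.
if __name__ == "__main__":
    INCOME_CSV = "data/income_dist.csv"  # hypothetical dataset location
    for build_chart in (capital_gains_lift, percent_income_share,
                        mean_normalized_percent_income_share, average_incomes,
                        income_composition, average_top_income_lift):
        build_chart(INCOME_CSV)
        print("built chart:", build_chart.__name__)
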
def main(argv):
    try:
        if argv[0] == "--help":
            help()
        elif argv[0] == "--retrain":
            model_type = argv[1]
            pad = int(argv[2])
            csv_path = GetDataSet.get_csv("./dataset/image/data",
                                          "./dataset/image/label", pad)
            if model_type == "DecisionTree":
                GetModel.decisiontree(csv_path)
            elif model_type == "RandomForest":
                GetModel.randomforest(csv_path)
        elif argv[0] == "--evaluate":
            model_path = argv[1]
            GetModel.evaluate(model_path)
        elif argv[0] == "--predict":
            model_path = argv[1]
            image_path = argv[2]
            Predict.predict(image_path, model_path)
        else:
            help()
    except (getopt.GetoptError, IndexError):
        # Missing or malformed arguments fall back to the help text.
        help()
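
# Invocation sketch (assumption: main() is given sys.argv[1:]). Example command
# lines this dispatcher handles; "main.py" and the model/image paths below are
# hypothetical, and the pad value 2 is only illustrative:
#
#   python main.py --retrain DecisionTree 2
#   python main.py --retrain RandomForest 2
#   python main.py --evaluate ./models/example_model
#   python main.py --predict ./models/example_model ./dataset/image/data/example.png
#
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])
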
def train():
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node], name='FP-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node], name='Des-input')
    keeprate = tf.placeholder(tf.float32, name='keeprate')
    label = tf.placeholder(tf.float32, [None, Inference.outputNode], name='Label-input')
    one = tf.ones_like(label, name='ones')
    zero = tf.zeros_like(label, name='zeros')

    t = Inference.fakerinference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)

    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='MeanCrossEntropy')
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'), name='Regularizer')
    train_step = tf.train.AdamOptimizer().minimize(loss, global_step, name='TrainStep')

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(label, 1), name='CorrectNum')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='Acc')
    auc_value, auc_op = tf.metrics.auc(label, y, name="Auc")

    traindata, trainlabel, traindesc = GetDataSet.getNextBatch(True, False)
    testdata, testlabel, testdesc = GetDataSet.getNextBatch(False, False)

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("logs/", sess.graph)
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(5000):
            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 0.75})
            if i % 10 == 0:
                print(i)
                a, b = sess.run([accuracy, loss],
                                feed_dict={x1: traindata, x2: traindesc,
                                           label: trainlabel, keeprate: 1})
                trainacc.append(a)
                trainloss.append(b)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={x1: testdata, x2: testdesc,
                                              label: testlabel, keeprate: 1})
                testacc.append(a)
                testloss.append(b)
                auc.append(sess.run(auc_value,
                                    feed_dict={x1: testdata, x2: testdesc,
                                               label: testlabel, keeprate: 1}))
                print(auc[-1])
        writer.close()
trainloss = []
trainacc = []
testloss = []
testacc = []
auc = []

projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects\project0-0\FP'
GetDataSet.getDataSet(projectDir)
train()

projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects'
GetDataSet.save(trainacc, projectDir + '\\trainacc.csv', 1)
GetDataSet.save(trainloss, projectDir + '\\trainloss.csv', 1)
GetDataSet.save(testacc, projectDir + '\\testacc.csv', 1)
GetDataSet.save(testloss, projectDir + '\\testloss.csv', 1)
GetDataSet.save(auc, projectDir + '\\auc.csv', 1)
def train():
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node], name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node], name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode], name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)

    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)

    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(LearningRateBase, global_step, 30,
                                               LearningRateDecay)
    train_step = tf.train.AdamOptimizer().minimize(loss, global_step)
    # train_step = tf.train.GradientDescentOptimizer(0.9).minimize(loss, global_step)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    auc_value, auc_op = tf.metrics.auc(tf.argmax(label, 1), tf.argmax(y, 1))

    trainfakername = [0 for i in range(len(GetDataSet.trainDataSet))]
    testfakername = [0 for i in range(len(GetDataSet.testDataSet))]

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(5000):
            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 0.9})
            if i % 200 == 0:
                x1s, labels, x2s = GetDataSet.getNextBatch()
                a, b = sess.run([accuracy, loss],
                                feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 1})
                print("train:%g\ntrainloss:%g" % (a, b))
                x1s, labels, x2s = GetDataSet.getNextBatch(False, False)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 1})
                print("test:%g\ntestloss:%g" % (a, b))
                print("auc:%g" % (sess.run(auc_value,
                                           feed_dict={x1: x1s, x2: x2s,
                                                      label: labels, keeprate: 1})))
                # before the change: 0.6674
            if i > 4000 and i % 10 == 0:
                x1s, labels, x2s = GetDataSet.getNextBatch(False, False)
                predy = sess.run(correct_prediction,
                                 feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 1})
                for index2 in range(len(predy)):
                    if not predy[index2]:
                        testfakername[index2] += 1
                # print(testfakername)
        # print('end')
        for i in range(len(testfakername)):
            if testfakername[i] > 98:
                print(GetDataSet.testName[i])
import tensorflow as tf

import GetDataSet
import Inference

result = []
projectDir = r'C:\Users\lenovo\Desktop\毕业论文\result\des\projects\temp'  # temp'  # project0-0\FP'
GetDataSet.getDataSet(projectDir)


def train():
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node], name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node], name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode], name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)

    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)

    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(LearningRateBase, global_step, 30,
                                               LearningRateDecay)
def train():
    x1 = tf.placeholder(tf.float32, [None, Inference.input1Node], name='x1-input')
    x2 = tf.placeholder(tf.float32, [None, Inference.input2Node], name='x2-input')
    keeprate = tf.placeholder(tf.float32)
    label = tf.placeholder(tf.float32, [None, Inference.outputNode], name='label-input')
    one = tf.ones_like(label)
    zero = tf.zeros_like(label)

    t = Inference.inference(x1, x2, keeprate)
    y = tf.where(t < 0.5, zero, one)

    global_step = tf.Variable(0, False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=t, labels=tf.argmax(label, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean  # + tf.add_n(tf.get_collection('losses'))
    train_step = tf.train.AdamOptimizer().minimize(loss, global_step)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    auc_value, auc_op = tf.metrics.auc(tf.argmax(label, 1), tf.argmax(y, 1))

    traindata, trainlabel, traindesc = GetDataSet.getNextBatch(True, False)
    testdata, testlabel, testdesc = GetDataSet.getNextBatch(False, False)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        for i in range(6000):
            x1s, labels, x2s = GetDataSet.getNextBatch()
            sess.run(train_step,
                     feed_dict={x1: x1s, x2: x2s, label: labels, keeprate: 0.75})
            if i % 10 == 0:
                print(i)
                a, b = sess.run([accuracy, loss],
                                feed_dict={x1: traindata, x2: traindesc,
                                           label: trainlabel, keeprate: 1})
                trainacc.append(a)
                trainloss.append(b)
                a, b, c = sess.run([accuracy, loss, auc_op],
                                   feed_dict={x1: testdata, x2: testdesc,
                                              label: testlabel, keeprate: 1})
                testacc.append(a)
                testloss.append(b)
                auc.append(sess.run(auc_value,
                                    feed_dict={x1: testdata, x2: testdesc,
                                               label: testlabel, keeprate: 1}))