def scatter():
    # create data
    points = wd.writeData()
    X = [[p[0]] for p in points]
    y = [p[1] for p in points]

    # plot figure
    plt.figure()
    plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
    plt.xlabel("data")
    plt.ylabel("target")
    plt.ylim(0, 100)
    plt.title("Scatter Plot")
    plt.legend()
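#-------------------------------------------------------------------------------
# added usage sketch, not part of the original file: assumes the imports used
# elsewhere in this repo (writeData as wd, matplotlib.pyplot as plt) and that
# wd.writeData() returns a list of (x, y) points
#-------------------------------------------------------------------------------
scatter()
plt.show()  # or plt.savefig("scatter.png") to write the figure to disk instead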
def main():
    print("Retrieving data from Dataset/mnist.pkl.gz ...")
    try:
        trainingData, validationData, testData = extract_data()
    except Exception:
        print("Error: unable to retrieve the mnist data. Please make sure you have the correct access rights.")
        exit(1)
    print("Done.")

    print("Writing trainingData to Dataset/trainingData.csv ...")
    writeData(trainingData, "Dataset/trainingData.csv")
    print("Done.")

    print("Writing validationData to Dataset/validationData.csv ...")
    writeData(validationData, "Dataset/validationData.csv")
    print("Done.")

    print("Writing testData to Dataset/testData.csv ...")
    writeData(testData, "Dataset/testData.csv")
    print("Done.")
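#-------------------------------------------------------------------------------
# added sketch, not part of the original file: extract_data() is not shown in
# this section. A minimal version, assuming the classic mnist.pkl.gz layout
# (a single pickled (train, valid, test) tuple), might look like this:
#-------------------------------------------------------------------------------
import gzip
import pickle

def extract_data(path="Dataset/mnist.pkl.gz"):
    # the archive was pickled under Python 2, so latin1 decoding is required
    # to unpickle it under Python 3
    with gzip.open(path, "rb") as f:
        return pickle.load(f, encoding="latin1")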
import writeData as wd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

#-------------------------------------------------------------------------------
# this file will save 20 png files of regressions on randomly generated datasets
#-------------------------------------------------------------------------------
for n in range(1, 21):
    # create data
    points = wd.writeData()
    # retry until writeData() returns usable points (it returns a string on failure)
    while type(points) == str:
        print("!Killed data set!")
        points = wd.writeData()
    print(len(points))

    # set target and data for the regression
    X = [[p[0]] for p in points]
    y = [p[1] for p in points]

    # fit a shallow decision tree and predict over a dense grid
    X_test = np.arange(0.0, len(points), .99999)[:, np.newaxis]
    regr_1 = DecisionTreeRegressor(max_depth=5, min_impurity_split=3)
    regr_1.fit(X, y)
    y_1 = regr_1.predict(X_test)

    # collect the distinct predicted levels (one per leaf reached)
    bins, breaks = [], []
    for i in y_1:
        if i not in bins:
            bins.append(i)
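#-------------------------------------------------------------------------------
# added note: min_impurity_split was deprecated and then removed in
# scikit-learn 1.0, so the constructor above fails on current releases. The
# closest modern knob is min_impurity_decrease; the two parameters are not
# equivalent, so the threshold below is an assumption to tune rather than a
# drop-in replacement:
#-------------------------------------------------------------------------------
regr_modern = DecisionTreeRegressor(max_depth=5, min_impurity_decrease=3.0)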
import connectDb
import pullCsv
import loadCsv
import os
import writeData
import pullQuote_v2 as pq
from time import sleep
import timeit

startTime = timeit.default_timer()
results = connectDb.pullSymbols()
#print(results)
for result in results:
    print("PROCESSING SYMBOL " + result)
    # pq is a module, so call its quote function rather than the module itself
    price = pq.newQuote(result, 0)
    pullCsv.pullCsv(result)
    # print(os.stat('ratios/' + result + '.CSV').st_size)
    if os.stat('ratios/' + result + '.CSV').st_size > 100:
        sqlStatements = loadCsv.writeSql(result)
        writeData.writeData(sqlStatements)
    else:
        print("SKIPPING DUE TO FILE SIZE 0 " + result)
    # sleep(3)
stopTime = timeit.default_timer()
print("RUN TIME: " + str(stopTime - startTime))
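#-------------------------------------------------------------------------------
# added sketch: os.stat raises FileNotFoundError if pullCsv never wrote the
# file, which would abort the whole loop. A defensive variant (the helper
# name below is hypothetical, not part of this repo) checks for the file
# before reading its size:
#-------------------------------------------------------------------------------
def csv_is_usable(symbol, min_bytes=100):
    # hypothetical helper: True only when the ratios CSV exists and is non-trivial
    path = 'ratios/' + symbol + '.CSV'
    return os.path.exists(path) and os.stat(path).st_size > min_bytes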
def combRegress():
    lis, tes, combBreaks = [], [], []
    for n in range(1, 11):
        # create data
        points = wd.writeData()
        # prevent use of any poor data constructions (overlapping data from writeData.py)
        while type(points) == str:
            print("!Killed data set!")
            points = wd.writeData()

        # set Target and Data for regression
        X = [[p[0]] for p in points]
        y = [p[1] for p in points]

        # run regression
        X_test = np.arange(0.0, len(points), .99999)[:, np.newaxis]
        regr_1 = DecisionTreeRegressor(max_depth=5, min_impurity_split=3)
        regr_1.fit(X, y)
        y_1 = regr_1.predict(X_test)

        #-----------------------------------------------------------------------
        # determine breakpoints of regression for comparison
        #-----------------------------------------------------------------------
        bins, breaks = [], []
        for i in y_1:
            if i not in bins:
                bins.append(i)

        # ****features will be found here****
        breaks = []  # [(start, stop, yval)]
        ys = y_1.tolist()
        start = 0
        for i in range(0, len(points)):
            if int(ys[i + 1]) != int(ys[i]):
                # take the scalar out of the (1,) row so int() stays valid on modern NumPy
                breaks.append((int(start), int(X_test[i][0]), ys[i]))
                start = X_test[i][0]
        breaks.append((int(start), int(X_test[i][0]), ys[i]))
        # print(bins, breaks)  # yvals, (start, stop, yval)

        lis.append((X, y))
        tes.append((X_test, y_1))
        # ****this is the feature string****
        combBreaks.append(breaks)

    #---------------------------------------------------------------------------
    # plot unfit, fitted, and fit regressions
    #---------------------------------------------------------------------------
    colors = ["blue", "green", "red", "cyan", "magenta",
              "brown", "darkorange", "grey", "pink", "purple"]
    for i in range(0, len(lis)):
        # index colors by i so dataset 1 gets the first color
        # (the original colors[i - 1] wrapped the first dataset to the last color)

        # unfit points in scatterplot
        plt.subplot(311)
        plt.scatter(lis[i][0], lis[i][1], s=10, color=colors[i],
                    label="data " + str(i + 1))
        plt.ylim(0, 100)
        plt.title("Prefit, Combined, and Fit Regression")
        plt.legend()

        # scatterplot and fit decision tree model
        plt.subplot(312)
        plt.scatter(lis[i][0], lis[i][1], s=10, color=colors[i],
                    label="data " + str(i + 1))
        plt.plot(tes[i][0], tes[i][1], color=colors[i],
                 label="tree " + str(i + 1), linewidth=2)
        plt.ylabel("target")
        plt.ylim(0, 100)

        # only fit decision tree model
        plt.subplot(313)
        plt.plot(tes[i][0], tes[i][1], color=colors[i],
                 label="tree " + str(i + 1), linewidth=2)
        plt.xlabel("data")
        plt.ylim(0, 100)

    print("\n---- complete ----\n")
    print("type 'showme()' to display triple plot\n")
    # pprint.pprint(combBreaks)
    return combBreaks
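#-------------------------------------------------------------------------------
# added sketch: showme() is referenced in the prompt above but not defined in
# this file; a minimal version consistent with that prompt would simply
# display the figure built by combRegress():
#-------------------------------------------------------------------------------
def showme():
    # render the three stacked subplots left on the current figure
    plt.show()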
import sys
sys.path.append('../')
from getContent import getContent
from getData import getData
from writeData import writeData

if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather/101210101.shtml'  # set the target url
    html = getContent(url)               # fetch the page
    result = getData(html)               # parse out the forecast data
    writeData(result, 'D:/weather.csv')  # write the data to csv
    print('mytest')
import connectDb
import pullCsv
import loadCsv
import os
import writeData
import pullQuote_v2 as pq
from time import sleep
import timeit
import loadSymbols as ls

pw = ""
startTime = timeit.default_timer()
symbols = ls.generateSymbolSql()

print("truncating TICKER")
truncateStatement = ['truncate table TICKER']
writeData.writeData(truncateStatement, pw)

print("Loading Ticker Data")
writeData.writeData(symbols, pw)

results = connectDb.pullSymbols(pw)
for result in results:
    print("PROCESSING SYMBOL " + result)
    price = pq.newQuote(result, 0)
    print(price)
    if price != 'FAIL':
        try:
            writeData.writeData(price, pw)
        except Exception:
            print("Error with " + result)
from bs4 import BeautifulSoup

from getContent import getContent
import writeData


def getData(html_text):
    final = []
    bs = BeautifulSoup(html_text, "html.parser")  # build the BeautifulSoup object
    body = bs.body                                # grab the body
    data = body.find('div', {'id': '7d'})
    ul = data.find('ul')
    li = ul.find_all('li')
    for day in li:
        temp = []
        date = day.find('h1').string
        temp.append(date)  # date
        inf = day.find_all('p')
        weather = inf[0].string  # weather description
        temp.append(weather)
        temperature_highest = inf[1].find('span').string  # high temperature
        temperature_low = inf[1].find('i').string         # low temperature
        temp.append(temperature_low)
        temp.append(temperature_highest)
        final.append(temp)
    print('getData success')
    return final


if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather/101210101.shtml'
    html = getContent(url)                         # fetch the page html
    result = getData(html)                         # parse the page and pull out the fields we need
    writeData.writeData(result, 'D:/weather.csv')  # write the data to a csv file
    print('my first python file')
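#-------------------------------------------------------------------------------
# added sketch: getContent() is imported above but not shown in this section.
# A minimal version (the timeout and encoding handling are assumptions, not
# the repo's actual implementation) would fetch the page with requests:
#-------------------------------------------------------------------------------
import requests

def getContent(url):
    # fetch the page and use the detected encoding in case the server
    # mis-declares the charset
    resp = requests.get(url, timeout=30)
    resp.encoding = resp.apparent_encoding
    return resp.text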