def scatter():
    """Draw a scatter plot of one data set produced by ``wd.writeData()``.

    The first element of every point is plotted on the x axis ("data") and
    the second on the y axis ("target", limited to 0-100).  A new figure is
    created; it is neither shown nor saved here, and nothing is returned.
    """
    # create data
    points = wd.writeData()
    # A str result is not a list of points: iterating it would plot single
    # characters.  Regenerate until a real data set comes back.
    # NOTE(review): presumably the str marks a rejected data set -- confirm
    # against writeData.
    while isinstance(points, str):
        print("!Killed data set!")
        points = wd.writeData()

    X = [[p[0]] for p in points]
    y = [p[1] for p in points]

    # plot figure
    plt.figure()
    plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
    plt.xlabel("data")
    plt.ylabel("target")
    plt.ylim(0, 100)
    plt.title("Scatter Plot")
    plt.legend()
def main():
    """Extract the data splits and write each one to its own CSV file.

    Calls ``extract_data()`` to obtain the training, validation and test
    sets, then hands each of them to ``writeData`` together with its target
    path under ``Dataset/``.  Progress is reported on standard output.

    Raises:
        SystemExit: with status 1 when ``extract_data()`` fails, so that
            calling shells and scripts can detect the failure.
    """
    import sys

    print("Retrieving data from Dataset/mnist.pkl.gz ...")
    try:
        trainingData, validationData, testData = extract_data()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not reported as a data-access problem.
        print("Error : Unable to retrieve from mnist data. Please make sure you have correct access right.")
        sys.exit(1)
    print("Done.")

    # (label used in the progress message, data set, destination path)
    outputs = (
        ("trainingData", trainingData, "Dataset/trainingData.csv"),
        ("validationData", validationData, "Dataset/validationData.csv"),
        ("testData", testData, "Dataset/testData.csv"),
    )
    for label, data, path in outputs:
        print("Writting %s to %s ..." % (label, path))
        writeData(data, path)
        print("Done.")
Beispiel #3
0
import writeData as wd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import sklearn.tree
import matplotlib.pyplot as plt

#-------------------------------------------------------------------------------
# this file will save 20 png files of regressions on randomly generated datasets
#-------------------------------------------------------------------------------

# Each pass generates one data set, fits a depth-limited regression tree to it
# and collects the distinct values the tree predicts.
for n in range(1, 21):
    # create data; a str result is not a usable list of points, so regenerate
    points = wd.writeData()
    while isinstance(points, str):
        print("!Killed data set!")
        points = wd.writeData()

    print(len(points))
    # Regression input: one single-feature row per point, and its target.
    X = [[p[0]] for p in points]
    y = [p[1] for p in points]

    # Evaluate the fitted tree on a grid running from 0 up to len(points)
    # in steps of 0.99999, shaped as a column of single-feature rows.
    X_test = np.arange(0.0, len(points), .99999)[:, np.newaxis]
    # NOTE(review): min_impurity_split was deprecated in scikit-learn 0.19 and
    # removed in 1.0 -- confirm the scikit-learn version this runs against.
    regr_1 = DecisionTreeRegressor(max_depth=5, min_impurity_split=3)
    regr_1.fit(X, y)
    y_1 = regr_1.predict(X_test)

    # Distinct predicted values, in order of first appearance.
    bins, breaks = [], []
    for i in y_1:
        if i not in bins:
            bins.append(i)
Beispiel #4
0
import connectDb
import pullCsv
import loadCsv
import os
import writeData
import pullQuote_v2 as pq
from time import sleep
import timeit

# For every symbol returned by connectDb.pullSymbols(): pull a quote and the
# symbol's CSV, load the CSV when it is large enough, then report the total
# run time.
startTime = timeit.default_timer()
results = connectDb.pullSymbols()

for result in results:
    print("PROCESSING SYMBOL " + result)
    # NOTE(review): the original called the module object itself
    # (``pq(result)``), which raises TypeError because a module is not
    # callable.  ``newQuote(symbol, 0)`` is assumed to be the intended entry
    # point -- confirm the name and signature against pullQuote_v2.
    price = pq.newQuote(result, 0)
    pullCsv.pullCsv(result)
    ratioFile = 'ratios/' + result + '.CSV'
    # Files of 100 bytes or less are skipped instead of being loaded.
    if os.stat(ratioFile).st_size > 100:
        sqlStatements = loadCsv.writeSql(result)
        writeData.writeData(sqlStatements)
    else:
        print("SKIPPING DUE TO FILE SIZE 0 " + result)
stopTime = timeit.default_timer()

print("RUN TIME: " + str(stopTime - startTime))
Beispiel #5
0
def _usable_points():
    """Return a data set from ``wd.writeData()``, retrying while it yields a str."""
    points = wd.writeData()
    # prevent use of any poor data constructions (overlapping data from writeData.py)
    while isinstance(points, str):
        print("!Killed data set!")
        points = wd.writeData()
    return points


def _regression_breaks(grid, ys, count):
    """Return the ``[(start, stop, yval)]`` segments of a fitted regression.

    Args:
        grid: 1-D sequence of the x positions the tree was evaluated at.
        ys: predicted value for every position in ``grid``.  ``ys[count]`` is
            read, so it must hold more than ``count`` entries.
        count: number of leading positions to scan.

    A segment is closed wherever the integer parts of two consecutive
    predictions differ; the segment still open after the scan is appended
    last, ending at position ``count - 1``.
    """
    breaks = []
    start = 0
    for i in range(count):
        if int(ys[i + 1]) != int(ys[i]):
            breaks.append((int(start), int(grid[i]), ys[i]))
            start = grid[i]
    end = count - 1
    breaks.append((int(start), int(grid[end]), ys[end]))
    return breaks


def combRegress():
    """Fit regression trees to ten generated data sets and plot them together.

    For each data set a ``DecisionTreeRegressor`` is fitted and evaluated on
    a grid from 0 up to the number of points, and the break points of the
    resulting step function are extracted.  All data sets and fits are then
    drawn into three stacked subplots: the raw points, the points together
    with their fitted tree, and the fitted trees alone.  The figure is not
    shown here.

    Returns:
        A list with one entry per data set; each entry is that data set's
        list of ``(start, stop, yval)`` break tuples.
    """
    lis, tes, combBreaks = [], [], []

    for _ in range(10):
        # create data
        points = _usable_points()

        # set Target and Data for regression
        X = [[p[0]] for p in points]
        y = [p[1] for p in points]

        # run regression
        X_test = np.arange(0.0, len(points), .99999)[:, np.newaxis]
        # NOTE(review): min_impurity_split was deprecated in scikit-learn 0.19
        # and removed in 1.0 -- confirm the scikit-learn version in use.
        regr_1 = DecisionTreeRegressor(max_depth=5, min_impurity_split=3)
        regr_1.fit(X, y)
        y_1 = regr_1.predict(X_test)

        # ****features will be found here****
        # The grid is handed over as a flat column so that every position is
        # a scalar; converting the 1-element rows of X_test with int() is
        # deprecated by NumPy.
        breaks = _regression_breaks(X_test[:, 0], y_1.tolist(), len(points))

        lis.append((X, y))
        tes.append((X_test, y_1))

        # ****this is the feature string****
        combBreaks.append(breaks)

    #---------------------------------------------------------------------------
    # plot unfit, fitted, and fit regressions
    #---------------------------------------------------------------------------

    colors = [
        "blue", "green", "red", "cyan", "magenta", "brown", "darkorange",
        "grey", "pink", "purple"
    ]
    for i, ((X, y), (X_test, y_1)) in enumerate(zip(lis, tes)):
        # Index i - 1 is kept from the original colour assignment; for the
        # first data set it wraps round to the last colour in the list.
        color = colors[i - 1]
        # The same 1-based number is used in every label of a data set.
        number = str(i + 1)

        # unfit points in scatterplot
        plt.subplot(311)
        plt.scatter(X, y, s=10, color=color, label="data " + number)
        plt.ylim(0, 100)
        plt.title("Prefit, Combined, and Fit Regression")
        plt.legend()

        # scatterplot and fit decision tree model
        plt.subplot(312)
        plt.scatter(X, y, s=10, color=color, label="data " + number)
        plt.plot(X_test, y_1, color=color, label="tree " + number, linewidth=2)
        plt.ylabel("target")
        plt.ylim(0, 100)

        # only fit decision tree model
        plt.subplot(313)
        plt.plot(X_test, y_1, color=color, label="tree " + number, linewidth=2)
        plt.xlabel("data")
        plt.ylim(0, 100)

    print("\n---- complete ----\n")
    print("type 'showme()' to display triple plot\n")
    return combBreaks
Beispiel #6
0
import sys
sys.path.append('../')
from getContent import getContent
from getData import getData
from writeData import writeData
if __name__ == '__main__':
    # Page to fetch.
    url = 'http://www.weather.com.cn/weather/101210101.shtml'

    # Fetch the page content and get the data out of it in one step.
    result = getData(getContent(url))

    # Hand the data to writeData together with the output path.
    writeData(result, 'D:/weather.csv')
    print('mytest')
Beispiel #7
0
import connectDb
import pullCsv
import loadCsv
import os
import writeData
import pullQuote_v2 as pq
from time import sleep
import timeit
import loadSymbols as ls 

# Rebuild the TICKER table from the generated symbol statements, then fetch a
# quote for every symbol and write it with writeData.
pw = ""
startTime = timeit.default_timer()
symbols = ls.generateSymbolSql()
print("truncating TICKER")
truncateStatement = ['truncate table TICKER']
writeData.writeData(truncateStatement, pw)
print("Loading Ticker Data")
writeData.writeData(symbols, pw)


results = connectDb.pullSymbols(pw)

for result in results:
    print("PROCESSING SYMBOL " + result)
    price = pq.newQuote(result, 0)
    print(price)
    # newQuote's result is compared with the string 'FAIL'; such results are
    # not written.
    if price != 'FAIL':
        try:
            writeData.writeData(price, pw)
        except Exception:
            # Keep going with the next symbol.  ``Exception`` rather than a
            # bare ``except`` so that Ctrl-C still stops the run.
            print("Error with " + result)
def getData(html_text):
    """Extract one row per list entry from the ``div#7d`` block of a page.

    The page is parsed with ``BeautifulSoup``; the ``<div id="7d">`` element
    of the body is located, and every ``<li>`` of its first ``<ul>`` is
    turned into one row.

    Args:
        html_text: HTML markup to parse.

    Returns:
        A list with one ``[date, weather, lowest, highest]`` list per
        ``<li>``: the text of the entry's ``<h1>``, of its first ``<p>``, and
        of the ``<i>`` and ``<span>`` tags inside its second ``<p>``.
        ``lowest`` / ``highest`` is ``None`` when that tag is absent.  The
        list is empty when the ``<ul>`` has no ``<li>``.

    Raises:
        AttributeError: if the body, the ``div#7d`` element, its ``<ul>`` or
            an entry's ``<h1>`` is missing.
        IndexError: if an entry has fewer than two ``<p>`` tags.
    """
    bs = BeautifulSoup(html_text, "html.parser")  # create the BeautifulSoup object
    body = bs.body  # get the body
    data = body.find('div', {'id': '7d'})
    ul = data.find('ul')
    li = ul.find_all('li')

    final = []
    for day in li:
        date = day.find('h1').string  # date
        inf = day.find_all('p')
        weather = inf[0].string  # weather

        # find() returns None when the tag is absent, so guard before
        # reading .string instead of failing on the whole page.
        highest_tag = inf[1].find('span')  # highest temperature
        lowest_tag = inf[1].find('i')  # lowest temperature
        temperature_highest = highest_tag.string if highest_tag is not None else None
        temperature_low = lowest_tag.string if lowest_tag is not None else None

        final.append([date, weather, temperature_low, temperature_highest])

    # Report and return only after every entry has been collected; returning
    # from inside the loop would hand back just the first entry.
    print('getDate success')
    return final

if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather/101210101.shtml'
    # Fetch the web page, then parse it to get the data that is needed.
    result = getData(getContent(url))
    # Write the data into a CSV document.
    writeData.writeData(result, 'D:/weather.csv')
    print('my frist python file')