def pointQuery(api, attribute, sort=False, reverse=False):
    result = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                    False, MAX_RESULT)
    result = getData(result["result"])
    temp = []
    if attribute[0] == 'U' or attribute[0] == 'R':
        # Follow the node+RID reference of each record to pull in the
        # related records from the NRID stream.
        for line in result:
            node, _, RID = line.split(" ")[1:4]
            nrid = node + 'R' + RID
            RIDResult = getData(
                api.liststreamkeyitems(NRID, nrid, False, MAX_RESULT)["result"])
            temp += RIDResult
    result += temp
    if DO_VALIDATION:
        if database.validate(result, attribute, True) is False:
            print("Wrong!")
    return result
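# Several of the pointQuery variants in this file lean on a getData helper and
# an att_dict mapping that are not shown. The sketch below is an assumption
# pieced together from how they are called (getData receives the "result" list
# of liststreamkeyitems, and the batched variant elsewhere decodes a hex
# "data" field), not the confirmed implementation:
def getData(items):
    # Each stream item is assumed to carry its payload hex-encoded in "data".
    return [bytes.fromhex(item["data"]).decode(ENCODE_FORMAT)
            for item in items]

# att_dict is assumed to map an attribute string's leading character to the
# stream holding that attribute, e.g. {'U': 'User', 'R': 'Resource',
# 'T': 'Timestamp'} (the stream names here are illustrative).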
def problem_2_1(max_M=15):
    sse_M = []
    l_2_norm = []
    for i in range(max_M):
        x, y = getData('curvefitting.txt')
        w_ols = max_likelihood(x, y, i)
        bishop_plot(x, y, w_ols, 'maximum likelihood, M = %s' % i)
        plt.savefig('problem2_ml_M_%s.png' % i)
        sse_M.append(sse(x, y, w_ols))
        l_2_norm.append(sum([ii**2 for ii in w_ols])**0.5)
        plt.close()
    plt.plot(np.array(range(max_M)), np.array(sse_M),
             label='Sum of Squared Errors')
    plt.savefig('problem2_OLS_SSEvsM.png')
    plt.close()
    plt.plot(np.array(range(max_M)), np.array(l_2_norm),
             label='L2 Norm of Weight Vector')
    plt.savefig('problem2_OLS_L2_NormvsM.png')
    plt.close()
    x, y = getData('curvefitting.txt')
    # Drop two distinct points. Sampling without replacement avoids deleting
    # the same index twice, and the indices must stay within range (the
    # original "max(x.size, y.size)+1" could pick an out-of-range index).
    indices = np.random.choice(min(x.size, y.size), 2, replace=False)
    for ii in np.sort(indices)[::-1]:
        x = np.delete(x, ii)
        y = np.delete(y, ii)
    for M in [0, 1, 3, 9]:
        w_ols = max_likelihood(x, y, M)
        bishop_plot(x, y, w_ols, 'maximum likelihood w/missing, M = %s' % M)
        plt.savefig('problem2_mlmissing_M_%s.png' % M)
def problem_3_2(A_or_B, plot_individual=False):
    train_x, train_y = getData('regress%s_train.txt' % A_or_B)
    valid_x, valid_y = getData('regress_validate.txt')
    lams = np.linspace(0, 10, 101)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k',
              '#808080', '#DDDD00', '#800000']
    best_lams = []
    best_sses = []
    for M in range(10):
        errors = []
        for lam in lams:
            error = problem_3_2_case(train_x, train_y, valid_x, valid_y,
                                     M, lam, A_or_B, plot_individual)
            errors.append(error)
        if not plot_individual:
            plt.plot(lams, errors, color=colors[M], label='M=%s' % M)
        best_lams.append(np.argmin(errors) / 10.)  # index -> lambda (step 0.1)
        best_sses.append(min(errors))
    print best_lams
    print best_sses
    if not plot_individual:
        plt.ylim(0, 30 if A_or_B == 'A' else 100)
        plt.title('Ridge regression error by $M$ and $\lambda$, '
                  'training set %s' % A_or_B)
        plt.xlabel('$\lambda$')
        plt.ylabel('sum of squared error (against validation data)')
        plt.legend()
        plt.savefig('problem3_2_train%s_meta.png' % A_or_B)
        plt.show()
        plt.close()
def rangeQuery(api, start, end):
    TIMESTAMP = 'Timestamp'  # fixed typo: was TIMESTAMPE
    result = []
    # Buckets fully covered by [start, end].
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMP, str(ts))
        if temp:
            result += temp
    # Partial bucket at the start of the range: filter to the interval.
    temp = api.liststreamkeyitems(TIMESTAMP, str(start // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    # Partial bucket at the end of the range.
    temp = api.liststreamkeyitems(TIMESTAMP, str(end // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    return result
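# Usage sketch for the bucketed rangeQuery above: timestamps are grouped into
# stream keys of width SCALE, so a range query walks every fully-covered
# bucket and then filters the two partial buckets at the ends. With
# SCALE = 100 (an assumed value for illustration):
#
#   rangeQuery(api, 250, 470)
#
# pulls bucket "3" wholesale via pointQuery, then uses a SortedList over
# buckets "2" and "4" to keep only the entries whose leading timestamp field
# falls within [250, 470].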
def pointQuery(api, attribute, sort=False, reverse=False):
    result = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                    False, MAX_RESULT)
    if DO_VALIDATION:
        if database.validate(getData(result["result"]),
                             attribute, True) is False:
            print("Wrong!")
    return getData(result["result"])
def pointQuery(api, attribute, sort=True, reverse=False):
    result = api.liststreamkeyitems(
        att_dict[attribute[0]], attribute[1:], False, FILE_SIZE)
    if DO_VALIDATION:
        validate(getData(result["result"]), attribute[1:])
    result = getData(result["result"])
    att_name = att_dict[attribute[0]]
    if type(ATTRIBUTE_TYPE[att_name]) is int and sort:
        result = sortResult(result, att_name, reverse)
    return result
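# sortResult is called above but not defined in this file. A plausible sketch,
# assuming ATTRIBUTE_TYPE maps an attribute name to the integer index of the
# space-separated field it sorts on (the names come from the caller; the body
# is an assumption):
def sortResult(result, att_name, reverse=False):
    idx = ATTRIBUTE_TYPE[att_name]  # assumed: field index for this attribute
    return sorted(result, key=lambda line: line.split(" ")[idx],
                  reverse=reverse)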
def pointQuery(api, attribute, value):
    result = getData(
        api.liststreamkeyitems(attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(
                api.liststreamkeyitems('Ref-ID', ID, False,
                                       MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp += [r]
    result += temp
    return result
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        # Timestamps are bucketed by SCALE; fetch the whole bucket.
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    # Resolve the transaction ids to records in one batched RPC.
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            # Keep only the exact timestamp match from the bucket.
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    return result
def update_metrics(n):
    data = util.getData()
    twitter_before = data['original']
    twitter_after = data['text']
    session = data['session']
    timenow = [util.convertime(i[0]) for i in session]
    his = [i[1] for i in session]
    sessionnow = dict(zip(timenow, his))
    style = {'padding': '5px', 'fontSize': '25px'}
    result = []
    result.append(
        html.Span('Before PreProcess: ' + twitter_before, style=style))
    result.append(html.Div())
    result.append(
        html.Span('After PreProcess: ' + twitter_after, style=style))
    result.append(html.Div())
    for key, value in sessionnow.items():
        result.append(
            html.Span('History Session: ' + str(key) + str(value),
                      style=style))
        result.append(html.Div())
    return result
def main():
    X, Y = getData()
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    N = len(Y)
    T = np.zeros((N, K))
    for i in range(N):
        T[i, Y[i]] = 1  # one-hot encoding for targets
    batch_sz = 500
    # "10 - 8" was a typo for 10e-8.
    learning_rates = [10e-5, 10e-6, 10e-7, 10e-8, 10e-9]
    trainCost = []
    validCost = []
    accValid = []
    accTrain = []
    for lr in learning_rates:
        m = nn.NeuralNetwork(numHiddenLayer=1, numHiddenUnits=200,
                             actFunc="Tanh")
        # Appending avoids the IndexError of assigning into empty lists, and
        # the swept learning rate is now actually passed to train() instead
        # of the fixed 10e-7.
        tc, vc, at, av = m.train(X, T, epochs=10000, learning_rate=lr,
                                 reg=10e-7)
        trainCost.append(tc)
        validCost.append(vc)
        accTrain.append(at)
        accValid.append(av)
    print("Final Train Accuracy {}".format(accTrain))
    print("Final Valid Accuracy {}".format(accValid))
    legend1, = plt.plot(trainCost, label='training error')
    legend2, = plt.plot(validCost, label='validation error')
    plt.legend([legend1, legend2])
    plt.show()
def calEmotion(filename, city):
    # load the sentiment dictionaries
    most, very, more, insuff, ish, posdict, negdict, over = dict.init_dict()
    jieba.load_userdict("dic/stock_dict.txt")
    # init score
    score = {}
    cur_date = 0
    review_id = 0
    # init excel handler
    w, ws = createExcel()
    # open database & get data
    rawdata, conn, cur = util.getData(filename, city)
    for row in rawdata:
        # collect tokens from the row
        seg_list, cur_date, review = getTokens(score, row, cur_date)
        # score this comment's sentiment and record it under its date
        result = dict.sentiment_score_list(seg_list, posdict, negdict, most,
                                           very, more, ish, insuff, over)
        score[cur_date].append(result)
        # write the review and its score to the xls sheet
        review_id = review_id + 1
        ws.write(review_id, 0, review)
        ws.write(review_id, 1, result)
    w.save("excel/" + filename + ".xls")
    # close the database connection
    util.closeDB(conn, cur)
    # generate the emotion list from score
    emotionList = genEmotionList(score)
    # plot the stock price and emotion for this stock
    showPriceAndEmotion(emotionList, filename, city)
def main():
    X, Y = getData(balance_ones=False)
    # 48x48 = 2304 dimensions per sample, stored row by row:
    # columns 0-48 are the first row, 48-96 the second, and so on.
    # Loop indefinitely; break out via input.
    while True:
        # loop through the 7 emotions
        for i in range(7):
            # choose all the data points with this emotion label
            x, y = X[Y == i], Y[Y == i]
            N = len(y)  # number of data points with this emotion
            j = np.random.choice(N)  # randomly select one of them
            plt.imshow(x[j].reshape(48, 48), cmap='gray')  # reshape and plot
            plt.title(label_map[y[j]])  # show the emotion as the title
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
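# label_map is assumed to name the seven FER2013 emotion classes, indexed the
# same way as the labels in Y; a plausible definition (an assumption, not
# taken from the source):
label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']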
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    model = ANN(200)
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True)
    print(model.score(Xvalid, Yvalid))
def pointQuery(api, attribute, show=False, validation=DO_VALIDATION):
    result = api.liststreamkeyitems(STREAM, attribute, False, FILE_SIZE)
    if validation:
        validate(getData(result["result"]), attribute[1:])
    if show:
        # The flag was originally named `display`, which shadowed the
        # display() helper it then tried to call; renamed to avoid the clash.
        display(result["result"])
    return result["result"]
def problem_3_1():
    x, y = getData('curvefitting.txt')
    for M in [1, 2, 3]:
        for lam in [0, 0.0003, 0.001, 0.003]:
            w_ridge = polynomial_ridge(x, y, M, lam)
            bishop_plot(x, y, w_ridge,
                        'ridge regression, M=%s, lam=%s' % (M, lam))
            plt.savefig('problem3_1_M_%s_lam_%s.png' % (M, lam))
            print 'just tried M = %s, lam=%s' % (M, lam)
            print '    sse was %s' % sse(x, y, w_ridge)
def rangeQuery(api, start, end):
    result = []
    stream = att_dict['T']
    # Enumerate every timestamp key and walk only those within [start, end].
    timestamps = api.liststreamkeys(stream)["result"]
    sl = SortedList(list(map(int, [key['key'] for key in timestamps])))
    for timestamp in sl.irange(start, end):
        result += getData(
            api.liststreamkeyitems(stream, str(timestamp))['result'])
    return result
def andQuery(api, attributes, show=False):
    resultSet = []
    for attr in attributes:
        resultSet.append(set(getData(pointQuery(api, attr))))
    result = resultSet[0]
    for i in range(1, len(resultSet)):
        result &= resultSet[i]
    if show:
        # Renamed from `display` so the flag no longer shadows the display()
        # helper being called.
        display(result)
    # The original computed the intersection but never returned it.
    return result
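# Usage sketch: andQuery intersects the per-attribute result sets, so
#   andQuery(api, ['Ualice', 'Rfile1'])   # hypothetical attribute strings
# returns only the records matched by both the user query and the resource
# query.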
def __init__(self, inputDir, modelDir, action, **kwargs):
    self.action = action
    self.modelDir = modelDir
    self.inputDir = inputDir
    self.clf = None
    if self.action == 'training':
        self.mats1, self.spams1, self.mats2, self.spams2 = \
            util.getData(self.inputDir)
    else:
        self.mats2, self.spams2 = util.getPredictData(self.inputDir)
        self.mats1, self.spams1 = None, None
        self.clf = joblib.load(self.modelDir)
def update_graph(n):
    data = util.getData()
    # most recent session
    session = data['session'][-1]
    time = session[0]
    count = session[1]
    num = len(count.keys())
    sentiment_number = []
    for i in count.keys():
        sentiment_number.append((count[i][0], count[i][1]))
    labels = ['Positive', 'Negative']
    # generate single-pie traces
    piedata = []
    for i in range(num):
        onepie = util.PieOneData(
            [sentiment_number[i][0], sentiment_number[i][1]], labels, i)
        piedata.append(onepie)
    # subtitle x-positions for up to four pies; fall back to an even spread
    # so more than four pies no longer raises a NameError
    if num == 1:
        position = [0.5]
    elif num == 2:
        position = [0.2, 0.8]
    elif num == 3:
        position = [0.15, 0.5, 0.85]
    elif num == 4:
        position = [0.11, 0.375, 0.625, 0.89]
    else:
        position = [(i + 0.5) / num for i in range(num)]
    # generate annotations
    annotations = []
    tmp = 0
    names = []
    for i in count.keys():
        annotations.append(util.subtitle(i, position[tmp]))
        names.append(i)
        tmp += 1
    title = " -- ".join(names)
    layout = util.layout(annotations, num, title)
    fig = util.PieAllData(piedata, layout)
    return fig
def pointQuery(api, attribute, value):
    if attribute == "Timestamp":
        # Timestamps are bucketed by SCALE; scan the bucket for an exact match.
        TSResult = getData(api.liststreamkeyitems(
            'Timestamp', str(int(value) // SCALE), False,
            MAX_RESULT)["result"])
        for tsr in TSResult:
            ts = tsr.split(" ")[0]
            if ts == value:
                return [tsr]
    result = getData(api.liststreamkeyitems(
        attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(api.liststreamkeyitems(
                'Ref-ID', ID, False, MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp += [r]
    result += temp
    return result
def main():
    X, Y = getData(balance_ones=False)
    while True:
        for i in xrange(len(label_map)):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()  # without this the figure is never displayed
        prompt = raw_input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
def main():
    X, Y = getData(balance_ones=False)
    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(labels[y[j]])
            plt.show()
        prompt = input("Quit the program? Y/N\n")
        if prompt == 'Y':
            break
def main():
    X, Y, _, _ = getData(balance_ones=False)
    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt.lower().startswith('y'):
            break
def main():
    X, Y = getData(balance_ones=False)
    while True:
        for i in xrange(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = raw_input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        # Timestamps are bucketed by SCALE; fetch the whole bucket.
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            # Keep only the exact timestamp match from the bucket.
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    if stream == "User" or stream == 'Resource':
        # Follow each record's node+RID reference to pull in related records,
        # batching both the index lookups and the data fetches.
        nrids = []
        for line in result:
            node, _, rid = line.split(" ")[1:4]
            nrids.append(node + 'R' + rid)
        args = [[NRID, nrid, False, MAX_RESULT] for nrid in nrids]
        temp = api.batch('liststreamkeyitems', args)
        ridResults = []
        for r in temp:
            ridResults += r["result"]
        txids = [tx["txid"] for tx in ridResults]
        args = [[DATA, txid] for txid in txids]
        results = api.batch('getstreamitem', args)
        temp = []
        for r in results:
            data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
            temp.append(data)
        result += temp
    return result
def run_standalone(c, w, o):
    config, weights = loadModelFromDisk(c, w)
    x_train, y_train = getData()
    if x_train is None or y_train is None:
        print("Data could not be loaded, abort.", file=sys.stderr)
        sys.exit(1)
    else:
        print("Data loaded")
    print("Starting training")
    new_weights = trainModel(config, weights, x_train, y_train)
    print("Finished training")
    writeUpdatesToDisk(new_weights, o)
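# Usage sketch (the argument meanings are assumptions inferred from the
# loader/writer names): c and w are paths to the model config and initial
# weights, o is where the updated weights are written, e.g.
#   run_standalone('model_config.json', 'weights_init.bin', 'weights_out.bin')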
def main():
    t0 = datetime.now()
    print("now: ", t0)
    X, Y = getData()
    dt = datetime.now() - t0
    print("dt: ", dt)
    M = 10  # hidden units
    model = ANN(M)
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
def problem_2(M):
    x, y = getData('curvefitting.txt')
    w_ols = max_likelihood(x, y, M)
    print w_ols
    low_limit_x = min(x) - 0.1 * abs(max(x) - min(x))
    hi_limit_x = max(x) + 0.1 * abs(max(x) - min(x))
    x_points = np.linspace(low_limit_x, hi_limit_x, 100)
    y_hat = np.array([compute_yhat(pt, w_ols) for pt in x_points])
    low_limit_y = min(y) - 0.2 * abs(max(y) - min(y))
    hi_limit_y = max(y) + 0.2 * abs(max(y) - min(y))
    plt.plot(x_points, y_hat, '-b', label='Max Likelihood, M=' + str(M))
    plt.scatter(x, y, label='Data')
    plt.plot(x_points, np.sin(2 * np.pi * x_points), '-', color='0.5',
             label='Sin(2*pi*x)')
    plt.xlim(low_limit_x, hi_limit_x)
    plt.ylim(low_limit_y, hi_limit_y)
    plt.legend()
    # Save before show(): show() can clear the figure, leaving a blank PNG.
    plt.savefig('Problem2_M_' + str(M) + '.png')
    plt.show()
def pointQuery(api, attribute, sort=False, reverse=False):
    pointers = getPointers(api, attribute)
    result = []
    # Batch the per-pointer lookups into a single RPC round trip.
    args = [[DATA, p, False, MAX_RESULT] for p in pointers]
    results = api.batch('liststreamkeyitems', args)
    for r in results:
        result.append(getData(r["result"])[0])
    return result
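# getPointers is not defined in this file. A plausible sketch, assuming the
# attribute streams store pointer keys into the DATA stream (inferred from
# the batched liststreamkeyitems lookup above, not confirmed by the source):
def getPointers(api, attribute):
    items = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                   False, MAX_RESULT)["result"]
    return getData(items)  # assumed: each decoded item is a DATA-stream key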
def main():
    X, Y, Xtest, Ytest = getData()
    print(Y)
    while True:
        for i in range(7):  # loop through labels
            # select the data that corresponds to this label
            x, y = X[Y == i], Y[Y == i]
            print("x, y: ", x, y)
            N = len(y)
            print("N: ", N)
            # select a random data point (image index)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Y to quit:')
        if prompt == 'Y':
            break
def main():
    X, Y = getData()
    model = LogisticModel()
    model.fit(X, Y)
    print(model.score(X, Y))
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000])
    model.fit(X, Y, show_fig=True)
# -*- coding:utf-8 -*-
import codecs
import numpy as np
from util.getData import *
from regression.standRegression import *

'''
train_data: xtrain
train_label: ytrain
test_data: xpred
test_time: date_time
'''
getdata = getData()
# get_train_data returns rows laid out as (columns 0-11):
#   [weekday, weather, max temp, min temp, holiday length, i-th holiday day,
#    workday length, i-th workday, date, hour, line, count (y)]
# The test data set covers 20150101 to 20150107 and is laid out as
# (columns 0-9):
#   [date, weekday, weather, max temp, min temp, holiday length,
#    i-th holiday day, workday length, i-th workday, hour]
features = [1, 2, 3, 4, 5, 7, 9]
data = getdata.get_train_data(min_day="20140901", max_day="20141231",
                              line_num=[6])
train = np.array(data)
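# A continuation sketch (assumed; it mirrors the slicing style used in the
# companion script below): pick out the listed feature columns and the count
# column as the regression target.
xtrain = train[:, features]
ytrain = train[:, -1]  # count (y) is the last column in the layout above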
def main():
    X, Y = getData()
    model = ANN(200)
    model.fit(X, Y, reg=0, show_fig=True)
    print(model.score(X, Y))
import requests
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dts
from keras.models import Sequential
from keras.layers import Dense, Dropout
from util import getData, processData

np.random.seed(7)

if __name__ == "__main__":
    data = getData('2012-01-10', '2017-08-25')
    X_train, X_test, Y_train, Y_test = processData(data)
    print(X_train)
    model = Sequential()
    model.add(Dense(500, input_shape=(19,), init='uniform',
                    activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(400, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(250, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(1, init='uniform', activation='linear'))
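    # The snippet stops after stacking the layers; a minimal continuation
    # sketch, assuming a mean-squared-error regression objective on the
    # single linear output (optimizer, epochs, and batch size are
    # assumptions; Keras 1.x, which this init= style matches, names the
    # epochs argument nb_epoch instead).
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
              epochs=100, batch_size=32)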
import numpy as np
import sys
import json
import util
from sklearn.linear_model import LogisticRegression

# Sweep the regularization parameter C and report train/validation accuracy.
C = 1.1
mats1, spams1, mats2, spams2 = util.getData(sys.argv[1])
for i in range(3):
    print C
    clf = LogisticRegression(C=C).fit(mats1, spams1)
    predict1 = clf.predict(mats1)
    predict2 = clf.predict(mats2)
    print util.validation(predict1, spams1), util.validation(predict2, spams2)
    C = C + 0.5
def main():
    X, Y = getData()
    model = LogisticModel()
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    print("read data..........")
    model = ANN([2000, 1000, 500])
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)
# -*- coding:utf-8 -*-
__author__ = 'HeYongxing'
import numpy as np
from matplotlib.pylab import show
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from util.getData import *

# Line No. 10
# rows laid out as: [weekday, weather, max temp, min temp, hour, line,
#                    count (y), datetime]
getData = getData()
data = getData.get_train_data(min_day="20140901", max_day="20141231",
                              line_num=[15])
train = np.array(data)
xtrain = train[:, 0:-3]  # feature columns
xdate = train[:, -1]     # datetime
ytrain = train[:, -2]    # count (y)
from random import shuffle
from flask import (Flask, request, make_response, jsonify, redirect)
from util import getData, getData2
import os

app = Flask(__name__)

songs, movies = getData()
_t = getData2()
songs.update(_t)

# Build index -> label maps: the last whitespace-separated token of each
# entry's label, minus its surrounding brackets, is used as the index.
song_indices = {}
for v in songs.itervalues():
    for each_song in v:
        index = str(each_song[1].split()[-1][1:-1])
        label = ' '.join(each_song[1].split()[:-1])
        song_indices[index] = label

movie_indices = {}
for v in movies.itervalues():
    for each_movie in v:
        index = str(each_movie[1].split()[-1][1:-1])
        label = ' '.join(each_movie[1].split()[:-1])
        movie_indices[index] = label


@app.route('/')
def index():
    return make_response(open('templates/index.html').read())
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000, 500])
    model.fit(X, Y, show_fig=True)
"""
Calculates walking distances between a random sampling of buildings and
compares them to the optimal straight-line path.

Sam Shen
"""
import json, util, sys
import networkx as nx
from decimal import *
from osmread import Node

FIRST_RUN = False
SAMPLE_REPEAT_SIZE = 3  # the number of times to sample
SAMPLE_SIZE_SQRT = 8  # the square root of the sample size

graph, nodes = util.init_graph(util.getData('../data/berkeley_map.osm'))
print("Initialized graph...")
buildings = json.load(open("../data/buildings.json"))

# only have to run this part once
if FIRST_RUN:
    util.batch_discretize(nodes, buildings, '../data/discrete_locs.txt')

str_coords = []  # list of string coordinate pairs
dec_coords = set()  # set of decimal coordinate pairs
discrete_nodes = []  # the nodes that represent discrete buildings
try:
    file = open("../data/discrete_locs.txt")
except IOError:
    print("Did you download discrete_locs.txt OR run batch discretizing "
          "by setting FIRST_RUN to true?")