Example No. 1
def pointQuery(api, attribute, sort=False, reverse=False):
    # look up every item whose key matches the attribute value
    result = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                    False, MAX_RESULT)
    result = getData(result["result"])
    temp = []

    if attribute[0] == 'U' or attribute[0] == 'R':
        # User/Resource items carry a node and RID that reference related
        # rows under a node+RID key; pull those in as well
        for line in result:
            node, _, RID = line.split(" ")[1:4]
            nrid = node + 'R' + RID
            RIDResult = getData(
                api.liststreamkeyitems(NRID, nrid, False,
                                       MAX_RESULT)["result"])
            temp += RIDResult
    result += temp
    if DO_VALIDATION:
        if database.validate(result, attribute, True) is False:
            print("Wrong!")
    return result
Example No. 2
def problem_2_1(max_M=15):
    sse_M = []
    l_2_norm = []
    for i in range(max_M):
        x, y = getData('curvefitting.txt')
        w_ols = max_likelihood(x, y, i)
        bishop_plot(x, y, w_ols, 'maximum likelihood, M = %s' % i)
        plt.savefig('problem2_ml_M_%s.png' % i)
        sse_M.append(sse(x, y, w_ols))
        l_2_norm.append(sum(ii**2 for ii in w_ols)**0.5)
    plt.close()
    plt.plot(np.array(range(max_M)), np.array(sse_M), label='Sum of Squared Errors')
    plt.savefig('problem2_OLS_SSEvsM.png')
    plt.close()
    plt.plot(np.array(range(max_M)), np.array(l_2_norm), label='L2 Norm of Weight Vector')
    plt.savefig('problem2_OLS_L2_NormvsM.png')
    plt.close()
    x, y = getData('curvefitting.txt')
    # drop two distinct points at random
    indices = np.random.choice(min(x.size, y.size), 2, replace=False)
    for ii in np.sort(indices)[::-1]:
        x = np.delete(x, ii)
        y = np.delete(y, ii)
    for M in [0, 1, 3, 9]:
        w_ols = max_likelihood(x, y, M)
        bishop_plot(x, y, w_ols, 'maximum likelihood w/missing, M = %s' % M)
        plt.savefig('problem2_mlmissing_M_%s.png' % M)
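The hand-rolled norm above is equivalent to NumPy's built-in vector norm; a quick self-contained check (toy weights, not from the dataset):

import numpy as np

w = np.array([0.5, -1.2, 3.0])
print(sum(ww**2 for ww in w)**0.5)  # 3.2695...
print(np.linalg.norm(w))            # same value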
Example No. 3
def problem_3_2(A_or_B, plot_individual=False):
    train_x, train_y = getData('regress%s_train.txt' % A_or_B)
    valid_x, valid_y = getData('regress_validate.txt')
    lams = np.linspace(0, 10, 101)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#808080', '#DDDD00', '#800000']
    best_lams = []
    best_sses = []
    for M in range(10):
        errors = []
        for lam in lams:
            error = problem_3_2_case(train_x, train_y, valid_x, valid_y,
                M, lam, A_or_B, plot_individual)
            errors.append(error)
        if not plot_individual:
            plt.plot(lams, errors, color=colors[M], 
                label='M=%s' % M)
        best_lams.append(np.argmin(errors)/10.)
        best_sses.append(min(errors))
    print best_lams
    print best_sses
    if not plot_individual:
        plt.ylim(0, 30 if A_or_B == 'A' else 100)
        plt.title(r'Ridge regression error by $M$ and $\lambda$, training set %s' % A_or_B)
        plt.xlabel(r'$\lambda$')
        plt.ylabel('sum of squared error (against validation data)')
        plt.legend()
        plt.savefig('problem3_2_train%s_meta.png' % A_or_B)
        plt.show()
        plt.close()
Example No. 4
def rangeQuery(api, start, end):
    TIMESTAMP = 'Timestamp'
    result = []
    # interior buckets between the two edge buckets can be taken wholesale
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMP, str(ts))
        if temp:
            result += temp
    # the edge buckets contain items outside [start, end]; filter with irange
    temp = api.liststreamkeyitems(TIMESTAMP, str(start // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    temp = api.liststreamkeyitems(TIMESTAMP, str(end // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))

    return result
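The function splits [start, end] into timestamp buckets of width SCALE: interior buckets are fetched whole, while the two edge buckets are filtered item by item. A self-contained sketch of that split (SCALE and the sample range are made up):

SCALE = 100

def split_range(start, end):
    # interior buckets lie entirely inside [start, end];
    # the two edge buckets need per-item filtering
    full = list(range(start // SCALE + 1, end // SCALE))
    edges = [start // SCALE, end // SCALE]
    return full, edges

full, edges = split_range(250, 430)
print(full)   # [3]    -> bucket 3 is fully covered
print(edges)  # [2, 4] -> buckets 2 and 4 are partial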
Example No. 5
def pointQuery(api, attribute, sort=False, reverse=False):
    result = getData(api.liststreamkeyitems(att_dict[attribute[0]],
                                            attribute[1:], False,
                                            MAX_RESULT)["result"])
    if DO_VALIDATION:
        if database.validate(result, attribute, True) is False:
            print("Wrong!")
    return result
Example No. 6
def pointQuery(api, attribute, sort=True, reverse=False):
    result = api.liststreamkeyitems(
        att_dict[attribute[0]], attribute[1:], False, FILE_SIZE)
    if DO_VALIDATION:
        validate(getData(result["result"]), attribute[1:])
    result = getData(result["result"])
    att_name = att_dict[attribute[0]]
    if type(ATTRIBUTE_TYPE[att_name]) is int and sort:
        result = sortResult(result, att_name, reverse)
    return result
Example No. 7
def pointQuery(api, attribute, value):
    result = getData(
        api.liststreamkeyitems(attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(
                api.liststreamkeyitems('Ref-ID', ID, False,
                                       MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp.append(r)
    result += temp
    return result
Example No. 8
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        # timestamps are bucketed by SCALE; fetch the matching bucket
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            # keep only the item whose exact timestamp matches the key
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    return result
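Each batched getstreamitem result carries its payload hex-encoded; the decode step above is a plain hex-to-text round trip. A standalone illustration (the payload contents are invented):

ENCODE_FORMAT = 'utf-8'

payload = '1632300031 node1 U42'.encode(ENCODE_FORMAT).hex()
print(bytes.fromhex(payload).decode(ENCODE_FORMAT))  # '1632300031 node1 U42'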
Example No. 9
def update_metrics(n):
    data = util.getData()
    twitter_before = data['original']
    twitter_after = data['text']
    session = data['session']
    timenow = [util.convertime(i[0]) for i in session]
    his = [i[1] for i in session]
    sessionnow = dict(zip(timenow, his))

    style = {'padding': '5px', 'fontSize': '25px'}
    result = []
    result.append(
        html.Span('Before PreProcess: ' + twitter_before, style=style))
    result.append(html.Div())
    result.append(html.Span('After PreProcess: ' + twitter_after, style=style))
    result.append(html.Div())
    for key, value in sessionnow.items():
        result.append(
            html.Span('History Session : ' + str(key) + str(value),
                      style=style))
        result.append(html.Div())

    return result
Example No. 10
def main():

    X, Y = getData()
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    N = len(Y)
    T = np.zeros((N, K))

    for i in range(N):
        T[i, Y[i]] = 1  # one hot encoding for targets

    batch_sz = 500
    learning_rates = [10e-5, 10e-6, 10e-7, 10e-8, 10e-9]
    trainCost = []
    validCost = []
    accValid = []
    accTrain = []
    for lr in learning_rates:
        m = nn.NeuralNetwork(numHiddenLayer=1,
                             numHiddenUnits=200,
                             actFunc="Tanh")
        # train one model per learning rate and collect its curves
        tc, vc, at, av = m.train(X, T, epochs=10000, learning_rate=lr,
                                 reg=10e-7)
        trainCost.append(tc)
        validCost.append(vc)
        accTrain.append(at)
        accValid.append(av)

    print("Final Train Accuracy {}".format(accTrain))
    print("Final Valid Accuracy {}".format(accValid))
    legend1, = plt.plot(trainCost, label='training error')
    legend2, = plt.plot(validCost, label='validation error')
    plt.legend([legend1, legend2])
    plt.show()
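The target-encoding loop near the top of main() can be vectorized with NumPy fancy indexing; a minimal equivalent (toy labels):

import numpy as np

Y = np.array([0, 2, 1, 2])
N, K = len(Y), Y.max() + 1
T = np.zeros((N, K))
T[np.arange(N), Y] = 1  # same one-hot matrix as the per-row loop
print(T)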
Example No. 11
def calEmotion(filename, city):
    # load dict
    most, very, more, insuff, ish, posdict, negdict, over = dict.init_dict()
    jieba.load_userdict("dic/stock_dict.txt")
    # init score
    score = {}
    cur_date = 0
    review_id = 0
    # init excel handler
    w, ws = createExcel()
    # open database & get data
    rawdata, conn, cur = util.getData(filename, city)
    for row in rawdata:
        # collect tokens for this row
        seg_list, cur_date, review = getTokens(score, row, cur_date)
        # score the sentiment of this comment and append it to score
        result = dict.sentiment_score_list(seg_list, posdict, negdict, most,
                                           very, more, ish, insuff, over)
        score[cur_date].append(result)
        # write down the review to xls
        review_id = review_id + 1
        ws.write(review_id, 0, review)
        ws.write(review_id, 1, result)
    w.save("excel/" + filename + ".xls")
    # close the connection with database
    util.closeDB(conn, cur)
    # generate the emotion list from score
    emotionList = genEmotionList(score)
    # plot the stock price and emotion for this stock
    showPriceAndEmotion(emotionList, filename, city)
Example No. 12
def main():
    X, Y = getData(balance_ones=False)

    # 48by48 = 2304 dimensions per sample

    # Data structure
    # 0-48 one row
    # 48-96 second row and so on

    # infinite loop; break out of it via input
    while True:
        # loop through the 7 emotions
        for i in range(7):
            # choose all the data points with this emotion label
            x, y = X[Y == i], Y[Y == i]
            N = len(y)  # number of data points with this emotion
            j = np.random.choice(N)  # randomly select one of them
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])  # show the emotion label as the title
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Example No. 13
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()

    model = ANN(200)
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True)

    print(model.score(Xvalid, Yvalid))
Example No. 14
def pointQuery(api, attribute, show=False, validation=DO_VALIDATION):
    result = api.liststreamkeyitems(STREAM, attribute, False, FILE_SIZE)
    if validation:
        validate(getData(result["result"]), attribute[1:])
    if show:
        display(result["result"])
    return result["result"]
Example No. 15
def calEmotion(filename, city):
    # load dict
    most, very, more, insuff, ish, posdict, negdict, over = dict.init_dict()
    jieba.load_userdict("dic/stock_dict.txt")
    # init score
    score = {}
    cur_date = 0
    review_id = 0
    # init excel handler
    w, ws = createExcel()
    # open database & get data
    rawdata, conn, cur = util.getData(filename, city)
    for row in rawdata:
        # collect tokens for this row
        seg_list, cur_date, review = getTokens(score, row, cur_date)
        # score the sentiment of this comment and append it to score
        result = dict.sentiment_score_list(seg_list, posdict, negdict, most,
                                           very, more, ish, insuff, over)
        score[cur_date].append(result)
        # write the review to the xls sheet
        review_id = review_id + 1
        ws.write(review_id, 0, review)
        ws.write(review_id, 1, result)
    w.save("excel/" + filename + ".xls")
    # close the connection with the database
    util.closeDB(conn, cur)
    # generate the emotion list from score
    emotionList = genEmotionList(score)
    # plot the stock price and emotion for this stock
    showPriceAndEmotion(emotionList, filename, city)
Example No. 16
def problem_3_1():
    x, y = getData('curvefitting.txt')
    for M in [1, 2, 3]:
        for lam in [0, 0.0003, 0.001, 0.003]:
            w_ridge = polynomial_ridge(x, y, M, lam)
            bishop_plot(x, y, w_ridge, 'ridge regression, M=%s, lam=%s' % (M, lam))
            plt.savefig('problem3_1_M_%s_lam_%s.png' % (M, lam))
            print('just tried M = %s, lam=%s' % (M, lam))
            print('    sse was %s' % sse(x, y, w_ridge))
Example No. 17
def rangeQuery(api, start, end):
    result = []
    stream = att_dict['T']
    timestamps = api.liststreamkeys(stream)["result"]
    sl = SortedList(list(map(int, [key['key'] for key in timestamps])))
    for timestamp in sl.irange(start, end):
        result += getData(
            api.liststreamkeyitems(stream, str(timestamp))['result'])
    return result
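SortedList.irange walks only the slice of the sorted keys that falls inside [start, end], rather than scanning every timestamp. A toy demonstration (values invented):

from sortedcontainers import SortedList

sl = SortedList([3, 7, 12, 19, 25])
print(list(sl.irange(5, 20)))  # [7, 12, 19]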
Example No. 18
def rangeQuery(api, start, end):
    TIMESTAMP = 'Timestamp'
    result = []
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMP, str(ts))
        if temp:
            result += temp
    temp = api.liststreamkeyitems(TIMESTAMP, str(start // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    temp = api.liststreamkeyitems(TIMESTAMP, str(end // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))

    return result
Example No. 19
def andQuery(api, attributes, show=False):
    resultSet = []
    for attr in attributes:
        resultSet.append(set(getData(pointQuery(api, attr))))
    result = resultSet[0]
    for i in range(1, len(resultSet)):
        result &= resultSet[i]
    if show:
        display(result)
    return result
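The pairwise loop is a fold over set intersection; the same thing in one call, with toy result sets standing in for the per-attribute queries:

from functools import reduce

resultSet = [{'a', 'b', 'c'}, {'b', 'c', 'd'}, {'b', 'c'}]
result = reduce(lambda acc, s: acc & s, resultSet)
print(sorted(result))  # ['b', 'c']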
Example No. 20
def __init__(self, inputDir, modelDir, action, **kwargs):
    self.action = action
    self.modelDir = modelDir
    self.inputDir = inputDir
    self.clf = None
    if self.action == 'training':
        self.mats1, self.spams1, self.mats2, self.spams2 = util.getData(self.inputDir)
    else:
        self.mats2, self.spams2 = util.getPredictData(self.inputDir)
        self.mats1, self.spams1 = None, None
        self.clf = joblib.load(self.modelDir)
Example No. 21
def update_graph(n):

    data = util.getData()
    # most recent session
    session = data['session'][-1]

    time = session[0]
    count = session[1]

    num = len(count.keys())

    sentiment_number = []
    for i in count.keys():
        sentiment_number.append((count[i][0], count[i][1]))

    labels = ['Positive', 'Negative']

    # generate single pie format
    piedata = []
    for i in range(num):
        onepie = util.PieOneData(
            [sentiment_number[i][0], sentiment_number[i][1]], labels, i)
        piedata.append(onepie)

    # generate annotations
    annotations = []
    # subtitle x-positions (hard-coded for up to four pies):

    if num == 1:
        position = [0.5]
    elif num == 2:
        position = [0.2, 0.8]
    elif num == 3:
        position = [0.15, 0.5, 0.85]
    elif num == 4:
        position = [0.11, 0.375, 0.625, 0.89]

    tmp = 0
    names = []
    for i in count.keys():
        annotations.append(util.subtitle(i, position[tmp]))
        names.append(i)
        tmp += 1

    title = " -- ".join(names)

    layout = util.layout(annotations, num, title)

    fig = util.PieAllData(piedata, layout)

    return fig
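The hard-coded position table only handles one to four pies; any other num would leave position undefined. A general fallback with evenly spaced centers gives nearly the same layout (a sketch, not from the original):

def subtitle_positions(num):
    # centers of num equal-width slots across [0, 1]
    return [(i + 0.5) / num for i in range(num)]

print(subtitle_positions(2))  # [0.25, 0.75]
print(subtitle_positions(4))  # [0.125, 0.375, 0.625, 0.875]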
Example No. 22
def pointQuery(api, attribute, value):
    if attribute == "Timestamp":
        TSResult = getData(api.liststreamkeyitems(
            'Timestamp', str(int(value) // SCALE), False, MAX_RESULT)["result"])
        for tsr in TSResult:
            ts = tsr.split(" ")[0]
            if ts == value:
                return [tsr]
    result = getData(api.liststreamkeyitems(
        attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(api.liststreamkeyitems(
                'Ref-ID', ID, False, MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp.append(r)
    result += temp
    return result
Example No. 23
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(len(label_map)):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Example No. 24
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(labels[y[j]])
            plt.show()
        prompt = input("Quit the program? Y/N\n")
        if prompt == 'Y':
            break
Example No. 25
def main():
    X, Y, _, _ = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt.lower().startswith('y'):
            break
Example No. 26
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Example No. 27
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    if stream == "User" or stream == 'Resource':
        # follow the node+RID references and fetch the referenced items
        # in a single batch call
        nrids = []
        for line in result:
            node, _, rid = line.split(" ")[1:4]
            nrids.append(node + 'R' + rid)
        args = [[NRID, nrid, False, MAX_RESULT] for nrid in nrids]
        temp = api.batch('liststreamkeyitems', args)
        ridResults = []
        for r in temp:
            ridResults += r["result"]
        txids = [tx["txid"] for tx in ridResults]
        args = [[DATA, txid] for txid in txids]
        results = api.batch('getstreamitem', args)
        temp = []
        for r in results:
            data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
            temp.append(data)
        result += temp
    return result
Example No. 28
def run_standalone(c, w, o):

    config, weights = loadModelFromDisk(c, w)

    x_train, y_train = getData()

    if x_train is None or y_train is None:
        print("Data could not be loaded, abort.", file=sys.stderr)
        sys.exit(1)
    else:
        print("Data loaded")

    print("Starting training")
    new_weights = trainModel(config, weights, x_train, y_train)
    print("Finished training")

    writeUpdatesToDisk(new_weights, o)
Example No. 29
def main():
    t0 = datetime.now()
    print("now:  ", t0)

    X, Y = getData()

    dt = datetime.now() - t0
    print("dt:  ", dt)

    M = 10  # hidden units

    model = ANN(M)
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
Example No. 30
def problem_2(M):
    x, y = getData('curvefitting.txt')
    w_ols = max_likelihood(x, y, M)
    print(w_ols)
    low_limit_x = min(x) - 0.1*abs(max(x)-min(x))
    hi_limit_x = max(x) + 0.1*abs(max(x)-min(x))
    x_points = np.linspace(low_limit_x, hi_limit_x, 100)
    y_hat = np.array([compute_yhat(pt, w_ols) for pt in x_points])
    low_limit_y = min(y) - 0.2*abs(max(y)-min(y))
    hi_limit_y = max(y) + 0.2*abs(max(y)-min(y))
    plt.plot(x_points, y_hat, '-b', label='Max Likelihood, M='+str(M))
    plt.scatter(x, y, label='Data')
    plt.plot(x_points, np.sin(2*np.pi*x_points), '-', color='0.5', label='Sin(2*pi*x)')
    plt.xlim(low_limit_x, hi_limit_x)
    plt.ylim(low_limit_y, hi_limit_y)
    plt.legend()
    # save before show(): show() clears the figure, and a later savefig
    # would write a blank image
    plt.savefig('Problem2_M_'+str(M)+'.png')
    plt.show()
Example No. 31
def pointQuery(api, attribute, sort=False, reverse=False):
    # resolve the attribute to data pointers, then fetch each pointed-to
    # item in one batch call
    pointers = getPointers(api, attribute)
    result = []
    args = [[DATA, p, False, MAX_RESULT] for p in pointers]
    results = api.batch('liststreamkeyitems', args)
    for r in results:
        result.append(getData(r["result"])[0])
    return result
Example No. 32
def main():
    X, Y, Xtest, Ytest = getData()

    print(Y)

    while True:
        for i in range(7):  # loop through labels
            # select data that corresponds to this label
            x, y = X[Y == i], Y[Y == i]
            print("x, y: ", x, y)
            N = len(y)
            print("N: ", N)
            j = np.random.choice(N)  # select a random image index
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Y to quit:')
        if prompt == 'Y':
            break
Example No. 33
def main():
    X, Y = getData()
    model = LogisticModel()
    model.fit(X, Y)
    print(model.score(X, Y))
Example No. 34
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000])
    model.fit(X, Y, show_fig=True)
Example No. 35
# -*- coding:utf-8 -*-
import codecs

import numpy as np

from util.getData import *
from regression.standRegression import *

'''
train_data:xtrain
train_label:ytrain
test_data:xpred
test_time:date_time
'''

getdata = getData()

#                 0        1        2         3         4               5             6               7             8
# :return: data [weekday, weather, max temp, min temp, holiday length, i-th holiday, workday length, i-th workday, date,
#                  9     10    11
#                 hour, line, count (y)]

# :return: the test data set from 20150101 to 20150107
#             0     1        2        3         4         5               6             7               8             9
#          [date, weekday, weather, max temp, min temp, holiday length, i-th holiday, workday length, i-th workday, hour]
features = [1, 2, 3, 4, 5, 7, 9]

data = getdata.get_train_data(min_day="20140901", max_day="20141231", line_num=[6])
train = np.array(data)
Example No. 36
def main():
    X, Y = getData()

    model = ANN(200)
    model.fit(X, Y, reg=0, show_fig=True)
    print(model.score(X, Y))
Example No. 37
import requests
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dts

from keras.models import Sequential
from keras.layers import Dense, Dropout
from util import getData, processData

np.random.seed(7)

if __name__ == "__main__":
    data = getData('2012-01-10', '2017-08-25')

    X_train, X_test, Y_train, Y_test = processData(data)
    print(X_train)


    model = Sequential()
    model.add(Dense(500, input_shape=(19, ), init='uniform',
                    activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(400, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(250, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(1, init='uniform', activation='linear'))
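
    # The example stops before compiling or training; a minimal completion
    # sketch under assumed settings (mse loss, adam optimizer, epoch and
    # batch counts are guesses, not from the original):
    model.compile(loss='mse', optimizer='adam')
    model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
              epochs=100, batch_size=32)
    print(model.evaluate(X_test, Y_test, verbose=0))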
Example No. 38
import numpy as np
import sys
import json
import util
from sklearn.linear_model import LogisticRegression

C = 1.1
mats1, spams1, mats2, spams2 = util.getData(sys.argv[1])

# sweep C over a few values and report train/validation accuracy
for i in range(3):
    print(C)
    clf = LogisticRegression(C=C).fit(mats1, spams1)
    predict1 = clf.predict(mats1)
    predict2 = clf.predict(mats2)
    print(util.validation(predict1, spams1), util.validation(predict2, spams2))
    C = C + 0.5
Example No. 39
def main():
    X, Y = getData()
    
    model = LogisticModel()
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
Example No. 40
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    print("read data..........")
    model = ANN([2000, 1000, 500])
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)
Example No. 41
# -*- coding:utf-8 -*-
__author__ = 'HeYongxing'

import numpy as np
from matplotlib.pylab import show
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from util.getData import *


# Line No. 10
# [weekday, weather, max temp, min temp, hour, line, count (y), datetime]
getdata = getData()

data = getdata.get_train_data(min_day="20140901", max_day="20141231", line_num=[15])
train = np.array(data)
xtrain = train[:, 0:-3]
xdate = train[:, -1]
ytrain = train[:, -2]

Example No. 42
from random import shuffle
from flask import (Flask, request, make_response, jsonify, redirect)
from util import getData, getData2
import os

app = Flask(__name__)

songs, movies = getData()
_t = getData2()
songs.update(_t)

# build index -> label maps; each entry's label ends with "(index)"
song_indices = {}
for v in songs.values():
    for each_song in v:
        index = str(each_song[1].split()[-1][1:-1])
        label = ' '.join(each_song[1].split()[:-1])
        song_indices[index] = label


movie_indices = {}
for v in movies.values():
    for each_movie in v:
        index = str(each_movie[1].split()[-1][1:-1])
        label = ' '.join(each_movie[1].split()[:-1])
        movie_indices[index] = label
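
Each label is assumed to end with its index in parentheses, e.g. "Some Title (123)"; the slicing above peels that apart. A standalone check (the sample string is invented):

entry = 'Some Title (123)'
index = str(entry.split()[-1][1:-1])   # strip the surrounding parentheses
label = ' '.join(entry.split()[:-1])
print(index, '->', label)  # 123 -> Some Title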


@app.route('/')
def index():
    return make_response(open('templates/index.html').read())
Example No. 43
def main():
    X, Y = getData()
    
    model = ANN(200)
    model.fit(X, Y, reg=0, show_fig=True)
    print(model.score(X, Y))
Example No. 44
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000, 500])
    model.fit(X, Y, show_fig=True)
Example No. 45
"""
Calculates walking distances between a random sampling of buildings and compares them to the
optimal straight-line path.

Sam Shen
"""

import json, util, sys
import networkx as nx
from decimal import *
from osmread import Node

FIRST_RUN = False
SAMPLE_REPEAT_SIZE = 3  # the number of times to sample
SAMPLE_SIZE_SQRT = 8    # the square root of the sample size

graph, nodes = util.init_graph(util.getData('../data/berkeley_map.osm'))
print("Initialized graph...")
buildings = json.load(open("../data/buildings.json"))

# only have to run this part once
if FIRST_RUN:
    util.batch_discretize(nodes, buildings, '../data/discrete_locs.txt')


str_coords = []         # list of string coordinate pairs
dec_coords = set()      # set of decimal coordinate pairs
discrete_nodes = []     # the nodes that represent discrete buildings
try:
    file = open("../data/discrete_locs.txt")
except FileNotFoundError:
    print("Did you download discrete_locs.txt OR run batch discretizing by setting FIRST_RUN to True?")