Code example #1
File: baseline4_1.py Project: Emory-AIMS/idash2018
def pointQuery(api, attribute, sort=False, reverse=False):
    # The first character of `attribute` selects the stream; the rest is the key.
    result = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                    False, MAX_RESULT)
    result = getData(result["result"])
    temp = []
    if attribute[0] == 'U' or attribute[0] == 'R':
        # User/Resource queries also pull the records cross-referenced
        # through the node + record-ID (NRID) stream.
        for line in result:
            node, _, RID = line.split(" ")[1:4]
            nrid = node + 'R' + RID
            RIDResult = getData(
                api.liststreamkeyitems(NRID, nrid, False,
                                       MAX_RESULT)["result"])
            temp += RIDResult
    result += temp
    if DO_VALIDATION:
        if database.validate(result, attribute, True) is False:
            print("Wrong!")
    return result
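The attribute string packs the stream selector into its first character. A hedged usage sketch, assuming att_dict maps 'U' to the user stream and that 'U42' is a made-up key:

# hypothetical call: 'U' selects the user stream, '42' is the key within it
records = pointQuery(api, 'U42')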
Code example #2
def problem_2_1(max_M=15):
    sse_M = []
    l_2_norm = []
    for i in range(max_M):
        x, y = getData('curvefitting.txt')
        w_ols = max_likelihood(x, y, i)
        bishop_plot(x, y, w_ols, 'maximum likelihood, M = %s' % i)
        plt.savefig('problem2_ml_M_%s.png' % i)
        sse_M.append(sse(x, y, w_ols))
        l_2_norm.append(sum([ii**2 for ii in w_ols])**0.5)
    plt.close()
    plt.plot(np.array(range(max_M)), np.array(sse_M), label='Sum of Squared Errors')
    plt.legend()
    plt.savefig('problem2_OLS_SSEvsM.png')
    plt.close()
    plt.plot(np.array(range(max_M)), np.array(l_2_norm), label='L2 Norm of Weight Vector')
    plt.legend()
    plt.savefig('problem2_OLS_L2_NormvsM.png')
    plt.close()
    x, y = getData('curvefitting.txt')
    # pick two distinct indices in range; sampling without replacement
    # avoids deleting the same point twice or indexing past the end
    indices = np.random.choice(x.size, 2, replace=False)
    for ii in np.sort(indices)[::-1]:
        x = np.delete(x, ii)
        y = np.delete(y, ii)
    for M in [0,1,3,9]:
        w_ols = max_likelihood(x,y,M)
        bishop_plot(x,y,w_ols,'maximum likelihood w/missing, M = %s' % M)
        plt.savefig('problem2_mlmissing_M_%s.png' % M)
Code example #3
def problem_3_2(A_or_B, plot_individual=False):
    train_x, train_y = getData('regress%s_train.txt' % A_or_B)
    valid_x, valid_y = getData('regress_validate.txt')
    lams = np.linspace(0, 10, 101)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#808080', '#DDDD00', '#800000']
    best_lams = []
    best_sses = []
    for M in range(10):
        errors = []
        for lam in lams:
            error = problem_3_2_case(train_x, train_y, valid_x, valid_y,
                M, lam, A_or_B, plot_individual)
            errors.append(error)
        if not plot_individual:
            plt.plot(lams, errors, color=colors[M], label='M=%s' % M)
        best_lams.append(lams[np.argmin(errors)])  # the lam value at the minimum
        best_sses.append(min(errors))
    print(best_lams)
    print(best_sses)
    if not plot_individual:
        plt.ylim(0, 30 if A_or_B == 'A' else 100)
        plt.title(r'Ridge regression error by $M$ and $\lambda$, training set %s' % A_or_B)
        plt.xlabel(r'$\lambda$')
        plt.ylabel('sum of squared error (against validation data)')
        plt.legend()
        plt.savefig('problem3_2_train%s_meta.png' % A_or_B)
        plt.show()
        plt.close()
Code example #4
def rangeQuery(api, start, end):
    TIMESTAMP = 'Timestamp'
    result = []
    # Timestamps are bucketed by SCALE: buckets strictly inside the range
    # are taken whole; the two boundary buckets are filtered item by item.
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMP, str(ts))
        if temp:
            result += temp
    temp = api.liststreamkeyitems(TIMESTAMP, str(start // SCALE))['result']
    if temp:
        data = getData(temp)
        # note: keys compare as strings, which is only safe while all
        # timestamps in a bucket have the same number of digits
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    temp = api.liststreamkeyitems(TIMESTAMP, str(end // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))

    return result
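The same bucket-by-SCALE layout drives several of the range queries on this page. A minimal self-contained sketch of the idea, using a plain Python dict and made-up integer timestamps instead of MultiChain streams:

SCALE = 10  # assumed bucket size; the real value comes from the project's config

# records grouped by bucket id = timestamp // SCALE
buckets = {1: [12, 15, 19], 2: [20, 24, 27], 3: [31, 38]}

def range_query(start, end):
    result = []
    # buckets strictly between the boundary buckets need no filtering
    for b in range(start // SCALE + 1, end // SCALE):
        result += buckets.get(b, [])
    # the (at most two) boundary buckets are filtered item by item
    for b in {start // SCALE, end // SCALE}:
        result += [t for t in buckets.get(b, []) if start <= t <= end]
    return sorted(result)

print(range_query(15, 31))  # -> [15, 19, 20, 24, 27, 31]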
Code example #5
def pointQuery(api, attribute, sort=False, reverse=False):
    result = api.liststreamkeyitems(att_dict[attribute[0]], attribute[1:],
                                    False, MAX_RESULT)
    data = getData(result["result"])
    if DO_VALIDATION:
        if database.validate(data, attribute, True) is False:
            print("Wrong!")
    return data
Code example #6
File: baseline3sort.py Project: Emory-AIMS/idash2018
def pointQuery(api, attribute, sort=True, reverse=False):
    result = api.liststreamkeyitems(
        att_dict[attribute[0]], attribute[1:], False, FILE_SIZE)
    if DO_VALIDATION:
        validate(getData(result["result"]), attribute[1:])
    result = getData(result["result"])
    att_name = att_dict[attribute[0]]
    if type(ATTRIBUTE_TYPE[att_name]) is int and sort:
        result = sortResult(result, att_name, reverse)
    return result
Code example #7
def pointQuery(api, attribute, value):
    result = getData(
        api.liststreamkeyitems(attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(
                api.liststreamkeyitems('Ref-ID', ID, False,
                                       MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp += [r]
    result += temp
    return result
Code example #8
File: baseline8.py Project: Emory-AIMS/idash2018
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    # Resolve the transaction ids to their payloads in one batched RPC call.
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            # A timestamp bucket holds several records; keep the exact match only.
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    return result
Code example #9
def update_metrics(n):
    data = util.getData()
    twitter_before = data['original']
    twitter_after = data['text']
    session = data['session']
    timenow = [util.convertime(i[0]) for i in session]
    his = [i[1] for i in session]
    sessionnow = dict(zip(timenow, his))

    style = {'padding': '5px', 'fontSize': '25px'}
    result = []
    result.append(
        html.Span('Before PreProcess: ' + twitter_before, style=style))
    result.append(html.Div())
    result.append(html.Span('After PreProcess: ' + twitter_after, style=style))
    result.append(html.Div())
    for key, value in sessionnow.items():
        result.append(
            html.Span('History Session : ' + str(key) + str(value),
                      style=style))
        result.append(html.Div())

    return result
Code example #10
def main():

    X, Y = getData()
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    N = len(Y)
    T = np.zeros((N, K))

    for i in range(N):
        T[i, Y[i]] = 1  # one hot encoding for targets

    batch_sz = 500
    # learning rates to sweep, one decade apart
    learning_rates = [10e-5, 10e-6, 10e-7, 10e-8, 10e-9]
    trainCost = []
    validCost = []
    accValid = []
    accTrain = []
    for lr in learning_rates:
        m = nn.NeuralNetwork(numHiddenLayer=1,
                             numHiddenUnits=200,
                             actFunc="Tanh")
        # train one model per learning rate and record its curves
        tc, vc, at, av = m.train(X, T, epochs=10000, learning_rate=lr,
                                 reg=10e-7)
        trainCost.append(tc)
        validCost.append(vc)
        accTrain.append(at)
        accValid.append(av)

    print("Final Train Accuracy {}".format(accTrain))
    print("Final Valid Accuracy {}".format(accValid))
    legend1, = plt.plot(trainCost, label='training error')
    legend2, = plt.plot(validCost, label='validation error')
    plt.legend([legend1, legend2])
    plt.show()
Code example #11
def calEmotion(filename, city):
    # load dict
    most, very, more, insuff, ish, posdict, negdict, over = dict.init_dict()
    jieba.load_userdict("dic/stock_dict.txt")
    # init score
    score = {}
    cur_date = 0
    review_id = 0
    # init Excel handler
    w, ws = createExcel()
    # open database & get data
    rawdata, conn, cur = util.getData(filename, city)
    for row in rawdata:
        # collect tokens (generator-based)
        seg_list, cur_date, review = getTokens(score, row, cur_date)
        # score one comment with the sentiment dictionaries and append it to score
        result = dict.sentiment_score_list(seg_list, posdict, negdict, most,
                                           very, more, ish, insuff, over)
        score[cur_date].append(result)
        # write down the review to xls
        review_id = review_id + 1
        ws.write(review_id, 0, review)
        ws.write(review_id, 1, result)
    w.save("excel/" + filename + ".xls")
    # close the connection with database
    util.closeDB(conn, cur)
    # generate the emotionlist from score
    emotionList = genEmotionList(score)
    # plot the stock price and emotion for this stock
    showPriceAndEmotion(emotionList, filename, city)
Code example #12
def main():
    X, Y = getData(balance_ones=False)

    # 48by48 = 2304 dimensions per sample

    # Data structure
    # 0-48 one row
    # 48-96 second row and so on

    # infinite loop; break out of it via input
    while True:
        # loop through the 7 emotions
        for i in range(7):
            # choose all the data points labeled with this emotion
            x, y = X[Y == i], Y[Y == i]
            N = len(y)  # number of data points with this emotion
            j = np.random.choice(N)  # randomly select one of them
            plt.imshow(x[j].reshape(48, 48), cmap='gray')  # reshape and plot it
            plt.title(label_map[y[j]])  # show the label (emotion) as the title
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Code example #13
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()

    model = ANN(200)
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True)

    print(model.score(Xvalid, Yvalid))
Code example #14
File: baseline1.py Project: Emory-AIMS/idash2018
def pointQuery(api, attribute, show=False, validation=DO_VALIDATION):
    result = api.liststreamkeyitems(STREAM, attribute, False, FILE_SIZE)
    if validation:
        validate(getData(result["result"]), attribute[1:])
    if show:
        # `show` rather than `display` so the flag does not shadow the display() helper
        display(result["result"])
    return result["result"]
Code example #15
File: score.py Project: frankyzhou/stock
def calEmotion(filename, city):
    # load dictionaries
    most, very, more, insuff, ish, posdict, negdict, over = dict.init_dict()
    jieba.load_userdict("dic/stock_dict.txt")
    # init score
    score = {}
    cur_date = 0
    review_id = 0
    # init Excel handler
    w, ws = createExcel()
    # open database & get data
    rawdata, conn, cur = util.getData(filename, city)
    for row in rawdata:
        # collect tokens (generator-based)
        seg_list, cur_date, review = getTokens(score, row, cur_date)
        # score one comment with the sentiment dictionaries and append it to score
        result = dict.sentiment_score_list(seg_list, posdict, negdict, most,
                                           very, more, ish, insuff, over)
        score[cur_date].append(result)
        # write the review to the spreadsheet
        review_id = review_id + 1
        ws.write(review_id, 0, review)
        ws.write(review_id, 1, result)
    w.save("excel/" + filename + ".xls")
    # close the database connection
    util.closeDB(conn, cur)
    # generate the emotion list from score
    emotionList = genEmotionList(score)
    # plot the stock price and emotion for this stock
    showPriceAndEmotion(emotionList, filename, city)
Code example #16
def problem_3_1():
    x, y = getData('curvefitting.txt')
    for M in [1, 2, 3]:
        for lam in [0, 0.0003, 0.001, 0.003]:
            w_ridge = polynomial_ridge(x, y, M, lam)
            bishop_plot(x, y, w_ridge, 'ridge regression, M=%s, lam=%s' % (M, lam))
            plt.savefig('problem3_1_M_%s_lam_%s.png' % (M, lam))
            print('just tried M = %s, lam=%s' % (M, lam))
            print('    sse was %s' % sse(x, y, w_ridge))
Code example #17
def rangeQuery(api, start, end):
    result = []
    stream = att_dict['T']
    timestamps = api.liststreamkeys(stream)["result"]
    sl = SortedList(list(map(int, [key['key'] for key in timestamps])))
    for timestamp in sl.irange(start, end):
        result += getData(
            api.liststreamkeyitems(stream, str(timestamp))['result'])
    return result
Code example #18
def rangeQuery(api, start, end):
    TIMESTAMP = 'Timestamp'
    result = []
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMP, str(ts))
        if temp:
            result += temp
    temp = api.liststreamkeyitems(TIMESTAMP, str(start // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))
    temp = api.liststreamkeyitems(TIMESTAMP, str(end // SCALE))['result']
    if temp:
        data = getData(temp)
        sl = SortedList(data, key=lambda a: a.split(" ")[0])
        result += list(sl.irange(str(start), str(end)))

    return result
Code example #19
File: baseline1.py Project: Emory-AIMS/idash2018
def andQuery(api, attributes, show=False):
    resultSet = []
    for attr in attributes:
        resultSet.append(set(getData(pointQuery(api, attr))))
    # intersect the per-attribute result sets
    result = resultSet[0]
    for i in range(1, len(resultSet)):
        result &= resultSet[i]
    if show:
        # `show` rather than `display` so the flag does not shadow the display() helper
        display(result)
    return result
Code example #20
File: runner.py Project: oasis2811623/training
def __init__(self, inputDir, modelDir, action, **kwargs):
    self.action = action
    self.modelDir = modelDir
    self.inputDir = inputDir
    self.clf = None
    if self.action == 'training':
        self.mats1, self.spams1, self.mats2, self.spams2 = util.getData(self.inputDir)
    else:
        self.mats2, self.spams2 = util.getPredictData(self.inputDir)
        self.mats1, self.spams1 = None, None
        self.clf = joblib.load(self.modelDir)
Code example #21
def update_graph(n):

    data = util.getData()
    # most recent session
    session = data['session'][-1]

    time = session[0]
    count = session[1]

    num = len(count.keys())

    sentiment_number = []
    for i in count.keys():
        sentiment_number.append((count[i][0], count[i][1]))

    labels = ['Positive', 'Negative']

    # generate single-pie format
    piedata = []
    for i in range(num):
        onepie = util.PieOneData(
            [sentiment_number[i][0], sentiment_number[i][1]], labels, i)
        piedata.append(onepie)

    # generate annotations; position holds the x-coordinate of each subtitle
    annotations = []

    if num == 1:
        position = [0.5]
    elif num == 2:
        position = [0.2, 0.8]
    elif num == 3:
        position = [0.15, 0.5, 0.85]
    else:  # num == 4; layouts above four sessions are not defined here
        position = [0.11, 0.375, 0.625, 0.89]

    tmp = 0
    names = []
    for i in count.keys():
        annotations.append(util.subtitle(i, position[tmp]))
        names.append(i)
        tmp += 1

    title = " -- ".join(names)

    layout = util.layout(annotations, num, title)

    fig = util.PieAllData(piedata, layout)

    return fig
Code example #22
def pointQuery(api, attribute, value):
    if attribute == "Timestamp":
        TSResult = getData(api.liststreamkeyitems(
            'Timestamp', str(int(value) // SCALE), False, MAX_RESULT)["result"])
        for tsr in TSResult:
            ts = tsr.split(" ")[0]
            if ts == value:
                return [tsr]
    result = getData(api.liststreamkeyitems(
        attribute, value, False, MAX_RESULT)["result"])
    temp = []
    if attribute == "User" or attribute == "Resource":
        for line in result:
            node, ID = line.split(" ")[1:3]
            RIDResult = getData(api.liststreamkeyitems(
                'Ref-ID', ID, False, MAX_RESULT)["result"])
            for r in RIDResult:
                if r.split(" ")[1] == node:
                    temp += [r]
    result += temp
    return result
Code example #23
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(len(label_map)):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()  # render each image; nothing appears without this in script mode
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Code example #24
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(labels[y[j]])
            plt.show()
        prompt = input("Quit the program? Y/N\n")
        if prompt == 'Y':
            break
Code example #25
def main():
    X, Y, _, _ = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y == i], Y[Y == i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt.lower().startswith('y'):
            break
Code example #26
def main():
    X, Y = getData(balance_ones=False)

    while True:
        for i in range(7):
            x, y = X[Y==i], Y[Y==i]
            N = len(y)
            j = np.random.choice(N)
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Quit? Enter Y:\n')
        if prompt == 'Y':
            break
Code example #27
def pointQuery(api, stream, key):
    if stream == "Timestamp":
        txids = getTXids(api, PREFIX + str(SCALE), str(int(key) // SCALE))
    elif stream == DATA or stream == 'Activity':
        return getData(
            api.liststreamkeyitems(stream, key, False, MAX_RESULT)["result"])
    else:
        txids = getTXids(api, stream, key)
    args = [[DATA, txid] for txid in txids]
    results = api.batch('getstreamitem', args)
    result = []
    for r in results:
        data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
        if stream == "Timestamp":
            if data.split(" ")[0] == key:
                result = [data]
                break
        else:
            result.append(data)
    if stream == "User" or stream == 'Resource':
        # Follow the node + record-ID cross-references with batched RPC calls.
        nrids = []
        for line in result:
            node, _, rid = line.split(" ")[1:4]
            nrids.append(node + 'R' + rid)
        args = [[NRID, nrid, False, MAX_RESULT] for nrid in nrids]
        temp = api.batch('liststreamkeyitems', args)
        ridResults = []
        for r in temp:
            ridResults += r["result"]
        txids = [tx["txid"] for tx in ridResults]
        args = [[DATA, txid] for txid in txids]
        results = api.batch('getstreamitem', args)
        temp = []
        for r in results:
            data = bytes.fromhex(r["result"]["data"]).decode(ENCODE_FORMAT)
            temp.append(data)
        result += temp
    return result
Code example #28
File: train.py Project: fredrikorn/DecFL
def run_standalone(c, w, o):

    config, weights = loadModelFromDisk(c, w)

    x_train, y_train = getData()

    if x_train is None or y_train is None:
        print("Data could not be loaded, abort.", file=sys.stderr)
        sys.exit(1)
    else:
        print("Data loaded")

    print("Starting training")
    new_weights = trainModel(config, weights, x_train, y_train)
    print("Finished training")

    writeUpdatesToDisk(new_weights, o)
Code example #29
def main():
    t0 = datetime.now()
    print("now:  ", t0)

    X, Y = getData()
    dt = datetime.now() - t0
    print("dt:  ", dt)

    M = 10  # hidden units
    model = ANN(M)
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
Code example #30
def problem_2(M):
    x, y = getData('curvefitting.txt')
    w_ols = max_likelihood(x, y, M)
    print(w_ols)
    low_limit_x = min(x) - 0.1*abs(max(x)-min(x))
    hi_limit_x = max(x) + 0.1*abs(max(x)-min(x))
    x_points = np.linspace(low_limit_x, hi_limit_x, 100)
    y_hat = np.array([compute_yhat(pt, w_ols) for pt in x_points])
    low_limit_y = min(y) - 0.2*abs(max(y)-min(y))
    hi_limit_y = max(y) + 0.2*abs(max(y)-min(y))
    plt.plot(x_points, y_hat, '-b', label='Max Likelihood, M='+str(M))
    plt.scatter(x, y, label='Data')
    plt.plot(x_points, np.sin(2*np.pi*x_points), '-', color='0.5', label='Sin(2*pi*x)')
    plt.xlim(low_limit_x, hi_limit_x)
    plt.ylim(low_limit_y, hi_limit_y)
    plt.legend()
    # save before show(): show() hands the figure to the GUI, and a later
    # savefig() can write an empty canvas
    plt.savefig('Problem2_M_'+str(M)+'.png')
    plt.show()
Code example #31
def pointQuery(api, attribute, sort=False, reverse=False):
    # Resolve the attribute to data pointers, then fetch every pointed-to
    # item in one batched RPC call.
    pointers = getPointers(api, attribute)
    result = []
    args = [[DATA, p, False, MAX_RESULT] for p in pointers]
    results = api.batch('liststreamkeyitems', args)
    for r in results:
        result.append(getData(r["result"])[0])
    return result
Code example #32
def main():
    X, Y, Xtest, Ytest = getData()
    print(Y)

    while True:
        for i in range(7):  # loop through labels
            # select the data that corresponds to this label
            x, y = X[Y == i], Y[Y == i]
            print("x, y: ", x, y)
            N = len(y)
            print("N: ", N)
            j = np.random.choice(N)  # select a random image index
            plt.imshow(x[j].reshape(48, 48), cmap='gray')
            plt.title(label_map[y[j]])
            plt.show()
        prompt = input('Y to quit:')
        if prompt == 'Y':
            break
Code example #33
File: regression.py Project: ZUOYANGDING/dpl
def main():
    X, Y = getData()
    model = LogisticModel()
    model.fit(X, Y)
    print(model.score(X, Y))
Code example #34
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000])
    model.fit(X, Y, show_fig=True)
Code example #35
File: commit.py Project: lioo717/alitraffic
# -*- coding:utf-8 -*-
import codecs

import numpy as np

from util.getData import *
from regression.standRegression import *

'''
train_data: xtrain
train_label: ytrain
test_data: xpred
test_time: date_time
'''

getdata = getData()

#                0        1        2         3         4               5             6               7             8
# :return: data [weekday, weather, max temp, min temp, holiday length, i-th holiday, workday length, i-th workday, date,
#                9     10    11
#                hour, line, count (y)]

# :return: the test data set covering 20150101 through 20150107
#           0     1        2        3         4         5               6             7               8             9
#          [date, weekday, weather, max temp, min temp, holiday length, i-th holiday, workday length, i-th workday, hour]
features = [1, 2, 3, 4, 5, 7, 9]

data = getdata.get_train_data(min_day="20140901", max_day="20141231", line_num=[6])
train = np.array(data)
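Given the column layout documented above, selecting the feature columns and the target is direct. A minimal sketch, assuming get_train_data returns the columns exactly as listed (with count (y) at index 11):

xtrain = train[:, features]  # weekday, weather, temps, holiday/workday info, hour
ytrain = train[:, 11]        # count (y), the last column in the layout above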
Code example #36
def main():
    X, Y = getData()

    model = ANN(200)
    model.fit(X, Y, reg=0, show_fig=True)
    print(model.score(X, Y))
Code example #37
File: kerasnn.py Project: Alyndre/DeepLearningPython
import requests
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dts

from keras.models import Sequential
from keras.layers import Dense, Dropout
from util import getData, processData

np.random.seed(7)

if __name__ == "__main__":
    data = getData('2012-01-10', '2017-08-25')

    X_train, X_test, Y_train, Y_test = processData(data)
    print(X_train)

    # nn = NN([(39, 500)], [500, 1])
    # nn.train(X_train, Y_train, X_test, Y_test, 10000)

    model = Sequential()
    model.add(Dense(500, input_shape=(19, ), init='uniform',
                    activation='relu'))
    # model.add(Dense(500, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(400, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(250, init='uniform', activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(1, init='uniform', activation='linear'))
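The snippet ends before the network is compiled and trained. A hedged continuation under the same old-style Keras API; the loss, optimizer, and fit hyperparameters below are assumptions, not taken from the original file:

# assumed continuation (illustrative hyperparameters, not from the original)
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
          epochs=100, batch_size=32, verbose=2)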
Code example #38
import numpy as np
import sys
import json
import util
from sklearn.linear_model import LogisticRegression

C = 1.1
mats1, spams1, mats2, spams2 = util.getData(sys.argv[1])

for i in range(3):
    print(C)
    clf = LogisticRegression(C=C).fit(mats1, spams1)
    predict1 = clf.predict(mats1)
    predict2 = clf.predict(mats2)
    print(util.validation(predict1, spams1), util.validation(predict2, spams2))
    C = C + 0.5
Code example #39
def main():
    X, Y = getData()
    
    model = LogisticModel()
    model.fit(X, Y, show_fig=True)
    print(model.score(X, Y))
Code example #40
def main():
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    print("read data..........")
    model = ANN([2000, 1000, 500])
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)
Code example #41
File: FeatureTest.py Project: lioo717/alitraffic
# -*- coding:utf-8 -*-
__author__ = 'HeYongxing'

import numpy as np
from matplotlib.pylab import show
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from util.getData import *


# Line 10
# [weekday, weather, max temp, min temp, hour, line, count (y), datetime]
getdata = getData()

data = getdata.get_train_data(min_day="20140901", max_day="20141231", line_num=[15])
train = np.array(data)
xtrain = train[:, 0:-3]
xdate = train[:, -1]
ytrain = train[:, -2]

# Exploratory scatter plot of ridership by weekday:
# fig = figure()
# ax1 = fig.add_subplot(1, 1, 1)
# ax1.scatter(xtrain[:, 0], ytrain, c='red',
#             alpha=0.3, edgecolors='none')
# ax1.set_xlabel('weekday')
# ax1.grid(True)
# show()
Code example #42
File: app.py Project: yemyat89/MemoryLane
from random import shuffle
from flask import (Flask, request, make_response, jsonify, redirect)
from util import getData, getData2
import os

app = Flask(__name__)

songs, movies = getData()
_t = getData2()
songs.update(_t)

song_indices = {}
for v in songs.values():
    for each_song in v:
        index = str(each_song[1].split()[-1][1:-1])
        label = ' '.join(each_song[1].split()[:-1])
        song_indices[index] = label


movie_indices = {}
for v in movies.values():
    for each_movie in v:
        index = str(each_movie[1].split()[-1][1:-1])
        label = ' '.join(each_movie[1].split()[:-1])
        movie_indices[index] = label


@app.route('/')
def index():
    return make_response(open('templates/index.html').read())
Code example #43
def main():
    X, Y = getData()
    
    model = ANN(200)
    model.fit(X, Y, reg=0, show_fig=True)
    print(model.score(X, Y))
Code example #44
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000, 500])
    model.fit(X, Y, show_fig=True)
Code example #45
File: Main.py Project: samuelwshen/Campus-Pathing
"""
Calculates walking distances between a random sampling of buildings and compares to the optimal
straight-line path.

Sam Shen
"""

import json, util, sys
import networkx as nx
from decimal import *
from osmread import Node

FIRST_RUN = False
SAMPLE_REPEAT_SIZE = 3  # the number of times to sample
SAMPLE_SIZE_SQRT = 8    # the square root of the sample size

graph, nodes = util.init_graph(util.getData('../data/berkeley_map.osm'))
print("Initialized graph...")
buildings = json.load(open("../data/buildings.json"))

# only have to run this part once
if FIRST_RUN:
    util.batch_discretize(nodes, buildings, '../data/discrete_locs.txt')


str_coords = []         # list of string coordinate pairs
dec_coords = set()      # set of decimal coordinate pairs
discrete_nodes = []     # the nodes that represent discrete buildings
try:
    file = open("../data/discrete_locs.txt")
except OSError:
    print("Did you download discrete_locs.txt OR run batch discretizing by setting FIRST_RUN to true?")