Example #1
0
def main():
    plt.close("all")
    print "Welcome to my data science project" 
    pickleFileName = 'billData.pickle'
    if REBUILD_DATA or not os.path.isfile(pickleFileName):
        print 'Loading data from json files'
        ld.loadData(pickleFileName)
        
    else:
        print 'Loading previously saved data'
    
    pickleFile = open(pickleFileName, 'rb')
    bills = pickle.load(pickleFile)
    (X,y,z) = getFeatures(bills)
    print 'Got data with: ' + str(len(X[:,0])) + ' observations'
    getPassedPercent(y)
    
    # try kfolds
    crossValidate(X,y)
    
    # look at feature distributions
    labels = ['Sponsor party', 'Number of cosponsors', '% Democratic Cosponsors', '% Republican Cosponsors', 'Numerical Category', 'Party Control', 'President Party']
    getFeatureDistributions(X, labels)
    
    # train, leaving out most recent congress (113), and get confusion matrix
    cm = trainAndTest(X,y,z)
    targetNames = ['fail', 'pass']
    plotConfusionMatrix(cm, targetNames)
Example #2
0
def load_data():
    #custom Mask
    loadImg = loadData('../../pairImg/images', '../../pairImg/newMasks',
                       '../../pairImg/train.csv', 0.8)
    #binary mask
    #loadImg = loadData('../../pairImg/images', '../../pairImg/masks', '../../pairImg/train.csv', 0.8)

    x_trainN = loadImg.trainN
    x_testN = loadImg.testN
    x_trainP = loadImg.trainP
    x_testP = loadImg.testP
    '''
    s = x_test[:10]
    for i in range(len(s)):
        img2avg = s[i]
        plt.imshow(img2avg)
        plt.savefig('vae_mlp/img'+str(i)+'.png')
    '''
    y_trainN = np.ones(800)
    y_testN = np.ones(200)
    y_trainP = np.ones(800)
    y_testP = np.ones(200)

    return (x_trainN, y_trainN), (x_testN, y_testN), (x_trainP,
                                                      y_trainP), (x_testP,
                                                                  y_testP)
Example #3
0
def main():
    #ex1data1.
    train_data = loadData('data/ex1data2.txt')
    #evaluateModels(train_data)
    tetas, mean, std_dev, predicted_y = gradient_descent_linear_regression(
        train_data, 0.5)
    test_data = loadData('data/testdata.txt')
    test_data = normalize_test_data(test_data, mean, std_dev)
    test_data = add_bias_term_in_data(test_data)
    print("Start predicting new instances")
    for i in range(len(test_data)):
        predicted_value = predict_instance(test_data[i], tetas)
        print("value of instance %(key1)s is %(key2)s" % {
            'key1': i + 1,
            'key2': predicted_value
        })
Example #4
0
def main():

    #dataset = "twoDcurve"
    dataset = "mnist"
    #dataset = "cifar10"
    #dataset = "imageNet"

    filename1 = "2442_original_as_7_with_confidence_0.999989330769.png"
    filename2 = "2442_7_modified_into_3_with_confidence_0.509171962738.png"

    imageNetPath1 = "%s/%s" % (directory_pic_string, filename1)
    imageNetPath2 = "%s/%s" % (directory_pic_string, filename2)

    image1 = NN.readImage(imageNetPath1)
    image2 = NN.readImage(imageNetPath2)

    k, euclideanDistance = diffImage(image1, image2)
    print "%s input elements are changed." % (k)
    print("The Euclidean distance is %s" % (euclideanDistance))

    model = loadData()

    (class1, confidence1) = NN.predictWithImage(model, image1)
    classStr1 = dataBasics.LABELS(int(class1))
    print "the class for the first image is %s (%s) with confidence %s" % (
        class1, classStr1, confidence1)

    (class2, confidence2) = NN.predictWithImage(model, image2)
    classStr2 = dataBasics.LABELS(int(class2))
    print "the class for the first image is %s (%s) with confidence %s" % (
        class2, classStr2, confidence2)

    return 0
Example #5
0
def main():
    ds = loadData.loadData()
    iterator = ds.make_one_shot_iterator()
    batch_images, batch_labels = iterator.get_next()

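    # forward pass: two conv/pool/batch-norm blocks followed by two fully connected layers and a final output layer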
    L1 = network.conv1(batch_images)
    L1 = network.pool1(L1)
    L1out = network.bn1(L1)
    L2 = network.conv2(L1out)
    L2 = network.bn2(L2)
    L2out = network.pool2(L2)
    L3out = network.fc1(L2out)
    L4out = network.fc2(L3out)
    L5out = network.final(L4out)

    count = 0
    start_time = time.time()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        while True:
            count += 1
            tom = sess.run(L5out)
            print(tom.shape)
            if count == 1: break
    print("Duration: {:,.0f} (minutes)".format(
        (time.time() - start_time) / 60))
Example #6
0
def main():

    model = loadData()

    plt.axis('off')

    if whichMode == "train": return
    if trainingModel == "autoencoder":
        (model, autoencoder) = model
        if startLayer == -1: autoencoder = model
    else: autoencoder = model

    # finding adversarial examples from original model
    dc = dataCollection(
        "%s_%s_%s" %
        (startIndexOfImage, dataProcessingBatchNum, manipulations[0]))
    succNum = 0
    for i in range(dataProcessingBatchNum):
        for player_mode in twoPlayer_mode:
            dc.addComment("%s -- %s\n" % (i, player_mode))
            re = handleOne(model, autoencoder, startIndexOfImage + i,
                           manipulations[0], dc, player_mode)
        if re == True: succNum += 1
    dc.addSuccPercent(succNum / float(dataProcessingBatchNum))
    dc.provideDetails()
    dc.summarise()
    dc.close()
Example #7
0
def tuneThreshold():
    """
        Explore different values of threshold to see which one fits best
    """
    thresholds = np.linspace(0.4,0.6, 10)
    
    bestAcc = 0.0
    bestModel = None
    X_tr, y_tr, w_tr = loadData()
    m, n = X_tr.shape
    for th in thresholds:
        model = LogisticRegression(features=['PRI_tau_eta',
                                            'PRI_lep_eta',
                                            'DER_deltar_tau_lep',
                                            'PRI_met_sumet',
                                            'DER_mass_transverse_met_lep'],
                                    threshold=th)
        model.train(X_tr, y_tr, w_tr)
        p, r = model.predict(X_tr)
        #calculate some accuracy on the same train set
        acc =  100.0*(p.flatten() == y_tr.flatten()).sum()/m
        print "%s %s%%"%(th, acc)
        if acc > bestAcc:
            bestAcc = acc
            bestModel = model
    
    #save the best model
    bestModel.save('data/logisticRegression%.2f.txt' % bestAcc)
Example #8
0
def weightAnalysis():  # analyze how many ratings each user has given
    [[m_row, m_col, m_val], implicit] = loadData()
    length = len(m_row)
    i = 0
    rateNum = np.zeros(10)  # rateNum[i] = number of users with fewer than i*1000 ratings
    rateNumLess1000 = np.zeros(10)  # users with fewer than 1000 ratings, bucketed by 100
    rateNumLess10 = 0  # users with fewer than 10 ratings
    rateNumEqual1 = 0  # users with exactly one rating
    while i < length:  # iterate over all users
        if m_val[i] == 0:  # implicit feedback, not counted
            i += 1
            continue
        rateCnt = 0  # count this user's ratings
        for j in range(i, length):  # walk through this user's ratings
            if m_row[i] != m_row[j]:  # done with this user
                break
            if m_val[j] == 0:  # implicit feedback, not counted
                continue
            rateCnt += 1
        if rateCnt == 1:
            rateNumEqual1 += 1
        if rateCnt <= 10:
            rateNumLess10 += 1
        if rateCnt <= 1000:
            rateNumLess1000[int(rateCnt / 100)] += 1
        else:
            rateNum[int(rateCnt / 1000)] += 1
        for j in range(i, length):  # jump to the next user
            if m_row[i] != m_row[j]:
                i = j
                break
            if j == length - 1:
                i = length
    return [rateNumEqual1, rateNumLess10, rateNumLess1000, rateNum]
Example #9
0
def main():
    #ex1data1.
    train_data = loadData('data/ex1data1.txt')
    #evaluateModels(train_data)
    plotDatain2D(train_data, [])
    predicted_y, tetas = gradient_descent_linear_regression(train_data, 0.01)
    actual_y = [row[-1] for row in train_data]
    print("MAE %(key1)s" % {'key1': compute_MAE(actual_y, predicted_y)})
    print("MSE %(key1)s" % {'key1': compute_MSE(actual_y, predicted_y)})
    print("RMSE %(key1)s" % {'key1': compute_RMSE(actual_y, predicted_y)})
Example #10
0
def main():
    #ex1data1.
    train_data = loadData('data/ex1data2.txt')
    #evaluateModels(train_data)
    tetas = training(train_data)
    predicted_y = predict_test_data(train_data,tetas)
    actual_y = [row[-1] for row in train_data]
    print("MAE %(key1)s"%{'key1':compute_MAE(actual_y,predicted_y)})
    print("MSE %(key1)s"%{'key1':compute_MSE(actual_y,predicted_y)})
    print("RMSE %(key1)s"%{'key1':compute_RMSE(actual_y,predicted_y)})
Example #11
0
def main():
    '''
    When this file is called, one training data file path should be given as argument.
    '''
    print('Number of arguments:', len(sys.argv), 'arguments.')
    print('Argument List:', str(sys.argv))
    if len(sys.argv) == 1:
        print("Please give the training data file.")
    else:
        file_path = sys.argv[1]
        nclass, dimension, labels, images = ld.loadData(file_path)
        u, delta_2, p = estimators(nclass, dimension, labels, images)
        usps_d_param(nclass, dimension, u, delta_2, p)
Example #12
0
def main():
    print('Number of arguments:', len(sys.argv), 'arguments.')
    print('Argument List:', str(sys.argv))
    if len(sys.argv) == 1:
        print("Please give the parameters and testing files.")
    elif len(sys.argv) == 2:
        print("Please give one more file.")
    else:
        d, u, delta_2, p = ld.getParameters(sys.argv[1])
        nclass, dimension, real_labels, testing_images = ld.loadData(sys.argv[2])
        testing_labels, error_rate, con_matrix= classifer(nclass, dimension, u, delta_2, p, real_labels, testing_images)
        usps_d_error(error_rate)
        usps_d_cm(con_matrix,nclass)
Example #13
0
def test(epochs, denoising=False, batch_size=32, num_hidden=100):
    train_x, train_y, test_x, test_y, valid_x, valid_y = loadData.loadData()
    auto_encoder = AutoEncoder(train_x, valid_x, test_x, num_hidden=num_hidden)
    auto_encoder.train(epochs,
                       learning_rate=0.1,
                       batch_size=batch_size,
                       denoising=denoising)
    # hp.visualize_parameter(auto_encoder.W, name="DenoisingAutoEncoderWn30k5")
    hp.plotCrossEntropyError(auto_encoder.train_entropy,
                             auto_encoder.validation_entropy,
                             name="k5")
    hp.save_param(auto_encoder.W, "WAuto" + str(num_hidden) + str(denoising))
    hp.save_param(auto_encoder.b, "bAuto" + str(num_hidden) + str(denoising))
    hp.save_param(auto_encoder.c, "cAuto" + str(num_hidden) + str(denoising))
Example #14
0
def build_model(stock):
    print "Running...this may take a few minutes."
    print "Initializing regression on 5-year historical data..."
    ld.fetchData(str(stock))
    price_data, vol_data = ld.loadData(CSV)
    total = 0
    plot_total = []
    prev_action = 'sell'
    count = 100
    while prev_action == 'sell':
        prev_action = tr.train(MID_TERM, count, price_data, vol_data)
        prev_amt = price_data[count]
        count += 1
    plot_total.append((price_data[count - 1], prev_action))
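    # step through the remaining price series in increments of 30, accumulating returns from the buy/sell decisions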
    for i in range(count, len(price_data) - MID_TERM, 30):
        action = tr.train(MID_TERM, i, price_data, vol_data)
        if action == 'buy':
            if action == prev_action:
                total += price_data[i] - prev_amt
                plot_total.append((price_data[i], 'hold'))
            else:
                plot_total.append((price_data[i], action))
            prev_amt = price_data[i]
            prev_action = action
        elif action == 'sell':
            if action != prev_action:
                total += price_data[i] - prev_amt
                plot_total.append((price_data[i], action))
            else:
                plot_total.append((price_data[i], 'hold'))
            prev_action = action
        # print "Action at time " + str(i) + " : " + action
    print "*********************"
    print "Total return: ", total
    print "Best action now: ", qt.getBestStock(MID_TERM, len(price_data))
    count = 0
    plt.clf()
    plt.plot()
    for point in plot_total:
        t, a = point
        plt.plot(count, t, get_color(a), zorder=1)
        count += 1
    plt.plot(range(len(plot_total)), [x[0] for x in plot_total],
             'b--',
             zorder=2,
             label='Price')
    plt.suptitle(stock)
    plt.legend(loc='upper left')
    plt.show()
Example #15
0
def bagging_ELM(name,
                numberofHiddenNeurons,
                Type='W1',
                C=64,
                ActivationFunction='sig'):
    train, test = loadData(name)
    shapeOfAnswer = []
    numOfBaseClasser = 10
    trainStr = ELMDataStruct(train)
    testStr = ELMDataStruct(test)
    beginTrainTime = time()
    for i in range(numOfBaseClasser):
        print('Begin %d th train' % (i + 1))
        baggingTrain = dataBagging(trainStr)
        baggingTrainStr = ELMDataStruct(baggingTrain)
        answer = WELM(numberofHiddenNeurons,
                      baggingTrainStr,
                      testStr,
                      Type,
                      ActivationFunction,
                      C,
                      baseclasser=True)
        if i == 0:
            answerMatrix = answer
            shapeOfAnswer = shape(answer)
        else:
            answerMatrix = column_stack((answerMatrix, answer))
    outputAnswer = zeros((shapeOfAnswer))
    endTrainTime = time()
    trainTime = endTrainTime - beginTrainTime

    #matrix2CSV_Once(answerMatrix,[])
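    # majority vote: for each test sample, pick the class predicted by most base classifiers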
    for j in range(shapeOfAnswer[0]):
        voteAnswer = 1
        maxVoteNum = 0
        for k in range(trainStr.numOfClass):
            voteNum = sum(answerMatrix[j, :] == (k + 1))
            if voteNum > maxVoteNum:
                maxVoteNum = voteNum
                voteAnswer = k + 1
        outputAnswer[j] = voteAnswer
    #print(outputAnswer)
    #input()
    acc = accuracy(outputAnswer, testStr.y)
    print('-' * 20, 'Bagging result', '-' * 20)
    print('Bagging trainTime:', trainTime)
    gmean, Rn = G_mean(outputAnswer, testStr.y, testStr.numOfClass)
    print('-' * 20, 'Bagging result', '-' * 20)
    return acc, gmean, Rn, trainTime
Example #16
0
def test(epoch, k, batch_size=32, num_hidden=100):
    train_x, train_y, test_x, test_y, valid_x, valid_y = loadData.loadData()
    rbm = RBM(train_x,
              input_validation=valid_x,
              input_test=test_x,
              batch_size=batch_size,
              n_hidden=num_hidden)
    rbm.train(epoch, k=k)
    # hp.visualize_parameter(rbm.W, name="Wn30k5")
    hp.plotCrossEntropyError(rbm.train_entropy,
                             rbm.validation_entropy,
                             name="k5")
    hp.save_param(rbm.W, "W" + str(num_hidden))
    hp.save_param(rbm.b, "b" + str(num_hidden))
    hp.save_param(rbm.c, "c" + str(num_hidden))
Example #17
0
 def _build_inver_table(self):
     """
     倒排表
     :return:
     """
     doc = loadData(self.fin, sep=',')
     count = 0
     for d in doc:
         print(count)
         count += 1
         UserID = d[0]
         if UserID not in self.userItems:
             self.userItems[UserID] = set()
         MovieID = d[1]
         self.userItems[UserID].add(MovieID)
Example #18
0
def main():
    #ex1data1.
    train_data = loadData('data2/ex2data2.txt')
    legends = ['y = 1', 'y = 0']
    titles = [
        "Microchip Test 1", "Microchip Test 1", "Scatter Plot of training data"
    ]
    plotTrainingData(train_data, [], titles, legends)
    feature_vactor = expand_features(train_data, 6)
    tetas = gradient_descent_logistic_regression(train_data, feature_vactor, 1,
                                                 1)
    tetas = gradient_descent_logistic_regression(train_data, feature_vactor, 1,
                                                 0)
    tetas = gradient_descent_logistic_regression(train_data, feature_vactor, 1,
                                                 100)
Example #19
0
def main():

    model = loadData()

    if whichMode == "train": return
    if trainingModel == "autoencoder":
        (model, autoencoder) = model
        if startLayer == -1: autoencoder = model
    else: autoencoder = model

    # initialise a dataCollection instance
    phase = "firstRound"

    # finding adversarial examples from original model
    handleOne(model, autoencoder, phase, startIndexOfImage,
              dataProcessingBatchNum, firstRound_manipulations[0])
Example #20
0
File: DLV.py Project: msakai/DLV
def main():

    model = loadData()
    dc = dataCollection()

    # handle a set of inputs starting from an index
    succNum = 0
    for whichIndex in range(startIndexOfImage,
                            startIndexOfImage + dataProcessingBatchNum):
        print "\n\nprocessing input of index %s in the dataset: " % (
            str(whichIndex))
        succ = handleOne(model, dc, whichIndex)
        if succ == True: succNum += 1
    dc.addSuccPercent(succNum / float(dataProcessingBatchNum))
    dc.provideDetails()
    dc.summarise()
    dc.close()
Example #21
0
def main():    
    bits = 28
    
    #load data
    X_tr, y_tr, w_tr = loadData()                          
    plotDistribution(X_tr, y_tr, w_tr)
    
    #select some features for plotting
    sel_features = [
        features.index('PRI_tau_eta'),
        features.index('PRI_lep_eta'),
        features.index('DER_deltar_tau_lep'),
        features.index('PRI_met_sumet'),
        features.index('DER_mass_transverse_met_lep')]
    #and make all 2D combinations possible
    for f1, f2 in itertools.combinations(sel_features, 2):
        plot2DFeatures(X_tr, y_tr, w_tr, f1, f2, th=0.0)
Example #22
0
def main():
    #ex1data1.
    train_data = loadData('data2/ex2data1.txt')
    legends = ['Admitted', 'Not Admitted']
    titles = ["Exam 1 Score", "Exam 2 Score", "Scatter Plot of training data"]
    plotTrainingData(train_data, [], titles, legends)
    #train_data = [[0,0,0],[0,1,1],[1,0,1],[1,1,1]]
    tetas, mean, std_dev = gradient_descent_logistic_regression(
        train_data, 0.8)
    test_data = [[45, 85, 1]]
    test_data = normalize_test_data(test_data, mean, std_dev)
    test_data = add_bias_term_in_data(test_data)
    for i in range(len(test_data)):
        predicted_value = predict_instance(test_data[i], tetas)
        print("Probability of Test Example is %(key2)s" % {
            'key1': i + 1,
            'key2': predicted_value
        })
Example #23
0
def main():

    with tf.Session() as sess:

        model = loadData()

        if whichMode == "train": return
        if trainingModel == "autoencoder":
            (model, autoencoder) = model
            if startLayer == -1: autoencoder = model
        else: autoencoder = model

        images = []
        labels = []

        for i in range(dataProcessingBatchNum):

            imageIndex = startIndexOfImage + i

            image = NN.getImage(model, imageIndex)
            (originalClass,
             originalConfident) = NN.predictWithImage(model, image)
            if dataset == "imageNet":
                label = np.zeros(NN.nb_classes)
                label[originalClass] = 1
            else:
                label = NN.getLabel(model, imageIndex)

            # keep information for the original image
            origClassStr = dataBasics.LABELS(int(originalClass))
            path0 = "%s/%s_original_as_%s_with_confidence_%s.png" % (
                directory_pic_string, imageIndex, origClassStr,
                originalConfident)
            dataBasics.save(-1, np.squeeze(image), path0)
            print(np.max(image), np.min(image), image.shape)

            images.append(image - 0.5)
            labels.append(label)

        end_vars = tf.global_variables()
        test_attack(sess, model, np.array(images), np.array(labels))

    return
Example #24
0
def trainWithRealData():
    """
        Test with the real-deal
    """
    X_tr, y_tr, w_tr = loadData()
    m, n = X_tr.shape
    model = LogisticRegression(features=['PRI_tau_eta',
                                        'PRI_lep_eta',
                                        'DER_deltar_tau_lep',
                                        'PRI_met_sumet',
                                        'DER_mass_transverse_met_lep'])
    #tune parameters later.
    model.train(X_tr, y_tr, w_tr)
    p, r = model.predict(X_tr)
    #calculate some accuracy on the same train set
    acc =  100.0*(p.flatten() == y_tr.flatten()).sum()/m
    print "%s%%"%acc
    #save the model
    model.save('data/logisticRegression%.2f.txt'%acc)
Example #25
0
  def __init__(self,  num_class = 14, limit = None):
    self.num_class = num_class
    self.iteration = 0

    # data
    self.data = loadData.loadData(limit = limit).getDataArray()
    print "number of training data:", len(self.data)

    # model parameters
    # for each pixel in each class, there are: 
      # 1. pi representing probability of the class
      # 2. alpha representing probability of being foreground
      # 3. mu and sigma representing gaussian of observation vs fg/bg data
    self.param_pi = np.array([1.0/self.num_class] * self.num_class)
    self.param_alpha = np.zeros((self.num_class, len(self.data[0]))) + 0.5
    self.param_mu = np.random.random((self.num_class, len(self.data[0]))) 
    self.param_sigma = np.ones((self.num_class, len(self.data[0]))) * 10

    # visualizer
    self.visualizer = visualizer.visualizer()
Example #26
0
def main():
    pd.set_option('display.max_columns', None)
    plt.rcParams['figure.figsize'] = (20, 20)
    ensenyament = "G1042"
    path = 'recommenderItemBL/' + ensenyament

    datas = loadData(ensenyament)
    primer = datas[0]
    segon = datas[1]
    lbl2 = datas[2]

    primer.fillna(value=5.0, inplace=True)
    segon.fillna(value=5.0, inplace=True)

    preds_eval1 = evalRecommender(primer, segon)
    #===========================================================================
    # evalRecommender2(primer, segon)
    #===========================================================================
    plotScatter(preds_eval1, "whitegrid", ensenyament, path, lbl2, primer,
                segon)
Example #27
0
def biasAnalysis():  # analyze how many users have a rating bias
    [[m_row, m_col, m_val], implicit] = loadData()
    length = len(m_row)
    posCnt, negCnt = 0, 0  # users biased toward high ratings and toward low ratings
    minNum = 5  # a user needs at least minNum ratings to be considered biased
    i = 0
    while i < length:  # iterate over all users
        if m_val[i] == 0:  # implicit feedback, not counted
            i += 1
            continue
        if m_val[i] >= 4:  # user leans toward high ratings
            flag = True
            num = 1
            for j in range(i + 1, length):  # walk through the user's remaining ratings
                if m_row[i] != m_row[j]:  # done with this user
                    break
                if m_val[j] < 4 and m_val[j] > 0:  # this user does not lean toward high ratings
                    flag = False
                    break
                num += 1
            if flag and num >= minNum:
                posCnt += 1
        elif m_val[i] <= 2 and m_val[i] > 0:  # user leans toward low ratings
            flag = True
            num = 1
            for j in range(i + 1, length):  # walk through the user's remaining ratings
                if m_row[i] != m_row[j]:  # done with this user
                    break
                if m_val[j] > 2:  # this user does not lean toward low ratings
                    flag = False
                    break
                num += 1
            if flag and num >= minNum:
                negCnt += 1
        for j in range(i, length):  # jump to the next user
            if m_row[i] != m_row[j]:
                i = j
                break
            if j == length - 1:
                i = length
    return [m_row[-1], posCnt, negCnt]
Example #28
0
def main():

    model = loadData()
    dc = dataCollection()

    # handle a set of inputs starting from an index
    if dataProcessing == "batch":
        for whichIndex in range(startIndexOfImage,
                                startIndexOfImage + dataProcessingBatchNum):
            print "\n\nprocessing input of index %s in the dataset: " % (
                str(whichIndex))
            if task == "safety_check":
                handleOne(model, dc, whichIndex)
    # handle a single input
    else:
        print "\n\nprocessing input of index %s in the dataset: " % (
            str(startIndexOfImage))
        if task == "safety_check":
            handleOne(model, dc, startIndexOfImage)
    if dataProcessing == "batch":
        dc.provideDetails()
        dc.summarise()
    dc.close()
Example #29
0
def loadData(params, newRun):
    # load or restore data
    print("start loading data, time: {}".format(time.ctime()))
    if params['loadBlob'] is not None:
        img_train, l_train, f_train, \
            img_val, l_val, f_val = ld.restoreData(params['loadBlob'])
    elif newRun:
        img_train, l_train, f_train, \
            img_val, l_val, f_val = ld.loadData(params['out_dir'],
                                                         params)
    else:
        img_train, l_train, f_train, \
            img_val, l_val, f_val = ld.restoreData(params['out_dir'])

    print("end loading data, time: {}".format(time.ctime()))
    print("Train images shape", img_train.shape, l_train.shape)
    print("Train images min/max", img_train.min(), img_train.max())
    print("Train images data type  ", img_train.dtype)
    for i in range(len(img_val)):
        print("Val  images shape", img_val[i].shape, l_val[i].shape)
        print("Val  images data type  ", img_val[i].dtype)

    return img_train, l_train, f_train, img_val, l_val, f_val
Example #30
0
def shortenData():
    workbook = load_workbook("./Data/dataGathering/tokenizedReducedData.xlsx")
    sheet = workbook.active

    data, labels = ld.loadData()
    numericalData = ld.loadNumericalTags()
    tags0 = ld.getTags(0)
    tags1 = ld.getTags(1)
    tags2 = ld.getTags(2)
    tags3 = ld.getTags(3)
    tags4 = ld.getTags(4)
    tags5 = ld.getTags(5)

    shortenedLabels, shortenedNumericalData, shortenedTags0, shortenedTags1, shortenedTags2, shortenedTags3, shortenedTags4, shortenedTags5 = [], [], [], [], [], [], [], []

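    # keep only rows whose label is "n", "l", or "o"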
    for i in range(0, len(labels)):
        if (labels[i] == "n" or labels[i] == "l" or labels[i] == "o"):
            shortenedLabels.append(labels[i])
            shortenedNumericalData.append(numericalData[i])
            shortenedTags0.append(tags0[i])
            shortenedTags1.append(tags1[i])
            shortenedTags2.append(tags2[i])
            shortenedTags3.append(tags3[i])
            shortenedTags4.append(tags4[i])
            shortenedTags5.append(tags5[i])

    for i in range(0, len(shortenedLabels)):
        sheet.cell(row=i + 2, column=1).value = shortenedLabels[i]
        sheet.cell(row=i + 2, column=2).value = shortenedNumericalData[i]
        sheet.cell(row=i + 2, column=3).value = shortenedTags0[i]
        sheet.cell(row=i + 2, column=4).value = shortenedTags1[i]
        sheet.cell(row=i + 2, column=5).value = shortenedTags2[i]
        sheet.cell(row=i + 2, column=6).value = shortenedTags3[i]
        sheet.cell(row=i + 2, column=7).value = shortenedTags4[i]
        sheet.cell(row=i + 2, column=8).value = shortenedTags5[i]

    workbook.save("./Data/dataGathering/tokenizedReducedData.xlsx")
Example #31
0
def run(sample):
    # for missing value imputation

    # load data
    trainData, target = loadData('data', 'SalePrice')

    # get neighborhood geographical coordinates and valence bins
    if not os.path.isfile('neighborhood.json'):
        neighborhoods = prepNeighbors(
            trainData,
            target,
            bins=[0, 100000, 150000, 200000, 250000, 300000, np.inf])
    else:
        with open('neighborhood.json', 'r') as f:
            neighborhoods = json.load(f)

    missingVal = np.nan

    #for price prediction

    #load model
    modelFile = 'RandomCVModel.rfmdl'
    if os.path.isfile(modelFile):
        model = pickle.load(open(modelFile, 'rb'))
    else:
        raise IOError(
            'file {} could not be found.\n Specify directory and make sure file exists'
            .format(modelFile))

    price, imputedData = makePrediction(trainData, neighborhoods, sample,
                                        model, missingVal)
    imputedData.to_csv('processed_user_input.csv',
                       sep=',',
                       index=False,
                       header=True)
    return (price, imputedData)
Example #32
0
def plusTest():
    # Reading data
    [m, implicit] = loadData()
    # Reading finished
    # Split data
    dimMax = 10**3
    m1 = dataSplit(m, [dimMax, dimMax])
    [m1_train, m1_test] = getTrainTest(m1)
    m1 = dataSplit(m, [2 * dimMax, 2 * dimMax])
    [m2_train, m2_test] = getTrainTest(m1)
    m1 = dataSplit(m, [3 * dimMax, 3 * dimMax])
    [m3_train, m3_test] = getTrainTest(m1)
    m1_train[3] = dimMax
    m1_train[4] = dimMax
    m2_train[3] = 2 * dimMax
    m2_train[4] = 2 * dimMax
    m3_train[3] = 3 * dimMax
    m3_train[4] = 3 * dimMax
    implicit1 = implicitSplit(implicit, [dimMax, dimMax])
    implicit2 = implicitSplit(implicit, [2 * dimMax, 2 * dimMax])
    implicit3 = implicitSplit(implicit, [3 * dimMax, 3 * dimMax])

    [p1, q1, sigma1, b_user1, b_item1, y] = SVDplus(m1_train, 0.0001, 10, 0.1,
                                                    implicit1)
    [p2, q2, sigma2, b_user2, b_item2, y] = SVDplus(m2_train, 0.0001, 10, 0.01,
                                                    implicit2)
    [p3, q3, sigma3, b_user3, b_item3, y] = SVDplus(m3_train, 0.0001, 10, 0.01,
                                                    implicit3)
    RMSE_SVDplus = [0, 0, 0]
    RMSE_SVDplus[0] = computeRMSEplus(m1_test, p1, q1, sigma1, b_user1,
                                      b_item1, y, implicit1)
    RMSE_SVDplus[1] = computeRMSEplus(m2_test, p2, q2, sigma2, b_user2,
                                      b_item2, y, implicit2)
    RMSE_SVDplus[2] = computeRMSEplus(m3_test, p3, q3, sigma3, b_user3,
                                      b_item3, y, implicit3)
    print(RMSE_SVDplus)
Example #33
0
def newTest():
    # Reading data
    [m, implicit] = loadData()
    # Reading finished
    # Split data
    dimMax = 10**3
    m1 = dataSplit(m, [dimMax, dimMax])
    [m1_train, m1_test] = getTrainTest(m1)
    m1 = dataSplit(m, [2 * dimMax, 2 * dimMax])
    [m2_train, m2_test] = getTrainTest(m1)
    m1 = dataSplit(m, [3 * dimMax, 3 * dimMax])
    [m3_train, m3_test] = getTrainTest(m1)
    implicit1 = implicitSplit(implicit, [dimMax, dimMax])
    implicit2 = implicitSplit(implicit, [2 * dimMax, 2 * dimMax])
    implicit3 = implicitSplit(implicit, [3 * dimMax, 3 * dimMax])
    # Split finished

    # Funk-SVD ###### funkSVD(m, a, maxK, eps)
    [p1, q1] = funkSVD(m1_train, 0.1, 50, 1e-5)
    [p2, q2] = funkSVD(m2_train, 0.1, 50, 1e-5)
    [p3, q3] = funkSVD(m3_train, 0.1, 50, 1e-5)
    RMSE_funkSVD = [0, 0, 0]
    RMSE_funkSVD[0] = computeRMSE(m1_test, p1, q1)
    RMSE_funkSVD[1] = computeRMSE(m2_test, p2, q2)
    RMSE_funkSVD[2] = computeRMSE(m3_test, p3, q3)
    print(RMSE_funkSVD)

    # rfunk-SVD ###### rfunkSVD(m, a, maxK, lamb, eps)
    [p1, q1] = rfunkSVD(m1_train, 1.0, 50, 0.25, 1e-5)
    [p2, q2] = rfunkSVD(m2_train, 1.0, 50, 0.25, 1e-5)
    [p3, q3] = rfunkSVD(m3_train, 1.0, 50, 0.25, 1e-5)
    RMSE_rfunkSVD = [0, 0, 0]
    RMSE_rfunkSVD[0] = computeRMSE(m1_test, p1, q1)
    RMSE_rfunkSVD[1] = computeRMSE(m2_test, p2, q2)
    RMSE_rfunkSVD[2] = computeRMSE(m3_test, p3, q3)
    print(RMSE_rfunkSVD)
Example #34
0
 def main(self):
     try:
         l = loading.loadData()
         table_1, region, dates = l.activeCases()
         table_2, groups, dates, datesInDays = l.totalCases()
         print(colored("Loading completed.", 'blue'))
         p = plotting.plotData()
         for i in range(0, len(region)):
             p.plotActiveCases(table_1, region, dates, i)
         for i in range(0, len(groups)):
             p.plotTotalCases(table_2, groups, dates, datesInDays, i)
             if (groups[i] == 'Total number of cases'):
                 p.plotEvolutionOfSigmoidParameter(table_2.iloc[i][1:],
                                                   datesInDays)
         print(colored("Fitting completed.", 'blue'))
         print(colored("Plotting completed.", 'blue'))
     except Exception as e:
         exc_tb = sys.exc_info()[2]  # traceback of the caught exception (assumes sys is imported)
         print(
             colored("The following exception was caught: " + str(e),
                     'red'))
         print(
             colored(
                 str(exc_tb.tb_frame.f_code.co_filename) + " at line " +
                 str(exc_tb.tb_lineno), 'red'))
Example #35
0
from labelDictionnary import labelDictionnary

from training import training

import numpy as np 

from loadData import loadData
# dimensialityReduction is called below but not imported in the original snippet;
# it is assumed to come from a local module of the same name.
from dimensialityReduction import dimensialityReduction


with open( 'twidf_window4_directed_weighted' ,"r") as File:
	X = np.loadtxt(File , delimiter=',')

path = '../data/r8_train_stemmed.txt'
trainData = True

data = loadData(path,trainData)

labels = data['labels']

(dictionnaryOfClasses , labelsInNumbers) = labelDictionnary(labels)

lsi = True
numberOfComponents = 100

(reducedMatrix , Y) = dimensialityReduction(X , labelsInNumbers , lsi , numberOfComponents)


svm = True

scores = training(reducedMatrix , Y , svm )
Example #36
0
          if r5_avg > best_r5_avg:
            best_r5_avg = r5_avg
            updateParameters(alpha, beta, gamma, delta, bestParams5)
          if r10_avg > best_r10_avg:
            best_r10_avg = r10_avg
            updateParameters(alpha, beta, gamma, delta, bestParams10)
          r5_tuple = (r5_avg, r5pop_avg, r5syn_avg, r5synpop_avg)
          r10_tuple = (r10_avg, r10pop_avg, r10syn_avg, r10synpop_avg)
          print '(%f, %f, %f, %f): r5 = %s, r10 = %s' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple)
          if outfile:
            outfile.write('%f,%f,%f,%f,%s,%s\n' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple))

  return bestParams5, best_r5_avg, bestParams10, best_r10_avg

## Comment out this entire block if not running from Python shell
ld.loadData(True)
# This function must be run. Be careful if this is commented out.
setQuestionModelModifications(ld.questions)
folds = ld.getCVFolds()
print 'Generating word vectors'
frequentWords, wordToIndex = wordvectors.getFrequentWords(ld.questions)
wordVecs = wordvectors.getWordVectors(ld.questions, wordToIndex)
## End block

counter = 0
recall_test_scores = [0.0, 0.0]
for fold in folds[0:5]:
  resetModels()
  counter += 1
  print 'Starting Fold %d' % counter
  trainQuestions = fold[0]
Example #37
0
# Machine Learning | K-Means Clustering
# Jimmy Wallace

import loadData as ld
import random
import matplotlib.pyplot as plt
from tkinter import *
from tkinter.ttk import *

dataList = ld.loadData()
data = []

class DataPoints(object):
    
    def __init__(self,vector):
        self.vector = vector
        
    def getDim(self):
        return len(self.vector)                  
    
    def getVector(self,vector):
        return self.vector
    
    def getDistance(self,cluster,method):
        dim = self.getDim()
        temp = []
        
        for i in range(dim):
            x = (cluster[i] - self.vector[i])**2 
            temp.append(x)
        if method == 'e':
Example #38
0
 def get(self):
     cities=model.City.all()
     loadData.clearData(self.response, cities)
     loadData.loadData(self.response)
     self.response.out.write( 'data reloaded')
Example #39
0
feature_dim = train_data.shape[1]
label_dim = train_label.shape[1]
 
train_data = normalizeData(train_data)
test_data = normalizeData(test_data)

elm = ELM(feature_dim, feature_dim*10, label_dim, 'lite', 'dec')

elm.trainModel(train_data, train_label)
elm.save(r"D:\workspace\Data\Data Synthesis\synthesis\synthesis\weights\elm1")
elm.testModel(test_data, test_label)
"""

# Train on real data
train_data , train_label, test_data, test_label = loadData('REAL\greyscale', percent = 1)

feature_dim = train_data.shape[1]
label_dim = train_label.shape[1]

train_data = normalizeData(train_data)
test_data = normalizeData(test_data)

elm = ELM(feature_dim, feature_dim*10, label_dim, 'lite', 'dec')
elm.trainModel(train_data, train_label)
elm.save(r"D:\workspace\Data\Data Synthesis\synthesis\synthesis\weights\elmReal")
elm.testModel(test_data, test_label)


"""
# Train on synthetic data and fine-tune on real data
Example #40
0
#-*- coding=utf-8 -*-
__author__ = "Xingwei He"
from keras.utils import np_utils

import Alexnet
import loadData

nb_classes = 10
#load data
X_train,Y_train,X_test,Y_test= loadData.loadData()

#normalize the data
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

Alex=Alexnet.Alexnet(X_train,Y_train,X_test,Y_test)
Alex.fit()
Example #41
0
def sklClassify():
	verbose = True
	trainingData, trainingTargets, validationData, validationTargets, testData, testTargets = ld.loadData('movements_day1-3.dat')
	clf = SVC()

	newTargets = []
	for i in range(len(trainingTargets)):
		newTargets.append(np.argmax(trainingTargets[i]))
	newTestTargets = []
	for i in range(len(testTargets)):
		newTestTargets.append(np.argmax(testTargets[i]))

	clf.fit(trainingData, newTargets) 

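	# A accumulates a confusion matrix: rows are the true class, columns the predicted class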
	A = np.zeros((len(testTargets[0]), len(testTargets[0])))
	for index in range(len(testTargets)):
		A[newTestTargets[index], clf.predict(testData[index])[0]] += 1
	total = sum(sum(A))
	correct = 0
	for index in range(len(testTargets[0])):
		correct += A[index,index]
	m1 = 1
	for index in range(len(testTargets[0])):
		denominator = sum(A[:,index])
		if (denominator == 0):
			if (verbose):
				print "P( correct |  yMax =",index,") = NO DATA"
			m1 = -1
		else:
			v = A[index,index]/denominator
			if (v<m1):
				m1 = v
			if (verbose):
				print "P( correct |  yMax =",index,") =",v
	m2 = 1
	for index in range(len(testTargets[0])):
		denominator = sum(A[index,:])
		if (denominator == 0):
			if (verbose):
				print "P( correct | target=",index,") = NO DATA"
			m2 = -1
		else:
			v = A[index,index]/denominator
			if (v<m2):
				m2 = v
			if (verbose):
				print "P( correct | target=",index,") =",v
	if (verbose):
		print A
		print correct," correct of ", total
		print "P( correct ) = ", correct/total
		print "min( P( correct |  yMax  ) ) = ", m1
		print "min( P( correct | target ) ) = ", m2
	return correct/total,m1,m2
Example #42
0
    bleached_list = list(zip(*bleached_list)[::-1])
    print("removed " + str(len(cleaned_list) - len(bleached_list)) + " columns.")
    return bleached_list

def printL(l):
    s = ""
    for i in range(len(l)):
        for j in range(len(l[0])):
            if(l[i][j] == 0):
                s += "0"
            else:
                s += "X"
        s += "\n"
    return s

# Normalizes the dataset values
# mini/maxi = minimum/maximum value we want each entry to end up with
def normaliser_bdd(dataset, mini, maxi):
    if(mini == maxi or maxi < mini):
        raise ValueError("invalid mini/maxi values")
    normalized_list = list(dataset)
    #for each entry
    for i in range(len(normalized_list)):
        for j in range(len(normalized_list[i])):
            #normalization formula
            normalized_list[i][j] = (normalized_list[i][j] - mini) / (maxi - mini)
    return normalized_list

a = loadD.loadData("Starting_Kit/sample_data/cifar10_train.data")

nettoyer_bdd(a.getData())
Example #43
0



#main program started here.

maxLoad=140
terminateTime=12*60.0
filterAngle=30.0
siteSearchRange=1000 #only search sites within 5km
maxDepth=10 #search depth
stopCount=10000 # for each search, stop when we already got enough results
debug=1

startTime=time.time()
(locations,orders)=loadData('../original_data')

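# in debug mode, keep only locations and orders inside a small square window centred on (shift, shift)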
if debug==1:
    xgap=1200
    ygap=1200
    shift=8000
    locations=locations[(locations.x<xgap+shift)&(locations.x>-xgap+shift)&(locations.y<ygap+shift)&(locations.y>-ygap+shift)]
    orders=orders[(orders.ox<xgap+shift)&(orders.ox>-xgap+shift)&(orders.oy<ygap+shift)&(orders.oy>-ygap+shift)&(orders.dx<xgap+shift)&(orders.dx>-xgap+shift)&(orders.dy<ygap+shift)&(orders.dy>-ygap+shift)]

sites=locations[locations['location_type']=='sites']
shops=locations[locations['location_type']=='shops']
spots=locations[locations['location_type']=='spots']

numOfSites=len(sites)
numOfOrders=len(orders)
normalOrders=orders[orders['order_type']==0]
Example #44
0
                lines += line
    with open(path + filename, 'wt') as fout:  # rewrite as text without blank lines
        fout.write(lines)


def array2CSV_Once(matrix,
                   indexName,
                   path='D:\桌面\ELM' + '\\',
                   filename='test.csv'):
    # in Python 2, file could be used in place of open
    with open(path + filename, "wt") as csvfile:
        writer = csv.writer(csvfile, dialect='excel')
        # write the column names first
        writer.writerow(indexName)
        # use writerows to write several rows at once
        writer.writerows(matrix)

    with open(path + filename, 'rt') as fin:  # read the csv file, dropping blank lines
        lines = ''
        for line in fin:
            if line != '\n':
                lines += line
    with open(path + filename, 'wt') as fout:  # rewrite as text without blank lines
        fout.write(lines)


# quick test
from loadData import loadData
from numpy import mat
train = loadData()[0].A
array2CSV_Once(train, [])
Example #45
0
# Counts number of games a team played, total
def addPlays(game):
    teams = [game["Home Team"]["Team"], game["Vis Team"]["Team"]]
    for team in teams:
        if team in numGames:
            numGames[team] += 1
        else:
            numGames[team] = 1

def printScores(num):
    ranked = rankScores(teamScores)
    for i in range(num):
        print("{}: {}, {:.5f}".format(i+1, ranked[i][0], ranked[i][1]))

games = loadData('wagstatscfb2014.csv')

teamScores = {}
numGames = {}

for game in games:
    addPlays(game)
    home = game["Home Team"]
    away = game["Vis Team"]

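    # weight each game by the inverse of the week in which it was played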
    weight = 1.0/game["Week"]

    homeTeam = home["Team"]
    awayTeam = away["Team"]

    homeTotal = 0