Example #1
def TrainBuildTransformer():
    with open("Model/Config.json") as Fd:
        ConfigDict = json.load(Fd)
        MaxLength = ConfigDict["MaxLength"]
        BatchSize = ConfigDict["BatchSize"]
        EmbeddingSize = ConfigDict["EmbeddingSize"]
        HeadNum = ConfigDict["HeadNum"]
        EnLayer = ConfigDict["EnLayer"]
        DeLayer = ConfigDict["DeLayer"]
        Dropout = ConfigDict["Dropout"]
        print("Loading Tgt vocab")
        TgtDict = DLoad.LoadVocabulary("Data/tgt.vocab")
        print("Tgt vocab Loading finished")
        print("Loading Src vocab")
        SrcDict = DLoad.LoadVocabulary("Data/src.vocab")
        print("Src vocab Loadinf finished")
        # SrcIndSentences, SrcLength, SrcDict = DLoad.LoadData(
        #    "Data/src.sents", "Data/src.vocab", MaxLength)
        # TgtIndSentences, TgtLength, TgtDict = DLoad.LoadData(
        #    "Data/tgt.sents", "Data/tgt.vocab", MaxLength)
        # TrainDataset = DLoad.TrainCorpusDataset(
        #    SrcIndSentences, SrcLength, TgtIndSentences, TgtLength)
        #BatchDatas = DLoad.TrainDataLoaderCreator(TrainDataset, BatchSize)
        SrcVocabularySize = SrcDict.VocabularySize()
        TgtVocabularySize = TgtDict.VocabularySize()
        print("Building Model")
        Trans = TransformerNMTModel(HeadNum, EmbeddingSize, SrcVocabularySize,
                                    TgtVocabularySize, MaxLength, EnLayer, DeLayer, Dropout)
        print("Model building finished")
        # return Trans, BatchDatas, SrcDict, TgtDict, MaxLength, EmbeddingSize
        # Data loading above is commented out, so BatchSize is returned
        # in place of BatchDatas.
        return Trans, BatchSize, SrcDict, TgtDict, MaxLength, EmbeddingSize
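For reference, a minimal Model/Config.json satisfying the keys read above could be generated like this (a sketch; the values are illustrative, not taken from the source):

import json

# Only the key names come from the snippet above; the values are made up.
config = {"MaxLength": 50, "BatchSize": 64, "EmbeddingSize": 512,
          "HeadNum": 8, "EnLayer": 6, "DeLayer": 6, "Dropout": 0.1}
with open("Model/Config.json", "w") as Fd:
    json.dump(config, Fd, indent=4)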
Example #2
def Test15():
    Sentences = DL.LoadSentences("src.sents")
    PaddedSentences, Length = DL.PaddingSentences(Sentences, 30)
    for Sent in PaddedSentences:
        print(Sent)
    for L in Length:
        print(L)
Example #3
def Test16():
    Sentences = DL.LoadSentences("src.sents")
    PaddedSentences, Length = DL.PaddingSentences(Sentences, 30)
    Dict = DL.LoadVocabulary("src.vocab")
    IndSentences = DL.ChangePaddedSentencesToInd(PaddedSentences, Dict)
    print(Dict.VocabularySize())
    for Sent in IndSentences:
        print(Sent)
Example #4
    def train_model(self,
                    training_data,
                    training_label,
                    testing_data,
                    usps,
                    NUM_EPOC=5000,
                    BATCH_SIZE=128):
        """
        Train Neural network model
        :param training_data: features for training
        :param training_label: training target
        :param testing_data: testing dataset
        :param NUM_EPOC: Number of epoch
        :param BATCH_SIZE: size of batch
        :return: predicted label, training accuracy
        """
        training_accuracy = []
        with tf.Session() as sess:
            # Initialize all graph variables
            tf.global_variables_initializer().run()
            # ------------ training started ------------
            for epoch in range(NUM_EPOC):

                # Shuffle the Training Dataset at each epoch
                p = np.random.permutation(len(training_data))
                training_data = training_data[p]
                training_label = training_label[p]
                # Start batch training
                for start in range(0, len(training_data), BATCH_SIZE):

                    end = start + BATCH_SIZE
                    sess.run(self.training,
                             feed_dict={
                                 self.inputTensor: training_data[start:end],
                                 self.outputTensor: training_label[start:end]
                             })
                # append training accuracy for current epoch
                training_accuracy.append(
                    np.mean(
                        np.argmax(training_label, axis=1) == sess.run(
                            self.prediction,
                            feed_dict={
                                self.inputTensor: training_data,
                                self.outputTensor: training_label
                            })))
            # Testing
            predicted_test_label = sess.run(
                self.prediction, feed_dict={self.inputTensor: testing_data})
            DataLoad.write_to_csv("nn_test.csv", predicted_test_label)
            predicted_usps_label = sess.run(
                self.prediction, feed_dict={self.inputTensor: usps[0]})
            DataLoad.write_to_csv("nn_test_usps.csv", predicted_usps_label)
        return predicted_test_label, training_accuracy, predicted_usps_label
Example #5
 def predict1(self, users_df, sessions, products):
     users = copy.deepcopy(users_df)
     users = data.favourite_products(users, sessions, products)
     users = data.spendings(users, sessions, products)
     users = data.discounts_stats(users, sessions)
     users = users.set_index('user_id')
     users = users.drop([
         'name',
         'city',
         'street',
     ], axis=1)
     users = users.fillna(0)
     return self.clf.predict(users)
Example #6
 def train(self, users_df, sessions, products):
     users = copy.deepcopy(users_df)
     users = data.favourite_products(users, sessions, products)
     users = data.spendings(users, sessions, products)
     users = data.discounts_stats(users, sessions)
     users = data.discounts_label(users, sessions)
     users = users.set_index('user_id')
     users = users.drop([
         'name',
         'city',
         'street',
     ], axis=1)
     users = users.fillna(0)
     y_train = users['label']
     X_train = users.drop('label', axis=1)
     self.clf.fit(X_train, y_train)
Example #7
def get_agl_pdf(filename):
    pdf = Aglomerative.create_pdf(DataLoad.get_transformed_data(filename), 10)
    response = make_response(pdf)
    response.headers['Content-Disposition'] = (
        'attachment; filename="police_stops_report.pdf"')
    response.mimetype = 'application/pdf'
    return response
Example #8
    def train_model(self,
                    feature_d,
                    target,
                    learning_rate,
                    num_epoch=400,
                    theta=0.1):
        """
        Train logistic regression model
        :param feature_d: features
        :param target: target values for given feature
        :param learning_rate: learning rate for model
        :param num_epoch: number of epoch
        :return: list of accuracy at each epoch
        """
        # weights initialization
        self.weights = np.zeros((feature_d.shape[1], target.shape[1]))
        training_accuracy = []

        for i in range(num_epoch):
            z = np.dot(feature_d, self.weights)
            #hypothesis = self.sigmoid(z)
            hypothesis = self.softmax(z, theta=theta)
            gradient = np.dot(feature_d.T,
                              (hypothesis - target)) / target.shape[0]
            self.weights -= learning_rate * gradient
            #print("hypothesis shape", hypothesis.shape, "target shape", target.shape, "weights ", self.weights.shape,
            #      "grad shape", gradient.shape,"new weights",self.weights.shape)
            training_accuracy.append(
                DataLoad.get_accuracy_logistic(np.round(hypothesis), target))
        return training_accuracy
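The call self.softmax(z, theta=theta) above relies on a helper that is not shown. A minimal sketch of a temperature-scaled softmax under that assumption (only the name and signature come from the call; the body is hypothetical, with numpy imported as np):

    def softmax(self, z, theta=1.0):
        # Scale the logits by the temperature, then normalize each row.
        # Subtracting the row maximum keeps the exponentials numerically stable.
        z = z * theta
        z = z - np.max(z, axis=1, keepdims=True)
        e = np.exp(z)
        return e / np.sum(e, axis=1, keepdims=True)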
Example #9
    def start_neural_network(self,
                             learning_rate=0.002,
                             num_epoch=50,
                             show_graph=False):
        """
        Load features (concat/subtract) and target from dataset (HBD or GSC)
        Start training Neural Network model
        Print accuracy on test dataset
        :param dataset: HBD or GSC dataset
        :param op: concat or subtract feature
        :param limit: size of dataset for training and testing
        :param learning_rate: learning rate
        :param num_buckets: number of output bucket
        :param show_graph: boolean to display accuracy graph
        :return:
        """
        print("Neural Network ")
        train, validation, test, usps = DataLoad.create_dataset()
        train_target = DataLoad.convert_target(train[1])
        test_target = DataLoad.convert_target(test[1])
        print("start define model")
        self.define_model(num_features=train[0].shape[1],
                          num_buckets=train_target.shape[1],
                          learning_rate=learning_rate)
        print("start training model")
        predicted_test_label, training_accuracy, usps_test_label = self.train_model(
            training_data=train[0],
            training_label=train_target,
            testing_data=test[0],
            usps=usps,
            NUM_EPOC=num_epoch,
            BATCH_SIZE=100)

        #xpred=DataLoad.pick_max_result(predicted_test_label)
        print("Testing Accuracy: ",
              DataLoad.get_accuracy(predicted_test_label, test[1]))
        print(confusion_matrix(test[1], predicted_test_label))

        #xpred = DataLoad.pick_max_result(usps_test_label)
        print("Testing USPS Accuracy: ",
              DataLoad.get_accuracy(usps_test_label, usps[1]))
        print(confusion_matrix(usps[1], usps_test_label))

        if show_graph:
            import matplotlib.pyplot as plt
            plt.plot(training_accuracy)
            plt.show()
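Example #4's train_model and this snippet both assume that define_model builds self.inputTensor, self.outputTensor, self.training, and self.prediction, but its body is not shown. A minimal TF 1.x sketch under that assumption (the layer sizes and optimizer are hypothetical):

    def define_model(self, num_features, num_buckets, learning_rate):
        # Placeholders fed by train_model's feed_dict.
        self.inputTensor = tf.placeholder(tf.float32, [None, num_features])
        self.outputTensor = tf.placeholder(tf.float32, [None, num_buckets])
        # One hidden layer; the width is an arbitrary illustrative choice.
        hidden = tf.layers.dense(self.inputTensor, 128, activation=tf.nn.relu)
        logits = tf.layers.dense(hidden, num_buckets)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.outputTensor, logits=logits))
        self.training = tf.train.GradientDescentOptimizer(
            learning_rate).minimize(loss)
        # Predicted class index per sample, matching train_model's
        # comparison against np.argmax of the one-hot labels.
        self.prediction = tf.argmax(logits, axis=1)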
Example #10
def tRBF():
    '''
    Radial basis function test
    '''
    Loc, POI, Prec = DataLoad.lcsv('TestData/GaugeLoc.csv',
                                   'TestData/InterpPts.csv',
                                   'TestData/Dataset.csv')
    Z, Zavg = RBF.Interp_bat(Loc, POI, Prec, 0, 20)
    return 'Radial Basis Function Interpolation working fine!'
Example #11
def tCubic():
    '''
    Cubic interpolator test
    '''
    Loc, POI, Prec = DataLoad.lcsv('TestData/GaugeLoc.csv',
                                   'TestData/InterpPts.csv',
                                   'TestData/Dataset.csv')
    Z, Zavg = Cubic.Interp_bat(Loc, POI, Prec, 0, 20)
    return 'Cubic Interpolation working fine!'
Example #12
def tLinear():
    '''
    Linear interpolator test
    '''
    Loc, POI, Prec = DataLoad.lcsv('TestData/GaugeLoc.csv',
                                   'TestData/InterpPts.csv',
                                   'TestData/Dataset.csv')
    Z, Zavg = Linear.Interp_bat(Loc, POI, Prec, 0, 20)
    return 'Linear Interpolation working fine!'
Example #13
def tIDW():
    '''
    IDW test
    '''
    Loc, POI, Prec = DataLoad.lcsv('TestData/GaugeLoc.csv',
                                   'TestData/InterpPts.csv',
                                   'TestData/Dataset.csv')
    Z, Zavg = IDW.Interp_bat(Loc, POI, Prec, 2.0, 0.00001, 0, 20)
    return 'IDW working fine!'
Example #14
def create_pdf(count_klast, filename):
    data = DataLoad.get_transformed_data(filename).values  # .as_matrix() is deprecated
    k_means = KMeans(n_clusters=count_klast, random_state=1)
    k_means.fit(data)
    centers = k_means.cluster_centers_
    klusters = []
    for j in range(count_klast):
        klusters.append([])
    for i, la in enumerate(k_means.labels_):
        klusters[la].append(data[i])
    output = io.BytesIO()  # io.BytesIO replaces Python 2's cStringIO
    p = canvas.Canvas(output)
    number = 1
    it = 1
    it2 = 1
    for k in klusters:
        if (800 - it * 20) < 20:
            it = 1
            it2 = 1
            p.showPage()
        it = it + 1
        it2 = it2 + 1
        p.drawString(
            100, 800 - it * 20, "center of cluster num " + str(number) +
            " age: " + str(int(centers[number - 1][0])) + " time: " +
            str(datetime.timedelta(seconds=int(centers[number - 1][2]))))
        it = it + 1
        it2 = it2 + 1
        p.drawString(50, 800 - it * 20, "Men")
        p.drawString(200, 800 - it * 20, "Women")
        it = it + 1
        it2 = it2 + 1
        for i in k:
            if (int(i[1]) == 0):
                if (800 - it * 20) < 20:
                    it = 1
                    it2 = 1
                    p.showPage()
                p.drawString(
                    50, 800 - it * 20, "age: " + str(int(i[0])) + " time: " +
                    str(datetime.timedelta(seconds=int(i[2]))))
                it = it + 1
            else:
                if (800 - it2 * 20) < 20:
                    it = 1
                    it2 = 1
                    p.showPage()
                p.drawString(
                    200, 800 - it2 * 20, "age: " + str(int(i[0])) + " time: " +
                    str(datetime.timedelta(seconds=int(i[2]))))
                it2 = it2 + 1
        number = number + 1

    p.save()
    pdf_out = output.getvalue()
    output.close()
    return pdf_out
Example #15
def combine_model(data_=MNIST):
    _, _, test, usps = DataLoad.create_dataset()
    logistic = load_data(data_[0])
    ran_for = load_data(data_[1])
    svm = load_data(data_[2])
    nn = load_data(data_[3])

    combine_result = []

    for i in range(len(logistic)):
        combine_result.append(
            get_mode([logistic[i], ran_for[i], svm[i], nn[i]]))

    if data_ == MNIST:
        cm = confusion_matrix(test[1], combine_result)
        print(DataLoad.get_accuracy(test[1], combine_result))
    else:
        cm = (confusion_matrix(usps[1], combine_result))
        print(DataLoad.get_accuracy(usps[1], combine_result))
    print(cm)
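get_mode above is another unshown helper. A minimal majority-vote sketch under that assumption (the name comes from the call; the body is hypothetical):

from collections import Counter

def get_mode(predictions):
    # Return the most frequent label among the four classifiers'
    # predictions for a single sample.
    return Counter(predictions).most_common(1)[0][0]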
Example #16
def TestBuildTransformer():
    with open("Model/Config.json") as Fd:
        ConfigDict = json.load(Fd)
        MaxLength = ConfigDict["MaxLength"]
        BatchSize = ConfigDict["BatchSize"]
        EmbeddingSize = ConfigDict["EmbeddingSize"]
        HeadNum = ConfigDict["HeadNum"]
        EnLayer = ConfigDict["EnLayer"]
        DeLayer = ConfigDict["DeLayer"]
        SrcIndSentences, SrcLength, SrcDict = DLoad.LoadData(
            "Data/test.sents", "Data/src.vocab", MaxLength)
        TgtDict = DLoad.LoadVocabulary("Data/tgt.vocab")
        TestDataset = DLoad.TestCorpusDataset(SrcIndSentences, SrcLength)
        BatchDatas = DLoad.TestDataLoaderCreator(TestDataset, BatchSize)
        SrcVocabularySize = SrcDict.VocabularySize()
        TgtVocabularySize = TgtDict.VocabularySize()
        print("Building Model")
        Trans = TransformerNMTModel(
            HeadNum, EmbeddingSize, SrcVocabularySize, TgtVocabularySize, MaxLength, EnLayer, DeLayer)
        print("Model building finished")
        return Trans, BatchDatas, SrcDict, TgtDict, MaxLength
Example #17
def Test25():
    MaxLength = 30
    BatchSize = 2
    EmbeddingSize = 4
    HeadNum = 2
    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    BatchDatas = DL.TrainDataLoaderCreator(TrainDataset, BatchSize)
    for Batch in BatchDatas:
        SrcSent = Batch["SrcSent"]
        print(SrcSent)
        SrcLength = Batch["SrcLength"]
        print(SrcLength)
        TgtSent = Batch["TgtSent"]
        print(TgtSent)
        TgtLength = Batch["TgtLength"]
        print(TgtLength)
Example #18
def get_klasters(count_klast):
    data = DataLoad.get_transformed_data().values
    k_means = KMeans(n_clusters=count_klast)
    k_means.fit(data)
    centrx = k_means.cluster_centers_
    klusters = []
    for j in range(count_klast):
        klusters.append([])
    for i, la in enumerate(k_means.labels_):
        klusters[la].append(data[i])
    print("silhouette:", metrics.silhouette_score(data, k_means.labels_))
    return klusters
Example #19
def recomm(text, data_path, pip_path, lda_path, recomm_num=5):
    input_text = DT.loadInput(text, spark, sc)
    pred_df, pred_dis, pred_index = MD.Model().ldaPredict(input_text,
                                                          pip_path=pip_path,
                                                          lda_path=lda_path)
    data_withTopic = DT.loadTopicData(data_path, topic=pred_index, spark=spark)
    data_withDis = CR.calSimi(pred_dis, data_withTopic)
    data_sort = data_withDis.sort("dis")

    # Collect the recomm_num most similar texts.
    text_list = list()
    source = data_sort.select("text").rdd.take(recomm_num)
    for i in range(recomm_num):
        text_list.append(source[i]["text"])

    return data_sort, text_list
Example #20
def Test21():
    MaxLength = 30

    def CollateFunction(Batch):
        #print(len(Batch))
        OutputBatch = {
            "SrcSent": [],
            "SrcLength": [],
            "TgtSent": [],
            "TgtLength": []
        }
        for Elem in Batch:
            #print(Elem[0][0])
            OutputBatch["SrcSent"].append(Elem[0][0])
            OutputBatch["SrcLength"].append(Elem[0][1])
            OutputBatch["TgtSent"].append(Elem[1][0])
            OutputBatch["TgtLength"].append(Elem[1][1])
        #print(OutputBatch["SrcSent"])
        OutputBatch["SrcSent"] = t.LongTensor(OutputBatch["SrcSent"])
        OutputBatch["TgtSent"] = t.LongTensor(OutputBatch["TgtSent"])
        return OutputBatch

    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    z = DL.TrainDataLoaderCreator(TrainDataset, 4)
    # Iterate over the loader for 100 epochs.
    for _ in range(100):
        for x in z:
            print("Batch")
            print(x["SrcSent"].size())
Example #21
    def cargarCondicionesA(self):
        fname = self.QFileDialog.getOpenFileName(None, 'Open file', "*.xlsx")

        if (fname[0] != ""):
            # self.pushButton.setText("Button is clicked")
            ruta = str(fname[0])

            load = ld.DataLoad(ruta)
            load.crearCondicionesA()
            self.listaNombreRegionCA = load.getNombreRegionesCA()
            self.listaCualificacionCA = load.getCualificaionCA()
            # set the flag indicating whether initial conditions (C.A) were loaded
            self.siCondicionesA = True
        else:
            self.siCondicionesA = False
Example #22
def Test26():
    MaxLength = 30
    BatchSize = 2
    EmbeddingSize = 4
    HeadNum = 2
    EnLayer = 2
    DeLayer = 2
    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    BatchDatas = DL.TrainDataLoaderCreator(TrainDataset, BatchSize)
    SrcVocabularySize = SrcDict.VocabularySize()
    TgtVocabularySize = TgtDict.VocabularySize()
    Trans = T.TransformerNMTModel(HeadNum, EmbeddingSize, SrcVocabularySize,
                                  TgtVocabularySize, MaxLength, EnLayer,
                                  DeLayer)
    for BatchInd, Batch in enumerate(BatchDatas):
        print("BegingBatch")
        SrcSent = Batch["SrcSent"]
        print(SrcSent.size())
        SrcLength = Batch["SrcLength"]
        #print(SrcLength.size())
        TgtSent = Batch["TgtSent"]
        print(TgtSent.size())
        TgtLength = Batch["TgtLength"]
        #print(TgtLength.size())
        SrcMask = T.BatchLengthToBoolTensorMask(SrcLength, MaxLength)
        TgtMask = T.BatchLengthToBoolTensorMask(TgtLength, MaxLength)
        Output = Trans(SrcSent, TgtSent, SrcMask, TgtMask)
        print("Step")
        print(BatchInd + 1)
        print(Output.size())
        print(Output[0][2])
Example #23
def tKrigP():
    '''
    Kriging test
    '''
    Loc, POIC, Prec = DataLoad.lcsv('TestData/GaugeLoc.csv',
                                    'TestData/InterpPts.csv',
                                    'TestData/Dataset.csv')
    Loc = numpy.array(Loc) / 1000.0
    POIC = numpy.array(POIC) / 1000.0
    SVExp, CovMea = KrigingP.exp_semivariogram(Prec, Loc)
    xopt, ModOpt, VarFunArr = KrigingP.theor_variogram(SVExp)
    Z, SP, ZAvg = KrigingP.Krig(10.0, POIC, Loc, Prec, CovMea, ModOpt, xopt,
                                VarFunArr, 10, 11, 'Ord')
    print(Z)
    print(ZAvg)
    return
Example #24
def run_svm(kernel=k, train_size=50000):
    train, valid, test, usps = DataLoad.create_dataset()
    svclassifier = SVC(kernel=kernel, gamma=1)
    svclassifier.fit(train[0][:train_size], train[1][:train_size])

    y_pred = svclassifier.predict(test[0])
    DataLoad.write_to_csv("svm_test.csv", y_pred)
    print("accuracy test ", DataLoad.get_accuracy(y_pred, test[1]))
    print(confusion_matrix(test[1], y_pred))

    # -------------------------
    y_pred = svclassifier.predict(usps[0])
    DataLoad.write_to_csv("svm_test_usps.csv", y_pred)
    print("accuracy usps test ", DataLoad.get_accuracy(y_pred, usps[1]))
    print(confusion_matrix(usps[1], y_pred))
Example #25
def run_random_forest():
    train, val, test, usps = DataLoad.create_dataset()
    classifier = RandomForestClassifier(n_estimators=10)
    classifier.fit(train[0], train[1])

    y_pred = classifier.predict(test[0])
    print("accuracy test ", DataLoad.get_accuracy(y_pred, test[1]))
    print(confusion_matrix(test[1], y_pred))
    DataLoad.write_to_csv("random_forest_test.csv", y_pred)

    # -------------------------
    y_pred = classifier.predict(usps[0])
    print("accuracy usps ", DataLoad.get_accuracy(y_pred, usps[1]))
    print(confusion_matrix(usps[1], y_pred))
    DataLoad.write_to_csv("random_forest_test_usps.csv", y_pred)
Example #26
 def wraperMethod(*args, **kwargs):
     try:
         self = args[0]
         self.session = self.repository.findByStatusAndCommit(
             FrameworkStatus[FrameworkConstant.ACTIVE], Session)
         if self.session:
             self.globals.success(
                 f'"{self.session.key}" session loaded successfully')
         else:
             self.session = DataLoad.getBasicSession(self)
             print(f'self.session = {self.session}')
             self.globals.failure(
                 f'''couldn't find any active session. Running most recent version of "{self.session.key}" session.''',
                 self.globals.NOTHING)
     except Exception as exception:
         print(
             f'''{Constant.WRAPPER}{LoadSession.__name__} failed to load framework session. Cause: {str(exception)}'''
         )
     return function(*args, **kwargs)
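This wrapper closes over a variable named function, so it only makes sense inside a decorator. A minimal sketch of the assumed enclosing decorator (the name LoadSession appears in the error message above; the structure is an assumption):

def LoadSession(function):
    # Hypothetical enclosing decorator: 'function' is the decorated
    # method that wraperMethod closes over and finally calls.
    def wraperMethod(*args, **kwargs):
        # ... session-loading logic as in the snippet above ...
        return function(*args, **kwargs)
    return wraperMethod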
Example #27
def Test31():
    TgtDict = DL.LoadVocabulary("Model/tgt.vocab")
    Out = TT.TranslateOutput(TgtDict, 5).Init(4)
    print(Out.IndexSent)
    Out.Add([1, 2, 3, 4])
    print(Out.IndexSent)
    Out.Add([2, 3, 4, 5])
    print(Out.IndexSent)
    Out.Add([2, 3, 4, 5])
    print(Out.AllFinish())
    print(Out.IndexSent)
    Out.Add([2, 3, 4, 5])
    print(Out.AllFinish())
    Out.Add([2, 3, 4, 5])
    Out.Add([2, 3, 4, 5])
    print(Out.AllFinish())
    print(Out.GetCurrentIndexTensor())
    print(Out.IndexSent)
    print(Out.GetWordSent())
    print(Out.ToFile("Output/predict"))
Example #28
def make_diagam(count_klast, filename):
    data = DataLoad.get_transformed_data(filename).values
    k_means = KMeans(n_clusters=count_klast, random_state=1)
    k_means.fit(data)
    centers = k_means.cluster_centers_
    print("silhouette:", metrics.silhouette_score(data, k_means.labels_))
    klusters = []
    for j in range(count_klast):
        klusters.append([])
    for i, la in enumerate(k_means.labels_):
        klusters[la].append(data[i])
    # Build the legend
    legend = []
    for center in centers:
        legend.append('age:' + str(int(center[0])) + '\ntime ' +
                      str(datetime.timedelta(seconds=int(center[2]))))
    # Prepare the data for the chart
    klust_sizes = []
    for kluster in klusters:
        klust_sizes.append(len(kluster))
    plt.figure(num=1, figsize=(6, 6))
    plt.axes(aspect=1)
    plt.title('Size of clusters', size=14)
    plt.pie(klust_sizes, labels=legend)
    img = io.BytesIO()
    # Write the figure into the in-memory buffer (and to disk) before
    # clearing it, so the returned image is not empty.
    plt.savefig(img, format='png')
    img.seek(0)
    plt.savefig('static/kmeans2d.png')
    plt.show()
    plt.clf()

    # fig = plt.figure()
    # ax2 = Axes3D(fig)
    # ax2.scatter(data[:, 0], data[:, 1], data[:, 2], c=klusters, cmap='prism')
    # ax2.set_xlabel('driver_age')
    # ax2.set_ylabel('driver_gender')
    # ax2.set_zlabel('stop_time')
    # plt.savefig('static/kmeans3d.png')
    return img
Example #29
def main():
    data_train = dataload.load_data_train()
    modeltrain.Train_Model(data_train)
    data_pre = dataload.load_data_pre()
    pre_result = dataprediction.Predict_Data(data_pre)
    print("真实值为 930291366.85 预测结果为:%f" % (pre_result))
Example #30
def get_default_test_predictions():
    return DataLoad.get_transformed_data().values