コード例 #1
0
 def test_k_means_2(self):
     # Cluster the two fixture stocks into two groups.
     stocks = [self.test_stock, self.test_stock2]
     points = format_stock_data(stocks)
     centroids = k_means(points, 2)
     # Expect one centroid per stock. Membership (not position) is checked
     # because the order of the returned centroids is not guaranteed.
     expected_centroids = (
         (-40.938865746148089, 0.0),
         (-3.6179451381311551, 1.2),
     )
     for expected in expected_centroids:
         self.assertIn(expected, centroids)
コード例 #2
0
 def kmeans_cluster_stocks(k):
     """ API endpoint to cluster stocks using k-means

     Aborts with HTTP 404 when the stocks collection yields no documents.

     :param k: number of clusters
     :return: tuple of (JSON response holding the centroids under the
         "centroids" key, HTTP status 200)
     """
     # find() hands back a cursor object and never None, so the cursor has to
     # be materialised before we can tell whether any stock was found.
     stock_data = list(mongo.db.stocks.find())
     if not stock_data:
         abort(404)
     clusters = k_means(format_stock_data(stock_data), k)
     return jsonify({"centroids": clusters}), 200
コード例 #3
0
 def kmeans_cluster_stocks_plot(k):
     """ API endpoint to cluster stocks using k-means and plot the result

     Aborts with HTTP 404 when the stocks collection yields no documents.

     :param k: number of clusters
     :return: tuple of (JSON response holding the result of plot_clusters
         under the "plot_url" key, HTTP status 200)
     """
     # find() hands back a cursor object and never None, so the cursor has to
     # be materialised before we can tell whether any stock was found.
     stock_data = list(mongo.db.stocks.find())
     if not stock_data:
         abort(404)
     # format data and do clustering
     cluster_data = format_stock_data(stock_data)
     clusters = k_means(cluster_data, k)
     # make the plot from the same formatted data that was clustered
     res = plot_clusters(cluster_data, clusters)
     return jsonify({"plot_url": res}), 200
コード例 #4
0
    def get_stocks():
        """ Render the template for the stock page of the site.

        Loads every document from the stocks collection, fits a support
        vector regression and a decision tree regression on the extracted
        features, clusters the stocks with k-means (k=3), builds the plots
        and computes a per-symbol expected return before handing everything
        to the 'stocks.html' template. Aborts with HTTP 404 when the
        collection yields no documents.

        :return: HTML rendering
        """
        # look up stocks in mongo db
        stock_data = list(mongo.db.stocks.find())
        # list(...) is never None, so test for emptiness to detect "nothing
        # was returned from the db"
        if not stock_data:
            abort(404)
        # perform ML algos
        features, targets = extract_feature_data(stock_data)
        # create the svm and dtree
        svm = create_support_vector_regression(features, targets)
        dtree = create_decision_tree_regression(features, targets)
        # cluster the stocks
        formatted_data = format_stock_data(stock_data)
        clusters = k_means(formatted_data, 3)
        cluster_assignments = get_cluster_assignments(formatted_data, clusters)
        # build the plots (the plot helpers run in the order listed here)
        plots = {
            'kmeans': plot_clusters(formatted_data, clusters),
            'hierarchy': plot_hierarchy(formatted_data),
            'svm': plot_support_vector_regression(features, targets, svm),
            'dtree': plot_decision_tree_regression(features, targets, dtree),
        }
        # add expected returns: the mean of the first prediction of each model
        stock_rets = {}
        for stock in stock_data:
            features_stock, _ = extract_feature_data([stock])
            # len() is kept on purpose: features_stock may be an array type
            # whose truth value is ambiguous -- TODO confirm against
            # extract_feature_data
            if len(features_stock) == 0:
                # no features for this stock, fall back to a zero return
                stock_rets[stock['symbol']] = 0.0
            else:
                svm_prediction = svm.predict(features_stock)[0]
                dtree_prediction = dtree.predict(features_stock)[0]
                stock_rets[stock['symbol']] = (svm_prediction +
                                               dtree_prediction) / 2
        # render the HTML
        return render_template('stocks.html',
                               stocks=stock_data,
                               plots=plots,
                               clusters=cluster_assignments,
                               stock_rets=stock_rets)
コード例 #5
0
 def test_k_means_1(self):
     # Cluster the two fixture stocks into a single group.
     stocks = [self.test_stock, self.test_stock2]
     centroids = k_means(format_stock_data(stocks), 1)
     # The lone centroid is expected to sit in the middle of the two stocks.
     expected_centroid = (-22.278405442139622, 0.59999999999999998)
     self.assertEqual(centroids[0], expected_centroid)