def test_k_means_2(self):
    """
    Cluster the two fixture stocks with k=2 and check both expected
    centroids are present (one cluster on each stock).
    """
    formatted_data = format_stock_data([self.test_stock, self.test_stock2])

    # perform the clustering
    clusters = k_means(formatted_data, 2)

    expected_centroids = [
        (-40.938865746148089, 0.0),
        (-3.6179451381311551, 1.2),
    ]

    # Floats are compared within a small absolute tolerance rather than with
    # exact equality, so the test does not depend on platform-specific rounding.
    tolerance = 1e-7

    def is_close(centroid, expected):
        return len(centroid) == len(expected) and all(
            abs(actual - want) <= tolerance
            for actual, want in zip(centroid, expected)
        )

    # Note: can make no assumptions about the order of the centroids!
    for expected in expected_centroids:
        self.assertTrue(
            any(is_close(centroid, expected) for centroid in clusters),
            msg="expected centroid %r not found in %r" % (expected, clusters),
        )
def kmeans_cluster_stocks(k):
    """
    API endpoint to cluster stocks using k-means

    :param k: number of clusters
    :return: tuple of (JSON response with the cluster centroids under the
             "centroids" key, HTTP status 200)
    """
    # find() hands back a cursor rather than None (PyMongo semantics -- assumes
    # `mongo` is a PyMongo-backed handle), so the "nothing found" case has to be
    # detected by materializing the results and checking for emptiness.
    stock_data = list(mongo.db.stocks.find())

    # check stock was found in db
    if not stock_data:
        abort(404)

    clusters = k_means(format_stock_data(stock_data), k)
    return jsonify({"centroids": clusters}), 200
def kmeans_cluster_stocks_plot(k):
    """
    API endpoint to cluster stocks using k-means and plot the result

    :param k: number of clusters
    :return: tuple of (JSON response with the value returned by
             plot_clusters under the "plot_url" key, HTTP status 200)
    """
    # find() hands back a cursor rather than None (PyMongo semantics -- assumes
    # `mongo` is a PyMongo-backed handle), so the "nothing found" case has to be
    # detected by materializing the results and checking for emptiness.
    stock_data = list(mongo.db.stocks.find())

    # check stock was found in db
    if not stock_data:
        abort(404)

    # format data and do clustering
    cluster_data = format_stock_data(stock_data)
    clusters = k_means(cluster_data, k)

    # make the plot
    res = plot_clusters(cluster_data, clusters)
    return jsonify({"plot_url": res}), 200
def get_stocks():
    """
    Render the template for the stock page of the site. Jinja is used to
    dynamically display the stock data, plots, cluster assignments and
    expected returns.

    :return: HTML rendering
    """
    # look up stocks in mongo db
    stock_data = list(mongo.db.stocks.find())

    # check that a result was returned from db; list() never yields None,
    # so an empty result set is what signals "nothing found"
    if not stock_data:
        abort(404)

    # perform ML algos
    features, targets = extract_feature_data(stock_data)

    # create the svm and dtree
    svm = create_support_vector_regression(features, targets)
    dtree = create_decision_tree_regression(features, targets)

    # cluster the stocks
    formatted_data = format_stock_data(stock_data)
    clusters = k_means(formatted_data, 3)
    cluster_assignments = get_cluster_assignments(formatted_data, clusters)

    # tack on plotly urls
    plots = {
        'kmeans': plot_clusters(formatted_data, clusters),
        'hierarchy': plot_hierarchy(formatted_data),
        'svm': plot_support_vector_regression(features, targets, svm),
        'dtree': plot_decision_tree_regression(features, targets, dtree),
    }

    # add expected returns: the mean of the two regressors' predictions,
    # or 0.0 when no features could be extracted for the stock
    stock_rets = {}
    for stock in stock_data:
        features_stock, _ = extract_feature_data([stock])
        # len() is used on purpose: features_stock may be an array-like whose
        # truthiness is ambiguous
        if len(features_stock) == 0:
            stock_rets[stock['symbol']] = 0.0
        else:
            svm_prediction = svm.predict(features_stock)[0]
            dtree_prediction = dtree.predict(features_stock)[0]
            stock_rets[stock['symbol']] = (svm_prediction + dtree_prediction) / 2

    # render the HTML
    return render_template('stocks.html', stocks=stock_data, plots=plots,
                           clusters=cluster_assignments, stock_rets=stock_rets)
def test_k_means_1(self):
    """
    Cluster the two fixture stocks with k=1 and check the single centroid
    is located in the middle of the 2 stocks.
    """
    formatted_data = format_stock_data([self.test_stock, self.test_stock2])

    # perform the clustering
    clusters = k_means(formatted_data, 1)

    expected = (-22.278405442139622, 0.59999999999999998)
    centroid = clusters[0]

    # Compare coordinate by coordinate with assertAlmostEqual instead of exact
    # tuple equality, so the test does not depend on platform-specific rounding.
    self.assertEqual(len(centroid), len(expected))
    for actual, want in zip(centroid, expected):
        self.assertAlmostEqual(actual, want)