Example #1
0
 def test_manhattan_other(self):
     # Manhattan distance from the first stock to a centroid placed on the
     # second stock.
     formatted_data = format_stock_data([self.test_stock, self.test_stock2])
     # create centroid located at the other (second) stock
     centroid = (formatted_data['returns'][1], formatted_data['dividends'][1])
     # calculate distance from the first stock to that centroid
     manhattan_d = manhattan_dist(formatted_data['returns'][0], formatted_data['dividends'][0], centroid)
     # distance to a different stock is non-zero; the expected value comes out
     # of floating-point arithmetic, so compare with a tolerance rather than
     # demanding bit-for-bit equality
     self.assertAlmostEqual(manhattan_d, 38.52092060801694)
Example #2
0
 def test_k_means_2(self):
     # Two stocks clustered with k=2: one centroid is expected on each stock.
     stock_features = format_stock_data([self.test_stock, self.test_stock2])
     centroids = k_means(stock_features, 2)
     # Only membership is checked - no assumption is made about the order
     # in which the centroids are returned.
     expected_centroids = (
         (-40.938865746148089, 0.0),
         (-3.6179451381311551, 1.2),
     )
     for expected in expected_centroids:
         self.assertIn(expected, centroids)
Example #3
0
 def test_pearson_other(self):
     # Pearson distance from the first stock to a centroid placed on the
     # second stock.
     formatted_data = format_stock_data([self.test_stock, self.test_stock2])
     # create centroid located at the other (second) stock
     centroid = (formatted_data['returns'][1], formatted_data['dividends'][1])
     # calculate distance from the first stock to that centroid
     pearson_d = pearson_dist(formatted_data['returns'][0], formatted_data['dividends'][0], centroid)
     # distance to a different stock should be non-zero (1 for this fixture);
     # compare with a tolerance since the value is a floating-point result
     self.assertAlmostEqual(pearson_d, 1)
Example #4
0
 def test_euclidean_self(self):
     # A centroid placed exactly on a stock must be at distance zero from it.
     data = format_stock_data([self.test_stock, self.test_stock2])
     stock_return = data['returns'][0]
     stock_dividend = data['dividends'][0]
     # centroid built from the very same coordinates as the first stock
     on_stock = (stock_return, stock_dividend)
     distance = euclidean_dist(stock_return, stock_dividend, on_stock)
     self.assertEqual(distance, 0)
Example #5
0
 def test_euclidean_other(self):
     # Euclidean distance from the first stock to a centroid placed on the
     # second stock.
     formatted_data = format_stock_data([self.test_stock, self.test_stock2])
     # create centroid located at other
     centroid = (formatted_data['returns'][1], formatted_data['dividends'][1])
     # calculate distance from the first stock to that centroid
     euclidean_d = euclidean_dist(formatted_data['returns'][0], formatted_data['dividends'][0], centroid)
     # distance to the other stock is non-zero; the expected value is a
     # floating-point result, so compare with a tolerance
     self.assertAlmostEqual(euclidean_d, 37.34020775290227)
Example #6
0
 def test_hierarchical_cluster(self):
     # Hierarchical clustering of two stocks should merge them into one cluster.
     formatted_data = format_stock_data([self.test_stock, self.test_stock2])
     # perform the clustering
     clusters = hierarchical_clustering(formatted_data)
     # should be a single cluster (combine two stocks): the first row's
     # first two entries are expected to be 0.0 and 1.0
     self.assertEqual(clusters[0][0], 0.0)
     self.assertEqual(clusters[0][1], 1.0)
     # Euclidean distance should be distance between two members of top-level
     # cluster; it is a computed float, so compare with a tolerance
     self.assertAlmostEqual(clusters[0][2], 37.34020775290227)
Example #7
0
 def test_assign_centroids(self):
     # Each stock should be assigned to the centroid created next to it.
     data = format_stock_data([self.test_stock, self.test_stock2])
     # place one centroid a small offset away from each stock
     near_first = (data['returns'][0] + 2.0, data['dividends'][0] + 1.0)
     near_second = (data['returns'][1] + 2.5, data['dividends'][1] + 1.2)
     assignment = assign_to_centroids(data, [near_first, near_second], euclidean_dist)
     # the result is keyed by centroid and lists the indices of its stocks
     for centroid, expected_members in ((near_first, [0]), (near_second, [1])):
         self.assertEqual(assignment[centroid], expected_members)
Example #8
0
 def kmeans_cluster_stocks(k):
     """ API endpoint to cluster stocks using k-means

     :param k: number of clusters
     :return: JSON body ``{"centroids": ...}`` with the k-means result and a
         200 status code; aborts with 404 when no stocks are stored
     """
     # NOTE(review): assuming `mongo` is a PyMongo handle, find() returns a
     # cursor object and never None, so the previous `is None` test could not
     # trigger. Materialise the results and test for emptiness instead.
     stock_data = list(mongo.db.stocks.find())
     # check that at least one stock was found in db
     if not stock_data:
         abort(404)
     clusters = k_means(format_stock_data(stock_data), k)
     return jsonify({"centroids": clusters}), 200
Example #9
0
 def test_reassign_centroids(self):
     # After one assign/adjust round each centroid should sit on its only member.
     data = format_stock_data([self.test_stock, self.test_stock2])
     # start with centroids slightly offset from each stock
     first_start = (data['returns'][0] + 2.0, data['dividends'][0] + 1.0)
     second_start = (data['returns'][1] + 2.5, data['dividends'][1] + 1.2)
     # group the stocks under their nearest centroid
     membership = assign_to_centroids(data, [first_start, second_start], euclidean_dist)
     # move the centroids based on that grouping
     moved = adjust_centroids(data, [first_start, second_start], membership)
     # with a single member per centroid, the new location is that individual
     # stock's own location
     expected_locations = ((-3.6179451381311551, 1.2), (-40.938865746148089, 0.0))
     for position, expected in enumerate(expected_locations):
         self.assertEqual(moved[position], expected)
Example #10
0
 def hierarchy_cluster_stocks():
     """ API endpoint to cluster stocks using hierarchical clustering

     :return: JSON body ``{"clusters": ...}`` with the result of
         hierarchical_clustering and a 200 status code; aborts with 404 when
         no stocks are stored
     """
     # NOTE(review): assuming `mongo` is a PyMongo handle, find() returns a
     # cursor object and never None, so the previous `is None` test could not
     # trigger. Materialise the results and test for emptiness instead.
     stock_data = list(mongo.db.stocks.find())
     # check that at least one stock was found in db
     if not stock_data:
         abort(404)
     # format data and do clustering
     cluster_data = format_stock_data(stock_data)
     clusters = hierarchical_clustering(cluster_data)
     return jsonify({"clusters": clusters}), 200
Example #11
0
 def hierarchy_cluster_stocks_plot():
     """ API endpoint to plot the hierarchical clustering of stocks

     :return: JSON body ``{"plot_url": ...}`` carrying the value returned by
         plot_hierarchy and a 200 status code; aborts with 404 when no stocks
         are stored
     """
     # NOTE(review): assuming `mongo` is a PyMongo handle, find() returns a
     # cursor object and never None, so the previous `is None` test could not
     # trigger. Materialise the results and test for emptiness instead.
     stock_data = list(mongo.db.stocks.find())
     # check that at least one stock was found in db
     if not stock_data:
         abort(404)
     # format data for clustering
     cluster_data = format_stock_data(stock_data)
     # make the plot
     res = plot_hierarchy(cluster_data)
     return jsonify({"plot_url": res}), 200
Example #12
0
    def get_stocks():
        """ render template for stock page of site. Use Jinja to dynamically display stock data

        Loads every stock from the db, fits the SVM and decision-tree
        regressions, clusters the stocks with k-means (k=3), builds the plots
        and computes a per-stock expected return before rendering.

        :return: HTML rendering of the 'stocks.html' template; aborts with 404
            when no stocks are stored
        """
        # look up stocks in mongo db
        stock_data = list(mongo.db.stocks.find())
        # list() never yields None, so an empty list is the "nothing found"
        # signal that has to be checked here
        if not stock_data:
            abort(404)
        # perform ML algos
        features, targets = extract_feature_data(stock_data)
        # create the svm and dtree
        svm = create_support_vector_regression(features, targets)
        dtree = create_decision_tree_regression(features, targets)
        # cluster the stocks
        formatted_data = format_stock_data(stock_data)
        clusters = k_means(formatted_data, 3)
        cluster_assignments = get_cluster_assignments(formatted_data, clusters)
        # build the plots handed to the template (evaluated in this order)
        plots = {
            'kmeans': plot_clusters(formatted_data, clusters),
            'hierarchy': plot_hierarchy(formatted_data),
            'svm': plot_support_vector_regression(features, targets, svm),
            'dtree': plot_decision_tree_regression(features, targets, dtree),
        }
        # add expected returns: average of the two models' predictions
        stock_rets = {}
        for stock in stock_data:
            features_stock, _ = extract_feature_data([stock])
            if len(features_stock) == 0:
                # no features could be extracted for this stock - fall back to 0.0
                stock_rets[stock['symbol']] = 0.0
            else:
                svm_prediction = svm.predict(features_stock)[0]
                dtree_prediction = dtree.predict(features_stock)[0]
                stock_rets[stock['symbol']] = (svm_prediction +
                                               dtree_prediction) / 2
        # render the HTML
        return render_template('stocks.html',
                               stocks=stock_data,
                               plots=plots,
                               clusters=cluster_assignments,
                               stock_rets=stock_rets)
Example #13
0
 def test_k_means_1(self):
     # Two stocks clustered with k=1: the single centroid is expected to lie
     # midway between the two stocks.
     stock_features = format_stock_data([self.test_stock, self.test_stock2])
     centroids = k_means(stock_features, 1)
     expected_midpoint = (-22.278405442139622, 0.59999999999999998)
     self.assertEqual(centroids[0], expected_midpoint)
Example #14
0
 def test_format_cluster_data_multiple(self):
     # Formatting two stocks: each of the three keys should map to a list
     # holding one entry per stock.
     result = format_stock_data([self.test_stock, self.test_stock2])
     expected_columns = (
         ('dividends', [1.2, 0.0]),
         ('returns', [-3.617945138131155, -40.93886574614809]),
         ('names', ['LL', 'L2']),
     )
     for column, expected in expected_columns:
         self.assertEqual(result[column], expected)
Example #15
0
 def test_format_cluster_data_single(self):
     # Formatting a single stock: each of the three keys should map to a
     # one-element list.
     result = format_stock_data([self.test_stock])
     expected_columns = (
         ('dividends', [1.2]),
         ('returns', [-3.617945138131155]),
         ('names', ['LL']),
     )
     for column, expected in expected_columns:
         self.assertEqual(result[column], expected)