Ejemplo n.º 1
0
 def test_density_estimator(self):
     """Check that the values of a density estimate sum to one.

     The histogram under test is the first element returned by
     ``char_review_length_counter`` for the key ``'1-2-1-0-0'``
     (presumably a cluster id -- confirm against the test data).
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()
     char_result_cluster = nlp.char_review_length_counter('1-2-1-0-0')
     histogram = char_result_cluster[0]
     density_estimate = histogram_comparison.density_estimator(histogram)
     # The total is a sum of floats, so it can differ from 1.0 by rounding
     # error; compare with a tolerance instead of exact equality.
     self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
Ejemplo n.º 2
0
 def test_density_estimator(self):
     """Check that the values of a density estimate sum to one.

     The review database is loaded from ``tests/testing_db.csv`` and the
     histogram under test is the first element returned by
     ``char_review_length_counter`` for the key ``'1-2-1-0-0'``
     (presumably a cluster id -- confirm against the test data).
     """
     db = ReviewDB.load(cluster_file='tests/testing_db.csv')
     nlp = NLPLengths(db)
     histogram_comparison = HistogramComparison()
     char_result_cluster = nlp.char_review_length_counter('1-2-1-0-0')
     histogram = char_result_cluster[0]
     density_estimate = histogram_comparison.density_estimator(histogram)
     # The total is a sum of floats, so it can differ from 1.0 by rounding
     # error; compare with a tolerance instead of exact equality.
     self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
Ejemplo n.º 3
0
 def test_sorensen(self):
     """Exercise ``HistogramComparison.sorensen`` on three histogram pairs.

     Cases covered:
       * a histogram compared with itself gives 0.0,
       * ``{"1": 1}`` against ``{"1": 0}`` gives 1.0,
       * two partially overlapping histograms give roughly 0.66667.
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()

     # Identity case: a histogram compared with itself.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.sorensen(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Trivial case: a single shared key, one side with a zero count.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.sorensen(
         trivial_histogram1, trivial_histogram2)
     self.assertEqual(compare_trivial, 1.0)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.sorensen(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a one-sided ``assertLess(value - expected, tol)``
     # would also pass for every value below the expected one.
     self.assertAlmostEqual(compare_more_complicated, 0.66667, delta=.001)
Ejemplo n.º 4
0
 def test_hellinger(self):
     """Exercise ``HistogramComparison.hellinger`` on three histogram pairs.

     Cases covered:
       * a histogram compared with itself gives 0.0,
       * ``{"1": 1}`` against ``{"1": 0}`` gives about 0.7071,
       * two partially overlapping histograms give roughly 0.68226.
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()

     # Identity case: a histogram compared with itself.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.hellinger(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Trivial case: a single shared key, one side with a zero count.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.hellinger(
         trivial_histogram1, trivial_histogram2)
     # The expected value is irrational, so the last bits depend on how the
     # implementation orders its floating-point operations; compare with a
     # tolerance rather than exact equality.
     self.assertAlmostEqual(compare_trivial, 0.7071067811865475)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.hellinger(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a one-sided ``assertLess(value - expected, tol)``
     # would also pass for every value below the expected one.
     self.assertAlmostEqual(
         compare_more_complicated, 0.6822591268536838, delta=.001)
Ejemplo n.º 5
0
 def test_euclidean(self):
     """Exercise ``HistogramComparison.euclidean`` on three histogram pairs.

     Cases covered:
       * a histogram compared with itself gives 0.0,
       * ``{"1": 1}`` against ``{"1": 0}`` gives 1.0,
       * two partially overlapping histograms give roughly 4.24264.
     """
     db = ReviewDB.load(cluster_file='tests/testing_db.csv')
     nlp = NLPLengths(db)
     histogram_comparison = HistogramComparison()

     # Identity case: a histogram compared with itself.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.euclidean(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Trivial case: a single shared key, one side with a zero count.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.euclidean(
         trivial_histogram1, trivial_histogram2)
     self.assertEqual(compare_trivial, 1.0)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.euclidean(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a one-sided ``assertLess(value - expected, tol)``
     # would also pass for every value below the expected one.
     self.assertAlmostEqual(compare_more_complicated, 4.24264, delta=.001)
Ejemplo n.º 6
0
# Set up data access.
# The config path is relative, so it is resolved against the current working
# directory of the process; DATA_DIR must be set in the environment
# (a missing variable raises KeyError here).
with open("./../config.json") as config_file:
    CONFIG = json.load(config_file)
data_folder = os.path.join(os.environ['DATA_DIR'], CONFIG['dataset'])
# Context managers close the JSON files as soon as they are parsed instead of
# leaving the handles open until garbage collection.
with open(os.path.join(data_folder, 'schema.json')) as schema_file:
    schema = json.load(schema_file)['schema']
database = ReviewDB(data_folder)

# Flask application serving the built React front end.
app = Flask(__name__,
            static_folder='./react-app/build/static',
            template_folder='./react-app/build')
cors = CORS(app)

# Max-age 0 for served files and template auto-reload, so that changed
# assets/templates are picked up without stale cached copies.
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True

# Shared helper instance for histogram comparisons.
histogram_comparison_utils = HistogramComparison()


# [Xiong] endpoint for sending static files
@app.route('/data/<path:subpath>')
def data(subpath):
    """Serve the file at ``subpath`` from the dataset folder.

    Args:
        subpath: path portion of the URL after ``/data/``.

    Returns:
        The response produced by ``send_from_directory``.
    """
    return send_from_directory(f'{data_folder}', subpath)


# [Xiong] endpoint for loading cluster centroids
# GET Args:
#   biz_id: id for locating an entity (e.g. hotel)
#   cid: id for locating a cluster
# Returns:
#   A csv string of requested centroids