Python ReviewDB.ReviewDB 예제들, libs.review_db.ReviewDB.ReviewDB Python 예제들

예제 #1

0

파일 보기

 def test_density_estimator(self):
     '''
         Check that the density estimate built from a histogram sums to one.
         The histogram is the first element of the result returned by
         char_review_length_counter() for id '1-2-1-0-0'.
     '''
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()
     char_result_cluster = nlp.char_review_length_counter('1-2-1-0-0')
     histogram = char_result_cluster[0]
     density_estimate = histogram_comparison.density_estimator(histogram)
     # A sum of floating-point fractions need not equal 1.0 bit-for-bit, so
     # compare with a tolerance instead of exact equality.
     self.assertAlmostEqual(sum(density_estimate.values()), 1.0)

예제 #2

0

파일 보기

파일: helper_function_tests.py 프로젝트: phymucs/teddy

 def test_ReviewDB_init(self):
     '''
         Test initialization of review ids equivalent to original indices as part of
         init for ReviewDB object: for each id 0..9, the row of the 'all'
         entity's clusters_df whose review_id equals that id must carry the
         same value as its index label.
     '''
     db = ReviewDB('tests/test_data/')
     # Look the dataframe up once instead of twice per iteration.
     clusters = db.entity_db_dict['all'].clusters_df
     for review_id in range(10):
         matching_rows = clusters[clusters['review_id'] == review_id]
         # Index.item() raises unless exactly one row matched.
         self.assertEqual(matching_rows.index.item(), review_id)

예제 #3

0

파일 보기

 def test_TFIDF_funcs(self):
     '''
         Test tfidf_score(), top_k() and compare_top_k() of TFIDFModel on the
         toy data set, using the ids 0 and '1-2-1-0-0'.
     '''
     db = ReviewDB('tests/test_data/')
     tfidf = TFIDFModel(db.entity_db_dict['all'])
     #test tfidf.tfidf_score(), which also calls tfidf.scores_to_counter()
     tfidf_zero = tfidf.tfidf_score(0, ['wharf'])
     self.assertIn('wharf', tfidf_zero.keys())
     self.assertNotIn('banana', tfidf_zero.keys())
     tfidf_cluster = tfidf.tfidf_score(
         '1-2-1-0-0', ['towels', 'unwelcome', 'charge', 'wharf'])
     self.assertGreater(tfidf_cluster['towels'], 0)
     self.assertGreater(tfidf_cluster['unwelcome'], 0)
     self.assertGreater(tfidf_cluster['charge'], 0)
     self.assertEqual(tfidf_cluster['wharf'], 0.0)
     #test tfidf.top_k(), which also calls tfidf.scores_to_counter()
     top_for_zero = tfidf.top_k(0)
     self.assertIn('wharf', top_for_zero.keys())
     top_for_cluster = tfidf.top_k('1-2-1-0-0')
     self.assertIn('towels', top_for_cluster.keys())
     self.assertIn('charge', top_for_cluster.keys())
     self.assertNotIn('wharf', top_for_cluster.keys())
     #test tfidf.compare_top_k()
     group1, group2 = tfidf.compare_top_k(0, '1-2-1-0-0')
     # Assert directly inside the loops so that a failure reports the
     # offending key and values instead of a bare "False is not true".
     #every top-k key of the second id must be in both groups, and group2
     #must carry the same score as top_k() gave for that id
     for key in top_for_cluster.keys():
         self.assertIn(key, group1.keys())
         self.assertIn(key, group2.keys())
         self.assertEqual(group2[key], top_for_cluster[key],
                          msg='score mismatch for key %r' % (key,))
     #every top-k key of the first id must be in both groups, and group1
     #must carry the same score as top_k() gave for that id
     for key in top_for_zero.keys():
         self.assertIn(key, group2.keys())
         self.assertIn(key, group1.keys())
         self.assertEqual(group1[key], top_for_zero[key],
                          msg='score mismatch for key %r' % (key,))
     self.assertEqual(group2['wharf'], 0.0)

예제 #4

0

파일 보기

 def test_sorensen(self):
     '''
         Test HistogramComparison.sorensen() on a histogram compared with
         itself, on a trivial pair of one-key histograms, and on a pair of
         two-key histograms that share one key.
     '''
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.sorensen(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.sorensen(
         trivial_histogram1, trivial_histogram2)
     self.assertEqual(compare_trivial, 1.0)
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.sorensen(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance check. The previous form,
     # assertLess(value - 0.66667, .001), had no absolute value and therefore
     # accepted every result below 0.66767 (including 0).
     # NOTE(review): because the old check was one-sided, the expected value
     # 0.66667 was never enforced from below -- confirm it against sorensen().
     self.assertAlmostEqual(compare_more_complicated, 0.66667, delta=.001)

예제 #5

0

파일 보기

 def test_hellinger(self):
     '''
         Test HistogramComparison.hellinger() on a histogram compared with
         itself, on a trivial pair of one-key histograms, and on a pair of
         two-key histograms that share one key.
     '''
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.hellinger(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.hellinger(
         trivial_histogram1, trivial_histogram2)
     # Tolerant comparison: the last digits of a float result can differ
     # between platforms or library versions.
     self.assertAlmostEqual(compare_trivial, 0.7071067811865475)
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.hellinger(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance check. The previous form,
     # assertLess(value - expected, .001), had no absolute value and therefore
     # accepted every result smaller than the expected one.
     self.assertAlmostEqual(
         compare_more_complicated, 0.6822591268536838, delta=.001)

예제 #6

0

파일 보기

    def test_nlplength_funcs(self):
        '''
            Exercise the word-, sentence- and character-level length counters
            of NLPLengths against fixed expected tuples for the ids 0 and
            '1-2-1-0-0', plus the empty-input case.
        '''
        entity_db = ReviewDB('tests/test_data/').entity_db_dict['all']
        lengths = NLPLengths(entity_db)

        # Empty input yields an empty Counter followed by four zeros.
        no_reviews = lengths.word_token_review_length_counter([])
        self.assertEqual(no_reviews, (Counter(), 0, 0, 0, 0))

        # word_token_review_length_counter
        words_for_zero = lengths.word_token_review_length_counter(0)
        print(words_for_zero)
        expected_words_for_zero = (Counter({"12": 1}), 12.0, 12, (12, 1), 0.0)
        self.assertEqual(words_for_zero, expected_words_for_zero)

        words_for_cluster = lengths.word_token_review_length_counter(
            '1-2-1-0-0')
        print(words_for_cluster)
        expected_words_for_cluster = (
            Counter({"22": 1, "7": 1, "6": 1}),
            11.666666666666666,
            7,
            (6, 1),
            8.962886439832502,
        )
        self.assertEqual(words_for_cluster, expected_words_for_cluster)

        # sent_token_review_length_counter
        sents_for_zero = lengths.sent_token_review_length_counter(0)
        print(sents_for_zero)
        expected_sents_for_zero = (Counter({"1": 1}), 1.0, 1, (1, 1), 0.0)
        self.assertEqual(sents_for_zero, expected_sents_for_zero)

        sents_for_cluster = lengths.sent_token_review_length_counter(
            '1-2-1-0-0')
        print(sents_for_cluster)
        expected_sents_for_cluster = (
            Counter({"1": 2, '3': 1}),
            1.6666666666666667,
            1,
            (1, 2),
            1.1547005383792515,
        )
        self.assertEqual(sents_for_cluster, expected_sents_for_cluster)

        # char_review_length_counter
        chars_for_zero = lengths.char_review_length_counter(0)
        print(chars_for_zero)
        expected_chars_for_zero = (Counter({"53": 1}), 53.0, 53, (53, 1), 0.0)
        self.assertEqual(chars_for_zero, expected_chars_for_zero)

        chars_for_cluster = lengths.char_review_length_counter('1-2-1-0-0')
        print(chars_for_cluster)
        expected_chars_for_cluster = (
            Counter({"101": 1, "31": 1, "30": 1}),
            54.0,
            31,
            (30, 1),
            40.70626487409524,
        )
        self.assertEqual(chars_for_cluster, expected_chars_for_cluster)

        # A Counter answers 0 for a key it does not contain.
        sentence_histogram = sents_for_cluster[0]
        self.assertEqual(sentence_histogram['0'], 0)

예제 #7

0

파일 보기

from flask import send_from_directory
from flask_cors import CORS
from libs import nlp_length_functions
from libs.histogram_comparisons import HistogramComparison
from libs.review_db import ReviewDB

# logging configurations
logging.basicConfig(format='%(filename)s:%(lineno)d %(message)s')
log = logging.getLogger(__name__)
log.setLevel('INFO')

# set up data access
# The JSON files are opened through context managers so their handles are
# closed as soon as parsing finishes, instead of lingering until garbage
# collection.
with open("./../config.json") as _config_file:
    CONFIG = json.load(_config_file)
# The dataset folder is resolved from the DATA_DIR environment variable and
# the 'dataset' entry of the config file.
data_folder = os.path.join(os.environ['DATA_DIR'], CONFIG['dataset'])
with open(os.path.join(data_folder, 'schema.json')) as _schema_file:
    schema = json.load(_schema_file)['schema']
database = ReviewDB(data_folder)

app = Flask(__name__,
            static_folder='./react-app/build/static',
            template_folder='./react-app/build')
cors = CORS(app)

app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True

histogram_comparison_utils = HistogramComparison()


# [Xiong] endpoint for sending static files
@app.route('/data/<path:subpath>')
def data(subpath):

예제 #8

0

파일 보기

파일: helper_function_tests.py 프로젝트: phymucs/teddy

 def test_ReviewDB_funcs(self):
     '''
         Exercise decode_id(), fetch_reviews() and get_review_from_id() of the
         'all' entity database loaded from the toy data set.
     '''
     entity_db = ReviewDB('tests/test_data/').entity_db_dict['all']

     def check_authors(frame, expected_authors):
         # Compare the author of each leading row, in order, with the
         # expected names.
         for position, expected in enumerate(expected_authors):
             self.assertEqual(frame.iloc[[position]].author.values, expected)

     # Empty input: all three accessors are called first, then checked.
     empty_results = (
         entity_db.decode_id([]),
         entity_db.fetch_reviews([]),
         entity_db.get_review_from_id([]),
     )
     for outcome in empty_results:
         self.assertEqual(outcome, None)

     # decode_id()
     self.assertEqual(entity_db.decode_id(0), [0])
     for cluster_id in ('1-2-1-0-0', '1-2-1-0', '1-2-1', '1-2'):
         self.assertEqual(entity_db.decode_id(cluster_id), [1, 2, 7])
     self.assertEqual(entity_db.decode_id('4'), [0, 5, 6, 8])

     # fetch_reviews()
     check_authors(entity_db.fetch_reviews([0]), ['guest1'])
     check_authors(entity_db.fetch_reviews([0, 3, 7]),
                   ['guest1', 'guest4', 'guest8'])

     # get_review_from_id()
     check_authors(entity_db.get_review_from_id(0), ['guest1'])
     for cluster_id in ('1-2-1-0-0', '1-2-1-0', '1-2-1', '1-2', '1'):
         check_authors(entity_db.get_review_from_id(cluster_id),
                       ['guest2', 'guest3', 'guest8'])
     check_authors(entity_db.get_review_from_id('4'),
                   ['guest1', 'guest6', 'guest7', 'guest9'])

     # The "all" code decodes to every review id.
     self.assertEqual(entity_db.decode_id("all"), list(range(10)))

예제 #9

0

파일 보기

파일: app.py 프로젝트: kmkwon94/ainized-teddy

log = logging.getLogger(__name__)
log.setLevel('INFO')

# The JSON files are opened through context managers so their handles are
# closed as soon as parsing finishes, instead of lingering until garbage
# collection.
with open("./../config.json") as _config_file:
    CONFIG = json.load(_config_file)
# The dataset folder is resolved from the DATA_DIR environment variable and
# the 'dataset' entry of the config file.
data_folder = os.path.join(os.environ['DATA_DIR'], CONFIG['dataset'])
with open(os.path.join(data_folder, 'schema.json')) as _schema_file:
    schema = json.load(_schema_file)['schema']

app = Flask(__name__, static_folder = './react-app/build/static', template_folder = './react-app/build')
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True

all_centroids_df = pd.read_csv(os.path.join(data_folder, 'centroids.csv'))
log.info('centroids loaded')
all_clusters_df = pd.read_csv(os.path.join(data_folder, 'clusters.csv'))
log.info('clusters loaded')
db_all = ReviewDB(all_clusters_df, all_centroids_df)
working_df = None

# Two TF-IDF models over the same database; the second one is constructed
# with an extra positional argument of 2.
tfidf_model = TfidfModel.TFIDFModel(db_all)
tfidf_model_2g = TfidfModel.TFIDFModel(db_all, 2)


def hotel_attr_path(biz_id):
    '''Return the path of hotel-clusters/<biz_id>/attr.csv inside data_folder.'''
    return os.path.join(data_folder, f'hotel-clusters/{biz_id}/attr.csv')


def hotel_centroids_path(biz_id):
    '''Return the path of hotel-clusters/<biz_id>/centroids.csv inside data_folder.'''
    return os.path.join(data_folder, f'hotel-clusters/{biz_id}/centroids.csv')


histogram_comparison_utils = HistogramComparison()

# [Xiong] Set up CORS access. I do this because I test the frontend on
# localhost:3000, while the server runs on localhost:5000. Eventually the CORS
# setup will make it possible for the data server and the front-end hosting
# server to run on different machines --- which may not be necessary, though.
@app.after_request

예제 #10

0

파일 보기

 def test_TFIDF_init(self):
     '''
         Check that constructing a TFIDFModel from the 'all' entity database
         of the toy data set yields an object rather than None.
     '''
     entity_db = ReviewDB('tests/test_data/').entity_db_dict['all']
     model = TFIDFModel(entity_db)
     self.assertIsNotNone(model)

예제 #11

0

파일 보기

 def test_nlplength_init(self):
     '''
         Check that constructing an NLPLengths helper from the 'all' entity
         database of the toy data set yields an object rather than None.
     '''
     entity_db = ReviewDB('tests/test_data/').entity_db_dict['all']
     lengths = NLPLengths(entity_db)
     self.assertIsNotNone(lengths)

예제 #12

0

파일 보기

 def test_tfidf_bigram(self):
     '''
         Check that a TFIDFModel built with ngramsize=2 reports the bigram
         "wharf rooms" among the keys scored for id 0.
     '''
     entity_db = ReviewDB('tests/test_data/').entity_db_dict['all']
     bigram_model = TFIDFModel(entity_db, ngramsize=2)
     bigram_scores = bigram_model.tfidf_score(0, ["wharf rooms"])
     self.assertIn("wharf rooms", bigram_scores.keys())