Beispiel #1
0
def _trim_comment(comment):
    """
    Trims a single comment dict in place before it is stored in the index.

    Drops the 'vector_space' and 'emotion_vector' entries, reduces
    'relevancy' to two decimal places and capitalizes the emotion labels.
    """
    # A default is passed to pop() so a comment that lacks one of the vector
    # entries does not abort the whole indexing run.
    comment.pop('vector_space', None)
    comment.pop('emotion_vector', None)
    comment['relevancy'] = float('%.2f' % comment['relevancy'])
    comment['sentic_emotions'] = [
        name.capitalize() for name in comment['sentic_emotions']
    ]
    comment['compound_emotions'] = [
        {
            'compound_emotion': entry['compound_emotion'].capitalize(),
            'strength': entry['strength'].capitalize(),
        }
        for entry in comment['compound_emotions']
    ]


def indexer():
    """
    Indexes all search criteria.

    Creates a new index in INDEX_DIR (creating the directory if needed),
    then adds one document per product returned by get_all_media().
    Products whose JSON cannot be fetched are skipped; products that fail
    to be added are reported and the run continues with the next one.
    """
    if not os.path.exists(INDEX_DIR):
        os.mkdir(INDEX_DIR)

    # Use the index returned by create_in directly. Re-opening a hard-coded
    # 'index' directory only worked when INDEX_DIR happened to be 'index'.
    ix = index.create_in(INDEX_DIR, SCHEMA)

    # As a context manager the writer commits on a clean exit and cancels
    # (releasing its lock) if an unexpected exception escapes the loop.
    with ix.writer() as writer:
        for product in get_all_media():
            try:
                s3_json = get_json_from_S3(product.title, product.asin)
            except Exception as error:
                # Best effort: a product without retrievable JSON is skipped.
                print('Skipping {}: {}'.format(product.title, error))
                continue

            sentic_emotions = find_emotions_for_media(product.media_id)
            sentic_values_string = ' '.join(sentic_emotions)
            compound_emotions_string = ' '.join(
                s3_json['popular_compound_emotions'])

            # trim comment dict
            comments = s3_json['comments']
            for comment in comments:
                _trim_comment(comment)

            # write to indexer
            try:
                writer.add_document(
                    product_name=product.title,
                    sentic_emotions=sentic_values_string,
                    compound_emotions=compound_emotions_string,
                    comment_number=find_comment_count_for_media(
                        product.media_id),
                    image_url=s3_json['image_url'],
                    sumy=s3_json['summary'],
                    comments=json.dumps(comments),
                )
            except Exception as error:
                print('ERROR with {}'.format(product.title))
                print(error)
            else:
                # Only report success when the document was actually added.
                print('{} indexed'.format(product.title))
Beispiel #2
0
    def test_clean(self):
        """ Checks that cleaning a media item leaves it with no comments or emotions """

        media_id = queries.find_media_by_asin('0440419395').media_id

        # Insert a comment first so the clean-up has something to remove.
        queries.insert_comment(media_id, 11, 7, 0.9, 0.4, -0.5, 0.6, 0.3)
        queries.clean_media(media_id)

        remaining_comments = queries.find_comments_for_media(media_id)
        remaining_emotions = queries.find_emotions_for_media(media_id)

        self.assertEqual(len(remaining_comments), 0)
        self.assertEqual(len(remaining_emotions), 0)
Beispiel #3
0
    def test_updating_media(self):
        """ Tests updating media and media emotions """

        asin = '0440419395'
        bat = queries.find_media_by_asin(asin)

        # testing updating the emotions
        queries.insert_media_emotion(bat.media_id, 'cool')
        queries.insert_media_emotion(bat.media_id, 'dark')

        emotions = queries.find_emotions_for_media(bat.media_id)

        # assertIn reports the container contents on failure, unlike
        # assertTrue(x in y), which only reports "False is not true".
        self.assertIn('dark', emotions,
                      "Didn't find all emotions for media")
        self.assertIn('cool', emotions,
                      "Didn't find all emotions for media")

        # test updating the last_updated column
        queries.update_media(bat.media_id, 20)

        # Re-read the record so the assertion checks what was stored, not
        # the object that was loaded before the update ran.
        refreshed = queries.find_media_by_asin(asin)
        self.assertEqual(refreshed.last_updated, 20,
                         'Did not updated date properly')