Esempio n. 1
0
def indexer():
    """
    Build the search index from every media product.

    Creates ``INDEX_DIR`` if it does not exist, creates a new index there
    with ``SCHEMA``, and adds one document per product returned by
    ``get_all_media()``. A product whose S3 JSON cannot be fetched, or whose
    document cannot be added, is reported on stdout and skipped; the
    remaining products are still indexed. The writer is committed once at
    the end.

    Raises:
        Exception: any other error raised while preparing a product's data
            is re-raised after the pending writes have been cancelled.
    """
    os.makedirs(INDEX_DIR, exist_ok=True)

    # Use the index object returned by create_in() directly. Reopening it
    # from the literal path 'index' only works when INDEX_DIR == 'index'.
    ix = index.create_in(INDEX_DIR, SCHEMA)
    writer = ix.writer()

    try:
        for product in get_all_media():
            try:
                s3_json = get_json_from_S3(product.title, product.asin)
            except Exception as exc:
                # Best effort: a product without S3 data is skipped, but the
                # skip is reported instead of being silently swallowed.
                print('SKIPPED {}: {}'.format(product.title, exc))
                continue

            sentic_emotions = find_emotions_for_media(product.media_id)
            sentic_values_string = ' '.join(sentic_emotions)
            compound_emotions_string = ' '.join(
                s3_json['popular_compound_emotions']
            )

            # Trim each comment dict in place before it is serialized.
            comments = s3_json['comments']
            for comment in comments:
                # A default avoids a KeyError when a key is already absent.
                comment.pop('vector_space', None)
                comment.pop('emotion_vector', None)
                # Round-trip through '%.2f' to keep two decimal places.
                comment['relevancy'] = float('%.2f' % comment['relevancy'])
                comment['sentic_emotions'] = [
                    emotion.capitalize()
                    for emotion in comment['sentic_emotions']
                ]
                comment['compound_emotions'] = [
                    {
                        'compound_emotion':
                            emotion['compound_emotion'].capitalize(),
                        'strength': emotion['strength'].capitalize(),
                    }
                    for emotion in comment['compound_emotions']
                ]

            # Write the product to the index.
            try:
                writer.add_document(
                    product_name=product.title,
                    sentic_emotions=sentic_values_string,
                    compound_emotions=compound_emotions_string,
                    comment_number=find_comment_count_for_media(
                        product.media_id
                    ),
                    image_url=s3_json['image_url'],
                    sumy=s3_json['summary'],
                    comments=json.dumps(comments),
                )
            except Exception as exc:
                print('ERROR with {}'.format(product.title))
                print(exc)
            else:
                # Only claim success when add_document did not raise.
                print('{} indexed'.format(product.title))
    except Exception:
        # NOTE(review): assumes a Whoosh-style writer exposing cancel();
        # this discards pending writes so the writer is not left open.
        writer.cancel()
        raise

    writer.commit()
Esempio n. 2
0
def get_asins():
    """
    Collect the ASIN of every media item and pass them to update_reviews.

    Reads all media through ``queries.get_all_media()`` and builds a list of
    their ``asin`` attributes. ``update_reviews`` is called once with that
    list; it is not called when there is no media.
    """
    asins = [item.asin for item in queries.get_all_media()]

    # NOTE(review): the previous loop popped from `asins` while iterating it,
    # so update_reviews only ever received shrinking prefixes of the list and
    # the last ASIN was never passed. A single call with the full list keeps
    # the argument type the old code passed (a list). Confirm against
    # update_reviews that it expects a list rather than a single ASIN.
    if asins:
        update_reviews(asins)