Exemple #1
0
def dump_lifetime_ratings(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Refresh the lifetime-rating documents of every configured app.

    Apps whose elastic config has no ``lifetime_rating_index`` are skipped.
    For the others, one Play Store and one App Store document is built; any
    existing document with the same hash id is deleted and the new document
    is created in its place.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        elastic_config = app_config.elastic_config
        if elastic_config.lifetime_rating_index is None:
            continue

        # The documents are stamped with the time one day before now.
        timestamp = datetime.strftime(datetime.now() - timedelta(1),
                                      constants.TIMESTAMP_FORMAT)

        playstore_rating = getPlayStoreLifetimeRating(app_config)
        appstore_rating = getAppStoreLifetimeRating(app_config)

        # Template documents for the two lifetime ratings. The hash id
        # depends only on the app name and the platform, so each run
        # targets the same document id.
        playstore_doc = Review(
            {},
            timestamp=timestamp,
            rating=playstore_rating,
            app_name=app_config.app.name,
            channel_name="playstore-lifetime",
            channel_type="playstore-lifetime",
            hash_id=utils.calculate_hash(app_config.app.name +
                                         ReviewChannelTypes.ANDROID))
        appstore_doc = Review(
            {},
            timestamp=timestamp,
            # Fix: this document previously carried the Play Store rating.
            rating=appstore_rating,
            app_name=app_config.app.name,
            channel_name="appstore-lifetime",
            channel_type="appstore-lifetime",
            hash_id=utils.calculate_hash(app_config.app.name +
                                         ReviewChannelTypes.IOS))

        documents = (playstore_doc, appstore_doc)

        # Delete the existing documents first so they can be overridden.
        for document in documents:
            elasticsearch.delete_document(
                elastic_config.elastic_search_url,
                elastic_config.lifetime_rating_index, "_doc",
                document.hash_id)

        # Upload the fresh documents.
        for document in documents:
            elasticsearch.create_document(
                elastic_config.elastic_search_url,
                elastic_config.lifetime_rating_index, "_doc",
                document.hash_id, document)
Exemple #2
0
def parse_json(raw_user_reviews_file_path, review_channel, app_config):
    """Parse a JSON file of raw reviews into a list of Review objects.

    Args:
        raw_user_reviews_file_path: Path of the JSON file; its content is
            iterated, one raw review per element.
        review_channel: Channel config supplying ``message_key``,
            ``timestamp_key`` and the optional ``rating_key`` (dot-separated
            key paths into a raw review), plus the channel name, type,
            timezone and timestamp format.
        app_config: App config; only ``app.name`` is read here.

    Returns:
        A list with one Review per raw review, in file order.
    """
    raw_reviews = utils.open_json(raw_user_reviews_file_path)
    parsed_reviews = []

    for raw_review in raw_reviews:
        # TODO: Convert this to a standard format like jsonpath.
        message = utils.get_json_key_value(
            raw_review, review_channel.message_key.split("."))
        timestamp = utils.get_json_key_value(
            raw_review, review_channel.timestamp_key.split("."))

        # The rating is optional: it is only extracted when the channel
        # declares a rating key.
        rating = None
        if review_channel.rating_key is not None:
            rating = utils.get_json_key_value(
                raw_review, review_channel.rating_key.split("."))

        parsed_reviews.append(
            Review(
                raw_review,
                message=message,
                timestamp=timestamp,
                rating=rating,
                app_name=app_config.app.name,
                channel_name=review_channel.channel_name,
                channel_type=review_channel.channel_type,
                review_timezone=review_channel.timezone,
                timestamp_format=review_channel.timestamp_format,
            )
        )

    return parsed_reviews
Exemple #3
0
def parse_json_lines(raw_user_reviews_file_path, review_channel, app_config):
    """Parse a JSON Lines file of raw reviews into a list of Review objects.

    Every non-blank line of the file is decoded as one JSON document
    (https://jsonlines.org/). Blank lines are skipped instead of aborting
    the whole parse with a decode error.

    Args:
        raw_user_reviews_file_path: Path of the JSON Lines file.
        review_channel: Channel config supplying ``message_key``,
            ``timestamp_key`` and the optional ``rating_key`` (dot-separated
            key paths into a raw review), plus the channel name, type,
            timezone and timestamp format.
        app_config: App config; only ``app.name`` is read here.

    Returns:
        A list with one Review per decoded line, in file order.
    """
    parsed_reviews = []
    with open(raw_user_reviews_file_path, "r") as raw_user_reviews_file_handle:
        for line in raw_user_reviews_file_handle:
            # A whitespace-only line is not a JSON document; json.loads
            # would raise on it.
            if not line.strip():
                continue

            review = json.loads(line)
            # TODO: Convert this to a standard format like jsonpath.
            message = utils.get_json_key_value(
                review, review_channel.message_key.split("."))
            timestamp = utils.get_json_key_value(
                review, review_channel.timestamp_key.split("."))

            # The rating is optional: it is only extracted when the channel
            # declares a rating key.
            rating = None
            if review_channel.rating_key is not None:
                rating = utils.get_json_key_value(
                    review, review_channel.rating_key.split("."))

            parsed_reviews.append(
                Review(
                    review,
                    message=message,
                    timestamp=timestamp,
                    rating=rating,
                    app_name=app_config.app.name,
                    channel_name=review_channel.channel_name,
                    channel_type=review_channel.channel_type,
                    review_timezone=review_channel.timezone,
                    timestamp_format=review_channel.timestamp_format,
                )
            )
    return parsed_reviews
Exemple #4
0
def parse_csv(raw_user_reviews_file_path, review_channel, app_config):
    """Parse a CSV file of raw reviews into a list of Review objects.

    The first row is taken as the column names. For every following row
    the columns named by the channel's ``timestamp_key``, ``message_key``,
    ``rating_key`` and ``user_id_key`` are picked out, and the whole row is
    kept as a ``{column name: value}`` dict.

    Args:
        raw_user_reviews_file_path: Path of the CSV file (comma delimited).
        review_channel: Channel config supplying the column keys above plus
            the channel name, type, timezone, timestamp format and
            ``rating_max_value``.
        app_config: App config; only ``app.name`` is read here.

    Returns:
        A list with one Review per data row, in file order. An empty file
        (no header row) yields an empty list.
    """
    parsed_reviews = []

    # newline="" lets the csv module do its own newline handling, which is
    # required for quoted fields containing line breaks.
    with open(raw_user_reviews_file_path, "r", newline="") as file_handle:
        rows = csv.reader(file_handle, delimiter=",")

        # We expect the first row to contain the column names.
        # TODO: We should change this to be taken from the configuration.
        # There might be usecases where column names are not present in the
        # data. People might want to indicate the message, timestamp keys
        # using integer indices to the columns.
        json_keys_list = next(rows, None)
        if json_keys_list is None:
            # Completely empty file: there is nothing to parse.
            return parsed_reviews

        for row in rows:
            review = {}
            timestamp = ""
            message = ""
            rating = None
            user_id = None

            # Some CSVs have an empty last column, so a row can be shorter
            # than the header; zip stops at the shorter of the two.
            for column_name, value in zip(json_keys_list, row):
                if column_name == review_channel.timestamp_key:
                    timestamp = value
                elif column_name == review_channel.message_key:
                    message = value
                elif column_name == review_channel.rating_key:
                    rating = value
                elif column_name == review_channel.user_id_key:
                    user_id = value
                # Keep the raw review as received from the source.
                review[column_name] = value

            parsed_reviews.append(
                Review(
                    review,
                    message=message,
                    timestamp=timestamp,
                    rating=rating,
                    user_id=user_id,
                    app_name=app_config.app.name,
                    channel_name=review_channel.channel_name,
                    channel_type=review_channel.channel_type,
                    review_timezone=review_channel.timezone,
                    timestamp_format=review_channel.timestamp_format,
                    rating_max_value=review_channel.rating_max_value,
                    raw_review=review,
                ))

    return parsed_reviews
Exemple #5
0
def get_similar_reviews_for_app(app_config_file, query, num_results):
    """Find reviews similar to ``query`` for one app and dump them to JSON.

    The processed reviews of the app are loaded, filtered by channel and by
    the algorithm day window, then handed to ``get_similar_reviews``. The
    scored results are written to the query results file, whose path
    includes a hash of the query.

    Args:
        app_config_file: Path of the app config JSON.
        query: Query passed on to ``get_similar_reviews``.
        num_results: Result count passed on to ``get_similar_reviews``.
    """
    app_config = AppConfig(utils.open_json(app_config_file))
    app_name = app_config.app.name

    logging.info(logs.QUERY_START, FawkesActions.QUERY_SIMILAR_REVIEWS, "ALL",
                 app_name)

    # Location of the reviews written by the processing step.
    processed_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
        base_folder=app_config.fawkes_internal_config.data.base_folder,
        dir_name=app_config.fawkes_internal_config.data.processed_data_folder,
        app_name=app_config.app.name,
    )

    # Load the stored JSON and turn every entry into a Review object.
    candidates = [
        Review.from_review_json(entry)
        for entry in utils.open_json(processed_path)
    ]

    # Apply the channel filter first, then the time window.
    channel_filter = filter_utils.filter_disabled_review_channels(app_config)
    candidates = filter_utils.filter_reviews_by_channel(
        candidates, channel_filter)
    oldest_allowed = datetime.now(timezone.utc) - timedelta(
        days=app_config.algorithm_config.algorithm_days_filter)
    candidates = filter_utils.filter_reviews_by_time(candidates,
                                                     oldest_allowed)

    similar_reviews = get_similar_reviews(candidates, query, num_results)

    logging.info(logs.QUERY_END, FawkesActions.QUERY_SIMILAR_REVIEWS, "ALL",
                 app_config.app.name)

    # Destination of the results; make sure its folder exists.
    results_path = constants.QUERY_RESULTS_FILE_PATH.format(
        base_folder=app_config.fawkes_internal_config.data.base_folder,
        dir_name=app_config.fawkes_internal_config.data.query_folder,
        app_name=app_config.app.name,
        query_hash=utils.calculate_hash(query))
    pathlib.Path(os.path.dirname(results_path)).mkdir(parents=True,
                                                      exist_ok=True)

    payload = []
    for score, matched_review in similar_reviews:
        payload.append({
            "score": score,
            "review": matched_review.to_dict(),
        })
    utils.dump_json(payload, results_path)
Exemple #6
0
def send_reviews_to_slack(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Post the recent processed reviews of every configured app to Slack.

    For each app the processed reviews are loaded, filtered by channel and
    by the Slack run interval (in minutes), ordered by descending compound
    sentiment and sent one by one through ``send_review_to_slack``.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))

    def compound_sentiment(item):
        # Sort key: the "compound" entry of the review's sentiment.
        return item.derived_insight.sentiment["compound"]

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))

        logging.info(logs.OPERATION, FawkesActions.PUSH_SLACK, "ALL",
                     app_config.app.name)

        # Location of the reviews written by the processing step.
        processed_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        pending = [
            Review.from_review_json(entry)
            for entry in utils.open_json(processed_path)
        ]

        # Apply the channel filter first, then the time window.
        channel_filter = filter_utils.filter_disabled_review_channels(
            app_config)
        pending = filter_utils.filter_reviews_by_channel(
            pending, channel_filter)
        oldest_allowed = datetime.now(timezone.utc) - timedelta(
            minutes=app_config.slack_config.slack_run_interval)
        pending = filter_utils.filter_reviews_by_time(pending, oldest_allowed)

        logging.info(logs.NUM_REVIEWS, len(pending), "ALL",
                     app_config.app.name)

        # Highest compound sentiment goes out first.
        for item in sorted(pending, key=compound_sentiment, reverse=True):
            send_review_to_slack(app_config.slack_config.slack_hook_url,
                                 app_config.slack_config.slack_channel, item,
                                 app_config)
Exemple #7
0
def generate_email_summary(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Render the summary email of every configured app to an HTML file.

    For each app the processed reviews are loaded, filtered by channel and
    by the email time span (in days), summarised through the ``queries``
    helpers and rendered with the app's email template. The HTML is written
    to the generated-email path of the app.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))

        # Location of the reviews written by the processing step.
        processed_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = [
            Review.from_review_json(entry)
            for entry in utils.open_json(processed_path)
        ]

        # Apply the channel filter first, then the time window.
        channel_filter = filter_utils.filter_disabled_review_channels(
            app_config)
        reviews = filter_utils.filter_reviews_by_channel(
            reviews, channel_filter)
        oldest_allowed = datetime.now(timezone.utc) - timedelta(
            days=app_config.email_config.email_time_span)
        reviews = filter_utils.filter_reviews_by_time(reviews, oldest_allowed)

        # Values substituted into the email template.
        template_data = {
            "numberOfReview": queries.numberOfReview(reviews),
            "topCategory": queries.topCategory(reviews),
            "numFeatureReq": queries.numFeatureReq(reviews),
            "numBugsReported": queries.numBugsReported(reviews),
            # Store ratings are shown with two decimals.
            "appStoreRating": format(queries.appStoreRating(reviews), ".2f"),
            "playStoreRating": format(queries.playStoreRating(reviews),
                                      ".2f"),
            "happyReview1": queries.happyReview1(reviews),
            "unhappyReview1": queries.unhappyReview1(reviews),
            "positiveReview": queries.positiveReview(reviews),
            "neutralReview": queries.neutralReview(reviews),
            "negativeReview": queries.negativeReview(reviews),
            "topCategoryNumberOfReview":
            queries.topCategoryNumberOfReview(reviews),
            "fromDate": queries.fromDate(reviews),
            "toDate": queries.toDate(reviews),
            "appLogo": app_config.app.logo,
            "timeSpanWords": app_config.email_config.email_time_span,
            "kibanaDashboardURL": app_config.elastic_config.kibana_url
        }

        # Render the HTML from the template file.
        rendered_html = email_utils.generate_email(
            app_config.email_config.email_template_file, template_data)

        # Destination of the generated email; make sure its folder exists.
        output_path = constants.EMAIL_SUMMARY_GENERATED_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.emails_folder,
            app_name=app_config.app.name,
        )
        pathlib.Path(os.path.dirname(output_path)).mkdir(parents=True,
                                                         exist_ok=True)

        with open(output_path, "w") as output_handle:
            output_handle.write(rendered_html)
Exemple #8
0
def run_algo(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Run the review algorithms for every configured app.

    For each app the parsed reviews are loaded, filtered by channel and by
    the algorithm day window, and then passed through, in order:

    1. sentiment scoring,
    2. text-match categorization (when both a categorization algorithm and
       a category keywords file are configured),
    3. bug/feature classification (when its keywords file is configured),
    4. LSTM categorization (when that is the configured algorithm).

    The resulting reviews are written to the processed reviews file.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        algorithm_config = app_config.algorithm_config

        # Path where the user reviews were stored after parsing.
        parsed_user_reviews_file_path = constants.PARSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.parsed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = utils.open_json(parsed_user_reviews_file_path)
        reviews = [Review.from_review_json(review) for review in reviews]

        # Filtering out reviews which are not applicable.
        reviews = filter_utils.filter_reviews_by_time(
            filter_utils.filter_reviews_by_channel(
                reviews,
                filter_utils.filter_disabled_review_channels(app_config),
            ),
            datetime.now(timezone.utc) -
            timedelta(days=algorithm_config.algorithm_days_filter))

        # Number of worker processes. os.cpu_count() may return None when
        # the count cannot be determined; fall back to a single process
        # instead of failing inside min().
        num_processes = min(constants.PROCESS_NUMBER, os.cpu_count() or 1)

        # On CircleCI the pool is pinned to two processes.
        if constants.CIRCLECI in os.environ:
            num_processes = 2

        # Adding sentiment
        with Pool(num_processes) as process:
            reviews = process.map(add_review_sentiment_score, reviews)

        if (algorithm_config.categorization_algorithm is not None
                and algorithm_config.category_keywords_weights_file
                is not None):
            # We read from the topic file first
            topics = utils.open_json(
                algorithm_config.category_keywords_weights_file)

            # Adding text-match categorization
            with Pool(num_processes) as process:
                reviews = process.map(
                    partial(text_match_categortization,
                            app_config=app_config,
                            topics=topics), reviews)

        if algorithm_config.bug_feature_keywords_weights_file is not None:
            # We read from the topic file first
            topics = utils.open_json(
                algorithm_config.bug_feature_keywords_weights_file)

            # Adding bug/feature classification
            with Pool(num_processes) as process:
                reviews = process.map(
                    partial(bug_feature_classification, topics=topics),
                    reviews)

        if algorithm_config.categorization_algorithm == CategorizationAlgorithms.LSTM_CLASSIFICATION:
            # Load the TensorFlow model
            model = tf.keras.models.load_model(
                constants.LSTM_CATEGORY_MODEL_FILE_PATH.format(
                    base_folder=app_config.fawkes_internal_config.data.
                    base_folder,
                    dir_name=app_config.fawkes_internal_config.data.
                    models_folder,
                    app_name=app_config.app.name,
                ))

            # Load the article tokenizer file
            tokenizer_json = utils.open_json(
                constants.LSTM_CATEGORY_ARTICLE_TOKENIZER_FILE_PATH.format(
                    base_folder=app_config.fawkes_internal_config.data.
                    base_folder,
                    dir_name=app_config.fawkes_internal_config.data.
                    models_folder,
                    app_name=app_config.app.name,
                ), )
            article_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
                tokenizer_json)

            # Load the label tokenizer file
            tokenizer_json = utils.open_json(
                constants.LSTM_CATEGORY_LABEL_TOKENIZER_FILE_PATH.format(
                    base_folder=app_config.fawkes_internal_config.data.
                    base_folder,
                    dir_name=app_config.fawkes_internal_config.data.
                    models_folder,
                    app_name=app_config.app.name,
                ), )
            label_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
                tokenizer_json)

            # Map each normalised label (non-word characters stripped,
            # lower-cased) back to the category label it came from.
            cleaned_labels = {}
            for review in reviews:
                label = review.derived_insight.category
                cleaned_label = re.sub(r'\W+', '', label).lower()
                cleaned_labels[cleaned_label] = label

            # Adding LSTM categorization
            reviews = lstm_classification(reviews, model, article_tokenizer,
                                          label_tokenizer, cleaned_labels)

        # Destination of the processed reviews; make sure its folder exists.
        processed_user_reviews_file_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        dir_name = os.path.dirname(processed_user_reviews_file_path)
        pathlib.Path(dir_name).mkdir(parents=True, exist_ok=True)

        utils.dump_json(
            [review.to_dict() for review in reviews],
            processed_user_reviews_file_path,
        )
def push_data_to_elasticsearch(
        fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Bulk-upload the processed reviews of every configured app to Elasticsearch.

    For each app the processed reviews are loaded, filtered by channel and
    by the Elasticsearch day window, shuffled, and pushed to the app's index
    in batches of ``constants.BULK_UPLOAD_SIZE``. The index is created first
    when it is not among the existing indices. A batch whose response status
    is not 200 is reported on stdout and the upload continues.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))

        logging.info(logs.OPERATION, FawkesActions.PUSH_ELASTICSEARCH, "ALL",
                     app_config.app.name)

        # Location of the reviews written by the processing step.
        processed_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = [
            Review.from_review_json(entry)
            for entry in utils.open_json(processed_path)
        ]

        # Apply the channel filter first, then the time window.
        channel_filter = filter_utils.filter_disabled_review_channels(
            app_config)
        reviews = filter_utils.filter_reviews_by_channel(
            reviews, channel_filter)
        oldest_allowed = datetime.now(timezone.utc) - timedelta(
            days=app_config.elastic_config.elastic_search_days_filter)
        reviews = filter_utils.filter_reviews_by_time(reviews, oldest_allowed)

        logging.info(logs.NUM_REVIEWS, len(reviews), "ALL",
                     app_config.app.name)

        # The upload order is randomised. NOTE(review): the original comment
        # attributes this to Elasticsearch behaviour without saying what.
        random.shuffle(reviews)

        # Create the target index when it is not among the existing ones.
        existing_indices = get_indices(
            app_config.elastic_config.elastic_search_url)
        if app_config.elastic_config.index not in existing_indices:
            create_index(app_config.elastic_config.elastic_search_url,
                         app_config.elastic_config.index)

        # Push the reviews batch by batch; slicing clamps the final,
        # shorter batch to the end of the list.
        offset = 0
        while offset < len(reviews):
            batch = reviews[offset:offset + constants.BULK_UPLOAD_SIZE]
            response = bulk_push_to_elastic(
                app_config.elastic_config.elastic_search_url,
                app_config.elastic_config.index, batch)
            if response.status_code != 200:
                print(
                    "[Error] push_data_to_elasticsearch :: Got status code : ",
                    response.status_code)
                print("[Error] push_data_to_elasticsearch :: Response is : ",
                      response.text)
            offset += constants.BULK_UPLOAD_SIZE
Exemple #10
0
def run_algo(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Run the review algorithms for every configured app.

    For each app the parsed reviews are loaded, filtered by channel and by
    the algorithm day window, and then passed through sentiment analysis,
    categorization, bug/feature categorization and review text encoding, in
    that order. The resulting reviews are written to the processed reviews
    file.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))

        logging.info(logs.OPERATION, FawkesActions.RUN_ALGO, "ALL",
                     app_config.app.name)

        # Path where the user reviews were stored after parsing.
        parsed_user_reviews_file_path = constants.PARSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.parsed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = utils.open_json(parsed_user_reviews_file_path)
        reviews = [Review.from_review_json(review) for review in reviews]

        # Filtering out reviews which are not applicable.
        reviews = filter_utils.filter_reviews_by_time(
            filter_utils.filter_reviews_by_channel(
                reviews,
                filter_utils.filter_disabled_review_channels(app_config),
            ),
            datetime.now(timezone.utc) -
            timedelta(days=app_config.algorithm_config.algorithm_days_filter))

        logging.info(logs.NUM_REVIEWS, len(reviews), "ALL",
                     app_config.app.name)

        # Number of worker processes. os.cpu_count() may return None when
        # the count cannot be determined; fall back to a single process
        # instead of failing inside min().
        num_processes = min(constants.PROCESS_NUMBER, os.cpu_count() or 1)

        # On CircleCI the process count is pinned to two.
        if constants.CIRCLECI in os.environ:
            num_processes = 2

        # Each stage takes the reviews and returns the updated reviews.
        reviews = run_sentiment_analysis(reviews, app_config, num_processes)
        reviews = run_categorization(reviews, app_config, num_processes)
        reviews = run_bug_feature_categorization(reviews, app_config,
                                                 num_processes)
        reviews = run_review_text_encoding(reviews, app_config, num_processes)

        # Destination of the processed reviews; make sure its folder exists.
        processed_user_reviews_file_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        dir_name = os.path.dirname(processed_user_reviews_file_path)
        pathlib.Path(dir_name).mkdir(parents=True, exist_ok=True)

        utils.dump_json(
            [review.to_dict() for review in reviews],
            processed_user_reviews_file_path,
        )
def generate_email_summary_detailed(
        fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Render the detailed summary email of every configured app to an HTML file.

    For each app the processed reviews are loaded and filtered by channel
    and by the email time span (in days); apps left without reviews are
    skipped. The five categories with the most reviews each contribute one
    review block (except the "category not found" bucket), and the blocks
    are embedded in the app's email template together with the rating and
    sentiment figures. The HTML is written to the generated-email path.

    Args:
        fawkes_config_file: Path of the Fawkes config JSON that lists the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))

        # Location of the reviews written by the processing step.
        processed_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = [
            Review.from_review_json(entry)
            for entry in utils.open_json(processed_path)
        ]

        # Apply the channel filter first, then the time window.
        channel_filter = filter_utils.filter_disabled_review_channels(
            app_config)
        reviews = filter_utils.filter_reviews_by_channel(
            reviews, channel_filter)
        oldest_allowed = datetime.now(timezone.utc) - timedelta(
            days=app_config.email_config.email_time_span)
        reviews = filter_utils.filter_reviews_by_time(reviews, oldest_allowed)

        # Nothing to report for this app.
        if len(reviews) == 0:
            continue

        review_by_category = queries.getVocByCategory(reviews)

        # (review count, category) pairs, largest first, limited to five.
        ranked = sorted(
            [(len(review_by_category[name]), name)
             for name in review_by_category],
            reverse=True)
        top_category_names = [name for _, name in ranked[:5]]

        # For every top category keep the review that sorts first under
        # compare_review_by_category_score.
        review_order = functools.cmp_to_key(compare_review_by_category_score)
        featured_review = {}
        for name in top_category_names:
            featured_review[name] = sorted(review_by_category[name],
                                           key=review_order)[0]

        # One rendered HTML block per top category.
        review_blocks = []
        for name in top_category_names:
            if name == constants.CATEGORY_NOT_FOUND:
                continue
            block_data = {
                "catetgoryName": name,
                "upOrDown": "down",
                "upDownPercentage": 19,
                "reviewText": featured_review[name].message,
                "usersTalking": len(review_by_category[name])
            }
            review_blocks.append(
                email_utils.generate_email(
                    constants.WEEKLY_EMAIL_DETAILED_REVIEW_BLOCK_TEMPLATE,
                    block_data))
        review_block_html = "".join(review_blocks)

        # Values substituted into the email template.
        template_data = {
            # Store ratings are shown with two decimals.
            "appStoreRating": format(queries.appStoreRating(reviews), ".2f"),
            "playStoreRating": format(queries.playStoreRating(reviews),
                                      ".2f"),
            "positiveReview": queries.positiveReview(reviews),
            "neutralReview": queries.neutralReview(reviews),
            "negativeReview": queries.negativeReview(reviews),
            "fromDate": queries.fromDate(reviews),
            "toDate": queries.toDate(reviews),
            "appLogo": app_config.app.logo,
            "timeSpanWords":
            app_config.email_config.email_time_span_in_words,
            "reviewBlock": review_block_html,
            "appStoreNumberOfReview": queries.appStoreNumberReview(reviews),
            "playStoreNumberOfReview":
            queries.playStoreNumberReview(reviews),
            "appStoreLifetimeRating":
            lifetime.getAppStoreLifetimeRating(app_config),
            "playStoreLifetimeRating":
            lifetime.getPlayStoreLifetimeRating(app_config),
            "kibanaDashboardURL": app_config.elastic_config.kibana_url
        }

        # Render the final HTML from the app's email template.
        rendered_html = email_utils.generate_email(
            app_config.email_config.email_template_file, template_data)

        # Destination of the generated email; make sure its folder exists.
        output_path = constants.EMAIL_SUMMARY_GENERATED_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.emails_folder,
            app_name=app_config.app.name,
        )
        pathlib.Path(os.path.dirname(output_path)).mkdir(parents=True,
                                                         exist_ok=True)

        with open(output_path, "w") as output_handle:
            output_handle.write(rendered_html)
Exemple #12
0
def generate_summary(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """
        @param{string}: fawkes_config_file - config file path
        @returns{map<string,list<string>>}: summarized_reviews - summarized
            reviews per category of the first configured app, or None when
            no app is configured

        Main function to create a summary of reviews
            - queries to get reviews
            - preprocess reviews based on each category
            - cluster similar reviews
            - rank and summarize amongst cluster to provide a summary

        Every configured app is summarized and its summary written to the
        app's review summary results file. The function used to return from
        inside the loop, so apps after the first were never processed; the
        return value itself is unchanged (the first app's summary).
    """
    # Read the app-config.json file.
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))
    first_app_summary = None

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        summarization_config = app_config.algorithm_config.summarization

        # Path where the user reviews were stored after processing.
        processed_user_reviews_file_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.
            processed_data_folder,
            app_name=app_config.app.name,
        )

        # Load the stored JSON and turn every entry into a Review object.
        reviews = utils.open_json(processed_user_reviews_file_path)
        reviews = [Review.from_review_json(review) for review in reviews]

        reviews_by_category = queries.getVocByCategory(reviews)
        summarized_reviews = {}

        # For each category, generate a summary
        for category in reviews_by_category:
            summarized_category_review = []

            # Preprocess the reviews of this category into sentences.
            sentences = preprocess_review(reviews_by_category[category])

            # A category needs at least as many sentences as there are
            # clusters before it can be clustered.
            if len(sentences) >= summarization_config.num_clusters:
                clustered_sentences = k_means_classification(
                    sentences, summarization_config.num_clusters)
                for cluster in clustered_sentences.values():
                    # Clusters below the minimum size are not summarized.
                    if len(cluster) < constants.minimum_reviews_per_cluster:
                        continue
                    text = ". ".join(cluster)
                    summarized_category_review.append(
                        summarize_text(
                            text,
                            summarization_config.summary_length_per_cluster,
                        ))
            else:
                logging.info(logs.INSUFFICIENT_DATA, category)
            summarized_reviews[category] = summarized_category_review

        # Destination of the summary; make sure its folder exists.
        query_results_file_path = constants.REVIEW_SUMMARY_RESULTS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.query_folder,
            app_name=app_config.app.name,
        )

        dir_name = os.path.dirname(query_results_file_path)
        pathlib.Path(dir_name).mkdir(parents=True, exist_ok=True)

        utils.dump_json([{
            "summarized_reviews": summarized_reviews
        }], query_results_file_path)

        # Remember the first app's summary for the return value, but keep
        # going so the remaining apps are summarized as well.
        if first_app_summary is None:
            first_app_summary = summarized_reviews

    return first_app_summary