Example #1
    def lookup(self, config: PlayStoreConfig,
               **kwargs) -> List[AnalyzerRequest]:
        source_responses: List[AnalyzerRequest] = []
        # Refer https://github.com/googleapis/google-api-python-client/blob/master/docs/start.md
        with build(serviceName='androidpublisher',
                   version='v3',
                   credentials=config.get_google_credentials(),
                   developerKey=config.cred_info.developer_key) as service:
            reviews = service.reviews()
            pagination_token: Optional[str] = None

            # Get data from state
            id: Optional[str] = kwargs.get("id", None)
            state: Optional[Dict[str, Any]] = (
                None if id is None else self.store.get_source_state(id)
            )
            start_index: Optional[str] = (
                (config.start_index or None) if state is None
                else state.get("start_index", None)
            )
            update_state: bool = bool(id)
            state = state or dict()
            review_id = start_index

            while True:
                # Refer https://googleapis.github.io/google-api-python-client/docs/dyn/androidpublisher_v3.reviews.html#list
                responses = reviews.list(packageName=config.package_name,
                                         maxResults=config.max_results,
                                         startIndex=start_index,
                                         token=pagination_token).execute()

                if "reviews" in responses:
                    for review in responses["reviews"]:
                        if "comments" not in review:
                            continue

                        review_id = review["reviewId"]

                        # Currently only one user comment is supported
                        text = review["comments"][0]["userComment"]["text"]
                        source_responses.append(
                            AnalyzerRequest(processed_text=text,
                                            meta=review,
                                            source_name=self.NAME))

                pagination_token = None
                if "tokenPagination" in responses:
                    if "nextPageToken" in responses["tokenPagination"]:
                        pagination_token = responses["tokenPagination"][
                            "nextPageToken"]

                if pagination_token is None:
                    break

        if update_state:
            state["start_index"] = review_id
            self.store.update_source_state(workflow_id=id, state=state)

        return source_responses
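The lookup above resumes from the last processed review by persisting "start_index" through self.store. As a minimal sketch of that store contract (get_source_state / update_source_state), assuming nothing beyond the two calls made in the snippet, an in-memory stand-in could look like this; it is illustrative only, not obsei's actual state store:

from typing import Any, Dict, Optional


class InMemorySourceStore:
    """Illustrative stand-in for the store used by lookup(); not the real obsei store."""

    def __init__(self):
        self._states: Dict[str, Dict[str, Any]] = {}

    def get_source_state(self, workflow_id: str) -> Optional[Dict[str, Any]]:
        # Return the last persisted state for this workflow id, if any
        return self._states.get(workflow_id)

    def update_source_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        # Persist the state so the next lookup(id=...) resumes from "start_index"
        self._states[workflow_id] = dict(state)

With such a store, passing the same id into successive lookup() calls makes each run resume from the stored position instead of starting over.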
Example #2
    def _get_source_output(self, tweet: Dict[str, Any]) -> AnalyzerRequest:
        # Derive the tweet URL and clean the raw text before analysis
        tweet_url = TwitterSource.get_tweet_url(tweet["text"])
        processed_text = TwitterSource.clean_tweet_text(tweet["text"])

        tweet["tweet_url"] = tweet_url
        return AnalyzerRequest(processed_text=processed_text,
                               meta=tweet,
                               source_name=self.NAME)
Example #3
def test_text_analyzer_with_vader(text_analyzer_with_vader):
    source_responses = [AnalyzerRequest(text, "sample") for text in TEXTS]
    analyzer_responses = text_analyzer_with_vader.analyze_input(
        source_response_list=source_responses,
        analyzer_config=AnalyzerConfig(use_sentiment_model=False))

    assert len(analyzer_responses) == len(TEXTS)

    for analyzer_response in analyzer_responses:
        assert len(analyzer_response.classification) == 2
        assert "positive" in analyzer_response.classification
        assert "negative" in analyzer_response.classification
Example #4
    def lookup(self, config: AppStoreScrapperConfig, **kwargs) -> List[AnalyzerRequest]:
        source_responses: List[AnalyzerRequest] = []

        # Get data from state
        id: Optional[str] = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = None if id is None else self.store.get_source_state(id)
        update_state: bool = bool(id)
        state = state or dict()

        for scrapper in config.get_review_readers():
            country_stat: Dict[str, Any] = state.get(scrapper.country, dict())
            lookup_period: str = country_stat.get(
                "since_time",
                config.lookup_period
            )
            lookup_period = lookup_period or "1h"
            # Short values such as "1h" are relative periods; longer values are
            # absolute timestamps in DATETIME_STRING_PATTERN format
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

            last_since_time: datetime = since_time

            since_id: Optional[str] = country_stat.get("since_id", None)
            last_index = since_id
            state[scrapper.country] = country_stat

            reviews = scrapper.fetch_reviews(
                after=since_time,
                since_id=since_id
            )
            reviews = reviews or []

            for review in reviews:
                source_responses.append(
                    AnalyzerRequest(
                        processed_text=f"{review.title}. {review.content}",
                        meta=review.__dict__,
                        source_name=self.NAME,
                    )
                )

                if last_since_time is None or last_since_time < review.date:
                    last_since_time = review.date
                if last_index is None or last_index < review.id:
                    last_index = review.id

            country_stat["since_time"] = last_since_time.strftime(DATETIME_STRING_PATTERN)
            country_stat["since_id"] = last_index

        if update_state:
            self.store.update_source_state(workflow_id=id, state=state)

        return source_responses
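In the loop above, values of lookup_period no longer than five characters (for example "1h") are treated as relative periods and handed to convert_utc_time. As a rough sketch of what such a helper might do (an assumption for illustration, not obsei's actual convert_utc_time implementation), it could translate a short period string into a UTC cutoff time:

from datetime import datetime, timedelta


def convert_utc_time_sketch(period: str) -> datetime:
    # Hypothetical helper: "30m" -> 30 minutes ago, "1h" -> 1 hour ago, "2d" -> 2 days ago (UTC)
    units = {"m": "minutes", "h": "hours", "d": "days"}
    value, unit = int(period[:-1]), period[-1]
    return datetime.utcnow() - timedelta(**{units[unit]: value})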
Example #5
def test_text_analyzer_with_model(text_analyzer_with_model):
    labels = ["facility", "food", "comfortable", "positive", "negative"]

    source_responses = [AnalyzerRequest(text, "sample") for text in TEXTS]
    analyzer_responses = text_analyzer_with_model.analyze_input(
        source_response_list=source_responses,
        analyzer_config=AnalyzerConfig(labels=labels,
                                       use_sentiment_model=True))

    assert len(analyzer_responses) == len(TEXTS)

    for analyzer_response in analyzer_responses:
        assert len(analyzer_response.classification) == len(labels)
        assert "positive" in analyzer_response.classification
        assert "negative" in analyzer_response.classification
Example #6
def classify_texts(request: ClassifierRequest):
    with rate_limiter.run():
        analyzer_requests: List[AnalyzerRequest] = [
            AnalyzerRequest(processed_text=text, source_name="API")
            for text in request.texts
        ]
        analyzer_responses = text_analyzer.analyze_input(
            source_response_list=analyzer_requests,
            analyzer_config=request.analyzer_config,
        )

        response = []
        for analyzer_response in analyzer_responses:
            response.append(analyzer_response.classification)

        return ClassifierResponse(data=response)
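Assuming classify_texts is registered as a POST route on a FastAPI app (the host, port, and route path below are assumptions; they are not shown in the snippet), the request body only needs texts plus an analyzer_config, mirroring the fields used in Examples #3 and #5:

import requests

# URL, port, and path are illustrative assumptions; adjust to the actual deployment
payload = {
    "texts": ["The room was clean and the food was great", "The service was terrible"],
    "analyzer_config": {
        "labels": ["positive", "negative"],
        "use_sentiment_model": True,
    },
}
response = requests.post("http://localhost:9898/classifier", json=payload)
print(response.json())  # ClassifierResponse: {"data": [<classification per text>, ...]}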
Example #7
    def lookup(self, config: PlayStoreConfig) -> List[AnalyzerRequest]:
        source_responses: List[AnalyzerRequest] = []
        # Refer https://github.com/googleapis/google-api-python-client/blob/master/docs/start.md
        with build(serviceName='androidpublisher',
                   version='v3',
                   credentials=config.get_google_credentials(),
                   developerKey=config.cred_info.developer_key) as service:
            reviews = service.reviews()
            pagination_token: Optional[str] = None

            while True:
                # Refer https://googleapis.github.io/google-api-python-client/docs/dyn/androidpublisher_v3.reviews.html#list
                responses = reviews.list(packageName=config.package_name,
                                         maxResults=config.max_results,
                                         startIndex=config.start_index,
                                         token=pagination_token).execute()

                if "reviews" in responses:
                    for review in responses["reviews"]:
                        if "comments" not in review:
                            continue
                        # Currently only one user comment is supported
                        text = review["comments"][0]["userComment"]["text"]
                        source_responses.append(
                            AnalyzerRequest(processed_text=text,
                                            meta=review,
                                            source_name=self.NAME))

                pagination_token = None
                if "tokenPagination" in responses:
                    if "nextPageToken" in responses["tokenPagination"]:
                        pagination_token = responses["tokenPagination"][
                            "nextPageToken"]

                if pagination_token is None:
                    break

        return source_responses