Esempio n. 1
0
def get_app_store(app_id, n_reviews=200, filter="all"):
    """Fetch App Store reviews for an app and return them as a DataFrame.

    Args:
        app_id: Passed to ``AppStore`` as ``app_name``.
        n_reviews: Passed to ``AppStore.review`` as ``how_many``.
        filter: ``"all"`` to return every fetched review; otherwise a
            comparison suffix that is appended to ``rating`` and evaluated
            with ``DataFrame.query`` (for example ``">= 4"`` or ``"== 1"``).
            The name shadows the builtin but is kept for keyword callers.

    Returns:
        A ``pandas.DataFrame`` built from the scraper's ``reviews`` list,
        optionally restricted by the rating filter.
    """
    data = AppStore(app_name=app_id, country='us')
    data.review(how_many=n_reviews)
    reviews_df = pd.DataFrame(data.reviews)
    # A frame built from no reviews has no ``rating`` column, so querying it
    # would raise; nothing could match anyway, so return the empty frame.
    if filter == "all" or reviews_df.empty:
        return reviews_df
    return reviews_df.query('rating' + filter)
Esempio n. 2
0
def scrape_data():
    """Prompt for an app name, scrape its reviews and save them as JSON.

    The app name is read from standard input, reviews are requested from the
    "in" storefront with ``how_many=3000``, each review is passed through
    ``normalize_review`` and the resulting list is written to
    ``../app-store-reviews.json``.
    """
    app_info = AppStore(
        app_name=input("Enter name of the app in the App Store: "),
        country="in")
    app_info.review(how_many=3000)
    reviews = [normalize_review(review) for review in app_info.reviews]
    # Serialize before opening the file so a serialization error cannot leave
    # a truncated output file behind.
    payload = json.dumps(reviews)
    # The context manager closes the handle even if the write fails.
    with open("../app-store-reviews.json", "w", encoding="utf-8") as f:
        f.write(payload)
def Crawl_appsList(l):
    """Scrape every app named in ``l`` and collect the per-app results.

    For each name an ``AppStore`` object is created for the "eg" storefront,
    its reviews are fetched, and ``Crawl_app(application, reviews)`` is
    appended to the result. Apps that fail at any of these steps are reported
    on stdout and skipped.

    Args:
        l: Iterable of app names.

    Returns:
        A list with one ``Crawl_app`` result per successfully processed app.
    """
    dataTable = []
    for app in l:
        try:
            application = AppStore(country="eg", app_name=app)
            application.review()
            dataTable.append(Crawl_app(application, application.reviews))
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # ``SystemExit`` still stop the crawl.
        except Exception:
            print("Couldn't found App: ", app, "\nCheck the application name again.")

    return dataTable
Esempio n. 4
0
class TestEmptyApp:
    """Checks on an ``AppStore`` object on which ``review`` was never called."""

    country = "Nz"
    app_name = "Cool App"
    app_id = 7357
    app = AppStore(country=country, app_name=app_name, app_id=app_id)

    def test_init_attributes(self):
        """Country is lower-cased, the name is slugified, reviews are empty."""
        expected_country = self.country.lower()
        expected_name = self.app_name.lower().replace(" ", "-")
        assert self.app.country == expected_country
        assert self.app.app_name == expected_name
        assert self.app.app_id == self.app_id
        assert self.app.reviews == []
        assert self.app.reviews_count == 0

    def test_init_url(self):
        """The landing URL is assembled from country, name and id."""
        app = self.app
        expected_url = (
            f"https://apps.apple.com/"
            f"{app.country}/app/{app.app_name}/id{app.app_id}"
        )
        assert app.url == expected_url

    def test_repr(self):
        """``repr`` shows the constructor-style form of the object."""
        app = self.app
        expected = (
            f"AppStore(country='{app.country}', "
            f"app_name='{app.app_name}', "
            f"app_id={app.app_id})"
        )
        assert repr(app) == expected

    def test_str(self, capsys):
        """Printing the object yields the five-line, right-aligned summary."""
        app = self.app
        print(app)
        printed = capsys.readouterr().out
        expected_lines = [
            f"     Country | {app.country}",
            f"        Name | {app.app_name}",
            f"          ID | {app.app_id}",
            f"         URL | {app.url}",
            f"Review count | {app.reviews_count}",
        ]
        assert printed == "\n".join(expected_lines) + "\n"
Esempio n. 5
0
class TestAppStore:
    """Checks that call ``search_id`` and ``review`` on a real app.

    All tests share the single class-level ``app`` object, and some depend
    on state left by an earlier test: ``test_review_continuation`` expects
    the 20 reviews fetched by ``test_review`` to still be present.
    """

    app = AppStore(country="us", app_name="minecraft")

    def test_search_id(self):
        """``search_id`` fills in the numeric id for the app name."""
        self.app.search_id()
        assert self.app.app_id == 479516143

    def test_review(self):
        """Asking for 3 reviews is expected to leave 20 in the list."""
        # NOTE(review): presumably reviews arrive in pages of 20 - confirm.
        self.app.review(how_many=3)
        fetched = len(self.app.reviews)
        assert fetched == 20
        assert fetched == self.app.reviews_count

    def test_review_continuation(self):
        """A second ``review`` call adds to the already fetched reviews."""
        assert len(self.app.reviews) == 20
        self.app.review(how_many=7)
        assert len(self.app.reviews) == 40

    def test_reviews_for_duplicates(self):
        """No review equals the one immediately after it."""
        reviews = self.app.reviews
        for current, following in zip(reviews, reviews[1:]):
            assert current != following

    def test_reviews_for_after(self):
        """With ``after`` set, every review date falls in the last 26 weeks."""
        now = datetime.now()
        cutoff = now - timedelta(weeks=26)
        self.app.reviews = []
        self.app.review(how_many=3, after=cutoff)
        for review in self.app.reviews:
            assert cutoff <= review["date"] < now

    def test_reviews_for_sleep(self):
        """A ``review`` call with ``sleep=2`` takes at least two seconds."""
        started = datetime.now()
        self.app.review(how_many=40, sleep=2)
        elapsed = datetime.now() - started
        assert elapsed.seconds >= 2
Esempio n. 6
0
def app_store_scraper(app_name):
    """Scrape reviews for ``app_name`` and write them to a CSV file.

    Reviews are requested from the "dk" storefront with ``how_many=1000``.
    Each review is echoed to stdout and written to ``appstore_reviews.csv``
    with the columns ``username``, ``review`` and ``score``. A review that
    cannot be printed or written is reported and skipped.

    Args:
        app_name: Passed to ``AppStore`` as ``app_name``.
    """
    app = AppStore(country="dk", app_name=app_name)
    app.review(how_many=1000)

    # Mode 'w' creates the file or overwrites an existing one. UTF-8 is set
    # explicitly so non-ASCII review text does not depend on the locale's
    # default encoding.
    with open(r'appstore_reviews.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['username', 'review', 'score']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()

        for entry in app.reviews:
            score = entry['rating']
            username = entry['userName']
            text = entry['review']

            try:
                print(f'{username} says: {text}')
                writer.writerow({'username': username, 'review': text, 'score': score})
            # Best effort per row, but let Ctrl-C / SystemExit propagate.
            except Exception:
                print('Failed to add entry XXX')
import csv

from app_store_scraper import AppStore

# Change this to the name of the csv file containing your app metadata
# requires "url_name" and "id" fields
IN_FILE = 'as_eating_disorder_apps.csv'

# read app name and id from csv file
with open(IN_FILE, newline='', encoding='utf-8') as file:
    apps = [
        {'app_name': row['url_name'], 'app_id': row['id']}
        for row in csv.DictReader(file)
    ]

# for each app, use the AppStore scraper to fetch all reviews
for app in apps:
    app_temp = AppStore(country='gb', app_name=app['app_name'], app_id=app['app_id'])
    app_temp.review()
    app['reviews'] = app_temp.reviews  # save list of reviews to existing dictionary

# save reviews to separate csv files for each app
for app in apps:
    if app.get('reviews'):  # Not all apps have reviews
        outfile = app['app_name'] + '_reviews.csv'  # use the app_name to name the output file
        # Build the header from every review, in first-seen order, instead of
        # only the first one: DictWriter raises ValueError on a key that is
        # missing from the header, and the first review may or may not carry
        # optional fields such as developerResponse.
        fieldnames = list(dict.fromkeys(
            key for review in app['reviews'] for key in review))
        # Always keep a developerResponse column, without duplicating it.
        if 'developerResponse' not in fieldnames:
            fieldnames.append('developerResponse')
        with open(outfile, 'w', encoding='utf-8', newline='') as file:
            dict_writer = csv.DictWriter(file, fieldnames)
            dict_writer.writeheader()
            dict_writer.writerows(app['reviews'])
Esempio n. 8
0
    def __init__(self, country, app_name):
        """Store the target app and create its scraper handle.

        Args:
            country: Kept on the instance and forwarded to ``AppStore``.
            app_name: Kept on the instance and forwarded to ``AppStore``.
        """
        self.country, self.app_name = country, app_name
        # Scraper object for this country/app pair.
        self.review_unit = AppStore(app_name=app_name, country=country)

        # Starts out as an empty frame.
        self.topic_reviews = pd.DataFrame()
Esempio n. 9
0
class AppReview(TopicModeller):
    """App Store reviews for one app, with topic-modelling helpers.

    Call order matters: ``get_reviews`` creates ``review_df``, which
    ``generate_embeddings`` reads; ``cluster_embeddings`` sets
    ``num_topics`` and ``topic_reviews``, which ``generate_topic_csv`` reads.
    """

    def __init__(self, country, app_name):
        """Store the target app and create its scraper handle.

        The ``TopicModeller`` initialiser is not called here; it is invoked
        later by ``generate_embeddings``.
        """
        self.country, self.app_name = country, app_name
        self.review_unit = AppStore(app_name=app_name, country=country)

        # Starts out empty; replaced by ``cluster_embeddings``.
        self.topic_reviews = pd.DataFrame()

    def get_reviews(self, num_reviews=None):
        '''
            Fetch reviews and return them as a pandas dataframe.

            A falsy ``num_reviews`` calls the scraper without ``how_many``.
            The frame is also stored on ``self.review_df``.
        '''
        fetch_kwargs = {'how_many': num_reviews} if num_reviews else {}
        self.review_unit.review(**fetch_kwargs)

        fetched = self.review_unit.reviews
        # One list per output column; dates are rendered as text.
        columns = {
            'title': [entry['title'] for entry in fetched],
            'rating': [entry['rating'] for entry in fetched],
            'userName': [entry['userName'] for entry in fetched],
            'review': [entry['review'] for entry in fetched],
            'date': [
                entry['date'].strftime("%m/%d/%Y, %H:%M:%S")
                for entry in fetched
            ],
            'isEdited': [entry['isEdited'] for entry in fetched],
        }

        self.review_df = pd.DataFrame(data=columns)

        return self.review_df

    def generate_embeddings(self):
        '''
            Hand the review texts to the TopicModeller initialiser
            as a one-column ``sentence`` dataframe.
        '''
        # NOTE(review): presumably the base class computes the embeddings
        # from this frame - confirm against TopicModeller.
        sentences = pd.DataFrame(
            data={'sentence': list(self.review_df.review)})

        super().__init__(sentences)

    def cluster_embeddings(self, num_topics=None):
        """Record ``num_topics`` and store the base class's projection."""
        self.num_topics = num_topics
        self.topic_reviews = super().project(topics=num_topics)

    def plot_embeddings(self, port_num=9000):
        """Delegate to the base class's ``plot`` using the app name."""
        super().plot(self.app_name, port_num=port_num)

    def generate_topic_csv(self, csv_path):
        """Pass the topic reviews, path and topic count to ``generate_csv``."""
        generate_csv(self.topic_reviews, csv_path, self.num_topics)
def get_reviews(app_name):
    """Return the review list scraped for ``app_name`` from the "in" storefront.

    The scraper is called with ``how_many=50000``.
    """
    scraper = AppStore(app_name=app_name, country="in")
    scraper.review(how_many=50000)
    return scraper.reviews
Esempio n. 11
0
import json
from app_store_scraper import AppStore

# Scrape the reviews of the "arrivecan" app from the "ca" storefront.
arrivecan = AppStore(app_name="arrivecan", country="ca")
arrivecan.review()

# Replace each review's date with its formatted text, in place, before dumping.
# NOTE(review): presumably the dates are datetime objects, which json cannot
# serialize directly - confirm.
for review in arrivecan.reviews:
    review.update(date=review['date'].strftime("%m/%d/%Y, %H:%M:%S"))

with open('apple.json', mode='w', encoding='utf-8') as f:
    json.dump(arrivecan.reviews, f, indent=4, ensure_ascii=False)
    #     config['app_id']['uber_apple'],
    config['app_id']['blablacar_apple'],
    #     config['app_id']['cabify_apple'],
    #     config['app_id']['via_apple'],
    #     config['app_id']['getaround_apple'],
    #     config['app_id']['olacabs_apple'],
    #     config['app_id']['taxieu_apple'],
    #     config['app_id']['freenow_apple'],
    #     config['app_id']['yandexgo_apple']
]

list_of_country_code = config['country_code']
output_path = config['output_path']

# For every app, scrape each configured storefront and write one merged CSV.
for app_id in list_of_app_id:
    # One normalized frame per storefront.
    frames = []

    for country_code in list_of_country_code:
        appstore = AppStore(country=country_code, app_name=app_id)
        appstore.review(how_many=100000)
        frames.append(pd.json_normalize(appstore.reviews))

    # With no country codes configured there is nothing to merge or write.
    if not frames:
        continue

    # DataFrame.append was removed in pandas 2.0; concat with its defaults
    # stacks the frames the same way and keeps the original row indices.
    df_merged = pd.concat(frames)

    csv_file_name = app_id + '_apple_appstore_review.csv'
    df_merged.to_csv(output_path + csv_file_name)