import pandas as pd
from app_store_scraper import AppStore


def get_app_store(app_id, n_reviews=200, filter="all"):
    """Get App Store reviews for an app."""
    data = AppStore(app_name=app_id, country='us')
    data.review(how_many=n_reviews)
    reviews = data.reviews
    reviews_df = pd.DataFrame(reviews)
    if filter == "all":
        return reviews_df
    else:
        # `filter` is expected to be a comparison string such as " >= 4",
        # which is appended to the rating column and evaluated with DataFrame.query.
        return reviews_df.query('rating' + filter)
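# A minimal usage sketch of get_app_store (the app name "slack" is a hypothetical
# example, not from the original code). The filter string is concatenated onto
# 'rating', so " >= 4" keeps only 4- and 5-star reviews.
df_all = get_app_store("slack", n_reviews=100)
df_top = get_app_store("slack", n_reviews=100, filter=" >= 4")
print(df_top[["userName", "rating", "review"]].head())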
import json

from app_store_scraper import AppStore


def scrape_data():
    app_info = AppStore(
        app_name=input("Enter name of the app in the App Store: "),
        country="in")
    app_info.review(how_many=3000)
    # normalize_review is defined elsewhere in this module.
    reviews = list(map(normalize_review, app_info.reviews))
    with open("../app-store-reviews.json", "w") as f:
        f.write(json.dumps(reviews))
from app_store_scraper import AppStore


def Crawl_appsList(l):
    dataTable = []
    for app in l:
        try:
            application = AppStore(country="eg", app_name=app)
            application.review()
            # Crawl_app is defined elsewhere in this project.
            tupl = Crawl_app(application, application.reviews)
            dataTable.append(tupl)
        except Exception:
            print("Couldn't find app:", app, "\nCheck the application name again.")
            continue
    return dataTable
from app_store_scraper import AppStore


class TestEmptyApp:
    country = "Nz"
    app_name = "Cool App"
    app_id = 7357
    app = AppStore(country=country, app_name=app_name, app_id=app_id)

    def test_init_attributes(self):
        assert self.app.country == self.country.lower()
        assert self.app.app_name == self.app_name.lower().replace(" ", "-")
        assert self.app.app_id == self.app_id
        assert self.app.reviews == []
        assert self.app.reviews_count == 0

    def test_init_url(self):
        base_landing_url = "https://apps.apple.com"
        landing_path = f"{self.app.country}/app/{self.app.app_name}/id{self.app.app_id}"
        landing_url = f"{base_landing_url}/{landing_path}"
        assert self.app.url == landing_url

    def test_repr(self):
        assert self.app.__repr__() == (
            f"AppStore(country='{self.app.country}', "
            f"app_name='{self.app.app_name}', "
            f"app_id={self.app.app_id})")

    def test_str(self, capsys):
        print(self.app)
        captured = capsys.readouterr()
        assert captured.out == (f"     Country | {self.app.country}\n"
                                f"        Name | {self.app.app_name}\n"
                                f"          ID | {self.app.app_id}\n"
                                f"         URL | {self.app.url}\n"
                                f"Review count | {self.app.reviews_count}\n")
from datetime import datetime, timedelta

from app_store_scraper import AppStore


class TestAppStore:
    app = AppStore(country="us", app_name="minecraft")

    def test_search_id(self):
        self.app.search_id()
        assert self.app.app_id == 479516143

    def test_review(self):
        # The scraper fetches reviews in batches of 20, so asking for 3
        # still yields a full batch.
        self.app.review(how_many=3)
        assert len(self.app.reviews) == 20
        assert len(self.app.reviews) == self.app.reviews_count

    def test_review_continuation(self):
        assert len(self.app.reviews) == 20
        self.app.review(how_many=7)
        assert len(self.app.reviews) == 40

    def test_reviews_for_duplicates(self):
        for i in range(len(self.app.reviews) - 1):
            assert self.app.reviews[i] != self.app.reviews[i + 1]

    def test_reviews_for_after(self):
        t1 = datetime.now()
        t0 = t1 - timedelta(weeks=26)
        self.app.reviews = []
        self.app.review(how_many=3, after=t0)
        for review in self.app.reviews:
            assert review["date"] >= t0 and review["date"] < t1

    def test_reviews_for_sleep(self):
        t_start = datetime.now()
        self.app.review(how_many=40, sleep=2)
        t_diff = datetime.now() - t_start
        assert t_diff.seconds >= 2
import csv

from app_store_scraper import AppStore


def app_store_scraper(app_name):
    app = AppStore(country="dk", app_name=app_name)
    app.review(how_many=1000)
    # Creates or overwrites the CSV with this name
    with open(r'appstore_reviews.csv', 'w', newline='') as file:
        fieldnames = ['username', 'review', 'score']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for review in app.reviews:
            score = review['rating']
            username = review['userName']
            text = review['review']
            try:
                print(f'{username} says: {text}')
                writer.writerow({'username': username, 'review': text, 'score': score})
            except Exception:
                print('Failed to add entry XXX')
import csv

from app_store_scraper import AppStore

# Change this to the name of the csv file containing your app metadata
# requires "url_name" and "id" fields
IN_FILE = 'as_eating_disorder_apps.csv'

# read app name and id from csv file
apps = []
with open(IN_FILE, newline='', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        apps.append({'app_name': row['url_name'], 'app_id': row['id']})

# for each app, use the AppStore scraper to fetch all reviews
for app in apps:
    app_temp = AppStore(country='gb', app_name=app['app_name'], app_id=app['app_id'])
    app_temp.review()
    app['reviews'] = app_temp.reviews  # save list of reviews to existing dictionary

# save reviews to separate csv files for each app
for app in apps:
    if app.get('reviews'):  # Not all apps have reviews
        outfile = app['app_name'] + '_reviews.csv'  # use the app_name to name the output file
        with open(outfile, 'w', encoding='utf-8', newline='') as file:
            # Needed to add developerResponse by hand as only some reviews have this field
            dict_writer = csv.DictWriter(
                file, list(app['reviews'][0].keys()) + ['developerResponse'])
            dict_writer.writeheader()
            dict_writer.writerows(app['reviews'])
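# For reference, the metadata CSV read above is assumed to look something like
# this (hypothetical placeholder rows, not from the original project): "url_name"
# is the slug that appears in the App Store URL and "id" is the numeric app ID.
#
#   url_name,id
#   example-app,123456789
#   another-app,987654321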
def __init__(self, country, app_name):
    self.country = country
    self.app_name = app_name
    self.review_unit = AppStore(country=country, app_name=app_name)
    self.topic_reviews = pd.DataFrame()
import pandas as pd
from app_store_scraper import AppStore

# TopicModeller and generate_csv are defined elsewhere in this project.
class AppReview(TopicModeller):
    def __init__(self, country, app_name):
        self.country = country
        self.app_name = app_name
        self.review_unit = AppStore(country=country, app_name=app_name)
        self.topic_reviews = pd.DataFrame()

    def get_reviews(self, num_reviews=None):
        '''
        Returns the reviews as a pandas dataframe.
        '''
        review_dict = {
            'title': [],
            'rating': [],
            'userName': [],
            'review': [],
            'date': [],
            'isEdited': []
        }
        if num_reviews:
            self.review_unit.review(how_many=num_reviews)
        else:
            self.review_unit.review()
        for review in self.review_unit.reviews:
            review_dict['title'].append(review['title'])
            review_dict['rating'].append(review['rating'])
            review_dict['userName'].append(review['userName'])
            review_dict['review'].append(review['review'])
            review_dict['date'].append(
                review['date'].strftime("%m/%d/%Y, %H:%M:%S"))
            review_dict['isEdited'].append(review['isEdited'])
        self.review_df = pd.DataFrame(data=review_dict)
        return self.review_df

    def generate_embeddings(self):
        '''
        Generates embeddings based on reviews from the App Store.
        '''
        df_pd = pd.DataFrame(data={'sentence': list(self.review_df.review)})
        super().__init__(df_pd)

    def cluster_embeddings(self, num_topics=None):
        self.num_topics = num_topics
        self.topic_reviews = super().project(topics=num_topics)

    def plot_embeddings(self, port_num=9000):
        super().plot(self.app_name, port_num=port_num)

    def generate_topic_csv(self, csv_path):
        generate_csv(self.topic_reviews, csv_path, self.num_topics)
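# A minimal usage sketch of the AppReview pipeline above (the app name "slack"
# is a hypothetical example; assumes TopicModeller and generate_csv are
# importable from this project):
app_review = AppReview(country="us", app_name="slack")
app_review.get_reviews(num_reviews=200)      # fetch reviews into a dataframe
app_review.generate_embeddings()             # build sentence embeddings via TopicModeller
app_review.cluster_embeddings(num_topics=5)  # project reviews into topics
app_review.generate_topic_csv("slack_topics.csv")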
from app_store_scraper import AppStore


def get_reviews(app_name):
    app_search = AppStore(country="in", app_name=app_name)
    app_search.review(how_many=50000)
    return app_search.reviews
import json

from app_store_scraper import AppStore

arrivecan = AppStore(country="ca", app_name="arrivecan")
arrivecan.review()

# datetime objects are not JSON serializable, so convert the dates to strings first
for i in arrivecan.reviews:
    i['date'] = i['date'].strftime("%m/%d/%Y, %H:%M:%S")

with open('apple.json', 'w', encoding='utf-8') as f:
    json.dump(arrivecan.reviews, f, ensure_ascii=False, indent=4)
import pandas as pd
from app_store_scraper import AppStore

# `config` is loaded elsewhere from the project's settings file.
list_of_app_id = [
    # config['app_id']['uber_apple'],
    config['app_id']['blablacar_apple'],
    # config['app_id']['cabify_apple'],
    # config['app_id']['via_apple'],
    # config['app_id']['getaround_apple'],
    # config['app_id']['olacabs_apple'],
    # config['app_id']['taxieu_apple'],
    # config['app_id']['freenow_apple'],
    # config['app_id']['yandexgo_apple']
]
list_of_country_code = config['country_code']
output_path = config['output_path']

for app_id in list_of_app_id:
    df_merged = None
    for country_code in list_of_country_code:
        appstore = AppStore(country=country_code, app_name=app_id)
        appstore.review(how_many=100000)
        df = pd.json_normalize(appstore.reviews)
        if df_merged is not None:
            # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
            df_merged = pd.concat([df_merged, df], ignore_index=True)
        else:
            df_merged = df
    csv_file_name = app_id + '_apple_appstore_review.csv'
    df_merged.to_csv(output_path + csv_file_name)
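# The `config` mapping used above is assumed to come from a settings file and to
# look roughly like this (all values are hypothetical placeholders, not the real
# project configuration):
config = {
    'app_id': {
        'uber_apple': 'example-app-slug',      # App Store URL slug for the app
        'blablacar_apple': 'another-app-slug',
        # ... one entry per app ...
    },
    'country_code': ['us', 'gb', 'fr'],
    'output_path': './output/',
}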