def parse_and_save(self):
    """Fetch raw data from Google Analytics and store it in the database.

    ``self.args[0]`` is the path of the token file.  If any further argument
    is given, ``self.bulk_import()`` is run; when three or more arguments are
    given the second one must be ``internal`` (compared case-insensitively).
    With only the token file, the package/resource page-path query is fetched
    through ``self.get_ga_data`` and stored through ``self.save_ga_data``.

    Raises:
        Exception: the token file does not exist, the service could not be
            created, or the second argument was rejected.
    """
    from ga_auth import init_service, get_profile_id

    token_path = self.args[0]
    if not os.path.exists(token_path):
        raise Exception('Cannot find the token file %s' % token_path)

    try:
        self.service = init_service(token_path, None)
    except TypeError:
        hint = ('Have you correctly run the getauthtoken task and '
                'specified the correct file here')
        print(hint)
        raise Exception('Unable to create a service')
    self.profile_id = get_profile_id(self.service)

    arg_count = len(self.args)
    if arg_count <= 1:
        # Only the token file was supplied: fetch and store the page stats.
        page_filter = 'ga:pagePath=~%s,ga:pagePath=~%s' % (
            PACKAGE_URL, self.resource_url_tag)
        records = self.get_ga_data(query_filter=page_filter)
        self.save_ga_data(records)
        log.info("Saved %s records from google" % len(records))
        return

    # The second argument is only validated when a third one is present.
    if arg_count > 2 and self.args[1].lower() != 'internal':
        raise Exception('Illegal argument %s' % self.args[1])
    self.bulk_import()
def command(self):
    """Download Google Analytics statistics for the requested period.

    Loads the CKAN config, reads the token file path from the
    ``googleanalytics.token.filepath`` setting, creates the service and runs
    a ``DownloadAnalytics`` for ``self.args[0]``: ``all``, ``latest`` (also
    the default when no argument is given) or a month written ``YYYY-MM``.
    Prints a message and returns early when the setting is empty or the
    service cannot be created.
    """
    self._load_config()

    from download_analytics import DownloadAnalytics
    from ga_auth import init_service, get_profile_id

    configured_path = config.get('googleanalytics.token.filepath', '')
    token_path = os.path.expanduser(configured_path)
    if not token_path:
        print('ERROR: In the CKAN config you need to specify the filepath of the '
              'Google Analytics token file under key: googleanalytics.token.filepath')
        return

    try:
        # Unpacking stays inside the try: a non-tuple result raises TypeError.
        self.token, svc = init_service(token_path, None)
    except TypeError:
        print('Have you correctly run the getauthtoken task and '
              'specified the correct token file in the CKAN config under '
              '"googleanalytics.token.filepath"?')
        return

    downloader = DownloadAnalytics(
        svc,
        self.token,
        profile_id=get_profile_id(svc),
        delete_first=self.options.delete_first,
        skip_url_stats=self.options.skip_url_stats,
    )

    if self.args:
        period = self.args[0]
    else:
        period = 'latest'

    if period == 'all':
        downloader.all_()
        return
    if period == 'latest':
        downloader.latest()
        return

    # Anything else is taken to be the month to download.
    month = datetime.datetime.strptime(period, '%Y-%m')
    downloader.specific_month(month)
def parse_and_save(self):
    """Pull raw Google Analytics data and persist it.

    The token file path is taken from ``self.args[0]``.  Extra arguments
    trigger ``self.bulk_import()`` (with three or more arguments the second
    must equal ``internal``, ignoring case); without extra arguments the
    page-path query for packages and resources is fetched and saved.

    Raises:
        Exception: missing token file, service creation failure, or an
            illegal second argument.
    """
    from ga_auth import init_service, get_profile_id

    args = self.args
    tokenfile = args[0]
    token_file_present = os.path.exists(tokenfile)
    if not token_file_present:
        raise Exception('Cannot find the token file %s' % tokenfile)

    try:
        service = init_service(tokenfile, None)
        self.service = service
    except TypeError:
        print('Have you correctly run the getauthtoken task and '
              'specified the correct file here')
        raise Exception('Unable to create a service')
    self.profile_id = get_profile_id(self.service)

    if len(args) > 1:
        # Bulk import path; the mode argument is checked only when a third
        # argument accompanies it.
        if len(args) > 2:
            if args[1].lower() != 'internal':
                raise Exception('Illegal argument %s' % args[1])
        self.bulk_import()
    else:
        url_patterns = (PACKAGE_URL, self.resource_url_tag)
        query = 'ga:pagePath=~%s,ga:pagePath=~%s' % url_patterns
        packages_data = self.get_ga_data(query_filter=query)
        self.save_ga_data(packages_data)
        saved_count = len(packages_data)
        log.info("Saved %s records from google" % saved_count)
def parse_and_save(self, args):
    """Fetch raw data from Google Analytics and store it in the database.

    Args:
        args: argument list.  ``args[1]`` is the token file path and the
            optional ``args[2]`` is a start date written ``YYYY-MM-DD``.
            ``args[0]`` is not read by this method.

    Raises:
        Exception: only one argument was given, the token file does not
            exist, the service could not be created, or more than three
            arguments were given.
    """
    from ga_auth import init_service, get_profile_id

    arg_count = len(args)
    if arg_count == 1:
        raise Exception("Missing token file")

    token_path = args[1]
    if not os.path.exists(token_path):
        raise Exception('Cannot find the token file %s' % token_path)

    try:
        self.service = init_service(token_path, None)
    except TypeError:
        hint = ('Have you correctly run the getauthtoken task and '
                'specified the correct file here')
        print(hint)
        raise Exception('Unable to create a service')
    self.profile_id = get_profile_id(self.service)

    if arg_count > 3:
        raise Exception('Too many arguments')

    if arg_count == 3:
        start = datetime.datetime.strptime(args[2], '%Y-%m-%d').date()
    else:
        start = None

    records = self.get_ga_data(start_date=start)
    self.save_ga_data(records)
    self.log.info("Saved %s records from google" % len(records))
def command(self):
    """Download Google Analytics statistics for the requested period.

    The token file path comes from the ``googleanalytics.token.filepath``
    CKAN setting.  ``self.args[0]`` selects the period: ``all``, ``latest``
    (the default when no argument is given) or a month written ``YYYY-MM``.
    A message is printed and the method returns early when the setting is
    empty or the service cannot be created.
    """
    self._load_config()

    from download_analytics import DownloadAnalytics
    from ga_auth import init_service, get_profile_id

    token_path = os.path.expanduser(
        config.get('googleanalytics.token.filepath', ''))
    if not token_path:
        print('ERROR: In the CKAN config you need to specify the filepath of the '
              'Google Analytics token file under key: googleanalytics.token.filepath')
        return

    try:
        svc = init_service(token_path)
    except TypeError as err:
        failure = 'Unable to create a service: {0}'.format(err)
        print(failure)
        return

    # NOTE(review): self.token is not assigned in this method; presumably it
    # is set elsewhere on the instance -- confirm.
    downloader = DownloadAnalytics(
        svc,
        self.token,
        profile_id=get_profile_id(svc),
        delete_first=self.options.delete_first,
        stat=self.options.stat,
        print_progress=True,
    )

    period = self.args[0] if self.args else 'latest'
    if period == 'all':
        downloader.all_()
        return
    if period == 'latest':
        downloader.latest()
        return

    # Any other value names the month to download.
    downloader.specific_month(datetime.datetime.strptime(period, '%Y-%m'))
def parse_and_save(self):
    """Grab raw data from Google Analytics and save it to the database.

    Arguments are read from ``self.args``:

    * ``self.args[0]`` -- path of the token file (required).
    * ``self.args[1]`` -- optional start date written ``YYYY-MM-DD``.

    The argument count and the token file are checked before the
    ``ga_auth`` helpers are imported and the service is created, so a bad
    invocation fails without doing any of that work.

    Raises:
        Exception: no token file argument, more than two arguments, the
            token file does not exist, or the service cannot be created.
    """
    arg_count = len(self.args)
    if arg_count == 0:
        raise Exception("Missing token file")
    if arg_count > 2:
        raise Exception('Too many arguments')

    tokenfile = self.args[0]
    if not os.path.exists(tokenfile):
        # Report the path that was actually checked.  Index 1 is the optional
        # start date and does not exist when only the token file is given.
        raise Exception('Cannot find the token file %s' % tokenfile)

    from ga_auth import (init_service, get_profile_id)

    try:
        self.service = init_service(tokenfile)
    except TypeError as e:
        raise Exception('Unable to create a service: {0}'.format(e))
    self.profile_id = get_profile_id(self.service)

    given_start_date = None
    if arg_count == 2:
        given_start_date = datetime.datetime.strptime(
            self.args[1], '%Y-%m-%d').date()

    packages_data = self.get_ga_data(start_date=given_start_date)
    self.save_ga_data(packages_data)
    log.info("Saved %s records from google" % len(packages_data))
def command(self):
    """Download Google Analytics statistics for the requested selection.

    The token file path comes from the ``googleanalytics.token.filepath``
    CKAN setting.  ``self.args[0]`` selects what is downloaded:

    * ``all`` / ``latest`` (``latest`` is the default without arguments)
    * ``year <year>``
    * ``year-month <year> <month>``
    * ``year-step <year>``
    * ``only <data> <YYYY-MM>``
    * any other value is parsed as a month written ``YYYY-MM``

    A message is printed and the method returns early when the setting is
    empty or the service cannot be created.
    """
    self._load_config()

    from download_analytics import DownloadAnalytics
    from ga_auth import init_service, get_profile_id

    token_path = os.path.expanduser(
        config.get('googleanalytics.token.filepath', ''))
    if not token_path:
        print('ERROR: In the CKAN config you need to specify the filepath of the '
              'Google Analytics token file under key: googleanalytics.token.filepath')
        return

    try:
        # Unpacking stays inside the try: a non-tuple result raises TypeError.
        self.token, svc = init_service(token_path, None)
    except TypeError:
        print('Have you correctly run the getauthtoken task and '
              'specified the correct token file in the CKAN config under '
              '"googleanalytics.token.filepath"?')
        return

    downloader = DownloadAnalytics(
        svc,
        self.token,
        profile_id=get_profile_id(svc),
        delete_first=self.options.delete_first,
        stat=self.options.stat,
        print_progress=True,
    )

    selector = self.args[0] if self.args else 'latest'

    if selector == 'all':
        downloader.all_()
        return
    if selector == 'latest':
        downloader.latest()
        return
    if selector == 'year':
        downloader.specific_year(self.args[1])
        return
    if selector == 'year-month':
        downloader.specific_year_month(self.args[1], self.args[2])
        return
    if selector == 'year-step':
        downloader.specific_year_step(self.args[1])
        return
    if selector == 'only':
        wanted = str(self.args[1])
        month = datetime.datetime.strptime(self.args[2], '%Y-%m')
        downloader.specific_month_only(month, wanted)
        return

    # Fallback: the selector itself is the month to use.
    downloader.specific_month(datetime.datetime.strptime(selector, '%Y-%m'))
def command(self):
    """Run the analytics download for the period given on the command line.

    Reads the token file path from ``googleanalytics.token.filepath`` in the
    CKAN config, creates the service and hands it to ``DownloadAnalytics``.
    ``self.args[0]`` may be ``all``, ``latest`` (used when there are no
    arguments) or a month written ``YYYY-MM``.  When the setting is empty or
    the service cannot be created a message is printed and nothing is
    downloaded.
    """
    self._load_config()

    from download_analytics import DownloadAnalytics
    from ga_auth import init_service, get_profile_id

    raw_path = config.get("googleanalytics.token.filepath", "")
    ga_token_filepath = os.path.expanduser(raw_path)
    if not ga_token_filepath:
        print("ERROR: In the CKAN config you need to specify the filepath of the "
              "Google Analytics token file under key: googleanalytics.token.filepath")
        return

    try:
        # A result that cannot be unpacked raises TypeError here as well.
        self.token, svc = init_service(ga_token_filepath, None)
    except TypeError:
        help_text = (
            "Have you correctly run the getauthtoken task and "
            "specified the correct token file in the CKAN config under "
            '"googleanalytics.token.filepath"?'
        )
        print(help_text)
        return

    downloader = DownloadAnalytics(
        svc,
        self.token,
        profile_id=get_profile_id(svc),
        delete_first=self.options.delete_first,
        stat=self.options.stat,
        print_progress=True,
    )

    if self.args:
        time_period = self.args[0]
    else:
        time_period = "latest"

    if time_period == "all":
        downloader.all_()
    elif time_period == "latest":
        downloader.latest()
    else:
        # Treat the argument as the month to download.
        downloader.specific_month(
            datetime.datetime.strptime(time_period, "%Y-%m"))
def command(self):
    """Download Google Analytics statistics for the requested period.

    The token file path comes from the ``ds_stats.ga.token.filepath`` CKAN
    setting.  ``self.args[0]`` selects the period: ``all``, ``latest`` (the
    default when no argument is given) or a month written ``YYYY-MM``.
    A message is printed and the method returns early when the setting is
    empty or the service cannot be created.
    """
    self._load_config()

    from download_analytics import DownloadAnalytics
    from ga_auth import init_service, get_profile_id

    token_setting = config.get('ds_stats.ga.token.filepath', '')
    token_path = os.path.expanduser(token_setting)
    if not token_path:
        print('ERROR: In the CKAN config you need to specify the '
              'filepath of the Google Analytics token file under '
              'key: ds_stats.ga.token.filepath')
        return

    try:
        # Unpacking stays inside the try: a non-tuple result raises TypeError.
        self.token, svc = init_service(token_path)
    except TypeError:
        print('Have you correctly run the getauthtoken task and '
              'specified the correct token file in the CKAN config under '
              '"ds_stats.ga.token.filepath"?')
        return

    downloader = DownloadAnalytics(
        svc,
        self.token,
        profile_id=get_profile_id(svc),
        delete_first=self.options.delete_first,
        skip_url_stats=self.options.skip_url_stats,
    )

    period = self.args[0] if self.args else 'latest'
    if period == 'all':
        downloader.all_()
        return
    if period == 'latest':
        downloader.latest()
        return

    # Otherwise the argument is the month to use.
    target_month = datetime.datetime.strptime(period, '%Y-%m')
    downloader.specific_month(target_month)
def parse_and_save(self):
    """Grab raw data from Google Analytics and save it to the database.

    Arguments are read from ``self.args``:

    * ``self.args[0]`` -- path of the token file (required).
    * ``self.args[1]`` -- optional.  Its presence selects a bulk import; the
      value ``downloads`` (case-insensitive) is passed on to
      ``self.bulk_import`` as ``downloads=True``.

    When three or more arguments are given, the second one must be
    ``internal`` or ``downloads``.  With only the token file, the
    package/resource page-path query is fetched and saved.

    All argument checks run before the ``ga_auth`` helpers are imported and
    the service is created, so a bad invocation fails without doing that
    work.

    Raises:
        Exception: no token file argument, an illegal second argument, the
            token file does not exist, or the service cannot be created.
    """
    args = self.args
    if not args:
        raise Exception('Missing token file')

    mode = args[1].lower() if len(args) > 1 else None
    # Membership test: chaining two ``!=`` comparisons with ``or`` is true for
    # every value, which rejected even the two accepted modes.
    if len(args) > 2 and mode not in ('internal', 'downloads'):
        raise Exception('Illegal argument %s' % args[1])

    tokenfile = args[0]
    if not os.path.exists(tokenfile):
        raise Exception('Cannot find the token file %s' % tokenfile)

    from ga_auth import (init_service, get_profile_id)

    try:
        self.service = init_service(tokenfile)
    except TypeError as e:
        raise Exception('Unable to create a service: {0}'.format(e))
    self.profile_id = get_profile_id(self.service)

    if mode is not None:
        self.bulk_import(downloads=(mode == 'downloads'))
        return

    query = 'ga:pagePath=~%s,ga:pagePath=~%s' % (
        PACKAGE_URL, self.resource_url_tag)
    packages_data = self.get_ga_data(query_filter=query)
    self.save_ga_data(packages_data)
    log.info("Saved %s records from google" % len(packages_data))
def command(self):
    """Grab raw data from Google Analytics and save to the database.

    Positional arguments read from ``self.args``:

    * ``self.args[0]`` -- ``save`` to store the statistics; any other value
      (or no argument) leaves ``save_stats`` false.
    * ``self.args[1]`` -- kind of statistics; must be one of
      ``DownloadAnalytics.KIND_STATS``.
    * ``self.args[2]`` -- ``latest`` (the default), ``last_month`` or a
      month written ``YYYY-MM``.

    The token file path is read from the
    ``ckanext-dge-ga-report.token.filepath`` CKAN setting.

    The method always ends the process through ``sys.exit``: status 1 when
    the configuration, the arguments or the download fail, status 0
    otherwise.  Start and end banners with the elapsed time are printed in
    both cases.
    """
    init = datetime.datetime.now()
    s_args = (self.args if self.args else '(no args)')
    print('[%s] - Init DgeGaReportLoadAnalytics command with args: %s.' % (
        init.strftime(DgeGaReportLoadAnalytics.datetime_format), s_args))
    try:
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        ga_token_filepath = config.get(
            'ckanext-dge-ga-report.token.filepath', '')
        if not ga_token_filepath or not os.path.exists(ga_token_filepath):
            # Name the key that is actually read above.
            print('ERROR: In the CKAN config you need to specify the filepath of the '
                  'Google Analytics token file under key: '
                  'ckanext-dge-ga-report.token.filepath')
            sys.exit(1)

        try:
            self.token, svc = init_service(ga_token_filepath, None)
        except TypeError:
            print('Unable to create a service. Have you correctly run the getauthtoken task and '
                  'specified the correct token file in the CKAN config under '
                  '"ckanext-dge-ga-report.token.filepath"?')
            sys.exit(1)

        save_print = self.args[0] if self.args else 'print'
        save = save_print == 'save'

        # Guard each index on the argument count so the fallbacks are used
        # when fewer arguments are given, instead of raising IndexError.
        kind = self.args[1] if len(self.args) > 1 else None
        if kind is None or kind not in DownloadAnalytics.KIND_STATS:
            print('A valid kind of statistics that you want to load must be '
                  'specified: %s' % DownloadAnalytics.KIND_STATS)
            sys.exit(1)

        downloader = DownloadAnalytics(
            svc, self.token, profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            stat=self.options.stat,
            print_progress=True,
            kind_stats=kind,
            save_stats=save)

        time_period = self.args[2] if len(self.args) > 2 else 'latest'
        if time_period == 'latest':
            downloader.latest()
        elif time_period == 'last_month':
            now = datetime.datetime.now()
            if now.month == 1:
                # January: the previous month is December of last year.
                last_month = datetime.datetime(now.year - 1, 12, 1, 0, 0, 0)
            else:
                last_month = datetime.datetime(
                    now.year, now.month - 1, 1, 0, 0, 0)
            downloader.specific_month(last_month)
        else:
            # The month to use
            for_date = datetime.datetime.strptime(time_period, '%Y-%m')
            downloader.specific_month(for_date)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit(1) calls
        # above pass straight through this handler.
        print('Exception %s' % e)
        sys.exit(1)
    finally:
        end = datetime.datetime.now()
        print('[%s] - End DgeGaReportLoadAnalytics command with args %s. Executed command in %s milliseconds.' % (
            end.strftime(DgeGaReportLoadAnalytics.datetime_format), s_args,
            (end - init).total_seconds() * 1000))
    # Kept outside the ``finally`` clause: exiting from inside it would
    # replace a pending sys.exit(1) and report success for a failed run.
    sys.exit(0)
def parse_and_save(self, args):
    """Run every analytics query and save the parsed results.

    Args:
        args: argument list forwarded to ``self.init_service``.  At most
            three entries are accepted; an optional ``args[2]`` is a start
            date written ``YYYY-MM-DD``.

    For each query type (package, resource, visitorlocation,
    package_downloads, search_terms) the dates returned by
    ``self.get_dates_between_update`` are walked in order.  The first
    ``ga_query`` call ends at the current time and each later call ends at
    the previous start date.  The results are folded through the type's
    resolver, saved with its save function, and the session is committed.

    Raises:
        Exception: more than three arguments were given.
    """
    from ga_auth import get_profile_id

    self.init_service(args)
    self.profile_id = get_profile_id(self.service)

    if len(args) > 3:
        raise Exception('Too many arguments')

    if len(args) == 3:
        given_start_date = datetime.datetime.strptime(
            args[2], '%Y-%m-%d').date()
    else:
        given_start_date = None

    # Filter expression shared by the visitor-location and search queries.
    bot_filter_expr = ";".join([
        'ga:browser!@StatusCake',
        'ga:browser!@Python',
        'ga:sessionDurationBucket!=0',
        'ga:sessionDurationBucket!=1',
        'ga:sessionDurationBucket!=2',
        'ga:sessionDurationBucket!=3',
        'ga:networkDomain!=ua.es',
        'ga:networkDomain!=amazonaws.com',
        'ga:networkDomain!=kcura.com',
        'ga:networkDomain!=relativity.com',
    ])

    # Query specifications sent to analytics, in execution order.
    query_specs = [
        {
            'type': 'package',
            'dates': self.get_dates_between_update(
                given_start_date, PackageStats.get_latest_update_date()),
            'filters': 'ga:pagePath=~%s,ga:pagePath=~%s' % (
                PACKAGE_URL, self.resource_url_tag),
            'metrics': 'ga:uniquePageviews, ga:entrances',
            'sort': 'ga:date',
            'dimensions': 'ga:pagePath, ga:date',
            'resolver': self.resolver_type_package,
            'save': self.save_type_package,
        },
        {
            'type': 'resource',
            'dates': self.get_dates_between_update(
                given_start_date, ResourceStats.get_latest_update_date()),
            'filters': 'ga:pagePath=~%s' % self.resource_url_tag,
            'metrics': 'ga:uniquePageviews',
            'sort': 'ga:date',
            'dimensions': 'ga:pagePath, ga:date',
            'resolver': self.resolver_type_resource,
            'save': self.save_type_resource,
        },
        {
            'type': 'visitorlocation',
            'dates': self.get_dates_between_update(
                given_start_date,
                AudienceLocationDate.get_latest_update_date()),
            'filters': bot_filter_expr,
            'metrics': 'ga:sessions',
            'sort': 'ga:date',
            'dimensions': 'ga:country, ga:date',
            'resolver': self.resolver_type_visitorlocation,
            'save': self.save_type_visitorlocation,
        },
        {
            'type': 'package_downloads',
            'dates': self.get_dates_between_update(
                given_start_date, PackageStats.get_latest_update_date()),
            'filters': "ga:eventCategory==Resource;ga:eventAction==Download",
            'metrics': "ga:uniqueEvents",
            'sort': "ga:date",
            'dimensions': "ga:pagePath, ga:date, ga:eventCategory",
            'resolver': self.resolver_type_package_downloads,
            'save': self.save_type_package_downloads,
        },
        {
            'type': 'search_terms',
            'dates': self.get_dates_between_update(
                given_start_date, SearchStats.get_latest_update_date()),
            'filters': bot_filter_expr,
            'metrics': "ga:searchUniques",
            'sort': "ga:date",
            'dimensions': "ga:searchKeyword, ga:date",
            'resolver': self.resolver_type_search_terms,
            'save': self.save_type_search_terms,
        },
    ]

    # Run each query, parse its results and save them to the db.
    for spec in query_specs:
        query_type = spec['type']
        collected = {}
        window_end = datetime.datetime.now()
        self.log.info('performing analytics query of type: %s' % query_type)
        print('Querying type: %s' % query_type)

        for window_start in spec['dates']:
            rows = self.ga_query(
                start_date=window_start,
                end_date=window_end,
                filters=spec['filters'],
                metrics=spec['metrics'],
                sort=spec['sort'],
                dimensions=spec['dimensions'],
            )
            # The resolver receives the accumulated data and returns the
            # updated accumulation.
            collected = spec['resolver'](rows, collected)
            window_end = window_start

        print('Saving type: %s' % query_type)
        spec['save'](collected)
        model.Session.commit()
        print('Saving done')
        self.log.info(
            "Successfully saved analytics query of type: %s" % query_type)