def test_date_range(self):
    """date_range yields every day from start to end, inclusive on both ends."""
    today = date.today()
    one_day_ago = today - timedelta(days=1)
    seven_days_ago = today - timedelta(days=7)

    week = list(date_range(seven_days_ago, today))
    self.assertEqual(week[0], seven_days_ago)
    self.assertEqual(week[-1], today)
    # Inclusive bounds: 7 days back plus today itself = 8 entries.
    self.assertEqual(len(week), 8)

    self.assertEqual(list(date_range(one_day_ago, today)),
                     [one_day_ago, today])
    # Degenerate range: start == end yields exactly that single day.
    self.assertEqual(list(date_range(one_day_ago, one_day_ago)),
                     [one_day_ago])
def extract(self, start_date, end_date):
    """Yield processed GA rows for every day in [start_date, end_date].

    We won't use GA's aggregation feature here, but extract day-by-day.
    Whether this query can be batched is an open question.
    """
    for day in date_range(start_date, end_date):
        day_iso = day.isoformat()
        query = {
            'ids': self.profile_id,
            'start_date': day_iso,
            'end_date': day_iso,
            'dimensions': self.qdimensions,
            'filters': self.qfilters,
            'metrics': self.qmetrics,
            'start_index': 1,
            'max_results': 1000,
        }
        collected = []
        page = self._rate_limited_get(**query)
        # Follow GA pagination until the API stops advertising a next page.
        while page.get('totalResults', 0) > 0:
            collected.extend(page['rows'])
            if not page.get('nextLink'):
                break
            query['start_index'] += query['max_results']
            page = self._rate_limited_get(**query)
        header_names = [header['name'] for header in page['columnHeaders']]
        for record in self.processor(collected, day, header_names):
            yield record
def extract(self, start_date, end_date):
    """Yield processed GA report rows, one day at a time.

    GA's aggregation feature is deliberately not used here; the range
    is walked day-by-day. Batching these queries is an open question.
    """
    for current_day in date_range(start_date, end_date):
        stamp = current_day.isoformat()
        params = dict(ids=self.profile_id,
                      start_date=stamp,
                      end_date=stamp,
                      dimensions=self.qdimensions,
                      filters=self.qfilters,
                      metrics=self.qmetrics,
                      start_index=1,
                      max_results=1000)
        all_rows = []
        response = self._rate_limited_get(**params)
        # Accumulate every page the API offers via nextLink.
        while response.get('totalResults', 0) > 0:
            all_rows.extend(response['rows'])
            next_link = response.get('nextLink')
            if not next_link:
                break
            params['start_index'] += params['max_results']
            response = self._rate_limited_get(**params)
        columns = [c['name'] for c in response['columnHeaders']]
        for item in self.processor(all_rows, current_day, columns):
            yield item
def extract(self, start_date, end_date):
    """Yield per-app gross revenue records for each day in the range.

    For every day, the S3 transaction log is scanned; marketplace
    transactions are resolved through the API and their USD amounts
    are summed per app before a single record per app is emitted.
    """
    for day in date_range(start_date, end_date):
        # Amounts grouped by app ID so a summed total per app can be
        # inserted into Monolith.
        amounts_by_app = defaultdict(list)
        for record in self.get_s3_file(day):
            transaction_uuid = record[1]
            # TODO: Handle in-app payments.
            if record[9] != 'marketplace':
                continue
            api_url = self.endpoint.replace(':transaction_id',
                                            transaction_uuid)
            transaction = self.read_api(api_url)
            if transaction:
                amounts_by_app[transaction['app_id']].append(
                    transaction['amount_USD'])
        for app_id, amounts in amounts_by_app.items():
            total = sum(decimal.Decimal(amount) for amount in amounts)
            yield {
                '_date': day,
                '_type': self.type,
                'gross_revenue': total,
                'app-id': app_id,
            }
def extract(self, start_date, end_date):
    """Yield one 'visitors' record per GA result row for each day in
    [start_date, end_date].

    We won't use GA's aggregation feature here, but extract day-by-day.
    Whether this query can be batched is an open question.
    """
    for current in date_range(start_date, end_date):
        iso = current.isoformat()
        options = {'ids': self.profile_id,
                   'start_date': iso,
                   'end_date': iso,
                   'dimensions': self.qdimensions,
                   'metrics': self.qmetrics}
        results = self._rate_limited_get(**options)
        # Use .get() so a response lacking 'totalResults' is treated as
        # empty instead of raising KeyError — consistent with the other
        # extractors in this file.
        if results.get('totalResults', 0) == 0:
            continue
        cols = [col['name'] for col in results['columnHeaders']]
        for entry in results['rows']:
            data = {'_date': current, '_type': 'visitors'}
            # Pair each cell with its column header positionally.
            for col_name, value in zip(cols, entry):
                field = self._fix_name(col_name)
                # XXX see how to convert generically
                if field in ('pageviews', 'visits'):
                    value = int(value)
                data[field] = value
            yield data
def add_entry(self, sources, start_date, end_date=None, num=0):
    """Record one Transaction per (source, day) inside a DB session.

    If end_date is None only start_date is used; otherwise every day in
    [start_date, end_date] gets a Transaction for each source.
    `num` is accepted for signature compatibility but unused here —
    TODO confirm against callers.
    """
    with self.transaction() as session:
        if end_date is None:
            days = (start_date,)
        else:
            days = date_range(start_date, end_date)
        # `day` instead of `date`: avoids shadowing datetime.date,
        # which this module uses elsewhere.
        for day in days:
            for source in sources:
                session.add(Transaction(source=source.get_id(), date=day))
def extract(self, start_date, end_date):
    """Yield processed GA rows per day, with a date-dependent region
    dimension.

    Override `extract` to customize dimensions based on date: the
    region dimension was added Jan 21 2014, so queries prior to that
    must exclude it.
    """
    for day in date_range(start_date, end_date):
        day_iso = day.isoformat()
        requested = self.dimensions[:]
        # Strip any region entries, then re-add whichever form (custom
        # var or native dimension) was valid on this day, if any.
        for region_entry in (self.region_var, self.region_dimension):
            if region_entry in requested:
                requested.remove(region_entry)
        if day >= self.date_dimension_added:
            requested.append(self.region_dimension)
        elif day >= self.date_var_added:
            requested.append(self.region_var)
        query = {
            'ids': self.profile_id,
            'start_date': day_iso,
            'end_date': day_iso,
            'dimensions': ','.join(requested),
            'filters': self.qfilters,
            'metrics': self.qmetrics,
            'start_index': 1,
            'max_results': 1000,
        }
        collected = []
        page = self._rate_limited_get(**query)
        # Walk GA pagination until there is no advertised next page.
        while page.get('totalResults', 0) > 0:
            collected.extend(page['rows'])
            if not page.get('nextLink'):
                break
            query['start_index'] += query['max_results']
            page = self._rate_limited_get(**query)
        headers = [h['name'] for h in page['columnHeaders']]
        for item in self.processor(collected, day, headers):
            yield item
def extract(self, start_date, end_date):
    """Per-day GA extraction whose dimensions depend on the day queried.

    Override `extract` to customize dimensions based on date: the
    region dimension only exists from Jan 21 2014 on, so it is added
    or omitted per day.
    """
    for current_day in date_range(start_date, end_date):
        stamp = current_day.isoformat()
        dims = self.dimensions[:]
        # Drop any pre-existing region entry; the right one for this
        # day is appended below.
        for unwanted in (self.region_var, self.region_dimension):
            try:
                dims.remove(unwanted)
            except ValueError:
                pass
        if self.date_var_added <= current_day < self.date_dimension_added:
            dims.append(self.region_var)
        elif self.date_dimension_added <= current_day:
            dims.append(self.region_dimension)
        params = dict(ids=self.profile_id,
                      start_date=stamp,
                      end_date=stamp,
                      dimensions=','.join(dims),
                      filters=self.qfilters,
                      metrics=self.qmetrics,
                      start_index=1,
                      max_results=1000)
        gathered = []
        response = self._rate_limited_get(**params)
        # Accumulate every page the API exposes through nextLink.
        while response.get('totalResults', 0) > 0:
            gathered.extend(response['rows'])
            link = response.get('nextLink')
            if not link:
                break
            params['start_index'] += params['max_results']
            response = self._rate_limited_get(**params)
        column_names = [col['name'] for col in response['columnHeaders']]
        for produced in self.processor(gathered, current_day, column_names):
            yield produced
def extract(self, start_date, end_date):
    """Yield one gross-revenue record per app for each day in the range."""
    for current in date_range(start_date, end_date):
        # Results are keyed by app ID to perform a sum before inserting
        # into Monolith.
        per_app = defaultdict(list)
        lines = self.get_s3_file(current)
        for entry in lines:
            tx_uuid, tx_source = entry[1], entry[9]
            # TODO: Handle in-app payments.
            if tx_source == 'marketplace':
                detail = self.read_api(
                    self.endpoint.replace(':transaction_id', tx_uuid))
                if detail:
                    per_app[detail['app_id']].append(detail['amount_USD'])
        for app, usd_amounts in per_app.items():
            yield {'_date': current,
                   '_type': self.type,
                   'app-id': app,
                   'gross_revenue': sum(map(decimal.Decimal,
                                            usd_amounts))}