# Module-level imports these handlers rely on (the enclosing handler
# classes, the vnsearch/vnutil modules, helpers such as _get_tsv_chunk and
# _queue, and constants such as TEMP_BUCKET, DOWNLOAD_BUCKET,
# SEARCH_CHUNK_SIZE, FILE_EXTENSION and DOWNLOAD_VERSION are defined
# elsewhere in this package):
import gc
import json
import logging
import uuid

import cloudstorage as gcs
from google.appengine.api import search, taskqueue


def post(self):
    q, email, name, latlon = map(
        self.request.get, ['q', 'email', 'name', 'latlon'])
    q = json.loads(q)
    requesttime = self.request.get('requesttime')
    filepattern = self.request.get('filepattern')
    fileindex = int(self.request.get('fileindex'))
    reccount = int(self.request.get('reccount'))
    fromapi = self.request.get('fromapi')
    source = self.request.get('source')
    filename = '/%s/%s-%s.%s' % (TEMP_BUCKET, filepattern, fileindex,
                                 FILE_EXTENSION)
    cursor = self.request.get('cursor')
    try:
        total_res_counts = json.loads(self.request.get('res_counts'))
    except (TypeError, ValueError):
        total_res_counts = {}

    if cursor:
        curs = search.Cursor(web_safe_string=cursor)
    else:
        curs = None

    # Get the next chunk of results. Each chunk goes to its own file,
    # because GCS does not support appending to an existing object.
    records, next_cursor, count, query_version = \
        vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
    this_record_count = len(records)

    # Build a dict of per-resource record counts for this chunk.
    res_counts = vnutil.search_resource_counts(records, total_res_counts)

    # Merge the chunk counts into the running totals, summing counts.
    if not total_res_counts:
        total_res_counts = res_counts
    else:
        for r in res_counts:
            total_res_counts[r] = total_res_counts.get(r, 0) + res_counts[r]

    # Update the total number of records retrieved.
    reccount = reccount + this_record_count

    # Make a TSV chunk to write to a file.
    chunk = '%s\n' % _get_tsv_chunk(records)

    # Release the records and garbage-collect to conserve memory.
    records = None
    gc.collect()

    if fileindex == 0 and not next_cursor:
        # The query produced fewer than SEARCH_CHUNK_SIZE results; write a
        # single output file without a chunk index in its name.
        filename = '/%s/%s.%s' % (TEMP_BUCKET, filepattern, FILE_EXTENSION)

    max_retries = 2
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                if fileindex == 0:
                    f.write('%s\n' % vnutil.download_header())
                f.write(chunk)
                success = True
                # Release the chunk and garbage-collect to conserve memory.
                chunk = None
                gc.collect()
                # logging.info('Download chunk saved to %s: Total %s records. '
                #     'Has next cursor: %s\nVersion: %s'
                #     % (filename, reccount, next_cursor is not None,
                #        DOWNLOAD_VERSION))
        except Exception as e:
            logging.error('Error writing chunk to FILE: %s for\nQUERY: %s '
                          'Error: %s\nVersion: %s'
                          % (filename, q, e, DOWNLOAD_VERSION))
            retry_count += 1
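# The fragment above ends once the chunk is written, but post() computes
# next_cursor, so the handler presumably chains itself until the cursor is
# exhausted. A minimal sketch of that chaining follows; the
# '/service/download/write' route and the 'downloadwrite' queue name are
# assumptions, not confirmed by this fragment.
def _queue_next_chunk(q, email, name, latlon, requesttime, filepattern,
                      fileindex, reccount, fromapi, source, next_cursor,
                      total_res_counts):
    """Sketch: re-enqueue the chunk writer with the next search cursor."""
    taskqueue.add(
        url='/service/download/write',  # assumed route for post() above
        queue_name='downloadwrite',     # assumed queue name
        params=dict(
            q=json.dumps(q), email=email, name=name, latlon=latlon,
            requesttime=requesttime, filepattern=filepattern,
            fileindex=fileindex + 1, reccount=reccount, fromapi=fromapi,
            source=source, cursor=next_cursor.web_safe_string,
            res_counts=json.dumps(total_res_counts)))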
def get(self):
    count, keywords, email, name = map(
        self.request.get, ['count', 'keywords', 'email', 'name'])
    q = ' '.join(json.loads(keywords))
    latlon = self.request.headers.get('X-AppEngine-CityLatLong')
    fromapi = self.request.get('api')
    countonly = self.request.get('countonly')

    # Force count to be an integer. count is a limit on the number of
    # records to download.
    count = int(str(count))

    source = 'DownloadPortal'
    if fromapi is not None and len(fromapi) > 0:
        source = 'DownloadAPI'
        # Try to send an indicator to the browser if it came from one.
        body = ''
        if countonly is not None and len(countonly) > 0:
            body = 'Counting results:<br>'
            source = 'CountAPI'
        else:
            body = 'Downloading results:<br>'
        if email is None or len(email) == 0 or email == 'None':
            body += 'ERROR: You must provide an email address.'
        else:
            body += 'File name: %s<br>' % name
            body += 'Email: %s<br>' % email
            body += 'Keywords: %s<br>' % keywords
            body += 'X-AppEngine-CityLatLong: %s<br>' % latlon
            body += 'Source: %s<br>' % source
            body += 'API: %s<br>' % fromapi
            body += 'len(API): %s<br>' % len(fromapi)
            body += 'Request headers: %s<br>' % self.request.headers
        self.response.out.write(body)
        logging.info('API download request. API: %s Source: %s Count: %s '
                     'Keywords: %s Email: %s Name: %s LatLon: %s\n'
                     'Version: %s'
                     % (fromapi, source, count, keywords, email, name,
                        latlon, DOWNLOAD_VERSION))
        if email is None or len(email) == 0 or email == '*****@*****.**':
            logging.info('Ignoring download request from email: %s. '
                         'Version: %s' % (email, DOWNLOAD_VERSION))
            return
    else:
        logging.info('Portal download request. API: %s Source: %s Count: %s '
                     'Keywords: %s Email: %s Name: %s LatLon: %s\n'
                     'Version: %s'
                     % (fromapi, source, count, keywords, email, name,
                        latlon, DOWNLOAD_VERSION))

    if count == 0 or count > SEARCH_CHUNK_SIZE:
        # count == 0 means no limit was given; in either case the results
        # may exceed SEARCH_CHUNK_SIZE, so compose a file for download via
        # the task queue.
        self._queue(q, email, name, latlon, fromapi, source, countonly)
    else:
        # The results fit within SEARCH_CHUNK_SIZE; stream them directly
        # and keep a copy of the file in the download bucket.
        filename = str('%s.txt' % name)
        self.response.headers['Content-Type'] = 'text/tab-separated-values'
        self.response.headers['Content-Disposition'] = \
            'attachment; filename=%s' % filename
        records, cursor, count, query_version = vnsearch.query(q, count)
        record_count = len(records)
        # logging.debug('%s: RECORDS: %s' % (DOWNLOAD_VERSION, records))

        # Build a dict of per-resource record counts.
        res_counts = vnutil.search_resource_counts(records)

        # Prepend the header to the TSV output.
        data = '%s\n%s' % (vnutil.download_header(), _get_tsv_chunk(records))

        # Release the records and garbage-collect to conserve memory.
        records = None
        gc.collect()

        # Write the data to the response.
        self.response.out.write(data)

        # Write a single chunk to a file in DOWNLOAD_BUCKET.
        filepattern = '%s-%s' % (name, uuid.uuid4().hex)
        filename = '/%s/%s.%s' % (DOWNLOAD_BUCKET, filepattern,
                                  FILE_EXTENSION)

        # Parameters for the coming apitracker task.
        apitracker_params = dict(
            api_version=fromapi, count=record_count, download=filename,
            downloader=email, error=None, latlon=latlon,
            matching_records=record_count, query=q,
            query_version=query_version, request_source=source,
            response_records=record_count,
            res_counts=json.dumps(res_counts), type='download')

        max_retries = 2
        retry_count = 0
        success = False
        while not success and retry_count < max_retries:
            try:
                with gcs.open(filename, 'w',
                              content_type='text/tab-separated-values',
                              options={'x-goog-acl': 'public-read'}) as f:
                    f.write(data)
                    success = True
                    # Release the data and garbage-collect to conserve
                    # memory.
                    data = None
                    gc.collect()
                    # logging.info('Sending small res_counts to apitracker: '
                    #     '%s' % res_counts)
                    taskqueue.add(url='/apitracker',
                                  params=apitracker_params,
                                  queue_name='apitracker')
            except Exception as e:
                logging.error('Error writing small result set to %s.\n'
                              'Error: %s\nVersion: %s'
                              % (filename, e, DOWNLOAD_VERSION))
                retry_count += 1
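# _get_tsv_chunk and _queue are used above but defined elsewhere in the
# module. As illustration only, a hypothetical version of _get_tsv_chunk,
# under the assumption that each record is a sequence of field values
# ordered to match vnutil.download_header():
def _get_tsv_chunk_sketch(records):
    """Hypothetical sketch: serialize records as tab-separated rows."""
    rows = []
    for record in records:
        rows.append('\t'.join(unicode(v) for v in record))
    return '\n'.join(rows)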
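# Both handlers repeat the same two-attempt GCS write loop. A shared helper,
# sketched here as a refactoring suggestion rather than as part of the
# original module, would remove the duplication:
def _write_gcs_file(filename, payload, header=None, max_retries=2):
    """Sketch: write payload to GCS with retries; returns True on success."""
    for attempt in range(max_retries):
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                if header is not None:
                    f.write('%s\n' % header)
                f.write(payload)
            return True
        except Exception as e:
            logging.error('GCS write failed for %s (attempt %s of %s): %s'
                          % (filename, attempt + 1, max_retries, e))
    return False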