Example #1
    def post(self):
        q, email, name, latlon = map(self.request.get, ['q', 'email', 'name', 'latlon'])
        q = json.loads(q)
        requesttime = self.request.get('requesttime')
        filepattern = self.request.get('filepattern')
        fileindex = int(self.request.get('fileindex'))
        reccount = int(self.request.get('reccount'))
        fromapi = self.request.get('fromapi')
        source = self.request.get('source')
        filename = '/%s/%s-%s.%s' % (TEMP_BUCKET, filepattern, fileindex, FILE_EXTENSION)
        cursor = self.request.get('cursor')

        try:
            total_res_counts = json.loads(self.request.get('res_counts'))
        except ValueError:
            total_res_counts = {}

        if cursor:
            curs = search.Cursor(web_safe_string=cursor)
        else:
            curs = None

        # Write single chunk to file, GCS does not support append
        records, next_cursor, count, query_version = \
            vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
        this_record_count = len(records)
        # Build dict for search counts
        res_counts = vnutil.search_resource_counts(records, total_res_counts)

        # Now merge the two dictionaries, summing counts
        if not total_res_counts:
            total_res_counts = res_counts
        else:
            for r in res_counts:
                total_res_counts[r] = total_res_counts.get(r, 0) + res_counts[r]

        # Update the total number of records retrieved
        reccount = reccount + this_record_count

        # Make a chunk to write to a file
        chunk = '%s\n' % _get_tsv_chunk(records)
        # Cleanup records in attempt to conserve memory
        records = None
        # Attempt to keep memory usage at a minimum by garbage collecting
        gc.collect()
        
        if fileindex == 0 and not next_cursor:
            # This is a query with fewer than SEARCH_CHUNK_SIZE results
            filename = '/%s/%s.%s' % (TEMP_BUCKET, filepattern, FILE_EXTENSION)

        max_retries = 2
        retry_count = 0
        success = False
        while not success and retry_count < max_retries:
            try:
                with gcs.open(filename, 'w', content_type='text/tab-separated-values',
                             options={'x-goog-acl': 'public-read'}) as f:
                    if fileindex == 0:
                        f.write('%s\n' % vnutil.download_header())
                    f.write(chunk)
                    success = True
                    # Cleanup chunk in attempt to conserve memory
                    chunk = None
                    # Attempt to keep memory usage at a minimum by garbage collecting
                    gc.collect()
#                    logging.info('Download chunk saved to %s: Total %s records. Has next \
#cursor: %s \nVersion: %s' 
#                        % (filename, reccount, not next_cursor is None, DOWNLOAD_VERSION))
            except Exception as e:
                logging.error("Error writing chunk to FILE: %s for\nQUERY: %s \
Error: %s\nVersion: %s" % (filename, q, e, DOWNLOAD_VERSION))
                retry_count += 1
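
A note on the paging pattern: the worker above fetches one chunk per invocation and resumes from a web-safe Search API cursor passed in the task payload. A minimal sketch of that pattern with the App Engine Search API follows; the function and index names are assumptions for illustration, not part of the original code.

from google.appengine.api import search

def fetch_chunk(index_name, query_string, limit, websafe_cursor=None):
    # Resume from the cursor handed over by the previous task, or start
    # at the beginning of the result set.
    if websafe_cursor:
        curs = search.Cursor(web_safe_string=websafe_cursor)
    else:
        curs = search.Cursor()
    options = search.QueryOptions(limit=limit, cursor=curs)
    results = search.Index(name=index_name).search(
        search.Query(query_string=query_string, options=options))
    # results.cursor is None once the result set is exhausted.
    next_cursor = results.cursor.web_safe_string if results.cursor else None
    return results.results, next_cursor
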
Example #2
    def post(self):
        q, email, name, latlon = map(self.request.get,
                                     ['q', 'email', 'name', 'latlon'])
        q = json.loads(q)
        requesttime = self.request.get('requesttime')
        filepattern = self.request.get('filepattern')
        fileindex = int(self.request.get('fileindex'))
        reccount = int(self.request.get('reccount'))
        fromapi = self.request.get('fromapi')
        source = self.request.get('source')
        filename = '/%s/%s-%s.%s' % (TEMP_BUCKET, filepattern, fileindex,
                                     FILE_EXTENSION)
        cursor = self.request.get('cursor')

        try:
            total_res_counts = json.loads(self.request.get('res_counts'))
        except ValueError:
            total_res_counts = {}

        if cursor:
            curs = search.Cursor(web_safe_string=cursor)
        else:
            curs = None

        # Write single chunk to file, GCS does not support append
        records, next_cursor, count, query_version = \
            vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
        this_record_count = len(records)
        # Build dict for search counts
        res_counts = vnutil.search_resource_counts(records, total_res_counts)

        # Now merge the two dictionaries, summing counts
        if not total_res_counts:
            total_res_counts = res_counts
        else:
            for r in res_counts:
                total_res_counts[r] = total_res_counts.get(r, 0) + res_counts[r]

        # Update the total number of records retrieved
        reccount = reccount + this_record_count

        # Make a chunk to write to a file
        chunk = '%s\n' % _get_tsv_chunk(records)
        # Cleanup records in attempt to conserve memory
        records = None
        # Attempt to keep memory usage at a minimum by garbage collecting
        gc.collect()

        if fileindex == 0 and not next_cursor:
            # This is a query with fewer than SEARCH_CHUNK_SIZE results
            filename = '/%s/%s.%s' % (TEMP_BUCKET, filepattern, FILE_EXTENSION)

        max_retries = 2
        retry_count = 0
        success = False
        while not success and retry_count < max_retries:
            try:
                with gcs.open(filename,
                              'w',
                              content_type='text/tab-separated-values',
                              options={'x-goog-acl': 'public-read'}) as f:
                    if fileindex == 0:
                        f.write('%s\n' % vnutil.download_header())
                    f.write(chunk)
                    success = True
                    # Cleanup chunk in attempt to conserve memory
                    chunk = None
                    # Attempt to keep memory usage at a minimum by garbage collecting
                    gc.collect()
#                    logging.info('Download chunk saved to %s: Total %s records. Has next \
#cursor: %s \nVersion: %s'
#                        % (filename, reccount, not next_cursor is None, DOWNLOAD_VERSION))
            except Exception as e:
                logging.error("Error writing chunk to FILE: %s for\nQUERY: %s \
Error: %s\nVersion: %s" % (filename, q, e, DOWNLOAD_VERSION))
                retry_count += 1
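
Both versions inline the same write-with-retry loop. Distilled into a helper it looks roughly like the sketch below; the function name is hypothetical, and gcs is assumed to be the bundled cloudstorage client.

import logging
import cloudstorage as gcs

def write_gcs_object(filename, payload, max_retries=2):
    # GCS objects cannot be appended to, so each chunk is written as a
    # complete object in a single open/write/close cycle.
    for attempt in range(max_retries):
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                f.write(payload)
            return True
        except Exception as e:
            logging.error('Write attempt %s of %s to %s failed: %s',
                          attempt + 1, max_retries, filename, e)
    return False
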
Example #3
    def get(self):
        count, keywords, email, name = map(self.request.get, 
            ['count', 'keywords', 'email', 'name'])
        q = ' '.join(json.loads(keywords))
        latlon = self.request.headers.get('X-AppEngine-CityLatLong')
        fromapi = self.request.get('api')
        countonly = self.request.get('countonly')
        # Force count to be an integer
        # count is a limit on the number of records to download
        count = int(count)

        source = 'DownloadPortal'
        if fromapi is not None and len(fromapi) > 0:
            source = 'DownloadAPI'
            # Try to send an indicator to the browser if it came from one.
            body = ''
            if countonly is not None and len(countonly) > 0:
                body = 'Counting results:<br>'
                source = 'CountAPI'
            else:
                body = 'Downloading results:<br>'
            if email is None or len(email) == 0 or email == 'None':
                body += 'ERROR: You must provide an email address.'
            else:
                body += 'File name: %s<br>' % name
                body += 'Email: %s<br>' % email
                body += 'Keywords: %s<br>' % keywords
                body += 'X-AppEngine-CityLatLong: %s<br>' % latlon
                body += 'Source: %s<br>' % source
                body += 'API: %s<br>' % fromapi
                body += 'len(API): %s<br>' % len(fromapi)
                body += 'Request headers: %s<br>' % self.request.headers
            
            self.response.out.write(body)
            logging.info('API download request. API: %s Source: %s Count: %s \
Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s' 
                % (fromapi, source, count, keywords, email, name, latlon, 
                DOWNLOAD_VERSION) )
            if email is None or len(email) == 0:
                return
        else:
            logging.info('Portal download request. API: %s Source: %s Count: %s \
Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s' 
                % (fromapi, source, count, keywords, email, name, latlon, 
                DOWNLOAD_VERSION) )

        if count == 0 or count > SEARCH_CHUNK_SIZE:
            # The results are larger than SEARCH_CHUNK_SIZE, compose a file for download
            self._queue(q, email, name, latlon, fromapi, source, countonly)
        else:
            # The results are smaller than SEARCH_CHUNK_SIZE, download directly and make
            # a copy of the file in the download bucket
            filename = str('%s.txt' % name)
            self.response.headers['Content-Type'] = "text/tab-separated-values"
            self.response.headers['Content-Disposition'] = "attachment; filename=%s" \
                % filename
            records, cursor, count, query_version = vnsearch.query(q, count)
            record_count = len(records)
            # logging.debug('%s: RECORDS: %s' % (DOWNLOAD_VERSION, records))

            # Build dictionary for search counts
            res_counts = vnutil.search_resource_counts(records)

            # Write the header for the output file 
            data = '%s\n%s' % (vnutil.download_header(), _get_tsv_chunk(records))
            # Cleanup records in attempt to conserve memory
            records = None
            # Attempt to keep memory usage at a minimum by garbage collecting
            gc.collect()
            # Write the data
            self.response.out.write(data)

            # Write single chunk to file in DOWNLOAD_BUCKET
            filepattern = '%s-%s' % (name, uuid.uuid4().hex)
            filename = '/%s/%s.%s' % (DOWNLOAD_BUCKET, filepattern, 
                FILE_EXTENSION)

            # Parameters for the coming apitracker taskqueue
            apitracker_params = dict(
                api_version=fromapi, count=record_count, download=filename, 
                downloader=email, error=None, latlon=latlon, 
                matching_records=record_count, query=q, query_version=query_version, 
                request_source=source, response_records=record_count, 
                res_counts=json.dumps(res_counts), type='download')

            max_retries = 2
            retry_count = 0
            success = False
            while not success and retry_count < max_retries:
                try:
                    with gcs.open(filename, 'w', content_type='text/tab-separated-values',
                            options={'x-goog-acl': 'public-read'}) as f:
                        f.write(data)
                        success = True
                        # Cleanup data in attempt to conserve memory
                        data = None
                        # Attempt to keep memory usage at a minimum by garbage collecting
                        gc.collect()
#                        logging.info('Sending small res_counts to apitracker: %s' 
#                            % res_counts ) 
                        taskqueue.add(url='/apitracker', params=apitracker_params, 
                            queue_name="apitracker") 
                except Exception as e:
                    logging.error("Error writing small result set to %s.\nError: %s \n\
Version: %s" % (filename, e, DOWNLOAD_VERSION))
                    retry_count += 1
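
For result sets larger than SEARCH_CHUNK_SIZE the handler defers to self._queue, which is not shown in this example. A plausible sketch of that hand-off with the App Engine task queue API is below; the worker URL, queue name, and parameter set are assumptions for illustration.

import json
import uuid
from google.appengine.api import taskqueue

def _queue(self, q, email, name, latlon, fromapi, source, countonly):
    # Hypothetical hand-off: enqueue the first chunk-writing task; the
    # worker re-enqueues itself with next_cursor until the query is done.
    taskqueue.add(
        url='/service/download/write',  # assumed worker endpoint
        params=dict(q=json.dumps(q), email=email, name=name, latlon=latlon,
                    fromapi=fromapi, source=source, countonly=countonly,
                    filepattern='%s-%s' % (name, uuid.uuid4().hex),
                    fileindex=0, reccount=0),
        queue_name='downloads')  # assumed queue name
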
Example #4
    def get(self):
        count, keywords, email, name = map(
            self.request.get, ['count', 'keywords', 'email', 'name'])

        q = ' '.join(json.loads(keywords))
        latlon = self.request.headers.get('X-AppEngine-CityLatLong')
        fromapi = self.request.get('api')
        countonly = self.request.get('countonly')
        # Force count to be an integer
        # count is a limit on the number of records to download
        count = int(str(count))

        source = 'DownloadPortal'
        if fromapi is not None and len(fromapi) > 0:
            source = 'DownloadAPI'
            # Try to send an indicator to the browser if it came from one.
            body = ''
            if countonly is not None and len(countonly) > 0:
                body = 'Counting results:<br>'
                source = 'CountAPI'
            else:
                body = 'Downloading results:<br>'
            if email is None or len(email) == 0 or email == 'None':
                body += 'ERROR: You must provide an email address.'
            else:
                body += 'File name: %s<br>' % name
                body += 'Email: %s<br>' % email
                body += 'Keywords: %s<br>' % keywords
                body += 'X-AppEngine-CityLatLong: %s<br>' % latlon
                body += 'Source: %s<br>' % source
                body += 'API: %s<br>' % fromapi
                body += 'len(API): %s<br>' % len(fromapi)
                body += 'Request headers: %s<br>' % self.request.headers

            self.response.out.write(body)
            logging.info(
                'API download request. API: %s Source: %s Count: %s Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
                % (fromapi, source, count, keywords, email, name, latlon,
                   DOWNLOAD_VERSION))
            if email is None or len(email) == 0 or email == '*****@*****.**':
                logging.info(
                    'Ignoring download request from email: %s. Version: %s' %
                    (email, DOWNLOAD_VERSION))
                return
        else:
            logging.info(
                'Portal download request. API: %s Source: %s Count: %s Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
                % (fromapi, source, count, keywords, email, name, latlon,
                   DOWNLOAD_VERSION))

        if count == 0 or count > SEARCH_CHUNK_SIZE:
            # The results are larger than SEARCH_CHUNK_SIZE, compose a file for download
            self._queue(q, email, name, latlon, fromapi, source, countonly)
        else:
            # The results are smaller than SEARCH_CHUNK_SIZE, download directly and make
            # a copy of the file in the download bucket
            filename = str('%s.txt' % name)
            self.response.headers['Content-Type'] = "text/tab-separated-values"
            self.response.headers['Content-Disposition'] = "attachment; filename=%s" \
                % filename
            records, cursor, count, query_version = vnsearch.query(q, count)
            record_count = len(records)
            # logging.debug('%s: RECORDS: %s' % (DOWNLOAD_VERSION, records))

            # Build dictionary for search counts
            res_counts = vnutil.search_resource_counts(records)

            # Write the header for the output file
            data = '%s\n%s' % (vnutil.download_header(),
                               _get_tsv_chunk(records))
            # Cleanup records in attempt to conserve memory
            records = None
            # Attempt to keep memory usage at a minimum by garbage collecting
            gc.collect()
            # Write the data
            self.response.out.write(data)

            # Write single chunk to file in DOWNLOAD_BUCKET
            filepattern = '%s-%s' % (name, uuid.uuid4().hex)
            filename = '/%s/%s.%s' % (DOWNLOAD_BUCKET, filepattern,
                                      FILE_EXTENSION)

            # Parameters for the coming apitracker taskqueue
            apitracker_params = dict(api_version=fromapi,
                                     count=record_count,
                                     download=filename,
                                     downloader=email,
                                     error=None,
                                     latlon=latlon,
                                     matching_records=record_count,
                                     query=q,
                                     query_version=query_version,
                                     request_source=source,
                                     response_records=record_count,
                                     res_counts=json.dumps(res_counts),
                                     type='download')

            max_retries = 2
            retry_count = 0
            success = False
            while not success and retry_count < max_retries:
                try:
                    with gcs.open(filename,
                                  'w',
                                  content_type='text/tab-separated-values',
                                  options={'x-goog-acl': 'public-read'}) as f:
                        f.write(data)
                        success = True
                        # Cleanup data in attempt to conserve memory
                        data = None
                        # Attempt to keep memory usage at a minimum by garbage collecting
                        gc.collect()
#                        logging.info('Sending small res_counts to apitracker: %s'
#                            % res_counts)
                        taskqueue.add(url='/apitracker',
                                      params=apitracker_params,
                                      queue_name="apitracker")
                except Exception as e:
                    logging.error(
                        "Error writing small result set to %s.\nError: %s \n\
Version: %s" % (filename, e, DOWNLOAD_VERSION))
                    retry_count += 1
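
Because GCS objects cannot be appended to, each worker invocation writes its chunk to a separate object (filepattern-0, filepattern-1, ...), and a final step must stitch them into the single file the user downloads. A minimal sketch of that stitching under those assumptions:

import cloudstorage as gcs

def stitch_chunks(bucket, filepattern, extension, chunk_count, dest):
    # Concatenate /bucket/filepattern-0.ext through filepattern-(N-1).ext
    # into one object. Newer cloudstorage clients also expose gcs.compose,
    # which merges objects server-side without re-reading the chunks.
    with gcs.open(dest, 'w',
                  content_type='text/tab-separated-values') as out:
        for i in range(chunk_count):
            src = '/%s/%s-%s.%s' % (bucket, filepattern, i, extension)
            with gcs.open(src) as part:
                out.write(part.read())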