def get(self, resource):
    """Log a summary of the ``url.out`` entries of a zipped blob, then redirect.

    ``resource`` is URL-unquoted and used as the blob key handed to
    ``get_reader``; the result is opened as a zip archive. Every tuple
    yielded by ``all_matching_files`` for the member ``'url.out'`` is
    collected into an id -> text mapping (a later duplicate id overwrites
    an earlier one). Nothing is written to the response body; the handler
    only logs and finally redirects to ``/blobs2``.

    Args:
        resource: URL-quoted blob key taken from the request path.
    """
    blob_key = str(urllib.unquote(resource))
    blob_reader = get_reader(blob_key)
    zip_reader = zipfile.ZipFile(blob_reader)
    # try/finally rather than ``with``: it releases the archive on every
    # path without depending on ZipFile's context-manager support.
    try:
        logging.info('contents: %s', zip_reader.namelist())
        urls = {}
        matches = all_matching_files(zip_reader, 'url.out', url_file_pattern)
        for lno, mno, _id, text in matches:
            urls[_id] = text
            # Only entries with lno < 3 are logged individually, which keeps
            # the log short for large files.
            if lno < 3:
                logging.info(' match %d (line %d): %s: %s', mno, lno, _id, text)
    finally:
        zip_reader.close()
    logging.info('url.out: %d ids', len(urls))
    self.redirect('/blobs2')
def get(self, dataset_name, file_id):
    """Write the text stored under ``file_id`` in the zipped blob ``dataset_name``.

    Iterates the pairs returned by ``get_all_blob_info`` and picks the first
    blob whose ``filename`` equals ``dataset_name``. That blob is opened as a
    zip archive and every member is scanned with ``all_matching_files``; the
    text of the first entry whose id equals ``file_id`` is written to the
    response, with literal backslash-n sequences replaced by spaces.

    If the blob exists but holds no such id, or no blob has that filename,
    a small HTML page with the corresponding "not found" message is written
    instead.

    Args:
        dataset_name: Filename of the blob to search.
        file_id: Id of the entry whose text should be returned.
    """
    # Function-scope import so the block stays self-contained; ``html`` is
    # the modern home of ``escape`` and ``cgi`` the legacy one.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def cleanup(text):
        # Replaces the two-character sequence backslash + 'n', not a newline.
        return text.replace('\\n', ' ')

    def write_not_found(message):
        # Both ids come straight from the request, so the message must be
        # HTML-escaped before it is embedded in markup.
        self.response.out.write(
            '<html><body><p>%s</p></body></html>' % escape(message, True))

    for blob_info, blob_reader in get_all_blob_info():
        if blob_info.filename != dataset_name:
            continue
        zip_reader = zipfile.ZipFile(blob_reader)
        # try/finally closes the archive on the early return as well.
        try:
            for member in zip_reader.namelist():
                matches = all_matching_files(
                    zip_reader, member, text_file_pattern)
                for _lno, _mno, _id, text in matches:
                    if file_id == _id:
                        self.response.out.write(cleanup(text))
                        return
        finally:
            zip_reader.close()
        # Only the first blob with a matching filename is searched.
        write_not_found('ID %s not found' % file_id)
        return
    write_not_found('Blob %s not found' % dataset_name)