def handler(conn):
    """Stream a tar archive from ``conn`` and unpack it under ``args.extract_to``.

    The response body is wrapped with ``ReadHttpResponse`` and read as a
    forward-only tar stream. Members whose destination would fall outside the
    extraction directory are skipped instead of being written.

    ``args`` and ``kwargs`` are not parameters; they are taken from the
    enclosing scope (not visible in this block).

    Args:
        conn: connection object handed to ``ReadHttpResponse``.

    Returns:
        A dict with two entries:
          'extracted': {'filecount': int, 'bytes': int}
          'skipped': {'filecount': int, 'bytes': int, 'names': [str, ...]}
    """
    ret = {
        'extracted': {
            'filecount': 0,
            'bytes': 0,
        },
        'skipped': {
            'filecount': 0,
            'bytes': 0,
            'names': [],
        },
    }

    # Normalise the destination once. Comparing against the raw
    # `args.extract_to` string would (a) never match when it is relative or
    # un-normalised, and (b) accept sibling directories that merely share a
    # string prefix (e.g. `/tmp/out-evil` for a root of `/tmp/out`).
    extract_root = os.path.abspath(args.extract_to)
    # Trailing separator so that the prefix test only matches on a directory
    # boundary.
    extract_prefix = os.path.join(extract_root, '')

    fileobj = ReadHttpResponse(conn, **kwargs)
    with tarfile.open(mode='r|*', fileobj=fileobj) as tf:
        # monkeypatch the TarFile object to allow printing messages and
        # collecting stats for each extracted file. extractall makes a single
        # linear pass over the tarfile, which is compatible with
        # ReadHttpResponse; other naive implementations (such as `getmembers`)
        # do random access over the file and would require buffering the whole
        # thing (!!).
        em = tf._extract_member

        # Extra positional/keyword arguments are forwarded untouched so the
        # wrapper keeps working if the wrapped private method is called with
        # more than (tarinfo, targetpath).
        def _extract_member(tarinfo, targetpath, *em_args, **em_kwargs):
            resolved = os.path.abspath(targetpath)
            inside_root = (resolved == extract_root or
                           resolved.startswith(extract_prefix))
            if not inside_root:
                print('Skipping %s' % (tarinfo.name,))
                ret['skipped']['filecount'] += 1
                ret['skipped']['bytes'] += tarinfo.size
                ret['skipped']['names'].append(tarinfo.name)
                return

            print('Extracting %s' % (tarinfo.name,))
            ret['extracted']['filecount'] += 1
            ret['extracted']['bytes'] += tarinfo.size
            return em(tarinfo, targetpath, *em_args, **em_kwargs)

        tf._extract_member = _extract_member
        # NOTE(review): only the member's own destination path is checked;
        # link targets inside the archive are not validated here.
        tf.extractall(args.extract_to)
    return ret
def main(args):
    """Fetch ``args.url`` and dump the decoded response to ``args.json_file``.

    The response format is taken from the URL's ``format`` query parameter
    (last occurrence wins, compared case-insensitively) and defaults to
    ``json`` when the parameter is absent:

      * ``json``: the response is decoded with ``ReadHttpJsonResponse``.
      * ``text``: the raw body is wrapped as ``{'value': <body>}``; an empty
        body counts as "no result".

    A request that yields no result (``None``) is retried up to
    ``args.attempts`` times (at least once), sleeping 2, 4, 8, ... seconds
    between attempts. Whatever the final attempt produced, including
    ``None``, is written to ``args.json_file`` as JSON.

    Args:
        args: object providing the attributes ``url``, ``attempts``,
            ``quiet`` and ``json_file``.

    Raises:
        Error: if the URL scheme is not http or https.
        ValueError: if the requested format is neither ``json`` nor ``text``.
    """
    parsed_url = urlparse.urlparse(args.url)
    # Exact match: a prefix test would also accept schemes such as "httpx".
    if parsed_url.scheme not in ('http', 'https'):
        raise Error('Invalid URI scheme (expected http or https): %s' % args.url)

    qdict = {}
    if parsed_url.query:
        qdict.update(urlparse.parse_qs(parsed_url.query))

    # Pick the response format from the query string: the last `format` value
    # wins, and JSON is assumed when none is given.
    # NOTE(review): in the default case no `format` parameter is added to the
    # request even though the response is parsed as JSON -- confirm the server
    # answers with JSON by default.
    requested_formats = qdict.get('format')
    raw_format = requested_formats[-1] if requested_formats else 'json'
    # Compare case-insensitively so spellings like "JSON" / "TEXT" work; the
    # query string sent to the server keeps the caller's original spelling.
    f = raw_format.lower()
    # Validate before any connection is opened.
    if f not in ('json', 'text'):
        raise ValueError('Unknown format: %s' % (raw_format,))

    path = parsed_url.path
    if qdict:
        path = '%s?%s' % (path, urllib.urlencode(qdict, doseq=True))

    # Always try at least once, without modifying the caller's `args`.
    attempts = max(1, args.attempts)
    retry_delay_seconds = 2
    for i in xrange(attempts):
        conn = CreateHttpConn(parsed_url.netloc, path)
        # Log before reading so the message reflects work in progress.
        logging.info('Reading from %s (%d/%d)',
                     conn.req_params['url'], (i + 1), attempts)

        if f == 'json':
            result = ReadHttpJsonResponse(conn)
        else:
            # Text fetching will pack the text into structured JSON for
            # export to recipe module; an empty body is treated as no result.
            text = ReadHttpResponse(conn).read()
            result = {'value': text} if text else None

        if result is not None or (i + 1) >= attempts:
            if not args.quiet:
                logging.info('Read from %s (%d/%d): %s',
                             conn.req_params['url'], (i + 1), attempts, result)
            break

        logging.error("Request returned empty result; sleeping %d seconds",
                      retry_delay_seconds)
        time.sleep(retry_delay_seconds)
        # Exponential back-off between attempts.
        retry_delay_seconds *= 2

    with open(args.json_file, 'w') as json_file:
        json.dump(result, json_file)
def handler(conn):
    """Read the full response body from ``conn`` and wrap it for JSON export.

    Args:
        conn: connection object handed to ``ReadHttpResponse``.

    Returns:
        ``{'value': <body>}`` when the body is non-empty, otherwise ``None``.
    """
    body = ReadHttpResponse(conn).read()
    if body:
        # Wrap in a structured JSON for export to recipe module.
        return {'value': body}
    return None
def handler(conn):
    """Read the full response body from ``conn`` and wrap it for JSON export.

    ``kwargs`` is taken from the enclosing scope (not visible in this block)
    and forwarded to ``ReadHttpResponse``.

    Args:
        conn: connection object handed to ``ReadHttpResponse``.

    Returns:
        A dict ``{'value': <body>}``; an empty (falsy) body is stored as
        ``None``. The dict itself is always returned.
    """
    body = ReadHttpResponse(conn, **kwargs).read()
    if not body:
        body = None
    # Wrap in a structured JSON for export to recipe module.
    return {'value': body}