return (record, (), offset) def trim(self, stream): return () def get_header_list(self, values): return zip(self.headers, values) @staticmethod def get_content_headers(headers): content_type = None content_length = None errors = [] for name, value in headers: if type_rx.match(name): if value: content_type = value else: errors.append(('invalid header', name, value)) elif length_rx.match(name): try: content_length = int(value) except ValueError: errors.append(('invalid header', name, value)) return content_type, content_length, errors register_record_type(re.compile('^filedesc://'), ArcRecord)
# NOTE(review): the line below is a whitespace-collapsed fragment. It is kept
# verbatim because its original indentation cannot be recovered from this view.
# In order, it contains:
#   1. The tail of a parsing routine. It starts at an `except` whose `try` is
#      not visible, stores `stream` and `content_length` on the record and
#      returns `(record, (), offset)`.
#   2. Module-level setup: `blank_rx` is built from the pattern `^$`, and
#      WarcRecord is passed to register_record_type() once with `version_rx`
#      and once with `blank_rx`.
#   3. The start of make_response(). It builds a header list (TYPE=RESPONSE,
#      ID, DATE, URL, plus CONCURRENT_TO when `request_id` is truthy) and
#      constructs a WarcRecord. The function appears to be cut off after that
#      point -- confirm against the full file.
except ValueError: record.error('invalid header', name, value) # have read blank line following headers record.content_file = stream record.content_file.bytes_to_eoc = content_length # check mandatory headers # WARC-Type WARC-Date WARC-Record-ID Content-Length return (record, (), offset) blank_rx = rx(br'^$') register_record_type(version_rx, WarcRecord) register_record_type(blank_rx, WarcRecord) def make_response(id, date, url, content, request_id): # pylint: disable-msg=E1101 headers = [ (WarcRecord.TYPE, WarcRecord.RESPONSE), (WarcRecord.ID, id), (WarcRecord.DATE, date), (WarcRecord.URL, url), ] if request_id: headers.append((WarcRecord.CONCURRENT_TO, request_id)) record = WarcRecord(headers=headers, content=content)
# NOTE(review): the line below is a whitespace-collapsed fragment. It is kept
# verbatim because its original indentation cannot be recovered from this view.
# In order, it contains:
#   1. The tail of a routine that accumulates `errors` and returns that list.
#      It records 'trailing data after content' for a line, reads the next
#      line from `stream`, and records a missing-newlines error when
#      `newlines > 0`. How these statements nest is not recoverable here.
#   2. Module-level setup: `blank_rx` is built from the pattern `^$`, and
#      WarcRecord is passed to register_record_type() once with `version_rx`
#      and once with `blank_rx`.
#   3. The start of make_response(). It builds a header list (TYPE=RESPONSE,
#      ID, DATE, URL, plus CONCURRENT_TO when `request_id` is truthy). The
#      function is cut off after that point.
else: #print 'line', line, newlines newlines = 0 errors.append(('trailing data after content', line)) line = stream.readline() if newlines > 0: errors.append( ('less than two terminating newlines at end of record, missing', newlines)) return errors blank_rx = rx(r'^$') register_record_type(version_rx, WarcRecord) register_record_type(blank_rx, WarcRecord) def make_response(id, date, url, content, request_id): # pylint: disable-msg=E1101 headers = [ (WarcRecord.TYPE, WarcRecord.RESPONSE), (WarcRecord.ID, id), (WarcRecord.DATE, date), (WarcRecord.URL, url), ] if request_id: headers.append((WarcRecord.CONCURRENT_TO, request_id))
values = SPLIT(line, len(self.headers)-1) if len(self.headers) != len(values): raise StandardError('missing headers %s %s'%(",".join(values), ",".join(self.headers))) return zip(self.headers, values) @staticmethod def get_content_headers(headers): content_type = None content_length = None errors = [] for name, value in headers: if type_rx.match(name): if value: content_type = value else: errors.append(('invalid header', name, value)) elif length_rx.match(name): try: content_length = int(value) except ValueError: errors.append(('invalid header', name, value)) return content_type, content_length, errors register_record_type(re.compile('^filedesc://'), ArcRecord)