Ejemplo n.º 1
0
        return (record, (), offset)

    def trim(self, stream):
        """Return an empty tuple without reading anything from ``stream``.

        The ``stream`` argument is accepted but never touched by this
        implementation.
        """
        nothing_to_report = ()
        return nothing_to_report

    def get_header_list(self, values):
        """Pair each name in ``self.headers`` with the value at the same index.

        The pairing is done with ``zip``, so it stops at the shorter of the
        two sequences; surplus names or values are silently dropped.
        """
        header_names = self.headers
        return zip(header_names, values)

    @staticmethod
    def get_content_headers(headers):
        """Pick the content type and content length out of ``headers``.

        ``headers`` is an iterable of ``(name, value)`` pairs.  A name matched
        by ``type_rx`` supplies the content type (an empty value is recorded
        as an error instead); a name matched by ``length_rx`` supplies the
        content length, which must parse with ``int``.  When several headers
        match, the last valid one wins.

        Returns a ``(content_type, content_length, errors)`` tuple where
        ``errors`` is a list of ``('invalid header', name, value)`` tuples.
        Either of the first two items is ``None`` if no valid header was seen.
        """
        problems = []
        found_type = None
        found_length = None

        for header_name, header_value in headers:
            # The type pattern is tried first, so a name matching both
            # patterns is only ever treated as a type header.
            if type_rx.match(header_name):
                if not header_value:
                    problems.append(
                        ('invalid header', header_name, header_value))
                    continue
                found_type = header_value
                continue

            if not length_rx.match(header_name):
                continue

            # Only ValueError is handled here; a failed parse keeps any
            # previously accepted length.
            try:
                found_length = int(header_value)
            except ValueError:
                problems.append(('invalid header', header_name, header_value))

        return found_type, found_length, problems


# Register ArcRecord against lines that start with 'filedesc://'.
# NOTE(review): presumably register_record_type maps the pattern to the class
# so a parser can dispatch on a record's first line -- confirm at its definition.
register_record_type(re.compile('^filedesc://'), ArcRecord)
Ejemplo n.º 2
0
                        except ValueError:
                            record.error('invalid header', name, value)

            # have read blank line following headers

            record.content_file = stream
            record.content_file.bytes_to_eoc = content_length

            # check mandatory headers
            # WARC-Type WARC-Date WARC-Record-ID Content-Length

            return (record, (), offset)


# Bytes pattern '^$', i.e. an empty line.
# NOTE(review): rx is presumably a regex-compiling helper -- confirm.
blank_rx = rx(br'^$')
# WarcRecord is registered for both the version pattern and the blank-line
# pattern.
# NOTE(review): presumably this lets a parser dispatch to WarcRecord when a
# record starts with either kind of line -- confirm in register_record_type.
register_record_type(version_rx, WarcRecord)
register_record_type(blank_rx, WarcRecord)


def make_response(id, date, url, content, request_id):
    # pylint: disable-msg=E1101
    headers = [
        (WarcRecord.TYPE, WarcRecord.RESPONSE),
        (WarcRecord.ID, id),
        (WarcRecord.DATE, date),
        (WarcRecord.URL, url),
    ]
    if request_id:
        headers.append((WarcRecord.CONCURRENT_TO, request_id))

    record = WarcRecord(headers=headers, content=content)
Ejemplo n.º 3
0
                else:
                    #print 'line', line, newlines
                    newlines = 0
                    errors.append(('trailing data after content', line))
                line = stream.readline()
            if newlines > 0:
                errors.append(
                    ('less than two terminating newlines at end of record, missing',
                     newlines))

        return errors


# Text (str) pattern '^$', i.e. an empty line.
# NOTE(review): rx is presumably a regex-compiling helper -- confirm.
blank_rx = rx(r'^$')
# WarcRecord is registered for both the version pattern and the blank-line
# pattern.
# NOTE(review): presumably this lets a parser dispatch to WarcRecord when a
# record starts with either kind of line -- confirm in register_record_type.
register_record_type(version_rx, WarcRecord)
register_record_type(blank_rx, WarcRecord)


def make_response(id, date, url, content, request_id):
    # pylint: disable-msg=E1101
    headers = [
            (WarcRecord.TYPE, WarcRecord.RESPONSE),
            (WarcRecord.ID, id),
            (WarcRecord.DATE, date),
            (WarcRecord.URL, url),

    ]
    if request_id:
        headers.append((WarcRecord.CONCURRENT_TO, request_id))
Ejemplo n.º 4
0
                values = SPLIT(line, len(self.headers)-1)

        if len(self.headers) != len(values):
            raise StandardError('missing headers %s %s'%(",".join(values), ",".join(self.headers)))
                
        return zip(self.headers, values)


    @staticmethod
    def get_content_headers(headers):
        """Extract content type and content length from ``(name, value)`` pairs.

        Names are tested against ``type_rx`` first and ``length_rx`` second.
        A type header with an empty value, or a length header whose value
        ``int`` rejects with ``ValueError``, is reported in the error list
        rather than stored.  Later valid headers overwrite earlier ones.

        Returns ``(content_type, content_length, errors)``; the first two are
        ``None`` when no valid header was found, and ``errors`` holds
        ``('invalid header', name, value)`` tuples.
        """
        ctype = None
        clength = None
        issues = []

        for key, raw in headers:
            is_type_header = type_rx.match(key)
            if is_type_header and raw:
                ctype = raw
            elif is_type_header:
                # Type header present but its value is empty.
                issues.append(('invalid header', key, raw))
            elif length_rx.match(key):
                try:
                    clength = int(raw)
                except ValueError:
                    # Keep whatever length was accepted before.
                    issues.append(('invalid header', key, raw))

        return ctype, clength, issues


# Register ArcRecord against lines that start with 'filedesc://'.
# NOTE(review): presumably register_record_type maps the pattern to the class
# so a parser can dispatch on a record's first line -- confirm at its definition.
register_record_type(re.compile('^filedesc://'), ArcRecord)