コード例 #1
0
ファイル: utils.py プロジェクト: yingziwu/perma
def make_detailed_warcinfo(filename, guid, coll_title, coll_desc, rec_title, pages):
    """Return the buffered bytes of two warcinfo records for *filename*.

    The first record carries collection-level JSON metadata (title and
    description); the second carries recording-level JSON metadata (title
    and pages). Both records share the same operator, Perma-GUID and format
    fields and are written, in that order, through one writer created with
    ``gzip=True``.
    """
    # Credit for the approach: Rhizome / Webrecorder.io / Ilya Kreymer.

    # One JSON payload per record, in the order the records are emitted.
    metadata_payloads = (
        {'type': 'collection', 'title': coll_title, 'desc': coll_desc},
        {'type': 'recording', 'title': rec_title, 'pages': pages},
    )

    warc_buffer = BufferWARCWriter(gzip=True)

    # Fields common to both records; 'json-metadata' is filled in per record
    # and therefore always lands after these three.
    fields = OrderedDict()
    fields['operator'] = 'Perma.cc download'
    fields['Perma-GUID'] = guid
    fields['format'] = 'WARC File Format 1.0'

    for payload in metadata_payloads:
        fields['json-metadata'] = json.dumps(payload)
        warcinfo = warc_buffer.create_warcinfo_record(filename, fields)
        warc_buffer.write_record(warcinfo)

    return warc_buffer.get_contents()
コード例 #2
0
    def create_warcinfo(self, creator, title, metadata, source, filename):
        """Serialize one warcinfo record for *filename* and return its bytes.

        Entries of *source* whose key appears in ``self.COPY_FIELDS`` are
        copied into *metadata* (which is modified in place) before it is
        JSON-encoded into the record's ``json-metadata`` field. *creator*
        and *title* are stored as the ``creator`` and ``isPartOf`` fields.
        """
        for field, field_value in iteritems(source):
            if field not in self.COPY_FIELDS:
                continue
            metadata[field] = field_value

        # Insertion order determines the order of fields in the record.
        headers = OrderedDict()
        headers['software'] = 'Webrecorder Platform v' + __version__
        headers['format'] = 'WARC File Format 1.0'
        headers['creator'] = creator
        headers['isPartOf'] = title
        headers['json-metadata'] = json.dumps(metadata)

        buffer_writer = BufferWARCWriter()
        warcinfo = buffer_writer.create_warcinfo_record(filename, headers)
        buffer_writer.write_record(warcinfo)
        return buffer_writer.get_contents()
コード例 #3
0
    def create_warcinfo(self, creator, name, metadata, source, serialized, filename):
        """Serialize one warcinfo record for *filename* and return its bytes.

        Entries of *serialized* whose key appears in ``self.COPY_FIELDS``
        are copied into *metadata* (modified in place). When *metadata* has
        no truthy ``'title'``, a title is generated from
        ``self.DEFAULT_REC_TITLE`` and the ``'created_at'`` entry, and
        ``'auto_title'`` is set to ``True``. The record stores
        ``creator.name`` as ``creator`` and *name* as ``isPartOf``.
        """
        for field, field_value in iteritems(serialized):
            if field not in self.COPY_FIELDS:
                continue
            metadata[field] = field_value

        title_missing = not metadata.get('title')
        if title_missing:
            # Fall back to a generated title derived from the creation date.
            metadata['title'] = self.DEFAULT_REC_TITLE.format(
                source.to_iso_date(metadata['created_at'], no_T=True)
            )
            metadata['auto_title'] = True

        # Insertion order determines the order of fields in the record.
        headers = OrderedDict()
        headers['software'] = 'Webrecorder Platform v' + __version__
        headers['format'] = 'WARC File Format 1.0'
        headers['creator'] = creator.name
        headers['isPartOf'] = name
        headers['json-metadata'] = json.dumps(metadata)

        buffer_writer = BufferWARCWriter()
        warcinfo = buffer_writer.create_warcinfo_record(filename, headers)
        buffer_writer.write_record(warcinfo)
        return buffer_writer.get_contents()
コード例 #4
0
    def create_warcinfo(self, creator, name, metadata, source, serialized,
                        filename):
        """Build a warcinfo record for *filename*; return the written bytes.

        Keys of *serialized* that are listed in ``self.COPY_FIELDS`` are
        copied onto *metadata*, which is updated in place. If *metadata*
        then lacks a truthy ``'title'``, one is produced by formatting
        ``self.DEFAULT_REC_TITLE`` with the ISO date of ``'created_at'``
        and ``'auto_title'`` is flagged ``True``.
        """
        for field, field_value in iteritems(serialized):
            if field not in self.COPY_FIELDS:
                continue
            metadata[field] = field_value

        if not metadata.get('title'):
            # No usable title: generate one from the creation timestamp.
            metadata['title'] = self.DEFAULT_REC_TITLE.format(
                source.to_iso_date(metadata['created_at'], no_T=True),
            )
            metadata['auto_title'] = True

        # The list order fixes the order of fields in the record.
        header_pairs = [
            ('software', 'Webrecorder Platform v' + __version__),
            ('format', 'WARC File Format 1.0'),
            ('creator', creator.name),
            ('isPartOf', name),
            ('json-metadata', json.dumps(metadata)),
        ]
        headers = OrderedDict(header_pairs)

        buffer_writer = BufferWARCWriter()
        warcinfo = buffer_writer.create_warcinfo_record(filename, headers)
        buffer_writer.write_record(warcinfo)
        return buffer_writer.get_contents()