Exemple #1
0
def addToWarc(w, uri, data, f, mime):
    """Store ``data`` as a WARC resource record for ``uri`` in ``w``.

    Args:
        w: WARC file object; only its ``storeRecord`` method is used here.
        uri: target URI of the resource. Its host name is resolved through
            DNS to fill in the record's IP address.
        data: record payload, handed to the record as a string.
        f: parsed feed exposing ``feed.updated_parsed`` and ``feed.id``
            (presumably a feedparser result -- confirm against the caller).
        mime: content type recorded for ``data``.

    Raises:
        socket.error: (e.g. ``socket.gaierror``) if the host name of
            ``uri`` cannot be resolved.
    """
    o = urlparse.urlparse(uri)
    # Resolve the host before allocating the record, so a DNS failure
    # leaves nothing behind to clean up.
    ip = socket.gethostbyname(o.hostname)

    # warc-tools can't handle the feed's own updated date in the format
    # '2009-04-07T05:12:50+02:00', so the timestamp is rebuilt from the
    # parsed time fields instead.
    # NOTE(review): the trailing 'Z' claims UTC; this assumes updated_parsed
    # is already expressed in UTC -- confirm.
    t = f.feed.updated_parsed
    dt = datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                           t.tm_hour, t.tm_min, t.tm_sec)
    updated = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    record_id = str(f.feed.id)

    r = WRecord()
    try:
        # Every setter takes the value together with its explicit length.
        r.setRecordType(warc.WARC_RESOURCE_RECORD)
        r.setTargetUri(uri, len(uri))
        r.setDate(updated, len(updated))
        r.setContentType(mime, len(mime))
        r.setRecordId(record_id, len(record_id))
        r.setIpAddress(ip, len(ip))
        r.setContentFromString(data, len(data))

        w.storeRecord(r)
    finally:
        # Always release the record, even when a setter or the store fails.
        r.destroy()