def goTask(self):
    """Download every entry of ``self._urls`` and save it to disk.

    Each entry must be a mapping with a ``'uri'`` key; it may also carry an
    ``'xxhash'`` string holding the expected hash of the payload.

    For every entry this method:

    * increments ``self._taskCount``;
    * creates the target sub-directory when ``getDir(uri)`` yields a
      non-empty base;
    * fetches the URL and, when an expected hash is present, compares it
      with ``xxhash(payload, self._seed)``, counting the outcome in
      ``self._verifiCount`` or ``self._verifiFailed``;
    * writes the payload to the path returned by ``self.formatSub(uri)``
      and increments ``self._passedCount``.

    A hash mismatch is only reported and counted; the payload is still
    written.  An ``HTTPError`` prints ``failed`` and records the URL in
    ``self._failedUrl``; any other exception propagates to the caller.
    """
    for entry in self._urls:
        assert 'uri' in entry
        uri = entry['uri']
        has_hash = 'xxhash' in entry
        if has_hash:
            assert type(entry['xxhash']) is str

        self._taskCount += 1
        url = FormatMyUrl(uri)
        base, _ = getDir(uri)
        if len(base) > 0:
            # Double-underscore name: it is mangled with the class name, so
            # this method must stay inside its original class body.
            self.__makeSubDir(self.formatSub(base))

        print("get <%s> ... " % (url), end="")
        try:
            # A raw space in the URL makes urlopen hang, so escape it first.
            # NOTE(review): assumes self._subspace is a compiled pattern
            # matching spaces -- confirm where it is defined.
            url = self._subspace.sub('%20', url)
            response = urlopen(url)
            try:
                turi = self.formatSub(uri)
                chunk = response.read()
            finally:
                # Always release the connection, even if the read fails.
                response.close()

            if has_hash:
                thatHash = entry['xxhash']
                thisHash = xxhash(chunk, self._seed)
                if thatHash == thisHash:
                    print("<%s> OK" % (thisHash), end="")
                    self._verifiCount += 1
                else:
                    print("verification failed for <%s> from <%s>"
                          % (thisHash, thatHash), end="")
                    self._verifiFailed += 1

            with open(turi, "wb") as fout:
                fout.write(chunk)
            self._passedCount += 1
            print('.')
        except HTTPError as e:
            print('failed')
            self._failedUrl.append(url)
import sys
import os
from PyXxhashMod import xxhash

# Command-line entry point: read the file named by the first argument as
# bytes, hash it with xxhash using the second argument converted to int,
# and print the result.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Need more parameter <path key>")
        sys.exit(-1)

    source_path = cli_args[0]
    key_text = cli_args[1]

    with open(source_path, 'rb') as source:
        payload = source.read()

    digest = xxhash(payload, int(key_text))
    print(digest)