def processdata(engine, dictname, offset, totallen, input, output):
    """Feed one chunk of an uploaded dictionary into the import engine.

    Looks up (or creates) the DictData record for ``dictname``, transparently
    gunzips the chunk when the first chunk carried a gzip header, and hands
    the (possibly decompressed) bytes to the engine-specific handler.

    Args:
        engine: key into getfuncmap() selecting the per-engine handler.
        dictname: logical dictionary name used to look up DictData rows.
        offset: byte offset of this chunk within the whole upload.
        totallen: total length of the upload in bytes.
        input: raw bytes of this chunk.
        output: opaque value passed through to the engine handler.

    Returns:
        False when the engine is unknown; otherwise whatever the engine
        handler returns.  The handler is expected to put() dictdata.
    """
    logging.info('Enter processdata. offset=%d, totallen=%d, input len=%d, input crc=%d' % (offset, totallen, len(input), zlib.crc32(input)))
    functionmap = getfuncmap()
    # dict.has_key() was removed in Python 3; `in` is correct on both 2 and 3.
    if engine not in functionmap:
        logging.error('Engine %s not found in function map' % (engine,))
        return False

    # At most two DictData rows exist per name: the live ("ready") one and an
    # in-progress alternative being rebuilt.
    results = DictData.gql('WHERE dict_name = :1', dictname)
    if results.count() == 0:
        dictdata = DictData(dict_name=dictname, alternative=False, ready=False)
    elif results.count() == 1:
        if results[0].ready:
            # Start rebuilding in the *other* slot.  NOTE(review): the
            # original passed `alternative=not results[0].ready`, which is
            # always False inside this branch; flipping the alternative flag
            # matches the two-slot scheme used below — confirm with owner.
            dictdata = DictData(dict_name=dictname,
                                alternative=not results[0].alternative,
                                ready=False)
        else:
            dictdata = results[0]
    else:
        assert results.count() == 2
        # The original detected "both ready" by raising/catching
        # AssertionError, which is stripped under `python -O` and would
        # silently pick the wrong record; use explicit conditions instead.
        if results[0].ready and results[1].ready:
            # Both slots are ready: remove the older one and rebuild its slot.
            if results[0].timestamp < results[1].timestamp:
                alt = results[0].alternative
                results[0].delete()
            else:
                alt = results[1].alternative
                results[1].delete()
            dictdata = DictData(dict_name=dictname, alternative=alt, ready=False)
        elif results[0].ready:
            dictdata = results[1]
        else:
            dictdata = results[0]

    if offset == 0:
        # Sniff the first chunk for a gzip header to decide whether the
        # whole upload is compressed.
        gz = GzipStreamReader()
        gz.feed(input)
        try:
            gz.read_header()
            dictdata.zipped = True
        except IOError:
            dictdata.zipped = False
            del gz
    else:
        if dictdata.zipped:
            assert dictdata.zip_data is not None
            # NOTE: the pickled decompressor state comes from our own
            # datastore row, not from untrusted user input, so unpickling
            # here is acceptable.
            gz = pickle.loads(dictdata.zip_data)
            gz.feed(input)

    # Is this the final chunk of the upload?
    flush = offset + len(input) >= totallen
    if dictdata.zipped:
        if not flush:
            input = gz.read()
        else:
            input = gz.read() + gz.flush()
        # Persist decompressor state so the next chunk can resume the stream.
        dictdata.zip_data = pickle.dumps(gz)

    # dictdata is expected to be put in engine's function
    return functionmap[engine](dictdata, flush, input, output)