def WriteTempFile(self, data, hash_name=None):
    """Store a downloaded page body in the cache, gzip-compressed.

    Nothing is written when ``self.use_cache`` is falsy.  Otherwise the
    payload goes either to ``<download_temp><hash_name>.gz`` on disk or,
    when ``self.use_mongo`` is truthy, to GridFS in the ``parser``
    database of the MongoDB server on localhost:27017, using
    ``download_temp`` with the slashes removed as the collection name.

    Args:
        data: Raw (byte string) page content to cache.
        hash_name: Cache key.  When ``None`` the hex MD5 digest of
            ``self.url`` is used.

    Side effects:
        Sets ``self.last_hash_name`` to the key that was used.
    """
    if not self.use_cache:
        return

    if hash_name is None:
        url = self.url
        # md5() wants bytes; encode text URLs explicitly so a non-ASCII
        # URL does not blow up on the implicit ASCII conversion.
        if not isinstance(url, bytes):
            url = url.encode('utf-8')
        hash_name = md5(url).hexdigest()
    self.last_hash_name = hash_name

    # Pass values as logging arguments *with* a placeholder: an extra
    # argument without '%s' makes the record fail to format.
    self.log.debug('write file to cache: %s', hash_name)
    self.log.debug('use mongo: %s', self.use_mongo)

    if not self.use_mongo:
        f_name = self.download_temp + hash_name + '.gz'
        gz_file = gzip.open(f_name, 'wb')
        try:
            gz_file.write(data)
        finally:
            # Always release the file handle, even if the write fails.
            gz_file.close()
        return

    # GridFS stores the already-compressed bytes, so gzip in memory first.
    buf = StringIO.StringIO()
    gz_file = gzip.GzipFile(fileobj=buf, mode='wb')
    try:
        gz_file.write(data)
    finally:
        gz_file.close()
    compressed = buf.getvalue()
    buf.close()

    connection = Connection("localhost", 27017)
    try:
        db = connection['parser']
        fs = GridFS(db)
        fp = fs.open(hash_name, 'w', self.download_temp.replace('/', ''))
        # Close only after a successful write so that a failed write does
        # not commit a truncated cache entry.
        fp.write(compressed)
        fp.close()
    finally:
        # Do not leak the server connection when the write fails.
        connection.disconnect()
# GetTempFile: look up a cached copy of self.url.
#
# What the visible code does (only when self.use_cache == True):
#   * stores the hex MD5 digest of self.url in self.hash_name and resets
#     self.page_from_cache to False;
#   * builds the cache path as download_temp + hash_name + '.gz';
#   * if a file with the same path minus '.gz' exists, reads it, rewrites
#     its content as a gzip file at the '.gz' path, removes the
#     uncompressed file and returns the data;
#   * when self.read_cache == True and self.use_mongo == True, reads the
#     entry through GridFS (MongoDB on localhost:27017, database 'parser',
#     collection = download_temp with '/' removed) and gunzips it; on any
#     exception it prints the error, sets page_from_cache to False and
#     returns None;
#   * otherwise reads the gzip file from disk if it exists, else sets
#     data to ''.
#
# NOTE(review): this is Python 2 code (print statement,
#   "except Exception, e").
# NOTE(review): the source is collapsed onto one line, so the nesting of
#   "else: data = ''" and of the trailing print / "page_from_cache = True"
#   cannot be determined from here -- confirm against the real file.
# NOTE(review): no final return of `data` is visible; the method
#   presumably continues beyond this view -- verify.
# NOTE(review): f_name.replace('.gz', '') removes every '.gz' occurrence
#   in the path, not only the suffix -- confirm download_temp never
#   contains '.gz'.
def GetTempFile(self): """ Fetch/store the downloaded page from/in the cache. """ data = None if self.use_cache == True: hash = md5(self.url) self.hash_name = hash.hexdigest() self.page_from_cache = False # f_name = self.download_temp + self.hash_name f_name = self.download_temp + self.hash_name + '.gz' if self._devel == True: print 'seek cache: ',f_name, '::', self.url # is the local file not gzipped yet if os.path.exists(f_name.replace('.gz', '') ): data = open(f_name.replace('.gz', ''), 'rb').read() f = gzip.open(f_name, 'wb') f.write(data) f.close() os.unlink( f_name.replace('.gz', '') ) return data # now read the gzip file if self.read_cache == True: if self.use_mongo == True: try: connection = Connection("localhost", 27017) db = connection['parser'] fs = GridFS(db) fp = fs.open(self.hash_name , 'r', self.download_temp.replace('/', '') ) f = gzip.GzipFile(fileobj=fp, mode='rb') data = f.read() f.close() fp.close() del(f) connection.disconnect() except Exception, e: print 'read cahce error: ', e self.page_from_cache = False return None elif os.path.exists(f_name): f = gzip.open(f_name, 'rb') data = f.read() f.close() else: data = '' if self._devel == True: print '# Found cache: ', self.hash_name self.page_from_cache = True