コード例 #1
0
ファイル: Requester.py プロジェクト: darkman66/PageGet
    def WriteTempFile(self, data, hash_name=None):
        """Store a downloaded payload in the cache, gzip-compressed.

        Nothing is written unless ``self.use_cache == True``.

        * ``self.use_mongo == False``: the payload is written to the file
          ``self.download_temp + hash_name + '.gz'``.
        * ``self.use_mongo == True``: the payload is gzipped in memory and
          stored through GridFS in the ``parser`` database on
          ``localhost:27017``.  ``self.download_temp`` with every ``/``
          removed is passed as the third argument of ``GridFS.open``
          (presumably the collection name -- confirm against the pymongo
          version in use).

        Args:
            data: Raw payload to compress and store.
            hash_name: Cache key.  When ``None`` the hex MD5 digest of
                ``self.url`` is used and also saved in
                ``self.last_hash_name``.

        Returns:
            None.
        """
        # The flags are compared explicitly (not by truthiness) so that
        # non-bool flag values keep behaving exactly as they did before.
        if not (self.use_cache == True):
            return

        if hash_name is None:
            hash_name = md5(self.url).hexdigest()
            self.last_hash_name = hash_name

        # Values are passed as logging arguments with a matching placeholder.
        # The previous call supplied hash_name without any '%s', which made
        # the logging module fail while formatting the record.
        self.log.debug('write file to cache: %s', hash_name)
        self.log.debug('use mongo: %s', self.use_mongo)

        if self.use_mongo == False:
            gz_file = gzip.open(self.download_temp + hash_name + '.gz', 'wb')
            try:
                gz_file.write(data)
            finally:
                # Always release the file handle, even if the write fails.
                gz_file.close()

        elif self.use_mongo == True:
            # Compress into an in-memory buffer first so that no database
            # connection is held open while compressing.
            buf = StringIO.StringIO()
            try:
                gz_file = gzip.GzipFile(fileobj=buf, mode='wb')
                try:
                    gz_file.write(data)
                finally:
                    # Closing flushes the gzip trailer into the buffer.
                    gz_file.close()
                compressed = buf.getvalue()
            finally:
                buf.close()

            connection = Connection("localhost", 27017)
            try:
                fs = GridFS(connection['parser'])
                fp = fs.open(hash_name, 'w',
                             self.download_temp.replace('/', ''))
                try:
                    fp.write(compressed)
                finally:
                    fp.close()
            finally:
                # Do not leak the connection when the GridFS write fails.
                connection.disconnect()
コード例 #2
0
ファイル: Requester.py プロジェクト: darkman66/PageGet
    def GetTempFile(self):
        """
        Look up the page for ``self.url`` in the cache.

        Only acts when ``self.use_cache == True``.  The cache key is the hex
        MD5 digest of ``self.url`` (stored in ``self.hash_name``) and
        ``self.page_from_cache`` is first reset to False.

        Steps performed by the visible code:

        1. If an uncompressed file named ``download_temp + hash_name`` exists,
           its content is read, re-written as ``<name>.gz``, the uncompressed
           file is deleted and the raw content is returned immediately.
        2. Otherwise, when ``self.read_cache == True``, the gzipped payload is
           read either from GridFS (``self.use_mongo == True``) or from the
           local ``.gz`` file if it exists.  Any exception on the GridFS path
           is printed and ``None`` is returned.
        3. When ``self.read_cache`` is not True, ``data`` is set to ``''``.

        Finally ``self.page_from_cache`` is set to True.

        NOTE(review): no ``return data`` is visible after the last statement
        shown here -- confirm whether the method continues and returns the
        payload.
        """
        data = None
        if self.use_cache == True:
            # Cache key: hex MD5 digest of the URL.
            hash = md5(self.url)
            self.hash_name = hash.hexdigest()
            self.page_from_cache = False
            
#            f_name = self.download_temp + self.hash_name
            f_name = self.download_temp + self.hash_name + '.gz'
            if self._devel == True:
                print 'seek cache: ',f_name, '::', self.url
                
            # Is there a local, not yet gzipped, copy of the file?
            # If so, migrate it: read it, write a gzipped copy, delete the
            # uncompressed file and return its content.
            # NOTE(review): str.replace removes every '.gz' occurrence in the
            # path, not only the trailing suffix.
            # NOTE(review): this early return leaves page_from_cache False.
            if os.path.exists(f_name.replace('.gz', '') ):
                data = open(f_name.replace('.gz', ''), 'rb').read()
                f = gzip.open(f_name, 'wb')
                f.write(data)
                f.close()
                os.unlink( f_name.replace('.gz', '') )
                return data
            
            # Now read the gzipped payload.
            if self.read_cache == True:
                if self.use_mongo == True:
                    try:
                        connection = Connection("localhost", 27017)
                        db = connection['parser']

                        # Third argument is download_temp with every '/'
                        # removed (presumably the collection name -- TODO
                        # confirm against the pymongo version in use).
                        fs = GridFS(db)
                        fp = fs.open(self.hash_name , 'r', self.download_temp.replace('/', '') )
                        # Decompress straight from the GridFS file object.
                        f = gzip.GzipFile(fileobj=fp, mode='rb')
                        data = f.read()
                        f.close()
                        fp.close()
                        del(f)
                        connection.disconnect()

                    except Exception, e:
                        # Any failure on the GridFS path is reported on stdout
                        # and treated as "nothing cached".
                        # NOTE(review): the connection is not disconnected on
                        # this path.
                        print 'read cahce error: ', e
                        self.page_from_cache = False
                        return None

                elif os.path.exists(f_name):
                        f = gzip.open(f_name, 'rb')
                        data = f.read()
                        f.close()
            else:
                # Cache reading is switched off: use an empty string.
                data = ''
                    
            # NOTE(review): this point is also reached when no cache file was
            # found (data is still None) or when read_cache is off (data is
            # ''), yet the message is printed and page_from_cache is set to
            # True regardless -- confirm this is intended.
            if self._devel == True:
                print '# Found cache: ', self.hash_name
            self.page_from_cache = True