class ShaManager:
    """Stores (uri, sha) pairs in the ``shas`` table of a database file.

    NOTE(review): ``connect()`` is not defined in the visible part of this
    class; the methods below unpack its result as ``(connection, cursor)``.
    """

    def __init__(self, path='shas.db', app='futil'):
        """Remember the database path and create the table if the file is missing.

        path -- location of the database file (tested with os.path.exists).
        app  -- name handed to FutilLogger.
        """
        self._logger = FutilLogger(app)
        self.connection = None
        self.path = path
        if not os.path.exists(self.path):
            self.createEmptyDB()

    def createEmptyDB(self):
        """Create the ``shas`` table, keyed on the (uri, sha) pair."""
        (con, cur) = self.connect()
        cur.execute("CREATE TABLE shas (uri TEXT, sha VARCHAR(40), PRIMARY KEY(uri, sha) )")
        con.commit()

    def insertUriSha(self, uri, sha):
        """Insert one (uri, sha) row.

        Any failure (including a primary-key clash on an existing pair) is
        logged through the instance logger and not re-raised.
        """
        try:
            (con, cur) = self.connect()
            # Bound parameters instead of string interpolation: a quote inside
            # the URI can no longer break or alter the statement.
            # NOTE(review): "?" placeholders assume a qmark-style driver such
            # as sqlite -- confirm against connect().
            cur.execute("INSERT INTO shas(uri, sha) VALUES (?, ?)", (uri, sha))
            con.commit()
        except Exception as e:
            self._logger.error("Inserting in sha database " + str(e))
class IndexAppService(Indexer):
    """Adds FOAF documents to an index and records their sha/uri pairs."""

    def __init__(self, directory, shaManager, app='futil'):
        """Open an IndexWriter on *directory* and wire up the collaborators.

        directory  -- index location passed to IndexReader/IndexWriter.
        shaManager -- object providing insertUriSha() and close().
        app        -- name handed to FutilLogger.
        """
        self._directory = directory
        # Ask the writer to create the index only when none exists yet.
        create = not IndexReader.indexExists(self._directory)
        self._writer = IndexWriter(self._directory, StandardAnalyzer(), create)
        self.shaBBDD = shaManager
        self.logger = FutilLogger(app)
        self.uriLoader = UriLoader(logger=self.logger)
        self.resetCounter()

    def resetCounter(self):
        """Restart the countdown of insertions left before the next optimize."""
        self.counter = 1000

    def countInsertion(self):
        """Count one insertion; optimize the index when the countdown hits zero."""
        self.counter -= 1
        if self.counter == 0:
            self.resetCounter()
            self._writer.optimize()

    def indexFOAF(self, foaf):
        """Index one parsed FOAF mapping and store its sha/uri pairs.

        Returns the list of friend URIs found under foaf['friends'] (the
        second element of every pair), or an empty list when there is no
        'friends' key.
        """
        document = FoafDocumentFactory.getDocumentFromFOAF(foaf)
        self._writer.addDocument(document)
        self.countInsertion()
        if 'sha' in foaf:
            for sha in foaf['sha']:
                self.shaBBDD.insertUriSha(foaf['uri'][0], sha)

        if 'friends' in foaf:
            for friendSha, friendUri in foaf['friends']:
                # Friends without a sha are still returned, just not stored.
                if friendSha != '':
                    self.shaBBDD.insertUriSha(friendUri, friendSha)
            return [u for (v, u) in foaf['friends']]
        return []

    def indexFOAFUri(self, foafUri):
        """Load the FOAF at *foafUri* and index it.

        Returns what indexFOAF() returns, or an empty list when loading or
        indexing fails (the failure is logged).
        """
        try:
            f = self.uriLoader.getFoafFrom(foafUri)
            return self.indexFOAF(f)
        except Exception as e:
            # Not a bare except: KeyboardInterrupt and SystemExit must still
            # be able to stop the process.
            self.logger.info("Unknow error indexing " + foafUri + ": " + str(e))
            return []

    def close(self):
        """Close the index writer (if any) and the sha database."""
        try:
            if self._writer:
                self._writer.close()
        finally:
            # Release the sha database even when closing the writer fails.
            self._writer = None
            self.shaBBDD.close()
class MySQLWrapper(DBWrapper):
    """DBWrapper variant that connects to a MySQL server through MySQLdb."""

    def __init__(self, app='futil', host='localhost', db='futil', user='******', passwd='futil', table='foafs'):
        """Store the connection settings; no connection is opened here.

        app -- name handed to FutilLogger.
        host, db, user, passwd, table -- kept in self.data under those keys.
        """
        self.data = { 'host':host, 'db':db, 'user':user, 'passwd':passwd, 'table':table}
        self.connection = None
        self.log = FutilLogger(app)
        self.pendingCache = []

    def realConnect(self):
        """Open and return a new MySQLdb connection.

        On MySQLdb.Error the problem is logged and the process exits with
        status -1.
        """
        try:
            return MySQLdb.connect(host=self.data['host'], db=self.data['db'],
                                   user=self.data['user'], passwd=self.data['passwd'])
        except MySQLdb.Error as e:
            # The original indexed the exception (e[1]); read the same slot
            # from e.args and fall back to the exception itself if it is
            # missing.
            detail = e.args[1] if len(e.args) > 1 else e
            self.log.error('conecting to db: ' + str(detail))
            sys.exit(-1)
 def __init__(self, path="foaf.db"):
     """Set up wrapper state for the database file at *path*.

     createEmptyDB() is called when os.path.exists() finds nothing at that
     path; the logger and the pendingCache list are assigned afterwards.
     """
     self.path, self.connection = path, None
     db_file_present = os.path.exists(self.path)
     if not db_file_present:
         self.createEmptyDB()
     self.log, self.pendingCache = FutilLogger(), []
 def __init__(self, directory, shaManager, app='futil'):
     """Open an IndexWriter on *directory* and keep the given sha manager.

     The writer is told to create the index only when
     IndexReader.indexExists() reports that none is present.
     """
     self._directory = directory
     index_missing = not IndexReader.indexExists(self._directory)
     analyzer = StandardAnalyzer()
     self._writer = IndexWriter(self._directory, analyzer, index_missing)
     self.shaBBDD = shaManager
     app_logger = FutilLogger(app)
     self.logger = app_logger
     self.uriLoader = UriLoader(logger=self.logger)
     self.resetCounter()
Exemple #6
0
class PTSW:
    """Client for the pingthesemanticweb.com REST ping endpoint."""

    def __init__(self, app='futil', config='.ptsw'):
        """Set the endpoint, logger and zeroed counters, then call loadStats().

        app    -- name handed to FutilLogger.
        config -- stored as self.pathStats.
        """
        self.rest = "http://pingthesemanticweb.com/rest/?url="
        self.log = FutilLogger(app)
        self.stats = {'pinged':0, 'sioc':0, 'foaf':0, 'doap':0, 'owl':0, 'rdfs':0, 'rdf':0, 'flerror':0}
        self.pathStats = config
        self.loadStats()

    def ping(self, uri):
        """Ping *uri* against the REST endpoint.

        Returns True when the parsed response has 'flerror' equal to 0,
        False otherwise. Any exception raised on the way is printed, logged
        and turned into a False result.
        """
        try:
            import socket
            # Process-wide default timeout for sockets created from here on.
            socket.setdefaulttimeout(TIMEOUT)

            #TODO:
            # proxy: urllib2.ProxyHandler({})

            url = self.rest + urllib.quote(uri)
            # NOTE(review): per the urllib2 docs a non-None data argument
            # makes the request a POST -- confirm that is what the endpoint
            # expects before changing it.
            data = {}
            headers = { 'User-Agent' : futil.__agent__ }
            request = urllib2.Request(url, data, headers)
            handle = urllib2.urlopen(request)
            try:
                response = handle.read()
            finally:
                # Release the connection instead of waiting for garbage
                # collection.
                handle.close()
            responseParsed = self.parseResponse(response)

            ok = (responseParsed['flerror'] == 0)
            self.setStats(responseParsed)
            if ok:
                self.stats['pinged'] += 1
                self.log.info(uri+' pinged')
            else:
                self.log.error('error pinging ' + uri + ': ' + responseParsed['message'])
            return ok
        except Exception as details:
            # Parenthesised so the line parses as a print statement or as a
            # print() call.
            print(str(details))
            self.log.error('problem pinging ' + uri + ': ' + str(details))
            return False
 def __init__(self, path='shas.db', app='futil'):
     """Create the logger, remember *path*, and build the DB if it is absent.

     createEmptyDB() runs only when os.path.exists() finds nothing at the
     stored path.
     """
     self._logger = FutilLogger(app)
     self.connection = None
     self.path = path
     if os.path.exists(self.path):
         return
     self.createEmptyDB()
 def __init__(self, app='futil', host='localhost', db='futil', user='******', passwd='futil', table='foafs'):
     """Record the connection settings in self.data; nothing is opened here."""
     self.data = dict(host=host, db=db, user=user, passwd=passwd, table=table)
     self.connection = None
     self.log = FutilLogger(app)
     self.pendingCache = list()
class PySQLiteWrapper(DBWrapper):
    """DBWrapper variant that keeps FOAF URIs in a sqlite ``foafs`` table.

    The ``visited`` column holds the strings 'True' / 'False'; every query
    below compares against those strings.

    NOTE(review): todayDate() is not defined in this class; it is presumably
    inherited from DBWrapper and must return a string (visit() stores it as
    the ``date`` text) -- confirm.
    """

    def __init__(self, path="foaf.db"):
        """Remember *path* and create the table when the file does not exist."""
        self.path = path
        self.connection = None
        # Logger and cache are set up first so they already exist while the
        # database is being created.
        self.log = FutilLogger()
        self.pendingCache = []
        if not os.path.exists(self.path):
            self.createEmptyDB()

    def createEmptyDB(self):
        """Create the ``foafs`` table (uri primary key, visited, date)."""
        (con, cur) = self.connect()
        cur.execute("CREATE TABLE foafs (uri TEXT PRIMARY KEY, visited BOOL, date TEXT)")
        con.commit()

    def realConnect(self):
        """Open and return a new sqlite connection to self.path."""
        return sqlite.connect(self.path)

    def connect(self):
        """Return (connection, new cursor), opening the connection on first use."""
        if self.connection is None:
            self.connection = self.realConnect()
        return (self.connection, self.connection.cursor())

    def query(self, uri):
        """Return the rows of ``foafs`` whose uri equals *uri*."""
        con, cur = self.connect()
        query = "SELECT uri FROM foafs WHERE uri =?"
        cur.execute(query, (uri,))
        return cur.fetchall()

    def insert(self, uri, visited=False):
        """Insert *uri* with today's date unless it is already stored.

        Returns True on success, False when the uri already exists or the
        insert fails (both cases are logged).
        """
        if self.exists(uri):
            self.log.info('Error: ' + uri + ' already exists on db')
            return False

        date = self.todayDate()
        (con, cur) = self.connect()
        try:
            # Bound parameters: quotes in the URI cannot break the statement.
            # str(visited) keeps the 'True'/'False' text the other queries
            # compare against.
            cur.execute(
                "INSERT INTO foafs(uri, visited, date) VALUES (?, ?, ?)",
                (uri, str(visited), date))
            con.commit()
            return True
        except Exception as e:
            self.log.info('Error inserting: ' + uri + ' (' + str(e) + ')')
            return False

    def visit(self, uri):
        """Mark *uri* as visited today; returns False when it is not stored."""
        if not self.exists(uri):
            self.log.info('Error: ' + uri + ' not exists on db')
            return False

        date = self.todayDate()
        (con, cur) = self.connect()
        # Bound parameters instead of string concatenation.
        cur.execute("UPDATE foafs SET visited='True', date=? WHERE uri=?", (date, uri))
        con.commit()
        return True

    def exists(self, uri):
        """Return True when at least one row has this uri."""
        return len(self.query(uri)) > 0

    def visited(self, uri):
        """Return the visited flag of *uri*; False when the uri is unknown."""
        con, cur = self.connect()
        query = "SELECT visited FROM foafs WHERE uri =?"
        cur.execute(query, (uri,))
        result = cur.fetchall()
        if result:
            return self.str2bool(result[0])
        return False

    def getPending(self):
        """Return the uri rows whose visited column is 'False'."""
        con, cur = self.connect()
        query = "SELECT uri FROM foafs WHERE visited='False'"
        cur.execute(query)
        return cur.fetchall()

    def getNextPending(self):
        """Pop and return one pending uri, refilling the cache when empty.

        At most CACHE rows are cached per refill. Raises IndexError when
        nothing is pending, because the pop then runs on an empty list.
        """
        if not self.pendingCache:
            pending = self.getPending()
            self.log.info(str(len(pending)) + ' URIs pending to visit')
            # Slicing already yields the whole list when it is shorter than
            # CACHE, so no size check is needed.
            self.pendingCache = pending[:CACHE]

        return self.pendingCache.pop()[0]

    def pending(self):
        """Return True while the table holds at least one unvisited uri."""
        return len(self.getPending()) > 0

    def str2bool(self, query):
        """Return True when the first column of the row is the string 'True'."""
        return query[0] == 'True'

    def close(self):
        """Make pending() report False from now on and close the connection."""

        def alwaysFalse():
            return False
        # Shadow the method on this instance with a function that always
        # returns False.
        self.pending = alwaysFalse

        if self.connection is not None:
            self.connection.close()
Exemple #10
0
 def __init__(self, app='futil', config='.ptsw'):
     """Set the REST endpoint, logger and zeroed counters, then load stats.

     config is stored as self.pathStats before loadStats() is called.
     """
     self.rest = "http://pingthesemanticweb.com/rest/?url="
     self.log = FutilLogger(app)
     counter_names = ('pinged', 'sioc', 'foaf', 'doap', 'owl', 'rdfs', 'rdf', 'flerror')
     self.stats = dict.fromkeys(counter_names, 0)
     self.pathStats = config
     self.loadStats()