class ShaManager:
    """Keep (uri, sha) pairs in the ``shas`` table of a database file.

    NOTE(review): ``connect()`` is used below but is not defined in the
    visible part of this class; it is expected to return a
    ``(connection, cursor)`` pair.
    """

    def __init__(self, path='shas.db', app='futil'):
        """Remember the database location and create the schema if needed.

        path -- database file; when it does not exist yet the ``shas``
                table is created through createEmptyDB().
        app  -- name handed to FutilLogger.
        """
        self._logger = FutilLogger(app)
        self.connection = None
        self.path = path
        if not os.path.exists(self.path):
            self.createEmptyDB()

    def createEmptyDB(self):
        """Create the ``shas`` table, keyed on the (uri, sha) pair."""
        (con, cur) = self.connect()
        cur.execute("CREATE TABLE shas (uri TEXT, sha VARCHAR(40), PRIMARY KEY(uri, sha) )")
        con.commit()

    def insertUriSha(self, uri, sha):
        """Insert one (uri, sha) row.

        Failures are logged through the instance logger and are not
        propagated to the caller.
        """
        try:
            (con, cur) = self.connect()
            # Bound parameters instead of string formatting: a quote inside
            # the URI can no longer break (or inject into) the statement.
            # NOTE(review): '?' placeholders assume a qmark-paramstyle
            # driver such as sqlite -- confirm against connect().
            cur.execute("INSERT INTO shas(uri, sha) VALUES (?, ?)", (uri, sha))
            con.commit()
        except Exception as e:
            self._logger.error("Inserting in sha database " + str(e))
class IndexAppService(Indexer):
    """Index FOAF documents and record their sha sums in a sha manager."""

    def __init__(self, directory, shaManager, app='futil'):
        """Open an index writer on ``directory``.

        directory  -- index location; a new index is created only when
                      IndexReader reports that none exists there.
        shaManager -- object receiving insertUriSha() and close() calls.
        app        -- name handed to FutilLogger.
        """
        self._directory = directory
        create = not IndexReader.indexExists(self._directory)
        self._writer = IndexWriter(self._directory, StandardAnalyzer(), create)
        self.shaBBDD = shaManager
        self.logger = FutilLogger(app)
        self.uriLoader = UriLoader(logger=self.logger)
        self.resetCounter()

    def resetCounter(self):
        """Restart the countdown of insertions left before the next optimize."""
        self.counter = 1000

    def countInsertion(self):
        """Count one insertion; optimize the writer when the countdown hits 0."""
        self.counter -= 1
        if self.counter == 0:
            self.resetCounter()
            self._writer.optimize()

    def indexFOAF(self, foaf):
        """Add ``foaf`` to the index and store its sha sums.

        Every value under 'sha' is stored against the first 'uri' entry.
        Every (sha, uri) pair under 'friends' with a non-empty sha is stored
        as well.

        Returns the URIs of all friends (including those whose sha is
        empty), or an empty list when there is no 'friends' entry.
        """
        document = FoafDocumentFactory.getDocumentFromFOAF(foaf)
        self._writer.addDocument(document)
        self.countInsertion()

        if 'sha' in foaf:
            for sha in foaf['sha']:
                self.shaBBDD.insertUriSha(foaf['uri'][0], sha)

        if 'friends' in foaf:
            for friendSha, friendUri in foaf['friends']:
                if friendSha != '':
                    self.shaBBDD.insertUriSha(friendUri, friendSha)
            return [u for (v, u) in foaf['friends']]

        return []

    def indexFOAFUri(self, foafUri):
        """Load the FOAF found at ``foafUri`` and index it.

        Returns what indexFOAF() returns, or an empty list when loading or
        indexing fails (the failure is logged).
        """
        try:
            f = self.uriLoader.getFoafFrom(foafUri)
            return self.indexFOAF(f)
        except Exception:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            self.logger.info("Unknow error indexing " + foafUri)
            return []

    def close(self):
        """Close the index writer (once) and the sha manager."""
        if self._writer:
            self._writer.close()
            self._writer = None
        # NOTE(review): the sha manager is closed on every call, even when
        # the writer was already closed -- confirm this is the intended nesting.
        self.shaBBDD.close()
class MySQLWrapper(DBWrapper):
    """DBWrapper variant that connects through MySQLdb."""

    def __init__(self, app='futil', host='localhost', db='futil', user='******', passwd='futil', table='foafs'):
        """Store the connection settings; no connection is opened here.

        app    -- name handed to FutilLogger.
        host, db, user, passwd -- forwarded to MySQLdb.connect() by
                  realConnect().
        table  -- kept in ``self.data`` next to the connection settings.
        """
        self.data = {
            'host': host,
            'db': db,
            'user': user,
            'passwd': passwd,
            'table': table,
        }
        self.connection = None
        self.log = FutilLogger(app)
        self.pendingCache = []

    def realConnect(self):
        """Open and return a new MySQLdb connection.

        On a MySQLdb error the problem is logged and the process exits with
        status -1.
        """
        try:
            return MySQLdb.connect(host=self.data['host'],
                                   db=self.data['db'],
                                   user=self.data['user'],
                                   passwd=self.data['passwd'])
        except MySQLdb.Error as e:
            # Prefer the second argument of the error (what the original
            # e[1] read), but fall back to the whole exception when the
            # error carries fewer arguments, so the real failure is never
            # masked by an IndexError.
            detail = e.args[1] if len(e.args) > 1 else e
            self.log.error('conecting to db: ' + str(detail))
            sys.exit(-1)
def __init__(self, path="foaf.db"):
    """Set up the wrapper for the database file at ``path``.

    When the file does not exist yet, createEmptyDB() is called to build it.
    No connection is opened otherwise.
    """
    self.path = path
    self.connection = None

    db_missing = not os.path.exists(self.path)
    if db_missing:
        self.createEmptyDB()

    self.log = FutilLogger()
    # URIs fetched in advance and waiting to be handed out.
    self.pendingCache = []
def __init__(self, directory, shaManager, app='futil'):
    """Open an index writer on ``directory`` and wire up the collaborators.

    directory  -- index location; the writer creates a new index only when
                  IndexReader reports that none exists there.
    shaManager -- stored as ``self.shaBBDD``.
    app        -- name handed to FutilLogger.
    """
    self._directory = directory

    index_missing = not IndexReader.indexExists(self._directory)
    analyzer = StandardAnalyzer()
    self._writer = IndexWriter(self._directory, analyzer, index_missing)

    self.shaBBDD = shaManager

    # The URI loader shares this service's logger.
    self.logger = FutilLogger(app)
    self.uriLoader = UriLoader(logger=self.logger)

    self.resetCounter()
class PTSW:
    """Client for the pingthesemanticweb.com REST ping endpoint.

    NOTE(review): loadStats(), parseResponse() and setStats() are used here
    but are not defined in the visible part of this class.
    """

    def __init__(self, app='futil', config='.ptsw'):
        """Initialise the counters and load previously saved statistics.

        app    -- name handed to FutilLogger.
        config -- stored as ``self.pathStats`` before loadStats() runs.
        """
        self.rest = "http://pingthesemanticweb.com/rest/?url="
        self.log = FutilLogger(app)
        self.stats = {'pinged': 0, 'sioc': 0, 'foaf': 0, 'doap': 0,
                      'owl': 0, 'rdfs': 0, 'rdf': 0, 'flerror': 0}
        self.pathStats = config
        self.loadStats()

    def ping(self, uri):
        """Ping ``uri`` and update the statistics from the parsed response.

        Returns True when the parsed response reports ``flerror == 0``.
        Returns False when the service reports an error or when any
        exception is raised while pinging; both cases are logged.
        """
        try:
            import socket
            # Process-wide default timeout for sockets created from now on.
            socket.setdefaulttimeout(TIMEOUT)

            #TODO:
            # proxy: urllib2.ProxyHandler({})

            url = self.rest + urllib.quote(uri)
            # NOTE(review): a non-None data argument normally makes urllib2
            # send a POST -- confirm that is what the service expects.
            data = {}
            headers = {'User-Agent': futil.__agent__}
            request = urllib2.Request(url, data, headers)
            response = urllib2.urlopen(request).read()

            responseParsed = self.parseResponse(response)
            ok = (responseParsed['flerror'] == 0)
            self.setStats(responseParsed)

            if ok:
                self.stats['pinged'] += 1
                self.log.info(uri + ' pinged')
            else:
                self.log.error('error pinging ' + uri + ': ' + responseParsed['message'])
            return ok
        except Exception as details:
            # The failure is reported through the logger only; the details
            # are no longer echoed to stdout as well.
            self.log.error('problem pinging ' + uri + ': ' + str(details))
            return False
def __init__(self, path='shas.db', app='futil'):
    """Prepare the manager for the database file at ``path``.

    path -- database file; createEmptyDB() is called when it is missing.
    app  -- name handed to FutilLogger.
    """
    self._logger = FutilLogger(app)
    self.connection = None
    self.path = path

    # Nothing more to do when the database file is already there.
    if os.path.exists(self.path):
        return
    self.createEmptyDB()
def __init__(self, app='futil', host='localhost', db='futil', user='******', passwd='futil', table='foafs'):
    """Record the connection settings without opening a connection.

    app   -- name handed to FutilLogger.
    host, db, user, passwd, table -- kept together in ``self.data``.
    """
    settings = {}
    settings['host'] = host
    settings['db'] = db
    settings['user'] = user
    settings['passwd'] = passwd
    settings['table'] = table
    self.data = settings

    self.connection = None
    self.log = FutilLogger(app)
    self.pendingCache = []
class PySQLiteWrapper(DBWrapper):
    """DBWrapper variant backed by an sqlite file with a ``foafs`` table.

    The ``visited`` column holds the text 'True' or 'False'.

    NOTE(review): todayDate() and the CACHE constant are used here but are
    defined outside the visible part of this class.
    """

    def __init__(self, path="foaf.db"):
        """Remember ``path`` and create the database when the file is missing."""
        self.path = path
        self.connection = None
        if not os.path.exists(self.path):
            self.createEmptyDB()
        self.log = FutilLogger()
        self.pendingCache = []

    def createEmptyDB(self):
        """Create the ``foafs`` table (uri primary key, visited, date)."""
        (con, cur) = self.connect()
        cur.execute("CREATE TABLE foafs (uri TEXT PRIMARY KEY, visited BOOL, date TEXT)")
        con.commit()

    def realConnect(self):
        """Open and return a new sqlite connection to ``self.path``."""
        return sqlite.connect(self.path)

    def connect(self):
        """Return ``(connection, new cursor)``, connecting on first use."""
        if self.connection is None:
            self.connection = self.realConnect()
        return (self.connection, self.connection.cursor())

    def query(self, uri):
        """Return the rows of ``foafs`` whose uri equals ``uri``."""
        con, cur = self.connect()
        query = "SELECT uri FROM foafs WHERE uri =?"
        cur.execute(query, (uri,))
        return cur.fetchall()

    def insert(self, uri, visited=False):
        """Insert ``uri`` with today's date unless it is already stored.

        Returns True when the row was inserted, False when the URI already
        exists or the insertion fails (both cases are logged).
        """
        if not self.exists(uri):
            date = self.todayDate()
            (con, cur) = self.connect()
            # Bound parameters keep quotes inside the URI from breaking (or
            # injecting into) the statement.
            query = "INSERT INTO foafs(uri, visited, date) VALUES (?, ?, ?)"
            try:
                # str(visited) stores the same 'True'/'False' text that the
                # SELECT in getPending() compares against.
                cur.execute(query, (uri, str(visited), date))
                con.commit()
                return True
            except Exception:
                self.log.info('Error inserting: ' + uri)
                return False
        else:
            self.log.info('Error: ' + uri + ' already exists on db')
            return False

    def visit(self, uri):
        """Mark ``uri`` as visited today.

        Returns True on success, False (after logging) when the URI is not
        stored.
        """
        if self.exists(uri):
            date = self.todayDate()
            (con, cur) = self.connect()
            query = "UPDATE foafs SET visited='True', date=? WHERE uri=?"
            cur.execute(query, (date, uri))
            con.commit()
            return True
        else:
            self.log.info('Error: ' + uri + ' not exists on db')
            return False

    def exists(self, uri):
        """Return True when ``uri`` is stored."""
        return (len(self.query(uri)) > 0)

    def visited(self, uri):
        """Return True when ``uri`` is stored and flagged as visited."""
        con, cur = self.connect()
        query = "SELECT visited FROM foafs WHERE uri =?"
        cur.execute(query, (uri,))
        result = cur.fetchall()
        if len(result) > 0:
            return self.str2bool(result[0])
        else:
            return False

    def getPending(self):
        """Return the rows (one-column each) of every URI not yet visited."""
        con, cur = self.connect()
        query = "SELECT uri FROM foafs WHERE visited='False'"
        cur.execute(query)
        return cur.fetchall()

    def getNextPending(self):
        """Return one pending URI, refilling the local cache when it is empty.

        At most CACHE rows are kept in the cache per refill.

        Raises IndexError when nothing is pending at all.
        """
        if len(self.pendingCache) == 0:
            pending = self.getPending()
            pendingSize = len(pending)
            self.log.info(str(pendingSize) + ' URIs pending to visit')
            if pendingSize > CACHE:
                self.pendingCache = pending[:CACHE]
            else:
                self.pendingCache = pending
        return self.pendingCache.pop()[0]

    def pending(self):
        """Return True while at least one stored URI is not visited."""
        return (len(self.getPending()) > 0)

    def str2bool(self, query):
        """Return True when the first column of the row ``query`` is 'True'."""
        if query[0] == 'True':
            return True
        else:
            return False

    def close(self):
        """Close the connection, if any.

        After this call pending() on this instance always returns False.
        """
        def alwaysFalse():
            return False

        # Shadow pending() on the instance so it no longer touches the
        # database once the connection is closed.
        self.pending = alwaysFalse
        if self.connection is not None:
            self.connection.close()
def __init__(self, app='futil', config='.ptsw'):
    """Initialise the ping client and load its saved statistics.

    app    -- name handed to FutilLogger.
    config -- stored as ``self.pathStats`` before loadStats() is called.
    """
    self.rest = "http://pingthesemanticweb.com/rest/?url="
    self.log = FutilLogger(app)

    # Every counter starts at zero.
    counter_names = ('pinged', 'sioc', 'foaf', 'doap',
                     'owl', 'rdfs', 'rdf', 'flerror')
    self.stats = dict.fromkeys(counter_names, 0)

    self.pathStats = config
    self.loadStats()