class CouchClient(object):
    def __init__(self, config):
        try:
            # First figure out which document template we are using here.
            if config.get_param('template') not in ['pdst', 'tslp', 'random']:
                raise Exception('Template parameter not recognized. Try: pdst tslp random')
            self.server = Server(uri='http://' +
                                 config.get_param('server') +
                                 ':' + config.get_param('port'))
            self.db = self.server.get_or_create_db(config.get_param('db'))
            self.docu_class = None
            if config.get_param('template') == 'tslp':
                self.docu_class = TSLPDocument
            elif config.get_param('template') == 'pdst':
                self.docu_class = PDSTDocument
            elif config.get_param('template') == 'random':
                self.docu_class = DemoDocument
            self.docu_class.set_db(self.db)
            # OK, if that all worked, consider the object initialized wrt couch,
            # and store the other params we need to iterate.
            self.maxcounts = int(config.get_param('total'))
            self.increment = int(config.get_param('increment'))
            self.verbose = config.get_param('verbose')
        except Exception, msg:
            raise Exception(msg)
class CouchDBServer(object):
    def __init__(self):
        self.__get_server_uri()
        self.__authenticate()
        self.__connect()

    def __get_server_uri(self):
        couchdb_port = config.couchdb.port if config.couchdb.protocol == 'http' \
            else config.couchdb.ssl_port
        self.__couchdb_uri = "%s://%s:%s" % (config.couchdb.protocol,
                                             config.couchdb.host,
                                             couchdb_port)

    def __authenticate(self):
        user, passwd = config.couchdb.user, config.couchdb.password
        if all((user, passwd)):
            auth = restkit.BasicAuth(user, passwd)
            self.__auth_resource = CouchdbResource(filters=[auth])
        else:
            self.__auth_resource = None

    def __connect(self):
        self.__server = Server(uri=self.__couchdb_uri,
                               resource_instance=self.__auth_resource)

    def list_workspaces(self):
        return filter(is_usable_workspace, self.__server.all_dbs())

    def get_workspace_handler(self, ws_name):
        return self.__server.get_db(ws_name)

    def get_or_create_db(self, ws_name):
        return self.__server.get_or_create_db(ws_name)
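The class above reads its connection settings from a project-level `config` module that is not shown. A minimal sketch of the attributes it touches, with hypothetical defaults:

# Hypothetical stand-in for the `config` module the class above assumes;
# the real project presumably loads these values from its settings file.
class _CouchdbSettings(object):
    protocol = 'http'       # 'https' would make the class use ssl_port
    host = '127.0.0.1'
    port = 5984
    ssl_port = 6984
    user = None             # set both user and password to enable BasicAuth
    password = None

class config(object):
    couchdb = _CouchdbSettings()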
def __init__(self, databases):
    """Initialize the couchdbkit handler with the COUCHDB_DATABASES settings."""
    self.__dict__ = self.__shared_state__
    # create database sessions
    for app_name, uri in databases:
        try:
            if isinstance(uri, tuple):
                # case when you want to specify the server uri and the
                # database name separately; useful when you proxy couchdb
                # on some path
                server_uri, dbname = uri
            else:
                server_uri, dbname = uri.rsplit("/", 1)
        except ValueError:
            raise ValueError("couchdb uri [%s:%s] invalid" % (app_name, uri))

        res = CouchdbResource(server_uri, timeout=COUCHDB_TIMEOUT)
        server = Server(server_uri, resource_instance=res)
        app_label = app_name.split('.')[-1]
        self._databases[app_label] = server.get_or_create_db(dbname)
def main(views_directory, server_uri):
    directory = os.path.abspath(views_directory)
    server = Server(server_uri)
    db = server.get_or_create_db("v1")
    loader = FileSystemDocsLoader(directory)
    loader.sync(db, debug=True, verbose=True)
def main():
    # arguments parser
    parser = argparse.ArgumentParser(prog='pushCwe',
                                     epilog="Example: ./%(prog)s.py")
    parser.add_argument('-c', '--couchdburi', action='store', type=str,
                        dest='couchdb', default="http://127.0.0.1:5984",
                        help='Couchdb URL (default http://127.0.0.1:5984)')
    # arguments put in variables
    args = parser.parse_args()

    # default value from ENV COUCHDB, else from argument
    couchdb = os.environ.get('COUCHDB')
    if not couchdb:
        couchdb = args.couchdb

    __serv = Server(uri=couchdb)
    workspace = __serv.get_or_create_db("cwe")

    with open('data/cwe.csv', 'r') as csvfile:
        cwereader = csv.reader(csvfile, delimiter=',')
        header = cwereader.next()
        for cwe in cwereader:
            cwe_doc = dict(zip(header, cwe))
            workspace.save_doc(cwe_doc)
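The loop above turns each CSV row into one document keyed by the header row. A hypothetical two-line data/cwe.csv illustrating the shape it expects:

# Hypothetical data/cwe.csv contents (the real file ships with the project):
#
#   cwe,name,description
#   CWE-79,XSS,Improper Neutralization of Input During Web Page Generation
#
# dict(zip(header, cwe)) then yields, per row:
#   {'cwe': 'CWE-79', 'name': 'XSS', 'description': 'Improper ...'}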
def uploadFile(fname, uri, dbname):
    print 'Upload contents of %s to %s/%s' % (fname, uri, dbname)
    # Connect to the database
    theServer = Server(uri)
    db = theServer.get_or_create_db(dbname)
    # Loop on file for upload
    reader = DictReader(open(fname, 'rU'), dialect='excel')
    # For bulk upload
    docs = list()
    checkpoint = 100
    for doc in reader:
        # Convert strings that are really numbers into ints and floats
        newdoc = parseDoc(doc)
        # Check if the doc already exists in the DB; if it does, update it
        # if db.doc_exist(newdoc.get('_id')):
        #     newdoc['_rev'] = db.get_rev(newdoc.get('_id'))
        docs.append(newdoc)
        if len(docs) % checkpoint == 0:
            docs = upload(db, docs)
    # Upload the last batch
    docs = upload(db, docs)
class Coucher(object):
    def __init__(self, uri='http://127.0.0.1:5984'):
        self.server = Server(uri=uri)
        self.db = self.server.get_or_create_db("south")

    def handle(self, fid, fdata):
        self.db[fid] = fdata
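A minimal usage sketch of the wrapper above; the document id and payload are made up:

# Hypothetical usage: store a dict under an explicit document id.
coucher = Coucher()                       # assumes CouchDB on 127.0.0.1:5984
coucher.handle('doc-1', {'state': 'ok'})  # equivalent to db['doc-1'] = {...}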
def _list_couch_docs(self, db_name="dmscouch_test"):
    """Downloads all the documents currently stored in CouchDB."""
    docs = {}
    server = Server()
    db = server.get_or_create_db(db_name)
    r = db.view("dmscouch/all", include_docs=True)
    for row in r:
        docs[row["doc"]["_id"]] = row["doc"]
    return docs
class SPARQLBinBackend(object):
    # init with URL of CouchDB server, database name, and credentials
    def __init__(self, serverURL, dbname, username, pwd):
        self.serverURL = serverURL
        self.dbname = dbname
        self.username = username
        self.pwd = pwd
        self.server = Server(self.serverURL,
                             filters=[BasicAuth(self.username, self.pwd)])

    # adds a document to the database
    def add(self, entry):
        try:
            db = self.server.get_or_create_db(self.dbname)
            PasteEntry.set_db(db)
            doc = PasteEntry(endpoint=entry["endpoint"],
                             querystr=entry["querystr"],
                             timestamp=datetime.datetime.utcnow())
            doc.save()
            logging.debug("Adding entry with ID %s" % doc["_id"])
            return doc["_id"]
        except Exception as err:
            logging.error("Error while adding entry: %s" % err)
            return None

    # finds a document via its ID in the database
    def find(self, eid):
        try:
            db = self.server.get_or_create_db(self.dbname)
            if db.doc_exist(eid):
                ret = db.get(eid)
                # don't expose details, clean up
                ret.pop("doc_type")
                ret.pop("_rev")
                ret.pop("_id")
                return (True, ret)
            else:
                return (False, None)
        except Exception as err:
            logging.error("Error while looking up entry: %s" % err)
            return (False, None)
def init_db(uri, dbname, main_db=True):
    """Returns a db object and syncs the design documents on demand.

    If main_db is set to True then all models will use that database
    by default.
    """
    server = Server(uri)
    db = server.get_or_create_db(dbname)
    if main_db:
        Document.set_db(db)
    return db
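Because init_db calls Document.set_db when main_db is true, every couchdbkit Document subclass can then save without naming a database explicitly. A small sketch, with a made-up Greeting model:

from couchdbkit import Document, StringProperty

class Greeting(Document):      # hypothetical model
    text = StringProperty()

db = init_db('http://127.0.0.1:5984', 'greetings')
Greeting(text='hello').save()  # lands in 'greetings' via the default db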
def getDatabase(servname, dbn):
    try:
        serv = Server(servname)
        db = serv.get_or_create_db(dbn)
    except Exception:
        # if we cannot connect to the given server, try the local
        # database before giving up
        try:
            print 'unable to connect to', servname
            print 'trying local database'
            serv = Server('http://localhost:5984')
            db = serv.get_or_create_db(dbn)
        except Exception as e:
            print e
            raise e
    print 'connected to', db.uri
    return db, serv
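A usage sketch; the remote URL here is a made-up example:

# Hypothetical call: falls back to http://localhost:5984 if unreachable.
db, serv = getDatabase('http://couch.example.org:5984', 'mydb')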
def setup(dbstring, path):
    server_uri, db_name, docid = parse_uri(dbstring)
    if "@" in server_uri:
        username, password, server_uri = parse_auth(server_uri)
        server = Server(server_uri)
        server.res.add_authorization(httpc.BasicAuth((username, password)))
    else:
        server = Server(server_uri)

    db = server.get_or_create_db("couchdbproxy")
    loader = FileSystemDocsLoader(path)
    loader.sync(db, verbose=True)
class RevCountResolutionTestCase(ConflictResolverTestCase):
    """
    This tests resolution based on rev_count: given that the time difference
    between two conflicting versions is less than
    configuration.SESSION_LAG_TIME, we can't rely on timestamps due to
    server time skew. So we decide the winning document based on the number
    of modifications, assuming the one with the largest number of
    modifications should be the current one.
    """
    def _runTest(self):
        self.s.replicate(self.database_name,
                         'http://localhost:5984/' + self.repdb_name)
        self.s = Server('http://localhost:5984')
        self.repdb = self.s.get_or_create_db(self.repdb_name)
        self.replicated_doc = self.repdb.get(self.docid)
        # increase the revision log (add 6 more revisions)
        for i in range(6):
            self.replicated_doc['text'] = 'bigger revision number'
            self.replicated_doc['timestamp'] = str(int(round(time.time())))
            self.replicated_doc['rev_count'] = str(int(self.replicated_doc['rev_count']) + 1)
            self.repdb.save_doc(self.replicated_doc)
        # create the conflict: change the same text field of the original
        # at the source database.
        master_db = self.s.get_or_create_db(self.database_name)
        doc = master_db.get(self.docid)
        doc['text'] = 'smaller revision number'
        doc['timestamp'] = str(int(round(time.time())))
        doc['rev_count'] = str(int(doc['rev_count']) + 1)
        master_db.save_doc(doc)
        self.s.replicate('http://localhost:5984/' + self.repdb_name,
                         self.database_name)
        doc = self.db.get(self.docid)
        self.assertEqual(doc['text'], 'bigger revision number')
        start_time = time.time()
        while self.db.get(self.docid, conflicts=True).has_key('_conflicts'):
            pass
        end_time = time.time()
        print "Time to conflicts clear: %s" % (end_time - start_time)

    def runTest(self):
        for i in range(10):
            self._runTest()
class CloudantPool(object):
    def __init__(self, nworkers, maxdepth, tsleep, timeout, uri, dbname):
        self.tstart = time.time()
        self.nworkers = nworkers
        self.maxdepth = maxdepth
        self.tsleep = tsleep
        self.timeout = timeout
        self.uri = uri
        self.dbname = dbname
        self.counter = 0
        # make sure the DB exists
        self.server = Server(self.uri)
        self.db = self.server.get_or_create_db(self.dbname)
        print self.db.info()
        # create the worker pool
        self.inputQ = Queue.Queue()
        self.workers = list()
        print '\nUse:\t%i workers' % self.nworkers
        for i in range(0, self.nworkers):
            # each worker gets its own connection
            thd = thread.start_new_thread(worker, (i, self.timeout, self.uri,
                                                   self.dbname, self.inputQ))
            self.workers.append(thd)

    def pushDoc(self, doc):
        # put it on the queue
        self.inputQ.put(doc)
        self.counter += 1
        if self.counter % 100 == 0:
            print 'pushing doc:%i with depth %i' % (self.counter,
                                                    self.inputQ.qsize())
        # keep the work queue from getting too backed up
        while self.inputQ.qsize() > self.maxdepth:
            print 'qdepth = at %i sleep(%f)' % (self.inputQ.qsize(), self.tsleep)
            sys.stdout.flush()
            time.sleep(self.tsleep)

    def flush(self):
        print '\nFinal Queue Flush'
        print 'size:\t', self.inputQ.qsize()
        sys.stdout.flush()
        self.inputQ.join()
        print 'done flushing'
        self.tstop = time.time()
        self.rate = float(self.counter) / float(self.tstop - self.tstart)
        print 'Saved %i documents in %i seconds for %f docs/sec' % (
            self.counter, self.tstop - self.tstart, self.rate)

    def cleanup(self):
        # the original called bare enumerate(); threading.enumerate(),
        # which lists the live threads, is presumably what was intended
        for thd in threading.enumerate():
            print thd
def push_cwe(couchdb_url, filename):
    __serv = Server(uri=couchdb_url)
    workspace = __serv.get_or_create_db("cwe")
    with open(filename, 'r') as csvfile:
        cwereader = csv.reader(csvfile, delimiter=',')
        header = cwereader.next()
        print "[*] Beginning upload"
        for cwe in cwereader:
            cwe_doc = dict(zip(header, cwe))
            workspace.save_doc(cwe_doc)
        print "[*] Upload finished"
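A sketch of how push_cwe might be invoked; both arguments are illustrative:

# Hypothetical invocation; the URL and CSV path are assumptions.
push_cwe('http://127.0.0.1:5984', 'data/cwe.csv')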
def upload_file(fname, uri, dbname):
    print 'Upload contents of %s to %s/%s' % (fname, uri, dbname)
    theServer = Server(uri, filters=[BasicAuth(cloudant_username,
                                               cloudant_password)])
    db = theServer.get_or_create_db(dbname)
    reader = DictReader(open(fname, 'rU'), dialect='excel')
    docs = list()
    checkpoint = 100
    for doc in reader:
        newdoc = parse_doc(doc)
        docs.append(newdoc)
        if len(docs) % checkpoint == 0:
            docs = upload(db, docs)
    docs = upload(db, docs)
def process_couchdb_changes():
    server = Server(settings.COUCHDB_SERVER)
    db = server.get_or_create_db("openelm")
    consumer = Consumer(db)
    sequence = SyncSequenceCache.objects.get(pk=1)
    changes = consumer.fetch(filter="record/new_records",
                             since=sequence.last_sequence_id)
    if changes:
        for change in changes["results"]:
            record_id = change["id"]
            copy_photo_for_record.delay(record_id)
            send_new_record_email.delay(record_id)
        sequence.last_sequence_id = changes["last_seq"]
        sequence.save()
def before_request():
    """Make sure we are connected to the database for each request."""
    username = app.config.get('COUCHDB_SERVER_USERNAME')
    password = app.config.get('COUCHDB_SERVER_PASSWORD')
    if username and password:
        server = Server(app.config['COUCHDB_SERVER'],
                        filters=[BasicAuth(username, password)])
    else:
        server = Server(app.config['COUCHDB_SERVER'])
    # create databases
    g.mdb = server.get_or_create_db(app.config['COUCHDB_MESSAGES_DB'])
    Message.set_db(g.mdb)
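The hook above reads four configuration keys. A minimal sketch of the Flask config it assumes (the values are hypothetical):

# Hypothetical app configuration mirroring the keys read above.
app.config.update(
    COUCHDB_SERVER='http://127.0.0.1:5984',
    COUCHDB_SERVER_USERNAME=None,   # set both to enable BasicAuth
    COUCHDB_SERVER_PASSWORD=None,
    COUCHDB_MESSAGES_DB='messages',
)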
class Importer(object):
    def __init__(self):
        self.server = None
        self.db = None

    def prep_connection(self, uri, db_name):
        # create server object
        self.server = Server(uri=uri)
        # create database
        self.db = self.server.get_or_create_db(db_name)
        # associate local document classes with the db
        Book.set_db(self.db)
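A usage sketch, assuming a couchdbkit Book document class defined elsewhere in the project; the db name is made up:

importer = Importer()
importer.prep_connection('http://127.0.0.1:5984', 'library')
# From here on, Book.get()/Book.save() operate against the 'library' database.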
class ConflictResolverTestCase(unittest.TestCase):
    """
    Test the conflict resolution policy when we're in a state of lag, that
    is, there's more than 10 minutes time difference between conflicting
    versions of a document: the document with the highest rev count wins.
    If not in a lag, the latest document should win.
    """
    database_name = 'session_store'
    repdb_name = 'session_store_rep'
    s = None
    db = None
    repdb = None
    docid = 'testing_doc'
    doc = None
    replicated_doc = None
    source_rev_count = 0

    def setUp(self):
        """
        Create a connection to the database according to configuration,
        and create the docs that act in the test.
        """
        self.s = Server('http://localhost:5984')
        assert len(self.s.info()) != 0, \
            'CouchDB server is down or not working properly.'
        self.db = self.s.get_or_create_db(self.database_name)
        self.repdb = self.s.get_or_create_db(self.repdb_name)
        self.doc = {'_id': self.docid,
                    'timestamp': str(int(round(time.time()))),
                    'text': 'initial text',
                    'rev_count': str(self.source_rev_count)}
        # clear previous test residue
        if self.docid in self.db:
            self.db.delete_doc(self.docid)
        if self.docid in self.repdb:
            self.repdb.delete_doc(self.docid)
        self.db.save_doc(self.doc)

    def tearDown(self):
        pass
def __getattr__(self, name):
    if not self._settings:
        self._load()
    if name == 'db' or name == 'connection':
        if 'db' not in self._settings:
            p = urlparse.urlparse(self._settings['db_url'])
            database_uri = "{0.scheme}://{0.netloc}".format(p)
            # strip the leading "/" so the path can be used as a db name
            database_name = p.path.lstrip('/')
            connection = Server(database_uri)
            db = connection.get_or_create_db(database_name)
            self._settings['connection'] = connection
            self._settings['db'] = db
    return self._settings[name]
def index(request):
    server = Server(settings.COUCHDB_SERVER)
    databases = [server.get_or_create_db(db).info()
                 for db in server.all_dbs()]
    create_database_form = CreateDatabaseForm(request.POST or None)
    if create_database_form.is_valid():
        database_name = create_database_form.cleaned_data["name"]
        return HttpResponseRedirect(reverse("cushion_database",
                                            args=(database_name,)))
    return render_to_response("cushion/index.html",
                              {"title": "CouchDB",
                               "server": server,
                               "databases": databases,
                               "form": create_database_form},
                              context_instance=RequestContext(request))
def migrate(dbstring, path):
    server_uri, db_name, docid = parse_uri(dbstring)
    if "@" in server_uri:
        username, password, server_uri = parse_auth(server_uri)
        server = Server(server_uri)
        server.res.add_authorization(httpc.BasicAuth((username, password)))
    else:
        server = Server(server_uri)

    db = server.get_or_create_db("couchdbproxy")

    # make sure design docs are here
    loader = FileSystemDocsLoader(path)
    loader.sync(db, verbose=True)

    nodes = db.view("api01/nodes", include_docs=True)
    nodes_docs = []
    for node in nodes:
        doc = node["doc"]
        doc["type"] = "machine"
        doc["name"] = doc["nodename"]
        del doc["nodename"]
        nodes_docs.append(doc)
    db.bulk_save(nodes_docs, use_uuids=False)

    aliases = db.view("api01/aliases", include_docs=True)
    aliases_docs = []
    for alias in aliases:
        doc = alias["doc"]
        user = db.view("api01/users", include_docs=True).one()
        if user:
            doc["nodename"] = user['doc']["username"]
        del doc["port"]
        aliases_docs.append(doc)
    db.bulk_save(aliases_docs, use_uuids=False)

    users = db.view("api01/users", include_docs=True)
    users_docs = []
    for user in users:
        doc = user["doc"]
        doc["type"] = "node"
        doc["machine"] = doc["nodename"]
        doc["nodename"] = doc["username"]
        users_docs.append(doc)
    db.bulk_save(users_docs, use_uuids=False)
class Couch(object):
    def __init__(self):
        self.server = Server()
        # delete_db raises ResourceNotFound on a fresh server, so guard it
        try:
            self.server.delete_db('test')
        except Exception:
            pass
        self.db = self.server.get_or_create_db('test')

    def populate(self):
        things = [
            {"name": "Vishnu"},
            {"name": "Lakshmi"},
            {"name": "Ganesha"},
            {"name": "Krishna"},
            {"name": "Murugan"}
        ]
        self.db.save_docs(things)

    def count(self):
        return self.db.all_docs().count()
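A quick sketch of exercising the fixture above; the count assumes a freshly recreated 'test' database:

c = Couch()            # recreates the 'test' database
c.populate()
assert c.count() == 5  # the five documents saved by populate()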
class Couch(object):
    """Convenience wrapper around the ``couchdbkit`` ``Server`` and
    ``FileSystemDocsLoader`` internals.

    Provides the ``couchdbkit.Database`` called "thruflo" as ``self.db``.
    """

    def __init__(self, dbname='thruflo', settings={}):
        self.server = Server()
        self.db = self.server.get_or_create_db(dbname)
        self.app_settings = settings

    def sync(self):
        path = join_path(dirname(__file__), '_design')
        self.loader = FileSystemDocsLoader(path)
        self.loader.sync(self.db)
class DBProcess(object):
    '''
    This is a generic data-processing class used to handle data processing
    based upon documents found in an instance of a CouchDB. In order for
    this to work, the function reference that you pass into this object's
    constructor must be of the form

        def yourFunction(*args, **kwargs)
    '''

    def __init__(self, servername, dbname, yourFunction):
        self.sv = Server(servername)
        self.db = self.sv.get_or_create_db(dbname)
        self.func = yourFunction

    def doprocess(self, *args, **kwargs):
        '''
        call yourFunction with the args/kwargs and return the results
        '''
        return self.func(*args, **kwargs)

    def upload(self, doc):
        '''
        Save the document to the database.
        '''
        if self.db.doc_exist(doc['_id']):
            doc['_rev'] = self.db.get_rev(doc['_id'])
        res = self.db.save_doc(doc)
        if not res['ok']:
            raise KDataDatabaseError('DBProcess could not save doc')
        return doc['_id']

    def view(self, view_name, schema=None, wrapper=None, **params):
        '''
        just calls the couchdbkit.Database.view method; all arguments
        are the same
        '''
        return self.db.view(view_name, schema, wrapper, **params)

    def get(self, doc_id, **params):
        '''
        just calls the couchdbkit.Database.get method; all arguments
        are the same
        '''
        return self.db.get(doc_id, **params)
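A usage sketch: DBProcess pairs an arbitrary *args/**kwargs function with a database handle. The server URL, db name, and function here are all made up:

def summarize(*args, **kwargs):  # hypothetical processing function
    return {'args': args, 'kwargs': kwargs}

proc = DBProcess('http://127.0.0.1:5984', 'kdata', summarize)
result = proc.doprocess(1, 2, mode='test')
# result -> {'args': (1, 2), 'kwargs': {'mode': 'test'}}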
class BaseConflictResolver(object):
    """
    Base class for all the conflict resolver classes that can be treated
    like plugins.
    """
    db_uri = None
    dbname = None
    dbserver = None
    conflicts = None
    db = None
    conflict = None
    logger = None

    def __init__(self, database_uri=None, dbname=None):
        '''
        Create a Server object, initiate the connection and set up the
        database URI (not particularly in that order :-). Oh, and also
        sync up the views from the on-disk storage onto CouchDB.
        '''
        self.logger = configuration.get_logger(
            logging_instance=conflictlogging,
            system_name='conflictmanager')
        configuration.info(self.logger)
        self.logger.info('Conflict manager starting.')
        if not database_uri:
            database_uri = "http://127.0.0.1:5984"
        if not dbname:
            dbname = 'session_store'
        self.dbname = dbname
        self.db_uri = database_uri
        self.dbserver = Server(self.db_uri)
        loader = FileSystemDocsLoader('/root/relaxession/_design/')
        try:
            self.db = self.dbserver.get_or_create_db(self.dbname)
            loader.sync(self.db, verbose=True)
        except Exception as e:
            self.logger.info('Init error: %s' % e)
            sys.exit(1)
        self.db.res.put('/_revs_limit', str(configuration.REVS_LIMIT))

    def resolve(self):
        raise NotImplementedError

    def resolveForever(self):
        raise NotImplementedError
def uploadFile(fname, uri, dbname):
    print datetime.datetime.strftime(
        datetime.datetime.today(), "%Y/%m/%d %H:%M:%S"
    ) + ": Upload contents of %s to %s/%s started" % (fname, uri, dbname)

    # connect to the db
    theServer = Server(uri)
    db = theServer.get_or_create_db(dbname)

    # loop on file for upload; see the python csv module docs for other
    # options, such as using the tab delimiter. The first line in your csv
    # file should contain all of the "keys" and all subsequent lines hold
    # the values for those keys.
    reader = DictReader(open(fname, "rU"), dialect="excel")

    # used for bulk uploading
    docs = list()
    checkpoint = 100
    for doc in reader:
        # parseDoc just converts strings that are really numbers into
        # ints and floats
        newdoc = parseDoc(doc)
        # Here we could check whether the doc is already on the database;
        # if it is, assigning the _rev key would update the doc on the db.
        # The following two lines were commented out to ensure docs were
        # created:
        # if db.doc_exist(newdoc.get('_id')):
        #     newdoc['_rev'] = db.get_rev(newdoc.get('_id'))
        docs.append(newdoc)
        if len(docs) % checkpoint == 0:
            docs = upload(db, docs)
    # don't forget the last batch
    docs = upload(db, docs)

    print datetime.datetime.strftime(
        datetime.datetime.today(), "%Y/%m/%d %H:%M:%S"
    ) + ": Upload contents of %s to %s/%s finished" % (fname, uri, dbname)
class TierProcess(object):
    '''
    This is a generic tier-processing class used to handle data processing
    based upon documents found in an instance of a CouchDB. In order for
    this to work, the function reference that you pass into this object's
    constructor must be of the form

        def myfunction(*args, **kwargs)
    '''

    def __init__(self, servername, dbname, tiername, dofunction):
        self.sv = Server(servername)
        self.db = self.sv.get_or_create_db(dbname)
        self.name = tiername
        self.func = dofunction

    def doprocess(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def record(self, doc, tierdict):
        pass  # the method body is not included in the snippet
def __init__(self, node, db):
    server = Server(node)
    self._db = server.get_or_create_db(db)
parser.add_option("-d", "--database", dest="database", help="db name in couchdb", default="tmp") parser.add_option("-i", "--input", dest="input", help="json file to post to couchdb", default="test.json") options, args = parser.parse_args() try: server = Server(options.node) db = server.get_or_create_db(options.database) input_json = json.loads(open(options.input).read()) log.info("loaded file : {0} into a json document".format( options.input)) attachments = {} build_info = flatten(["buildinfo"], input_json) info = flatten(["info"], input_json) info = {"info.test_time": info["info.test_time"]} name = {"name": input_json["name"]} z = dict(build_info.items() + info.items() + name.items()) if "buildinfo.thisNode" in z: del z["buildinfo.thisNode"] if "buildinfo.couchApiBase" in z:
from pylonsapp.tests import *
from couchdbkit import Server

try:
    server = Server()
    if server:
        pass
except Exception:
    server = None
else:
    try:
        server.delete_db('formalchemy_test')
    except Exception:
        pass
    db = server.get_or_create_db('formalchemy_test')


def couchdb_running(func):
    if server:
        return func
    else:
        def f(self):
            pass
        return f


class TestCouchdbController(TestController):

    @couchdb_running
    def test_index(self):
        response = self.app.get('/couchdb')
    del docs
    return list()


if __name__ == '__main__':
    fname = sys.argv[1]
    uri = sys.argv[2]
    dbname = sys.argv[3]
    print 'Upload contents of %s to %s/%s' % (fname, uri, dbname)
    if fname.endswith('.gz'):
        fname = gunzip(fname)

    # connect to the db
    cloudant = Server(uri)
    db = cloudant.get_or_create_db(dbname)
    print db.info()

    # sync the views for prebuilt indices
    loader = FileSystemDocsLoader('_design/')
    loader.sync(db, verbose=True)

    # loop on file for upload
    reader = DictReader(open(fname), delimiter='|')

    # used for bulk uploading
    docs = list()
    checkpoint = 1000
    n = 0
    start = time.time()
    while True:
        try:
            consumer.wait(process, **self._params)
        except NoMoreData:
            pass


def reserve(db, change):
    try:
        body = db.res.put('_design/message/_update/reserve/%s'
                          % change['id']).json_body
    except ResourceConflict:
        body = 'conflict while updating'
    if body == 'ok':
        print 'reserved:', change['id']
    else:
        print 'error:', body, change['id']


if __name__ == "__main__":
    import sys

    server = Server(cfg.server)
    db = server.get_or_create_db(cfg.db)
    rcw = ReconnectingChangesWaiter(db)
    rcw.wait(partial(reserve, db), filter='message/state', state='available')
class CouchdbManager(PersistenceManager):
    """This is a couchdb manager for the workspace; it will load from
    the couchdb databases."""

    def __init__(self, uri):
        self._last_seq_ack = 0
        model.api.log("Initializing CouchDBManager for url [%s]" % uri)
        self._lostConnection = False
        self.__uri = uri
        self.__dbs = {}
        self.__seq_nums = {}
        self.__serv = NoConectionServer()
        self.mutex = threading.Lock()
        self._available = False
        try:
            self.testCouchUrl(uri)
            url = urlparse(uri)
            print("Setting user,pass %s %s" % (url.username, url.password))
            self.__serv = Server(uri=uri)
            self.__serv.resource_class.credentials = (url.username,
                                                      url.password)
            self._available = True
        except Exception:
            model.api.log("No route to couchdb server on: %s" % uri)
            print(traceback.format_exc())

    def isAvailable(self):
        return self._available

    def lostConnectionResolv(self):
        self._lostConnection = True
        self.__dbs.clear()
        self.__serv = NoConectionServer()

    def reconnect(self):
        ret_val = False
        ur = self.__uri
        if CouchdbManager.testCouch(ur):
            self.__serv = Server(uri=ur)
            self.__dbs.clear()
            self._lostConnection = False
            ret_val = True
        return ret_val

    @staticmethod
    def testCouch(uri):
        host, port = None, None
        try:
            import socket
            url = urlparse(uri)
            proto = url.scheme
            host = url.hostname
            port = url.port
            port = port if port else socket.getservbyname(proto)
            s = socket.socket()
            s.settimeout(1)
            s.connect((host, int(port)))
        except Exception:
            return False
        model.api.log("Connecting Couch to: %s:%s" % (host, port))
        return True

    def testCouchUrl(self, uri):
        url = urlparse(uri)
        host = url.hostname
        port = url.port
        self.test(host, int(port))

    def test(self, address, port):
        import socket
        s = socket.socket()
        s.settimeout(1)
        s.connect((address, port))

    @trap_timeout
    def getWorkspacesNames(self):
        return filter(lambda x: not x.startswith("_"), self.__serv.all_dbs())

    def workspaceExists(self, name):
        return name in self.getWorkspacesNames()

    @trap_timeout
    def addWorkspace(self, aWorkspace):
        self.__serv.create_db(aWorkspace.lower())
        return self.__getDb(aWorkspace)

    @trap_timeout
    def addDocument(self, aWorkspaceName, documentId, aDocument):
        self.incrementSeqNumber(aWorkspaceName)
        self.__getDb(aWorkspaceName)[documentId] = aDocument

    @trap_timeout
    def saveDocument(self, aWorkspaceName, aDocument):
        self.incrementSeqNumber(aWorkspaceName)
        model.api.log("Saving document in remote workspace %s"
                      % aWorkspaceName)
        self.__getDb(aWorkspaceName).save_doc(aDocument, use_uuids=True,
                                              force_update=True)

    @trap_timeout
    def __getDb(self, aWorkspaceName):
        aWorkspaceName = aWorkspaceName.lower()
        model.api.log("Getting workspace [%s]" % aWorkspaceName)
        workspacedb = self.__dbs.get(aWorkspaceName,
                                     self.__serv.get_db(aWorkspaceName))
        if not self.__dbs.has_key(aWorkspaceName):
            model.api.log("Asking couchdb for workspace [%s]" % aWorkspaceName)
            self.__dbs[aWorkspaceName] = workspacedb
            self.__seq_nums[aWorkspaceName] = workspacedb.info()['update_seq']
        return workspacedb

    @trap_timeout
    def getDocument(self, aWorkspaceName, documentId):
        model.api.log("Getting document for workspace [%s]" % aWorkspaceName)
        return self.__getDb(aWorkspaceName).get(documentId)

    @trap_timeout
    def checkDocument(self, aWorkspaceName, documentName):
        return self.__getDb(aWorkspaceName).doc_exist(documentName)

    @trap_timeout
    def replicate(self, workspace, *targets_dbs, **kwargs):
        model.api.log("Targets to replicate %s" % str(targets_dbs))
        for target_db in targets_dbs:
            src_db_path = "/".join([self.__uri, workspace])
            dst_db_path = "/".join([target_db, workspace])
            try:
                model.api.devlog(
                    "workspace: %s, src_db_path: %s, dst_db_path: %s, "
                    "**kwargs: %s" % (workspace, src_db_path, dst_db_path,
                                      kwargs))
                self.__peerReplication(workspace, src_db_path, dst_db_path,
                                       **kwargs)
            except ResourceNotFound as e:
                raise e
            except Exception as e:
                model.api.devlog(e)
                raise

    def __peerReplication(self, workspace, src, dst, **kwargs):
        mutual = kwargs.get("mutual", True)
        continuous = kwargs.get("continuous", True)
        ct = kwargs.get("create_target", True)
        self.__serv.replicate(workspace, dst, mutual=mutual,
                              continuous=continuous, create_target=ct)
        if mutual:
            self.__serv.replicate(dst, src, continuous=continuous, **kwargs)

    def getLastChangeSeq(self, workspaceName):
        self.mutex.acquire()
        seq = self.__seq_nums[workspaceName]
        self.mutex.release()
        return seq

    def setLastChangeSeq(self, workspaceName, seq_num):
        self.mutex.acquire()
        self.__seq_nums[workspaceName] = seq_num
        self.mutex.release()

    @trap_timeout
    def waitForDBChange(self, db_name, since=0, timeout=15000):
        """Be warned: this will return after the database has a change; if
        there was one before the call, it will return immediately with the
        changes done."""
        changes = []
        last_seq = max(self.getLastChangeSeq(db_name), since)
        db = self.__getDb(db_name)
        with ChangesStream(db, feed="longpoll", since=last_seq,
                           timeout=timeout) as stream:
            for change in stream:
                if change['seq'] > self.getLastChangeSeq(db_name):
                    changes.append(change)
            last_seq = reduce(lambda x, y: max(y['seq'], x), changes,
                              self.getLastChangeSeq(db_name))
            self.setLastChangeSeq(db_name, last_seq)
        return changes

    @trap_timeout
    def delete_all_dbs(self):
        for db in self.__serv.all_dbs():
            self.__serv.delete_db(db)

    @trap_timeout
    def existWorkspace(self, name):
        return name in self.__serv.all_dbs()

    @trap_timeout
    def workspaceDocumentsIterator(self, workspaceName):
        return filter(lambda x: not x["id"].startswith("_"),
                      self.__getDb(workspaceName).documents(include_docs=True))

    @trap_timeout
    def removeWorkspace(self, workspace_name):
        return self.__serv.delete_db(workspace_name)

    @trap_timeout
    def remove(self, workspace, host_id):
        self.incrementSeqNumber(workspace)
        self.__dbs[workspace].delete_doc(host_id)

    @trap_timeout
    def compactDatabase(self, aWorkspaceName):
        self.__getDb(aWorkspaceName).compact()

    def pushReports(self):
        vmanager = ViewsManager()
        reports = os.path.join(os.getcwd(), "views", "reports")
        workspace = self.__serv.get_or_create_db("reports")
        vmanager.addView(reports, workspace)
        return self.__uri + "/reports/_design/reports/index.html"

    def addViews(self, workspaceName):
        vmanager = ViewsManager()
        workspace = self.__getDb(workspaceName)
        for v in vmanager.getAvailableViews():
            vmanager.addView(v, workspace)

    def getViews(self, workspaceName):
        vmanager = ViewsManager()
        workspace = self.__getDb(workspaceName)
        return vmanager.getViews(workspace)

    def syncWorkspaceViews(self, workspaceName):
        vmanager = ViewsManager()
        workspace = self.__getDb(workspaceName)
        installed_views = vmanager.getViews(workspace)
        for v in vmanager.getAvailableViews():
            if v not in installed_views:
                vmanager.addView(v, workspace)

    def incrementSeqNumber(self, workspaceName):
        self.mutex.acquire()
        self.__seq_nums[workspaceName] += 1
        self.mutex.release()
class CouchDbManager(AbstractPersistenceManager):
    """This is a couchdb manager for the workspace; it will load from
    the couchdb databases."""

    def __init__(self, uri, couch_exception_callback):
        super(CouchDbManager, self).__init__()
        getLogger(self).debug(
            "Initializing CouchDBManager for url [%s]" % uri)
        self._lostConnection = False
        self.__uri = uri
        self._available = False
        self.couch_exception_callback = couch_exception_callback
        test_couch_thread = threading.Thread(
            target=self.continuously_check_connection)
        test_couch_thread.daemon = True
        test_couch_thread.start()
        try:
            if uri is not None:
                self.testCouchUrl(uri)
                url = urlparse(uri)
                getLogger(self).debug(
                    "Setting user,pass %s %s" % (url.username, url.password))
                self.__serv = Server(uri=uri)
                self.__serv.resource_class.credentials = (url.username,
                                                          url.password)
                self._available = True
                self.pushReports()
                self._loadDbs()
        except Exception:
            getLogger(self).warn("No route to couchdb server on: %s" % uri)
            getLogger(self).debug(traceback.format_exc())

    def continuously_check_connection(self):
        """Intended to be run on a separate thread. Calls the module-level
        function test_couch every second to see if the response from the
        server_uri of the DB is still 200. Calls the exception_callback
        if we can't access the server three times in a row.
        """
        tolerance = 0
        server_uri = self.__uri
        while True:
            time.sleep(1)
            test_was_successful = test_couch(server_uri)
            if test_was_successful:
                tolerance = 0
            else:
                tolerance += 1
                if tolerance == 3:
                    self.couch_exception_callback()
                    return False  # kill the thread if something went wrong

    def _create(self, name):
        db = self.__serv.create_db(name.lower())
        return CouchDbConnector(db)

    def _delete(self, name):
        self.__serv.delete_db(name)

    def _loadDbs(self):

        def conditions(database):
            begins_with_underscore = database.startswith("_")
            is_blacklisted = database in CONST_BLACKDBS
            return not begins_with_underscore and not is_blacklisted

        try:
            for dbname in filter(conditions, self.__serv.all_dbs()):
                if dbname not in self.dbs.keys():
                    getLogger(self).debug(
                        "Asking for dbname[%s], registering for lazy "
                        "initialization" % dbname)
                    self.dbs[dbname] = lambda x: self._loadDb(x)
        except restkit.errors.RequestError as req_error:
            getLogger(self).error("Couldn't load databases. The connection "
                                  "to the CouchDB was probably lost.")

    def _loadDb(self, dbname):
        db = self.__serv.get_db(dbname)
        seq = db.info()['update_seq']
        self.dbs[dbname] = CouchDbConnector(db, seq_num=seq)
        return self.dbs[dbname]

    def refreshDbs(self):
        """Refresh databases using the inherited method. On exception,
        assume no databases are available.
        """
        try:
            return AbstractPersistenceManager.refreshDbs()
        except Exception:
            return []

    def pushReports(self):
        vmanager = ViewsManager()
        reports = os.path.join(os.getcwd(), "views", "reports")
        try:
            workspace = self.__serv.get_or_create_db("reports")
            vmanager.addView(reports, workspace)
        except Exception:
            getLogger(self).warn("Reports database couldn't be uploaded. "
                                 "You need to be an admin to do it")
        return self.__uri + "/reports/_design/reports/index.html"

    @staticmethod
    def testCouch(uri):
        """Redirect to the module-level function of the same name, which
        serves the same purpose and is used by other classes too."""
        return test_couch(uri)

    def testCouchUrl(self, uri):
        if uri is not None:
            url = urlparse(uri)
            host = url.hostname
            port = url.port
            self.test(host, int(port))

    def test(self, address, port):
        import socket
        s = socket.socket()
        s.settimeout(1)
        s.connect((address, port))

    def replicate(self, workspace, *targets_dbs, **kwargs):
        getLogger(self).debug("Targets to replicate %s" % str(targets_dbs))
        for target_db in targets_dbs:
            src_db_path = "/".join([self.__uri, workspace])
            dst_db_path = "/".join([target_db, workspace])
            try:
                getLogger(self).info(
                    "workspace: %s, src_db_path: %s, dst_db_path: %s, "
                    "**kwargs: %s" % (workspace, src_db_path, dst_db_path,
                                      kwargs))
                self.__peerReplication(workspace, src_db_path, dst_db_path,
                                       **kwargs)
            except ResourceNotFound as e:
                raise e
            except Exception as e:
                getLogger(self).error(e)
                raise

    def __peerReplication(self, workspace, src, dst, **kwargs):
        mutual = kwargs.get("mutual", True)
        continuous = kwargs.get("continuous", True)
        ct = kwargs.get("create_target", True)
        self.__serv.replicate(workspace, dst, mutual=mutual,
                              continuous=continuous, create_target=ct)
        if mutual:
            self.__serv.replicate(dst, src, continuous=continuous, **kwargs)
class CouchDbManager(AbstractPersistenceManager):
    """This is a couchdb manager for the workspace; it will load from
    the couchdb databases."""

    def __init__(self, uri):
        super(CouchDbManager, self).__init__()
        getLogger(self).debug(
            "Initializing CouchDBManager for url [%s]" % uri)
        self._lostConnection = False
        self.__uri = uri
        self.__serv = NoConectionServer()
        self._available = False
        try:
            if uri is not None:
                self.testCouchUrl(uri)
                url = urlparse(uri)
                getLogger(self).debug(
                    "Setting user,pass %s %s" % (url.username, url.password))
                self.__serv = Server(uri=uri)
                self.__serv.resource_class.credentials = (url.username,
                                                          url.password)
                self._available = True
                self.pushReports()
                self._loadDbs()
        except Exception:
            getLogger(self).warn("No route to couchdb server on: %s" % uri)
            getLogger(self).debug(traceback.format_exc())

    #@trap_timeout
    def _create(self, name):
        db = self.__serv.create_db(name.lower())
        return CouchDbConnector(db)

    #@trap_timeout
    def _delete(self, name):
        self.__serv.delete_db(name)

    #@trap_timeout
    def _loadDbs(self):
        conditions = lambda x: not x.startswith("_") and x != 'reports'
        for dbname in filter(conditions, self.__serv.all_dbs()):
            if dbname not in self.dbs.keys():
                getLogger(self).debug(
                    "Asking for dbname[%s], registering for lazy "
                    "initialization" % dbname)
                self.dbs[dbname] = lambda x: self._loadDb(x)

    def _loadDb(self, dbname):
        db = self.__serv.get_db(dbname)
        seq = db.info()['update_seq']
        self.dbs[dbname] = CouchDbConnector(db, seq_num=seq)
        return self.dbs[dbname]

    #@trap_timeout
    def pushReports(self):
        vmanager = ViewsManager()
        reports = os.path.join(os.getcwd(), "views", "reports")
        workspace = self.__serv.get_or_create_db("reports")
        vmanager.addView(reports, workspace)
        return self.__uri + "/reports/_design/reports/index.html"

    def lostConnectionResolv(self):
        self._lostConnection = True
        self.__dbs.clear()
        self.__serv = NoConectionServer()

    def reconnect(self):
        ret_val = False
        ur = self.__uri
        if CouchDbManager.testCouch(ur):
            self.__serv = Server(uri=ur)
            self.__dbs.clear()
            self._lostConnection = False
            ret_val = True
        return ret_val

    @staticmethod
    def testCouch(uri):
        if uri is not None:
            host, port = None, None
            try:
                import socket
                url = urlparse(uri)
                proto = url.scheme
                host = url.hostname
                port = url.port
                port = port if port else socket.getservbyname(proto)
                s = socket.socket()
                s.settimeout(1)
                s.connect((host, int(port)))
            except Exception:
                return False
            #getLogger(CouchdbManager).info("Connecting Couch to: %s:%s" % (host, port))
            return True

    def testCouchUrl(self, uri):
        if uri is not None:
            url = urlparse(uri)
            proto = url.scheme
            host = url.hostname
            port = url.port
            self.test(host, int(port))

    def test(self, address, port):
        import socket
        s = socket.socket()
        s.settimeout(1)
        s.connect((address, port))

    #@trap_timeout
    def replicate(self, workspace, *targets_dbs, **kwargs):
        getLogger(self).debug("Targets to replicate %s" % str(targets_dbs))
        for target_db in targets_dbs:
            src_db_path = "/".join([self.__uri, workspace])
            dst_db_path = "/".join([target_db, workspace])
            try:
                getLogger(self).info(
                    "workspace: %s, src_db_path: %s, dst_db_path: %s, "
                    "**kwargs: %s" % (workspace, src_db_path, dst_db_path,
                                      kwargs))
                self.__peerReplication(workspace, src_db_path, dst_db_path,
                                       **kwargs)
            except ResourceNotFound as e:
                raise e
            except Exception as e:
                getLogger(self).error(e)
                raise

    def __peerReplication(self, workspace, src, dst, **kwargs):
        mutual = kwargs.get("mutual", True)
        continuous = kwargs.get("continuous", True)
        ct = kwargs.get("create_target", True)
        self.__serv.replicate(workspace, dst, mutual=mutual,
                              continuous=continuous, create_target=ct)
        if mutual:
            self.__serv.replicate(dst, src, continuous=continuous, **kwargs)
def translate(self, mess, args):
    params = {}
    # todo: the regex does not handle all assignment cases correctly
    if args.count("="):
        regex = re.compile("(?P<key>\w*?)=(?P<value>\w*?)\s|$")
        last_value = ""
        for (k, v) in regex.findall(args):
            if k and v:
                params[k.lower()] = v
                last_value = v
        # urls can be crazy and all those '=' signs can muck things up
        if len(args.split("word_source=")) > 1:
            params['word_source'], args = self.get_word_source(args)
        if len(args.split("=" + last_value + " ")) > 1:
            params['query'] = args.split("=" + last_value + " ")[1].strip()
    else:
        params['query'] = args.strip()

    try:
        self.bot_log.debug("connecting to server: %s" % COUCHDB_URI)
        server = Server(COUCHDB_URI)
    except Exception:
        self.bot_log.error("could not connect to database %s" % COUCHDB_URI)
        return "hi"  # return "could not connect to database."

    try:
        db = server.get_or_create_db(COUCHDB)
    except Exception:
        self.bot_log.error("could not create or access database: %s" % COUCHDB)
        return "could not create or access dictionary database."

    UserRecord.set_db(db)
    user = str(mess.getFrom())
    result = db.view("user_record/last_used", key=user)
    self.bot_log.info("getting record: user_record/last_used: %s" % user)
    if result.count() == 0:
        user_record = UserRecord(user_name=user,
                                 creation_time=datetime.datetime.utcnow())
    if result.count() > 1:
        self.bot_log.error("ERROR: Multiple User Records found.")
        return "ERROR: Multiple User Records found."
    else:
        record = result.one()
        user_record = UserRecord.get(record['id'])
        # dump parameters not in the query into user_record
        missing_params = set(['source', 'target', 'word_source']).difference(set(params))
        for each in missing_params:
            if record['value'].has_key('last_' + each):
                user_record['last_' + each] = record['value']['last_' + each]

    # dump parameters into user_record
    for each in params:
        user_record['last_' + each] = params[each]

    # Logic check (more to come?)
    if len(missing_params) == 3 and not params.has_key('query'):
        return "Error: No query or setting present:\n" + \
               "Available settings are:\n" + \
               "source\n" + "target\n" + "word_source\n"
    if user_record['last_source'] == user_record['last_target']:
        return "Error: source language and target language are the same."

    # don't save query & refresh
    if user_record.contains('last_query'):
        del user_record['last_query']
    if user_record.contains('last_refresh'):
        del user_record['last_refresh']
    # save if anything other than the query was updated
    if params:
        user_record.save()
    if not params.has_key('query'):
        self.bot_log.info("User Record Updated")
        return "User Record Updated"

    DictionaryRecord.set_db(db)
    dict_record = db.view("dictionary_record/translate",
                          key=[params['query'],
                               user_record['last_source'],
                               user_record['last_target']])
    # We should be building a pipeline here. This will change when we split
    # into runners and add other ones, but for the moment any one key won't
    # have more than two records:
    #
    # if refresh == both:    write log entries and fire translate functions
    # if refresh == shallow: write log entry and fire translate_shallow
    # if refresh == full:    write log entry and fire translate_full
    # else:                  display each record
    response_message = ""
    if dict_record.count():
        for each in dict_record:
            if each['value'][0]:
                if params.has_key('refresh') and (params['refresh'] == 'shallow' or
                                                  params['refresh'] == 'both'):
                    self.bot_log.info("refreshing shallow record: %s" % params['query'])
                    response_message += self.translate_shallow(mess, params,
                                                               user_record, each['id'])
                else:
                    self.bot_log.info("retrieved shallow record: %s" % params['query'])
                    response_message += "simple: " + each['value'][1] + "\n"
            else:
                if params.has_key('refresh') and (params['refresh'] == 'full' or
                                                  params['refresh'] == 'both'):
                    self.bot_log.info("refreshing full record: %s" % params['query'])
                    response_message += self.translate_full(mess, params,
                                                            user_record, each['id'])
                else:
                    full_lookup = []
                    self.bot_log.info("retrieved full record: %s" % params['query'])
                    for part_of_speech in each['value'][1].keys():
                        full_lookup.append(part_of_speech + ":")
                        for word in each['value'][1][part_of_speech]:
                            full_lookup.append(word)
                        full_lookup.append("")
                    response_message += "\n".join(full_lookup)
        # just turning this off for now
        # todo: giant hack! and it will refresh twice if a full record is present
        # if params.has_key('refresh') and (params['refresh'] == 'full' or
        #                                   params['refresh'] == 'both'):
        #     self.bot_log.info("refreshing full record: %s" % params['query'])
        #     response_message += self.translate_full(mess, params, user_record, each['id'])
    else:
        response_message += self.translate_shallow(mess, params, user_record)
        response_message += self.translate_full(mess, params, user_record)
    return response_message
from nose.tools import assert_in
from lettuce import world
from couchdbkit import Server
from tornado.escape import json_decode
from tornado.httpclient import HTTPClient, HTTPRequest

from newebe.config import CONFIG
from newebe.settings import COUCHDB_DB_NAME
from newebe.apps.profile.models import UserManager, User

ROOT_URL = u"http://localhost:8888/"
SECOND_NEWEBE_ROOT_URL = u"http://localhost:8889/"

server = Server()
server2 = Server()
db = server.get_or_create_db(CONFIG.db.name)
# the original used `server` here too; server2 is presumably what was meant
db2 = server2.get_or_create_db(CONFIG.db.name + "2")


def reset_documents(cls, get_func, database=db):
    '''
    Clear all documents corresponding to *cls*.
    '''
    cls._db = database
    docs = get_func()
    while docs:
        for doc in docs:
            doc.delete()
        docs = get_func()
def main():
    parser = OptionParser()
    parser.add_option("-n", "--node", dest="node",
                      default="http://127.0.0.1:5984",
                      help="couchdb ip, defaults to 127.0.0.1:5984")
    parser.add_option("-d", "--database", dest="database",
                      help="db name in couchdb", default="tmp")
    parser.add_option("-i", "--input", dest="input",
                      help="json file to post to couchdb",
                      default="test.json")
    options, args = parser.parse_args()
    try:
        server = Server(options.node)
        db = server.get_or_create_db(options.database)
        log.info("loading file {0}".format(options.input))
        input_json = json.loads(open(options.input).read())
        post_to_cbm(input_json)
        attachments = {}
        build_info = flatten(["buildinfo"], input_json)
        info = flatten(["info"], input_json)
        info = {"info.test_time": info["info.test_time"]}
        name = {"name": input_json["name"]}
        z = dict(build_info.items() + info.items() + name.items())
        if "buildinfo.thisNode" in z:
            del z["buildinfo.thisNode"]
        if "buildinfo.couchApiBase" in z:
            del z["buildinfo.couchApiBase"]
        if "time" in z:
            del z["time"]

        if "ns_server_data" in input_json:
            attachments["ns_server_data"] = []
            ns_server_data = input_json["ns_server_data"]
            index = 1
            log.info("flattening ns_server_data")
            for item in ns_server_data:
                if "op" in item:
                    samples = item["op"]["samples"]
                    _time = 0
                    _new_samples = []
                    max_length = 1
                    for sample in samples:
                        if len(samples[sample]) > max_length:
                            max_length = len(samples[sample])
                    for i in range(0, max_length):
                        _new_sample = {}
                        for sample in samples:
                            if len(samples[sample]) > i:
                                _new_sample[sample] = samples[sample][i]
                        _new_sample.update(z)
                        _new_sample.update({"row": index})
                        attachments["ns_server_data"].append(_new_sample)
                        index += 1
            del input_json["ns_server_data"]

        if "ns_server_data_system" in input_json:
            log.info("flattening ns_server_data_system")
            attachments["ns_server_data_system"] = []
            for index, row in enumerate(input_json["ns_server_data_system"],
                                        start=1):
                temp_row = convert(row)
                values = temp_row.get('nodes', {})
                for row in values:
                    row_dict = {}
                    if 'cpu_utilization_rate' in row['systemStats']:
                        row_dict['cpu_util'] = row['systemStats']['cpu_utilization_rate']
                    if 'swap_used' in row['systemStats']:
                        row_dict['swap_used'] = row['systemStats']['swap_used']
                    if not 'vb_replica_curr_items' in row['interestingStats']:
                        continue
                    row_dict['vb_replica_curr_items'] = \
                        row['interestingStats']['vb_replica_curr_items']
                    row_dict['curr_items_tot'] = \
                        row['interestingStats']['curr_items_tot']
                    row_dict['curr_items'] = row['interestingStats']['curr_items']
                    row_dict['node'] = row['hostname']
                    row_dict.update(z)
                    row_dict.update({"row": index})
                    attachments["ns_server_data_system"].append(row_dict)
            del input_json["ns_server_data_system"]

        if "dispatcher" in input_json:
            log.info("flattening dispatcher")
            attachments["dispatcher"] = input_json["dispatcher"]
            for index, row in enumerate(attachments["dispatcher"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["dispatcher"]

        if "timings" in input_json:
            log.info("flattening timings")
            attachments["timings"] = input_json["timings"]
            for index, row in enumerate(attachments["timings"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["timings"]

        if "ops" in input_json:
            log.info("flattening ops")
            attachments["ops"] = input_json["ops"]
            for index, row in enumerate(attachments["ops"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["ops"]

        if "qps" in input_json:
            log.info("flattening qps")
            attachments["qps"] = input_json["qps"]
            attachments["qps"].update(z)
            del input_json["qps"]

        if "systemstats" in input_json:
            log.info("flattening systemstats")
            attachments["systemstats"] = input_json["systemstats"]
            for index, row in enumerate(attachments["systemstats"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["systemstats"]

        if "iostats" in input_json:
            log.info("flattening iostats")
            attachments["iostats"] = input_json["iostats"]
            for index, row in enumerate(attachments["iostats"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["iostats"]

        if "data-size" in input_json:
            log.info("flattening data-size")
            attachments["data-size"] = input_json["data-size"]
            for index, row in enumerate(attachments["data-size"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["data-size"]

        if "bucket-size" in input_json:
            log.info("flattening bucket-size")
            values = input_json["bucket-size"]
            bucket_sizes = []
            for index, row in enumerate(values, start=1):
                row_dict = {}
                row_dict['size'] = row
                row_dict.update(z)
                row_dict.update({"row": index})
                bucket_sizes.append(row_dict.copy())
            del input_json["bucket-size"]
            attachments["bucket-size"] = bucket_sizes

        if "membasestats" in input_json:
            log.info("flattening membasestats")
            attachments["membasestats"] = input_json["membasestats"]
            for index, row in enumerate(attachments["membasestats"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["membasestats"]

        if "view_info" in input_json:
            log.info("flattening view info")
            attachments["view_info"] = input_json["view_info"]
            for index, row in enumerate(attachments["view_info"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["view_info"]

        if "indexer_info" in input_json:
            log.info("flattening indexer info")
            attachments["indexer_info"] = input_json["indexer_info"]
            for index, row in enumerate(attachments["indexer_info"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["indexer_info"]

        if "xdcr_lag" in input_json:
            log.info("flattening xdcr lag stats")
            attachments["xdcr_lag"] = input_json["xdcr_lag"]
            for index, row in enumerate(attachments["xdcr_lag"], start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["xdcr_lag"]

        if "rebalance_progress" in input_json:
            log.info("flattening rebalance_progress")
            attachments["rebalance_progress"] = input_json["rebalance_progress"]
            for index, row in enumerate(attachments["rebalance_progress"],
                                        start=1):
                row.update(z)
                row.update({"row": index})
            del input_json["rebalance_progress"]

        for histogram in input_json.keys():
            if histogram.startswith("latency") and histogram.endswith("histogram"):
                log.info("flattening {0} snapshot".format(histogram))
                if not isinstance(input_json[histogram], dict):
                    log.warn("cannot flatten {0} snapshot: not a dict or "
                             "empty".format(histogram))
                    continue
                if not "client_id" in input_json[histogram]:
                    log.warn("cannot flatten {0} snapshot: no client_id"
                             .format(histogram))
                    continue
                client_id = input_json[histogram].get("client_id", "")
                del input_json[histogram]["client_id"]
                attachments[histogram] = []
                index = 1
                for key, value in input_json[histogram].iteritems():
                    lr = {"row": index,
                          "time": key,
                          "count": value,
                          "client_id": client_id}
                    lr.update(z)
                    attachments[histogram].append(lr)
                    index += 1
                del input_json[histogram]

        for latency in input_json.keys():
            if latency.startswith('latency'):
                log.info("flattening {0}".format(latency))
                attachments[latency] = []
                index = 1
                for row in input_json[latency]:
                    if isinstance(row[0], list):
                        lr = {"percentile_90th": row[0][1],
                              "percentile_95th": 0,
                              "percentile_99th": row[1][1],
                              "client_id": "UNKNOWN",
                              "mystery": ""}
                        lr.update(z)
                        lr.update({"row": index})
                        index += 1
                        attachments[latency].append(lr)
                    else:
                        # create a new dict
                        lr = {"percentile_80th": row[0],
                              "percentile_90th": row[1],
                              "percentile_95th": row[2],
                              "percentile_99th": row[3],
                              "percentile_999th": row[4],
                              "client_id": row[5],
                              "time": row[6],
                              "mystery": row[7]}
                        lr.update(z)
                        lr.update({"row": index})
                        index += 1
                        attachments[latency].append(lr)
                del input_json[latency]

        log.info("attachments has {0} objects".format(len(attachments)))
        res = db.save_doc(input_json, force_update=True)
        doc_id = res["id"]
        rev_id = res["rev"]
        msg = "inserted document with id: {0} and rev: {1} in database: {2}"
        log.info(msg.format(doc_id, rev_id, options.database))
        for name in attachments:
            log.info("inserting attachment with name : {0}".format(name))
            db.put_attachment(input_json, attachments[name], name,
                              "text/plain")
    except Exception, error:
        log.error(error)
class LDInCouchBinBackend(object):
    # init with URL of CouchDB server, database name, and credentials
    def __init__(self, serverURL, dbname, username, pwd):
        self.serverURL = serverURL
        self.dbname = dbname
        self.username = username
        self.pwd = pwd
        self.server = Server(self.serverURL,
                             filters=[BasicAuth(self.username, self.pwd)])
        set_logging('info')  # suppress DEBUG output of couchdbkit/restkit

    # looks up a document via its ID
    def look_up_by_id(self, eid):
        try:
            db = self.server.get_or_create_db(self.dbname)
            if db.doc_exist(eid):
                ret = db.get(eid)
                return (True, ret)
            else:
                return (False, None)
        except Exception as err:
            logging.error('Error while looking up entity: %s' % err)
            return (False, None)

    # finds an RDFEntity document by subject and returns its ID, for example:
    # curl 'http://127.0.0.1:5984/rdf/_design/lookup/_view/by_subject?key="http%3A//example.org/%23r"'
    def look_up_by_subject(self, subject, in_graph):
        viewURL = ''.join([COUCHDB_SERVER, LOOKUP_BY_SUBJECT_PATH,
                           '"', urllib.quote(subject), urllib.quote(in_graph), '"'])
        logging.debug(' ... querying view %s ' % (viewURL))
        doc = urllib.urlopen(viewURL)
        doc = json.JSONDecoder().decode(doc.read())
        if len(doc['rows']) > 0:
            eid = doc['rows'][0]['id']
            logging.debug('Entity with %s in subject position (in graph %s) has the ID %s'
                          % (subject, in_graph, eid))
            return eid
        else:
            logging.debug('Entity with %s in subject position does not exist yet in graph %s'
                          % (subject, in_graph))
            return None

    # imports an RDF NTriples file triple by triple into JSON documents of RDFEntity type
    # as per the pseudo-algorithm laid out in https://github.com/mhausenblas/ld-in-couch/blob/master/README.md
    def import_NTriples(self, file_name, target_graph):
        triple_count = 0
        subjects = []  # for remembering which subjects we've already seen
        logging.info('Starting import ...')
        input_doc = open(file_name, "r")
        db = self.server.get_or_create_db(self.dbname)
        RDFEntity.set_db(db)  # associate the document type with the database
        if not target_graph:
            target_graph = file_name
        logging.info('Importing NTriples file \'%s\' into graph <%s>' % (file_name, target_graph))
        # scan each line (triple) of the input document
        for input_line in input_doc:
            triple_count += 1
            # parsing a triple @@FIXME: employ real NTriples parser here!
            triple = input_line.split(' ')  # naively assumes SPO is separated by a single whitespace
            is_literal_object = False
            s = triple[0][1:-1]  # get rid of the <>, naively assumes no bNodes for now
            p = triple[1][1:-1]  # get rid of the <>
            o = triple[2][1:-1]  # get rid of the <> or "", naively assumes no bNodes for now
            if not triple[2][0] == '<':
                is_literal_object = True
            logging.debug('-' * 20)
            logging.debug('#%d: S: %s P: %s O: %s' % (triple_count, s, p, o))
            # creating RDFEntity docs as needed
            if s not in subjects:
                # a new resource, never seen in subject position before ...
                logging.debug('%s is a resource I haven\'t seen in subject position, yet' % (s))
                subjects.append(s)
                try:
                    # ... so create a new entity doc
                    doc = RDFEntity(g=target_graph, s=s, p=[p], o=[o], o_in=[])
                    doc.save()
                    eid = doc['_id']
                    logging.debug(' ... created new entity with ID %s' % eid)
                except Exception as err:
                    logging.error('ERROR while creating entity: %s' % err)
            else:
                # we've already seen the resource in subject position ...
                logging.debug('I\'ve seen %s already in subject position' % (s))
                # ... so look up the existing entity doc by subject ...
                eid = self.look_up_by_subject(s, target_graph)
                try:
                    # ... and update the entity doc with the new PO pair
                    doc = db.get(eid)
                    doc['p'].append(p)
                    doc['o'].append(o)
                    db.save_doc(doc)
                    logging.debug(' ... updated existing entity with ID %s' % eid)
                except Exception as err:
                    logging.error('ERROR while updating existing entity: %s' % err)
            # setting back-links for non-literals in object position
            if not is_literal_object:
                # make sure to remember non-literal objects via back-link
                ref_eid = self.look_up_by_subject(o, target_graph)  # ... check if it already exists ...
                if ref_eid:
                    try:
                        # ... and update the entity doc's back-links
                        doc = db.get(ref_eid)
                        doc['o_in'].append(eid)
                        db.save_doc(doc)
                        logging.debug(' ... updated existing entity with ID %s with back-link %s'
                                      % (ref_eid, eid))
                    except Exception as err:
                        logging.error('ERROR while updating existing entity: %s' % err)
                else:
                    # need to remember that we've now seen this object value in subject position
                    subjects.append(o)
                    try:
                        # ... or create a new back-link entity doc
                        doc = RDFEntity(g=target_graph, s=o, p=[], o=[], o_in=[eid])
                        doc.save()
                        logging.debug(' ... created new back-link entity with ID %s with back-link %s'
                                      % (doc['_id'], eid))
                    except Exception as err:
                        logging.error('ERROR while creating back-link entity: %s' % err)
        input_doc.close()
        logging.info('Import completed. I\'ve processed %d triples and seen %d subjects (incl. back-links).'
                     % (triple_count, len(subjects)))
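# A minimal usage sketch for the importer above, assuming a local CouchDB
# reachable with the given credentials; the file name, graph IRI, and entity
# ID below are illustrative placeholders, not part of the original script.
backend = LDInCouchBinBackend(serverURL='http://127.0.0.1:5984',
                              dbname='rdf',
                              username='admin',
                              pwd='secret')
backend.import_NTriples('data.nt', target_graph='http://example.org/graph')
found, entity = backend.look_up_by_id('some-entity-id')
if found:
    print entity['s'], entity['p'], entity['o']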
def compact_couch():
    server = Server(settings.SECURE_COUCHDB_SERVER)
    db = server.get_or_create_db('openelm')
    db.compact()
    db.view_cleanup()
from flask import Flask
from couchdbkit import Server, push
from flask_login import LoginManager

# init the flask app
app = Flask(__name__)
app.secret_key = "troleolol"
login_manager = LoginManager()
login_manager.init_app(app)

# server object
server = Server()

# create databases
db = server.get_or_create_db("post")
userdb = server.get_or_create_db("user")
emaildb = server.get_or_create_db("email")

# from models.post import Post
# from models.user import User
#
# # associate Models to the db
# Post.set_db(db)
# User.set_db(userdb)

# these are the views we need on the server for searching and retrieving docs;
# push them to the server
push('messageboard/designdocs/Post', db)

import models
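# flask_login also needs a user_loader before logins will work. A sketch,
# assuming user documents are keyed by user id in userdb and wrapped by the
# (commented-out) User model above; User(**doc) is an assumption about that
# model's constructor, not part of the original app.
@login_manager.user_loader
def load_user(user_id):
    if not userdb.doc_exist(user_id):
        return None
    return User(**userdb.get(user_id))  # hypothetical wrapper object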
from couchdbkit.loaders import FileSystemDocsLoader
from couchdbkit import Server, Database
import sys, os

path_to_design = sys.argv[1]
uri = sys.argv[2]
dbname = sys.argv[3]

print 'upload views from directory root:\t%s \t to db:\t%s/%s' % (
    path_to_design, uri, dbname)

server = Server(uri)
db = server.get_or_create_db(dbname)
loader = FileSystemDocsLoader(path_to_design)
loader.sync(db, verbose=True)
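# Invocation sketch (the script name, paths, and db name are placeholders):
#   python sync_views.py ./designdocs http://127.0.0.1:5984 mydb
# couchdbkit's FileSystemDocsLoader typically treats each subdirectory of the
# design root as one design doc, e.g.
#   ./designdocs/lookup/views/by_subject/map.js  ->  _design/lookup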
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    return ''.join(random.choice(chars) for x in range(size))

if __name__ == '__main__':
    # CHANGE THESE TWO LINES TO SPECIFY CLUSTER AND CREDS
    uri = 'https://<username>:<pwd>@<username>.cloudant.com'
    dbname = 'rate_test'

    # NUMBER OF DOCUMENTS TO UPLOAD
    ndocs = 100000
    nworkers = 20  # n_threads

    s = Server(uri)
    db = s.get_or_create_db(dbname)
    print db.info()

    # create writer pool
    pool = CloudantPool(nworkers, 100, 0.1, 1., uri, dbname)

    start = time.time()
    original = start
    step = 1000
    docs = []
    for i in range(0, ndocs):
        if (i % step) == 0:
            delta = float(time.time() - start)
            rate = float(step) / delta
            # the source is truncated mid-statement here; the format string
            # implies (step, delta, rate), and resetting the window timer is
            # assumed so the reported rate stays per-step
            print 'saved:\t%i\tdocs in:\t%f\tseconds for:\t%f\tdocs/sec' % (
                step, delta, rate)
            start = time.time()
        # the rest of the loop body (building docs and handing them to the
        # writer pool) is truncated in the source
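# For comparison, a single-threaded bulk upload using couchdbkit's save_docs
# (CouchDB's _bulk_docs endpoint) instead of a writer pool. A sketch under the
# same uri/dbname as above; the batch size of 1000 is an arbitrary choice, and
# longer random ids are used to keep collisions unlikely.
batch = []
for i in range(ndocs):
    batch.append({'_id': id_generator(size=16), 'n': i})
    if len(batch) == 1000:
        db.save_docs(batch)
        batch = []
if batch:
    db.save_docs(batch)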
def get_record_by_id(record_id):
    server = Server(settings.SECURE_COUCHDB_SERVER)
    db = server.get_or_create_db('openelm')
    Record.set_db(db)
    return Record.get(record_id)
#
# This script updates a field for all documents in a view. Modify as needed.
#
# This script requires couchdbkit:
# $ sudo easy_install -U Couchdbkit

from couchdbkit import Server
import logging

# Configuration
server_url = "http://example.com:5984"
database = "database"
view = "design_name/view_name"
field = "field_name"
old_value = "old_value"
new_value = "new_value"

# Program
logging.basicConfig(level=logging.INFO)
server = Server(server_url)
db = server.get_or_create_db(database)
entries_to_process = db.view(view, key=old_value, reduce=False, include_docs=True)
for entry in entries_to_process:
    doc = entry["doc"]
    doc[field] = new_value
    db.save_doc(doc)
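# A bulk variant of the loop above: collect the modified docs and write them
# in one _bulk_docs round trip via couchdbkit's save_docs, rather than one
# save_doc call per document. Sketched under the same configuration.
docs = []
for entry in entries_to_process:
    doc = entry["doc"]
    doc[field] = new_value
    docs.append(doc)
if docs:
    db.save_docs(docs)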