def has_mode(db, conn, model, field, mode_table="Fcq"):
    """Store the most frequent value of ``field`` on each ``model`` document.

    Rows of ``mode_table`` are grouped by ``<model>_id`` (lower-cased); for
    every group the value of ``field`` with the highest count is written to
    the ``model`` document whose primary key equals the group key. When no
    value can be picked, ``None`` is written. The write summary returned by
    the server is logged at INFO level.
    """
    foreign_key = "{0}_id".format(model).lower()

    def most_common(rows):
        # Count occurrences per value, sort ascending by count and take the
        # last entry, i.e. the value with the highest count.
        return (
            rows.group(field)
            .count()
            .ungroup()
            .order_by("reduction")
            .nth(-1)
            .default({"group": None})["group"]
        )

    def write_mode(bucket):
        target = r.db(db).table(model).get(bucket["group"])
        return target.update({field: most_common(bucket["reduction"])})

    grouped = r.db(db).table(mode_table).group(foreign_key).ungroup()
    outcome = grouped.for_each(write_mode).run(conn, array_limit=200000)
    logging.info(outcome)
def create_registration(event_id, custom_fields):
    """Validate ``custom_fields`` against an event and insert a registration.

    Returns a ``(registration_id, error)`` pair: on success the generated key
    and ``None``; on failure ``None`` and either a
    ``RegistrationValidateException`` (carrying the invalid fields) or a
    ``RegistrationInsertException``.
    """
    db = rethink.db(config['database']['name'])

    # Validate custom fields by comparing them to the event fields
    event_fields = list(
        db.table('events')
        .get(event_id)
        .get_field('fields')
        .run(database.connection)
    )
    # TODO: len(fields) == 0 -> invalid event ID error?

    custom_fields = __sanitize_registration(event_fields, custom_fields)
    problems = __validate_registration(event_fields, custom_fields)
    if problems:
        return None, RegistrationValidateException('Invalid fields', problems)

    document = {'event_id': event_id, 'custom_fields': custom_fields}
    outcome = db.table('registrations').insert(document).run(
        database.connection
    )
    if outcome['inserted'] == 1:
        # returns the inserted ID
        return outcome['generated_keys'][0], None
    return None, RegistrationInsertException()
def update_registration(registration_id, new_registration):
    """Update a stored registration with ``new_registration``.

    Returns a ``(registration_id, error)`` pair. The error is a
    ``RegistrationNotFoundException`` when the registration cannot be loaded,
    a ``RegistrationValidateException`` when field validation fails, or a
    ``RegistrationUpdateException`` when the database reports errors.
    """
    registration, _lookup_error = get_registration(registration_id)
    if not registration:
        return None, RegistrationNotFoundException()

    db = rethink.db(config['database']['name'])

    # Validate custom fields by comparing them to the event fields
    event_fields = list(
        db.table('events')
        .get(registration.get('event_id'))
        .get_field('fields')
        .run(database.connection)
    )

    # NOTE(review): the fields being sanitized and validated here are the ones
    # already stored on the registration, not those in new_registration --
    # confirm whether the incoming data was meant to be checked instead.
    stored_fields = __sanitize_registration(
        event_fields, registration.get('custom_fields')
    )
    problems = __validate_registration(event_fields, stored_fields)
    if problems:
        return None, RegistrationValidateException('Invalid fields', problems)

    outcome = (
        db.table('registrations')
        .get(registration_id)
        .update(new_registration)
        .run(database.connection)
    )
    if outcome['errors'] != 0:
        return None, RegistrationUpdateException()
    return registration_id, None
def setup():
    """Create the MyOps2 database and its tables if they do not exist yet.

    An ``RqlRuntimeError`` from a create call is treated as "already exists"
    and only logged. The connection is closed even when an unexpected error
    interrupts the setup.
    """
    tables = [
        {'name': 'testbeds', 'pkey': 'id'},
        {'name': 'resources', 'pkey': 'hostname'},
    ]
    db_name = Config.rethinkdb["db"]

    c = connect()
    try:
        try:
            r.db_create(db_name).run(c)
            logger.info('MyOps2 database created successfully')
        except RqlRuntimeError:
            logger.info('MyOps2 database already exists')

        for t in tables:
            try:
                r.db(db_name).table_create(
                    t['name'], primary_key=t['pkey']
                ).run(c)
                logger.info('MyOps2 table %s setup completed', t['name'])
            except RqlRuntimeError:
                logger.info('MyOps2 table %s already exists', t['name'])
    finally:
        # Previously the connection leaked whenever anything other than
        # RqlRuntimeError was raised above.
        c.close()
def init(conn, event):
    """Recreate the TIX database with a single venue document.

    The database is dropped if it exists, then created again together with
    the VENU table and an index on TS. One document (primary key 0) is
    inserted holding a seat map with every seat ``'free'``, an empty user
    map, the seat count and the current timestamp.

    ``event`` is accepted but not used by this function.

    Returns a status string: ``'dropped, then created'`` or ``'created'``
    followed by the string form of the insert result.
    """
    # try to drop table (may or may not exist)
    try:
        r.db_drop(TIX).run(conn)
        rv = 'dropped, then created'
    except Exception:
        # Best effort: a failed drop is taken to mean there was nothing to
        # drop. Unlike a bare except this lets KeyboardInterrupt/SystemExit
        # propagate.
        rv = 'created'

    r.db_create(TIX).run(conn)
    r.db(TIX).table_create(VENU).run(conn)
    r.db(TIX).table(VENU).index_create(TS).run(conn)

    seats = [str(x) for x in range(1, CNT + 1)]
    smap = dict.fromkeys(seats, 'free')
    umap = dict.fromkeys(seats, '')

    rv += str(r.db(TIX).table(VENU).insert({
        ID: 0,
        SMAP: smap,
        UMAP: umap,
        MAX: CNT,
        TS: time.time(),
    }).run(conn))
    return rv
def upload_project(project_id):
    """
    Upload the bup backup of this project to the gcloud bucket.

    The pack files under ``bup/objects`` are rsynced first (skipping bloom
    and midx files), then ``refs`` and ``logs`` are rsynced with checksum
    comparison. Finally the project's ``last_backup_to_gcloud`` field is
    set to the current time.

    Raises RuntimeError if the project has no ``bup`` directory.
    """
    path = path_to_project(project_id)
    run("sudo chmod a+r -R %s"%path)

    # The label says "path"; log the path rather than the project id.
    log('path: ', path)
    bup = os.path.join(path, 'bup')
    if not os.path.exists(bup):
        raise RuntimeError("no bup directory to upload -- done")
    target = 'gs://{bucket}/projects/{project_id}.zfs/bup'.format(
        bucket=GCLOUD_BUCKET, project_id=project_id)

    log('upload: rsync new pack files')
    # Raw string: same pattern as before, without relying on unknown
    # backslash escapes being passed through.
    run(['gsutil', '-m', 'rsync', '-x', r'.*\.bloom|.*\.midx', '-r',
         '{bup}/objects/'.format(bup=bup),
         '{target}/objects/'.format(target=target)])

    log('gsutil upload refs/logs')
    for subdir in ['refs', 'logs']:
        run(['gsutil', '-m', 'rsync', '-c', '-r',
             '{bup}/{path}/'.format(bup=bup, path=subdir),
             '{target}/{path}/'.format(target=target, path=subdir)])

    #auth_key = open(RETHINKDB_SECRET).read().strip()
    conn = rethinkdb.connect(host=DB_HOST, timeout=10)#, auth_key=auth_key)
    try:
        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(TIMESTAMP_FORMAT)
        rethinkdb.db('smc').table('projects').get(project_id).update(
            {'last_backup_to_gcloud': timestamp_to_rethinkdb(timestamp)}).run(conn)
    finally:
        # The connection used to be left open after the update.
        conn.close()
def test_multi_join(self, conn):
    """Two chained eq_joins resolve an employee's person and job names."""
    db = r.db('x')

    # First join: replace the person reference with the person's name.
    with_person = db.table('employees').eq_join(
        'person', db.table('people')
    ).map(
        lambda pair: pair['left'].merge({'person': pair['right']['name']})
    )

    # Second join: replace the job reference with the job's name.
    with_job = with_person.eq_join(
        'job', db.table('jobs')
    ).map(
        lambda pair: pair['left'].merge({'job': pair['right']['name']})
    )

    expected = [
        {'id': 'joe-employee-id', 'person': 'joe', 'job': 'Lawyer'},
        {'id': 'tim-employee-id', 'person': 'tim', 'job': 'Nurse'},
        {'id': 'bob-employee-id', 'person': 'bob', 'job': 'Assistant'},
        {'id': 'todd-employee-id', 'person': 'todd', 'job': 'Lawyer'},
    ]
    actual = list(with_job.run(conn))
    assertEqUnordered(expected, actual)
def step1():
    """Register a user from the JSON request body.

    The body must provide ``name``, ``user``, ``email`` and ``password``.
    If a user with the same email already exists the response carries
    ``code`` 1, otherwise the user is inserted and ``code`` is 0. In both
    cases ``success`` is 200. Returns the JSON response.
    """
    response = {}
    payload = json.loads(request.data)
    users = {
        'name': payload['name'],
        'user': payload['user'],
        'email': payload['email'],
        # NOTE(review): the password is stored exactly as received; consider
        # hashing it before persisting.
        'password': payload['password'],
        'ubication': [],
        'sale': [],
    }

    # Use the configured database for both the duplicate check and the
    # insert; the check used to read from a hard-coded 'food' database.
    user_table = r.db(current_app.config['DATABASE']).table('user_register')

    conn = r.connect(host=current_app.config['RETHINKDB_HOST'])
    try:
        existing = list(
            user_table.filter({'email': users['email']}).run(conn)
        )
        if existing:
            response['success'] = 200
            response['message'] = u'El usuario ya existe'
            response['code'] = 1
        else:
            user_table.insert(users).run(conn)
            response['success'] = 200
            response['message'] = u'Usuario registrado'
            response['code'] = 0
    finally:
        # A new connection is opened per request, so release it.
        conn.close()

    pprint.pprint(response)
    return jsonify(response)
def main():
    """Mirror MySQL binlog row inserts of the ``employees`` schema.

    Recreates the ``mysql`` RethinkDB database with one table per source
    table, then follows the binlog stream and inserts every written row
    (keys and values converted to strings) into the table of the same name.
    """
    # connect rethinkdb
    rethinkdb.connect("localhost", 28015, "mysql")
    try:
        rethinkdb.db_drop("mysql").run()
    except Exception:
        # Best effort: the database may simply not exist yet.
        pass
    rethinkdb.db_create("mysql").run()

    tables = ["dept_emp", "dept_manager", "titles",
              "salaries", "employees", "departments"]
    for table in tables:
        rethinkdb.db("mysql").table_create(table).run()

    stream = BinLogStreamReader(
        connection_settings=MYSQL_SETTINGS,
        blocking=True,
        only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent],
    )

    # process Feed
    try:
        for binlogevent in stream:
            if not isinstance(binlogevent, WriteRowsEvent):
                continue
            # The schema is the same for every row of an event, so check it
            # once per event rather than once per row.
            if binlogevent.schema != "employees":
                continue

            for row in binlogevent.rows:
                vals = {str(k): str(v) for k, v in row["values"].iteritems()}
                rethinkdb.table(binlogevent.table).insert(vals).run()
    finally:
        # Close the stream even when the loop is interrupted or an insert
        # fails.
        stream.close()
def go():
    """Run a series of deliberately failing operations.

    Each scenario is built and executed inside its own ``except_printer()``
    context, in the order listed. The first is a connect call with a
    non-numeric port; the rest are queries containing ``r.error('bob')``
    run on the module-level connection ``c``.
    """
    scenarios = (
        lambda: r.connect(host="localhost", port="123abc"),
        lambda: r.expr({'err': r.error('bob')}).run(c),
        lambda: r.expr([1,2,3, r.error('bob')]).run(c),
        lambda: (((r.expr(1) + 1) - 8) * r.error('bob')).run(c),
        lambda: r.expr([1,2,3]).append(r.error('bob')).run(c),
        lambda: r.expr([1,2,3, r.error('bob')])[1:].run(c),
        lambda: r.expr({'a':r.error('bob')})['a'].run(c),
        lambda: r.db('test').table('test').filter(
            lambda a: a.contains(r.error('bob'))).run(c),
        lambda: r.expr(1).do(lambda x: r.error('bob')).run(c),
        lambda: r.expr(1).do(lambda x: x + r.error('bob')).run(c),
        lambda: r.branch(
            r.db('test').table('test').get(0)['a'].contains(r.error('bob')),
            r.expr(1), r.expr(2)).run(c),
        lambda: r.expr([1,2]).reduce(lambda a,b: a + r.error("bob")).run(c),
    )

    for scenario in scenarios:
        with except_printer():
            scenario()
def setDictionary(): dict = {} #print "getting top stories from hacker-news" result = firebase.get('/v0/topstories', None) # result = result[:200] for itemid in result: try: data = firebase.get('/v0/item/' + str(itemid), None) if (data['type'] == 'story'): # get tags url = data['url'] (to_insert, tags) = selectTags(itemid) # store to temp db r.db("tagger_db").table("id2html").insert({"id": itemid, "tag_string": to_insert}).run(connection) if len(tags) > 1: title = data['title'] score = str(data['score']) usr = data['by'] comments = str(data['descendants']) myString = "<tr class='athing'><td align=\"right\" valign=\"top\" class=\"title\"><span class=\"rank\"> </span></td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td><td class=\"title\"><span class=\"deadmark\"></span><a href=\"" + url + "\">" + title + "</a>" + to_insert + "</td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td></tr><tr><td colspan=\"2\"></td><td class=\"subtext\"><span class=\"score\">" + score + " points</span> by <a>" + usr + "</a> | <a>" + comments +" comments</a></td></tr><tr class=\"spacer\" style=\"height:5px\"></tr>" print "tags: ", tags[0], tags[1] add(tags[0], myString, dict) add(tags[1], myString, dict) except KeyError: pass # r.db("test").table("tag_dict").delete().run(connection) r.db("tagger_db").table("tag2html").insert(dict).run(connection)
def __init__(self, count):
    """Connect to the local server and start from an empty ``test.items``.

    The connection is registered as the default (``repl``) connection and
    kept on ``self.con``; ``count`` is stored on ``self.count``.
    """
    self.con = r.connect("localhost", 28015).repl()

    test_db = r.db("test")
    existing = test_db.table_list().run(self.con)
    if "items" in existing:
        # Drop any table left over from an earlier run.
        test_db.table_drop("items").run(self.con)
    test_db.table_create("items").run(self.con)

    self.count = count
def sync_facebook(name):
    """Link the Facebook email and picture to the user called ``name``.

    The JSON request body must carry ``access_token`` and ``fb_email``. The
    email reported by the Graph API has to match ``fb_email``; the user's
    ``fb_email`` and ``pic`` fields are then updated and the stored user
    document is returned in the success response. Every failure is reported
    through ``response_msg('error', ...)``.
    """
    #import ipdb; ipdb.set_trace();
    try:
        form_data = json.loads(request.data)
    except Exception:
        return response_msg('error', 'data not correct')

    try:
        graph = GraphAPI(form_data['access_token'])
        try:
            email = graph.get_object('me', fields='email')['email']
            pic = graph.get_object('me/picture', width='400', height='400')['url']
            print(pic)
            if email != form_data['fb_email']:
                return response_msg('error', 'incorrect facebook email')
        except Exception:
            return response_msg('error', 'data not complete')
    except Exception:
        return response_msg('error', 'invalid access token')

    try:
        connection = get_rdb_conn()
        user_query = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
        )
        user_query.update({'fb_email': email, 'pic': pic}).run(connection)
        # Read the matches through the cursor's public iteration protocol
        # rather than its internal ``items`` buffer.
        users = list(user_query.run(connection))
    except Exception:
        return response_msg('error', 'Could not connect to db')

    if not users:
        # Indexing the empty result used to raise IndexError here.
        return response_msg('error', 'user not found')
    return response_msg('success', 'OK', data=users[0])
def sync_ratings():
    """Recompute and store the ratings of every user.

    For each user the external ratings are fetched through ``rating`` and a
    combined college rating is derived from them; ``lrating``, ``srating``,
    ``cfrating`` and ``colg_rating`` are then written back. A failure for
    one user is printed and does not stop the run.
    """
    try:
        connection = get_rdb_conn()
        # Materialise through the cursor's iterator so every user is
        # processed; the cursor's ``items`` attribute only exposes the rows
        # that are currently buffered.
        users = list(rdb.db(TODO_DB).table('user').run(connection))
    except Exception:
        return response_msg('error', 'could not connect to db')

    for user in users:
        ratings = rating(user['cfhandle'], user['cchandle'], user['colg_rating'])
        ratings = json.loads(ratings[0])

        colg_rating = 0
        try:
            # Each step may fail on a missing rating; whatever was
            # accumulated up to that point is kept.
            colg_rating = colg_rating + 20 * ((ratings['cf_rating']/100)**2)
            colg_rating = colg_rating + 2000 + 7 * (((ratings['lrating']/1000)**2) + (ratings['lrating']/20))
            colg_rating = colg_rating + 2000 + 5 * (((ratings['srating']/100)**2) + (ratings['srating']/20))
        except Exception:
            pass
        print(colg_rating)

        try:
            rdb.db(TODO_DB).table('user').filter(
                rdb.row['username'] == user['username']
            ).update({
                'lrating': ratings['lrating'],
                'srating': ratings['srating'],
                'cfrating': ratings['cf_rating'],
                'colg_rating': colg_rating/3,
            }).run(connection)
            print(user['username'])
        except Exception:
            print('error' + user['username'])

    # NOTE(review): 'sucess' is misspelled but left untouched because clients
    # may compare against this exact status string.
    return response_msg('sucess', 'OK')
def remove_pending_user(self, user_id, row_id, user_pending_name=None):
    """
    Remove a user id from a model's pending list.

    :param user_id: primary key of the user; must not be None
    :param row_id: primary key of the row in this model's table; must not
        be None
    :param user_pending_name: optional name of the list on the user
        document from which ``row_id`` is removed as well
    :return: False when an id is None or a document does not exist,
        otherwise the result of updating the row's ``penders`` list
    """
    if user_id is None:
        logging.error("user_id cannot be None")
        return False
    if row_id is None:
        logging.error("row_id cannot be None")
        return False

    row_table = self.__class__.__name__
    user_table = 'User'
    user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
    row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)

    # Report the id that was looked up; the fetched document is None here.
    if user_data is None:
        logging.error("User {0} does not exist".format(user_id))
        return False
    if row_data is None:
        logging.error("{0} {1} does not exist".format(row_table, row_id))
        return False

    if user_pending_name is not None:
        user_pending = user_data.get(user_pending_name, [])
        try:
            user_pending.remove(row_id)
        except ValueError:
            logging.warning("row_id {0} not in user {1}".format(row_id, user_pending_name))
        r.db(self.DB).table(user_table).get(user_id).update(
            {user_pending_name: user_pending}).run(self.conn)

    penders = row_data['penders']
    try:
        penders.remove(user_id)
    except ValueError:
        # The user was not pending on this row; nothing to remove.
        pass
    return r.db(self.DB).table(row_table).get(row_id).update(
        {'penders': penders}).run(self.conn)
def save(self):
    """Persist this account, or load its keys if it is already stored.

    The database and the ``accounts`` table are created when missing. An
    account counts as existing when a row has the same ``name`` and the same
    ``ledger.id``; in that case ``vk`` and ``sk`` are copied from the first
    such row. Otherwise ``as_dict()`` is inserted with hard durability.
    """
    # Creation attempts are best effort: an operation failure is ignored.
    try:
        r.db_create(self.db).run(self.bigchain.conn)
    except r.ReqlOpFailedError:
        pass

    try:
        r.db(self.db).table_create('accounts').run(self.bigchain.conn)
    except r.ReqlOpFailedError:
        pass

    def same_account(user):
        return (user['name'] == self.name) & (user['ledger']['id'] == self.ledger['id'])

    accounts = r.db(self.db).table('accounts')
    matches = list(accounts.filter(same_account).run(self.bigchain.conn))

    if matches:
        stored = matches[0]
        self.vk = stored['vk']
        self.sk = stored['sk']
    else:
        r.db(self.db).table('accounts').insert(
            self.as_dict(), durability='hard'
        ).run(self.bigchain.conn)
def insert_r(conn,table,sent,rel,val):
    """Insert one extracted relation into ``wikikb.<table>``.

    ``rel`` supplies ``e1``, ``rel`` and ``e2``; together with ``sent`` they
    are stored as unicode (byte strings are decoded, ignoring undecodable
    bytes). ``val`` is stored unchanged under ``cfval``.
    """
    def as_unicode(text):
        if isinstance(text, unicode):
            return text
        return unicode(text, errors="ignore")

    bulk = {}
    for key in ("e1", "rel", "e2"):
        bulk[key] = as_unicode(rel[key])
    bulk["sent"] = as_unicode(sent)
    bulk["cfval"] = val

    r.db("wikikb").table(table).insert(bulk).run(conn)
def get_table():
    """Return the ``boards`` table query, creating the table when possible."""
    database = r.db(dbname)
    try:
        database.table_create('boards').run(_get_conn())
    except r.RqlRuntimeError:
        # already created
        pass
    return database.table('boards')
def import_from_queue(progress, conn, task_queue, error_queue, replace_conflicts, durability, write_count):
    """Insert batches taken from ``task_queue`` until a StopIteration arrives.

    A task is indexed as ``task[0]`` = database name, ``task[1]`` = table
    name and ``task[2]`` = list of pickled row objects.

    ``progress[0]`` holds the batch that was in flight when an earlier
    attempt failed (or None). ``write_count[0]`` is increased by the number
    of rows known to be written. ``error_queue`` is not used in this
    function.

    Raises RuntimeError when a resumed row has no primary key, when a
    resumed row conflicts with the stored one, or when an insert reports
    errors. Any exception raised while inserting is re-raised after the
    batch has been saved in ``progress[0]``.
    """
    if progress[0] is not None and not replace_conflicts:
        # We were interrupted and it's not ok to overwrite rows, check that the batch either:
        # a) does not exist on the server
        # b) is exactly the same on the server
        task = progress[0]
        pkey = r.db(task[0]).table(task[1]).info().run(conn)["primary_key"]
        # Walk backwards so deleting entries does not shift pending indices.
        for i in reversed(range(len(task[2]))):
            # NOTE(review): pickle.loads is only safe if the queue carries
            # data produced by this program -- confirm against the producer.
            obj = pickle.loads(task[2][i])
            if pkey not in obj:
                raise RuntimeError("Connection error while importing. Current row has no specified primary key, so cannot guarantee absence of duplicates")
            row = r.db(task[0]).table(task[1]).get(obj[pkey]).run(conn)
            if row == obj:
                # Already stored identically: count it and drop it from the batch.
                write_count[0] += 1
                del task[2][i]
            else:
                raise RuntimeError("Duplicate primary key `%s`:\n%s\n%s" % (pkey, str(obj), str(row)))

    # Resume with the interrupted batch if there is one, otherwise fetch a new one.
    task = task_queue.get() if progress[0] is None else progress[0]
    while not isinstance(task, StopIteration):
        try:
            # Unpickle objects (TODO: super inefficient, would be nice if we could pass down json)
            objs = [pickle.loads(obj) for obj in task[2]]
            conflict_action = 'replace' if replace_conflicts else 'error'
            res = r.db(task[0]).table(task[1]).insert(objs, durability=durability, conflict=conflict_action).run(conn)
        except:
            # Remember the batch so a later call can resume from it.
            progress[0] = task
            raise

        if res["errors"] > 0:
            raise RuntimeError("Error when importing into table '%s.%s': %s" % (task[0], task[1], res["first_error"]))

        write_count[0] += len(objs)
        task = task_queue.get()
def subscribe_user(self, user_id, row_id, user_subscription_name=None):
    """
    Add a user id to a model's subscription list.

    :param user_id: primary key of the user
    :param row_id: primary key of the row in this model's table
    :param user_subscription_name: optional name of the list on the user
        document to which ``row_id`` is appended as well
    :return: False when the user or the row does not exist or the user has
        no list called ``user_subscription_name``; otherwise the result of
        updating the row's ``subscribers`` list
    """
    row_table = self.__class__.__name__
    user_table = 'User'
    user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
    row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)

    if user_data is None:
        logging.error("User {0} does not exist".format(user_id))
        return False
    # This guard used to re-test user_data (and referenced an undefined
    # name), so a missing row was never detected.
    if row_data is None:
        logging.error("{0} {1} does not exist".format(row_table, row_id))
        return False

    if user_subscription_name is not None:
        try:
            user_subscription = user_data[user_subscription_name]
        except KeyError:
            logging.error("user subscription {0} not known in user data".format(user_subscription_name))
            return False
        user_subscription.append(row_id)
        r.db(self.DB).table(user_table).get(user_id).update(
            {user_subscription_name: user_subscription}).run(self.conn)

    subscribers = row_data['subscribers']
    subscribers.append(user_id)
    return r.db(self.DB).table(row_table).get(row_id).update(
        {'subscribers': subscribers}).run(self.conn)
def save(db_host, db_port, db_name, db_table, data):
    """Insert ``data`` into ``db_name.db_table`` on the given server.

    :param db_host: server host name (str)
    :param db_port: server port (int)
    :param db_name: database name (str)
    :param db_table: table name (str)
    :param data: document(s) handed to the insert as-is
    :raises TypeError: when one of the four connection arguments has the
        wrong type
    :raises Exception: connection and insert errors are logged at debug
        level and re-raised unchanged
    """
    expectations = (
        (db_host, str, "host name"),
        (db_port, int, "port"),
        (db_name, str, "name"),
        (db_table, str, "table name"),
    )
    for value, expected_type, label in expectations:
        if not isinstance(value, expected_type):
            raise TypeError(
                "Invalid database %s argument type. "
                "Can't create Cache Walker instance." % label
            )

    try:
        connection = r.connect(db_host, db_port)
    except Exception:
        logger.debug("Can't connect to the database.")
        # Bare raise keeps the original traceback.
        raise

    try:
        r.db(db_name).table(db_table).insert(data).run(connection)
    except Exception:
        logger.debug("Can't insert data into the database.")
        raise
    finally:
        # The connection is opened for this call only, so release it.
        connection.close()
def LoadTestData(file, db, conn, v = False):
    '''Load a CSV of test data into the ``values`` table.

    The CSV is read from ``tests/data/<file>`` relative to the parent of the
    module-level ``dir``. Existing records in the table are deleted before
    the new rows are inserted. Returns True on success and False when the
    file cannot be read or the database operations fail.
    '''
    ## Loading data.
    data_dir = os.path.split(dir)[0]
    path = os.path.join(data_dir, 'tests', 'data', file)
    print(path)
    try:
        with open(path) as csv_file:
            test_data = list(csv.DictReader(csv_file))
    except Exception:
        print("Couldn't load test data.")
        return False

    ## Storing in db.
    try:
        values = r.db(db['name']).table('values')
        # Checking for existing records.
        if values.count().run(conn) > 0:
            if v:
                print("Data already in db. Deleting ...")
            values.delete().run(conn)
        values.insert(test_data).run(conn)
    except Exception:
        print("Could not insert data into database.")
        return False
    return True
def create_table(self):
    """Create the corpus table in the ``Raiden`` database.

    Success and failure are both reported on stdout; errors are printed,
    not raised.
    """
    table_name = self.corpus_table
    try:
        r.db('Raiden').table_create(table_name).run(self.connection)
        print('Created table [Raiden.'+table_name+']')
    except Exception as err:
        print('Error occured during '+table_name+' table creation! Maybe it already exists!')
        print(str(err))
def table_reader(options, file_info, task_queue, error_queue, exit_event):
    """Read one import file and feed its rows to ``task_queue``.

    The target table is created (with the file's primary key) when it does
    not exist yet, then the file is handed to the JSON or CSV reader
    according to ``file_info["format"]``. Failures are reported through
    ``error_queue`` rather than raised; interruptions are dropped silently.
    """
    try:
        db = file_info["db"]
        table = file_info["table"]
        primary_key = file_info["info"]["primary_key"]

        conn = r.connect(options["host"], options["port"], auth_key=options["auth_key"])

        existing_tables = r.db(db).table_list().run(conn)
        if table not in existing_tables:
            r.db(db).table_create(table, primary_key=primary_key).run(conn)

        source_format = file_info["format"]
        if source_format == "json":
            json_reader(task_queue, file_info["file"], db, table,
                        primary_key, options["fields"], exit_event)
        elif source_format == "csv":
            csv_reader(task_queue, file_info["file"], db, table,
                       primary_key, options, exit_event)
        else:
            raise RuntimeError("unknown file format specified")
    except (r.RqlClientError, r.RqlDriverError, r.RqlRuntimeError) as ex:
        # Driver errors are reported as plain RuntimeErrors.
        trace = traceback.extract_tb(sys.exc_info()[2])
        error_queue.put((RuntimeError, RuntimeError(ex.message), trace))
    except InterruptedError:
        pass # Don't save interrupted errors, they are side-effects
    except:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb), file_info["file"]))
def init_database_with_default_tables(args):
    """
    Create a new RethinkDB database and initialise (default) tables

    When creation fails with an RqlRuntimeError the database is assumed to
    exist already: with ``args.force`` it is dropped and rebuilt, otherwise
    the process exits with status 1.

    :param args: an argparse argument (force)
    """
    # Add additional (default) tables here...
    def_tables = ['determined_variants', 'strains_under_investigation',
                  'references', 'reference_features', 'strain_features']

    with database.make_connection() as connection:

        def build_database():
            r.db_create(connection.db).run(connection)
            for atable in def_tables:
                r.db(connection.db).table_create(atable).run(connection)

        try:
            build_database()
        except RqlRuntimeError:
            print("Database %s already exists. Use '--force' option to "
                  "reinitialise the database." % (connection.db))
            if not args.force:
                sys.exit(1)
            print("Reinitialising %s" % (connection.db))
            r.db_drop(connection.db).run(connection)
            build_database()

        print("Initalised database %s. %s contains the following tables: "
              "%s" % (connection.db, connection.db, ', '.join(def_tables)))
def get_tables(host, port, auth_key, tables):
    """Resolve the requested databases/tables to a set of (db, table) pairs.

    ``tables`` is a list of ``[db]`` or ``[db, table]`` entries; an empty
    list selects every table of every database on the server.

    Raises RuntimeError when the connection fails or when a named database
    or table does not exist.
    """
    try:
        conn = r.connect(host, port, auth_key=auth_key)
    except r.RqlDriverError as ex:
        raise RuntimeError(ex.message)

    try:
        dbs = r.db_list().run(conn)
        res = []

        if len(tables) == 0:
            tables = [[db] for db in dbs]

        for db_table in tables:
            if db_table[0] not in dbs:
                raise RuntimeError("Error: Database '%s' not found" % db_table[0])

            table_names = r.db(db_table[0]).table_list().run(conn)
            if len(db_table) == 1: # This is just a db name
                res.extend([(db_table[0], table) for table in table_names])
            else: # This is db and table name
                if db_table[1] not in table_names:
                    raise RuntimeError("Error: Table not found: '%s.%s'" % tuple(db_table))
                res.append(tuple(db_table))
    finally:
        # The connection is only needed for this lookup; it used to be
        # left open.
        conn.close()

    # Remove duplicates by making results a set
    return set(res)
def create(self):
    """Create the database and the config table when they are missing.

    A newly created table also gets its secondary index. Returns a dict
    with the booleans ``db`` and ``table`` telling which of the two were
    created by this call.
    """
    conn = self.connect()

    db_created = self.db_name not in r.db_list().run(conn)
    if db_created:
        r.db_create(self.db_name).run(conn)

    database = r.db(self.db_name)
    table_created = self.config_table_name not in database.table_list().run(conn)
    if table_created:
        database.table_create(
            self.config_table_name, primary_key=self.primary_key
        ).run(conn)
        database.table(self.config_table_name).index_create(
            self.secondary_index
        ).run(conn)

    return {"db": db_created, "table": table_created}
def read_table_into_queue(progress, conn, db, table, pkey, task_queue, progress_info, exit_event):
    """Stream a table's rows, ordered by primary key, into ``task_queue``.

    ``progress[0]`` is the primary key of the last row queued; when it is
    set, reading resumes after that key. ``progress_info[0].value`` counts
    rows read and ``progress_info[1].value`` is set to that count once the
    read has finished. Reading stops early when ``exit_event`` is set.
    """
    rows_read = 0

    source = r.db(db).table(table)
    if progress[0] is not None:
        # Resume strictly after the last row that was already queued.
        source = source.between(progress[0], None, left_bound="open")
    cursor = source.order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')

    try:
        for row in cursor:
            if exit_event.is_set():
                break
            task_queue.put([row])

            # Set progress so we can continue from this point if a connection error occurs
            progress[0] = row[pkey]

            # Update the progress every 20 rows - to reduce locking overhead
            rows_read += 1
            if rows_read % 20 == 0:
                progress_info[0].value += 20
    finally:
        # Account for the rows read since the last multiple of 20.
        progress_info[0].value += rows_read % 20

    # Export is done - since we used estimates earlier, update the actual table size
    progress_info[1].value = progress_info[0].value
async def put(self):
    """
    .. http:put:: /?queue={string:queue}

    Creates a queue if it does not exist.

    **Example request**:

    .. sourcecode:: http

        PUT /?queue=foo
        Host: example.com
        Accept: application/json, text/javascript

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Vary: Accept
        Content-Type: text/javascript

        ok

    :query queue: queue (table) to create
    :statuscode 200: This method always should return 200
    """
    opts = self.request.app['rethinkdb']
    conn = await r.connect(**opts)
    try:
        qname = self.request.GET['queue']
        # An operation failure is ignored, which covers the case where the
        # table already exists.
        with suppress(r.errors.ReqlOpFailedError):
            # The connection is awaited, so run() must be awaited too;
            # without it the query never executed.
            await r.db(opts['db']).table_create(qname).run(conn)
    finally:
        # One connection is opened per request; release it.
        await conn.close()
    return web.Response(body=b'ok')
def bulk_insert(ifile): bulk_size = 1000 i = 0 bulk_ins = [] bulk = {} for line in ifile: bulk = {} if line[0] == '#' or len(line) < 10 or line[0] == '@': continue line = line[:len(line)-2].replace("<","").replace(">","").strip() line_arr = line.split("\t") print line_arr,i bulk["id"] = unicode(line_arr[0],errors="ignore") bulk["rel"] = unicode(line_arr[1],errors="ignore") bulk["id2"] = unicode(line_arr[2],errors="ignore") if i < bulk_size - 1: bulk_ins.append(bulk) i += 1 elif i == bulk_size - 1: bulk_ins.append(bulk) r.db("yago").table("test").insert(bulk_ins).run(conn) i = 0 if i < bulk_size - 1 and i > 0: bulk_ins.append(bulk) r.db("yago").table("test").insert(bulk_ins).run(conn)
def test_reduce_1(self, conn):
    """Summing the ``points`` of every ``nums`` document yields 191."""
    points = r.db('d').table('nums').map(lambda doc: doc['points'])
    total = points.reduce(lambda elem, acc: elem + acc).run(conn)
    assertEqual(191, total)
def insert(tablename, thing, conn):
    """Insert ``thing`` into ``tablename`` of DB and return the write result."""
    target = r.db(DB).table(tablename)
    return target.insert(thing).run(conn)
def jumbo_write_json(data, db_name, table_name, chunk_size=5000, silent=True):
    '''Write big JSON lists to RethinkDB.
    Essential for datasets that are larger than 100,000 docs (ReQL max write).
    Often necessary even for smaller ones.

    data [list]: a list of dicts in JSON format.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    chunk_size [int or float of form BASEeEXP]: input list will be broken into
        chunks of this size. If you encounter memory use issues, reduce this
        value.
    silent [bool]: if True, does not print reports.

    Raises Exception if chunk_size exceeds 100,000 and ValueError if it is
    smaller than 1.

    Must be connected to a RethinkDB instance before using this.'''

    if chunk_size > 1e5:
        raise (Exception('Maximum JSON chunk_size is 100,000.'))

    #max array length for a ReQL write is 100k; but that uses too much mem
    chunk_size = int(chunk_size)
    if chunk_size < 1:
        raise ValueError('chunk_size must be at least 1.')
    list_length = len(data)

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        print('Creating database "' + db_name + '".')
        r.db_create(db_name).run()

    #create table if it doesn't already exist
    if table_name not in r.db(db_name).table_list().run():
        print('Creating table "' + table_name + '" in database "' \
            + db_name + '".')
        r.db(db_name).table_create(table_name).run()

    if not silent:
        print('Writing list of ' + str(list_length) + ' trips to table "' \
            + table_name + '".')

    #digest data and write to RethinkDB
    # Stepping by chunk_size replaces the manual chunk/remainder arithmetic,
    # whose end index for the last chunk was one too high.
    for s in range(0, list_length, chunk_size):
        e = min(s + chunk_size, list_length)

        if not silent:
            print('Writing trips ' + str(s) + '-' + str(e - 1) + '.')

        #write chunk to rethink (some data may be lost in case of power failure)
        r.db(db_name).table(table_name).insert(data[s:e]).run(
            durability='soft', noreply=False)

    if not silent:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Table "' + table_name + '" now contains ' + str(ndocs) \
            + ' trips.')
if value == 1: result = value elif value == 0: result = is_following(user, owner) if result == 1: collaboration_cache[user][owner] = True return result con = rdb.connect() db_name, table_name = 'member_events', 'year_2016' db_ref = rdb.db(db_name).table(table_name) if db_name not in rdb.db_list().run(con): rdb.db_create(db_name).run(con) if table_name not in rdb.db(db_name).table_list().run(con): rdb.db(db_name).table_create(table_name).run(con) for i in range(2, 7): print '2016, {0}'.format(i) with open('{0}.json'.format(i)) as f: events = json.load(f) events = events[0] entries = []
def run_vod_kpis(ucis, view_type):
    """Compute the KPI set for ``ucis`` and store it in the ``view_type`` table.

    Weekly figures use the window that starts six days before the
    module-level ``dt_end``; hibernation compares that week with the seven
    days before it. Each KPI is written as its own document (``id`` equal to
    the KPI name) into ``telenortv_insight_api.<view_type>``, replacing any
    document with the same id.
    """
    started_views = view_count(ucis)

    week_ucis = ucis.filter(
        (dt_end - timedelta(days=6) < ucis.firstEvent)
        & (ucis.firstEvent < dt_end + timedelta(days=1))
    )
    week_ago_ucis = ucis.filter(
        (dt_end - timedelta(days=13) < ucis.firstEvent)
        & (ucis.firstEvent < dt_end - timedelta(days=6))
    )

    weekly_active_user = user_number(week_ucis)
    total_active_user = user_number(ucis)
    total_viewtime = total_viewing_time(ucis)
    user_viewtime = avg_user_viewtime(week_ucis)
    weekly_hibernation = user_hibernation(week_ucis, week_ago_ucis)
    top_program = top_programs_in_vod(ucis, 20)
    top_channel = normalize(top_tag_by_view_count(ucis, 'channelName'), started_views)
    hour_of_day = normalize(view_count_by_hour_of_day(ucis), started_views)
    day_of_week = normalize(view_count_by_day_of_week(ucis), started_views)
    tag_user_package, user_package = users_package_overview(ucis)
    package_overview = {
        "{} user".format(view_type): tag_user_package,
        "linear TV user": user_package
    }

    # KPIs stored under a key equal to their own name.
    named_kpis = [
        ('started-views', started_views),
        ('weekly-active-user', weekly_active_user),
        ('total-active-user', total_active_user),
        ('total-viewing-time', total_viewtime),
        ('viewing-time', user_viewtime),
        ('user-hibernation', weekly_hibernation),
    ]
    # KPIs stored under the generic "data" key.
    data_kpis = [
        ('top-programs', top_program),
        ('top-provider', top_channel),
        ('hour-of-day', hour_of_day),
        ('day-of-week', day_of_week),
        ('package-overview', package_overview),
    ]

    res = [{"title": kpi, "id": kpi, kpi: value} for kpi, value in named_kpis]
    res += [{"title": kpi, "id": kpi, "data": value} for kpi, value in data_kpis]

    r.db('telenortv_insight_api').table(view_type).insert(
        res, conflict='replace').run()
def test_order_by_bracket(self, conn):
    """Ordering farms by a bracket-style ``id`` accessor yields ids 1, 2."""
    farms = r.db('x').table('farms')
    ordered_ids = farms.order_by(lambda doc: doc['id']).map(
        lambda doc: doc['id'])
    assertEqual([1, 2], list(ordered_ids.run(conn)))
def user_leave(self, user, room):
    """Delete every row that records ``user`` as being in ``room``."""
    membership = {'room': room, 'room_user': user}
    rooms = r.db(self.db).table(self.table)
    rooms.filter(membership).delete().run(self.conn)
def add_user(self, user, room, color):
    """Insert a row recording ``user`` in ``room`` with the given ``color``."""
    membership = {'room': room, 'room_user': user, 'color': color}
    rooms = r.db(self.db).table(self.table)
    rooms.insert(membership).run(self.conn)
def create_table(self, table):
    """Create ``table`` in this object's database and report the outcome.

    Prints ``table created`` on success. Any error from the create call is
    reported as ``table exists`` and is not raised.
    """
    try:
        r.db(self.db).table_create(table).run(self.conn)
        print('table created')
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        # NOTE(review): every failure is reported as "table exists", which
        # also hides connection problems.
        print('table exists')
def read(DB, tablename, accountaddress, conn):
    """Return ``address`` and ``balance`` of the first matching account.

    Returns None when no row has ``address`` equal to ``accountaddress``.
    """
    matches = r.db(DB).table(tablename).filter(
        {'address': accountaddress}
    ).pluck('address', 'balance').run(conn)
    # Only the first match is of interest.
    return next(iter(matches), None)
import rethinkdb as r
import algos

# Script: run k-means on the content of one page and store the result on
# that page document under "cluster".
c = r.connect()
pages = r.db("themis").table("pages")

cursor = pages.limit(1).run(c)
data = []
for document in cursor:
    databaseId = document['id']
    print(databaseId)
    content = str(document['content']).decode('unicode-escape')
    kmeansResult = algos.kmeans(content)
    pages.get(databaseId).update({"cluster": kmeansResult}).run(c)
# Returned Docopt arguments.
docArgs = doc(__doc__, version="0.0.1")

# Values from Docopt.
# Both flags are tri-state: 1 -> True, 0 -> False, anything else -> None.
noDB = {1: True, 0: False}.get(int(docArgs["--nodb"]))
online = {1: True, 0: False}.get(int(docArgs["-o"]))
#print(docArgs)
dbA = str(docArgs["--dba"][0])
dbN = str(docArgs["--dbn"][0])
tOut = int(docArgs["--tout"][0])

app = Flask(__name__)
sIO = sio(app)

db = r.db(dbN)
# Always bind ``c`` so the API route below does not hit a NameError when the
# application is started without a database.
c = None
if not noDB:
    c = database.conn(dbA)
#print(db)
#print(type(db))


# Routings.
@app.route("/")
def index():
    """Serve the landing page."""
    return render_template("index.html")


@app.route("/api/client")
def api_client():
    """Delegate to ``DatabaseAPI`` for the ``client_name`` resource."""
    return DatabaseAPI(c, db, dbA, noDB, "client_name")
def tables():
    """Return the table names of the ``smc`` database.

    Connects to DB_HOST with the key read from the AUTH file and registers
    that connection as the default (``repl``) connection.
    """
    import rethinkdb as r

    # Read the key inside a with-block so the file handle is closed.
    with open(AUTH) as auth_file:
        auth_key = auth_file.read().strip()

    r.connect(host=DB_HOST, auth_key=auth_key, timeout=20).repl()
    return r.db('smc').table_list().run()
""" Create the tables we are going to use """ global connection, tables print "Creating databases/tables...", sys.stdout.flush() try: r.db_drop("test").run(connection) except r.errors.RqlRuntimeError, e: pass r.db_create("test").run(connection) for table in tables: r.db("test").table_create(table["name"]).run(connection) for table in tables: r.db("test").table( table["name"]).index_create("field0").run(connection) r.db("test").table( table["name"]).index_create("field1").run(connection) print " Done." sys.stdout.flush() def execute_read_write_queries(suffix): """ Execute all the queries (inserts/update, reads, delete) """
def tests():
    """Run a sequence of ReQL queries and print each result.

    Every query is executed through ``c``, a name this function does not
    define (presumably a module-level connection -- confirm). Python 2
    only: ``print`` is used as a statement, so ``print(x).run(c)`` prints
    the result of ``(x).run(c)``.

    The statements are order-dependent: databases and tables are created,
    filled, modified and emptied as the function proceeds.
    """
    # --- literals ---
    print r.expr(1).run(c)
    print r.expr("bob").run(c)
    print r.expr(True).run(c)
    print r.expr(False).run(c)
    print r.expr(3.12).run(c)
    print r.expr([1, 2, 3, 4, 5]).run(c)
    print r.expr({'a': 1, 'b': 2}).run(c)
    #print r.js('1 + 1').run(c)

    # --- comparison, negation and arithmetic operators ---
    print(r.expr(1) == 2).run(c)  # false
    print(r.expr(1) != 2).run(c)  # true
    print(r.expr(1) < 2).run(c)  # true
    print(r.expr(1) <= 2).run(c)  # true
    print(r.expr(1) > 2).run(c)  # false
    print(r.expr(1) >= 2).run(c)  # false
    print(~r.expr(True)).run(c)  # false
    print(~r.expr(False)).run(c)  # true
    print(r.expr(1) + 2).run(c)  # 3
    print(r.expr(1) - 2).run(c)  # -1
    print(r.expr(1) * 2).run(c)  # 2
    print(r.expr(1) / 2).run(c)  # .5
    print(r.expr(12) % 10).run(c)  # 2
    print(((r.expr(12) / 6) * 4) - 3).run(c)  # 5

    # --- array operations ---
    arr = r.expr([1, 2, 3, 4])
    print arr.append(5).run(c)
    print arr[1].run(c)
    print arr[2].run(c)
    print arr[1:2].run(c)
    print arr[:2].run(c)
    print arr[2:].run(c)
    print arr.count().run(c)
    print arr.union(arr).run(c)
    print arr.union(arr).distinct().run(c)
    print arr.inner_join(arr, lambda a, b: a == b).run(c)
    print arr.outer_join(arr, lambda a, b: a == (b - 2)).run(c)
    #print r.expr([{'id':0, 'a':0}, {'id':1, 'a':0}]).eq_join([{'id':0, 'b':1}, {'id':1, 'b':1}], 'id').run(c)

    # --- object operations ---
    obj = r.expr({'a': 1, 'b': 2})
    print obj['a'].run(c)
    print obj.contains('a').run(c)
    print obj.pluck('a').run(c)
    print obj.without('a').run(c)
    print obj.merge({'c': 3}).run(c)

    # --- database and table administration ---
    print r.db_list().run(c)
    print r.db_create('bob').run(c)
    print r.db_create('test').run(c)
    print r.db_list().run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_create('test').run(c)
    print r.db('test').table_create('bob').run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_drop('bob').run(c)
    print r.db('test').table_list().run(c)

    # --- writes against test.test; the table is printed after each step ---
    test = r.db('test').table('test')
    print test.run(c)
    print test.insert({'id': 1, 'a': 2}).run(c)
    print test.insert({'id': 2, 'a': 3}).run(c)
    print test.insert({'id': 3, 'a': 4}).run(c)
    print test.run(c)
    print test.between(right_bound=2).run(c)
    print test.update(lambda row: {'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.replace(lambda row: {'id': row['id'], 'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.delete().run(c)
    print test.run(c)

    # --- functions, transformations and control flow ---
    print r.expr(1).do(lambda a: a + 1).run(c)
    print r.expr(2).do(lambda a: {'b': a / a}).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a + 1).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a.do(lambda b: b + a)).run(c)
    print r.expr([1, 2, 3]).reduce(lambda a, b: a + b).run(c)
    print r.expr([1, 2, 3, 4]).filter(lambda a: a < 3).run(c)
    print r.expr([1, 2]).concat_map(lambda a: [a, a]).run(c)
    print r.branch(r.expr(1) < 2, "a", "b").run(c)
    print r.branch(r.expr(1) < 0, "a", "b").run(c)
    print(r.expr(True) & r.expr(False)).run(c)
    print(r.expr(True) | r.expr(False)).run(c)
    print(r.expr(True) & r.expr(True)).run(c)
    print(r.expr(False) | r.expr(False)).run(c)
    #print r.expr([1,2]).map(3).run(c)
    #print r.expr([1,2]).map(r.row + 3).run(c)

    # --- ordering and grouping ---
    print r.expr([{'id': 2}, {'id': 3}, {'id': 1}]).order_by('id').run(c)
    print r.expr([{
        'g': 0,
        'v': 1
    }, {
        'g': 0,
        'v': 2
    }, {
        'g': 1,
        'v': 1
    }, {
        'g': 1,
        'v': 2
    }]).grouped_map_reduce(lambda row: row['g'], lambda row: row['v'] + 1,
                           lambda a, b: a + b).run(c)
    #print r.expr([1,2]).for_each(lambda i: [test.insert({'id':i, 'a': i+1})]).run(c)
    print test.run(c)
def execute_read_write_queries(suffix): """ Execute all the queries (inserts/update, reads, delete) """ global results, connection, time_per_query, executions_per_query, constant_queries print "Running inserts...", sys.stdout.flush() for table in tables: docs = [] num_writes = gen_num_docs(table["size_doc"]) for i in xrange(num_writes): docs.append(gen_doc(table["size_doc"], i)) i = 0 durations = [] start = time.time() while (time.time() - start < time_per_query) & (i < num_writes): start_query = time.time() result = r.db('test').table(table['name']).insert( docs[i]).run(connection) durations.append(time.time() - start_query) if "generated_keys" in result: table["ids"].append(result["generated_keys"][0]) i += 1 durations.sort() results["single-inserts-" + table["name"] + "-" + suffix] = { "average": (time.time() - start) / i, "min": durations[0], "max": durations[len(durations) - 1], "first_centile": durations[int(math.floor(len(durations) / 100. * 1))], "last_centile": durations[int(math.floor(len(durations) / 100. * 99))] } # Save it to know how many batch inserts we did single_inserts = i # Finish inserting the remaining data size_batch = 500 durations = [] start = time.time() count_batch_insert = 0 if i < num_writes: while i + size_batch < num_writes: start_query = time.time() resutl = r.db('test').table(table['name']).insert( docs[i:i + size_batch]).run(connection) durations.append(time.time() - start_query) end = time.time() count_batch_insert += 1 table["ids"] += result["generated_keys"] i += size_batch if i < num_writes: result = r.db('test').table(table['name']).insert( docs[i:len(docs)]).run(connection) table["ids"] += result["generated_keys"] if num_writes - single_inserts != 0: results["batch-inserts-" + table["name"] + "-" + suffix] = { "average": (end - start) / (count_batch_insert * size_batch), "min": durations[0], "max": durations[len(durations) - 1], "first_centile": durations[int(math.floor(len(durations) / 100. 
* 1))], "last_centile": durations[int(math.floor(len(durations) / 100. * 99))] } table["ids"].sort() print " Done." sys.stdout.flush() # Execute the insert queries print "Running update/replace...", sys.stdout.flush() for table in tables: for p in xrange(len(write_queries)): docs = [] num_writes = gen_num_docs(table["size_doc"]) for i in xrange(num_writes): docs.append(gen_doc(table["size_doc"], i)) i = 0 durations = [] start = time.time() while (time.time() - start < time_per_query) & (i < len( table["ids"])): start_query = time.time() eval(write_queries[p]["query"]).run(connection) durations.append(time.time() - start_query) i += 1 durations.sort() results[write_queries[p]["tag"] + "-" + table["name"] + "-" + suffix] = { "average": (time.time() - start) / i, "min": durations[0], "max": durations[len(durations) - 1], "first_centile": durations[int(math.floor(len(durations) / 100. * 1))], "last_centile": durations[int(math.floor(len(durations) / 100. * 99))] } i -= 1 # We need i in write_queries[p]["clean"] (to revert only the document we updated) # Clean the update eval(write_queries[p]["clean"]).run(connection) print " Done." 
sys.stdout.flush() # Execute the read queries on every tables print "Running reads...", sys.stdout.flush() for table in tables: for p in xrange(len(table_queries)): count = 0 i = 0 if "imax" in table_queries[p]: max_i = table_queries[p]["imax"] + 1 else: max_i = 1 durations = [] start = time.time() while (time.time() - start < time_per_query) & (count < executions_per_query): start_query = time.time() try: cursor = eval(table_queries[p]["query"]).run(connection) if isinstance(cursor, r.net.Cursor): list(cursor) cursor.close() if i >= len(table["ids"]) - max_i: i = 0 else: i += 1 except: print "Query failed" print constant_queries[p] sys.stdout.flush() break durations.append(time.time() - start_query) count += 1 durations.sort() results[table_queries[p]["tag"] + "-" + table["name"] + "-" + suffix] = { "average": (time.time() - start) / count, "min": durations[0], "max": durations[len(durations) - 1], "first_centile": durations[int(math.floor(len(durations) / 100. * 1))], "last_centile": durations[int(math.floor(len(durations) / 100. * 99))] } print " Done." sys.stdout.flush() # Execute the delete queries print "Running delete...", sys.stdout.flush() for table in tables: for p in xrange(len(delete_queries)): start = time.time() i = 0 durations = [] start = time.time() while (time.time() - start < time_per_query) & (i < len( table["ids"])): start_query = time.time() eval(delete_queries[p]["query"]).run(connection) durations.append(time.time() - start_query) i += 1 durations.sort() results[delete_queries[p]["tag"] + "-" + table["name"] + "-" + suffix] = { "average": (time.time() - start) / i, "min": durations[0], "max": durations[len(durations) - 1], "first_centile": durations[int(math.floor(len(durations) / 100. * 1))], "last_centile": durations[int(math.floor(len(durations) / 100. * 99))] } print " Done." sys.stdout.flush()
def drop():
    """Delete all chats (truncate)"""
    chats = r.db('chat').table('chats')
    # delete() without a filter removes every document; the table stays.
    chats.delete().run(conn)
def jumbo_write_df(df, db_name, table_name, df_chunk_size=5e5,
                   json_chunk_size=5e3, verbosity=1):
    '''Write big pandas dataframes to RethinkDB.

    Essential for datasets that are larger than 100,000 rows (ReQL max
    write). Often necessary even for smaller ones.

    df [pandas DataFrame]: 'nuff said. Must contain a 'hash' column; rows
        are grouped by it before being written.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    df_chunk_size [int or float of form BASEeEXP]: input df will be broken
        into chunks of this many rows. If you encounter memory use issues,
        reduce this value first. Maximum accepted value is 1,000,000.
    json_chunk_size [int or float of form BASEeEXP]: input list passed to
        jumbo_write_json will be broken into chunks of this size. If you
        encounter memory use issues, reduce this value second. Maximum
        accepted value is 100,000 (ReQL write limit).
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = reports from this function and subroutine jumbo_write_json.

    Raises Exception if either chunk size exceeds its maximum.

    Calls jumbo_write_json. Must be connected to a RethinkDB instance
    before using this.'''

    if df_chunk_size > 1e6:
        raise (Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        # Adjacent literals instead of a backslash inside the string, which
        # would embed the source indentation in the message.
        raise (Exception('Maximum json_chunk_size is 100,000. This size is '
                         'rarely a good idea.'))

    #set verbosity for jumbo_write_json
    sil = verbosity != 2

    if verbosity > 0:
        print('Preparing ' + str(len(df)) + '-row DataFrame for database.')

    get_hash = operator.itemgetter('hash')

    while len(df):  #runs as long as rows remain in the dataframe

        #take a chunk of the dataframe and convert to json list
        l = min(len(df), int(df_chunk_size))  #chunk_size rows, or all the rest if fewer
        chunk = df.iloc[0:l]
        # Advance by position. Dropping by index label would also remove
        # later rows that share a label with this chunk whenever the index
        # is not unique.
        df = df.iloc[l:]
        json_list = chunk.to_dict('records')

        if verbosity > 0:
            print('Converting chunk of ' + str(l) + ' rows to JSON format.')

        #free up some memory
        del (chunk)
        gc.collect()  #remove all vars no longer referenced to free a bit more

        #sort by hash (itertools.groupby only merges adjacent equal keys).
        json_list = sorted(json_list, key=get_hash)

        #group json list by hash and remove hash from each reduction
        jl2 = []
        for hsh, red in itt.groupby(json_list, key=get_hash):
            red = list(red)
            for record in red:
                record.pop('hash', None)
            jl2.append({'group': hsh, 'reduction': red})

        del (json_list)

        if verbosity > 0:
            print('Finished grouping chunk by hash. Passing list of length ' \
                + str(len(jl2)) + ' to jumbo_write_json.')

        #write list to rethink
        jumbo_write_json(data=jl2, db_name=db_name, table_name=table_name,
                         chunk_size=json_chunk_size, silent=sil)

        del (jl2)

    if verbosity > 0:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Finished writing day of records. Wrote ' + str(ndocs) \
            + ' docs to table "' + table_name + '".')
def count_documents(self, table):
    '''
    return integer count of number of documents in table

    Runs on the default connection, since run() is given none.
    '''
    target = r.db(self.database).table(table)
    return target.count().run()
def __init__(self):
    """Connect to RethinkDB and keep a handle on Atlas.WordSearchCount."""
    connection = r.connect(host="172.16.1.2", port=28015)
    self.conn = connection
    # Disabled bootstrap code, kept for reference:
    # r.db_list().contains('Atlas').do(lambda databaseExists: r.branch(databaseExists, 0 ,r.db_create('Atlas'))).run(self.conn)
    # r.db('Atlas').table_create('DomainTable').run(self.conn)
    #r.db('Atlas').contains('DomainTable').do(lambda exists : r.branch( exists, 0, r.db('Atlas').table_create('DomainTable'))).run(self.conn)
    atlas = r.db('Atlas')
    self.table = atlas.table("WordSearchCount")
def retrieve_records(
        api_key,
        sensor_path,
        db_name,
        end_date=(
            datetime.datetime.strptime(time.strftime('%Y-%m-%d'), '%Y-%m-%d')
            - datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
        start_date=None,
        json_chunk_size=5e3,
        verbosity=1):
    '''Pull records from Acyclica's API and write to RethinkDB.

    api_key [str]: the 41-character alphanumeric key you were given by
        Acyclica. Should be read in from an environment variable,
        encrypted if possible.
    sensor_path [str]: the path to Acyclica_sensors_CBD.csv (should be
        fetched automatically once we package this thing).
    db_name [str]: the name of the RethinkDB database that will be
        populated.
    end_date [str]: a date string of the form 'YYYY-MM-DD' specifying the
        last day of data to pull from Acyclica. Defaults to yesterday.
        NOTE: the default is computed once, when this module is imported.
        In a long-running process pass None (or an explicit date) to get
        yesterday relative to the time of the call.
    start_date [str]: a date string of the form 'YYYY-MM-DD' specifying the
        first day of data to fetch from Acyclica. Defaults to None, which
        means only end_date will be fetched. Set this to 'prev_week' to
        fetch the full week starting 8 days ago and ending yesterday.
    json_chunk_size [int or float of form BASEeEXP]: lists passed to
        jumbo_write_json will be broken into chunks of this size. No need
        to modify unless you encounter memory use issues, in which case
        you should first try reducing the default value of 5,000.
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = more reports from this function and from subroutine
            jumbo_write_json.

    Raises Exception on a malformed date, a date range longer than one
    week, an end date before the start date, or an oversized
    json_chunk_size.

    Calls df_to_json_etc and jumbo_write_json. Must be connected to a
    RethinkDB instance before using this. Pull at minimum 1 day and at
    maximum 1 week of data in increments of 1 day.'''

    #start timing
    start_time = time.time()

    #check for size limit errors
    if json_chunk_size > 1e5:
        # Adjacent literals instead of a backslash inside the string, which
        # would embed the source indentation in the message.
        raise (Exception('Maximum json_chunk_size is 100,000. This size is '
                         'rarely a good idea.'))

    #None means "yesterday as of now" (see the docstring note on the default)
    if end_date is None:
        end_date = (datetime.date.today()
                    - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    #check for end_date format error
    try:
        datetime.datetime.strptime(end_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        raise (Exception('end_date must be of the form "YYYY-MM-DD".'))

    #set appropriate start dates based on input
    if start_date == 'prev_week':
        start_date = (datetime.datetime.strptime(end_date, '%Y-%m-%d')
                      - datetime.timedelta(days=6)).strftime('%Y-%m-%d')
    elif start_date is None:
        start_date = end_date

    #check for start_date format error
    try:
        datetime.datetime.strptime(start_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        raise (Exception('start_date must be of the form "YYYY-MM-DD".'))

    #add 23 h, 59 m, and 59 s to the end date (to grab the whole day)
    end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d') \
        + datetime.timedelta(hours=23, minutes=59, seconds=59)

    #convert datetime objects to unix time
    start_unix = int(
        time.mktime(
            datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple()))
    end_unix = int(time.mktime(end_date.timetuple()))

    #make sure the user isn't trying to grab more than a week of data, and
    #that end is after start
    if end_unix - start_unix > 604800:
        raise (Exception(
            'Please specify a range of dates no greater than one week.'))
    if end_unix - start_unix < 0:
        raise (Exception('end_date must be later than start date.'))

    #determine how many days have been selected
    dif = end_unix - start_unix
    ndays = math.ceil(dif / (24 * 3600))

    #get sensor data
    sensors = pd.read_csv(sensor_path)
    sensors.columns = ['IntersectionID', 'sensor']
    sensor_list = list(sensors['sensor'])

    if verbosity > 0:
        print('Preparing to acquire data for ' + str(ndays) + ' day(s) and ' \
            + str(len(sensor_list)) + ' sensors.')

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        r.db_create(db_name).run()

    read_columns = ['Timestamp', 'MAC Hash', 'Strength', 'Serial']

    #request and process one day at a time (roughly 5-10m records acquired
    #per day)
    day_start_unix = start_unix
    for day in range(ndays):

        print('Acquiring records for day ' + str(day + 1) + ' of ' \
            + str(ndays) + '. May take several minutes.')

        #date string will be the table name on RethinkDB
        tname = datetime.datetime.fromtimestamp(
            int(day_start_unix)).strftime('%Y_%m_%d')

        if tname in r.db(db_name).table_list().run():
            print('Table "' + tname + '" already exists in database "' \
                + db_name + '". Skipping this day.')
            day_start_unix = day_start_unix + (24 * 3600)  #increment day
            continue
        else:
            r.db(db_name).table_create(tname).run()

        #get endpoint for this iteration
        day_end_unix = day_start_unix + (23 * 3600) + 3599

        # Collect the per-sensor frames and concatenate once per day:
        # DataFrame.append was removed in pandas 2.0, and growing a frame
        # inside the loop copies all accumulated rows on every sensor.
        frames = []
        reads_so_far = 0

        #request and preprocess each sensor separately
        for i, sensor_id in enumerate(sensor_list):

            URL = "https://cr.acyclica.com/datastream/device/csv/time/" \
                + api_key + "/" + str(sensor_id) + "/" \
                + str(day_start_unix) + "/" + str(day_end_unix)

            #get raw web content and read into a dataframe
            items = requests.get(URL).content
            newdf = pd.read_csv(
                io.StringIO(items.decode('utf-8')), usecols=read_columns)

            #round timestamp to nearest second
            newdf['Timestamp'] = newdf['Timestamp'].round().astype('int')

            #drop repeated reads within 1s
            # NOTE(review): the original comment says the read with the
            # highest strength is kept, but the filter keys on 'Serial',
            # not 'Strength' -- confirm which column is intended.
            strmaxes = newdf.groupby(['Timestamp',
                                      'MAC Hash'])['Serial'].transform('max')
            newdf = newdf[newdf['Serial'] == strmaxes]

            frames.append(newdf)
            reads_so_far += len(newdf)

            if verbosity == 2:
                if i + 1 in [15, 30, 45]:
                    print('Got data for ' + str(i + 1) + ' of ' \
                        + str(len(sensor_list)) \
                        + ' sensors. So far there are ' + str(reads_so_far) \
                        + ' reads for day ' + str(day + 1) + '.')

        if frames:
            df = pd.concat(frames, ignore_index=True)
        else:
            df = pd.DataFrame(columns=read_columns)
        del (frames)

        #drop repeated reads again, now across sensors (same NOTE as above)
        strmaxes = df.groupby(['Timestamp',
                               'MAC Hash'])['Serial'].transform('max')
        df = df[df['Serial'] == strmaxes]

        pre_filt_len = str(len(df))
        if verbosity > 0:
            print('Found ' + pre_filt_len + ' sensor reads for day ' \
                + str(day + 1) + '. Cleaning those now.')

        json_list = df_to_json_etc(df, verbosity, pre_filt_len, sensors)

        if verbosity > 0:
            print('Converted DataFrame to JSON list and grouped by hash. ' \
                + 'Passing list of length ' + str(len(json_list)) \
                + ' to jumbo_write_json.')

        #set verbosity for jumbo_write_json
        sil = verbosity != 2

        jumbo_write_json(data=json_list, db_name=db_name, table_name=tname,
                         chunk_size=json_chunk_size, silent=sil)

        #increment day
        day_start_unix = day_start_unix + (24 * 3600)

    if verbosity > 0:
        run_time = round((time.time() - start_time) / 60, 2)
        print('Finished writing all records for ' + str(ndays) + ' day(s) ' \
            + 'in ' + str(run_time) + ' minutes.\nRecords are in database "' \
            + db_name + '".')
def create_table(name, conn):
    """Create table ``name`` in database ``DB`` over ``conn``.

    The server's response is discarded; nothing is returned.
    """
    creation = r.db(DB).table_create(name)
    creation.run(conn)
def clear_current(sample_id, conn):
    """Set ``current`` to False on every attribute set of ``sample_id``.

    Rows are looked up in samplesdb.sample2attribute_set through the
    ``sample_id`` secondary index.
    """
    attribute_sets = r.db("samplesdb").table("sample2attribute_set")
    for_sample = attribute_sets.get_all(sample_id, index="sample_id")
    for_sample.update({"current": False}).run(conn)
parameters = yaml.load(parameter_file) print "Connecting database ..." rethink = r.connect(parameters['rethinkdb_server']['host'], parameters['rethinkdb_server']['port']).repl() rethink_db = parameters['rethinkdb_server']['database'] url_queue_table = parameters['rethinkdb_server']['tables']['url_queue'] raw_result_table = parameters['rethinkdb_server']['tables']['raw_result'] indexed_result_table = parameters['rethinkdb_server']['tables'][ 'indexed_result'] # Init database db_list = r.db_list().run(rethink) if rethink_db not in db_list: print "Init database ..." r.db_create(rethink_db).run(rethink) r.db(rethink_db).table_create(url_queue_table).run(rethink) r.db(rethink_db).table(url_queue_table).index_create('ts').run(rethink) r.db(rethink_db).table_create(raw_result_table).run(rethink) r.db(rethink_db).table_create(indexed_result_table).run(rethink) rethink.use(rethink_db) def main(argv): # Main code here print "I'm manager :)" if len(argv) > 1: seed_url = argv[1] r.table(url_queue_table).insert({
def __init__(self):
    """Open the default connection and bind ``self.db`` to the table.

    Server, port, database and table names all come from ``settings``.
    Despite its name, ``self.db`` holds a table query, not a database.
    """
    server = settings['RETHINKDB_SERVER']
    port = settings['RETHINKDB_PORT']
    # repl() makes this the connection used by run() calls without one.
    r.connect(server, port).repl()
    database = r.db(settings['RETHINKDB_DB'])
    self.db = database.table(settings['RETHINKDB_TABLE'])
#!/usr/bin/env python
# coding: utf-8
"""Compare the vote counts of two poll choices and write the verdict.

Writes "TRY" to tmp/finalcountq4.txt when the first choice has strictly
more votes than the second, "GIVE" otherwise (ties included).
"""
import rethinkdb as r

r.connect('localhost', 28015).repl()

watchcount = r.db('polltime').table('votes').get_all(
    'b0aae840-f52e-4bdd-abcd-74789f52c6bd', index='choice').count().run()
dontwatchcount = r.db('polltime').table('votes').get_all(
    'a966c7b3-9277-4c09-9254-8806762bbea0', index='choice').count().run()

watchint = int(watchcount)
dontwatchint = int(dontwatchcount)

# The context manager closes the file even if the write fails, and avoids
# shadowing the Python 2 builtin ``file``.
with open("tmp/finalcountq4.txt", "w") as outfile:
    if watchint > dontwatchint:
        outfile.write("TRY")
    else:
        outfile.write("GIVE")
# along with BigBlueTutor. If not, see <http://www.gnu.org/licenses/>.

# Prints the contents of all the tables in the "deepstream" RethinkDB database.
# Pass table names as command-line arguments to print only those tables.
# NOTE: the .without("messages") projection below is commented out, so
# users' messages are currently included in the output.

import rethinkdb as r
import dotenv
import os
import json
import sys

dotenv.load_dotenv("./.env")
r.connect(os.environ.get("DB_HOST"), int(os.environ.get("DB_PORT"))).repl()

# Tables named on the command line win; otherwise list every table.
tableList = sys.argv[1:] or r.db("deepstream").table_list().run()

print("Table list:")
print(tableList)
print()

for table in tableList:
    print("Table name: " + table)
    rows = list(r.db("deepstream").table(table).run())  #.without("messages").run())
    print(json.dumps(rows, indent=1, sort_keys=True))
    print()
def test_simple(self, conn):
    """map() with a lambda yields the first animal of every farm."""
    first_animals = r.db('x').table('farms').map(
        lambda doc: doc['animals'][0])
    res = first_animals.run(conn)
    assertEqual({'frog', 'horse'}, set(res))
def test_filter_by_bracket(self, conn):
    """filter() on a bracket-accessed field keeps only farms with id < 2."""
    query = r.db('x').table('farms').filter(lambda doc: doc['id'] < 2)
    found_ids = [doc['id'] for doc in query.run(conn)]
    assertEqual([1], found_ids)
def test_set_intersection(self, conn):
    """set_intersection() keeps only the members shared with ['x', 'y']."""
    expected = [{'x', 'y'}, {'x'}]
    query = r.db('z').table('t').map(
        lambda doc: doc['simple'].set_intersection(['x', 'y']))
    # Element order inside each result is irrelevant, so compare as sets.
    result = map(set, query.run(conn))
    assertEqUnordered(expected, result)