Example #1
def has_mode(db, conn, model, field, mode_table="Fcq"):
    model_id = "{0}_id".format(model).lower()
    mode_query = (
        r.db(db)
        .table(mode_table)
        .group(model_id)
        .ungroup()
        .for_each(
            lambda doc: r.db(db)
            .table(model)
            .get(doc["group"])
            .update(
                {
                    field: doc["reduction"]
                    .group(field)
                    .count()
                    .ungroup()
                    .order_by("reduction")
                    .nth(-1)
                    .default({"group": None})["group"]
                }
            )
        )
        .run(conn, array_limit=200000)
    )
    logging.info(mode_query)
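A hypothetical usage sketch for the helper above; the database name, model, and field are illustrative and assume the Fcq documents carry a user_id key matching User ids.
import rethinkdb as r

conn = r.connect('localhost', 28015)
# writes the most common 'course' value among each user's Fcq rows onto that User document
has_mode('campus', conn, 'User', 'course')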
Example #2
def create_registration(event_id, custom_fields):
    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)  # TODO: len(fields) == 0 -> invalid event ID error?

    custom_fields = __sanitize_registration(fields, custom_fields)
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).insert({
        'event_id': event_id,
        'custom_fields': custom_fields
    }).run(database.connection)

    if response['inserted'] != 1:
        return None, RegistrationInsertException()

    # returns the inserted ID
    return response['generated_keys'][0], None
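A minimal sketch of calling the helper above and unpacking its (value, error) return; the event id and custom fields are hypothetical.
registration_id, err = create_registration(
    '5d7f0a52-0000-4000-8000-000000000000',   # hypothetical event id
    {'t_shirt_size': 'M'}
)
if err is not None:
    print('registration failed: {0}'.format(err))
else:
    print('created registration {0}'.format(registration_id))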
Example #3
def update_registration(registration_id, new_registration):
    registration, err = get_registration(registration_id)
    if not registration:
        return None, RegistrationNotFoundException()
    event_id = registration.get('event_id')

    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)

    custom_fields = __sanitize_registration(
        fields, registration.get('custom_fields')
    )
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).get(
        registration_id
    ).update(
        new_registration
    ).run(database.connection)

    if response['errors'] != 0:
        return None, RegistrationUpdateException()

    return registration_id, None
Example #4
def setup():
    tables = [
        {
            'name' : 'testbeds',
            'pkey' : 'id'
        },
        {
            'name' : 'resources',
            'pkey' : 'hostname'
        }
    ]

    c = connect()

    try:
        r.db_create(Config.rethinkdb["db"]).run(c)
        logger.info('MyOps2 database created successfully')
    except RqlRuntimeError:
        logger.info('MyOps2 database already exists')

    for t in tables:
        try:
            r.db(Config.rethinkdb["db"]).table_create(t['name'], primary_key=t['pkey']).run(c)
            logger.info('MyOps2 table %s setup completed', t['name'])
        except RqlRuntimeError:
            logger.info('MyOps2 table %s already exists', t['name'])

    c.close()
Example #5
def init(conn, event):
    # try to drop table (may or may not exist)
    rv = ''
    try:
        r.db_drop(TIX).run(conn)
        rv = 'dropped, then created'
    except:
        rv = 'created'
    r.db_create(TIX).run(conn)
    r.db(TIX).table_create(VENU).run(conn)
    r.db(TIX).table(VENU).index_create(TS).run(conn)

    smap = {}
    umap = {}
    for x in range(1, CNT + 1):
        smap[str(x)] = 'free' 
        umap[str(x)] = ''

    rv += str(r.db(TIX).table(VENU).insert({
        ID: 0,
        SMAP: smap,
        UMAP: umap,
        MAX: CNT,
        TS: time.time()
    }).run(conn))

    return rv
Example #6
def upload_project(project_id):
    """
    Upload the bup backup of this project to the gcloud bucket.
    """
    path = path_to_project(project_id)

    run("sudo chmod a+r -R %s"%path)

    log('path: ', project_id)
    bup = os.path.join(path, 'bup')
    if not os.path.exists(bup):
        raise RuntimeError("no bup directory to upload -- done")
    target = os.path.join('gs://{bucket}/projects/{project_id}.zfs/bup'.format(
            bucket=GCLOUD_BUCKET, project_id=project_id))

    log('upload: rsync new pack files')
    run(['gsutil', '-m', 'rsync', '-x', '.*\.bloom|.*\.midx', '-r',
         '{bup}/objects/'.format(bup=bup),
         '{target}/objects/'.format(target=target)])
    log('gsutil upload refs/logs')
    for path in ['refs', 'logs']:
        run(['gsutil', '-m', 'rsync', '-c', '-r',
             '{bup}/{path}/'.format(bup=bup, path=path),
             '{target}/{path}/'.format(target=target, path=path)])

    #auth_key = open(RETHINKDB_SECRET).read().strip()
    conn = rethinkdb.connect(host=DB_HOST, timeout=10)#, auth_key=auth_key)
    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(TIMESTAMP_FORMAT)
    rethinkdb.db('smc').table('projects').get(project_id).update(
        {'last_backup_to_gcloud':timestamp_to_rethinkdb(timestamp)}).run(conn)
Example #7
 def test_multi_join(self, conn):
     query = r.db('x').table('employees').eq_join(
         'person', r.db('x').table('people')
     ).map(
         lambda d: d['left'].merge({'person': d['right']['name']})
     ).eq_join(
         'job', r.db('x').table('jobs')
     ).map(
         lambda d: d['left'].merge({'job': d['right']['name']})
     )
     expected = [
         {
             'id': 'joe-employee-id',
             'person': 'joe',
             'job': 'Lawyer'
         },
         {
             'id': 'tim-employee-id',
             'person': 'tim',
             'job': 'Nurse'
         },
         {
             'id': 'bob-employee-id',
             'person': 'bob',
             'job': 'Assistant'
         },
         {
             'id': 'todd-employee-id',
             'person': 'todd',
             'job': 'Lawyer'
         }
     ]
     assertEqUnordered(expected, list(query.run(conn)))
Example #8
def step1():

    response = {}
    conn = r.connect(host=current_app.config['RETHINKDB_HOST'])

    users = json.loads(request.data)
    users = {
        'name': users['name'],
        'user': users['user'],
        'email': users['email'],
        'password': users['password'],
        'ubication': [],
        'sale': []
    }
    
    check_user = r.db('food').table('user_register').filter({'email': users['email']}).run(conn)
    check_user = list(check_user)
    if len(check_user) > 0:
        
        response['success'] = 200
        response['message'] = u'El usuario ya existe'
        response['code'] = 1

    else:    
     
        insert = r.db(current_app.config['DATABASE']).table('user_register').insert(users).run(conn)
        response['success'] = 200
        response['message'] = u'Usuario registrado'
        response['code'] = 0

    pprint.pprint(response)
    return jsonify(response)
Example #9
def main():
    # connect to RethinkDB and register this connection as the default (repl)
    rethinkdb.connect("localhost", 28015, "mysql").repl()
    try:
        rethinkdb.db_drop("mysql").run()
    except:
        pass
    rethinkdb.db_create("mysql").run()

    tables = ["dept_emp", "dept_manager", "titles",
              "salaries", "employees", "departments"]
    for table in tables:
        rethinkdb.db("mysql").table_create(table).run()

    stream = BinLogStreamReader(
        connection_settings=MYSQL_SETTINGS,
        blocking=True,
        only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent],
    )

    # process Feed
    for binlogevent in stream:
        if not isinstance(binlogevent, WriteRowsEvent):
            continue

        for row in binlogevent.rows:
            if not binlogevent.schema == "employees":
                continue

            vals = {}
            vals = {str(k): str(v) for k, v in row["values"].iteritems()}
            rethinkdb.table(binlogevent.table).insert(vals).run()

    stream.close()
Example #10
File: test.py Project: isidorn/test2
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a':r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(r.db('test').table('test').get(0)['a'].contains(r.error('bob')), r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1,2]).reduce(lambda a,b: a + r.error("bob")).run(c)
Example #11
def setDictionary():
	dict = {}
	#print "getting top stories from hacker-news"
	result = firebase.get('/v0/topstories', None)
	# result = result[:200]
	for itemid in result:
		try:
			data = firebase.get('/v0/item/' + str(itemid), None)
			if (data['type'] == 'story'):
				# get tags
				url = data['url']
				(to_insert, tags) = selectTags(itemid)
				# store to temp db
				r.db("tagger_db").table("id2html").insert({"id": itemid, "tag_string": to_insert}).run(connection)
				if len(tags) > 1:
					title = data['title']
					score = str(data['score'])
					usr = data['by']
					comments = str(data['descendants'])
					myString = "<tr class='athing'><td align=\"right\" valign=\"top\" class=\"title\"><span class=\"rank\"> </span></td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td><td class=\"title\"><span class=\"deadmark\"></span><a href=\"" + url + "\">" + title + "</a>" + to_insert + "</td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td></tr><tr><td colspan=\"2\"></td><td class=\"subtext\"><span class=\"score\">" + score + " points</span> by <a>" + usr + "</a> | <a>" + comments +" comments</a></td></tr><tr class=\"spacer\" style=\"height:5px\"></tr>"
					print "tags: ", tags[0], tags[1]
					add(tags[0], myString, dict)
					add(tags[1], myString, dict)
		except KeyError:
			pass
	# r.db("test").table("tag_dict").delete().run(connection)
	r.db("tagger_db").table("tag2html").insert(dict).run(connection)
Example #12
 def __init__(self, count):
     self.con = r.connect("localhost", 28015).repl()
     tables = r.db("test").table_list().run(self.con)
     if "items" in tables:
         r.db("test").table_drop("items").run(self.con)
     r.db("test").table_create("items").run(self.con)
     self.count = count
Example #13
def sync_facebook(name):
    #import ipdb; ipdb.set_trace();
    try:
        form_data = json.loads(request.data)
    except:
        return response_msg('error', 'data not correct')

    try:
        graph = GraphAPI(form_data['access_token'])
        try:
            # #import ipdb; ipdb.set_trace();
            email = graph.get_object('me', fields='email')['email']
            pic = graph.get_object('me/picture', width='400', height='400')['url']
            print pic
            if email != form_data['fb_email']:
                return response_msg('error', 'incorrect facebook email')
        except:
            return response_msg('error', 'data not complete')
    except:
        return response_msg('error', 'invalid access token')

    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).update({'fb_email': email, 'pic': pic}
            ).run(connection)
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).run(connection)
    except:
        return response_msg('error', 'Could not connect to db')

    return response_msg('success', 'OK', data=cursor.items[0])
Example #14
def sync_ratings():
    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').run(connection)
    except:
        return response_msg('error', 'could not connect to db')
    for user in cursor.items:
        ratings = rating(user['cfhandle'], user['cchandle'], user['colg_rating'])
        ratings = json.loads(ratings[0])
        colg_rating = 0
        try:
            colg_rating = colg_rating + 20 * ((ratings['cf_rating']/100)**2)
            colg_rating = colg_rating + 2000 + 7 * (((ratings['lrating']/1000)**2) + (ratings['lrating']/20))
            colg_rating = colg_rating + 2000 + 5 * (((ratings['srating']/100)**2) + (ratings['srating']/20))
        except:
            pass
        print colg_rating
        try:
            cursor = rdb.db(TODO_DB).table('user').filter(
                rdb.row['username'] == user['username']
                ).update({
                'lrating': ratings['lrating'],
                'srating': ratings['srating'],
                'cfrating': ratings['cf_rating'],
                'colg_rating': colg_rating/3,
                }).run(connection)
            print user['username']
        except:
            print 'error' + user['username']

    return response_msg('success', 'OK')
Example #15
File: basemodel.py Project: sungbae/scq
 def remove_pending_user(self, user_id, row_id, user_pending_name=None):
     """
     removes a user id from a model's pending list.
     """
     if user_id is None:
         logging.error("user_id cannot be None")
         return False
     if row_id is None:
         logging.error("row_id cannot be None")
         return False
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     if user_pending_name is not None:
         user_pending = user_data.get(user_pending_name, [])
         try:
             user_pending.remove(row_id)
         except ValueError:
             logging.warn("row_id {0} not in user {1}".format(row_id, user_pending_name))
             pass
         r.db(self.DB).table(user_table).get(user_id).update({user_pending_name: user_pending}).run(self.conn)
     penders = row_data['penders']
     try:
         penders.remove(user_id)
     except ValueError:
         pass
     return r.db(self.DB).table(row_table).get(row_id).update({'penders': penders}).run(self.conn)
Example #16
    def save(self):
        try:
            r.db_create(self.db).run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        try:
            r.db(self.db).table_create('accounts').run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        user_exists = list(r.db(self.db)
                           .table('accounts')
                           .filter(lambda user: (user['name'] == self.name)
                                                & (user['ledger']['id'] == self.ledger['id']))
                           .run(self.bigchain.conn))
        if not len(user_exists):
            r.db(self.db)\
                .table('accounts')\
                .insert(self.as_dict(), durability='hard')\
                .run(self.bigchain.conn)
        else:
            user_persistent = user_exists[0]
            self.vk = user_persistent['vk']
            self.sk = user_persistent['sk']
Example #17
def insert_r(conn,table,sent,rel,val):
	bulk = {}
	if isinstance(rel["e1"],unicode):
		bulk["e1"] = rel["e1"]
	else:
		bulk["e1"] = unicode(rel["e1"],errors="ignore")

	if isinstance(rel["rel"],unicode):
		bulk["rel"] = rel["rel"]
	else:
		bulk["rel"] = unicode(rel["rel"],errors="ignore")

	if isinstance(rel["e2"],unicode):
		bulk["e2"] = rel["e2"]
	else:
		bulk["e2"] = unicode(rel["e2"],errors="ignore")

	if isinstance(sent,unicode):
		bulk["sent"] = sent
	else:
		bulk["sent"] = unicode(sent,errors="ignore")

	bulk["cfval"] = val
		

	r.db("wikikb").table(table).insert(bulk).run(conn)
Example #18
	def get_table():
		try:
			r.db(dbname).table_create('boards').run(_get_conn())
		except r.RqlRuntimeError:
			# already created
			pass
		return r.db(dbname).table('boards')
Example #19
def import_from_queue(progress, conn, task_queue, error_queue, replace_conflicts, durability, write_count):
    if progress[0] is not None and not replace_conflicts:
        # We were interrupted and it's not ok to overwrite rows, check that the batch either:
        # a) does not exist on the server
        # b) is exactly the same on the server
        task = progress[0]
        pkey = r.db(task[0]).table(task[1]).info().run(conn)["primary_key"]
        for i in reversed(range(len(task[2]))):
            obj = pickle.loads(task[2][i])
            if pkey not in obj:
                raise RuntimeError("Connection error while importing.  Current row has no specified primary key, so cannot guarantee absence of duplicates")
            row = r.db(task[0]).table(task[1]).get(obj[pkey]).run(conn)
            if row == obj:
                write_count[0] += 1
                del task[2][i]
            else:
                raise RuntimeError("Duplicate primary key `%s`:\n%s\n%s" % (pkey, str(obj), str(row)))

    task = task_queue.get() if progress[0] is None else progress[0]
    while not isinstance(task, StopIteration):
        try:
            # Unpickle objects (TODO: super inefficient, would be nice if we could pass down json)
            objs = [pickle.loads(obj) for obj in task[2]]
            conflict_action = 'replace' if replace_conflicts else 'error'
            res = r.db(task[0]).table(task[1]).insert(objs, durability=durability, conflict=conflict_action).run(conn)
        except:
            progress[0] = task
            raise

        if res["errors"] > 0:
            raise RuntimeError("Error when importing into table '%s.%s': %s" %
                               (task[0], task[1], res["first_error"]))

        write_count[0] += len(objs)
        task = task_queue.get()
Example #20
 def subscribe_user(self, user_id, row_id, user_subscription_name=None):
     """
     adds a user id to a model's subscription list.
     """
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     try:
         if user_subscription_name is not None:
             user_subscription = user_data[user_subscription_name]
             user_subscription.append(row_id)
             r.db(self.DB).table(user_table).get(user_id).update({user_subscription_name: user_subscription}).run(self.conn)
     except KeyError:
         logging.error("user subscription {0} not known in user data".format(user_subscription_name))
         return False
     subscribers = row_data['subscribers']
     subscribers.append(user_id)
     return r.db(self.DB).table(row_table).get(row_id).update({'subscribers': subscribers}).run(self.conn)
Example #21
    def save(db_host, db_port, db_name, db_table, data):

        if not isinstance(db_host, str):
            raise TypeError("Invalid database host name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_port, int):
            raise TypeError("Invalid database port argument type. Can't create Cache Walker instance.")

        if not isinstance(db_name, str):
            raise TypeError("Invalid database name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_table, str):
            raise TypeError("Invalid database table name argument type. Can't create Cache Walker instance.")

        try:
            connection = r.connect(db_host, db_port)

        except Exception as e:
            logger.debug("Can't connect to the database.")
            raise e

        try:
            r.db(db_name).table(db_table).insert(data).run(connection)

        except Exception as e:
            logger.debug("Can't insert data into the database.")
            raise e
Example #22
def LoadTestData(file, db, conn, v = False):
  '''Loading test data into the database.'''

  ## Loading data.
  data_dir = os.path.split(dir)[0]
  path = os.path.join(data_dir, 'tests', 'data', file)
  print path
  try:
    with open(path) as csv_file:
      data = csv.DictReader(csv_file)
      test_data = []
      for row in data:
        test_data.append(row)

  except Exception as e:
    print "Couldn't load test data."
    return False


  ## Storing in db.
  try:
    # Checking for existing records.
    n = r.db(db['name']).table('values').count().run(conn)
    if n > 0:
      if v:
        print "Data already in db. Deleting ..."
      r.db(db['name']).table('values').delete().run(conn)

    r.db(db['name']).table('values').insert(test_data).run(conn)
    return True

  except Exception as e:
    print "Could not insert data into database."
    return False
Example #23
File: corpus.py Project: nkman/Raiden
 def create_table(self):
   try:
     r.db('Raiden').table_create(self.corpus_table).run(self.connection)
     print 'Created table [Raiden.'+self.corpus_table+']'
   except Exception, e:
     print 'Error occurred during '+self.corpus_table+' table creation! Maybe it already exists!'
     print str(e)
Example #24
File: _import.py Project: isidorn/test2
def table_reader(options, file_info, task_queue, error_queue, exit_event):
    try:
        db = file_info["db"]
        table = file_info["table"]
        primary_key = file_info["info"]["primary_key"]
        conn = r.connect(options["host"], options["port"], auth_key=options["auth_key"])

        if table not in r.db(db).table_list().run(conn):
            r.db(db).table_create(table, primary_key=primary_key).run(conn)

        if file_info["format"] == "json":
            json_reader(task_queue,
                        file_info["file"],
                        db, table,
                        primary_key,
                        options["fields"],
                        exit_event)
        elif file_info["format"] == "csv":
            csv_reader(task_queue,
                       file_info["file"],
                       db, table,
                       primary_key,
                       options,
                       exit_event)
        else:
            raise RuntimeError("unknown file format specified")
    except (r.RqlClientError, r.RqlDriverError, r.RqlRuntimeError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2])))
    except InterruptedError:
        pass # Don't save interrupted errors, they are side-effects
    except:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb), file_info["file"]))
Example #25
def init_database_with_default_tables(args):
    """
    Create a new RethinkDB database and initialise (default) tables

    :param args: an argparse argument (force)
    """
    # Add additional (default) tables here...
    def_tables = ['determined_variants', 'strains_under_investigation',
                  'references', 'reference_features', 'strain_features']
    with database.make_connection() as connection:
        try:
            r.db_create(connection.db).run(connection)
            for atable in def_tables:
                r.db(connection.db).table_create(atable).run(connection)
        except RqlRuntimeError:
            print ("Database %s already exists. Use '--force' option to "
                   "reinitialise the database." % (connection.db))
            if args.force:
                print "Reinitialising %s" % (connection.db)
                r.db_drop(connection.db).run(connection)
                r.db_create(connection.db).run(connection)
                for atable in def_tables:
                    r.db(connection.db).table_create(atable).run(connection)
            else:
                sys.exit(1)
        print ("Initalised database %s. %s contains the following tables: "
               "%s" % (connection.db, connection.db, ', '.join(def_tables)))
Example #26
def get_tables(host, port, auth_key, tables):
    try:
        conn = r.connect(host, port, auth_key=auth_key)
    except r.RqlDriverError as ex:
        raise RuntimeError(ex.message)

    dbs = r.db_list().run(conn)
    res = []

    if len(tables) == 0:
        tables = [[db] for db in dbs]

    for db_table in tables:
        if db_table[0] not in dbs:
            raise RuntimeError("Error: Database '%s' not found" % db_table[0])

        if len(db_table) == 1: # This is just a db name
            res.extend([(db_table[0], table) for table in r.db(db_table[0]).table_list().run(conn)])
        else: # This is db and table name
            if db_table[1] not in r.db(db_table[0]).table_list().run(conn):
                raise RuntimeError("Error: Table not found: '%s.%s'" % tuple(db_table))
            res.append(tuple(db_table))

    # Remove duplicates by making results a set
    return set(res)
Example #27
    def create(self):
        conn = self.connect()

        db_list = r.db_list().run(conn)

        db_created = False
        table_created = False

        if not self.db_name in db_list:
            r.db_create(self.db_name).run(conn)
            db_created = True

        table_list = r.db(self.db_name).table_list().run(conn)

        if not self.config_table_name in table_list:
            r.db(self.db_name).table_create(
                self.config_table_name, primary_key=self.primary_key
            ).run(conn)

            r.db(self.db_name).table(self.config_table_name)\
                .index_create(self.secondary_index).run(conn)

            table_created = True

        return {"db": db_created, "table": table_created}
Example #28
def read_table_into_queue(progress, conn, db, table, pkey, task_queue, progress_info, exit_event):
    read_rows = 0
    if progress[0] is None:
        cursor = r.db(db).table(table).order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')
    else:
        cursor = r.db(db).table(table).between(progress[0], None, left_bound="open").order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')

    try:
        for row in cursor:
            if exit_event.is_set():
                break
            task_queue.put([row])

            # Set progress so we can continue from this point if a connection error occurs
            progress[0] = row[pkey]

            # Update the progress every 20 rows - to reduce locking overhead
            read_rows += 1
            if read_rows % 20 == 0:
                progress_info[0].value += 20
    finally:
        progress_info[0].value += read_rows % 20

    # Export is done - since we used estimates earlier, update the actual table size
    progress_info[1].value = progress_info[0].value
Example #29
File: dispatcher.py Project: XayOn/tqueues
    async def put(self):
        """
        .. http:put:: /?queue={string:queue}

            Creates a queue if it does not exist.

        **Example request**:

        .. sourcecode:: http

            GET /?queue=foo
            Host: example.com
            Accept: application/json, text/javascript

        **Example response**:

        .. sourcecode:: http

            HTTP/1.1 200 OK
            Vary: Accept
            Content-Type: text/javascript

            ok

        :query queue: queue (table) to create
        :statuscode 200: This method always should return 200

        """
        opts = self.request.app['rethinkdb']
        conn = await r.connect(**opts)
        qname = self.request.GET['queue']
        with suppress(r.errors.ReqlOpFailedError):
            r.db(opts['db']).table_create(qname).run(conn)

        return web.Response(body=b'ok')
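A minimal client sketch for the endpoint documented above, assuming the dispatcher is served at http://localhost:8080; the queue name is illustrative.
import asyncio
import aiohttp

async def create_queue(name):
    # PUT /?queue=<name> creates the queue (table) if it does not exist
    async with aiohttp.ClientSession() as session:
        async with session.put('http://localhost:8080/?queue={0}'.format(name)) as resp:
            return resp.status, await resp.text()

status, body = asyncio.get_event_loop().run_until_complete(create_queue('foo'))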
Example #30
File: rethink.py Project: 52nlp/WikiKB
def bulk_insert(ifile):
	bulk_size = 1000
	i = 0
	bulk_ins = []
	bulk = {}
	for line in ifile:
		bulk = {}
		if line[0] == '#' or len(line) < 10 or line[0] == '@':
			continue
		line = line[:len(line)-2].replace("<","").replace(">","").strip()
		line_arr = line.split("\t")
		print line_arr,i
		bulk["id"] = unicode(line_arr[0],errors="ignore")
		bulk["rel"] = unicode(line_arr[1],errors="ignore")
		bulk["id2"] = unicode(line_arr[2],errors="ignore")

		if i < bulk_size - 1:
			bulk_ins.append(bulk)
			i += 1
		elif i == bulk_size - 1:
			bulk_ins.append(bulk)
			r.db("yago").table("test").insert(bulk_ins).run(conn)
			bulk_ins = []
			i = 0

	# flush any records left over after the last full batch
	if i > 0:
		r.db("yago").table("test").insert(bulk_ins).run(conn)
Example #31
 def test_reduce_1(self, conn):
     expected = 191
     result = r.db('d').table('nums').map(lambda doc: doc['points']).reduce(
         lambda elem, acc: elem + acc).run(conn)
     assertEqual(expected, result)
Example #32
File: Maker.py Project: adleonis/Mnet
def insert(tablename, thing, conn):
    res = r.db(DB).table(tablename).insert(thing).run(conn)
    return res
Example #33
def jumbo_write_json(data, db_name, table_name, chunk_size=5000, silent=True):
    '''Write big JSON lists to RethinkDB.

    Essential for datasets that are larger than 100,000 docs (ReQL max write).
    Often necessary even for smaller ones.

    data [list]: a list of dicts in JSON format.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    chunk_size [int or float of form BASEeEXP]: input list will be broken into
        chunks of this size. If you encounter memory use issues, reduce this
        value.
    silent [bool]: if True, does not print reports.

    Must be connected to a RethinkDB instance before using this.'''

    if chunk_size > 1e5:
        raise (Exception('Maximum JSON chunk_size is 100,000.'))

    #determine list length, number of chunks, and remainder
    list_length = len(data)
    chunk_size = int(
        chunk_size
    )  #max array length for a ReQL write is 100k; but that uses too much mem
    nchunks = math.ceil(list_length / chunk_size)
    rem = list_length % chunk_size

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        print('Creating database "' + db_name + '".')
        r.db_create(db_name).run()

    #create table if it doesn't already exist
    if table_name not in r.db(db_name).table_list().run():
        print('Creating table "' + table_name + '" in database "' \
            + db_name + '".')
        r.db(db_name).table_create(table_name).run()

    if silent == False:
        print('Writing list of ' + str(list_length) + ' trips to table "' \
            + table_name + '".')

    #digest data and write to RethinkDB
    for i in range(nchunks):
        s = i * chunk_size  #chunk_start

        if i == nchunks - 1 and rem != 0:
            e = s + rem + 1
        else:
            e = (i + 1) * chunk_size

        if silent == False:
            print('Writing trips ' + str(s) + '-' + str(e - 1) + '.')

        #write chunk to rethink (some data may be lost in case of power failure)
        r.db(db_name).table(table_name).insert(data[s:e]).run(
            durability='soft', noreply=False)

    if silent == False:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Table "' + table_name + '" now contains ' + str(ndocs) \
            + ' trips.')
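A minimal usage sketch, assuming a local RethinkDB instance; the database and table names and the generated documents are illustrative.
import rethinkdb as r

r.connect('localhost', 28015).repl()   # the helper relies on a default (.repl) connection
trips = [{'trip_id': i, 'duration_s': i % 3600} for i in range(250000)]
jumbo_write_json(trips, db_name='transit', table_name='trips',
                 chunk_size=5000, silent=False)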
Example #34
        if value == 1:
            result = value
        elif value == 0:
            result = is_following(user, owner)

        if result == 1:
            collaboration_cache[user][owner] = True

        return result


con = rdb.connect()
db_name, table_name = 'member_events', 'year_2016'

db_ref = rdb.db(db_name).table(table_name)

if db_name not in rdb.db_list().run(con):
    rdb.db_create(db_name).run(con)

if table_name not in rdb.db(db_name).table_list().run(con):
    rdb.db(db_name).table_create(table_name).run(con)

for i in range(2, 7):
    print '2016, {0}'.format(i)

    with open('{0}.json'.format(i)) as f:
        events = json.load(f)
        events = events[0]

    entries = []
Example #35
def run_vod_kpis(ucis, view_type):
    started_views = view_count(ucis)
    week_ucis = ucis.filter((dt_end - timedelta(days=6) < ucis.firstEvent)
                            & (ucis.firstEvent < dt_end + timedelta(days=1)))
    week_ago_ucis = ucis.filter((dt_end - timedelta(days=13) < ucis.firstEvent)
                                &
                                (ucis.firstEvent < dt_end - timedelta(days=6)))
    weekly_active_user = user_number(week_ucis)
    total_active_user = user_number(ucis)
    total_viewtime = total_viewing_time(ucis)
    user_viewtime = avg_user_viewtime(week_ucis)
    weekly_hibernation = user_hibernation(week_ucis, week_ago_ucis)
    top_program = top_programs_in_vod(ucis, 20)
    top_channel = normalize(top_tag_by_view_count(ucis, 'channelName'),
                            started_views)
    hour_of_day = normalize(view_count_by_hour_of_day(ucis), started_views)
    day_of_week = normalize(view_count_by_day_of_week(ucis), started_views)
    tag_user_package, user_package = users_package_overview(ucis)
    package_overview = {
        "{} user".format(view_type): tag_user_package,
        "linear TV user": user_package
    }
    res = [{
        "title": 'started-views',
        "id": 'started-views',
        "started-views": started_views
    }, {
        "title": 'weekly-active-user',
        "id": 'weekly-active-user',
        "weekly-active-user": weekly_active_user
    }, {
        "title": 'total-active-user',
        "id": 'total-active-user',
        "total-active-user": total_active_user
    }, {
        "title": 'total-viewing-time',
        "id": 'total-viewing-time',
        "total-viewing-time": total_viewtime
    }, {
        "title": 'viewing-time',
        "id": 'viewing-time',
        "viewing-time": user_viewtime
    }, {
        "title": 'user-hibernation',
        "id": 'user-hibernation',
        "user-hibernation": weekly_hibernation
    }, {
        "title": 'top-programs',
        "id": 'top-programs',
        "data": top_program
    }, {
        "title": 'top-provider',
        "id": 'top-provider',
        "data": top_channel
    }, {
        "title": 'hour-of-day',
        "id": 'hour-of-day',
        "data": hour_of_day
    }, {
        "title": 'day-of-week',
        "id": 'day-of-week',
        "data": day_of_week
    }, {
        "title": 'package-overview',
        "id": 'package-overview',
        "data": package_overview
    }]
    r.db('telenortv_insight_api').table(view_type).insert(
        res, conflict='replace').run()
Example #36
 def test_order_by_bracket(self, conn):
     res = r.db('x').table('farms').order_by(lambda doc: doc['id']).map(
         lambda doc: doc['id']).run(conn)
     expected = [1, 2]
     assertEqual(expected, list(res))
Example #37
 def user_leave(self, user, room):
     r.db(self.db).table(self.table).filter({
         'room': room,
         'room_user': user
     }).delete().run(self.conn)
Example #38
 def add_user(self, user, room, color):
     r.db(self.db).table(self.table).insert({
         'room': room,
         'room_user': user,
         'color': color
     }).run(self.conn)
Example #39
 def create_table(self, table):
     try:
         r.db(self.db).table_create(table).run(self.conn)
         print('table created')
     except:
         print('table exists')
Example #40
File: Maker.py Project: adleonis/Mnet
def read(DB, tablename, accountaddress, conn):
    cursor = r.db(DB).table(tablename).filter({
        'address': accountaddress
    }).pluck('address', 'balance').run(conn)
    for document in cursor:
        return document
Example #41
import rethinkdb as r
import algos

c = r.connect()
cursor = r.db("themis").table("pages").limit(1).run(c)
data = []
for document in cursor:
    databaseId = document['id']
    print(databaseId)
    kmeansResult = algos.kmeans(str(document['content']).decode('unicode-escape'))
    r.db("themis").table("pages").get(databaseId).update({"cluster": kmeansResult}).run(c)
Example #42
    # Returned Docopt arguments.
    docArgs = doc(__doc__, version="0.0.1")

    # Values from Docopt.
    noDB    = True if (int(docArgs["--nodb"]) == 1) else (False if (int(docArgs["--nodb"]) == 0) else None)
    online  = True if (int(docArgs["-o"    ]) == 1) else (False if (int(docArgs["-o"    ]) == 0) else None)

    #print(docArgs)

    dbA     = str(docArgs["--dba"][0])
    dbN     = str(docArgs["--dbn"][0])
    tOut    = int(docArgs["--tout"][0])

    app     = Flask(__name__)
    sIO     = sio(app)
    db      = r.db(dbN)

    if not noDB:
        c = database.conn(dbA);

    #print(db)
    #print(type(db))

    # Routings.
    @app.route("/")
    def index(): return render_template("index.html")

    @app.route("/api/client")
    def api_client():
        return DatabaseAPI(c, db, dbA, noDB, "client_name")
Example #43
def tables():
    import rethinkdb as r
    r.connect(host=DB_HOST, auth_key=open(AUTH).read().strip(), timeout=20).repl()
    return r.db('smc').table_list().run()
Example #44
    """
    Create the tables we are going to use
    """
    global connection, tables

    print "Creating databases/tables...",
    sys.stdout.flush()
    try:
        r.db_drop("test").run(connection)
    except r.errors.RqlRuntimeError, e:
        pass

    r.db_create("test").run(connection)

    for table in tables:
        r.db("test").table_create(table["name"]).run(connection)

    for table in tables:
        r.db("test").table(
            table["name"]).index_create("field0").run(connection)
        r.db("test").table(
            table["name"]).index_create("field1").run(connection)

    print " Done."
    sys.stdout.flush()


def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
Example #45
File: test.py Project: bitsmike/rethinkdb
def tests():
    print r.expr(1).run(c)
    print r.expr("bob").run(c)
    print r.expr(True).run(c)
    print r.expr(False).run(c)
    print r.expr(3.12).run(c)
    print r.expr([1, 2, 3, 4, 5]).run(c)
    print r.expr({'a': 1, 'b': 2}).run(c)
    #print r.js('1 + 1').run(c)

    print(r.expr(1) == 2).run(c)  # false
    print(r.expr(1) != 2).run(c)  # true
    print(r.expr(1) < 2).run(c)  # true
    print(r.expr(1) <= 2).run(c)  # true
    print(r.expr(1) > 2).run(c)  # false
    print(r.expr(1) >= 2).run(c)  # false
    print(~r.expr(True)).run(c)  # false
    print(~r.expr(False)).run(c)  # true

    print(r.expr(1) + 2).run(c)  # 3
    print(r.expr(1) - 2).run(c)  # -1
    print(r.expr(1) * 2).run(c)  # 2
    print(r.expr(1) / 2).run(c)  # .5
    print(r.expr(12) % 10).run(c)  # 2

    print(((r.expr(12) / 6) * 4) - 3).run(c)  # 5

    arr = r.expr([1, 2, 3, 4])

    print arr.append(5).run(c)
    print arr[1].run(c)
    print arr[2].run(c)
    print arr[1:2].run(c)
    print arr[:2].run(c)
    print arr[2:].run(c)
    print arr.count().run(c)
    print arr.union(arr).run(c)
    print arr.union(arr).distinct().run(c)
    print arr.inner_join(arr, lambda a, b: a == b).run(c)
    print arr.outer_join(arr, lambda a, b: a == (b - 2)).run(c)

    #print r.expr([{'id':0, 'a':0}, {'id':1, 'a':0}]).eq_join([{'id':0, 'b':1}, {'id':1, 'b':1}], 'id').run(c)

    obj = r.expr({'a': 1, 'b': 2})

    print obj['a'].run(c)
    print obj.contains('a').run(c)
    print obj.pluck('a').run(c)
    print obj.without('a').run(c)
    print obj.merge({'c': 3}).run(c)

    print r.db_list().run(c)
    print r.db_create('bob').run(c)
    print r.db_create('test').run(c)
    print r.db_list().run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_create('test').run(c)
    print r.db('test').table_create('bob').run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_drop('bob').run(c)
    print r.db('test').table_list().run(c)

    test = r.db('test').table('test')

    print test.run(c)
    print test.insert({'id': 1, 'a': 2}).run(c)
    print test.insert({'id': 2, 'a': 3}).run(c)
    print test.insert({'id': 3, 'a': 4}).run(c)
    print test.run(c)
    print test.between(right_bound=2).run(c)

    print test.update(lambda row: {'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.replace(lambda row: {'id': row['id'], 'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.delete().run(c)
    print test.run(c)

    print r.expr(1).do(lambda a: a + 1).run(c)
    print r.expr(2).do(lambda a: {'b': a / a}).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a + 1).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a.do(lambda b: b + a)).run(c)
    print r.expr([1, 2, 3]).reduce(lambda a, b: a + b).run(c)
    print r.expr([1, 2, 3, 4]).filter(lambda a: a < 3).run(c)

    print r.expr([1, 2]).concat_map(lambda a: [a, a]).run(c)

    print r.branch(r.expr(1) < 2, "a", "b").run(c)
    print r.branch(r.expr(1) < 0, "a", "b").run(c)

    print(r.expr(True) & r.expr(False)).run(c)
    print(r.expr(True) | r.expr(False)).run(c)
    print(r.expr(True) & r.expr(True)).run(c)
    print(r.expr(False) | r.expr(False)).run(c)

    #print r.expr([1,2]).map(3).run(c)
    #print r.expr([1,2]).map(r.row + 3).run(c)
    print r.expr([{'id': 2}, {'id': 3}, {'id': 1}]).order_by('id').run(c)
    print r.expr([{
        'g': 0,
        'v': 1
    }, {
        'g': 0,
        'v': 2
    }, {
        'g': 1,
        'v': 1
    }, {
        'g': 1,
        'v': 2
    }]).grouped_map_reduce(lambda row: row['g'], lambda row: row['v'] + 1,
                           lambda a, b: a + b).run(c)

    #print r.expr([1,2]).for_each(lambda i: [test.insert({'id':i, 'a': i+1})]).run(c)
    print test.run(c)
Example #46
def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
    global results, connection, time_per_query, executions_per_query, constant_queries

    print "Running inserts...",
    sys.stdout.flush()
    for table in tables:
        docs = []
        num_writes = gen_num_docs(table["size_doc"])
        for i in xrange(num_writes):
            docs.append(gen_doc(table["size_doc"], i))

        i = 0

        durations = []
        start = time.time()
        while (time.time() - start < time_per_query) & (i < num_writes):
            start_query = time.time()
            result = r.db('test').table(table['name']).insert(
                docs[i]).run(connection)
            durations.append(time.time() - start_query)

            if "generated_keys" in result:
                table["ids"].append(result["generated_keys"][0])
            i += 1

        durations.sort()
        results["single-inserts-" + table["name"] + "-" + suffix] = {
            "average": (time.time() - start) / i,
            "min": durations[0],
            "max": durations[len(durations) - 1],
            "first_centile":
            durations[int(math.floor(len(durations) / 100. * 1))],
            "last_centile":
            durations[int(math.floor(len(durations) / 100. * 99))]
        }

        # Save it to know how many batch inserts we did
        single_inserts = i

        # Finish inserting the remaining data
        size_batch = 500
        durations = []
        start = time.time()
        count_batch_insert = 0
        if i < num_writes:
            while i + size_batch < num_writes:
                start_query = time.time()
                result = r.db('test').table(table['name']).insert(
                    docs[i:i + size_batch]).run(connection)
                durations.append(time.time() - start_query)
                end = time.time()
                count_batch_insert += 1

                table["ids"] += result["generated_keys"]
                i += size_batch

            if i < num_writes:
                result = r.db('test').table(table['name']).insert(
                    docs[i:len(docs)]).run(connection)
                table["ids"] += result["generated_keys"]

        if num_writes - single_inserts != 0:
            results["batch-inserts-" + table["name"] + "-" + suffix] = {
                "average": (end - start) / (count_batch_insert * size_batch),
                "min":
                durations[0],
                "max":
                durations[len(durations) - 1],
                "first_centile":
                durations[int(math.floor(len(durations) / 100. * 1))],
                "last_centile":
                durations[int(math.floor(len(durations) / 100. * 99))]
            }

        table["ids"].sort()

    print " Done."
    sys.stdout.flush()

    # Execute the insert queries
    print "Running update/replace...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(write_queries)):
            docs = []
            num_writes = gen_num_docs(table["size_doc"])
            for i in xrange(num_writes):
                docs.append(gen_doc(table["size_doc"], i))

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(write_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)
                i += 1

            durations.sort()
            results[write_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

            i -= 1  # We need i in write_queries[p]["clean"] (to revert only the document we updated)
            # Clean the update
            eval(write_queries[p]["clean"]).run(connection)

    print " Done."
    sys.stdout.flush()

    # Execute the read queries on every tables
    print "Running reads...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(table_queries)):
            count = 0
            i = 0
            if "imax" in table_queries[p]:
                max_i = table_queries[p]["imax"] + 1
            else:
                max_i = 1

            durations = []
            start = time.time()
            while (time.time() - start <
                   time_per_query) & (count < executions_per_query):
                start_query = time.time()
                try:
                    cursor = eval(table_queries[p]["query"]).run(connection)
                    if isinstance(cursor, r.net.Cursor):
                        list(cursor)
                        cursor.close()

                    if i >= len(table["ids"]) - max_i:
                        i = 0
                    else:
                        i += 1
                except:
                    print "Query failed"
                    print constant_queries[p]
                    sys.stdout.flush()
                    break
                durations.append(time.time() - start_query)
                count += 1

            durations.sort()
            results[table_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / count,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()

    # Execute the delete queries
    print "Running delete...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(delete_queries)):
            start = time.time()

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(delete_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)

                i += 1

            durations.sort()
            results[delete_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()
Example #47
File: ipy.py Project: daemondev/tChat
def drop():
    """Delete all chats (truncate)"""
    r.db('chat').table('chats').delete().run(conn)
Example #48
def jumbo_write_df(df,
                   db_name,
                   table_name,
                   df_chunk_size=5e5,
                   json_chunk_size=5e3,
                   verbosity=1):
    '''Write big pandas dataframes to RethinkDB.

    Essential for datasets that are larger than 100,000 rows (ReQL max write).
    Often necessary even for smaller ones.

    df [pandas DataFrame]: 'nuff said.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    df_chunk_size [int or float of form BASEeEXP]: input df will be broken into
        chunks of this many rows. If you encounter memory use issues, reduce
        this value first. Maximum accepted value is 1,000,000.
    json_chunk_size [int or float of form BASEeEXP]: input list passed to
        jumbo_write_json will be broken into chunks of this size. If you
        encounter memory use issues, reduce this value second. Maximum
        accepted value is 100,000 (ReQL write limit).
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = reports from this function and subroutine jumbo_write_json.

    Calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.'''

    if df_chunk_size > 1e6:
        raise (Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise (Exception('Maximum json_chunk_size is 100,000. This size is \
            rarely a good idea.'))

    #set verbosity for jumbo_write_json
    sil = False if verbosity == 2 else True

    if verbosity > 0:
        print('Preparing ' + str(len(df)) + '-row DataFrame for database.')

    # json_list = []
    while len(df):  #runs as long as rows remain in the dataframe

        #take a chunk of the dataframe and convert to json list
        l = min(len(df), int(df_chunk_size)
                )  #get the first chunk_size lines, or all the rest if fewer
        chunk = df.iloc[0:l]  #subset them from the df
        df = df.drop(df.index[0:l])  #drop those lines
        json_list = chunk.to_dict('records')

        if verbosity > 0:
            print('Converting chunk of ' + str(l) + ' rows to JSON format.')

        # s_buf = io.StringIO() #create string buffer
        # chunk.to_csv(s_buf, index=False) #send chunk as csv to buffer
        # s_buf.seek(0) #reset buffer to first position
        # json_list = list(csv.DictReader(s_buf)) #read csv into json list
        # s_buf.close() #close string buffer

        #free up some memory
        del (chunk)
        gc.collect()  #remove all vars no longer referenced to free a bit more

        #open connection to null device for banishing unneeded outputs
        black_hole = open(os.devnull, 'w')
        # black_hole = [json_list[i].pop('', None) for i in range(len(json_list))]
        # black_hole = [json_list[i].pop('Unnamed: 0', None) for i in range(len(json_list))]

        #sort by hash.
        json_list = sorted(json_list, key=operator.itemgetter('hash'))

        #group json list by hash and remove hash from each reduction
        jl2 = []
        for hsh, red in itt.groupby(json_list,
                                    key=operator.itemgetter('hash')):
            red = list(red)
            black_hole = [red[i].pop('hash', None) for i in range(len(red))]
            jl2.append({'group': hsh, 'reduction': red})
        del (json_list)

        if verbosity > 0:
            print('Finished grouping chunk by hash. Passing list of length ' \
                + str(len(jl2)) + ' to jumbo_write_json.')

        #write list to rethink
        jumbo_write_json(data=jl2,
                         db_name=db_name,
                         table_name=table_name,
                         chunk_size=json_chunk_size,
                         silent=sil)
        del (jl2)

    if verbosity > 0:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Finished writing day of records. Wrote ' + str(ndocs) \
            + ' docs to table "' + table_name + '".')
Example #49
File: download.py Project: hgwu80/fauna
 def count_documents(self, table):
     '''
     return integer count of number of documents in table
     '''
     return r.db(self.database).table(table).count().run()
Example #50
 def __init__(self):
     self.conn = r.connect(host="172.16.1.2",port=28015)
    # r.db_list().contains('Atlas').do(lambda databaseExists: r.branch(databaseExists, 0 ,r.db_create('Atlas'))).run(self.conn)
    # r.db('Atlas').table_create('DomainTable').run(self.conn)
     #r.db('Atlas').contains('DomainTable').do(lambda exists : r.branch( exists, 0,  r.db('Atlas').table_create('DomainTable'))).run(self.conn)
     self.table = r.db('Atlas').table("WordSearchCount")
Example #51
def retrieve_records(
        api_key,
        sensor_path,
        db_name,
        end_date=(
            datetime.datetime.strptime(time.strftime('%Y-%m-%d'), '%Y-%m-%d') -
            datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
        start_date=None,
        json_chunk_size=5e3,
        verbosity=1):
    '''Pull records from Acyclica's API and write to RethinkDB.

    api_key [str]: the 41-character alphanumeric key you were given by Acyclica.
        Should be read in from an environment variable, encrypted if possible.
    sensor_path [str]: the path to Acyclica_sensors_CBD.csv
        (should be fetched automatically once we package this thing).
    db_name [str]: the name of the RethinkDB database that will be populated.
    end_date [str]: a date string of the form 'YYYY-MM-DD' specifying the last
        day of data to pull from Acyclica. Defaults to yesterday.
    start_date [str]: a date string of the form 'YYYY-MM-DD' specifying the first
        day of data to fetch from Acyclica. Defaults to None, which means only
        end_date will be fetched. Set this to 'prev_week' to fetch the full week
        starting 8 days ago and ending yesterday.
    json_chunk_size [int or float of form BASEeEXP]: lists passed to
        jumbo_write_json will be broken into chunks of this size. No need to
        modify unless you encounter memory use issues, in which case you should
        first try reducing the default value of 5,000.
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = more reports from this function and from subroutine
            jumbo_write_json.

    Calls jumbo_write_df, which calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.

    Pull at minimum 1 day and at maximum 1 week of data in increments of 1
    day.'''

    #start timing
    start_time = time.time()

    #check for size limit errors
    # if df_chunk_size > 1e6:
    #     raise(Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise Exception('Maximum json_chunk_size is 100,000. This size is '
                        'rarely a good idea.')

    #check for end_date format error
    try:
        datetime.datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError:
        raise Exception('end_date must be of the form "YYYY-MM-DD".')

    #set appropriate start dates based on input
    if start_date == 'prev_week':
        start_date = (datetime.datetime.strptime(end_date, '%Y-%m-%d') -
                      datetime.timedelta(days=6)).strftime('%Y-%m-%d')
    elif start_date is None:
        start_date = end_date
    else:
        pass

    #check for start_date format error
    try:
        datetime.datetime.strptime(start_date, '%Y-%m-%d')
    except ValueError:
        raise Exception('start_date must be of the form "YYYY-MM-DD".')

    #add 23 h, 59 m, and 59 s to the end date (to grab the whole day)
    end_date = datetime.datetime.strptime(end_date,
                                          '%Y-%m-%d') + datetime.timedelta(
                                              hours=23, minutes=59, seconds=59)

    #convert datetime objects to unix time
    start_unix = int(
        time.mktime(
            datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple()))
    end_unix = int(time.mktime(end_date.timetuple()))

    #make sure the user isn't trying to grab more than a week of data, and that
    #end is after start
    if end_unix - start_unix > 604800:
        raise Exception(
            'Please specify a range of dates no greater than one week.')
    if end_unix - start_unix < 0:
        raise Exception('end_date must not be earlier than start_date.')

    #determine how many days have been selected
    dif = end_unix - start_unix
    ndays = math.ceil(dif / (24 * 3600))

    #get sensor data
    sensors = pd.read_csv(sensor_path)
    # sensors = sensors.drop(['name', 'short_name','latitude','longitude'], axis=1)
    sensors.columns = ['IntersectionID', 'sensor']
    sensor_list = list(sensors['sensor'])

    if verbosity > 0:
        print('Preparing to acquire data for ' + str(ndays) + ' day(s) and ' \
            + str(len(sensor_list)) + ' sensors.')

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        r.db_create(db_name).run()

    #request and process one day at a time (roughly 5-10m records acquired per day)
    day_start_unix = start_unix
    for day in range(ndays):

        print('Acquiring records for day ' + str(day + 1) + ' of ' \
            + str(ndays) + '. May take several minutes.')

        #date string will be the table name on RethinkDB
        tname = datetime.datetime.fromtimestamp(
            int(day_start_unix)).strftime('%Y_%m_%d')
        if tname in r.db(db_name).table_list().run():
            print('Table "' + tname + '" already exists in database "' \
                + db_name + '". Skipping this day.')
            day_start_unix = day_start_unix + (24 * 3600)  #increment day
            continue
        else:
            r.db(db_name).table_create(tname).run()

        #get endpoints for each iteration and (re)instantiate dataframe
        day_end_unix = day_start_unix + (23 * 3600) + 3599
        df = pd.DataFrame(
            columns=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

        #request and preprocess each sensor separately
        for i in range(len(sensor_list)):

            # sensorID = sensor_list[1]
            URL = "https://cr.acyclica.com/datastream/device/csv/time/" \
                + api_key + "/" + str(sensor_list[i]) + "/" \
                + str(day_start_unix) + "/" + str(day_end_unix)

            #get raw web content and read into a dataframe
            items = requests.get(URL).content
            newdf = pd.read_csv(
                io.StringIO(items.decode('utf-8')),
                usecols=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

            #round timestamp to nearest second
            newdf['Timestamp'] = newdf['Timestamp'].round().astype('int')

            #drop repeated reads within 1s, keeping read with highest strength
            strmaxes = newdf.groupby(['Timestamp',
                                      'MAC Hash'])['Serial'].transform(max)
            newdf = newdf[newdf['Serial'] == strmaxes]

            #append to main dataframe (DataFrame.append was removed in pandas 2.0)
            df = pd.concat([df, newdf], ignore_index=True)

            if verbosity == 2:
                if i + 1 in [15, 30, 45]:
                    print('Got data for ' + str(i + 1) + ' of ' \
                        + str(len(sensor_list)) \
                        + ' sensors. So far there are ' + str(len(df)) \
                        + ' reads for day ' + str(day + 1) + '.')

        del newdf

        #drop repeated reads again, keeping read with highest strength
        strmaxes = df.groupby(['Timestamp',
                               'MAC Hash'])['Serial'].transform(max)
        df = df[df['Serial'] == strmaxes]

        pre_filt_len = str(len(df))
        if verbosity > 0:
            print('Found ' + pre_filt_len + ' sensor reads for day ' \
                + str(day + 1) + '. Cleaning those now.')

        json_list = df_to_json_etc(df, verbosity, pre_filt_len, sensors)

        if verbosity > 0:
            print('Converted DataFrame to JSON list and grouped by hash. ' \
                + 'Passing list of length ' + str(len(json_list)) \
                + ' to jumbo_write_json.')

        #set verbosity for jumbo_write_json
        sil = verbosity != 2

        jumbo_write_json(data=json_list,
                         db_name=db_name,
                         table_name=tname,
                         chunk_size=json_chunk_size,
                         silent=sil)

        #increment day
        day_start_unix = day_start_unix + (24 * 3600)

    if verbosity > 0:
        run_time = round((time.time() - start_time) / 60, 2)
        print('Finished writing all records for ' + str(ndays) + ' day(s) ' \
            + 'in ' + str(run_time) + ' minutes.\nRecords are in database "' \
            + db_name + '".')
Example #52
0
File: Maker.py  Project: adleonis/Mnet
def create_table(name, conn):
    res = r.db(DB).table_create(name).run(conn)
Example #53
0
def clear_current(sample_id, conn):
    r.db("samplesdb").table("sample2attribute_set")\
                     .get_all(sample_id, index="sample_id")\
                     .update({"current": False})\
                     .run(conn)
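
clear_current assumes a secondary index called sample_id on sample2attribute_set; a hedged one-off setup sketch (the helper name is hypothetical) that creates the index if needed and waits for it to become ready:

import rethinkdb as r

def ensure_sample_id_index(conn):
    tbl = r.db("samplesdb").table("sample2attribute_set")
    # get_all(..., index="sample_id") fails unless the index exists and is ready.
    if "sample_id" not in tbl.index_list().run(conn):
        tbl.index_create("sample_id").run(conn)
        tbl.index_wait("sample_id").run(conn)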
Example #54
0
parameters = yaml.safe_load(parameter_file)

print "Connecting database ..."
rethink = r.connect(parameters['rethinkdb_server']['host'],
                    parameters['rethinkdb_server']['port']).repl()
rethink_db = parameters['rethinkdb_server']['database']
url_queue_table = parameters['rethinkdb_server']['tables']['url_queue']
raw_result_table = parameters['rethinkdb_server']['tables']['raw_result']
indexed_result_table = parameters['rethinkdb_server']['tables'][
    'indexed_result']
# Init database
db_list = r.db_list().run(rethink)
if rethink_db not in db_list:
    print "Init database ..."
    r.db_create(rethink_db).run(rethink)
    r.db(rethink_db).table_create(url_queue_table).run(rethink)
    r.db(rethink_db).table(url_queue_table).index_create('ts').run(rethink)
    r.db(rethink_db).table_create(raw_result_table).run(rethink)
    r.db(rethink_db).table_create(indexed_result_table).run(rethink)
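    # Hedged addition: index_create() returns before the index is built, so
    # wait for the 'ts' index to be ready before other workers query it.
    r.db(rethink_db).table(url_queue_table).index_wait('ts').run(rethink)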

rethink.use(rethink_db)


def main(argv):
    # Main code here
    print "I'm manager :)"

    if len(argv) > 1:
        seed_url = argv[1]

        r.table(url_queue_table).insert({
Example #55
0
 def __init__(self):
     r.connect(settings['RETHINKDB_SERVER'],
               settings['RETHINKDB_PORT']).repl()
     self.db = r.db(settings['RETHINKDB_DB']).table(
         settings['RETHINKDB_TABLE'])
Example #56
0
#!/usr/bin/env python
# coding: utf-8
import rethinkdb as r
r.connect('localhost', 28015).repl()
watchcount = r.db('polltime').table('votes').get_all('b0aae840-f52e-4bdd-abcd-74789f52c6bd', index='choice').count().run()
dontwatchcount = r.db('polltime').table('votes').get_all('a966c7b3-9277-4c09-9254-8806762bbea0', index='choice').count().run()
watchint = int(watchcount)
dontwatchint = int(dontwatchcount)
file = open("tmp/finalcountq4.txt","w")
if watchint > dontwatchint:
	file.write("TRY")
else:
	file.write("GIVE")
file.close()
Example #57
0
#Prints the contents of all the tables in a RethinkDB database
#You can also pass the names of specific tables as command-line arguments to print only those tables
#Users' messages are excluded from printing

import rethinkdb as r
import dotenv
import os
import json
import sys

dotenv.load_dotenv("./.env")

r.connect(os.environ.get("DB_HOST"), int(os.environ.get("DB_PORT"))).repl()
tableList = []
if len(sys.argv) > 1:
    tableList = sys.argv[1:]
else:
    tableList = r.db("deepstream").table_list().run()

print("Table list:")
print(tableList)
print()

for table_name in tableList:
    print("Table name: " + table_name)
    rows = list(r.db("deepstream").table(table_name).run())
    #.without("messages").run())
    print(json.dumps(rows, indent=1, sort_keys=True))
    print()
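
The header comment above says users' messages are excluded, yet the .without("messages") call is commented out in the loop; a hedged variant of the fetch line that actually drops the field (assuming "messages" is a top-level field on each document):

    rows = list(r.db("deepstream").table(table_name).without("messages").run())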
Example #58
0
 def test_simple(self, conn):
     res = r.db('x').table('farms').map(lambda doc: doc['animals'][0]).run(
         conn)
     assertEqual(set(['frog', 'horse']), set(list(res)))
Example #59
0
 def test_filter_by_bracket(self, conn):
     res = r.db('x').table('farms').filter(lambda doc: doc['id'] < 2).run(
         conn)
     expected = [1]
     results = [doc['id'] for doc in res]
     assertEqual(expected, results)
Example #60
0
 def test_set_intersection(self, conn):
     expected = [set(['x', 'y']), set(['x'])]
     result = r.db('z').table('t').map(
         lambda doc: doc['simple'].set_intersection(['x', 'y'])).run(conn)
     result = [set(d) for d in result]
     assertEqUnordered(expected, result)