Code Example #1
File: digestor.py Project: antsankov/cufcq-new
def has_mode(db, conn, model, field, mode_table="Fcq"):
    model_id = "{0}_id".format(model).lower()
    mode_query = (
        r.db(db)
        .table(mode_table)
        .group(model_id)
        .ungroup()
        .for_each(
            lambda doc: r.db(db)
            .table(model)
            .get(doc["group"])
            .update(
                {
                    field: doc["reduction"]
                    .group(field)
                    .count()
                    .ungroup()
                    .order_by("reduction")
                    .nth(-1)
                    .default({"group": None})["group"]
                }
            )
        )
        .run(conn, array_limit=200000)
    )
    logging.info(mode_query)
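A minimal usage sketch for the helper above, assuming a local RethinkDB instance and an existing fcq database; the model name "Course" and field "campus" are illustrative, not taken from the original project:

import logging
import rethinkdb as r

logging.basicConfig(level=logging.INFO)
conn = r.connect(host="localhost", port=28015)

# Write the most frequent 'campus' value found in Fcq rows into each Course document.
has_mode(db="fcq", conn=conn, model="Course", field="campus", mode_table="Fcq")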
Code Example #2
File: registration.py Project: AntiPaste/ilmo
def create_registration(event_id, custom_fields):
    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)  # TODO: len(fields) == 0 -> invalid event ID error?

    custom_fields = __sanitize_registration(fields, custom_fields)
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).insert({
        'event_id': event_id,
        'custom_fields': custom_fields
    }).run(database.connection)

    if response['inserted'] != 1:
        return None, RegistrationInsertException()

    # returns the inserted ID
    return response['generated_keys'][0], None
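A hedged usage sketch for create_registration; the event id and custom field values are illustrative, and the module's config/database setup is assumed to already be in place:

# Returns (inserted_id, None) on success or (None, exception) on failure.
registration_id, err = create_registration(
    'some-event-id',
    {'name': 'Alice', 'diet': 'vegetarian'}
)
if err is not None:
    print('registration failed: {0}'.format(err))
else:
    print('created registration {0}'.format(registration_id))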
Code Example #3
File: registration.py Project: AntiPaste/ilmo
def update_registration(registration_id, new_registration):
    registration, err = get_registration(registration_id)
    if not registration:
        return None, RegistrationNotFoundException()
    event_id = registration.get('event_id')

    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)

    custom_fields = __sanitize_registration(
        fields, registration.get('custom_fields')
    )
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).get(
        registration_id
    ).update(
        new_registration
    ).run(database.connection)

    if response['errors'] != 0:
        return None, RegistrationUpdateException()

    return registration_id, None
Code Example #4
File: store.py Project: onelab-eu/myslice-mooc
def setup():
    tables = [
        {
            'name' : 'testbeds',
            'pkey' : 'id'
        },
        {
            'name' : 'resources',
            'pkey' : 'hostname'
        }
    ]

    c = connect()

    try:
        r.db_create(Config.rethinkdb["db"]).run(c)
        logger.info('MyOps2 database created successfully')
    except RqlRuntimeError:
        logger.info('MyOps2 database already exists')

    for t in tables:
        try:
            r.db(Config.rethinkdb["db"]).table_create(t['name'], primary_key=t['pkey']).run(c)
            logger.info('MyOps2 table %s setup completed', t['name'])
        except RqlRuntimeError:
            logger.info('MyOps2 table %s already exists', t['name'])

    c.close()
Code Example #5
File: tix.py Project: phonyphonecall/open-lambda
def init(conn, event):
    # try to drop table (may or may not exist)
    rv = ''
    try:
        r.db_drop(TIX).run(conn)
        rv = 'dropped, then created'
    except:
        rv = 'created'
    r.db_create(TIX).run(conn)
    r.db(TIX).table_create(VENU).run(conn)
    r.db(TIX).table(VENU).index_create(TS).run(conn)

    smap = {}
    umap = {}
    for x in range(1, CNT + 1):
        smap[str(x)] = 'free' 
        umap[str(x)] = ''

    rv += str(r.db(TIX).table(VENU).insert({
        ID: 0,
        SMAP: smap,
        UMAP: umap,
        MAX: CNT,
        TS: time.time()
    }).run(conn))

    return rv
Code Example #6
File: upload_project.py Project: DrXyzzy/smc
def upload_project(project_id):
    """
    Upload the bup backup of this project to the gcloud bucket.
    """
    path = path_to_project(project_id)

    run("sudo chmod a+r -R %s"%path)

    log('path: ', project_id)
    bup = os.path.join(path, 'bup')
    if not os.path.exists(bup):
        raise RuntimeError("no bup directory to upload -- done")
    target = os.path.join('gs://{bucket}/projects/{project_id}.zfs/bup'.format(
            bucket=GCLOUD_BUCKET, project_id=project_id))

    log('upload: rsync new pack files')
    run(['gsutil', '-m', 'rsync', '-x', '.*\.bloom|.*\.midx', '-r',
         '{bup}/objects/'.format(bup=bup),
         '{target}/objects/'.format(target=target)])
    log('gsutil upload refs/logs')
    for path in ['refs', 'logs']:
        run(['gsutil', '-m', 'rsync', '-c', '-r',
             '{bup}/{path}/'.format(bup=bup, path=path),
             '{target}/{path}/'.format(target=target, path=path)])

    #auth_key = open(RETHINKDB_SECRET).read().strip()
    conn = rethinkdb.connect(host=DB_HOST, timeout=10)#, auth_key=auth_key)
    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(TIMESTAMP_FORMAT)
    rethinkdb.db('smc').table('projects').get(project_id).update(
        {'last_backup_to_gcloud':timestamp_to_rethinkdb(timestamp)}).run(conn)
Code Example #7
File: test_things.py Project: scivey/mockthink
 def test_multi_join(self, conn):
     query = r.db('x').table('employees').eq_join(
         'person', r.db('x').table('people')
     ).map(
         lambda d: d['left'].merge({'person': d['right']['name']})
     ).eq_join(
         'job', r.db('x').table('jobs')
     ).map(
         lambda d: d['left'].merge({'job': d['right']['name']})
     )
     expected = [
         {
             'id': 'joe-employee-id',
             'person': 'joe',
             'job': 'Lawyer'
         },
         {
             'id': 'tim-employee-id',
             'person': 'tim',
             'job': 'Nurse'
         },
         {
             'id': 'bob-employee-id',
             'person': 'bob',
             'job': 'Assistant'
         },
         {
             'id': 'todd-employee-id',
             'person': 'todd',
             'job': 'Lawyer'
         }
     ]
     assertEqUnordered(expected, list(query.run(conn)))
Code Example #8
File: views.py Project: omarlopez/appRestaurant
def step1():

    response = {}
    conn = r.connect(host=current_app.config['RETHINKDB_HOST'])

    users = json.loads(request.data)
    users = {
        'name': users['name'],
        'user': users['user'],
        'email': users['email'],
        'password': users['password'],
        'ubication': [],
        'sale': []
    }
    
    check_user = r.db('food').table('user_register').filter({'email': users['email']}).run(conn)
    check_user = list(check_user)
    if len(check_user) > 0:
        
        response['success'] = 200
        response['message'] = u'El usuario ya existe'
        response['code'] = 1

    else:    
     
        insert = r.db(current_app.config['DATABASE']).table('user_register').insert(users).run(conn)
        response['success'] = 200
        response['message'] = u'Usuario registrado'
        response['code'] = 0

    pprint.pprint(response)
    return jsonify(response)
Code Example #9
def main():
    # connect rethinkdb
    rethinkdb.connect("localhost", 28015, "mysql").repl()  # repl() gives the later run() calls a default connection
    try:
        rethinkdb.db_drop("mysql").run()
    except:
        pass
    rethinkdb.db_create("mysql").run()

    tables = ["dept_emp", "dept_manager", "titles",
              "salaries", "employees", "departments"]
    for table in tables:
        rethinkdb.db("mysql").table_create(table).run()

    stream = BinLogStreamReader(
        connection_settings=MYSQL_SETTINGS,
        blocking=True,
        only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent],
    )

    # process Feed
    for binlogevent in stream:
        if not isinstance(binlogevent, WriteRowsEvent):
            continue

        for row in binlogevent.rows:
            if not binlogevent.schema == "employees":
                continue

            vals = {}
            vals = {str(k): str(v) for k, v in row["values"].iteritems()}
            rethinkdb.table(binlogevent.table).insert(vals).run()

    stream.close()
Code Example #10
File: test.py Project: isidorn/test2
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a':r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(r.db('test').table('test').get(0)['a'].contains(r.error('bob')), r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1,2]).reduce(lambda a,b: a + r.error("bob")).run(c)
Code Example #11
File: parser.py Project: zljiljana/TaggerNews
def setDictionary():
	dict = {}
	#print "getting top stories from hacker-news"
	result = firebase.get('/v0/topstories', None)
	# result = result[:200]
	for itemid in result:
		try:
			data = firebase.get('/v0/item/' + str(itemid), None)
			if (data['type'] == 'story'):
				# get tags
				url = data['url']
				(to_insert, tags) = selectTags(itemid)
				# store to temp db
				r.db("tagger_db").table("id2html").insert({"id": itemid, "tag_string": to_insert}).run(connection)
				if len(tags) > 1:
					title = data['title']
					score = str(data['score'])
					usr = data['by']
					comments = str(data['descendants'])
					myString = "<tr class='athing'><td align=\"right\" valign=\"top\" class=\"title\"><span class=\"rank\"> </span></td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td><td class=\"title\"><span class=\"deadmark\"></span><a href=\"" + url + "\">" + title + "</a>" + to_insert + "</td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td></tr><tr><td colspan=\"2\"></td><td class=\"subtext\"><span class=\"score\">" + score + " points</span> by <a>" + usr + "</a> | <a>" + comments +" comments</a></td></tr><tr class=\"spacer\" style=\"height:5px\"></tr>"
					print "tags: ", tags[0], tags[1]
					add(tags[0], myString, dict)
					add(tags[1], myString, dict)
		except KeyError:
			pass
	# r.db("test").table("tag_dict").delete().run(connection)
	r.db("tagger_db").table("tag2html").insert(dict).run(connection)
Code Example #12
 def __init__(self, count):
     self.con = r.connect("localhost", 28015).repl()
     tables = r.db("test").table_list().run(self.con)
     if "items" in tables:
         r.db("test").table_drop("items").run(self.con)
     r.db("test").table_create("items").run(self.con)
     self.count = count
Code Example #13
File: views.py Project: sundarrinsa/longclaw
def sync_facebook(name):
    #import ipdb; ipdb.set_trace();
    try:
        form_data = json.loads(request.data)
    except:
        return response_msg('error', 'data not correct')

    try:
        graph = GraphAPI(form_data['access_token'])
        try:
            # #import ipdb; ipdb.set_trace();
            email = graph.get_object('me', fields='email')['email']
            pic = graph.get_object('me/picture', width='400', height='400')['url']
            print pic
            if email != form_data['fb_email']:
                return response_msg('error', 'incorrect facebook email')
        except:
            return response_msg('error', 'data not complete')
    except:
        return response_msg('error', 'invalid access token')

    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).update({'fb_email': email, 'pic': pic}
            ).run(connection)
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).run(connection)
    except:
        return response_msg('error', 'Could not connect to db')

    return response_msg('success', 'OK', data=cursor.items[0])
Code Example #14
File: views.py Project: sundarrinsa/longclaw
def sync_ratings():
    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').run(connection)
    except:
        return response_msg('error', 'could not connect to db')
    for user in cursor.items:
        ratings = rating(user['cfhandle'], user['cchandle'], user['colg_rating'])
        ratings = json.loads(ratings[0])
        colg_rating = 0
        try:
            colg_rating = colg_rating + 20 * ((ratings['cf_rating']/100)**2)
            colg_rating = colg_rating + 2000 + 7 * (((ratings['lrating']/1000)**2) + (ratings['lrating']/20))
            colg_rating = colg_rating + 2000 + 5 * (((ratings['srating']/100)**2) + (ratings['srating']/20))
        except:
            pass
        print colg_rating
        try:
            cursor = rdb.db(TODO_DB).table('user').filter(
                rdb.row['username'] == user['username']
                ).update({
                'lrating': ratings['lrating'],
                'srating': ratings['srating'],
                'cfrating': ratings['cf_rating'],
                'colg_rating': colg_rating/3,
                }).run(connection)
            print user['username']
        except:
            print 'error' + user['username']

    return response_msg('success', 'OK')
Code Example #15
File: basemodel.py Project: sungbae/scq
 def remove_pending_user(self, user_id, row_id, user_pending_name=None):
     """
     removes a user id from a model's pending list.
     """
     if user_id is None:
         logging.error("user_id cannot be None")
         return False
     if row_id is None:
         logging.error("row_id cannot be None")
         return False
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     if user_pending_name is not None:
         user_pending = user_data.get(user_pending_name, [])
         try:
             user_pending.remove(row_id)
         except ValueError:
             logging.warn("row_id {0} not in user {1}".format(row_id, user_pending_name))
             pass
         r.db(self.DB).table(user_table).get(user_id).update({user_pending_name: user_pending}).run(self.conn)
     penders = row_data['penders']
     try:
         penders.remove(user_id)
     except ValueError:
         pass
     return r.db(self.DB).table(row_table).get(row_id).update({'penders': penders}).run(self.conn)
Code Example #16
    def save(self):
        try:
            r.db_create(self.db).run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        try:
            r.db(self.db).table_create('accounts').run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        user_exists = list(r.db(self.db)
                           .table('accounts')
                           .filter(lambda user: (user['name'] == self.name)
                                                & (user['ledger']['id'] == self.ledger['id']))
                           .run(self.bigchain.conn))
        if not len(user_exists):
            r.db(self.db)\
                .table('accounts')\
                .insert(self.as_dict(), durability='hard')\
                .run(self.bigchain.conn)
        else:
            user_persistent = user_exists[0]
            self.vk = user_persistent['vk']
            self.sk = user_persistent['sk']
Code Example #17
File: pyinsert_rethink.py Project: 52nlp/WikiKB
def insert_r(conn,table,sent,rel,val):
	bulk = {}
	if isinstance(rel["e1"],unicode):
		bulk["e1"] = rel["e1"]
	else:
		bulk["e1"] = unicode(rel["e1"],errors="ignore")

	if isinstance(rel["rel"],unicode):
		bulk["rel"] = rel["rel"]
	else:
		bulk["rel"] = unicode(rel["rel"],errors="ignore")

	if isinstance(rel["e2"],unicode):
		bulk["e2"] = rel["e2"]
	else:
		bulk["e2"] = unicode(rel["e2"],errors="ignore")

	if isinstance(sent,unicode):
		bulk["sent"] = sent
	else:
		bulk["sent"] = unicode(sent,errors="ignore")

	bulk["cfval"] = val
		

	r.db("wikikb").table(table).insert(bulk).run(conn)
Code Example #18
File: models.py Project: jskrzypek/leaderboard
	def get_table():
		try:
			r.db(dbname).table_create('boards').run(_get_conn())
		except r.RqlRuntimeError:
			# already created
			pass
		return r.db(dbname).table('boards')
Code Example #19
def import_from_queue(progress, conn, task_queue, error_queue, replace_conflicts, durability, write_count):
    if progress[0] is not None and not replace_conflicts:
        # We were interrupted and it's not ok to overwrite rows, check that the batch either:
        # a) does not exist on the server
        # b) is exactly the same on the server
        task = progress[0]
        pkey = r.db(task[0]).table(task[1]).info().run(conn)["primary_key"]
        for i in reversed(range(len(task[2]))):
            obj = pickle.loads(task[2][i])
            if pkey not in obj:
                raise RuntimeError("Connection error while importing.  Current row has no specified primary key, so cannot guarantee absence of duplicates")
            row = r.db(task[0]).table(task[1]).get(obj[pkey]).run(conn)
            if row == obj:
                write_count[0] += 1
                del task[2][i]
            else:
                raise RuntimeError("Duplicate primary key `%s`:\n%s\n%s" % (pkey, str(obj), str(row)))

    task = task_queue.get() if progress[0] is None else progress[0]
    while not isinstance(task, StopIteration):
        try:
            # Unpickle objects (TODO: super inefficient, would be nice if we could pass down json)
            objs = [pickle.loads(obj) for obj in task[2]]
            conflict_action = 'replace' if replace_conflicts else 'error'
            res = r.db(task[0]).table(task[1]).insert(objs, durability=durability, conflict=conflict_action).run(conn)
        except:
            progress[0] = task
            raise

        if res["errors"] > 0:
            raise RuntimeError("Error when importing into table '%s.%s': %s" %
                               (task[0], task[1], res["first_error"]))

        write_count[0] += len(objs)
        task = task_queue.get()
Code Example #20
File: basemodel.py Project: cdsalazar/cufcq-new
 def subscribe_user(self, user_id, row_id, user_subscription_name=None):
     """
     adds a user id to a model's subscription list.
     """
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     try:
         if user_subscription_name is not None:
             user_subscription = user_data[user_subscription_name]
             user_subscription.append(row_id)
             r.db(self.DB).table(user_table).get(user_id).update({user_subscription_name: user_subscription}).run(self.conn)
     except KeyError:
         logging.error("user subscription {0} not known in user data".format(user_subscription_name))
         return False
     subscribers = row_data['subscribers']
     subscribers.append(user_id)
     return r.db(self.DB).table(row_table).get(row_id).update({'subscribers': subscribers}).run(self.conn)
Code Example #21
    def save(db_host, db_port, db_name, db_table, data):

        if not isinstance(db_host, str):
            raise TypeError("Invalid database host name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_port, int):
            raise TypeError("Invalid database port argument type. Can't create Cache Walker instance.")

        if not isinstance(db_name, str):
            raise TypeError("Invalid database name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_table, str):
            raise TypeError("Invalid database table name argument type. Can't create Cache Walker instance.")

        try:
            connection = r.connect(db_host, db_port)

        except Exception as e:
            logger.debug("Can't connect to the database.")
            raise e

        try:
            r.db(db_name).table(db_table).insert(data).run(connection)

        except Exception as e:
            logger.debug("Can't insert data into the database.")
            raise e
Code Example #22
File: setup_db.py Project: luiscape/hio-setup
def LoadTestData(file, db, conn, v = False):
  '''Loading test data into the database.'''

  ## Loading data.
  data_dir = os.path.split(dir)[0]
  path = os.path.join(data_dir, 'tests', 'data', file)
  print path
  try:
    with open(path) as csv_file:
      data = csv.DictReader(csv_file)
      test_data = []
      for row in data:
        test_data.append(row)

  except Exception as e:
    print "Couldn't load test data."
    return False


  ## Storing in db.
  try:
    # Checking for existing records.
    n = r.db(db['name']).table('values').count().run(conn)
    if n > 0:
      if v:
        print "Data already in db. Deleting ..."
      r.db(db['name']).table('values').delete().run(conn)

    r.db(db['name']).table('values').insert(test_data).run(conn)
    return True

  except Exception as e:
    print "Could not insert data into database."
    return False
Code Example #23
File: corpus.py Project: nkman/Raiden
 def create_table(self):
   try:
     r.db('Raiden').table_create(self.corpus_table).run(self.connection)
     print 'Created table [Raiden.'+self.corpus_table+']'
   except Exception, e:
     print 'Error occurred during '+self.corpus_table+' table creation! Maybe it already exists!'
     print str(e)
Code Example #24
File: _import.py Project: isidorn/test2
def table_reader(options, file_info, task_queue, error_queue, exit_event):
    try:
        db = file_info["db"]
        table = file_info["table"]
        primary_key = file_info["info"]["primary_key"]
        conn = r.connect(options["host"], options["port"], auth_key=options["auth_key"])

        if table not in r.db(db).table_list().run(conn):
            r.db(db).table_create(table, primary_key=primary_key).run(conn)

        if file_info["format"] == "json":
            json_reader(task_queue,
                        file_info["file"],
                        db, table,
                        primary_key,
                        options["fields"],
                        exit_event)
        elif file_info["format"] == "csv":
            csv_reader(task_queue,
                       file_info["file"],
                       db, table,
                       primary_key,
                       options,
                       exit_event)
        else:
            raise RuntimeError("unknown file format specified")
    except (r.RqlClientError, r.RqlDriverError, r.RqlRuntimeError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2])))
    except InterruptedError:
        pass # Don't save interrupted errors, they are side-effects
    except:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb), file_info["file"]))
Code Example #25
File: banzaidb.py Project: m-emerson/BanzaiDB
def init_database_with_default_tables(args):
    """
    Create a new RethinkDB database and initialise (default) tables

    :param args: an argparse argument (force)
    """
    # Add additional (default) tables here...
    def_tables = ['determined_variants', 'strains_under_investigation',
                  'references', 'reference_features', 'strain_features']
    with database.make_connection() as connection:
        try:
            r.db_create(connection.db).run(connection)
            for atable in def_tables:
                r.db(connection.db).table_create(atable).run(connection)
        except RqlRuntimeError:
            print ("Database %s already exists. Use '--force' option to "
                   "reinitialise the database." % (connection.db))
            if args.force:
                print "Reinitialising %s" % (connection.db)
                r.db_drop(connection.db).run(connection)
                r.db_create(connection.db).run(connection)
                for atable in def_tables:
                    r.db(connection.db).table_create(atable).run(connection)
            else:
                sys.exit(1)
        print ("Initalised database %s. %s contains the following tables: "
               "%s" % (connection.db, connection.db, ', '.join(def_tables)))
Code Example #26
File: _export.py Project: JorgeRios/blogember
def get_tables(host, port, auth_key, tables):
    try:
        conn = r.connect(host, port, auth_key=auth_key)
    except r.RqlDriverError as ex:
        raise RuntimeError(ex.message)

    dbs = r.db_list().run(conn)
    res = []

    if len(tables) == 0:
        tables = [[db] for db in dbs]

    for db_table in tables:
        if db_table[0] not in dbs:
            raise RuntimeError("Error: Database '%s' not found" % db_table[0])

        if len(db_table) == 1: # This is just a db name
            res.extend([(db_table[0], table) for table in r.db(db_table[0]).table_list().run(conn)])
        else: # This is db and table name
            if db_table[1] not in r.db(db_table[0]).table_list().run(conn):
                raise RuntimeError("Error: Table not found: '%s.%s'" % tuple(db_table))
            res.append(tuple(db_table))

    # Remove duplicates by making results a set
    return set(res)
Code Example #27
File: db.py Project: IAmUser4574/zeromq-ros
    def create(self):
        conn = self.connect()

        db_list = r.db_list().run(conn)

        db_created = False
        table_created = False

        if not self.db_name in db_list:
            r.db_create(self.db_name).run(conn)
            db_created = True

        table_list = r.db(self.db_name).table_list().run(conn)

        if not self.config_table_name in table_list:
            r.db(self.db_name).table_create(
                self.config_table_name, primary_key=self.primary_key
            ).run(conn)

            r.db(self.db_name).table(self.config_table_name)\
                .index_create(self.secondary_index).run(conn)

            table_created = True

        return {"db": db_created, "table": table_created}
Code Example #28
File: _export.py Project: HiroIshikawa/21playground
def read_table_into_queue(progress, conn, db, table, pkey, task_queue, progress_info, exit_event):
    read_rows = 0
    if progress[0] is None:
        cursor = r.db(db).table(table).order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')
    else:
        cursor = r.db(db).table(table).between(progress[0], None, left_bound="open").order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')

    try:
        for row in cursor:
            if exit_event.is_set():
                break
            task_queue.put([row])

            # Set progress so we can continue from this point if a connection error occurs
            progress[0] = row[pkey]

            # Update the progress every 20 rows - to reduce locking overhead
            read_rows += 1
            if read_rows % 20 == 0:
                progress_info[0].value += 20
    finally:
        progress_info[0].value += read_rows % 20

    # Export is done - since we used estimates earlier, update the actual table size
    progress_info[1].value = progress_info[0].value
Code Example #29
File: dispatcher.py Project: XayOn/tqueues
    async def put(self):
        """
        .. http:put:: /?queue={string:queue}

            Creates a queue if it does not exist.

        **Example request**:

        .. sourcecode:: http

            PUT /?queue=foo
            Host: example.com
            Accept: application/json, text/javascript

        **Example response**:

        .. sourcecode:: http

            HTTP/1.1 200 OK
            Vary: Accept
            Content-Type: text/javascript

            ok

        :query queue: queue (table) to create
        :statuscode 200: This method always should return 200

        """
        opts = self.request.app['rethinkdb']
        conn = await r.connect(**opts)
        qname = self.request.GET['queue']
        with suppress(r.errors.ReqlOpFailedError):
            r.db(opts['db']).table_create(qname).run(conn)

        return web.Response(body=b'ok')
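A minimal client sketch for the endpoint above, assuming the dispatcher is served at http://localhost:8080 (URL and queue name are illustrative):

import asyncio
import aiohttp

async def create_queue(name):
    # PUT /?queue=<name> asks the dispatcher to create the backing table.
    async with aiohttp.ClientSession() as session:
        async with session.put('http://localhost:8080/', params={'queue': name}) as resp:
            return await resp.text()

print(asyncio.get_event_loop().run_until_complete(create_queue('foo')))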
Code Example #30
File: rethink.py Project: 52nlp/WikiKB
def bulk_insert(ifile):
	bulk_size = 1000
	i = 0
	bulk_ins = []
	bulk = {}
	for line in ifile:
		bulk = {}
		if line[0] == '#' or len(line) < 10 or line[0] == '@':
			continue
		line = line[:len(line)-2].replace("<","").replace(">","").strip()
		line_arr = line.split("\t")
		print line_arr,i
		bulk["id"] = unicode(line_arr[0],errors="ignore")
		bulk["rel"] = unicode(line_arr[1],errors="ignore")
		bulk["id2"] = unicode(line_arr[2],errors="ignore")

		if i < bulk_size - 1:
			bulk_ins.append(bulk)
			i += 1
		elif i == bulk_size - 1:
			bulk_ins.append(bulk)
			r.db("yago").table("test").insert(bulk_ins).run(conn)
			bulk_ins = []  # reset the batch so earlier rows are not re-inserted
			i = 0

	# flush any remaining records that did not fill a complete batch
	if i > 0:
		r.db("yago").table("test").insert(bulk_ins).run(conn)
Code Example #31
File: test_misc.py Project: techdragon/mockthink
 def test_reduce_1(self, conn):
     expected = 191
     result = r.db('d').table('nums').map(lambda doc: doc['points']).reduce(
         lambda elem, acc: elem + acc).run(conn)
     assertEqual(expected, result)
Code Example #32
File: Maker.py Project: adleonis/Mnet
def insert(tablename, thing, conn):
    res = r.db(DB).table(tablename).insert(thing).run(conn)
    return res
Code Example #33
def jumbo_write_json(data, db_name, table_name, chunk_size=5000, silent=True):
    '''Write big JSON lists to RethinkDB.

    Essential for datasets that are larger than 100,000 docs (ReQL max write).
    Often necessary even for smaller ones.

    data [list]: a list of dicts in JSON format.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    chunk_size [int or float of form BASEeEXP]: input list will be broken into
        chunks of this size. If you encounter memory use issues, reduce this
        value.
    silent [bool]: if True, does not print reports.

    Must be connected to a RethinkDB instance before using this.'''

    if chunk_size > 1e5:
        raise (Exception('Maximum JSON chunk_size is 100,000.'))

    #determine list length, number of chunks, and remainder
    list_length = len(data)
    chunk_size = int(
        chunk_size
    )  #max array length for a ReQL write is 100k; but that uses too much mem
    nchunks = math.ceil(list_length / chunk_size)
    rem = list_length % chunk_size

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        print('Creating database "' + db_name + '".')
        r.db_create(db_name).run()

    #create table if it doesn't already exist
    if table_name not in r.db(db_name).table_list().run():
        print('Creating table "' + table_name + '" in database "' \
            + db_name + '".')
        r.db(db_name).table_create(table_name).run()

    if silent == False:
        print('Writing list of ' + str(list_length) + ' trips to table "' \
            + table_name + '".')

    #digest data and write to RethinkDB
    for i in range(nchunks):
        s = i * chunk_size  #chunk_start

        if i == nchunks - 1 and rem != 0:
            e = s + rem
        else:
            e = (i + 1) * chunk_size

        if silent == False:
            print('Writing trips ' + str(s) + '-' + str(e - 1) + '.')

        #write chunk to rethink (some data may be lost in case of power failure)
        r.db(db_name).table(table_name).insert(data[s:e]).run(
            durability='soft', noreply=False)

    if silent == False:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Table "' + table_name + '" now contains ' + str(ndocs) \
            + ' trips.')
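A usage sketch for jumbo_write_json, assuming a repl() connection as the docstring requires; the database name, table name, and toy documents are made up:

import rethinkdb as r

r.connect('localhost', 28015).repl()  # repl() lets the .run() calls above omit an explicit connection

docs = [{'trip_id': i, 'duration_s': i * 60} for i in range(250000)]
jumbo_write_json(docs, db_name='transit', table_name='trips', chunk_size=5000, silent=False)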
Code Example #34
        if value == 1:
            result = value
        elif value == 0:
            result = is_following(user, owner)

        if result == 1:
            collaboration_cache[user][owner] = True

        return result


con = rdb.connect()
db_name, table_name = 'member_events', 'year_2016'

db_ref = rdb.db(db_name).table(table_name)

if db_name not in rdb.db_list().run(con):
    rdb.db_create(db_name).run(con)

if table_name not in rdb.db(db_name).table_list().run(con):
    rdb.db(db_name).table_create(table_name).run(con)

for i in range(2, 7):
    print '2016, {0}'.format(i)

    with open('{0}.json'.format(i)) as f:
        events = json.load(f)
        events = events[0]

    entries = []
Code Example #35
def run_vod_kpis(ucis, view_type):
    started_views = view_count(ucis)
    week_ucis = ucis.filter((dt_end - timedelta(days=6) < ucis.firstEvent)
                            & (ucis.firstEvent < dt_end + timedelta(days=1)))
    week_ago_ucis = ucis.filter((dt_end - timedelta(days=13) < ucis.firstEvent)
                                &
                                (ucis.firstEvent < dt_end - timedelta(days=6)))
    weekly_active_user = user_number(week_ucis)
    total_active_user = user_number(ucis)
    total_viewtime = total_viewing_time(ucis)
    user_viewtime = avg_user_viewtime(week_ucis)
    weekly_hibernation = user_hibernation(week_ucis, week_ago_ucis)
    top_program = top_programs_in_vod(ucis, 20)
    top_channel = normalize(top_tag_by_view_count(ucis, 'channelName'),
                            started_views)
    hour_of_day = normalize(view_count_by_hour_of_day(ucis), started_views)
    day_of_week = normalize(view_count_by_day_of_week(ucis), started_views)
    tag_user_package, user_package = users_package_overview(ucis)
    package_overview = {
        "{} user".format(view_type): tag_user_package,
        "linear TV user": user_package
    }
    res = [{
        "title": 'started-views',
        "id": 'started-views',
        "started-views": started_views
    }, {
        "title": 'weekly-active-user',
        "id": 'weekly-active-user',
        "weekly-active-user": weekly_active_user
    }, {
        "title": 'total-active-user',
        "id": 'total-active-user',
        "total-active-user": total_active_user
    }, {
        "title": 'total-viewing-time',
        "id": 'total-viewing-time',
        "total-viewing-time": total_viewtime
    }, {
        "title": 'viewing-time',
        "id": 'viewing-time',
        "viewing-time": user_viewtime
    }, {
        "title": 'user-hibernation',
        "id": 'user-hibernation',
        "user-hibernation": weekly_hibernation
    }, {
        "title": 'top-programs',
        "id": 'top-programs',
        "data": top_program
    }, {
        "title": 'top-provider',
        "id": 'top-provider',
        "data": top_channel
    }, {
        "title": 'hour-of-day',
        "id": 'hour-of-day',
        "data": hour_of_day
    }, {
        "title": 'day-of-week',
        "id": 'day-of-week',
        "data": day_of_week
    }, {
        "title": 'package-overview',
        "id": 'package-overview',
        "data": package_overview
    }]
    r.db('telenortv_insight_api').table(view_type).insert(
        res, conflict='replace').run()
Code Example #36
 def test_order_by_bracket(self, conn):
     res = r.db('x').table('farms').order_by(lambda doc: doc['id']).map(
         lambda doc: doc['id']).run(conn)
     expected = [1, 2]
     assertEqual(expected, list(res))
Code Example #37
 def user_leave(self, user, room):
     r.db(self.db).table(self.table).filter({
         'room': room,
         'room_user': user
     }).delete().run(self.conn)
Code Example #38
 def add_user(self, user, room, color):
     r.db(self.db).table(self.table).insert({
         'room': room,
         'room_user': user,
         'color': color
     }).run(self.conn)
Code Example #39
 def create_table(self, table):
     try:
         r.db(self.db).table_create(table).run(self.conn)
         print('table created')
     except:
         print('table exists')
Code Example #40
File: Maker.py Project: adleonis/Mnet
def read(DB, tablename, accountaddress, conn):
    cursor = r.db(DB).table(tablename).filter({
        'address': accountaddress
    }).pluck('address', 'balance').run(conn)
    for document in cursor:
        return document
Code Example #41
import rethinkdb as r
import algos

c = r.connect()
cursor = r.db("themis").table("pages").limit(1).run(c)
data = []
for document in cursor:
    databaseId = document['id']
    print(databaseId)
    kmeansResult = algos.kmeans(str(document['content']).decode('unicode-escape'))
    r.db("themis").table("pages").get(databaseId).update({"cluster": kmeansResult}).run(c)
Code Example #42
    # Returned Docopt arguments.
    docArgs = doc(__doc__, version="0.0.1")

    # Values from Docopt.
    noDB    = True if (int(docArgs["--nodb"]) == 1) else (False if (int(docArgs["--nodb"]) == 0) else None)
    online  = True if (int(docArgs["-o"    ]) == 1) else (False if (int(docArgs["-o"    ]) == 0) else None)

    #print(docArgs)

    dbA     = str(docArgs["--dba"][0])
    dbN     = str(docArgs["--dbn"][0])
    tOut    = int(docArgs["--tout"][0])

    app     = Flask(__name__)
    sIO     = sio(app)
    db      = r.db(dbN)

    if not noDB:
        c = database.conn(dbA);

    #print(db)
    #print(type(db))

    # Routings.
    @app.route("/")
    def index(): return render_template("index.html")

    @app.route("/api/client")
    def api_client():
        return DatabaseAPI(c, db, dbA, noDB, "client_name")
Code Example #43
File: rethinkdb_backup.py Project: tornaria/cocalc
def tables():
    import rethinkdb as r
    r.connect(host=DB_HOST, auth_key=open(AUTH).read().strip(), timeout=20).repl()
    return r.db('smc').table_list().run()
Code Example #44
    """
    Create the tables we are going to use
    """
    global connection, tables

    print "Creating databases/tables...",
    sys.stdout.flush()
    try:
        r.db_drop("test").run(connection)
    except r.errors.RqlRuntimeError, e:
        pass

    r.db_create("test").run(connection)

    for table in tables:
        r.db("test").table_create(table["name"]).run(connection)

    for table in tables:
        r.db("test").table(
            table["name"]).index_create("field0").run(connection)
        r.db("test").table(
            table["name"]).index_create("field1").run(connection)

    print " Done."
    sys.stdout.flush()


def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
Code Example #45
File: test.py Project: bitsmike/rethinkdb
def tests():
    print r.expr(1).run(c)
    print r.expr("bob").run(c)
    print r.expr(True).run(c)
    print r.expr(False).run(c)
    print r.expr(3.12).run(c)
    print r.expr([1, 2, 3, 4, 5]).run(c)
    print r.expr({'a': 1, 'b': 2}).run(c)
    #print r.js('1 + 1').run(c)

    print(r.expr(1) == 2).run(c)  # false
    print(r.expr(1) != 2).run(c)  # true
    print(r.expr(1) < 2).run(c)  # true
    print(r.expr(1) <= 2).run(c)  # true
    print(r.expr(1) > 2).run(c)  # false
    print(r.expr(1) >= 2).run(c)  # false
    print(~r.expr(True)).run(c)  # false
    print(~r.expr(False)).run(c)  # true

    print(r.expr(1) + 2).run(c)  # 3
    print(r.expr(1) - 2).run(c)  # -1
    print(r.expr(1) * 2).run(c)  # 2
    print(r.expr(1) / 2).run(c)  # .5
    print(r.expr(12) % 10).run(c)  # 2

    print(((r.expr(12) / 6) * 4) - 3).run(c)  # 5

    arr = r.expr([1, 2, 3, 4])

    print arr.append(5).run(c)
    print arr[1].run(c)
    print arr[2].run(c)
    print arr[1:2].run(c)
    print arr[:2].run(c)
    print arr[2:].run(c)
    print arr.count().run(c)
    print arr.union(arr).run(c)
    print arr.union(arr).distinct().run(c)
    print arr.inner_join(arr, lambda a, b: a == b).run(c)
    print arr.outer_join(arr, lambda a, b: a == (b - 2)).run(c)

    #print r.expr([{'id':0, 'a':0}, {'id':1, 'a':0}]).eq_join([{'id':0, 'b':1}, {'id':1, 'b':1}], 'id').run(c)

    obj = r.expr({'a': 1, 'b': 2})

    print obj['a'].run(c)
    print obj.contains('a').run(c)
    print obj.pluck('a').run(c)
    print obj.without('a').run(c)
    print obj.merge({'c': 3}).run(c)

    print r.db_list().run(c)
    print r.db_create('bob').run(c)
    print r.db_create('test').run(c)
    print r.db_list().run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_create('test').run(c)
    print r.db('test').table_create('bob').run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_drop('bob').run(c)
    print r.db('test').table_list().run(c)

    test = r.db('test').table('test')

    print test.run(c)
    print test.insert({'id': 1, 'a': 2}).run(c)
    print test.insert({'id': 2, 'a': 3}).run(c)
    print test.insert({'id': 3, 'a': 4}).run(c)
    print test.run(c)
    print test.between(right_bound=2).run(c)

    print test.update(lambda row: {'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.replace(lambda row: {'id': row['id'], 'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.delete().run(c)
    print test.run(c)

    print r.expr(1).do(lambda a: a + 1).run(c)
    print r.expr(2).do(lambda a: {'b': a / a}).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a + 1).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a.do(lambda b: b + a)).run(c)
    print r.expr([1, 2, 3]).reduce(lambda a, b: a + b).run(c)
    print r.expr([1, 2, 3, 4]).filter(lambda a: a < 3).run(c)

    print r.expr([1, 2]).concat_map(lambda a: [a, a]).run(c)

    print r.branch(r.expr(1) < 2, "a", "b").run(c)
    print r.branch(r.expr(1) < 0, "a", "b").run(c)

    print(r.expr(True) & r.expr(False)).run(c)
    print(r.expr(True) | r.expr(False)).run(c)
    print(r.expr(True) & r.expr(True)).run(c)
    print(r.expr(False) | r.expr(False)).run(c)

    #print r.expr([1,2]).map(3).run(c)
    #print r.expr([1,2]).map(r.row + 3).run(c)
    print r.expr([{'id': 2}, {'id': 3}, {'id': 1}]).order_by('id').run(c)
    print r.expr([{
        'g': 0,
        'v': 1
    }, {
        'g': 0,
        'v': 2
    }, {
        'g': 1,
        'v': 1
    }, {
        'g': 1,
        'v': 2
    }]).grouped_map_reduce(lambda row: row['g'], lambda row: row['v'] + 1,
                           lambda a, b: a + b).run(c)

    #print r.expr([1,2]).for_each(lambda i: [test.insert({'id':i, 'a': i+1})]).run(c)
    print test.run(c)
Code Example #46
def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
    global results, connection, time_per_query, executions_per_query, constant_queries

    print "Running inserts...",
    sys.stdout.flush()
    for table in tables:
        docs = []
        num_writes = gen_num_docs(table["size_doc"])
        for i in xrange(num_writes):
            docs.append(gen_doc(table["size_doc"], i))

        i = 0

        durations = []
        start = time.time()
        while (time.time() - start < time_per_query) & (i < num_writes):
            start_query = time.time()
            result = r.db('test').table(table['name']).insert(
                docs[i]).run(connection)
            durations.append(time.time() - start_query)

            if "generated_keys" in result:
                table["ids"].append(result["generated_keys"][0])
            i += 1

        durations.sort()
        results["single-inserts-" + table["name"] + "-" + suffix] = {
            "average": (time.time() - start) / i,
            "min": durations[0],
            "max": durations[len(durations) - 1],
            "first_centile":
            durations[int(math.floor(len(durations) / 100. * 1))],
            "last_centile":
            durations[int(math.floor(len(durations) / 100. * 99))]
        }

        # Save it to know how many batch inserts we did
        single_inserts = i

        # Finish inserting the remaining data
        size_batch = 500
        durations = []
        start = time.time()
        count_batch_insert = 0
        if i < num_writes:
            while i + size_batch < num_writes:
                start_query = time.time()
                result = r.db('test').table(table['name']).insert(
                    docs[i:i + size_batch]).run(connection)
                durations.append(time.time() - start_query)
                end = time.time()
                count_batch_insert += 1

                table["ids"] += result["generated_keys"]
                i += size_batch

            if i < num_writes:
                result = r.db('test').table(table['name']).insert(
                    docs[i:len(docs)]).run(connection)
                table["ids"] += result["generated_keys"]

        if num_writes - single_inserts != 0:
            results["batch-inserts-" + table["name"] + "-" + suffix] = {
                "average": (end - start) / (count_batch_insert * size_batch),
                "min":
                durations[0],
                "max":
                durations[len(durations) - 1],
                "first_centile":
                durations[int(math.floor(len(durations) / 100. * 1))],
                "last_centile":
                durations[int(math.floor(len(durations) / 100. * 99))]
            }

        table["ids"].sort()

    print " Done."
    sys.stdout.flush()

    # Execute the insert queries
    print "Running update/replace...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(write_queries)):
            docs = []
            num_writes = gen_num_docs(table["size_doc"])
            for i in xrange(num_writes):
                docs.append(gen_doc(table["size_doc"], i))

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(write_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)
                i += 1

            durations.sort()
            results[write_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

            i -= 1  # We need i in write_queries[p]["clean"] (to revert only the document we updated)
            # Clean the update
            eval(write_queries[p]["clean"]).run(connection)

    print " Done."
    sys.stdout.flush()

    # Execute the read queries on every tables
    print "Running reads...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(table_queries)):
            count = 0
            i = 0
            if "imax" in table_queries[p]:
                max_i = table_queries[p]["imax"] + 1
            else:
                max_i = 1

            durations = []
            start = time.time()
            while (time.time() - start <
                   time_per_query) & (count < executions_per_query):
                start_query = time.time()
                try:
                    cursor = eval(table_queries[p]["query"]).run(connection)
                    if isinstance(cursor, r.net.Cursor):
                        list(cursor)
                        cursor.close()

                    if i >= len(table["ids"]) - max_i:
                        i = 0
                    else:
                        i += 1
                except:
                    print "Query failed"
                    print constant_queries[p]
                    sys.stdout.flush()
                    break
                durations.append(time.time() - start_query)
                count += 1

            durations.sort()
            results[table_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / count,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()

    # Execute the delete queries
    print "Running delete...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(delete_queries)):
            start = time.time()

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(delete_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)

                i += 1

            durations.sort()
            results[delete_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()
Code Example #47
File: ipy.py Project: daemondev/tChat
def drop():
    """Delete all chats (truncate)"""
    r.db('chat').table('chats').delete().run(conn)
Code Example #48
def jumbo_write_df(df,
                   db_name,
                   table_name,
                   df_chunk_size=5e5,
                   json_chunk_size=5e3,
                   verbosity=1):
    '''Write big pandas dataframes to RethinkDB.

    Essential for datasets that are larger than 100,000 rows (ReQL max write).
    Often necessary even for smaller ones.

    df [pandas DataFrame]: 'nuff said.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    df_chunk_size [int or float of form BASEeEXP]: input df will be broken into
        chunks of this many rows. If you encounter memory use issues, reduce
        this value first. Maximum accepted value is 1,000,000.
    json_chunk_size [int or float of form BASEeEXP]: input list passed to
        jumbo_write_json will be broken into chunks of this size. If you
        encounter memory use issues, reduce this value second. Maximum
        accepted value is 100,000 (ReQL write limit).
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = reports from this function and subroutine jumbo_write_json.

    Calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.'''

    if df_chunk_size > 1e6:
        raise (Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise (Exception('Maximum json_chunk_size is 100,000. This size is \
            rarely a good idea.'))

    #set verbosity for jumbo_write_json
    sil = False if verbosity == 2 else True

    if verbosity > 0:
        print('Preparing ' + str(len(df)) + '-row DataFrame for database.')

    # json_list = []
    while len(df):  #runs as long as rows remain in the dataframe

        #take a chunk of the dataframe and convert to json list
        l = min(len(df), int(df_chunk_size)
                )  #get the first chunk_size lines, or all the rest if fewer
        chunk = df.iloc[0:l]  #subset them from the df
        df = df.drop(df.index[0:l])  #drop those lines
        json_list = chunk.to_dict('records')

        if verbosity > 0:
            print('Converting chunk of ' + str(l) + ' rows to JSON format.')

        # s_buf = io.StringIO() #create string buffer
        # chunk.to_csv(s_buf, index=False) #send chunk as csv to buffer
        # s_buf.seek(0) #reset buffer to first position
        # json_list = list(csv.DictReader(s_buf)) #read csv into json list
        # s_buf.close() #close string buffer

        #free up some memory
        del (chunk)
        gc.collect()  #remove all vars no longer referenced to free a bit more

        #open connection to null device for banishing unneeded outputs
        black_hole = open(os.devnull, 'w')
        # black_hole = [json_list[i].pop('', None) for i in range(len(json_list))]
        # black_hole = [json_list[i].pop('Unnamed: 0', None) for i in range(len(json_list))]

        #sort by hash.
        json_list = sorted(json_list, key=operator.itemgetter('hash'))

        #group json list by hash and remove hash from each reduction
        jl2 = []
        for hsh, red in itt.groupby(json_list,
                                    key=operator.itemgetter('hash')):
            red = list(red)
            black_hole = [red[i].pop('hash', None) for i in range(len(red))]
            jl2.append({'group': hsh, 'reduction': red})
        del (json_list)

        if verbosity > 0:
            print('Finished grouping chunk by hash. Passing list of length ' \
                + str(len(jl2)) + ' to jumbo_write_json.')

        #write list to rethink
        jumbo_write_json(data=jl2,
                         db_name=db_name,
                         table_name=table_name,
                         chunk_size=json_chunk_size,
                         silent=sil)
        del (jl2)

    if verbosity > 0:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Finished writing day of records. Wrote ' + str(ndocs) \
            + ' docs to table "' + table_name + '".')
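A usage sketch for jumbo_write_df under the same assumptions (an open repl() connection; names are illustrative). Note that the chunking path groups rows by a 'hash' column before writing, so the input frame needs one:

import pandas as pd
import rethinkdb as r

r.connect('localhost', 28015).repl()

df = pd.DataFrame({
    'hash': ['a', 'a', 'b', 'b'],
    'sensor': [1, 2, 1, 2],
    'strength': [-40, -42, -55, -61],
})
jumbo_write_df(df, db_name='acyclica', table_name='reads', df_chunk_size=2, verbosity=2)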
Code Example #49
File: download.py Project: hgwu80/fauna
 def count_documents(self, table):
     '''
     return integer count of number of documents in table
     '''
     return r.db(self.database).table(table).count().run()
Code Example #50
File: dbOperation.py Project: sec-u/Machine-Learning
 def __init__(self):
     self.conn = r.connect(host="172.16.1.2",port=28015)
     # r.db_list().contains('Atlas').do(lambda databaseExists: r.branch(databaseExists, 0, r.db_create('Atlas'))).run(self.conn)
     # r.db('Atlas').table_create('DomainTable').run(self.conn)
     # r.db('Atlas').contains('DomainTable').do(lambda exists: r.branch(exists, 0, r.db('Atlas').table_create('DomainTable'))).run(self.conn)
     self.table = r.db('Atlas').table("WordSearchCount")
Code Example #51
def retrieve_records(
        api_key,
        sensor_path,
        db_name,
        end_date=(
            datetime.datetime.strptime(time.strftime('%Y-%m-%d'), '%Y-%m-%d') -
            datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
        start_date=None,
        json_chunk_size=5e3,
        verbosity=1):
    '''Pull records from Acyclica's API and write to RethinkDB.

    api_key [str]: the 41-character alphanumeric key you were given by Acyclica.
        Should be read in from an environment variable, encrypted if possible.
    sensor_path [str]: the path to Acyclica_sensors_CBD.csv
        (should be fetched automatically once we package this thing).
    db_name [str]: the name of the RethinkDB database that will be populated.
    end_date [str]: a date string of the form 'YYYY-MM-DD' specifying the last
        day of data to pull from Acyclica. Defaults to yesterday.
    start_date [str]: a date string of the form 'YYYY-MM-DD' specifying the first
        day of data to fetch from Acyclica. Defaults to None, which means only
        end_date will be fetched. Set this to 'prev_week' to fetch the full week
        starting 8 days ago and ending yesterday.
    json_chunk_size [int or float of form BASEeEXP]: lists passed to
        jumbo_write_json will be broken into chunks of this size. No need to
        modify unless you encounter memory use issues, in which case you should
        first try reducing the default value of 5,000.
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = more reports from this function and from subroutine
            jumbo_write_json.

    Calls jumbo_write_df, which calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.

    Pull at minimum 1 day and at maximum 1 week of data in increments of 1
    day.'''

    #start timing
    start_time = time.time()

    #check for size limit errors
    # if df_chunk_size > 1e6:
    #     raise(Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise Exception('Maximum json_chunk_size is 100,000. This size is '
                        'rarely a good idea.')

    #check for end_date format error
    try:
        datetime.datetime.strptime(end_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        raise Exception('end_date must be of the form "YYYY-MM-DD".')

    #set appropriate start dates based on input
    if start_date == 'prev_week':
        start_date = (datetime.datetime.strptime(end_date, '%Y-%m-%d') -
                      datetime.timedelta(days=6)).strftime('%Y-%m-%d')
    elif start_date is None:
        start_date = end_date
    else:
        pass

    #check for start_date format error
    try:
        datetime.datetime.strptime(start_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        raise Exception('start_date must be of the form "YYYY-MM-DD".')

    #add 23 h, 59 m, and 59 s to the end date (to grab the whole day)
    end_date = datetime.datetime.strptime(end_date,
                                          '%Y-%m-%d') + datetime.timedelta(
                                              hours=23, minutes=59, seconds=59)

    #convert datetime objects to unix time
    start_unix = int(
        time.mktime(
            datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple()))
    end_unix = int(time.mktime(end_date.timetuple()))

    #make sure the user isn't trying to grab more than a week of data, and that
    #end is after start
    if end_unix - start_unix > 604800:
        raise Exception(
            'Please specify a range of dates no greater than one week.')
    if end_unix - start_unix < 0:
        raise Exception('end_date must not be earlier than start_date.')

    #determine how many days have been selected
    dif = end_unix - start_unix
    ndays = math.ceil(dif / (24 * 3600))

    #get sensor data
    sensors = pd.read_csv(sensor_path)
    # sensors = sensors.drop(['name', 'short_name','latitude','longitude'], axis=1)
    sensors.columns = ['IntersectionID', 'sensor']
    sensor_list = list(sensors['sensor'])

    if verbosity > 0:
        print('Preparing to acquire data for ' + str(ndays) + ' day(s) and ' \
            + str(len(sensor_list)) + ' sensors.')

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        r.db_create(db_name).run()

    #request and process one day at a time (roughly 5-10m records acquired per day)
    day_start_unix = start_unix
    for day in range(ndays):

        print('Acquiring records for day ' + str(day + 1) + ' of ' \
            + str(ndays) + '. May take several minutes.')

        #date string will be the table name on RethinkDB
        tname = datetime.datetime.fromtimestamp(
            int(day_start_unix)).strftime('%Y_%m_%d')
        if tname in r.db(db_name).table_list().run():
            print('Table "' + tname + '" already exists in database "' \
                + db_name + '". Skipping this day.')
            day_start_unix = day_start_unix + (24 * 3600)  #increment day
            continue
        else:
            r.db(db_name).table_create(tname).run()

        #get endpoints for each iteration and (re)instantiate dataframe
        day_end_unix = day_start_unix + (23 * 3600) + 3599
        df = pd.DataFrame(
            columns=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

        #request and preprocess each sensor separately
        for i in range(len(sensor_list)):

            # sensorID = sensor_list[1]
            URL = "https://cr.acyclica.com/datastream/device/csv/time/" \
                + api_key + "/" + str(sensor_list[i]) + "/" \
                + str(day_start_unix) + "/" + str(day_end_unix)

            #get raw web content and read into a dataframe
            items = requests.get(URL).content
            newdf = pd.read_csv(
                io.StringIO(items.decode('utf-8')),
                usecols=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

            #round timestamp to nearest second
            newdf['Timestamp'] = newdf['Timestamp'].round().astype('int')

            #drop repeated reads within 1s, keeping read with highest strength
            strmaxes = newdf.groupby(['Timestamp',
                                      'MAC Hash'])['Serial'].transform(max)
            newdf = newdf[newdf['Serial'] == strmaxes]

            #append to main dataframe
            df = df.append(newdf, ignore_index=True)

            if verbosity == 2:
                if i + 1 in [15, 30, 45]:
                    print('Got data for ' + str(i + 1) + ' of ' \
                        + str(len(sensor_list)) \
                        + ' sensors. So far there are ' + str(len(df)) \
                        + ' reads for day ' + str(day + 1) + '.')

        del (newdf)

        #drop repeated reads again, keeping read with highest strength
        strmaxes = df.groupby(['Timestamp',
                               'MAC Hash'])['Serial'].transform(max)
        df = df[df['Serial'] == strmaxes]

        pre_filt_len = str(len(df))
        if verbosity > 0:
            print('Found ' + pre_filt_len + ' sensor reads for day ' \
                + str(day + 1) + '. Cleaning those now.')

        json_list = df_to_json_etc(df, verbosity, pre_filt_len, sensors)

        if verbosity > 0:
            print('Converted DataFrame to JSON list and grouped by hash. ' \
                + 'Passing list of length ' + str(len(json_list)) \
                + ' to jumbo_write_json.')

        #set verbosity for jumbo_write_json
        sil = False if verbosity == 2 else True

        jumbo_write_json(data=json_list,
                         db_name=db_name,
                         table_name=tname,
                         chunk_size=json_chunk_size,
                         silent=sil)

        #increment day
        day_start_unix = day_start_unix + (24 * 3600)

    if verbosity > 0:
        run_time = round((time.time() - start_time) / 60, 2)
        print('Finished writing all records for ' + str(ndays) + ' day(s) ' \
            + 'in ' + str(run_time) + ' minutes.\nRecords are in database "' \
            + db_name + '".')
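A minimal usage sketch for retrieve_records, assuming a local RethinkDB instance, the sensor CSV in the working directory, and an API key held in an ACYCLICA_KEY environment variable; the database name 'acyclica_cbd' and the environment variable name are assumptions, not part of the original code:

import os
import rethinkdb as r

#retrieve_records calls .run() without a connection argument, so it needs a .repl() connection
r.connect('localhost', 28015).repl()

retrieve_records(api_key=os.environ['ACYCLICA_KEY'],  #41-character Acyclica key (assumed env var)
                 sensor_path='Acyclica_sensors_CBD.csv',
                 db_name='acyclica_cbd',
                 start_date='prev_week',  #fetch the full week ending yesterday
                 verbosity=1)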
コード例 #52
ファイル: Maker.py プロジェクト: adleonis/Mnet
def create_table(name, conn):
    res = r.db(DB).table_create(name).run(conn)
コード例 #53
def clear_current(sample_id, conn):
    r.db("samplesdb").table("sample2attribute_set")\
                     .get_all(sample_id, index="sample_id")\
                     .update({"current": False})\
                     .run(conn)
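A hedged usage sketch for clear_current, assuming the 'samplesdb' database and its 'sample_id' secondary index already exist; the host, port, and sample id below are illustrative assumptions:

import rethinkdb as r

conn = r.connect('localhost', 28015)

#mark every attribute set attached to this sample as no longer current
clear_current('3f2a9c40-example-sample-id', conn)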
コード例 #54
parameters = yaml.load(parameter_file)

print "Connecting database ..."
rethink = r.connect(parameters['rethinkdb_server']['host'],
                    parameters['rethinkdb_server']['port']).repl()
rethink_db = parameters['rethinkdb_server']['database']
url_queue_table = parameters['rethinkdb_server']['tables']['url_queue']
raw_result_table = parameters['rethinkdb_server']['tables']['raw_result']
indexed_result_table = parameters['rethinkdb_server']['tables'][
    'indexed_result']
# Init database
db_list = r.db_list().run(rethink)
if rethink_db not in db_list:
    print "Init database ..."
    r.db_create(rethink_db).run(rethink)
    r.db(rethink_db).table_create(url_queue_table).run(rethink)
    r.db(rethink_db).table(url_queue_table).index_create('ts').run(rethink)
    r.db(rethink_db).table_create(raw_result_table).run(rethink)
    r.db(rethink_db).table_create(indexed_result_table).run(rethink)

rethink.use(rethink_db)


def main(argv):
    # Main code here
    print "I'm manager :)"

    if len(argv) > 1:
        seed_url = argv[1]

        r.table(url_queue_table).insert({
コード例 #55
 def __init__(self):
     r.connect(settings['RETHINKDB_SERVER'],
               settings['RETHINKDB_PORT']).repl()
     self.db = r.db(settings['RETHINKDB_DB']).table(
         settings['RETHINKDB_TABLE'])
コード例 #56
#!/usr/bin/env python
# coding: utf-8
import rethinkdb as r
r.connect('localhost', 28015).repl()
watchcount = r.db('polltime').table('votes').get_all('b0aae840-f52e-4bdd-abcd-74789f52c6bd', index='choice').count().run()
dontwatchcount = r.db('polltime').table('votes').get_all('a966c7b3-9277-4c09-9254-8806762bbea0', index='choice').count().run()
watchint = int(watchcount)
dontwatchint = int(dontwatchcount)
with open("tmp/finalcountq4.txt", "w") as outfile:
    if watchint > dontwatchint:
        outfile.write("TRY")
    else:
        outfile.write("GIVE")
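The script above issues one count() query per choice. A hedged alternative sketch that fetches both totals in a single query by grouping on the 'choice' field (run after the connection above; this assumes the 'choice' secondary index is a plain field index, and the grouped result comes back as a dict keyed by choice id):

counts = r.db('polltime').table('votes').group('choice').count().run()
watchint = int(counts.get('b0aae840-f52e-4bdd-abcd-74789f52c6bd', 0))
dontwatchint = int(counts.get('a966c7b3-9277-4c09-9254-8806762bbea0', 0))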
コード例 #57
# along with BigBlueTutor.  If not, see <http://www.gnu.org/licenses/>.
#Prints the contents of all the tables in a RethinkDB database
#You can also pass the names of specific tables as command-line arguments to print only those tables
#Users' messages are excluded from printing

import rethinkdb as r
import dotenv
import os
import json
import sys

dotenv.load_dotenv("./.env")

r.connect(os.environ.get("DB_HOST"), int(os.environ.get("DB_PORT"))).repl()
tableList = []
if (len(sys.argv) > 1):
    tableList = sys.argv[1:len(sys.argv)]
else:
    tableList = r.db("deepstream").table_list().run()

print("Table list:")
print(tableList)
print()

for table in tableList:
    print("Table name: " + table)
    docs = list(r.db("deepstream").table(table).run())
    # docs = list(r.db("deepstream").table(table).without("messages").run())
    print(json.dumps(docs, indent=1, sort_keys=True))
    print()
コード例 #58
 def test_simple(self, conn):
     res = r.db('x').table('farms').map(lambda doc: doc['animals'][0]).run(
         conn)
     assertEqual(set(['frog', 'horse']), set(list(res)))
コード例 #59
 def test_filter_by_bracket(self, conn):
     res = r.db('x').table('farms').filter(lambda doc: doc['id'] < 2).run(
         conn)
     expected = [1]
     results = [doc['id'] for doc in res]
     assertEqual(expected, results)
コード例 #60
ファイル: test_misc.py プロジェクト: techdragon/mockthink
 def test_set_intersection(self, conn):
     expected = [set(['x', 'y']), set(['x'])]
     result = r.db('z').table('t').map(
         lambda doc: doc['simple'].set_intersection(['x', 'y'])).run(conn)
     result = map(lambda d: set(d), result)
     assertEqUnordered(expected, result)