def test_basic():
    """ Run a db temporarily. """
    from testre.runner import run

    with run() as the_port:
        port = the_port
        connection = rethinkdb.connect(port=port)
        result = rethinkdb.db('test').table_create('testre').run(connection)
        assert_equals(result['tables_created'], 1)
    with assert_raises(rethinkdb.ReqlDriverError):
        connection = rethinkdb.connect(port=port)
    # a new connection should happen on a new db
    with run(port=port) as the_port:
        assert_equals(port, the_port)
        connection = rethinkdb.connect(port=port)
        assert_equals(rethinkdb.db('test').table_list().run(connection), [])
    with assert_raises(rethinkdb.ReqlDriverError):
        connection = rethinkdb.connect(port=port)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--rethinkdb-host", default="localhost:28015")
    parser.add_argument("-m", "--machine-id", default=socket.gethostname())
    args = parser.parse_args()

    host, port = args.rethinkdb_host.split(":")
    r.connect(host, port).repl()

    # A bare r.db(...) only builds a query term and never raises, so probe
    # the database with a real query before deciding whether to create it.
    try:
        r.db("logcentral").table_list().run()
    except r.ReqlOpFailedError:
        r.db_create("logcentral").run()

    db = r.db("logcentral")
    if 'cursor_state' not in db.table_list().run():
        r.db("logcentral").table_create("cursor_state").run()
    if 'log' not in db.table_list().run():
        r.db("logcentral").table_create("log").run()

    cursor_table = r.db("logcentral").table('cursor_state')
    log_table = r.db("logcentral").table('log')

    c = cursor_table.get(args.machine_id).run()
    c = None if c is None else c['cursor']

    for line in yield_log_lines(c):
        cursor, data = prepare_for_table(line, args.machine_id)
        log_table.insert(data).run()
        cursor_table.insert({"id": args.machine_id, "cursor": cursor},
                            durability="soft", conflict="replace").run()
def parse_url(self, urldata):
    print 'GOT URL: %s' % urldata
    allowed_types = ['text', 'audio', 'image']
    video_hosts = ['www.youtube.com', 'youtube.com', 'vimeo.com',
                   'www.vimeo.com', 'youtu.be']
    r = requests.get(urldata['url'], timeout=5)
    if r.status_code == 200:
        content_type = r.headers['content-type'].split('/')[0]
        if content_type not in allowed_types:
            return None
        if content_type == 'text':
            parse = urlparse.urlparse(urldata['url'])
            if parse.hostname in video_hosts:
                urldata['type'] = 'video'
            else:
                urldata['type'] = 'website'
            try:
                urldata['title'] = lxml.html.parse(urldata['url']).find(".//title").text
            except:
                urldata['title'] = "No Title"
        else:
            urldata['title'] = content_type.title()
            urldata['type'] = content_type
        rethinkdb.connect('localhost', 28015).repl()
        url_db = rethinkdb.db('siri').table('urls')
        url_db.insert(urldata).run()
        urldata['timestamp'] = datetime.datetime.utcnow()
        self.red.publish('irc_urls', json.dumps(urldata, default=json_datetime))
def parse_message(self, message):
    message = json.loads(message['data'])
    rethinkdb.connect('localhost', 28015).repl()
    log_db = rethinkdb.db('siri').table('logs')
    data = {
        'channel': message['channel'],
        'timestamp': rethinkdb.now(),
        'user': message['user'],
        'content': message['content'],
        'server': message['server'],
        'bot': message['bot'],
    }
    log_db.insert(data).run()
    urls = re.findall(url_re, message['content'])
    if urls:
        for url in urls:
            urldata = {
                'url': url,
                'user': message['user'],
                'channel': message['channel'],
                'server': message['server'],
                'bot': message['bot'],
                'timestamp': rethinkdb.now(),
            }
            gevent.spawn(self.parse_url, urldata)
    data['timestamp'] = datetime.datetime.utcnow()
    self.red.publish('irc_chat', json.dumps(data, default=json_datetime))
def check_db():
    r.connect(properties.get('RETHINK_HOST'), properties.get('RETHINK_PORT')).repl()
    return 'relayr' in r.db_list().run()
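# A hedged companion sketch (not from the original source): create the
# 'relayr' database when check_db() reports it missing, reusing the default
# connection registered by .repl() above.
def ensure_db():
    if not check_db():
        r.db_create('relayr').run()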
def main():
    # connect to RethinkDB; .repl() registers the default connection
    # used by the bare .run() calls below
    rethinkdb.connect("localhost", 28015, "mysql").repl()
    try:
        rethinkdb.db_drop("mysql").run()
    except:
        pass  # the db may not exist yet
    rethinkdb.db_create("mysql").run()
    tables = ["dept_emp", "dept_manager", "titles", "salaries",
              "employees", "departments"]
    for table in tables:
        rethinkdb.db("mysql").table_create(table).run()

    stream = BinLogStreamReader(
        connection_settings=MYSQL_SETTINGS,
        blocking=True,
        only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent],
    )

    # process the binlog feed
    for binlogevent in stream:
        if not isinstance(binlogevent, WriteRowsEvent):
            continue
        for row in binlogevent.rows:
            if not binlogevent.schema == "employees":
                continue
            vals = {str(k): str(v) for k, v in row["values"].iteritems()}
            rethinkdb.table(binlogevent.table).insert(vals).run()
    stream.close()
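# Hedged sketch (an assumption, not the original project's code): the stream
# above subscribes to update and delete events but only mirrors writes.
# Assuming each MySQL row carries an 'id' primary key, all three event types
# could be applied from the same loop like this:
def apply_row_event(binlogevent, row):
    vals = {str(k): str(v) for k, v in
            row.get("after_values", row["values"]).iteritems()}
    if isinstance(binlogevent, WriteRowsEvent):
        rethinkdb.table(binlogevent.table).insert(vals).run()
    elif isinstance(binlogevent, UpdateRowsEvent):
        rethinkdb.table(binlogevent.table).get(vals["id"]).update(vals).run()
    elif isinstance(binlogevent, DeleteRowsEvent):
        rethinkdb.table(binlogevent.table).get(vals["id"]).delete().run()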
def __init__(self, server=None, port=None):
    self.server = "localhost"
    self.port = 28015
    self.r = None
    try:
        import rethinkdb as r
    except ImportError:
        from sys import exit
        print "The rethinkdb client driver is required for this object"
        exit()
    if server:
        self.server = server
    if port:
        self.port = port
    try:
        # Top level objects
        r.connect(self.server, self.port).repl()
        self.r = r
    except r.errors.RqlDriverError:
        from sys import exit
        print "WARNING. Could not connect to %s port %s" % (self.server, self.port)
        exit()
def update():
    print "update() begin"

    # Connect to ReThinkDB
    r.connect("localhost", 28015).repl()

    # Get XML data from the remote API and parse it
    url = "http://api.ezaxess.com/v2/pd/longbeach/crimes/all"
    root = ElementTree.fromstring(requests.get(url).content)

    for item in root.findall("item"):
        # Construct a Python dictionary from the XML nodes
        incident = {
            "id": int(item.find("id").text),
            "item_id": int(item.find("id").text),
            "case_id": int(item.find("case_number").text),
            "incident_id": int(item.find("incident_id").text),
            "title": item.find("title").text.strip(),
            "description": item.find("description").text.strip(),
            "time": dateutil.parser.parse(item.find("date_occured").text),
            "address": item.find("block_address").text.strip(),
            "city": item.find("city").text.strip(),
            "state": item.find("state").text.strip(),
            "latitude": item.find("latitude").text.strip(),
            "longitude": item.find("longitude").text.strip(),
        }
        response = r.db("lbpd").table("incidents").insert(incident, conflict="update").run()
        print(incident["id"], response["inserted"])

    print "update() completed"
def index():
    import rethinkdb as r
    r.connect('localhost', 28015).repl()
    try:
        r.db_create('wlps').run()
    except RqlRuntimeError:
        pass
    for table in ('episode', 'show', 'notifications', 'queue'):
        try:
            r.db('wlps').table_create(table).run()
        except RqlRuntimeError:
            pass
def rethinkdb():
    """Prepare database and table in RethinkDB"""
    from rethinkdb.errors import ReqlOpFailedError, ReqlRuntimeError

    conn = r.connect(host=conf.RethinkDBConf.HOST)
    # Create database
    try:
        r.db_create(conf.RethinkDBConf.DB).run(conn)
        click.secho('Created database {}'.format(conf.RethinkDBConf.DB),
                    fg='yellow')
    except ReqlOpFailedError:
        click.secho('Database {} already exists'.format(conf.RethinkDBConf.DB),
                    fg='green')
    # Create table 'domains'
    conn = r.connect(host=conf.RethinkDBConf.HOST, db=conf.RethinkDBConf.DB)
    try:
        r.table_create('domains', durability=conf.RethinkDBConf.DURABILITY).\
            run(conn)
        click.secho('Created table domains', fg='yellow')
    except ReqlOpFailedError:
        click.secho('Table domains already exists', fg='green')
    # Create index on domains.name
    try:
        r.table('domains').index_create('name').run(conn)
        click.secho('Created index domains.name', fg='yellow')
    except ReqlRuntimeError:
        click.secho('Index domains.name already exists', fg='green')
def connect_db():
    """Connect to RethinkDB and return a connection to the fbscrap db."""
    # register a default connection, then return an explicit one
    r.connect('localhost', 28015).repl()
    connection = r.connect(host='localhost', port=28015, db='fbscrap')
    return connection
def setup_rethinkdb():
    import rethinkdb as r
    r.connect("localhost", 28015).repl()
    try:
        r.db_create("nonpublic").run()
    except:
        pass
    try:
        r.db_create("public").run()
    except:
        pass
    db = r.db("public")
    dbs_and_tables = {
        'nonpublic': ['third_party_creds', 'subscribers', 'users', 'sessions'],
        'public': ['crawling_instructions', 'apps',
                   'police_internal_affairs_cases',
                   'police_internal_affairs_allegations',
                   'organizations', 'tables', 'queries'],
    }
    for database in dbs_and_tables.keys():
        try:
            r.db_create(database).run()
        except:
            pass
        db = r.db(database)
        tables_needed = dbs_and_tables[database]
        existing_tables = db.table_list().run()
        # remove existing tables from what we need
        tables_to_create = set(tables_needed) - set(existing_tables)
        for table in tables_to_create:
            db.table_create(table).run()
    for table in dbs_and_tables['public']:
        #tables_ids = [item['id'] for item in r.db('public').table('tables').run()]
        #if not table in tables_ids:
        if 'police' in table:
            category = "policing"
        else:
            category = "People's NSA"
        r.db('public').table('tables').insert(
            {'id': table,
             'name': table.replace('_', ' ').capitalize(),
             'categories': [category]},
            conflict='update').run()
def __init__(self):
    r.connect('builder', 28015).repl()
    self.db = r.db('leevalley')
    #if 'sessions' not in self.db.tableList().run():
    #    self.sessions_table = self.db.table_create('sessions').run()
    #else:
    self.sessions_table = self.db.table('sessions')
def test_setup_db(self):
    """ Test creation of a db and tables """
    # test that the 'TEST' database doesn't exist
    with rethinkdb.connect(host='localhost', port=28015) as conn:
        db_list = rethinkdb.db_list().run(conn)
        self.assertTrue('TEST' not in db_list)

    creations = self.run_setup_db()
    # confirm the correct tables were created
    self.assertSetEqual(creations,
                        set(template.test_dataset.keys() + template.test_tables))

    with rethinkdb.connect(host='localhost', port=28015) as conn:
        # test that the 'TEST' database was created
        db_list = rethinkdb.db_list().run(conn)
        self.assertTrue('TEST' in db_list)
        conn.use('TEST')
        # test that the 'test' table was created
        table_list = rethinkdb.table_list().run(conn)
        self.assertEqual(len(table_list),
                         len(template.test_dataset.keys() + template.test_tables))
        self.assertTrue(template.test_dataset.keys()[0] in table_list)
        # test that the data is correct by checking columns
        data = [row for row in rethinkdb.table(
            template.test_dataset.keys()[0]).run(conn)]
        with open(template.test_json) as f:
            self.assertSetEqual(
                set(data[0].keys()) - set([u'id']),
                set(json.loads(f.read())[0].keys()))

    self.run_clear_test_db()
def main():
    import rethinkdb as r
    from rethinkdb.errors import RqlRuntimeError
    # Library used to generate fake test data
    from faker import Factory
    fake = Factory.create('pt_BR')

    # Connect to the local database
    r.connect(HOST, PORT).repl()
    try:
        r.db_drop(DBNAME).run()
    except RqlRuntimeError:
        pass

    # Create the database
    r.db_create(DBNAME).run()

    # Create the table
    r.db(DBNAME).table_create(TABLENAME).run()

    # Insert the records into the table
    for frase in range(TOTAL_FRASES):
        reg = {
            'id': frase,
            'frase': fake.text(),
            'autor': fake.name(),
        }
        r.db(DBNAME).table(TABLENAME).insert(reg).run()
def all():
    """Get all rounds from the database.

    Args:
        none

    Returns:
        list: the round documents if successful, False otherwise.
    """
    if config.log:
        print('getting all...')

    # Set up database variables
    db_name = 'aroundlb'
    table_name = 'rounds'

    # Connect to RethinkDB
    r.connect('localhost', 28015).repl()

    # Read every document from table <table_name>
    if config.log:
        print('Getting...')
    documents = []
    cursor = r.db(db_name).table(table_name).run()
    for document in cursor:
        documents.append(document)

    return documents
def __init__(self):
    self.prod_db = rethinkdb.connect("localhost", 28016)
    self.local_db = rethinkdb.connect("localhost", 28015)
    self.prod_uow = UoW(self.prod_db)
    self.local_uow = UoW(self.local_db)
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a': r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(r.db('test').table('test').get(0)['a'].contains(r.error('bob')),
                 r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1, 2]).reduce(lambda a, b: a + r.error("bob")).run(c)
def conn_rethinkdb(host, port, auth_key):
    """Connect to RethinkDB."""
    try:
        r.connect(host, port, auth_key=auth_key).repl()
    except ReqlDriverError as error:
        print "Error connecting to RethinkDB:", error
        exit(1)
def load_data_dirs(user, dirs, state_id):
    try:
        r.connect('localhost', 30815, db='materialscommons').repl()
        for directory in dirs:
            load_directory(user, directory, state_id)
    except Exception as exc:
        raise load_data_dirs.retry(exc=exc)
def main():
    argparser = argparse.ArgumentParser()
    subparsers = argparser.add_subparsers(help='Firmware MapReduce Controls',
                                          dest='command')

    subparsers.add_parser("guid_group", help="Group by UEFI file GUIDs.")
    subparsers.add_parser("object_group", help="Group by object hashes.")
    subparsers.add_parser("vendor_object_sum", help="Sum objects by vendor.")
    subparsers.add_parser("vendor_content_sum", help="Sum content by vendor.")
    subparsers.add_parser("vendor_object_count", help="Count objects by vendor.")
    subparsers.add_parser("vendor_content_count", help="Count content by vendor.")
    subparsers.add_parser("vendor_update_count", help="Count updates by vendor.")
    subparsers.add_parser("vendor_product_count", help="Count products by vendor.")

    args = argparser.parse_args()
    controller = Controller()
    command = "command_%s" % args.command

    r.connect("localhost", 28015).repl()
    db = r.db("uefi")

    command_ptr = getattr(controller, command, None)
    if command_ptr is not None:
        print "Running command (%s)..." % args.command
        begin = time.time()
        db.table("stats").insert(command_ptr(db, args).limit(99999)).run()
        end = time.time()
        print "...finished (%d) seconds." % (end - begin)
    else:
        print "Cannot find command: %s" % command
def test_start(self):
    assert not dbc.alive()
    dbc.start()
    try:
        r.connect()
    except r.errors.ReqlDriverError:
        assert False
def main():
    home = os.environ.get('RESYNC_HOME')
    if not home:
        return []
    with open(p.join(home, 'conf.json')) as f:
        conf = json.load(f)
    r.connect(conf['host'], conf['port'], conf['db']).repl()
    masterRecords = (Master(r.table(t), p) for p, t in conf['tbMap'].items())
    return masterRecords
def test_port_conversion(self):
    c = r.connect(port=str(self.port))
    r.expr(1).run(c)
    c.close()

    self.assertRaisesRegexp(
        r.RqlDriverError,
        "Could not convert port abc to an integer.",
        lambda: r.connect(port='abc'))
def is_healthy():
    try:
        auth_key = open('/secrets/rethinkdb/rethinkdb').read().strip()
        if not auth_key:
            auth_key = None
        rethinkdb.connect(host='localhost', timeout=4, auth_key=auth_key)
        return True
    except:
        return False
def process_item(self, item, spider):
    r.connect().repl()
    data = dict()
    data.update(item)
    # update the existing document if present, otherwise insert a new one
    if r.db('games').table('bgg').get(data['objectid']).run():
        r.db('games').table('bgg').get(data['objectid']).update(data).run()
    else:
        r.db('games').table('bgg').insert(data).run()
    return item
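# Hedged alternative (a sketch, not the project's code): the read-then-write
# pair above can collapse into one atomic upsert, assuming 'objectid' is the
# table's primary key, which the get() lookup above implies.
import rethinkdb as r

def upsert_game(data):
    r.connect().repl()
    r.db('games').table('bgg').insert(data, conflict='update').run()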
def get_db_conn_synchronous():
    """Returns a RethinkDB connection, synchronous - mostly for testing
    and management commands."""
    try:
        conn = r.connect(host=RETHINK_HOST, port=RETHINK_PORT, db=DB_NAME)
    except r.RqlRuntimeError:
        conn = r.connect(host=RETHINK_HOST, port=RETHINK_PORT)
    return conn
def get_db_conn():
    """Returns a Tornado-flavored RethinkDB connection future."""
    r.set_loop_type("tornado")
    try:
        conn = r.connect(host=RETHINK_HOST, port=RETHINK_PORT, db=DB_NAME)
    except r.RqlRuntimeError:
        conn = r.connect(host=RETHINK_HOST, port=RETHINK_PORT)
    return conn
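# A hedged usage sketch (an assumption, not from the original): with
# set_loop_type("tornado"), r.connect() returns a Future, so callers
# resolve it inside a coroutine.
from tornado import gen

@gen.coroutine
def fetch_first(table):
    conn = yield get_db_conn()
    doc = yield r.table(table).nth(0).run(conn)
    raise gen.Return(doc)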
def connect_rethink(self):
    '''
    Connect to the rethink database; check for an existing table,
    otherwise create it.
    '''
    try:
        r.connect(host="ec2-52-90-204-136.compute-1.amazonaws.com", port=28015,
                  db=self.database, auth_key=self.auth_key).repl()
        print("Connected to the \"" + self.database + "\" database")
    except:
        raise Exception("Failed to connect to the database " + self.database)
def save(request):
    """Saves Slack bot messages to RethinkDB.

    Args:
        request (object): the Flask request object, including the
            form-encoded message fields which Slack POSTs

    Returns:
        bool: result object if successful, False otherwise.
    """
    if config.log:
        print('listening...')

    # Grab every key/value from the POST and stuff it into a dict
    message = {}
    for key, value in request.form.items():
        message[key] = value

    # Set defaults
    if 'channel_name' in message:
        channel_name = message['channel_name']
        channel_name = ''.join(e for e in channel_name if e.isalnum())
    else:
        channel_name = 'unknown'
    if 'team_domain' in message:
        server_name = message['team_domain']
        server_name = ''.join(e for e in server_name if e.isalnum())
    else:
        server_name = 'unknown'

    # Set up logging variables
    db_name = server_name
    table_name = channel_name

    # Connect to RethinkDB
    r.connect('localhost', 28015).repl()

    # Create the RethinkDB database if it doesn't exist
    if db_name not in r.db_list().run():
        if config.log:
            print('database {} does not exist'.format(db_name))
        r.db_create(db_name).run()

    # Create the RethinkDB table if it doesn't exist
    if table_name not in r.db(db_name).table_list().run():
        if config.log:
            print('table {} does not exist'.format(table_name))
        r.db(db_name).table_create(table_name).run()
        r.db(db_name).table(table_name).index_create('timestamp').run()
        r.db(db_name).table(table_name).index_create('channel_name').run()

    # Insert the message into table <table_name>
    if config.log:
        print('Inserting...')
    response = r.db(db_name).table(table_name).insert(message).run()

    return True
def analyze_comments_from_db(self, pipe):
    r.connect('localhost', 28015).repl()
    # Delete the following patch once all of the comments are received and processed.
    try:
        r.db_create('lagrammar').run()
    except RqlRuntimeError:
        try:
            r.db('lagrammar').table_create('raw_comments').run()
        except RqlRuntimeError:
            print("The table raw_comments already exists")

    dict_of_comments_by_users = {}
    spelling_mistake_rule_id = 'MORFOLOGIK_RULE_EN'
    try:
        with open('pypg.config', 'r') as config:
            path = config.read()
        sys.path.append(path)
        import language_check
        from gibberish_detector import gib_detect_train
        from gibberish_detector import gib_detect
        gib_detect_train.train()
    except:
        print("I am sorry, but language_check was not found, nor a valid path "
              "where the language_check package resides.")
        sys.exit()

    tool_for_replace_errors = language_check.LanguageTool('en-US')
    page = 0
    comments_per_page = 50
    while True:
        page += 1
        self.buffer = io.BytesIO()
        c = pycurl.Curl()
        c.setopt(c.URL, 'http://learnapt.informationworks.in/api/grammar_check/comments'
                 '?per_page=' + str(comments_per_page) + '&page=' + str(page))
        c.setopt(c.HTTPHEADER, ['Authorization: Token %s' % str('b2661fa415440adb2ef6eb37af6ca3e5')])
        c.setopt(c.WRITEDATA, self.buffer)
        c.perform()
        c.close()
        self.body = self.buffer.getvalue()
        comments_details = json.loads(self.body.decode('UTF-8'))['comments']
        print("Got " + str(comments_per_page) + " comments for the page " + str(page))
        if len(comments_details) == 0:
            break
        for comment_details in comments_details:
            if comment_details['content'] is None or str(comment_details['content']).strip() == '':
                continue
            user_id = comment_details['user_id']
            item_id = comment_details['commentable_id']
            if user_id not in dict_of_comments_by_users.keys():
                dict_of_comments_by_users[user_id] = {}
            if item_id not in dict_of_comments_by_users[user_id].keys():
                dict_of_comments_by_users[user_id][item_id] = []
            dict_of_comments_by_users[user_id][item_id].append(
                {'id': comment_details['id'],
                 'data': comment_details['content'].strip(),
                 'datetime': comment_details['created_at'],
                 'commentable_type': comment_details['commentable_type']})
            try:
                r.db('lagrammar').table('raw_comments').insert(comment_details).run()
            except:
                traceback.print_stack()
                print('Not able to insert the raw comment details. Is it important?')

    # Creating and/or updating the lagrammar database and the comments table in rethinkdb.
    tool = language_check.LanguageTool('en-GB')
    try:
        r.db_create('lagrammar').run()
    except RqlRuntimeError:
        try:
            r.db('lagrammar').table_create('analyzed_comments').run()
        except RqlRuntimeError:
            print("The table already exists")

    users = dict_of_comments_by_users.keys()
    dict_of_items = {}
    for user in users:
        items = dict_of_comments_by_users[user].keys()
        for item in items:
            if item not in dict_of_items:
                dict_of_items[item] = {}
            if user not in dict_of_items[item]:
                dict_of_items[item][user] = []
            comments = dict_of_comments_by_users[user][item]
            for comment in comments:
                type_of_comment = None
                comment_dict = {}
                comment_dict['user_id'] = user
                comment_dict['comment_id'] = comment['id']
                comment_dict['item_id'] = item
                comment_dict['commentable_type'] = comment['commentable_type']
                comment_dict['data'] = comment['data']
                comment_dict['datetimestamp'] = str(comment['datetime'])
                comment_dict['rule_id'] = []
                comment_dict['category'] = []
                comment_dict['msg'] = []
                comment_dict['spos'] = []
                comment_dict['epos'] = []
                comment_dict['suggestions'] = []
                print('The comment is: ' + comment['data'])

                # Next we check whether the comment is gibberish.
                gib_detect_tokens = []
                gib_detect_results = gib_detect.check(comment['data'])
                if gib_detect_results is not None:
                    type_of_comment = 'gibberish'
                    print('The comment ' + comment['data'] + ' is a gibberish one.')
                    for result in gib_detect_results:
                        gib_detect_tokens.append(result['token'])

                # Next we check whether the comment is copied, repeated or
                # plagiarized, if it is not a gibberish one.
                if type_of_comment != 'gibberish':
                    users_by_item = dict_of_items[item].keys()
                    for user_by_item in users_by_item:
                        if comment['data'] in dict_of_items[item][user_by_item]:
                            if user != user_by_item:
                                comment_dict['type'] = 'copied'
                                comment_dict['copied_from_user'] = user_by_item
                                type_of_comment = 'copied'
                            else:
                                comment_dict['type'] = 'repetition'
                                type_of_comment = 'repetition'
                            break
                if (type_of_comment != 'copied' and type_of_comment != 'repetition'
                        and len(comment['data'].strip()) > 140):
                    # Constrain the character size of the comment to be greater than 140.
                    try:
                        # Check for plagiarism against sources from the internet.
                        plagiarism_results = PScripts.main(comment['data'], 'po.txt')
                        if len(plagiarism_results.keys()) > 0:
                            type_of_comment = 'plagiarised'
                            comment_dict['type'] = 'plagiarised'
                            comment_dict['plagiarised_dict'] = plagiarism_results
                    except:
                        print("Plagiarism check failed.")

                count_retries = 0
                matches = []
                while count_retries < 2:
                    count_retries += 1
                    try:
                        matches = tool.check(comment['data'])
                        break
                    except:
                        tool = language_check.LanguageTool('en-GB')

                # Special handling for comments which aren't found to have an error.
                if len(matches) == 0:
                    if (type_of_comment != 'plagiarised' and type_of_comment != 'repetition'
                            and type_of_comment != 'copied'):
                        comment_dict['type'] = 'good'
                    dict_of_items[item][user].append(comment['data'])
                    r.db('lagrammar').table('analyzed_comments').insert(comment_dict).run()
                    continue
                else:
                    if type_of_comment is not None and type_of_comment != 'gibberish':
                        comment_dict['type'] = type_of_comment
                    else:
                        comment_dict['type'] = 'incorrect'
                for match in matches:
                    # This check ensures that words which are misspelled as per
                    # exactly one of the British and American English dictionaries,
                    # and not the other, are not shown as misspelled. Only a
                    # spelling mistake in both dictionaries counts as one.
                    token_with_error = comment['data'][match.fromx:match.tox]
                    if match.ruleId == spelling_mistake_rule_id + '_GB':
                        count_retries = 0
                        while count_retries < 2:
                            count_retries += 1
                            try:
                                matches_for_replace = tool_for_replace_errors.check(comment['data'])
                                break
                            except:
                                tool_for_replace_errors = language_check.LanguageTool('en-US')
                        to_continue = True
                        for match_for_replace in matches_for_replace:
                            if match_for_replace.ruleId == spelling_mistake_rule_id + '_US':
                                if token_with_error in gib_detect_tokens:
                                    comment_dict['type'] = 'gibberish'
                                    comment_dict['gibberish_details'] = gib_detect_results
                                    type_of_comment = 'gibberish'
                                to_continue = False
                                break
                        if to_continue == True:
                            continue
                    # The check to follow skips errors for words which only
                    # highlight differences between the American and British
                    # dictionaries, to narrow the gap between the two.
                    if match.ruleId == 'EN_GB_SIMPLE_REPLACE':
                        continue
                    comment_dict['rule_id'].append(match.ruleId)
                    comment_dict['category'].append(match.category)
                    comment_dict['msg'].append(match.msg)
                    comment_dict['spos'].append(match.fromx)
                    comment_dict['epos'].append(match.tox)
                    comment_dict['suggestions'].append(match.replacements)
                dict_of_items[item][user].append(comment['data'])
                r.db('lagrammar').table('analyzed_comments').insert(comment_dict).run()
from diagnostic.data_interface.input_data import SparkParquetIO
from diagnostic.calculation.calculation import *
from diagnostic.calculation.utils import normalize
from diagnostic.execution.utils import channel_ids
from datetime import datetime, timedelta
import rethinkdb as r
import pytz
import os

RDB_HOST = os.getenv('RDB_HOST', 'localhost')
RDB_PORT = os.getenv('RDB_PORT', 28015)

r.connect(host=RDB_HOST, port=RDB_PORT).repl()


def query_channel_weekly(channel_id):
    return r.db('telenortv_insight_api').table('channel_by_week')\
        .filter({'channelID': channel_id}).run()


def channel_overview(dt, week_ucis, channel_id):
    channel_by_week = query_channel_weekly(channel_id)
    channel_data = [(x['started-views'],
                     int(x['viewing-time'] * x['weekly-active-user']))
                    for x in channel_by_week]
    total_views = sum([x[0] for x in channel_data])
    # this is already measured in minutes
    total_viewtime = sum([x[1] for x in channel_data])
    channel_hour_of_day = view_count_by_hour_of_day(week_ucis)
    channel_day_of_week = view_count_by_day_of_week(week_ucis)
    weekly_active_user = user_number(week_ucis)
    completion_ratio = avg_completion_ratio(week_ucis)
def import_directory(options):
    # Scan for all files, make sure no duplicated tables with different formats
    dbs = False
    db_filter = set([db_table[0] for db_table in options["db_tables"]])
    files_to_import = []
    files_ignored = []
    for root, dirs, files in os.walk(options["directory"]):
        if not dbs:
            files_ignored.extend([os.path.join(root, f) for f in files])
            # The first iteration through should be the top-level directory,
            # which contains the db folders
            dbs = True
            if len(db_filter) > 0:
                for i in reversed(xrange(len(dirs))):
                    if dirs[i] not in db_filter:
                        del dirs[i]
        else:
            if len(dirs) != 0:
                files_ignored.extend([os.path.join(root, d) for d in dirs])
                del dirs[0:len(dirs)]
            for f in files:
                split_file = f.split(".")
                if len(split_file) != 2 or split_file[1] not in ["json", "csv", "info"]:
                    files_ignored.append(os.path.join(root, f))
                elif split_file[1] == "info":
                    pass  # Info files are included based on the data files
                elif not os.access(os.path.join(root, split_file[0] + ".info"), os.F_OK):
                    files_ignored.append(os.path.join(root, f))
                else:
                    files_to_import.append(os.path.join(root, f))

    # For each table to import collect: file, format, db, table, info
    files_info = []
    for filename in files_to_import:
        res = get_import_info_for_file(filename, options["db_tables"])
        if res is not None:
            files_info.append(res)

    # Ensure no two files are for the same db/table, and that all formats are recognized
    db_tables = set()
    for file_info in files_info:
        if (file_info["db"], file_info["table"]) in db_tables:
            raise RuntimeError("Error: Duplicate db.table found in directory tree: %s.%s"
                               % (file_info["db"], file_info["table"]))
        if file_info["format"] not in ["csv", "json"]:
            raise RuntimeError("Error: Unrecognized format for file %s" % file_info["file"])
        db_tables.add((file_info["db"], file_info["table"]))

    conn_fn = lambda: r.connect(options["host"], options["port"], auth_key=options["auth_key"])
    # Make sure this isn't a pre-`reql_admin` cluster - which could result in data loss
    # if the user has a database named 'rethinkdb'
    rdb_call_wrapper(conn_fn, "version check", check_minimum_version, (1, 16, 0))
    already_exist = rdb_call_wrapper(conn_fn, "tables check", tables_check,
                                     files_info, options["force"])

    if len(already_exist) == 1:
        raise RuntimeError("Error: Table '%s' already exists, run with --force to "
                           "import into the existing table" % already_exist[0])
    elif len(already_exist) > 1:
        already_exist.sort()
        extant_tables = "\n  ".join(already_exist)
        raise RuntimeError("Error: The following tables already exist, run with "
                           "--force to import into the existing tables:\n  %s" % extant_tables)

    # Warn the user about the files that were ignored
    if len(files_ignored) > 0:
        print("Unexpected files found in the specified directory. Importing a directory expects", file=sys.stderr)
        print(" a directory from `rethinkdb export`. If you want to import individual tables", file=sys.stderr)
        print(" import them as single files. The following files were ignored:", file=sys.stderr)
        for f in files_ignored:
            print("%s" % str(f), file=sys.stderr)

    spawn_import_clients(options, files_info)
import feedparser
import settings
import rethinkdb as r
from time import sleep
import os

r.connect(os.environ.get('RETHINK_HOST', 'localhost'),
          int(os.environ.get('RETHINK_PORT', 28015))).repl()

while True:
    for f in settings.FEED_LIST:
        try:
            feed = feedparser.parse(f)
            for entry in feed['entries']:
                terms = []
                for t in settings.TERMS:
                    if t.lower() in entry['summary']:
                        terms.append(t)
                if terms:
                    d = {
                        'external_id': entry['id'],
                        'agent': 'feed-monitor',
                        'source': feed['feed']['title'],
                        'text': entry['summary'],
                        'type': 'news',
                        'sub_type': 'term-match',
                        'date': entry['published'],
                        'url': entry['link'],
                        'summary': entry['summary'],
                        'terms': terms,
def ext_pillar(minion_id, pillar, table="pillar", id_field=None, field=None, pillar_key=None):
    """
    Collect minion external pillars from a RethinkDB database

    Arguments:

    * `table`: The RethinkDB table containing external pillar information.
      Defaults to ``'pillar'``
    * `id_field`: Field in document containing the minion id.
      If blank then we assume the table index matches minion ids
    * `field`: Specific field in the document used for pillar data, if blank
      then the entire document will be used
    * `pillar_key`: The salt-master will nest found external pillars under
      this key before merging into the minion pillars. If blank, external
      pillars will be merged at top level
    """
    host = __opts__["rethinkdb.host"]
    port = __opts__["rethinkdb.port"]
    database = __opts__["rethinkdb.database"]
    username = __opts__["rethinkdb.username"]
    password = __opts__["rethinkdb.password"]

    log.debug(
        "Connecting to %s:%s as user '%s' for RethinkDB ext_pillar",
        host,
        port,
        username,
    )

    # Connect to the database
    conn = rethinkdb.connect(host=host, port=port, db=database,
                             user=username, password=password)

    data = None

    try:
        if id_field:
            log.debug(
                "ext_pillar.rethinkdb: looking up pillar. "
                "table: %s, field: %s, minion: %s",
                table,
                id_field,
                minion_id,
            )
            if field:
                data = (rethinkdb.table(table)
                        .filter({id_field: minion_id})
                        .pluck(field)
                        .run(conn))
            else:
                data = rethinkdb.table(table).filter({id_field: minion_id}).run(conn)
        else:
            log.debug(
                "ext_pillar.rethinkdb: looking up pillar. "
                "table: %s, field: id, minion: %s",
                table,
                minion_id,
            )
            if field:
                data = rethinkdb.table(table).get(minion_id).pluck(field).run(conn)
            else:
                data = rethinkdb.table(table).get(minion_id).run(conn)
    finally:
        if conn.is_open():
            conn.close()

    if data.items:
        # Return nothing if multiple documents are found for a minion
        if len(data.items) > 1:
            log.error(
                "ext_pillar.rethinkdb: ambiguous documents found for minion %s",
                minion_id,
            )
            return {}
        else:
            result = data.items.pop()

        if pillar_key:
            return {pillar_key: result}
        return result
    else:
        # No document found in the database
        log.debug("ext_pillar.rethinkdb: no document found")
        return {}
def test_connect(something):
    return r.connect(something).repl()
#!/usr/bin/python
# -*- coding: utf-8 -*-

import rethinkdb as r

r.connect("localhost", 28015).repl()

# count tweets per source: group by source, map each tweet to 1, then sum
cursor = r.table("tweets").group(lambda tweet: tweet["source"]).map(
    lambda tweet: 1).reduce(lambda a, b: a + b).run()

print cursor
def before_request():
    try:
        g.rdb_conn = r.connect(host=RDB_HOST, port=RDB_PORT, db=LOCATION_DB)
    except RqlDriverError:
        abort(503, "No database connection could be established.")
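# A hedged companion sketch (not from the original): Flask apps that open a
# connection in before_request typically close it in a teardown handler.
# Assumes the same `g.rdb_conn` set above and a module-level `app`.
@app.teardown_request
def teardown_request(exception):
    try:
        g.rdb_conn.close()
    except AttributeError:
        pass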
from models import *
import rethinkdb as r
import os
import json

conn = r.connect(host=os.environ.get("RETHINKDB_HOSTNAME") or "127.0.0.1",
                 user=os.environ.get("RETHINKDB_USER") or "admin",
                 password=os.environ.get("RETHINKDB_PASSWORD") or "")


def create_db_if_not_exists(db):
    try:
        r.db_create(db).run(conn)
    except BaseException:
        pass


def create_table_if_not_exists(table):
    try:
        r.table_create(table).run(conn)
    except BaseException:
        pass


def create_index_if_not_exists(table, *args, **kwargs):
    try:
        r.table(table).index_create(*args, **kwargs).run(conn)
    except BaseException:
        pass
import rethinkdb as r

r.connect('localhost', 28015).repl()

# delete tweets whose created_at is older than 48 hours (172800 seconds)
r.table('waitingTweets').filter(
    r.row['created_at'].to_epoch_time() <= (r.now() - r.epoch_time(172800))
).delete().run()
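# A hedged equivalent (a sketch, not the project's code): ReQL also allows
# subtracting seconds from a time directly, so the same cleanup can compare
# times without converting to epoch seconds. Assumes the connection above.
r.table('waitingTweets').filter(
    r.row['created_at'] <= r.now() - 172800
).delete().run()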
        ret = rql.run()
        return ret
    except r.RqlRuntimeError:
        pass


if __name__ == "__main__":
    MCDB_PORT = environ.get('MCDB_PORT')
    print("Clearing all tables in materialscommons and mcpub; except for the following:")
    print("    materialscommons.users, mcpub.users, and materialscommons.templates.")
    if not MCDB_PORT:
        print("This script requires that MCDB_PORT be set - and it is not; exiting.")
        exit(-1)
    print("  Clear materialscommons and mcpub databases on port = " + MCDB_PORT + "...")
    r.connect("localhost", int(MCDB_PORT)).repl()
    clear_mc_tables()
    clear_mcpub_tables()
    print("Done clearing tables in materialscommons and mcpub.")
def on_error(self, status_code):
    """Called when a non-200 status code is returned"""
    logging.error('Twitter returned error code %s', status_code)
    self.error = status_code
    return False

def on_unknown(self, entity):
    """Called when an unrecognized object arrives"""
    logging.error('Unknown object received: %s', repr(entity))
    return True

print("get users to monitor...")
with r.connect(**rdb_config) as conn:
    for user in r.table('users').run(conn):
        users.append(user)

user_ids = [user['id_str'] for user in users]
logging.info("Following %i users", len(user_ids))

stream = tweepy.Stream(auth=api.auth, listener=listener())

while True:
    try:
        print("starting stream...")
        stream.filter(follow=user_ids)
    except Exception as e:
        print(e)
import gmplot
import geojson
import rethinkdb as r
import time
import sys
import numpy as np
import random
from datetime import datetime
from sklearn.cluster import KMeans

center = [38.360556, -92.592181]
gmap = gmplot.GoogleMapPlotter(center[0], center[1], 7)
num_clinics = 10

r.connect("DESKTOP-K4G0PLO", 28015).repl()

#for i in range(0, 6):
while 1:
    tabletemp = r.db("ArchHacks").table("County_Lines").run()
    for item in tabletemp:
        county_ind = 0
        county_lat = []
        county_lng = []
        coords_temp = item["geometry"]["coordinates"][0]
        for coords in coords_temp:
            county_lat.append(coords[0])
            county_lng.append(coords[1])
            county_ind += 1
        gmap.polygon(county_lng, county_lat, "pink",
def analyze_comments_from_stdin(self, pipe):
    self.pipe = pipe
    spelling_mistake_rule_id = 'MORFOLOGIK_RULE_EN'
    r.connect('localhost', 28015).repl()
    try:
        with open('pypg.config', 'r') as config:
            path = config.read()
        sys.path.append(path)
        import language_check
    except FileNotFoundError:
        print("I am sorry, but language_check was not found, nor a valid path "
              "where the language_check package resides.")
        sys.exit()

    tool = language_check.LanguageTool('en-GB')
    tool_for_replace_errors = language_check.LanguageTool('en-US')
    try:
        r.db_create('lagrammar').run()
    except RqlRuntimeError:
        try:
            r.db('lagrammar').table_create('analyzed_comments').run()
        except RqlRuntimeError:
            print("The table already exists")

    comments = {}
    pc = PlagiarismChecker()
    while True:
        user_dict = {}
        comment_dict = {}
        comment_dict['rule_id'] = []
        comment_dict['str'] = []
        comment_dict['category'] = []
        comment_dict['msg'] = []
        comment_dict['spos'] = []
        comment_dict['epos'] = []
        comment_dict['suggestions'] = []

        print('Enter the user name:')
        input_stream = sys.stdin
        user_name = input_stream.readline().strip()
        comment_dict['name'] = user_name
        if user_name not in pc.comments.keys():
            pc.add_user(user_name)

        print('Enter the comment to be checked for grammar')
        input_data = input_stream.readline().strip()
        print(input_data)
        comment_dict['data'] = input_data
        comment_dict['datetimestamp'] = str(datetime.now())
        print('The comment is: ' + input_data)

        try:
            plagiarism_results = PScripts.main(input_data, 'po.txt')
            if len(plagiarism_results.keys()) > 0:
                print("The comment by the user " + user_name +
                      " is plagiarised and hence will not be analyzed")
                comment_dict['type'] = 'plagiarised'
                comment_dict['plagiarised_dict'] = plagiarism_results
                r.db('lagrammar').table('analyzed_comments').insert(comment_dict).run()
                continue
        except:
            print("Plagiarism check failed")

        print("Analyzing the comment " + input_data)
        pc.add_comments(user_name, [input_data])

        count_retries = 0
        matches = []
        while True:
            count_retries += 1
            if count_retries > 1:
                break
            try:
                matches = tool.check(input_data)
                break
            except:
                tool = language_check.LanguageTool('en-GB')

        for match in matches:
            # Ensure that words misspelled according to exactly one of the
            # British and American English dictionaries are not reported as
            # misspelled; only a mistake in both dictionaries counts.
            if match.ruleId == spelling_mistake_rule_id + '_GB':
                count_retries = 0
                while True:
                    count_retries += 1
                    if count_retries > 1:
                        break
                    try:
                        matches_for_replace = tool_for_replace_errors.check(input_data)
                        break
                    except:
                        tool_for_replace_errors = language_check.LanguageTool('en-US')
                to_continue = True
                for match_for_replace in matches_for_replace:
                    if match_for_replace.ruleId == spelling_mistake_rule_id + '_US':
                        to_continue = False
                        break
                if to_continue == True:
                    continue
            # The check to follow skips errors for words which only highlight
            # differences between the American and British dictionaries, to
            # narrow the gap between the two.
            if match.ruleId == 'EN_GB_SIMPLE_REPLACE':
                continue
            comment_dict['rule_id'].append(match.ruleId)
            comment_dict['str'].append(match.__str__())
            comment_dict['category'].append(match.category)
            comment_dict['msg'].append(match.msg)
            comment_dict['spos'].append(match.fromx)
            comment_dict['epos'].append(match.tox)
            comment_dict['suggestions'].append(match.replacements)
            print(str(match) + ' THE CORRECTION AND THE SUGGESTION')

        r.db('lagrammar').table('analyzed_comments').insert(comment_dict).run()
def r_conn(box=[None]):
    # the mutable default argument caches one shared connection
    if box[0] is None:
        box[0] = r.connect()
        box[0].use('vim_awesome')
    return box[0]
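# The mutable default argument above acts as a one-slot cache shared by all
# callers. A hedged equivalent (a sketch, not the project's code) that makes
# the cached connection explicit:
import rethinkdb as r

_conn = None

def get_cached_conn():
    global _conn
    if _conn is None:
        _conn = r.connect()
        _conn.use('vim_awesome')
    return _conn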
def get_conn():
    '''Get the connection to the database.'''
    return r.connect(host=bigchaindb.config['database']['host'],
                     port=bigchaindb.config['database']['port'],
                     db=bigchaindb.config['database']['name'])
def get_rdb_conn():
    connection = rdb.connect(host=RDB_HOST, port=RDB_PORT)
    return connection
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import rethinkdb as r

try:
    from settings import RDB_HOST, RDB_PORT, RDB_DATABASE
except:
    RDB_HOST = "localhost"
    RDB_PORT = 28015
    RDB_DATABASE = "vagascrawler"

table_name = "vagas"

r.connect(RDB_HOST, RDB_PORT).repl()

# Create the database
r.db_create(RDB_DATABASE).run()

# Create the table and its indexes
r.db(RDB_DATABASE).table_create(table_name).run()
r.db(RDB_DATABASE).table(table_name).index_create("body").run()
r.db(RDB_DATABASE).table(table_name).index_create("datetime").run()
r.db(RDB_DATABASE).table(table_name).index_create("cidade").run()
r.db(RDB_DATABASE).table(table_name).index_create("categoria").run()
r.db(RDB_DATABASE).table(table_name).index_create("titulo").run()
    To help json.dump to deal with datetime.datetime objects.
    """
    from datetime import datetime
    if isinstance(obj, datetime):
        serial = obj.isoformat()
        return serial
    raise TypeError("Type not serializable")

# Rethinkdb Setup ###
SMC_ROOT = os.environ.get("SMC_ROOT", '.')

if os.environ.get("DEVEL", False):
    # DEV mode
    import dev.project.util
    port = dev.project.util.get_ports()["rethinkdb"]
    r.connect(host="localhost", db="smc", port=port, timeout=20).repl()
else:
    AUTH = open(join(SMC_ROOT, 'data/secrets/rethinkdb')).read().strip()
    r.connect(host="db0", db="smc", auth_key=AUTH, timeout=20).repl()
    # or proxy on localhost:
    # r.connect(db="smc", auth_key=AUTH, timeout=20).repl()

# print("Registering tables:", end=" ")
for t in r.table_list().run():
    globals()[t] = r.table(t)
    # print(t, end=", ")

# system tables
rdb = r.db("rethinkdb")
for t in rdb.table_list().run():
    globals()['r_%s' % t] = rdb.table(t)
import os

import rethinkdb as r
from rethinkdb.errors import RqlRuntimeError, RqlDriverError

RDB_HOST = 'localhost'
RDB_PORT = 28015
PROJECT_DB = 'crypto'
PROJECT_TABLE_BTC = 'btc'
PROJECT_TABLE_DOGE = 'doge'
PROJECT_TABLE_ETHERIUM = 'eth'
PROJECT_TABLE_LITECOIN = 'ltc'
PRIMARY_KEY = 'date'

# Set up db connection client
db_connection = r.connect(RDB_HOST, RDB_PORT)


def dbSetup():
    """Cross-check that the database and its tables exist."""
    try:
        r.db_create(PROJECT_DB).run(db_connection)
        print('Database setup completed.')
    except RqlRuntimeError:
        try:
            r.db(PROJECT_DB).table_create(PROJECT_TABLE_BTC).run(db_connection)
            print('Table - %s, creation completed' % PROJECT_TABLE_BTC)
            r.db(PROJECT_DB).table_create(PROJECT_TABLE_DOGE).run(db_connection)
            print('Table - %s, creation completed' % PROJECT_TABLE_DOGE)
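# A hedged, idempotent variant (a sketch, not the project's code): the nested
# try above only creates tables when db_create fails, so a freshly created
# database ends up with no tables. Checking table_list() avoids that,
# assuming the same constants and connection defined above.
def db_setup_idempotent():
    if PROJECT_DB not in r.db_list().run(db_connection):
        r.db_create(PROJECT_DB).run(db_connection)
    existing = r.db(PROJECT_DB).table_list().run(db_connection)
    for table in (PROJECT_TABLE_BTC, PROJECT_TABLE_DOGE,
                  PROJECT_TABLE_ETHERIUM, PROJECT_TABLE_LITECOIN):
        if table not in existing:
            r.db(PROJECT_DB).table_create(
                table, primary_key=PRIMARY_KEY).run(db_connection)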
    ui_modules=uimodules,
    login_url='/user/login',
    cookie_secret='cookie-secret-change-this',
    xsrf_cookies=True,
    debug=True,
)

# ==========================================================
# DATABASE & SECURITY SETTINGS
# ==========================================================
DBNAME = 'appname'
ENV = os.environ.get('APPNAME_ENV')

# Local
if ENV in ['local', None]:
    DBCON = rethinkdb.connect()
    TORNADO['debug'] = True
    TORNADO['xsrf_cookies'] = False
# Production
else:
    DBCON = rethinkdb.connect(host='YOUR-SQL-PRIVATE-IP')

if ENV is None:
    logging.warning('* APPNAME_ENV var not set. Defaulting to local.')

# ==========================================================
# REDIS CONNECTION + rq QUEUES
# ==========================================================
REDIS = redis.Redis()
JOBS_QUEUE = rq.Queue(connection=REDIS)
from random import randint
from random import choice
import csv, string

### The name of the table that will be created in the RethinkDB instance
table_name = 'Vehicle'
file_name = '2010-2016vehicles.csv'
file_url = "https://s3.amazonaws.com/v8sdemoapp/2010-2016vehicles.csv"

### Define the host for the database (default to localhost)
dbhost = environ['DATABASE_HOST'] if 'DATABASE_HOST' in environ else 'localhost'

### Connect to the RethinkDB instance
try:
    dbconn = rdb.connect(dbhost)
    print("Connected to RethinkDB")
except:
    print("Could not establish connection to database service on {0}.".format(dbhost))
    exit(10)


def download_data(fname):
    args = {'url': file_url, 'filename': fname}
    urlretrieve(**args)
    print('Finished downloading vehicles data to file: {0}'.format(fname))


def create_table_if_not_exists(tname):
    if tname in rdb.table_list().run(dbconn):
""" return the key-value pair with minimum numeric key in a defaultdict """ if len(d) == 0: return MAX_DISTANCE, [] else: min_key, min_val = d.items()[0] for k, v in d.items(): if k < min_key: min_key, min_val = k, v return min_key, min_val # collect ordered WatchEvents as they appeared con = rdb.connect() stargazers = rdb.db('repo_stars').table('pravj_Doga').order_by( 'starred_at').run(con) # stargazers' mutual following network for repository 'pravj/Doga' with open( '/home/pravendra/projects/gitworld/collector/pravj_Doga_stargazers.json', 'r') as f: graph_data = json.load(f) stargazers_following_graph = json_graph.adjacency_graph(graph_data) # stargazers connection graph stargazers_network_graph = nx.DiGraph() # add root node (creator of the repo) and its attributes stargazers_network_graph.add_node('pravj')
import rethinkdb as r
import math
import numpy as np
import time

r_conn = r.connect(db="nekobot")


def get_single():
    userid = str(input("Userid: "))
    data = r.table("levelSystem").get(userid).run(r_conn, array_limit=1000000)
    if not data:
        print("User not found")
        exit(0)
    print("Blacklisted? %s" % data["blacklisted"])
    print("Last XP %s" % data["lastxp"])
    print("Amount of xp times %s" % len(data["lastxptimes"]))
    print("XP %s" % data["xp"])
    print("Level %s" % (int((1 / 278) * (9 + math.sqrt(81 + 1112 * (data["xp"]))))))
    lasttime = data["lastxptimes"][0]
    i = []
    for times in data["lastxptimes"]:
        x = (int(times) - int(lasttime))
        i.append(x)
        print("Seconds Since: %s" % x)
        lasttime = times
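# A hedged companion (an assumption, not from the source): inverting the
# level formula above, level = (9 + sqrt(81 + 1112*xp)) / 278, gives the
# minimum XP needed to reach a given level.
def xp_for_level(level):
    # solve level = (9 + sqrt(81 + 1112*xp)) / 278 for xp
    return ((278 * level - 9) ** 2 - 81) / 1112.0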
#!/usr/bin/env python3

import datetime

import pytz
import rethinkdb as r
import twitter

from flags import flags
from monitor import Monitor

monitor = Monitor()
db_connection = r.connect()
table = r.db(flags.database).table(flags.table)

api = twitter.Api(
    consumer_key=flags.consumer_key,
    consumer_secret=flags.consumer_secret,
    access_token_key=flags.access_token_key,
    access_token_secret=flags.access_token_secret,
)

stream = api.GetStreamFilter(track=flags.filter)
for tweet in stream:
    try:
        row = {
            "timestamp": pytz.utc.localize(
                datetime.datetime.utcfromtimestamp(
                    int(tweet["timestamp_ms"]) / 1000.0)),
import rethinkdb as r

conn = r.connect("localhost", 28015, db='heck')

#r.db_create('heck').run(conn)
#r.db_drop('superheroes').run(conn)
#r.db("heck").table_create("beacons").run(conn)

cursor = r.table("beacons").run(conn)
#cursor = r.table("beacons").delete().run(conn)
for document in cursor:
    print(document)
# Redis Server
try:
    r_server = redis.Redis(
        host=config['redis_host'], port=config['redis_port'],
        db=config['redis_db'], password=config['redis_password'])
    logger.info("Connected to Redis on port %s" % config['redis_port'])
except:
    logger.error("Cannot connect to redis, shutting down")
    sys.exit(1)

# RethinkDB Server
try:
    if config['rethink_authkey']:
        rdb_server = r.connect(
            host=config['rethink_host'], port=config['rethink_port'],
            auth_key=config['rethink_authkey'], db=config['rethink_db'])
    else:
        rdb_server = r.connect(
            host=config['rethink_host'], port=config['rethink_port'],
            db=config['rethink_db'])
    logger.info("Connected to Rethinkdb on port %s" % config['rethink_port'])
    cacheonly = False
except (RqlDriverError, RqlRuntimeError, socket.error) as e:
    logger.critical("Cannot connect to rethinkdb, going into cacheonly mode")
    logger.critical("RethinkDB: %s" % e.message)
    cacheonly = True
    rdb_server = None

# Start ZeroMQ listener
context = zmq.Context()
def initialize(self):
    self.conn = r.connect(self.application.settings['host'],
                          self.application.settings['port'],
                          db=self.application.settings['db'])
def __init__(self, host='localhost', port=28015, db='game'):
    self._conn = r.connect(host=host, port=port, db=db)
def connect(self):
    """Initializes a connection to the database."""
    LOGGER.debug("Connecting to database: %s:%s", self._host, self._port)
    self._conn = r.connect(host=self._host, port=self._port)
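# A hedged extension (a sketch, not the project's code): the same connect
# call wrapped with a simple retry/backoff, assuming the class's _host and
# _port attributes and the module-level LOGGER used above.
import time

def connect_with_retry(self, attempts=3, delay=1.0):
    for attempt in range(1, attempts + 1):
        try:
            self._conn = r.connect(host=self._host, port=self._port)
            return
        except r.errors.ReqlDriverError:
            LOGGER.warning("Connect attempt %d/%d failed", attempt, attempts)
            if attempt == attempts:
                raise
            time.sleep(delay * attempt)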