def init(self):
    """Init the USN table in the serving layer from Hive.

    Creates the table ``TABLE_USN_FRIENDS``, which holds the view on
    acquaintances, and fills it with every row returned by the Hive query
    on ``usn.usn_friends``. The table has a single column family ``a``
    (for acquaintance) with three columns: ``a:name`` (the target),
    ``a:network`` and ``a:comment``.

    Each row key is ``<username>_<date>``, where the date is the part of
    ``actiontime`` before the ``T`` separator.

    Raises:
        IndexError: if a fetched row has fewer than five tab-separated
            fields.
    """
    # Create the table with its one column family 'a'.
    self.create_table(table_name=TABLE_USN_FRIENDS, col_fam={'a': {}})

    # Query Hive for the rows to copy over.
    client = hiver.connect('localhost', 10000)
    client.execute('USE usn')
    client.execute(
        'SELECT actiontime, username, friend, network, note FROM usn_friends'
    )
    rows = client.fetchAll()

    # Look the table up once instead of once per row; the lookup takes the
    # same argument on every iteration.
    table = self.connection.table(TABLE_USN_FRIENDS)

    for row in rows:
        fields = row.split('\t')
        # Turns actiontime=2012-09-11T21:04:23-07:00 and username=Ted
        # into row_key=Ted_2012-09-11.
        row_key = fields[1] + '_' + fields[0].split('T')[0]
        a_name = fields[2]     # a:name (the target)
        a_network = fields[3]  # a:network
        a_comment = fields[4]  # a:comment
        # Single-argument print call: same output on Python 2 and 3.
        print(row_key + ' - ' + a_name + ' - ' + a_network + ' - ' + a_comment)
        table.put(row_key, {
            'a:name': a_name,
            'a:network': a_network,
            'a:comment': a_comment,
        })

    logging.info('Initialized USN tables in serving layer.')
def hive_worker(params):
    """Connect to Hive, run one query and push its result to memcache.

    Args:
        params: indexable holding three items:
            params[0] -- name of the query; used in log messages and
                passed to ``push_to_memcache`` as its first argument.
            params[1] -- JSON text as stored in the beeswax_savedquery
                table, shaped like ``{"query": {"query": "SELECT ..."}}``.
            params[2] -- options mapping; the keys read here are
                'hive-host', 'hive-port', 'hive-initial-commands',
                'memcache-host' and 'memcache-port'.

    Returns:
        None. On success the result is pushed to memcache as a JSON list
        of rows whose first row is the column names. If Hive raises
        ``HiveServerException`` while connecting, executing or reading the
        schema, the error is logged and nothing is pushed.
    """
    # Parse parameters
    name = params[0]
    data = json.loads(params[1])
    options = params[2]
    query = data["query"]["query"]

    # Lazy %-style arguments: logging does the formatting, so a non-string
    # value cannot raise TypeError the way '+' concatenation would.
    logging.info("Starting worker for query %s", name)
    try:
        # Connect to hive
        client = hiver.connect(options['hive-host'], options['hive-port'])
        # Execute the initial statement from config before the main query
        client.execute(options['hive-initial-commands'])
        # Execute main query
        client.execute(query)
        # Get schema; the first output row is the column names
        schema = client.getSchema()
        column_names = [column.name for column in schema.fieldSchemas]
        output = [column_names]
    except hiver.hive_service.ttypes.HiveServerException as e:
        # e.message may be None; building the text with '+' would then
        # raise TypeError here and hide the original Hive failure.
        logging.error("Failed to run hive query %s:%s", name, e.message)
        return

    # Rest of the rows are data.
    # NOTE(review): fetchAll() runs outside the try block, so a
    # HiveServerException raised while fetching propagates to the caller.
    # Confirm that this is intended.
    output.extend(row.split("\t") for row in client.fetchAll())

    memcache_address = "%s:%d" % (options['memcache-host'],
                                  options['memcache-port'])
    push_to_memcache(name, json.dumps(output), memcache_address)
def check_setup():
    """Smoke-test the connection to the Hive Thrift server.

    Connects to HIVE_THRIFT_SERVER_HOST on HIVE_THRIFT_SERVER_PORT, runs
    ``SHOW DATABASES`` and prints a confirmation message once both calls
    have returned without raising.
    """
    hive = hiver.connect(HIVE_THRIFT_SERVER_HOST, HIVE_THRIFT_SERVER_PORT)
    # The statement's result is not read; it only has to execute.
    hive.execute('SHOW DATABASES')
    print('All is fine, go ahead and enjoy!')