Example #1
0
    def init(self):
        """Initialize the USN friends table in the serving layer.

        Creates the HBase table for the acquaintance view, then queries
        Hive for the full ``usn_friends`` relation and writes every row
        into HBase keyed by ``<username>_<date>``.
        """
        # Create table that holds the view on acquaintances
        # It only has one column family called 'a' for acquaintance and
        # three columns: a:name (the target), a:network, and a:comment
        self.create_table(table_name=TABLE_USN_FRIENDS, col_fam={'a': {}})

        # Query Hive and fill HBase table
        client = hiver.connect('localhost', 10000)
        client.execute('USE usn')
        client.execute(
            'SELECT actiontime, username, friend, network, note FROM usn_friends'
        )

        # Look up the HBase table handle once, outside the loop; the
        # original re-created it on every iteration.
        table = self.connection.table(TABLE_USN_FRIENDS)

        for row in client.fetchAll():
            fields = row.split('\t')
            # turns actiontime=2012-09-11T21:04:23-07:00 and user=Ted
            # into row_key=Ted_2012-09-11
            row_key = fields[1] + '_' + fields[0].split('T')[0]  # the row key
            a_name = fields[2]  # a:name (the target)
            a_network = fields[3]  # a:network
            a_comment = fields[4]  # a:comment
            # Per-row trace; print-statement syntax was Python-2-only
            # (the rest of the file already uses the print() function).
            print(row_key + ' - ' + a_name + ' - ' + a_network + ' - ' + a_comment)
            table.put(row_key, {
                'a:name': a_name,
                'a:network': a_network,
                'a:comment': a_comment
            })

        logging.info('Initialized USN tables in serving layer.')
Example #2
0
def hive_worker(params):
    """Run a saved Hive query and push its result table to memcache.

    Args:
        params: sequence of three items —
            [0] name: identifier of the saved query (logging + memcache key),
            [1] data: JSON string from the beeswax_savedquery table, shaped
                like ``{"query": {"query": "SELECT ..."}}``,
            [2] options: dict with 'hive-host', 'hive-port',
                'hive-initial-commands', 'memcache-host', 'memcache-port'.

    On a Hive server error the failure is logged and the function returns
    without touching memcache.
    """
    # Parse Parameters
    name = params[0]
    data = json.loads(params[1])
    options = params[2]

    # data is a json stored in the beeswax_savedquery table; it contains {"query": {"query": "SELECT ..."}}
    query = data["query"]["query"]

    logging.info("Starting worker for query %s", name)

    try:
        # Connect to hive
        client = hiver.connect(options['hive-host'], options['hive-port'])

        # Execute queries from config
        client.execute(options['hive-initial-commands'])

        # Execute main query
        client.execute(query)

        # First row of the output is the column names from the result schema
        schema = client.getSchema()
        column_names = [column.name for column in schema.fieldSchemas]
        output = [column_names]

        # Rest of the rows are tab-separated data. Fetching happens inside
        # the try block so a server-side failure while streaming results is
        # caught too (the original let it propagate uncaught).
        for row in client.fetchAll():
            output.append(row.split("\t"))
    except hiver.hive_service.ttypes.HiveServerException as e:
        # Log the exception itself: BaseException.message was removed in
        # Python 3, so the original `e.message` would raise AttributeError.
        logging.error("Failed to run hive query %s: %s", name, e)
        return

    memcache_address = "%s:%d" % (options['memcache-host'],
                                  options['memcache-port'])
    push_to_memcache(name, json.dumps(output), memcache_address)
Example #3
0
def check_setup():
    """Smoke-test the Hive Thrift server by issuing a trivial query."""
    conn = hiver.connect(HIVE_THRIFT_SERVER_HOST, HIVE_THRIFT_SERVER_PORT)
    conn.execute('SHOW DATABASES')
    print('All is fine, go ahead and enjoy!')