# Imports assumed by these excerpts (module-level in the original file;
# boto_util and hive_mysql_connector are local helper modules, and
# g_logger is the module-level logger).
import datetime
import sys
import time

import boto

import boto_util
import hive_mysql_connector


def wait_for_data(wait_for_config, options):
    """Wait for data before kicking off hive jobs"""
    # Step 1 - read metadata.
    hive_mysql_connector.configure(options.hive_masternode,
                                   options.ssh_keyfile)
    if options.hive_init:
        hive_mysql_connector.run_hive_init()

    # Step 2 - wait until all the data partitions are available
    boto_util.initialize_creds_from_file()
    s3conn = boto.connect_s3()
    s3bucket = s3conn.get_bucket('ka-mapreduce')
    max_wait = datetime.timedelta(hours=options.max_wait)
    start = datetime.datetime.now()
    for d in wait_for_config:
        table = d['table_name']
        table_location = hive_mysql_connector.get_table_location(table)
        for p in d['partitions']:
            partition_location = table_location + '/' + p
            # TODO(yunfang): abstract the following to wait_for_partition
            #                for boto_util
            while True:
                if partition_available(s3bucket, partition_location):
                    g_logger.info("%s is available" % partition_location)
                    break
                if (datetime.datetime.now() - start) > max_wait:
                    # We've already waited too long. Give up.
                    g_logger.fatal("Waited too long and the data is "
                                   "still not available. Exiting...")
                    sys.exit(1)
                # Wait a minute before checking again
                g_logger.info("Waiting for %s to be available..." %
                              partition_location)
                time.sleep(60)
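
# The partition_available() helper called above isn't shown in this
# excerpt. Below is a minimal sketch of what it plausibly does, assuming a
# partition counts as ready once any key exists under its S3 prefix (the
# prefix-stripping and readiness test are assumptions, not the original
# implementation):
def partition_available(s3bucket, partition_location):
    """Return True if any S3 key exists under the partition's prefix."""
    # partition_location is a full s3://ka-mapreduce/... URL, but boto's
    # bucket.list() expects only the key prefix within the bucket.
    prefix = partition_location.split('s3://ka-mapreduce/')[-1]
    # bucket.list() returns a lazy iterator; stop at the first key found.
    return any(True for _ in s3bucket.list(prefix=prefix))


# For reference, wait_for_config is expected to look something like this
# (hypothetical table and partition values):
#
# wait_for_config = [
#     {'table_name': 'user_daily_activity',
#      'partitions': ['dt=2013-05-01', 'dt=2013-05-02']},
# ]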
if __name__ == '__main__':
    start_dt = datetime.datetime.now()
    # Named arguments appear as properties on the options object;
    # unnamed arguments appear as elements in the args list.
    options, args = parse_command_line_args()

    # Step 1 - read metadata.
    hive_masternode = args[0]  # Generally 'ka-hive'
    hive_mysql_connector.configure(hive_masternode, options.ssh_keyfile)
    if options.hive_init:
        hive_mysql_connector.run_hive_init()

    table_name = args[1]  # Generally the same as target_collection (args[3])
    print "Fetching table info..."
    table_location = hive_mysql_connector.get_table_location(table_name)
    if not table_location:
        raise Exception("Can't read info about %s in Hive master %s"
                        % (table_name, hive_masternode))
    if not table_location.startswith('s3://ka-mapreduce/'):
        raise Exception("Can only import from s3://ka-mapreduce for now")
    column_info = hive_mysql_connector.get_table_columns(table_name)

    target_db = args[2]          # Always 'report'
    target_collection = args[3]  # Generally the same as table_name (args[1])
    partition_cols = args[4:]    # Something like ['dt=2013-05', ...]
    # TODO(benkomalo): prompt/dry-run flags?

    # Step 2 - print locations
    print_locations(table_location, column_info, partition_cols, target_db,
                    target_collection)
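
# A hypothetical invocation of the script above, assuming it's saved as
# hive_to_mongo.py (the actual filename isn't shown in this excerpt). The
# positional args are hive_masternode, table_name, target_db,
# target_collection, then any partition columns:
#
#   python hive_to_mongo.py ka-hive user_daily_activity report \
#       user_daily_activity dt=2013-05-01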