Example #1
0
def read_csv_table(filehandle, options):
    """
    Read a csv table from the input file handle.

    The csv input is expected to have this layout (0-based line numbers,
    matching the keyword arguments passed to the reader below):

        0   version/options
        1   tablename
        2   column names (csv)
        3   column types (csv)
        4   column descriptions (csv) -- skipped
        5+  data (csv)

    filehandle -- handle handed straight to CsvTableReader.read_table().
    options    -- not used by this function; kept so existing callers
                  keep working.

    Returns the table returned by CsvTableReader.read_table(), or None
    when the reader reports a failure (the error is printed to stdout).
    """

    # Imported locally, as in the original, so the module is only needed
    # when this function is actually called.
    from csv_table_reader import CsvTableReader

    csv_table_reader = CsvTableReader()

    # Only the 5th line (column descriptions) is skipped here.  The table
    # name comes from the 2nd line, names from the 3rd, types from the 4th.
    # The "version" in the first line is now the options list.
    # No table name is supplied (None) because it is read from the file.
    (table, error) = csv_table_reader.read_table(filehandle,
                                                 None,
                                                 skip_linenos={4},
                                                 options_lineno=0,
                                                 types_lineno=3,
                                                 tablename_lineno=1,
                                                 colnames_lineno=2)

    if table is None:
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("Failure reading csv file: {}".format(error))
        return None

    return table
Example #2
0
def read_csv_table(filehandle, options):
    """
    Read csv table from the input file handle.

    Expected file format (line numbers below are 0-based and correspond
    to the *_lineno keyword arguments given to the reader):

        line 0   version/options
        line 1   tablename
        line 2   column names (csv)
        line 3   column types (csv)
        line 4   column descriptions (csv), skipped
        line 5+  data (csv)

    Arguments:
        filehandle -- passed unchanged to CsvTableReader.read_table().
        options    -- unused here; retained for interface compatibility.

    Returns the table object from CsvTableReader.read_table() on success.
    On failure (the reader returned no table) the reader's error is
    printed to stdout and None is returned.
    """

    # Function-scope import preserved from the original code.
    from csv_table_reader import CsvTableReader

    reader = CsvTableReader()

    # Skip only the 5th line (descriptions).  Table name is in the 2nd
    # line, column names in the 3rd, column types in the 4th.  The
    # "version" in the first line is now the options list.
    table, error = reader.read_table(filehandle, None,
                                     skip_linenos={4},
                                     options_lineno=0,
                                     types_lineno=3,
                                     tablename_lineno=1,
                                     colnames_lineno=2)

    if table is None:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print("Failure reading csv file: {}".format(error))
        return None

    return table
Example #3
0
def _build_table_whitelist(options):
    """
    Collect the table names allowed by options.tables and by the table
    list file named in options.table_list.  An empty set means that no
    whitelist was given.
    """
    whitelist = set()

    # Was there one or more tables given on the command line?
    if options.tables:
        whitelist.update(options.tables)

    # Was there a table list file given on the command line?
    if options.table_list:
        tables_from_file = get_table_list_file_tables(options.table_list)
        if tables_from_file:
            whitelist.update(tables_from_file)

    return whitelist


def _collect_index_files(options):
    """
    Return the list of index files to process: options.indexes followed
    by the entries of the index list file named in options.index_list.
    """
    indexes = []

    # Was there one or more index files given on the command line?
    if options.indexes:
        indexes.extend(options.indexes)

    # Was there an index list file given on the command line?
    if options.index_list:
        indexes_from_file = get_index_list_file_indexes(options.index_list)
        if indexes_from_file:
            indexes.extend(indexes_from_file)

    return indexes


def _describe_table_properties(table):
    """
    Build the ' static', ' update', ' persist' fragment used in the
    "Sending ... table" log line from the properties set on the table.
    """
    return ''.join(' ' + name
                   for name in ('static', 'update', 'persist')
                   if table.get_property(name))


def read_and_send_tables(requestor, options):
    """
    Given the specified indexes, read the csv files and publish them
    to a qasino server.

    requestor -- object used for publishing; its send_table(table) result
                 is treated as an error unless it is None.  It is also
                 handed to publish_info_table() and publish_tables_table().
    options   -- object providing the attributes tables, table_list,
                 indexes, index_list (what to read) and hostname, port
                 (only used in log messages here).

    After all indexes have been processed an info table (table and error
    counts) and a table list table (per-table read details) are published.
    Nothing is returned.
    """

    nr_tables = 0
    nr_errors = 0
    table_info = {}

    # Tables to restrict processing to; empty means process every table.
    table_whitelist = _build_table_whitelist(options)

    # Create a csv table reader object.
    csv_table_reader = CsvTableReader()

    # Now process all the indexes.
    for index_file in _collect_index_files(options):

        # csv file names in an index are resolved relative to the index.
        index_dir = os.path.dirname(index_file) or "."

        csv_files = get_csv_files_from_index(index_file)

        if not csv_files:
            logging.info("Warning: no csv files found in index '%s'", index_file)
            continue

        for (filename, tablename) in csv_files:

            # Is this a tablename we can process?
            if table_whitelist and tablename not in table_whitelist:
                continue

            filepath = '/'.join([index_dir, filename])

            logging.info("Reading file '%s'.", filepath)

            # The csv files we'll be reading in will have this format:
            # ------------------------
            # version/options
            # tablename
            # column names (csv)
            # column types (csv)
            # column descriptions (csv)
            # data (csv)
            # ...
            # ------------------------

            # -1 marks values that are not known (yet); they stay that
            # way if the file cannot be opened or read.
            info = {
                "filepath": filepath,
                "nr_rows": -1,
                "nr_errors": 0,
                "error_message": '',
                "read_epoch": time.time(),
                "mtime": get_mtime(filepath),
                "read_time_s": -1,
            }
            table_info[tablename] = info

            # Deliberately broad: any failure to open is recorded against
            # the table and the remaining files are still processed.
            try:
                filehandle = open(filepath, 'r')
            except Exception as e:
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = str(e)
                logging.info("Failure opening csv file '%s': %s", filepath, str(e))
                continue

            # Ignore the 2nd and 5th lines.  Names in 3rd, types in 4th.
            # The "version" in the first line is now the options list.
            # 'with' guarantees the file is closed even if the reader raises.
            with filehandle:
                (table, error) = csv_table_reader.read_table(filehandle, tablename,
                                                             skip_linenos={1, 4},
                                                             options_lineno=0,
                                                             types_lineno=3,
                                                             colnames_lineno=2)

            info["read_time_s"] = time.time() - info["read_epoch"]

            if table is None:
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = error
                logging.info("Failure reading csv file '%s': %s", filepath, error)
                continue

            nr_tables += 1

            info["nr_rows"] = table.get_nr_rows()

            logging.info("Sending%s table '%s' to '%s:%s' (%s rows).",
                         _describe_table_properties(table), tablename,
                         options.hostname, options.port, info["nr_rows"])

            error = requestor.send_table(table)

            # NOTE(review): a send failure is only logged; as in the
            # original it is not counted in nr_errors or table_info.
            if error is not None:
                logging.info("Error sending table '%s': %s", tablename, error)

        # END for each csv file

    # END for each index

    # Publish an info table

    publish_info_table(requestor, nr_tables, nr_errors)

    # Publish a table list table.

    publish_tables_table(requestor, table_info)
Example #4
0
def read_and_send_tables(requestor, options):
    """
    Given the specified indexes, read the csv files and publish them
    to a qasino server.

    Arguments:
        requestor -- publisher object.  requestor.send_table(table) is
                     called for every table read successfully; any result
                     other than None is logged as a send error.  The
                     requestor is also passed to publish_info_table() and
                     publish_tables_table().
        options   -- object with the attributes read here: tables and
                     table_list (optional table whitelist), indexes and
                     index_list (index files to process), hostname and
                     port (used only in the log output).

    When every index has been processed, an info table (number of tables
    and errors) and a table list table (per-table details collected in
    table_info) are published.  The function returns nothing.
    """

    nr_tables = 0
    nr_errors = 0
    table_info = {}

    # Make a table whitelist lookup set.  It stays empty when no
    # whitelist was requested, in which case every table is processed.
    table_whitelist = set()

    # Was there one or more tables given on the command line?

    if options.tables:
        table_whitelist.update(options.tables)

    # Was there a table list file given on the command line?

    if options.table_list:

        tables_from_file = get_table_list_file_tables(options.table_list)

        if tables_from_file:
            table_whitelist.update(tables_from_file)

    # Create a csv table reader object.

    csv_table_reader = CsvTableReader()

    # This will be the list of indexes to process.
    indexes = []

    # Was there one or more index files given on the command line?

    if options.indexes:
        indexes.extend(options.indexes)

    # Was there an index list file given on the command line?

    if options.index_list:

        indexes_from_file = get_index_list_file_indexes(options.index_list)

        if indexes_from_file:
            indexes.extend(indexes_from_file)

    # Now process all the indexes.

    for index_file in indexes:

        # File names listed in an index are relative to the index itself.
        index_dir = os.path.dirname(index_file) or "."

        csv_files = get_csv_files_from_index(index_file)

        if not csv_files:

            logging.info("Warning: no csv files found in index '%s'",
                         index_file)
            continue

        for csv_file_item in csv_files:

            (filename, tablename) = csv_file_item

            # Is this a tablename we can process?

            if table_whitelist and tablename not in table_whitelist:
                continue

            filepath = '/'.join([index_dir, filename])

            logging.info("Reading file '%s'.", filepath)

            # The csv files we'll be reading in will have this format:
            # ------------------------
            # version/options
            # tablename
            # column names (csv)
            # column types (csv)
            # column descriptions (csv)
            # data (csv)
            # ...
            # ------------------------

            # Per-table bookkeeping; -1 means "not known", which is what
            # gets published if opening or reading the file fails.
            entry = {
                "filepath": filepath,
                "nr_rows": -1,
                "nr_errors": 0,
                "error_message": '',
                "read_epoch": time.time(),
                "mtime": get_mtime(filepath),
                "read_time_s": -1,
            }
            table_info[tablename] = entry

            # Broad on purpose: whatever prevents opening the file is
            # recorded for this table and processing moves on.
            try:
                filehandle = open(filepath, 'r')
            except Exception as e:
                nr_errors += 1
                entry["nr_errors"] = 1
                entry["error_message"] = str(e)
                logging.info("Failure opening csv file '%s': %s", filepath,
                             str(e))
                continue

            # Ignore the 2nd and 5th lines.  Names in 3rd, types in 4th.
            # The "version" in the first line is now the options list.
            # The context manager closes the file even when read_table()
            # raises, so no handle is leaked.
            with filehandle:
                (table, error) = csv_table_reader.read_table(
                    filehandle,
                    tablename,
                    skip_linenos={1, 4},
                    options_lineno=0,
                    types_lineno=3,
                    colnames_lineno=2)

            entry["read_time_s"] = time.time() - entry["read_epoch"]

            if table is None:
                nr_errors += 1
                entry["nr_errors"] = 1
                entry["error_message"] = error
                logging.info("Failure reading csv file '%s': %s", filepath,
                             error)
                continue

            nr_tables += 1

            entry["nr_rows"] = table.get_nr_rows()

            # Each property that is set adds a word to the log message.
            properties = [' ' + name
                          for name in ('static', 'update', 'persist')
                          if table.get_property(name)]

            logging.info("Sending%s table '%s' to '%s:%s' (%s rows).",
                         ''.join(properties), tablename, options.hostname,
                         options.port, entry["nr_rows"])

            error = requestor.send_table(table)

            # NOTE(review): send failures are logged only -- they are not
            # added to nr_errors or table_info, same as before.
            if error is not None:
                logging.info("Error sending table '%s': %s", tablename,
                             error)

        # END for each csv file

    # END for each index

    # Publish an info table

    publish_info_table(requestor, nr_tables, nr_errors)

    # Publish a table list table.

    publish_tables_table(requestor, table_info)