def read_csv_table(filehandle, options):
    """
    Read a single csv table from the input file handle.

    The table name is not supplied by the caller (None is passed to the
    reader); it is taken from the second line of the file instead.

    Args:
        filehandle: file handle handed to CsvTableReader.read_table.
        options: not used by this function; kept so existing callers
            keep working.

    Returns:
        The table returned by CsvTableReader.read_table, or None when the
        reader reports a failure (the error is printed in that case).
    """
    from csv_table_reader import CsvTableReader

    # Create a csv table reader object.
    csv_table_reader = CsvTableReader()

    # The csv files we'll be reading in will have this format
    # (line numbers passed to the reader are 0-based):
    # ------------------------
    # 0: version/options
    # 1: tablename
    # 2: column names (csv)
    # 3: column types (csv)
    # 4: column descriptions (csv)
    # 5+: data (csv)
    # ------------------------
    # Only the column descriptions line is skipped here.  Unlike the
    # index-driven reader, the tablename line is consumed because no
    # table name is passed in.  The "version" in the first line is now
    # the options list.
    (table, error) = csv_table_reader.read_table(
        filehandle, None,
        skip_linenos={4},
        options_lineno=0,
        types_lineno=3,
        tablename_lineno=1,
        colnames_lineno=2)

    if table is None:
        # Parenthesised single-argument form prints identically under
        # both the print statement and the print function.
        print("Failure reading csv file: {}".format(error))
        return None

    return table
def read_and_send_tables(requestor, options):
    """
    Given the specified indexes, read the csv files and publish them
    to a qasino server.

    Args:
        requestor: object whose send_table(table) returns None on success
            or an error value on failure; it is also handed to
            publish_info_table and publish_tables_table.
        options: object providing the attributes tables, table_list,
            indexes, index_list, hostname and port.

    Returns:
        None.  Per-table results are collected and published through
        publish_info_table and publish_tables_table.
    """
    # NOTE(review): the visible source defines read_and_send_tables a
    # second time further down; the later definition is the one that is
    # bound after the module is loaded.

    nr_tables = 0
    nr_errors = 0
    table_info = {}

    # Collect the table whitelist from the command line and/or from a
    # table list file.  An empty whitelist means "process every table".
    table_whitelist = set()

    if options.tables:
        table_whitelist.update(options.tables)

    if options.table_list:
        tables_from_file = get_table_list_file_tables(options.table_list)
        if tables_from_file:
            table_whitelist.update(tables_from_file)

    use_table_whitelist = bool(table_whitelist)

    # Create a csv table reader object.
    csv_table_reader = CsvTableReader()

    # This will be the list of indexes to process: index files given
    # directly first, then the ones named in an index list file.
    indexes = []

    if options.indexes:
        indexes.extend(options.indexes)

    if options.index_list:
        indexes_from_file = get_index_list_file_indexes(options.index_list)
        if indexes_from_file:
            indexes.extend(indexes_from_file)

    # Now process all the indexes.
    for index_file in indexes:

        # Csv file names in an index are resolved relative to the
        # directory holding the index file.
        index_dir = os.path.dirname(index_file) or "."

        csv_files = get_csv_files_from_index(index_file)

        if not csv_files:
            logging.info("Warning: no csv files found in index '%s'",
                         index_file)
            continue

        for (filename, tablename) in csv_files:

            # Is this a tablename we can process?
            if use_table_whitelist and tablename not in table_whitelist:
                continue

            filepath = '/'.join([index_dir, filename])

            logging.info("Reading file '%s'.", filepath)

            # nr_rows and read_time_s stay at -1 until the matching step
            # has actually succeeded.
            info = {
                "filepath": filepath,
                "nr_rows": -1,
                "nr_errors": 0,
                "error_message": '',
                "read_epoch": time.time(),
                "mtime": get_mtime(filepath),
                "read_time_s": -1,
            }
            table_info[tablename] = info

            try:
                filehandle = open(filepath, 'r')
            except Exception as e:
                # Best effort: record the failure and move on to the
                # next csv file.
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = str(e)
                logging.info("Failure opening csv file '%s': %s",
                             filepath, str(e))
                continue

            # The csv files we'll be reading in will have this format
            # (line numbers passed to the reader are 0-based):
            # ------------------------
            # 0: version/options
            # 1: tablename
            # 2: column names (csv)
            # 3: column types (csv)
            # 4: column descriptions (csv)
            # 5+: data (csv)
            # ------------------------
            # Ignore the 2nd and 5th lines.  Names in 3rd, types in 4th.
            # The "version" in the first line is now the options list.
            # The with-block guarantees the file is closed even when
            # read_table raises.
            with filehandle:
                (table, error) = csv_table_reader.read_table(
                    filehandle, tablename,
                    skip_linenos={1, 4},
                    options_lineno=0,
                    types_lineno=3,
                    colnames_lineno=2)

            info["read_time_s"] = time.time() - info["read_epoch"]

            if table is None:
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = error
                logging.info("Failure reading csv file '%s': %s",
                             filepath, error)
                continue

            nr_tables += 1

            info["nr_rows"] = table.get_nr_rows()

            # Table properties are only used to decorate the log line.
            properties = []
            if table.get_property('static'):
                properties.append(' static')
            if table.get_property('update'):
                properties.append(' update')
            if table.get_property('persist'):
                properties.append(' persist')

            logging.info("Sending{} table '{}' to '{}:{}' ({} rows).".format(
                ''.join(properties), tablename,
                options.hostname, options.port,
                info["nr_rows"]))

            # NOTE(review): a send failure is logged but is not added to
            # nr_errors or table_info -- confirm this is intended.
            error = requestor.send_table(table)
            if error is not None:
                logging.info("Error sending table '{}': {}".format(
                    tablename, error))

        # END for each csv file

    # END for each index

    # Publish an info table
    publish_info_table(requestor, nr_tables, nr_errors)

    # Publish a table list table.
    publish_tables_table(requestor, table_info)
def read_and_send_tables(requestor, options):
    """
    Given the specified indexes, read the csv files and publish them
    to a qasino server.

    Args:
        requestor: object whose send_table(table) returns None on success
            or an error value on failure; it is also handed to
            publish_info_table and publish_tables_table.
        options: object providing the attributes tables, table_list,
            indexes, index_list, hostname and port.

    Returns:
        None.  Per-table results are collected and published through
        publish_info_table and publish_tables_table.
    """
    # NOTE(review): the visible source also contains an earlier
    # definition with this same name; this later one replaces it once
    # the module is loaded.

    nr_tables = 0
    nr_errors = 0
    table_info = {}

    # Collect the table whitelist from the command line and/or from a
    # table list file.  An empty whitelist means "process every table".
    table_whitelist = set()

    if options.tables:
        table_whitelist.update(options.tables)

    if options.table_list:
        tables_from_file = get_table_list_file_tables(options.table_list)
        if tables_from_file:
            table_whitelist.update(tables_from_file)

    use_table_whitelist = bool(table_whitelist)

    # Create a csv table reader object.
    csv_table_reader = CsvTableReader()

    # This will be the list of indexes to process: index files given
    # directly first, then the ones named in an index list file.
    indexes = []

    if options.indexes:
        indexes.extend(options.indexes)

    if options.index_list:
        indexes_from_file = get_index_list_file_indexes(options.index_list)
        if indexes_from_file:
            indexes.extend(indexes_from_file)

    # Now process all the indexes.
    for index_file in indexes:

        # Csv file names in an index are resolved relative to the
        # directory holding the index file.
        index_dir = os.path.dirname(index_file) or "."

        csv_files = get_csv_files_from_index(index_file)

        if not csv_files:
            logging.info("Warning: no csv files found in index '%s'",
                         index_file)
            continue

        for (filename, tablename) in csv_files:

            # Is this a tablename we can process?
            if use_table_whitelist and tablename not in table_whitelist:
                continue

            filepath = '/'.join([index_dir, filename])

            logging.info("Reading file '%s'.", filepath)

            # nr_rows and read_time_s stay at -1 until the matching step
            # has actually succeeded.
            info = {
                "filepath": filepath,
                "nr_rows": -1,
                "nr_errors": 0,
                "error_message": '',
                "read_epoch": time.time(),
                "mtime": get_mtime(filepath),
                "read_time_s": -1,
            }
            table_info[tablename] = info

            try:
                filehandle = open(filepath, 'r')
            except Exception as e:
                # Best effort: record the failure and move on to the
                # next csv file.
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = str(e)
                logging.info("Failure opening csv file '%s': %s",
                             filepath, str(e))
                continue

            # The csv files we'll be reading in will have this format
            # (line numbers passed to the reader are 0-based):
            # ------------------------
            # 0: version/options
            # 1: tablename
            # 2: column names (csv)
            # 3: column types (csv)
            # 4: column descriptions (csv)
            # 5+: data (csv)
            # ------------------------
            # Ignore the 2nd and 5th lines.  Names in 3rd, types in 4th.
            # The "version" in the first line is now the options list.
            # The with-block guarantees the file is closed even when
            # read_table raises.
            with filehandle:
                (table, error) = csv_table_reader.read_table(
                    filehandle, tablename,
                    skip_linenos={1, 4},
                    options_lineno=0,
                    types_lineno=3,
                    colnames_lineno=2)

            info["read_time_s"] = time.time() - info["read_epoch"]

            if table is None:
                nr_errors += 1
                info["nr_errors"] = 1
                info["error_message"] = error
                logging.info("Failure reading csv file '%s': %s",
                             filepath, error)
                continue

            nr_tables += 1

            info["nr_rows"] = table.get_nr_rows()

            # Table properties are only used to decorate the log line.
            properties = []
            if table.get_property('static'):
                properties.append(' static')
            if table.get_property('update'):
                properties.append(' update')
            if table.get_property('persist'):
                properties.append(' persist')

            logging.info("Sending{} table '{}' to '{}:{}' ({} rows).".format(
                ''.join(properties), tablename,
                options.hostname, options.port,
                info["nr_rows"]))

            # NOTE(review): a send failure is logged but is not added to
            # nr_errors or table_info -- confirm this is intended.
            error = requestor.send_table(table)
            if error is not None:
                logging.info("Error sending table '{}': {}".format(
                    tablename, error))

        # END for each csv file

    # END for each index

    # Publish an info table
    publish_info_table(requestor, nr_tables, nr_errors)

    # Publish a table list table.
    publish_tables_table(requestor, table_info)