def replace_txt_remove_right(target, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Remove all text to the right of the regex *target* in each cell.

    Reads input_data_file (CSV), truncates every matching cell at the end of
    the first match of *target*, and writes the result to output_data_file.
    Cells without a match are copied unchanged.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Remove text right of {}'.format(target))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            modified_text = line
            for i, item in enumerate(line):
                record = record + 1
                # Search once and reuse the match object
                # (the original ran the same regex search twice per cell).
                search_object = re.search(target, item)
                if search_object:
                    count = count + 1
                    modified_text[i] = item[:search_object.end()]
                    if print_flag:
                        logger.info('{:10.0f} {:>20} {:>20}'.format(float(record), item, modified_text[i]))
                else:
                    modified_text[i] = item
            output.writerow(modified_text)
        logger.info('\n\t Total number ={}'.format(count))
def utm_to_latlong(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Converts UTM coordinates into latitude/longitude.

    assumes rows are easting, northing, zone number, either 'N' for
    northern hemisphere or 'S' for southern hemisphere
    """
    logger = logger_message(__name__, log_file, log_level)
    # Check required input and output data file names were given.
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    # `with` guarantees both files close, replacing the nested try/finally.
    with open(input_data_file, 'r') as _in, open(output_data_file, 'w') as _out:
        output = csv.writer(_out)
        for row_ind, row in enumerate(csv.reader(_in)):
            east = float(row[0])
            north = float(row[1])
            zone = int(row[2])
            latlong = utm.to_latlon(east, north, zone, northern=('N' == row[3]))
            logger.info('Changed row {} from: {} to: {}'.format(row_ind, (row[0], row[1]), latlong))
            output.writerow(latlong)
def make_kml_point(input_data_file=None, output_data_file=None, log_file=None):
    """Make a KML point file from CSV rows of (name, longitude, latitude)."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml point file: {}'.format(output_data_file))
        reader = csv.reader(_in)
        # Copy rows into memory; each row is [name, longitude, latitude].
        original_values = [list(line) for line in reader]
        logger.info('\tNumber points: {}'.format(len(original_values)))
    # FIX: the original also opened output_data_file with a csv.writer that
    # was never used, holding the same path open while simplekml wrote it.
    # simplekml opens and writes the file itself.
    kml = simplekml.Kml()
    for line in original_values:
        kml.newpoint(name=line[0], coords=[(line[1], line[2])])
    kml.save(output_data_file)
def chk_num_columns(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Check that every row has the same number of columns.

    The first row is assumed to have the correct number of columns.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    count = 0
    record = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Checking the number of columns')
        for row_index, line in enumerate(csv.reader(_in)):
            record += 1
            if row_index == 0:
                # First row defines the expected width.
                ref_num_column = len(line)
                logger.info('\tCorrect number of columns: {:10.0f}'.format(float(ref_num_column)))
                if print_flag:
                    logger.info('{:>10} {:>20}'.format('Record', 'Num Columns'))
            num_column = len(line)
            if num_column != ref_num_column:
                count += 1
                if print_flag:
                    logger.info('{:10.0f} {:>20} '.format(float(record), num_column))
        logger.info('\tTotal number rows with incorrect number columns={}'.format(count))
def calc_dec_deg_to_deg_min_sec(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    # Convert decimal degrees to degrees, minutes, seconds.
    # (The original comment/log stated the opposite direction; the function
    # name and the code both convert decimal degrees -> deg/min/sec.)
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert decimal degrees to degrees minutes seconds')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            value = float(line[0])
            if value != float(missing_value):
                decimal_degree = math.fabs(value)
                # copysign avoids ZeroDivisionError for 0.0 input
                # (original computed value / |value|).
                sign = math.copysign(1.0, value)
                degree = math.trunc(decimal_degree)
                minute = (decimal_degree - degree) * 60.
                second = (minute - math.trunc(minute)) * 60.
                minute = math.trunc(minute)
                degree = sign * degree
            else:
                degree = float(missing_value)
                minute = float(missing_value)
                second = float(missing_value)
            new_line.append(degree)
            new_line.append(minute)
            new_line.append(second)
            # Seconds keep 4 decimals; the original's '{:.0f}' truncated them
            # (the intended format survives in its commented-out writerow).
            output.writerow(['{:.0f}'.format(new_line[0]),
                             '{:.0f}'.format(new_line[1]),
                             '{:.4f}'.format(new_line[2])])
def replace_num_out_range_equal(startval, endval, constant, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Replace values <= startval or >= endval with *constant*.

    NOTE: a duplicate definition with a log_level parameter appears later in
    this module and shadows this one at import time.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    lower = float(startval)
    upper = float(endval)
    replacement = float(constant)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Replacing <={} or >={} with {}'.format(startval, endval, constant))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        output = csv.writer(_out)
        for row in csv.reader(_in):
            new_line = array('f')
            for cell in row:
                record += 1
                numeric = float(cell)
                if numeric <= lower or numeric >= upper:
                    if print_flag:
                        logger.info('{:10.0f} {:>20} {:>20}'.format(float(record), cell, constant))
                    count += 1
                    new_line.append(replacement)
                else:
                    new_line.append(numeric)
            output.writerow(['{:.2f}'.format(x) for x in new_line])
        logger.info('\n\t Total number ={}'.format(count))
def replace_txt_remove_left(target, print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Remove all text to the left of the regex *target* in each cell.

    Reads input_data_file (CSV), keeps each matching cell from the start of
    the first match of *target* onward, and writes the result to
    output_data_file. Cells without a match are copied unchanged.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Remove text left of {}'.format(target))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            modified_text = line
            for i, item in enumerate(line):
                record = record + 1
                # Search once and reuse the match object
                # (the original ran the same regex search twice per cell).
                search_object = re.search(target, item)
                if search_object:
                    count = count + 1
                    modified_text[i] = item[search_object.start():]
                    if print_flag:
                        logger.info('{:10.0f} {:>20} {:>20}'.format(float(record), item, modified_text[i]))
                else:
                    modified_text[i] = item
            output.writerow(modified_text)
        logger.info('\n\t Total number ={}'.format(count))
def replace_txt_empty(replace, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Replace empty cells (empty strings) with the text *replace*."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Replacing empty string with {}'.format(replace))
        if print_flag:
            logger.info('{:>10} {:>20}'.format('Record', 'New Value'))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            modified_text = line
            for i, item in enumerate(line):
                record = record + 1
                # FIX: removed an unconditional logger.info of every cell value
                # -- leftover debug output that bypassed the print_flag gate.
                if item == '':
                    if print_flag:
                        logger.info('{:10.0f} {:>20}'.format(float(record), replace))
                    count = count + 1
                    modified_text[i] = replace
                else:
                    modified_text[i] = item
            output.writerow(modified_text)
        logger.info('\n\t Total number ={}'.format(count))
def chk_nans(print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Report cells whose content is text rather than a parseable number."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Find values that are not numbers')
        if print_flag:
            logger.info('{:>10}{:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for cell in row:
                record += 1
                if not is_float(cell):
                    count += 1
                    if print_flag:
                        logger.info('{:10.0f}{:>20}'.format(float(record), cell))
        logger.info('\tTotal number ={}'.format(count))
def replace_num_out_range_equal(startval, endval, constant, print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Replace values <= startval or >= endval with *constant*."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Replacing <={} or >={} with {}'.format(startval, endval, constant))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            values = array('f')
            for cell in row:
                record += 1
                cell_value = float(cell)
                out_of_range = cell_value <= float(startval) or cell_value >= float(endval)
                if out_of_range:
                    if print_flag:
                        logger.info('{:10.0f} {:>20} {:>20}'.format(float(record), cell, constant))
                    count += 1
                    values.append(float(constant))
                else:
                    values.append(cell_value)
            writer.writerow(['{:.2f}'.format(v) for v in values])
        logger.info('\n\t Total number ={}'.format(count))
def make_kml_point(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Make a KML point file from CSV rows of (name, longitude, latitude)."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml point file: {}'.format(output_data_file))
        reader = csv.reader(_in)
        # Copy rows into memory; each row is [name, longitude, latitude].
        original_values = [list(line) for line in reader]
        logger.info('\tNumber points: {}'.format(len(original_values)))
    # FIX: the original also opened output_data_file with a csv.writer that
    # was never used, holding the same path open while simplekml wrote it.
    # simplekml opens and writes the file itself.
    kml = simplekml.Kml()
    for line in original_values:
        kml.newpoint(name=line[0], coords=[(line[1], line[2])])
    kml.save(output_data_file)
def math_multiply_constant(constant, missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Multiply every value in input_data_file by *constant*.

    Values equal to missing_value pass through unchanged; cells that are not
    numbers are reported and written out as missing_value.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    NaN_toggle = True
    NaN_count = 0
    record = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Multiplying column by {}'.format(constant))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            for item in line:
                record = record + 1
                if is_number(item):
                    if float(item) != float(missing_value):
                        value = float(item) * float(constant)
                    else:
                        value = float(missing_value)
                    new_line.append(value)
                else:
                    NaN_count = NaN_count + 1
                    if NaN_toggle:
                        # print the legend only once
                        logger.info(' Records with non-number entry types:'
                                    '\n{:>15} {:>20}'.format('Record', 'Value'))
                        NaN_toggle = False
                    logger.info('{:15.0f} {:>20}'.format(float(record), item))
                    # BUG FIX: array('f') only accepts floats; the original
                    # appended missing_value unconverted, which raises
                    # TypeError when missing_value is passed as a string.
                    new_line.append(float(missing_value))
            output.writerow(['{:.2f}'.format(x) for x in new_line])
        logger.info(' Total number of non-number entries: {}'.format(NaN_count))
def calc_copy_col(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Copy every row of the input CSV unchanged to the output CSV."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Copy input column to output column')
        writer = csv.writer(_out)
        writer.writerows(csv.reader(_in))
def cond_if_greater(criteria, constant, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """If column A > criteria, write [A, constant]; otherwise copy the row.

    BUG FIX: the original ran the conditional once per column of each row,
    appending the (A, constant) pair -- or the whole row -- multiple times
    and double-counting matches. Each row is now evaluated exactly once.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('If column A > {} set column B = {}'.format(
            criteria, constant))
        if print_flag:
            logger.info('{:>10}{:>20}{:>20}{:>20}'.format(
                'Record', 'Col A Value', 'Old Col B Value', 'New Col B Value'))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        original_values = [list(line) for line in reader]
        record = 0
        count = 0
        for line in original_values:
            record = record + 1
            col_a_value = line[0]
            # Last column is the B value being replaced (2-column input).
            old_col_b_value = line[-1]
            if float(col_a_value) > float(criteria):
                count = count + 1
                new_line = [col_a_value, constant]
                if print_flag:
                    logger.info('{:10.0f}{:>20}{:>20}{:>20}'.format(
                        float(record), col_a_value, old_col_b_value, constant))
            else:
                new_line = list(line)
            output.writerow(['{}'.format(x) for x in new_line])
        logger.info('\t Total number ={}'.format(count))
def chk_print_num_less_equal(criteria, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Print values <= criteria found in input_data_file."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    threshold = float(criteria)
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values <= {}'.format(criteria))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for cell in row:
                record += 1
                if float(cell) <= threshold:
                    count += 1
                    logger.info('{:10.0f} {:>20}'.format(float(record), cell))
        logger.info('\n\t Total number <={}: {:10.0f}'.format(criteria, float(count)))
def chk_print_num_less(criteria, input_data_file=None, output_data_file=None, log_file=None):
    """Print values strictly less than criteria found in input_data_file."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    threshold = float(criteria)
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values < {}'.format(criteria))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for cell in row:
                record += 1
                if float(cell) < threshold:
                    count += 1
                    logger.info('{:10.0f} {:>20}'.format(float(record), cell))
        logger.info('\n\t Total number < {}: {:10.0f}'.format(criteria, float(count)))
def replace_txt_fill(fill, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Overwrite every cell of the input CSV with the text *fill*."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Filling with {}'.format(fill))
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            record += len(row)
            writer.writerow([fill] * len(row))
        logger.info('\n\t Total number ={}'.format(record))
def calc_subtract_col(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """output = in_column_A - in_column_B, preserving missing values.

    (The original comment and log message said '+', but the code subtracts.)
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        # FIX: log text corrected from '(out = column_a +column_b)'.
        logger.info('Subtract two columns (out = column_a - column_b)')
        output = csv.writer(_out)
        for line in csv.reader(_in):
            new_line = array('f')
            a = float(line[0])
            b = float(line[1])
            if a != float(missing_value) and b != float(missing_value):
                value = a - b
            else:
                value = float(missing_value)
            # array('f') keeps the original single-precision rounding of output
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
def cond_if_contains(criteria, constant, print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """If column A contains criteria, write [A, constant]; else copy the row.

    BUG FIX: the original ran the conditional once per column of each row,
    appending the (A, constant) pair -- or the whole row -- multiple times
    and double-counting matches. Each row is now evaluated exactly once.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('If column A contains {} set column B = {}'.format(criteria, constant))
        if print_flag:
            logger.info('{:>10}{:>20}{:>20}{:>20}'.format('Record', 'Col A Value', 'Old Col B Value', 'New Col B Value'))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        original_values = [list(line) for line in reader]
        record = 0
        count = 0
        for line in original_values:
            record = record + 1
            col_a_value = line[0]
            # Last column is the B value being replaced (2-column input).
            old_col_b_value = line[-1]
            if re.search(str(criteria), str(col_a_value)):
                count = count + 1
                new_line = [col_a_value, constant]
                if print_flag:
                    logger.info('{:10.0f}{:>20}{:>20}{:>20}'.format(float(record), col_a_value, old_col_b_value, constant))
            else:
                new_line = list(line)
            output.writerow(['{}'.format(x) for x in new_line])
        logger.info('\t Total number ={}'.format(count))
def sort_by_columns(column_list, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """
    Takes a list of columns to sort by in ascending order.

    :param input_data_file: CSV file to sort
    :param output_data_file: sorted CSV file
    :param column_list: list of tuples (index, type) describing sort columns
    """
    logger = logger_message(__name__, log_file, log_level)
    logger.info('Sorting input file by columns:')
    # Accept a string form of the column list as well; tuple_list converts it.
    if isinstance(column_list, str):
        column_list = tuple_list(column_list)
    # Shift the 1-based user column indices to 0-based list indices.
    shifted_list = []
    for index, ind_type in column_list:
        index = index - 1
        new_tuple = (index, ind_type)
        logger.info('\t' + str(new_tuple))
        shifted_list.append(new_tuple)
    # NOTE(review): this writer's file handle is never explicitly closed.
    sorted_writer = csv.writer(open(output_data_file, 'w'), quotechar="'", quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
    header_row = None
    sorted_data = []
    with open(input_data_file, 'r') as csvfile:
        unsorted_reader = csv.reader(csvfile, delimiter=',', quotechar="'")
        csv_data = []
        ind = 0
        for row in unsorted_reader:
            row = [cast_data_value(col_val.strip()) for col_val in row]
            if ind > 0:
                typed_row = create_typed_row(row, shifted_list, logger)
                csv_data.append(typed_row)
            else:
                # First row is kept aside as the header.
                header_row = row
            ind += 1
        sorted_data = csv_data
        # Stable sorts applied in reverse key order give multi-key ordering.
        for index, type in reversed(shifted_list):
            sorted_data = sorted(sorted_data, key=lambda sort_by: sort_by[index])
        sorted_writer.writerow(header_row)
        for sorted_row in sorted_data:
            # NOTE(review): date_time_index and gtnp_date_time_format are not
            # defined anywhere in this function -- presumably module-level
            # globals; if they are not, this raises NameError on the first
            # sorted row. TODO confirm where they are set.
            if date_time_index is not None:
                row_list = list(sorted_row)
                row_list[date_time_index] = row_list[date_time_index].strftime(gtnp_date_time_format)
                sorted_row = tuple(row_list)
            sorted_writer.writerow(sorted_row)
def calc_copy_col_mult_const(constant, missing_value, input_data_file=None, output_data_file=None, log_file=None):
    """out = in * constant, passing missing values through unchanged."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    factor = float(constant)
    missing = float(missing_value)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Copy column and multiply by constant (out = in * constant)')
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            # array('f') keeps the original single-precision output rounding.
            values = array('f', (float(cell) * factor if float(cell) != missing
                                 else missing for cell in row))
            writer.writerow(['{:.8f}'.format(v) for v in values])
def chk_count_valid_records_num(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Count valid records (values != missing_value) in input_data_file."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    count = 0
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print valid values (not {})'.format(missing_value))
        logger.info('{:>10} {:>10} {:>10}'.format('Total', 'Valid', 'Percent'))
        reader = csv.reader(_in)
        for line in reader:
            for item in line:
                record = record + 1
                if float(item) != float(missing_value):
                    count = count + 1
    # Robustness: an empty input file previously raised ZeroDivisionError.
    valid_fraction = float(count) / float(record) * 100. if record else 0.
    logger.info('{:10.0f} {:10.0f} {:10.3f}'.format(float(record), float(count), float(valid_fraction)))
def chk_count_valid_records_num(missing_value, input_data_file=None, output_data_file=None, log_file=None):
    """Count valid records (values != missing_value) in input_data_file."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    count = 0
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print valid values (not {})'.format(missing_value))
        logger.info('{:>10} {:>10} {:>10}'.format('Total', 'Valid', 'Percent'))
        for line in csv.reader(_in):
            for item in line:
                record = record + 1
                if float(item) != float(missing_value):
                    count = count + 1
    # Robustness: an empty input file previously raised ZeroDivisionError.
    if record:
        valid_fraction = float(count) / float(record) * 100.
    else:
        valid_fraction = 0.
    logger.info('{:10.0f} {:10.0f} {:10.3f}'.format(float(record), float(count), float(valid_fraction)))
def chk_print_num_in_range(startval, endval, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Print values strictly between startval and endval."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    low = float(startval)
    high = float(endval)
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values > {} and < {}'.format(startval, endval))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for cell in row:
                record += 1
                if low < float(cell) < high:
                    count += 1
                    logger.info('{:10.0f} {:>20}'.format(float(record), cell))
        logger.info('\n\t Total number > {} and < {}: {:10.0f}'.format(startval, endval, float(count)))
def chk_statistics(missing_value, input_data_file=None, output_data_file=None, log_file=None):
    """Calculate summary statistics for each input column."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        # FIX: log text corrected -- the original said 'Count distinct values',
        # a copy/paste error; this function computes column statistics
        # (its duplicate definition below logs 'Calculate Statistics').
        logger.info('Calculate statistics')
        reader = csv.reader(_in)
        original_values = []
        for line in reader:
            original_values.append(list(line))
            column = len(line)
        logger.info('\tTotal number ={}'.format(column))
        logger.info('{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'.format(
            'Col', 'nrec', 'Mean', 'Stdev', 'Median', 'Min', 'Max'))
        for i in range(column):
            # NOTE(review): statistics.stdev needs >= 2 valid values and mean
            # needs >= 1; columns with fewer valid entries raise
            # StatisticsError (unchanged from the original behavior).
            Column_valid = [float(row[i]) for row in original_values
                            if float(row[i]) != float(missing_value)]
            count = len(Column_valid)
            mean = statistics.mean(Column_valid)
            stdev = statistics.stdev(Column_valid)
            median = statistics.median(Column_valid)
            minimum = min(Column_valid)
            maximum = max(Column_valid)
            logger.info(
                '{:>10.0f}{:>10.0f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}'
                .format(i + 1, count, mean, stdev, median, minimum, maximum))
def chk_statistics(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Calculate summary statistics (with quartiles) for each input column."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    missing = float(missing_value)
    with open(input_data_file, newline='') as _in:
        logger.info('Calculate Statistics')
        data = []
        for line in csv.reader(_in):
            data.append(list(line))
            width = len(line)
        logger.info('\tTotal number ={}'.format(width))
        logger.info('{:>4}{:>6}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'
                    .format('Col', 'nrec', 'Mean', 'Stdev', 'Min', '1st_qrtl',
                            'Median', '3rd_qrtl', 'Max'))
        for col in range(width):
            valid = [float(row[col]) for row in data
                     if float(row[col]) != missing]
            valid.sort()
            n_valid = len(valid)
            # Quartiles via medians of the lower/upper halves.
            middle = int(n_valid / 2.)
            logger.info('{:>4.0f}{:>6.0f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}'
                        .format(col + 1, n_valid,
                                statistics.mean(valid),
                                statistics.stdev(valid),
                                min(valid),
                                statistics.median(valid[:middle]),
                                statistics.median(valid),
                                statistics.median(valid[middle:]),
                                max(valid)))
def calc_deg_min_sec_to_dec_deg(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Convert degrees, minutes, seconds columns to decimal degrees."""
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert degrees minutes seconds to decimal degrees')
        output = csv.writer(_out)
        missing = float(missing_value)
        for line in csv.reader(_in):
            new_line = array('f')
            degree = float(line[0])
            minute = float(line[1])
            second = float(line[2])
            if degree != missing and minute != missing and second != missing:
                # copysign avoids ZeroDivisionError when degrees == 0
                # (original computed degree / |degree|). NOTE: a 0-degree row
                # cannot carry a negative sign in this representation.
                sign = math.copysign(1.0, degree)
                value = sign * (math.fabs(degree) + minute / 60. + second / 3600.)
            else:
                value = missing
            # array('f') keeps the original single-precision output rounding
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
def replace_txt_fill(fill, input_data_file=None, output_data_file=None, log_file=None):
    """Overwrite every cell of the input CSV with the text *fill*."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    total_cells = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Filling with {}'.format(fill))
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            filled = [fill for _ in row]
            total_cells += len(filled)
            writer.writerow(filled)
        logger.info('\n\t Total number ={}'.format(total_cells))
def calc_vwc_gpr(missing_value, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Calculate volumetric water content (VWC) given TWTT and ALT.

    TWTT is two way travel time from ground penetrating radar (ns).
    VWC is volumetric water content: ratio water volume to total soil volume (m3/m3).
    ALT is active layer thickness: maximum thaw depth at end of summer (cm).
    velocity = wave velocity (cm/ns).
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    missing = float(missing_value)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Calculate VWC given TWTT and ALT')
        output = csv.writer(_out)
        if print_flag:
            logger.info('\t{:>15} {:>15} {:>15} {:>15} {:>15}'.format('twtt (ns)','alt (cm)','velocity (cm/ns)', 'dielectric (-)', 'vwc (-)'))
        for line in csv.reader(_in):
            twtt = float(line[0])
            alt = float(line[1])
            if twtt == missing or alt == missing:
                velocity = missing
                dielectric = missing
                vwc = missing
            else:
                velocity = alt * 2. / twtt
                dielectric = (30. / velocity) ** 2.
                # Empirical cubic polynomial in the dielectric constant,
                # scaled to a fraction and capped at 1.
                vwc = -2.5 + 2.508 * dielectric - 3.634e-2 * dielectric * dielectric
                vwc = vwc + 2.394e-4 * dielectric * dielectric * dielectric
                vwc = vwc / 100.
                vwc = min(vwc, 1.)
            if print_flag:
                logger.info('\t{:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f}'.format(twtt, alt, velocity, dielectric, vwc))
            output.writerow([velocity, dielectric, vwc])
def chk_print_num_in_range_equal(startval, endval, input_data_file=None, output_data_file=None, log_file=None):
    """Print values >= startval and <= endval in input_data_file."""
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    low = float(startval)
    high = float(endval)
    record = 0
    count = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values >= {} and <= {}'.format(startval, endval))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for cell in row:
                record += 1
                if low <= float(cell) <= high:
                    count += 1
                    logger.info('{:10.0f} {:>20}'.format(float(record), cell))
        logger.info('\n\t Total number >= {} and <= {}: {:10.0f}'.format(
            startval, endval, float(count)))
def math_add_constant(constant, missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Add a constant to every numeric field of the input CSV and write the result.

    Non-numeric fields are logged and replaced by missing_value; fields already
    equal to missing_value are passed through unchanged. Arithmetic is done in
    Decimal (via cast_float_to_decimal) to avoid float representation noise.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values to be added to is required.'
    assert output_data_file is not None, 'An output CSV file to write new values to is required.'
    legend_pending = True  # print the non-number legend at most once
    bad_count = 0
    field_num = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Adding {} to the column'.format(constant))
        output = csv.writer(_out)
        reader = csv.reader(_in, quotechar="'", quoting=csv.QUOTE_ALL)
        offset = cast_float_to_decimal(constant)
        missing_dec = cast_float_to_decimal(missing_value)
        for row in reader:
            out_row = []
            for cell in row:
                field_num += 1
                if not is_number(cell):
                    bad_count += 1
                    if legend_pending:
                        logger.info(' Records with non-number entry types:'
                                    '\n{:>15} {:>20}'.format('Record', 'Value'))
                        legend_pending = False
                    logger.info('{:15.0f} {:>20}'.format(float(field_num), cell))
                    out_row.append(missing_value)
                    continue
                cell_dec = cast_float_to_decimal(cell)
                total = cell_dec + offset if cell_dec != missing_dec else missing_dec
                out_row.append(str(total))
            output.writerow(out_row)
    logger.info(' Total number of non-number entries: {}'.format(bad_count))
def chk_num_columns(print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Check that every row of the input CSV has the same number of columns.

    The first row is taken as the reference width. Rows with a different width
    are counted and (when print_flag is truthy) logged individually.

    Fix: the row width was measured by manually incrementing a counter over
    every item of every row; replaced with len(line). The int-vs-int width
    comparison no longer round-trips through float(). Log output is unchanged.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    record = 0
    count = 0
    ref_num_column = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        # NOTE: _out is opened (truncating any existing file) but never written,
        # matching the original behavior of this check-only routine.
        logger.info('Checking the number of columns')
        reader = csv.reader(_in)
        for i, line in enumerate(reader):
            record += 1
            if i == 0:
                ref_num_column = len(line)
                logger.info('\tCorrect number of columns: {:10.0f}'.format(
                    float(ref_num_column)))
                if print_flag:
                    logger.info('{:>10} {:>20}'.format('Record', 'Num Columns'))
            num_column = len(line)
            if num_column != ref_num_column:
                count += 1
                if print_flag:
                    logger.info('{:10.0f} {:>20} '.format(
                        float(record), num_column))
    logger.info(
        '\tTotal number rows with incorrect number columns={}'.format(
            count))
def chk_count_distinct(input_data_file=None, output_data_file=None, log_file=None):
    """Count distinct field values in the input CSV and log each value's frequency.

    Logs one line per distinct value (sorted lexicographically): running index,
    the value, its occurrence count, and its percentage of all fields.

    Fix: the original kept every field in a list and called list.count() once
    per distinct value -- O(n * d). A single-pass collections.Counter gives the
    same counts in O(n); log output is byte-identical.
    """
    from collections import Counter  # local import; file-level imports untouched
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        reader = csv.reader(_in)
        counts = Counter()
        # one pass: tally every field and the total field count
        for line in reader:
            for item in line:
                record += 1
                counts[item] += 1
        logger.info('\tTotal number ={}'.format(record))
        # sort for a stable report order (set iteration order is not stable)
        logger.info('{:>5} {:>40} {:>10} {:>10}'.format(
            'Num', 'Distinct Value', 'number', 'Percent'))
        for count, value in enumerate(sorted(counts), start=1):
            number = counts[value]
            num_fraction = float(number) / float(record) * 100
            logger.info('{:5.0f} {:>40} {:10.0f} {:10.3f}'.format(
                count, value, number, num_fraction))
def replace_txt_exclude(target, replace, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Replace whole fields that match the regex `target` with `replace`.

    Any field of the input CSV containing a match for `target` is replaced in
    its entirety by `replace` in the output CSV; other fields pass through.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    field_num = 0
    replaced = 0
    matcher = re.compile(target)  # compile once; same matches as re.search(target, ...)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Replacing fields containing {} with {}'.format(
            target, replace))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value',
                                                      'New Value'))
        output = csv.writer(_out)
        for row in csv.reader(_in):
            new_row = []
            for cell in row:
                field_num += 1
                if matcher.search(cell):
                    if print_flag:
                        logger.info('{:10.0f} {:>20} {:>20}'.format(
                            float(field_num), cell, replace))
                    replaced += 1
                    new_row.append(replace)
                else:
                    new_row.append(cell)
            output.writerow(new_row)
        logger.info('\n\t Total number ={}'.format(replaced))
def chk_count_distinct(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Count distinct field values in the input CSV and log each value's frequency.

    Logs one line per distinct value (sorted lexicographically): running index,
    the value, its occurrence count, and its percentage of all fields.

    NOTE(review): print_flag gates only the column-header line; the per-value
    rows are always logged. That matches the original behavior -- confirm intent.

    Fix: the original kept every field in a list and called list.count() once
    per distinct value -- O(n * d). A single-pass collections.Counter gives the
    same counts in O(n); log output is byte-identical.
    """
    from collections import Counter  # local import; file-level imports untouched
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        reader = csv.reader(_in)
        counts = Counter()
        # one pass: tally every field and the total field count
        for line in reader:
            for item in line:
                record += 1
                counts[item] += 1
        logger.info('\tTotal number ={}'.format(record))
        if print_flag:
            logger.info('{:>5} {:>40} {:>10} {:>10}'.format('Num', 'Distinct Value', 'number', 'Percent'))
        # sort for a stable report order (set iteration order is not stable)
        for count, value in enumerate(sorted(counts), start=1):
            number = counts[value]
            num_fraction = float(number)/float(record)*100
            logger.info('{:5.0f} {:>40} {:10.0f} {:10.3f}'.format(count, value, number, num_fraction))
def chk_count_distinct_double(input_data_file=None, output_data_file=None, log_file=None):
    # Counts number of distinct values in col A (column 0),
    # then corresponding distinct values in col B (column 1) of input_data_file.
    # For every (col A value, col B value) pair it logs: running index, the pair,
    # the pair's occurrence count, and its percentage of all input rows.
    # Assumes every row has at least two columns -- TODO confirm with callers.
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        reader = csv.reader(_in)
        temp_col_a_distinct = set()
        col_a_distinct = []
        column_a_values = []
        column_b_values = []
        original_values = []
        # transfer input values to local array; record = number of rows read
        record = 0
        for i, line in enumerate(reader):
            record = record + 1
            original_values.append([])
            for item in line:
                original_values[i].append(item)
        logger.info('\tTotal number ={}'.format(record))
        # identify column A distinct values, transfer original values to col A/B arrays
        for i, line in enumerate(original_values):
            temp_col_a_distinct.add(original_values[i][0])
            column_a_values.append(original_values[i][0])
            column_b_values.append(original_values[i][1])
        # transfer col A distinct values from temporary set to list and sort,
        # since set iteration order is not stable between runs
        for value in temp_col_a_distinct:
            col_a_distinct.append(value)
        col_a_distinct.sort()
        # count number of records in column B for each distinct col A value
        logger.info('{:>5} {:>40} {:>40} {:>10} {:>10}'.format(
            'Num', 'Col A Value', 'Col B Value', 'number', 'Percent'))
        count = 0
        for value in col_a_distinct:
            # subset column B array (rows whose col A equals value)
            # and identify the distinct col B values within that subset
            col_b_subset = []
            temp_col_b_distinct = set()
            col_b_distinct = []
            for i, subset in enumerate(column_a_values):
                if subset == value:
                    col_b_subset.append(column_b_values[i])
                    temp_col_b_distinct.add(column_b_values[i])
            # transfer col B distinct values from temporary set to list and sort
            for subset in temp_col_b_distinct:
                col_b_distinct.append(subset)
            col_b_distinct.sort()
            # write results to log file; percentage is relative to ALL rows,
            # not just this col A subset
            for subset in col_b_distinct:
                count = count + 1
                number = col_b_subset.count(subset)
                num_fraction = float(number) / float(record) * 100
                logger.info('{:5.0f} {:>40} {:>40} {:10.0f} {:10.3f}'.format(
                    count, value, subset, number, num_fraction))
def merge_2col_match(in_col1, in_col2, merge_col1, merge_col2, map_file, merge_file, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    # Merges two files by matching values in two columns.
    #
    # in_col1/in_col2       -- 1-based match columns in input_data_file
    # merge_col1/merge_col2 -- 1-based match columns in merge_file
    # map_file   -- CSV (header row skipped) describing per-variable operations;
    #               expected columns: col_number, in_name, operation, out_name,
    #               in_index, out_index, units, description. Only rows whose
    #               operation is 'copy' are acted on here.
    # merge_file -- CSV (header row skipped) whose matched values are copied
    #               into input rows per the map's copy instructions.
    #
    # For every input row whose two match columns equal a merge row's two match
    # columns, the mapped columns are copied from the merge row into the input
    # row (in place), and the (possibly updated) rows are written to
    # output_data_file. Unmatched merge-file rows are reported at the end.
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    logger.info('Merge Files')
    # save some messages to log file
    logger.info('\tMerge file: {}'.format(merge_file))
    logger.info('\tMap file: {}'.format(map_file))
    # read in all variable mapping information for inserting merge_file into input_data_file
    col_number = []
    in_name = []
    operation = []
    out_name = []
    in_index = []
    out_index = []
    units = []
    description = []
    num_map_record = 0
    with open(map_file, newline='') as _in:
        # logger.info('\tRead merge Variable Mapping File')
        reader = csv.reader(_in)
        firstline = True
        for line in reader:
            # logger.info('{}'.format(line))
            if (firstline):
                # skips first line (header)
                firstline = False
                continue
            num_map_record = num_map_record +1
            col_number.append(line[0])
            in_name.append(line[1])
            operation.append(line[2])
            out_name.append(line[3])
            in_index.append(line[4])
            out_index.append(line[5])
            units.append(line[6])
            description.append(line[7])
    # logger.info('\tNumber map records: {} '.format(num_map_record))
    # Figure out which columns to copy (1-based indices converted to 0-based)
    num_copies = 0
    copy_from=[]
    copy_to=[]
    for i in range(num_map_record):
        if (operation[i] == 'copy'):
            copy_from.append(int(in_index[i])-1)
            copy_to.append(int(out_index[i])-1)
            # logger.info('\toperation: {} {} '.format(copy_from[num_copies],copy_to[num_copies]))
            num_copies = num_copies + 1
    logger.info('\tnum_copies: {} '.format(num_copies))
    # adjust column numbers to zero start
    in_col1 = in_col1 - 1
    in_col2 = in_col2 - 1
    merge_col1 = merge_col1 - 1
    merge_col2 = merge_col2 - 1
    # read input data to local array
    with open(input_data_file, newline='') as _in:
        reader = csv.reader(_in)
        input_data = []
        num_in_record = 0
        for i, line in enumerate(reader):
            num_in_record = num_in_record + 1
            input_data.append(line)
    # logger.info('\tNumber in records: {}'.format(num_in_record))
    # read merge data to local array (header row skipped);
    # unmatched[j] tracks whether merge row j was ever matched, using the
    # string flags 'true'/'false' rather than booleans (historical quirk)
    unmatched = []
    with open(merge_file, newline='') as _in:
        reader = csv.reader(_in)
        merge_data = []
        num_merge_record = 0
        firstline = True
        for i, line in enumerate(reader):
            if (firstline):
                firstline = False
                continue
            num_merge_record = num_merge_record + 1
            unmatched.append('true')
            merge_data.append(line)
    # logger.info('\tNumber merge file records: {}'.format(num_merge_record))
    # merge the two files: O(n*m) scan; each matching merge row copies its
    # mapped columns into the input row in place (later matches overwrite earlier)
    output_data = []
    num_records_merged = 0
    for i, line_in in enumerate(input_data):
        for j, line_merge in enumerate(merge_data):
            if (line_merge[merge_col1] == line_in[in_col1]):
                if (line_merge[merge_col2] == line_in[in_col2]):
                    num_records_merged = num_records_merged + 1
                    unmatched[j] = 'false'
                    for k in range(num_copies):
                        line_in[copy_to[k]]=line_merge[copy_from[k]]
                        # logger.info('\tcopy: {} {} '.format(line_merge[copy_from[k]],line_in[copy_to[k]]))
        output_data.append(line_in)
    logger.info('\tRecords merged input file: {}'.format(num_records_merged))
    # write output data
    with open(output_data_file, 'w', newline='') as _out:
        output = csv.writer(_out)
        for line in output_data:
            output.writerow(line)
    # count unmatched records in merge file
    num_unmatched = 0
    num_matched = 0
    for i, line_merge in enumerate(merge_data):
        if (unmatched[i]=='true'):
            num_unmatched = num_unmatched + 1
        else:
            num_matched = num_matched + 1
    # print unmatched records
    if (num_unmatched != 0):
        logger.info('\tUnmatched records in merge file: {}'.format(num_unmatched))
        logger.info('\t{:>5} {:>30} {:>30}'.format('Rec','col1','col2'))
        for i, line_merge in enumerate(merge_data):
            if (unmatched[i]=='true'):
                logger.info('\t{:>5} {:>30} {:>30}'.format(i + 1,line_merge[merge_col1],line_merge[merge_col2]))
def chk_count_distinct_double(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    # Counts number of distinct values in col A (column 0),
    # then corresponding distinct values in col B (column 1) of input_data_file.
    # For every (col A value, col B value) pair it logs: running index, the pair,
    # the pair's occurrence count, and its percentage of all input rows.
    # Assumes every row has at least two columns -- TODO confirm with callers.
    # NOTE(review): print_flag gates only the header line; per-pair rows are
    # always logged -- confirm this is intended.
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        reader = csv.reader(_in)
        temp_col_a_distinct = set()
        col_a_distinct = []
        column_a_values = []
        column_b_values = []
        original_values = []
        # transfer input values to local array; record = number of rows read
        record = 0
        for i, line in enumerate(reader):
            record = record + 1
            original_values.append([])
            for item in line:
                original_values[i].append(item)
        logger.info('\tTotal number ={}'.format(record))
        # identify column A distinct values, transfer original values to col A/B arrays
        for i, line in enumerate(original_values):
            temp_col_a_distinct.add(original_values[i][0])
            column_a_values.append(original_values[i][0])
            column_b_values.append(original_values[i][1])
        # transfer col A distinct values from temporary set to list and sort,
        # since set iteration order is not stable between runs
        for value in temp_col_a_distinct:
            col_a_distinct.append(value)
        col_a_distinct.sort()
        # count number of records in column B for each distinct col A value
        if print_flag:
            logger.info('{:>5} {:>40} {:>40} {:>10} {:>10}'.format('Num', 'Col A Value', 'Col B Value', 'number', 'Percent'))
        count = 0
        for value in col_a_distinct:
            # subset column B array (rows whose col A equals value)
            # and identify the distinct col B values within that subset
            col_b_subset = []
            temp_col_b_distinct = set()
            col_b_distinct = []
            for i, subset in enumerate(column_a_values):
                if subset == value:
                    col_b_subset.append(column_b_values[i])
                    temp_col_b_distinct.add(column_b_values[i])
            # transfer col B distinct values from temporary set to list and sort
            for subset in temp_col_b_distinct:
                col_b_distinct.append(subset)
            col_b_distinct.sort()
            # write results to log file; percentage is relative to ALL rows,
            # not just this col A subset
            for subset in col_b_distinct:
                count = count+1
                number = col_b_subset.count(subset)
                num_fraction = float(number)/float(record)*100
                logger.info('{:5.0f} {:>40} {:>40} {:10.0f} {:10.3f}'.format(count, value, subset, number, num_fraction))
def make_kml_wall(missing_value, multiplier, input_data_file=None, output_data_file=None, log_file=None):
    """Build a KML 'wall' file: one extruded linestring per distinct line id.

    Input CSV rows: col 0 = line name/id (string), col 1 = longitude (deg),
    col 2 = latitude (deg), col 3 = variable used as the wall height.
    Points with a missing longitude or latitude are skipped; a missing height
    becomes 0.0, otherwise height = value * multiplier.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml wall file: {}'.format(output_data_file))
        # load all rows up front; each row is [id, lon, lat, z]
        rows = [list(row) for row in csv.reader(_in)]
        # distinct line ids, sorted for a deterministic output order
        wall_ids = sorted({row[0] for row in rows})
        kml = simplekml.Kml()
        for wall_id in wall_ids:
            segment = []
            for row in rows:
                if row[0] != wall_id:
                    continue
                # skip points with a missing coordinate
                if float(row[1]) == float(missing_value):
                    continue
                if float(row[2]) == float(missing_value):
                    continue
                if float(row[3]) == float(missing_value):
                    altitude = 0.0
                else:
                    altitude = float(row[3]) * float(multiplier)
                segment.append([row[1], row[2], altitude])
            wall = kml.newlinestring(name='{}'.format(wall_id))
            wall.coords = segment
            wall.extrude = 1
            wall.altitudemode = simplekml.AltitudeMode.relativetoground
            wall.style.linestyle.width = 5
            wall.style.linestyle.color = simplekml.Color.blue
        kml.save(output_data_file)