예제 #1
0
def replace_txt_remove_right(target, print_flag, input_data_file=None,
                             output_data_file=None, log_file=None):
    """Cut off everything after the first match of ``target`` in each CSV cell.

    Every cell of ``input_data_file`` is searched with ``re.search``. A cell
    that matches keeps only the text up to and including the end of the match;
    a cell that does not match is copied unchanged. The resulting rows are
    written to ``output_data_file``.

    :param target: pattern handed to ``re.search``
    :param print_flag: when truthy, log a table of each changed cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0      # running cell index across the whole file (1-based)
    cells_changed = 0   # number of cells in which the pattern was found
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Remove text right of {}'.format(target))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            trimmed_row = []
            for cell in row:
                cells_seen += 1
                match = re.search(target, cell)
                if not match:
                    trimmed_row.append(cell)
                    continue
                cells_changed += 1
                # Keep the matched text itself; drop only what follows it.
                trimmed = cell[:match.end()]
                trimmed_row.append(trimmed)
                if print_flag:
                    logger.info('{:10.0f} {:>20} {:>20}'.format(float(cells_seen),
                                                                cell, trimmed))
            writer.writerow(trimmed_row)
        logger.info('\n\t Total number ={}'.format(cells_changed))
예제 #2
0
def utm_to_latlong(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Convert rows of UTM coordinates into latitude/longitude.

    Each input row is read as easting, northing, zone number and a hemisphere
    letter. The row is treated as northern hemisphere only when the fourth
    column is exactly 'N'; any other value is passed on as ``northern=False``.
    The value returned by ``utm.to_latlon`` is written as one output row.

    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    :raises ValueError: if easting, northing or zone cannot be parsed as numbers
    :raises IndexError: if a row has fewer than four columns
    """
    logger = logger_message(__name__, log_file, log_level)

    # Check required input and output data file names were given.
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'

    # newline='' leaves line-ending handling to the csv module, which avoids
    # doubled line endings on platforms that translate '\n' on write.
    with open(input_data_file, 'r', newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        output = csv.writer(_out)
        for row_ind, row in enumerate(csv.reader(_in)):
            east = float(row[0])
            north = float(row[1])
            zone = int(row[2])

            latlong = utm.to_latlon(east, north, zone, northern=('N' == row[3]))
            logger.info('Changed row {} from: {}  to: {}'.format(row_ind,
                                                                 (row[0], row[1]), latlong))

            output.writerow(latlong)
예제 #3
0
def make_kml_point(input_data_file=None, output_data_file=None, log_file=None):
    """Build a KML point file from the rows of a CSV file.

    Every input row produces one point: column 0 is passed as the point name
    and columns 1 and 2 are passed, in that order, as its coordinate pair.
    The document is written with ``kml.save(output_data_file)``.

    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the KML file to write (required)
    :param log_file: forwarded to ``logger_message``
    :raises IndexError: if a row has fewer than three columns
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'

    # Only the input is opened here: kml.save() writes the output file itself,
    # so opening it for writing beforehand merely truncated it for nothing.
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml point file: {}'.format(output_data_file))
        # Copy the rows out so the input file can be closed before saving.
        original_values = [list(line) for line in csv.reader(_in)]
        logger.info('\tNumber points: {}'.format(len(original_values)))

    # NOTE(review): values are passed through as strings in file order;
    # confirm the coordinate order simplekml expects matches columns 1 and 2.
    kml = simplekml.Kml()
    for line in original_values:
        kml.newpoint(name=line[0], coords=[(line[1], line[2])])
    kml.save(output_data_file)
예제 #4
0
def chk_num_columns(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Report rows whose column count differs from that of the first row.

    The first row of ``input_data_file`` defines the reference column count.
    Each row with a different count is tallied and, when ``print_flag`` is
    truthy, logged with its 1-based row number. Results go to the logger only.

    :param print_flag: when truthy, log each offending row
    :param input_data_file: path of the CSV file to check (required)
    :param output_data_file: required; the file is opened for writing (and so
        created or truncated) but nothing is written to it
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    rows_seen = 0
    mismatched_rows = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline=''):
        logger.info('Checking the number of columns')
        for row_index, row in enumerate(csv.reader(src)):
            rows_seen += 1
            width = len(row)
            if row_index == 0:
                # The first row is taken as the reference width.
                expected_width = width
                logger.info('\tCorrect number of columns: {:10.0f}'.format(
                    float(expected_width)))
                if print_flag:
                    logger.info('{:>10} {:>20}'.format('Record', 'Num Columns'))
            if width == expected_width:
                continue
            mismatched_rows += 1
            if print_flag:
                logger.info('{:10.0f} {:>20} '.format(float(rows_seen), width))
        logger.info('\tTotal number rows with incorrect number columns={}'.format(
            mismatched_rows))
예제 #5
0
def calc_dec_deg_to_deg_min_sec(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Convert decimal degrees to degrees, minutes and seconds.

    The first column of every input row is read as a decimal-degree value and
    one output row of three columns (degrees, minutes, seconds) is written.
    The sign of the input is carried by the degrees column only. A value equal
    to ``missing_value`` yields ``missing_value`` in all three columns. Each
    output column is formatted with ``'{:.0f}'``, so all three are rounded to
    whole numbers.

    :param missing_value: sentinel marking a missing input value
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    :raises ValueError: if a value cannot be parsed as a float
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert decimal degrees to degrees minutes seconds')
        output = csv.writer(_out)
        for line in csv.reader(_in):
            angle = float(line[0])
            missing = float(missing_value)
            if angle != missing:
                magnitude = math.fabs(angle)
                whole_degrees = math.trunc(magnitude)
                minutes_float = (magnitude - whole_degrees) * 60.
                minute = math.trunc(minutes_float)
                second = (minutes_float - minute) * 60.
                # copysign yields +/-1.0 without dividing by |angle|, so an
                # input of exactly 0 no longer raises ZeroDivisionError.
                degree = math.copysign(1.0, angle) * whole_degrees
            else:
                degree = minute = second = missing
            # Single-precision array kept so the stored values round exactly
            # as they did before being formatted.
            new_line = array('f', [degree, minute, second])
            output.writerow(['{:.0f}'.format(x) for x in new_line])
예제 #6
0
def replace_num_out_range_equal(startval, endval, constant, print_flag, input_data_file=None,
                                output_data_file=None, log_file=None):
    """Replace values at or beyond the range limits with a constant.

    Every cell of ``input_data_file`` is parsed as a float. A value that is
    ``<= startval`` or ``>= endval`` is replaced by ``constant``; other values
    are kept. Each row is written to ``output_data_file`` with two decimals.

    :param startval: lower limit (values less than or equal are replaced)
    :param endval: upper limit (values greater than or equal are replaced)
    :param constant: replacement value
    :param print_flag: when truthy, log a table of each replaced cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0
    replaced = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Replacing <={} or >={} with {}'.format(startval, endval, constant))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            cleaned = array('f')
            for cell in row:
                cells_seen += 1
                number = float(cell)
                if not (number <= float(startval) or number >= float(endval)):
                    cleaned.append(number)
                    continue
                if print_flag:
                    logger.info('{:10.0f} {:>20} {:>20}'.format(float(cells_seen),
                                                                cell, constant))
                cleaned.append(float(constant))
                replaced += 1
            writer.writerow(['{:.2f}'.format(x) for x in cleaned])
        logger.info('\n\t Total number ={}'.format(replaced))
예제 #7
0
def replace_txt_remove_left(target, print_flag, input_data_file=None,
                            output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Cut off everything before the first match of ``target`` in each CSV cell.

    Every cell of ``input_data_file`` is searched with ``re.search``. A cell
    that matches keeps only the text from the start of the match onwards; a
    cell that does not match is copied unchanged. The resulting rows are
    written to ``output_data_file``.

    :param target: pattern handed to ``re.search``
    :param print_flag: when truthy, log a table of each changed cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0      # running cell index across the whole file (1-based)
    cells_changed = 0   # number of cells in which the pattern was found
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Remove text left of {}'.format(target))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            trimmed_row = []
            for cell in row:
                cells_seen += 1
                match = re.search(target, cell)
                if not match:
                    trimmed_row.append(cell)
                    continue
                cells_changed += 1
                # Keep the matched text itself; drop only what precedes it.
                trimmed = cell[match.start():]
                trimmed_row.append(trimmed)
                if print_flag:
                    logger.info('{:10.0f} {:>20} {:>20}'.format(float(cells_seen),
                                                                cell, trimmed))
            writer.writerow(trimmed_row)
        logger.info('\n\t Total number ={}'.format(cells_changed))
예제 #8
0
def replace_txt_empty(replace, print_flag, input_data_file=None,
                      output_data_file=None, log_file=None):
    """Substitute a text string for every empty CSV cell.

    Cells of ``input_data_file`` equal to the empty string are replaced by
    ``replace``; all other cells are copied unchanged. The rows are written to
    ``output_data_file``. Every cell value read is also logged.

    :param replace: text written in place of an empty cell
    :param print_flag: when truthy, log a table of each replaced cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0
    cells_filled = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Replacing empty string with {}'.format(replace))
        if print_flag:
            logger.info('{:>10} {:>20}'.format('Record',  'New Value'))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            filled_row = []
            for cell in row:
                cells_seen += 1
                logger.info('{}'.format(cell))
                if cell != '':
                    filled_row.append(cell)
                    continue
                if print_flag:
                    logger.info('{:10.0f} {:>20}'.format(float(cells_seen),replace))
                cells_filled += 1
                filled_row.append(replace)
            writer.writerow(filled_row)
        logger.info('\n\t Total number ={}'.format(cells_filled))
예제 #9
0
파일: chk_nans.py 프로젝트: mnhan32/DIT
def chk_nans(print_flag,
             input_data_file=None,
             output_data_file=None,
             log_file=None):
    """Count the CSV cells that ``is_float`` does not accept as numbers.

    Each cell of ``input_data_file`` is passed to ``is_float``. Cells for
    which it returns a falsy value are tallied and, when ``print_flag`` is
    truthy, logged with their 1-based cell index. Results go to the logger
    only.

    :param print_flag: when truthy, log each rejected cell
    :param input_data_file: path of the CSV file to check (required)
    :param output_data_file: required; the file is opened for writing (and so
        created or truncated) but nothing is written to it
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0
    rejected = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline=''):
        logger.info('Find values that are not numbers')
        if print_flag:
            logger.info('{:>10}{:>20}'.format('Record', 'Value'))
        for row in csv.reader(src):
            for cell in row:
                cells_seen += 1
                if is_float(cell):
                    continue
                rejected += 1
                if print_flag:
                    logger.info('{:10.0f}{:>20}'.format(float(cells_seen), cell))
        logger.info('\tTotal number ={}'.format(rejected))
예제 #10
0
def replace_num_out_range_equal(startval, endval, constant, print_flag, input_data_file=None,
                                output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Replace values at or beyond the range limits with a constant.

    Every cell of ``input_data_file`` is parsed as a float. A value that is
    ``<= startval`` or ``>= endval`` is replaced by ``constant``; other values
    are kept. Each row is written to ``output_data_file`` with two decimals.

    :param startval: lower limit (values less than or equal are replaced)
    :param endval: upper limit (values greater than or equal are replaced)
    :param constant: replacement value
    :param print_flag: when truthy, log a table of each replaced cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_seen = 0
    replaced = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Replacing <={} or >={} with {}'.format(startval, endval, constant))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value', 'New Value'))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            cleaned = array('f')
            for cell in row:
                cells_seen += 1
                number = float(cell)
                if not (number <= float(startval) or number >= float(endval)):
                    cleaned.append(number)
                    continue
                if print_flag:
                    logger.info('{:10.0f} {:>20} {:>20}'.format(float(cells_seen),
                                                                cell, constant))
                cleaned.append(float(constant))
                replaced += 1
            writer.writerow(['{:.2f}'.format(x) for x in cleaned])
        logger.info('\n\t Total number ={}'.format(replaced))
예제 #11
0
def make_kml_point(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Build a KML point file from the rows of a CSV file.

    Every input row produces one point: column 0 is passed as the point name
    and columns 1 and 2 are passed, in that order, as its coordinate pair.
    The document is written with ``kml.save(output_data_file)``.

    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the KML file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    :raises IndexError: if a row has fewer than three columns
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'

    # Only the input is opened here: kml.save() writes the output file itself,
    # so opening it for writing beforehand merely truncated it for nothing.
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml point file: {}'.format(output_data_file))
        # Copy the rows out so the input file can be closed before saving.
        original_values = [list(line) for line in csv.reader(_in)]
        logger.info('\tNumber points: {}'.format(len(original_values)))

    # NOTE(review): values are passed through as strings in file order;
    # confirm the coordinate order simplekml expects matches columns 1 and 2.
    kml = simplekml.Kml()
    for line in original_values:
        kml.newpoint(name=line[0], coords=[(line[1], line[2])])
    kml.save(output_data_file)
예제 #12
0
def math_multiply_constant(constant, missing_value, input_data_file=None,
                           output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Multiply every numeric CSV cell by a constant.

    Cells accepted by ``is_number`` are multiplied by ``constant`` unless they
    equal ``missing_value``, in which case they are passed through. Cells
    rejected by ``is_number`` are logged and written as ``missing_value``.
    Each row is written to ``output_data_file`` with two decimals.

    :param constant: multiplier, converted with ``float``
    :param missing_value: sentinel for missing data, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    NaN_toggle = True
    NaN_count = 0
    record = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Multiplying column by {}'.format(constant))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            for item in line:
                record = record + 1
                if is_number(item):
                    if float(item) != float(missing_value):
                        value = float(item) * float(constant)
                    else:
                        value = float(missing_value)
                    new_line.append(value)
                else:
                    NaN_count = NaN_count + 1
                    if NaN_toggle:  # print the legend only once
                        logger.info('    Records with non-number entry types:'\
                                    '\n{:>15} {:>20}'.format('Record', 'Value'))
                        NaN_toggle = False
                    logger.info('{:15.0f} {:>20}'.format(float(record), item))
                    # Convert first: a float array rejects a string sentinel
                    # with TypeError, and the numeric branch already converts.
                    new_line.append(float(missing_value))
            output.writerow(['{:.2f}'.format(x) for x in new_line])
        logger.info('    Total number of non-number entries: {}'.format(NaN_count))
예제 #13
0
def calc_copy_col(input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Copy every row of the input CSV file to the output CSV file unchanged.

    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Copy input column to output column')
        # Rows are re-serialised by the csv writer, not copied byte for byte.
        csv.writer(dst).writerows(csv.reader(src))
예제 #14
0
def cond_if_greater(criteria,
                    constant,
                    print_flag,
                    input_data_file=None,
                    output_data_file=None,
                    log_file=None):
    """Set column B to a constant wherever column A exceeds a threshold.

    Column A is the first column of each row. When its float value is greater
    than ``criteria`` the output row consists of column A followed by
    ``constant``; otherwise the input row is written unchanged.

    :param criteria: threshold compared against column A as a float
    :param constant: value written as column B for matching rows
    :param print_flag: when truthy, log a table of each changed row
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('If column A > {} set column B = {}'.format(
            criteria, constant))
        if print_flag:
            logger.info('{:>10}{:>20}{:>20}{:>20}'.format(
                'Record', 'Col A Value', 'Old Col B Value', 'New Col B Value'))
        writer = csv.writer(dst)

        # Read the whole input into memory before writing anything.
        rows = [list(line) for line in csv.reader(src)]

        changed = 0
        for row_number, row in enumerate(rows, start=1):
            first_value = row[0]
            # The last cell of the row is what gets reported as "old column B".
            last_value = row[-1]
            if float(first_value) > float(criteria):
                changed += 1
                out_row = [first_value, constant]
                if print_flag:
                    logger.info('{:10.0f}{:>20}{:>20}{:>20}'.format(
                        float(row_number), first_value, last_value, constant))
            else:
                out_row = list(row)
            writer.writerow(['{}'.format(x) for x in out_row])
        logger.info('\t Total number ={}'.format(changed))
예제 #15
0
def chk_print_num_less_equal(criteria, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log every CSV cell whose float value is less than or equal to ``criteria``.

    :param criteria: threshold, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: accepted but not used by this function
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    matches = 0
    cells_seen = 0
    with open(input_data_file, newline='') as src:
        logger.info('Print values <= {}'.format(criteria))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(src):
            for cell in row:
                cells_seen += 1
                if not float(cell) <= float(criteria):
                    continue
                matches += 1
                logger.info('{:10.0f} {:>20}'.format(float(cells_seen), cell))
        logger.info('\n\t Total number <={}: {:10.0f}'.format(criteria, float(matches)))
예제 #16
0
def chk_print_num_less(criteria, input_data_file=None, output_data_file=None, log_file=None):
    """Log every CSV cell whose float value is strictly less than ``criteria``.

    :param criteria: threshold, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: accepted but not used by this function
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    matches = 0
    cells_seen = 0
    with open(input_data_file, newline='') as src:
        logger.info('Print values < {}'.format(criteria))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(src):
            for cell in row:
                cells_seen += 1
                if not float(cell) < float(criteria):
                    continue
                matches += 1
                logger.info('{:10.0f} {:>20}'.format(float(cells_seen), cell))
        logger.info('\n\t Total number < {}: {:10.0f}'.format(criteria, float(matches)))
예제 #17
0
def replace_txt_fill(fill, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Overwrite every CSV cell with the same fill value.

    The output keeps the row and column layout of ``input_data_file`` but
    every cell holds ``fill``. The number of cells filled is logged.

    :param fill: value written into every cell
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    cells_filled = 0
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Filling with {}'.format(fill))
        writer = csv.writer(dst)
        for row in csv.reader(src):
            width = len(row)
            cells_filled += width
            writer.writerow([fill] * width)
        logger.info('\n\t Total number ={}'.format(cells_filled))
예제 #18
0
def calc_subtract_col(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Write the difference of the first two columns (column A - column B).

    For each input row one value is written: ``float(row[0]) - float(row[1])``
    when neither operand equals ``missing_value``, otherwise ``missing_value``.
    The result is stored as a single-precision float and formatted with ten
    decimals.

    :param missing_value: sentinel for missing data, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    :raises IndexError: if a row has fewer than two columns
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        # The message used to describe an addition although the code subtracts.
        logger.info('Subtract two columns (out = column_a - column_b)')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) and (float(line[1]) != float(missing_value)):
                value = float(line[0]) - float(line[1])
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
예제 #19
0
def cond_if_contains(criteria, constant, print_flag, input_data_file=None,
                     output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Set column B to a constant wherever column A matches a pattern.

    Column A is the first column of each row. When ``re.search`` finds
    ``str(criteria)`` in it, the output row consists of column A followed by
    ``constant``; otherwise the input row is written unchanged.

    :param criteria: pattern, converted with ``str`` and used with ``re.search``
    :param constant: value written as column B for matching rows
    :param print_flag: when truthy, log a table of each changed row
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('If column A contains {} set column B = {}'.format(criteria, constant))
        if print_flag:
            logger.info('{:>10}{:>20}{:>20}{:>20}'.format('Record', 'Col A Value',
                                                          'Old Col B Value', 'New Col B Value'))
        writer = csv.writer(dst)

        # Read the whole input into memory before writing anything.
        rows = [list(line) for line in csv.reader(src)]

        changed = 0
        for row_number, row in enumerate(rows, start=1):
            first_value = row[0]
            # The last cell of the row is what gets reported as "old column B".
            last_value = row[-1]
            if re.search(str(criteria), str(first_value)):
                changed += 1
                out_row = [first_value, constant]
                if print_flag:
                    logger.info('{:10.0f}{:>20}{:>20}{:>20}'.format(float(row_number), first_value,
                                                                    last_value, constant))
            else:
                out_row = list(row)
            writer.writerow(['{}'.format(x) for x in out_row])
        logger.info('\t Total number ={}'.format(changed))
예제 #20
0
def sort_by_columns(column_list, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """
    Takes a list of columns to sort by in ascending order.

    The first input row is treated as a header and written back unsorted; the
    remaining rows are sorted and written after it. An input file with no rows
    produces an empty output file.

    :param column_list: list of tuples (index, type) describing sort columns,
        with 1-based indexes; a string is first converted with ``tuple_list``
    :param input_data_file: CSV file to sort
    :param output_data_file: sorted CSV file
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file, log_level)
    logger.info('Sorting input file by columns:')
    if isinstance(column_list, str):
        column_list = tuple_list(column_list)

    # Convert the caller's 1-based column indexes to 0-based ones.
    shifted_list = []
    for index, ind_type in column_list:
        new_tuple = (index - 1, ind_type)
        logger.info('\t' + str(new_tuple))
        shifted_list.append(new_tuple)

    header_row = None
    sorted_data = []
    with open(input_data_file, 'r') as csvfile:
        unsorted_reader = csv.reader(csvfile, delimiter=',', quotechar="'")
        for row_number, row in enumerate(unsorted_reader):
            row = [cast_data_value(col_val.strip()) for col_val in row]
            if row_number == 0:
                header_row = row
            else:
                sorted_data.append(create_typed_row(row, shifted_list, logger))

    # sorted() is stable, so sorting by the last key first and the first key
    # last gives a multi-column sort with the first column as primary key.
    for index, _ in reversed(shifted_list):
        sorted_data = sorted(sorted_data, key=lambda sort_by: sort_by[index])

    # The output file is opened in a with-block so the handle is always
    # flushed and closed; it used to be left open for the garbage collector.
    with open(output_data_file, 'w') as out_file:
        sorted_writer = csv.writer(out_file, quotechar="'",
                                   quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
        if header_row is not None:
            sorted_writer.writerow(header_row)
        for sorted_row in sorted_data:
            # NOTE(review): date_time_index and gtnp_date_time_format are not
            # defined in this function; presumably module-level names - confirm.
            if date_time_index is not None:
                row_list = list(sorted_row)
                row_list[date_time_index] = row_list[date_time_index].strftime(gtnp_date_time_format)
                sorted_row = tuple(row_list)
            sorted_writer.writerow(sorted_row)
예제 #21
0
def calc_copy_col_mult_const(constant, missing_value, input_data_file=None, output_data_file=None, log_file=None):
    """Copy the input values multiplied by a constant (out = in * constant).

    Cells equal to ``missing_value`` are passed through instead of being
    multiplied. Each row is written to ``output_data_file`` with eight
    decimals.

    :param constant: multiplier, converted with ``float``
    :param missing_value: sentinel for missing data, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: path of the CSV file to write (required)
    :param log_file: forwarded to ``logger_message``
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as src, \
            open(output_data_file, 'w', newline='') as dst:
        logger.info('Copy column and multiply by constant (out = in * constant)')
        writer = csv.writer(dst)
        for row in csv.reader(src):
            scaled = array('f')
            for cell in row:
                number = float(cell)
                if number != float(missing_value):
                    scaled.append(number * float(constant))
                else:
                    scaled.append(float(missing_value))
            writer.writerow(['{:.8f}'.format(x) for x in scaled])
예제 #22
0
def chk_count_valid_records_num(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log how many CSV cells hold a value other than ``missing_value``.

    Every cell of ``input_data_file`` is parsed as a float and compared with
    ``missing_value``. The total cell count, the valid count and the valid
    percentage are logged. An input with no cells reports 0 percent.

    :param missing_value: sentinel for missing data, converted with ``float``
    :param input_data_file: path of the CSV file to read (required)
    :param output_data_file: accepted but not used by this function
    :param log_file: forwarded to ``logger_message``
    :param log_level: forwarded to ``logger_message``
    :raises ValueError: if a cell cannot be parsed as a float
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    count = 0
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print valid values (not {})'.format(missing_value))
        logger.info('{:>10} {:>10} {:>10}'.format('Total', 'Valid', 'Percent'))
        reader = csv.reader(_in)
        for line in reader:
            for item in line:
                record = record + 1
                if float(item) != float(missing_value):
                    count = count + 1
        # Guard the division: an empty input leaves record at 0, which used
        # to raise ZeroDivisionError here.
        if record:
            valid_fraction = float(count)/float(record)*100.
        else:
            valid_fraction = 0.0
        logger.info('{:10.0f} {:10.0f} {:10.3f}'.format(float(record),
                                                        float(count), float(valid_fraction)))
예제 #23
0
def chk_count_valid_records_num(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None):
    """Log how many fields of a CSV file differ from ``missing_value``.

    One summary line is logged: total number of fields, number of valid
    fields, and the valid percentage.

    Args:
        missing_value: Sentinel marking missing data; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.

    Raises:
        AssertionError: If ``input_data_file`` is ``None``.
        ValueError: If ``missing_value`` or a field cannot be converted to
            ``float``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    count = 0
    record = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print valid values (not {})'.format(missing_value))
        logger.info('{:>10} {:>10} {:>10}'.format('Total', 'Valid', 'Percent'))
        missing = float(missing_value)
        reader = csv.reader(_in)
        for line in reader:
            record += len(line)
            count += sum(1 for item in line if float(item) != missing)
    # Guard the percentage: an empty input would otherwise divide by zero.
    valid_fraction = float(count) / float(record) * 100. if record else 0.0
    logger.info('{:10.0f} {:10.0f} {:10.3f}'.format(float(record),
                                                    float(count), float(valid_fraction)))
예제 #24
0
def chk_print_num_in_range(startval, endval, input_data_file=None,
                           output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log every field strictly greater than ``startval`` and strictly less than ``endval``.

    Fields are numbered consecutively across the whole file (row by row);
    each hit is logged with that running number, followed by a final total.

    Args:
        startval: Exclusive lower bound; converted with ``float()``.
        endval: Exclusive upper bound; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    hits = 0
    position = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values > {} and < {}'.format(startval, endval))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for field in row:
                position += 1
                number = float(field)
                inside = number > float(startval) and number < float(endval)
                if not inside:
                    continue
                hits += 1
                logger.info('{:10.0f} {:>20}'.format(float(position), field))
        logger.info('\n\t Total number > {} and < {}: {:10.0f}'.format(startval,
                                                                       endval, float(hits)))
예제 #25
0
def chk_statistics(missing_value,
                   input_data_file=None,
                   output_data_file=None,
                   log_file=None):
    """Log count, mean, stdev, median, min and max for each CSV column.

    Fields equal to ``missing_value`` are excluded.  The number of columns is
    taken from the last row of the file.  Statistics that are undefined for
    the available data (no valid value, or fewer than two for the standard
    deviation) are reported as ``nan`` instead of raising.

    Args:
        missing_value: Sentinel marking missing data; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.

    Raises:
        AssertionError: If ``input_data_file`` is ``None``.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has fewer fields than the last row.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Calculate Statistics')
        # transfer input values to local array
        original_values = [list(line) for line in csv.reader(_in)]

    # Width of the last row; 0 for an empty file (previously a NameError).
    column = len(original_values[-1]) if original_values else 0
    logger.info('\tTotal number ={}'.format(column))

    # extract valid values each column and calculate statistics
    logger.info('{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'.format(
        'Col', 'nrec', 'Mean', 'Stdev', 'Median', 'Min', 'Max'))
    missing = float(missing_value)
    nan = float('nan')
    for i in range(column):
        column_valid = []
        for line in original_values:
            number = float(line[i])
            if number != missing:
                column_valid.append(number)
        count = len(column_valid)
        if column_valid:
            mean = statistics.mean(column_valid)
            median = statistics.median(column_valid)
            minimum = min(column_valid)
            maximum = max(column_valid)
        else:
            mean = median = minimum = maximum = nan
        # The sample standard deviation needs at least two data points.
        stdev = statistics.stdev(column_valid) if count > 1 else nan
        logger.info(
            '{:>10.0f}{:>10.0f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}'
            .format(i + 1, count, mean, stdev, median, minimum, maximum))
예제 #26
0
def chk_statistics(missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log count, mean, stdev, min, quartiles, median and max for each CSV column.

    Fields equal to ``missing_value`` are excluded.  The number of columns is
    taken from the last row of the file.  Quartiles are the medians of the
    lower and upper halves of the sorted valid values; for an odd count the
    middle value belongs to neither half.  Statistics that are undefined for
    the available data are reported as ``nan`` instead of raising.

    Args:
        missing_value: Sentinel marking missing data; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.

    Raises:
        AssertionError: If ``input_data_file`` is ``None``.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has fewer fields than the last row.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Calculate Statistics')
        # transfer input values to local array
        original_values = [list(line) for line in csv.reader(_in)]

    # Width of the last row; 0 for an empty file (previously a NameError).
    column = len(original_values[-1]) if original_values else 0
    logger.info('\tTotal number ={}'.format(column))

    # extract valid values each column and calculate statistics
    logger.info('{:>4}{:>6}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'
                .format('Col', 'nrec', 'Mean', 'Stdev', 'Min', '1st_qrtl', 'Median', '3rd_qrtl', 'Max'))
    missing = float(missing_value)
    nan = float('nan')
    for i in range(column):
        column_valid = []
        for line in original_values:
            number = float(line[i])
            if number != missing:
                column_valid.append(number)
        column_valid.sort()
        count = len(column_valid)
        middle = count // 2
        # Both halves have `middle` elements, so an odd-count median is left
        # out of both (the upper half used to include it, the lower did not).
        lower_half = column_valid[:middle]
        upper_half = column_valid[count - middle:]

        if column_valid:
            mean = statistics.mean(column_valid)
            median = statistics.median(column_valid)
            minimum = column_valid[0]
            maximum = column_valid[-1]
        else:
            mean = median = minimum = maximum = nan
        # The sample standard deviation needs at least two data points.
        stdev = statistics.stdev(column_valid) if count > 1 else nan
        percentile_25 = statistics.median(lower_half) if lower_half else nan
        percentile_75 = statistics.median(upper_half) if upper_half else nan
        logger.info('{:>4.0f}{:>6.0f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}{:>10.3f}'
                    .format(i+1, count, mean, stdev, minimum, percentile_25, median, percentile_75, maximum))
예제 #27
0
def calc_deg_min_sec_to_dec_deg(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Convert degrees / minutes / seconds rows to decimal degrees.

    Each input row supplies degrees, minutes and seconds in its first three
    fields; one value per row is written with 10 decimal places.  The sign of
    the degrees field is applied to the whole result.  If any of the three
    fields equals ``missing_value`` the output is ``missing_value``.

    Args:
        missing_value: Sentinel marking missing data; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.

    Raises:
        AssertionError: If either file path is ``None``.
        ValueError: If an inspected field cannot be converted to ``float``.
        IndexError: If a row is too short for the fields that are inspected.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    missing = float(missing_value)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert degrees minutes seconds to decimal degrees')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            # Inspect the fields left to right and stop at the first missing
            # one, so later fields of an incomplete row are never parsed.
            parts = []
            for index in range(3):
                number = float(line[index])
                if number == missing:
                    break
                parts.append(number)

            if len(parts) == 3:
                degrees, minutes, seconds = parts
                magnitude = math.fabs(degrees) + minutes / 60. + seconds / 3600.
                # copysign yields +/-1 even for 0 degrees, where the former
                # degrees / |degrees| divided by zero.
                value = magnitude * math.copysign(1.0, degrees)
            else:
                value = missing
            # Format the double directly; array('f') would round the result
            # to single precision before the 10 decimals are written.
            output.writerow(['{:.10f}'.format(value)])
예제 #28
0
def replace_txt_fill(fill,
                     input_data_file=None,
                     output_data_file=None,
                     log_file=None):
    """Write a copy of a CSV file in which every field is replaced by ``fill``.

    The output keeps the row structure of the input (same number of rows and
    of fields per row).  The total number of fields filled is logged.

    Args:
        fill: Value written into every field.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    filled = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Filling with {}'.format(fill))
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            width = len(row)
            filled += width
            writer.writerow([fill] * width)
        logger.info('\n\t Total number ={}'.format(filled))
예제 #29
0
def calc_vwc_gpr(missing_value, print_flag, input_data_file=None, output_data_file=None, log_file=None):
    """Calculate VWC given TWTT and ALT.

    TWTT is the two way travel time from ground penetrating radar (ns).
    VWC is the volumetric water content: ratio of water volume to total soil
    volume (m3/m3).  ALT is the active layer thickness: maximum thaw depth at
    the end of summer (cm).  velocity is the wave velocity (cm/ns).

    Each input row holds TWTT in field 0 and ALT in field 1.  Each output row
    holds velocity, dielectric and VWC.  All three are ``missing_value`` when
    either input equals ``missing_value`` or is zero (the velocity or the
    dielectric would require a division by zero).  VWC is capped at 1.

    Args:
        missing_value: Sentinel marking missing data; converted with ``float()``.
        print_flag: When truthy, log a header and one line per row.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.

    Raises:
        AssertionError: If either file path is ``None``.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has fewer than two fields.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    missing = float(missing_value)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Calculate VWC given TWTT and ALT')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        if print_flag:
            logger.info('\t{:>15} {:>15} {:>15} {:>15} {:>15}'.format('twtt (ns)','alt (cm)','velocity (cm/ns)', 'dielectric (-)', 'vwc (-)'))
        for line in reader:
            twtt = float(line[0])
            alt = float(line[1])
            # twtt == 0 would divide by zero in the velocity, alt == 0 gives
            # velocity 0 and divides by zero in the dielectric.
            computable = (twtt != missing and alt != missing
                          and twtt != 0. and alt != 0.)
            if computable:
                velocity = alt * 2. / twtt
                # NOTE(review): 30. is presumably the speed of light in
                # cm/ns -- confirm against the method's reference.
                dielectric = (30. / velocity) ** 2.
                # Cubic polynomial in the dielectric, result in percent.
                vwc = -2.5 + 2.508 * dielectric - 3.634e-2 * dielectric * dielectric
                vwc = vwc + 2.394e-4 * dielectric * dielectric * dielectric
                vwc = vwc / 100.
                vwc = min(vwc, 1.)
            else:
                velocity = missing
                dielectric = missing
                vwc = missing
            if print_flag:
                logger.info('\t{:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f}'.format(twtt,alt,velocity, dielectric, vwc))
            output.writerow([velocity, dielectric, vwc])
예제 #30
0
def chk_print_num_in_range_equal(startval,
                                 endval,
                                 input_data_file=None,
                                 output_data_file=None,
                                 log_file=None):
    """Log every field that is >= ``startval`` and <= ``endval``.

    Fields are numbered consecutively across the whole file (row by row);
    each hit is logged with that running number, followed by a final total.

    Args:
        startval: Inclusive lower bound; converted with ``float()``.
        endval: Inclusive upper bound; converted with ``float()``.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    hits = 0
    position = 0
    with open(input_data_file, newline='') as _in:
        logger.info('Print values >= {} and <= {}'.format(startval, endval))
        logger.info('{:>10} {:>20}'.format('Record', 'Value'))
        for row in csv.reader(_in):
            for field in row:
                position += 1
                number = float(field)
                inside = number >= float(startval) and number <= float(endval)
                if not inside:
                    continue
                hits += 1
                logger.info('{:10.0f} {:>20}'.format(float(position), field))
        logger.info('\n\t Total number >= {} and <= {}: {:10.0f}'.format(
            startval, endval, float(hits)))
예제 #31
0
def math_add_constant(constant, missing_value, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Add ``constant`` to every numeric field of a CSV file.

    Fields for which ``is_number`` is false are logged (with their running
    field number) and written as ``missing_value``.  Numeric fields equal to
    the missing value are written back as the missing value; all other
    numeric fields are written as field + constant.  The arithmetic is done
    on the values returned by ``cast_float_to_decimal``.

    Args:
        constant: Amount to add; converted with ``cast_float_to_decimal``.
        missing_value: Sentinel marking missing data.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values to be added to is required.'
    assert output_data_file is not None, 'An output CSV file to write new values to is required.'
    legend_pending = True  # the legend is logged once, before the first bad entry
    bad_entries = 0
    position = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Adding {} to the column'.format(constant))
        writer = csv.writer(_out)
        rows = csv.reader(_in, quotechar="'", quoting=csv.QUOTE_ALL)
        addend = cast_float_to_decimal(constant)
        sentinel = cast_float_to_decimal(missing_value)
        for row in rows:
            out_row = []
            for field in row:
                position += 1
                if not is_number(field):
                    bad_entries += 1
                    if legend_pending:
                        logger.info('    Records with non-number entry types:'
                                    '\n{:>15} {:>20}'.format('Record', 'Value'))
                        legend_pending = False
                    logger.info('{:15.0f} {:>20}'.format(float(position), field))
                    out_row.append(missing_value)
                    continue
                amount = cast_float_to_decimal(field)
                result = amount + addend if amount != sentinel else sentinel
                out_row.append(str(result))
            writer.writerow(out_row)
        logger.info('    Total number of non-number entries: {}'.format(bad_entries))
예제 #32
0
def chk_num_columns(print_flag,
                    input_data_file=None,
                    output_data_file=None,
                    log_file=None):
    """Check that every row has as many fields as the first row.

    The first row defines the reference width.  Rows with a different width
    are counted and, when ``print_flag`` is truthy, logged with their row
    number and width.  The output file is opened for writing but nothing is
    written to it.

    Args:
        print_flag: When truthy, log each offending row.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path opened in write mode (required).
        log_file: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    mismatches = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Checking the number of columns')
        for row_number, row in enumerate(csv.reader(_in), start=1):
            width = len(row)
            if row_number == 1:
                expected_width = width
                logger.info('\tCorrect number of columns: {:10.0f}'.format(
                    float(expected_width)))
                if print_flag:
                    logger.info('{:>10} {:>20}'.format('Record',
                                                       'Num Columns'))
            if width == expected_width:
                continue
            mismatches += 1
            if print_flag:
                logger.info('{:10.0f} {:>20} '.format(
                    float(row_number), width))
        logger.info(
            '\tTotal number rows with incorrect number columns={}'.format(
                mismatches))
예제 #33
0
def chk_count_distinct(input_data_file=None,
                       output_data_file=None,
                       log_file=None):
    """Log each distinct field value of a CSV file with its frequency.

    After the total number of fields, one line per distinct value is logged
    in sorted order: running number, value, occurrence count and percentage
    of all fields.

    Args:
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    record = 0
    # value -> number of occurrences, built in a single pass so the report
    # does not have to rescan every field once per distinct value.
    occurrences = {}
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        for line in csv.reader(_in):
            for item in line:
                record += 1
                occurrences[item] = occurrences.get(item, 0) + 1
        logger.info('\tTotal number ={}'.format(record))

        # count number records for each value, in sorted (deterministic) order
        logger.info('{:>5} {:>40} {:>10} {:>10}'.format(
            'Num', 'Distinct Value', 'number', 'Percent'))
        for count, value in enumerate(sorted(occurrences), start=1):
            number = occurrences[value]
            num_fraction = float(number) / float(record) * 100
            logger.info('{:5.0f} {:>40} {:10.0f} {:10.3f}'.format(
                count, value, number, num_fraction))
예제 #34
0
def replace_txt_exclude(target,
                        replace,
                        print_flag,
                        input_data_file=None,
                        output_data_file=None,
                        log_file=None):
    """Replace every field in which ``target`` is found by ``replace``.

    ``target`` is used as a regular expression with ``re.search``; a field
    that matches anywhere is replaced as a whole, other fields are copied
    unchanged.  The number of replaced fields is logged at the end.

    Args:
        target: Regular expression searched for in each field.
        replace: Value written in place of a matching field.
        print_flag: When truthy, log each replacement with its running
            field number.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    position = 0
    replaced = 0
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Replacing fields containing {} with {}'.format(
            target, replace))
        if print_flag:
            logger.info('{:>10} {:>20} {:>20}'.format('Record', 'Old Value',
                                                      'New Value'))
        writer = csv.writer(_out)
        for row in csv.reader(_in):
            new_row = []
            for field in row:
                position += 1
                if not re.search(target, field):
                    new_row.append(field)
                    continue
                if print_flag:
                    logger.info('{:10.0f} {:>20} {:>20}'.format(
                        float(position), field, replace))
                replaced += 1
                new_row.append(replace)
            writer.writerow(new_row)
        logger.info('\n\t Total number ={}'.format(replaced))
예제 #35
0
def chk_count_distinct(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log the number of fields and, optionally, the frequency of each distinct value.

    The total number of fields is always logged.  When ``print_flag`` is
    truthy, one line per distinct value follows in sorted order: running
    number, value, occurrence count and percentage of all fields.

    Args:
        print_flag: When truthy, log the per-value table.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    record = 0
    # value -> number of occurrences, built in a single pass so the report
    # does not have to rescan every field once per distinct value.
    occurrences = {}
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        for line in csv.reader(_in):
            for item in line:
                record += 1
                occurrences[item] = occurrences.get(item, 0) + 1
        logger.info('\tTotal number ={}'.format(record))

        # count number records for each value, in sorted (deterministic) order
        if print_flag:
            logger.info('{:>5} {:>40} {:>10} {:>10}'.format('Num',
                                                            'Distinct Value', 'number', 'Percent'))
            for count, value in enumerate(sorted(occurrences), start=1):
                number = occurrences[value]
                num_fraction = float(number)/float(record)*100
                logger.info('{:5.0f} {:>40} {:10.0f} {:10.3f}'.format(count,
                                                                  value, number, num_fraction))
예제 #36
0
def chk_count_distinct_double(input_data_file=None,
                              output_data_file=None,
                              log_file=None):
    """Log how often each (column A, column B) value pair occurs.

    Column A is field 0 and column B is field 1 of every row.  After the
    number of rows, one line per distinct pair is logged, ordered by column A
    value and then by column B value: running number, both values, the
    number of rows holding that pair, and that number as a percentage of all
    rows.

    Args:
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.

    Raises:
        AssertionError: If ``input_data_file`` is ``None``.
        IndexError: If a row has fewer than two fields.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        # transfer input values to local array
        original_values = list(csv.reader(_in))
    record = len(original_values)
    logger.info('\tTotal number ={}'.format(record))

    # Tally column B values under their column A value in a single pass,
    # instead of rescanning every row once per distinct column A value.
    pair_counts = {}
    for line in original_values:
        col_b_counts = pair_counts.setdefault(line[0], {})
        col_b_counts[line[1]] = col_b_counts.get(line[1], 0) + 1

    # count number records in column B for each distinct col A value
    logger.info('{:>5} {:>40} {:>40} {:>10} {:>10}'.format(
        'Num', 'Col A Value', 'Col B Value', 'number', 'Percent'))
    count = 0
    for value in sorted(pair_counts):
        col_b_counts = pair_counts[value]
        for subset in sorted(col_b_counts):
            count += 1
            number = col_b_counts[subset]
            num_fraction = float(number) / float(record) * 100
            logger.info('{:5.0f} {:>40} {:>40} {:10.0f} {:10.3f}'.format(
                count, value, subset, number, num_fraction))
예제 #37
0
def merge_2col_match(in_col1, in_col2, merge_col1, merge_col2, map_file, merge_file,
                     input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Merge columns of ``merge_file`` into ``input_data_file`` where two key columns match.

    A merge row matches an input row when the merge row's ``merge_col1`` /
    ``merge_col2`` fields equal the input row's ``in_col1`` / ``in_col2``
    fields.  For every match, the columns listed as ``copy`` operations in
    ``map_file`` are copied from the merge row into the input row; when
    several merge rows match, they are applied in file order.  All input rows
    (merged or not) are written to ``output_data_file``.  Merge rows that
    matched no input row are reported in the log.

    The first row of ``map_file`` and of ``merge_file`` is skipped as a
    header.  In ``map_file``, field 2 is the operation, field 4 the 1-based
    source column in ``merge_file`` and field 5 the 1-based destination
    column in ``input_data_file``; only rows whose operation is ``copy`` are
    used.

    Args:
        in_col1: 1-based number of the first key column in the input file.
        in_col2: 1-based number of the second key column in the input file.
        merge_col1: 1-based number of the first key column in the merge file.
        merge_col2: 1-based number of the second key column in the merge file.
        map_file: Path of the CSV variable-mapping file.
        merge_file: Path of the CSV file whose values are merged in.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Path of the CSV file to write (required).
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.

    Raises:
        AssertionError: If either data file path is ``None``.
        ValueError: If a column number cannot be converted to ``int``.
        IndexError: If a row is too short for a referenced column.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    logger.info('Merge Files')

    # save some messages to log file
    logger.info('\tMerge file: {}'.format(merge_file))
    logger.info('\tMap file: {}'.format(map_file))

    # Read the variable map and keep only what the merge needs: the
    # (source, destination) column pairs of the 'copy' operations.
    copy_from = []
    copy_to = []
    with open(map_file, newline='') as _in:
        reader = csv.reader(_in)
        next(reader, None)  # skip the header row
        for line in reader:
            if line[2] == 'copy':
                copy_from.append(int(line[4]) - 1)
                copy_to.append(int(line[5]) - 1)
    logger.info('\tnum_copies: {} '.format(len(copy_from)))

    # adjust column numbers to zero start; int() also accepts numeric strings
    in_col1 = int(in_col1) - 1
    in_col2 = int(in_col2) - 1
    merge_col1 = int(merge_col1) - 1
    merge_col2 = int(merge_col2) - 1

    # read input data to local array
    with open(input_data_file, newline='') as _in:
        input_data = list(csv.reader(_in))

    # read merge data to local array
    with open(merge_file, newline='') as _in:
        reader = csv.reader(_in)
        next(reader, None)  # skip the header row
        merge_data = list(reader)
    # unmatched[j] stays True until merge row j matches some input row
    unmatched = [True] * len(merge_data)

    # merge the two files; rows of input_data are updated in place
    num_records_merged = 0
    for line_in in input_data:
        for j, line_merge in enumerate(merge_data):
            if (line_merge[merge_col1] == line_in[in_col1]
                    and line_merge[merge_col2] == line_in[in_col2]):
                num_records_merged += 1
                unmatched[j] = False
                for source, destination in zip(copy_from, copy_to):
                    line_in[destination] = line_merge[source]
    logger.info('\tRecords merged input file: {}'.format(num_records_merged))

    # write output data
    with open(output_data_file, 'w', newline='') as _out:
        csv.writer(_out).writerows(input_data)

    # print unmatched records of the merge file
    unmatched_rows = [(i, line_merge) for i, line_merge in enumerate(merge_data)
                      if unmatched[i]]
    if unmatched_rows:
        logger.info('\tUnmatched records in merge file: {}'.format(len(unmatched_rows)))
        logger.info('\t{:>5} {:>30} {:>30}'.format('Rec','col1','col2'))
        for i, line_merge in unmatched_rows:
            logger.info('\t{:>5} {:>30} {:>30}'.format(i + 1,line_merge[merge_col1],line_merge[merge_col2]))
예제 #38
0
def chk_count_distinct_double(print_flag, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Log how often each (column A, column B) value pair occurs.

    Column A is field 0 and column B is field 1 of every row.  After the
    number of rows, one line per distinct pair is logged, ordered by column A
    value and then by column B value: running number, both values, the
    number of rows holding that pair, and that number as a percentage of all
    rows.

    Args:
        print_flag: Accepted but not consulted; the table is always logged.
        input_data_file: Path of the CSV file to read (required).
        output_data_file: Not used by this function.
        log_file: Passed through to ``logger_message``.
        log_level: Passed through to ``logger_message``.

    Raises:
        AssertionError: If ``input_data_file`` is ``None``.
        IndexError: If a row has fewer than two fields.
    """
    logger = logger_message(__name__, log_file, log_level)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    with open(input_data_file, newline='') as _in:
        logger.info('Count distinct values')
        # transfer input values to local array
        original_values = list(csv.reader(_in))
    record = len(original_values)
    logger.info('\tTotal number ={}'.format(record))

    # Tally column B values under their column A value in a single pass,
    # instead of rescanning every row once per distinct column A value.
    pair_counts = {}
    for line in original_values:
        col_b_counts = pair_counts.setdefault(line[0], {})
        col_b_counts[line[1]] = col_b_counts.get(line[1], 0) + 1

    # count number records in column B for each distinct col A value
    logger.info('{:>5} {:>40} {:>40} {:>10} {:>10}'.format('Num', 'Col A Value',
                                                           'Col B Value', 'number', 'Percent'))
    count = 0
    for value in sorted(pair_counts):
        col_b_counts = pair_counts[value]
        for subset in sorted(col_b_counts):
            count += 1
            number = col_b_counts[subset]
            num_fraction = float(number)/float(record)*100
            logger.info('{:5.0f} {:>40} {:>40} {:10.0f} {:10.3f}'.format(count, value, subset,
                                                                         number, num_fraction))
예제 #39
0
def make_kml_wall(missing_value, multiplier, input_data_file=None, output_data_file=None, log_file=None):
    """Make a KML "wall" file: one extruded line string per distinct line id.

    Each row of the input CSV is read as:
        column 0 = line name or id (treated as a string)
        column 1 = longitude, decimal degrees
        column 2 = latitude, decimal degrees
        column 3 = variable used as the height of the wall (z-variable)

    Rows are grouped by line id; the ids are written in sorted order and the
    points of each id keep the order they have in the input file.  A point is
    dropped when its longitude or latitude equals ``missing_value``.  When the
    height equals ``missing_value`` the point is kept with a height of 0.0,
    otherwise the height is the column 3 value times ``multiplier``.  An id
    whose points were all dropped still gets an (empty) line string.

    Args:
        missing_value: Sentinel marking a missing number; compared as a float.
        multiplier: Scale factor applied to the height column.
        input_data_file: Path of the input CSV file.
        output_data_file: Path of the KML file to write.
        log_file: Passed through to ``logger_message``.

    Raises:
        AssertionError: If either file argument is None.
        ValueError: If ``missing_value``, ``multiplier`` or a used coordinate
            or height cell cannot be converted to float.
        IndexError: If a non-blank row has fewer than four columns.
    """
    logger = logger_message(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'A kml point file.'

    # Group the rows by line id in a single pass instead of rescanning the
    # whole table once per distinct id.
    rows_by_line = {}
    with open(input_data_file, newline='') as _in:
        logger.info('Make kml wall file: {}'.format(output_data_file))
        for row in csv.reader(_in):
            if not row:
                # csv.reader yields [] for a blank line; it holds no point.
                continue
            rows_by_line.setdefault(row[0], []).append(row)

    # Convert the loop-invariant parameters once.
    missing = float(missing_value)
    scale = float(multiplier)

    kml = simplekml.Kml()
    for line_id in sorted(rows_by_line):
        coordinates = []
        for row in rows_by_line[line_id]:
            # Convert both before testing so a malformed cell in either
            # column is reported, not masked by the other being missing.
            longitude = float(row[1])
            latitude = float(row[2])
            if longitude == missing or latitude == missing:
                continue
            raw_height = float(row[3])
            height = 0.0 if raw_height == missing else raw_height * scale
            # Longitude/latitude are passed on as the original text so the
            # written coordinates keep the input's exact formatting.
            coordinates.append([row[1], row[2], height])

        ls = kml.newlinestring(name='{}'.format(line_id))
        ls.coords = coordinates
        ls.extrude = 1
        ls.altitudemode = simplekml.AltitudeMode.relativetoground
        ls.style.linestyle.width = 5
        ls.style.linestyle.color = simplekml.Color.blue
    kml.save(output_data_file)