Example #1
def utm_to_latlong(input_data_file=None, output_data_file=None, log_file=None):
    """Converts UTM coordinates into latitude/longitude.
    assumes rows are easting, northing, zone number, either 'N' for northern
    hemisphere or 'S' for southern hemisphere
    """
    logger = setup_logger(__name__, log_file)

    # Check required input and output data file names were given.
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'

    with open(input_data_file, 'r') as _in, \
            open(output_data_file, 'w') as _out:
        data = csv.reader(_in)
        output = csv.writer(_out)
        for row_ind, row in enumerate(data):
            east = float(row[0])
            north = float(row[1])
            zone = int(row[2])

            latlong = utm.to_latlon(east,
                                    north,
                                    zone,
                                    northern=('N' == row[3]))
            logger.info('Changed row {} from: {}  to: {}'.format(
                row_ind, (row[0], row[1]), latlong))

            output.writerow(latlong)
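For reference, a minimal sketch of how this converter might be invoked. The sample row, the file names, and the availability of utm, csv, and setup_logger in the module are assumptions for illustration, not part of the original listing.

# Hypothetical input row (easting, northing, zone number, hemisphere letter):
#   466219.5,4985364.1,13,N
# Hypothetical call; file names are made up.
utm_to_latlong(input_data_file='coords_utm.csv',
               output_data_file='coords_latlon.csv',
               log_file='coords.log')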
Example #2
def calc_dec_deg_to_deg_min_sec(missing_value,
                                input_data_file=None,
                                output_data_file=None,
                                log_file=None):
    # Convert decimal degrees to degrees, minutes, seconds
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert decimal degrees to degrees minutes seconds')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)):
                decimal_degree = math.fabs(float(line[0]))
                sign = math.copysign(1.0, float(line[0]))  # avoids ZeroDivisionError at 0.0
                degree = math.trunc(decimal_degree)
                minute = (decimal_degree - degree) * 60.
                second = (minute - math.trunc(minute)) * 60.
                minute = math.trunc(minute)
                degree = sign * degree
            else:
                degree = float(missing_value)
                minute = float(missing_value)
                second = float(missing_value)
            new_line.append(degree)
            new_line.append(minute)
            new_line.append(second)
            # Keep fractional seconds; degrees and minutes are whole numbers.
            output.writerow(['{:.0f}'.format(new_line[0]),
                             '{:.0f}'.format(new_line[1]),
                             '{:.4f}'.format(new_line[2])])
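A worked check of the arithmetic above, using a made-up value:

# Illustrative only (not from the original listing):
#   input value   : -105.2705
#   sign, |value| : -1, 105.2705
#   degree        : trunc(105.2705)      = 105
#   minute        : 0.2705 * 60 = 16.23  -> trunc -> 16
#   second        : 0.23 * 60            = 13.8
#   row written   : -105, 16, 13.8000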
Example #3
 def read_config(self, config_file):
     self.config_file = config_file
     with open(self.config_file) as open_config:
         self.config = yaml.safe_load(open_config)
     self.logger = setup_logger(__name__, log_file=self.get_log_file(), log_level=self.get_log_level())
     self.logger.debug('Loaded configuration file: {}'.format(config_file))
     return self.config
Example #4
def calc_deg_min_sec_to_dec_deg(missing_value,
                                input_data_file=None,
                                output_data_file=None,
                                log_file=None):
    # Convert degrees minutes seconds to decimal degrees
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert degrees minutes seconds to decimal degrees')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) and (float(
                    line[1]) != float(missing_value)) and (float(
                        line[2]) != float(missing_value)):
                sign = math.copysign(1.0, float(line[0]))  # avoids ZeroDivisionError at 0.0
                value = math.fabs(float(
                    line[0])) + float(line[1]) / 60. + float(line[2]) / 3600.
                value = value * sign
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
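The reverse direction, again with made-up numbers:

# Illustrative only (not from the original listing):
#   input row     : -105, 16, 13.8
#   sign          : -1
#   value         : 105 + 16/60 + 13.8/3600 = 105.2705
#   value written : -105.2705 (with 10 decimal places; array('f') stores single precision)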
Example #5
 def file_manager(self, filenames, output_dir, temp_dir, log_file=None, log_level=DEFAULT_LOG_LEVEL):
     """
     filenames: a sequence of paths to data files
     CURRENT: sends out a sequence of filenames after confirming they exist
     FID: a sequential numeric identifier for each file
     LOGFILE: sends out a sequence of log filenames that correspond to a data file
     """
     self.logger = setup_logger(__name__, log_file, log_level)
     step_files = []
     for identifier, name in enumerate(filenames, start=1):
         name_path = Path(name)
         output_dir_path = Path(output_dir)
         temp_dir_path = Path(temp_dir)
         if name_path.suffix == '':
             output_filename = name_path.name + '_out.csv'
             log_filename = name_path.name + '.log'
         else:
             output_filename = name_path.name.replace(name_path.suffix, '_out.csv')
             log_filename = name_path.name.replace(name_path.suffix, '.log')
         output_path = output_dir_path.joinpath(output_filename)
         log_path = temp_dir_path.joinpath(log_filename)
         try:
             # Create or truncate the log file. If its parent directory does
             # not exist, the resulting error is caught below.
             if log_path.exists():
                 log_path.write_bytes(b'')
             else:
                 log_path.touch(mode=0o666)
             step_files.append((name, str(output_path), identifier, str(log_path)))
         except FileNotFoundError:
             self.logger.error('The log file {f} does not exist and cannot be created.'.format(f=str(log_path)))
     return step_files
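A hypothetical invocation, only to show the shape of the returned tuples; the paths and the 'flow' object standing in for the owning instance are made up:

# Hypothetical usage:
step_files = flow.file_manager(['data/site_a.csv', 'data/site_b.csv'],
                               output_dir='out', temp_dir='tmp')
# step_files would then look like:
# [('data/site_a.csv', 'out/site_a_out.csv', 1, 'tmp/site_a.log'),
#  ('data/site_b.csv', 'out/site_b_out.csv', 2, 'tmp/site_b.log')]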
Example #6
def read_csv_file(file_name, log_file=None):
    logger = setup_logger(__name__, log_file)
    logger.info('Reading file: {}'.format(file_name))
    data = []
    with open(file_name, newline='') as _from:
        count = []
        data_reader = csv.reader(_from,
                                 quoting=csv.QUOTE_ALL,
                                 skipinitialspace=True,
                                 quotechar="'")
        for line in data_reader:
            new_line = []
            for elem in line:
                new_line.append(cast_to_decimal(elem))
            data.append(new_line)
            count.append(len(line))
        try:
            if len(data) == 0:
                logger.warning('Data file is empty.')
            else:
                column_check(count, logger)
        except IOError as e:
            logger.error(e)

    return data
Example #7
def math_add_constant(constant,
                      missing_value,
                      input_data_file=None,
                      output_data_file=None,
                      log_file=None):
    # Adds constant to all values in input_data_file and writes the result to
    # output_data_file.
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values to be added to is required.'
    assert output_data_file is not None, 'An output CSV file to write new values to is required.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Adding {} to the column'.format(constant))
        output = csv.writer(_out)
        reader = csv.reader(_in, quotechar="'", quoting=csv.QUOTE_ALL)
        decimal_constant = cast_float_to_decimal(constant)
        decimal_missing = cast_float_to_decimal(missing_value)
        for line in reader:
            new_line = []
            for item in line:
                decimal_item = cast_float_to_decimal(item)
                if decimal_item != decimal_missing:
                    value = decimal_item + decimal_constant
                else:
                    value = decimal_missing
                new_line.append(str(value))
            output.writerow(new_line)
Example #8
 def __init__(self, *args, **kwargs):
     super(ConfigTranslator, self).__init__(*args, **kwargs)
     self.config_file = None
     self.config = None
     self.logger = setup_logger(__name__, kwargs.get('log_file'))
Example #9
def reformat_dates_to_gtnp(date_time_format,
                           input_data_file=None,
                           output_data_file=None,
                           log_file=None):
    """
    Reformat the date/times.
    :param column_file: file containing date/time column
    :param out_file: CSV filename for reformatted date/times
    :param in_format: python strptime format string of date/times in column_file
    """
    logger = setup_logger(__name__, log_file)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        data = csv.reader(_in)
        output = csv.writer(_out)
        for line in data:
            for i, item in enumerate(line):
                try:
                    date_time = dt.datetime.strptime(item.strip(),
                                                     date_time_format)
                    quoted_dt = "'{0}'".format(
                        date_time.strftime(gtnp_date_time_format))
                    line[i] = quoted_dt
                except ValueError as error:
                    logger.error(error)
            output.writerow(line)
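A hypothetical call; the strptime format string and file names are invented, and gtnp_date_time_format is assumed to be a module-level constant:

# Hypothetical usage:
reformat_dates_to_gtnp('%d/%m/%Y %H:%M',
                       input_data_file='dates_raw.csv',
                       output_data_file='dates_gtnp.csv')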
Example #10
def calc_add_quad_col(missing_value,
                      input_data_file=None,
                      output_data_file=None,
                      log_file=None):
    # output = sqrt(in_column_A**2 + in_column_B**2)
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info(
            'Add two columns in quadrature (out = sqrt(column_a**2 +column_b**2))'
        )
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) and (float(
                    line[1]) != float(missing_value)):
                value = math.sqrt(
                    float(line[0]) * float(line[0]) +
                    float(line[1]) * float(line[1]))
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
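A quick sanity check of the quadrature sum, with made-up values:

# Illustrative only:
#   row         : 3.0, 4.0
#   value       : sqrt(3.0**2 + 4.0**2) = 5.0
#   row written : 5.0000000000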
Example #11
def calc_copy_col(input_data_file=None, output_data_file=None, log_file=None):
    # Copies the input column(s) unchanged to the output file.
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Copy input column to output column')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            output.writerow(line)
Example #12
def write_csv_file(output_file, output_data, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
    logger.info('Writing data to file: {}'.format(output_file))
    output_path = Path(output_file)
    with output_path.open('w', newline='\n') as _to:
        for line in output_data:
            for cnt, elem in enumerate(line):
                if isinstance(elem, Decimal):
                    _to.write(str(elem))
                else:
                    _to.write("'{}'".format(elem))
                if cnt < len(line) - 1:
                    _to.write(',')
                if cnt == len(line) - 1:
                    _to.write('\n')
    output_path.touch(mode=0o666, exist_ok=True)
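A small, hypothetical call showing the quoting convention used above (Decimal values are written bare, everything else single-quoted); the file name and rows are invented:

# Hypothetical usage:
from decimal import Decimal
write_csv_file('example_out.csv',
               [[Decimal('1.5'), 'site A'], [Decimal('2.5'), 'site B']])
# example_out.csv would then contain:
#   1.5,'site A'
#   2.5,'site B'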
Example #13
def write_csv_file(output_file, output_data, log_file=None):
    logger = setup_logger(__name__, log_file)
    logger.info('Writing data to file: {}'.format(output_file))
    output_path = Path(output_file)
    with output_path.open('w', newline='\n') as _to:
        for line in output_data:
            for cnt, elem in enumerate(line):
                if isinstance(elem, Decimal):
                    _to.write(str(elem))
                else:
                    _to.write("'{}'".format(elem))
                if cnt < len(line) - 1:
                    _to.write(',')
                if cnt == len(line) - 1:
                    _to.write('\n')
    output_path.touch(mode=0o666, exist_ok=True)
Example #14
def widget_template(method_arg_1, method_arg_2, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    if log_file:
        logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
        logger.info('I am a widget. Here are my arguments:')
        logger.info('\tinput_data_file = {}'.format(input_data_file))
        logger.info('\toutput_data_file = {}'.format(output_data_file))
        logger.info('\tlog_file = {}'.format(log_file))
        logger.info('\tlog_level = {}'.format(log_level))
        logger.info('\tmethod_arg_1 = {}'.format(method_arg_1))
        logger.info('\tmethod_arg_2 = {}'.format(method_arg_2))
    else:
        print('I am a widget. Here are my arguments:')
        print('\tinput_data_file = {}'.format(input_data_file))
        print('\toutput_data_file = {}'.format(output_data_file))
        print('\tlog_file = {}'.format(log_file))
        print('\tlog_level = {}'.format(log_level))
        print('\tmethod_arg_1 = {}'.format(method_arg_1))
        print('\tmethod_arg_2 = {}'.format(method_arg_2))
Example #15
def sort_by_columns(column_list, input_data_file=None, output_data_file=None, log_file=None):
    """
    Takes a list of columns to sort by in ascending order.
    :param input_data_file: CSV file to sort
    :param output_data_file: sorted CSV file
    :param column_list: list of tuples (index, type) describing sort columns
    """
    logger = setup_logger(__name__, log_file)
    logger.info('Sorting input file by columns:')
    if isinstance(column_list, str):
        column_list = tuple_list(column_list)
    shifted_list = []
    for index, ind_type in column_list:
        index = index - 1
        new_tuple = (index, ind_type)
        logger.info('\t' + str(new_tuple))
        shifted_list.append(new_tuple)
    header_row = None
    sorted_data = []
    with open(input_data_file, 'r') as csvfile:
        unsorted_reader = csv.reader(csvfile, delimiter=',', quotechar="'")
        csv_data = []
        ind = 0
        for row in unsorted_reader:
            row = [cast_data_value(col_val.strip()) for col_val in row]
            if ind > 0:
                typed_row = create_typed_row(row, shifted_list, logger)
                csv_data.append(typed_row)
            else:
                header_row = row
            ind += 1
        sorted_data = csv_data
        for index, ind_type in reversed(shifted_list):
            sorted_data = sorted(sorted_data, key=lambda sort_by: sort_by[index])

    # date_time_index and gtnp_date_time_format are assumed to be module-level
    # names defined elsewhere in this codebase.
    with open(output_data_file, 'w', newline='') as out_file:
        sorted_writer = csv.writer(out_file, quotechar="'",
                                   quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
        sorted_writer.writerow(header_row)
        for sorted_row in sorted_data:
            if date_time_index is not None:
                row_list = list(sorted_row)
                row_list[date_time_index] = row_list[date_time_index].strftime(gtnp_date_time_format)
                sorted_row = tuple(row_list)
            sorted_writer.writerow(sorted_row)
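For context, a hypothetical call; the column specification is a list of 1-based (index, type) tuples as consumed by the loop above, and the type labels and file names here are invented:

# Hypothetical usage:
sort_by_columns([(1, 'date'), (3, 'float')],
                input_data_file='readings.csv',
                output_data_file='readings_sorted.csv')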
Example #16
def widget_template(method_arg_1,
                    method_arg_2,
                    input_data_file=None,
                    output_data_file=None,
                    log_file=None):
    if log_file:
        logger = setup_logger(__name__, log_file)
        logger.info('I am a widget. Here are my arguments:')
        logger.info('\tinput_data_file = {}'.format(input_data_file))
        logger.info('\toutput_data_file = {}'.format(output_data_file))
        logger.info('\tlog_file = {}'.format(log_file))
        logger.info('\tmethod_arg_1 = {}'.format(method_arg_1))
        logger.info('\tmethod_arg_2 = {}'.format(method_arg_2))
    else:
        print('I am a widget. Here are my arguments:')
        print('\tinput_data_file = {}'.format(input_data_file))
        print('\toutput_data_file = {}'.format(output_data_file))
        print('\tlog_file = {}'.format(log_file))
        print('\tmethod_arg_1 = {}'.format(method_arg_1))
        print('\tmethod_arg_2 = {}'.format(method_arg_2))
Example #17
def math_absolute_value(missing_value, input_data_file=None, output_data_file=None, log_file=None):
    # Takes absolute value of all values in input_data_file and writes result to
    # output_data_file.
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Absolute value of column')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            for item in line:
                if float(item) != float(missing_value):
                    value = math.fabs(float(item))
                else:
                    value = float(missing_value)
                new_line.append(value)
            output.writerow(['{:.2f}'.format(x) for x in new_line])
Example #18
def math_multiply_constant(constant, missing_value, input_data_file=None,
                           output_data_file=None, log_file=None):
    # multiplies all values in input_data_file by a constant and writes result to
    # output_data_file.
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Multiplying column by {}'.format(constant))
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            for item in line:
                if float(item) != float(missing_value):
                    value = float(item) * float(constant)
                else:
                    value = float(missing_value)
                new_line.append(value)
            output.writerow(['{:.2f}'.format(x) for x in new_line])
Example #19
def reformat_dates_to_gtnp(date_time_format, input_data_file=None, output_data_file=None, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """
    Reformat the date/times.
    :param column_file: file containing date/time column
    :param out_file: CSV filename for reformatted date/times
    :param in_format: python strptime format string of date/times in column_file
    """
    logger = setup_logger(__name__, log_file, log_level)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        data = csv.reader(_in)
        output = csv.writer(_out)
        for line in data:
            for i, item in enumerate(line):
                try:
                    date_time = dt.datetime.strptime(item.strip(), date_time_format)
                    quoted_dt = "'{0}'".format(date_time.strftime(gtnp_date_time_format))
                    line[i] = quoted_dt
                except ValueError as error:
                    logger.error(error)
            output.writerow(line)
Example #20
def calc_divide_col(missing_value,
                    input_data_file=None,
                    output_data_file=None,
                    log_file=None):
    # output = in_column_A / in_column_B
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Divide two columns (out = column_a / column_b)')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) \
                    and (float(line[1]) != float(missing_value)) and (float(line[1]) != 0.0):
                value = float(line[0]) / float(line[1])
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
Example #21
    def __init__(self, flow_name, log_file=None):
        # Flow level information and utilities
        self.flow_name = flow_name
        if log_file is None:
            self.log_file = os.path.join(os.getcwd(), self.flow_name.split('.')[0] + '.log')
        else:
            self.log_file = log_file
        self.logger = setup_logger('', self.log_file)
        self.logger.info('Setup logging into: {}'.format(self.log_file))

        self.config_translator = ConfigTranslator()
        self.config_translator.read_config(self.flow_name)
        self.widget_factory = WidgetFactory()
        self.file_manager = None
        self.variable_mapper = None

        self.file_reader = None
        self.input_files = None
        self.input_manipulations = []

        self.file_writer = None
        self.output_directory = None
        self.output_manipulations = []
Example #22
def read_csv_file(file_name, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
    logger.info('Reading file: {}'.format(file_name))
    data = []
    with open(file_name, newline='') as _from:
        count = []
        data_reader = csv.reader(_from, quoting=csv.QUOTE_ALL, skipinitialspace=True, quotechar="'")
        for line in data_reader:
            new_line = []
            for elem in line:
                new_line.append(cast_to_decimal(elem))
            data.append(new_line)
            count.append(len(line))
        try:
            if len(data) == 0:
                logger.warning('Data file is empty.')
            else:
                column_check(count, logger)
        except IOError as e:
            logger.error(e)

    return data
Example #23
 def file_manager(self, filenames, output_dir, temp_dir, log_file=None):
     """
     filenames: a sequence of paths to data files
     CURRENT: sends out a sequence of filenames after confirming they exist
     FID: a sequential numeric identifier for each file
     LOGFILE: sends out a sequence of log filenames that correspond to a data file
     """
     self.logger = setup_logger(__name__, log_file)
     step_files = []
     for identifier, name in enumerate(filenames, start=1):
         name_path = Path(name)
         output_dir_path = Path(output_dir)
         temp_dir_path = Path(temp_dir)
         if name_path.suffix == '':
             output_filename = name_path.name + '_out.csv'
             log_filename = name_path.name + '.log'
         else:
             output_filename = name_path.name.replace(
                 name_path.suffix, '_out.csv')
             log_filename = name_path.name.replace(name_path.suffix, '.log')
         output_path = output_dir_path.joinpath(output_filename)
         log_path = temp_dir_path.joinpath(log_filename)
         try:
             # Create or truncate the log file. If its parent directory does
             # not exist, the resulting error is caught below.
             if log_path.exists():
                 log_path.write_bytes(b'')
             else:
                 log_path.touch(mode=0o666)
             step_files.append(
                 (name, str(output_path), identifier, str(log_path)))
         except FileNotFoundError:
             self.logger.error(
                 'The log file {f} does not exist and cannot be created.'.
                 format(f=str(log_path)))
     return step_files
Example #24
    def variable_map(self, input_data, map_file, log_file=None):
        # Columns are separated by runs of two or more spaces
        sep = '  '
        n_entries = 7

        logger = setup_logger(__name__, log_file)
        logger.info('Running variable mapper.')
        # in_map = {column name: column index} of the original data file
        # in_details: {column name: [units, description]} of the original data file
        # out_map = {column name: column index} of the processed file
        # out_details: {column name: [units, description]} of the processed file
        # name_converter: {input column name: output column name}
        in_map = {}
        in_details = {}
        out_map = {}
        out_details = {}
        name_converter = {}
        with open(map_file) as f:
            # Possible improvement: skip over n "headlines" instead of just 1
            firstline = True
            for line in f:
                if (firstline):
                    # skips first line
                    firstline = False
                    continue
                # Divide each line into entries
                pattern = '{0}+'.format(sep)
                entries = re.split(pattern, line)
                if (len(entries) != n_entries and len(entries) != 0):
                    # Check that the number of entries is correct
                    logger.info('Map file: {m}'.format(m=map_file))
                    logger.info(
                        'Expected number of columns: {e}'.format(e=n_entries))
                    logger.info(
                        'Read number of columns: {r}'.format(r=len(entries)))
                    logger.info('Read entries: {}'.format(entries))
                    raise IndexError('File has the wrong number of columns.')
                else:
                    in_header, operation, out_header, in_index, out_index, \
                        units, description = self.entries_breakout(entries)
                    # TODO: description and units should be passed around as metadata
                    # Build the name converter
                    name_converter[in_header] = out_header
                    if (in_header and in_index > 0):
                        # If the input exists, store data about it
                        in_map.update({in_header: in_index - 1})
                        in_details.update(
                            {in_header: [operation, description]})
                    if (out_header and out_index > 0):
                        # If the output exists, store data about it
                        out_map.update({out_header: out_index - 1})
                        out_details.update({out_header: [units, description]})

        output_data = []
        # headline = next(data)  # Pulls the first line of the file as headers
        # Construct the first line of the output file from the given information
        headline = [''] * len(out_map)
        for name, index, details in zip(out_map.keys(), out_map.values(),
                                        out_details.values()):
            if (details[0]):
                # units exist
                formatstr = '{name} ({unit})'
            else:
                formatstr = '{name}'
            headline[index] = formatstr.format(name=name, unit=details[0])
        output_data.append(headline)
        copies = {}
        for in_name in in_map.keys():
            # Figure out which columns need to be copied
            if name_converter[in_name] in out_map:
                # copies is a dictionary of input column index -> output column index
                copies[in_map[in_name]] = out_map[name_converter[in_name]]
        firstline = True
        for line in input_data:
            # Copy selected columns
            if (firstline):
                firstline = False
                continue
            outputline = [''] * len(out_map)
            for _from, _to in copies.items():
                outputline[_to] = line[_from]
            output_data.append(outputline)

        # Returns:
        #   - the output data.
        #   - a dictionary of column name -> index for the input csv
        #   - a dictionary of column name -> index for the output csv
        #   - a dictionary of data column name -> destination column name
        result = [
            output_data, in_map, out_map,
            {v: k
             for k, v in name_converter.items()}
        ]

        return result
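To make the parsing above concrete, a hypothetical map file laid out the way this parser expects: one header line, then rows of seven entries separated by runs of two or more spaces. The column names, variable names, and units below are invented for illustration:

# Hypothetical variable-mapping file:
#
# in_header   operation  out_header   in_index  out_index  units  description
# air_temp    copy       temperature  1         2          degC   air temperature
# depth       copy       depth        2         1          m      probe depth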
Example #25
 def setup_logger(self, name, log_file):
     if name is None:
         name = self.channel
     self.logger = setup_logger(name, log_file)
Example #26
    def variable_map(self, input_data, map_file, log_file=None, log_level=DEFAULT_LOG_LEVEL):
        # reads in a file mapping input columns to output columns
        # then copies the input data to the output data

        logger = setup_logger(__name__, log_file, log_level=log_level)
        logger.info('Running variable mapper.')
        # in_map = {column name: column index} of the original data file
        # in_details: {column name: [units, description]} of the original data file
        # out_map = {column name: column index} of the processed file
        # out_details: {column name: [units, description]} of the processed file
        # name_converter: {input column name: output column name}
        in_map = {}
        in_details = {}
        out_map = {}
        out_details = {}
        name_converter = {}

        # read in all the variable mapping information
        col_number = []
        in_name = []
        operation = []
        out_name = []
        in_index = []
        out_index = []
        units = []
        description = []
        num_rec = 0
        in_column = 0
        out_column = 0
        with open(map_file, newline='') as _in:
            logger.info('\tMap file: {m}'.format(m=map_file))
            reader = csv.reader(_in)
            # Possible improvement: skip over n "headlines" instead of just 1
            firstline = True
            for line in reader:
                if (firstline):
                    # skips first line
                    firstline = False
                    continue
                # Divide each line into entries
                num_rec = num_rec + 1
                col_number.append(line[0])
                in_name.append(line[1])
                operation.append(line[2])
                out_name.append(line[3])
                in_index.append(line[4])
                out_index.append(line[5])
                units.append(line[6])
                description.append(line[7])

        # Create the mapping dictionaries
        headline = []
        for i in range(num_rec):
            if (in_index[i] != '0'):
                in_column = in_column + 1
                # If the input exists, store data about it
                in_map.update({in_name[i]: int(in_index[i]) - 1})
                in_details.update({in_name[i]: [operation[i], description[i]]})
            if (out_index[i] != '0'):
                out_column = out_column + 1
                # If the output exists, store data about it
                out_map.update({out_name[i]: int(out_index[i]) - 1})
                out_details.update({out_name[i]: [units[i], description[i]]})
                name_converter[in_name[i]] = out_name[i]
                text_string = '{} ({})'.format(out_name[i], units[i])
                headline.append(text_string)

        # Define output data
        output_data = []

        # Append header of output data
        output_data.append(headline)

        # Figure out which columns need to be copied
        num_copies = 0
        copy_from = []
        copy_to = []
        for i in range(num_rec):
            if (operation[i] == 'copy'):
                num_copies = num_copies + 1
                copy_from.append(int(in_index[i]) - 1)
                copy_to.append(int(out_index[i]) - 1)
        logger.info('\tnum_copies: {} '.format(num_copies))

        # Copy input data to output data
        firstline = True
        for line in input_data:
            # Copy selected columns
            if (firstline):
                firstline = False
                continue
            outputline = [''] * out_column
            for i in range(num_copies):
                outputline[copy_to[i]] = line[copy_from[i]]
            output_data.append(outputline)

        # Return to DIT
        # Returns:
        #   - the output data.
        #   - a dictionary of column name -> index for the input csv
        #   - a dictionary of column name -> index for the output csv
        #   - a dictionary of data column name -> destination column name
        result = [output_data,
                  in_map,
                  out_map,
                  {v: k for k, v in name_converter.items()}]

        return result
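For this CSV-based variant, a hypothetical map file with the eight columns the reader above expects (column number, input name, operation, output name, input index, output index, units, description); the names and units are invented:

# Hypothetical CSV map file:
#
# col,in_name,operation,out_name,in_index,out_index,units,description
# 1,air_temp,copy,temperature,1,2,degC,air temperature
# 2,depth,copy,depth,2,1,m,probe depth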