def doit(input_file, output_file, columns, delimiter, wants_negative_one, has_header):
    """Normalize selected columns of delimited text and write the result.

    The whole input is read into memory, transposed so each column becomes
    a row, the selected columns are converted to floats and passed to
    ``normalize``, and the table is transposed back and printed.

    Args:
        input_file: Iterator of text lines (e.g. an open file).
        output_file: Writable text stream receiving the output.
        columns: Column specification handed to ``csr.parse``; when falsy,
            every column of the first data row is selected.
        delimiter: Field separator used for both splitting and printing.
        wants_negative_one: If true the lower bound is -1.0, otherwise 0.0.
        has_header: If true the first line is copied through unchanged.

    Returns:
        None. Nothing is written when the input holds no data rows
        (including a header-only or completely empty input).
    """
    # Read the entire input, accounting for the header, if any.
    header = None
    if has_header:
        # A default keeps StopIteration from escaping on empty input, so the
        # empty case behaves the same with or without a header.
        header = next(input_file, None)
        if header is None:
            return
    data = tuple(line.rstrip().split(delimiter) for line in input_file)
    if not data:
        return

    # Set the lower bound and width.
    lower = -1.0 if wants_negative_one else 0.0
    width = 1.0 - lower

    # Determine desired column indices.
    # NOTE(review): `csr` is not imported in this function and must be
    # available at module scope; the second positional argument presumably
    # requests zero-based indices -- confirm against csr.parse.
    indices = csr.parse(columns, True) if columns else tuple(range(len(data[0])))

    # Transpose the data so that each column becomes a row.
    # zip() stops at the shortest row, so ragged input is truncated.
    data = list(zip(*data))

    # Normalize the desired columns, now in rows.
    for i in indices:
        data[i] = list(map(float, data[i]))
        # The return value is ignored, so normalize must modify the list
        # in place for this call to have any effect.
        normalize(data[i], lower, width)

    # Transpose the data back into columns.
    data = list(zip(*data))
    if has_header:
        # The header still carries its own line ending.
        print(header, end="", file=output_file)
    for row in data:
        print(*row, sep=delimiter, file=output_file)
def save(settings_file, input_file, output_file, columns_to_ignore,
         using_negative_one, using_standard_deviation):
    """Collect per-key statistics, record them, then normalize the input.

    Every ``(key, value)`` pair produced by ``read_pairs`` is added to a
    ``MinMaxDict``. Each entry is then printed to ``settings_file`` as
    ``key:entry``, the input is rewound, and ``normalize`` is invoked with
    the collected statistics.

    Args:
        settings_file: Writable text stream receiving one line per key.
        input_file: Input source; replaced by the seekable object returned
            from ``make_seekable``.
        output_file: Passed through to ``normalize``.
        columns_to_ignore: Column specification parsed by ``csr.parse``
            with ``as_index=True``.
        using_negative_one: Forwarded to ``MinMaxDict``.
        using_standard_deviation: Forwarded to ``MinMaxDict``.
    """
    import csr

    ignored = csr.parse(columns_to_ignore, as_index=True)
    stats = MinMaxDict(ignored, using_negative_one, using_standard_deviation)

    # The second element is never used here but stays bound for the rest of
    # the function -- presumably to keep a backing handle alive; confirm
    # against make_seekable.
    input_file, _backing_handle = make_seekable(input_file)

    # First pass: accumulate statistics.
    for key, value in read_pairs(input_file):
        stats[key].add(value)

    # Record the collected settings, one "key:entry" line each.
    for key in stats:
        print(key, stats[key], sep=':', file=settings_file)

    # Second pass: rewind and normalize using the gathered statistics.
    input_file.seek(0)
    normalize(stats, input_file, output_file)