Ejemplo n.º 1
0
def doit(input_file, output_file, columns, delimiter, wants_negative_one, has_header):
	"""Normalize selected columns of delimited text and write the result.

	Every remaining line of ``input_file`` is split on ``delimiter``. The
	selected columns are converted to ``float`` and handed to ``normalize``
	together with the target lower bound and width; all rows are then printed
	to ``output_file`` joined by ``delimiter``.

	Args:
		input_file: Iterator over text lines (for example an open file).
		output_file: Writable text stream that receives the output.
		columns: Column specification passed to ``csr.parse``. When falsy,
			every column (as counted in the first data row) is selected.
		delimiter: Field separator used for both reading and writing.
		wants_negative_one: When true the lower bound is -1.0 and the width
			is 2.0; otherwise the lower bound is 0.0 and the width is 1.0.
		has_header: When true the first line is set aside and echoed
			unchanged ahead of the data rows.

	Returns:
		None. Nothing at all is written (not even the header) when the input
		contains no data rows.
	"""
	# Set the header aside, if any. The default keeps a completely empty
	# input from raising StopIteration; it then falls through to the
	# "no data" early return below.
	header = next(input_file, "") if has_header else None

	# Read the rest of the input as one tuple of field lists.
	# NOTE(review): rstrip() with no argument removes all trailing
	# whitespace, so with a whitespace delimiter trailing empty fields are
	# dropped as well -- confirm this is intended.
	data = tuple(line.rstrip().split(delimiter) for line in input_file)
	if not data:
		return

	# Lower bound and width of the target range.
	lower = -1.0 if wants_negative_one else 0.0
	width = 1.0 - lower

	# Determine desired column indices (``True`` is the Python literal; the
	# lowercase spelling is an undefined name).
	indices = csr.parse(columns, True) if columns else tuple(range(len(data[0])))

	# Transpose so that every input column becomes one row.
	# NOTE(review): zip() stops at the shortest row, so ragged input is
	# silently truncated to the narrowest line.
	data = list(zip(*data))

	# Normalize the desired columns, now laid out as rows. The return value
	# of normalize() is not used, so it is presumably expected to modify the
	# list in place -- confirm against its definition.
	for i in indices:
		data[i] = list(map(float, data[i]))
		normalize(data[i], lower, width)

	# Transpose back into the original row orientation.
	data = list(zip(*data))

	if has_header:
		# The header still carries its own line ending, hence end="".
		print(header, end="", file=output_file)
	for row in data:
		print(*row, sep=delimiter, file=output_file)
Ejemplo n.º 2
0
def save(settings_file, input_file, output_file, columns_to_ignore, using_negative_one, using_standard_deviation):
  """Gather per-key statistics, write them out, then normalize the input.

  The input is read twice: a first pass feeds every (key, value) pair from
  ``read_pairs`` into a ``MinMaxDict``; after the collected entries have been
  printed to ``settings_file`` as ``key:entry`` lines, the input is rewound
  and handed to ``normalize`` together with the statistics.

  Args:
    settings_file: Writable text stream receiving one ``key:entry`` line per
      key in the statistics mapping.
    input_file: Input source; wrapped by ``make_seekable`` so that it can be
      rewound for the second pass.
    output_file: Destination passed through to ``normalize``.
    columns_to_ignore: Column specification parsed by ``csr.parse`` with
      ``as_index=True``.
    using_negative_one: Forwarded unchanged to ``MinMaxDict``.
    using_standard_deviation: Forwarded unchanged to ``MinMaxDict``.
  """
  import csr

  ignored = csr.parse(columns_to_ignore, as_index=True)
  stats = MinMaxDict(ignored, using_negative_one, using_standard_deviation)

  # make_seekable returns a pair; only the first element is used below.
  # NOTE(review): the second element is never closed or otherwise used here
  # -- check whether make_seekable expects the caller to release it.
  seekable, seekable_handle = make_seekable(input_file)

  # First pass: accumulate every value under its key.
  for key, value in read_pairs(seekable):
    stats[key].add(value)

  # Persist what was collected, one "key:entry" line per key.
  for key in stats:
    entry = stats[key]
    print(key, entry, sep=':', file=settings_file)

  # Second pass: rewind and normalize using the collected statistics.
  seekable.seek(0)
  normalize(stats, seekable, output_file)