def main(input_path_1, input_path_2, match_input_path, output_path_1, output_path_2):
    """Select matched features from two CSV data sets and write the results.

    Reads both input CSV files, applies the match results (a JSON document
    given either inline or as a path to a JSON file), and writes one output
    CSV per input containing only the matched columns, in match order.
    """
    first_dialect = get_dialect(input_path_1)
    second_dialect = get_dialect(input_path_2)
    data1, rows1, columns1, corner1 = read_csv(input_path_1, first_dialect)
    data2, rows2, columns2, corner2 = read_csv(input_path_2, second_dialect)

    # Accept either an inline JSON document or a path to a JSON file:
    # try to parse the argument itself first, then fall back to opening it.
    try:
        match_results = json.loads(match_input_path)
    except Exception:
        with open(match_input_path) as match_file:
            match_results = json.load(match_file)

    # 'mode' is a two-character string, one character per data set:
    # 'r' means the set's features are rows (so transpose it), anything
    # else (default 'c') means features are already columns.
    mode_string = match_results.get('mode', 'cc').lower()
    mode1, mode2 = mode_string
    if mode1 == 'r':
        data1 = data1.transpose()
        rows1, columns1 = columns1, rows1
    if mode2 == 'r':
        data2 = data2.transpose()
        rows2, columns2 = columns2, rows2

    # Each assignment pairs one feature from each data set; collect the
    # column index of each endpoint, then split into per-data-set lists.
    pair_indexes = (tuple(endpoint['index'] for endpoint in assignment['pair'][:2])
                    for assignment in match_results.get('assignments', []))
    index1, index2 = zip(*pair_indexes)
    index1 = list(index1)
    index2 = list(index2)

    # Fancy-index the headers and the data down to the matched columns.
    selected_columns_1 = columns1[index1]
    selected_data_1 = data1[:, index1]
    selected_columns_2 = columns2[index2]
    selected_data_2 = data2[:, index2]

    write_csv(selected_data_1, rows1, selected_columns_1, corner1, output_path_1, csv.excel)
    write_csv(selected_data_2, rows2, selected_columns_2, corner2, output_path_2, csv.excel)
"""Remove columns with missing/invalid data.""" import csv import math # include<lib/csv.py> # ifdef<LINTING> from included_files import get_dialect, read_csv, write_csv from girder_worker_environment import input_path_1, input_path_2 # endif dialect_1 = get_dialect(input_path_1) dialect_2 = get_dialect(input_path_2) data1, rows1, columns1, corner = read_csv(input_path_1, dialect_1) data2, rows2, columns2, corner = read_csv(input_path_2, dialect_2) invalid_indexes = set() n_columns1 = len(columns1) n_columns2 = len(columns2) for column_index in range(max(n_columns1, n_columns2)): is_invalid = False if column_index < n_columns1: for row_index in range(len(rows1)): is_invalid = (math.isinf(data1[row_index][column_index]) or math.isnan(data1[row_index][column_index]))
"""Extract the columns from one or two data sets.""" # include<lib/csv.py> import json # ifdef<LINTING> from included_files import get_dialect, ColumnExtractor, RowExtractor from girder_worker_environment import \ extract_columns, input_path_1, input_path_2, single_mode # endif feature_extractor = ColumnExtractor if extract_columns else RowExtractor if single_mode: dialect1 = get_dialect(input_path_1) list1 = None with open(input_path_1, 'rU') as input1: list1 = list(feature_extractor(input1, dialect1)) values = ({'id': a, 'description': a} for a in set(list1)) extract_result = list(sorted(values, key=(lambda x: x['id']))) else: dialect1 = get_dialect(input_path_1) dialect2 = get_dialect(input_path_2) list1 = None with open(input_path_1, 'rU') as input1: list1 = list(feature_extractor(input1, dialect1))
"""Remove headers from columns & rows.""" import csv from six import PY2 from tempfile import NamedTemporaryFile # include<lib/csv.py> # ifdef<LINTING> from included_files import get_dialect from girder_worker_environment import input_path # endif dialect = get_dialect(input_path) with open(input_path, 'rU') as input_f: reader = csv.reader(input_f, dialect) output_f = None if PY2: output_f = NamedTemporaryFile(mode='wb', delete=False) else: output_f = NamedTemporaryFile(mode='w', newline='', delete=False) output_path = output_f.name with output_f: writer = csv.writer(output_f, dialect=csv.excel) # Unconditionally skip the first row and # the first value in every subsequent row.