Example #1
import csv
import json

# include<lib/csv.py>

# ifdef<LINTING>
from included_files import get_dialect, read_csv, write_csv
# endif


def main(input_path_1, input_path_2, match_input_path, output_path_1,
         output_path_2):
    """Main entry point."""
    dialect_1 = get_dialect(input_path_1)
    dialect_2 = get_dialect(input_path_2)

    data1, rows1, columns1, corner1 = read_csv(input_path_1, dialect_1)
    data2, rows2, columns2, corner2 = read_csv(input_path_2, dialect_2)

    # The match input may arrive either as an inline JSON document or as the
    # path to a JSON file; try the inline form first, then fall back to disk.
    try:
        match_results = json.loads(match_input_path)
    except (ValueError, TypeError):
        with open(match_input_path) as f:
            match_results = json.load(f)

    # The 'mode' value is a two-character string, one character per input
    # table; an 'r' means that table's features are stored as rows, so it is
    # transposed below before the column selection.
    mode1, mode2 = match_results.get('mode', 'cc').lower()

    if mode1 == 'r':
        data1 = data1.transpose()
        rows1, columns1 = columns1, rows1

    if mode2 == 'r':
        data2 = data2.transpose()
        rows2, columns2 = columns2, rows2

    # Each assignment pairs one feature index from the first table with one
    # from the second; split the pairs into two parallel index lists.
    assignments = match_results.get('assignments', [])
    index1 = [assign['pair'][0]['index'] for assign in assignments]
    index2 = [assign['pair'][1]['index'] for assign in assignments]

    new_cols1 = columns1[index1]
    new_data1 = data1[:, index1]

    new_cols2 = columns2[index2]
    new_data2 = data2[:, index2]

    write_csv(new_data1, rows1, new_cols1, corner1, output_path_1, csv.excel)
    write_csv(new_data2, rows2, new_cols2, corner2, output_path_2, csv.excel)
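
# For reference, a minimal sketch of the match-results document that main()
# consumes; the field names come from the code above, while the values and
# file names are purely illustrative.
example_match_results = {
    'mode': 'cc',  # one character per input table: 'c' (columns) or 'r' (rows)
    'assignments': [
        {'pair': [{'index': 0}, {'index': 2}]},
        {'pair': [{'index': 3}, {'index': 1}]},
    ],
}

# main('table_a.csv', 'table_b.csv', json.dumps(example_match_results),
#      'table_a_matched.csv', 'table_b_matched.csv')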
Example #2
"""Remove columns with missing/invalid data."""

import csv
import math

# include<lib/csv.py>

# ifdef<LINTING>
from included_files import get_dialect, read_csv, write_csv
from girder_worker_environment import input_path_1, input_path_2
# endif
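# (The include directive above is presumably expanded with the shared CSV
# helpers when the task is assembled; the ifdef block exists only so that
# linters can resolve the helper names and input paths.)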

dialect_1 = get_dialect(input_path_1)
dialect_2 = get_dialect(input_path_2)

data1, rows1, columns1, corner = read_csv(input_path_1, dialect_1)
data2, rows2, columns2, corner = read_csv(input_path_2, dialect_2)

# Column positions that contain non-finite (inf or NaN) values.
invalid_indexes = set()

n_columns1 = len(columns1)
n_columns2 = len(columns2)

# Check every column position present in either table.
for column_index in range(max(n_columns1, n_columns2)):
    is_invalid = False

    if column_index < n_columns1:
        for row_index in range(len(rows1)):
            # Flag the column as soon as any of its values is inf or NaN.
            is_invalid = is_invalid or (math.isinf(data1[row_index][column_index])
                                        or math.isnan(data1[row_index][column_index]))
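
    # (The excerpt ends with the data1 check above. A sketch of how the scan
    # presumably continues: mirror the check over data2 and record offending
    # columns. This is an assumption, not the original code.)
    if not is_invalid and column_index < n_columns2:
        for row_index in range(len(rows2)):
            is_invalid = is_invalid or (math.isinf(data2[row_index][column_index])
                                        or math.isnan(data2[row_index][column_index]))

    if is_invalid:
        invalid_indexes.add(column_index)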
Example #3
"""Extract the columns from one or two data sets."""

# include<lib/csv.py>

import json

# ifdef<LINTING>
from included_files import get_dialect, ColumnExtractor, RowExtractor
from girder_worker_environment import \
    extract_columns, input_path_1, input_path_2, single_mode
# endif

# Features are taken from either the columns or the rows of the input,
# depending on the extract_columns flag.
feature_extractor = ColumnExtractor if extract_columns else RowExtractor

if single_mode:
    dialect1 = get_dialect(input_path_1)

    list1 = None
    with open(input_path_1, 'rU') as input1:
        list1 = list(feature_extractor(input1, dialect1))

    values = ({'id': a, 'description': a} for a in set(list1))
    extract_result = sorted(values, key=(lambda x: x['id']))

else:
    dialect1 = get_dialect(input_path_1)
    dialect2 = get_dialect(input_path_2)

    list1 = None
    with open(input_path_1, 'rU') as input1:
        list1 = list(feature_extractor(input1, dialect1))
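
    # (The excerpt ends with the first read above. A sketch of the presumable
    # continuation: read the second file the same way, then combine the two
    # value sets. The combination rule is an assumption; here the result keeps
    # only the feature names common to both files, mirroring the output shape
    # of the single-input branch.)
    list2 = None
    with open(input_path_2, 'rU') as input2:
        list2 = list(feature_extractor(input2, dialect2))

    values = ({'id': a, 'description': a} for a in set(list1) & set(list2))
    extract_result = sorted(values, key=(lambda x: x['id']))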
Example #4
"""Remove headers from columns & rows."""

import csv
from six import PY2
from tempfile import NamedTemporaryFile

# include<lib/csv.py>

# ifdef<LINTING>
from included_files import get_dialect
from girder_worker_environment import input_path
# endif

dialect = get_dialect(input_path)

with open(input_path, 'rU') as input_f:
    reader = csv.reader(input_f, dialect)

    output_f = None
    if PY2:
        output_f = NamedTemporaryFile(mode='wb', delete=False)
    else:
        output_f = NamedTemporaryFile(mode='w', newline='', delete=False)

    output_path = output_f.name

    with output_f:
        writer = csv.writer(output_f, dialect=csv.excel)

        # Unconditionally skip the first row and
        # the first value in every subsequent row.
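        # (The excerpt ends at the comment above; a sketch of the loop it
        # describes.)
        for i, row in enumerate(reader):
            if i == 0:
                continue
            writer.writerow(row[1:])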