Example #1
0
def iter_column(idx, f):
    """

    :param idx: index of column
    :param f: gzip file object of CSV dataset
    :return: col_type, null_values
             where col_type is inferred type from typeinference.py
             and null_values is whether null values were found and normalized.
    """
    f.seek(0)
    reader = UnicodeCSVReader(f)

    # Discard the header
    reader.next()

    col = []
    for row in reader:
        if row:
            try:
                col.append(row[idx])
            except IndexError:
                # Bad data. Maybe we can fill with nulls?
                pass
    col_type, null_values = normalize_column_type(col)
    return col_type, null_values
Example #2
0
def iter_column(idx, f):
    """

    :param idx: index of column
    :param f: gzip file object of CSV dataset
    :return: col_type, null_values
             where col_type is inferred type from typeinference.py
             and null_values is whether null values were found and normalized.
    """
    f.seek(0)
    reader = UnicodeCSVReader(f)

    # Discard the header
    reader.next()

    col = []
    for row in reader:
        if row:
            try:
                col.append(row[idx])
            except IndexError:
                # Bad data. Maybe we can fill with nulls?
                pass
    col_type, null_values = normalize_column_type(col)
    return col_type, null_values
Example #3
0
def iter_column(idx, f):
    f.seek(0)
    reader = UnicodeCSVReader(f)
    header = reader.next()
    col = []
    for row in reader:
        if row:
            try:
                col.append(row[idx])
            except IndexError:
                # Bad data. Maybe we can fill with nulls?
                pass
    col_type, null_values = normalize_column_type(col)
    return col_type, null_values
Example #4
0
def iter_column(idx, f):
    f.seek(0)
    reader = UnicodeCSVReader(f)
    header = reader.next()
    col = []
    for row in reader:
        if row:
            try:
                col.append(row[idx])
            except IndexError:
                # Bad data. Maybe we can fill with nulls?
                pass
    col_type, null_values = normalize_column_type(col)
    return col_type, null_values