Beispiel #1
0
    def main(self):
        """Write the selected columns of the input CSV to the output.

        Honors --names-only (print headers and exit), --no-header-row
        (synthesize default headers) and --delete-empty (skip rows whose
        selected cells are all empty).
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = agate.reader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top so it is emitted as data.
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)
        output = agate.writer(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for row in rows:
            # Short rows are padded with None for the missing columns.
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # BUGFIX: cells can be None (short rows) and ''.join() raises
            # TypeError on None; any() treats both None and '' as empty.
            if self.args.delete_empty:
                if not any(out_row):
                    continue

            output.writerow(out_row)
Beispiel #2
0
    def main(self):
        """Stack two or more CSV files on top of each other (csvstack).

        Optionally prefixes every row with a grouping value taken from
        --filenames or an explicit --groups list.
        """
        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if len(self.input_files) < 2:
            self.argparser.error('You must specify at least two files to stack.')

        if self.args.group_by_filenames:
            groups = [os.path.split(f.name)[1] for f in self.input_files]
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.input_files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
        else:
            groups = None

        group_name = self.args.group_name if self.args.group_name else 'group'

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        for i, f in enumerate(self.input_files):
            rows = CSVKitReader(f, **self.reader_kwargs)

            if not self.args.no_header_row:
                # Real header row: consume it (default to [] for empty files).
                headers = next(rows, [])
            else:
                # No header: peek at the first data row to size default headers.
                row = next(rows, [])
                headers = make_default_headers(len(row))

            # Emit the (possibly group-prefixed) header once, for the first
            # file only. This was duplicated in both branches before; a dead
            # commented-out line was also removed.
            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            # The peeked data row still has to be written out.
            if self.args.no_header_row:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

            for row in rows:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

            f.close()
Beispiel #3
0
    def main(self):
        """Write the selected columns, stripping leading/trailing whitespace
        from every cell on the way out.
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names,
                                              self.args.zero_based,
                                              self.args.not_columns)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        # BUGFIX: use raw strings -- '\s' in a plain literal is an invalid
        # escape sequence (DeprecationWarning on modern Pythons). Compile
        # once, outside the loop, instead of on every cell.
        leading = re.compile(r'^\s+')
        trailing = re.compile(r'\s+$')

        def drop_white(value):
            # Strip leading then trailing whitespace from one cell.
            return trailing.sub('', leading.sub('', value))

        for row in rows:
            out_row = [
                drop_white(row[c]) if c < len(row) else None
                for c in column_ids
            ]
            output.writerow(out_row)
Beispiel #4
0
    def main(self):
        """Split the input CSV into numbered <name>.part.N files of at most
        --lines data rows each, repeating the header in every part.
        """
        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        # Use next() rather than the Python 2-only rows.next().
        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        # NOTE(review): this unconditionally overwrites the header computed
        # above; preserved as-is since callers rely on -c naming the output
        # columns.
        column_names = self.args.columns.split(',')

        base_name = self.args.file._lazy_args[0]

        def open_part(n):
            # Open part file n; return (file handle, writer with header written).
            part_file = open("%s.part.%d" % (base_name, n), 'w')
            writer = CSVKitWriter(part_file, **self.writer_kwargs)
            writer.writerow(column_names)
            return part_file, writer

        part_count = 0
        current_file, output = open_part(part_count)

        count = 0
        for row in rows:
            if (self.args.lines > 0) and (count == self.args.lines):
                part_count += 1
                count = 0
                # BUGFIX: close the underlying file explicitly. `del output`
                # only dropped the writer wrapper and could leave buffered
                # rows unflushed (and leaked the handle).
                current_file.close()
                current_file, output = open_part(part_count)

            output.writerow(row)
            count += 1

        # Close (and flush) the final part as well.
        current_file.close()
Beispiel #5
0
    def main(self):
        """De-duplicate rows on the --uniq-column columns; the first row seen
        with a given key wins.
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        if self.args.no_header_row:
            row = next(rows)
            column_names = make_default_headers(len(row))
            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(None, column_names,
                                              self.args.zero_based)
        uniq_column_id = parse_column_identifiers(self.args.uniq_column,
                                                  column_names,
                                                  self.args.zero_based)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow([column_names[c] for c in column_ids])

        seen = set()  # cache of key tuples already written

        for row in rows:
            # Tuples are hashable, so they can serve as set members.
            # Compute the key once per row (it was built twice before) and
            # use set.add() instead of update() with a one-element list.
            key = tuple(row[i] for i in uniq_column_id)
            if key in seen:
                continue
            seen.add(key)
            out_row = [row[c] if c < len(row) else None for c in column_ids]
            output.writerow(out_row)
Beispiel #6
0
    def main(self):
        """Keep rows for which a user-supplied Python expression is true
        (--filter-expr) or false (--not-filter-expr).
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        if self.args.no_header_row:
            row = next(rows)
            column_names = make_default_headers(len(row))
            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(None, column_names, self.args.zero_based)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        # write header
        output.writerow([column_names[c] for c in column_ids])

        def float_or_else(x):
            # Coerce to float when possible; otherwise keep the string.
            try:
                return float(x)
            except ValueError:
                return x

        # The two original loops differed only in which expression was used
        # and whether its result was negated -- fold them into one.
        expr = self.args.filter_expr or self.args.not_filter_expr
        keep_if_true = bool(self.args.filter_expr)

        if expr:
            for row in rows:
                # Expose each column value by name to the expression.
                namespace = {name: float_or_else(value)
                             for name, value in zip(column_names, row)}
                # SECURITY: eval() executes arbitrary code -- the expression
                # must come from a trusted command line, never untrusted input.
                if bool(eval(expr, namespace)) == keep_if_true:
                    out_row = [row[c] if c < len(row) else None for c in column_ids]
                    output.writerow(out_row)
Beispiel #7
0
    def main(self):
        """Write the selected columns of the input CSV to the output.

        Honors --names-only, --no-header-row and --delete-empty.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top so it is emitted as data.
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names,
                                              self.args.zero_based,
                                              self.args.not_columns)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for row in rows:
            # Short rows are padded with None for the missing columns.
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # BUGFIX: cells can be None (short rows) and ''.join() raises
            # TypeError on None; any() treats both None and '' as empty.
            if self.args.delete_empty:
                if not any(out_row):
                    continue

            output.writerow(out_row)
Beispiel #8
0
    def main(self):
        """Apply a user-supplied Python expression (--map-expr) to the cells
        of the selected columns, passing the other columns through unchanged.
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        all_column_ids = parse_column_identifiers(None, column_names, self.args.zero_based, self.args.not_columns)
        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in all_column_ids])

        # BUGFIX: the statement form `exec "..." in d` is Python 2-only
        # (SyntaxError on Python 3); the call form works on both.
        # SECURITY: exec() runs arbitrary code -- map_expr must come from a
        # trusted command line, never untrusted input.
        namespace = {}
        exec("def f(x): return %s" % (self.args.map_expr), namespace)
        mapper = namespace['f']

        # Membership is tested once per cell; a set makes it O(1).
        mapped_ids = set(column_ids)

        for row in rows:
            out_row = []
            for c in all_column_ids:
                if c < len(row):
                    value = row[c]
                    out_row.append(mapper(value) if c in mapped_ids else value)
                else:
                    # Short rows: pad with None (the mapper is not applied).
                    out_row.append(None)
            output.writerow(out_row)
Beispiel #9
0
    def main(self):
        """
        Render the input CSV as a fixed-width, ASCII-art table.

        NOTE(review): this is Python 2-only code (`rows.next()`, `unicode`,
        explicit .encode('utf-8') on writes) -- porting it would change
        behavior, so it is documented as-is.
        """
        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        # Make a default header row if none exists
        if self.args.no_header_row:
            row = rows.next()

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = rows.next()

        column_names = list(column_names)

        # prepend 'line_number' column with line numbers if --linenumbers option
        if self.args.line_numbers:
            column_names.insert(0, 'line_number')
            rows = [
                list(itertools.chain([str(i + 1)], row))
                for i, row in enumerate(rows)
            ]

        # Convert to normal list of rows
        rows = list(rows)

        # Insert the column names at the top
        rows.insert(0, column_names)

        # widths[i] ends up as the widest cell seen anywhere in column i.
        widths = []

        for row in rows:
            for i, v in enumerate(row):
                try:
                    if len(v) > widths[i]:
                        widths[i] = len(v)
                except IndexError:
                    # First row that reaches column i starts its width tally.
                    widths.append(len(v))

        # Dashes span each width with '+' character at intersection of
        # horizontal and vertical dividers.
        divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'

        # NOTE(review): the divider is written unencoded while row lines
        # below are UTF-8 encoded -- inconsistent, though harmless for the
        # ASCII-only divider.
        self.output_file.write('%s\n' % divider)

        for i, row in enumerate(rows):
            output = []

            for j, d in enumerate(row):
                if d is None:
                    d = ''
                # Left-justify every cell to its column width.
                output.append(' %s ' % unicode(d).ljust(widths[j]))

            self.output_file.write(
                ('| %s |\n' % ('|'.join(output))).encode('utf-8'))

            # Divider after the header row and again after the last row.
            if (i == 0 or i == len(rows) - 1):
                self.output_file.write('%s\n' % divider)
Beispiel #10
0
    def main(self):
        """Stack two or more CSV files on top of each other (csvstack).

        Optionally prefixes every row with a grouping value taken from
        --filenames or an explicit --groups list.
        """
        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if len(self.input_files) < 2:
            self.argparser.error('You must specify at least two files to stack.')

        if self.args.group_by_filenames:
            groups = [os.path.split(f.name)[1] for f in self.input_files]
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.input_files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
        else:
            groups = None

        group_name = self.args.group_name if self.args.group_name else 'group'

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        for i, f in enumerate(self.input_files):
            rows = CSVKitReader(f, **self.reader_kwargs)

            if not self.args.no_header_row:
                # Real header row: consume it (default to [] for empty files).
                headers = next(rows, [])
            else:
                # No header: peek at the first data row to size default headers.
                row = next(rows, [])
                headers = make_default_headers(len(row))

            # Emit the (possibly group-prefixed) header once, for the first
            # file only. This logic was duplicated in both branches before.
            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            # The peeked data row still has to be written out.
            if self.args.no_header_row:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

            for row in rows:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

            f.close()
Beispiel #11
0
    def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
        """
        Creates a new Table from a file-like object containing CSV data.

        Note: the column_ids argument will cause only those columns with a matching identifier
        to be parsed, type inferred, etc. However, their order/index property will reflect the
        original data (e.g. column 8 will still be "order" 7, even if it's the third column
        in the resulting Table).
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        # snifflimit == 0 means do not sniff
        if snifflimit is None:
            kwargs['dialect'] = sniffer.sniff_dialect(contents)
        elif snifflimit > 0:
            kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

        f = six.StringIO(contents)
        rows = CSVKitReader(f, **kwargs)

        # BUGFIX: completely empty input (e.g. /dev/null) used to raise an
        # unhandled StopIteration here; treat it as a zero-column table.
        try:
            if no_header_row:
                # Peek at a row to infer column names from, then put it back on top.
                row = next(rows)
                rows = itertools.chain([row], rows)
                headers = make_default_headers(len(row))
            else:
                headers = next(rows)
        except StopIteration:
            headers = []

        # With synthesized headers, or an explicit selection, resolve the
        # identifiers; otherwise keep every column in order.
        if no_header_row or column_ids:
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]
        else:
            column_ids = range(len(headers))

        # One accumulator list per selected column.
        data_columns = [[] for c in headers]

        for i, row in enumerate(rows):
            for j, d in enumerate(row):
                try:
                    # Pull the cell via the original column index so that the
                    # source ordering is preserved.
                    data_columns[j].append(row[column_ids[j]].strip())
                except IndexError:
                    # Non-rectangular data is truncated
                    break

        columns = []

        for i, c in enumerate(data_columns):
            columns.append(Column(column_ids[i], headers[i], c, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

        return Table(columns, name=name)
Beispiel #12
0
    def main(self):
        """Render the input CSV as a fixed-width, ASCII-art table."""
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        # Synthesize headers when the file has none, re-attaching the peeked
        # row so it is still rendered as data.
        if self.args.no_header_row:
            first = next(rows)
            column_names = make_default_headers(len(first))
            rows = itertools.chain([first], rows)
        else:
            column_names = next(rows)

        column_names = list(column_names)

        # --linenumbers: prepend a 'line_number' column.
        if self.args.line_numbers:
            column_names.insert(0, 'line_number')
            rows = [list(itertools.chain([str(n + 1)], r)) for n, r in enumerate(rows)]

        # Materialize everything and put the header on top.
        rows = list(rows)
        rows.insert(0, column_names)

        # widths[idx] = widest cell seen anywhere in column idx.
        widths = []
        for r in rows:
            for idx, cell in enumerate(r):
                if idx < len(widths):
                    if len(cell) > widths[idx]:
                        widths[idx] = len(cell)
                else:
                    widths.append(len(cell))

        # Dashes span each width with '+' character at intersection of
        # horizontal and vertical dividers.
        divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'

        def write(text):
            # All output is UTF-8 encoded bytes.
            self.output_file.write(text.encode('utf-8'))

        write('%s\n' % divider)

        last = len(rows) - 1
        for n, r in enumerate(rows):
            cells = []

            for idx, cell in enumerate(r):
                if cell is None:
                    cell = ''
                cells.append(' %s ' % six.text_type(cell).ljust(widths[idx]))

            write('| %s |\n' % ('|'.join(cells)))

            # Divider after the header row and after the final row.
            if n == 0 or n == last:
                write('%s\n' % divider)
Beispiel #13
0
    def main(self):
        """Search the selected columns and write matching (or, with
        --inverse, non-matching) rows to the output.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.columns:
            self.argparser.error(
                'You must specify at least one column to search using the -c option.'
            )

        if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
            self.argparser.error(
                'One of -r, -m or -f must be specified, unless using the -n option.'
            )

        rows = agate.reader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            first = next(rows)
            column_names = make_default_headers(len(first))
            # Re-attach the peeked row so it is searched as data.
            rows = itertools.chain([first], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names,
                                              self.args.zero_based)

        # Build a single matcher: compiled regex, membership in the match
        # file's lines, or a plain pattern string.
        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            lines = set(line.rstrip() for line in self.args.matchfile)

            def pattern(value):
                return value in lines
        else:
            pattern = self.args.pattern

        # Every selected column gets the same matcher.
        patterns = {c: pattern for c in column_ids}

        output = agate.writer(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        filter_reader = FilteringCSVReader(rows,
                                           header=False,
                                           patterns=patterns,
                                           inverse=self.args.inverse)

        for matched in filter_reader:
            output.writerow(matched)
Beispiel #14
0
    def main(self):
        """Search the selected columns and write matching (or, with
        --inverse, non-matching) rows to the output.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.columns:
            self.argparser.error('You must specify at least one column to search using the -c option.')

        if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
            self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

        rows = agate.reader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            peeked = next(rows)
            column_names = make_default_headers(len(peeked))
            # Re-attach the peeked row so it is searched as data.
            rows = itertools.chain([peeked], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

        # Decide how cells are matched: compiled regex, membership in the
        # match file's lines, or a plain pattern string.
        if self.args.regex:
            matcher = re.compile(self.args.regex)
        elif self.args.matchfile:
            wanted = set(line.rstrip() for line in self.args.matchfile)
            matcher = lambda cell: cell in wanted
        else:
            matcher = self.args.pattern

        # Every selected column shares the same matcher.
        patterns = dict.fromkeys(column_ids, matcher)

        output = agate.writer(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        filtered = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        for matched in filtered:
            output.writerow(matched)
Beispiel #15
0
    def main(self):
        """De-duplicate rows on the --uniq-column columns; the first row seen
        with a given key wins.
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        if self.args.no_header_row:
            row = next(rows)
            column_names = make_default_headers(len(row))
            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(None, column_names, self.args.zero_based)
        uniq_column_id = parse_column_identifiers(self.args.uniq_column, column_names, self.args.zero_based)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow([column_names[c] for c in column_ids])

        seen = set()  # cache of key tuples already written

        for row in rows:
            # Tuples are hashable, so they can serve as set members.
            # Compute the key once per row (it was built twice before) and
            # use set.add() instead of update() with a one-element list.
            key = tuple(row[i] for i in uniq_column_id)
            if key in seen:
                continue
            seen.add(key)
            out_row = [row[c] if c < len(row) else None for c in column_ids]
            output.writerow(out_row)
Beispiel #16
0
    def main(self):
        """Emit one SQL INSERT statement per non-empty row for the selected
        columns of the input CSV.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based,
                                              self.args.not_columns)
        columns_type = self.parse_column_types(self.args.columns_types, column_ids)

        # BUGFIX: materialize as a list. On Python 3 map() returns a one-shot
        # iterator; the original joined it inside the row loop, so every
        # INSERT after the first ended up with an empty column list.
        column_names = [column_names[c] for c in column_ids]

        output = self.output_file

        # SECURITY: statements are built by plain string concatenation; the
        # table name and cell formatters must be trusted. Use parameterized
        # queries if this output is ever executed against untrusted input.
        # The invariant prefix is built once, outside the loop.
        insert_prefix = "INSERT INTO " + self.args.tablename + "(" + ",".join(column_names) + ") VALUES ("

        for row in rows:
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # BUGFIX: skip all-empty rows without ''.join(), which raises
            # TypeError when a short row padded the cells with None.
            if not any(out_row):
                continue

            values = ",".join(columns_type[colid](row[colid]) for colid in column_ids)
            output.write('%s%s);\n' % (insert_prefix, values))
Beispiel #17
0
    def main(self):
        """Keep rows for which a user-supplied Python expression is true
        (--filter-expr) or false (--not-filter-expr).
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        if self.args.no_header_row:
            row = next(rows)
            column_names = make_default_headers(len(row))
            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(None, column_names,
                                              self.args.zero_based)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        # write header
        output.writerow([column_names[c] for c in column_ids])

        def float_or_else(x):
            # Coerce to float when possible; otherwise keep the string.
            try:
                return float(x)
            except ValueError:
                return x

        # The two original loops differed only in which expression was used
        # and whether its result was negated -- fold them into one.
        expr = self.args.filter_expr or self.args.not_filter_expr
        keep_if_true = bool(self.args.filter_expr)

        if expr:
            for row in rows:
                # Expose each column value by name to the expression.
                namespace = {name: float_or_else(value)
                             for name, value in zip(column_names, row)}
                # SECURITY: eval() executes arbitrary code -- the expression
                # must come from a trusted command line, never untrusted input.
                if bool(eval(expr, namespace)) == keep_if_true:
                    out_row = [
                        row[c] if c < len(row) else None for c in column_ids
                    ]
                    output.writerow(out_row)
Beispiel #18
0
    def main(self):
        """Write the selected columns of the input CSV as LDIF entries, one
        per row, keyed on the --uid column.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based,
                                              self.args.not_columns)

        dn_att_id = parse_column_identifiers(self.args.uid, column_names, self.args.zero_based,
                                              self.args.not_columns)

        # BUGFIX: attribute names must line up with out_row, which holds only
        # the *selected* columns -- zipping against the full column_names
        # list mislabelled every attribute whenever -c selected a subset.
        selected_names = [column_names[c] for c in column_ids]

        output = LDIFWriter(self.output_file)

        for row in rows:
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            if self.args.delete_empty:
                # Cells may be None (short rows); ''.join() would raise.
                if not any(out_row):
                    continue

            # LDIFWriter expects (attribute, [values]) pairs; drop empty cells.
            attrs = [
                (attr_name, [value])
                for attr_name, value in zip(selected_names, out_row)
                if value is not None and value != ''
            ]

            # NOTE(review): dn_att_id[0] indexes the original column order but
            # is used to index out_row (the selected subset); this is only
            # correct when -c preserves the original ordering -- verify.
            dn = self.args.uid + "=" + out_row[dn_att_id[0]] + "," + self.args.basedn
            output.unparse(dn, attrs)
Beispiel #19
0
    def main(self):
        """Write the selected columns, stripping leading/trailing whitespace
        from every cell on the way out.
        """
        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        # BUGFIX: use raw strings -- '\s' in a plain literal is an invalid
        # escape sequence (DeprecationWarning on modern Pythons). Compile
        # once, outside the loop, instead of on every cell.
        leading = re.compile(r'^\s+')
        trailing = re.compile(r'\s+$')

        def drop_white(value):
            # Strip leading then trailing whitespace from one cell.
            return trailing.sub('', leading.sub('', value))

        for row in rows:
            out_row = [drop_white(row[c]) if c < len(row) else None for c in column_ids]
            output.writerow(out_row)
Beispiel #20
0
    def main(self):
        """Select the --sources columns and write them out under the new
        names given by --renames.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        # Make headers
        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        # Project column names
        target_names = self.args.renames.split(',')
        source_column_ids = parse_column_identifiers(self.args.sources, column_names, zero_based=self.args.zero_based)

        # BUGFIX: the original `assert cond and "msg"` never displayed the
        # message (the message belongs after a comma). A stray unused
        # `import sys` in the middle of the function was also removed.
        assert len(target_names) == len(source_column_ids), \
            "Input sources and rename columns must be the same length!"

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        # The header comes from the rename targets.
        output.writerow(target_names)

        # Rewrite rows, padding short rows with None.
        for row in rows:
            out_row = [row[c] if c < len(row) else None for c in source_column_ids]

            output.writerow(out_row)
Beispiel #21
0
    def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
        """
        Creates a new Table from a file-like object containing CSV data.

        Note: the column_ids argument will cause only those columns with a matching identifier
        to be parsed, type inferred, etc. However, their order/index property will reflect the
        original data (e.g. column 8 will still be "order" 7, even if it's the third column
        in the resulting Table).
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        # snifflimit == 0 means do not sniff; None means sniff the whole buffer.
        if snifflimit is None:
            kwargs['dialect'] = sniffer.sniff_dialect(contents)
        elif snifflimit > 0:
            kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

        f = six.StringIO(contents)
        rows = agate.reader(f, **kwargs)

        try:
            if no_header_row:
                # Peek at a row to infer column names from, and put it back on top
                row = next(rows)
                rows = itertools.chain([row], rows)
                headers = make_default_headers(len(row))
            else:
                headers = next(rows)
        except StopIteration:
            # The file is `/dev/null`.
            headers = []
            pass

        # With synthesized headers, or an explicit selection, resolve the
        # identifiers; otherwise keep every column, in order.
        if no_header_row or column_ids:
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]
        else:
            column_ids = range(len(headers))

        # One accumulator list per selected column.
        data_columns = [[] for c in headers]
        width = len(data_columns)

        for i, row in enumerate(rows):
            # j tracks how many columns this row filled, so the padding loop
            # below can top up the rest; starts at 0 for empty rows.
            j = 0

            for j, d in enumerate(row):
                try:
                    # Pull the cell via the *original* column index so the
                    # source ordering is preserved.
                    data_columns[j].append(row[column_ids[j]].strip())
                except IndexError:
                    # Non-rectangular data is truncated
                    break

            j += 1

            # Populate remaining columns with None so every column ends up
            # the same length even when this row was short.
            while j < width:
                data_columns[j].append(None)

                j += 1

        columns = []

        for i, c in enumerate(data_columns):
            columns.append(Column(column_ids[i], headers[i], c, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

        return Table(columns, name=name)