def detect_empty_columns(sample):
    """Return the indices of the columns of *sample* that look empty.

    A column is reported when, for every cell collected for it,
    ``"EMPTY"`` is contained in the value returned by
    ``simple_type_detection.detectType(cell)``.

    Args:
        sample: Sequence of rows, each row an iterable of cell values.
            Rows may differ in length: a short row contributes nothing
            to the columns it lacks, and a row wider than the first one
            adds new columns instead of raising ``IndexError``.

    Returns:
        List of zero-based column indices in ascending order; an empty
        list when *sample* has no rows.
    """
    if len(sample) == 0:
        return []

    # Transpose rows into per-column buckets, seeded from the first row.
    columns = [[] for _ in sample[0]]
    for row in sample:
        for j, cell in enumerate(row):
            # j grows by one per cell, so it can be at most one slot past
            # the end; open a new bucket for rows wider than any seen so far.
            if j == len(columns):
                columns.append([])
            columns[j].append(cell)

    return [
        index
        for index, column in enumerate(columns)
        if all(
            "EMPTY" in simple_type_detection.detectType(cell)
            for cell in column
        )
    ]
def guess_headers(sample, empty_columns=None):
    """Guess whether the first row of *sample* is a header row.

    The first row is treated as a header when ``"ALPHA"`` is contained in
    the ``simple_type_detection.detectType`` result of every cell in it,
    ignoring the cells whose position is listed in *empty_columns*.

    Args:
        sample: Sequence of rows, each row an iterable of cell values.
        empty_columns: Optional collection of zero-based column indices to
            leave out of the check (e.g. columns known to be empty).

    Returns:
        ``sample[0]`` itself (not a copy) when the first row looks like a
        header, otherwise an empty list. An empty *sample* yields an empty
        list instead of raising ``IndexError``.
    """
    # Collect the detected types of the first HEADER_CONFIDENCE rows.
    # NOTE(review): only the first row's types are consulted below; the
    # remaining rows are still scanned to keep the original behaviour.
    types = []
    for i, row in enumerate(sample):
        if i >= HEADER_CONFIDENCE:
            break
        types.append([simple_type_detection.detectType(c) for c in row])

    # Nothing was sampled (empty input or a non-positive row limit):
    # there is no first row to inspect, so report "no headers".
    if not types:
        return []

    first = types[0]
    if empty_columns:
        skipped = set(empty_columns)
        first = [t for j, t in enumerate(first) if j not in skipped]

    # all() over an empty sequence is True, so a first row with no
    # remaining cells is still returned as the header (as before).
    if all("ALPHA" in t for t in first):
        return sample[0]
    return []