def f_split_tab(table, col):
    """Split the cell at index ``col`` of every row on tab characters.

    The pieces replace the original cell in place. Rows that yield fewer
    pieces than the widest row are right-padded with empty strings, so every
    row gains the same number of cells.

    Args:
        table: Sequence of rows; each row is a list whose ``col`` entry is a
            string.
        col: Index of the column to split.

    Returns:
        A new list of rows. Returns ``None`` instead when the module-level
        ``PRUNE_1`` flag is truthy and ``add_empty_col`` reports a truthy
        result for the new table (presumably a pruning rule for results that
        only add an empty column -- confirm against ``add_empty_col``).
    """
    # Figure out the number of cells each row produces once "split" is applied.
    pieces_per_row = [record[col].split("\t") for record in table]
    # The trailing [0] keeps max() valid for an empty table.
    widest = max([len(pieces) for pieces in pieces_per_row] + [0])

    result_table = []
    for record, pieces in zip(table, pieces_per_row):
        padding = [""] * (widest - len(pieces))
        result_table.append(record[:col] + pieces + padding + record[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=result_table, orig_table=table):
        return None
    return result_table
def f_extract(table, col, regex, prefix="", suffix=""):
    """Insert, after column ``col``, the text in that column matching ``regex``.

    The pattern searched for is ``prefix + (regex) + suffix``; only the part
    matched by ``regex`` is extracted. Rows without a match receive an empty
    string, so every row grows by exactly one cell.

    Args:
        table: Sequence of rows; each row's ``col`` entry is a string.
        col: Index of the column to search. The extracted text is inserted at
            ``col + 1``.
        regex: Regular expression describing the text to extract.
        prefix: Regular expression that must immediately precede the
            extracted text. ``None`` is treated like ``""``.
        suffix: Regular expression that must immediately follow the
            extracted text. ``None`` is treated like ``""``.

    Returns:
        A new list of copied rows; the input rows are not modified. Returns
        ``None`` instead when the module-level ``PRUNE_1`` flag is truthy and
        ``add_empty_col`` reports a truthy result for the new table
        (presumably a pruning rule -- confirm against ``add_empty_col``).

    Raises:
        re.error: If the combined pattern is not a valid regular expression.
    """
    # A named group pins the extraction to ``regex`` itself. Reading group 1
    # would pick up the wrong text (or None) whenever ``prefix`` contains a
    # capturing group of its own. Named groups are still numbered, so any
    # numeric backreferences in the caller's expressions are unaffected.
    pattern = re.compile(
        (prefix or "") + "(?P<f_extract_target>" + regex + ")" + (suffix or "")
    )

    new_table = []
    for orig_row in table:
        row = list(orig_row)  # copy so the caller's rows stay untouched
        m = pattern.search(row[col])
        row.insert(col + 1, m.group("f_extract_target") if m else "")
        new_table.append(row)

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None
    return new_table
def f_wrap_one_row(table):
    """Concatenate all rows of ``table`` into a single row.

    Cells are taken row by row, left to right, and are kept as the very same
    objects (no type coercion). Rows of differing lengths are supported.

    Args:
        table: Sequence of rows, each an iterable of cells.

    Returns:
        A one-row table (a list containing one list). An empty ``table``
        yields ``[[]]``. Returns ``None`` instead when the module-level
        ``PRUNE_1`` flag is truthy and ``add_empty_col`` reports a truthy
        result for the new table (presumably a pruning rule -- confirm
        against ``add_empty_col``).
    """
    # Flatten in pure Python rather than through a NumPy array: an array
    # round-trip coerces mixed cell types to strings, returns NumPy scalar
    # types, and cannot flatten rows of unequal length.
    new_table = [[cell for row in table for cell in row]]

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None
    return new_table
def f_divide_on_dash(table, col):
    """Divide column ``col`` into two columns based on the presence of "-".

    Each row gains one cell: the original value stays in the left-hand cell
    when it contains a dash and moves to the right-hand cell otherwise; the
    other cell of the pair is an empty string.

    Args:
        table: Sequence of rows; each row is a list whose ``col`` entry is a
            string.
        col: Index of the column to divide.

    Returns:
        A new list of rows. Returns ``None`` instead when the module-level
        ``PRUNE_1`` flag is truthy and either ``contains_empty_col(table,
        col)`` is truthy for the input or ``add_empty_col`` is truthy for the
        result (presumably pruning rules -- confirm against those helpers).
    """
    if PRUNE_1 and contains_empty_col(table, col):
        return None

    new_table = []
    for row in table:
        cell = row[col]
        if "-" in cell:
            pair = [cell, ""]
        else:
            pair = ["", cell]
        new_table.append(row[:col] + pair + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None
    return new_table
def f_divide_on_all_alphabets(table, col):
    """Divide column ``col`` into two columns based on ``str.isalpha()``.

    Each row gains one cell: the original value stays in the left-hand cell
    when ``isalpha()`` is true for it and moves to the right-hand cell
    otherwise; the other cell of the pair is an empty string.

    Args:
        table: Sequence of rows; each row is a list whose ``col`` entry is a
            string.
        col: Index of the column to divide.

    Returns:
        A new list of rows. Returns ``None`` instead when the module-level
        ``PRUNE_1`` flag is truthy and either ``contains_empty_col(table,
        col)`` is truthy for the input or ``add_empty_col`` is truthy for the
        result (presumably pruning rules -- confirm against those helpers).
    """
    if PRUNE_1 and contains_empty_col(table, col):
        return None

    new_table = []
    for row in table:
        cell = row[col]
        if cell.isalpha():
            pair = [cell, ""]
        else:
            pair = ["", cell]
        new_table.append(row[:col] + pair + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None
    return new_table
def f_split_first(table, col, splitter):
    """Split column ``col`` of every row at the first ``splitter`` only.

    The cell is replaced by the text before and the text after the first
    occurrence of ``splitter``. When the split yields fewer than two pieces,
    one empty string is appended.

    Args:
        table: Sequence of rows; each row is a list whose ``col`` entry is a
            string.
        col: Index of the column to split.
        splitter: Separator handed to ``str.split(splitter, 1)``.

    Returns:
        A new list of rows. Returns ``None`` instead when the module-level
        ``PRUNE_1`` flag is truthy and ``add_empty_col`` reports a truthy
        result for the new table (presumably a pruning rule -- confirm
        against ``add_empty_col``).
    """
    result_table = []
    for row in table:
        # maxsplit=1 cuts only at the first occurrence of the separator.
        pieces = row[col].split(splitter, 1)
        if len(pieces) < 2:
            pieces.append("")
        result_table.append(row[:col] + pieces + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=result_table, orig_table=table):
        return None
    return result_table