Esempio n. 1
0
def f_split_tab(table, col):
    """Split the cell at index ``col`` of every row on tab characters.

    The pieces of each split replace the original cell in place. Rows whose
    cell yields fewer pieces than the widest split are right-padded with
    empty strings, so every row receives the same number of new cells.

    Returns the new table as a list of freshly built rows, or ``None`` when
    ``PRUNE_1`` is truthy and ``add_empty_col`` returns a truthy value for
    the new table compared with the original.
    """
    # Tab-separated pieces of the target cell, one list per row.
    pieces_per_row = [row[col].split("\t") for row in table]

    # The widest split decides how many cells each row gets (0 if no rows).
    width = max([0] + [len(pieces) for pieces in pieces_per_row])

    result_table = [
        row[:col] + pieces + [""] * (width - len(pieces)) + row[col + 1:]
        for row, pieces in zip(table, pieces_per_row)
    ]

    if PRUNE_1 and add_empty_col(new_table=result_table, orig_table=table):
        return None

    return result_table
Esempio n. 2
0
def f_extract(table, col, regex, prefix="", suffix=""):
    """Insert, after column ``col``, the text matched by ``regex`` in that column.

    ``regex`` is wrapped in a capturing group; when ``prefix`` or ``suffix``
    is non-empty they are placed around that group, so they must match but
    are not part of the extracted text. For each row the first match found
    by a search of ``row[col]`` is inserted at index ``col + 1`` of a copy of
    the row; rows without a match get an empty string instead.

    Returns the new table, or ``None`` when ``PRUNE_1`` is truthy and
    ``add_empty_col`` returns a truthy value for the new table compared with
    the original.
    """
    wrapped = "(" + regex + ")"
    if prefix or suffix:
        wrapped = prefix + wrapped + suffix

    matcher = re.compile(wrapped)

    new_table = []
    for source_row in table:
        hit = matcher.search(source_row[col])
        extracted = hit.group(1) if hit else ""

        # Work on a copy so the rows of the input table are left untouched.
        expanded = list(source_row)
        expanded.insert(col + 1, extracted)
        new_table.append(expanded)

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None

    return new_table
Esempio n. 3
0
def f_wrap_one_row(table):
    """Concatenate all rows of ``table`` into a table with a single row.

    Cells are taken row by row, left to right (row-major order). The
    flattening is done with plain Python iteration rather than by converting
    to a NumPy array, so rows of different lengths are handled, cells keep
    their original Python types, and mixed-type cells are not coerced to a
    common string dtype.

    Returns ``[flattened_row]``, or ``None`` when ``PRUNE_1`` is truthy and
    ``add_empty_col`` returns a truthy value for the new table compared with
    the original.
    """
    # Row-major flatten; equivalent to ravel() on a rectangular table but
    # also well-defined when rows have unequal lengths.
    new_table = [[cell for row in table for cell in row]]

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None

    return new_table
Esempio n. 4
0
def f_divide_on_dash(table, col):
    """Split column ``col`` into two columns depending on whether it contains "-".

    Each row's cell at ``col`` is replaced by two cells: the value goes in
    the first and an empty string in the second when the value contains a
    dash, otherwise the empty string comes first and the value second.

    Returns ``None`` without building anything when ``PRUNE_1`` is truthy and
    ``contains_empty_col(table, col)`` is truthy. Also returns ``None`` when
    ``PRUNE_1`` is truthy and ``add_empty_col`` returns a truthy value for
    the new table compared with the original. Otherwise returns the new
    table.
    """
    if PRUNE_1 and contains_empty_col(table, col):
        return None

    new_table = []
    for row in table:
        cell = row[col]
        # Dashed values stay on the left; everything else moves right.
        pair = [cell, ""] if "-" in cell else ["", cell]
        new_table.append(row[:col] + pair + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None

    return new_table
Esempio n. 5
0
def f_divide_on_all_alphabets(table, col):
    """Split column ``col`` into two columns depending on ``str.isalpha``.

    Each row's cell at ``col`` is replaced by two cells: the value goes in
    the first and an empty string in the second when ``isalpha()`` is true
    for the value, otherwise the empty string comes first and the value
    second.

    Returns ``None`` without building anything when ``PRUNE_1`` is truthy and
    ``contains_empty_col(table, col)`` is truthy. Also returns ``None`` when
    ``PRUNE_1`` is truthy and ``add_empty_col`` returns a truthy value for
    the new table compared with the original. Otherwise returns the new
    table.
    """
    if PRUNE_1 and contains_empty_col(table, col):
        return None

    new_table = []
    for row in table:
        cell = row[col]
        # Purely alphabetic values stay on the left; the rest move right.
        pair = [cell, ""] if cell.isalpha() else ["", cell]
        new_table.append(row[:col] + pair + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=new_table, orig_table=table):
        return None

    return new_table
Esempio n. 6
0
def f_split_first(table, col, splitter):
    """Split the cell at index ``col`` of every row at the first ``splitter``.

    The cell is split at most once. When the split yields fewer than two
    pieces, one empty string is appended. The resulting pieces replace the
    original cell in a newly built row.

    Returns the new table, or ``None`` when ``PRUNE_1`` is truthy and
    ``add_empty_col`` returns a truthy value for the new table compared with
    the original.
    """
    result_table = []

    for row in table:
        pieces = row[col].split(splitter, 1)
        if len(pieces) < 2:
            # Pad so rows without the splitter still gain a trailing cell.
            pieces.append("")
        result_table.append(row[:col] + pieces + row[col + 1:])

    if PRUNE_1 and add_empty_col(new_table=result_table, orig_table=table):
        return None

    return result_table