after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Delete empty rows w.add( dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[ dw.Empty(column=[], table=0, status="active", drop=False) ]))) # Extract from split after 'in ' w.add( dw.Extract(column=["split"], table=0, status="active", drop=False, result="column", update=False,
after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Delete rows where split2 is null w.add(dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.IsNull(column=[], table=0, status="active", drop=False, lcol="split2", value=None, op_str="is null")]))) # Drop split4 w.add(dw.Drop(column=["split4"], table=0, status="active", drop=True)) # Drop split1
before=None, after=None, ignore_between=None, which=1, max=1, positions=None)) # Delete rows where data = '|-' w.add(dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.Eq(column=[], table=0, status="active", drop=False, lcol="data", value="|-", op_str="=")]))) # Cut from data on '| any number ' w.add(dw.Cut(column=["data"], table=0, status="active", drop=False, result="column", update=True,
def get_transform():
    """Assemble the Data Wrangler script that cleans the raw ``data`` column.

    Steps, in the order they are added:

    1. Split ``data`` on newlines into rows.
    2. Delete empty rows.
    3. Delete rows where ``data`` starts with ``===``.
    4. Delete the row holding the KBDX / Broadus Airport HTML comment.
    5. Delete rows where ``data`` contains ``<s>'''``.
    6. Extract from ``data`` between positions 5 and 9.
    7. Drop ``data``.

    Returns:
        The ``dw.DataWrangler`` instance with all transforms added. This
        function only builds the script; it does not apply it to any input.
    """
    w = dw.DataWrangler()

    def row_filter(condition):
        # Every row-deleting step uses the same Filter/Row wrapper and
        # differs only in the single condition it carries.
        return dw.Filter(
            column=[],
            table=0,
            status="active",
            drop=False,
            row=dw.Row(
                column=[],
                table=0,
                status="active",
                drop=False,
                conditions=[condition],
            ),
        )

    steps = (
        # Split data repeatedly on newline into rows
        dw.Split(
            column=["data"],
            table=0,
            status="active",
            drop=True,
            result="row",
            update=False,
            insert_position="right",
            row=None,
            on="\n",
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=0,
            positions=None,
            quote_character=None,
        ),
        # Delete empty rows
        row_filter(
            dw.Empty(
                column=[],
                table=0,
                status="active",
                drop=False,
                percent_valid=0,
                num_valid=0,
            )
        ),
        # Delete rows where data starts with '==='
        row_filter(
            dw.StartsWith(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="===",
                op_str="starts with",
            )
        ),
        # Delete rows where data = '<!-- KBDX was Broadus Airport ...
        row_filter(
            dw.Eq(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->",
                op_str="=",
            )
        ),
        # Delete rows where data contains "<s>'''"
        row_filter(
            dw.Contains(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="<s>'''",
                op_str="contains",
            )
        ),
        # Extract from data between positions 5, 9
        dw.Extract(
            column=["data"],
            table=0,
            status="active",
            drop=False,
            result="column",
            update=False,
            insert_position="right",
            row=None,
            on=None,
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=1,
            positions=[5, 9],
        ),
        # Drop data
        dw.Drop(column=["data"], table=0, status="active", drop=True),
    )

    # Order matters: each transform operates on the output of the previous one.
    for step in steps:
        w.add(step)

    return w
insert_position="right", row=None, on=",", before=None, after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Delete rows 1,2 w.add( dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[ dw.RowIndex(column=[], table=0, status="active", drop=False, indices=[0, 1]) ]))) w.apply_to_file('migration.csv').print_csv('out.csv')
on=",", before=None, after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Delete empty rows w.add(dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.Empty(column=[], table=0, status="active", drop=False)]))) # Extract from split after 'in ' w.add(dw.Extract(column=["split"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None,
quote_character=None)) # Fold using 1 as a key w.add(dw.Fold(column=[], table=0, status="active", drop=False, keys=[0])) # Delete every 7 rows w.add( dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[ dw.RowCycle(column=[], table=0, status="active", drop=False, cycle=7, start=0, end=None) ]))) # Extract from value between ' any lowercase word |' and '}' w.add( dw.Extract(column=["value"], table=0, status="active", drop=False,
before=None, after=None, ignore_between=None, which=1, max="0", positions=None, quote_character=None)) # Delete row 1 w.add(dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.RowIndex(column=[], table=0, status="active", drop=False, indices=[0])]))) # Set split1 name to 1 w.add(dw.SetName(column=["split1"], table=0, status="active", drop=True, names=["1"], header_row=None))