dw.Empty(column=[], table=0, status="active", drop=False) ]))) # Extract from split after 'in ' w.add( dw.Extract(column=["split"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=".*", before=None, after="in ", ignore_between=None, which=1, max=1, positions=None)) # Fill extract with values from below # (here is where this test differs from crime_5.py) w.add( dw.Fill(column=["extract"], table=0, status="active", drop=False,
# Drop value w.add(dw.Drop(column=["value"], table=0, status="active", drop=True)) # Extract from fold between positions 4, 5 w.add(dw.Extract(column=["fold"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=None, before=None, after=None, ignore_between=None, which=1, max=1, positions=[4,5])) # Drop fold w.add(dw.Drop(column=["fold"], table=0, status="active", drop=True)) # Extract from translate on ' any word '
def get_transform():
    """Assemble and return the wrangling script for the ``data`` column.

    Steps are added to a fresh ``dw.DataWrangler`` in this order:

    1. Split ``data`` repeatedly on newline into rows.
    2. Delete empty rows.
    3. Delete rows where ``data`` starts with ``===``.
    4. Delete the row holding the KBDX (Broadus Airport) HTML comment.
    5. Delete rows where ``data`` contains ``<s>'''``.
    6. Extract from ``data`` between positions 5 and 9.
    7. Drop the ``data`` column.

    Returns:
        The ``dw.DataWrangler`` instance with all steps added.
    """
    script = dw.DataWrangler()

    def delete_rows_matching(make_condition, **condition_args):
        # Every row-deletion step has the same shape: a single condition
        # wrapped in a Row, which is in turn wrapped in a Filter.
        condition = make_condition(
            column=[],
            table=0,
            status="active",
            drop=False,
            **condition_args
        )
        matching_rows = dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[condition],
        )
        script.add(
            dw.Filter(
                column=[],
                table=0,
                status="active",
                drop=False,
                row=matching_rows,
            )
        )

    # Split data repeatedly on newline into rows
    script.add(
        dw.Split(
            column=["data"],
            table=0,
            status="active",
            drop=True,
            result="row",
            update=False,
            insert_position="right",
            row=None,
            on="\n",
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=0,
            positions=None,
            quote_character=None,
        )
    )

    # Delete empty rows
    delete_rows_matching(dw.Empty, percent_valid=0, num_valid=0)

    # Delete rows where data starts with '==='
    delete_rows_matching(
        dw.StartsWith,
        lcol="data",
        value="===",
        op_str="starts with",
    )

    # Delete rows where data = '<!-- KBDX was Broadus Airport ...
    delete_rows_matching(
        dw.Eq,
        lcol="data",
        value="<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->",
        op_str="=",
    )

    # Delete rows where data contains "<s>'''"
    delete_rows_matching(
        dw.Contains,
        lcol="data",
        value="<s>'''",
        op_str="contains",
    )

    # Extract from data between positions 5, 9
    script.add(
        dw.Extract(
            column=["data"],
            table=0,
            status="active",
            drop=False,
            result="column",
            update=False,
            insert_position="right",
            row=None,
            on=None,
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=1,
            positions=[5, 9],
        )
    )

    # Drop data
    script.add(dw.Drop(column=["data"], table=0, status="active", drop=True))

    return script
drop=False, cycle=7, start=0, end=None) ]))) # Extract from value between ' any lowercase word |' and '}' w.add( dw.Extract(column=["value"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=".*", before="}", after="[a-z]+\\|", ignore_between=None, which=1, max=1, positions=None)) # Fill extract with values from above w.add( dw.Fill(column=["extract"], table=0, status="active", drop=False, direction="down",
w.add(dw.SetName(column=["split5"], table=0, status="active", drop=True, names=["Waitlist"], header_row=None)) # Extract from split6 on ' any number : any number any lowercase word - any number : any number any lowercase word ' w.add(dw.Extract(column=["split6"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on="\\d+:\\d+[a-z]+ - \\d+:\\d+[a-z]+", before=None, after=None, ignore_between=None, which=1, max=1, positions=None)) # Set extract name to Time w.add(dw.SetName(column=["extract"], table=0, status="active", drop=True, names=["Time"], header_row=None))
# Fold using header as a key w.add(dw.Fold(column=[], table=0, status="active", drop=False, keys=[-1])) # Extract from value between '|' and '}' w.add(dw.Extract(column=["value"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=".*", before="}", after="\\|", ignore_between=None, which=1, max=1, positions=None)) # Fill extract with values from above w.add(dw.Fill(column=["extract"], table=0, status="active", drop=False, direction="down", method="copy",