Esempio n. 1
0
                             dw.Empty(column=[],
                                      table=0,
                                      status="active",
                                      drop=False)
                         ])))

# Extract from the 'split' column the text found after 'in '; the result is
# requested as a new column inserted to the right (the source column is kept).
w.add(dw.Extract(
    column=["split"],
    table=0,
    status="active",
    drop=False,
    result="column",
    update=False,
    insert_position="right",
    row=None,
    on=".*",
    before=None,
    after="in ",
    ignore_between=None,
    which=1,
    max=1,
    positions=None,
))

# Fill extract with values from below
# (this is where this test differs from crime_5.py)
w.add(
    dw.Fill(column=["extract"],
            table=0,
            status="active",
            drop=False,
Esempio n. 2
0
# Drop the 'value' column.
w.add(dw.Drop(
    column=["value"],
    table=0,
    status="active",
    drop=True,
))

# Extract from 'fold' the text between positions 4 and 5; the result is
# requested as a new column inserted to the right.
w.add(dw.Extract(
    column=["fold"],
    table=0,
    status="active",
    drop=False,
    result="column",
    update=False,
    insert_position="right",
    row=None,
    on=None,
    before=None,
    after=None,
    ignore_between=None,
    which=1,
    max=1,
    positions=[4, 5],
))

# Drop the 'fold' column now that the extract has been taken from it.
w.add(dw.Drop(
    column=["fold"],
    table=0,
    status="active",
    drop=True,
))

# Extract from translate on ' any word '
Esempio n. 3
0
def get_transform():
    """Assemble the wrangling script applied to the raw ``data`` column.

    Seven transforms are queued, in this order, on a fresh
    ``dw.DataWrangler``:

    1. split ``data`` repeatedly on newlines into rows;
    2. delete empty rows;
    3. delete rows where ``data`` starts with ``===``;
    4. delete the row equal to the KBDX HTML comment;
    5. delete rows where ``data`` contains ``<s>'''``;
    6. extract the text between positions 5 and 9 of ``data`` (result
       requested as a column inserted to the right);
    7. drop the ``data`` column.

    Returns:
        The populated ``dw.DataWrangler`` instance.
    """

    def row_filter(condition):
        # Every row-deletion step uses the same Row/Filter envelope around a
        # single condition; fresh list literals are built on each call so no
        # list object is shared between transforms.
        matching_rows = dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[condition],
        )
        return dw.Filter(
            column=[],
            table=0,
            status="active",
            drop=False,
            row=matching_rows,
        )

    wrangler = dw.DataWrangler()

    # Split data repeatedly on newline into rows.
    newline_split = dw.Split(
        column=["data"],
        table=0,
        status="active",
        drop=True,
        result="row",
        update=False,
        insert_position="right",
        row=None,
        on="\n",
        before=None,
        after=None,
        ignore_between=None,
        which=1,
        max=0,
        positions=None,
        quote_character=None,
    )
    wrangler.add(newline_split)

    # Delete empty rows.
    wrangler.add(row_filter(dw.Empty(
        column=[],
        table=0,
        status="active",
        drop=False,
        percent_valid=0,
        num_valid=0,
    )))

    # Delete rows where data starts with '==='.
    wrangler.add(row_filter(dw.StartsWith(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value="===",
        op_str="starts with",
    )))

    # Delete the row whose data equals the KBDX replacement notice.
    kbdx_notice = "<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->"
    wrangler.add(row_filter(dw.Eq(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value=kbdx_notice,
        op_str="=",
    )))

    # Delete rows where data contains "<s>'''".
    wrangler.add(row_filter(dw.Contains(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value="<s>'''",
        op_str="contains",
    )))

    # Extract from data between positions 5 and 9.
    position_extract = dw.Extract(
        column=["data"],
        table=0,
        status="active",
        drop=False,
        result="column",
        update=False,
        insert_position="right",
        row=None,
        on=None,
        before=None,
        after=None,
        ignore_between=None,
        which=1,
        max=1,
        positions=[5, 9],
    )
    wrangler.add(position_extract)

    # Drop data: only the extracted column is needed from here on.
    wrangler.add(dw.Drop(
        column=["data"],
        table=0,
        status="active",
        drop=True,
    ))

    return wrangler
                                         drop=False,
                                         cycle=7,
                                         start=0,
                                         end=None)
                         ])))

# Extract from 'value' the text that sits after a lowercase word followed
# by '|' and before '}'; the result is requested as a new column inserted
# to the right.
w.add(dw.Extract(
    column=["value"],
    table=0,
    status="active",
    drop=False,
    result="column",
    update=False,
    insert_position="right",
    row=None,
    on=".*",
    before="}",
    after="[a-z]+\\|",
    ignore_between=None,
    which=1,
    max=1,
    positions=None,
))

# Fill extract with values from above
w.add(
    dw.Fill(column=["extract"],
            table=0,
            status="active",
            drop=False,
            direction="down",
# Set split5 name to Waitlist
w.add(dw.SetName(
    column=["split5"],
    table=0,
    status="active",
    drop=True,
    names=["Waitlist"],
    header_row=None,
))

# Extract from split6 on a time range of the shape
# 'number:number lowercase-word - number:number lowercase-word';
# the result is requested as a new column inserted to the right.
w.add(dw.Extract(
    column=["split6"],
    table=0,
    status="active",
    drop=False,
    result="column",
    update=False,
    insert_position="right",
    row=None,
    on="\\d+:\\d+[a-z]+ - \\d+:\\d+[a-z]+",
    before=None,
    after=None,
    ignore_between=None,
    which=1,
    max=1,
    positions=None,
))

# Set extract name to Time
w.add(dw.SetName(
    column=["extract"],
    table=0,
    status="active",
    drop=True,
    names=["Time"],
    header_row=None,
))
Esempio n. 6
0
# Fold, using the header as the key (keys=[-1]).
w.add(dw.Fold(
    column=[],
    table=0,
    status="active",
    drop=False,
    keys=[-1],
))

# Extract from 'value' the text between '|' and '}'; the result is
# requested as a new column inserted to the right.
w.add(dw.Extract(
    column=["value"],
    table=0,
    status="active",
    drop=False,
    result="column",
    update=False,
    insert_position="right",
    row=None,
    on=".*",
    before="}",
    after="\\|",
    ignore_between=None,
    which=1,
    max=1,
    positions=None,
))

# Fill extract with values from above
w.add(dw.Fill(column=["extract"],
              table=0,
              status="active",
              drop=False,
              direction="down",
              method="copy",