Ejemplo n.º 1
0
             which=1,
             max=0,
             positions=None,
             quote_character=None))

# Delete empty rows: a Filter step whose Row predicate holds a single
# Empty condition.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(column=[], table=0, status="active", drop=False),
            ],
        ),
    )
)

# Extract from split after 'in '
w.add(
    dw.Extract(column=["split"],
               table=0,
               status="active",
               drop=False,
               result="column",
               update=False,
               insert_position="right",
               row=None,
Ejemplo n.º 2
0
             max=0,
             positions=None,
             quote_character=None))

# Delete rows 1 and 2; the RowIndex condition lists them as indices 0 and 1.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.RowIndex(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    indices=[0, 1],
                ),
            ],
        ),
    )
)

# Delete empty rows
w.add(
    dw.Filter(column=[],
              table=0,
              status="active",
              drop=False,
              row=dw.Row(column=[],
                         table=0,
                         status="active",
Ejemplo n.º 3
0
             ignore_between=None,
             which=1,
             max=1,
             positions=None))

# Delete rows where data = '|-': a Filter step whose Row predicate holds a
# single Eq condition on the "data" column.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Eq(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    lcol="data",
                    value="|-",
                    op_str="=",
                ),
            ],
        ),
    )
)

# Cut from data on '| any number '
w.add(dw.Cut(column=["data"],
             table=0,
             status="active",
             drop=False,
             result="column",
             update=True,
             insert_position="right",
             row=None,
Ejemplo n.º 4
0
# Edit the "data" column in place (update=True): for rows where
# data = 'Women', set the value to 'WOMEN'.
w.add(
    dw.Edit(
        column=["data"],
        table=0,
        status="active",
        drop=False,
        result="column",
        update=True,
        insert_position="right",
        # Row predicate restricting the edit to the matching rows.
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Eq(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    lcol="data",
                    value="Women",
                    op_str="=",
                ),
            ],
        ),
        on=None,
        before=None,
        after=None,
        ignore_between=None,
        which=1,
        max=1,
        positions=None,
        to="WOMEN",
        update_method=None,
    )
)
Ejemplo n.º 5
0
               which=1,
               max=0,
               positions=None,
               quote_character=None))

# Delete rows where split2 is null: a Filter step whose Row predicate holds a
# single IsNull condition on the "split2" column.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.IsNull(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    lcol="split2",
                    value=None,
                    op_str="is null",
                ),
            ],
        ),
    )
)

# Drop the split4 column.
w.add(
    dw.Drop(
        column=["split4"],
        table=0,
        status="active",
        drop=True,
    )
)

# Drop split1
w.add(dw.Drop(column=["split1"],
              table=0,
Ejemplo n.º 6
0
def get_transform():
    """Assemble the wrangling script for the ``data`` column and return it.

    Steps are queued on a new ``dw.DataWrangler`` in this order:

    1. Split ``data`` repeatedly on newline into rows.
    2. Delete empty rows.
    3. Delete rows where ``data`` starts with ``===``.
    4. Delete rows where ``data`` equals the KBDX HTML comment line.
    5. Delete rows where ``data`` contains ``<s>'''``.
    6. Extract from ``data`` between positions 5 and 9.
    7. Drop ``data``.

    Returns:
        The ``dw.DataWrangler`` holding the queued steps.
    """
    wrangler = dw.DataWrangler()

    def delete_rows_where(condition_cls, **condition_args):
        """Queue a Filter step whose Row predicate holds one condition.

        Every row-deletion step in this script uses the same Filter/Row
        shell; only the condition class and its extra keyword arguments
        differ. Fresh ``[]`` lists are created on each call.
        """
        condition = condition_cls(
            column=[],
            table=0,
            status="active",
            drop=False,
            **condition_args,
        )
        predicate = dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[condition],
        )
        wrangler.add(
            dw.Filter(
                column=[],
                table=0,
                status="active",
                drop=False,
                row=predicate,
            )
        )

    # Split data repeatedly on newline into rows
    split_into_rows = dw.Split(
        column=["data"],
        table=0,
        status="active",
        drop=True,
        result="row",
        update=False,
        insert_position="right",
        row=None,
        on="\n",
        before=None,
        after=None,
        ignore_between=None,
        which=1,
        max=0,
        positions=None,
        quote_character=None,
    )
    wrangler.add(split_into_rows)

    # Delete empty rows
    delete_rows_where(dw.Empty, percent_valid=0, num_valid=0)

    # Delete rows where data starts with '==='
    delete_rows_where(
        dw.StartsWith,
        lcol="data",
        value="===",
        op_str="starts with",
    )

    # Delete rows where data = '<!-- KBDX was Broadus Airport ...
    delete_rows_where(
        dw.Eq,
        lcol="data",
        value="<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->",
        op_str="=",
    )

    # Delete rows where data contains "<s>'''"
    delete_rows_where(
        dw.Contains,
        lcol="data",
        value="<s>'''",
        op_str="contains",
    )

    # Extract from data between positions 5, 9
    extract_by_position = dw.Extract(
        column=["data"],
        table=0,
        status="active",
        drop=False,
        result="column",
        update=False,
        insert_position="right",
        row=None,
        on=None,
        before=None,
        after=None,
        ignore_between=None,
        which=1,
        max=1,
        positions=[5, 9],
    )
    wrangler.add(extract_by_position)

    # Drop data
    wrangler.add(
        dw.Drop(
            column=["data"],
            table=0,
            status="active",
            drop=True,
        )
    )

    return wrangler
Ejemplo n.º 7
0
               after=None,
               ignore_between=None,
               which=1,
               max=0,
               positions=None,
               quote_character=None))

# Delete empty rows: a Filter step whose Row predicate holds a single
# Empty condition.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                ),
            ],
        ),
    )
)

# Extract from split after 'in '
w.add(dw.Extract(column=["split"],
                 table=0,
                 status="active",
                 drop=False,
                 result="column",
                 update=False,
                 insert_position="right",
                 row=None,
                 on=".*",
                 before=None,
# Fold using 1 as a key (passed to dw.Fold as keys=[0]).
w.add(
    dw.Fold(
        column=[],
        table=0,
        status="active",
        drop=False,
        keys=[0],
    )
)

# Delete every 7 rows: a Filter step whose Row predicate holds a single
# RowCycle condition (cycle=7, start=0, no end).
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.RowCycle(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    cycle=7,
                    start=0,
                    end=None,
                ),
            ],
        ),
    )
)

# Extract from value between ' any lowercase word |' and '}'
w.add(
    dw.Extract(column=["value"],
               table=0,
               status="active",
               drop=False,
               result="column",
               update=False,
Ejemplo n.º 9
0
             which=1,
             max=0,
             positions=None,
             quote_character=None))

# Delete empty rows: a Filter step whose Row predicate holds a single
# Empty condition.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(column=[], table=0, status="active", drop=False),
            ],
        ),
    )
)

# Delete rows 1,2
w.add(
    dw.Filter(column=[],
              table=0,
              status="active",
              drop=False,
              row=dw.Row(column=[],
                         table=0,
                         status="active",
                         drop=False,
Ejemplo n.º 10
0
               ignore_between=None,
               which=1,
               max="0",
               positions=None,
               quote_character=None))

# Delete row 1; the RowIndex condition lists it as index 0.
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.RowIndex(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    indices=[0],
                ),
            ],
        ),
    )
)

# Set the name of column split1 to "1".
w.add(
    dw.SetName(
        column=["split1"],
        table=0,
        status="active",
        drop=True,
        names=["1"],
        header_row=None,
    )
)

# Set  split2  name to  2
w.add(dw.SetName(column=["split2"],