Ejemplo n.º 1
0
    

# conflicts
#
# Per the petl documentation, conflicts() selects the rows that share a key
# value but disagree in at least one other field.

from petl import conflicts, look

# The first row is the header; 'foo' is the key used below.  Keys 'A', 'B'
# and 'D' each occur twice, and the 'E' row has only two fields.
table1 = [
    ['foo', 'bar', 'baz'],
    ['A', 1, 2.7],
    ['B', 2, None],
    ['D', 3, 9.4],
    ['B', None, 7.8],
    ['E', None],
    ['D', 3, 12.3],
    ['A', 2, None],
]

# The look() results are neither stored nor printed, so they are only
# visible when this snippet is evaluated interactively.
look(table1)
table2 = conflicts(table1, 'foo')
look(table2)


# complement
#
# Two sample tables for the complement example.  Their header rows differ,
# but the data rows ['B', 2, False] and ['C', 9, True] appear in both.
# The call that consumes them is not part of this excerpt.

a = [
    ['foo', 'bar', 'baz'],
    ['A', 1, True],
    ['C', 7, False],
    ['B', 2, False],
    ['C', 9, True],
]

b = [
    ['x', 'y', 'z'],
    ['B', 2, False],
    ['A', 9, False],
    ['B', 3, True],
    ['C', 9, True],
]
Ejemplo n.º 2
0
    

# conflicts
#
# Per the petl documentation, conflicts() selects the rows that share a key
# value but disagree in at least one other field.

from petl import conflicts, look

# The first row is the header; 'foo' is the key used below.  Keys 'A', 'B'
# and 'D' each occur twice, and the 'E' row has only two fields.
table1 = [
    ['foo', 'bar', 'baz'],
    ['A', 1, 2.7],
    ['B', 2, None],
    ['D', 3, 9.4],
    ['B', None, 7.8],
    ['E', None],
    ['D', 3, 12.3],
    ['A', 2, None],
]

# The look() results are neither stored nor printed, so they are only
# visible when this snippet is evaluated interactively.
look(table1)
table2 = conflicts(table1, 'foo')
look(table2)


# complement
#
# Two sample tables for the complement example.  Their header rows differ,
# but the data rows ['B', 2, False] and ['C', 9, True] appear in both.
# The call that consumes them is not part of this excerpt.

a = [
    ['foo', 'bar', 'baz'],
    ['A', 1, True],
    ['C', 7, False],
    ['B', 2, False],
    ['C', 9, True],
]

b = [
    ['x', 'y', 'z'],
    ['B', 2, False],
    ['A', 9, False],
    ['B', 3, True],
    ['C', 9, True],
]
Ejemplo n.º 3
0
    # Write to a local file
    # The target is "<fn>.tmp.csv.gz" inside datroot.  `src`, `datroot` and
    # `fn` are bound outside this excerpt.  This fragment uses Python 2
    # syntax (print statements, raw_input).
    etl.csv.tocsv(src, source=os.path.join(datroot,'.'.join([fn,'tmp','csv','gz'])), encoding='utf8', write_header=True)
    print "File saved locally to avoid too much in memory..."
    
    # Drop the local reference to the source table now that it has been
    # written out.
    del(src)
    
    # Tidy up some of the fields so that they're db-friendly
    # The temp file written above is read back, then:
    #   - '{' and '}' are stripped from transaction_id
    #   - price_int is converted with int()
    #   - completion_dt is parsed from "%Y-%m-%d 00:00" into a date
    #   - rows are sorted by completion_dt
    tidy = etl.io.fromcsv(os.path.join(datroot,'.'.join([fn,'tmp','csv','gz']))).convert('transaction_id','replace','{','').convert('transaction_id','replace','}','').convert('price_int',int).convert('completion_dt',lambda v: datetime.datetime.strptime(v, "%Y-%m-%d 00:00").date()).sort('completion_dt')

    # Summarise what's there (helpful for tracking
    # changes to the format, especially the status codes).
    print "There are {} rows of data.".format(etl.util.counting.nrows(tidy))
    counts = etl.util.counting.valuecounts(tidy, 'status_cd')
    print "I found the following record types and counts:"
    print counts
    # Look for rows that share a transaction_id; any such rows are printed
    # so the operator can see them before deciding whether to proceed.
    confs  = etl.conflicts(tidy, key='transaction_id')
    if confs.nrows() > 0:
        print "I found the following conflicts:"
        print confs
    else:
        print "I found no conflicting Transaction IDs"  

    # Interactive confirmation: only the exact answer 'y' continues.
    proceed = raw_input('Given these stats should I proceed with the processing [y/n]: ')
    if proceed=='y': 
        print("OK, will load the data.")
    else:
        print("OK, stopping.")
        # NOTE(review): bare exit() is the helper installed by the `site`
        # module; sys.exit() is the usual choice in scripts - confirm.
        exit()

    # NOTE(review): '.'.join([fn,'.csv']) produces "<fn>..csv" (double dot),
    # and totsv writes tab-separated output to a ".csv" name - confirm that
    # both the file name and the delimiter are intended.
    etl.csv.totsv(tidy, source=os.path.join(datroot,'.'.join([fn,'.csv'])), encoding='utf-8', write_header=True)
    # NOTE(review): looks like leftover debug output.
    print "Foo!"
Ejemplo n.º 4
0
# NOTE(review): tail of a preceding example that is cut off in this excerpt;
# `table2` is presumably bound there - confirm before running standalone.
table2


# conflicts()
#############

import petl as etl

# Header row plus seven data rows keyed on 'foo'.  Keys 'A', 'B' and 'D' each
# occur twice with at least one differing value; the 'E' row is short (two
# fields).
table1 = [['foo', 'bar', 'baz'], ['A', 1, 2.7], ['B', 2, None], ['D', 3, 9.4],
          ['B', None, 7.8], ['E', None], ['D', 3, 12.3], ['A', 2, None]]

# Per the petl documentation, conflicts() selects rows that share a key value
# but differ in some other field.
table2 = etl.conflicts(table1, 'foo')

# Bare expression: shows the table in an interactive session and has no
# visible effect when run as a plain script.
table2


# isunique()
############

import petl as etl

# Rows are ragged: ['b'] has a single field and ['c', 3, True] has three.
# The value 'b' occurs twice in the 'foo' column.
table1 = [['foo', 'bar'], ['a', 1], ['b'], ['b', 2], ['c', 3, True]]

# The results are not stored or printed, so they are only visible when the
# snippet is evaluated interactively.
etl.isunique(table1, 'foo')
etl.isunique(table1, 'bar')
Ejemplo n.º 5
0
# compound keys are supported
# NOTE(review): tail of a preceding example that is cut off in this excerpt;
# `etl` and `table1` are presumably bound there - confirm before running
# standalone.  The key here is the pair of fields 'foo' and 'bar'.
table3 = etl.duplicates(table1, key=['foo', 'bar'])
table3

# unique()
##########

import petl as etl

# Header row plus eight data rows keyed on 'foo'.  'A', 'E' and 'F' occur
# once, 'D' twice and 'B' three times; values mix ints, floats, strings
# and None.
table1 = [
    ['foo', 'bar', 'baz'],
    ['A', 1, 2],
    ['B', '2', '3.4'],
    ['D', 'xyz', 9.0],
    ['B', u'3', u'7.8'],
    ['B', '2', 42],
    ['E', None, None],
    ['D', 4, 12.3],
    ['F', 7, 2.3],
]

# Per the petl documentation, unique() selects rows whose key value is not
# shared with any other row.
table2 = etl.unique(table1, 'foo')

# Bare expression: shows the table in an interactive session and has no
# visible effect when run as a plain script.
table2

# conflicts()
#############

import petl as etl

# Header row plus seven data rows keyed on 'foo'.  Keys 'A', 'B' and 'D' each
# occur twice with at least one differing value; the 'E' row is short (two
# fields).
table1 = [
    ['foo', 'bar', 'baz'],
    ['A', 1, 2.7],
    ['B', 2, None],
    ['D', 3, 9.4],
    ['B', None, 7.8],
    ['E', None],
    ['D', 3, 12.3],
    ['A', 2, None],
]

# Per the petl documentation, conflicts() selects rows that share a key value
# but differ in some other field.
table2 = etl.conflicts(table1, 'foo')

# Bare expression: shows the table in an interactive session and has no
# visible effect when run as a plain script.
table2

# isunique()
############

import petl as etl

# Rows are ragged: ['b'] has a single field and ['c', 3, True] has three.
# The value 'b' occurs twice in the 'foo' column.
table1 = [
    ['foo', 'bar'],
    ['a', 1],
    ['b'],
    ['b', 2],
    ['c', 3, True],
]

# The results are not stored or printed, so they are only visible when the
# snippet is evaluated interactively.
etl.isunique(table1, 'foo')
etl.isunique(table1, 'bar')