Exemple #1
0
def lookup_and_transform(ts_kv_table):
    """Turn a long key/value table into one wide, time-sorted table per entity.

    Input rows look like this:
    +---------------------------------+---------------+---------------+--------+
    | entity_id                       | key           | ts            | value  |
    +=================================+===============+===============+========+
    | 1ea47494dc14d40bd76a73c738b665f | Temperature   | 1583010011665 |  -1.8  |
    +---------------------------------+---------------+---------------+--------+
    | 1ea47494dc14d40bd76a73c738b665f | WindDirection | 1583010000692 |   227  |
    +---------------------------------+---------------+---------------+--------+

    The result is a dictionary keyed by ``entity_id`` whose values are tables
    shaped like this:
    +--------------+--------------+---------------+
    | ts           | Temperature  | WindDirection |
    +--------------+--------------+---------------+
    |1583010011665 | -1.8         |  230          |
    +--------------+--------------+---------------+
    |1583010000692 |   -2.5       | 227           |
    +--------------+--------------+---------------+

    For every entity the ('key', 'ts', 'value') rows are recast so that each
    distinct key becomes a column, any column listed in ``KEYS_TO_REMOVE`` is
    cut out, the header is sorted with 'ts' then moved to the first position,
    and the rows are sorted by 'ts'.
    """
    columns = ('key', 'ts', 'value')

    # Group the (key, ts, value) triples under their entity_id.
    by_entity = petl.lookup(ts_kv_table, 'entity_id', value=columns)

    for entity_id in by_entity:
        # Re-attach a header so the grouped rows form a table again.
        long_rows = [columns] + by_entity[entity_id]
        wide = petl.recast(long_rows, variablefield='key', valuefield='value')

        # Only cut columns that actually exist in this entity's table.
        unwanted = KEYS_TO_REMOVE & set(petl.fieldnames(wide))
        wide = petl.cutout(wide, *unwanted)

        # Sorted header, but with the timestamp always as the leading column.
        wide = petl.transform.headers.sortheader(wide)
        wide = petl.transform.basics.movefield(wide, 'ts', 0)

        by_entity[entity_id] = petl.sort(wide, 'ts')

    return by_entity
Exemple #2
0
def test_fieldnames():
    """Check fieldnames() against a string header and an object header.

    Both tables are expected to produce ``['foo', 'bar']``; in the second one
    the header cells are objects whose ``__str__`` returns those names.
    """
    wanted = ['foo', 'bar']

    # Header made of plain strings.
    plain_table = (('foo', 'bar'), ('a', 1), ('b', 2))
    eq_(wanted, fieldnames(plain_table))

    class CustomField(object):
        """Header cell carrying a key plus a human-readable description."""

        def __init__(self, key, description):
            self.key, self.description = key, description

        def __str__(self):
            return self.key

        def __repr__(self):
            return 'CustomField(%r, %r)' % (self.key, self.description)

    # Header made of CustomField objects.
    header_row = (
        CustomField('foo', 'Get some foo.'),
        CustomField('bar', 'A lot of bar.'),
    )
    object_table = (header_row, ('a', 1), ('b', 2))
    eq_(wanted, fieldnames(object_table))
Exemple #3
0
def test_fieldnames():
    """Verify fieldnames() for two kinds of header row.

    The first table uses string header cells, the second uses objects whose
    ``__str__`` returns the field name; ``['foo', 'bar']`` is expected in
    both cases.
    """
    data_rows = (('a', 1), ('b', 2))

    class CustomField(object):
        """Header cell that stringifies to its ``key``."""

        def __init__(self, key, description):
            self.key = key
            self.description = description

        def __str__(self):
            return self.key

        def __repr__(self):
            return 'CustomField(%r, %r)' % (self.key, self.description)

    # Case 1: the header cells are already strings.
    string_header = ('foo', 'bar')
    result = fieldnames((string_header,) + data_rows)
    eq_(['foo', 'bar'], result)

    # Case 2: the header cells are CustomField instances.
    object_header = (CustomField('foo', 'Get some foo.'),
                     CustomField('bar', 'A lot of bar.'))
    result = fieldnames((object_header,) + data_rows)
    eq_(['foo', 'bar'], result)
Exemple #4
0
def test_fieldnames():
    """Exercise fieldnames() with string and with object header cells.

    Each case asserts that the returned names equal ``["foo", "bar"]``.
    """

    class CustomField(object):
        """Non-string header cell; ``str()`` of it yields the key."""

        def __init__(self, key, description):
            self.key = key
            self.description = description

        def __str__(self):
            return self.key

        def __repr__(self):
            return "CustomField(%r, %r)" % (self.key, self.description)

    string_cells = ("foo", "bar")
    object_cells = (
        CustomField("foo", "Get some foo."),
        CustomField("bar", "A lot of bar."),
    )

    # Same data rows under each header; string header first, as before.
    for header_cells in (string_cells, object_cells):
        table = (header_cells, ("a", 1), ("b", 2))
        actual = fieldnames(table)
        eq_(["foo", "bar"], actual)
Exemple #5
0
def test_fieldnames():
    """Test the fieldnames function.

    Covers a header of plain strings and a header of objects whose
    ``__str__`` returns the field name; both must give ``['foo', 'bar']``.
    """
    expected_names = ['foo', 'bar']

    # Plain string header.
    simple = (('foo', 'bar'), ('a', 1), ('b', 2))
    assertequal(expected_names, fieldnames(simple))

    class CustomField(object):
        """Header cell identified by ``field_id`` with a free-text description."""

        def __init__(self, field_id, description):
            self.field_id = field_id
            self.description = description

        def __str__(self):
            return self.field_id

        def __repr__(self):
            return 'CustomField(%r, %r)' % (self.field_id, self.description)

    # Header built from CustomField objects.
    foo_field = CustomField('foo', 'Get some foo.')
    bar_field = CustomField('bar', 'A lot of bar.')
    rich = ((foo_field, bar_field), ('a', 1), ('b', 2))
    assertequal(expected_names, fieldnames(rich))
def split_dataset(dataset, p_train_data, split_mode):
    """Split ``dataset`` into a leading train part and a trailing test part.

    The number of data rows is measured on the first field of ``dataset``;
    ``round(rows * p_train_data)`` rows go to the train split and the
    remaining rows to the test split.

    Args:
        dataset: Table accepted by ``fieldnames``, ``values``, ``head`` and
            ``tail`` (presumably a petl table -- confirm against the imports).
        p_train_data: Fraction of the rows assigned to the train split.
        split_mode: Splitting strategy. Only ``'normal'`` (keep the original
            row order) is implemented.

    Returns:
        A ``(train_data, test_data)`` tuple. ``test_data`` is an empty list
        when no rows are left for the test split.

    Raises:
        ValueError: If ``split_mode`` is not ``'normal'``. Previously this
            case failed with an UnboundLocalError at the ``return``.
    """
    if split_mode != 'normal':
        # Shuffle mode is still to be implemented.
        raise ValueError(
            "unsupported split_mode %r: only 'normal' is implemented"
            % (split_mode,))

    fields = list(fieldnames(dataset))

    size_dataset = len(values(dataset, fields[0]))
    size_train_data = int(round(size_dataset * p_train_data))
    size_test_data = abs(size_train_data - size_dataset)

    # head()/tail() count data rows only (the header is always kept), so the
    # sizes are passed as-is; subtracting 1 silently dropped one row from each
    # split and produced a negative count for an empty train split.
    train_data = head(dataset, size_train_data)
    test_data = tail(dataset, size_test_data) if size_test_data else []

    return train_data, test_data
Exemple #7
0
# NOTE(review): `foobaz` is not defined anywhere in the visible part of this
# snippet -- presumably it was built by an earlier example; confirm.
foobaz
# Materialise `foobaz` into a list.
list(foobaz)

# header()
##########

import petl as etl
# Small in-memory table: three rows of two cells each.
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
# Demonstrates etl.header() on the table above.
etl.header(table)

# fieldnames()
##############

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
# Calls etl.fieldnames() and etl.header() on the same table, one after the
# other, so their results can be compared.
etl.fieldnames(table)
etl.header(table)

# data()
########

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
# Keep the result of etl.data() and materialise it into a list.
d = etl.data(table)
list(d)

# dicts()
#########

import petl as etl
# NOTE(review): the dicts() example is cut off here -- only the table setup
# is present and no etl.dicts() call follows.
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
# Sources of the data we are going to read
uri_confirmed = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
uri_death = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
uri_recovered = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# The confirmed cases are processed first; the 'Country/Region' header is
# renamed to 'Country' right after loading.
t_confirmed = etl.rename(etl.fromcsv(uri_confirmed),
                         {'Country/Region': 'Country'})

# Fix the data types.
# From the fifth column onward the values are integers (the number of
# people/cases). While visiting those columns, their headers are also
# rewritten from the 1/23/20 date format to 2020-01-23.
headers = etl.fieldnames(t_confirmed)
for i, header in enumerate(headers):
    if i < 4:
        # The first four columns are not date columns; leave them alone.
        continue
    # Correct the data type of the column.
    t_confirmed = etl.convert(t_confirmed, header, int)
    # Parse the header so it can be re-emitted in the target date format.
    fecha = datetime.datetime.strptime(header, '%m/%d/%y')
    t_confirmed = etl.rename(t_confirmed, header, fecha.strftime('%Y-%m-%d'))

# Drop the Province/State, Lat and Lon columns, which are not used
t_confirmed = etl.cutout(t_confirmed, 0, 2, 3)

# Adjust some country names so a region/continent can be assigned to them later
Exemple #9
0
# NOTE(review): this snippet starts mid-section -- the title line that the
# underline below belongs to (presumably "# header()") is not visible.
##########


import petl as etl

# Small in-memory table: three rows of two cells each.
table = [["foo", "bar"], ["a", 1], ["b", 2]]
# Demonstrates etl.header() on the table above.
etl.header(table)


# fieldnames()
##############

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
# Calls etl.fieldnames() and etl.header() on the same table, one after the
# other, so their results can be compared.
etl.fieldnames(table)
etl.header(table)


# data()
########

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
# Keep the result of etl.data() and materialise it into a list.
d = etl.data(table)
list(d)


# NOTE(review): the dicts() section is cut off right after its title.
# dicts()
#########
Exemple #10
0
def procesar_fuente(path, nombre):
    """Load a case time-series CSV, reshape it per country/date and store it.

    The CSV at ``path`` is expected to have a 'Country/Region' column and,
    from the fifth column onward, one column per day whose header is a date
    in ``%m/%d/%y`` format. The function:

    1. renames 'Country/Region' to 'Country', casts the date columns to int
       and rewrites their headers as ``%Y-%m-%d``;
    2. drops the columns at positions 0, 2 and 3 (Province/State, Lat, Lon);
    3. normalises a few country names and sums the rows per country;
    4. melts the date columns into 'Date' / 'Cases' rows and adds a
       'Continent' field computed with ``get_continent_code``;
    5. writes the result to the table ``nombre`` of the 'covid' MySQL
       database (``etl.todb`` is called with ``create=True, drop=True``).

    Args:
        path: Location of the CSV, passed straight to ``etl.fromcsv``.
        nombre: Name of the destination database table.

    Raises:
        Exception: Any error raised while processing is reported on stdout
            and then re-raised unchanged.
    """
    # Country spellings to normalise before a continent is assigned. They are
    # applied in this order through str.replace, exactly as before.
    country_renames = (
        ('Congo (Brazzaville)', 'Congo'),
        ('Congo (Kinshasa)', 'Democratic Republic of the Congo'),
        ('Cote d\'Ivoire', 'Ivory Coast'),
        ('Korea, South', 'South Korea'),
        ('West Bank and Gaza', 'Palestine'),
        ('Burma', 'Myanmar'),
        ('US', 'USA'),
        ('Taiwan*', 'Taiwan'),
    )

    try:
        tabla = etl.fromcsv(path)

        # Rename the header used for the country.
        tabla = etl.rename(tabla, {'Country/Region': 'Country'})

        # From the fifth column onward the values are integer case counts;
        # while visiting those columns, also turn their headers from
        # 1/23/20 into 2020-01-23.
        for position, header in enumerate(etl.fieldnames(tabla)):
            if position < 4:
                continue
            tabla = etl.convert(tabla, header, int)
            fecha = datetime.datetime.strptime(header, '%m/%d/%y')
            tabla = etl.rename(tabla, header, fecha.strftime('%Y-%m-%d'))

        # Drop the Province/State, Lat and Lon columns, which are not used.
        tabla = etl.cutout(tabla, 0, 2, 3)

        for old_name, new_name in country_renames:
            tabla = etl.convert(tabla, 'Country', 'replace', old_name, new_name)

        # Group by country and accumulate the counts (done through pandas).
        df = etl.todataframe(tabla).groupby(['Country']).sum()
        tabla = etl.fromdataframe(df, include_index=True)

        # The grouping key comes back as 'index'; name it 'Country' again.
        tabla = etl.rename(tabla, {'index': 'Country'})

        # Turn the date columns into rows and name the resulting columns.
        tabla = etl.melt(tabla, 'Country')
        tabla = etl.rename(tabla, {'variable': 'Date'})
        tabla = etl.rename(tabla, {'value': 'Cases'})

        # Add the continent so the data can be grouped by it.
        tabla = etl.addfield(
            tabla, 'Continent',
            lambda rec: get_continent_code(rec['Country']))

        # Make sure the melted columns have the intended data types.
        tabla = etl.convert(tabla, 'Cases', int)
        tabla = etl.convert(
            tabla, 'Date',
            lambda v: datetime.datetime.strptime(v, '%Y-%m-%d'))

        # Finally, upload the table to the data repository.
        conn = pymysql.connect(password='******', database='covid', user='******')
        try:
            conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
            etl.todb(tabla, conn, nombre, create=True, drop=True)
        finally:
            # Release the connection even when the upload fails.
            conn.close()
    except Exception:
        print('Se ha presentado un error! ', sys.exc_info()[0])
        raise
def extract_values(dataset, field_position):
    """Return the values of one field of ``dataset``.

    Thin wrapper that forwards both arguments to ``values``.

    Args:
        dataset: Table to read from.
        field_position: Field selector handed to ``values`` unchanged
            (presumably a field index or name -- confirm against callers).

    Returns:
        Whatever ``values(dataset, field_position)`` returns.
    """
    # The field names were previously computed here and never used; that dead
    # work has been removed.
    return values(dataset, field_position)
Exemple #12
0
def add_fields(fields, table):
    """Return ``table`` extended with every field of ``fields`` it lacks.

    Each missing field is added through ``etl.addfield`` with ``None`` as its
    value; fields already present are left untouched.
    """
    for name in fields:
        # Re-read the header on every pass: it grows as fields are added.
        if name in etl.fieldnames(table):
            continue
        table = etl.addfield(table, name, None)
    return table
Exemple #13
0
def remove_fields(fields, table):
    """Return ``table`` without any of the fields listed in ``fields``.

    Fields that are not present in the table are skipped; the others are
    removed one at a time through ``etl.cutout``.
    """
    for name in fields:
        # Re-read the header on every pass: it shrinks as fields are cut.
        if name not in etl.fieldnames(table):
            continue
        table = etl.cutout(table, name)
    return table