Example #1
0
    def _get_record_identifiers(self, cfg):
        """Load record identifiers from JSON and return them as dicts.

        Uses the path configured under ``record_identifiers`` when it
        points at an existing file; otherwise falls back to
        ``<root>/record_ids.json``.
        """
        path = cfg.get('record_identifiers')
        if not path or not os.path.isfile(path):
            path = os.path.join(cfg['root'], 'record_ids.json')
        return petl.dicts(petl.fromjson(path))
Example #2
0
    def from_json(cls, local_path, header=None, line_delimited=False):
        """
        Create a ``parsons table`` from a json file

        `Args:`
            local_path: str
                A JSON formatted local path, url or ftp. If this is a
                file path that ends in ".gz", the file will be decompressed first.
            header: list
                List of columns to use for the destination table. If omitted, columns will
                be inferred from the initial data in the file.
            line_delimited: bool
                Whether the file is line-delimited JSON (with a row on each line), or a proper
                JSON file.
        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """

        if line_delimited:
            if files.is_gzip_path(local_path):
                open_fn = gzip.open
            else:
                open_fn = open

            # 'rt' forces text mode for both openers; gzip.open('r') would
            # otherwise yield bytes while plain open('r') yields str.
            with open_fn(local_path, 'rt') as file:
                rows = [json.loads(line) for line in file]
            return cls(rows)

        else:
            return cls(petl.fromjson(local_path, header=header))
Example #3
0
def main():
    """Set up the jinja2 environment, extract data and write HTML pages."""
    env = Environment(
        loader=FileSystemLoader(str(TEMPLATES)),
        autoescape=select_autoescape(["html", "xml"]),
    )
    env.filters["petl2html"] = get_html

    data_processed = process_data(petl.fromjson(INPUT))

    # Recreate a clean output directory before writing anything
    OUTPUT.mkdir(exist_ok=True)
    clear_directory(OUTPUT)

    # Render the static landing page first
    about_html = env.get_template("about.html").render(pages=pages)
    (OUTPUT / "about.html").write_text(about_html, encoding="utf-8")

    # Then one output file per data-driven page
    for page in pages:
        process_page(page, data_processed, pages, env)

    copy_tree(str(STATIC), str(OUTPUT))
 def export_data(self):
     """Load weather data from the configured store into ``self.data``.

     Returns True on success, False if the store could not be read.
     """
     try:
         self.data = petl.fromjson(config.weather_data_store_path)
     except Exception as exc:
         print('Problem in collecting Data from data store: %s' % str(exc))
         return False
     else:
         return True
Example #5
0
def get_load_result(nameFile):
    """Export the saved calculator table to CSV and HTML named *nameFile*."""
    source = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')
    base = './exelFiles/' + str(nameFile)
    tocsv(source, base + '.csv')
    etl.tohtml(source, base + '.html', caption=str(nameFile))
    return jsonify(True)
Example #6
0
def test_fromjson_3():
    # NamedTemporaryFile defaults to binary mode; open in text mode so the
    # JSON string can be written under Python 3 (matches the other tests).
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b"}, {"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()

    # An explicit header drops the extra "baz" field and fills the missing
    # "bar" value with None.
    actual = fromjson(f.name, header=["foo", "bar"])
    expect = (("foo", "bar"), ("a", 1), ("b", None), ("c", 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #7
0
def test_fromjson_1():
    # NamedTemporaryFile defaults to binary mode; open in text mode so the
    # JSON string can be written under Python 3 (matches the other tests).
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b", "bar": 2}, {"foo": "c", "bar": 2}]'
    f.write(data)
    f.close()

    # Without an explicit header the keys of the first object are used.
    actual = fromjson(f.name)
    expect = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #8
0
def test_fromjson_3():
    # NamedTemporaryFile defaults to binary mode; open in text mode so the
    # JSON string can be written under Python 3 (matches the other tests).
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b"}, {"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()

    # An explicit header drops the extra "baz" field and fills the missing
    # "bar" value with None.
    actual = fromjson(f.name, header=['foo', 'bar'])
    expect = (('foo', 'bar'), ('a', 1), ('b', None), ('c', 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #9
0
def extract(file):
    tb = list()
    if file == "sql":
        host = raw_input("Enter Host:")
        user = raw_input("Enter Username:"******"Enter pwd:")
        dtb = raw_input("Enter Database Name:")
        table = raw_input("Enter Table Name:")
        conn = pymysql.connect(host=host,
                               user=user,
                               password=pwd,
                               db=dtb,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)
        temp = etl.fromdb(conn, "SELECT * FROM " + table)
        tb = d2l(temp)
    elif ".csv" in file:
        tb = etl.fromcsv(file)
    elif ".xlsx" in file:
        tb = etl.fromxls(file)
    elif ".json" in file:
        tb = etl.fromjson(file)
        print tb
    elif ".xml" in file:
        f = open(file, 'r').read()
        options = optparse.Values({"pretty": True})
        jsn = json.dumps(xml2json.xml2json(f, options))
        ob = json.loads(jsn.decode('string-escape').strip('"'))
        temp = dict()
        for key in ob.keys():
            for skey in ob[key].keys():
                temp = json.dumps(ob[key][skey])
        with open("temp.json", "w") as tmp:
            tmp.write(temp)
        tb = etl.fromjson("temp.json")
        print tb[0]
        #tb = etl.fromxml(file,'.//ROW',{'Service_Name':'Service_Name','Status':'Status','Service_Type':'Service_Type','Time':'Time'})
    elif ".txt" in file:
        tb = etl.fromtext(file)
        print tb
    return tb
def etl(file):
    """Load *file* into a petl table based on its extension and return it."""
    # Local import deliberately shadows this function's own name inside the body.
    import petl as etl
    # Fixed: removed "import sqllite3" -- a misspelling of sqlite3 that was
    # unused and raised ImportError on every call.
    tb1 = list()
    if ".csv" in file:
        tb1 = etl.fromcsv(file)
    elif ".json" in file:
        tb1 = etl.fromjson(file)
    elif ".xls" in file:
        tb1 = etl.fromxls(file)
    elif ".xml" in file:
        tb1 = etl.fromxml(file, "row")
    # Fixed: the table was built but never returned.
    return tb1
def importEvents():
    """Return a petl table loaded from data/events2.json.

    Prints a diagnostic and implicitly returns None when the path cannot
    be resolved.
    """
    try:
        filePath = Path('data/events2.json')
        filePath.resolve()  # result unused; kept only for its error checks
    except FileNotFoundError:
        print("File [{0}] doesn't exist".format(filePath))
    except ValueError:
        print("File [{0}] has errors".format(filePath))
    except Exception as err:
        print("Something went wrong. Error {0}".format(err))
    else:
        return etl.fromjson(filePath)
Example #12
0
def get_calculos():
    """Apply a user-supplied arithmetic formula to a column of the saved table.

    Expects a JSON body with key 'calculos' of the form
    "<campo> = <operand> <op> <operand> ...". Writes the updated table back
    to disk and returns it as JSON, or False on any failure.
    """
    data = request.get_json()
    calculos = data['calculos']
    try:
        table1 = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')

        # Raw string: '\=' etc. are invalid escape sequences in a plain string.
        campos_y_valores = re.split(r'\=|\+|\-|\/|\*', calculos)
        campoElegido = campos_y_valores[0].strip()
        campos_a_operar = [c.strip() for c in campos_y_valores[1:]]
        print("Campos a operar:", campos_a_operar)
        print("calculos", calculos)

        # Collect the arithmetic operators in order of appearance.
        operaciones = [ch for ch in calculos if ch in '+-/*']

        # Validate every non-numeric operand against the table's fields.
        todosValidados = True
        i = 0
        while todosValidados and i < len(campos_a_operar):
            if not campos_a_operar[i].isdigit():
                todosValidados = validarCampo(table1, campos_a_operar[i])
            i += 1

        if todosValidados:
            # Prefix field names so they resolve against the row inside eval().
            for i in range(len(campos_a_operar)):
                if not campos_a_operar[i].isdigit():
                    campos_a_operar[i] = 'row.' + campos_a_operar[i]
            # Rebuild the expression, e.g. "row.a+row.b-3".
            operacion_final = ""
            for i in range(len(operaciones)):
                operacion_final += campos_a_operar[i] + operaciones[i]
            operacion_final += campos_a_operar[-1]

            # SECURITY: eval() on request-derived text can execute arbitrary
            # code. Operands are validated against table fields above, but a
            # real expression parser should replace this for untrusted input.
            table2 = etl.convert(table1,
                                 campoElegido,
                                 lambda v, row: eval(operacion_final),
                                 pass_row=True)
            etl.tojson(table2, './static/data/tabalaElegidaCalculadora.json')
            rv = showjson("tabalaElegidaCalculadora")
            return jsonify(rv)
        else:
            return jsonify(False)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate.
        return jsonify(False)
Example #13
0
def test_fromjson_1():
    """fromjson with an explicit header reads rows in header order."""
    tmp = NamedTemporaryFile(delete=False, mode='w')
    tmp.write('[{"foo": "a", "bar": 1}, '
              '{"foo": "b", "bar": 2}, '
              '{"foo": "c", "bar": 2}]')
    tmp.close()

    actual = fromjson(tmp.name, header=['foo', 'bar'])
    expect = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    ieq(expect, actual)
    ieq(expect, actual)  # tables must support repeated iteration
Example #14
0
def test_fromjson_2():
    """A header covering all keys reorders columns and fills gaps with None."""
    tmp = NamedTemporaryFile(delete=False, mode='w')
    tmp.write('[{"foo": "a", "bar": 1}, '
              '{"foo": "b"}, '
              '{"foo": "c", "bar": 2, "baz": true}]')
    tmp.close()

    actual = fromjson(tmp.name, header=['bar', 'baz', 'foo'])
    expect = (('bar', 'baz', 'foo'), (1, None, 'a'), (None, None, 'b'),
              (2, True, 'c'))
    ieq(expect, actual)
    ieq(expect, actual)  # tables must support repeated iteration
Example #15
0
def test_fromjson_2():
    # NamedTemporaryFile defaults to binary mode; open in text mode so the
    # JSON string can be written under Python 3 (matches the other tests).
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b"}, {"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()

    # Without an explicit header the union of all keys forms the header and
    # missing values are filled in as None.
    actual = fromjson(f.name)
    expect = (('foo', 'bar', 'baz'),
              ('a', 1, None),
              ('b', None, None),
              ('c', 2, True))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #16
0
def test_fromjson_3():
    """An explicit header limits the fields; absent values become None."""
    tmp = NamedTemporaryFile(delete=False, mode='w')
    tmp.write('[{"foo": "a", "bar": 1}, '
              '{"foo": "b"}, '
              '{"foo": "c", "bar": 2, "baz": true}]')
    tmp.close()

    actual = fromjson(tmp.name, header=['foo', 'bar'])
    expect = (('foo', 'bar'), ('a', 1), ('b', None), ('c', 2))
    ieq(expect, actual)
    ieq(expect, actual)  # tables must support repeated iteration
Example #17
0
def test_json_unicode():
    """Round-trip non-ASCII data through tojson/fromjson."""
    tbl = ((u'name', u'id'),
           (u'Արամ Խաչատրյան', 1),
           (u'Johann Strauß', 2),
           (u'Вагиф Сәмәдоғлу', 3),
           (u'章子怡', 4),
           )
    tojson(tbl, 'tmp/test_tojson_utf8.json')

    # Use a context manager so the handle is closed deterministically
    # (the original leaked the file object returned by open()).
    with open('tmp/test_tojson_utf8.json') as f:
        result = json.load(f)
    assert len(result) == 4
    for a, b in zip(tbl[1:], result):
        assert a[0] == b['name']
        assert a[1] == b['id']

    actual = fromjson('tmp/test_tojson_utf8.json')
    ieq(tbl, actual)
Example #18
0
def main():
    """Fetch nearby places from the Facebook Graph API and load into petl."""
    # NOTE(review): hard-coded access token -- move to configuration/env var.
    token = "391738|0U8e9WpBWfvBWcdjV7WoJzxjH-s"
    graph = facebook.GraphAPI(token)

    places = graph.search(type='place',
                          center='37.8136, 77.2177',
                          fields='name, location')

    print(json.dumps(places))

    # NOTE(review): turning the Python repr into JSON by swapping every single
    # quote is fragile (breaks on apostrophes in values); json.dumps(
    # places['data']) would be safer. Kept as-is to preserve behavior.
    jason_string = str(places['data'])
    data_ = re.sub('\'', '"', jason_string)
    print(data_)
    # Context manager replaces the original open/write/close triple so the
    # handle is closed even if write() raises.
    with open("facebook_.json", "w") as f:
        f.write(data_)

    table6 = etl.fromjson('facebook_.json', header=['name', 'location'])
    print(table6)
Example #19
0
from __future__ import division, print_function, absolute_import


# fromjson()
############

# Documentation example: write a small JSON array of objects to disk, then
# load it as a petl table.
import petl as etl
data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)

table1 = etl.fromjson('example.json')
# Bare expression: displays the table repr in an interactive/doctest session.
table1


# fromdicts()
#############

# Same data, but loaded directly from an in-memory list of dicts.
import petl as etl
dicts = [{"foo": "a", "bar": 1},
         {"foo": "b", "bar": 2},
         {"foo": "c", "bar": 2}]
table1 = etl.fromdicts(dicts)
table1


# tojson()
Example #20
0
import os
import petl
import re
import json

# Concatenate every JSON file for each year into one petl table.
data_2018 = petl.util.base.empty()
for fname in os.listdir('./data/2018/'):
    data_2018 = data_2018.cat(petl.fromjson('./data/2018/' + fname))

data_2017 = petl.util.base.empty()
for fname in os.listdir('./data/2017/'):
    data_2017 = data_2017.cat(petl.fromjson('./data/2017/' + fname))

# De-duplicate on the update timestamp and report the remaining row counts.
data_2018 = data_2018.distinct('updated_at')
print(data_2018.nrows())

data_2017 = data_2017.distinct('updated_at')
print(data_2017.nrows())

# Fix observed song name changes
name_changes = {
    'Have a Holly Jolly Christmas':
    'A Holly Jolly Christmas',
    'Merry Christmas Darling (Remix)':
    'Merry Christmas Darling',
    'The Chipmunk Song (feat. Alvin) [Christmas Don\'t Be Late]':
    'The Chipmunk Song',
    'Walkin In A Winter Wonderland':
    'Winter Wonderland',
    'Santa Claus Is Coming to Town (Intro)':
    'Santa Claus Is Coming to Town',
import pyodbc
import petl as etl
import pymysql as mysql

########## Json extraction and maping
# Load customer records from JSON with an explicit column order, then move
# the gender column to position 4 so all three sources line up for cat().
tableJ = etl.fromjson('cust_data.json', header=['id','gender','first_name','last_name', 'email','ville'])
tableJ = etl.movefield(tableJ, 'gender', 4)

########## CSV extraction and conversion
tableCSV = etl.fromcsv('week_cust.csv')
tableCSV = etl.convert(tableCSV, 'id', int)  # CSV reads everything as str

########### Sqlserver connection and extraction
# NOTE(review): connection settings are hard-coded; move to configuration.
connectionSqlServer=pyodbc.connect("Driver={SQL Server Native Client 11.0};" "Server=81_64_msdn;" "Database=BD4client;" "Trusted_Connection=yes;" "convert_unicode =True;")
cursor = connectionSqlServer.cursor()
cursor.execute('SELECT id, first_name, last_name, email, gender, ville FROM client_DATA')
tableSqlServer = cursor.fetchall()
# Prepend a header row so the result list looks like a petl table.
tableSqlServer =[('id','first_name','last_name', 'email','gender','ville')]+tableSqlServer
cursor.close()
connectionSqlServer.close()

######### Staging area transforming and concatenation
# Merge the three sources, normalize gender codes, and rename ville -> city.
StagingArea = etl.cat(tableCSV, tableJ,tableSqlServer)
StagingArea = etl.convert(StagingArea, 'gender', {'Male': 'M', 'Female': 'F', 'male': 'M', 'female': 'F', None: 'N'})
StagingArea = etl.rename(StagingArea, 'ville', 'city')

######## mysql
# NOTE(review): hard-coded MySQL credentials -- move to configuration.
connection = mysql.connect(host="localhost", user="******", passwd="", db="customerdatabase")
curseur = connection.cursor()
curseur.execute('SET SQL_MODE=ANSI_QUOTES')  # petl emits ANSI-quoted identifiers
#### load data, assuming table " CustomerData" already exists in the database
Example #22
0
def gen_portfolio(net_liq=False):
    """Build a Portfolio from the positions stored in the JSON database."""
    rows = petl.data(petl.fromjson(_DATABASE_PATH))
    return Portfolio(rows, net_liq=net_liq)
Example #23
0
################Reading XML files##################################

# Parse data.xml treating each 'tr' element as a row and 'td' as values.
table5 = etl.fromxml('data.xml', 'tr', 'td')
print(table5)

################Reading JSON files################################

# Write a small JSON array of objects to disk, then load it as a petl table
# with an explicit column order.
data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)

table6 = etl.fromjson('example.json', header=['foo', 'bar'])
print(table6)

###############Getting data from facebook#######################

#Documentation- https://facebook-sdk.readthedocs.io/en/latest/api.html
import json
import facebook
import re


def main():
    token = "391738|0U8e9WpBWfvBWcdjV7WoJzxjH-s"
    graph = facebook.GraphAPI(token)

    places = graph.search(type='place',
#!/usr/bin/env python3
import petl as etl
from collections import OrderedDict  # NOTE(review): unused in the visible code

# Load the files
users = etl.fromcsv('data/users.csv')
events = etl.fromjson('data/events2.json')

# Transform
# Dim Customers
# Filter necessary data only
dim_customers = etl.cut(users, 'user_id', 'email')
# Export as csv to load folder
etl.tocsv(dim_customers, 'load/dim_customers.csv')

# Dim Subscriptions
# Use the distinct values present in the type column to load  into the dim subscription table
dim_subscriptions_cut = etl.cut(events, 'type')
dim_subscriptions_rename = etl.rename(dim_subscriptions_cut,
                                      {'type': 'subscription_name'})
dim_subscriptions = etl.distinct(dim_subscriptions_rename)
# Export as csv to load folder
etl.tocsv(dim_subscriptions, 'load/dim_subscriptions.csv')

# Dim Medium
# Use the distinct values present in the utm_medium colum to load into the dim medium table
dim_medium_cut = etl.cut(events, 'utm_medium')
dim_medium_rename = etl.rename(dim_medium_cut, {'utm_medium': 'medium'})
dim_medium = etl.distinct(dim_medium_rename)
# Export as csv to load folder
etl.tocsv(dim_medium, 'load/dim_medium.csv')