def _get_record_identifiers(self, cfg):
    p = cfg.get('record_identifiers')
    if not (p and os.path.isfile(p)):
        p = os.path.join(cfg['root'], 'record_ids.json')
    table = petl.fromjson(p)
    return petl.dicts(table)
def from_json(cls, local_path, header=None, line_delimited=False):
    """
    Create a ``parsons table`` from a JSON file.

    `Args:`
        local_path: str
            A JSON-formatted local path, url or ftp. If this is a file path
            that ends in ".gz", the file will be decompressed first.
        header: list
            List of columns to use for the destination table. If omitted,
            columns will be inferred from the initial data in the file.
        line_delimited: bool
            Whether the file is line-delimited JSON (with a row on each line),
            or a proper JSON file.

    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    if line_delimited:
        if files.is_gzip_path(local_path):
            open_fn = gzip.open
        else:
            open_fn = open

        with open_fn(local_path, 'r') as file:
            rows = [json.loads(line) for line in file]
        return cls(rows)
    else:
        return cls(petl.fromjson(local_path, header=header))
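A minimal usage sketch for the classmethod above (the file names are hypothetical):

from parsons import Table

# Plain JSON array file.
tbl = Table.from_json('people.json')

# Gzipped line-delimited JSON, one object per line.
tbl_ld = Table.from_json('people.jsonl.gz', line_delimited=True)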
def main():
    """Set up the Jinja2 environment, extract data and save down to html files."""
    env = Environment(
        loader=FileSystemLoader(str(TEMPLATES)),
        autoescape=select_autoescape(["html", "xml"]),
    )
    env.filters["petl2html"] = get_html
    data = petl.fromjson(INPUT)
    data_processed = process_data(data)

    # Refresh the output directory
    OUTPUT.mkdir(exist_ok=True)
    clear_directory(OUTPUT)

    # Save home page
    home_template = env.get_template("about.html")
    html_output = home_template.render(pages=pages)
    (OUTPUT / "about.html").write_text(html_output, encoding="utf-8")

    # Save data-driven pages
    for page in pages:
        process_page(page, data_processed, pages, env)

    copy_tree(str(STATIC), str(OUTPUT))
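The `get_html` function registered as the `petl2html` filter is defined elsewhere; a minimal sketch of what it might look like, assuming it renders a petl table to an HTML string via petl's in-memory source (the body below is an assumption, not the original implementation):

import petl

def get_html(table):
    # Assumed implementation: render the table to HTML into an in-memory
    # byte buffer and return the markup as a string for Jinja2 to embed.
    buf = petl.MemorySource()
    petl.tohtml(table, buf)
    return buf.getvalue().decode('utf-8')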
def export_data(self):
    try:
        self.data = petl.fromjson(config.weather_data_store_path)
    except Exception as e:
        print('Problem collecting data from the data store: %s' % str(e))
        return False
    return True
def get_load_result(nameFile):
    table1 = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')
    etl.tocsv(table1, './exelFiles/' + str(nameFile) + '.csv')
    etl.tohtml(table1, './exelFiles/' + str(nameFile) + '.html',
               caption=str(nameFile))
    return jsonify(True)
def test_fromjson_3():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b"}, {"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name, header=["foo", "bar"])
    expect = (("foo", "bar"),
              ("a", 1),
              ("b", None),
              ("c", 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromjson_1():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b", "bar": 2}, {"foo": "c", "bar": 2}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name)
    expect = (("foo", "bar"),
              ("a", 1),
              ("b", 2),
              ("c", 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
import json
import optparse

import petl as etl
import pymysql
import xml2json


def extract(file):
    tb = list()
    if file == "sql":
        host = input("Enter Host:")
        user = input("Enter Username:")
        pwd = input("Enter pwd:")
        dtb = input("Enter Database Name:")
        table = input("Enter Table Name:")
        conn = pymysql.connect(host=host, user=user, password=pwd, db=dtb,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)
        temp = etl.fromdb(conn, "SELECT * FROM " + table)
        tb = d2l(temp)
    elif ".csv" in file:
        tb = etl.fromcsv(file)
    elif ".xlsx" in file:
        tb = etl.fromxlsx(file)
    elif ".json" in file:
        tb = etl.fromjson(file)
        print(tb)
    elif ".xml" in file:
        f = open(file, 'r').read()
        options = optparse.Values({"pretty": True})
        # xml2json returns a JSON string; parse it directly
        ob = json.loads(xml2json.xml2json(f, options))
        temp = dict()
        for key in ob.keys():
            for skey in ob[key].keys():
                temp = json.dumps(ob[key][skey])
        with open("temp.json", "w") as tmp:
            tmp.write(temp)
        tb = etl.fromjson("temp.json")
        print(tb[0])
        # tb = etl.fromxml(file, './/ROW', {'Service_Name': 'Service_Name', 'Status': 'Status', 'Service_Type': 'Service_Type', 'Time': 'Time'})
    elif ".txt" in file:
        tb = etl.fromtext(file)
        print(tb)
    return tb
def etl(file):
    tb1 = list()
    import petl as etl
    import sqlite3
    if ".csv" in file:
        tb1 = etl.fromcsv(file)
    elif ".json" in file:
        tb1 = etl.fromjson(file)
    elif ".xls" in file:
        tb1 = etl.fromxls(file)
    elif ".xml" in file:
        tb1 = etl.fromxml(file, "row")
    return tb1
def importEvents():
    try:
        filePath = Path('data/events2.json')
        resolved = filePath.resolve(strict=True)
    except FileNotFoundError:
        print("File [{0}] doesn't exist".format(filePath))
    except ValueError:
        print("File [{0}] has errors".format(filePath))
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
    else:
        return etl.fromjson(resolved)
def get_calculos():
    data = request.get_json()
    calculos = data['calculos']
    try:
        table1 = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')
        campos_y_valores = re.split(r'=|\+|-|/|\*', calculos)
        campoElegido = campos_y_valores[0].strip()
        campos_a_operar = campos_y_valores[1:]
        print("Fields to operate on:", campos_a_operar)
        print("calculos", calculos)

        # Collect the math operators in order of appearance
        operaciones = []
        for i in calculos:
            if i == '+' or i == '-' or i == '/' or i == '*':
                operaciones.append(i)

        # Strip surrounding whitespace
        for i in range(len(campos_a_operar)):
            campos_a_operar[i] = campos_a_operar[i].strip()

        # Data validation -----------------------------------------------------
        todosValidados = True
        i = 0
        while todosValidados and i < len(campos_a_operar):
            if not campos_a_operar[i].isdigit():
                todosValidados = validarCampo(table1, campos_a_operar[i])
            i += 1
        # ----------------------------------------------------------------------

        if todosValidados:
            # Prefix field names with "row." so they resolve against each record
            for i in range(len(campos_a_operar)):
                if not campos_a_operar[i].isdigit():
                    campos_a_operar[i] = 'row.' + campos_a_operar[i]

            # Build the expression for eval
            operacion_final = ""
            for i in range(len(operaciones)):
                operacion_final += campos_a_operar[i] + operaciones[i]
            operacion_final += campos_a_operar[len(campos_a_operar) - 1]

            table2 = etl.convert(table1, campoElegido,
                                 lambda v, row: eval(operacion_final),
                                 pass_row=True)
            # etl.tojson(table2, "./static/data/calculos.json")
            etl.tojson(table2, './static/data/tabalaElegidaCalculadora.json')
            rv = showjson("tabalaElegidaCalculadora")
            return jsonify(rv)
        else:
            return jsonify(False)
    except Exception:
        return jsonify(False)
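A worked example of the string manipulation above (the field names are hypothetical):

# calculos = "total = precio * cantidad"
# re.split(r'=|\+|-|/|\*', calculos)  ->  ['total ', ' precio ', ' cantidad']
# campoElegido     -> 'total'
# campos_a_operar  -> ['row.precio', 'row.cantidad']
# operaciones      -> ['*']
# operacion_final  -> 'row.precio*row.cantidad'
# eval() then computes precio * cantidad against every row. Note that eval on
# request-supplied input is a code-injection risk; validarCampo() vets field
# names, but anything that passes isdigit() is interpolated unchecked.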
def test_fromjson_1():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, ' \
           '{"foo": "b", "bar": 2}, ' \
           '{"foo": "c", "bar": 2}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name, header=['foo', 'bar'])
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2),
              ('c', 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromjson_2():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, ' \
           '{"foo": "b"}, ' \
           '{"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name, header=['bar', 'baz', 'foo'])
    expect = (('bar', 'baz', 'foo'),
              (1, None, 'a'),
              (None, None, 'b'),
              (2, True, 'c'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromjson_2():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, {"foo": "b"}, {"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name)
    expect = (('foo', 'bar', 'baz'),
              ('a', 1, None),
              ('b', None, None),
              ('c', 2, True))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromjson_3():
    f = NamedTemporaryFile(delete=False, mode='w')
    data = '[{"foo": "a", "bar": 1}, ' \
           '{"foo": "b"}, ' \
           '{"foo": "c", "bar": 2, "baz": true}]'
    f.write(data)
    f.close()
    actual = fromjson(f.name, header=['foo', 'bar'])
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', None),
              ('c', 2))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_json_unicode():
    tbl = ((u'name', u'id'),
           (u'Արամ Խաչատրյան', 1),
           (u'Johann Strauß', 2),
           (u'Вагиф Сәмәдоғлу', 3),
           (u'章子怡', 4))
    tojson(tbl, 'tmp/test_tojson_utf8.json')
    result = json.load(open('tmp/test_tojson_utf8.json'))
    assert len(result) == 4
    for a, b in zip(tbl[1:], result):
        assert a[0] == b['name']
        assert a[1] == b['id']
    actual = fromjson('tmp/test_tojson_utf8.json')
    ieq(tbl, actual)
import json
import re

import facebook
import petl as etl


def main():
    token = "391738|0U8e9WpBWfvBWcdjV7WoJzxjH-s"
    graph = facebook.GraphAPI(token)
    places = graph.search(type='place', center='37.8136, 77.2177',
                          fields='name, location')
    print(json.dumps(places))
    json_string = str(places['data'])
    # Replace single quotes so the Python repr becomes valid JSON
    data_ = re.sub('\'', '"', json_string)
    print(data_)
    f = open("facebook_.json", "w")
    f.write(data_)
    f.close()
    table6 = etl.fromjson('facebook_.json', header=['name', 'location'])
    print(table6)
from __future__ import division, print_function, absolute_import


# fromjson()
############

import petl as etl
data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)

table1 = etl.fromjson('example.json')
table1


# fromdicts()
#############

import petl as etl
dicts = [{"foo": "a", "bar": 1},
         {"foo": "b", "bar": 2},
         {"foo": "c", "bar": 2}]
table1 = etl.fromdicts(dicts)
table1


# tojson()
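The excerpt breaks off at the tojson() heading; a minimal sketch of what that section would demonstrate, using petl's real tojson() (the output file name is illustrative):

#########

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
etl.tojson(table1, 'example_out.json')
# tojson() writes the table as a JSON array of objects;
# tojsonarrays() would write an array of row arrays instead.
print(open('example_out.json').read())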
import os
import petl
import re
import json

data_2018 = petl.util.base.empty()
for filename in os.listdir('./data/2018/'):
    data_2018 = data_2018.cat(petl.fromjson('./data/2018/' + filename))

data_2017 = petl.util.base.empty()
for filename in os.listdir('./data/2017/'):
    data_2017 = data_2017.cat(petl.fromjson('./data/2017/' + filename))

data_2018 = data_2018.distinct('updated_at')
print(data_2018.nrows())
data_2017 = data_2017.distinct('updated_at')
print(data_2017.nrows())

# Fix observed song name changes
name_changes = {
    'Have a Holly Jolly Christmas': 'A Holly Jolly Christmas',
    'Merry Christmas Darling (Remix)': 'Merry Christmas Darling',
    'The Chipmunk Song (feat. Alvin) [Christmas Don\'t Be Late]': 'The Chipmunk Song',
    'Walkin In A Winter Wonderland': 'Winter Wonderland',
    'Santa Claus Is Coming to Town (Intro)': 'Santa Claus Is Coming to Town',
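The snippet cuts off inside the name_changes mapping; once complete, a dict like this can be applied directly with petl's convert(), which translates matching values and leaves the rest unchanged. A sketch, assuming the track title lives in a 'name' field:

# Hypothetical: 'name' is assumed to be the song-title field in this data set.
data_2018 = petl.convert(data_2018, 'name', name_changes)
data_2017 = petl.convert(data_2017, 'name', name_changes)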
import pyodbc
import petl as etl
import pymysql as mysql

########## JSON extraction and mapping
tableJ = etl.fromjson('cust_data.json',
                      header=['id', 'gender', 'first_name', 'last_name', 'email', 'ville'])
tableJ = etl.movefield(tableJ, 'gender', 4)

########## CSV extraction and conversion
tableCSV = etl.fromcsv('week_cust.csv')
tableCSV = etl.convert(tableCSV, 'id', int)

########### SQL Server connection and extraction
connectionSqlServer = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                                     "Server=81_64_msdn;"
                                     "Database=BD4client;"
                                     "Trusted_Connection=yes;"
                                     "convert_unicode=True;")
cursor = connectionSqlServer.cursor()
cursor.execute('SELECT id, first_name, last_name, email, gender, ville FROM client_DATA')
tableSqlServer = cursor.fetchall()
tableSqlServer = [('id', 'first_name', 'last_name', 'email', 'gender', 'ville')] + tableSqlServer
cursor.close()
connectionSqlServer.close()

######### Staging area: transformation and concatenation
StagingArea = etl.cat(tableCSV, tableJ, tableSqlServer)
StagingArea = etl.convert(StagingArea, 'gender',
                          {'Male': 'M', 'Female': 'F', 'male': 'M', 'female': 'F', None: 'N'})
StagingArea = etl.rename(StagingArea, 'ville', 'city')

######## MySQL
connection = mysql.connect(host="localhost", user="******", passwd="", db="customerdatabase")
curseur = connection.cursor()
curseur.execute('SET SQL_MODE=ANSI_QUOTES')
#### load data, assuming the table "CustomerData" already exists in the database
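The script stops just before the load; a minimal sketch of the missing step using petl's todb(), assuming the column names of StagingArea line up with the existing CustomerData table:

# Hypothetical load step: todb() issues INSERTs against an existing table.
# ANSI_QUOTES was set above so petl's double-quoted identifiers work in MySQL.
etl.todb(StagingArea, connection, 'CustomerData')
connection.close()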
def gen_portfolio(net_liq=False):
    table = petl.fromjson(_DATABASE_PATH)
    return Portfolio(petl.data(table), net_liq=net_liq)
################Reading XML files##################################
table5 = etl.fromxml('data.xml', 'tr', 'td')
print(table5)

################Reading JSON files###############################
data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)

table6 = etl.fromjson('example.json', header=['foo', 'bar'])
print(table6)

###############Getting data from Facebook#######################
# Documentation: https://facebook-sdk.readthedocs.io/en/latest/api.html
import json
import facebook
import re


def main():
    token = "391738|0U8e9WpBWfvBWcdjV7WoJzxjH-s"
    graph = facebook.GraphAPI(token)
    places = graph.search(type='place',
#!/usr/bin/env python3
import petl as etl
from collections import OrderedDict

# Load the files
users = etl.fromcsv('data/users.csv')
events = etl.fromjson('data/events2.json')

# Transform

# Dim Customers
# Filter necessary data only
dim_customers = etl.cut(users, 'user_id', 'email')
# Export as csv to load folder
etl.tocsv(dim_customers, 'load/dim_customers.csv')

# Dim Subscriptions
# Use the distinct values present in the type column to load into the dim subscription table
dim_subscriptions_cut = etl.cut(events, 'type')
dim_subscriptions_rename = etl.rename(dim_subscriptions_cut, {'type': 'subscription_name'})
dim_subscriptions = etl.distinct(dim_subscriptions_rename)
# Export as csv to load folder
etl.tocsv(dim_subscriptions, 'load/dim_subscriptions.csv')

# Dim Medium
# Use the distinct values present in the utm_medium column to load into the dim medium table
dim_medium_cut = etl.cut(events, 'utm_medium')
dim_medium_rename = etl.rename(dim_medium_cut, {'utm_medium': 'medium'})
dim_medium = etl.distinct(dim_medium_rename)
# Export as csv to load folder
etl.tocsv(dim_medium, 'load/dim_medium.csv')