def app(event, context):
    """Run the extract -> transform -> load pipeline and report the outcome.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.

    Returns:
        dict: ``{"statusCode": 200}`` when every stage completes, or
        ``{"statusCode": 500}`` when any stage raises an ``Exception``.
    """
    try:
        logger.info('Extract')
        data = extract()

        logger.info('Transform')
        covid_df = transform(data)

        # NOTE(review): conn is never closed in this handler -- confirm
        # whether connect()/load() manage the connection's lifetime.
        conn = connect()
        logger.info('Load')
        load(conn, covid_df)
    except Exception as err:
        # Log first, with the traceback attached, so the root cause is
        # recorded even if the notifier itself fails.
        logger.exception(err)
        n.notify("Error in the handler")
        return {"statusCode": 500}

    return {"statusCode": 200}
from etl import extract, transform, load

# Sample URLs to try:
#   http://api.fixer.io/latest
#   https://api.cryptonator.com/api/full/ltc-usd
#   https://google.com

# raw_input() only exists on Python 2. On Python 3 the equivalent
# "read one line as text" builtin is input(), so fall back to it instead of
# failing with NameError.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

url = _read_line("Enter a URL that returns JSON: ")

# extract() returns a (data, error) pair; this script treats error being
# None as success.
raw_data, error = extract(url)

if error is None:
    records = transform(raw_data)
    count = load(records)
    print("Loaded %d records from %s" % (count, url))
else:
    # The failure text is read from the error's 'message' entry.
    print("An error occured: %s" % error['message'])
import etl
import metricas
import constants
import pandas as pd

# Raw string: the Windows path contains backslash sequences (\E, \d, ...)
# that are not valid escapes. A raw literal keeps the exact same path while
# avoiding the invalid-escape warning (a future syntax error).
DOCS_DIR = r'D:\ETL-TechnicalEvaluation\ETL-TechnicalEvaluation\docs'

# Extract the payments and clients tables.
payments_df, clientes_df = etl.extract()

# Transform both tables; this also produces the per-client metrics table.
payments_df, clientes_df, metricsPerClient = etl.transform(payments_df, clientes_df)

# NOTE: this rebinds the name `metricas` from the imported module to the
# first value returned by metricsCalculator, so the module is no longer
# reachable under that name after this line.
metricas, metricsPerClient = metricas.metricsCalculator(clientes_df, metricsPerClient)

# Write every resulting table to CSV, keeping the index column.
payments_df.to_csv(DOCS_DIR + r'\pagamentos.csv', index=True)
clientes_df.to_csv(DOCS_DIR + r'\clients.csv', index=True)
metricsPerClient.to_csv(DOCS_DIR + r'\metricasCliente.csv', index=True)
metricas.to_csv(DOCS_DIR + r'\metricas.csv', index=True)
def test_extract(self):
    """Check that extract() on the test file equals the reference data."""
    # test_file_path and extract_data are fixtures defined outside this
    # method.
    extracted = extract(test_file_path)
    assert extract_data == extracted
# NOTE(review): the statements down to driver.quit() are the tail of a
# routine whose beginning is not visible here; the nesting below is
# reconstructed from syntax -- confirm against the original layout.

# Click every link found in the 7th cell of each result-table row, pausing
# one second before each click, then shut the browser down.
xpath = '//table[@class="resultTable table_cellspacing_1 table_border_1 mb_6"]//tr/td[7]//a'
for ele in driver.find_elements_by_xpath(xpath):
    time.sleep(1)
    ele.click()
driver.close()
driver.quit()


if __name__ == '__main__':
    print("start")
    df = get_codes()
    for index, item in df.iterrows():
        code = item["code"]
        print(code)
        ###filenames = db.get_filenames(code)
        download(code)
        # NOTE(review): this exit ends the script right after the first
        # download, so the archive-processing loop below and the final
        # "done" message never run. It looks like a debugging short-circuit;
        # confirm before removing it.
        sys.exit(0)
        for fn in glob("backend/data/%s/*.zip" % code):
        ###for fn in filenames:
            try:
                ###if fn in filenames:
                ###    continue
                items, values = etl.extract(fn, code)
                db.save_items(fn, code, items)
                db.save_values(fn, code, values)
                meta.save_meta(fn)
                es.insert_to_es(fn)
            except Exception:
                # Best effort per archive: report the failure and move on.
                # Catching Exception rather than using a bare except lets
                # Ctrl-C and SystemExit still stop the run.
                print(traceback.format_exc())
    print("done")
def test_basic(self):
    """Check that extract() yields the expected column names, in order."""
    actual_df = extract(self.spark)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        actual_df.schema.names,
        ["date", "time", "user_id", "url", "ip", "user_agent_string"],
    )