def llenarListaEmpresas(self):
    global iEmpresa
    self.listEmpresas = QtWidgets.QListWidget(self.scrollAreaWidgetContents_2)
    self.listEmpresas.setObjectName("listEmpresas")
    empresas = etl.fromdb(connection, 'SELECT * FROM empresas')
    for empresa in etl.data(empresas):
        item = QtWidgets.QListWidgetItem()
        self.listEmpresas.addItem(item)
    self.horizontalLayout_3.addWidget(self.listEmpresas)
    __sortingEnabled = self.listEmpresas.isSortingEnabled()
    self.listEmpresas.setSortingEnabled(False)
    iEmpresa = 0
    for empresa in etl.data(empresas):
        item = self.listEmpresas.item(iEmpresa)
        item.setText(empresa[1])
        iEmpresa += 1
    self.listEmpresas.setSortingEnabled(__sortingEnabled)

def test_wrap_tuple_return():
    tablea = etl.wrap((('foo', 'bar'), ('A', 1), ('C', 7)))
    tableb = etl.wrap((('foo', 'bar'), ('B', 5), ('C', 7)))
    added, removed = tablea.diff(tableb)
    eq_(('foo', 'bar'), added.header())
    eq_(('foo', 'bar'), removed.header())
    ieq(etl.data(added), added.data())
    ieq(etl.data(removed), removed.data())

def toxlswithheader(table, filename, sheet, **kwargs):
    """
    Use `petl.toxls` to write the data in `table` to the XLS file `filename`,
    including a key-value metadata header if passed in as the keyword
    argument `metadata`. The first row in `table` is assumed to contain the
    header columns.
    """
    metadata = kwargs.pop("metadata", {})
    # prepare header
    header = petl.header(table)
    # prepare metadata rows using #-prefix, and :-suffix for keys
    metadata_rows = []
    for key, value in metadata.items():
        metadata_row = [''] * len(header)
        metadata_row[0] = '#' + str(key) + ':'
        metadata_row[1] = str(value)
        metadata_rows.append(metadata_row)
    # prepare data (stripped of header)
    data = petl.data(table)
    # combine metadata + header + data, then write out
    combined = metadata_rows + [header] + list(data)
    petl.toxls(combined, filename, sheet, **kwargs)

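# A minimal usage sketch of toxlswithheader above. Hedged: the table contents,
# output file name, and metadata values here are made up for illustration and
# are not from the original source.
import petl

example_table = [
    ['name', 'count'],
    ['apples', 3],
    ['pears', 7],
]

# each metadata key becomes a '#key:' cell in the first column, with its value
# in the second column, written above the header row of the XLS sheet
toxlswithheader(
    example_table,
    'fruit.xls',
    'Sheet 1',
    metadata={'source': 'inventory export', 'version': '1.0'},
)
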
def run(self, driver, task, log):
    input_driver = driver.get_driver(task["source"]["connection"])
    sql = self._parse_sql(task["source"])
    db = input_driver.get_db()
    record_set = etl.fromdb(db, sql)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        fld = task["target"].get("folder", "output")
        fld = compat.translate_unicode(fld)
        target = task["target"]["file"]
        target = compat.translate_unicode(target)
        out = "{}/{}".format(fld, target)

        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")

        task_log = "log/db-csv_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).tocsv(out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(out, encoding=enc, delimiter=separator)
    db.close()

def run(self, driver, task, log):
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    sheet = task["source"].get("sheet", None)
    use_view = task["source"].get("use_view", True)
    record_set = etl.fromxls(inp, sheet, use_view=use_view)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        out = task["target"]["file"]
        out = compat.translate_unicode(out)
        out = "output/{}".format(out)
        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")

        task_log = "log/xls-csv_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).tocsv(out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(out, encoding=enc, delimiter=separator)

def test_data():
    """Test the data function."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = data(table)
    expect = (('a', 1), ('b', 2))
    ieq(expect, actual)

def paginate_characters_table(csv_path, page):
    characters_table, headers, total_characters = get_characters_table(csv_path)
    data = etl.data(
        etl.rowslice(characters_table, 10 * (page - 1), 10 + 10 * (page - 1)))
    return headers, data, total_characters

def aggregate_collection(
    request: HttpRequest,
    collection_id: int,
) -> HttpResponse:
    """
    Value count computations could also be moved into a celery task that
    would prepare the answer and deliver it to the user later (via email
    or on a results page).
    """
    collection = get_object_or_404(StarWarsCollection, id=collection_id)
    table = etl.fromcsv(collection.filepath)
    aggregate_keys, parameters_settings = parse_parameters(
        request.GET.get(
            'current_parameters',
            '0000001001',
        ),
    )

    if len(aggregate_keys) == 1:
        # aggregate does not work correctly
        # if a list with 1 element is passed
        aggregate_keys = aggregate_keys[0]

    if len(aggregate_keys) == 0:
        # show no table if every option is disabled
        table = etl.empty()
    else:
        table = table.aggregate(key=aggregate_keys, aggregation=len)

    return render(
        request,
        'main/collection_aggregate.html',
        {
            'collection': collection,
            'parameters_settings': parameters_settings,
            'headers': etl.header(table),
            'data': etl.data(table),
        },
    )

def load_table_data(csv_file, limit) -> TableData:
    table = etl.fromcsv(csv_file)
    return TableData(
        header=etl.header(table),
        data=etl.data(table, limit),
        next_limit=limit + 10 if limit < table.len() else None,
    )

def get_delta(source_table, target_table, key='id'):
    source_table_headers = etl.header(source_table)
    target_table_headers = etl.header(target_table)
    if source_table_headers != target_table_headers:
        raise Exception(
            'Source table columns do not match target table columns')

    source_ids = etl.cut(source_table, key)
    target_ids = etl.cut(target_table, key)
    added_ids_table, _ = etl.diff(source_ids, target_ids)

    merged_table = etl.merge(source_table, target_table, key=key)

    load_frame = etl.todataframe(
        etl.selectin(target_table, key, etl.values(added_ids_table, key)))
    print(load_frame)

    for row in etl.data(merged_table):
        for i, col in enumerate(row):
            if isinstance(col, etl.transform.reductions.Conflict):
                changes = tuple(col)
                print('For car {}, {} changed from {} to {}'.format(
                    row[0], source_table_headers[i], changes[1], changes[0]))
                row_dict = dict(zip(source_table_headers, list(row)))
                row_dict[source_table_headers[i]] = changes[0]
                row_dict = {key: [val] for (key, val) in row_dict.items()}
                print(row_dict)
                df = pd.DataFrame(row_dict)
                load_frame = load_frame.append(df, ignore_index=True)
                break

    return etl.fromdataframe(load_frame)

def precip_table_etl_noaa(
    precip_table,
    rainfall_adjustment=1,
    frequency_min=1,
    frequency_max=1000,
    conversion_factor=2.54,
    desc_field="by duration for ARI (years):",
    duration_val="24-hr:"
):
    """
    Extract, Transform, and Load data from a NOAA PRECIPITATION FREQUENCY
    ESTIMATES matrix (in a csv) into an array used by the runoff calculator.

    Required Inputs:
        - precip_table: NOAA PRECIPITATION FREQUENCY ESTIMATES csv, in inches.

    Optional Inputs:
        - rainfall_adjustment: multiplier to adjust for future rainfall
            conditions. Defaults to 1.
        - frequency_min: the min. annual frequency to be returned. Default: 1
        - frequency_max: the max. annual frequency to be returned. Default: 1000
        - conversion_factor: apply to rainfall values. Default: 2.54
            (convert inches to centimeters).
        - desc_field: exact field name from NOAA table in first column.
            Defaults to "by duration for ARI (years):". Used for selecting data.
        - duration_val: exact row value in the desc_field from NOAA table that
            contains the duration of interest. Defaults to "24-hr:". Used for
            selecting data.

    Outputs:
        - precip_array: 1D array containing the 24-hour duration estimate for
            the 1, 2, 5, 10, 25, 50, 100, 200, 500, and 1000 year storm events
    """
    # load the csv table, skip the file header information, extract rows we need
    t1 = etl\
        .fromcsv(precip_table)\
        .skip(13)\
        .rowslice(0, 19)
    # grab raw data from the row containing the x-hour duration event info
    t2 = etl\
        .select(t1, desc_field, lambda v: v == duration_val)\
        .cutout(desc_field)
    # generate a new header with only columns within frequency min/max
    h = tuple(
        i for i in list(etl.header(t2))
        if (int(i) >= frequency_min and int(i) <= frequency_max)
    )
    # for events within freq range, convert to cm, adjust for future rainfall
    t3 = etl\
        .cut(t2, h)\
        .convertall(
            lambda v: round(float(v) * conversion_factor * rainfall_adjustment, 2))
    # convert to a 1D array (values cast to floats)
    precips = list(etl.data(t3)[0])
    # also convert to a dictionary, for lookup by event
    precips_lookup = list(etl.dicts(t3))[0]
    # return 1D array and dictionary
    return precips, precips_lookup

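# A hypothetical invocation of precip_table_etl_noaa above; the CSV path and
# adjustment factor are placeholders for illustration, not values from the
# original source.
precips, precips_lookup = precip_table_etl_noaa(
    'noaa_precip_freq_estimates.csv',  # placeholder path to a downloaded NOAA csv
    rainfall_adjustment=1.1,           # e.g., assume +10% rainfall under future conditions
)
# precips: list of 24-hr depths in cm, ordered by return period (1..1000 yr)
# precips_lookup: dict keyed by return-period column name, e.g. {'1': ..., '2': ...}
print(precips)
print(precips_lookup)
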
async def plot(netliq: Optional[bool] = False, percentage: Optional[bool] = False,
               duration: Optional[str] = None):
    sesh = await RenewableTastyAPISession.create()
    # choose an account
    acc = await get_account(sesh)
    history = await acc.get_history(sesh)

    table = petl.fromdicts(history).cut(
        'executed-at', 'transaction-type', 'action', 'symbol', 'value',
        'value-effect', 'quantity', 'commission', 'clearing-fees',
        'proprietary-index-option-fees', 'regulatory-fees'
    ).addfield('is-closing', lambda row: 'Close' in row['action'] if row['action'] else False) \
        .sort(['executed-at', 'is-closing'])

    # create a portfolio with the given history
    pf = Portfolio(petl.data(table), net_liq=netliq)

    # get initial net liq if we're using percentage
    nl = None
    if percentage:
        pf_tmp = Portfolio(petl.data(table), net_liq=True)
        nl = pf_tmp._get_starting_net_liq(duration)

    # get the P/L or net liq and save the graph
    val = pf.plot(duration, starting_net_liq=nl)

    # print current positions
    if nl is None:
        print(('Current net liquidity' if netliq else 'Realized P/L') + f': ${val:.2f}')
    else:
        print(('Change in net liquidity' if netliq else 'Realized P/L') + f': {val:.2f}%')
    print('Current positions:')
    for p in pf.positions.values():
        print(p)

def aggregate_characters_table(csv_path, filters):
    if len(filters) == 1:
        filters = filters[0]
    characters_table, headers, total_characters = get_characters_table(csv_path)
    data = etl.data(
        etl.aggregate(characters_table, key=filters, aggregation=len))
    return headers, data, total_characters

def llenarListaTipos(self):
    global iTipos
    tipos = etl.fromdb(connection, 'SELECT * FROM tipos')
    for tipo in etl.data(tipos):
        item = QtWidgets.QListWidgetItem()
        self.listWidgetPresets.addItem(item)
    __sortingEnabled = self.listWidgetPresets.isSortingEnabled()
    self.listWidgetPresets.setSortingEnabled(False)
    iTipos = 0
    for tipo in etl.data(tipos):
        item = self.listWidgetPresets.item(iTipos)
        item.setText(tipo[1])
        iTipos += 1
    self.listWidgetPresets.setSortingEnabled(__sortingEnabled)

def format_results(results, f, geodata_model):
    """handle parsing the format argument to convert the results 'table'
    into one of the desired formats

    :param results: [description]
    :type results: [type]
    :param f: [description]
    :type f: [type]
    :param geodata_model: [description]
    :type geodata_model: [type]
    :return: [description]
    :rtype: [type]
    """
    # make submitted value lowercase, to simplify comparison
    f = f.lower()

    # fall back to JSON if no format provided
    if f not in F_ALL:
        f = F_JSON[0]

    # JSON format
    if f in F_JSON:
        if f == 'time':
            # grouped by timestamp
            return _groupby(results, key='ts', sortby='id')
        elif f == 'sensor':
            # grouped by id
            return _groupby(results, key='id', sortby='ts')
        else:
            # (list of dicts)
            return results

    # GEOJSON format (GeoJSON Feature collection; results under 'data' key within properties)
    # elif f in F_GEOJSON:
    #     results = _groupby(results, key='id', sortby='ts')
    #     return _format_as_geojson(results, geodata_model)

    # ARRAYS format (2D table)
    elif f in F_ARRAYS:
        # nested arrays
        t = etl.fromdicts(results)
        # h = list(etl.header(t))
        # return list(etl.data(t)).insert(0, h)
        return list(etl.data(t))

    elif f in F_CSV:
        return _format_teragon(results)

    # elif f in F_MD:
    #     return results

    else:
        return results

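# Side note on the commented-out F_ARRAYS variant above: list.insert returns
# None, so `list(etl.data(t)).insert(0, h)` would return None rather than the
# table. A small standalone sketch (with hypothetical `results` data) of one
# way to prepend the header instead:
import petl as etl

results = [
    {'id': 'a', 'ts': '2021-01-01T00:00', 'value': 1.5},
    {'id': 'b', 'ts': '2021-01-01T00:00', 'value': 2.0},
]

t = etl.fromdicts(results)
# build nested arrays with the header as the first row
rows = [list(etl.header(t))] + [list(r) for r in etl.data(t)]
print(rows)
# e.g. [['id', 'ts', 'value'], ['a', '2021-01-01T00:00', 1.5], ['b', '2021-01-01T00:00', 2.0]]
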
def store_to_db(self, conn, tablename, data):
    try:
        if etl.nrows(data) == 0:
            return None
    except TypeError:
        return None

    cursor = conn.cursor()
    sql = "INSERT INTO %s (%s) " % (tablename, ','.join(etl.header(data))) + "VALUES %s"
    execute_values(cursor, sql, etl.data(data))
    conn.commit()
    conn.close()

def run(self, driver, task, log):
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    row_match = task["source"].get("row", None)
    value_match = task["source"].get("value", None)
    attr = task["source"].get("attr", None)
    mapping = task["source"].get("mapping", None)

    if row_match and value_match:
        if attr:
            record_set = etl.fromxml(inp, row_match, value_match, attr)
        else:
            record_set = etl.fromxml(inp, row_match, value_match)
    elif row_match and mapping:
        record_set = etl.fromxml(inp, row_match, mapping)
    else:
        raise ValueError('Incorrect parameter for source')

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        output_driver = driver.get_driver(task["target"]["connection"])
        db = output_driver.get_db()
        table = task["target"]["table"]
        table = compat.translate_unicode(table)
        if "schema" in task["target"]:
            schema_name = task["target"]["schema"]
            schema_name = compat.translate_unicode(schema_name)
        else:
            schema_name = None

        task_log = "log/xml-db_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).todb(output_driver.cursor(db),
                                                        tablename=table,
                                                        schema=schema_name)
            else:
                record_set.progress(10000, out=lg).appenddb(output_driver.cursor(db),
                                                            tablename=table,
                                                            schema=schema_name)
        db.close()

def llenarListaTipos(self):
    global iTipos
    # self.listWidgetPresets = QtWidgets.QListWidget(self.frame)
    # self.listWidgetPresets.setObjectName("listWidgetPresets")
    tipos = etl.fromdb(connection, 'SELECT * FROM tipos')
    for tipo in etl.data(tipos):
        item = QtWidgets.QListWidgetItem()
        self.listWidgetPresets.addItem(item)
    # self.horizontalLayout_3.addWidget(self.listWidgetPresets)
    __sortingEnabled = self.listWidgetPresets.isSortingEnabled()
    self.listWidgetPresets.setSortingEnabled(False)
    iTipos = 0
    for tipo in etl.data(tipos):
        item = self.listWidgetPresets.item(iTipos)
        item.setText(tipo[1])
        iTipos += 1
    self.listWidgetPresets.setSortingEnabled(__sortingEnabled)

def run(self, driver, task, log):
    source_folder = task["source"].get("folder", "input")
    source_folder = compat.translate_unicode(source_folder)
    source = task["source"]["file"]
    source = compat.translate_unicode(source)
    inp = "{}/{}".format(source_folder, source)
    separator = task["source"].get("delimiter", ";")
    separator = compat.translate_unicode(separator)
    enc = task["source"].get("encoding", "utf-8")
    enc = compat.translate_unicode(enc)
    record_set = etl.fromcsv(inp, encoding=enc, delimiter=separator)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        output_driver = driver.get_driver(task["target"]["connection"])
        db = output_driver.get_db()
        table = task["target"]["table"]
        table = compat.translate_unicode(table)
        if "schema" in task["target"]:
            schema_name = task["target"]["schema"]
            schema_name = compat.translate_unicode(schema_name)
        else:
            schema_name = None

        task_log = "log/csv-db_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).todb(output_driver.cursor(db),
                                                        tablename=table,
                                                        schema=schema_name)
            else:
                record_set.progress(10000, out=lg).appenddb(output_driver.cursor(db),
                                                            tablename=table,
                                                            schema=schema_name)
        db.close()

def head_collection(request: HttpRequest, collection_id: int) -> HttpResponse:
    collection = get_object_or_404(StarWarsCollection, id=collection_id)
    more_data = int(request.GET.get('more', 0))
    table = etl.fromcsv(collection.filepath)
    original_size = len(table)
    table = table.head(10 + 10 * more_data)
    return render(
        request,
        'main/collection_head.html',
        {
            'show_more': len(table) < original_size,
            'collection': collection,
            'headers': settings.STAR_WARS_CHARACTERS_OUTPUT_FILE_HEADER_FIELDS,
            'data': etl.data(table),
            'more_data': more_data + 1,
        },
    )

def test_basics():
    t1 = (('foo', 'bar'), ('A', 1), ('B', 2))
    w1 = FluentWrapper(t1)

    eq_(('foo', 'bar'), w1.header())
    eq_(petl.header(w1), w1.header())
    iassertequal((('A', 1), ('B', 2)), w1.data())
    iassertequal(petl.data(w1), w1.data())

    w2 = w1.cut('bar', 'foo')
    expect2 = (('bar', 'foo'), (1, 'A'), (2, 'B'))
    iassertequal(expect2, w2)
    iassertequal(petl.cut(w1, 'bar', 'foo'), w2)

    w3 = w1.cut('bar', 'foo').cut('foo', 'bar')
    iassertequal(t1, w3)

def test_basics():
    t1 = (('foo', 'bar'), ('A', 1), ('B', 2))
    w1 = etl.wrap(t1)

    eq_(('foo', 'bar'), w1.header())
    eq_(etl.header(w1), w1.header())
    ieq((('A', 1), ('B', 2)), w1.data())
    ieq(etl.data(w1), w1.data())

    w2 = w1.cut('bar', 'foo')
    expect2 = (('bar', 'foo'), (1, 'A'), (2, 'B'))
    ieq(expect2, w2)
    ieq(etl.cut(w1, 'bar', 'foo'), w2)

    w3 = w1.cut('bar', 'foo').cut('foo', 'bar')
    ieq(t1, w3)

def run(self, driver, task, log):
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    row_match = task["source"].get("row", None)
    value_match = task["source"].get("value", None)
    attr = task["source"].get("attr", None)
    mapping = task["source"].get("mapping", None)

    if row_match and value_match:
        if attr:
            record_set = etl.fromxml(inp, row_match, value_match, attr)
        else:
            record_set = etl.fromxml(inp, row_match, value_match)
    elif row_match and mapping:
        record_set = etl.fromxml(inp, row_match, mapping)
    else:
        raise ValueError('Incorrect parameter for source')

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        out = task["target"]["file"]
        out = compat.translate_unicode(out)
        out = "output/{}".format(out)
        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")

        task_log = "log/xml-csv_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).tocsv(out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(out, encoding=enc, delimiter=separator)

def test_basics():
    t1 = (('foo', 'bar'), ('A', 1), ('B', 2))
    w1 = etl(t1)

    eq_(('foo', 'bar'), w1.header())
    eq_(petl.header(w1), w1.header())
    ieq((('A', 1), ('B', 2)), w1.data())
    ieq(petl.data(w1), w1.data())

    w2 = w1.cut('bar', 'foo')
    expect2 = (('bar', 'foo'), (1, 'A'), (2, 'B'))
    ieq(expect2, w2)
    ieq(petl.cut(w1, 'bar', 'foo'), w2)

    w3 = w1.cut('bar', 'foo').cut('foo', 'bar')
    ieq(t1, w3)

class CollectionDetailsView(DetailView):
    template_name = 'collection_details.html'
    queryset = SWPeopleCollection.objects.all()
    page_size = 10

    def get_context_data(self, **kwargs) -> dict:
        c = super().get_context_data(**kwargs)
        table = self.object.get_table()
        buttons = etl.header(table)
        offset = int(self.request.GET.get('offset', 1))
        offset_to = offset * self.page_size

        if aggregation_keys := tuple(
                set(buttons).intersection(set(self.request.GET.keys()))):
            table = self.object.get_aggregate_data(aggregation_keys)

        # this is essentially to speed up rendering as it would be slow in django template
        # putting this in templatetag would be more elegant - extend petl to render html
        # directly into a template would also be nice
        data = ''
        for row in etl.data(table, 0, offset_to):
            data += '<tr><td>' + '</td><td>'.join(row) + '</td></tr>'

        c.update({
            'headers': etl.header(table),
            'buttons': buttons,
            'data': data,
            'offset': offset + 1,
            'offset_extra_params': '&'.join(
                ['{}=on'.format(i) for i in aggregation_keys]),
            'offset_reached': table.len() < offset_to,
            'aggregation_keys': aggregation_keys
        })
        return c

def run(self, driver, task, log):
    input_driver = driver.get_driver(task["source"]["connection"])
    sql = self._parse_sql(task["source"])
    db = input_driver.get_db()
    record_set = etl.fromdb(db, sql)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        output_driver = driver.get_driver(task["target"]["connection"])
        out_db = output_driver.get_db()
        table = task["target"]["table"]
        table = compat.translate_unicode(table)
        if "schema" in task["target"]:
            schema_name = task["target"]["schema"]
            schema_name = compat.translate_unicode(schema_name)
        else:
            schema_name = None

        task_log = "log/db-db_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).todb(
                    output_driver.cursor(out_db), tablename=table, schema=schema_name)
            else:
                record_set.progress(10000, out=lg).appenddb(
                    output_driver.cursor(out_db), tablename=table, schema=schema_name)
        out_db.close()
    db.close()

def test_xls_write_and_load(tmpdir, sampledata2):
    """
    Roundtrip test: write an XLS file with metadata to disk, then load the
    XLS file and check the data and metadata match the original.
    """
    # original
    metadata = sampledata2["metadata"]
    header = sampledata2["header"]
    data = sampledata2["data"]

    # write to disk
    tempfile = os.path.join(tmpdir, 'roundtrip_test.xls')
    table = [header] + data
    toxlswithheader(table, tempfile, 'Sheet 1', metadata=metadata)

    # load from disk
    table = fromxlswithheader(tempfile, 'Sheet 1')

    # checks
    assert table.metadata == metadata, 'different metadata'
    assert table.header == tuple(header), 'different header'
    assert len(list(table.dicts())) == 3, 'wrong number of data rows'
    for exp_row, obs_row in zip(data, petl.data(table)):
        assert exp_row == obs_row, 'different data row encountered'

def data(self):
    """
    Returns an iterable object for iterating over the raw data rows as
    tuples (without field names).
    """
    return petl.data(self.table)

def gen_portfolio(net_liq=False):
    table = petl.fromjson(_DATABASE_PATH)
    return Portfolio(petl.data(table), net_liq=net_liq)

def test_data():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = data(table)
    expect = (('a', 1), ('b', 2))
    ieq(expect, actual)

##############

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
etl.fieldnames(table)
etl.header(table)


# data()
########

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.data(table)
list(d)


# dicts()
#########

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.dicts(table)
d
list(d)


# namedtuples()

etl.header(table)


# fieldnames()
##############

import petl as etl

table = [['foo', 'bar'], ['a', 1], ['b', 2]]
etl.fieldnames(table)
etl.header(table)


# data()
########

import petl as etl

table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.data(table)
list(d)


# dicts()
#########

import petl as etl

table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.dicts(table)
d
list(d)


# namedtuples()
###############

import petl as etl

def test_data():
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = data(table)
    expect = (("a", 1), ("b", 2))
    ieq(expect, actual)