def _get_criteria(self):
    # TODO: only has criteria titles and no IDs.
    # Peer Assess Pro has a single rubric; they plan to allow minimal variety later on.
    return (etl.fromcolumns([
        self._get_imported_criteria_list(),
        self._get_imported_criteria_list()
    ]).rename(0, 'id').rename(1, 'title'))
def _get_answers(self):
    # TODO: double-check whether assessee and assessor are mixed up here;
    # the assessee/assessor ids are confusing. Is the assessment result peer feedback?
    assessment_results = (etl.fromcsv(
        f'{self._dirc}/assessment_result.csv',
        delimiter=';').listoflists())[1:]
    # Create a separate row for each criterion: the Peer Assess Pro table
    # format has each criterion as a separate column.
    table = []
    for row in assessment_results:
        crit_names = self._get_imported_criteria_list()
        for i, answers in enumerate(zip(row[3:], crit_names)):
            comment, criteria = answers
            table.append([int(row[0]) * len(crit_names) + i] + row[1:3] +
                         [comment, criteria])
    assessments = (
        etl.fromcsv(f'{self._dirc}/assessment.csv', delimiter=';')
        .cut('id', 'assessor_id')
        .rename('id', 'assessee_artifact_id')
        .rename('assessor_id', 'assessee_actor_id'))  # also needs double-checking
    return (etl.fromcolumns(np.array(table).T.tolist())
            .rename(0, 'id')
            .rename(1, 'assessee_artifact_id')
            # TODO: double-check this rename; assessee should possibly be assessor here
            .rename(2, 'assessor_actor_id')
            .rename(3, 'comment')
            .rename(4, 'criterion_id')
            .leftjoin(assessments, key='assessee_artifact_id')
            .convert('assessee_artifact_id', lambda r: None))
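# A small self-contained sketch (made-up data) of the transpose step used in
# _get_answers above: the rows accumulated in `table` are row-oriented, so they
# are flipped with numpy before etl.fromcolumns, which expects one inner list
# per column. Note np.array stringifies the mixed-type ids, as it does in the
# method itself.
import numpy as np
import petl as etl

rows = [[0, 'artifact1', 'actor1', 'fine', 'clarity'],
        [1, 'artifact1', 'actor1', 'good', 'depth']]
cols = np.array(rows).T.tolist()
print(etl.fromcolumns(cols).rename(0, 'id'))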
def test_preprocess():
    header = ['SUBJECT', 'NAME']
    data = [['2', '1'], ['Steve', 'Bob']]
    table = etl.fromcolumns(data, header)
    table = util.preprocess(table, 'SUBJECT')
    result = list(table.data())
    assert result == [(1, 'Bob', result[0][2]), (2, 'Steve', result[1][2])]
def from_columns(cls, cols, header=None):
    """
    Create a ``parsons table`` from a list of lists organized as columns.

    `Args:`
        cols: list
            A list of lists organized as columns.
        header: list
            List of column names. If not specified, dummy column names are used.
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    return cls(petl.fromcolumns(cols, header=header))
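# A minimal usage sketch for from_columns above; the data and header are
# invented, assuming the Parsons Table class exposes this as a classmethod.
from parsons import Table

tbl = Table.from_columns([[1, 2, 3], ['a', 'b', 'c']], header=['id', 'letter'])
print(tbl)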
def fetch_collection(filepath):
    addr = f'{settings.SWAPI_HOST}/api/people'
    # write the header row once, then append each page of results below
    etl.tocsv([FIELDS_WE_NEED], filepath)
    while addr:
        response = requests.get(addr).json()
        addr = response['next']  # URL of the next page, or None on the last page
        table_columns = [[item[column_name] for item in response['results']]
                         for column_name in FIELDS_WE_GET]
        table = (etl.fromcolumns(table_columns, header=FIELDS_WE_GET)
                 .convert('homeworld', resolve_homeworld)
                 .addfield('date', lambda rec: rec['edited'].split('T')[0])
                 .cutout('created')
                 .cutout('edited'))
        etl.appendcsv(table, filepath)
    return response['count']
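# resolve_homeworld is referenced but not defined in this snippet. A plausible
# hedged sketch, assuming the SWAPI `homeworld` field holds a planet URL whose
# JSON payload includes the planet name:
def resolve_homeworld(url):
    return requests.get(url).json()['name']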
# coding:utf8
import petl as etl

cols1 = [[0, 1, 2], ['a', 'b', 'c']]
tb1 = etl.fromcolumns(cols1)
print(tb1)

# pad the shorter column with a "missing" value
cols2 = [[0, 1, 2, 3], ['a', 'b', 'c']]
tb2 = etl.fromcolumns(cols2, missing='NA')
print(tb2)

# petl.io.json.fromjson()
dicts = [{'foo': 'a', 'bar': 1},
         {'foo': 'b', 'bar': 2},
         {'foo': 'c', 'bar': 3},
         {'foo': 'd'}]
# fill missing values with the int 4
tb3 = etl.fromdicts(dicts, header=['foo', 'bar'], missing=4)
print(tb3)
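# For comparison, a small sketch passing an explicit header to fromcolumns,
# which names the fields directly instead of the default f0/f1:
tb4 = etl.fromcolumns(cols1, header=['num', 'letter'])
print(tb4)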
import psycopg2
import petl as etl
import pandas as pd
import numpy as np
from datetime import datetime

dn = 'defect'
du = 'postgres'
dp = 1
dh = 'localhost'
dbp = 5432
cs = "dbname=%s user=%s password=%s host=%s port=%s" % (dn, du, dp, dh, dbp)
connection = psycopg2.connect(cs)

# a = np.random.rand(100000, 200).tolist()
b = np.random.rand(100000, 10).tolist()
# transpose the row-oriented random data so each inner list is a column
table_b = etl.fromcolumns(np.array(b).T.tolist())

# pandas -> table -> db
df = pd.DataFrame(columns=['id', 'features', 'model_id', 'regdate'])
for i in range(1, 10000):
    df.loc[i] = [i, np.random.rand(10).tolist(), 'test1', datetime.now()]
pddf = etl.fromdataframe(df)
etl.todb(pddf, connection, 'defect_features', 'public')

# select query; a named cursor makes psycopg2 stream rows server-side
mkcursor = lambda: connection.cursor(name='arbitrary')
table = etl.fromdb(mkcursor, 'select * from public.defect_features')
print(table)
import pickle
import petl as etl
import csv

cols = [[0, 1, 2], ['a', 'b', 'c']]
table1 = etl.fromcolumns(cols)
print(table1)

########################### CSV reading ###############################
table2 = etl.fromcsv('AAPL.csv')
print(table2['Date'])
print(table2)
etl.tocsv(table1, 'example.csv')  # writing to a CSV file

########################## Reading from pickle files ####################
"""
What is pickle?
Pickling is a way to convert a Python object (list, dict, etc.) into a
character stream. The idea is that this character stream contains all the
information necessary to reconstruct the object in another Python script.
"""
# Creating a pickle file
a = ['test value', 'test value 2', 'test value 3']
file_Name = "testfile"
# open the file for writing
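# A hedged completion of the truncated pickle example above: dump the list to
# "testfile" with the standard pickle module and read it back.
with open(file_Name, 'wb') as fileObject:
    pickle.dump(a, fileObject)

with open(file_Name, 'rb') as fileObject:
    restored = pickle.load(fileObject)
print(restored)  # ['test value', 'test value 2', 'test value 3']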
import glob
import os
import sys

import netCDF4
import petl
from dateutil import parser, tz

# the fragment's opening was truncated; reconstructed as a file-count guard
# (3 variables x 4 time chunks = 12 NetCDF files expected)
if len(glob.glob(
        os.path.join(os.path.dirname(sys.argv[1]),
                     'ocm3_[thz]*' + inputstr + '_H*'))) == 12:
    timelen = 96
    home = os.path.expanduser("~")
    today = parser.parse(inputstr + '00+0000').astimezone(tz.tzlocal())
    dataset = {}
    for i in ('temp', 'hvel', 'zcor'):
        dataset[i] = netCDF4.MFDataset([
            os.path.join(head, 'ocm3_{0}_{1}_H{2}.nc'.format(i, inputstr, j))
            for j in ('-23_00', '01_24', '25_48', '49_72')
        ])
    find = [-1, -5, -10]
    depth = petl.fromcolumns([find], ['depth'])
    points = petl.fromcsv(os.path.join(home, 'var', 'Points')).convert({
        'lat': float,
        'lon': float,
        'ncpos': int
    })
    pPoints = [int(x) for x in open(os.path.join(home, 'var', 'PengHu'))]
    nodelist = points.values('ncpos')
    zcorlist = dataset['zcor']['zcor'][:].take(nodelist, 2)
    templist = dataset['temp']['temp'][:].take(nodelist, 2)
    ulist = dataset['hvel']['u'][:].take(nodelist, 2)
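# A tiny self-contained demo of the ndarray.take(indices, 2) calls above:
# they select node columns along axis 2 of a (time, level, node) array.
import numpy as np

arr = np.arange(24).reshape(2, 3, 4)  # (time, level, node)
print(arr.take([0, 2], 2).shape)      # -> (2, 3, 2)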
def table_container_from(dataset):
    return etl.fromcolumns(dataset.columns, dataset.header)
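# A minimal sketch of calling table_container_from; `dataset` here is a
# made-up stand-in for any object exposing `columns` (a list of column lists)
# and `header` (the field names).
from types import SimpleNamespace

dataset = SimpleNamespace(columns=[[1, 2], ['x', 'y']], header=['id', 'code'])
print(table_container_from(dataset))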
# initialize lists of column data
BOCDates = []
BOCRates = []

# check response status and process the BOC JSON object
if BOCResponse.status_code == 200:
    BOCRaw = json.loads(BOCResponse.text)

    # extract observation data into column arrays
    for row in BOCRaw['observations']:
        BOCDates.append(datetime.datetime.strptime(row['d'], '%Y-%m-%d'))
        BOCRates.append(decimal.Decimal(row['FXUSDCAD']['v']))

    # create a petl table from the column arrays and name the columns
    exchangeRates = petl.fromcolumns([BOCDates, BOCRates],
                                     header=['date', 'rate'])
    # print(exchangeRates)

# load the expense document
try:
    expenses = petl.io.xlsx.fromxlsx('Expenses.xlsx', sheet='Github')
except Exception as e:
    print('could not open Expenses.xlsx: ' + str(e))
    sys.exit()

# join the tables
expenses = petl.outerjoin(exchangeRates, expenses, key='date')

# fill down missing rate values
expenses = petl.filldown(expenses, 'rate')
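# A self-contained sketch of the petl.filldown behaviour relied on above:
# rows with a missing rate inherit the nearest non-missing value above them.
import petl

demo = petl.fromcolumns(
    [['2023-01-02', '2023-01-03', '2023-01-04'], [0.74, None, None]],
    header=['date', 'rate'])
print(petl.filldown(demo, 'rate'))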
import json
import pyodbc
import petl as etl

server = 'localhost'
database = 'BikeStores'
username = '******'
password = '******'
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' +
                            server + ';DATABASE=' + database + ';UID=' +
                            username + ';PWD=' + password)

mkcursor = lambda: connection.cursor()
table = etl.fromdb(mkcursor, 'select * from production.brands')
print(table)

cursor = connection.cursor()
categories = [['yuchan', 'raum', 'hyejin']]
table = etl.fromcolumns(categories)
table = etl.rename(table, 'f0', 'category_name')
res = etl.appenddb(table, connection, 'categories', 'production')
print(res)

# df = pd.DataFrame(columns=['id', 'list', 'dict'])
# for i in range(5):
#     id = i
#     a = np.array([k for k in range(i + 10)]).tobytes()
#     b = json.dumps({'ab': [1, 2, 3], 'cd': [4, 5, 6]})
#     df.loc[i] = (i, a, b)
# from sqlalchemy import create_engine
# df