def producthandling(row, namemapping):
    """Add the product year and nutrition attributes to ``row``.

    The 'date' value (a ``%Y-%m-%d`` string) and the 'product_name' value
    are fetched with ``pygrametl.getvalue``, to which ``namemapping`` is
    forwarded. ``row`` is modified in place and also returned.

    Keys written: 'product_year', 'category', 'energy', 'carbohydrates',
    'fat' and 'protein'. The nutrition keys are set to ``None`` when the
    product name is not present in ``NUTRITION_DATA``.
    """
    from datetime import datetime

    raw_date = pygrametl.getvalue(row, 'date', namemapping)
    # Only the year of the parsed date is stored on the row.
    row['product_year'] = datetime.strptime(raw_date, '%Y-%m-%d').date().year

    name = pygrametl.getvalue(row, 'product_name', namemapping)
    nutrition_fields = ('category', 'energy', 'carbohydrates', 'fat',
                        'protein')

    # Unknown product: blank out every nutrition attribute.
    if name not in NUTRITION_DATA:
        for field in nutrition_fields:
            row[field] = None
        return row

    # Known product: copy the nutrition values over one by one.
    entry = NUTRITION_DATA[name]
    for field in nutrition_fields:
        row[field] = entry[field]
    return row
def locationhandling(row, namemapping):
    """Add country statistics, the location year and the GDP to ``row``.

    ``row`` is modified in place and also returned. Keys written:
    'population', 'life_expectancy', 'anav_income', 'location_year' and
    'gdp'. Each country statistic is ``None`` when the country is missing
    from the corresponding lookup table.

    ``namemapping`` is forwarded to ``pygrametl.getvalue`` for the 'date'
    and '2012' values.
    """
    from datetime import datetime

    # Note: 'Country' is read straight from the row, not via namemapping.
    nation = row['Country']

    # Population
    row['population'] = POP_DATA[nation] if nation in POP_DATA else None

    # Life expectancy
    row['life_expectancy'] = (
        LIFE_EXPECTANCY_DATA[nation]
        if nation in LIFE_EXPECTANCY_DATA else None
    )

    # Annual average income (taken from the GNI table)
    row['anav_income'] = GNI_DATA[nation] if nation in GNI_DATA else None

    # The date arrives as a '%Y-%m-%d' string; only its year is kept.
    raw_date = pygrametl.getvalue(row, 'date', namemapping)
    row['location_year'] = datetime.strptime(raw_date, '%Y-%m-%d').date().year

    # The GDP year is hard-coded to keep the ETL process simple, and because
    # the data only covers 2012.
    row['gdp'] = pygrametl.getvalue(row, '2012', namemapping)
    return row
def TimeExpander(row, namemapping):
    """Merge the time attributes derived from the row's timestamp into it.

    The 'timestamp' value is fetched with ``pygrametl.getvalue`` (to which
    ``namemapping`` is forwarded), converted with ``TimestampToDateTime``
    and turned into a mapping by ``TimeToRow``; that mapping is merged into
    ``row`` with ``row.update``. ``row`` is modified in place and returned.

    When ``VERBOSE`` is truthy, the row is printed before and after the
    merge.
    """
    if VERBOSE:
        print('TimeExpander:', namemapping, row)

    moment = TimestampToDateTime(
        pygrametl.getvalue(row, 'timestamp', namemapping))
    row.update(TimeToRow(moment))

    if VERBOSE:
        print('Expended row:', row)

    # Disabled in the original: row[namemapping['timestamp']] = str(date)
    return row
def datehandling(row, namemapping):
    """Compute the date-related fields for ``row`` and store them on it.

    Called from ensure(row) when the lookup of a date fails. The 'date'
    value (a ``%Y-%m-%d`` string) is fetched with ``pygrametl.getvalue``,
    to which ``namemapping`` is forwarded. ``row`` is modified in place and
    also returned.

    Keys written: 'day', 'month', 'year', 'week', 'weekyear' and 'dateid'.
    """
    text = pygrametl.getvalue(row, "date", namemapping)
    parsed = time.strptime(text, "%Y-%m-%d")
    iso = datetime.date(
        parsed.tm_year, parsed.tm_mon, parsed.tm_mday).isocalendar()

    # We could use row[namemapping.get('day') or 'day'] = X to support a
    # name mapping for the written keys as well.
    row["day"] = parsed.tm_mday
    row["month"] = parsed.tm_mon
    row["year"] = parsed.tm_year
    row["week"] = iso[1]
    row["weekyear"] = iso[0]
    # 366 slots per year, counted from 1990 (allows dates from 1990-01-01).
    row["dateid"] = parsed.tm_yday + 366 * (parsed.tm_year - 1990)
    return row
def datehandling(row, namemapping):
    """Fill in the calendar fields derived from the row's date.

    Called from ensure(row) when the lookup of a date fails. The 'date'
    value is fetched with ``pygrametl.getvalue`` (``namemapping`` is
    forwarded to it) and parsed as ``%Y-%m-%d``. The keys 'day', 'month',
    'year', 'week', 'weekyear' and 'dateid' are written to ``row``, which
    is then returned.
    """
    stamp = time.strptime(
        pygrametl.getvalue(row, 'date', namemapping), "%Y-%m-%d")
    # The first three fields of the parsed time are year, month and day;
    # field 7 is the day of the year.
    yr, mon, dom = stamp[:3]
    day_of_year = stamp[7]
    iso_year, iso_week, _ = datetime.date(yr, mon, dom).isocalendar()

    # We could use row[namemapping.get('day') or 'day'] = X to support a
    # name mapping for the written keys as well.
    row['day'] = dom
    row['month'] = mon
    row['year'] = yr
    row['week'] = iso_week
    row['weekyear'] = iso_year
    # Allow dates from 1990-01-01: each year gets a block of 366 ids.
    row['dateid'] = day_of_year + 366 * (yr - 1990)
    return row
def datehandling(row, namemapping):
    """Calculate all date-related fields and add them to ``row``.

    This is called from ensure(row) when the lookup of a date fails. In the
    Real World, you would probably prefill the date dimension, but this
    function illustrates "rowexpanders": they make it possible to calculate
    derived attributes on demand, so that the (possibly expensive)
    calculations are only done when needed and not for each seen data row.

    The 'date' value is fetched with ``pygrametl.getvalue`` (``namemapping``
    is forwarded to it) and parsed as ``%Y-%m-%d``. ``row`` is modified in
    place and returned with the keys 'day', 'month', 'year', 'week',
    'weekyear' and 'dateid' set.
    """
    date_text = pygrametl.getvalue(row, 'date', namemapping)
    tm = time.strptime(date_text, "%Y-%m-%d")

    calendar_date = datetime.date(tm.tm_year, tm.tm_mon, tm.tm_mday)
    iso_parts = calendar_date.isocalendar()

    row['day'] = tm.tm_mday
    row['month'] = tm.tm_mon
    row['year'] = tm.tm_year
    row['week'] = iso_parts[1]
    row['weekyear'] = iso_parts[0]
    # Support dates from 1990: day of year plus 366 per year since 1990.
    row['dateid'] = tm.tm_yday + 366 * (tm.tm_year - 1990)
    return row