def handle(self, *args, **options):
    """Import every user from the users CSV, then that user's consumptions.

    For each row of ``{DATA_ROOT}/{USERS_FILE}`` a ``User`` is saved; the
    matching ``{DATA_ROOT}/{CONSUMPTIONS_DIR}/<id>.csv`` is then read and one
    ``Consumption`` is saved per row. A save that raises ``IntegrityError``
    is reported with ``print`` and skipped.
    """
    user_rows = read_csv(f'{DATA_ROOT}/{USERS_FILE}')
    for _, user in user_rows.iterrows():
        # Keep the raw CSV value for messages and the file name; the integer
        # form is only used as the primary key.
        raw_id = user['id']
        user_pk = int(raw_id)

        try:
            db_user = User(id=user_pk, area=user['area'], tariff=user['tariff'])
            db_user.save()
        except IntegrityError:
            print(f'Skipping already existing user {raw_id}')
            db_user = User.objects.get(id=user_pk)
        else:
            print(f'Importing consumptions for user {raw_id}')

        # The consumption file is read whether or not the user already existed.
        readings = read_csv(
            f'{DATA_ROOT}/{CONSUMPTIONS_DIR}/{raw_id}.csv',
            parse_dates=['datetime'],
        )
        for _, reading in readings.iterrows():
            try:
                record = Consumption(
                    user=db_user,
                    datetime=reading['datetime'],
                    consumption=float(reading['consumption']),
                )
                record.save()
            except IntegrityError:
                print(
                    f'Skipping already existing consumption record for user {raw_id} at'
                    f' {reading["datetime"]}')
def setUpClass(cls):
    """Create five users, each with two consecutive half-hour readings."""
    # could load data from CSVs
    # NOTE(review): ``datetime`` is called with a single string below. The
    # stdlib ``datetime.datetime`` class does not accept that, so this is
    # presumably a project helper or alias - confirm.
    first_slot = ('2019-01-01 00:00:00', '2019-01-01 00:30:00')
    second_slot = ('2019-01-01 00:30:00', '2019-01-01 01:00:00')

    # (id, area_code, tariff_code)
    user_rows = (
        (101, 'a1', 't1'),
        (102, 'a1', 't2'),
        (103, 'a2', 't1'),
        (104, 'a2', 't1'),
        (105, 'a2', 't1'),
    )
    # (user id, consumption in the first slot, consumption in the second slot)
    reading_rows = (
        (101, 100, 200),
        (102, 300, 400),
        (103, 500, 600),
        (104, 700, 800),
        (105, 900, 1000),
    )

    users = [
        User(id=pk, area_code=area, tariff_code=tariff)
        for pk, area, tariff in user_rows
    ]

    consumptions = []
    for pk, first_amount, second_amount in reading_rows:
        slots = ((first_amount, first_slot), (second_amount, second_slot))
        for amount, (begin, finish) in slots:
            consumptions.append(
                UserConsumption(
                    user_id=pk,
                    consumption=amount,
                    start=datetime(begin),
                    end=datetime(finish),
                )
            )

    User.objects.bulk_create(users)
    UserConsumption.objects.bulk_create(consumptions)

    # required (marked so by the original author; presumably read back by the
    # test-case class teardown - confirm)
    cls.cls_atomics = cls._enter_atomics()
def store_expected_user(self, user_id):
    """Save a user with random area/tariff and check it reads back unchanged.

    Args:
        user_id: value stored in the new user's ``user_id`` field.

    Any exception raised while building, saving or re-reading the user is
    turned into a test failure whose message includes the original error,
    instead of an uninformative "True is not false".
    """
    area = self.get_random_string()
    tariff = self.get_random_string()
    try:
        User(user_id=user_id, area=area, tariff=tariff).save()
        # Re-read so the assertions compare against what the query returns,
        # not against the in-memory instance that was just saved.
        user = list(User.objects.filter(user_id=user_id))[0]
    except Exception as exc:
        self.fail('Storing user {!r} raised {!r}'.format(user_id, exc))
    self.assertEqual(user.area, area)
    self.assertEqual(user.tariff, tariff)
def test_average(self):
    """Check entry ``[1][6]`` of the monthly consumptions for two July readings."""
    user = User(id=100, area='area1', tariff='t1')
    user.save()

    july_readings = (
        (30.0, '2016-07-15 09:30:00'),
        (20.0, '2016-07-19 12:30:00'),
    )
    for amount, taken_at in july_readings:
        Consumption(user_id=100, consumption=amount, datetime=taken_at).save()

    # NOTE(review): the original comment read "average consumptions during
    # July must be (20 + 30) / 2 = 50", but (20 + 30) / 2 is 25 and 50 is the
    # plain sum. Confirm whether calculate_monthly_consumptions() is meant to
    # return totals or averages; the asserted value is left unchanged.
    monthly = user.calculate_monthly_consumptions()
    self.assertEqual(monthly[1][6], 50.00)
def setup(self):
    """Store users 100 and 101 together with their consumption readings."""
    # (user id, ((consumption, datetime), ...)); saved in exactly this order.
    fixtures = (
        (100, (
            (30.0, '2016-07-15 09:30:00'),
            (20.0, '2016-07-19 12:30:00'),
            (10.0, '2016-07-21 11:30:00'),
        )),
        (101, (
            (45.0, '2016-07-15 09:30:00'),
            (27.0, '2016-07-21 10:30:00'),
            (21.0, '2016-07-24 11:30:00'),
            (43.0, '2016-09-02 06:30:00'),
        )),
    )
    for user_pk, readings in fixtures:
        # The user row is saved before any reading that refers to it.
        User(id=user_pk, area='area1', tariff='t1').save()
        for amount, taken_at in readings:
            Consumption(
                user_id=user_pk, consumption=amount, datetime=taken_at
            ).save()
def import_user_data(self):
    """Import the user CSV at ``self.USER_DATA_FILE`` into the database.

    The first row is treated as a header and skipped. Every following
    non-empty row supplies ``id``, ``area`` and ``tariff`` from its first
    three columns; all users are inserted with a single ``bulk_create``.
    """
    print('Importing user data...')
    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields are interpreted by the reader rather than by open().
    with open(self.USER_DATA_FILE, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row, if there is one
        users = [
            User(id=row[0], area=row[1], tariff=row[2])
            for row in reader
            if row  # csv.reader yields [] for blank lines
        ]
    User.objects.bulk_create(users)
    print('Done')
def import_users(self, user_data_path=None):
    """Replace all stored users with the rows of a CSV file.

    Args:
        user_data_path: path of the CSV to import; ``USER_DATA_PATH`` is
            used when this is ``None``. Each CSV column is passed to
            ``User`` as a keyword argument of the same name.

    Raises:
        CommandError: if ``user_data_path`` does not exist.
    """
    if user_data_path is None:
        user_data_path = USER_DATA_PATH
    if not os.path.exists(user_data_path):
        raise CommandError(
            'The following path does not exist: %s' % user_data_path)

    self.stdout.write('Importing user data from %s' % user_data_path)

    # Parse the file and build the objects BEFORE deleting anything, so an
    # unreadable CSV or an unexpected column cannot leave the table empty.
    records = pd.read_csv(user_data_path).to_dict('records')
    users = [User(**vals) for vals in records]

    User.objects.all().delete()
    User.objects.bulk_create(users)
def import_user_csv(self):
    """Load the users CSV at ``Command.user_csv_path`` into the database.

    When the file is missing, an error is printed to stderr and the process
    exits with ``settings.EXIT_FAILURE``.
    """
    csv_path = Command.user_csv_path
    if not os.path.exists(csv_path):
        # TODO(Tasuku): Should have logging
        print('ERROR: the filename "%s" does not exist.' % csv_path,
              file=sys.stderr)
        sys.exit(settings.EXIT_FAILURE)

    frame = pd.read_csv(csv_path, sep=settings.CSV_SEPARATION_CHAR)

    new_users = []
    for row in frame.to_dict('records'):
        # The CSV's "id" column is stored in the model's ``user_id`` field.
        new_users.append(
            User(user_id=int(row['id']), area=row['area'], tariff=row['tariff'])
        )
    User.objects.bulk_create(new_users)
def store_many_expected_users(self):
    """Bulk-insert 50000 users and check each stored row against what was sent."""
    expected = {}  # user_id -> {'area': ..., 'tariff': ...}, used for checking
    pending = []
    for uid in self.get_unique_ids(50000):
        area = self.get_random_string()
        tariff = self.get_random_string()
        expected[uid] = {'area': area, 'tariff': tariff}
        pending.append(User(user_id=uid, area=area, tariff=tariff))

    User.objects.bulk_create(pending)

    for stored in User.objects.all():
        wanted = expected[stored.user_id]
        self.assertEqual(stored.area, wanted['area'])
        self.assertEqual(stored.tariff, wanted['tariff'])
def make_user_datasets(self, num_of_users):
    """Bulk-insert the users of a random data frame and return their ids.

    The frame comes from ``self.get_random_user_dataframe(num_of_users)``;
    each of its records is passed to ``User`` as keyword arguments. The
    return value is the frame's ``user_id`` column as a list.
    """
    frame = self.get_random_user_dataframe(num_of_users)

    new_users = []
    for record in frame.to_dict('records'):
        new_users.append(User(**record))
    User.objects.bulk_create(new_users)

    return list(frame['user_id'])
def handle(self, *args, **options):
    """
    command line method for ETL of user and consumption data from csv to
    default database (see dashboard/settings.py to configure the engine)

    Users are read from ``<data>/user_data.csv``. Consumption rows are read
    from every ``<data>/consumption/*.csv``; the file name without its
    extension is used as the id of the user the rows belong to.

    :param args: args
    :param options: kwargs
    :return: None
    """
    stdlogger = logging.getLogger(__name__)
    data_dir = os.path.join(os.path.dirname(settings.BASE_DIR), 'data')

    user_path = os.path.join(data_dir, 'user_data.csv')
    stdlogger.info("Reading file: %s", user_path)
    # newline='' lets the csv module handle line endings itself.
    with open(user_path, 'r', newline='') as user_data:
        reader = csv.DictReader(user_data, delimiter=',')
        u_counter = 0
        # start=1 makes u_counter the number of rows processed rather than
        # the index of the last row (which under-reported by one).
        for u_counter, record in enumerate(reader, start=1):
            user = User()
            user.id = int(record['id'])
            user.area = record['area']
            user.tariff = record['tariff']
            stdlogger.debug(
                "Saving User object: Id: %s, Area: %s, Tariff: %s",
                user.id, user.area, user.tariff)
            user.save()
    stdlogger.info("%s User records saved to database", u_counter)

    consumption_path = os.path.join(data_dir, 'consumption', '*.csv')
    for con_file in glob.glob(consumption_path):
        stdlogger.info("Reading file: %s", con_file)
        # splitext removes exactly the extension. The previous
        # rstrip('.csv') strips any trailing '.', 'c', 's' or 'v'
        # characters and only worked because the names are numeric.
        owner_id = os.path.splitext(os.path.basename(con_file))[0]
        owner = None
        with open(con_file, 'r', newline='') as con_data:
            reader = csv.DictReader(con_data, delimiter=',')
            c_counter = 0
            for c_counter, record in enumerate(reader, start=1):
                if owner is None:
                    # Looked up once per file instead of once per row, and
                    # only on the first row so that header-only files
                    # still never query the user table.
                    owner = User.objects.get(id=int(owner_id))
                usage = Consumption()
                # NOTE(review): a User instance is assigned to ``user_id``,
                # as in the original - presumably the relation field itself
                # is named ``user_id``; confirm against the model.
                usage.user_id = owner
                usage.timestamp = datetime.strptime(
                    record['datetime'], '%Y-%m-%d %H:%M:%S')
                usage.consumption = float(record['consumption'])
                stdlogger.debug(
                    "Saving Consumption object: "
                    "User: %s, Timestamp: %s, Consumption: %s",
                    usage.user_id, usage.timestamp, usage.consumption)
                usage.save()
        stdlogger.info(
            "%s Consumption records saved to database", c_counter)
def create_user_obj(self, *args):
    """Build a ``User`` (without saving it) from the first positional argument.

    ``args[0]`` is indexed at positions 0, 1 and 2 to obtain ``user_id``,
    ``area`` and ``tariff`` respectively.
    """
    source = args[0]
    return User(user_id=source[0], area=source[1], tariff=source[2])
def testing_user():
    """Return a ``User`` (not saved here) with id 1000, area 'a1', tariff 't1'."""
    # Imported at call time rather than at module import time.
    from consumption.models import User

    attributes = {'id': 1000, 'area': 'a1', 'tariff': 't1'}
    return User(**attributes)