def calorie_graph_data(request):
    """Return all food days and food tags as plain dicts.

    Args:
        request: The incoming request; not used by this function.

    Returns:
        dict: ``'food_days'`` holds ``to_dict()`` of every FoodDay produced
        by ``FoodDay.group_days`` from the entries ordered by date, and
        ``'tags'`` holds ``to_dict()`` of every FoodTag ordered by id.
        Both values are lists.
    """
    session = Session()
    try:
        food_entries = session.query(FoodEntry).order_by(FoodEntry.date).all()
        food_days = FoodDay.group_days(food_entries)
        tags = session.query(FoodTag).order_by(FoodTag.id).all()
        # Serialize while the session is still open: the tag query used to
        # be executed (and to_dict() called) only after session.close(),
        # and map() would have returned a lazy iterator on Python 3.
        return {
            'food_days': [day.to_dict() for day in food_days],
            'tags': [tag.to_dict() for tag in tags],
        }
    finally:
        session.close()
def food_edit_food_tags_form(request):
    """Build the context for the form that edits one food's tags.

    The food is looked up by ``request.matchdict['id']``; every FoodTag is
    loaded ordered by name.

    Returns:
        dict: keys ``'request'``, ``'title'``, ``'food'`` and ``'tags'``.
    """
    db = Session()
    food = db.query(Food).filter_by(id=request.matchdict['id']).first()
    all_tags = db.query(FoodTag).order_by(FoodTag.name).all()
    db.close()

    # TODO: find a way to auto-include request in every one of these...?
    title = 'Edit Tags for %s' % food.name
    return dict(request=request, title=title, food=food, tags=all_tags)
def lose_it_upload(request):
    """Import food entries from the uploaded file in ``request.POST['file']``.

    Each entry yielded by ``LoseItDataReader`` is matched to a Food by name
    (a new Food is created when none exists). An entry is imported only if
    the database held no FoodEntry for its date when that date was first
    seen during this upload; otherwise it is reported as ignored.

    Returns:
        dict: ``'title'`` and ``'ignored'`` when at least one entry was
        skipped; otherwise an ``HTTPFound`` redirect to the upload form.
    """
    session = Session()
    try:
        # TODO: duplicate detection
        input_file = request.POST['file'].file
        reader = LoseItDataReader(input_file)

        # date -> FoodEntry rows that existed when the date was first seen
        existing_for_date = {}
        ignored = []
        for entry in reader:
            food = session.query(Food).filter_by(name=entry.name).first()
            if food is None:
                food = Food(entry.name)
                session.add(food)

            if entry.date not in existing_for_date:
                existing_for_date[entry.date] = (
                    session.query(FoodEntry).filter_by(date=entry.date).all())

            if existing_for_date[entry.date]:
                ignored.append(entry)
            else:
                session.add(FoodEntry(food, entry.calories, entry.date))
        session.commit()
    finally:
        # The session used to be left open on every code path.
        session.close()

    if ignored:
        return {'title': 'Existing Data Not Imported', 'ignored': ignored}
    return HTTPFound('/food_entry/lose_it_upload_form')
def food_edit_food_tags(request):
    """Replace the tags of one food with the submitted ``tag_id`` values.

    The food is identified by ``request.matchdict['id']``. Tags not in the
    submitted ids are removed, and submitted tags not yet attached are
    added.

    Returns:
        dict: ``'request'`` and the reloaded ``'food'`` for XHR requests;
        otherwise an ``HTTPFound`` redirect to ``/food/list``.
    """
    food_id = request.matchdict['id']
    # Build a real set: a map() iterator would be exhausted by the first
    # membership test on Python 3, making every later test fail.
    tag_ids = {int(tag_id) for tag_id in request.params.getall('tag_id')}

    session = Session()
    try:
        food = session.query(Food).filter_by(id=food_id).first()

        # Collect first, then remove, so the collection is not mutated
        # while it is being iterated.
        to_remove = [tag for tag in food.food_tags if tag.id not in tag_ids]
        already_tagged = {tag.id for tag in food.food_tags
                          if tag.id in tag_ids}
        for tag in to_remove:
            food.food_tags.remove(tag)

        missing_ids = tag_ids - already_tagged
        if missing_ids:
            to_add = session.query(FoodTag).filter(
                FoodTag.id.in_(missing_ids)).all()
            food.food_tags.extend(to_add)
        session.commit()
    finally:
        # The first session used to be dropped without being closed.
        session.close()

    # Reload the food in a fresh session, as the original code did
    # (presumably so the returned object reflects the committed state).
    session = Session()
    try:
        food = session.query(Food).filter_by(id=food_id).first()
    finally:
        session.close()

    if request.is_xhr:
        return {'request': request, 'food': food}
    return HTTPFound('/food/list')
def commit_entry(self, staged_entry):
    """Store one staged entry as a TargetEntry with indicator values.

    For every indicator in ``self.indicators_metadata`` the IndicatorValue
    closest in date to the staged entry (at the converted location) is
    looked up. Its value is used only if it lies within half of the
    indicator's frequency (in days); otherwise the indicator is left as
    ``None``.

    Args:
        staged_entry: Object providing the attributes copied below
            (``entry_id``, ``sale_id``, ``date``, ``value``, ``postcode``
            and the other fields passed to ``TargetEntry``).
    """
    session = Session()
    try:
        indicator_values = {}
        for indicator in self.indicators_metadata:
            metadata = self.indicators_metadata[indicator]
            value_location = self.location_engine.convert_postcode(
                staged_entry.postcode, metadata['resolution'])

            at_location = session.query(IndicatorValue).filter(
                IndicatorValue.indicator == indicator,
                IndicatorValue.location == value_location)
            previous_value_entry = at_location \
                .filter(IndicatorValue.date < staged_entry.date) \
                .order_by(IndicatorValue.date.desc()) \
                .first()
            following_value_entry = at_location \
                .filter(IndicatorValue.date > staged_entry.date) \
                .order_by(IndicatorValue.date) \
                .first()

            # Pick the neighbour closest in time. The old condition
            # dereferenced previous_value_entry even when only a following
            # value existed, raising AttributeError on None.
            if previous_value_entry is None or following_value_entry is None:
                nearest_entry = (previous_value_entry
                                 if previous_value_entry is not None
                                 else following_value_entry)
            elif abs((previous_value_entry.date - staged_entry.date).days) \
                    < abs((following_value_entry.date - staged_entry.date).days):
                nearest_entry = previous_value_entry
            else:
                # Ties go to the following value, as before.
                nearest_entry = following_value_entry

            value = None
            if nearest_entry is not None:
                # Check if value falls within frequency range
                max_distance = FREQUENCY_DAY_COUNTS[metadata['frequency']] / 2
                distance = abs(staged_entry.date - nearest_entry.date).days
                if distance <= max_distance:
                    value = nearest_entry.value
            indicator_values[indicator] = value

        new_entry = TargetEntry(
            entry_id=staged_entry.entry_id,
            sale_id=staged_entry.sale_id,
            date=staged_entry.date,
            value=staged_entry.value,
            PDD_type=staged_entry.PDD_type,
            postcode=staged_entry.postcode,
            town_or_city=staged_entry.town_or_city,
            district=staged_entry.district,
            county=staged_entry.county,
            new_property_flag=staged_entry.new_property_flag,
            property_type=staged_entry.property_type,
            tenure_type=staged_entry.tenure_type)
        for indicator in indicator_values:
            setattr(new_entry, indicator, indicator_values[indicator])

        session.add(new_entry)
        session.commit()
    finally:
        session.close()
def get_model_names(self, dataset_name=None):
    """Return the names of all ModelEntry rows.

    Args:
        dataset_name: When given, only models whose ``dataset`` equals
            this value are listed.

    Returns:
        list: The ``name`` of every matching record.
    """
    session = Session()
    records = session.query(ModelEntry)
    if dataset_name is not None:
        records = records.filter(ModelEntry.dataset == dataset_name)
    names = [record.name for record in records]
    session.close()
    return names
def get_trained_model_names(self, dataset_name=None):
    """Return the names of all models whose state is ``'trained'``.

    Args:
        dataset_name: When given, additionally restrict the result to
            models whose ``dataset`` equals this value.

    Returns:
        list: The matching model names.
    """
    session = Session()
    criteria = [ModelEntry.state == 'trained']
    if dataset_name is not None:
        # Keep the dataset condition first, as in the two-argument filter.
        criteria.insert(0, ModelEntry.dataset == dataset_name)
    rows = session.query(ModelEntry.name).filter(*criteria)
    names = [row.name for row in rows]
    session.close()
    return names
def main():
    """Create a Food row for every FoodEntry name that has no Food yet.

    Binds ``Session`` to the database, creates the tables if necessary and
    walks all food entries ordered by date.
    """
    # NOTE(review): the user/host part of this URL looks like an
    # e-mail-obfuscation artifact; confirm the real connection string.
    engine = create_engine('postgresql://[email protected]/my_metrics')
    Base.metadata.create_all(engine)
    Session.configure(bind=engine)

    session = Session()
    for entry in session.query(FoodEntry).order_by(FoodEntry.date):
        food_name = entry.name
        matches = session.query(Food).filter_by(name=food_name).all()
        if not matches:
            # Presumably relies on session autoflush so that a food added
            # here is found by later iterations; verify the configuration.
            session.add(Food(food_name))
    session.commit()
def commit_entries(self, end_batch_id):
    """Commit every staged entry whose id is at most ``end_batch_id``.

    Entries are processed in ascending ``entry_id`` order, each one being
    handed to ``self.commit_entry``.

    Args:
        end_batch_id: Highest ``StagedEntry.entry_id`` to process
            (inclusive).
    """
    session = Session()
    try:
        staged_entries = session.query(StagedEntry) \
            .filter(StagedEntry.entry_id <= end_batch_id) \
            .order_by(StagedEntry.entry_id) \
            .all()
        # The previous ``while query is not None`` loop never terminated
        # and passed the Query object itself to commit_entry instead of
        # the individual rows.
        for staged_entry in staged_entries:
            self.commit_entry(staged_entry)
    finally:
        session.close()
def main():
    """Attach to every FoodEntry the Food that has the same name.

    Binds ``Session`` to the database, creates the tables if necessary,
    indexes all foods by name and assigns ``entry.food`` for every entry.

    Raises:
        KeyError: If an entry's name has no matching Food.
    """
    # NOTE(review): the user/host part of this URL looks like an
    # e-mail-obfuscation artifact; confirm the real connection string.
    engine = create_engine('postgresql://[email protected]/my_metrics')
    Base.metadata.create_all(engine)
    Session.configure(bind=engine)

    session = Session()
    # Later foods overwrite earlier ones that share a name.
    foods_by_name = {food.name: food for food in session.query(Food)}

    for entry in session.query(FoodEntry).order_by(FoodEntry.date):
        entry.food = foods_by_name[entry.name]
    session.commit()
def get_lat_and_long(postcode):
    """Look up the coordinates stored for a postcode.

    Args:
        postcode: Value compared against ``Locations.pcds``.

    Returns:
        tuple: ``(lat, long)`` of the first matching record, or
        ``(None, None)`` when there is no match.
    """
    session = Session()
    matches = session.query(Locations).filter(Locations.pcds == postcode)
    record = matches.first()
    session.close()

    if record is None:
        return None, None
    return record.lat, record.long
def update_entries_from_source(self, source_entry_date, frequency,
                               indicators, area_resolution):
    """Fill missing indicator values on target entries near a source date.

    Target entries whose first indicator column is NULL and whose date is
    within half of ``frequency`` (in days) of ``source_entry_date`` are
    updated. For each of them, every indicator is set to the value
    recorded for ``source_entry_date`` at the entry's converted location.

    Args:
        source_entry_date: Date of the newly available indicator values.
        frequency: Key into ``FREQUENCY_DAY_COUNTS``.
        indicators: Names of the indicator attributes to fill. The first
            one decides which entries still need updating.
        area_resolution: Resolution passed to
            ``self.location_engine.convert_postcode``.
    """
    if not indicators:
        # Nothing to fill; also avoids IndexError on indicators[0].
        return

    session = Session()
    try:
        # ``days=`` is required: a bare positional number is taken as
        # ``dt1`` and ignored, which produced a zero-width window.
        frequency_radius_delta = relativedelta(
            days=FREQUENCY_DAY_COUNTS[frequency] / 2)
        lower_threshold_date = source_entry_date - frequency_radius_delta
        upper_threshold_date = source_entry_date + frequency_radius_delta

        entries_to_update = session.query(TargetEntry).filter(
            getattr(TargetEntry, indicators[0]).is_(None),
            TargetEntry.date >= lower_threshold_date,
            TargetEntry.date <= upper_threshold_date)

        for entry in entries_to_update:
            location = self.location_engine.convert_postcode(
                entry.postcode, area_resolution)
            for indicator in indicators:
                # Previously compared the IndicatorValue class itself to
                # the date, ignored the indicator name, and stored the
                # Query object instead of the looked-up value.
                value_entry = session.query(IndicatorValue).filter(
                    IndicatorValue.indicator == indicator,
                    IndicatorValue.date == source_entry_date,
                    IndicatorValue.location == location).first()
                if value_entry is not None:
                    setattr(entry, indicator, value_entry.value)

        session.commit()
    finally:
        session.close()
def run(self):
    """Poll the job table forever and run the earliest job once it is due.

    Each iteration loads the Job with the smallest
    ``timestamp_when_valid``. If it is due, it is passed to
    ``self.complete_job`` and then deleted; otherwise the loop sleeps for
    ``JOB_MANAGER_POLL_DELAY``.
    """
    while True:
        session = Session()
        try:
            current_job = session.query(Job).order_by(
                Job.timestamp_when_valid).first()
            job_is_due = (
                current_job is not None
                and current_job.timestamp_when_valid
                <= datetime.datetime.now())
            if job_is_due:
                self.complete_job(current_job)
                # Delete through the session and commit. The earlier
                # instance-level delete() was never committed before
                # close(), so the job would have been picked up again.
                session.delete(current_job)
                session.commit()
        finally:
            session.close()

        if not job_is_due:
            # Sleep only after the session has been released.
            sleep(JOB_MANAGER_POLL_DELAY)
def __init__(self, database_engine):
    """Load all models and resume interrupted training.

    Args:
        database_engine: Stored on the instance as ``database_engine``.

    NOTE(review): ``self.max_inputs`` is read before this method assigns
    it; presumably it is initialised as a class attribute. Confirm.
    """
    self.database_engine = database_engine
    session = Session()

    for name in self.get_model_names():
        self.load_model(name)

    # Find largest model input count after all models loaded
    for name in self.get_trained_model_names():
        input_count = len(self.get_model_inputs(name))
        if self.max_inputs < input_count:
            self.max_inputs = input_count

    # Check for models that were in training when last shut down
    interrupted = session.query(ModelEntry).filter(
        ModelEntry.state == 'training')
    for record in interrupted:
        self.train_model(record.name)

    session.close()
def delete_model(self, model_name):
    """Remove a model from the database, from memory and from disk.

    Deletes the matching ModelEntry rows, drops the entry from
    ``self.models``, removes the file at path ``model_name`` and then
    recomputes ``self.max_inputs`` from the remaining trained models.

    Args:
        model_name: Name of the model; also used as the file path passed
            to ``os.remove``.
    """
    session = Session()
    session.query(ModelEntry).filter(ModelEntry.name == model_name).delete()
    session.commit()

    del self.models[model_name]
    os.remove(model_name)

    # Update max inputs value
    self.max_inputs = 0
    for trained_name in self.get_trained_model_names():
        input_count = len(self.get_model_inputs(trained_name))
        if self.max_inputs < input_count:
            self.max_inputs = input_count

    session.close()
def train_model(self, model_name):
    """Train a model, training or waiting for its parent models first.

    The model's record is marked ``'training'``. Each parent model that
    is still ``'untrained'`` is trained recursively, and parents in state
    ``'training'`` are waited for. The training data is then loaded, run
    through every parent via ``self.recursive_process``, and the model is
    trained, saved and marked ``'trained'``.

    Args:
        model_name: Name of the ModelEntry / model to train.
    """
    session = Session()
    try:
        model_record = session.query(ModelEntry).filter(
            ModelEntry.name == model_name).one()
        model_record.state = 'training'
        session.commit()

        parent_models = self.model_manager.models[model_name].input_models
        for parent_model in parent_models:
            parent_records = session.query(ModelEntry).filter(
                ModelEntry.name == parent_model)

            # Train any untrained parent models. The query must be
            # executed: comparing the Query object itself to None was
            # always true, so every parent was retrained each time.
            if parent_records.filter(
                    ModelEntry.state == 'untrained').one_or_none() is not None:
                self.train_model(parent_model)

            # Wait for any parent models that are still in training, in
            # cases where another process started the training.
            # NOTE(review): depending on the transaction isolation level
            # this session may not observe the other process's commit;
            # confirm that the loop can actually end.
            while parent_records.filter(
                    ModelEntry.state == 'training').one_or_none() is not None:
                time.sleep(30)

        model = self.model_manager.models[model_name]
        dataset_name = model.dataset
        entry_count = model.training_entry_count

        if dataset_name == 'core_dataset':
            statement = session.query(Base.metadata.tables['core_dataset']) \
                .order_by(func.rand()).limit(entry_count).statement
            dataframe = pandas.read_sql(statement, self.database_engine)
        else:
            # Raw string: same pattern, without the invalid-escape warning.
            dataframe = pandas.read_csv(
                DEFAULT_DATA_PATH, sep=r'\s+', names=DEFAULT_DATA_HEADERS)

        # Never ask for more rows than are available.
        # NOTE(review): applied to both data sources; for the SQL source
        # this only reshuffles rows that are already randomly selected.
        entry_count = min(entry_count, len(dataframe))
        dataframe = dataframe.sample(entry_count)

        for parent_model in parent_models:
            self.recursive_process(parent_model, dataframe)

        model.train(dataframe)
        self.model_manager.save_model(model_name)

        model_record.state = 'trained'
        session.commit()

        # Update the model manager max input count - done here to happen
        # at the end of training
        self.model_manager.update_max_inputs(
            len(self.model_manager.get_model_inputs(model_name)))
    finally:
        session.close()
def get_model_table(self):
    """Return one summary row per ModelEntry.

    Returns:
        list: Dicts with the keys ``'Name'``, ``'Type'``, ``'Dataset'``,
        ``'State'`` and ``'Dependencies'``; the last one is the
        comma-separated result of ``self.get_model_dependencies``.
    """
    session = Session()
    rows = [
        {
            'Name': entry.name,
            'Type': entry.type,
            'Dataset': entry.dataset,
            'State': entry.state,
            'Dependencies': ", ".join(
                self.get_model_dependencies(entry.name)),
        }
        for entry in session.query(ModelEntry)
    ]
    session.close()
    return rows
def food_entry_add(request):
    """Add a food entry from the submitted form parameters.

    Reads ``name``, ``calories`` and ``days_ago`` from ``request.params``,
    reuses the Food with that name (creating it when missing) and stores
    a FoodEntry dated ``days_ago`` days before today.

    Returns:
        An ``HTTPFound`` redirect to ``/food_entry/add_form``.

    Raises:
        KeyError: If a required parameter is missing.
        ValueError: If ``calories`` or ``days_ago`` is not an integer.
    """
    # The parameter was read into ``name`` but every later use referred
    # to the undefined ``food_name``, raising NameError on each request.
    food_name = request.params['name']
    calories = int(request.params['calories'])
    day = date.today() - timedelta(int(request.params['days_ago']))

    session = Session()
    try:
        food = session.query(Food).filter_by(name=food_name).first()
        if food is None:
            food = Food(food_name)
            session.add(food)
        session.add(FoodEntry(food, calories, day))
        session.commit()
    finally:
        session.close()

    return HTTPFound('/food_entry/add_form')
def update_commit_schedule(self, first_pull_date, frequency):
    """Move early commit jobs to the next scheduled pull.

    Starting from ``first_pull_date``, steps forward by ``frequency``
    until the pull lies in the future. Every commit job valid earlier
    than half a frequency period before that pull is rescheduled to it.

    Args:
        first_pull_date: Datetime of the first pull.
        frequency: ``'yearly'``, ``'monthly'`` or another key of
            ``FREQUENCY_DAY_COUNTS``.
    """
    if frequency == 'yearly':
        step = relativedelta(years=1)
    elif frequency == 'monthly':
        step = relativedelta(months=1)
    else:
        step = relativedelta(days=FREQUENCY_DAY_COUNTS[frequency])

    # Calculate when the next pull is that cannot take place immediately
    next_scheduled_pull = first_pull_date
    while next_scheduled_pull < datetime.datetime.now():
        next_scheduled_pull += step

    threshold_date = next_scheduled_pull - relativedelta(
        days=FREQUENCY_DAY_COUNTS[frequency] / 2)

    session = Session()
    try:
        commits_before_threshold = session.query(Job).filter(
            Job.job_type == COMMIT_JOB,
            Job.timestamp_when_valid < threshold_date)
        for commit in commits_before_threshold:
            commit.timestamp_when_valid = next_scheduled_pull
        # Without this commit the new timestamps were discarded when the
        # session was closed.
        session.commit()
    finally:
        session.close()
def pull_land_registry(self):
    """Download the land registry CSV, stage it and apply changes.

    The downloaded rows are appended to ``staged_entries`` with
    consecutive ``entry_id`` values. Staged records of type ``'D'`` then
    delete the matching target entries, and records of type ``'C'``
    update them via ``update_entry_from_land_registry``.

    Returns:
        str: The id following the last appended row (first id of the
        batch plus the number of rows).
    """
    session = Session()
    try:
        data = request.urlopen(LAND_REGISTRY_URL).read()
        data = str(data, 'utf-8')
        data_frame = pandas.read_csv(
            StringIO(data), header=None, names=LAND_REGISTRY_DATA_HEADERS)

        # Only include entries with PPD type A
        # data_frame = [data_frame.PDD_type == 'A']

        # Convert date strings to date objects
        data_frame['date'] = pandas.to_datetime(
            data_frame['date'], format=LAND_REGISTRY_TIMESTAMP_FORMAT)
        # Reorder columns
        data_frame = data_frame[STAGED_ENTRY_HEADERS]
        # Convert old or new character value to boolean
        data_frame['new_property_flag'] = \
            data_frame['new_property_flag'].map(dict(Y=True, N=False))

        current_highest_id = -1
        latest_entry = session.query(StagedEntry).order_by(
            desc(StagedEntry.entry_id)).first()
        if latest_entry is not None:
            # The key column is ``entry_id`` (it is what the query above
            # orders by and what is inserted below), not ``id``.
            current_highest_id = latest_entry.entry_id

        batch_start_id = current_highest_id + 1
        batch_end_id = batch_start_id + len(data_frame)

        # Add id column
        data_frame.insert(0, 'entry_id', range(batch_start_id, batch_end_id))
        data_frame.to_sql('staged_entries', con=self.database_engine,
                          if_exists='append', index=False)

        # End the session's current transaction so the rows just appended
        # through the engine are visible regardless of isolation level.
        session.commit()

        # Process deletion entries
        deletion_entries = session.query(StagedEntry).filter(
            StagedEntry.record_type == 'D').all()
        for deletion_entry in deletion_entries:
            session.query(TargetEntry).filter(
                TargetEntry.sale_id == deletion_entry.sale_id).delete()
            session.delete(deletion_entry)

        # Get update entries
        update_entries = session.query(StagedEntry).filter(
            StagedEntry.record_type == 'C').all()
        for update_entry in update_entries:
            # one_or_none() makes the None check below meaningful; one()
            # raised when no target existed.
            existing_entry = session.query(TargetEntry).filter(
                TargetEntry.sale_id == update_entry.sale_id).one_or_none()
            if existing_entry is not None:
                update_entry_from_land_registry(update_entry, existing_entry)
                session.delete(update_entry)
            # NOTE(review): an update without an existing target is left
            # staged rather than dropped; confirm this is the intent.

        session.commit()
    finally:
        session.close()
    return str(batch_end_id)
def food_entry_list(request):
    """Return all food entries and their grouping into days.

    Args:
        request: The incoming request; not used by this function.

    Returns:
        dict: ``'title'``, ``'food_entries'`` (list of all FoodEntry rows
        ordered by date) and ``'food_days'`` (the result of
        ``FoodDay.group_days`` on those entries).
    """
    session = Session()
    try:
        # Load the rows now: returning the lazy Query meant it was run
        # again by the consumer after the session had been closed.
        food_entries = session.query(FoodEntry).order_by(FoodEntry.date).all()
        food_days = FoodDay.group_days(food_entries)
    finally:
        session.close()
    return {'title': 'Food Entries',
            'food_entries': food_entries,
            'food_days': food_days}
# print(38*"*")
# print('\033[31;1m第二题:\033[0m')
# print('\033[32;1m%s:%s\033[0m' % ('学生姓名','语文成绩'))
# qset2 = session.query(Students)
# for data in qset2:
#     print('\033[35;1m%s:%s\033[0m' % (data.name, data.chinese))
#     # print(data)
# print(38*"*")

# # Question 3
# print(38*"*")
# print('\033[31;1m第三题:\033[0m')
# print('\033[32;1m公司职务\033[0m')
# qset3 = session.query(Emp.job).distinct().all()
# for data in qset3:
#     print('\033[35;1m%s\033[0m' % data.job)
# print(38*"*")

# Question 4: print each student's name with the chinese, english and
# math attributes, framed by two separator lines.
separator = 38 * "*"
header_labels = ('学生姓名', '语文', '英语', '数学')

print(separator)
print('\033[31;1m第四题:\033[0m')
print('\033[32;1m%s:%s:%s:%s\033[0m' % header_labels)
qset4 = session.query(Students)
for data in qset4:
    row = (data.name, data.chinese, data.english, data.math)
    print('\033[35;1m%-5s:%s:%s:%s\033[0m' % row)
print(separator)

# Commit
session.commit()
# Close
session.close()
# create / retrieve / update / delete
from tables import Emp, Students, Session
from sqlalchemy import distinct
from sqlalchemy import or_, desc, func

# Create a session instance connected to the database
session = Session()
separator = 38 * "*"

# Question 1: list the id and name of every student, ordered by id.
print(separator)
print('\033[31;1m第一题:查询所有的学生\033[0m')
print('\033[32;1m%-8s %-5s\033[0m' % ('id', '学生姓名'))
qset1 = session.query(Students).order_by(Students.id)
for data in qset1:
    id_and_name = (data.id, data.name)
    print('\033[35;1m%-8s %-5s\033[0m' % id_and_name)
print(separator)

# Question 2: list each student's name and chinese attribute.
print(separator)
print('\033[31;1m第二题:查询学生姓名,语文\033[0m')
print('\033[32;1m%s:%s\033[0m' % ('学生姓名', '语文成绩'))
qset2 = session.query(Students).order_by(Students.id)
for data in qset2:
    name_and_score = (data.name, data.chinese)
    print('\033[35;1m%s:%s\033[0m' % name_and_score)
    # print(data)
print(separator)

# Question 3: header only in this section.
print(separator)
print('\033[31;1m第三题:查询一个公司里面的工作岗位–清除重复数据\033[0m')
print('\033[32;1m公司职务\033[0m')
def food_list(request):
    """Return all foods ordered by name.

    Args:
        request: The incoming request; not used by this function.

    Returns:
        dict: ``'title'`` and ``'foods'`` (list of Food rows).
    """
    session = Session()
    try:
        # Load the rows before closing: the lazy Query returned before
        # was only executed by the consumer, after session.close().
        foods = session.query(Food).order_by(Food.name).all()
    finally:
        session.close()
    return {'title': 'Foods', 'foods': foods}
def food_tag_list(request):
    """Return all food tags ordered by name.

    Args:
        request: The incoming request; not used by this function.

    Returns:
        dict: ``'title'`` and ``'food_tags'`` (list of FoodTag rows).
    """
    session = Session()
    try:
        # Load the rows before closing: the lazy Query returned before
        # was only executed by the consumer, after session.close().
        food_tags = session.query(FoodTag).order_by(FoodTag.name).all()
    finally:
        session.close()
    return {'title': 'Food Tags', 'food_tags': food_tags}
def calorie_graph(request):
    """Return all food entries and their grouping into days for the graph.

    Args:
        request: The incoming request; not used by this function.

    Returns:
        dict: ``'title'``, ``'food_entries'`` (list of all FoodEntry rows
        ordered by date) and ``'food_days'`` (the result of
        ``FoodDay.group_days`` on those entries).
    """
    session = Session()
    try:
        # Load the rows now: returning the lazy Query meant it was run
        # again by the consumer after the session had been closed.
        food_entries = session.query(FoodEntry).order_by(FoodEntry.date).all()
        food_days = FoodDay.group_days(food_entries)
    finally:
        session.close()
    return {'title': 'Calorie Graph',
            'food_entries': food_entries,
            'food_days': food_days}