def check_reminders():
    reminders = (db_session.query(ReminderEvents)
                 .order_by(ReminderEvents.date_added.desc())
                 .all())
    now = datetime.now()
    current_time = now.time()
    previous_time = (now - timedelta(minutes=1)).time()
    for reminder in reminders:
        time_of_reminder = datetime.strptime(
            reminder.time_of_reminder, '%H:%M').time()
        # only consider reminders whose scheduled time fell in the last minute
        if previous_time < time_of_reminder < current_time:
            last_executed = (db_session.query(ReminderLog)
                             .filter(ReminderLog.reminder == reminder.id)
                             .order_by(ReminderLog.executed_at.desc())
                             .first())
            print(reminder)
            if last_executed is None:
                add_reminder_to_log(reminder.id)
            else:
                # minimum number of seconds between executions per frequency
                if reminder.how_often.code == 'daily':
                    check_and_execute_reminder(last_executed, now, reminder, 60 * 60 * 24)
                elif reminder.how_often.code == 'hourly':
                    check_and_execute_reminder(last_executed, now, reminder, 60 * 60)
                elif reminder.how_often.code == 'weekly':
                    check_and_execute_reminder(last_executed, now, reminder, 60 * 60 * 24 * 7)
                else:
                    # unknown frequency codes are ignored
                    pass

def test_good_file(self, request_mock):
    with full_rig_with_s3() as (app, engine):
        upload_id = self.do_upload(app, request_mock)
        # okay, here's what we really want to test.
        # call the merge endpoint
        response = app.post(
            '/api/upload/merge_file?uploadId={}'.format(upload_id))
        response_data = json.loads(response.get_data().decode('utf-8'))
        assert response_data['status'] == 'success'
        # make sure that there is a new merged file on s3
        expected_s3_path = 's3://test-bucket/boone/hmis_service_stays/merged'
        with open_sesame(expected_s3_path, 'rb') as expected_s3_file:
            reader = csv.reader(expected_s3_file)
            assert len([row for row in reader]) == ROWS_IN_GOOD_HMIS_FILE
        # and make sure that the merge log has a record of this
        assert db_session.query(MergeLog).filter(
            MergeLog.upload_id == '123-456').one()
        # make sure that the master table has been bootstrapped
        master_table = generate_master_table_name('boone', 'hmis_service_stays')
        total_rows = db_session.query(
            'count(*) from {}'.format(master_table)).one()
        assert total_rows == (ROWS_IN_GOOD_HMIS_FILE - 1, )
        # make sure that we filled in some matched ids
        total_rows = db_session.query(
            'count(matched_id is not null) from {}'.format(
                master_table)).one()
        assert total_rows == (ROWS_IN_GOOD_HMIS_FILE - 1, )

def migrate():
    # backfill Output.output_dir_override from each output's computed
    # output_dir, flushing bulk updates every batch_size rows
    batch = []
    batch_size = 500
    # batch_size = 1_000
    # batch_size = 70_000
    print(f'TOTAL {db_session.query(Output).count()}')
    output_total = db_session.query(Output).filter(
        text("output_dir_override is null")).count()
    start = time.time()
    start_batch = time.time()
    print(f'without overrides: {output_total}')
    outputs = (
        db_session.query(Output).filter(
            text("output_dir_override is null")).order_by(
                Output.created_date.desc()).limit(100000)
        # .yield_per(batch_size)
        # .enable_eagerloads(False)
    )
    updated = 0
    now = time.time()
    for idx, o in enumerate(outputs):
        # print(f"{o.output_dir_override} => {o.output_dir}")
        # o.output_dir_override = str(o.output_dir)
        try:
            output_dir = o.output_dir
        except Exception as e:
            if not ("poc" in o.batch.ci_commit.project.id
                    or "arthur" in o.batch.ci_commit.project.id):
                print(f"WTF {o.batch.ci_commit} in {o.batch.ci_commit.project}")
                print(e)
            continue
        updated += 1
        batch.append({
            "id": o.id,
            "output_dir_override": str(output_dir),
        })
        if idx and idx % batch_size == 0:
            print(o)
            now = time.time()
            print(
                f"{idx/output_total:.1%} [{batch_size/(now - start_batch):.1f}/s] "
                f"[est. total left {(now - start_batch) * ((output_total-idx)/batch_size) / 3600:.2f}h] "
                f"[elapsed time: {now - start:.1f}s]"
            )
            start_batch = now
            db_session.bulk_update_mappings(Output, batch)
            db_session.flush()
            batch = []
            # break
    print(f"DONE, now committing configurations [elapsed time: {now - start:.1f}s]")
    db_session.bulk_update_mappings(Output, batch)
    db_session.flush()
    db_session.commit()
    return updated

def migrate():
    # backfill the 'storage' key in Output.data for non-pending outputs,
    # flushing bulk updates every batch_size rows
    batch = []
    batch_size = 500
    batch_size = 70000
    print(f'TOTAL {db_session.query(Output).count()}')
    output_total = db_session.query(Output).filter(
        text("(data->'storage') is null")).filter(
        Output.is_pending == False).count()
    start = time.time()
    start_batch = time.time()
    print(f'without storage {output_total}')
    outputs = (db_session.query(Output)
               .filter(text("(data->'storage') is null"))
               .filter(Output.is_pending == False)
               .order_by(Output.created_date.desc())
               # .limit(10000)
               # .yield_per(batch_size)
               .enable_eagerloads(False)
               )
    updated = 0
    now = time.time()
    for idx, o in enumerate(outputs):
        # print(o)
        if o.data is None:
            o.data = {}
        # if '/' in o.batch.ci_commit.hexsha:
        #     continue
        try:
            storage = get_storage(o)
        except Exception as e:
            print("error", o, e)
            continue
        if storage is None:
            continue
        updated += 1
        batch.append({
            "id": o.id,
            "data": {
                **o.data,
                "storage": storage,
            },
        })
        if idx and idx % batch_size == 0:
            print(o)
            now = time.time()
            print(f"{idx/output_total:.1%} [{batch_size/(now - start_batch):.1f}/s] "
                  f"[est. total left {(now - start_batch) * ((output_total-idx)/batch_size) / 3600:.2f}h] "
                  f"[elapsed time: {now - start:.1f}s]")
            start_batch = now
            db_session.bulk_update_mappings(Output, batch)
            db_session.flush()
            batch = []
            # break
    print(f"DONE, now committing configurations [elapsed time: {now - start:.1f}s]")
    db_session.bulk_update_mappings(Output, batch)
    db_session.flush()
    db_session.commit()
    return updated

def migrate_batch():
    # backfill Batch.batch_dir_override from each batch's computed batch_dir
    batch = []
    batch_size = 500
    print(f'TOTAL {db_session.query(Batch).count()}')
    total = db_session.query(Batch).filter(
        text("batch_dir_override is null")).count()
    start = time.time()
    start_batch = time.time()
    print(f'without overrides: {total}')
    batches = (
        db_session.query(Batch).filter(
            text("batch_dir_override is null")).order_by(
                Batch.created_date.desc()).limit(20000)
        # .yield_per(batch_size)
        # .enable_eagerloads(False)
    )
    updated = 0
    now = time.time()
    for idx, b in enumerate(batches):
        try:
            batch_dir = b.batch_dir
        except Exception as e:
            if not ("poc" in b.ci_commit.project.id
                    or "arthur" in b.ci_commit.project.id):
                print(f"WTF {b.ci_commit} in {b.ci_commit.project}")
                print(e)
            continue
        # print(f"{b.batch_dir_override} => {batch_dir}")
        # exit(0)
        updated += 1
        batch.append({
            "id": b.id,
            "batch_dir_override": str(batch_dir),
        })
        if idx and idx % batch_size == 0:
            print(b)
            now = time.time()
            print(
                f"{idx/total:.1%} [{batch_size/(now - start_batch):.1f}/s] "
                f"[est. total left {(now - start_batch) * ((total-idx)/batch_size) / 3600:.2f}h] "
                f"[elapsed time: {now - start:.1f}s]"
            )
            start_batch = now
            db_session.bulk_update_mappings(Batch, batch)
            db_session.flush()
            batch = []
            # break
    print(f"DONE, now committing configurations [elapsed time: {now - start:.1f}s]")
    db_session.bulk_update_mappings(Batch, batch)
    db_session.flush()
    db_session.commit()
    return updated

def migrate_commits():
    # backfill CiCommit.commit_dir_override from each commit's artifacts_dir
    batch = []
    batch_size = 500
    total = db_session.query(CiCommit).filter(
        text("commit_dir_override is null")).count()
    start = time.time()
    start_batch = time.time()
    print(f'without overrides: {total}')
    commits = (db_session.query(CiCommit).filter(
        text("commit_dir_override is null")).order_by(
            CiCommit.authored_datetime.desc()).limit(20000))
    updated = 0
    now = time.time()
    for idx, c in enumerate(commits):
        try:
            artifacts_dir = c.artifacts_dir
        except Exception as e:
            if not ("poc" in c.project.id or "arthur" in c.project.id):
                print(f"WTF {c} in {c.project}")
                print(e)
            continue
        # print(f"{c.commit_dir_override} => {artifacts_dir}")
        # continue
        # exit(0)
        updated += 1
        batch.append({
            "id": c.id,
            "commit_dir_override": str(artifacts_dir),
        })
        if idx and idx % batch_size == 0:
            print(c)
            now = time.time()
            print(
                f"{idx/total:.1%} [{batch_size/(now - start_batch):.1f}/s] "
                f"[est. total left {(now - start_batch) * ((total-idx)/batch_size) / 3600:.2f}h] "
                f"[elapsed time: {now - start:.1f}s]"
            )
            start_batch = now
            # flush the partial batch against CiCommit, the model being migrated
            db_session.bulk_update_mappings(CiCommit, batch)
            db_session.flush()
            batch = []
            # break
    print(f"DONE, now committing configurations [elapsed time: {now - start:.1f}s]")
    db_session.bulk_update_mappings(CiCommit, batch)
    db_session.flush()
    db_session.commit()
    return updated

def check(_request):
    if not _request.headers.get("Authorization"):
        return {"message": "Make sure you have Token in the headers."}, 400
    try:
        token = jwt.decode(
            _request.headers.get("Authorization").encode(),
            current_app.config["SECRET_KEY"],
            algorithms=["HS256"],
        )
    except Exception as e:
        current_app.logger.debug(str(e))
        return {"message": "invalid token"}, 400
    if not isinstance(token, dict):
        return {"message": "invalid token"}, 400
    g.current_user = db_session.query(User).filter(
        User.email == token["email"]).first()
    if not g.current_user:
        return {"message": "user not found"}, 404
    token_date = date_parse(token["valid_until"])
    if token_date < datetime.now():
        return {"message": "deprecated token"}, 400

def test_file_storage(self, request_mock):
    with TemporaryDirectory() as temp_dir:
        root_dir = os.getcwd()
        s3_less_config = {
            'raw_uploads_path': os.path.join(
                temp_dir,
                '{jurisdiction}-{event_type}-uploaded-{date}-{upload_id}'),
            'merged_uploads_path': os.path.join(
                temp_dir, '{jurisdiction}-{event_type}-merged')
        }
        with full_rig_without_s3() as (app, engine):
            with patch.dict('backend.utils.app_config', s3_less_config):
                upload_id = self.do_upload(app, request_mock)
                # okay, here's what we really want to test.
                # call the merge endpoint
                response = app.post(
                    '/api/upload/merge_file?uploadId={}'.format(upload_id))
                response_data = json.loads(
                    response.get_data().decode('utf-8'))
                assert response_data['status'] == 'success'
                # make sure that there is a new merged file on the FS
                expected_path = os.path.join(
                    temp_dir, 'boone-jail_bookings-merged')
                with open(expected_path, 'rb') as expected_file:
                    reader = csv.reader(expected_file)
                    assert len([row for row in reader]) == 11
                # and make sure that the merge log has a record of this
                assert db_session.query(MergeLog).filter(
                    MergeLog.upload_id == '123-456').one()

def can_access_file(upload_id):
    upload = db_session.query(Upload).get(upload_id)
    if not upload:
        raise ValueError(
            'upload_id: {} not present in metadata database'.format(upload_id))
    logger.info('Found jurisdiction %s and event type %s for upload id %s',
                upload.jurisdiction_slug, upload.event_type_slug, upload_id)
    return can_upload_file(upload.jurisdiction_slug, upload.event_type_slug)

def read_priority_reminders(id):
    try:
        r = db_session.query(ReminderEvents).filter(
            ReminderEvents.id == id).first()
        db_session.delete(r)
        db_session.commit()
        return statement(render_template('reminder_removed'))
    except Exception:
        return statement('Unable to find reminder {}'.format(id))

def event_reminder_complete(id):
    try:
        r = db_session.query(ReminderEvents).filter(
            ReminderEvents.id == id).first()
        r.completed_since_last = True
        db_session.commit()
        return statement(render_template('reminder_completed'))
    except Exception:
        return statement('Unable to find reminder {}'.format(id))

def query(since):
    print(since)
    return (db_session.query(Output)
            .enable_eagerloads(False)
            .filter(Output.created_date > since)
            .order_by(Output.created_date.desc())
            # .limit(batch_size)
            # .all()
            # .yield_per(batch_size)
            )

def merge_file():
    upload_id = request.args.get('uploadId', None)
    if not upload_id:
        return jsonify(status='invalid', reason='uploadId not present')
    has_access = False
    try:
        has_access = can_access_file(upload_id)
        if has_access:
            upload_log = db_session.query(Upload).get(upload_id)
            logger.info('Retrieved upload log, merging raw table to master')
            raw_table_name = 'raw_{}'.format(upload_id)
            logger.info('Merging raw table to master')
            merge_id = upsert_raw_table_to_master(
                raw_table_name,
                upload_log.jurisdiction_slug,
                upload_log.event_type_slug,
                upload_id,
                db_session
            )
            logger.info('Syncing merged file to s3')
            bootstrap_master_tables(upload_log.jurisdiction_slug, db_session)
            sync_merged_file_to_storage(upload_log.jurisdiction_slug,
                                        upload_log.event_type_slug,
                                        db_session.get_bind())
            merge_log = db_session.query(MergeLog).get(merge_id)
            try:
                logger.info('Merge succeeded. Now querying matcher')
                notify_matcher(upload_log.jurisdiction_slug, upload_id)
            except Exception as e:
                logger.error('Error matching: %s', e)
                db_session.rollback()
                return make_response(jsonify(status='error'), 500)
            db_session.commit()
            return jsonify(status='success',
                           new_unique_rows=merge_log.new_unique_rows,
                           total_unique_rows=merge_log.total_unique_rows)
        else:
            return jsonify(status='not authorized')
    except ValueError as e:
        logger.error('Error merging: %s', e)
        db_session.rollback()
        return make_response(jsonify(status='error'), 500)

def merge_duplicates(dryrun):
    # batches = (db_session.query(Batch)\
    #            .filter(CiCommit.id == commit_id, Batch.label == label)\
    #            .all())
    sql = "SELECT ci_commit_id, label, count(*) as qty FROM batches GROUP BY ci_commit_id, label HAVING count(*) > 1;"
    result = db_session.execute(sql)
    duplicates = list(result)
    # for commit_id, label, count in duplicates:
    #     print(commit_id, label, count)
    print(len(duplicates), "duplicates")
    for commit_id, label, count in duplicates:
        print(f"commit_id: {commit_id}, label={label}, count={count}")
        # .filter(and_(CiCommit.id == commit_id, Batch.label == label))\
        batches = (db_session.query(Batch)
                   .join(CiCommit)
                   .filter(Batch.label == label)
                   .filter(CiCommit.id == commit_id)
                   .all())
        print(f" found {len(batches)}, expected: {count}")
        for b in batches:
            print(" ", b, b.label, b.ci_commit.id)
            print(" ", b.ci_commit, b.ci_commit.project)
            print('--')
        assert len(batches) == count
        # keep the first batch, fold the others' commands/outputs into it
        final_batch, *other_batches = list(batches)
        if not final_batch.data:
            final_batch.data = {}
        if not final_batch.data.get('commands'):
            final_batch.data['commands'] = {}
        print('BEFORE', final_batch.data['commands'])
        for b in other_batches:
            if b.data and b.data.get('commands'):
                final_batch.data['commands'].update(b.data['commands'])
            for o in b.outputs:
                o.batch = final_batch
            db_session.delete(b)
        print('AFTER', final_batch.data['commands'])
        if not dryrun:
            db_session.add(final_batch)
            db_session.commit()

def login():
    if request.method == 'POST':
        username = request.form['username']
        password = request.form['password']
        if password == "pass123":
            id = username
            user = db_session.query(User).filter(User.username == id).first()
            login_user(user)
            return flask.redirect(request.args.get("next"))
        else:
            return abort(401)
    else:
        return Response('''
            <form action="" method="post">
                <p><input type=text name=username>
                <p><input type=password name=password>
                <p><input type=submit value=Login>
            </form>
        ''')

def test_good_file(self):
    with full_rig_with_s3() as (app, engine):
        response = app.post(
            '/api/upload/upload_file?jurisdiction=boone&eventType=hmis_service_stays',
            content_type='multipart/form-data',
            data={'file_field': (open(GOOD_HMIS_FILE, 'rb'), 'myfile.csv')})
        response_data = json.loads(response.get_data().decode('utf-8'))
        assert response_data['status'] == 'validating'
        assert 'jobKey' in response_data
        assert 'message' in response_data
        job_key = response_data['jobKey']
        # get validation result and upload to s3
        response = app.get('/api/upload/validated_result/' + job_key)
        response_data = json.loads(response.get_data().decode('utf-8'))
        assert 'validation' in response_data
        assert response_data['validation']['status'] == 'valid'
        assert response_data['validation']['jobKey'] == job_key
        assert 'upload_result' in response_data
        assert 'rowCount' in response_data['upload_result']
        assert 'exampleRows' in response_data['upload_result']
        assert 'uploadId' in response_data['upload_result']
        assert 'fieldOrder' in response_data['upload_result']
        current_date = date.today().isoformat()
        expected_s3_path = 's3://test-bucket/boone/hmis_service_stays/uploaded/{}/{}'.format(
            current_date, response_data['upload_result']['uploadId'])
        with open_sesame(expected_s3_path) as expected_s3_file:
            with open_sesame(GOOD_HMIS_FILE) as source_file:
                # we do not expect the file on s3 to be the same as the
                # uploaded source file - missing columns should be filled in
                s3_df = pd.read_csv(expected_s3_file)
                source_df = pd.read_csv(source_file, sep='|')
                assert source_df.equals(s3_df[source_df.columns.tolist()])
        assert db_session.query(Upload).filter(
            Upload.id == response_data['upload_result']['uploadId']).one()

def migrate():
    # backfill the 'storage' key in Output.data, streaming rows with yield_per
    batch = []
    batch_size = 500
    output_total = 1_700_000
    start = time.time()
    start_batch = time.time()
    outputs = (db_session.query(Output)
               .yield_per(batch_size)
               .enable_eagerloads(False)
               .order_by(Output.created_date.desc())
               )
    now = time.time()
    for idx, o in enumerate(outputs):
        if o.is_pending:
            continue
        if o.data.get('storage'):
            continue
        storage = get_storage(o)
        if storage is None:
            continue
        batch.append({
            "id": o.id,
            "data": {
                **o.data,
                "storage": storage,
            },
        })
        if idx % batch_size == 0:
            print(o)
            now = time.time()
            print(f"{idx/output_total:.1%} [{batch_size/(now - start_batch):.1f}/s] "
                  f"[est. total left {(now - start_batch) * ((output_total-idx)/batch_size) / 3600:.2f}h] "
                  f"[elapsed time: {now - start:.1f}s]")
            start_batch = now
            db_session.bulk_update_mappings(Output, batch)
            db_session.flush()
            batch = []
    print(f"DONE, now committing configurations [elapsed time: {now - start:.1f}s]")
    db_session.bulk_update_mappings(Output, batch)
    db_session.flush()
    db_session.commit()

def test_good_file(self, request_mock):
    with full_rig_with_s3() as (app, engine):
        upload_id = self.do_upload(app, request_mock)
        # okay, here's what we really want to test.
        # call the merge endpoint
        response = app.post(
            '/api/upload/merge_file?uploadId={}'.format(upload_id))
        response_data = json.loads(response.get_data().decode('utf-8'))
        assert response_data['status'] == 'success'
        # make sure that there is a new merged file on s3
        expected_s3_path = 's3://test-bucket/boone/jail_bookings/merged'
        with open_sesame(expected_s3_path, 'rb') as expected_s3_file:
            reader = csv.reader(expected_s3_file)
            assert len([row for row in reader]) == 11
        # and make sure that the merge log has a record of this
        assert db_session.query(MergeLog).filter(
            MergeLog.upload_id == '123-456').one()
        # and make sure that the raw table is no longer there
        assert not table_exists(generate_raw_table_name(upload_id),
                                db_session.bind)

def get_sensor_data(self, user_id, page_size=20, page=1):
    offset = page_size * (page - 1)
    data = db_session.query(SensorData).filter(
        SensorData.user_id == user_id)
    return data.count(), data.limit(page_size).offset(offset).all()

def load_user(userid):
    return db_session.query(User).filter(User.id == userid).first()