def test_create_user_with_missing_required_fields(root_token):
    """POST /users with a payload that only carries a username must yield HTTP 400."""
    incomplete_payload = {'username': generate_string(8)}
    endpoint = uri('/users')
    auth_headers = token_header(root_token)

    reply = requests.post(endpoint, json=incomplete_payload, headers=auth_headers)

    assert reply.status_code == 400
def test_upload_and_download_file(root_token):
    """Upload a small text file to a fresh repository and download it unchanged.

    Steps: create a repository, upload ``file.txt`` to it, then fetch the
    file's ``/content`` endpoint and compare the body with what was uploaded.

    :param root_token: token passed to ``token_header`` to authorize each request
    """
    # The payload is a bytes literal: the file is opened in binary mode and
    # ``response.content`` is bytes, so a text literal would fail on Python 3.
    payload = b'this is some text'
    headers = token_header(root_token)

    response = requests.post(
        uri('/repositories'), json={'name': generate_string(8)}, headers=headers)
    assert response.status_code == 201
    repository_id = response.json()['id']

    with open('file.txt', 'wb') as f:
        f.write(payload)
    try:
        with open('file.txt', 'rb') as f:
            response = requests.post(
                uri('/repositories/{}/files'.format(repository_id)),
                files={'file': f},
                data={'type': 'text', 'modality': 'none'},
                headers=headers)
    finally:
        # Always clean up the scratch file, even when the upload raises;
        # os.remove is portable where shelling out to ``rm`` is not.
        os.remove('file.txt')
    assert response.status_code == 201
    file_id = response.json()['id']

    response = requests.get(
        uri('/repositories/{}/files/{}/content'.format(repository_id, file_id)),
        headers=headers)
    assert response.status_code == 200
    assert response.content == payload
def post(self):
    """
    Stores an uploaded file, registers it through FileDao and returns an
    HTML page linking to the file and to the classifier creation step.

    The file is saved under UPLOAD_DIR using a generated storage ID as its
    file name. The recorded size is always 0 and the content type is always
    'application/octet-stream'.

    :return: result of self.output_html() with status 201
    """
    upload_parser = reqparse.RequestParser()
    upload_parser.add_argument('file', type=FileStorage, required=True, location='files')
    args = upload_parser.parse_args()
    uploaded = args['file']

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    uploaded.save(storage_path)

    # Only these fields are handed to the DAO; the upload object itself is not
    original_name = uploaded.filename
    file_fields = {
        'storage_id': storage_id,
        'storage_path': storage_path,
        'name': original_name,
        'extension': '.'.join(original_name.split('.')[1:]),
        'content_type': 'application/octet-stream',
        'media_link': storage_path,
        'size': 0,
    }
    stored_file = FileDao(self.db_session()).create(**file_fields)

    page = ''.join([
        '<h3>Thanks for uploading your file!</h3>',
        '<p>You can get it here: <a href="/files/{}/content">download</a></p>'.format(
            stored_file.id),
        '<br>',
        '<p>Next step is to create a new classifier by clicking the button below.</p>',
        '<form method="post" action="/classifiers">',
        ' <input type="submit" value="Create classifier">',
        '</form>',
    ])
    return self.output_html(page, 201)
def post(self, id):
    """
    Stores an uploaded file and registers it through FileDao as belonging
    to repository {id}.

    The file is saved under UPLOAD_DIR using a generated storage ID as its
    file name. The recorded size is always 0 and the content type is always
    'application/octet-stream'.

    :param id: ID used to retrieve the repository
    :return: tuple of the created file's to_dict() result and status 201
    """
    upload_parser = reqparse.RequestParser()
    upload_parser.add_argument('file', type=FileStorage, required=True, location='files')
    args = upload_parser.parse_args()
    uploaded = args['file']

    repository = RepositoryDao(self.db_session()).retrieve(id=id)

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    uploaded.save(storage_path)

    # The upload object itself is deliberately left out: only these fields
    # are passed on to the DAO
    original_name = uploaded.filename
    file_fields = {
        'repository': repository,
        'storage_id': storage_id,
        'storage_path': storage_path,
        'name': original_name,
        'extension': '.'.join(original_name.split('.')[1:]),
        'content_type': 'application/octet-stream',
        'media_link': storage_path,
        'size': 0,
    }
    stored_file = FileDao(self.db_session()).create(**file_fields)
    return stored_file.to_dict(), 201
def post(self, id):
    """
    Runs predictions on an uploaded CSV file using the classifier stored
    for training session {id} and returns the results as an HTML table.

    The upload is saved under UPLOAD_DIR with a generated file name and then
    read with pandas, using the form field 'subject_id' as index column.
    No file record is created in the database for this upload.

    :param id: ID used to retrieve the training session
    :return: result of self.output_html() with status 201
    """
    # Imported locally under its own name because the page buffer below is
    # called ``html``. Available in the standard library of Python 2 and 3.
    from xml.sax.saxutils import escape

    parser = reqparse.RequestParser()
    parser.add_argument('file', type=FileStorage, required=True, location='files')
    parser.add_argument('subject_id', type=str, required=True, location='form')
    args = parser.parse_args()

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    args['file'].save(storage_path)

    # Get trained classifier from session
    session_dao = SessionDao(self.db_session())
    session = session_dao.retrieve(id=id)

    # Load features
    features = pd.read_csv(storage_path, index_col=args['subject_id'])
    x = get_x(features)
    classifier = joblib.load(session.classifier_file_path)
    predictions = classifier.predict(x)

    # Case IDs and predicted labels originate from the uploaded file, so they
    # are escaped (&, <, >) before being placed in the page.
    rows = ''.join(
        '<tr><td>{}</td><td>{}</td></tr>'.format(
            escape('{}'.format(case_id)), escape('{}'.format(predicted)))
        for case_id, predicted in zip(features.index, predictions))

    html = ''
    html += '<h3>Congratulations!</h3>'
    html += '<p>You have successfully run one or more predictions. The results<br>'
    html += 'are listed below.</p>'
    html += '<table border="1">'
    html += '<tr><th>Case ID</th><th>Predicted target</th></tr>'
    html += rows
    html += '</table>'
    html += '<p>Click the button "Restart" to start over. You can train a new<br>'
    html += 'classifier or select an existing training session and run another<br>'
    html += 'prediction.</p>'
    html += '<form method="get" action="/">'
    html += ' <input type="submit" value="Restart">'
    html += '</form>'
    return self.output_html(html, 201)
def post(self, id):
    """
    Stores an uploaded file, registers it through FileDao, retrieves
    session {id} and returns a static HTML confirmation page.

    NOTE: no prediction is actually computed here. The handler only logs a
    message; loading the classifier, running the prediction and deleting the
    CSV file are not implemented in this method.

    :param id: ID used to retrieve the session
    :return: result of self.output_html() with status 201
    """
    upload_parser = reqparse.RequestParser()
    upload_parser.add_argument('file', type=FileStorage, required=True, location='files')
    args = upload_parser.parse_args()
    uploaded = args['file']

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    uploaded.save(storage_path)

    original_name = uploaded.filename
    file_fields = {
        'storage_id': storage_id,
        'storage_path': storage_path,
        'name': original_name,
        'extension': '.'.join(original_name.split('.')[1:]),
        'content_type': 'application/octet-stream',
        'media_link': storage_path,
        'size': 0,
    }
    # Neither the created file record nor the session is used further below
    f = FileDao(self.db_session()).create(**file_fields)
    session = SessionDao(self.db_session()).retrieve(id=id)

    print('Running prediction...')

    page = ''.join([
        '<h3>Congratulations!</h3>',
        '<p>You have successfully completed your prediction!</p>',
        '<p>Here are the results:</p>',
    ])
    return self.output_html(page, 201)
def get(self):
    """
    Renders either the classifier selection menu or, when the query
    argument 'classifier' is given, the training page for that classifier.

    If no classifiers exist yet, a single one named 'SVM' is created first.
    The training page links to existing training sessions (if any) and
    offers an upload form that posts to /classifiers/{id}/sessions.

    :return: result of self.output_html() with status 200
    """
    classifier_dao = ClassifierDao(self.db_session())
    classifiers = classifier_dao.retrieve_all()
    if len(classifiers) == 0:
        classifiers.append(
            classifier_dao.create(**{
                'name': 'SVM',
                'external_id': 'SVM-' + generate_string(8)
            }))

    parser = reqparse.RequestParser()
    parser.add_argument('classifier', type=int, required=False, location='args')
    args = parser.parse_args()

    html = ''
    if args['classifier'] is not None:
        classifier = classifier_dao.retrieve(id=args['classifier'])
        html += '<h3>Congratulations!</h3>'
        html += '<p>You selected the following classifier: {}</p>'.format(
            classifier.name)
        nr_sessions = len(classifier.sessions)
        if nr_sessions > 0:
            # The opening tag must be closed, otherwise the browser sees
            # '<pThis classifier ...' and the paragraph text is lost.
            html += '<p>'
            html += 'This classifier has already been trained {} times.<br>'.format(
                nr_sessions)
            html += 'If you wish to re-use one of these training sessions,<br>'
            html += 'click the button below.'
            html += '</p>'
            html += '<form method="get" action="/classifiers/{}/sessions">'.format(
                classifier.id)
            html += ' <input type="submit" value="View training sessions">'
            html += '</form>'
        html += '<p>'
        html += 'To train this classifier with new examples, upload a CSV file below. After<br>'
        html += 'you click the button, it may take a few minutes for the page to respond. DO NOT<br>'
        html += 'REFRESH THE PAGE OR NAVIGATE TO ANOTHER PAGE because this will interrupt the<br>'
        html += 'training process.'
        html += '</p>'
        html += '<form method="post" enctype="multipart/form-data" action="/classifiers/{}/sessions">'.format(
            classifier.id)
        html += ' <input type="file" name="file">'
        html += ' <input type="submit" value="Train classifier"><br><br>'
        html += ' <input type="checkbox" name="R" value="true">Use R<br><br>'
        html += ' <input type="text" name="target_column" value="Diagnosis"> Target column<br><br>'
        html += ' <input type="text" name="exclude_columns"> Exclude columns (comma-separated list)<br><br>'
        html += ' <input type="text" name="nr_iters" value="1">Nr. iterations<br><br>'
        html += ' <input type="text" name="nr_folds" value="2">Nr. folds (>= 2)<br><br>'
        html += '</form>'
    else:
        html += '<h3>Select classifier</h3>'
        html += '<p>Select a classifier from the pull-down menu below.</p>'
        html += '<br>'
        html += '<form method="get" action="/classifiers">'
        html += ' <select name="classifier">'
        for classifier in classifiers:
            html += ' <option value="{}">SVM</option>'.format(classifier.id)
        html += ' </select>'
        html += ' <input type="submit" value="Select classifier">'
        html += '</form>'
    return self.output_html(html, 200)
def post(self, id):
    """
    Trains classifier {id} on an uploaded CSV file and creates a training
    session that records the training file and the stored classifier.

    The upload is saved under UPLOAD_DIR and registered through FileDao.
    Performance is estimated with 'nr_iters' rounds of stratified
    'nr_folds'-fold cross-validation; the reported accuracy is the mean of
    all fold scores. A classifier trained on the full data is then dumped
    with joblib next to the uploaded file ('<storage_path>.classifier').

    :param id: ID used to retrieve the classifier
    :return: result of self.output_html() with status 201
    """
    parser = reqparse.RequestParser()
    parser.add_argument('file', type=FileStorage, required=True, location='files')
    parser.add_argument('R', type=str, location='form')
    parser.add_argument('target_column', type=str, location='form')
    parser.add_argument('exclude_columns', type=str, location='form')
    parser.add_argument('nr_iters', type=str, location='form')
    parser.add_argument('nr_folds', type=str, location='form')
    args = parser.parse_args()

    R = args['R']
    target_column = args['target_column']
    # A missing or empty field means "exclude nothing". Splitting the raw
    # value directly would crash on None and turn '' into [''], i.e. a bogus
    # column named ''. Whitespace around names is dropped too.
    exclude_columns = [
        column.strip()
        for column in (args['exclude_columns'] or '').split(',')
        if column.strip()
    ]
    nr_iters = int(args['nr_iters'])
    nr_folds = int(args['nr_folds'])

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    args['file'].save(storage_path)

    # Only file-related fields go to the DAO, not the training options
    name = args['file'].filename
    f_dao = FileDao(self.db_session())
    f = f_dao.create(**{
        'storage_id': storage_id,
        'storage_path': storage_path,
        'name': name,
        'extension': '.'.join(name.split('.')[1:]),
        'content_type': 'application/octet-stream',
        'media_link': storage_path,
        'size': 0,
    })

    classifier_dao = ClassifierDao(self.db_session())
    classifier = classifier_dao.retrieve(id=id)
    print('Training classifier {} on file {}'.format(classifier.name, f.name))
    if R == 'true':
        print('R scripting is not implemented yet...')

    # After classifier training finishes, create a session that captures the results
    print('Calculating classifier performance...')
    scores = 0
    features = pd.read_csv(f.storage_path)
    x, y = get_xy(features, target_column=target_column, exclude_columns=exclude_columns)
    for _ in range(nr_iters):
        for train, test in StratifiedKFold(n_splits=nr_folds).split(x, y):
            _, score = score_svm(x, y, train, test)
            scores += score
    avg_score = scores / (nr_iters * nr_folds)

    path = f.storage_path + '.classifier'
    print('Building optimized classifier and storing in {}'.format(path))
    classifier_model = train_svm(x, y)
    joblib.dump(classifier_model, path)

    session_dao = SessionDao(self.db_session())
    session = session_dao.create(**{
        'classifier': classifier,
        'training_file_path': f.storage_path,
        'classifier_file_path': path,
    })

    html = '<h3>Congratulations!</h3>'
    html += '<p>You have successfully trained your classifier! It has an average '
    html += 'classification accuracy of {} after {} iterations and {} folds.</p>'.format(
        avg_score, nr_iters, nr_folds)
    html += '<p>The next step is to use your classifier for predictions. Again, upload a<br>'
    html += 'CSV file with the cases you want to predict.</p>'
    html += '<form method="post" enctype="multipart/form-data" action="/sessions/{}/predictions">'.format(
        session.id)
    html += ' <input type="file" name="file">'
    html += ' <input type="submit" value="Upload predictions">'
    html += '</form>'
    return self.output_html(html, 201)
def get(self):
    """
    Builds the "Step 1" page: a pull-down menu with one entry per stored
    classifier and a button that posts the choice to /classifiers.

    When the database holds no classifiers, one named 'SVM' is created
    first so the menu is never empty.

    :return: result of self.output_html() with status 200
    """
    dao = ClassifierDao(self.db_session())
    available = dao.retrieve_all()
    if len(available) == 0:
        seed = {'name': 'SVM', 'external_id': 'SVM-' + generate_string(8)}
        available.append(dao.create(**seed))

    # Every entry carries the same label; only the option value differs
    options = [
        ' <option value="{}">Support Vector Machine</option>'.format(entry.id)
        for entry in available
    ]

    fragments = [
        '<h3>Step 1 - Select a classifier</h3>',
        '<p>Select a classifier from the pull-down menu below. Then click<br>',
        'the select button to proceed.</p>',
        '<form method="post" action="/classifiers">',
        ' <select name="classifier_id">',
    ]
    fragments.extend(options)
    fragments.extend([
        ' </select>',
        ' <input type="submit" value="Select">',
        '</form>',
    ])
    return self.output_html(''.join(fragments), 200)
def post(self, id):
    """
    Creates a new training session for classifier {id}. The classifier uses
    the uploaded file for training.

    The upload is saved under UPLOAD_DIR and registered through FileDao.
    The CSV is read with the form field 'subject_id' as index column.
    Performance is estimated with 'nr_iters' rounds of stratified
    'nr_folds'-fold cross-validation, after which a classifier trained on
    the full data is dumped with joblib to '<storage_path>.classifier'.

    :param id: ID used to retrieve the classifier
    :return: result of self.output_html() with status 201
    """
    parser = reqparse.RequestParser()
    parser.add_argument('file', type=FileStorage, required=True, location='files')
    parser.add_argument('R', type=str, location='form')
    parser.add_argument('target_column', type=str, location='form')
    parser.add_argument('exclude_columns', type=str, location='form')
    parser.add_argument('nr_iters', type=str, location='form')
    parser.add_argument('nr_folds', type=str, location='form')
    parser.add_argument('subject_id', type=str, required=True, location='form')
    args = parser.parse_args()

    subject_id = args['subject_id']
    R = args['R']
    target_column = args['target_column']
    # A missing or empty field means "exclude nothing". Splitting the raw
    # value directly would crash on None and turn '' into [''], i.e. a bogus
    # column named ''. Whitespace around names is dropped too.
    exclude_columns = [
        column.strip()
        for column in (args['exclude_columns'] or '').split(',')
        if column.strip()
    ]
    nr_iters = int(args['nr_iters'])
    nr_folds = int(args['nr_folds'])

    # Save the upload on disk under a generated name
    storage_id = generate_string()
    storage_path = os.path.join(
        current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
    args['file'].save(storage_path)

    # Only file-related fields go to the DAO, not the training options
    name = args['file'].filename
    f_dao = FileDao(self.db_session())
    f = f_dao.create(**{
        'storage_id': storage_id,
        'storage_path': storage_path,
        'name': name,
        'extension': '.'.join(name.split('.')[1:]),
        'content_type': 'application/octet-stream',
        'media_link': storage_path,
        'size': 0,
    })

    classifier_dao = ClassifierDao(self.db_session())
    classifier = classifier_dao.retrieve(id=id)
    print('Training classifier {} on file {}'.format(classifier.name, f.name))
    if R == 'true':
        os.system('Rscript ./R/svm.R 1')

    # After classifier training finishes, create a session that captures the results
    print('Calculating classifier performance...')
    features = pd.read_csv(f.storage_path, index_col=subject_id)
    # The requested exclusions are forwarded to get_xy; previously the field
    # was parsed but then silently ignored.
    # NOTE(review): assumes get_xy accepts an 'exclude_columns' keyword - confirm.
    x, y = get_xy(features, target_column=target_column, exclude_columns=exclude_columns)

    # Run performance evaluation on classifier
    scores = 0
    for _ in range(nr_iters):
        for train, test in StratifiedKFold(n_splits=nr_folds).split(x, y):
            _, score = score_svm(x, y, train, test)
            scores += score
    avg_score = scores / (nr_iters * nr_folds)

    path = f.storage_path + '.classifier'
    print('Building optimized classifier and storing in {}'.format(path))
    classifier_model = train_svm(x, y)
    joblib.dump(classifier_model, path)

    session_dao = SessionDao(self.db_session())
    session = session_dao.create(**{
        'classifier': classifier,
        'training_file_path': f.storage_path,
        'classifier_file_path': path,
    })

    html = '<h3>Step 3 - View training session</h3>'
    html += '<p>You have successfully trained your classifier. Click the link below<br>'
    html += 'To view the training session and upload a file with cases to predict.</p>'
    html += '<a href="/sessions/{}">Session {}</a>'.format(session.id, session.id)
    return self.output_html(html, 201)