Beispiel #1
0
def test_create_user_with_missing_required_fields(root_token):
    """Posting a user payload that carries only a username must yield HTTP 400."""
    payload = {'username': generate_string(8)}
    endpoint = uri('/users')
    auth_headers = token_header(root_token)

    response = requests.post(endpoint, json=payload, headers=auth_headers)

    assert response.status_code == 400
Beispiel #2
0
def test_upload_and_download_file(root_token):
    """Upload a small file to a new repository and download it unchanged.

    Steps: create a repository, upload ``file.txt`` to it, then fetch the
    file content and compare it with what was uploaded.

    The payload is a bytes literal: the scratch file is opened in binary
    mode and ``response.content`` is the raw response body, so comparing
    against a text string would fail on Python 3.
    """
    payload = b'this is some text'

    response = requests.post(uri('/repositories'),
                             json={'name': generate_string(8)},
                             headers=token_header(root_token))
    assert response.status_code == 201
    repository_id = response.json()['id']

    with open('file.txt', 'wb') as f:
        f.write(payload)
    try:
        with open('file.txt', 'rb') as f:
            response = requests.post(
                uri('/repositories/{}/files'.format(repository_id)),
                files={'file': f},
                data={
                    'type': 'text',
                    'modality': 'none'
                },
                headers=token_header(root_token))
        assert response.status_code == 201
        file_id = response.json()['id']
    finally:
        # Remove the scratch file even when the upload assertions fail, and
        # without shelling out to a platform-specific `rm`.
        os.remove('file.txt')

    response = requests.get(
        uri('/repositories/{}/files/{}/content'.format(repository_id,
                                                       file_id)),
        headers=token_header(root_token))
    assert response.status_code == 200
    assert response.content == payload
Beispiel #3
0
    def post(self):
        """Store an uploaded file and answer with an HTML confirmation page.

        The multipart field ``file`` is required. The upload is saved below
        the configured ``UPLOAD_DIR`` under a freshly generated storage id,
        a file record is created through ``FileDao``, and the page links to
        the stored content and offers a button that posts to
        ``/classifiers``.

        :return: result of ``self.output_html`` for the page with status 201
        """
        request_parser = reqparse.RequestParser()
        request_parser.add_argument(
            'file', type=FileStorage, required=True, location='files')
        args = request_parser.parse_args()

        upload = args['file']
        storage_id = generate_string()
        storage_path = os.path.join(
            current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
        upload.save(storage_path)
        filename = upload.filename

        # The upload object itself is removed before the remaining fields are
        # handed to FileDao.create as keyword arguments.
        del args['file']
        args.update({
            'storage_id': storage_id,
            'storage_path': storage_path,
            'name': filename,
            # Everything after the first dot, e.g. "tar.gz" for "a.tar.gz".
            'extension': '.'.join(filename.split('.')[1:]),
            'content_type': 'application/octet-stream',
            'media_link': storage_path,
            'size': 0,
        })

        record = FileDao(self.db_session()).create(**args)

        page = ''.join([
            '<h3>Thanks for uploading your file!</h3>',
            '<p>You can get it here: <a href="/files/{}/content">download</a></p>'.format(
                record.id),
            '<br>',
            '<p>Next step is to create a new classifier by clicking the button below.</p>',
            '<form method="post" action="/classifiers">',
            '  <input type="submit" value="Create classifier">',
            '</form>',
        ])

        return self.output_html(page, 201)
    def post(self, id):
        """Store an uploaded file for repository ``id`` and return its record.

        The multipart field ``file`` is required. The repository is looked
        up through ``RepositoryDao``, the upload is saved below the
        configured ``UPLOAD_DIR`` under a generated storage id, and a file
        record linked to the repository is created through ``FileDao``.

        :param id: identifier used to retrieve the repository
        :return: tuple of the new record's ``to_dict()`` result and 201
        """
        request_parser = reqparse.RequestParser()
        request_parser.add_argument(
            'file', type=FileStorage, required=True, location='files')
        args = request_parser.parse_args()

        args['repository'] = RepositoryDao(self.db_session()).retrieve(id=id)

        upload = args['file']
        storage_id = generate_string()
        storage_path = os.path.join(
            current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
        upload.save(storage_path)
        filename = upload.filename

        # The File constructor does not expect a 'file' argument, so the
        # upload object is dropped before the record is created.
        del args['file']
        args.update({
            'storage_id': storage_id,
            'storage_path': storage_path,
            'name': filename,
            # Everything after the first dot of the uploaded file name.
            'extension': '.'.join(filename.split('.')[1:]),
            'content_type': 'application/octet-stream',
            'media_link': storage_path,
            'size': 0,
        })

        record = FileDao(self.db_session()).create(**args)
        return record.to_dict(), 201
Beispiel #5
0
    def post(self, id):
        """Run predictions on an uploaded CSV with the classifier of session ``id``.

        Form data:

        * ``file`` (required): CSV file with the cases to predict.
        * ``subject_id`` (required): name of the CSV column used as index;
          its values are shown as case ids in the result table.

        The upload is saved below the configured ``UPLOAD_DIR``, the
        classifier stored for the session is loaded with ``joblib`` and its
        predictions are rendered as an HTML table.

        Case ids and predictions originate from the uploaded file, so they
        are HTML-escaped before being placed in the page.

        :param id: identifier used to retrieve the training session
        :return: result of ``self.output_html`` for the page with status 201
        """
        # Imported locally so the block does not depend on module imports;
        # only ``escape`` is bound, the local ``html`` string stays separate.
        try:
            from html import escape
        except ImportError:  # Python 2 has no html.escape
            from cgi import escape

        parser = reqparse.RequestParser()
        parser.add_argument('file',
                            type=FileStorage,
                            required=True,
                            location='files')
        parser.add_argument('subject_id',
                            type=str,
                            required=True,
                            location='form')
        args = parser.parse_args()

        storage_path = os.path.join(current_app.root_path,
                                    self.config()['UPLOAD_DIR'],
                                    generate_string())
        args['file'].save(storage_path)

        # Get trained classifier from session
        session_dao = SessionDao(self.db_session())
        session = session_dao.retrieve(id=id)

        # Load features
        features = pd.read_csv(storage_path, index_col=args['subject_id'])
        x = get_x(features)
        # NOTE(review): joblib.load unpickles the file; the path must only
        # ever point at classifier files written by this application.
        classifier = joblib.load(session.classifier_file_path)
        predictions = classifier.predict(x)

        html = ''
        html += '<h3>Congratulations!</h3>'
        html += '<p>You have successfully run one or more predictions. The results<br>'
        html += 'are listed below.</p>'
        html += '<table border="1">'
        html += '<tr><th>Case ID</th><th>Predicted target</th></tr>'

        for case_id, prediction in zip(features.index, predictions):
            html += '<tr><td>{}</td><td>{}</td></tr>'.format(
                escape('{}'.format(case_id)),
                escape('{}'.format(prediction)))

        html += '</table>'
        html += '<p>Click the button "Restart" to start over. You can train a new<br>'
        html += 'classifier or select an existing training session and run another<br>'
        html += 'prediction.</p>'
        html += '<form method="get" action="/">'
        html += '  <input type="submit" value="Restart">'
        html += '</form>'

        return self.output_html(html, 201)
Beispiel #6
0
    def post(self, id):
        """Store an uploaded prediction file and return a placeholder result page.

        The multipart field ``file`` is required. The upload is saved below
        the configured ``UPLOAD_DIR``, registered through ``FileDao``, and
        the session ``id`` is retrieved. No prediction is executed yet; the
        page only announces success.

        :param id: identifier used to retrieve the training session
        :return: result of ``self.output_html`` for the page with status 201
        """
        request_parser = reqparse.RequestParser()
        request_parser.add_argument(
            'file', type=FileStorage, required=True, location='files')
        args = request_parser.parse_args()

        upload = args['file']
        storage_id = generate_string()
        storage_path = os.path.join(
            current_app.root_path, self.config()['UPLOAD_DIR'], storage_id)
        upload.save(storage_path)
        filename = upload.filename

        # Only the remaining fields are passed on to FileDao.create.
        del args['file']
        args.update({
            'storage_id': storage_id,
            'storage_path': storage_path,
            'name': filename,
            # Everything after the first dot of the uploaded file name.
            'extension': '.'.join(filename.split('.')[1:]),
            'content_type': 'application/octet-stream',
            'media_link': storage_path,
            'size': 0,
        })

        # Both results are currently unused; the calls are kept for their
        # database interaction.
        FileDao(self.db_session()).create(**args)
        SessionDao(self.db_session()).retrieve(id=id)

        print('Running prediction...')

        # TODO: load the classifier object file, run the prediction and
        # delete the CSV file afterwards.

        page = ''.join([
            '<h3>Congratulations!</h3>',
            '<p>You have successfully completed your prediction!</p>',
            '<p>Here are the results:</p>',
        ])

        return self.output_html(page, 201)
Beispiel #7
0
    def get(self):
        """Render the classifier selection page or the training upload page.

        If no classifier exists yet, a single ``SVM`` classifier is created
        first. Without the query parameter ``classifier`` a pull-down menu
        of all classifiers is returned. With ``classifier`` (an integer id)
        the page shows the selected classifier, a link to its existing
        training sessions (if any) and the upload form for a new training
        run.

        :return: result of ``self.output_html`` for the page with status 200
        """
        classifier_dao = ClassifierDao(self.db_session())
        classifiers = classifier_dao.retrieve_all()
        if len(classifiers) == 0:
            classifiers.append(
                classifier_dao.create(
                    **{
                        'name': 'SVM',
                        'external_id': 'SVM-' + generate_string(8)
                    }))

        parser = reqparse.RequestParser()
        parser.add_argument('classifier',
                            type=int,
                            required=False,
                            location='args')
        args = parser.parse_args()

        html = ''

        if args['classifier'] is not None:

            # NOTE(review): an unknown id presumably makes retrieve() return
            # None or raise; that case is not handled here - confirm.
            classifier = classifier_dao.retrieve(id=args['classifier'])
            html += '<h3>Congratulations!</h3>'
            html += '<p>You selected the following classifier: {}</p>'.format(
                classifier.name)
            nr_sessions = len(classifier.sessions)
            if nr_sessions > 0:
                # The opening tag previously lacked its closing '>', which
                # produced malformed markup ("<pThis classifier ...").
                html += '<p>'
                html += 'This classifier has already been trained {} times.<br>'.format(
                    nr_sessions)
                html += 'If you wish to re-use one of these training sessions,<br>'
                html += 'click the button below.'
                html += '</p>'
                html += '<form method="get" action="/classifiers/{}/sessions">'.format(
                    classifier.id)
                html += '  <input type="submit" value="View training sessions">'
                html += '</form>'

            html += '<p>'
            html += 'To train this classifier with new examples, upload a CSV file below. After<br>'
            html += 'you click the button, it may take a few minutes for the page to respond. DO NOT<br>'
            html += 'REFRESH THE PAGE OR NAVIGATE TO ANOTHER PAGE because this will interrupt the<br>'
            html += 'training process.'
            html += '</p>'
            html += '<form method="post" enctype="multipart/form-data" action="/classifiers/{}/sessions">'.format(
                classifier.id)
            html += '  <input type="file" name="file">'
            html += '  <input type="submit" value="Train classifier"><br><br>'
            html += '  <input type="checkbox" name="R" value="true">Use R<br><br>'
            html += '  <input type="text" name="target_column" value="Diagnosis"> Target column<br><br>'
            html += '  <input type="text" name="exclude_columns"> Exclude columns (comma-separated list)<br><br>'
            html += '  <input type="text" name="nr_iters" value="1">Nr. iterations<br><br>'
            html += '  <input type="text" name="nr_folds" value="2">Nr. folds (>= 2)<br><br>'
            html += '</form>'

        else:

            html += '<h3>Select classifier</h3>'
            html += '<p>Select a classifier from the pull-down menu below.</p>'
            html += '<br>'
            html += '<form method="get" action="/classifiers">'
            html += '  <select name="classifier">'
            for classifier in classifiers:
                html += '    <option value="{}">SVM</option>'.format(
                    classifier.id)
            html += '  </select>'
            html += '  <input type="submit" value="Select classifier">'
            html += '</form>'

        return self.output_html(html, 200)
Beispiel #8
0
    def post(self, id):
        """Train classifier ``id`` on an uploaded CSV and record a training session.

        Form fields:

        * ``file`` (required): CSV file with the training examples.
        * ``R``: when equal to ``'true'`` a notice is printed that R
          scripting is not implemented; training continues regardless.
        * ``target_column``: passed to ``get_xy`` as the target column.
        * ``exclude_columns``: comma-separated column names passed to
          ``get_xy``; blank entries are dropped. Defaults to no columns.
        * ``nr_iters``: number of cross-validation repetitions (default 1).
        * ``nr_folds``: number of stratified folds (default 2).

        The counts are parsed as integers by the request parser, so
        malformed values are rejected by the parser instead of failing later
        in ``int()``. Counts that would make the evaluation impossible
        (``nr_iters < 1`` or ``nr_folds < 2``) are rejected before the upload
        is stored, rather than failing after training with a division by
        zero.

        The upload is saved below ``UPLOAD_DIR`` and registered through
        ``FileDao``. The scores returned by ``score_svm`` are averaged over
        all folds and repetitions, a model built by ``train_svm`` is dumped
        next to the upload as ``<storage_path>.classifier``, and a session
        referencing both files is created.

        :param id: identifier used to retrieve the classifier
        :return: result of ``self.output_html``: the success page with status
            201, or an error page with status 400 for invalid counts
        """
        parser = reqparse.RequestParser()
        parser.add_argument('file',
                            type=FileStorage,
                            required=True,
                            location='files')
        parser.add_argument('R', type=str, location='form')
        parser.add_argument('target_column', type=str, location='form')
        parser.add_argument('exclude_columns',
                            type=str,
                            default='',
                            location='form')
        parser.add_argument('nr_iters', type=int, default=1, location='form')
        parser.add_argument('nr_folds', type=int, default=2, location='form')
        args = parser.parse_args()

        # Pop the training options so that only file attributes remain in
        # ``args`` when it is handed to FileDao.create below.
        R = args.pop('R')
        target_column = args.pop('target_column')
        exclude_columns = [
            column
            for column in (args.pop('exclude_columns') or '').split(',')
            if column
        ]
        nr_iters = args.pop('nr_iters')
        nr_folds = args.pop('nr_folds')
        upload = args.pop('file')

        if nr_iters < 1 or nr_folds < 2:
            return self.output_html(
                '<p>Nr. iterations must be at least 1 and nr. folds must be '
                'at least 2.</p>', 400)

        args['storage_id'] = generate_string()
        args['storage_path'] = os.path.join(current_app.root_path,
                                            self.config()['UPLOAD_DIR'],
                                            args['storage_id'])
        upload.save(args['storage_path'])
        args['name'] = upload.filename
        # Everything after the first dot of the uploaded file name.
        args['extension'] = '.'.join(args['name'].split('.')[1:])
        args['content_type'] = 'application/octet-stream'
        args['media_link'] = args['storage_path']
        args['size'] = 0

        f_dao = FileDao(self.db_session())
        f = f_dao.create(**args)

        classifier_dao = ClassifierDao(self.db_session())
        classifier = classifier_dao.retrieve(id=id)
        print('Training classifier {} on file {}'.format(
            classifier.name, f.name))
        if R == 'true':
            print('R scripting is not implemented yet...')

        # After classifier training finishes, create a session that captures the results
        print('Calculating classifier performance...')
        scores = 0
        features = pd.read_csv(f.storage_path)
        x, y = get_xy(features,
                      target_column=target_column,
                      exclude_columns=exclude_columns)
        # NOTE(review): StratifiedKFold is created without shuffling, so
        # every repetition presumably evaluates the same splits - confirm
        # whether repeated iterations are meant to differ.
        for _ in range(nr_iters):
            for train, test in StratifiedKFold(n_splits=nr_folds).split(x, y):
                _, score = score_svm(x, y, train, test)
                scores += score
        avg_score = scores / (nr_iters * nr_folds)

        path = f.storage_path + '.classifier'
        print('Building optimized classifier and storing in {}'.format(path))
        classifier_model = train_svm(x, y)
        joblib.dump(classifier_model, path)

        session_dao = SessionDao(self.db_session())
        session = session_dao.create(
            **{
                'classifier': classifier,
                'training_file_path': f.storage_path,
                'classifier_file_path': path,
            })

        html = '<h3>Congratulations!</h3>'

        html += '<p>You have successfully trained your classifier! It has an average '
        html += 'classification accuracy of {} after {} iterations and {} folds.</p>'.format(
            avg_score, nr_iters, nr_folds)

        html += '<p>The next step is to use your classifier for predictions. Again, upload a<br>'
        html += 'CSV file with the cases you want to predict.</p>'
        html += '<form method="post" enctype="multipart/form-data" action="/sessions/{}/predictions">'.format(
            session.id)
        html += '  <input type="file" name="file">'
        html += '  <input type="submit" value="Upload predictions">'
        html += '</form>'

        return self.output_html(html, 201)
Beispiel #9
0
    def get(self):
        """Render step 1: a pull-down menu with all stored classifiers.

        When the database holds no classifier yet, one named ``SVM`` is
        created first so the menu is never empty. The form posts the chosen
        ``classifier_id`` to ``/classifiers``.

        :return: result of ``self.output_html`` for the page with status 200
        """
        dao = ClassifierDao(self.db_session())
        available = dao.retrieve_all()
        if len(available) == 0:
            seed = dao.create(name='SVM',
                              external_id='SVM-' + generate_string(8))
            available.append(seed)

        # Every entry is labelled identically; only the option value differs.
        options = [
            '    <option value="{}">Support Vector Machine</option>'.format(entry.id)
            for entry in available
        ]

        fragments = [
            '<h3>Step 1 - Select a classifier</h3>',
            '<p>Select a classifier from the pull-down menu below. Then click<br>',
            'the select button to proceed.</p>',
            '<form method="post" action="/classifiers">',
            '  <select name="classifier_id">',
        ]
        fragments.extend(options)
        fragments.extend([
            '  </select>',
            '  <input type="submit" value="Select">',
            '</form>',
        ])

        return self.output_html(''.join(fragments), 200)
Beispiel #10
0
    def post(self, id):
        """Create a new training session for classifier ``id`` from an uploaded CSV.

        Form fields:

        * ``file`` (required): CSV file with the training examples.
        * ``subject_id`` (required): name of the CSV column used as index.
        * ``R``: when equal to ``'true'`` the script ``./R/svm.R`` is run
          through ``Rscript`` before the Python evaluation.
        * ``target_column``: passed to ``get_xy`` as the target column.
        * ``exclude_columns``: accepted but not used by this method.
        * ``nr_iters``: number of cross-validation repetitions (default 1).
        * ``nr_folds``: number of stratified folds (default 2).

        The counts are parsed as integers by the request parser, so
        malformed values are rejected by the parser instead of failing later
        in ``int()``. Counts that would make the evaluation impossible
        (``nr_iters < 1`` or ``nr_folds < 2``) are rejected before the upload
        is stored, rather than failing after training with a division by
        zero.

        The upload is saved below ``UPLOAD_DIR`` and registered through
        ``FileDao``; a model built by ``train_svm`` is dumped next to it as
        ``<storage_path>.classifier`` and a session referencing both files
        is created.

        :param id: identifier used to retrieve the classifier
        :return: result of ``self.output_html``: a page linking to the new
            session with status 201, or an error page with status 400 for
            invalid counts
        """
        parser = reqparse.RequestParser()
        parser.add_argument('file', type=FileStorage, required=True, location='files')
        parser.add_argument('R', type=str, location='form')
        parser.add_argument('target_column', type=str, location='form')
        parser.add_argument('exclude_columns', type=str, location='form')
        parser.add_argument('nr_iters', type=int, default=1, location='form')
        parser.add_argument('nr_folds', type=int, default=2, location='form')
        parser.add_argument('subject_id', type=str, required=True, location='form')
        args = parser.parse_args()

        # Pop the training options so that only file attributes remain in
        # ``args`` when it is handed to FileDao.create below.
        subject_id = args.pop('subject_id')
        R = args.pop('R')
        target_column = args.pop('target_column')
        # NOTE(review): the value of exclude_columns is discarded because
        # get_xy is called without it here - confirm whether it should be
        # forwarded.
        args.pop('exclude_columns')
        nr_iters = args.pop('nr_iters')
        nr_folds = args.pop('nr_folds')
        upload = args.pop('file')

        if nr_iters < 1 or nr_folds < 2:
            return self.output_html(
                '<p>Nr. iterations must be at least 1 and nr. folds must be '
                'at least 2.</p>', 400)

        args['storage_id'] = generate_string()
        args['storage_path'] = os.path.join(current_app.root_path, self.config()['UPLOAD_DIR'], args['storage_id'])
        upload.save(args['storage_path'])
        args['name'] = upload.filename
        # Everything after the first dot of the uploaded file name.
        args['extension'] = '.'.join(args['name'].split('.')[1:])
        args['content_type'] = 'application/octet-stream'
        args['media_link'] = args['storage_path']
        args['size'] = 0

        f_dao = FileDao(self.db_session())
        f = f_dao.create(**args)

        classifier_dao = ClassifierDao(self.db_session())
        classifier = classifier_dao.retrieve(id=id)
        print('Training classifier {} on file {}'.format(classifier.name, f.name))
        if R == 'true':
            # NOTE(review): the exit status of the R script is ignored and
            # the relative path depends on the working directory - confirm.
            os.system('Rscript ./R/svm.R 1')

        # After classifier training finishes, create a session that captures the results
        print('Calculating classifier performance...')
        features = pd.read_csv(f.storage_path, index_col=subject_id)
        x, y = get_xy(features, target_column=target_column)

        scores = 0
        # Run performance evaluation on classifier
        # NOTE(review): StratifiedKFold is created without shuffling, so
        # every repetition presumably evaluates the same splits - confirm.
        for _ in range(nr_iters):
            for train, test in StratifiedKFold(n_splits=nr_folds).split(x, y):
                _, score = score_svm(x, y, train, test)
                scores += score
        # NOTE(review): the average is computed but not shown or stored.
        avg_score = scores / (nr_iters * nr_folds)

        path = f.storage_path + '.classifier'
        print('Building optimized classifier and storing in {}'.format(path))
        classifier_model = train_svm(x, y)
        joblib.dump(classifier_model, path)

        session_dao = SessionDao(self.db_session())
        session = session_dao.create(**{
            'classifier': classifier,
            'training_file_path': f.storage_path,
            'classifier_file_path': path,
        })

        html = '<h3>Step 3 - View training session</h3>'
        html += '<p>You have successfully trained your classifier. Click the link below<br>'
        html += 'To view the training session and upload a file with cases to predict.</p>'

        html += '<a href="/sessions/{}">Session {}</a>'.format(session.id, session.id)

        return self.output_html(html, 201)