コード例 #1
0
ファイル: dataprep.py プロジェクト: openmednlp/boonai
def upload_dataset(file, name, description, train, test, label, input, target):
    """Store a raw dataset file via the Storage Adapter API, then register
    the dataset's metadata with the Datasets API.

    :param file: raw file content, sent as the request body.
    :param name: dataset display name.
    :param description: free-text description.
    :param train: train-split value (forwarded as-is).
    :param test: test-split value (forwarded as-is).
    :param label: label column identifier.
    :param input: input column identifier (NOTE: shadows the builtin
        ``input``; name kept for backward compatibility with callers).
    :param target: target column identifier.
    :return: True on success, False on failure (a flash message is set).
    """
    datasets_api_url = current_app.config['DATASETS_API']
    storage_adapter_api_url = current_app.config['STORAGE_ADAPTER_API']
    try:
        r = requests.post(storage_adapter_api_url,
                          data=file,
                          headers={'Content-Type': 'application/octet-stream'})
        r.raise_for_status()
    except requests.exceptions.RequestException:
        # RequestException also covers connection errors and timeouts,
        # which the previous HTTPError-only handler let propagate.
        flash(
            'Could not store file - got HTTPError from the Storage Adapter API',
            'warning')
        return False

    storage_adapter_json = r.json()

    dataset_json = {
        'name': name,
        'description': description,
        'train': train,  # TODO: maybe it can just accept true false
        'test': test,
        'label': label,
        'input': input,
        'target': target,
        'storage_adapter_uri': hateoas_get_link(storage_adapter_json, 'self'),
        'binary_uri': hateoas_get_link(storage_adapter_json, 'binary'),
        'user_id': current_user.id,
        'project_id': 0
    }

    try:
        r = requests.post(datasets_api_url, json=dataset_json)
    except requests.exceptions.RequestException:
        # Without raise_for_status(), requests never raises HTTPError here,
        # so the old ``except HTTPError`` branch was unreachable; catch the
        # broader RequestException (connection errors, timeouts) instead.
        flash('Could not store file - got HTTPError from Dataset API',
              'warning')
        return False
    if not 200 <= r.status_code < 300:
        flash(
            'Could not store file - Dataset API status code: {}'.format(
                r.status_code), 'warning')
        return False
    flash('Dataset uploaded', 'success')
    return True
コード例 #2
0
def dataset_get(dataset_id):
    """Show a single dataset: its metadata plus a paginated view of the
    backing CSV file.

    Redirects to the dataset list with a flash message on any fetch error.
    """
    # TODO: A lot of it should be replaced -> df = h.dataset_id_to_df(dataset_id)

    dataset_single_uri = h.url_join(current_app.config['DATASETS_API'],
                                    dataset_id)
    r = requests.get(dataset_single_uri)

    if int(r.status_code) != 200:
        flash(
            'Dataset API response code was {}, '
            'cannot fetch the dataset'.format(r.status_code), 'warning')
        return redirect(url_for('site_datasets.dataset_list'))

    json_data = json.loads(r.content)

    storage_binary_uri = h.hateoas_get_link(json_data, 'binary')

    label_url = h.url_join(request.url,
                           'al')  #h.hateoas_get_link(json_data, 'label')

    # TODO: get storage delete uri from dataset
    delete_url = url_for('site_datasets.dataset_delete', dataset_id=dataset_id)

    r_storage = requests.get(storage_binary_uri)
    if r_storage.status_code != 200:
        # BUG FIX: the message previously interpolated r.status_code (the
        # Dataset API response, always 200 at this point) instead of the
        # storage response that actually failed.
        flash(
            'Storage API response code was {}, cannot fetch the file'.format(
                r_storage.status_code), 'warning')
        return redirect(url_for('site_datasets.dataset_list'))

    csv_content = r_storage.content

    try:
        # Assumes the stored binary is UTF-8 encoded CSV text.
        df = pd.read_csv(io.StringIO(csv_content.decode('utf-8')))
    except EmptyDataError:
        flash('Could not make a dataset out of the storage file - it is empty',
              'warning')
        return redirect(url_for('site_datasets.dataset_list'))

    if df.empty:
        # Headers only, no rows: still render the page, just warn.
        flash('Cannot show dataset - it is empty', 'warning')

    html_params = h.get_html_pagination_params(request.args, df)

    return render_template(
        'datasets/info.html',
        info=json_data,
        data=html_params['page_data'],
        pagination=html_params['pagination'],
        download_url=storage_binary_uri,
        label_url=label_url,
        delete_url=delete_url,
    )
コード例 #3
0
def dataset_id_to_df(dataset_id):
    """Resolve a dataset id to its stored file and load it as a DataFrame.

    Fetches the dataset record, follows its 'binary' HATEOAS link, and
    parses the downloaded content as CSV.
    """
    dataset_uri = h.url_join(current_app.config['DATASETS_API'], dataset_id)
    dataset_json = requests.get(dataset_uri).json()

    binary_uri = h.hateoas_get_link(dataset_json, 'binary')

    # TODO: This is implementation for for text based problems only
    # NOTE(review): assumes the stored binary is UTF-8 CSV text.
    csv_text = requests.get(binary_uri).content.decode('utf-8')
    return pd.read_csv(StringIO(csv_text))
コード例 #4
0
    def post(self):
        """Train a model on a submitted dataset and persist the result.

        Expects a JSON body with: name, description, dataset_id,
        algorithm_id, user_id, project_id. Trains the selected algorithm
        on the dataset's CSV (column 0 = input, column 1 = target),
        persists the trained artifacts, records the model in the DB, and
        redirects to the new model's page.
        """
        storage_adapter_api = current_app.config['STORAGE_ADAPTER_API']
        datasets_api = current_app.config['DATASETS_API']

        posted_json = request.get_json()
        name = posted_json['name']
        description = posted_json['description']
        dataset_id = posted_json['dataset_id']
        algorithm_id = int(posted_json['algorithm_id'])
        user_id = posted_json['user_id']
        project_id = posted_json['project_id']

        # Resolve the dataset record, then download its backing file.
        dataset_uri = datasets_api + '/' + dataset_id
        r = requests.get(dataset_uri)
        r_dataset = r.json()

        storage_binary_uri = h.hateoas_get_link(r_dataset, 'binary')

        r_binary = requests.get(storage_binary_uri)

        # TODO: This is an implementation for text based problems only.
        # Assumes the stored binary is UTF-8 CSV text.
        dataset = r_binary.content.decode('utf-8')
        csv = StringIO(dataset)
        df = pd.read_csv(csv)

        algorithm = algorithm_dict[algorithm_id](storage_adapter_api)
        # BUG FIX: DataFrame.ix was deprecated and removed from pandas;
        # .iloc performs the same positional column selection.
        algorithm.train(df.iloc[:, 0], df.iloc[:, 1])
        algorithm.persist()

        trained_model = TrainedModel(
            name=name,
            description=description,
            algorithm_id=int(algorithm_id),
            dataset_id=dataset_id,
            user_id=user_id,
            project_id=project_id,
        )
        for key in algorithm.resources:
            resource = ModelResource()
            resource.name = key
            resource.uri = algorithm.resources[key]
            trained_model.resources.append(resource)
        # BUG FIX: session.add was inside the loop, re-adding the same
        # object once per resource (and never adding it when the algorithm
        # produced no resources). Add it exactly once, then commit.
        db.session.add(trained_model)
        db.session.commit()

        return redirect(
            url_for('machine_learning_models.single',
                    model_id=trained_model.id))
コード例 #5
0
def dataset_delete(dataset_id):
    """Delete a dataset record and its stored file, then redirect back to
    the dataset page.

    NOTE(review): neither DELETE response is checked, and the redirect
    targets the just-deleted dataset — behavior preserved as-is.
    """
    base_url = current_app.config['DATASETS_API']
    dataset_url = h.url_join(base_url, dataset_id)

    # Fetch the record first so we can follow its 'storage' link.
    response = requests.get(dataset_url)
    dataset_json = json.loads(response.content)
    storage_url = h.hateoas_get_link(dataset_json, 'storage')
    # file_url = _get_link(json_data['links'], 'file')  # TODO: get storage delete uri from dataset

    # Remove the dataset record, then the backing file.
    requests.delete(dataset_url)
    requests.delete(storage_url)

    flash('Not yet implemented', 'info')

    return redirect(url_for('.dataset_get', dataset_id=dataset_id))
コード例 #6
0
def model(model_id):
    """Render a minimal HTML summary (id, name, description, link) for a
    single trained model fetched from the Models API."""
    api_url = h.url_join(current_app.config['MODELS_API'], str(model_id))
    payload = json.loads(requests.get(api_url).content)

    file_url = h.hateoas_get_link(payload, 'file')  # TODO: fix this

    template = '''
    <p>
    Id: {}</br>
    Name: {}</br>
    Description:<br/> 
    {}</br>
    <a href={}>link</a></p>
    '''
    content = payload['content']
    return template.format(content['id'], content['name'],
                           content['description'], file_url)
コード例 #7
0
ファイル: Algorithm.py プロジェクト: openmednlp/boonai
    def _persist_keras(self, model):
        """Serialize a Keras model to a temporary h5 file, upload the bytes
        to the storage adapter, and record the resulting URI.

        :param model: a Keras model exposing ``save(path)``.
        :return: the storage adapter URI of the uploaded artifact (also
            stored under ``self.resources['keras']``).
        """
        import os  # local import: only needed here for temp-file cleanup

        # BUG FIX: mkstemp returns an OPEN file descriptor which was
        # previously leaked, and the temp file was never removed.
        fd, tmp_path = tempfile.mkstemp()
        os.close(fd)
        try:
            model.save(tmp_path)
            with open(tmp_path, 'rb') as f:
                binary_data = f.read()
        finally:
            os.remove(tmp_path)

        r = requests.post(self.storage_adapter_api, data=binary_data)
        storage_adapter_uri = h.hateoas_get_link(r.json(), 'self')
        self.resources['keras'] = storage_adapter_uri
        return storage_adapter_uri
コード例 #8
0
ファイル: al.py プロジェクト: openmednlp/boonai
def al(dataset_id):
    """Active-learning page for a dataset.

    Handles two forms:
      * ``UpdateForm`` — upload a csv/excel file that replaces the
        dataset's stored file (normalized to utf-8 csv).
      * ``DownloadForm`` — mark rows for labeling and download the
        dataset as csv or xlsx.

    Renders the page when neither form was submitted.
    """
    form_update = UpdateForm()
    form_generate = DownloadForm()
    if form_update.validate_on_submit():
        r_dataset = h.get_dataset_response(dataset_id)
        dataset_json = r_dataset.json()
        storage_adapter_uri = h.hateoas_get_link(dataset_json, 'storage')

        f = form_update.file.data
        filename = secure_filename(f.filename)

        file_type = h.get_file_type(filename)
        if file_type == 'excel':
            df = pd.read_excel(f, encoding='utf-8')
        elif file_type != 'csv':
            raise ValueError('Expected Excel or csv file, received ' +
                             file_type)
        else:
            df = pd.read_csv(f, encoding='utf-8')

        # Normalize whatever was uploaded to utf-8 csv before storing it.
        csv_data = df.to_csv(index=False, encoding='utf-8').encode('utf-8')
        requests.put(storage_adapter_uri, data=csv_data)
        # NOTE(review): the PUT response is not checked — a failed upload
        # still flashes success; consider verifying the status code.
        # BUG FIX: removed a dead ``r_dataset.json()`` call whose result
        # was discarded.

        flash(
            "Updated dataset's file '{}' (id: {}) with '{}') ".format(
                dataset_json['name'], dataset_json['id'], filename), 'success')
        return redirect(url_for('.al', dataset_id=dataset_id))

    if form_generate.validate_on_submit():
        mark_for_labeling(dataset_id)

        df = h.dataset_id_to_df(dataset_id)

        if form_generate.csv_download.data:
            result = df.to_csv(encoding='utf-8')
            mimetype = "text/csv"
            filename = 'result.csv'
        elif form_generate.xls_download.data:
            output = io.BytesIO()
            writer = pd.ExcelWriter(output, engine='xlsxwriter')
            df.to_excel(writer, sheet_name='Sheet1', encoding='utf-8')
            # NOTE(review): ExcelWriter.save() is deprecated in newer
            # pandas (use close()); kept for compatibility with the
            # project's pinned version.
            writer.save()
            result = output.getvalue()
            mimetype = "application/vnd.ms-excel"
            filename = 'result.xlsx'
        else:
            flash(
                'Expected one of the buttons to be pushed, '
                'but that did not happen', 'warning')
            # BUG FIX: message previously said 'csl' instead of 'csv'.
            raise ValueError('Form response was neither csv nor xls')

        return Response(result,
                        mimetype=mimetype,
                        headers={
                            "Content-disposition":
                            "attachment; filename={}".format(filename)
                        })

    return render_template('al/index.html',
                           form_generate=form_generate,
                           form_update=form_update)
コード例 #9
0
ファイル: Algorithm.py プロジェクト: openmednlp/boonai
 def _persist_sklearn(self, model):
     """Pickle an sklearn model, upload it to the storage adapter, and
     record the resulting URI under ``self.resources['sklearn']``."""
     payload = pickle.dumps(model)
     response = requests.post(self.storage_adapter_api, data=payload)
     uri = h.hateoas_get_link(response.json(), 'self')
     self.resources['sklearn'] = uri
     return uri  # TODO: not sure if useful
コード例 #10
0
ファイル: Algorithm.py プロジェクト: openmednlp/boonai
 @staticmethod
 def _get_binary(uri):
     """Follow a storage record's 'binary' HATEOAS link and return the
     raw file bytes.

     BUG FIX: declared ``@staticmethod`` — the method takes no ``self``
     and uses none, so an instance call ``self._get_binary(uri)`` would
     previously have bound the instance to ``uri``. Class-level calls
     keep working unchanged.
     """
     # TODO: could use another approach if multiple storage backends are used
     r_json = requests.get(uri).json()
     bin_uri = h.hateoas_get_link(r_json, 'binary')
     return requests.get(bin_uri).content