class ConvertDoc():
    """Convert ``uploads/guide.pdf`` with the Watson Document Conversion service.

    All of the work happens while the class body executes: the service client
    is created, the PDF is uploaded, and the value returned by
    ``convert_document`` is stored on the class as ``response``.
    """

    # Client for the Document Conversion service.
    document_conversion = DocumentConversionV1(
        url='https://gateway.watsonplatform.net/document-conversion/api',
        username='******',
        password='******',
        version='2016-02-10')

    config = {
        'conversion_target': 'NORMALIZED_TEXT',
        # Use a custom configuration.
        # http://www.ibm.com/watson/developercloud/doc/document-conversion/customizing.shtml
        'word': {
            'heading': {
                'fonts': [
                    {'level': 1, 'min_size': 24},
                    {'level': 2, 'min_size': 16, 'max_size': 24},
                ]
            }
        }
    }

    # A PDF is binary data: open it in 'rb' so the bytes are uploaded
    # unchanged. Text mode can fail to decode the file or alter its bytes
    # through newline translation.
    with open('uploads/guide.pdf', 'rb') as document:
        response = document_conversion.convert_document(document=document, config=config)

    # The call form of print behaves the same on Python 2 and Python 3.
    print(response.text)

    # To pass to next page?
    # def convert_doc()
    #     return response.text
    # TODO: finding the time and parsing with chrono time/date parser needs
    # to be done.
    # https://github.com/wanasit/chrono-python
import json
from os.path import join, dirname

from watson_developer_cloud import DocumentConversionV1

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversionV1(
    username='******',
    password='******',
    version='2016-02-09')

# Ask for the document to convert and for the file that receives the result.
path = input('please enter the path name: ')
name = input('please enter the name of your new file with appending .json: ')

# Example with JSON
config = {'conversion_target': DocumentConversionV1.ANSWER_UNITS}

# Read-only binary mode: the document is only uploaded, never modified, so it
# needs neither write permission nor text decoding. The context manager closes
# the file even when the conversion raises.
with open(path, 'rb') as f:
    answer_units = document_conversion.convert_document(document=f, config=config)

# Open the output only after the conversion succeeded, so a failed request
# does not create or truncate the target file.
with open(name, 'w') as o:
    json.dump(answer_units, o, indent=2)
__author__ = 'rcj1492'
__created__ = '2017.04'
__license__ = '©2017 Collective Acuity'

from labpack.records.settings import load_settings
from watson_developer_cloud import DocumentConversionV1

if __name__ == '__main__':

    import json

    # Input document and the file that receives the conversion result.
    file_path = '../../media/test-pdf-4.pdf'
    save_path = '../../media/test-pdf-4.json'

    # Service credentials are read from a settings file.
    watson_config = load_settings('../../cred/watson.yaml')
    username = watson_config['watson_conversion_username']
    password = watson_config['watson_conversion_password']

    document_conversion = DocumentConversionV1(username=username, password=password, version='2015-12-15')
    convert_config = {'conversion_target': 'ANSWER_UNITS'}

    # The context manager closes the source document once the request has
    # completed, including when the request raises.
    with open(file_path, "rb") as file_data:
        response = document_conversion.convert_document(document=file_data, config=convert_config)
    print(response.status_code)

    # The with-statement closes save_file on exit; no explicit close() needed.
    with open(save_path, 'wt', encoding='utf-8', errors='ignore') as save_file:
        save_file.write(json.dumps(response.json(), indent=2))
from ibm_cloud_env import IBMCloudEnv
from watson_developer_cloud import DocumentConversionV1

# Module-level Document Conversion client; the username and password are
# looked up through IBMCloudEnv when this module is imported.
document_conversion = DocumentConversionV1(
    version='2016-02-10',
    username=IBMCloudEnv.getString('watson_document_conversion_username'),
    password=IBMCloudEnv.getString('watson_document_conversion_password'),
)


def getService(app):
    """Return the service name paired with the module-level client.

    ``app`` is accepted but is not used by this function.
    """
    service_name = 'watson-document-conversion'
    return service_name, document_conversion
# coding=utf-8
import json
from os.path import join, dirname
from watson_developer_cloud import DocumentConversionV1 as DocumentConversion

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversion(username='******', password='******')

# The sample document is located relative to this script's directory.
sample_path = join(dirname(__file__), '../resources/sample-docx.docx')

with open(sample_path, 'rb') as document:
    config = {'conversion_target': DocumentConversion.ANSWER_UNITS}
    converted = document_conversion.convert_document(document=document, config=config)
    # Pretty-print the conversion result as JSON.
    print(json.dumps(converted, indent=2))
# coding=utf-8
import json
from os.path import join, dirname
from io import open
from watson_developer_cloud import DocumentConversionV1

document_conversion = DocumentConversionV1(
    username='******',
    password='******',
    version='2016-02-09')

# Both examples below convert the same HTML file, found relative to this script.
example_path = join(dirname(__file__), '../resources/example.html')

# Example of retrieving html or plain text
with open(example_path, encoding='utf8') as document:
    config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
    html_response = document_conversion.convert_document(
        document=document, config=config, media_type='text/html')
    print(html_response.content)

# Example with JSON
with open(example_path, encoding='utf8') as document:
    # Reuse the config dict from the first example, switching only the target.
    config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
    answer_units = document_conversion.convert_document(document=document, config=config)
    print(json.dumps(answer_units, indent=2))

# Examples of index_document API
print("########## Example of a dry run of index_document with only a document "
      "##########")
import pprint from os.path import join, dirname import os from watson_developer_cloud import DocumentConversionV1 from watson_developer_cloud import NaturalLanguageUnderstandingV1 from flask import Flask, render_template, request from werkzeug import secure_filename import requests from requests.auth import HTTPBasicAuth from json2html import * app = Flask(__name__) # Doc conversion document_conversion = DocumentConversionV1( username='******', password='******', version='2015-12-15') #Natural language understanding cred = {'user': '******', 'pass': '******'} NLU_url = 'https://gateway.watsonplatform.net/natural-language-understanding/api/v1' @app.route('/', methods=['GET', 'POST']) def serve_page(): if request.method == 'POST': model_id = request.values.get('model_id') resp = requests.delete(NLU_url + '/models/' + model_id, auth=HTTPBasicAuth(cred['user'], cred['pass'])) return render_template('index.htm', model_id="model deleted") else:
'''
Created on Jun 2, 2016
@author: Jerome Boyer - IBM
'''
import json
from os.path import join, dirname
from watson_developer_cloud import DocumentConversionV1

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversionV1(
    username='******',
    password='******',
    version='2016-02-09')


def convertBattryDocToAnswerUnit():
    """Convert ``./data/Battery.docx`` using the ANSWER_UNITS conversion target.

    Returns whatever ``document_conversion.convert_document`` returns for that
    request; the script entry point below serializes it with ``json.dumps``.
    """
    config = {'conversion_target': DocumentConversionV1.ANSWER_UNITS}
    with open('./data/Battery.docx', 'rb') as document:
        answer_units = document_conversion.convert_document(document=document, config=config)
    return answer_units


if __name__ == '__main__':
    # print(json.dumps(buildAnswerUnits(), indent=2))
    converted = convertBattryDocToAnswerUnit()
    print(json.dumps(converted, indent=2))
    # print(buildNormalizedHtml())
# coding=utf-8
import json
from os.path import join, dirname
from watson_developer_cloud import DocumentConversionV1 as DocumentConversion

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversion(username='******', password='******',
                                         version='2016-02-09')

# Open the upload in binary mode so the file's bytes are sent unchanged.
# Text mode would decode them with the platform's default encoding (which can
# raise or alter non-ASCII content) and apply newline translation.
with open(join(dirname(__file__), '../resources/example.html'), 'rb') as document:
    config = {'conversion_target': DocumentConversion.ANSWER_UNITS}
    # Pretty-print the conversion result as JSON.
    print(
        json.dumps(document_conversion.convert_document(
            document=document, config=config, media_type='text/html'),
            indent=2))
# coding=utf-8
import json
from os.path import join, dirname
from watson_developer_cloud import DocumentConversionV1 as DocumentConversion

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversion(username='******', password='******')

# print(json.dumps(document_conversion.get_jobs(), indent=2))

# The sample document is located relative to this script's directory.
docx_path = join(dirname(__file__), '../resources/sample-docx.docx')

with open(docx_path, 'rb') as document:
    config = {'conversion_target': DocumentConversion.ANSWER_UNITS}
    result = document_conversion.convert_document(document=document, config=config)
    # Pretty-print the conversion result as JSON.
    print(json.dumps(result, indent=2))
# coding=utf-8 import json from os.path import join, dirname from io import open from watson_developer_cloud import DocumentConversionV1 ''' { "url" : "https://gateway.aibril-watson.kr/document-conversion/api", "username" : "61d34a43-4105-42a0-9ec8-01774c3ce1a6", "password" : "ydCaaRag3Um5" } ''' document_conversion = DocumentConversionV1( username='******', password='******', version='2016-02-09') # Example of retrieving html or plain text # with open(join(dirname(__file__), '../resources/thesis/sample.pdf'), # encoding='ascii') as document: # config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML} # print(document_conversion.convert_document( # document=document, config=config, media_type='text/html').content) # Example with JSON with open(join(dirname(__file__), '../resources/thesis/medical.pdf'), 'rb') as document: config = {'conversion_target': DocumentConversionV1.NORMALIZED_TEXT} config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS print( json.dumps(document_conversion.convert_document(document=document,
# coding=utf-8
import json
from os.path import join, dirname
from watson_developer_cloud import DocumentConversionV1 as DocumentConversion

# Client for the Watson Document Conversion service.
document_conversion = DocumentConversion(username='******', password='******',
                                         version='2016-02-09')

# Binary mode sends the HTML file's bytes exactly as stored on disk; text mode
# would depend on the platform's default encoding and newline handling.
with open(join(dirname(__file__), '../resources/example.html'), 'rb') as document:
    config = {'conversion_target': DocumentConversion.ANSWER_UNITS}
    # Pretty-print the conversion result as JSON.
    print(json.dumps(document_conversion.convert_document(document=document,
                                                          config=config,
                                                          media_type='text/html'),
                     indent=2))