""" Example demonstrating how to OCR a document and access the text at the document, page, and block (or paragraph) level. """ from indico import IndicoClient, IndicoConfig from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject # Get the OCR object my_config = IndicoConfig(host="app.indico.io", api_token_path=".path/to/indico_api_token.txt") client = IndicoClient(config=my_config) files_to_extract = client.call( DocumentExtraction(files=["./test_paragraphs.pdf"], json_config={"preset_config": "standard"})) extracted_file = client.call(JobStatus(id=files_to_extract[0].id, wait=True)) json_result = client.call(RetrieveStorageObject(extracted_file.result)) # The code below shows how to get the OCR text from the 'json_result' object. # Note: it may vary slightly if you use DocumentExtraction configurations other than 'standard' # Full Text full_document_text = json_result["text"] # Doucment Text split by page text_by_page = list() for page in json_result["pages"]: text_by_page.append(page["text"]) # Document Text split by block (or paragraph)
from indico import IndicoClient, IndicoConfig
from indico.queries import GraphQLRequest

# Create an Indico API client from the host name and a token file on disk
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# GraphQL Query to list my datasets
qstr = (
    "{ datasets { id name status rowCount numModelGroups "
    "modelGroups { id } } }"
)

# Send the raw query and show whatever the API returns
request = GraphQLRequest(query=qstr)
response = client.call(request)
print(response)
from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# OCR a single file and wait for it to complete
extraction = DocumentExtraction(
    files=["./path_to_doc.pdf"],
    json_config={"preset_config": "ondocument"},
)
job = client.call(extraction)
extracted_file = client.call(JobStatus(id=job[0].id, wait=True))

# Only download and print the stored result when the job reports success
if extracted_file.status == "SUCCESS":
    result = client.call(RetrieveStorageObject(extracted_file.result))
    print(result)
below shows you what to do if the images are on your computer. """ from indico import IndicoClient, IndicoConfig from indico.queries import CreateDataset import pandas as pd # Create an Indico API client my_config = IndicoConfig(host="app.indico.io", api_token_path="./path/to/indico_api_token.txt") client = IndicoClient(config=my_config) # With local images you should create a CSV formatted (here for demonstration) like below # Where one column contains the paths from the csv to where the images are stored on your computer image_dataset = pd.DataFrame() image_dataset["image_files"] = [ "./path/from/csv/to/image.png", "./path/from/csv/to/image2.png", ] image_dataset.to_csv("./image_dataset.csv", index=False) # Use the CSV you created (like above) to create the dataset dataset = client.call( CreateDataset( name="My Image Dataset", files="./image_dataset.csv", from_local_images=True, image_filename_col= "image_files", # specify the column containing the images ))
from indico import IndicoClient, IndicoConfig
from indico.queries import GetModelGroup, GetTrainingModelWithProgress

# The model group ID can be found on the review page of the indico platform
model_group_id = 4305

my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Get the model group and training status
mg = client.call(GetModelGroup(model_group_id))
training_mg = client.call(GetTrainingModelWithProgress(model_group_id))

# Report the model name, its training status, and progress to two decimals
print(f"Model Name: {mg.name}")
print(f"Training status: {training_mg.status}")
progress = training_mg.training_progress
print(f"Percent complete: {progress.percent_complete:.2f}")
""" Getting Image Predictions Image Predictions work slightly differently from generating predictions from text documents in that they require you to first upload the documents. The script below provides a sample. """ from indico import IndicoClient, IndicoConfig from indico.queries import ModelGroupPredict, UploadImages, JobStatus config = IndicoConfig(host="app.indico.io", api_token_path="./indico_api_token.txt") client = IndicoClient(config) # UploadImages returns a list of upload URLs that you can use for gathering predictions urls = client.call( UploadImages(files=["./path/to/image.png", "./path/to/image2.png"])) # Get your Selected Model ID (from the model's Explain page in the app or using the API) job = client.call(ModelGroupPredict(model_id=30970, data=urls)) # Wait for the predictions to finish predictions = client.call(JobStatus(job.id)) # Prediction results are ready print(predictions.result)
JobStatus, ListWorkflows, RetrieveStorageObject, WorkflowSubmission, ) # Use your dataset's id to call it's associated workflow dataset_id = 6826 my_config = IndicoConfig( host="app.indico.io", api_token_path="./path/to/indico_api_token.txt" ) client = IndicoClient(config=my_config) # Return a list of workflows for this dataset id or an empty list if there are none workflows = client.call(ListWorkflows(dataset_ids=[dataset_id])) if workflows: # Send a document through the workflow # Get back one Job per file jobs = client.call( WorkflowSubmission( workflow_id=workflows[0].id, files=["./path/to/sample.pdf"], submission=False, ) ) job = jobs[0] # Retrieve and print your result status = client.call(JobStatus(id=job.id, wait=True))
) # Create an Indico API client my_config = IndicoConfig(host="app.indico.io", api_token_path="./path/to/indico_api_token.txt") client = IndicoClient(config=my_config) workflow_id = 5 """ Example 1 Create a new submission Generate a submission result as soon as the submission is done processing Then mark the submission has having been retrieved """ submission_ids = client.call( WorkflowSubmission(workflow_id=workflow_id, files=["./path_to_doc.pdf"])) submission_id = submission_ids[0] result_url = client.call(SubmissionResult(submission_id, wait=True)) result = client.call(RetrieveStorageObject(result_url.result)) print(result) client.call(UpdateSubmission(submission_id, retrieved=True)) """ Example 2 List all submissions that are COMPLETE or FAILED Generate submission results for these Delay gathering the results until required """ sub_filter = or_(SubmissionFilter(status="COMPLETE"), SubmissionFilter(status="FAILED"))
from datetime import datetime, timedelta
from indico import IndicoConfig, IndicoClient
from indico.filters import or_, UserMetricsFilter
from indico.queries import JobStatus, RetrieveStorageObject
from indico.queries.usermetrics import (
    GetUserSummary,
    GetUserSnapshots,
    GenerateChangelogReport,
    GetUserChangelog,
)
from indico.types.user_metrics import UserSummary

# Example 1: User Summary

# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

user_summary: UserSummary = client.call(GetUserSummary())
print(f"Wow! there's {str(user_summary.users.enabled)} users enabled on the app!")
print(f"Did you know there are {str(len(user_summary.app_roles))} roles available here?")

# Example 2: User Snapshots
# Snapshots are paginated and iterable, so you can continue to iterate over
# them to build a full set
snapshots = []
for snapshot in client.paginate(GetUserSnapshots(date=datetime.now())):
    # Each iteration yields one page; fold its entries into the running list
    snapshots.extend(snapshot)
print(f"Fetched {str(len(snapshots))} users for analysis")
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# create the dataset
dataset = client.call(
    CreateDataset(name="airline_comments", files=["./airline-comments.csv"])
)

# train the model w/ the relevant csv columns
text_column_id = dataset.datacolumn_by_name("text").id  # csv text column
target_labelset_id = dataset.labelset_by_name("Target_1").id  # csv target class column
training_request = CreateModelGroup(
    name="my_classification_model",
    dataset_id=dataset.id,
    source_column_id=text_column_id,
    labelset_id=target_labelset_id,
    wait=True,  # wait for training to finish
)
model_group = client.call(training_request)

# predict on the model
sample_texts = ["Sample Text to predict on", "More Sample text to predict on"]
job = client.call(
    ModelGroupPredict(
        model_id=model_group.selected_model.id,
        data=sample_texts,
    )
)