"""
Example demonstrating how to OCR a document and access the text at the
document, page, and block (or paragraph) level.
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Get the OCR object
my_config = IndicoConfig(
    host="app.indico.io",
    # Relative path to the API token file ("./path", not ".path")
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Submit the document for OCR extraction using the 'standard' preset
files_to_extract = client.call(
    DocumentExtraction(
        files=["./test_paragraphs.pdf"],
        json_config={"preset_config": "standard"},
    )
)

# Wait for the job of the (single) submitted file, then fetch its stored result
extracted_file = client.call(JobStatus(id=files_to_extract[0].id, wait=True))
json_result = client.call(RetrieveStorageObject(extracted_file.result))

# The code below shows how to get the OCR text from the 'json_result' object.
# Note: it may vary slightly if you use DocumentExtraction configurations other than 'standard'

# Full Text
full_document_text = json_result["text"]

# Document Text split by page
text_by_page = [page["text"] for page in json_result["pages"]]

# Document Text split by block (or paragraph)
from indico import IndicoClient, IndicoConfig
from indico.queries import GraphQLRequest

# Build a client for app.indico.io, authenticated with the token file
client = IndicoClient(
    config=IndicoConfig(
        host="app.indico.io",
        api_token_path="./path/to/indico_api_token.txt",
    )
)

# GraphQL query that lists my datasets along with their model groups
datasets_query = "{ datasets { id name status rowCount numModelGroups modelGroups { id } } }"

# Send the query and print whatever the API returns
print(client.call(GraphQLRequest(query=datasets_query)))
from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Connect to the Indico API
api_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=api_config)

# Submit a single file for OCR with the 'ondocument' preset
ocr_request = DocumentExtraction(
    files=["./path_to_doc.pdf"],
    json_config={"preset_config": "ondocument"},
)
jobs = client.call(ocr_request)

# Block until the job for that file has completed
finished_job = client.call(JobStatus(id=jobs[0].id, wait=True))

# Download and print the result only when the job reports success
if finished_job.status == "SUCCESS":
    print(client.call(RetrieveStorageObject(finished_job.result)))
from indico import IndicoClient, IndicoConfig
from indico.queries import GetModelGroup, GetTrainingModelWithProgress

# The model group ID can be found on the review page of the indico platform
model_group_id = 4305

client = IndicoClient(
    config=IndicoConfig(
        host="app.indico.io",
        api_token_path="./path/to/indico_api_token.txt",
    )
)

# Fetch the model group itself, then its training model with progress details
model_group = client.call(GetModelGroup(model_group_id))
training_model = client.call(GetTrainingModelWithProgress(model_group_id))

# Report the name, the training status, and progress to two decimal places
print(f"Model Name: {model_group.name}")
print(f"Training status: {training_model.status}")
progress = training_model.training_progress
print(f"Percent complete: {progress.percent_complete:.2f}")
import os

from indico import IndicoClient, IndicoConfig

# Walks through the sources a client's host can come from, in increasing
# order of precedence: the built-in default, the INDICO_HOST environment
# variable, and an explicit IndicoConfig. The statement order below is the
# demonstration itself.

# Will connect to https://app.indico.io
client = IndicoClient()

# Environment variables override defaults
# (set here, before the next client is constructed)
os.environ["INDICO_HOST"] = "foo.bar.com"

# Will connect to https://foo.bar.com
client = IndicoClient()

# IndicoConfig will override environment variables and defaults
my_config = IndicoConfig(
    host="indico.my-company.com",  # Overrides environment variable
    api_token_path="../path/to/custom_api_token.txt",
)

# Will connect to https://indico.my-company.com
client = IndicoClient(config=my_config)
"""
Getting Image Predictions

Predicting on images differs slightly from predicting on text documents:
the images have to be uploaded first. The script below provides a sample.
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import ModelGroupPredict, UploadImages, JobStatus

api_config = IndicoConfig(host="app.indico.io", api_token_path="./indico_api_token.txt")
client = IndicoClient(config=api_config)

# UploadImages returns a list of upload URLs that are then used for predictions
image_paths = ["./path/to/image.png", "./path/to/image2.png"]
upload_urls = client.call(UploadImages(files=image_paths))

# Your Selected Model ID (from the model's Explain page in the app or using the API)
selected_model_id = 30970
prediction_job = client.call(
    ModelGroupPredict(model_id=selected_model_id, data=upload_urls)
)

# Wait for the predictions to finish
finished_job = client.call(JobStatus(prediction_job.id))

# Prediction results are ready
print(finished_job.result)
"""
Creating an Image Dataset from Local PNGs/JPGs

There are two ways to create a dataset of PNGs/JPGs, depending on whether the
images are stored locally on your computer or at a publicly accessible URL.
The code snippet below shows you what to do if the images are on your
computer.
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset
import pandas as pd

# Create an Indico API client
my_config = IndicoConfig(host="app.indico.io", api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# With local images you should create a CSV formatted like the one built below
# (it is built with pandas here purely for demonstration).
# One column contains the paths from the CSV to where the images are stored
# on your computer.
image_dataset = pd.DataFrame()
image_dataset["image_files"] = [
    "./path/from/csv/to/image.png",
    "./path/from/csv/to/image2.png",
]
# index=False keeps pandas from writing its row index as an extra column
image_dataset.to_csv("./image_dataset.csv", index=False)

# Use the CSV you created (like above) to create the dataset
dataset = client.call(
    CreateDataset(
        name="My Image Dataset",
        files="./image_dataset.csv",
from indico import IndicoClient, IndicoConfig
from indico.queries import (
    JobStatus,
    ListWorkflows,
    RetrieveStorageObject,
    WorkflowSubmission,
)

# Use your dataset's id to call its associated workflow
dataset_id = 6826

my_config = IndicoConfig(
    host="app.indico.io", api_token_path="./path/to/indico_api_token.txt"
)
client = IndicoClient(config=my_config)

# Return a list of workflows for this dataset id or an empty list if there are none
workflows = client.call(ListWorkflows(dataset_ids=[dataset_id]))

# An empty list is falsy, so nothing is submitted when the dataset has no workflow
if workflows:
    # Send a document through the first workflow in the list
    # Get back one Job per file
    # NOTE(review): submission=False presumably selects the job-based result
    # rather than submission ids - confirm against the WorkflowSubmission docs
    jobs = client.call(
        WorkflowSubmission(
            workflow_id=workflows[0].id,
            files=["./path/to/sample.pdf"],
            submission=False,
        )
    )
    # Only one file was sent, so take the first job
    job = jobs[0]
import os

from indico import IndicoClient, IndicoConfig

# --- Indico connection ---
# Edit this with the path to your API token
API_TOKEN_PATH = "/home/fitz/Documents/POC/chicken-parm/indico_api_token.txt"
INDICO_PROD_URL = "app.indico.io"

indico_prod_config = IndicoConfig(host=INDICO_PROD_URL, api_token_path=API_TOKEN_PATH)
PROD_CLIENT = IndicoClient(config=indico_prod_config)

# --- OCR settings: selects the 'detailed' preset ---
detailed_pdf_extraction_config = dict(preset_config="detailed")

# --- Data folders: both subfolders live directly under DATA_DIR ---
DATA_DIR = "/home/fitz/Documents/POC/chicken-parm/data"
SNAPSHOT_DIR, DEMO_FILE_DIR = (
    os.path.join(DATA_DIR, folder) for folder in ("snapshots", "demo_files")
)
GenerateSubmissionResult,
    JobStatus,
    ListSubmissions,
    RetrieveStorageObject,
    SubmissionResult,
    SubmitReview,
    UpdateSubmission,
    WaitForSubmissions,
    WorkflowSubmission,
    WorkflowSubmissionDetailed,
)

# Create an Indico API client
my_config = IndicoConfig(host="app.indico.io", api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# ID of the workflow that the example below submits to
workflow_id = 5

""" Example 1 Create a new submission Generate a submission result as soon as the submission is done processing Then mark the submission has having been retrieved """
# Submit one file; the call's result is a list, indexed below for that file
submission_ids = client.call(
    WorkflowSubmission(workflow_id=workflow_id, files=["./path_to_doc.pdf"]))
submission_id = submission_ids[0]

# NOTE(review): wait=True presumably blocks until the submission has finished
# processing and a result exists - confirm against the SubmissionResult docs
result_url = client.call(SubmissionResult(submission_id, wait=True))
# Download the stored result that the finished call points to
result = client.call(RetrieveStorageObject(result_url.result))
from datetime import datetime, timedelta

from indico import IndicoConfig, IndicoClient
from indico.filters import or_, UserMetricsFilter
from indico.queries import JobStatus, RetrieveStorageObject
from indico.queries.usermetrics import (
    GetUserSummary,
    GetUserSnapshots,
    GenerateChangelogReport,
    GetUserChangelog,
)
from indico.types.user_metrics import UserSummary

""" Example 1: User Summary """

# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io", api_token_path="./path/to/indico_api_token.txt"
)
client = IndicoClient(config=my_config)

# Fetch the summary and report the enabled-user count and number of app roles
user_summary: UserSummary = client.call(GetUserSummary())
print(f"Wow! there's {user_summary.users.enabled} users enabled on the app!")
print(f"Did you know there are {len(user_summary.app_roles)} roles available here?")

""" Example 2: User Snapshots Snapshots are paginated and iterable, so you can continue to iterate over them to build a full set """
snapshots = []
# Each item yielded by paginate() is itself iterable (presumably one page of
# snapshots), so extend() flattens everything into a single list
for snapshot in client.paginate(GetUserSnapshots(date=datetime.now())):
    snapshots.extend(snapshot)
print(f"Fetched {len(snapshots)} users for analysis")
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Create the dataset from the CSV of airline comments
dataset = client.call(
    CreateDataset(name="airline_comments", files=["./airline-comments.csv"])
)

# Look up the relevant csv columns by name
text_column_id = dataset.datacolumn_by_name("text").id  # csv text column
target_labelset_id = dataset.labelset_by_name("Target_1").id  # csv target class column

# Train the model on those columns
training_request = CreateModelGroup(
    name="my_classification_model",
    dataset_id=dataset.id,
    source_column_id=text_column_id,
    labelset_id=target_labelset_id,
    wait=True,  # wait for training to finish
)
model_group = client.call(training_request)

# Predict on the model
sample_texts = ["Sample Text to predict on", "More Sample text to predict on"]
job = client.call(
    ModelGroupPredict(
        model_id=model_group.selected_model.id,
        data=sample_texts,
    )
)