Esempio n. 1
0
"""
Example demonstrating how to OCR a document and access the text at the document, page, and 
block (or paragraph) level.
"""

from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Build an authenticated client for the Indico platform.
my_config = IndicoConfig(
    host="app.indico.io",
    # Relative path (note the leading "./") to the file holding the API token.
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Submit the document for OCR. The call returns a list; its first entry is the
# job for the single file submitted here.
files_to_extract = client.call(
    DocumentExtraction(
        files=["./test_paragraphs.pdf"],
        json_config={"preset_config": "standard"},
    )
)

# Wait for the OCR job to finish, then download the JSON stored at its result.
extracted_file = client.call(JobStatus(id=files_to_extract[0].id, wait=True))
json_result = client.call(RetrieveStorageObject(extracted_file.result))

# The code below shows how to get the OCR text from the 'json_result' object.
# Note: it may vary slightly if you use DocumentExtraction configurations other than 'standard'

# Full Text
full_document_text = json_result["text"]

# Document Text split by page
text_by_page = [page["text"] for page in json_result["pages"]]

# Document Text split by block (or paragraph)
from indico import IndicoClient, IndicoConfig
from indico.queries import GraphQLRequest

# Connect to the Indico platform using a locally stored API token.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# GraphQL query that lists datasets along with the ids of their model groups.
qstr = """{
            datasets {
                id
                name
                status
                rowCount
                numModelGroups
                modelGroups {
                    id
                }
            }
        }"""

# Wrap the raw query in a request object, send it, and show the response.
list_datasets_request = GraphQLRequest(query=qstr)
response = client.call(list_datasets_request)
print(response)
Esempio n. 3
0
from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Set up the Indico API client.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Submit a single file for OCR, then block until its job has finished.
ocr_request = DocumentExtraction(
    files=["./path_to_doc.pdf"],
    json_config={"preset_config": "ondocument"},
)
job = client.call(ocr_request)
extracted_file = client.call(JobStatus(id=job[0].id, wait=True))

# Only download and print the stored OCR output when the job succeeded.
if extracted_file.status == "SUCCESS":
    result = client.call(RetrieveStorageObject(extracted_file.result))
    print(result)
Esempio n. 4
0
below shows you what to do if the images are on your computer.
"""

from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset
import pandas as pd

# Set up the Indico API client.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# For local images, build a CSV (created here for demonstration) with one
# column holding the path from the CSV to each image on your computer.
image_dataset = pd.DataFrame(
    {
        "image_files": [
            "./path/from/csv/to/image.png",
            "./path/from/csv/to/image2.png",
        ]
    }
)
image_dataset.to_csv("./image_dataset.csv", index=False)

# Create the dataset from that CSV, pointing the platform at the image column.
create_request = CreateDataset(
    name="My Image Dataset",
    files="./image_dataset.csv",
    from_local_images=True,
    image_filename_col="image_files",  # column containing the image paths
)
dataset = client.call(create_request)
from indico import IndicoClient, IndicoConfig
from indico.queries import GetModelGroup, GetTrainingModelWithProgress

# The model group ID can be found on the review page of the indico platform.
model_group_id = 4305

my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Fetch the model group itself and, separately, its training progress.
mg = client.call(GetModelGroup(model_group_id))
training_mg = client.call(GetTrainingModelWithProgress(model_group_id))

# Report the name, the training status, and the completion to two decimals.
print(f"Model Name: {mg.name}")
print(f"Training status: {training_mg.status}")
percent_complete = training_mg.training_progress.percent_complete
print(f"Percent complete: {percent_complete:.2f}")
"""
Getting Image Predictions

Image Predictions work slightly differently from generating predictions from text documents
in that they require you to first upload the documents. The script below provides a sample. 
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import ModelGroupPredict, UploadImages, JobStatus

# Create an Indico API client.
config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./indico_api_token.txt",
)
client = IndicoClient(config=config)

# UploadImages returns a list of upload URLs that you can use for gathering predictions
urls = client.call(
    UploadImages(files=["./path/to/image.png", "./path/to/image2.png"])
)

# Get your Selected Model ID (from the model's Explain page in the app or using the API)
job = client.call(ModelGroupPredict(model_id=30970, data=urls))

# Wait for the predictions to finish. `wait=True` is passed explicitly so the
# blocking behavior is visible at the call site rather than left to a default.
predictions = client.call(JobStatus(id=job.id, wait=True))

# Prediction results are ready
print(predictions.result)
Esempio n. 7
0
    JobStatus,
    ListWorkflows,
    RetrieveStorageObject,
    WorkflowSubmission,
)

# Use your dataset's id to call its associated workflow
dataset_id = 6826

my_config = IndicoConfig(
    host="app.indico.io", api_token_path="./path/to/indico_api_token.txt"
)
client = IndicoClient(config=my_config)

# Return a list of workflows for this dataset id or an empty list if there are none
workflows = client.call(ListWorkflows(dataset_ids=[dataset_id]))

if workflows:
    # Send a document through the workflow
    # Get back one Job per file
    jobs = client.call(
        WorkflowSubmission(
            workflow_id=workflows[0].id,
            files=["./path/to/sample.pdf"],
            submission=False,
        )
    )
    job = jobs[0]

    # Wait for the job to finish processing
    status = client.call(JobStatus(id=job.id, wait=True))

    # Retrieve and print your result, but only when the job succeeded
    if status.status == "SUCCESS":
        wf_result = client.call(RetrieveStorageObject(status.result))
        print(wf_result)
Esempio n. 8
0
)

# Set up the Indico API client.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# ID of the workflow that documents are submitted to.
workflow_id = 5
"""
Example 1
Create a new submission
Generate a submission result as soon as the submission is done processing
Then mark the submission as having been retrieved
"""

# Submit the document. The call returns a list; take its first entry as the
# id of the single submission created here.
new_submission = WorkflowSubmission(
    workflow_id=workflow_id,
    files=["./path_to_doc.pdf"],
)
submission_ids = client.call(new_submission)
submission_id = submission_ids[0]

# Wait for the submission's result to be generated, then download and show it.
result_url = client.call(SubmissionResult(submission_id, wait=True))
result = client.call(RetrieveStorageObject(result_url.result))
print(result)

# Flag the submission as retrieved.
client.call(UpdateSubmission(submission_id, retrieved=True))
"""
Example 2
List all submissions that are COMPLETE or FAILED
Generate submission results for these
Delay gathering the results until required
"""
# Match submissions whose status is either COMPLETE or FAILED.
sub_filter = or_(
    SubmissionFilter(status="COMPLETE"),
    SubmissionFilter(status="FAILED"),
)
Esempio n. 9
0
from datetime import datetime, timedelta

from indico import IndicoConfig, IndicoClient
from indico.filters import or_, UserMetricsFilter
from indico.queries import JobStatus, RetrieveStorageObject
from indico.queries.usermetrics import GetUserSummary, GetUserSnapshots, GenerateChangelogReport, GetUserChangelog
from indico.types.user_metrics import UserSummary
"""
Example 1: User Summary
"""
# Create an Indico API client
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Fetch the user summary and report the enabled-user count and the number of
# app roles it lists.
user_summary: UserSummary = client.call(GetUserSummary())
print(f"Wow! there's {user_summary.users.enabled} users enabled on the app!")
print(
    f"Did you know there are {len(user_summary.app_roles)} roles available here?"
)
"""

Example 2: User Snapshots
Snapshots are paginated and iterable, 
so you can continue to iterate over them to build a full set
"""
# Accumulate every snapshot across all pages. Each iteration yields an
# iterable of snapshots (one page), which is appended to the running list.
snapshots = []
for page in client.paginate(GetUserSnapshots(date=datetime.now())):
    snapshots.extend(page)

print(f"Fetched {len(snapshots)} users for analysis")
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Set up the Indico API client.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Create the dataset from the CSV file.
dataset = client.call(
    CreateDataset(name="airline_comments", files=["./airline-comments.csv"])
)

# Train the model on the relevant CSV columns and wait for training to finish.
training_request = CreateModelGroup(
    name="my_classification_model",
    dataset_id=dataset.id,
    source_column_id=dataset.datacolumn_by_name("text").id,  # csv text column
    labelset_id=dataset.labelset_by_name("Target_1").id,  # csv target class column
    wait=True,
)
model_group = client.call(training_request)

# Request predictions from the group's selected model for two sample texts.
prediction_request = ModelGroupPredict(
    model_id=model_group.selected_model.id,
    data=["Sample Text to predict on", "More Sample text to predict on"],
)
job = client.call(prediction_request)