コード例 #1
0
def _new_dataset_for_updating(client):
    """Build a dataset, questionnaire and workflow for update tests.

    Creates a dataset from ``data/AirlineComplaints.csv`` (resolved against
    the directory one level above this file's directory), creates a
    questionnaire on it, adds the same CSV a second time, processes every
    file of the dataset, and finally lists the workflows for the dataset.

    Args:
        client: Object whose ``call`` method executes the request objects
            built in this helper.

    Returns:
        A ``(dataset, workflow, questionnaire)`` tuple, where ``workflow`` is
        the listed workflow with the largest ``id``.
    """
    root_dir = str(Path(__file__).parents[1])
    csv_path = root_dir + "/data/AirlineComplaints.csv"

    # Step 1: initial dataset built from the CSV.
    create_dataset = CreateDataset(
        name=f"AddDataToWorkflow-test-{int(time.time())}",
        files=[csv_path],
    )
    dataset = client.call(create_dataset)

    # Step 2: teach task (questionnaire) on that dataset.
    create_questionnaire = CreateQuestionaire(
        name=f"AddDataToWorkflowTeach-test-{int(time.time())}",
        dataset_id=dataset.id,
        targets=["positive", "negative"],
    )
    questionnaire = client.call(create_questionnaire)
    assert questionnaire.num_total_examples > 0

    # Step 3: upload the same CSV again, then process all files now attached
    # to the dataset and wait for the result.
    dataset = client.call(AddFiles(dataset_id=dataset.id, files=[csv_path]))
    file_ids = [datafile.id for datafile in dataset.files]
    process_request = ProcessCSV(
        dataset_id=dataset.id,
        datafile_ids=file_ids,
        wait=True,
    )
    dataset = client.call(process_request)
    assert dataset.status == "COMPLETE"

    # Step 4: pick the workflow with the highest id among those listed.
    workflows = client.call(ListWorkflows(dataset_ids=[dataset.id]))
    highest_id_workflow = max(workflows, key=lambda workflow: workflow.id)
    return dataset, highest_id_workflow, questionnaire
コード例 #2
0
def too_small_dataset(indico):
    """Create a dataset from ``TooSmall.csv`` and return the response.

    The CSV is looked up in the directory that contains this file. The
    dataset name carries the current Unix time (whole seconds) as a suffix.

    Args:
        indico: Not used in the body; presumably a pytest fixture dependency
            (confirm against the test configuration).

    Returns:
        The result of the ``CreateDataset`` call, after asserting that its
        ``status`` is ``"COMPLETE"``.
    """
    api = IndicoClient()
    here = str(Path(__file__).parents[0])
    csv_path = here + "/TooSmall.csv"

    request = CreateDataset(
        name=f"TooSmall-test-{int(time.time())}",
        files=[csv_path],
    )
    created = api.call(request)
    assert created.status == "COMPLETE"
    return created
コード例 #3
0
def org_annotate_dataset(indico):
    """Create a dataset from ``org-annotate-labeled.csv`` and return it.

    The CSV is looked up in the directory that contains this file. The
    dataset name carries the current Unix time (whole seconds) as a suffix.

    Args:
        indico: Not used in the body; presumably a pytest fixture dependency
            (confirm against the test configuration).

    Returns:
        The result of the ``CreateDataset`` call, after asserting that its
        ``status`` is ``"COMPLETE"``.
    """
    api = IndicoClient()
    here = str(Path(__file__).parents[0])
    csv_path = here + "/org-annotate-labeled.csv"

    request = CreateDataset(
        name=f"OrgAnnotate-test-{int(time.time())}",
        files=[csv_path],
    )
    created = api.call(request)
    assert created.status == "COMPLETE"
    return created
コード例 #4
0
def airlines_dataset(indico):
    """Create a dataset from ``AirlineComplaints.csv`` and return it.

    The CSV is looked up in the directory that contains this file. The
    dataset name carries the current Unix time (whole seconds) as a suffix.

    Args:
        indico: Not used in the body; presumably a pytest fixture dependency
            (confirm against the test configuration).

    Returns:
        The result of the ``CreateDataset`` call, after asserting that its
        ``status`` is ``"COMPLETE"``.
    """
    api = IndicoClient()
    here = str(Path(__file__).parents[0])
    csv_path = here + "/AirlineComplaints.csv"

    request = CreateDataset(
        name=f"AirlineComplaints-test-{int(time.time())}",
        files=[csv_path],
    )
    created = api.call(request)
    assert created.status == "COMPLETE"
    return created
コード例 #5
0
def cats_dogs_image_dataset(indico):
    """Create an image dataset from ``dog_vs_cats_small.csv`` and return it.

    The CSV is looked up in the directory that contains this file and is
    handed to ``CreateDataset`` as a single path string (not a list) together
    with ``from_local_images=True``.

    Args:
        indico: Not used in the body; presumably a pytest fixture dependency
            (confirm against the test configuration).

    Returns:
        The result of the ``CreateDataset`` call, after asserting that its
        ``status`` is ``"COMPLETE"``.
    """
    api = IndicoClient()
    here = str(Path(__file__).parents[0])
    csv_path = here + "/dog_vs_cats_small.csv"

    request = CreateDataset(
        name=f"DogsAndCats-test-{int(time.time())}",
        files=csv_path,
        from_local_images=True,
    )
    created = api.call(request)
    assert created.status == "COMPLETE"
    return created
コード例 #6
0
below shows you what to do if the images are on your computer.
"""

from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset
import pandas as pd

# Build the API client from a host name and the path to an API token file.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# For local images, prepare a CSV (built here purely for demonstration) in
# which one column holds the path from the CSV to each image on your computer.
image_dataset = pd.DataFrame(
    {
        "image_files": [
            "./path/from/csv/to/image.png",
            "./path/from/csv/to/image2.png",
        ],
    }
)
image_dataset.to_csv("./image_dataset.csv", index=False)

# Create the dataset from a CSV laid out like the one written above.
create_request = CreateDataset(
    name="My Image Dataset",
    files="./image_dataset.csv",
    from_local_images=True,
    image_filename_col="image_files",  # column that holds the image paths
)
dataset = client.call(create_request)
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Build the API client from a host name and the path to an API token file.
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Create the dataset from the CSV file.
create_request = CreateDataset(
    name="airline_comments",
    files=["./airline-comments.csv"],
)
dataset = client.call(create_request)

# Look up the CSV columns the model is trained on.
text_column = dataset.datacolumn_by_name("text")  # csv text column
target_labelset = dataset.labelset_by_name("Target_1")  # csv target class column

# Train the model on those columns.
train_request = CreateModelGroup(
    name="my_classification_model",
    dataset_id=dataset.id,
    source_column_id=text_column.id,
    labelset_id=target_labelset.id,
    wait=True,  # wait for training to finish
)
model_group = client.call(train_request)

# Submit two sample texts to the selected model for prediction.
predict_request = ModelGroupPredict(
    model_id=model_group.selected_model.id,
    data=["Sample Text to predict on", "More Sample text to predict on"],
)
job = client.call(predict_request)