def _new_dataset_for_updating(client):
    # new dataset
    airline_csv = str(Path(__file__).parents[1]) + "/data/AirlineComplaints.csv"
    dataset = client.call(
        CreateDataset(name=f"AddDataToWorkflow-test-{int(time.time())}",
                      files=[airline_csv]))

    # new teach task
    questionnaire = client.call(
        CreateQuestionaire(
            name=f"AddDataToWorkflowTeach-test-{int(time.time())}",
            dataset_id=dataset.id,
            targets=["positive", "negative"],
        ))
    assert questionnaire.num_total_examples > 0

    # add data to dataset and process
    dataset = client.call(AddFiles(dataset_id=dataset.id, files=[airline_csv]))
    datafile_ids = [f.id for f in dataset.files]
    dataset = client.call(
        ProcessCSV(dataset_id=dataset.id, datafile_ids=datafile_ids, wait=True))
    assert dataset.status == "COMPLETE"

    # get workflow
    wfs = client.call(ListWorkflows(dataset_ids=[dataset.id]))
    wf = max(wfs, key=lambda w: w.id)
    return dataset, wf, questionnaire

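# A minimal sketch of a test that could consume the helper above. The dataset name in the
# helper ("AddDataToWorkflow-test") suggests an AddDataToWorkflow query in indico.queries;
# that query name, its signature, and the final assertion are assumptions for illustration,
# not confirmed by this excerpt.
def test_add_data_to_workflow(indico):
    client = IndicoClient()
    dataset, workflow, questionnaire = _new_dataset_for_updating(client)

    # push the newly processed datafiles into the existing workflow (assumed query)
    updated = client.call(AddDataToWorkflow(workflow_id=workflow.id, wait=True))
    assert updated.id == workflow.id
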
def too_small_dataset(indico):
    client = IndicoClient()
    dataset_filepath = str(Path(__file__).parents[0]) + "/TooSmall.csv"
    response = client.call(
        CreateDataset(name=f"TooSmall-test-{int(time.time())}",
                      files=[dataset_filepath]))
    assert response.status == "COMPLETE"
    return response

def org_annotate_dataset(indico):
    client = IndicoClient()
    dataset_filepath = str(Path(__file__).parents[0]) + "/org-annotate-labeled.csv"
    response = client.call(
        CreateDataset(name=f"OrgAnnotate-test-{int(time.time())}",
                      files=[dataset_filepath]))
    assert response.status == "COMPLETE"
    return response

def airlines_dataset(indico):
    client = IndicoClient()
    dataset_filepath = str(Path(__file__).parents[0]) + "/AirlineComplaints.csv"
    response = client.call(
        CreateDataset(name=f"AirlineComplaints-test-{int(time.time())}",
                      files=[dataset_filepath]))
    assert response.status == "COMPLETE"
    return response

def cats_dogs_image_dataset(indico):
    client = IndicoClient()
    dataset_filepath = str(Path(__file__).parents[0]) + "/dog_vs_cats_small.csv"
    response = client.call(
        CreateDataset(
            name=f"DogsAndCats-test-{int(time.time())}",
            files=dataset_filepath,
            from_local_images=True,
        ))
    assert response.status == "COMPLETE"
    return response

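# A minimal sketch of a test consuming the airlines_dataset fixture above, assuming the
# fixtures are registered with @pytest.fixture (decorators are not shown in this excerpt).
# GetDataset is an existing indico query; the test name and assertions are illustrative.
from indico.queries import GetDataset  # normally imported at the top of the test module


def test_airlines_dataset_is_ready(indico, airlines_dataset):
    client = IndicoClient()
    dataset = client.call(GetDataset(id=airlines_dataset.id))  # re-fetch the dataset by id
    assert dataset.status == "COMPLETE"
    assert dataset.id == airlines_dataset.id
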
below shows you what to do if the images are on your computer.
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset

import pandas as pd

# Create an Indico API client
my_config = IndicoConfig(host="app.indico.io",
                         api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# With local images, create a CSV (built here for demonstration) with one column
# that contains the paths from the CSV to where the images are stored on your computer
image_dataset = pd.DataFrame()
image_dataset["image_files"] = [
    "./path/from/csv/to/image.png",
    "./path/from/csv/to/image2.png",
]
image_dataset.to_csv("./image_dataset.csv", index=False)

# Use the CSV you created (like above) to create the dataset
dataset = client.call(
    CreateDataset(
        name="My Image Dataset",
        files="./image_dataset.csv",
        from_local_images=True,
        image_filename_col="image_files",  # specify the column containing the image paths
    ))
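
# A small follow-up sketch: the Dataset object returned by CreateDataset can be inspected
# to confirm the upload and processing finished. The "COMPLETE" check mirrors the fixtures
# above; the per-file attributes printed here (name, status) are assumptions, since only
# file ids are used elsewhere in this excerpt.
print(dataset.id, dataset.status)        # dataset id and processing status
assert dataset.status == "COMPLETE"
for datafile in dataset.files:           # image files registered on the new dataset
    print(datafile.id, datafile.name, datafile.status)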
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Create an Indico API client
my_config = IndicoConfig(host="app.indico.io",
                         api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# Create the dataset
dataset = client.call(
    CreateDataset(name="airline_comments", files=["./airline-comments.csv"]))

# Train the model with the relevant CSV columns
model_group = client.call(
    CreateModelGroup(
        name="my_classification_model",
        dataset_id=dataset.id,
        source_column_id=dataset.datacolumn_by_name("text").id,  # CSV text column
        labelset_id=dataset.labelset_by_name("Target_1").id,  # CSV target class column
        wait=True,  # wait for training to finish
    ))

# Predict with the model
job = client.call(
    ModelGroupPredict(
        model_id=model_group.selected_model.id,
        data=["Sample Text to predict on", "More Sample text to predict on"],
    ))
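
# ModelGroupPredict returns a Job rather than the predictions themselves. A common
# follow-up is to wait on the job with the JobStatus query from indico.queries and
# read its result once it finishes; sketched below.
from indico.queries import JobStatus

job = client.call(JobStatus(id=job.id, wait=True))  # block until the prediction job finishes
predictions = job.result                            # one prediction result per input sample
print(predictions)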