def create_dataset(project_id, display_name):
    """Create a dataset."""
    # [START automl_vision_object_detection_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "your_datasets_display_name"

    client = automl.AutoMlClient()
    # GCP location that will own the new dataset.
    parent = f"projects/{project_id}/locations/us-central1"

    # Object-detection datasets take no extra metadata fields.
    dataset_spec = automl.Dataset(
        display_name=display_name,
        image_object_detection_dataset_metadata=automl.ImageObjectDetectionDatasetMetadata(),
    )

    # create_dataset returns a long-running operation; block until it is done.
    operation = client.create_dataset(parent=parent, dataset=dataset_spec)
    created_dataset = operation.result()

    # Display the dataset information
    print("Dataset name: {}".format(created_dataset.name))
    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
def create_dataset(project_id, display_name):
    """Create a dataset."""
    # [START automl_language_text_classification_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    client = automl.AutoMlClient()
    # GCP location that will own the new dataset.
    parent = f"projects/{project_id}/locations/us-central1"

    # Classification type:
    #   MultiClass — at most one label is allowed per example.
    #   MultiLabel — multiple labels are allowed for one example.
    dataset_spec = automl.Dataset(
        display_name=display_name,
        text_classification_dataset_metadata=automl.TextClassificationDatasetMetadata(
            classification_type=automl.ClassificationType.MULTICLASS),
    )

    # create_dataset returns a long-running operation; block until it is done.
    operation = client.create_dataset(parent=parent, dataset=dataset_spec)
    created_dataset = operation.result()

    # Display the dataset information
    print("Dataset name: {}".format(created_dataset.name))
    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
def create_dataset(project_id, display_name):
    """Create a dataset."""
    # [START automl_translate_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    client = automl.AutoMlClient()
    # GCP location that will own the new dataset.
    parent = f"projects/{project_id}/locations/us-central1"

    # English -> Japanese. For the full list of supported languages, see:
    # https://cloud.google.com/translate/automl/docs/languages
    translation_metadata = automl.TranslationDatasetMetadata(
        source_language_code="en", target_language_code="ja")
    dataset_spec = automl.Dataset(
        display_name=display_name,
        translation_dataset_metadata=translation_metadata,
    )

    # create_dataset returns a long-running operation; block until it is done.
    operation = client.create_dataset(parent=parent, dataset=dataset_spec)
    created_dataset = operation.result()

    # Display the dataset information
    print("Dataset name: {}".format(created_dataset.name))
    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
def create_dataset(project_id, display_name):
    """Create a dataset."""
    # [START automl_language_sentiment_analysis_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    client = automl.AutoMlClient()
    # GCP location that will own the new dataset.
    parent = f"projects/{project_id}/locations/us-central1"

    # Sentiment datasets must declare the top of their score scale
    # (sentiment_max may range from 1 to 10). For details, see:
    # https://cloud.google.com/natural-language/automl/docs/prepare#sentiment-analysis
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsentimentdatasetmetadata
    sentiment_metadata = automl.TextSentimentDatasetMetadata(sentiment_max=4)
    dataset_spec = automl.Dataset(
        display_name=display_name,
        text_sentiment_dataset_metadata=sentiment_metadata)

    # create_dataset returns a long-running operation; block until it is done.
    operation = client.create_dataset(parent=parent, dataset=dataset_spec)
    created_dataset = operation.result()

    # Display the dataset information
    print("Dataset name: {}".format(created_dataset.name))
    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
def train_text_extraction_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (str, 'the paths to csv files describing the input data for a new dataset') = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        train_budget_milli_node_hours: int = 24000) -> Operation:
    """Kick off training of a text-extraction model, creating a dataset first if needed."""
    # Only build a new dataset spec when no existing dataset id was supplied.
    new_dataset = None
    if not dataset_id:
        new_dataset = automl.Dataset(
            display_name=display_name,
            text_extraction_dataset_metadata={},
        )

    # NOTE(review): train_budget_milli_node_hours is accepted but never used
    # in this body — text-extraction model metadata carries no budget field.
    # Confirm whether the parameter should be wired through or dropped.
    model = automl.Model(
        display_name=display_name,
        dataset_id=dataset_id,
        text_extraction_model_metadata=automl.TextExtractionModelMetadata())

    # Delegate dataset creation / data import / training to the shared helper.
    return self.train_automl_model(
        model=model,
        dataset=new_dataset,
        dataset_id=dataset_id,
        input_paths=input_paths)
def dataset_id():
    """Fixture: create a throwaway text-extraction dataset and yield its id."""
    client = automl.AutoMlClient()
    parent = f"projects/{PROJECT_ID}/locations/us-central1"
    # Timestamp suffix keeps test dataset names unique across runs.
    unique_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    dataset_spec = automl.Dataset(
        display_name=unique_name,
        text_extraction_dataset_metadata=automl.TextExtractionDatasetMetadata()
    )
    operation = client.create_dataset(parent=parent, dataset=dataset_spec)
    # The trailing path segment of the resource name is the dataset id.
    created_id = operation.result().name.split("/")[-1]
    yield created_id
def train_image_classification_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (str, 'the paths to csv files describing the input data for a new dataset') = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        classification_type: (automl.ClassificationType, 'MULTICLASS or MULTILABEL') = automl.ClassificationType.MULTICLASS,
        train_budget_milli_node_hours: int = 24000) -> Operation:
    """Kick off training of an image-classification model, creating a dataset first if needed."""
    # Only build a new dataset spec when no existing dataset id was supplied.
    # MultiLabel allows several labels per example; MultiClass at most one.
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#classificationtype
    new_dataset = None
    if not dataset_id:
        new_dataset = automl.Dataset(
            display_name=display_name,
            image_classification_dataset_metadata=automl.ImageClassificationDatasetMetadata(
                classification_type=classification_type),
        )

    # Base model is left unset so Google's default base model is used.
    # The actual train cost will be equal to or less than
    # train_budget_milli_node_hours.
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#imageclassificationmodelmetadata
    model = automl.Model(
        display_name=display_name,
        dataset_id=dataset_id,
        image_classification_model_metadata=automl.ImageClassificationModelMetadata(
            train_budget_milli_node_hours=train_budget_milli_node_hours),
    )

    # Delegate dataset creation / data import / training to the shared helper.
    return self.train_automl_model(
        model=model,
        dataset=new_dataset,
        dataset_id=dataset_id,
        input_paths=input_paths)
def train_text_classification_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (str, 'the paths to csv files describing the input data for a new dataset') = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        classification_type: (automl.ClassificationType, 'MULTICLASS or MULTILABEL') = automl.ClassificationType.MULTICLASS
) -> Operation:
    """Kick off training of a text-classification model, creating a dataset first if needed."""
    # Only build a new dataset spec when no existing dataset id was supplied.
    # MultiLabel allows several labels per example; MultiClass at most one.
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#classificationtype
    new_dataset = None
    if not dataset_id:
        new_dataset = automl.Dataset(
            display_name=display_name,
            text_classification_dataset_metadata=automl.TextClassificationDatasetMetadata(
                classification_type=classification_type),
        )

    # Text-classification model metadata has no fields to configure here.
    model = automl.Model(
        display_name=display_name,
        dataset_id=dataset_id,
        text_classification_model_metadata={})

    # Delegate dataset creation / data import / training to the shared helper.
    return self.train_automl_model(
        model=model,
        dataset=new_dataset,
        dataset_id=dataset_id,
        input_paths=input_paths)
# Dataset name encodes the experiment configuration: base name, noise ratio
# (as a percentage), and the positive/negative example limits.
display_name = "%s_%d_%d_%d" % (
    data_config['dataset_name'],
    int(data_config['noise_ratio'] * 100),
    data_config['pos_limit'],
    data_config['neg_limit'],
)
start_time = time.time()

client = automl.AutoMlClient()
# GCP location that will own the new dataset.
project_location = f"projects/{project_id}/locations/us-central1"

metadata = automl.ImageClassificationDatasetMetadata(
    classification_type=automl.ClassificationType.MULTICLASS
)
dataset = automl.Dataset(
    display_name=display_name,
    image_classification_dataset_metadata=metadata,
)

# Create a dataset with the dataset metadata in the region.
response = client.create_dataset(parent=project_location, dataset=dataset)
created_dataset = response.result()

# Display the dataset information
print("Dataset name: {}".format(created_dataset.name))
print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))

# Stage the training images in the project's Vision bucket
# (the "<project>-vcm" bucket is the AutoML Vision convention).
storage_client = storage.Client()
bucket = storage_client.get_bucket(f"{project_id}-vcm")
pos_train_images_gc = add_images_to_gc(data['pos_train_images'])