def __init__(self, csv_file, root, transforms=None, test=False):
    """Build an image dataset from a CSV of image names and labels.

    Args:
        csv_file: Path passed to ``read_csv_file``, which returns image file
            names and their labels.
        root: Directory joined onto every image name from the CSV.
        transforms: Optional torchvision transform pipeline. If None, a
            default pipeline is built: deterministic resize/center-crop for
            test mode, random augmentation for training.
        test: Whether this dataset is used for evaluation; selects the
            default transform pipeline.
    """
    self.test = test
    img_names, self.labels = read_csv_file(csv_file)
    print('root is {}'.format(root))
    # Resolve every image name against the root directory.
    self.img_names = [os.path.join(root, img) for img in img_names]
    # One-hot encode the labels and remember the class count.
    self.encoder = OneHotEncoder()
    self.encoder.fit(self.labels)
    self.labels = self.encoder.transform(self.labels)
    self.n_classes = self.encoder.n_classes
    if transforms is None:
        # ImageNet normalization constants.
        normalize = torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
        if self.test:
            self.transforms = torchvision.transforms.Compose([
                torchvision.transforms.Resize(256),
                torchvision.transforms.CenterCrop(224),
                torchvision.transforms.ToTensor(),
                normalize,
            ])
        else:
            self.transforms = torchvision.transforms.Compose([
                torchvision.transforms.Resize(256),
                torchvision.transforms.RandomResizedCrop(224),
                torchvision.transforms.RandomHorizontalFlip(),
                torchvision.transforms.RandomVerticalFlip(),
                torchvision.transforms.ToTensor(),
                normalize,
            ])
    else:
        # BUG FIX: a caller-supplied pipeline was previously ignored,
        # leaving self.transforms unset (AttributeError on first access).
        self.transforms = transforms
def transform_y(self, y_train):
    """Return *y_train* one-hot encoded, fitting the encoder on first use."""
    encoder = self.y_encoder
    if encoder is None:
        # Lazily create and fit the encoder the first time labels arrive.
        encoder = OneHotEncoder()
        encoder.fit(y_train)
        self.y_encoder = encoder
    return encoder.transform(y_train)
def __init__(self, clf, input_shape, y_train):
    """Configure the searcher from *clf*, then optionally announce on Kafka.

    Args:
        clf: Classifier-like object; its y_encoder is (re)created and fitted
            here, and its path/metric/loss/verbose settings seed the searcher.
        input_shape: Shape of a single training sample, i.e. x_train.shape[1:].
        y_train: Labels used to fit the classifier's one-hot encoder.
    """
    # input_shape: x_train.shape[1:]
    self.searcher_args = {}
    # Fit a fresh encoder on the labels so n_output_node below is valid.
    clf.y_encoder = OneHotEncoder()
    clf.y_encoder.fit(y_train)
    self.searcher_args['n_output_node'] = clf.get_n_output_node()
    self.searcher_args['input_shape'] = input_shape
    self.searcher_args['path'] = clf.path
    self.searcher_args['metric'] = clf.metric
    self.searcher_args['loss'] = clf.loss
    self.searcher_args['verbose'] = clf.verbose
    super().__init__(**self.searcher_args)
    # Persist the searcher and flag the classifier as having one.
    clf.save_searcher(self)
    clf.searcher = True
    # NOTE(review): `publish`, `value_schema`, `key_schema` and the `producer`
    # global are presumably defined at module level — not visible in this
    # chunk; confirm before relying on them.
    if publish:
        # Not ideal, but keeps the code testable with `pipenv shell` inside
        # the autokeras git checkout (import deferred until actually needed).
        from confluent_kafka import avro
        from confluent_kafka.avro import AvroProducer
        value = avro.loads(value_schema)
        key = avro.loads(key_schema)
        global producer
        # NOTE(review): broker/registry addresses are hard-coded fallbacks;
        # override via BROKER / SCHEMA_REGISTRY environment variables.
        producer = AvroProducer({
            'bootstrap.servers': os.environ.get("BROKER", "95.158.189.52:9092"),
            'schema.registry.url': os.environ.get("SCHEMA_REGISTRY", "http://95.158.189.52:8081"),
            'message.max.bytes': 15728640
        }, default_key_schema=key, default_value_schema=value)
        # Emit a sentinel record so downstream consumers see the run start.
        producer.produce(topic="autokeras-queen-1",
                         key={"loss": 0, "accuracy": 0},
                         value={"model": b"Starting to produce models"})
        print("Will publish to kafka")
def fit(self, x_train=None, y_train=None, time_limit=None):
    """Find the best neural architecture and train it.

    Based on the given dataset, the function will find the best neural
    architecture for it. The dataset is in numpy.ndarray format. So the
    training data should be passed through x_train, y_train.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the label of the
            training data.
        time_limit: The time limit for the search in seconds.
    """
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []
    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()
    _validate(x_train, y_train)
    # Transform y_train, lazily fitting the encoder.
    if self.y_encoder is None:
        self.y_encoder = OneHotEncoder()
        self.y_encoder.fit(y_train)
    y_train = self.y_encoder.transform(y_train)
    # Create the searcher and save on disk.
    if not self.searcher:
        input_shape = x_train.shape[1:]
        n_classes = self.y_encoder.n_classes
        self.searcher_args['n_classes'] = n_classes
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['verbose'] = self.verbose
        searcher = BayesianSearcher(**self.searcher_args)
        self.save_searcher(searcher)
        self.searcher = True
    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                        test_size=0.25,
                                                        random_state=42)
    # BUG FIX: the classifier was previously written twice to the same path
    # (a raw pickle.dump with a leaked file handle, then pickle_to_file).
    # A single pickle_to_file call is sufficient.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))
    if time_limit is None:
        time_limit = 24 * 60 * 60  # default: one day
    start_time = time.time()
    # Search until the time budget is spent or enough models were tried.
    while time.time() - start_time <= time_limit:
        run_searcher_once(x_train, y_train, x_test, y_test, self.path)
        if len(self.load_searcher().history) >= constant.MAX_MODEL_NUM:
            break
def __init__(self, y_encoder=None, data_transformer=None, verbose=False, path=None):
    """Initialize the classifier state.

    Args:
        y_encoder: Optional label encoder; a fresh OneHotEncoder is created
            when omitted.
        data_transformer: Optional transformer for the input data.
        verbose: Verbosity flag forwarded to the base class.
        path: Working-directory path forwarded to the base class.
    """
    super().__init__(verbose, path)
    self.graph = None
    self.generator = None
    self.resize_shape = None
    # BUG FIX: the old default `y_encoder=OneHotEncoder()` was evaluated once
    # at definition time, so every instance shared one encoder object.
    # Create a fresh encoder per instance instead.
    self.y_encoder = y_encoder if y_encoder is not None else OneHotEncoder()
    self.data_transformer = data_transformer
def __init__(self):
    """Set up fixed hyper-parameters, a fresh label encoder, and empty state."""
    super().__init__(verbose=False)
    # Fixed search/training hyper-parameters.
    self.Length = 3
    self.Width = 4
    self.Epochs = 10
    self.capacity = 50
    # Working directory for intermediate artifacts.
    self.path = '../temp'
    # Runtime state, populated later.
    self.labels = None
    self.net = None
    self.augment = None
    self.encoder = OneHotEncoder()
def transform_y(self, y):
    """One-hot encode *y* via ``self.y_encoder``, creating it on first call.

    Args:
        y: list of labels to convert
    """
    if self.y_encoder is None:
        # First call: build and fit the encoder on these labels.
        fresh_encoder = OneHotEncoder()
        fresh_encoder.fit(y)
        self.y_encoder = fresh_encoder
    return self.y_encoder.transform(y)
def fit(self, x_train, y_train):
    """Find the best model.

    Format the input, and split the dataset into training and testing set,
    save the classifier and find the best model.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the label of the
            training data.
    """
    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()
    self._validate(x_train, y_train)
    # Transform y_train, lazily fitting the encoder.
    if self.y_encoder is None:
        self.y_encoder = OneHotEncoder()
        self.y_encoder.fit(y_train)
    y_train = self.y_encoder.transform(y_train)
    if self.searcher is None:
        input_shape = x_train.shape[1:]
        n_classes = self.y_encoder.n_classes
        self.searcher = self._get_searcher_class()(n_classes, input_shape,
                                                   self.path, self.verbose)
    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                        test_size=0.25,
                                                        random_state=42)
    # BUG FIX: pickle.dump(self, open(...)) leaked the file handle; use a
    # context manager so the file is flushed and closed deterministically.
    with open(os.path.join(self.path, 'classifier'), 'wb') as f:
        pickle.dump(self, f)
    self.model_id = self.searcher.search(x_train, y_train, x_test, y_test)
from keras.datasets import cifar10
from autokeras.generator import DefaultClassifierGenerator
from autokeras.net_transformer import default_transform
from autokeras.preprocessor import OneHotEncoder
from autokeras.utils import ModelTrainer

if __name__ == '__main__':
    # Load CIFAR-10 and one-hot encode both label sets with one encoder.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    print('Start Encoding')
    label_encoder = OneHotEncoder()
    label_encoder.fit(y_train)
    y_train = label_encoder.transform(y_train)
    y_test = label_encoder.transform(y_test)

    # Generate a default 10-class graph and transform it, then take the
    # first candidate and materialize it as a Keras model.
    print('Start Generating')
    generated = DefaultClassifierGenerator(10, x_train.shape[1:]).generate()
    candidate_graphs = default_transform(generated)
    model = candidate_graphs[0].produce_model()

    # Train with early stopping and report test performance.
    print('Start Training')
    trainer = ModelTrainer(model, x_train, y_train, x_test, y_test, True)
    trainer.train_model(max_no_improvement_num=100, batch_size=128)
    print(model.evaluate(x_test, y_test, True))
def fit(self, x_train=None, y_train=None, time_limit=None):
    """Find the best neural architecture and train it.

    Based on the given dataset, the function will find the best neural
    architecture for it. The dataset is in numpy.ndarray format. So the
    training data should be passed through `x_train`, `y_train`.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the label of the
            training data.
        time_limit: The time limit for the search in seconds.

    Raises:
        TimeoutError: If no search iteration finished within the time limit.
    """
    start_time = time.time()
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []
    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()
    _validate(x_train, y_train)
    # Transform y_train, lazily fitting the encoder.
    if self.y_encoder is None:
        self.y_encoder = OneHotEncoder()
        self.y_encoder.fit(y_train)
    y_train = self.y_encoder.transform(y_train)
    # Transform x_train.
    if self.data_transformer is None:
        self.data_transformer = DataTransformer(x_train, augment=self.augment)
    # Create the searcher and save on disk.
    if not self.searcher:
        input_shape = x_train.shape[1:]
        n_classes = self.y_encoder.n_classes
        self.searcher_args['n_classes'] = n_classes
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['metric'] = self.metric
        self.searcher_args['verbose'] = self.verbose
        searcher = BayesianSearcher(**self.searcher_args)
        self.save_searcher(searcher)
        self.searcher = True
    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(
        x_train, y_train,
        test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y_train) * 0.2)),
        random_state=42)
    train_data = self.data_transformer.transform_train(x_train, y_train)
    test_data = self.data_transformer.transform_test(x_test, y_test)
    # BUG FIX: the classifier was previously saved twice to the same path
    # (a raw pickle.dump with a leaked file handle, then pickle_to_file).
    # One pickle_to_file call is sufficient.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))
    if time_limit is None:
        time_limit = 24 * 60 * 60  # default: one day
    time_elapsed = time.time() - start_time
    time_remain = time_limit - time_elapsed
    # Keep searching until the budget is spent or enough models were tried.
    while time_remain > 0:
        run_searcher_once(train_data, test_data, self.path, int(time_remain))
        if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
            break
        time_elapsed = time.time() - start_time
        time_remain = time_limit - time_elapsed
    # If no search executed during the time_limit, then raise an error.
    if not len(self.load_searcher().history):
        raise TimeoutError
def transform_y(y_train):
    """One-hot encode the given labels using a freshly fitted encoder."""
    encoder = OneHotEncoder()
    encoder.fit(y_train)
    return encoder.transform(y_train)
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from autokeras.preprocessor import OneHotEncoder
from keras.models import load_model

# Load MNIST and append a channel axis: (n, 28, 28) -> (n, 28, 28, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))


def transform_y(y_train):
    """One-hot encode a label array with a freshly fitted encoder."""
    y_encoder = OneHotEncoder()
    y_encoder.fit(y_train)
    y_train = y_encoder.transform(y_train)
    return y_train


# CLEANUP: removed an unused module-level `y_encode = OneHotEncoder()`.
# NOTE(review): test and train labels are encoded with independently fitted
# encoders; if OneHotEncoder's column order depends on data order, the two
# encodings may disagree — confirm against its implementation.
y_test = transform_y(y_test)
y_train = transform_y(y_train)

# Simple CNN baseline: two conv layers, max-pooling, dropout, dense head.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=x_train.shape[1:]))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None):
    """Find the best model.

    Format the input, and split the dataset into training and testing set,
    save the classifier and find the best model.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the label of the
            training data.
        csv_file_path: CSV file path; when given, labels (and optionally
            image data) are loaded from disk instead of x_train/y_train.
        images_path: Path where the images referenced by the CSV exist.

    Raises:
        ValueError: If a CSV file is given without an images directory.
    """
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []
    if csv_file_path is not None:
        img_file_name, y_train = read_csv_file(csv_file_path)
        if images_path is not None:
            x_train = read_images(img_file_name, images_path)
        else:
            raise ValueError('Directory containing images is not provided')
    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()
    _validate(x_train, y_train)
    # Transform y_train, lazily fitting the encoder.
    if self.y_encoder is None:
        self.y_encoder = OneHotEncoder()
        self.y_encoder.fit(y_train)
    y_train = self.y_encoder.transform(y_train)
    # Create the searcher and save on disk.
    if not self.searcher:
        input_shape = x_train.shape[1:]
        n_classes = self.y_encoder.n_classes
        searcher = self._get_searcher_class()(n_classes, input_shape,
                                              self.path, self.verbose)
        self.save_searcher(searcher)
        self.searcher = True
    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                        test_size=0.25,
                                                        random_state=42)
    # BUG FIX: pickle.dump(self, open(...)) leaked the file handle; use a
    # context manager so the file is flushed and closed deterministically.
    with open(os.path.join(self.path, 'classifier'), 'wb') as f:
        pickle.dump(self, f)
    # Run the searcher in a child process, one model at a time, until the
    # model budget is exhausted.
    while True:
        searcher = self.load_searcher()
        if searcher.model_count >= constant.MAX_MODEL_NUM:
            break
        p = multiprocessing.Process(target=run_searcher_once,
                                    args=(x_train, y_train, x_test, y_test,
                                          self.path))
        p.start()
        p.join()
def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None,
        time_limit=None):
    """Find the best model.

    Format the input, and split the dataset into training and testing set,
    save the classifier and find the best model.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the label of the
            training data.
        csv_file_path: CSV file path; when given, labels (and optionally
            image data) are loaded from disk instead of x_train/y_train.
        images_path: Path where the images referenced by the CSV exist.
        time_limit: Optional search budget in seconds; unlimited when None.

    Raises:
        ValueError: If a CSV file is given without an images directory.
    """
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []
    if csv_file_path is not None:
        img_file_name, y_train = read_csv_file(csv_file_path)
        if images_path is not None:
            x_train = read_images(img_file_name, images_path)
        else:
            raise ValueError('Directory containing images is not provided')
    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()
    _validate(x_train, y_train)
    # Transform y_train, lazily fitting the encoder.
    if self.y_encoder is None:
        self.y_encoder = OneHotEncoder()
        self.y_encoder.fit(y_train)
    y_train = self.y_encoder.transform(y_train)
    # Create the searcher and save on disk.
    if not self.searcher:
        input_shape = x_train.shape[1:]
        n_classes = self.y_encoder.n_classes
        self.searcher_args['n_classes'] = n_classes
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['verbose'] = self.verbose
        searcher = self._get_searcher_class()(**self.searcher_args)
        self.save_searcher(searcher)
        self.searcher = True
    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                        test_size=0.25,
                                                        random_state=42)
    # BUG FIX: the classifier was previously written twice to the same path
    # (a raw pickle.dump with a leaked file handle, then pickle_to_file).
    # One pickle_to_file call is sufficient.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))
    if time_limit is None:
        # No budget: run child searches until the model count is reached.
        while True:
            searcher = self.load_searcher()
            if searcher.model_count >= constant.MAX_MODEL_NUM:
                return
            p = multiprocessing.Process(target=run_searcher_once,
                                        args=(x_train, y_train, x_test,
                                              y_test, self.path))
            p.start()
            p.join()
    start_time = time.time()
    while time.time() - start_time <= time_limit:
        p = multiprocessing.Process(target=run_searcher_once,
                                    args=(x_train, y_train, x_test, y_test,
                                          self.path))
        p.start()
        # Poll the child once a second; kill it if the budget runs out.
        while time.time() - start_time <= time_limit:
            if p.is_alive():
                time.sleep(1)
            else:
                break
        else:
            # while-else: reached only when the monitor loop exhausted
            # without break, i.e. time ran out while the worker was alive.
            p.terminate()
        p.join()
clf = ImageClassifier(verbose=True, searcher_args={'trainer_args': { 'max_iter_num': 5 }}) #clf = ImageClassifier(verbose=True, path='d:/tmp/autokeras/', searcher_args={'trainer_args':{'max_iter_num':5}}) # 3. Fitting # time_limit : 초단위, 시간이 지나면 작동을 자동으로 멈춥니다. clf.fit(x_train, y_train, time_limit=24 * 60 * 60) # 3-1. Load saved model (3번 항목 실행후 3 주석처리 필요) # if you reloaded your saved clf, y_encoder & data_transformer should be defined like following. from autokeras.preprocessor import OneHotEncoder, DataTransformer from autokeras.constant import Constant clf.y_encoder = OneHotEncoder() clf.y_encoder.fit(y_train) clf.data_transformer = DataTransformer(x_train, augment=Constant.DATA_AUGMENTATION) #print(clf.get_best_model_id()) searcher = clf.load_searcher() #print(searcher.history) # 3-2. fitting finally and saving model clf.final_fit(x_train, y_train, x_test, y_test, retrain=False,