Example #1
    def __init__(self, csv_file, root, transforms=None, test=False):
        self.test = test
        img_names, self.labels = read_csv_file(csv_file)
        print('root is {}'.format(root))
        self.img_names = []
        for img in img_names:
            img = os.path.join(root, img)
            self.img_names.append(img)

        self.encoder = OneHotEncoder()
        self.encoder.fit(self.labels)
        self.labels = self.encoder.transform(self.labels)
        self.n_classes = self.encoder.n_classes

        if transforms is None:
            normalize = torchvision.transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            if self.test:
                self.transforms = torchvision.transforms.Compose([
                    torchvision.transforms.Resize(256),
                    torchvision.transforms.CenterCrop(224),
                    torchvision.transforms.ToTensor(),
                    normalize,
                ])
            else:
                self.transforms = torchvision.transforms.Compose([
                    torchvision.transforms.Resize(256),
                    torchvision.transforms.RandomResizedCrop(224),
                    torchvision.transforms.RandomHorizontalFlip(),
                    torchvision.transforms.RandomVerticalFlip(),
                    torchvision.transforms.ToTensor(),
                    normalize,
                ])
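
The constructor above is only one piece of a torch Dataset; the class name and the __len__/__getitem__ methods are not shown. As a minimal usage sketch under that assumption (CsvImageDataset and the file paths are hypothetical names, not taken from the excerpt):

from torch.utils.data import DataLoader

# Hypothetical class and paths: the excerpt does not show the real names.
train_set = CsvImageDataset(csv_file='train_labels.csv', root='data/images', test=False)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)

for images, labels in train_loader:
    # Each batch holds 3x224x224 tensors produced by the transforms above.
    pass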
Example #2
 def transform_y(self, y_train):
     # Transform y_train.
     if self.y_encoder is None:
         self.y_encoder = OneHotEncoder()
         self.y_encoder.fit(y_train)
     y_train = self.y_encoder.transform(y_train)
     return y_train
Example #3
    def __init__(self, clf, input_shape, y_train): # input_shape: x_train.shape[1:]
        self.searcher_args = {}
        clf.y_encoder = OneHotEncoder()
        clf.y_encoder.fit(y_train)

        self.searcher_args['n_output_node'] = clf.get_n_output_node()
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = clf.path
        self.searcher_args['metric'] = clf.metric
        self.searcher_args['loss'] = clf.loss
        self.searcher_args['verbose'] = clf.verbose
        super().__init__(**self.searcher_args)
        clf.save_searcher(self)
        clf.searcher = True
        if publish:
            # Not the best solution, but I want the code to be testable with pipenv shell in the git directory of autokeras!
            from confluent_kafka import avro
            from confluent_kafka.avro import AvroProducer
 
            value = avro.loads(value_schema)
            key = avro.loads(key_schema)

            global producer
            producer = AvroProducer({
                'bootstrap.servers': os.environ.get("BROKER", "95.158.189.52:9092"), 
                'schema.registry.url': os.environ.get("SCHEMA_REGISTRY", "http://95.158.189.52:8081"),
                'message.max.bytes': 15728640
            }, default_key_schema=key, default_value_schema=value)

            producer.produce(topic="autokeras-queen-1", key={"loss": 0, "accuracy": 0}, value={"model": b"Starting to produce models"})
            
            print("Will publish to kafka")
Example #4
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through x_train and y_train.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            time_limit: The time limit for the search in seconds.
        """

        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher_args['n_classes'] = n_classes
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['verbose'] = self.verbose
            searcher = BayesianSearcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train,
                                                            y_train,
                                                            test_size=0.25,
                                                            random_state=42)

        # Save the classifier to disk so the search can be resumed later.
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        while time.time() - start_time <= time_limit:
            run_searcher_once(x_train, y_train, x_test, y_test, self.path)
            if len(self.load_searcher().history) >= constant.MAX_MODEL_NUM:
                break
Example #5
 def __init__(self,
              y_encoder=None,
              data_transformer=None,
              verbose=False,
              path=None):
     super().__init__(verbose, path)
     self.graph = None
     self.generator = None
     self.resize_shape = None
     # Use None as the default to avoid a shared mutable default argument:
     # a default of OneHotEncoder() would be created once at definition time
     # and shared by every instance of the class.
     self.y_encoder = y_encoder if y_encoder is not None else OneHotEncoder()
     self.data_transformer = data_transformer
Example #6
 def __init__(self):
     super().__init__(verbose=False)
     self.labels = None
     self.net = None
     self.augment = None
     self.Length = 3
     self.Width = 4
     self.Epochs = 10
     self.encoder = OneHotEncoder()
     self.path = '../temp'
     self.capacity = 50
Example #7
 def transform_y(self, y):
     """Transform the parameter y_train using the variable self.y_encoder
     
     Args:
         y: list of labels to convert
     """
     # Transform y.
     if self.y_encoder is None:
         self.y_encoder = OneHotEncoder()
         self.y_encoder.fit(y)
     y = self.y_encoder.transform(y)
     return y
Example #8
    def fit(self, x_train, y_train):
        """Find the best model.

        Format the input, and split the dataset into training and testing set,
        save the classifier and find the best model.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
        """
        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        self._validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        if self.searcher is None:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher = self._get_searcher_class()(n_classes, input_shape,
                                                       self.path, self.verbose)

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train,
                                                            y_train,
                                                            test_size=0.25,
                                                            random_state=42)

        pickle.dump(self, open(os.path.join(self.path, 'classifier'), 'wb'))
        self.model_id = self.searcher.search(x_train, y_train, x_test, y_test)
Example #9
from keras.datasets import cifar10

from autokeras.generator import DefaultClassifierGenerator
from autokeras.net_transformer import default_transform
from autokeras.preprocessor import OneHotEncoder
from autokeras.utils import ModelTrainer

if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    print('Start Encoding')
    encoder = OneHotEncoder()
    encoder.fit(y_train)

    y_train = encoder.transform(y_train)
    y_test = encoder.transform(y_test)

    print('Start Generating')
    graphs = default_transform(
        DefaultClassifierGenerator(10, x_train.shape[1:]).generate())
    keras_model = graphs[0].produce_model()

    print('Start Training')
    ModelTrainer(keras_model, x_train, y_train, x_test, y_test,
                 True).train_model(max_no_improvement_num=100, batch_size=128)
    # Pass verbose by keyword: the third positional argument of evaluate() is batch_size.
    print(keras_model.evaluate(x_test, y_test, verbose=1))
Example #10
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train` and `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the label of the training data.
            time_limit: The time limit for the search in seconds.
        """
        start_time = time.time()
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher_args['n_classes'] = n_classes
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['verbose'] = self.verbose
            searcher = BayesianSearcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)
        # Save the classifier to disk so the search can be resumed later.
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        time_elapsed = time.time() - start_time
        time_remain = time_limit - time_elapsed
        while time_remain > 0:
            run_searcher_once(train_data, test_data, self.path,
                              int(time_remain))
            if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
            time_elapsed = time.time() - start_time
            time_remain = time_limit - time_elapsed
        # if no search executed during the time_limit, then raise an error
        if not len(self.load_searcher().history):
            raise TimeoutError
Example #11
def transform_y(y_train):
	y_encoder = OneHotEncoder()
	y_encoder.fit(y_train)
	y_train = y_encoder.transform(y_train)
	return y_train
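
As a quick illustration of what this helper returns (autokeras' OneHotEncoder one-hot encodes the distinct labels seen during fit; the exact column order is an implementation detail):

import numpy as np

y = np.array(['cat', 'dog', 'cat', 'bird'])
y_onehot = transform_y(y)
print(y_onehot.shape)  # (4, 3): one row per sample, one column per distinct label

Because this version fits a fresh encoder on every call, applying it separately to training and test labels can map the same class to different columns; a single fitted encoder should be shared across splits, as in Example #12 below.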
Example #12
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from autokeras.preprocessor import OneHotEncoder
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape+(1,))
x_test = x_test.reshape(x_test.shape+(1,))
# Fit one encoder on the training labels and reuse it for the test labels;
# fitting a separate encoder on each split can map the same class to
# different one-hot columns.
y_encoder = OneHotEncoder()
y_encoder.fit(y_train)
y_train = y_encoder.transform(y_train)
y_test = y_encoder.transform(y_test)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
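
To turn the model's softmax output back into digit labels, the fitted encoder can be reused in the other direction (autokeras' OneHotEncoder provides an inverse_transform; a short sketch):

predictions = model.predict(x_test)
# Map one-hot / probability rows back to the original integer labels.
predicted_labels = y_encoder.inverse_transform(predictions)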
Example #13
    def fit(self,
            x_train=None,
            y_train=None,
            csv_file_path=None,
            images_path=None):
        """Find the best model.

        Format the input, and split the dataset into training and testing set,
        save the classifier and find the best model.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            csv_file_path: The path to the CSV file.
            images_path: The path to the directory containing the images.
        """

        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []
        if csv_file_path is not None:
            img_file_name, y_train = read_csv_file(csv_file_path)
            if images_path is not None:
                x_train = read_images(img_file_name, images_path)
            else:
                raise ValueError('Directory containing images is not provided')

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            searcher = self._get_searcher_class()(n_classes, input_shape,
                                                  self.path, self.verbose)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train,
                                                            y_train,
                                                            test_size=0.25,
                                                            random_state=42)

        pickle.dump(self, open(os.path.join(self.path, 'classifier'), 'wb'))

        while True:
            searcher = self.load_searcher()
            if searcher.model_count >= constant.MAX_MODEL_NUM:
                break
            p = multiprocessing.Process(target=run_searcher_once,
                                        args=(x_train, y_train, x_test, y_test,
                                              self.path))
            p.start()
            p.join()
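
The CSV layout itself is not shown; read_csv_file is only expected to return a list of image file names and a parallel list of labels. A plausible file, assuming a simple two-column format (the column names are a guess, not taken from the read_csv_file implementation):

File Name,Label
image_0001.jpg,cat
image_0002.jpg,dog
image_0003.jpg,cat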
Example #14
    def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None, time_limit=None):
        """Find the best model.

        Format the input, and split the dataset into training and testing set,
        save the classifier and find the best model.

        Args:
            time_limit: The time limit for the search in seconds.
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            csv_file_path: The path to the CSV file.
            images_path: The path to the directory containing the images.
        """

        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []
        if csv_file_path is not None:
            img_file_name, y_train = read_csv_file(csv_file_path)
            if images_path is not None:
                x_train = read_images(img_file_name, images_path)
            else:
                raise ValueError('Directory containing images is not provided')

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher_args['n_classes'] = n_classes
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['verbose'] = self.verbose
            searcher = self._get_searcher_class()(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25, random_state=42)

        # Save the classifier to disk so the search can be resumed later.
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            while True:
                searcher = self.load_searcher()
                if searcher.model_count >= constant.MAX_MODEL_NUM:
                    return
                p = multiprocessing.Process(target=run_searcher_once, args=(x_train, y_train, x_test, y_test, self.path))
                p.start()
                p.join()

        start_time = time.time()
        while time.time() - start_time <= time_limit:
            p = multiprocessing.Process(target=run_searcher_once, args=(x_train, y_train, x_test, y_test, self.path))
            p.start()
            # Kill the process if necessary.
            while time.time() - start_time <= time_limit:
                if p.is_alive():
                    time.sleep(1)
                else:
                    break
            else:
                # Runs only if the inner loop exits without break, i.e. the
                # time limit expired while the process was still alive.
                p.terminate()
                p.join()
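
The inner loop above relies on Python's while/else semantics: the else body runs only when the loop condition becomes false, never after a break. A minimal self-contained illustration:

n = 0
while n < 3:
    n += 1
    if n == 2:
        break                    # exits via break: the else clause is skipped
else:
    print('loop exhausted')      # runs only when the condition n < 3 becomes false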
Example #15
    clf = ImageClassifier(verbose=True,
                          searcher_args={'trainer_args': {
                              'max_iter_num': 5
                          }})
    #clf = ImageClassifier(verbose=True, path='d:/tmp/autokeras/', searcher_args={'trainer_args':{'max_iter_num':5}})

    # 3. Fitting
    # time_limit: in seconds; the search stops automatically once the time limit has passed.
    clf.fit(x_train, y_train, time_limit=24 * 60 * 60)

    # 3-1. Load a saved model (after running step 3 once, comment out step 3 above).

    # If you reloaded a saved clf, y_encoder and data_transformer should be redefined as follows.
    from autokeras.preprocessor import OneHotEncoder, DataTransformer
    from autokeras.constant import Constant
    clf.y_encoder = OneHotEncoder()
    clf.y_encoder.fit(y_train)
    clf.data_transformer = DataTransformer(x_train,
                                           augment=Constant.DATA_AUGMENTATION)

    #print(clf.get_best_model_id())

    searcher = clf.load_searcher()
    #print(searcher.history)

    # 3-2. Final fit and save the model.
    clf.final_fit(x_train,
                  y_train,
                  x_test,
                  y_test,
                  retrain=False,