Esempio n. 1
0
    def get_batch(self):
        """Build one randomly sampled training batch.

        Samples ``self.batch_size`` rows (with replacement) from
        ``self.train_df``, loads every image whose file exists through
        ``myimg.myImg`` and stacks the pixel data into a ``uint8`` array.
        ``self.processing_cnt`` is incremented once per sampled row and a
        progress counter is written to stdout.

        Returns:
            tuple: ``(x_train, y_train)``. ``x_train`` has shape
            ``(n, img_width, img_heigth)`` when ``self.channels == 1`` and
            ``(n, img_width, img_heigth, channels)`` otherwise; ``y_train``
            has shape ``(n, 1)``. ``n`` is the number of sampled rows whose
            image file was found, so both arrays always have equal length.
        """
        mname = 'get_batch'
        batch_size = self.batch_size

        if self.channels == 1:
            batch_shape = (batch_size, self.img_width, self.img_heigth)
        else:
            batch_shape = (batch_size, self.img_width, self.img_heigth,
                           self.channels)
        x_train = np.zeros(batch_shape, dtype='uint8')
        y_train_buf = []

        self.log(mname, "[{}] recs for training.".format(batch_size), level=3)

        # Sampling with replacement: the same record may appear several times.
        temp_df = self.train_df.sample(batch_size, replace=True)

        cnt = 0           # sampled rows visited so far
        loaded = 0        # next free slot in x_train
        file_missing = 0
        for i, rec in temp_df.iterrows():
            progress_sts = "%6d out of %6d" % (cnt, batch_size)
            sys.stdout.write(progress_sts)
            # Backspaces move the cursor back so the next status overwrites.
            sys.stdout.write("\b" * len(progress_sts))
            sys.stdout.flush()

            imgpath = self.img_dir_path + rec.image + self.img_filename_ext

            if os.path.exists(imgpath):
                myimg1 = myimg.myImg(imageid=str(i),
                                     config=self.myImg_config,
                                     path=imgpath)
                # Store into the next free slot. Indexing with the batch size
                # itself (as the code used to) is always out of bounds.
                # NOTE(review): assumes getImage() matches the per-image
                # shape of x_train - confirm against myImg.
                x_train[loaded] = myimg1.getImage()
                y_train_buf.append(rec.level)
                loaded += 1
            else:
                file_missing += 1

            self.processing_cnt += 1
            cnt += 1

        # Drop the unused tail so images and labels stay aligned when some
        # files were missing.
        x_train = x_train[:loaded]
        y_train = np.array(y_train_buf, dtype='uint8')
        y_train = np.reshape(y_train, (len(y_train_buf), 1))

        self.log(mname,
                 "x_train [{}] y_train [{}]".format(x_train.shape,
                                                    y_train.shape),
                 level=3)
        self.log(mname, "Process dataset [{}]".format(cnt), level=3)
        self.log(mname, "File missing [{}]".format(file_missing), level=3)
        self.log(mname,
                 "Max image width[{}] heigth[{}]".format(
                     self.df['w'].max(), self.df['h'].max()),
                 level=3)

        return (x_train, y_train)
Esempio n. 2
0
    def image_data_generator(self, mode="train"):
        """Endlessly yield normalised greyscale image batches.

        Reads ``<train_data_dir><mode>_df.csv`` line by line; the first two
        comma-separated fields of each line are taken as image id and label.
        When the end of the file is reached reading restarts from the top.
        Lines whose image file does not exist are reported and skipped, and
        lines with fewer than two fields are ignored.

        Side effects: sets ``self.channels`` to 1, increments
        ``self.processing_cnt`` per loaded image and writes a progress line
        to stdout.

        Args:
            mode: Prefix of the CSV file name (``"train"`` reads
                ``train_df.csv``).

        Yields:
            tuple: ``(x_buf, y_buf)``. ``x_buf`` is a ``float32`` array of
            shape ``(batch_size, img_width, img_heigth, 1)`` holding the
            pixel values divided by 255; ``y_buf`` is the result of
            ``keras.utils.to_categorical`` with ``self.no_classes`` classes.

        Raises:
            RuntimeError: If a complete pass over the CSV loads no image
                (empty file or every listed image missing). Without this
                check the generator would spin forever.
        """
        mname = 'image_data_generator'
        csv_path = self.train_data_dir + mode + '_df.csv'

        # Every image is converted to greyscale below, so batches are always
        # single-channel. Setting this before the first allocation keeps the
        # first batch the same shape as all later ones.
        self.channels = 1

        # The context manager closes the CSV when the generator is closed,
        # garbage-collected or aborted by an exception.
        with open(csv_path, 'r') as fd:
            loaded_in_pass = False  # any image loaded since the last rewind?

            while True:
                n_img_w = self.img_width
                n_img_h = self.img_heigth
                img_cnt = self.batch_size

                x_buf = np.zeros((img_cnt, n_img_w, n_img_h, self.channels),
                                 dtype='uint8')
                y_labels = []

                self.log(mname, "[{}] recs for set.".format(img_cnt), level=3)

                cnt = 0
                while cnt < img_cnt:
                    line = fd.readline()
                    if line == "":
                        # End of file: rewind, unless the whole pass was
                        # fruitless and rewinding could never help.
                        if not loaded_in_pass:
                            raise RuntimeError(
                                "No loadable image listed in [{}]".format(
                                    csv_path))
                        loaded_in_pass = False
                        fd.seek(0)
                        continue

                    fields = line.strip().split(',')
                    if len(fields) < 2:
                        # Blank or malformed line: nothing to load.
                        continue
                    image_id = fields[0]
                    label = fields[1]

                    imgpath = (self.img_dir_path + image_id +
                               self.img_filename_ext)

                    progress_sts = "[%.3fG] [%6d] %6d out of %6d [%45s]" % (
                        self.proc.memory_full_info()[1] /
                        (1024**3), self.processing_cnt, cnt, img_cnt, imgpath)
                    sys.stdout.write(progress_sts)
                    # Backspaces let the next status overwrite this one.
                    sys.stdout.write("\b" * len(progress_sts))
                    sys.stdout.flush()

                    if os.path.exists(imgpath):
                        myimg1 = myimg.myImg(imageid=image_id,
                                             config=self.myImg_config,
                                             path=imgpath)
                        myimg1.getGreyScaleImage2(convertFlag=True)
                        myimg1.padImage(n_img_w, n_img_h)

                        # NOTE(review): assumes getImage() returns a 2-D
                        # (img_width, img_heigth) array after padding.
                        x_buf[cnt, :, :, 0] = myimg1.getImage()
                        y_labels.append(int(label))

                        cnt += 1
                        self.processing_cnt += 1
                        loaded_in_pass = True
                    else:
                        print(
                            mname,
                            "****Image file [{}] doesn't exists!!!".format(
                                imgpath))
                        self.log(
                            mname,
                            "Image file [{}] doesn't exists!!!".format(
                                imgpath),
                            level=2)

                # Labels as a (batch, 1) column.
                y_buf = np.array(y_labels, dtype='uint8')
                y_buf = np.reshape(y_buf, (len(y_labels), 1))

                # Normalize data.
                x_buf = x_buf.astype('float32') / 255

                # Convert class vectors to binary class matrices.
                y_buf = keras.utils.to_categorical(y_buf, self.no_classes)

                yield (x_buf, y_buf)
Esempio n. 3
0
    def load_data_as_greyscale(self):
        """Write a padded greyscale copy of every labelled source image.

        Loads the label CSV into ``self.df``, builds ``self.myImg_config``,
        then walks the frame row by row. Rows whose output file already
        exists under ``self.img_croped_dir_path`` are skipped. For every
        other row whose source image exists, the image is converted to
        greyscale, padded to ``img_width`` x ``img_heigth`` and saved, and
        the row's ``imgexists``/``w``/``h`` columns are filled in. The
        annotated frame is finally written to ``u_img_set.csv`` inside
        ``self.train_data_dir``. Nothing is returned.
        """
        mname = "load_data_as_greyscale"

        self.log(mname,
                 "Loading Dataframe from [{}]".format(
                     self.train_label_data_file),
                 level=3)
        self.df = pd.read_csv(self.train_label_data_file)
        self.log(mname,
                 "Loaded [{}] recs".format(self.df['level'].count()),
                 level=3)

        # Shared configuration handed to every myImg instance.
        self.myImg_config = cutil.Config(configid="myConfId", cdir=self.cdir)
        self.myImg_config.setDdir(self.train_data_dir)
        self.myImg_config.setOdir(self.img_croped_dir_path)
        self.myImg_config.setIdir(self.img_dir_path)

        # Bookkeeping columns, filled in while iterating.
        for column, initial in (('h', 0), ('w', 0), ('imgpath', ""),
                                ('imgexists', False)):
            self.df[column] = initial

        target_w = self.img_width
        target_h = self.img_heigth

        total = self.df['level'].count()
        visited = 0
        missing = 0

        # Only the shape of this (empty) array is reported in the log below.
        x_train = np.zeros((0, target_w, target_h), dtype='uint8')
        levels = []

        for idx, row in self.df.iterrows():
            status = "%6d out of %6d" % (visited, total)
            sys.stdout.write(status)
            # Backspaces let the next status overwrite this one.
            sys.stdout.write("\b" * len(status))
            sys.stdout.flush()
            # Every row counts as visited, whichever branch handles it.
            visited += 1

            src_path = self.img_dir_path + row.image + self.img_filename_ext
            self.df.loc[idx, 'imgpath'] = src_path

            # Output already produced by an earlier run: nothing to do.
            out_path = (self.img_croped_dir_path + row.image +
                        self.img_filename_ext)
            if os.path.exists(out_path):
                continue

            if not os.path.exists(src_path):
                missing += 1
                continue

            image = myimg.myImg(imageid=str(idx),
                                config=self.myImg_config,
                                path=src_path)
            image.getGreyScaleImage(convertFlag=True)
            image.padImage(target_w, target_h)
            image.saveImage(img_type_ext='.jpeg', gen_new_filename=True)

            levels.append(row.level)

            self.df.loc[idx, 'imgexists'] = True
            # Dimensions are read after padding.
            dim_w, dim_h = image.getImageDim()
            self.df.loc[idx, 'w'] = dim_w
            self.df.loc[idx, 'h'] = dim_h

        # Labels as an (n, 1) column; only used for the shape log.
        y_train = np.array(levels, dtype='uint8')
        y_train = y_train.reshape((y_train.size, 1))

        shapes_msg = "x_train [{}] y_train [{}]".format(x_train.shape,
                                                        y_train.shape)
        self.log(mname, shapes_msg, level=3)
        self.log(mname, "Process dataset [{}]".format(visited), level=3)
        self.log(mname, "File missing [{}]".format(missing), level=3)

        max_dims_msg = "Max image width[{}] heigth[{}]".format(
            self.df['w'].max(), self.df['h'].max())
        self.log(mname, max_dims_msg, level=3)

        self.df.to_csv(self.train_data_dir + 'u_img_set.csv')
Esempio n. 4
0
    def load_img_data(self):
        """Load labelled images into in-memory train and test arrays.

        Reads the label CSV into ``self.df`` and processes the first
        ``self.img_processing_capacity`` records (all records when that
        value is 0). A reproducible random subset of record positions,
        sized by ``self.training_dataset_ratio`` and seeded with
        ``self.random_seed``, goes to the training set; the remaining
        positions go to the test set. Records whose image file is missing
        are counted and left out of both sets.

        Returns:
            tuple: ``((x_train, y_train), (x_test, y_test))``. The ``x``
            arrays are ``uint8`` with per-image shape
            ``(img_width, img_heigth)`` when ``self.channels == 1`` and
            ``(img_width, img_heigth, channels)`` otherwise; the ``y``
            arrays are ``uint8`` columns of shape ``(n, 1)``. Each ``x``
            array has exactly as many rows as its ``y`` array.
        """
        mname = "load_img_data"

        self.log(mname,
                 "Loading Dataframe from [{}]".format(
                     self.train_label_data_file),
                 level=3)
        self.df = pd.read_csv(self.train_label_data_file)
        self.log(mname,
                 "Loaded [{}] recs".format(self.df['level'].count()),
                 level=3)

        # Shared configuration handed to every myImg instance.
        self.myImg_config = cutil.Config(configid="myConfId", cdir=self.cdir)
        self.myImg_config.setDdir(self.train_data_dir)
        self.myImg_config.setOdir(self.img_croped_dir_path)
        self.myImg_config.setIdir(self.img_dir_path)

        self.df['h'] = 0
        self.df['w'] = 0
        self.df['imgpath'] = ""
        self.df['imgexists'] = False

        n_img_w = self.img_width
        n_img_h = self.img_heigth

        tot_cnt = self.img_processing_capacity
        if tot_cnt == 0:
            tot_cnt = self.df['level'].count()
        # Never plan for more records than the frame actually holds;
        # otherwise sampled positions could point past the data.
        tot_cnt = min(int(tot_cnt), len(self.df.index))
        cnt = 0
        file_missing = 0

        # Pick which record positions form the training set.
        np.random.seed(self.random_seed)
        train_dataset_sample = np.random.choice(
            range(0, tot_cnt),
            int(tot_cnt * self.training_dataset_ratio),
            replace=False)
        # A set gives a true membership test. The previous dict lookup
        # returned the stored position, so position 0 evaluated as falsy and
        # was always routed to the test set.
        train_positions = set(train_dataset_sample.tolist())

        train_samples_cnt = len(train_dataset_sample)
        test_samples_cnt = tot_cnt - train_samples_cnt

        if self.channels == 1:
            img_shape = (n_img_w, n_img_h)
        else:
            img_shape = (n_img_w, n_img_h, self.channels)
        x_train = np.zeros((train_samples_cnt,) + img_shape, dtype='uint8')
        x_test = np.zeros((test_samples_cnt,) + img_shape, dtype='uint8')
        y_train_buf = []
        y_test_buf = []

        train_cnt = 0
        test_cnt = 0
        self.log(mname,
                 "[{}] recs for training.".format(train_samples_cnt),
                 level=3)
        self.log(mname,
                 "[{}] recs for test.".format(test_samples_cnt),
                 level=3)

        for i, rec in self.df.iterrows():
            if cnt >= tot_cnt:
                break

            progress_sts = "%6d out of %6d" % (cnt, tot_cnt)
            sys.stdout.write(progress_sts)
            # Backspaces let the next status overwrite this one.
            sys.stdout.write("\b" * len(progress_sts))
            sys.stdout.flush()

            imgpath = self.img_dir_path + rec.image + self.img_filename_ext
            self.df.loc[i, 'imgpath'] = imgpath

            if os.path.exists(imgpath):
                myimg1 = myimg.myImg(imageid=str(i),
                                     config=self.myImg_config,
                                     path=imgpath)

                # NOTE(review): assumes getImage() matches img_shape -
                # confirm against myImg.
                if cnt in train_positions:
                    x_train[train_cnt] = myimg1.getImage()
                    y_train_buf.append(rec.level)
                    train_cnt += 1
                else:
                    x_test[test_cnt] = myimg1.getImage()
                    y_test_buf.append(rec.level)
                    test_cnt += 1
            else:
                file_missing += 1

            cnt += 1

        # Drop rows reserved for missing files so each x array lines up with
        # its label column.
        x_train = x_train[:train_cnt]
        x_test = x_test[:test_cnt]

        y_train = np.array(y_train_buf, dtype='uint8')
        y_train = np.reshape(y_train, (len(y_train_buf), 1))
        y_test = np.array(y_test_buf, dtype='uint8')
        y_test = np.reshape(y_test, (len(y_test_buf), 1))

        self.log(mname,
                 "x_train [{}] y_train [{}]".format(x_train.shape,
                                                    y_train.shape),
                 level=3)
        self.log(mname,
                 "x_test [{}] y_test [{}]".format(x_test.shape, y_test.shape),
                 level=3)

        self.log(mname, "Process dataset [{}]".format(cnt), level=3)
        self.log(mname, "File missing [{}]".format(file_missing), level=3)
        self.log(mname,
                 "Max image width[{}] heigth[{}]".format(
                     self.df['w'].max(), self.df['h'].max()),
                 level=3)

        return (x_train, y_train), (x_test, y_test)
Esempio n. 5
0
    def load_train_data(self):
        """Centre every labelled source image on a fixed-size RGB canvas.

        Loads the label CSV into ``self.df``, builds ``self.myImg_config``,
        then walks the frame row by row. Rows whose output file already
        exists under ``self.img_croped_dir_path`` are skipped. For every
        other row whose source image exists, the pixels are copied into the
        middle of a zero-filled ``(img_width, img_heigth, 3)`` array, which
        is saved through a second ``myimg.myImg`` instance; the row's
        ``imgexists``, ``w``, ``h``, ``_w`` and ``_h`` columns are then
        filled in. The annotated frame is finally written to
        ``u_img_set.csv`` inside ``self.train_data_dir``. Nothing is
        returned.
        """
        mname = "load_train_data"

        self.log(mname,
                 "Loading Dataframe from [{}]".format(
                     self.train_label_data_file),
                 level=3)
        self.df = pd.read_csv(self.train_label_data_file)

        # Shared configuration handed to every myImg instance.
        self.myImg_config = cutil.Config(configid="myConfId", cdir=self.cdir)
        self.myImg_config.setDdir(self.train_data_dir)
        self.myImg_config.setOdir(self.img_croped_dir_path)
        self.myImg_config.setIdir(self.img_dir_path)

        # Bookkeeping columns, filled in while iterating.
        for column, initial in (('h', 0), ('w', 0), ('imgpath', ""),
                                ('imgexists', False)):
            self.df[column] = initial

        canvas_w = self.img_width
        canvas_h = self.img_heigth

        total = self.df['level'].count()
        visited = 0
        missing = 0

        # Only the shape of this (empty) array is reported in the log below.
        x_train = np.zeros((0, canvas_w, canvas_h, 3), dtype='uint8')
        levels = []

        for idx, row in self.df.iterrows():
            status = "%6d out of %6d" % (visited, total)
            sys.stdout.write(status)
            # Backspaces let the next status overwrite this one.
            sys.stdout.write("\b" * len(status))
            sys.stdout.flush()
            # Every row counts as visited, whichever branch handles it.
            visited += 1

            src_path = self.img_dir_path + row.image + self.img_filename_ext
            self.df.loc[idx, 'imgpath'] = src_path

            # Output already produced by an earlier run: nothing to do.
            out_path = (self.img_croped_dir_path + row.image +
                        self.img_filename_ext)
            if os.path.exists(out_path):
                continue

            if not os.path.exists(src_path):
                missing += 1
                continue

            source = myimg.myImg(imageid=str(idx),
                                 config=self.myImg_config,
                                 path=src_path)

            # Paste the source pixels into the centre of a black canvas.
            # NOTE(review): assumes the image is no larger than the canvas
            # and that getImage() is laid out as (w, h, 3) - confirm.
            src_w, src_h = source.getImageDim()
            canvas = np.zeros((canvas_w, canvas_h, 3), dtype='uint8')
            w_off = int((canvas_w - src_w) / 2)
            h_off = int((canvas_h - src_h) / 2)
            canvas[w_off:(w_off + src_w),
                   h_off:(h_off + src_h), :] = source.getImage()

            # Persist the padded pixels via a second myImg built around them.
            padded = myimg.myImg(imageid=str(idx),
                                 config=self.myImg_config,
                                 path=row.image + self.img_filename_ext,
                                 img=canvas)
            padded.saveImage(img_type_ext='.jpeg', gen_new_filename=True)

            levels.append(row.level)

            self.df.loc[idx, 'imgexists'] = True
            # Original dimensions, queried again exactly as before.
            dim_w, dim_h = source.getImageDim()
            self.df.loc[idx, 'w'] = dim_w
            self.df.loc[idx, 'h'] = dim_h
            # Dimensions of the padded canvas.
            self.df.loc[idx, '_w'] = canvas.shape[0]
            self.df.loc[idx, '_h'] = canvas.shape[1]

        # Labels as an (n, 1) column; only used for the shape log.
        y_train = np.array(levels, dtype='uint8')
        y_train = y_train.reshape((y_train.size, 1))

        shapes_msg = "x_train [{}] y_train [{}]".format(x_train.shape,
                                                        y_train.shape)
        self.log(mname, shapes_msg, level=3)
        self.log(mname, "Process dataset [{}]".format(visited), level=3)
        self.log(mname, "File missing [{}]".format(missing), level=3)

        max_dims_msg = "Max image width[{}] heigth[{}]".format(
            self.df['w'].max(), self.df['h'].max())
        self.log(mname, max_dims_msg, level=3)

        self.df.to_csv(self.train_data_dir + 'u_img_set.csv')