Beispiel #1
0
 def load_data(self, need_shuffle=True):
     """Read the whole file at ``self.data_path`` and return it processed.

     The file is parsed as '#'-delimited text whose first row is the
     header. The ``target`` column is forced to ``str`` so pandas does not
     infer a numeric dtype for it.

     Args:
         need_shuffle: When truthy, the loaded frame is passed through
             ``shuffle`` before processing.
             NOTE(review): ``shuffle`` is presumably
             ``sklearn.utils.shuffle`` -- confirm against the file imports.

     Returns:
         Whatever ``__process_chunk`` returns for the unpacked result of
         ``__extract_xy`` applied to the full frame.
     """
     raise_if_not_found(self.data_path)
     # Builtin ``str`` replaces the ``np.str`` alias, which was deprecated in
     # NumPy 1.20 and removed in 1.24; the alias was the builtin itself, so
     # the parsed dtype is unchanged.
     chunk = pd.read_csv(self.data_path,
                         header=0,
                         delimiter="#",
                         dtype={'target': str})
     if need_shuffle:
         chunk = shuffle(chunk)
     return self.__process_chunk(*self.__extract_xy(chunk))
Beispiel #2
0
 def gen_data(self, need_shuffle=True):
     """Lazily yield processed batches read from ``self.data_path``.

     The file is parsed as '#'-delimited text with the first row as header
     and is consumed in chunks of ``self.batch_size`` rows. When
     ``need_shuffle`` is truthy each chunk is shuffled on its own, so rows
     never move between chunks.

     NOTE(review): unlike ``load_data`` this reader does not force the
     ``target`` column to ``str`` -- confirm that is intentional.

     Yields:
         The result of ``__process_chunk`` for each chunk, fed with the
         unpacked output of ``__extract_xy``.
     """
     raise_if_not_found(self.data_path)
     batches = pd.read_csv(
         self.data_path,
         header=0,
         delimiter="#",
         chunksize=self.batch_size,
     )
     for batch in batches:
         prepared = shuffle(batch) if need_shuffle else batch
         extracted = self.__extract_xy(prepared)
         yield self.__process_chunk(*extracted)
Beispiel #3
0
 def load_d6table(self):
     """Load the table stored at ``self.naics_codes_path`` as a DataFrame.

     The file is parsed as '#'-delimited text whose first row is the
     header. The ``code`` column is read as ``str``.
     NOTE(review): presumably this keeps codes from being coerced to
     integers (e.g. losing leading zeros) -- confirm against the data.

     Returns:
         The ``pandas.DataFrame`` produced by ``pd.read_csv``.
     """
     raise_if_not_found(self.naics_codes_path)
     # Builtin ``str`` replaces the ``np.str`` alias, which was deprecated in
     # NumPy 1.20 and removed in 1.24; the alias was the builtin itself, so
     # the parsed dtype is unchanged.
     return pd.read_csv(self.naics_codes_path,
                        header=0,
                        delimiter="#",
                        dtype={"code": str})