def pre_prepare(base_path, function, file_filter):
    """Collect per-file results from the ``<base_path>/data`` directory.

    Walks the data directory with ``dfs``, which applies ``function`` to
    every file accepted by ``file_filter`` and appends the results to the
    accumulator list.

    :param base_path: project base directory (resolved via ``cvt_abs_path``).
    :param function: callable applied to each matching file by ``dfs``.
    :param file_filter: predicate selecting which files to process.
    :return: list of results accumulated by ``dfs``.
    """
    # NOTE: removed stray debug `print(__name__)` left over from development
    data_path = cvt_abs_path(os.path.join(base_path, 'data'))
    collection_ = []
    dfs(data_path, file_filter, function, collection_)
    return collection_
def exist_pkl(base_path):
    """Return True when both cached collection pickles already exist.

    Checks for ``train_collection.pkl`` and ``test_collection.pkl`` under
    ``<base_path>/stock``.

    :param base_path: project base directory (resolved via ``cvt_abs_path``).
    :return: bool — True only if both pickle files are present.
    """
    stock_path = cvt_abs_path(os.path.join(base_path, 'stock'))
    train_pkl_path = os.path.join(stock_path, 'train_collection.pkl')
    test_pkl_path = os.path.join(stock_path, 'test_collection.pkl')
    # direct boolean expression replaces the `if not A or not B: return
    # False else: return True` anti-idiom (same truth table)
    return os.path.exists(train_pkl_path) and os.path.exists(test_pkl_path)
def get_pkl(base_path):
    """Load the cached train/test collections from ``<base_path>/stock``.

    :param base_path: project base directory (resolved via ``cvt_abs_path``).
    :return: tuple ``(train_collection, test_collection)`` as unpickled
        from ``train_collection.pkl`` and ``test_collection.pkl``.
    """
    stock_dir = cvt_abs_path(os.path.join(base_path, 'stock'))
    loaded = []
    # train first, then test — order matters for the returned tuple
    for filename in ('train_collection.pkl', 'test_collection.pkl'):
        with open(os.path.join(stock_dir, filename), 'rb') as handle:
            loaded.append(pickle.load(handle))
    return loaded[0], loaded[1]
def save_pkl(base_path, train_collection, test_collection):
    """Persist the train/test collections as pickles under ``stock``.

    Creates ``<base_path>/stock`` if needed, then dumps the two collections
    to ``train_collection.pkl`` and ``test_collection.pkl``.

    :param base_path: project base directory (resolved via ``cvt_abs_path``).
    :param train_collection: object pickled to ``train_collection.pkl``.
    :param test_collection: object pickled to ``test_collection.pkl``.
    :return: None (side effect: writes two files).
    """
    stock_dir = cvt_abs_path(os.path.join(base_path, 'stock'))
    make_dirs(stock_dir)
    train_pkl_path = os.path.join(stock_dir, 'train_collection.pkl')
    test_pkl_path = os.path.join(stock_dir, 'test_collection.pkl')
    print("write paths to pkl:", train_pkl_path, test_pkl_path)
    # dump both payloads with one paired loop instead of two copies
    for target_path, payload in ((train_pkl_path, train_collection),
                                 (test_pkl_path, test_collection)):
        with open(target_path, 'wb') as handle:
            pickle.dump(payload, handle)
def search_autokeras(args):
    """Run an AutoKeras image-classifier search over the prepared data.

    Loads (or rebuilds) the train/test collections, converts them to numpy
    arrays, lets AutoKeras search for the best model (10 trials, 100
    epochs), and saves the exported model to ``<args.base>/stock/best.h5``.

    :param args: parsed CLI namespace; reads ``args.base`` and ``args.force``.
    :return: None (side effect: writes the exported model to disk).
    """
    # guard: cpu_count() - 2 is <= 0 on 1- or 2-core machines, which would
    # make Pool() raise ValueError
    workers = max(1, cpu_count() - 2)
    # context manager tears the pool down; the original never closed it
    with Pool(workers) as pool:
        if exist_pkl(args.base) and not args.force:
            print_("use the existing pkl file")
            train_collection, test_collection = get_pkl(args.base)
        else:
            print_("prepare data and dump into pkl file")
            collection = pre_prepare(cvt_abs_path(args.base), data_prepare, file_filter)
            train_collection, test_collection = split_train_test_set(collection)
        # NOTE(review): source formatting was lost, so it is ambiguous
        # whether these two maps were inside the else-branch; kept on both
        # paths — confirm against the original layout / pkl contents
        train_collection = pool.map(multi_prepare_record, train_collection)
        test_collection = pool.map(multi_prepare_record, test_collection)
        # pool.map already yields a list of (x, y) pairs; the original
        # wrapped each in a needless 1-tuple via list(zip(...)) and then
        # unwrapped it again with e[0]
        train_batch = pool.map(generate_x_y_, train_collection)
        test_batch = pool.map(generate_x_y_, test_collection)
    print('train size:', len(train_batch))
    print('test size:', len(test_batch))
    x_train = np.concatenate([pair[0] for pair in train_batch])
    y_train = np.concatenate([pair[1] for pair in train_batch])
    x_test = np.concatenate([pair[0] for pair in test_batch])
    y_test = np.concatenate([pair[1] for pair in test_batch])
    clf = ak.ImageClassifier(max_trials=10)
    clf.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100)
    model = clf.export_model()
    # os.path.join instead of hand-rolled '/'.join for portability
    target_folder = os.path.join(args.base, 'stock')
    make_dirs(target_folder)
    target_path = os.path.join(target_folder, 'best.h5')
    model.save(target_path)
import argparse

from tool.path_parser import cvt_abs_path
from tool.others import print_

# absolute path of the default data folder, resolved at import time
DATA_FOLDER = cvt_abs_path('../data')


def parse_argument():
    """Define the command-line interface and return the parsed namespace.

    :return: ``argparse.Namespace`` with the options below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", help="project name", type=str, default="TL")
    parser.add_argument("--base", help="base path", type=str, default="../")
    parser.add_argument("-train", "--train", help="training", action="store_true")
    parser.add_argument("--batch", help="training batch_size", type=int, default=32)
    parser.add_argument("--epoch", help="training epochs", type=int, default=100)
    parser.add_argument("--orderly_sample", help="orderly sample", action="store_true")
    parser.add_argument("-autokeras", "--autokeras", help="autokeras", action="store_true")
    parser.add_argument("-inference", "--inference", help="inferring", type=str)
    parser.add_argument("--not_augment", help="don`t augment when generate", action="store_true")
    parser.add_argument("--image", help="an inference image file", type=str)
    parser.add_argument("--images", help="an inference folder contain images", type=str, default='')
    parser.add_argument("--force", help="force to re-split test/train sets", action="store_true")
    parser.add_argument("--parallel", help="be done in parallel", type=int, default=3)
    return parser.parse_args()