def main(args):
    """Run a trained model on feature tables (no ground truth) and save predictions.

    Loads the stored feature mean/std pickle and the trained h5 model from
    ``args.model_path``, loads features with ``Utils.load_features_nogt``,
    and writes prediction scores via ``Utils.compute_predictions``.

    Raises:
        IOError: if the mean/std pickle or the h5 model file is missing.
    """
    np.random.seed(args.seed)
    # CPU only instead of GPU
    if args.cpu_only:
        logging.info('Setting env for CPU-only mode...')
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

    # Load and process data
    # Provide objective to load (custom metric needed to deserialize the model);
    # dropped dead local: a second Utils.class_recall(1) was computed but unused.
    recall_0 = Utils.class_recall(0)
    custom_obj = {'metr': recall_0}

    logging.info('Loading model...')
    ## pkl: per-feature mean/std computed on the training set, reused to
    ## normalize features at prediction time
    logging.info(' Loading mstd...')
    F = os.path.join(args.model_path, args.mstd_name)
    if not os.path.exists(F):
        msg = 'Model file not available at data-path: {}'
        raise IOError(msg.format(F))
    with open(F, 'rb') as mstd:
        mean_tr, std_tr = pickle.load(mstd)

    ## h5: the trained keras model
    logging.info(' Loading h5...')
    F = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(F):
        msg = 'Model file not available at data-path: {}'
        raise IOError(msg.format(F))
    model = load_model(F, custom_objects=custom_obj)

    # outdir
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    logging.info('Loading features...')
    x, y, i2n = Utils.load_features_nogt(args.feature_file_table,
                                         force_overwrite=args.force_overwrite,
                                         pickle_only=args.pickle_only,
                                         n_procs=args.n_procs)
    logging.info('Loaded {} contigs'.format(len(set(i2n.values()))))
    n2i = Utils.reverse_dict(i2n)
    # Flatten per-metagenome feature lists into one list of contig matrices
    x = [xi for xmeta in x for xi in xmeta]
    y = np.concatenate(y)

    logging.info('Running model generator...')
    dataGen = Models.Generator(x, y, batch_size=64, shuffle=False,
                               norm_raw=0,
                               mean_tr=mean_tr, std_tr=std_tr)

    logging.info('Computing predictions...')
    scores = Utils.compute_predictions(n2i, dataGen, model,
                                       args.save_path, args.save_name)
def __init__(self, config):
    """Build a binary-classification CNN from *config*.

    Architecture: one Conv2D spanning all features, then (n_conv - 1)
    strided Conv2D blocks with doubling filter counts, average pooling,
    flatten, (n_fc - 1) dense+dropout layers, and a single sigmoid unit.
    Compiled with binary cross-entropy and per-class recall metrics.
    """
    # Hyperparameters copied off the config object
    self.max_len = config.max_len
    self.filters = config.filters
    self.n_conv = config.n_conv
    self.n_features = config.n_features
    self.pool_window = config.pool_window
    self.dropout = config.dropout
    self.lr_init = config.lr_init
    self.n_fc = config.n_fc
    self.n_hid = config.n_hid
    self.net = Sequential()
    # First conv: kernel spans the full feature axis, collapsing it to width 1
    self.net.add(
        Conv2D(self.filters,
               kernel_size=(2, self.n_features),
               input_shape=(self.max_len, self.n_features, 1),
               activation='relu',
               padding='valid'))
    self.net.add(BatchNormalization(axis=-1))
    # Each subsequent conv doubles the filters and halves the length (stride 2)
    for i in range(1, self.n_conv):
        self.net.add(
            Conv2D(2**i * self.filters,
                   kernel_size=(2, 1),
                   strides=2,
                   input_shape=(self.max_len, 1, 2**(i - 1) * self.filters),
                   activation='relu'))
        self.net.add(BatchNormalization(axis=-1))
    self.net.add(AveragePooling2D((self.pool_window, 1)))
    self.net.add(Flatten())
    # NOTE(review): lowercase `adam` is the legacy keras-1 style alias — confirm
    # the installed keras version still exposes it (newer versions use `Adam`).
    optimizer = keras.optimizers.adam(lr=self.lr_init)
    # binary classification
    for _ in range(self.n_fc - 1):
        self.net.add(Dense(self.n_hid, activation='relu'))
        self.net.add(Dropout(rate=self.dropout))
    self.net.add(Dense(1, activation='sigmoid'))
    # NOTE(review): Dropout AFTER the sigmoid output layer is unusual (it zeroes
    # the prediction itself during training) — confirm this is intentional.
    self.net.add(Dropout(rate=self.dropout))
    recall_0 = Utils.class_recall(0)
    recall_1 = Utils.class_recall(1)
    self.net.compile(loss='binary_crossentropy',
                     optimizer=optimizer,
                     metrics=[recall_0, recall_1])
    # Halve the learning rate when val_loss plateaus, down to 1% of the initial LR
    self.reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                       factor=0.5,
                                                       patience=5,
                                                       min_lr=0.01 * self.lr_init)
def main(args):
    """Main interface

    For every model directory under ``args.save_path`` that contains a
    ``final_model.h5``, load the model and its mean/std pickle, compute
    predictions on the dataset at ``args.data_path``, and write
    ``<model_dir>/predictions/<dataset>/<technology>.pkl``.
    """
    np.random.seed(12)
    # where to save the plot (falls back to the save path)
    save_plot = args.save_plot
    if save_plot is None:
        save_plot = args.save_path

    # Load and process data
    # Provide objective to load (custom metric needed to deserialize the model);
    # dropped dead locals: an unused Utils.class_recall(1) and an unused `auc` list.
    recall_0 = Utils.class_recall(0)
    custom_obj = {'metr': recall_0}

    dataset = args.data_path.split('/')[-1]
    for model_path in os.listdir(args.save_path):
        model_dir = os.path.join(args.save_path, model_path)
        h5_file = os.path.join(model_dir, 'final_model.h5')
        if not os.path.exists(h5_file):
            continue
        # makedirs creates intermediate dirs, so one call covers
        # both 'predictions' and 'predictions/<dataset>'
        pred_dir = os.path.join(model_dir, 'predictions', dataset)
        if not os.path.exists(pred_dir):
            os.makedirs(pred_dir)

        # Training-set mean/std used to normalize features at prediction time
        F = os.path.join(model_dir, 'mean_std_final_model.pkl')
        with open(F, 'rb') as mstd:
            mean_tr, std_tr = pickle.load(mstd)

        model = load_model(h5_file, custom_objects=custom_obj)

        tech = args.technology
        logging.info('Loading data...')
        if args.is_synthetic == 1:
            # synthetic data: ground truth available
            x, y, i2n = Utils.load_features(args.data_path,
                                            max_len=args.max_len,
                                            mode=args.mode,
                                            technology=tech)
        else:
            x, y, i2n = Utils.load_features_nogt(args.data_path,
                                                 max_len=args.max_len,
                                                 mode=args.mode)

        logging.info('Loaded {} contigs...'.format(len(set(i2n.values()))))
        n2i = Utils.reverse_dict(i2n)
        # Flatten per-metagenome feature lists into one list of contig matrices
        x = [xi for xmeta in x for xi in xmeta]
        y = np.concatenate(y)

        dataGen = Models.Generator(x, y, args.max_len, batch_size=64,
                                   shuffle=False,
                                   norm_raw=bool(args.norm_raw),
                                   mean_tr=mean_tr, std_tr=std_tr)

        # Bug fix: was `loggin.info(...)` (NameError typo)
        logging.info('Computing predictions for {}...'.format(tech))
        # Bug fix: was a bare `compute_predictions(y, n2i)` (undefined name)
        # that left the freshly built model/dataGen unused; call the Utils
        # helper the way the companion prediction script does.
        scores = Utils.compute_predictions_y_known(y, n2i, model, dataGen)

        outfile = os.path.join(pred_dir, tech + '.pkl')
        with open(outfile, 'wb') as spred:
            pickle.dump(scores, spred)
        logging.info('File written: {}'.format(outfile))
def main(args):
    """Main interface

    Loads a trained h5 model plus its mean/std normalization pickle, loads
    features (synthetic with ground truth, or not), and writes prediction
    scores to ``<save_path>/<save_name>_<technology>.pkl``.

    Raises:
        IOError: if the h5 model file is missing.
    """
    # init
    np.random.seed(args.seed)

    # Load and process data
    # Provide objective to load (custom metric needed to deserialize the model);
    # dropped dead locals: `save_plot` and a second class_recall were computed
    # but never used in this function.
    logging.info('Loading data...')
    recall_0 = Utils.class_recall(0)
    custom_obj = {'metr': recall_0}

    h5_file = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(h5_file):
        msg = 'Cannot find {} file in {}'
        raise IOError(msg.format(args.model_name, args.model_path))
    logging.info('Loading model: {}'.format(h5_file))
    model = load_model(h5_file, custom_objects=custom_obj)

    # model pkl: training-set mean/std used to normalize features
    pkl_file = os.path.join(args.model_path, args.mstd_name)
    logging.info('Loading file: {}'.format(pkl_file))
    with open(pkl_file, 'rb') as mstd:
        mean_tr, std_tr = pickle.load(mstd)

    # loading features
    if args.is_synthetic == 1:
        logging.info('Loading synthetic features')
        x, y, i2n = Utils.load_features(args.feature_file_table,
                                        max_len=args.max_len,
                                        technology=args.technology,
                                        force_overwrite=args.force_overwrite,
                                        n_procs=args.n_procs)
    else:
        logging.info('Loading non-synthetic features')
        x, y, i2n = Utils.load_features_nogt(
            args.feature_file_table,
            max_len=args.max_len,
            force_overwrite=args.force_overwrite,
            n_procs=args.n_procs)

    logging.info('Loaded {} contigs'.format(len(set(i2n.values()))))
    n2i = Utils.reverse_dict(i2n)
    # Flatten per-metagenome feature lists into one list of contig matrices
    x = [xi for xmeta in x for xi in xmeta]
    y = np.concatenate(y)

    logging.info('Running model generator...')
    dataGen = Models.Generator(x, y, args.max_len, batch_size=64,
                               shuffle=False,
                               norm_raw=bool(args.norm_raw),
                               mean_tr=mean_tr, std_tr=std_tr)

    logging.info('Computing predictions for {}...'.format(args.technology))
    scores = Utils.compute_predictions_y_known(y, n2i, model, dataGen)
    outfile = os.path.join(
        args.save_path,
        '_'.join([args.save_name, args.technology + '.pkl']))
    with open(outfile, 'wb') as spred:
        pickle.dump(scores, spred)
    logging.info('File written: {}'.format(outfile))
def __init__(self, config):
    """Build the CNN for the given *config*.

    The convolutional trunk is shared; the head depends on ``config.mode``:
      * 'chimera' / 'extensive': binary classifier — sigmoid output,
        binary cross-entropy loss, per-class recall metrics.
      * 'edit': regressor — linear output, MAE loss, explained-variance metric.

    Raises:
        ValueError: if ``config.mode`` is not one of the supported modes.
    """
    # Hyperparameters copied off the config object
    self.max_len = config.max_len
    self.filters = config.filters
    self.n_conv = config.n_conv
    self.n_features = config.n_features
    self.pool_window = config.pool_window
    self.dropout = config.dropout
    self.lr_init = config.lr_init
    self.mode = config.mode
    self.n_fc = config.n_fc
    self.n_hid = config.n_hid

    self.net = Sequential()
    # First conv: kernel spans the full feature axis, collapsing it to width 1
    self.net.add(
        Conv2D(self.filters,
               kernel_size=(2, self.n_features),
               input_shape=(self.max_len, self.n_features, 1),
               activation='relu',
               padding='valid'))
    self.net.add(BatchNormalization(axis=-1))
    # Each subsequent conv doubles the filters and halves the length (stride 2)
    for i in range(1, self.n_conv):
        self.net.add(
            Conv2D(2**i * self.filters,
                   kernel_size=(2, 1),
                   strides=2,
                   input_shape=(self.max_len, 1, 2**(i - 1) * self.filters),
                   activation='relu'))
        self.net.add(BatchNormalization(axis=-1))
    self.net.add(AveragePooling2D((self.pool_window, 1)))
    self.net.add(Flatten())

    optimizer = keras.optimizers.adam(lr=self.lr_init)

    if self.mode in ['chimera', 'extensive']:
        for _ in range(self.n_fc - 1):
            self.net.add(Dense(self.n_hid, activation='relu'))
            self.net.add(Dropout(rate=self.dropout))
        self.net.add(Dense(1, activation='sigmoid'))
        # NOTE(review): Dropout after the sigmoid output mirrors the original
        # code, but dropout on the output layer is unusual — confirm intent.
        self.net.add(Dropout(rate=self.dropout))
        recall_0 = Utils.class_recall(0)
        recall_1 = Utils.class_recall(1)
        self.net.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=[recall_0, recall_1])
    elif self.mode == 'edit':
        self.net.add(Dense(20, activation='relu'))
        # Bug fix: was `rate=dropout` — `dropout` is undefined here (NameError);
        # use the attribute stored above.
        self.net.add(Dropout(rate=self.dropout))
        self.net.add(Dense(20, activation='relu'))
        self.net.add(Dropout(rate=self.dropout))
        self.net.add(Dense(1, activation='linear'))
        self.net.compile(loss='mean_absolute_error',
                         optimizer=optimizer,
                         metrics=[Utils.explained_var])
    else:
        # Bug fix: was `raise ('...'.format(mode))` — raising a plain string
        # is a TypeError in Python 3, and `mode` was an undefined name.
        raise ValueError('Training mode "{}" not supported.'.format(self.mode))

    # Halve the learning rate when val_loss plateaus, down to 1% of the initial LR
    self.reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                       factor=0.5,
                                                       patience=5,
                                                       min_lr=0.01 * self.lr_init)