def read_image_frame(dir_path):
    cat_paths = utils.get_paths(dir_path, dirs=True)
    images = []
    for cat, cat_path in enumerate(cat_paths):
        print(cat_path)
        cat_imgs = utils.get_paths(cat_path, dirs=False)
        cat_imgs = [(cat, read_img(img_path)) for img_path in cat_imgs]
        images += cat_imgs
    return create_img_frame(images)
def call_make_map(kelpType, plot_type, experiments, polygons):
    if len(experiments) > 1:
        p = [utils.get_paths(polygons, exp) for exp in experiments]
        paths = [val for sublist in p for val in sublist]
    else:
        paths = utils.get_paths(experiment=experiments[0], polygons=polygons)
    make_map(paths, kelpType, plot_type, experiment=experiments, polygons=polygons)
def forward(self, g, nodepairs, relations=None, paths=None):
    """Pass the graph through the RGCN encoder and predict edge labels
    for the given node pairs.

    :g: input graph
    :nodepairs: batch of (start, end) drug-node index pairs
    :returns: relation scores (gathered at `relations` if provided)
    """
    node_embeddings, adj, emb, s_d, s_p = self.encoder(g)
    # assuming relations are only between drugs
    drug_start = node_embeddings['drug'][nodepairs[:, 0]]  # batch x 1 x dim
    drug_end = node_embeddings['drug'][nodepairs[:, 1]]    # batch x dim x 1
    paths = get_paths(adj, nodepairs, s_d, 5)
    path_embeddings = [[self.path_embedding(path, node_embeddings) for path in datum]
                       for datum in paths]
    context = [self.get_attention_context(pe, start, end)
               for pe, start, end in zip(path_embeddings,
                                         torch.unbind(drug_start),
                                         torch.unbind(drug_end))]
    context = torch.stack(context)  # Batch x dim
    all_features = torch.cat([drug_start, drug_end, context], -1)
    op = self.output_layer(all_features)
    if relations is not None:
        return torch.gather(op, 1, relations.unsqueeze(-1)).squeeze(-1)
    else:
        return op
def __init__(self, dispatcher, sess):
    model_path, config_path, vocab_path = get_paths('models/reddit')
    with open(config_path) as f:
        saved_args = cPickle.load(f)
    with open(vocab_path) as f:
        self.chars, self.vocab = cPickle.load(f)
    net = Model(saved_args, True)
    saver = tf.train.Saver(net.save_variables_list())
    saver.restore(sess, model_path)
    self.sess = sess
    self.net = net
    self.g = tf.get_default_graph()
    start_handler = CommandHandler('start', self.start)
    end_handler = CommandHandler('end', self.end)
    text_handler = MessageHandler(Filters.text, self.message)
    dispatcher.add_handler(start_handler)
    dispatcher.add_handler(text_handler)
    dispatcher.add_handler(end_handler)
    self.chat_rooms = {}
def get_fprop_fn(variable_shape=False, include_pool=True):
    """
    Build a theano function that uses the SAE weights to get convolved
    (or pooled, if include_pool is True) features from a given input.
    """
    conf = utils.get_config()
    paths = utils.get_paths()
    ae = serial.load(paths['sae']['model'])
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])
    batch_size = conf[cnn_layer]['batch_size']
    nhid = conf['sae']['nhid']
    patch_size = conf['patch_size']
    region_size = conf['region_size']

    input = T.tensor4('input')
    filter_shape = (nhid, 1, patch_size, patch_size)
    filters = theano.shared(ae.get_weights().T.reshape(filter_shape))
    if variable_shape:
        out = conv.conv2d(input, filters)
    else:
        image_shape = [batch_size, 1, region_size, region_size]
        out = conv.conv2d(input, filters, filter_shape=filter_shape,
                          image_shape=image_shape)
    if include_pool:
        pool_fn = getattr(out, conf['pool_fn'])
        out = pool_fn(axis=(2, 3))
    return theano.function([input], out)
def get_logger():
    """
    get_logger(): Creates a logger object that writes messages to stdout
    and to a rotating log file.
    """
    log_dir = os.path.join(utils.get_paths()['logs'])
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_path = os.path.join(log_dir, 'pantri.log')
    logger = logging.getLogger(__name__)
    if not len(logger.handlers):
        format_str = '[%(asctime)s] %(levelname)s: %(message)s'

        # Define stdout handler
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter(format_str))

        # Define log handler
        log_file = logging.handlers.TimedRotatingFileHandler(
            log_path, when='midnight', interval=1, backupCount=14)
        # log_file = logging.FileHandler(log_path)
        log_file.setFormatter(logging.Formatter(format_str))

        # Add handlers
        logger.addHandler(console)
        logger.addHandler(log_file)
        logger.setLevel(logging.DEBUG)
    return logger
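# A minimal usage sketch (not part of the original module): get_logger() is
# idempotent thanks to the handler check above, so repeated calls are safe.
# The message strings below are hypothetical.
logger = get_logger()
logger.info('pantri sync started')   # goes to stdout and pantri.log
logger.debug('verbose diagnostics')  # DEBUG level is enabled on the logger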
def get_feats_in_partitions():
    """
    Extracts features from the whole dataset and splits them into
    train, validation and test sets.
    """
    conf = utils.get_config()
    paths = utils.get_paths()
    rows = utils.load_csv()
    filters = conf['filters']
    region_size = conf['region_size']
    region_stride = conf['region_stride']
    filtered_rows = [
        row for row in rows if utils.check_filter(row, conf['filters'])
    ]
    train_rows, valid_rows, test_rows = utils.split_dataset(
        filtered_rows, conf['valid_percent'], conf['test_percent'],
        rng=conf['rng_seed'])
    conv = get_fprop_fn(False)
    print 'Getting features from train...'
    X_train = get_feats_from_rows(train_rows, conv, conf['stride'])
    print 'Getting features from valid...'
    X_valid = get_feats_from_rows(valid_rows, conv, conf['stride'])
    print 'Getting features from test...'
    X_test = get_feats_from_rows(test_rows, conv, conf['stride'])
    y_train = [row['classification'] == 'Malign' for row in train_rows]
    y_valid = [row['classification'] == 'Malign' for row in valid_rows]
    y_test = [row['classification'] == 'Malign' for row in test_rows]
    return X_train, y_train, X_valid, y_valid, X_test, y_test
def load_lfw():
    file_ext = 'jpg'  # observe, no '.' before jpg
    dataset_path = './data/lfw'
    pairs_path = './data/pairs.txt'
    pairs = utils.read_pairs(pairs_path)
    path_list, issame_list = utils.get_paths(args.dataset_path, pairs, file_ext)

    print('==> Preparing data..')
    # Define data transforms
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]
    test_transform = transforms.Compose([
        transforms.Scale((250, 250)),  # make 250x250
        transforms.CenterCrop(150),    # then take 150x150 center crop
        # resized to the network's required input size
        transforms.Scale((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Create data loader
    test_loader = torch.utils.data.DataLoader(
        data_loader.LFWDataset(path_list, issame_list, test_transform),
        batch_size=args.batch_size, shuffle=False)
    return test_loader
def read_image_frame(dir_path):
    paths = utils.get_paths(dir_path, dirs=True)
    images = []
    for cat, path in enumerate(paths):
        cat_imgs = utils.read_img_dir(path)
        cat_imgs = [standard_image(cat, img) for img in cat_imgs]
        images += cat_imgs
    return create_img_frame(images)
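# Hedged usage sketch for either read_image_frame variant above: both assume a
# layout of one subdirectory per category, so the enumerate() index becomes the
# integer class label (first subdirectory -> label 0, and so on). The path
# 'data/images' is hypothetical.
frame = read_image_frame('data/images')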
def main():
    paths = get_paths()
    music_files = list(paths["data_dir"].glob("*.mid"))

    # randomly select 10% test, 90% train
    # 10% train is enough in this case, as these few files themselves will
    # have over 10k sequences
    random.shuffle(music_files)
    test_count = len(music_files) // 10
    # train_count = len(music_files) - test_count
    test_files = music_files[:test_count]
    train_files = music_files[test_count:]

    # create a vocab of all possible notes
    # this is the main drawback of this pre-processing approach
    # we don't know what to do when we get unseen data, how to encode?
    notes = set()

    # delete old test-train data before generating new test-train data
    shutil.rmtree(paths["train_dir"], ignore_errors=True)
    shutil.rmtree(paths["test_dir"], ignore_errors=True)
    Path(paths["metadata_dir"]).mkdir(parents=True, exist_ok=True)
    Path(paths["test_dir"]).mkdir(parents=True, exist_ok=True)

    # create train directory
    print("Generating processed train data:\n")
    train_notes = create_dataset(train_files, dest_folder=paths["train_dir"])
    # for train we'll have a single file with all the data concatenated
    with open(str(paths["train_dir"] / "notes.pkl"), "wb") as file_path:
        pickle.dump(train_notes, file_path)
    notes |= set(train_notes)
    print("Generated train data\n\n\n")

    # create test directory
    print("Generating processed test data:\n")
    test_notes = create_dataset(test_files, dest_folder=paths["test_dir"])
    notes |= set(test_notes)
    print("Generated test data\n\n\n")

    # TODO: fix this mess later
    # create the vocab here itself, since doing so later might give problems
    # while playing test data
    pitch_names = sorted(notes)
    # create dictionaries to map pitches to integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitch_names))
    int_to_note = dict((number, note) for number, note in enumerate(pitch_names))
    with open(paths["metadata_dir"] / "note_to_int.pkl", "wb") as f:
        pickle.dump(note_to_int, f)
    with open(paths["metadata_dir"] / "int_to_note.pkl", "wb") as f:
        pickle.dump(int_to_note, f)
    print("\nFinished Pre-processing.\n\n\n")
def __init__(self, which_set):
    conf = utils.get_config()
    paths = utils.get_paths()
    region_size = conf['region_size']
    self.h5file = tables.open_file(paths[which_set])
    node = self.h5file.root.Data
    axes = ('b', 0, 1, 'c')
    channels = node.X.shape[1] / (region_size * region_size)
    view_converter = dense_design_matrix.DefaultViewConverter(
        (region_size, region_size, channels), axes)
    super(BCDR, self).__init__(
        X=node.X, view_converter=view_converter, y=node.y)
def __init__(self, which_set):
    conf = utils.get_config()
    paths = utils.get_paths()
    region_size = conf['region_size']
    h5file = tables.open_file(paths[which_set])
    node = h5file.root.Data
    X = node.X.read()
    channels = node.X.shape[1] / (region_size * region_size)
    X = X.reshape(
        (X.shape[0], conf['region_size'], conf['region_size'], channels))
    y = node.y.read()
    h5file.close()
    super(BCDR_On_Memory, self).__init__(topo_view=X, y=y)
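# Hedged usage sketch (hypothetical call sites, not from the original code):
# BCDR streams X and y straight from the open HDF5 file, while BCDR_On_Memory
# reads everything via node.X.read() and closes the file, trading RAM for
# faster epochs.
ds_stream = BCDR('train')            # keeps self.h5file open for lazy access
ds_memory = BCDR_On_Memory('valid')  # whole split loaded into memory up front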
def __init__(self, options):
    """
    __init__(self): Instantiate class variables
    """
    # TODO research decorators for logging and configs
    if not options:
        options = config.get_options('default', {})
    self.options = options
    self.logger = logger.get_logger()
    self.paths = utils.get_paths()
    self.git_path = self.paths['repo_root']
def show_demo():
    print("Request method:", request.method)
    entity = request.args.to_dict().get('entity', '')
    # mention = request.args.to_dict().get('mention','')
    if entity == '':
        return render_template('demo.html')
    elif "source:" in entity:
        print(entity)
        line = entity.split(':')
        params = line[1].split(",")
        contents = advogato_data_KG_source('source', params[0], int(params[1]))
        print(contents)
        return render_template('index.html', contents=contents)
    elif "target:" in entity:
        print(entity)
        line = entity.split(':')
        contents = advogato_data_KG_target('target', line[1])
        print(contents)
        return render_template('index.html', contents=contents)
    elif "mention:" in entity:
        print(entity)
        line = entity.strip().split(":")
        mention = line[1]
        contents = mention2entity(mention)
        print(contents)
        return render_template('index.html', contents=contents)
    elif "attr:" in entity:
        print(entity)
        line = entity.strip().split(":")
        user = line[1]
        contents = get_page_rank(user)
        print(contents)
        return render_template('index.html', contents=contents)
    elif "path:" in entity:
        print(entity)
        line = entity.strip().split(":")
        entities = line[1].split(",")
        source = entities[0]
        target = entities[1]
        cutoff = entities[2]
        print(source, target, cutoff)
        contents = get_paths(source, target, cutoff)
        return render_template('index.html', contents=contents)
    else:
        contents = KG_View(entity)
        print(contents)
        return render_template('index.html', contents=contents[0])
def main():
    in_z = 0
    test_volpath = os.path.join(args.datapath, 'test')
    out_file = os.path.join(args.datapath, 'fakes')
    checkpoint_path = os.path.join(args.datapath, 'checkpoint.pth')
    test_segpath = test_volpath
    double_vol = False

    model = models.Net23(2)
    cuda = args.cuda
    if cuda:
        model.cuda()
    model.load_state_dict(torch.load(checkpoint_path))
    batch_size = args.batch_size

    orig_dim = 256
    sqr = transforms.Square()
    center = transforms.CenterCrop2(224)
    scale = transforms.Scale(orig_dim)
    transform_plan = [sqr, scale, center]
    num_labels = 2
    series_names = ['Mag']
    seg_series_names = ['AV']

    f = preprocess.gen_filepaths(test_segpath)

    mult_inds = []
    for i in f:
        if 'volume' in i:
            mult_inds.append(int(re.findall('\d+', i)[0]))
    mult_inds = sorted(mult_inds)
    mult_inds = np.unique(mult_inds)
    mult_inds = mult_inds[0:5]

    volpaths, segpaths = utils.get_paths(mult_inds, f, series_names,
                                         seg_series_names, test_volpath,
                                         test_segpath)

    out = utils.test_net_cheap(mult_inds, in_z, model, transform_plan,
                               orig_dim, batch_size, out_file, num_labels,
                               volpaths, segpaths, nrrd=True,
                               vol_only=double_vol, get_dice=True,
                               make_niis=False, cuda=cuda)

    out_csv = os.path.join(args.datapath, 'out.csv')
    out.to_csv(out_csv, index=False)
def main():
    if not check_data():
        exit()
    img_paths_train, img_paths_val = get_paths()
    print('Successfully verified data...')

    train_generator = DataGenerator2D(img_paths_train['path'], './data',
                                      batch_size=1, shuffle=True,
                                      augmentation_rate=0.5)
    val_generator = DataGenerator2D(img_paths_val['path'], './data',
                                    batch_size=1, shuffle=False,
                                    augmentation_rate=0)
    print('Loaded data generators...')

    optimizer = Adam(lr=0.0001, amsgrad=True)
    model = get_simple_model()
    print('Model Loaded')
    print(model.summary())
    model.compile(loss=depth_loss_function, optimizer=optimizer, metrics=['mae'])
    print('Model Compiled... Starting Training...')

    tensorboard = TensorBoard(log_dir="./logs/DenseDepth/{}".format(time()),
                              histogram_freq=1, write_graph=True)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    filepath = ("./checkpoints/" + "DenseDepth-" +
                "saved-model-{epoch:03d}-{val_loss:.5f}.hdf5")
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=False)
    callbacks_list = [checkpoint, tensorboard, early_stopping]

    history = model.fit_generator(train_generator, epochs=NUM_EPOCHS,
                                  shuffle=True, callbacks=callbacks_list,
                                  validation_data=val_generator)
def train(
    model_type,
    path_args,
    training_args,
    model_args,
    output_args,
    load_dataset_args,
    checkpoint_args,
    extension_args,
):
    """
    ---------------------------------------------
    Input: model type and the argument dicts
    Output: (history, model)
    Run the test harness for evaluating a model
    ---------------------------------------------
    """
    pretrained = training_args.pop("pretrained")
    results_folder = training_args.pop("results_folder")
    data_format = load_dataset_args["data_format"]
    checkpoint_path = checkpoint_args["checkpoint_path"]

    paths = utils.get_paths(**path_args)
    utils.check_folders(paths, **extension_args)
    keras.backend.set_image_data_format(data_format)

    callbacks = []
    callbacks.append(utils.get_early_stopping_callback())
    callbacks.append(utils.get_tensorboard_directory_callback())
    callbacks.append(utils.get_checkpoint_callback(checkpoint_path))

    if pretrained:
        model = keras.models.load_model(checkpoint_path)
    elif model_type == "unet":
        model = unet.define_model(output_args, **model_args)

    train, val = datagen.load_dataset(paths, load_dataset_args)
    history = model.fit_generator(train, validation_data=val,
                                  callbacks=callbacks, **training_args)
    return (history, model)
def main():
    book_paths = get_paths('../Data/books')
    book2stats = {}
    for book_path in book_paths:
        stats = get_basic_stats(book_path)
        # str.strip('.txt') strips characters, not the suffix; use splitext
        book = os.path.splitext(os.path.basename(book_path))[0]
        print(book, stats)
        book2stats[book] = stats
        with open(f'top_20_{book}.txt', 'w') as f:
            f.write("\n".join(stats['top_20_tokens']))

    stats2book_with_highest_value = {
        "num_sents": max(book2stats, key=lambda book: book2stats[book]["num_sents"]),
        "num_tokens": max(book2stats, key=lambda book: book2stats[book]["num_tokens"]),
        "vocab_size": max(book2stats, key=lambda book: book2stats[book]["vocab_size"]),
        "num_chapters_or_acts": max(book2stats, key=lambda book: book2stats[book]["num_chapters_or_acts"]),
    }
    print(stats2book_with_highest_value)
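# Hedged design note (an equivalent rewrite, not the original): the four max()
# calls above differ only in the key name, so they can be collapsed into one
# dict comprehension over the stat keys.
stats2book_with_highest_value = {
    key: max(book2stats, key=lambda book: book2stats[book][key])
    for key in ("num_sents", "num_tokens", "vocab_size", "num_chapters_or_acts")
}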
def main():
    conf_file = sys.argv[1] if len(sys.argv) > 1 else None
    conf = utils.get_config(conf_file)
    paths = utils.get_paths(conf)
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])
    with open(paths[cnn_layer]['yaml']) as f:
        src = f.read()

    # Get batch size from validation set to report roc_auc from a single batch
    ds = datasets.BCDR('valid')
    monitoring_batch_size = ds.y.shape[0]
    ds.h5file.close()

    train_ds_class = 'BCDR_On_Memory' if conf['train_on_memory'] else 'BCDR'
    valid_ds_class = 'BCDR_On_Memory' if conf['valid_on_memory'] else 'BCDR'
    params = utils.flatten(conf)
    params.update({
        'train_ds_class': train_ds_class,
        'valid_ds_class': valid_ds_class,
        'monitoring_batch_size': monitoring_batch_size,
        'save_path': paths[cnn_layer]['model'],
        'best_path': paths[cnn_layer]['best_model']
    })
    train = yaml_parse.load(src % params)
    if os.path.isfile(train.save_path):
        print '%s already exists, skipping...' % (train.save_path)
    else:
        if conf['load_pretrained']:
            print 'Setting pretrained filters...'
            ae = serial.load(paths['sae']['model'])
            W = ae.get_weights().T.reshape(
                train.model.layers[0].transformer._filters_shape)
            train.model.layers[0].transformer._filters.set_value(W)
        train.main_loop()
        print 'Done!'
def __init__(self, sess, bot_url):
    self.bot_url = bot_url
    model_path, config_path, vocab_path = get_paths('bot/reddit')
    with open(config_path) as f:
        saved_args = pickle.load(f)
    with open(vocab_path) as f:
        self.chars, self.vocab = pickle.load(f)
    net = Model(saved_args, True)
    saver = tf.train.Saver(net.save_variables_list())
    saver.restore(sess, model_path)
    self.sess = sess
    self.net = net
    self.g = tf.get_default_graph()
    self.chat_rooms = {}
def __init__(self, cli_options={}):
    """
    __init__(self): Instantiate class variables
    """
    # TODO Update message with git repo name
    # Script must be run within the git repo
    assert utils.verify_git_repo(), 'Must be run within the git repo'
    self.logger = logger.get_logger()
    self.paths = utils.get_paths()
    self.git_path = self.paths['repo_root']
    self.gitignore = self.read_gitignore()

    # Get options from config
    self.shelf = 'default'
    if 'shelf' in cli_options:
        self.shelf = cli_options['shelf']
    if 'objects' in cli_options:
        self.shelf = utils.get_shelf_directory(cli_options['objects'])
    self.options = config.get_options(self.shelf, cli_options)
def __init__(self, which_set, train_dataset=None):
    conf = utils.get_config()
    paths = utils.get_paths()
    region_size = conf['region_size']
    h5file = tables.open_file(paths[which_set])
    node = h5file.root.Data
    X = node.X.read()
    num_channels = node.X.shape[1] / (region_size * region_size)
    im_shape = (conf['region_size'], conf['region_size'])
    X = X.reshape((X.shape[0],) + im_shape + (num_channels,))
    y = node.y.read()
    self.feats = node.feats.read()
    h5file.close()

    if train_dataset is None:
        self.feats_mean = self.feats.mean(axis=0)
        self.feats_std = self.feats.std(axis=0)
        self.feats = (self.feats - self.feats_mean) / self.feats_std
    else:
        feats_mean = train_dataset.feats_mean
        feats_std = train_dataset.feats_std
        self.feats = (self.feats - feats_mean) / feats_std
    self.y = self.feats

    source = ('features', 'targets0', 'targets1')
    conv_space = Conv2DSpace(
        shape=im_shape, num_channels=num_channels, axes=('b', 0, 1, 'c'))
    target_space = VectorSpace(y.shape[1])
    shape_space = VectorSpace(self.feats.shape[1])
    space = CompositeSpace([conv_space, target_space, shape_space])
    data = (X.astype(theano.config.floatX),
            y.astype(theano.config.floatX),
            self.feats.astype(theano.config.floatX),)
    super(BCDRComposite, self).__init__(
        data=data, data_specs=(space, source))
transform_plan = [sqr, scale, center]
series_names = ['echo']
seg_series_names = ['echo']

model = models.Net23(num_labels)
model.cuda()
model.load_state_dict(torch.load(checkpoint_path))

f_s = preprocess.gen_filepaths(test_segpath)
f_v = preprocess.gen_filepaths(test_volpath)

mult_inds = []
for i in f_s:
    if 'segmentation' in i:
        mult_inds.append(int(re.findall('\d+', i)[0]))
mult_inds = sorted(mult_inds)
mult_inds = np.unique(mult_inds)

volpaths, segpaths = utils.get_paths(mult_inds, f_s, f_v, series_names,
                                     seg_series_names, test_volpath,
                                     test_segpath)

t_transform_plan = transform_plan

utils.test_net_cheap(test_volpath, test_segpath, mult_inds, 0, model,
                     t_transform_plan, original_size, batch_size, out_file,
                     num_labels, num_labels, volpaths, segpaths, nrrd=True,
                     vol_only=False, get_dice=False, make_niis=True)
# For unbalanced dataset we create a weighted sampler
# * Balanced class sampling: https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
weights = utils.make_weights_for_balanced_classes(dataset_train.imgs,
                                                  len(dataset_train.classes))
weights = torch.DoubleTensor(weights)
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
train_loader = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=TRAIN_BATCH_SIZE,
                                           sampler=sampler, drop_last=True)
num_class = len(train_loader.dataset.classes)
print('Number of Training Classes: %d' % num_class)

pairs = utils.read_pairs(PAIR_TEXT_PATH)
path_list, issame_list = utils.get_paths(VAL_PATH, pairs, FILE_EXT)
val_loader = torch.utils.data.DataLoader(
    data_loader.LFWDataset(path_list, issame_list, val_transform),
    batch_size=VAL_BATCH_SIZE, shuffle=False)

# ======= Model & Optimizer =======#
if MODEL_NAME.lower() == 'resnet18':
    model = torchvision.models.resnet18(pretrained=True)
elif MODEL_NAME.lower() == 'resnet34':
    model = torchvision.models.resnet34(pretrained=True)
elif MODEL_NAME.lower() == 'resnet50':
    model = torchvision.models.resnet50(pretrained=True)
elif MODEL_NAME.lower() == 'resnet101':
    model = torchvision.models.resnet101(pretrained=True)
elif MODEL_NAME.lower() == 'resnet152':
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley",
                seed=0,
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam',
                args=None,
                ignore_net=False):
    '''
    if dataset == 'coco' and adv:
        class DummyArgs:
            num_object = 79
            finetune = False
            layer = 'generated_image'
            autoencoder_finetune = True
            finetune = True
        model = balanced_models.ObjectMultiLabelAdv(DummyArgs(), 79, 300, True, 1)
        ok = torch.load('model_best.pth.tar', encoding='bytes')
        state_dict = {key.decode("utf-8"): ok[b'state_dict'][key] for key in ok[b'state_dict']}
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
    '''
    if leakage:
        assert post
    if epoch is not None:
        epoch = "_" + str(epoch)
    else:
        epoch = ""
    if len(args.custom_end) > 0:
        args.custom_end = "_" + str(args.custom_end)
    if baseline:
        model_end = "resnet_base_" + str(ratio) + epoch + '.pt'
        if not post:
            n2v_end = "resnet_n2v_base_" + str(ratio) + epoch + '.pt'
        else:
            n2v_end = "resnet_n2v_base_after_" + str(ratio) + epoch + '.pt'
    else:
        if not adv:
            model_end = "resnet_debias_" + str(ratio) + epoch + '.pt'
            if not post:
                n2v_end = "resnet_n2v_debias_" + str(ratio) + epoch + '.pt'
            else:
                n2v_end = "resnet_n2v_debias_after_" + str(ratio) + epoch + str(args.custom_end) + '.pt'
        else:
            model_end = "resnet_adv_" + str(ratio) + '.pt'
            if not post:
                n2v_end = "resnet_n2v_adv_" + str(ratio) + '.pt'
            else:
                n2v_end = "resnet_n2v_adv_after_" + str(ratio) + epoch + '.pt'
    if dataset != 'bam' and dataset != 'coco':
        model_end = model_end.replace('_' + str(ratio), '')
        n2v_end = n2v_end.replace('_' + str(ratio), '')
    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end='leakage/' + n2v_end.replace('n2v', 'mlp') if leakage else n2v_end,
            n2v_module=module,
            experiment=experiment,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(base_folder, str(seed), experiment, module,
                                  model_end)
        n2v_path = os.path.join(
            base_folder, str(seed), experiment, module,
            'leakage/' + n2v_end.replace('n2v', 'mlp') if leakage else n2v_end)
    if dataset == 'bam':
        trainloader, _ = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(0.5), specific=specific)
    elif dataset == 'coco':
        tmp_args = copy.deepcopy(args)
        tmp_args.ratio = ratio
        if int(ratio) > 0:
            tmp_args.balanced = True
        if leakage:
            tmp_args.gender_balanced = True
        trainloader, testloader = coco_dataload.get_data_loader_coco(tmp_args)
    else:
        trainloader, testloader = dataload.get_data_loader_idenProf(
            'idenprof', train_shuffle=True, train_batch_size=64,
            test_batch_size=64, exclusive=True)
    if not (dataset == 'coco' and adv):
        assert os.path.exists(model_path), model_path
    if post:
        # since we have to run a separate script, might not have finished...
        if not leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra = model_extra + '_after'
            if tcav:
                pass
            elif force:
                post_train.train_net2vec(
                    trainloader, testloader, device, seed,
                    specific=specific,
                    p=ratio,
                    n_epochs=20,
                    module=module,
                    lr=.01,
                    out_file=None,
                    base_folder=base_folder,
                    experiment1=experiment,
                    experiment2=experiment,
                    model_extra=model_extra,
                    n2v_extra=n2v_extra,
                    with_n2v=True,
                    nonlinear=False,  # might want to change this later
                    model_custom_end=epoch.replace('_', ''),
                    n2v_custom_end=epoch.replace('_', ''),
                    multiple=multiple,
                    dataset=dataset)
            else:
                raise Exception('Run trial again')
        elif leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra = model_extra + '_after'
            if force:
                post_train.train_leakage(
                    trainloader, testloader, device, seed,
                    specific=specific,
                    p=ratio,
                    n_epochs=20,
                    module=module,
                    lr=5e-5,  # leakage model uses adam
                    out_file=None,
                    base_folder=base_folder,
                    experiment1=experiment,
                    experiment2=experiment,
                    model_extra=model_extra,
                    n2v_extra=n2v_extra,
                    with_n2v=True,
                    nonlinear=True,  # MLP leakage model
                    model_custom_end='',
                    n2v_custom_end='',
                    dataset=dataset)
            else:
                raise Exception('Run trial again')
    else:
        # should've been saved during training if not ported from tianlu
        if not (dataset == 'coco' and adv):
            assert os.path.exists(n2v_path)
    num_attributes = 10 + 9 + 20 if multiple else 12
    num_classes = 10
    if dataset == 'coco':
        num_attributes = 81
        num_classes = 79
    model, net, net_forward, activation_probe = models.load_models(
        device,
        None if (dataset == 'coco' and adv) else lambda x, y, z: models.resnet_(
            pretrained=True,
            custom_path=x,
            device=y,
            initialize=z,
            num_classes=num_classes,
            size=50 if (dataset == 'bam') or (dataset == 'coco') else 34),
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module='fc' if leakage else module,
        num_attributes=2 if leakage else num_attributes,
        model_init=False,
        n2v_init=False,
        nonlinear=leakage,
        ignore_net=ignore_net)
    print(n2v_path)
    return model, net, net_forward, activation_probe
model = torch.load(model_path).cuda()

# prepare dataset
db = prepare_db(opt)
# use only the evaluation subset. use db['train'] for fetching the training subset
dataset = db['eval']

# ==================================================================================
# compute saliency maps for different inputs for one splitting node
# pick a tree index and splitting node index (tree_idx must be defined since
# it is used below)
tree_idx = 0
# node_idx = 0  # 0 - 510 for the 511 splitting nodes in a tree of depth 9
# get saliency maps for a specified node for different input tensors
# utils.get_node_saliency_map(dataset, model, tree_idx, node_idx, name=opt.dataset)
# ==================================================================================

# get the computational paths for some random inputs
sample, paths, class_pred = utils.get_paths(dataset, model, tree_idx,
                                            name=opt.dataset)
# for each sample, compute and plot the decision saliency map, which reflects
# how the input will influence the decision-making process
utils.get_path_saliency(sample, paths, class_pred, model, tree_idx,
                        name=opt.dataset)
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils

repo_root = utils.get_paths()['repo_root']

""" Default config options"""
default = {
    'storage_url': 'https://example.com',
    'auth_url': 'https://example.com/auth/',
    'object_store_container': 'blah',
    'ignore_patterns': [
        '._*',
        '.__*',
        '.TemporaryItems*',
        '._.TemporaryItems',
        '.DS_Store',
        '*.pyc',
    ],
    'dest_sync': os.path.join(repo_root, 'dest_sync'),
    'checksum': False,
    'binary_overrides': []
}

example_shelf = {'dest_sync': repo_root}
def run(type):
    if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
        abort(400)

    if request.method == "GET":
        form = dict(
            ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
            ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
            oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)
        return render_template("analysis.html", type=type, form=form)

    if current_app.wok.cases_count(current_user) >= current_app.config.get("LIMIT_NUM_CASES", 100):
        flash("""There is a limit on the number of simultaneous analyses that can be managed.
                 You must remove finished analyses before running new ones.""", "error")
        return redirect(url_for("cases.index"))

    mutations_file = request.files['mutations_file']
    file_name = os.path.basename(mutations_file.filename)
    project_id = request.form['project_name']
    if len(project_id) == 0:
        project_id = os.path.splitext(file_name)[0]
    project_id = unique_project_id(normalize_id(project_id))

    '''
    if not current_user.validated:
        flash("""You can not run an analysis with your data until you are completely registered.
                 Please check your email and follow the instructions to validate this account.""", "error")
        flash("Meanwhile you can play with the included examples.")
        return redirect(url_for("examples"))
    '''

    cb = ConfigBuilder()
    cb.add_value("user_id", current_user.nick)
    cb.add_value("workspace", DEFAULT_WORKSPACE)
    cb.add_value("project.id", project_id)
    # case_name = "-".join([current_user.nick, project_id])
    # cb.add_value("wok.instance.name", case_name)

    results_path, project_path, project_temp_path = get_paths(project_id)

    if not current_user.is_anonymous():
        cb.add_value("website.user_id", current_user.nick)

    if type == SINGLE_TUMOR_ANALYSIS:  # request.form.get("variants_only") == "1":
        cb.add_value("variants_only", True)
        cb.add_value("skip_oncodrivefm", True)
        cb.add_value("skip_oncodriveclust", True)

    try:
        threshold = request.form["ofm_genes_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_GENES_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "ofm_genes_threshold",
                request.form.get("ofm_genes_threshold")))

    try:
        threshold = request.form["ofm_pathways_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "ofm_pathways_threshold",
                request.form.get("ofm_pathways_threshold")))

    try:
        threshold = int(request.form["oclust_genes_threshold"])
        if threshold >= 1:
            cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "oclust_genes_threshold",
                request.form.get("oclust_genes_threshold")))

    genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
    cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
    cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)

    if genes_filter_enabled:
        try:
            genes_filter_file = request.files['genes_filter_file']
            genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
            genes_filter_file.save(genes_filter_file_path)
            if os.path.getsize(genes_filter_file_path) != 0:
                cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
                cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
        except:
            current_app.logger.exception("Error retrieving genes filter from form")

    assembly = request.form.get("assembly", DEFAULT_ASSEMBLY).lower()

    project = dict(
        id=project_id,
        assembly=assembly,
        files=[file_name])

    projects = [init_project_files(project, check_paths=False)]
    cb.add_value("projects", projects)

    properties = dict(
        analysis_type=type,
        path=os.path.relpath(project_path, results_path))

    current_app.logger.info("[{}] Starting analysis {} ...".format(
        current_user.nick, project_id))

    case = current_app.wok.create_case(current_user, project_id, cb,
                                       PROJECT_NAME, MUTATIONS_FLOW_NAME,
                                       properties=properties, start=False)

    engine_case = current_app.wok.engine.case(case.engine_name)

    # TODO use a background thread
    upload_files(current_app.logger, case.engine_name, engine_case.storages,
                 projects, streams=[mutations_file.stream])

    current_app.logger.info("[{}] Analysis {} started on case {}...".format(
        current_user.nick, project_id, case.engine_name))

    engine_case.start()

    return redirect(url_for("cases.index", highlight=case.id))
if 'imnet' in f_list:
    rows = utils.load_csv()
    feats, y = fe_extraction.get_feats_from_imagenet(rows)
    features = np.hstack((features, feats))
    segm_ids = np.asarray([int(row['segmentation_id']) for row in rows])
if 'hcfeats' in f_list:
    rows = utils.load_csv(conf['csv_features_file'])
    feats, y = fe_extraction.get_feats_from_csv(
        rows, prefixes=['s_', 't_', 'i_'])
    feats = np.asarray(feats)
    features = np.hstack((features, feats))
    segm_ids = np.asarray([int(row['segmentation_id']) for row in rows])
if 'cnn' in f_list:
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])
    paths = utils.get_paths(conf)
    model_path = paths[cnn_layer]['best_model']
    model = serial.load(model_path)
    rows = utils.load_csv()
    chunkSize = 32
    feats, y = (None, None)
    for i in range(0, len(rows), chunkSize):
        offset = min(i + chunkSize, len(rows))
        f_chunk, y_chunk = fe_extraction.get_feats_from_cnn(
            rows[i:offset], model)
        if feats is None:
            feats = f_chunk
            y = y_chunk
        else:
            feats = np.vstack((feats, f_chunk))
            y = np.hstack((y, y_chunk))
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils

SWIFT_OBJSTORE = "cpe.pantri.lib.fb_objectstore.FB_ObjectStore"
EXAMPLE_OBJSTORE = "cpe.pantri.lib.example.example"

repo_root = utils.get_paths()["repo_root"]

""" Default config options"""
default = {
    "ignore_patterns": [
        "._*",
        ".__*",
        ".TemporaryItems*",
        "._.TemporaryItems",
        ".DS_Store",
        "*.pyc",
    ],
    "dest_sync": os.path.join(repo_root, "dest_sync"),
    "checksum": False,
    "binary_overrides": [],
    "object_store": EXAMPLE_OBJSTORE,
}

example_shelf = {"dest_sync": repo_root}

example_shelf_2 = {
def train(
        trainloader,
        testloader,
        device,
        seed,
        debias_=True,
        specific=None,
        ratio=0.5,  # bias ratio in dataset
        n_epochs=5,
        model_lr=1e-3,
        n2v_lr=1e-3,
        combined_n2v_lr=1e-3,  # metalearning rate for n2v
        alpha=100,  # for debias,
        beta=0.1,  # for adversarial loss
        out_file=None,
        base_folder="",
        results_folder="",
        experiment="sgd",
        momentum=0,
        module="layer4",
        finetuned=False,
        adversarial=False,
        nonlinear=False,
        subset=False,
        subset_ratio=0.1,
        save_every=False,
        model_momentum=0,
        n2v_momentum=0,
        experimental=False,
        multiple=False,
        debias_multiple=False,
        reset=False,
        reset_counter=1,
        n2v_start=False,
        experiment2=None,
        adaptive_alpha=False,
        n2v_adam=False,
        single=False,
        imagenet=False,
        train_batch_size=64,
        constant_resize=False,
        adaptive_resize=False,
        no_class=False,
        gamma=0,
        partial_projection=False,
        norm='l2',
        constant_alpha=False,
        jump_alpha=False,
        linear_alpha=False,
        mean_debias=False,
        no_limit=False,
        dataset='bam',
        parallel=False,
        gpu_ids=[],
        switch_modes=True):
    print("mu", momentum, "debias", debias_, "alpha", alpha, " | ratio:", ratio)

    def get_vg(W):
        if single:
            return W[-2, :]
        else:
            return W[-2, :] - W[-1, :]

    if dataset == 'bam' or dataset == 'coco':
        model_init_path, n2v_init_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end="resnet_init" + '.pt',
            n2v_end="resnet_n2v_init" + '.pt',
            n2v_module=module,
            experiment=experiment,
            with_n2v=False)
    else:
        model_init_path = os.path.join(base_folder, str(seed), experiment,
                                       'resnet_init.pt')
        n2v_init_path = os.path.join(base_folder, str(seed), experiment,
                                     module, 'resnet_n2v_init.pt')
    if finetuned:
        if dataset == 'bam' or dataset == 'coco':
            model_init_path = utils.get_model_path(
                base_folder,
                seed,
                specific,
                "resnet_" + str(ratio) + ".pt",
                experiment='post_train' if not n2v_start else experiment.split('_finetuned')[0])
        else:
            model_init_path = os.path.join(
                base_folder, str(seed),
                'post_train' if not n2v_start else experiment.split('_finetuned')[0],
                'resnet.pt')

    assert (debias_ and not adversarial) or (adversarial and not debias_) or \
        (not adversarial and not debias_)

    if debias_ and n2v_start:
        ext = "_n2v_" if not nonlinear else "_mlp_"
        if dataset == 'bam' or dataset == 'coco':
            n2v_init_path = utils.get_net2vec_path(
                base_folder, seed, specific, module,
                "resnet" + str(ext) + str(ratio) + ".pt",
                experiment=experiment.split('_finetuned')[0])
        else:
            n2v_init_path = os.path.join(
                base_folder, str(seed), experiment.split('_finetuned')[0],
                module, 'resnet' + ext[:-1] + '.pt')
    # if we're also doing adversarial, make sure to load the matching n2v as init...
    if adversarial:
        ext = "_n2v_" if not nonlinear else "_mlp_"
        if dataset == 'bam' or dataset == 'coco':
            n2v_init_path = utils.get_net2vec_path(
                base_folder, seed, specific, module,
                "resnet" + str(ext) + str(ratio) + ".pt",
                experiment='post_train')
        else:
            n2v_init_path = os.path.join(
                base_folder, str(seed), 'post_train', module,
                'resnet' + ext[:-1] + '.pt')

    num_classes = 10
    num_attributes = 12
    if nonlinear:
        num_attributes = 2
    if multiple:
        num_attributes = 10 + 9 + 2 * 10
    if dataset == 'coco':
        num_classes = 79
        num_attributes = 81

    model, net, net_forward, activation_probe = models.load_models(
        device,
        lambda x, y, z: models.resnet_(
            pretrained=True,
            custom_path=x,
            device=y,
            initialize=z,
            num_classes=num_classes,
            size=50 if (dataset == 'bam' or dataset == 'coco') else 34),
        model_path=model_init_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_init_path,
        module=module,
        num_attributes=num_attributes,
        # we want to make sure to save the inits if not finetuned...
        model_init=True if not finetuned else False,
        n2v_init=True if not (finetuned and (adversarial or (debias_ and n2v_start))) else False,
        loader=trainloader,
        nonlinear=nonlinear,
        # parameters if we want to initially project probes to have a certain amount of bias
        partial_projection=partial_projection,
        t=gamma)
    print(model_init_path, n2v_init_path)

    model_n2v_combined = models.ProbedModel(model, net, module,
                                            switch_modes=switch_modes)
    if n2v_adam:
        combined_optim = torch.optim.Adam(
            [{'params': model_n2v_combined.model.parameters()},
             {'params': model_n2v_combined.net.parameters()}],
            lr=n2v_lr)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.Adam(net.parameters(), lr=n2v_lr)
    else:
        combined_optim = torch.optim.SGD(
            [{'params': model_n2v_combined.model.parameters()},
             {'params': model_n2v_combined.net.parameters(),
              'lr': combined_n2v_lr,
              'momentum': n2v_momentum}],
            lr=model_lr,
            momentum=model_momentum)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.SGD(net.parameters(), lr=n2v_lr,
                                    momentum=n2v_momentum)
    model_optim = torch.optim.SGD(model.parameters(), lr=model_lr,
                                  momentum=model_momentum)

    d_losses = []
    adv_losses = []
    n2v_train_losses = []
    n2v_accs = []
    n2v_val_losses = []
    class_train_losses = []
    class_accs = []
    class_val_losses = []
    alpha_log = []
    magnitudes = []
    magnitudes2 = []
    unreduced = []
    bias_grads = []
    loss_shapes = []
    loss_shapes2 = []
    results = {
        "debias_losses": d_losses,
        "n2v_train_losses": n2v_train_losses,
        "n2v_val_losses": n2v_val_losses,
        "n2v_accs": n2v_accs,
        "class_train_losses": class_train_losses,
        "class_val_losses": class_val_losses,
        "class_accs": class_accs,
        "adv_losses": adv_losses,
        "alphas": alpha_log,
        "magnitudes": magnitudes,
        "magnitudes2": magnitudes2,
        "unreduced": unreduced,
        "bias_grads": bias_grads,
        "loss_shapes": loss_shapes,
        "loss_shapes2": loss_shapes2
    }
    if debias_:
        results_end = str(ratio) + "_debias.pck"
    elif adversarial:
        results_end = str(ratio) + "_adv.pck"
        if nonlinear:
            results_end = str(ratio) + "_mlp_adv.pck"
    else:
        results_end = str(ratio) + "_base.pck"
    if dataset == 'bam' or dataset == 'coco':
        results_path = utils.get_net2vec_path(
            results_folder, seed, specific, module, results_end,
            experiment if experiment2 is None else experiment2)
    else:
        results_path = os.path.join(
            results_folder, str(seed),
            experiment if experiment2 is None else experiment2,
            module, results_end)
    if debias_:
        model_end = "resnet_debias_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_debias_" + str(ratio) + '.pt'
    elif adversarial:
        if not nonlinear:
            model_end = "resnet_adv_" + str(ratio) + '.pt'
        else:
            model_end = "resnet_adv_nonlinear_" + str(ratio) + '.pt'
        if not nonlinear:
            n2v_end = "resnet_n2v_adv_" + str(ratio) + '.pt'
        else:
            n2v_end = "resnet_mlp_adv_" + str(ratio) + '.pt'
    else:
        model_end = "resnet_base_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_base_" + str(ratio) + '.pt'
    if dataset != 'bam' and dataset != 'coco':
        model_end = model_end.replace('_' + str(ratio), '')
        n2v_end = n2v_end.replace('_' + str(ratio), '')
    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=n2v_end,
            n2v_module=module,
            experiment=experiment if experiment2 is None else experiment2,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2,
            module, model_end)
        n2v_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2,
            module, n2v_end)

    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[key] in specific:
                specific_idx = int(key)
    else:
        specific_idx = 0
    train_labels = None if not nonlinear else [-2, -1]
    d_last = 0
    resize = constant_resize or adaptive_resize
    if imagenet:
        imagenet_trainloaders, _ = dataload.get_imagenet_tz(
            './datasets/imagenet', workers=8,
            train_batch_size=train_batch_size // 8,
            resize=resize, constant=constant_resize)
        imagenet_trainloader = dataload.process_imagenet_loaders(
            imagenet_trainloaders)

    params = list(model_n2v_combined.parameters())[:-2]
    init_alpha = alpha
    last_e = 0

    # setup training criteria
    if dataset == 'coco':
        object_weights = torch.FloatTensor(
            trainloader.dataset.getObjectWeights())
        gender_weights = torch.FloatTensor(
            trainloader.dataset.getGenderWeights())
        all_weights = torch.cat([object_weights, gender_weights])
        probe_criterion = nn.BCEWithLogitsLoss(weight=all_weights.to(device),
                                               reduction='elementwise_mean')
        downstream_criterion = nn.BCEWithLogitsLoss(
            weight=object_weights.to(device),
            reduction='elementwise_mean')
    else:
        probe_criterion = None
        downstream_criterion = nn.CrossEntropyLoss()

    for e in range(n_epochs):
        # save results every epoch...
        with open(results_path, 'wb') as f:
            print("saving results", e)
            print(results_path)
            pickle.dump(results, f)

        model.eval()
        with torch.no_grad():
            n2v_acc, n2v_val_loss = utils.net2vec_accuracy(
                testloader, net_forward, device, train_labels)
            n2v_accs.append(n2v_acc)
            n2v_val_losses.append(n2v_val_loss)
            if dataset != 'coco':
                class_acc, class_val_loss = utils.classification_accuracy(
                    testloader, model, device)
                class_accs.append(class_acc)
                class_val_losses.append(class_val_loss)
            else:
                f1, mAP = utils.detection_results(testloader, model, device)
                print("Epoch", e, "| f1:", f1, "| mAP:", mAP)
                class_accs.append([f1, mAP])

        d_initial = 0
        if not adversarial:
            curr_W = net.weight.data.clone()
            if not multiple:
                vg = get_vg(curr_W).reshape(-1, 1)
                d_initial = debias.debias_loss(curr_W[:-2], vg, t=0).item()
                print("Epoch", e, "bias", str(d_initial), " | debias: ", debias_)
            else:
                ds = np.zeros(10)
                for i in range(10):
                    if i == 0:
                        vg = (curr_W[10, :] - curr_W[11, :]).reshape(-1, 1)
                    else:
                        vg = (curr_W[20 + i, :] - curr_W[29 + i, :]).reshape(-1, 1)
                    ds[i] = debias.debias_loss(curr_W[:10], vg, t=0).item()
                print("Epoch", e, "bias", ds, " | debias: ", debias_)
                print("Accuracies:", n2v_acc)
                d_initial = ds[0]
        else:
            print("Epoch", e, "Adversarial", n2v_accs[-1])

        if adaptive_alpha and (e == 0 or ((d_last / d_initial) >= (5 / 2**(e - 1))
                                          or (0.8 < (d_last / d_initial) < 1.2))):
            # alpha = alpha
            old_alpha = alpha
            # we don't want to increase too much if it's already decreasing
            if (e == 0 or (d_last / d_initial) >= (5 / 2**(e - 1))):
                # numerical stability just in case d_initial gets really low
                alpha = min(alpha * 2, (15 / (2**e)) / (d_initial + 1e-10))
                # if e > 0 and old_alpha >= alpha:
                #     alpha = old_alpha  # don't update if we're decreasing...
                print("Option 1")
            if e > 0 and alpha < old_alpha:
                # we want to increase if plateaued
                alpha = max(old_alpha * 1.5, alpha)
                print("Option 2")
            # don't want to go over 1000...
            if alpha > 1000:
                alpha = 1000
            d_last = d_initial
        elif not adaptive_alpha and not constant_alpha:
            if dataset == 'coco' and jump_alpha:
                if e < 2:
                    alpha = 5e3
                elif e >= 2 and e < 4:
                    alpha = 1e4
                else:
                    alpha = init_alpha
            elif jump_alpha and (e - last_e) > 2:
                if not mean_debias:
                    if alpha < 100:
                        alpha = min(alpha * 2, 100)
                        last_e = e
                    else:
                        # two jumps
                        # if (e - last_e) >= ((n_epochs - last_e) // 2):
                        #     alpha = 1000
                        # else:
                        alpha = 1000
                else:
                    if alpha < 1000:
                        alpha = min(alpha * 2, 1000)
                        last_e = e
                    else:
                        alpha = 10000
            elif linear_alpha and (e - last_e) > 2:
                if alpha < 100:
                    alpha = min(alpha * 2, 100)
                    last_e = e
                else:
                    alpha += (1000 - 100) / (n_epochs - last_e)
            elif not jump_alpha and not linear_alpha:
                if (e + 1) % 3 == 0:
                    # apply alpha schedule?
                    # alpha = min(alpha * 1.2, max(init_alpha, 1000))
                    alpha = alpha * 1.5
        alpha_log.append(alpha)
        print("Current Alpha:,", alpha)

        if save_every and e % 10 == 0 and e > 0 and seed == 0 and debias_:
            torch.save(net.state_dict(),
                       n2v_path.split('.pt')[0] + '_' + str(e) + '.pt')
            torch.save(model.state_dict(),
                       model_path.split('.pt')[0] + '_' + str(e) + '.pt')
        if reset and (e + 1) % reset_counter == 0 and e > 0:
            print("resetting")
            net, net_forward, activation_probe = net2vec.create_net2vec(
                model, module, num_attributes, device,
                pretrained=False, initialize=True, nonlinear=nonlinear)
            n2v_optim = torch.optim.SGD(net.parameters(), lr=n2v_lr,
                                        momentum=n2v_momentum)

        model.train()
        ct = 0
        for X, y, genders in trainloader:
            ids = None
            ##### Part 1: Update the Embeddings #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            labels = utils.merge_labels(y, genders, device)
            logits = net_forward(X.to(device), switch_modes=switch_modes)
            # Now actually update net2vec embeddings, making sure to use the same batch
            if train_labels is not None:
                if logits.shape[1] == labels.shape[1]:
                    logits = logits[:, train_labels]
                labels = labels[:, train_labels]
            shapes = []
            shapes2 = []
            if dataset == 'coco':
                prelim_loss = probe_criterion(logits, labels)
            else:
                prelim_loss, ids = utils.balanced_loss(logits, labels, device, 0.5,
                                                       ids=ids, multiple=multiple,
                                                       specific=specific_idx,
                                                       shapes=shapes)
            # print("prelim_loss:", prelim_loss.item())
            prelim_loss.backward()
            # we don't want to update these parameters, just in case
            model_optim.zero_grad()
            n2v_train_losses.append(prelim_loss.item())
            n2v_optim.step()
            try:
                magnitudes.append(
                    torch.norm(net.weight.data, dim=1).data.cpu().numpy())
            except:
                pass

            ##### Part 2: Update Conv parameters for classification #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            class_logits = model(X.to(device))
            class_loss = downstream_criterion(class_logits, y.to(device))
            class_train_losses.append(class_loss.item())
            if debias_:
                W_curr = net.weight.data
                vg = get_vg(W_curr).reshape(-1, 1)
                unreduced.append(
                    debias.debias_loss(W_curr[:-2], vg, t=0,
                                       unreduced=True).data.cpu().numpy())
            loss = class_loss

            #### Part 2a: Debias Loss
            if debias_:
                model_optim.zero_grad()
                n2v_optim.zero_grad()
                labels = utils.merge_labels(y, genders, device)
                o = net.weight.clone()
                combined_optim.zero_grad()
                with higher.innerloop_ctx(model_n2v_combined, combined_optim) as (fn2v, diffopt_n2v):
                    models.update_probe(fn2v)
                    logits = fn2v(X.to(device))
                    if dataset == 'coco':
                        prelim_loss = probe_criterion(logits, labels)
                    else:
                        prelim_loss, ids = utils.balanced_loss(
                            logits, labels, device, 0.5, ids=ids,
                            multiple=False, specific=specific_idx,
                            shapes=shapes2)
                    diffopt_n2v.step(prelim_loss)
                    weights = list(fn2v.parameters())[-2]
                    vg = get_vg(weights).reshape(-1, 1)
                    d_loss = debias.debias_loss(weights[:-2], vg, t=gamma,
                                                norm=norm, mean=mean_debias)
                    # only want to save the actual bias...
                    d_losses.append(d_loss.item())
                    grad_of_grads = torch.autograd.grad(
                        alpha * d_loss,
                        list(fn2v.parameters(time=0))[:-2],
                        allow_unused=True)
                    del prelim_loss
                    del logits
                    del vg
                    del fn2v
                    del diffopt_n2v

            #### Part 2b: Adversarial Loss
            if adversarial:
                logits = net_forward(None, forward=True)[:, -2:]  # just use activation probe
                labels = genders.type(torch.FloatTensor).reshape(
                    genders.shape[0], -1).to(device)
                adv_loss, _ = utils.balanced_loss(logits, labels, device, 0.5,
                                                  ids=ids, stable=True)
                adv_losses.append(adv_loss.item())
                # getting too strong, let it retrain...
                if adv_loss < 2:
                    adv_loss = -beta * adv_loss
                    loss += adv_loss
            loss.backward()
            if debias_:
                # custom backward to include the bias regularization....
                max_norm_grad = -1
                param_idx = -1
                for ii in range(len(grad_of_grads)):
                    if (grad_of_grads[ii] is not None
                            and params[ii].grad is not None
                            and torch.isnan(grad_of_grads[ii]).long().sum()
                            < grad_of_grads[ii].reshape(-1).shape[0]):
                        # just in case some are nan for some reason?
                        not_nan = ~torch.isnan(grad_of_grads[ii])
                        params[ii].grad[not_nan] += grad_of_grads[ii][not_nan]
                        if grad_of_grads[ii][not_nan].norm().item() > max_norm_grad:
                            max_norm_grad = grad_of_grads[ii][not_nan].norm().item()
                            param_idx = ii
                bias_grads.append((param_idx, max_norm_grad))
                # undo the last step and apply a smaller alpha to prevent stability issues
                if not no_limit and ((not mean_debias and max_norm_grad > 100)
                                     or (mean_debias and max_norm_grad > 100)):
                    for ii in range(len(grad_of_grads)):
                        if (grad_of_grads[ii] is not None
                                and params[ii].grad is not None
                                and torch.isnan(grad_of_grads[ii]).long().sum()
                                < grad_of_grads[ii].reshape(-1).shape[0]):
                            # just in case some are nan for some reason?
                            not_nan = ~torch.isnan(grad_of_grads[ii])
                            params[ii].grad[not_nan] -= grad_of_grads[ii][not_nan]
                            # scale accordingly
                            # params[ii].grad[not_nan] += grad_of_grads[ii][not_nan] / max_norm_grad
            loss_shapes.append(shapes)
            loss_shapes2.append(shapes2)
            model_optim.step()
            # magnitudes2.append(
            #     torch.norm(net.weight.data, dim=1).data.cpu().numpy()
            # )
            ct += 1

    # save results every epoch...
    with open(results_path, 'wb') as f:
        print("saving results", e)
        print(results_path)
        pickle.dump(results, f)
    torch.save(net.state_dict(), n2v_path)
    torch.save(model.state_dict(), model_path)
import utils
from pylearn2.utils import serial
import h5py
import numpy as np
import sys

if __name__ == "__main__":
    conf_file = sys.argv[1] if len(sys.argv) > 1 else None
    conf = utils.get_config(conf_file)
    paths = utils.get_paths()
    region_size = conf['region_size']
    region_stride = conf['region_stride']

    train_rows, valid_rows, test_rows = utils.split_dataset(
        utils.get_filtered_rows(), conf['valid_percent'],
        conf['test_percent'], rng=conf['rng_seed'])
    rowsdict = {'train': train_rows, 'valid': valid_rows, 'test': test_rows}
    nsamples = {}
    prefixes = ['s_', 'i_', 't_']  # Feature names' prefixes

    for subset, subrows in rowsdict.iteritems():
        X = None
        y = []
        feats = []
        for row in subrows:
            samples = utils.get_samples_from_image(
                row, oversampling=(subset == 'train' and conf['oversampling']))
            print "%i samples to %s taken from %s" % (
                len(samples), subset, row['image_filename'])
            if len(samples) == 0:
def run(type):
    if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
        abort(400)

    if current_app.wok.cases_count(current_user) >= current_app.config.get("LIMIT_NUM_CASES", 100):
        flash("""There is a limit on the number of simultaneous analyses that can be managed.
                 You must remove finished analyses before running new ones.""", "error")
        return redirect(url_for("cases.index"))

    cb = ConfigBuilder()
    cb.add_value("user_id", current_user.nick)
    cb.add_value("workspace", DEFAULT_WORKSPACE)

    if not current_user.is_anonymous():
        cb.add_value("website.user_id", current_user.nick)

    conf = get_project_conf()

    if type == COHORT_ANALYSIS:
        project_id = "cohort-example"
        mutations_path = get_examples_path(conf, "meduloblastoma_cohort_tier1.muts")
    elif type == SINGLE_TUMOR_ANALYSIS:
        project_id = "single-tumor-example"
        mutations_path = get_examples_path(conf, "pat4_crc.muts")
        cb.add_value("variants_only", True)
        cb.add_value("skip_oncodrivefm", True)
        cb.add_value("skip_oncodriveclust", True)

    project_id = unique_project_id(project_id)
    cb.add_value("project.id", project_id)

    results_path, project_path, project_temp_path = get_paths(project_id, conf=conf)

    assembly = "hg19"

    project = dict(
        id=project_id,
        assembly=assembly,
        files=[mutations_path])

    projects = [init_project_files(project)]
    cb.add_value("projects", projects)

    properties = dict(
        analysis_type=type,
        path=os.path.relpath(project_path, results_path),
        data_file=mutations_path)

    current_app.logger.info("[{}] Starting example {} ...".format(
        current_user.nick, project_id))

    case = current_app.wok.create_case(current_user, project_id, cb,
                                       PROJECT_NAME, MUTATIONS_FLOW_NAME,
                                       properties=properties, start=False)

    engine_case = current_app.wok.engine.case(case.engine_name)

    # TODO use a background thread
    upload_files(current_app.logger, case.engine_name, engine_case.storages, projects)

    current_app.logger.info("[{}] Example {} started on case {}...".format(
        current_user.nick, project_id, case.engine_name))

    engine_case.start()

    return redirect(url_for("cases.index", highlight=case.id))
def get_feature_vector(graph, triple, relations, remove_triple=False,
                       original_positive=None, centrality_indices=None,
                       rels_to_study=None):
    res = []
    s, r, t = triple
    reciprocal_removed = False
    rng = range(1, settings.MAX_CONTEXT_SIZE + 1)

    # Remove the triple itself from the graph if it's a positive example
    # and the original positive if it's a negative one
    # (and any reciprocal relations)
    if remove_triple:
        graph.remove_edge(s, t, key=r)
        try:
            graph.remove_edge(t, s, key=r)
            reciprocal_removed = True
        except NetworkXError:
            pass
    elif original_positive:
        o_s, o_r, o_t = original_positive
        graph.remove_edge(o_s, o_t, key=o_r)
        try:
            graph.remove_edge(o_t, o_s, key=o_r)
            reciprocal_removed = True
        except NetworkXError:
            pass

    ###########################################################################
    # Load the subgraphs if they are not there yet
    for i in rng:
        if (s, i) not in context_subgraphs:
            context_subgraphs[(s, i)] = ego_graph(graph, s, i)
        if (t, i) not in context_subgraphs:
            context_subgraphs[(t, i)] = ego_graph(graph, t, i)

    ###########################################################################
    # Regular subgraph features
    for i, j in product(rng, rng):
        ents_s = list(context_subgraphs[(s, i)].nodes) + [s]
        ents_t = list(context_subgraphs[(t, j)].nodes) + [t]
        res += get_intersection_feats(s, t, ents_s, ents_t, graph, True,
                                      centrality_indices=centrality_indices)

    ###########################################################################
    # Reachable entities for all relations
    for i, j in product(rng, rng):
        triples_s = [
            (s_g, r_g, t_g)
            for s_g, t_g, r_g in context_subgraphs[(s, i)].edges.data("rel")
        ]
        triples_t = [
            (s_g, r_g, t_g)
            for s_g, t_g, r_g in context_subgraphs[(t, j)].edges.data("rel")
        ]
        for rel in relations:
            ents_s = [t_g for s_g, r_g, t_g in triples_s if r_g == rel] + [s]
            ents_t = [t_g for s_g, r_g, t_g in triples_t if r_g == rel] + [t]
            res += get_intersection_feats(
                s, t, ents_s, ents_t, graph,
                centrality_indices=centrality_indices)

    ###########################################################################
    # Path-based features
    if settings.USE_PATHS:
        rels_dict = {rel: i for i, rel in enumerate(rels_to_study)}
        for i in rng:
            triples = [(s, r, t)
                       for s, t, r in context_subgraphs[(s, i)].edges.data("rel")
                       if r in rels_to_study]
            paths = get_paths(triples, s, t, i)
            matrix = np.zeros((len(rels_to_study), ) * i)
            # TODO ad-hoc code --- probably refactor this in the future
            for path in paths:
                if i == 1:
                    matrix[rels_dict[path[0][1]]] += 1
                elif i == 2:
                    matrix[rels_dict[path[0][1]]][rels_dict[path[1][1]]] += 1
                elif i == 3:
                    matrix[rels_dict[path[0][1]]][rels_dict[path[1][1]]][rels_dict[path[2][1]]] += 1
            total_paths = np.sum(matrix)
            res.append(total_paths)
            res += matrix.flatten().tolist()

    ###########################################################################
    # Restore the deleted edges
    if remove_triple:
        graph.add_edge(s, t, rel=r, key=r)
        if reciprocal_removed:
            graph.add_edge(t, s, rel=r, key=r)
    elif original_positive:
        o_s, o_r, o_t = original_positive
        graph.add_edge(o_s, o_t, rel=o_r, key=o_r)
        if reciprocal_removed:
            graph.add_edge(o_t, o_s, rel=o_r, key=o_r)

    ###########################################################################
    # Done
    return res
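# Hedged refactor sketch (an equivalent of the i == 1/2/3 ladder above, not the
# original code): numpy arrays accept tuple indices, so the path-count update
# generalizes to any context size i. All names below are hypothetical.
import numpy as np

rels_dict = {"r0": 0, "r1": 1}                  # relation -> matrix axis index
paths = [[("a", "r0", "b"), ("b", "r1", "c")]]  # one path of length i == 2
i = 2
matrix = np.zeros((len(rels_dict),) * i)
for path in paths:
    matrix[tuple(rels_dict[step[1]] for step in path)] += 1  # count this path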
import argparse
import os
import os.path as osp

import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import tqdm
from scipy import interpolate
from scipy.optimize import brentq
from torch.autograd import Variable
from torchvision import transforms

import data_loader  # local module from this repo
import utils        # local module from this repo

here = osp.dirname(osp.abspath(__file__))  # used below for saving figures


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='lfw_eval')
    parser.add_argument('-g', '--gpu', type=int, default=0)
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/lfw-deepfunneled')
    parser.add_argument('--fold', type=int, default=0, choices=[0, 10])
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('-m', '--model_path', default=None, required=True,
                        help='Path to pre-trained model')
    parser.add_argument('--model_type', default='resnet50',
                        choices=['resnet50', 'resnet101', 'resnet101-512d'])
    args = parser.parse_args()

    # CUDA setup
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are the same size

    if args.fold == 0:
        pairs_path = './lfw/data/pairsDevTest.txt'
    else:
        pairs_path = './lfw/data/pairs.txt'

    # -------------------------------------------------------------------------
    # 1. Dataset
    # -------------------------------------------------------------------------
    file_ext = 'jpg'  # note: no '.' before jpg
    num_class = 8631
    pairs = utils.read_pairs(pairs_path)
    path_list, issame_list = utils.get_paths(args.dataset_path, pairs, file_ext)

    # Define data transforms
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]
    test_transform = transforms.Compose([
        transforms.Scale((250, 250)),   # make 250x250 (Resize in newer torchvision)
        transforms.CenterCrop(150),     # then take the 150x150 center crop
        transforms.Scale((224, 224)),   # resize to the network's required input size
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Create data loader
    test_loader = torch.utils.data.DataLoader(
        data_loader.LFWDataset(path_list, issame_list, test_transform),
        batch_size=args.batch_size, shuffle=False)

    # -------------------------------------------------------------------------
    # 2. Model
    # -------------------------------------------------------------------------
    if args.model_type == 'resnet50':
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101':
        model = torchvision.models.resnet101(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101-512d':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    else:
        raise NotImplementedError

    checkpoint = torch.load(args.model_path)

    if checkpoint['arch'] == 'DataParallel':
        # the model was trained and saved wrapped in DataParallel
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.module  # unwrap the network module from DataParallel
    else:
        model.load_state_dict(checkpoint['model_state_dict'])

    if cuda:
        model = model.cuda()

    # Convert the trained network into a "feature extractor"
    feature_map = list(model.children())
    if args.model_type == 'resnet101-512d':
        model.eval()
        extractor = model
        extractor.fc = nn.Sequential(extractor.fc[0])  # keep only the 512-d projection
    else:
        feature_map.pop()  # drop the final classification layer
        extractor = nn.Sequential(*feature_map)
    extractor.eval()  # evaluation mode (fixes BatchNorm, dropout, etc.)

    # -------------------------------------------------------------------------
    # 3. Feature extraction
    # -------------------------------------------------------------------------
    features = []
    for batch_idx, images in tqdm.tqdm(enumerate(test_loader),
                                       total=len(test_loader),
                                       desc='Extracting features'):
        x = Variable(images, volatile=True)  # test-time memory conservation
        if cuda:
            x = x.cuda()
        feat = extractor(x)
        if cuda:
            feat = feat.data.cpu()
        else:
            feat = feat.data
        features.append(feat)

    # cat (not stack) so a smaller final batch cannot break the reshape
    features = torch.cat(features, 0)
    features = features.view(features.size(0), -1)
    features = F.normalize(features, p=2, dim=1)  # L2-normalize
    # TODO - cache features

    # -------------------------------------------------------------------------
    # 4. Verification
    # -------------------------------------------------------------------------
    num_feat = features.size(0)
    feat_pair1 = features[np.arange(0, num_feat, 2), :]
    feat_pair2 = features[np.arange(1, num_feat, 2), :]
    feat_dist = (feat_pair1 - feat_pair2).norm(p=2, dim=1)
    feat_dist = feat_dist.numpy()

    # Eval metrics
    scores = -feat_dist
    gt = np.asarray(issame_list)

    if args.fold == 0:
        fig_path = osp.join(here, args.exp_name + '_' + args.model_type +
                            '_lfw_roc_devTest.png')
        roc_auc = sklearn.metrics.roc_auc_score(gt, scores)
        fpr, tpr, thresholds = sklearn.metrics.roc_curve(gt, scores)
        print('ROC-AUC: %.04f' % roc_auc)

        # Plot and save ROC curve
        fig = plt.figure()
        plt.title('ROC - lfw dev-test')
        plt.plot(fpr, tpr, lw=2, label='ROC (auc = %0.4f)' % roc_auc)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.tight_layout()
    else:
        # 10-fold protocol: 600 pairs in each fold
        fold_size = 600
        roc_auc = np.zeros(10)
        roc_eer = np.zeros(10)
        fig = plt.figure()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        for i in tqdm.tqdm(range(10)):
            start = i * fold_size
            end = (i + 1) * fold_size
            scores_fold = scores[start:end]
            gt_fold = gt[start:end]
            roc_auc[i] = sklearn.metrics.roc_auc_score(gt_fold, scores_fold)
            fpr, tpr, _ = sklearn.metrics.roc_curve(gt_fold, scores_fold)
            # EER calc: https://yangcha.github.io/EER-ROC/
            roc_eer[i] = brentq(
                lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
            plt.plot(fpr, tpr, alpha=0.4, lw=2, color='darkgreen',
                     label='ROC(auc=%0.4f, eer=%0.4f)' % (roc_auc[i], roc_eer[i]))
        plt.title('AUC: %0.4f +/- %0.4f, EER: %0.4f +/- %0.4f' %
                  (np.mean(roc_auc), np.std(roc_auc),
                   np.mean(roc_eer), np.std(roc_eer)))
        plt.tight_layout()
        fig_path = osp.join(here, args.exp_name + '_' + args.model_type +
                            '_lfw_roc_10fold.png')

    plt.savefig(fig_path, bbox_inches='tight')
    print('ROC curve saved at: ' + fig_path)
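# Side note (not part of the original script): the brentq/interp1d one-liner
# above finds the Equal Error Rate -- the point on the ROC curve where the
# false positive rate equals the false negative rate, i.e. FPR = 1 - TPR.
# A minimal standalone sketch of the same trick:
from scipy.optimize import brentq
from scipy import interpolate
import sklearn.metrics

def compute_eer(gt, scores):
    """Equal Error Rate: solve FPR(x) = FNR(x), with FNR = 1 - TPR."""
    fpr, tpr, _ = sklearn.metrics.roc_curve(gt, scores)
    return brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)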
if not os.path.isdir('results/' + results_folder_name):
    os.mkdir('results/' + results_folder_name)

for i in range(5, splits):
    print('Split {}/{}'.format(i, splits))

    """ Create readers """
    dataReaders = {}
    dataReaders['CNN'] = ImageReader(folder_name='img_patches',
                                     np_shape=(897, 897, 3),
                                     formats=['.jpeg'],
                                     patch_size=512)

    """ Get paths """
    # Dataset paths
    datasets = ['train', 'val', 'test']
    paths = get_paths(splits_folder, db_path, i, cnn=True, multitest=False)

    """ Read data """
    for key in dataReaders:
        print('Read data ({})'.format(key))
        for dataset in datasets:
            dataReaders[key].read_data(paths=paths[dataset], ohe=ohe,
                                       dataset=dataset)

    # Shuffle the train set with a permutation, so x and y stay aligned and no
    # sample is dropped or duplicated (the original used np.random.randint,
    # which draws indices with replacement)
    index_train = np.random.permutation(
        len(dataReaders['CNN'].data['train']['x']))
    dataReaders['CNN'].data['train']['x'] = \
        dataReaders['CNN'].data['train']['x'][index_train]
    dataReaders['CNN'].data['train']['y'] = \
        dataReaders['CNN'].data['train']['y'][index_train]
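# A tiny self-contained check (illustrative only, not from the original script)
# that indexing both arrays with one shared permutation shuffles the samples
# while keeping each feature row paired with its label:
import numpy as np

x = np.arange(10).reshape(5, 2)   # 5 samples, 2 features; row j starts with 2*j
y = np.arange(5)                  # matching labels 0..4
idx = np.random.permutation(len(x))
x_shuf, y_shuf = x[idx], y[idx]
assert all(x_shuf[k, 0] // 2 == y_shuf[k] for k in range(5))  # pairs intact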
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils

repo_root = utils.get_paths()['repo_root']

""" Default config options """
default = {
    'storage_url': 'https://example.com',
    'auth_url': 'https://example.com/auth/',
    'object_store_container': 'blah',
    'ignore_patterns': [
        '._*',
        '.__*',
        '.TemporaryItems*',
        '._.TemporaryItems',
        '.DS_Store',
        '*.pyc',
    ],
    'dest_sync': os.path.join(repo_root, 'dest_sync'),
    'checksum': False,
    'binary_overrides': []
}

example_shelf = {
    'dest_sync': repo_root
}

example_shelf_2 = {
    'binary_overrides': ['*.inf', '*.din']
}
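# A minimal sketch (not from the original module) of how a per-shelf dict such
# as example_shelf might override the defaults above -- shelf keys win, and
# everything else falls back to `default`:
def merged_options(shelf_overrides):
    opts = dict(default)          # copy the defaults
    opts.update(shelf_overrides)  # per-shelf values take precedence
    return opts

# e.g. merged_options(example_shelf_2)['binary_overrides'] == ['*.inf', '*.din'],
# while merged_options(example_shelf_2)['checksum'] is still False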