Example #1
0
def read_image_frame(dir_path):
    """Load every image under ``dir_path`` into a single image frame.

    ``utils.get_paths(dir_path, dirs=True)`` supplies the category paths; the
    position of a category in that listing is used as its integer label.
    Every path returned for a category (``dirs=False``) is loaded with
    ``read_img``.

    :param dir_path: root path handed to ``utils.get_paths``
    :returns: the result of ``create_img_frame`` applied to the list of
        ``(label, image)`` pairs
    """
    labelled_images = []
    category_dirs = utils.get_paths(dir_path, dirs=True)
    for label, category_dir in enumerate(category_dirs):
        # Progress output: one line per category.
        print(category_dir)
        image_paths = utils.get_paths(category_dir, dirs=False)
        for image_path in image_paths:
            labelled_images.append((label, read_img(image_path)))
    return create_img_frame(labelled_images)
Example #2
0
def call_make_map(kelpType, plot_type, experiments, polygons):
    """Gather the paths for ``experiments`` and pass them to ``make_map``.

    With a single experiment the result of ``utils.get_paths`` is used as is;
    with several, the per-experiment results are flattened into one list.

    :param kelpType: forwarded to ``make_map`` unchanged
    :param plot_type: forwarded to ``make_map`` unchanged
    :param experiments: sequence of experiments (must not be empty)
    :param polygons: forwarded to ``utils.get_paths`` and ``make_map``
    """
    if len(experiments) <= 1:
        paths = utils.get_paths(experiment=experiments[0], polygons=polygons)
    else:
        # NOTE(review): this branch passes (polygons, exp) positionally while
        # the single-experiment branch uses keywords -- confirm the positional
        # order matches the signature of utils.get_paths.
        per_experiment = [utils.get_paths(polygons, exp) for exp in experiments]
        paths = []
        for group in per_experiment:
            paths.extend(group)

    make_map(
        paths,
        kelpType,
        plot_type,
        experiment=experiments,
        polygons=polygons,
    )
Example #3
0
    def forward(self, g, nodepairs, relations=None, paths=None):
        """Encode the graph and score each node pair with the output layer.

        For every pair, the two 'drug' embeddings are concatenated with an
        attention context computed over the embeddings of the paths that
        ``get_paths`` returns for that pair.

        :g: graph passed to ``self.encoder``
        :nodepairs: 2-D index tensor; column 0 selects the start node and
            column 1 the end node of each pair
        :relations: optional index tensor; when given, only the output entry
            selected by it (along dim 1) is returned for each pair
        :paths: ignored -- the value is always recomputed with ``get_paths``
        :returns: the full output of ``self.output_layer`` when ``relations``
            is None, otherwise the gathered entries

        """
        node_embeddings, adj, _emb, s_d, _s_p = self.encoder(g)

        # The original author assumes relations exist only between drugs.
        drug_table = node_embeddings['drug']
        drug_start = drug_table[nodepairs[:, 0]]
        drug_end = drug_table[nodepairs[:, 1]]

        paths = get_paths(adj, nodepairs, s_d, 5)
        path_embeddings = []
        for datum in paths:
            path_embeddings.append(
                [self.path_embedding(path, node_embeddings) for path in datum])

        per_pair = zip(path_embeddings,
                       torch.unbind(drug_start),
                       torch.unbind(drug_end))
        context = []
        for pe, start, end in per_pair:
            context.append(self.get_attention_context(pe, start, end))
        context = torch.stack(context)

        all_features = torch.cat([drug_start, drug_end, context], -1)
        op = self.output_layer(all_features)
        if relations is None:
            return op
        return torch.gather(op, 1, relations.unsqueeze(-1)).squeeze(-1)
Example #4
0
    def __init__(self, dispatcher, sess):
        """Restore the saved model into ``sess`` and register the chat handlers.

        :param dispatcher: object whose ``add_handler`` receives the 'start',
            text-message and 'end' handlers
        :param sess: TensorFlow session the checkpoint is restored into
        """

        model_path, config_path, vocab_path = get_paths('models/reddit')

        # Pickle streams are binary data, so the files are opened in 'rb'
        # mode; text mode can corrupt or reject them depending on platform
        # and Python version.
        # NOTE: unpickling can execute arbitrary code -- these files must come
        # from a trusted source.
        with open(config_path, 'rb') as f:
            saved_args = cPickle.load(f)

        with open(vocab_path, 'rb') as f:
            self.chars, self.vocab = cPickle.load(f)

        net = Model(saved_args, True)

        saver = tf.train.Saver(net.save_variables_list())
        saver.restore(sess, model_path)

        self.sess = sess
        self.net = net
        self.g = tf.get_default_graph()

        start_handler = CommandHandler('start', self.start)
        end_handler = CommandHandler('end', self.end)
        text_handler = MessageHandler(Filters.text, self.message)

        # Registration order is kept: start, text, end.
        dispatcher.add_handler(start_handler)
        dispatcher.add_handler(text_handler)
        dispatcher.add_handler(end_handler)
        self.chat_rooms = {}
Example #5
0
def get_fprop_fn(variable_shape=False, include_pool=True):
    """
    Compile a theano function that maps a 4D input tensor to the features
    obtained by convolving it with the saved SAE weights.

    variable_shape: when True the convolution is built without fixed
        filter/image shapes; otherwise both shapes come from the config
    include_pool: when True, the method of the convolution output named by
        conf['pool_fn'] is applied over axes (2, 3)
    """
    conf = utils.get_config()
    model_path = utils.get_paths()['sae']['model']
    ae = serial.load(model_path)

    layer_key = 'cnn_layer_%i' % (conf['cnn_layers'])
    batch_size = conf[layer_key]['batch_size']
    nhid = conf['sae']['nhid']
    patch_size = conf['patch_size']
    region_size = conf['region_size']

    x = T.tensor4('input')
    filter_shape = (nhid, 1, patch_size, patch_size)
    filters = theano.shared(ae.get_weights().T.reshape(filter_shape))

    # Shape hints are only supplied when the input shape is fixed.
    shape_kwargs = {}
    if not variable_shape:
        shape_kwargs['filter_shape'] = filter_shape
        shape_kwargs['image_shape'] = [batch_size, 1, region_size, region_size]
    features = conv.conv2d(x, filters, **shape_kwargs)

    if include_pool:
        features = getattr(features, conf['pool_fn'])(axis=(2, 3))
    return theano.function([x], features)
Example #6
0
def get_logger():
    """
  get_logger():

  Return this module's logger, attaching its handlers on first use.

  Two handlers are added when the logger has none yet: a console handler
  (logging.StreamHandler with its default stream, which is sys.stderr) and a
  'pantri.log' file handler in the configured logs directory that rotates at
  midnight and keeps 14 backups. The logger level is set to DEBUG.
  """
    # Imported locally so the ``logging.handlers`` submodule is guaranteed to
    # be loaded before it is referenced below.
    import errno
    import logging.handlers

    log_dir = utils.get_paths()['logs']
    # Create the directory unconditionally and tolerate "already exists":
    # an exists()-then-makedirs() sequence can fail if another process
    # creates the directory in between.
    try:
        os.makedirs(log_dir)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(log_dir):
            raise

    log_path = os.path.join(log_dir, 'pantri.log')
    logger = logging.getLogger(__name__)
    # Configure only once; repeated calls would otherwise duplicate output.
    if not logger.handlers:
        formatter = logging.Formatter(
            '[%(asctime)s] %(levelname)s: %(message)s')

        # Console handler
        console = logging.StreamHandler()
        console.setFormatter(formatter)

        # Rotating file handler
        log_file = logging.handlers.TimedRotatingFileHandler(log_path,
                                                             when='midnight',
                                                             interval=1,
                                                             backupCount=14)
        log_file.setFormatter(formatter)

        logger.addHandler(console)
        logger.addHandler(log_file)
        logger.setLevel(logging.DEBUG)

    return logger
Example #7
0
def get_feats_in_partitions():
    """
    Filter the dataset rows, split them into train/validation/test
    partitions and extract features and boolean labels for each partition.

    Returns X_train, y_train, X_valid, y_valid, X_test, y_test, where each
    label is True when the row's 'classification' equals 'Malign'.
    """
    conf = utils.get_config()
    # paths, region_size and region_stride are not used below; the lookups
    # are kept so this function performs the same calls and key reads.
    paths = utils.get_paths()
    rows = utils.load_csv()
    filters = conf['filters']
    region_size = conf['region_size']
    region_stride = conf['region_stride']

    filtered_rows = []
    for row in rows:
        if utils.check_filter(row, filters):
            filtered_rows.append(row)

    partitions = utils.split_dataset(filtered_rows,
                                     conf['valid_percent'],
                                     conf['test_percent'],
                                     rng=conf['rng_seed'])
    train_rows, valid_rows, test_rows = partitions

    conv = get_fprop_fn(False)
    steps = (
        ('Getting features from train...', train_rows),
        ('Getting features from valid...', valid_rows),
        ('Getting features from test...', test_rows),
    )
    features = []
    for message, part_rows in steps:
        print(message)
        features.append(get_feats_from_rows(part_rows, conv, conf['stride']))
    X_train, X_valid, X_test = features

    def is_malign(row):
        return row['classification'] == 'Malign'

    y_train = [is_malign(row) for row in train_rows]
    y_valid = [is_malign(row) for row in valid_rows]
    y_test = [is_malign(row) for row in test_rows]
    return X_train, y_train, X_valid, y_valid, X_test, y_test
Example #8
0
def get_fprop_fn(variable_shape=False, include_pool=True):
    """
    Build a theano function that convolves its 4D input with the SAE
    weights loaded from disk and, if include_pool is True, reduces the
    result over axes (2, 3) with the method named by conf['pool_fn'].

    When variable_shape is False the filter and image shapes taken from the
    config are passed to the convolution; otherwise no shapes are given.
    """
    conf = utils.get_config()
    paths = utils.get_paths()
    autoencoder = serial.load(paths['sae']['model'])
    layer_conf = conf['cnn_layer_%i' % (conf['cnn_layers'])]
    batch_size = layer_conf['batch_size']
    nhid = conf['sae']['nhid']
    patch_size = conf['patch_size']
    region_size = conf['region_size']

    images = T.tensor4('input')
    filter_shape = (nhid, 1, patch_size, patch_size)
    weights = autoencoder.get_weights().T.reshape(filter_shape)
    filters = theano.shared(weights)

    if not variable_shape:
        convolved = conv.conv2d(
            images,
            filters,
            filter_shape=filter_shape,
            image_shape=[batch_size, 1, region_size, region_size])
    else:
        convolved = conv.conv2d(images, filters)

    if not include_pool:
        return theano.function([images], convolved)
    pool = getattr(convolved, conf['pool_fn'])
    return theano.function([images], pool(axis=(2, 3)))
Example #9
0
def get_feats_in_partitions():
    """
    Extract features for the whole (filtered) dataset, split into train,
    validation and test sets.

    Returns the tuple (X_train, y_train, X_valid, y_valid, X_test, y_test);
    labels are booleans, True where 'classification' is 'Malign'.
    """
    conf = utils.get_config()
    # The next lookups (paths, region_size, region_stride) are unused further
    # down but kept so the same calls and key reads are performed.
    paths = utils.get_paths()
    all_rows = utils.load_csv()
    filters = conf['filters']
    region_size = conf['region_size']
    region_stride = conf['region_stride']

    kept_rows = [r for r in all_rows if utils.check_filter(r, filters)]
    train_rows, valid_rows, test_rows = utils.split_dataset(
        kept_rows,
        conf['valid_percent'],
        conf['test_percent'],
        rng=conf['rng_seed'],
    )

    conv = get_fprop_fn(False)

    def extract(part_rows):
        return get_feats_from_rows(part_rows, conv, conf['stride'])

    def labels(part_rows):
        return [r['classification'] == 'Malign' for r in part_rows]

    print('Getting features from train...')
    X_train = extract(train_rows)
    print('Getting features from valid...')
    X_valid = extract(valid_rows)
    print('Getting features from test...')
    X_test = extract(test_rows)

    y_train = labels(train_rows)
    y_valid = labels(valid_rows)
    y_test = labels(test_rows)
    return X_train, y_train, X_valid, y_valid, X_test, y_test
Example #10
0
def load_lfw():
    """Build the data loader for the LFW image pairs.

    Pairs are read from './data/pairs.txt', resolved to image paths with
    ``utils.get_paths`` and wrapped in a non-shuffled ``DataLoader`` whose
    batch size comes from ``args.batch_size``.

    :returns: the ``torch.utils.data.DataLoader`` over ``LFWDataset``
    """
    file_ext = 'jpg'  # extension only, without a leading '.'
    # NOTE(review): this local is never used -- the call below reads
    # ``args.dataset_path`` instead. Confirm which one is intended.
    dataset_path = './data/lfw'
    pairs_path = './data/pairs.txt'

    pairs = utils.read_pairs(pairs_path)
    path_list, issame_list = utils.get_paths(args.dataset_path, pairs, file_ext)

    print('==> Preparing data..')
    # Per-channel normalisation constants.
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Scale((250, 250)),   # scale to 250x250
        transforms.CenterCrop(150),     # keep the 150x150 centre
        transforms.Scale((224, 224)),   # scale to 224x224 (the network's input size per the original comment)
        transforms.ToTensor(),
        transforms.Normalize(mean=rgb_mean, std=rgb_std),
    ]
    test_transform = transforms.Compose(steps)

    dataset = data_loader.LFWDataset(path_list, issame_list, test_transform)
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False)
Example #11
0
def read_image_frame(dir_path):
    """Build an image frame from the category directories under ``dir_path``.

    Each path returned by ``utils.get_paths(dir_path, dirs=True)`` is read
    with ``utils.read_img_dir``; its position in the listing is the category
    passed to ``standard_image`` together with every image read from it.

    :param dir_path: root path handed to ``utils.get_paths``
    :returns: the result of ``create_img_frame`` on the collected images
    """
    # Indentation is spaces only: the original mixed a tab into this loop
    # body, which Python 3 rejects with a TabError.
    paths = utils.get_paths(dir_path, dirs=True)
    images = []
    for cat, path in enumerate(paths):
        cat_imgs = utils.read_img_dir(path)
        images += [standard_image(cat, img) for img in cat_imgs]
    return create_img_frame(images)
def main():
    """Split the '*.mid' files into train/test sets and write processed data.

    Steps: shuffle the files, hold out one tenth for testing, wipe the old
    train/test directories, generate both datasets with ``create_dataset``,
    pickle the concatenated train notes to ``train_dir/notes.pkl`` and pickle
    the note<->integer vocabularies into ``metadata_dir``.
    """
    paths = get_paths()
    music_files = list(paths["data_dir"].glob("*.mid"))

    # Randomly select 10% test, 90% train. Per the original author's note,
    # a 10% split is enough because even these few files yield over 10k
    # sequences.
    random.shuffle(music_files)
    test_count = len(music_files) // 10
    test_files = music_files[:test_count]
    train_files = music_files[test_count:]

    # Vocabulary of every note seen in either split.
    # Known drawback of this approach: unseen notes cannot be encoded later.
    notes = set()

    # Delete old test/train data before generating new data.
    shutil.rmtree(paths["train_dir"], ignore_errors=True)
    shutil.rmtree(paths["test_dir"], ignore_errors=True)
    # train_dir is recreated as well: it has just been removed and
    # notes.pkl is written directly into it below.
    for key in ("metadata_dir", "train_dir", "test_dir"):
        Path(paths[key]).mkdir(parents=True, exist_ok=True)

    # Train data
    print("Generating processed train data:\n")
    train_notes = create_dataset(train_files, dest_folder=paths["train_dir"])
    # For train there is a single file with all the data concatenated.
    with open(str(paths["train_dir"] / "notes.pkl"), "wb") as notes_file:
        pickle.dump(train_notes, notes_file)
    notes |= set(train_notes)
    print("Generated train data\n\n\n")

    # Test data
    print("Generating processed test data:\n")
    test_notes = create_dataset(test_files, dest_folder=paths["test_dir"])
    notes |= set(test_notes)
    print("Generated test data\n\n\n")

    # TODO: fix this mess later
    # The vocab is created here because building it later might cause
    # problems while playing test data.
    pitch_names = sorted(notes)
    # Map pitches to integers and back.
    note_to_int = {note: number for number, note in enumerate(pitch_names)}
    int_to_note = dict(enumerate(pitch_names))
    with open(paths["metadata_dir"] / "note_to_int.pkl", "wb") as f:
        pickle.dump(note_to_int, f)
    with open(paths["metadata_dir"] / "int_to_note.pkl", "wb") as f:
        pickle.dump(int_to_note, f)

    print("\nFinished Pre-processing.\n\n\n")
Example #13
0
 def __init__(self, which_set):
     """Open the HDF5 file for ``which_set`` and expose its X/y nodes.

     which_set: key into the dict returned by ``utils.get_paths()`` whose
         value is the HDF5 file to open.

     The file handle is stored on ``self.h5file`` and is not closed here.
     """
     conf = utils.get_config()
     paths = utils.get_paths()
     region_size = conf['region_size']
     self.h5file = tables.open_file(paths[which_set])
     node = self.h5file.root.Data
     axes = ('b', 0, 1, 'c')
     # Floor division keeps the channel count an integer on Python 3 as
     # well; true division would put a float into the view shape.
     channels = node.X.shape[1] // (region_size * region_size)
     view_converter = dense_design_matrix.DefaultViewConverter(
         (region_size, region_size, channels), axes)
     super(BCDR, self).__init__(
         X=node.X, view_converter=view_converter, y=node.y)
Example #14
0
 def __init__(self, which_set):
     """Read the X/y arrays of ``which_set`` fully into memory.

     which_set: key into the dict returned by ``utils.get_paths()`` whose
         value is the HDF5 file to read.

     X is reshaped to (rows, region_size, region_size, channels) and passed
     to the parent class as ``topo_view``.
     """
     conf = utils.get_config()
     paths = utils.get_paths()
     region_size = conf['region_size']
     h5file = tables.open_file(paths[which_set])
     # try/finally: the file is closed even when a read fails.
     try:
         node = h5file.root.Data
         X = node.X.read()
         # Floor division: the reshape below needs an integer channel
         # count, which true division does not give on Python 3.
         channels = node.X.shape[1] // (region_size * region_size)
         y = node.y.read()
     finally:
         h5file.close()
     X = X.reshape((X.shape[0], region_size, region_size, channels))
     super(BCDR_On_Memory, self).__init__(topo_view=X, y=y)
Example #15
0
    def __init__(self, options):
        """
    __init__(self, options):

    Store the options, logger and paths on the instance. A falsy
    ``options`` is replaced by ``config.get_options('default', {})``.
    """

        # TODO research decorators for logging and configs
        self.options = options if options else config.get_options('default', {})
        self.logger = logger.get_logger()
        resolved_paths = utils.get_paths()
        self.paths = resolved_paths
        self.git_path = resolved_paths['repo_root']
Example #16
0
  def __init__(self, options):
    """
    __init__(self, options):

    Set up instance state: options (the 'default' options from config when
    the argument is falsy), logger, paths and the repository root path.
    """

    # TODO research decorators for logging and configs
    self.options = options or config.get_options('default', {})
    self.logger = logger.get_logger()
    self.paths = utils.get_paths()
    repo_root = self.paths['repo_root']
    self.git_path = repo_root
Example #17
0
def show_demo():
    """Render the demo page or the result page for the ``entity`` query arg.

    The value of ``entity`` selects the lookup by the marker it contains,
    checked in this order:

    * empty          -> 'demo.html'
    * ``source:a,n`` -> ``advogato_data_KG_source('source', a, int(n))``
    * ``target:a``   -> ``advogato_data_KG_target('target', a)``
    * ``mention:m``  -> ``mention2entity(m)``
    * ``attr:u``     -> ``get_page_rank(u)``
    * ``path:s,t,c`` -> ``get_paths(s, t, c)``
    * anything else  -> first element of ``KG_View(entity)``

    All non-empty cases render 'index.html' with the result as ``contents``.
    """
    print("请求方式为:", request.method)
    entity = request.args.to_dict().get('entity', '')
    if entity == '':
        return render_template('demo.html')

    if "source:" in entity:
        print(entity)
        params = entity.split(':')[1].split(",")
        contents = advogato_data_KG_source('source', params[0], int(params[1]))
        print(contents)
        return render_template('index.html', contents=contents)

    if "target:" in entity:
        print(entity)
        target_name = entity.split(':')[1]
        contents = advogato_data_KG_target('target', target_name)
        print(contents)
        return render_template('index.html', contents=contents)

    if "mention:" in entity:
        print(entity)
        mention = entity.strip().split(":")[1]
        contents = mention2entity(mention)
        print(contents)
        return render_template('index.html', contents=contents)

    if "attr:" in entity:
        print(entity)
        user = entity.strip().split(":")[1]
        contents = get_page_rank(user)
        print(contents)
        return render_template('index.html', contents=contents)

    if "path:" in entity:
        print(entity)
        fields = entity.strip().split(":")[1].split(",")
        source, target, cutoff = fields[0], fields[1], fields[2]
        print(source, target, cutoff)
        contents = get_paths(source, target, cutoff)
        return render_template('index.html', contents=contents)

    # No recognised marker: treat the value as an entity name.
    contents = KG_View(entity)
    print(contents)
    return render_template('index.html', contents=contents[0])
Example #18
0
def main():
    """Run the checkpointed model over the first five test volumes.

    Paths are derived from ``args.datapath``: inputs from 'test', the weights
    from 'checkpoint.pth', outputs to 'fakes', and the table returned by
    ``utils.test_net_cheap`` is written to 'out.csv'.
    """
    in_z = 0

    test_volpath = os.path.join(args.datapath, 'test')
    out_file = os.path.join(args.datapath, 'fakes')
    checkpoint_path = os.path.join(args.datapath, 'checkpoint.pth')
    test_segpath = test_volpath
    double_vol = False
    model = models.Net23(2)
    cuda = args.cuda
    if cuda:
        model.cuda()

    # Without CUDA, remap the stored tensors to the CPU so a checkpoint that
    # was saved from a GPU can still be loaded.
    map_location = None if cuda else 'cpu'
    model.load_state_dict(
        torch.load(checkpoint_path, map_location=map_location))

    batch_size = args.batch_size
    orig_dim = 256
    sqr = transforms.Square()
    center = transforms.CenterCrop2(224)
    scale = transforms.Scale(orig_dim)
    transform_plan = [sqr, scale, center]
    num_labels = 2
    series_names = ['Mag']
    seg_series_names = ['AV']

    f = preprocess.gen_filepaths(test_segpath)

    # First run of digits in every path that contains 'volume'.
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    volume_ids = [
        int(re.findall(r'\d+', name)[0]) for name in f if 'volume' in name
    ]
    # np.unique returns the sorted unique ids; keep the first five.
    mult_inds = np.unique(volume_ids)[0:5]

    volpaths, segpaths = utils.get_paths(mult_inds, f, series_names,
                                         seg_series_names, test_volpath,
                                         test_segpath)

    out = utils.test_net_cheap(mult_inds, in_z, model,
                               transform_plan, orig_dim, batch_size, out_file,
                               num_labels, volpaths, segpaths, nrrd=True,
                               vol_only=double_vol, get_dice=True,
                               make_niis=False, cuda=cuda)
    out_csv = os.path.join(args.datapath, 'out.csv')
    out.to_csv(out_csv, index=False)
Example #19
0
def main():
    """Train the simple depth model with checkpointing and early stopping.

    Exits immediately when ``check_data()`` fails. Otherwise builds the
    train/validation generators from the paths returned by ``get_paths()``,
    compiles the model with ``depth_loss_function`` and Adam, and fits it
    for ``NUM_EPOCHS`` epochs with TensorBoard, checkpoint and
    early-stopping callbacks.
    """

    if not check_data():
        exit()
    img_paths_train, img_paths_val = get_paths()
    print('Succesfully verified data...')

    train_generator = DataGenerator2D(img_paths_train['path'],
                                      './data',
                                      batch_size=1,
                                      shuffle=True,
                                      augmentation_rate=0.5)
    val_generator = DataGenerator2D(img_paths_val['path'],
                                    './data',
                                    batch_size=1,
                                    shuffle=False,
                                    augmentation_rate=0)
    print('Loaded data generators...')

    optimizer = Adam(lr=0.0001, amsgrad=True)

    model = get_simple_model()
    print('Model Loaded')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()

    model.compile(loss=depth_loss_function,
                  optimizer=optimizer,
                  metrics=['mae'])
    print('Model Compiled... Starting Training...')

    tensorboard = TensorBoard(log_dir="./logs/DenseDepth/{}".format(time()),
                              histogram_freq=1,
                              write_graph=True)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    # {epoch} and {val_loss} are filled in by ModelCheckpoint at save time.
    filepath = "./checkpoints/" + "DenseDepth-" + "saved-model-{epoch:03d}-{val_loss:.5f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False)
    callbacks_list = [checkpoint, tensorboard, early_stopping]

    model.fit_generator(train_generator,
                        epochs=NUM_EPOCHS,
                        shuffle=True,
                        callbacks=callbacks_list,
                        validation_data=val_generator)
Example #20
0
def train(
    model_type,
    path_args,
    training_args,
    model_args,
    output_args,
    load_dataset_args,
    checkpoint_args,
    extension_args,
):
    """
    ---------------------------------------------
    Input:
        model_type: "unet" is the only type built from scratch
        path_args: keyword arguments for utils.get_paths
        training_args: must contain "pretrained" and "results_folder";
            all remaining items are passed to fit_generator. The dict is
            not modified.
        model_args: keyword arguments for unet.define_model
        output_args: first argument of unet.define_model
        load_dataset_args: must contain "data_format"; also passed to
            datagen.load_dataset
        checkpoint_args: must contain "checkpoint_path"
        extension_args: keyword arguments for utils.check_folders
    Output: (history, model)
    Raises: ValueError when no pretrained model is requested and
        model_type is not supported
    Load or define a model and fit it on the train/validation generators
    ---------------------------------------------
    """
    # Work on a copy so the caller's dict keeps its keys and can be reused.
    fit_kwargs = dict(training_args)
    pretrained = fit_kwargs.pop("pretrained")
    # "results_folder" is not a fit_generator argument; it is only removed.
    fit_kwargs.pop("results_folder")

    data_format = load_dataset_args["data_format"]
    checkpoint_path = checkpoint_args["checkpoint_path"]

    # Fail early with a clear message instead of hitting an unbound
    # ``model`` variable after the folders and callbacks were set up.
    if not pretrained and model_type != "unet":
        raise ValueError("Unsupported model_type: %r" % (model_type,))

    paths = utils.get_paths(**path_args)
    utils.check_folders(paths, **extension_args)
    keras.backend.set_image_data_format(data_format)

    callbacks = [
        utils.get_early_stopping_callback(),
        utils.get_tensorboard_directory_callback(),
        utils.get_checkpoint_callback(checkpoint_path),
    ]

    if pretrained:
        model = keras.models.load_model(checkpoint_path)
    else:
        model = unet.define_model(output_args, **model_args)

    train_data, val_data = datagen.load_dataset(paths, load_dataset_args)
    history = model.fit_generator(train_data,
                                  validation_data=val_data,
                                  callbacks=callbacks,
                                  **fit_kwargs)

    return (history, model)
def main():
    """Compute basic statistics per book and report the top book per statistic.

    For every path returned by ``get_paths('../Data/books')`` the statistics
    from ``get_basic_stats`` are printed and the book's 'top_20_tokens' are
    written, one per line, to ``top_20_<book>.txt``. Finally a dict mapping
    each statistic name to the book with the highest value is printed.
    """
    book_paths = get_paths('../Data/books')
    book2stats = {}
    for book_path in book_paths:
        stats = get_basic_stats(book_path)
        filename = os.path.basename(book_path)
        # Remove the '.txt' suffix only. str.strip('.txt') strips any of the
        # characters '.', 't', 'x' from BOTH ends and mangles names such as
        # 'tempest.txt'.
        if filename.endswith('.txt'):
            book = filename[:-len('.txt')]
        else:
            book = filename
        print(book, stats)
        book2stats[book] = stats
        with open(f'top_20_{book}.txt', 'w') as f:
            f.write("\n".join(stats['top_20_tokens']))

    stat_names = ("num_sents", "num_tokens", "vocab_size",
                  "num_chapters_or_acts")
    stats2book_with_highest_value = {
        # ``stat=stat`` binds the current name; a plain closure would see
        # only the last one.
        stat: max(book2stats, key=lambda book, stat=stat: book2stats[book][stat])
        for stat in stat_names
    }
    print(stats2book_with_highest_value)
Example #22
0
def main():
    """Train the CNN layer described by its YAML template.

    The optional first command-line argument is the config file. The YAML
    template for the current 'cnn_layer_<n>' is filled in with the flattened
    config plus dataset class names, monitoring batch size and save paths.
    Training is skipped when the save path already exists.
    """
    if len(sys.argv) > 1:
        conf_file = sys.argv[1]
    else:
        conf_file = None
    conf = utils.get_config(conf_file)
    paths = utils.get_paths(conf)
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])
    layer_paths = paths[cnn_layer]

    with open(layer_paths['yaml']) as f:
        src = f.read()

    # The whole validation set is one monitoring batch, so roc_auc is
    # reported from a single batch.
    ds = datasets.BCDR('valid')
    monitoring_batch_size = ds.y.shape[0]
    ds.h5file.close()

    if conf['train_on_memory']:
        train_ds_class = 'BCDR_On_Memory'
    else:
        train_ds_class = 'BCDR'
    if conf['valid_on_memory']:
        valid_ds_class = 'BCDR_On_Memory'
    else:
        valid_ds_class = 'BCDR'

    params = utils.flatten(conf)
    params.update(
        train_ds_class=train_ds_class,
        valid_ds_class=valid_ds_class,
        monitoring_batch_size=monitoring_batch_size,
        save_path=layer_paths['model'],
        best_path=layer_paths['best_model'],
    )
    train = yaml_parse.load(src % params)

    if not os.path.isfile(train.save_path):
        if conf['load_pretrained']:
            print('Setting pretrained filters...')
            ae = serial.load(paths['sae']['model'])

            # Copy the autoencoder weights into the first layer's filters.
            transformer = train.model.layers[0].transformer
            W = ae.get_weights().T.reshape(transformer._filters_shape)
            transformer._filters.set_value(W)

        train.main_loop()
    else:
        print('%s already exists, skipping...' % (train.save_path))
    print('Done!')
Example #23
0
def main():
    """Fill in the layer's YAML template and run its training loop.

    Reads the config (file name optionally given as the first command-line
    argument), substitutes the flattened config and a few derived values
    into the YAML source, and trains unless the model file already exists.
    """
    has_conf_arg = len(sys.argv) > 1
    conf = utils.get_config(sys.argv[1] if has_conf_arg else None)
    paths = utils.get_paths(conf)
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])

    with open(paths[cnn_layer]['yaml']) as yaml_file:
        src = yaml_file.read()

    # Batch size = size of the validation set, so roc_auc is reported from
    # a single batch.
    valid_ds = datasets.BCDR('valid')
    monitoring_batch_size = valid_ds.y.shape[0]
    valid_ds.h5file.close()

    def ds_class(on_memory):
        return 'BCDR_On_Memory' if on_memory else 'BCDR'

    train_ds_class = ds_class(conf['train_on_memory'])
    valid_ds_class = ds_class(conf['valid_on_memory'])

    params = utils.flatten(conf)
    overrides = {
        'train_ds_class': train_ds_class,
        'valid_ds_class': valid_ds_class,
        'monitoring_batch_size': monitoring_batch_size,
        'save_path': paths[cnn_layer]['model'],
        'best_path': paths[cnn_layer]['best_model'],
    }
    params.update(overrides)

    train = yaml_parse.load(src % params)
    already_trained = os.path.isfile(train.save_path)
    if already_trained:
        print('%s already exists, skipping...' % (train.save_path))
    else:
        if conf['load_pretrained']:
            print('Setting pretrained filters...')
            ae = serial.load(paths['sae']['model'])

            first_layer = train.model.layers[0]
            W = ae.get_weights().T.reshape(
                first_layer.transformer._filters_shape)
            first_layer.transformer._filters.set_value(W)

        train.main_loop()
    print('Done!')
Example #24
0
    def __init__(self, sess, bot_url):
        """Restore the saved chat model into ``sess``.

        :param sess: TensorFlow session the checkpoint is restored into
        :param bot_url: stored on the instance as ``self.bot_url``
        """

        self.bot_url = bot_url

        model_path, config_path, vocab_path = get_paths('bot/reddit')

        # Pickle streams are bytes: on Python 3, pickle.load() on a
        # text-mode file fails, so both files are opened with 'rb'.
        # NOTE: unpickling can execute arbitrary code -- these files must come
        # from a trusted source.
        with open(config_path, 'rb') as f:
            saved_args = pickle.load(f)

        with open(vocab_path, 'rb') as f:
            self.chars, self.vocab = pickle.load(f)

        net = Model(saved_args, True)

        saver = tf.train.Saver(net.save_variables_list())
        saver.restore(sess, model_path)

        self.sess = sess
        self.net = net
        self.g = tf.get_default_graph()

        self.chat_rooms = {}
Example #25
0
    def __init__(self, cli_options=None):
        """
    __init__(self, cli_options=None):

    Instantiate class variables.

    cli_options: optional dict of command-line options. 'shelf' selects the
    shelf directly; 'objects' (checked afterwards, so it wins) selects it via
    utils.get_shelf_directory. Omitting the argument behaves like passing an
    empty dict.
    """
        # None replaces the former ``{}`` default: a mutable default is one
        # dict object shared by every call that omits the argument.
        if cli_options is None:
            cli_options = {}

        # TODO Update message with git repo name
        # Script must be run within the git repo
        assert utils.verify_git_repo(), 'Must be ran within the git repo'

        self.logger = logger.get_logger()
        self.paths = utils.get_paths()
        self.git_path = self.paths['repo_root']
        self.gitignore = self.read_gitignore()

        # Get options from config
        self.shelf = 'default'
        if 'shelf' in cli_options:
            self.shelf = cli_options['shelf']
        if 'objects' in cli_options:
            self.shelf = utils.get_shelf_directory(cli_options['objects'])
        self.options = config.get_options(self.shelf, cli_options)
Example #26
0
  def __init__(self, cli_options=None):
    """
    __init__(self, cli_options=None):

    Instantiate class variables.

    cli_options: optional dict of command-line options. 'shelf' selects the
    shelf directly; 'objects' (checked afterwards, so it wins) selects it via
    utils.get_shelf_directory. Omitting the argument behaves like passing an
    empty dict.
    """
    # None replaces the former ``{}`` default: a mutable default is one dict
    # object shared by every call that omits the argument.
    if cli_options is None:
      cli_options = {}

    # TODO Update message with git repo name
    # Script must be run within the git repo
    assert utils.verify_git_repo(), 'Must be ran within the git repo'

    self.logger = logger.get_logger()
    self.paths = utils.get_paths()
    self.git_path = self.paths['repo_root']
    self.gitignore = self.read_gitignore()

    # Get options from config
    self.shelf = 'default'
    if 'shelf' in cli_options:
      self.shelf = cli_options['shelf']
    if 'objects' in cli_options:
      self.shelf = utils.get_shelf_directory(cli_options['objects'])
    self.options = config.get_options(self.shelf, cli_options)
Example #27
0
    def __init__(self, which_set, train_dataset=None):
        """Load images, targets and extra features of ``which_set`` into memory.

        which_set: key into the dict returned by ``utils.get_paths()`` whose
            value is the HDF5 file to read.
        train_dataset: optional dataset providing ``feats_mean`` and
            ``feats_std``. When None, the statistics are computed from this
            set's own features and stored on the instance.

        The features are standardised with those statistics. The data handed
        to the parent class is the tuple (X, y, feats) cast to
        ``theano.config.floatX``, with sources
        ('features', 'targets0', 'targets1').
        """
        conf = utils.get_config()
        paths = utils.get_paths()
        region_size = conf['region_size']
        h5file = tables.open_file(paths[which_set])
        # try/finally: the file is closed even when a read fails.
        try:
            node = h5file.root.Data
            X = node.X.read()
            # Floor division: the reshape and Conv2DSpace below need an
            # integer channel count, which true division does not give on
            # Python 3.
            num_channels = node.X.shape[1] // (region_size * region_size)
            y = node.y.read()
            self.feats = node.feats.read()
        finally:
            h5file.close()

        im_shape = (region_size, region_size)
        X = X.reshape((X.shape[0],) + im_shape + (num_channels,))

        if train_dataset is None:
            self.feats_mean = self.feats.mean(axis=0)
            self.feats_std = self.feats.std(axis=0)
            self.feats = (self.feats - self.feats_mean) / self.feats_std
        else:
            feats_mean = train_dataset.feats_mean
            feats_std = train_dataset.feats_std
            self.feats = (self.feats - feats_mean) / feats_std

        # NOTE(review): self.y is set to the standardised features, not to
        # the ``y`` array read above -- confirm this is intended.
        self.y = self.feats
        source = ('features', 'targets0', 'targets1')
        conv_space = Conv2DSpace(
            shape=im_shape, num_channels=num_channels, axes=('b', 0, 1, 'c'))
        target_space = VectorSpace(y.shape[1])
        shape_space = VectorSpace(self.feats.shape[1])

        space = CompositeSpace([conv_space, target_space, shape_space])

        data = (X.astype(theano.config.floatX),
                y.astype(theano.config.floatX),
                self.feats.astype(theano.config.floatX),)

        super(BCDRComposite, self).__init__(
            data=data, data_specs=(space, source))
Example #28
0
# Script section: restore the checkpointed model and run it over every test
# case whose segmentation path is found under ``test_segpath``.
transform_plan = [sqr, scale, center]
series_names = ['echo']
seg_series_names = ['echo']

model = models.Net23(num_labels)
model.cuda()

model.load_state_dict(torch.load(checkpoint_path))

f_s = preprocess.gen_filepaths(test_segpath)
f_v = preprocess.gen_filepaths(test_volpath)

# Collect the first run of digits of every path containing 'segmentation'.
# The pattern is a raw string: '\d' in a plain literal is an invalid escape
# sequence.
mult_inds = []
for i in f_s:
    if 'segmentation' in i:
        mult_inds.append(int(re.findall(r'\d+', i)[0]))

mult_inds = sorted(mult_inds)

# Drop duplicate ids.
mult_inds = np.unique(mult_inds)

volpaths, segpaths = utils.get_paths(mult_inds, f_s, f_v, series_names, \
        seg_series_names, test_volpath, test_segpath)

# Same list object as transform_plan (an alias, not a copy).
t_transform_plan = transform_plan

utils.test_net_cheap(test_volpath, test_segpath, mult_inds, 0, model,\
        t_transform_plan, original_size, batch_size, out_file, num_labels,\
        num_labels, volpaths, segpaths, nrrd=True,\
        vol_only=False, get_dice=False, make_niis=True)
Example #29
0
# Training batches are drawn through a weighted sampler because the dataset
# is unbalanced.
#   * Balanced class sampling: https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
weights = torch.DoubleTensor(
    utils.make_weights_for_balanced_classes(dataset_train.imgs,
                                            len(dataset_train.classes)))
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
train_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=TRAIN_BATCH_SIZE,
    sampler=sampler,
    drop_last=True,
)
num_class = len(train_loader.dataset.classes)
print('Number of Training Classes: %d' % num_class)

# Validation loader over the image pairs listed in PAIR_TEXT_PATH; the order
# is kept fixed (no shuffling).
pairs = utils.read_pairs(PAIR_TEXT_PATH)
path_list, issame_list = utils.get_paths(VAL_PATH, pairs, FILE_EXT)
val_loader = torch.utils.data.DataLoader(
    data_loader.LFWDataset(path_list, issame_list, val_transform),
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
)

#======= Model & Optimizer =======#
if MODEL_NAME.lower() == 'resnet18':
    model = torchvision.models.resnet18(pretrained=True)
elif MODEL_NAME.lower() == 'resnet34':
    model = torchvision.models.resnet34(pretrained=True)
elif MODEL_NAME.lower() == 'resnet50':
    model = torchvision.models.resnet50(pretrained=True)
elif MODEL_NAME.lower() == 'resnet101':
    model = torchvision.models.resnet101(pretrained=True)
elif MODEL_NAME.lower() == 'resnet152':
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley",
                seed=0,
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam',
                args=None,
                ignore_net=False):
    """Resolve the checkpoint paths of a trained model/probe pair and load them.

    The checkpoint file names are derived from ``baseline``/``adv``/``post``,
    ``ratio`` and ``epoch``; the paths come from ``utils.get_paths`` for the
    'bam' and 'coco' datasets and from a plain ``os.path.join`` otherwise.
    When ``post`` is set the probe is (re)trained through ``post_train``
    before everything is handed to ``models.load_models``.

    Args:
        device: Forwarded to ``post_train`` and ``models.load_models``.
        base_folder: Root folder under which the checkpoints are looked up.
        specific: Forwarded to ``utils.get_paths``, the BAM data loader and
            ``post_train``.
        seed: Seed used in the checkpoint path and for the BAM data loader.
        module: Name of the probed module; part of the checkpoint path.
            ``models.load_models`` receives ``'fc'`` instead when ``leakage``.
        experiment: Experiment folder name used in the checkpoint path.
        ratio: Embedded in the file names via ``str(ratio)`` (stripped again
            for datasets other than 'bam'/'coco'). Converted with ``float``
            for the BAM loader; for COCO it is truncated to an integer to
            decide whether ``balanced`` is switched on.
        adv: Select the "adv" checkpoint family (when ``baseline`` is false).
        baseline: Select the "base" checkpoint family; takes precedence over
            ``adv``. With neither flag the "debias" family is used.
        epoch: Optional epoch; appended as ``"_<epoch>"`` to most file names.
        post: Use the "_after" probe checkpoint. Requires ``force`` (or
            ``tcav`` when ``leakage`` is false), otherwise an exception is
            raised.
        multiple: Use 39 probe attributes instead of 12 (ignored for COCO,
            which uses 81, and for ``leakage``, which uses 2).
        leakage: Load the MLP probe stored under ``leakage/``; requires
            ``post``.
        tcav: With ``post`` and without ``leakage``, skip post-training.
        force: With ``post``, run the post-training step.
        dataset: 'bam', 'coco', or any other value (which selects the
            idenProf data loader and ratio-free file names).
        args: Optional namespace. ``args.custom_end`` (if non-empty) is
            appended to the post-trained debias probe file name. For
            ``dataset='coco'`` a deep copy of ``args`` is passed to the COCO
            data loader, so it must not be ``None`` there. ``args`` itself is
            never modified.
        ignore_net: Forwarded to ``models.load_models``.

    Returns:
        The ``(model, net, net_forward, activation_probe)`` tuple returned by
        ``models.load_models``.

    Raises:
        AssertionError: If ``leakage`` is set without ``post``, or a required
            checkpoint file does not exist.
        Exception: ``'Run trial again'`` when ``post`` is set but neither
            ``force`` nor (for the non-leakage case) ``tcav`` is.
    """
    # Disabled special case, kept for reference:
    # if dataset == 'coco' and adv:
    #     class DummyArgs:
    #         num_object = 79
    #         finetune=False
    #         layer='generated_image'
    #         autoencoder_finetune=True
    #         finetune=True
    #     model = balanced_models.ObjectMultiLabelAdv(DummyArgs(), 79, 300, True, 1)
    #     ok    = torch.load('model_best.pth.tar', encoding='bytes')
    #     state_dict = {key.decode("utf-8"):ok[b'state_dict'][key] for key in ok[b'state_dict']}
    #     model.load_state_dict(state_dict)
    #     model.to(device)
    #     model.eval()
    if leakage:
        assert post
    epoch = "" if epoch is None else "_" + str(epoch)

    # Work on a local copy of the suffix: `args` may be None (its default),
    # and writing the prefixed value back into `args` would add one more "_"
    # on every call that shares the same namespace.
    custom_end = str(getattr(args, 'custom_end', None) or '')
    if custom_end:
        custom_end = "_" + custom_end

    ratio_tag = str(ratio)
    if baseline:
        model_end = "resnet_base_" + ratio_tag + epoch + '.pt'
        if not post:
            n2v_end = "resnet_n2v_base_" + ratio_tag + epoch + '.pt'
        else:
            n2v_end = "resnet_n2v_base_after_" + ratio_tag + epoch + '.pt'
    elif not adv:
        model_end = "resnet_debias_" + ratio_tag + epoch + '.pt'
        if not post:
            n2v_end = "resnet_n2v_debias_" + ratio_tag + epoch + '.pt'
        else:
            n2v_end = ("resnet_n2v_debias_after_" + ratio_tag + epoch +
                       custom_end + '.pt')
    else:
        # The adversarial model name (and its non-post probe) carry no epoch.
        model_end = "resnet_adv_" + ratio_tag + '.pt'
        if not post:
            n2v_end = "resnet_n2v_adv_" + ratio_tag + '.pt'
        else:
            n2v_end = "resnet_n2v_adv_after_" + ratio_tag + epoch + '.pt'

    ratio_in_names = dataset in ('bam', 'coco')
    if not ratio_in_names:
        # Other datasets store their checkpoints without the ratio tag.
        model_end = model_end.replace('_' + ratio_tag, '')
        n2v_end = n2v_end.replace('_' + ratio_tag, '')

    # Leakage probes are MLPs kept in their own sub-folder.
    if leakage:
        n2v_file = 'leakage/' + n2v_end.replace('n2v', 'mlp')
    else:
        n2v_file = n2v_end

    if ratio_in_names:
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=n2v_file,
            n2v_module=module,
            experiment=experiment,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(base_folder, str(seed), experiment, module,
                                  model_end)
        n2v_path = os.path.join(base_folder, str(seed), experiment, module,
                                n2v_file)

    if dataset == 'bam':
        # Train on the requested bias ratio, always test on the 0.5 split.
        trainloader, _ = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(0.5), specific=specific)
    elif dataset == 'coco':
        tmp_args = copy.deepcopy(args)
        tmp_args.ratio = ratio
        # Go through float() first: int("0.5") raises ValueError, whereas the
        # truncating comparison is what the integer case already relied on.
        if int(float(ratio)) > 0:
            tmp_args.balanced = True
        if leakage:
            tmp_args.gender_balanced = True
        trainloader, testloader = coco_dataload.get_data_loader_coco(tmp_args)
    else:
        trainloader, testloader = dataload.get_data_loader_idenProf(
            'idenprof',
            train_shuffle=True,
            train_batch_size=64,
            test_batch_size=64,
            exclusive=True)

    coco_adv = (dataset == 'coco' and adv)
    if not coco_adv:
        assert os.path.exists(model_path), model_path

    if post:
        # since we have to run a separate script, might not have finished...
        model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
        n2v_extra = model_extra + '_after'
        if not leakage:
            if tcav:
                pass
            elif force:
                post_train.train_net2vec(
                    trainloader,
                    testloader,
                    device,
                    seed,
                    specific=specific,
                    p=ratio,
                    n_epochs=20,
                    module=module,
                    lr=.01,
                    out_file=None,
                    base_folder=base_folder,
                    experiment1=experiment,
                    experiment2=experiment,
                    model_extra=model_extra,
                    n2v_extra=n2v_extra,
                    with_n2v=True,
                    nonlinear=False,  # might want to change this later
                    model_custom_end=epoch.replace('_', ''),
                    n2v_custom_end=epoch.replace('_', ''),
                    multiple=multiple,
                    dataset=dataset)
            else:
                raise Exception('Run trial again')
        elif force:
            post_train.train_leakage(
                trainloader,
                testloader,
                device,
                seed,
                specific=specific,
                p=ratio,
                n_epochs=20,
                module=module,
                lr=5e-5,  # leakage model uses adam
                out_file=None,
                base_folder=base_folder,
                experiment1=experiment,
                experiment2=experiment,
                model_extra=model_extra,
                n2v_extra=n2v_extra,
                with_n2v=True,
                nonlinear=True,  # MLP leakage model
                model_custom_end='',
                n2v_custom_end='',
                dataset=dataset)
        else:
            raise Exception('Run trial again')
    elif not coco_adv:
        # should've been saved during training if not ported from tianlu
        assert os.path.exists(n2v_path)

    num_attributes = 10 + 9 + 20 if multiple else 12
    num_classes = 10
    if dataset == 'coco':
        num_attributes = 81
        num_classes = 79

    if coco_adv:
        model_builder = None
    else:
        # num_classes is fixed above and not reassigned, so the closure sees
        # the intended value when models.load_models invokes it.
        model_builder = lambda x, y, z: models.resnet_(
            pretrained=True,
            custom_path=x,
            device=y,
            initialize=z,
            num_classes=num_classes,
            size=50 if (dataset == 'bam') or (dataset == 'coco') else 34)

    model, net, net_forward, activation_probe = models.load_models(
        device,
        model_builder,
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module='fc' if leakage else module,
        num_attributes=2 if leakage else num_attributes,
        model_init=False,
        n2v_init=False,
        nonlinear=leakage,
        ignore_net=ignore_net)
    print(n2v_path)
    return model, net, net_forward, activation_probe
Example #31
0
# Load the serialized model object from model_path and move it to the GPU.
model = torch.load(model_path).cuda()

# prepare dataset
db = prepare_db(opt)
# use only the evaluation subset. use db['train'] for fetching the training subset
dataset = db['eval']

# ==================================================================================
# compute saliency maps for different inputs for one splitting node
# pick a tree index and splitting node index
# tree_idx = 0
# node_idx = 0 # 0 - 510 for the 511 splitting nodes in a tree of depth 9
# get saliency maps for a specified node for different input tensors
# utils.get_node_saliency_map(dataset, model, tree_idx, node_idx, name=opt.dataset)
# ==================================================================================

# get the computational paths for some random inputs
# NOTE(review): within this excerpt tree_idx is only assigned in the
# commented-out block above — make sure it is defined before this call.
sample, paths, class_pred = utils.get_paths(dataset,
                                            model,
                                            tree_idx,
                                            name=opt.dataset)

# for each sample, compute and plot the decision saliency map, which reflects how the input will influence the
# decision-making process
utils.get_path_saliency(sample,
                        paths,
                        class_pred,
                        model,
                        tree_idx,
                        name=opt.dataset)
Example #32
0
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils

# Repository root as reported by utils.get_paths(); the synced paths below
# are anchored on it.
repo_root = utils.get_paths()['repo_root']

# Default config options
default = dict(
    storage_url='https://example.com',
    auth_url='https://example.com/auth/',
    object_store_container='blah',
    # File-name patterns listed under 'ignore_patterns'.
    ignore_patterns=[
        '._*',
        '.__*',
        '.TemporaryItems*',
        '._.TemporaryItems',
        '.DS_Store',
        '*.pyc',
    ],
    dest_sync=os.path.join(repo_root, 'dest_sync'),
    checksum=False,
    binary_overrides=[],
)

# Example shelf config: only overrides 'dest_sync', pointing it at the
# repository root itself.
example_shelf = dict(dest_sync=repo_root)
Example #33
0
def run(type):
	"""Show the analysis form on GET; otherwise create and start a new analysis.

	For non-GET requests the uploaded mutations file and the form fields are
	turned into a case configuration, the case is created, the file is
	uploaded to the case storages and the case is started.

	Args:
		type: Analysis type; must be COHORT_ANALYSIS or SINGLE_TUMOR_ANALYSIS
			(the name shadows the builtin but is kept because it is part of
			the view's interface).

	Returns:
		The rendered "analysis.html" template on GET, otherwise a redirect to
		"cases.index" (highlighting the new case on success).

	Aborts with HTTP 400 when `type` is not one of the two known values.
	"""
	if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
		abort(400)

	if request.method == "GET":
		form = dict(
			ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
			ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
			oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)

		return render_template("analysis.html", type=type, form=form)

	# Refuse new analyses once the user holds too many cases.
	if current_app.wok.cases_count(current_user) >= current_app.config.get("LIMIT_NUM_CASES", 100):
		flash("""There is a limit on the number of simultaneous analysis that can be managed.
		You must remove finished analysis before running new ones.""", "error")
		return redirect(url_for("cases.index"))

	mutations_file = request.files['mutations_file']
	file_name = os.path.basename(mutations_file.filename)

	# Fall back to the uploaded file's base name when no project name is given.
	project_id = request.form['project_name']
	if len(project_id) == 0:
		project_id = os.path.splitext(file_name)[0]

	project_id = unique_project_id(normalize_id(project_id))

	# Disabled account-validation gate, kept for reference:
	# if not current_user.validated:
	# 	flash("""You can not run an analysis with your data until you are completely registered.
	# 	Please check your email and follow the instructions to validate this account.""", "error")
	# 	flash("Meanwhile you can play with the included examples.")
	# 	return redirect(url_for("examples"))

	cb = ConfigBuilder()
	cb.add_value("user_id", current_user.nick)
	cb.add_value("workspace", DEFAULT_WORKSPACE)
	cb.add_value("project.id", project_id)

	#case_name = "-".join([current_user.nick, project_id])
	#cb.add_value("wok.instance.name", case_name)

	results_path, project_path, project_temp_path = get_paths(project_id)

	if not current_user.is_anonymous():
		cb.add_value("website.user_id", current_user.nick)

	if type == SINGLE_TUMOR_ANALYSIS: #request.form.get("variants_only") == "1":
		cb.add_value("variants_only", True)
		cb.add_value("skip_oncodrivefm", True)
		cb.add_value("skip_oncodriveclust", True)

	# Both OncodriveFM thresholds are validated the same way (a positive
	# integer with an optional trailing '%'), so they share one code path;
	# a failure is only reported for cohort analyses.
	fm_thresholds = (
		("ofm_genes_threshold", ONCODRIVEFM_GENES_THRESHOLD_KEY),
		("ofm_pathways_threshold", ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY))
	for field, conf_key in fm_thresholds:
		try:
			threshold = request.form[field]
			if re.match(r"^[1-9]\d*%?$", threshold):
				cb.add_value(conf_key, threshold)
		except Exception:
			if type == COHORT_ANALYSIS:
				current_app.logger.warning("[{}] Wrong form input: {}={}".format(
					current_user.nick, field, request.form.get(field)))

	# The OncodriveCLUST threshold must parse as an integer >= 1.
	try:
		threshold = int(request.form["oclust_genes_threshold"])
		if threshold >= 1:
			cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
	except Exception:
		if type == COHORT_ANALYSIS:
			current_app.logger.warning("[{}] Wrong form input: {}={}".format(
				current_user.nick, "oclust_genes_threshold", request.form.get("oclust_genes_threshold")))

	genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
	cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
	cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)
	if genes_filter_enabled:
		# Best effort: a missing or unreadable filter file is logged and the
		# analysis continues without a genes filter.
		try:
			genes_filter_file = request.files['genes_filter_file']
			genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
			genes_filter_file.save(genes_filter_file_path)
			# An empty upload is treated as "no filter".
			if os.path.getsize(genes_filter_file_path) != 0:
				cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
				cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
		except Exception:
			current_app.logger.exception("Error retrieving genes filter from form")

	assembly = request.form.get("assembly", DEFAULT_ASSEMBLY).lower()

	project = dict(
		id=project_id,
		assembly=assembly,
		files=[file_name])

	projects = [init_project_files(project, check_paths=False)]
	cb.add_value("projects", projects)

	properties = dict(
		analysis_type=type,
		path=os.path.relpath(project_path, results_path))

	current_app.logger.info("[{}] Starting analysis {} ...".format(current_user.nick, project_id))

	# The case is created stopped (start=False) so the input file can be
	# uploaded to its storages before it runs.
	case = current_app.wok.create_case(
		current_user, project_id, cb, PROJECT_NAME, MUTATIONS_FLOW_NAME,
		properties=properties, start=False)

	engine_case = current_app.wok.engine.case(case.engine_name)

	#TODO use a background thread
	upload_files(current_app.logger, case.engine_name, engine_case.storages, projects, streams=[mutations_file.stream])

	current_app.logger.info("[{}] Analysis {} started on case {}...".format(
		current_user.nick, project_id, case.engine_name))

	engine_case.start()

	return redirect(url_for("cases.index", highlight=case.id))
Example #34
0
# Each feature family named in f_list is extracted in turn. The first two
# branches append their columns to `features` and refresh `segm_ids` from the
# rows they loaded.
if 'imnet' in f_list:
    rows = utils.load_csv()
    feats, y = fe_extraction.get_feats_from_imagenet(rows)
    features = np.hstack((features, feats))
    segm_ids = np.asarray([int(row['segmentation_id']) for row in rows])
if 'hcfeats' in f_list:
    # Features read from the CSV file configured under 'csv_features_file',
    # restricted to the 's_', 't_' and 'i_' prefixes.
    rows = utils.load_csv(conf['csv_features_file'])
    feats, y = fe_extraction.get_feats_from_csv(
        rows, prefixes=['s_', 't_', 'i_'])
    feats = np.asarray(feats)
    features = np.hstack((features, feats))
    segm_ids = np.asarray([int(row['segmentation_id']) for row in rows])
if 'cnn' in f_list:
    # Load the best saved model for the configured number of CNN layers.
    cnn_layer = 'cnn_layer_%i' % (conf['cnn_layers'])
    paths = utils.get_paths(conf)
    model_path = paths[cnn_layer]['best_model']
    model = serial.load(model_path)
    rows = utils.load_csv()
    # Rows are passed to the model in chunks of chunkSize and the per-chunk
    # results are accumulated into feats / y.
    chunkSize = 32
    feats, y = (None, None)
    for i in range(0, len(rows), chunkSize):
        # Clamp the chunk end so the final chunk stops at the last row.
        offset = min(i + chunkSize, len(rows))
        f_chunk, y_chunk = fe_extraction.get_feats_from_cnn(
            rows[i:offset], model)
        if feats is None:
            feats = f_chunk
            y = y_chunk
        else:
            feats = np.vstack((feats, f_chunk))
            y = np.hstack((y, y_chunk))
    # NOTE(review): within this excerpt the CNN features are never appended to
    # `features` and `segm_ids` is not refreshed — presumably that follows
    # after this loop; confirm.
Example #35
0
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils

# Dotted paths of the object-store implementations a config can name under
# "object_store".
SWIFT_OBJSTORE = "cpe.pantri.lib.fb_objectstore.FB_ObjectStore"
EXAMPLE_OBJSTORE = "cpe.pantri.lib.example.example"

# Repository root as reported by utils.get_paths().
repo_root = utils.get_paths()["repo_root"]

# Default config options
default = dict(
    ignore_patterns=[
        "._*",
        ".__*",
        ".TemporaryItems*",
        "._.TemporaryItems",
        ".DS_Store",
        "*.pyc",
    ],
    dest_sync=os.path.join(repo_root, "dest_sync"),
    checksum=False,
    binary_overrides=[],
    # The defaults use the example object store.
    object_store=EXAMPLE_OBJSTORE,
)

# Example shelf config: only overrides "dest_sync", pointing it at the
# repository root itself.
example_shelf = dict(dest_sync=repo_root)

example_shelf_2 = {
Example #36
0
def train(
        trainloader,
        testloader,
        device,
        seed,
        debias_=True,
        specific=None,
        ratio=0.5,  # bias ratio in dataset
        n_epochs=5,
        model_lr=1e-3,
        n2v_lr=1e-3,
        combined_n2v_lr=1e-3,  # metalearning rate for n2v
        alpha=100,  # for debias,
        beta=0.1,  # for adversarial loss
        out_file=None,
        base_folder="",
        results_folder="",
        experiment="sgd",
        momentum=0,
        module="layer4",
        finetuned=False,
        adversarial=False,
        nonlinear=False,
        subset=False,
        subset_ratio=0.1,
        save_every=False,
        model_momentum=0,
        n2v_momentum=0,
        experimental=False,
        multiple=False,
        debias_multiple=False,
        reset=False,
        reset_counter=1,
        n2v_start=False,
        experiment2=None,
        adaptive_alpha=False,
        n2v_adam=False,
        single=False,
        imagenet=False,
        train_batch_size=64,
        constant_resize=False,
        adaptive_resize=False,
        no_class=False,
        gamma=0,
        partial_projection=False,
        norm='l2',
        constant_alpha=False,
        jump_alpha=False,
        linear_alpha=False,
        mean_debias=False,
        no_limit=False,
        dataset='bam',
        parallel=False,
        gpu_ids=[],
        switch_modes=True):
    print("mu", momentum, "debias", debias_, "alpha", alpha, " | ratio:",
          ratio)

    def get_vg(W):
        if single:
            return W[-2, :]
        else:
            return W[-2, :] - W[-1, :]

    if dataset == 'bam' or dataset == 'coco':
        model_init_path, n2v_init_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end="resnet_init" + '.pt',
            n2v_end="resnet_n2v_init" + '.pt',
            n2v_module=module,
            experiment=experiment,
            with_n2v=False)
    else:
        model_init_path = os.path.join(base_folder, str(seed), experiment,
                                       'resnet_init.pt')
        n2v_init_path = os.path.join(base_folder, str(seed), experiment,
                                     module, 'resnet_n2v_init.pt')
    if finetuned:
        if dataset == 'bam' or dataset == 'coco':
            model_init_path = utils.get_model_path(
                base_folder,
                seed,
                specific,
                "resnet_" + str(ratio) + ".pt",
                experiment='post_train'
                if not n2v_start else experiment.split('_finetuned')[0])
        else:
            model_init_path = os.path.join(
                base_folder, str(seed), 'post_train' if not n2v_start else
                experiment.split('_finetuned')[0], 'resnet.pt')
        assert (debias_ and not adversarial) or (
            adversarial and not debias_) or (not adversarial and not debias_)
        if debias_ and n2v_start:
            ext = "_n2v_" if not nonlinear else "_mlp_"
            if dataset == 'bam' or dataset == 'coco':
                n2v_init_path = utils.get_net2vec_path(
                    base_folder,
                    seed,
                    specific,
                    module,
                    "resnet" + str(ext) + str(ratio) + ".pt",
                    experiment=experiment.split('_finetuned')[0])
            else:
                n2v_init_path = os.path.join(base_folder, str(seed),
                                             experiment.split('_finetuned')[0],
                                             module,
                                             'resnet' + ext[:-1] + '.pt')
        # if we're also doing adversarial, make sure to load the matching n2v as init...
        if adversarial:
            ext = "_n2v_" if not nonlinear else "_mlp_"
            if dataset == 'bam' or dataset == 'coco':
                n2v_init_path = utils.get_net2vec_path(base_folder,
                                                       seed,
                                                       specific,
                                                       module,
                                                       "resnet" + str(ext) +
                                                       str(ratio) + ".pt",
                                                       experiment='post_train')
            else:
                n2v_init_path = os.path.join(base_folder, str(seed),
                                             'post_train', module,
                                             'resnet' + ext[:-1] + '.pt')
    num_classes = 10
    num_attributes = 12
    if nonlinear:
        num_attributes = 2
    if multiple:
        num_attributes = 10 + 9 + 2 * 10
    if dataset == 'coco':
        num_classes = 79
        num_attributes = 81
    model, net, net_forward, activation_probe = models.load_models(
        device,
        lambda x, y, z: models.resnet_(pretrained=True,
                                       custom_path=x,
                                       device=y,
                                       initialize=z,
                                       num_classes=num_classes,
                                       size=50 if (dataset == 'bam' or dataset
                                                   == 'coco') else 34),
        model_path=model_init_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_init_path,
        module=module,
        num_attributes=num_attributes,
        # we want to make sure to save the inits if not finetuned...
        model_init=True if not finetuned else False,
        n2v_init=True if not (finetuned and
                              (adversarial or
                               (debias_ and n2v_start))) else False,
        loader=trainloader,
        nonlinear=nonlinear,
        # parameters if we want to initially project probes to have a certain amount of bias
        partial_projection=partial_projection,
        t=gamma)
    print(model_init_path, n2v_init_path)
    model_n2v_combined = models.ProbedModel(model,
                                            net,
                                            module,
                                            switch_modes=switch_modes)
    if n2v_adam:
        combined_optim = torch.optim.Adam(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters()
            }],
            lr=n2v_lr)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.Adam(net.parameters(), lr=n2v_lr)
    else:
        combined_optim = torch.optim.SGD(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters(),
                'lr': combined_n2v_lr,
                'momentum': n2v_momentum
            }],
            lr=model_lr,
            momentum=model_momentum)

        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.SGD(net.parameters(),
                                    lr=n2v_lr,
                                    momentum=n2v_momentum)
    model_optim = torch.optim.SGD(model.parameters(),
                                  lr=model_lr,
                                  momentum=model_momentum)

    d_losses = []
    adv_losses = []
    n2v_train_losses = []
    n2v_accs = []
    n2v_val_losses = []
    class_train_losses = []
    class_accs = []
    class_val_losses = []
    alpha_log = []
    magnitudes = []
    magnitudes2 = []
    unreduced = []
    bias_grads = []
    loss_shapes = []
    loss_shapes2 = []

    results = {
        "debias_losses": d_losses,
        "n2v_train_losses": n2v_train_losses,
        "n2v_val_losses": n2v_val_losses,
        "n2v_accs": n2v_accs,
        "class_train_losses": class_train_losses,
        "class_val_losses": class_val_losses,
        "class_accs": class_accs,
        "adv_losses": adv_losses,
        "alphas": alpha_log,
        "magnitudes": magnitudes,
        "magnitudes2": magnitudes2,
        "unreduced": unreduced,
        "bias_grads": bias_grads,
        "loss_shapes": loss_shapes,
        "loss_shapes2": loss_shapes2
    }
    if debias_:
        results_end = str(ratio) + "_debias.pck"
    elif adversarial:
        results_end = str(ratio) + "_adv.pck"
        if nonlinear:
            results_end = str(ratio) + "_mlp_adv.pck"
    else:
        results_end = str(ratio) + "_base.pck"

    if dataset == 'bam' or dataset == 'coco':
        results_path = utils.get_net2vec_path(
            results_folder, seed, specific, module, results_end,
            experiment if experiment2 is None else experiment2)
    else:
        results_path = os.path.join(
            results_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            results_end)
    if debias_:
        model_end = "resnet_debias_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_debias_" + str(ratio) + '.pt'
    elif adversarial:
        if not nonlinear:
            model_end = "resnet_adv_" + str(ratio) + '.pt'
        else:
            model_end = "resnet_adv_nonlinear_" + str(ratio) + '.pt'
        if not nonlinear:
            n2v_end = "resnet_n2v_adv_" + str(ratio) + '.pt'
        else:
            n2v_end = "resnet_mlp_adv_" + str(ratio) + '.pt'
    else:
        model_end = "resnet_base_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_base_" + str(ratio) + '.pt'

    if dataset != 'bam' and dataset != 'coco':
        model_end = model_end.replace('_' + str(ratio), '')
        n2v_end = n2v_end.replace('_' + str(ratio), '')

    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=n2v_end,
            n2v_module=module,
            experiment=experiment if experiment2 is None else experiment2,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            model_end)
        n2v_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            n2v_end)
    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[
                    key] in specific:
                specific_idx = int(key)
            else:
                specific_idx = 0
    train_labels = None if not nonlinear else [-2, -1]
    d_last = 0
    resize = constant_resize or adaptive_resize
    if imagenet:
        imagenet_trainloaders, _ = dataload.get_imagenet_tz(
            './datasets/imagenet',
            workers=8,
            train_batch_size=train_batch_size // 8,
            resize=resize,
            constant=constant_resize)
        imagenet_trainloader = dataload.process_imagenet_loaders(
            imagenet_trainloaders)

    params = list(model_n2v_combined.parameters())[:-2]
    init_alpha = alpha
    last_e = 0

    # setup training criteria
    if dataset == 'coco':
        object_weights = torch.FloatTensor(
            trainloader.dataset.getObjectWeights())
        gender_weights = torch.FloatTensor(
            trainloader.dataset.getGenderWeights())
        all_weights = torch.cat([object_weights, gender_weights])
        probe_criterion = nn.BCEWithLogitsLoss(weight=all_weights.to(device),
                                               reduction='elementwise_mean')
        downstream_criterion = nn.BCEWithLogitsLoss(
            weight=object_weights.to(device), reduction='elementwise_mean')
    else:
        probe_criterion = None
        downstream_criterion = nn.CrossEntropyLoss()

    for e in range(n_epochs):
        # save results every epoch...
        with open(results_path, 'wb') as f:
            print("saving results", e)
            print(results_path)
            pickle.dump(results, f)

        model.eval()

        with torch.no_grad():
            n2v_acc, n2v_val_loss = utils.net2vec_accuracy(
                testloader, net_forward, device, train_labels)
            n2v_accs.append(n2v_acc)
            n2v_val_losses.append(n2v_val_loss)

            if dataset != 'coco':
                class_acc, class_val_loss = utils.classification_accuracy(
                    testloader, model, device)
                class_accs.append(class_acc)
                class_val_losses.append(class_val_loss)
            else:
                f1, mAP = utils.detection_results(testloader, model, device)
                print("Epoch", e, "| f1:", f1, "| mAP:", mAP)
                class_accs.append([f1, mAP])

        d_initial = 0
        if not adversarial:
            curr_W = net.weight.data.clone()
            if not multiple:
                vg = get_vg(curr_W).reshape(-1, 1)
                d_initial = debias.debias_loss(curr_W[:-2], vg, t=0).item()
                print("Epoch", e, "bias", str(d_initial), " | debias: ",
                      debias_)
            else:
                ds = np.zeros(10)
                for i in range(10):
                    if i == 0:
                        vg = (curr_W[10, :] - curr_W[11, :]).reshape(-1, 1)
                    else:
                        vg = (curr_W[20 + i, :] - curr_W[29 + i, :]).reshape(
                            -1, 1)
                    ds[i] = debias.debias_loss(curr_W[:10], vg, t=0).item()
                print("Epoch", e, "bias", ds, " | debias: ", debias_)
                print("Accuracies:", n2v_acc)
                d_initial = ds[0]
        else:
            print("Epoch", e, "Adversarial", n2v_accs[-1])
        if adaptive_alpha and (e == 0 or ((d_last / d_initial) >=
                                          (5 / 2**(e - 1)) or
                                          (0.8 < (d_last / d_initial) < 1.2))):
            #alpha = alpha
            old_alpha = alpha
            # we don't want to increase too much if it's already decreasing
            if (e == 0 or (d_last / d_initial) >= (5 / 2**(e - 1))):
                alpha = min(
                    alpha * 2, (15 / (2**e)) / (d_initial + 1e-10)
                )  # numerical stability just in case d_initial gets really low
                #if e > 0 and old_alpha >= alpha:
                #    alpha = old_alpha # don't update if we're decreasing...
                print("Option 1")
            if e > 0 and alpha < old_alpha:
                # we want to increase if plateaud
                alpha = max(
                    old_alpha * 1.5, alpha
                )  # numerical stability just in case d_initial gets really low
                print("Option 2")
            # don't want to go over 1000...
            if alpha > 1000:
                alpha = 1000
            d_last = d_initial
        elif not adaptive_alpha and not constant_alpha:
            if dataset == 'coco' and jump_alpha:
                if e < 2:
                    alpha = 5e3
                elif e >= 2 and e < 4:
                    alpha = 1e4
                else:
                    alpha = init_alpha
            elif jump_alpha and (e - last_e) > 2:
                if not mean_debias:
                    if alpha < 100:
                        alpha = min(alpha * 2, 100)
                        last_e = e
                    else:
                        # two jumps
                        # if (e-last_e) >= ((n_epochs - last_e) // 2):
                        #     alpha = 1000
                        # else:
                        alpha = 1000
                else:
                    if alpha < 1000:
                        alpha = min(alpha * 2, 1000)
                        last_e = e
                    else:
                        alpha = 10000
            elif linear_alpha and (e - last_e) > 2:
                if alpha < 100:
                    alpha = min(alpha * 2, 100)
                    last_e = e
                else:
                    alpha += (1000 - 100) / (n_epochs - last_e)
            elif not jump_alpha and not linear_alpha:
                if (e + 1) % 3 == 0:
                    # apply alpha schedule?
                    # alpha = min(alpha * 1.2, max(init_alpha,1000))
                    alpha = alpha * 1.5
        alpha_log.append(alpha)
        print("Current Alpha:,", alpha)
        if save_every and e % 10 == 0 and e > 0 and seed == 0 and debias_:
            torch.save(net.state_dict(),
                       n2v_path.split('.pt')[0] + '_' + str(e) + '.pt')
            torch.save(model.state_dict(),
                       model_path.split('.pt')[0] + '_' + str(e) + '.pt')
        if reset and (e + 1) % reset_counter == 0 and e > 0:
            print("resetting")
            net, net_forward, activation_probe = net2vec.create_net2vec(
                model,
                module,
                num_attributes,
                device,
                pretrained=False,
                initialize=True,
                nonlinear=nonlinear)
            n2v_optim = torch.optim.SGD(net.parameters(),
                                        lr=n2v_lr,
                                        momentum=n2v_momentum)

        model.train()
        ct = 0
        for X, y, genders in trainloader:
            ids = None
            ##### Part 1: Update the Embeddings #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            labels = utils.merge_labels(y, genders, device)
            logits = net_forward(X.to(device), switch_modes=switch_modes)
            # Now actually update net2vec embeddings, making sure to use the same batch
            if train_labels is not None:
                if logits.shape[1] == labels.shape[1]:
                    logits = logits[:, train_labels]
                labels = labels[:, train_labels]
            shapes = []
            shapes2 = []
            if dataset == 'coco':
                prelim_loss = probe_criterion(logits, labels)
            else:
                prelim_loss, ids = utils.balanced_loss(logits,
                                                       labels,
                                                       device,
                                                       0.5,
                                                       ids=ids,
                                                       multiple=multiple,
                                                       specific=specific_idx,
                                                       shapes=shapes)
            #print("prelim_loss:", prelim_loss.item())
            prelim_loss.backward()
            # we don't want to update these parameters, just in case
            model_optim.zero_grad()
            n2v_train_losses.append(prelim_loss.item())
            n2v_optim.step()
            try:
                magnitudes.append(
                    torch.norm(net.weight.data, dim=1).data.cpu().numpy())
            except:
                pass

            ##### Part 2: Update Conv parameters for classification #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            class_logits = model(X.to(device))
            class_loss = downstream_criterion(class_logits, y.to(device))
            class_train_losses.append(class_loss.item())

            if debias_:
                W_curr = net.weight.data
                vg = get_vg(W_curr).reshape(-1, 1)
                unreduced.append(
                    debias.debias_loss(W_curr[:-2], vg, t=0,
                                       unreduced=True).data.cpu().numpy())

            loss = class_loss
            #### Part 2a: Debias Loss
            if debias_:
                model_optim.zero_grad()
                n2v_optim.zero_grad()

                labels = utils.merge_labels(y, genders, device)
                o = net.weight.clone()
                combined_optim.zero_grad()
                with higher.innerloop_ctx(model_n2v_combined,
                                          combined_optim) as (fn2v,
                                                              diffopt_n2v):
                    models.update_probe(fn2v)
                    logits = fn2v(X.to(device))
                    if dataset == 'coco':
                        prelim_loss = probe_criterion(logits, labels)
                    else:
                        prelim_loss, ids = utils.balanced_loss(
                            logits,
                            labels,
                            device,
                            0.5,
                            ids=ids,
                            multiple=False,
                            specific=specific_idx,
                            shapes=shapes2)
                    diffopt_n2v.step(prelim_loss)
                    weights = list(fn2v.parameters())[-2]
                    vg = get_vg(weights).reshape(-1, 1)
                    d_loss = debias.debias_loss(weights[:-2],
                                                vg,
                                                t=gamma,
                                                norm=norm,
                                                mean=mean_debias)
                    # only want to save the actual bias...
                    d_losses.append(d_loss.item())
                    grad_of_grads = torch.autograd.grad(
                        alpha * d_loss,
                        list(fn2v.parameters(time=0))[:-2],
                        allow_unused=True)

                    del prelim_loss
                    del logits
                    del vg
                    del fn2v
                    del diffopt_n2v
            #### Part 2b: Adversarial Loss
            if adversarial:
                logits = net_forward(
                    None, forward=True)[:, -2:]  # just use activation probe
                labels = genders.type(torch.FloatTensor).reshape(
                    genders.shape[0], -1).to(device)
                adv_loss, _ = utils.balanced_loss(logits,
                                                  labels,
                                                  device,
                                                  0.5,
                                                  ids=ids,
                                                  stable=True)
                adv_losses.append(adv_loss.item())
                # getting too strong, let it retrain...
                if adv_loss < 2:
                    adv_loss = -beta * adv_loss
                    loss += adv_loss
            loss.backward()
            if debias_:
                # custom backward to include the bias regularization....
                max_norm_grad = -1
                param_idx = -1
                for ii in range(len(grad_of_grads)):
                    if (grad_of_grads[ii] is not None
                            and params[ii].grad is not None
                            and torch.isnan(grad_of_grads[ii]).long().sum() <
                            grad_of_grads[ii].reshape(-1).shape[0]):
                        # just in case some or nan for some reason?
                        not_nan = ~torch.isnan(grad_of_grads[ii])
                        params[ii].grad[not_nan] += grad_of_grads[ii][not_nan]
                        if grad_of_grads[ii][not_nan].norm().item(
                        ) > max_norm_grad:
                            max_norm_grad = grad_of_grads[ii][not_nan].norm(
                            ).item()
                            param_idx = ii
                bias_grads.append((param_idx, max_norm_grad))
                # undo the last step and apply a smaller alpha to prevent stability issues
                if not no_limit and ((not mean_debias and max_norm_grad > 100)
                                     or (mean_debias and max_norm_grad > 100)):
                    for ii in range(len(grad_of_grads)):
                        if (grad_of_grads[ii] is not None
                                and params[ii].grad is not None and
                                torch.isnan(grad_of_grads[ii]).long().sum() <
                                grad_of_grads[ii].reshape(-1).shape[0]):
                            # just in case some or nan for some reason?
                            not_nan = ~torch.isnan(grad_of_grads[ii])
                            params[ii].grad[not_nan] -= grad_of_grads[ii][
                                not_nan]
                            # scale accordingly
                            # params[ii].grad[not_nan] += grad_of_grads[ii][not_nan] / max_norm_grad

            loss_shapes.append(shapes)
            loss_shapes2.append(shapes2)
            model_optim.step()
            #magnitudes2.append(
            #    torch.norm(net.weight.data, dim=1).data.cpu().numpy()
            #)
            ct += 1

    # save results every epoch...
    with open(results_path, 'wb') as f:
        print("saving results", e)
        print(results_path)
        pickle.dump(results, f)
    torch.save(net.state_dict(), n2v_path)
    torch.save(model.state_dict(), model_path)
Example #37
0
import utils
from pylearn2.utils import serial
import h5py
import numpy as np
import sys

# Script entry point: splits the filtered dataset rows into train/valid/test
# subsets and extracts samples from each row's image.
# NOTE: this snippet is Python 2 code (print statement, dict.iteritems()).
if __name__ == "__main__":
    # The optional first command-line argument is handed to utils.get_config;
    # None is passed when no argument is given.
    conf_file = sys.argv[1] if len(sys.argv) > 1 else None
    conf = utils.get_config(conf_file)
    paths = utils.get_paths()
    region_size = conf['region_size']
    region_stride = conf['region_stride']

    # Partition the filtered rows using the configured validation/test
    # percentages and RNG seed.
    train_rows, valid_rows, test_rows = utils.split_dataset(
        utils.get_filtered_rows(), conf['valid_percent'],
        conf['test_percent'], rng=conf['rng_seed'])

    rowsdict = {'train': train_rows, 'valid': valid_rows, 'test': test_rows}
    nsamples = {}

    prefixes = ['s_', 'i_', 't_']  # Feature names' prefixes
    # Process each subset independently, starting from empty accumulators.
    for subset, subrows in rowsdict.iteritems():
        X = None
        y = []
        feats = []
        for row in subrows:
            # Oversampling is requested only for the training subset, and only
            # when conf['oversampling'] is truthy.
            samples = utils.get_samples_from_image(
                row, oversampling=(subset == 'train' and conf['oversampling']))
            print "%i samples to %s taken from %s" % (
                len(samples), subset, row['image_filename'])
            if len(samples) == 0:
Example #38
0
def run(type):
	"""Create and start one of the bundled example analyses.

	``type`` must be COHORT_ANALYSIS or SINGLE_TUMOR_ANALYSIS; any other value
	triggers ``abort(400)``. When the user already has at least
	LIMIT_NUM_CASES cases (default 100) an error is flashed and the user is
	redirected to the case index without creating anything. Otherwise a case
	is created for the example mutations file, its files are uploaded, the
	engine case is started, and the response redirects to the case index with
	the new case highlighted.
	"""
	supported_types = [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]
	if type not in supported_types:
		abort(400)

	# Refuse to start more work once the per-user case limit is reached.
	active_cases = current_app.wok.cases_count(current_user)
	max_cases = current_app.config.get("LIMIT_NUM_CASES", 100)
	if active_cases >= max_cases:
		flash("""There is a limit on the number of simultaneous analysis that can be managed.
		You must remove finished analysis before running new ones.""", "error")
		return redirect(url_for("cases.index"))

	# Base configuration shared by both example kinds.
	builder = ConfigBuilder()
	builder.add_value("user_id", current_user.nick)
	builder.add_value("workspace", DEFAULT_WORKSPACE)
	if not current_user.is_anonymous():
		builder.add_value("website.user_id", current_user.nick)

	project_conf = get_project_conf()

	# Pick the example data set; the single-tumor example also sets three
	# extra configuration flags.
	if type == COHORT_ANALYSIS:
		base_project_id = "cohort-example"
		mutations_path = get_examples_path(project_conf, "meduloblastoma_cohort_tier1.muts")
	elif type == SINGLE_TUMOR_ANALYSIS:
		base_project_id = "single-tumor-example"
		mutations_path = get_examples_path(project_conf, "pat4_crc.muts")
		for flag_name in ("variants_only", "skip_oncodrivefm", "skip_oncodriveclust"):
			builder.add_value(flag_name, True)

	project_id = unique_project_id(base_project_id)
	builder.add_value("project.id", project_id)

	results_path, project_path, _project_temp_path = get_paths(project_id, conf=project_conf)

	project_spec = dict(id=project_id, assembly="hg19", files=[mutations_path])
	projects = [init_project_files(project_spec)]
	builder.add_value("projects", projects)

	# The case stores the project path relative to the results directory.
	properties = {
		"analysis_type": type,
		"path": os.path.relpath(project_path, results_path),
		"data_file": mutations_path,
	}

	user_nick = current_user.nick
	current_app.logger.info("[{}] Starting example {} ...".format(user_nick, project_id))

	# Create the case without starting it so the files can be uploaded first.
	case = current_app.wok.create_case(
		current_user, project_id, builder, PROJECT_NAME, MUTATIONS_FLOW_NAME,
		properties=properties, start=False)

	engine_case = current_app.wok.engine.case(case.engine_name)

	#TODO use a background thread
	upload_files(current_app.logger, case.engine_name, engine_case.storages, projects)

	started_message = "[{}] Example {} started on case {}...".format(
		current_user.nick, project_id, case.engine_name)
	current_app.logger.info(started_message)

	engine_case.start()

	return redirect(url_for("cases.index", highlight=case.id))
Example #39
0
def get_feature_vector(graph,
                       triple,
                       relations,
                       remove_triple=False,
                       original_positive=None,
                       centrality_indices=None,
                       rels_to_study=None):
    """Build the flat feature list describing ``triple`` inside ``graph``.

    The features are, in order: intersection features over the ego graphs of
    the source and target for every pair of radii, the same features
    restricted to each relation in ``relations``, and (when
    ``settings.USE_PATHS`` is set) path counts per radius.

    Args:
        graph: graph whose edges are keyed by relation and carry a ``rel``
            attribute (presumably a NetworkX multigraph -- confirm against
            the caller). It is temporarily modified and restored on exit.
        triple: ``(source, relation, target)`` to describe.
        relations: iterable of relation labels for the per-relation features.
        remove_triple: if true, the edge for ``triple`` (and its reciprocal,
            when present) is hidden from the graph while features are built.
        original_positive: optional ``(source, relation, target)``; when
            ``remove_triple`` is false and this is truthy, that edge (and its
            reciprocal, when present) is hidden instead.
        centrality_indices: forwarded to ``get_intersection_feats``.
        rels_to_study: relations considered for the path-based features;
            required when ``settings.USE_PATHS`` is enabled.

    Returns:
        list: the concatenated feature values.

    Raises:
        NetworkXError: if the edge that must be hidden is not in the graph.
    """
    res = []
    s, r, t = triple
    rng = range(1, settings.MAX_CONTEXT_SIZE + 1)

    # Decide which edge has to be hidden: the triple itself for a positive
    # example, or the original positive for a negative one.
    if remove_triple:
        hidden = (s, r, t)
    elif original_positive:
        o_s, o_r, o_t = original_positive
        hidden = (o_s, o_r, o_t)
    else:
        hidden = None

    reciprocal_removed = False
    if hidden is not None:
        h_s, h_r, h_t = hidden
        graph.remove_edge(h_s, h_t, key=h_r)
        try:
            graph.remove_edge(h_t, h_s, key=h_r)
            reciprocal_removed = True
        except NetworkXError:
            # No reciprocal edge to hide; nothing to restore for it later.
            pass

    # Everything below runs with the edge hidden; the ``finally`` clause
    # guarantees the graph is restored even if a feature computation raises.
    try:
        # Load the subgraphs if they are not there yet.
        # NOTE(review): the cache is keyed by (node, radius) only, so ego
        # graphs built while an edge is hidden are reused by later calls --
        # confirm this is intended.
        for i in rng:
            for node in (s, t):
                if (node, i) not in context_subgraphs:
                    context_subgraphs[(node, i)] = ego_graph(graph, node, i)

        # Regular subgraph features
        for i, j in product(rng, rng):
            ents_s = list(context_subgraphs[(s, i)].nodes) + [s]
            ents_t = list(context_subgraphs[(t, j)].nodes) + [t]
            res += get_intersection_feats(
                s,
                t,
                ents_s,
                ents_t,
                graph,
                True,
                centrality_indices=centrality_indices)

        # Reachable entities for all relations
        for i, j in product(rng, rng):
            triples_s = [
                (s_g, r_g, t_g)
                for s_g, t_g, r_g in context_subgraphs[(s, i)].edges.data("rel")
            ]
            triples_t = [
                (s_g, r_g, t_g)
                for s_g, t_g, r_g in context_subgraphs[(t, j)].edges.data("rel")
            ]

            for rel in relations:
                ents_s = [t_g for s_g, r_g, t_g in triples_s
                          if r_g == rel] + [s]
                ents_t = [t_g for s_g, r_g, t_g in triples_t
                          if r_g == rel] + [t]
                res += get_intersection_feats(
                    s,
                    t,
                    ents_s,
                    ents_t,
                    graph,
                    centrality_indices=centrality_indices)

        # Path-based features
        if settings.USE_PATHS:
            rels_dict = {rel: idx for idx, rel in enumerate(rels_to_study)}
            for i in rng:
                # Distinct loop names keep the triple's own s/r/t from being
                # shadowed (or overwritten under Python 2 scoping rules).
                triples = [
                    (e_s, e_r, e_t)
                    for e_s, e_t, e_r in context_subgraphs[(s, i)].edges.data("rel")
                    if e_r in rels_to_study
                ]
                paths = get_paths(triples, s, t, i)
                matrix = np.zeros((len(rels_to_study), ) * i)

                # One counter per sequence of i relations. Indexing with a
                # tuple works for any radius, not only 1 to 3.
                for path in paths:
                    cell = tuple(rels_dict[path[k][1]] for k in range(i))
                    matrix[cell] += 1

                total_paths = np.sum(matrix)
                res.append(total_paths)
                res += matrix.flatten().tolist()
    finally:
        # Restore the deleted edges
        if hidden is not None:
            graph.add_edge(h_s, h_t, rel=h_r, key=h_r)
            if reciprocal_removed:
                graph.add_edge(h_t, h_s, rel=h_r, key=h_r)

    # Done
    return res
def main():
    """Evaluate a pre-trained ResNet face model on LFW verification pairs.

    Parses the command line, loads the checkpoint given by ``--model_path``,
    turns the network into a feature extractor, embeds every image listed by
    the LFW pairs file, and scores each pair by negative L2 distance between
    the normalized embeddings. With ``--fold 0`` a single ROC curve and AUC
    are produced for the dev-test pairs; with ``--fold 10`` AUC and EER are
    computed per fold of 600 pairs. The ROC figure is saved next to this
    script (under ``here``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='lfw_eval')
    parser.add_argument('-g', '--gpu', type=int, default=0)
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/lfw-deepfunneled')
    parser.add_argument('--fold', type=int, default=0, choices=[0, 10])
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('-m', '--model_path', default=None, required=True,
                        help='Path to pre-trained model')
    parser.add_argument('--model_type', default='resnet50',
                        choices=['resnet50', 'resnet101', 'resnet101-512d'])

    args = parser.parse_args()

    # CUDA setup
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    if args.fold == 0:
        pairs_path = './lfw/data/pairsDevTest.txt'
    else:
        pairs_path = './lfw/data/pairs.txt'

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    file_ext = 'jpg'  # observe, no '.' before jpg
    num_class = 8631

    pairs = utils.read_pairs(pairs_path)
    path_list, issame_list = utils.get_paths(args.dataset_path, pairs, file_ext)

    # Define data transforms
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]
    test_transform = transforms.Compose([
        transforms.Scale((250, 250)),  # make 250x250
        transforms.CenterCrop(150),    # then take 150x150 center crop
        transforms.Scale((224, 224)),  # resized to the network's required input size
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN,
                             std=RGB_STD),
    ])

    # Create data loader; order must be preserved so pairs stay aligned.
    test_loader = torch.utils.data.DataLoader(
        data_loader.LFWDataset(path_list, issame_list, test_transform),
        batch_size=args.batch_size, shuffle=False)

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    if args.model_type == 'resnet50':
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101':
        model = torchvision.models.resnet101(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101-512d':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    else:
        raise NotImplementedError

    checkpoint = torch.load(args.model_path)

    if checkpoint['arch'] == 'DataParallel':
        # if we trained and saved our model using DataParallel
        # NOTE(review): device_ids is hard-coded to five GPUs although only
        # one device is made visible above -- confirm this is intended.
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.module  # get network module from inside its DataParallel wrapper
    else:
        model.load_state_dict(checkpoint['model_state_dict'])

    if cuda:
        model = model.cuda()

    # Convert the trained network into a "feature extractor"
    if args.model_type == 'resnet101-512d':
        # Keep only the first linear layer of the head (the 512-d embedding).
        model.eval()
        extractor = model
        extractor.fc = nn.Sequential(extractor.fc[0])
    else:
        # Drop the final classification layer.
        feature_map = list(model.children())
        feature_map.pop()
        extractor = nn.Sequential(*feature_map)

    extractor.eval()  # set to evaluation mode (fixes BatchNorm, dropout, etc.)

    # -----------------------------------------------------------------------------
    # 3. Feature extraction
    # -----------------------------------------------------------------------------
    features = []

    for batch_idx, images in tqdm.tqdm(enumerate(test_loader),
                                       total=len(test_loader),
                                       desc='Extracting features'):
        x = Variable(images, volatile=True)  # test-time memory conservation
        if cuda:
            x = x.cuda()
        feat = extractor(x)
        # .cpu() is a no-op for tensors that already live on the CPU.
        features.append(feat.data.cpu())

    # Concatenate along the batch axis rather than stacking, so a smaller
    # final batch (dataset size not divisible by batch_size) does not fail.
    features = torch.cat(features, 0)
    features = features.view(features.size(0), -1)
    features = F.normalize(features, p=2, dim=1)  # L2-normalize
    # TODO - cache features

    # -----------------------------------------------------------------------------
    # 4. Verification
    # -----------------------------------------------------------------------------
    # Even rows are compared against the following odd rows (presumably the
    # two images of each pair are listed consecutively -- confirm in
    # utils.get_paths).
    num_feat = features.size()[0]
    feat_pair1 = features[np.arange(0, num_feat, 2), :]
    feat_pair2 = features[np.arange(1, num_feat, 2), :]
    feat_dist = (feat_pair1 - feat_pair2).norm(p=2, dim=1)
    feat_dist = feat_dist.numpy()

    # Eval metrics: smaller distance means more similar, so negate it.
    scores = -feat_dist
    gt = np.asarray(issame_list)

    if args.fold == 0:
        fig_path = osp.join(
            here,
            args.exp_name + '_' + args.model_type + '_lfw_roc_devTest.png')
        roc_auc = sklearn.metrics.roc_auc_score(gt, scores)
        fpr, tpr, _ = sklearn.metrics.roc_curve(gt, scores)
        print('ROC-AUC: %.04f' % roc_auc)
        # Plot and save ROC curve
        plt.figure()
        plt.title('ROC - lfw dev-test')
        plt.plot(fpr, tpr, lw=2, label='ROC (auc = %0.4f)' % roc_auc)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.tight_layout()
    else:
        # 10 fold
        fold_size = 600  # 600 pairs in each fold
        roc_auc = np.zeros(10)
        roc_eer = np.zeros(10)

        plt.figure()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')

        for i in tqdm.tqdm(range(10)):
            start = i * fold_size
            end = (i + 1) * fold_size
            scores_fold = scores[start:end]
            gt_fold = gt[start:end]
            roc_auc[i] = sklearn.metrics.roc_auc_score(gt_fold, scores_fold)
            fpr, tpr, _ = sklearn.metrics.roc_curve(gt_fold, scores_fold)
            # EER calc: https://yangcha.github.io/EER-ROC/
            # Build the interpolant once instead of on every root-finder call.
            roc_interp = interpolate.interp1d(fpr, tpr)
            roc_eer[i] = brentq(
                lambda x: 1. - x - roc_interp(x), 0., 1.)
            plt.plot(fpr, tpr, alpha=0.4,
                     lw=2, color='darkgreen',
                     label='ROC(auc=%0.4f, eer=%0.4f)' % (roc_auc[i], roc_eer[i]))

        plt.title('AUC: %0.4f +/- %0.4f, EER: %0.4f +/- %0.4f' %
                  (np.mean(roc_auc), np.std(roc_auc),
                   np.mean(roc_eer), np.std(roc_eer)))
        plt.tight_layout()

        fig_path = osp.join(
            here,
            args.exp_name + '_' + args.model_type + '_lfw_roc_10fold.png')

    plt.savefig(fig_path, bbox_inches='tight')
    print('ROC curve saved at: ' + fig_path)
# Make sure the output folder for this run exists under results/.
if not os.path.isdir('results/' + results_folder_name):
    os.mkdir('results/' + results_folder_name)

# Process one dataset split per iteration. Iteration starts at split index 5,
# so splits 0-4 are not processed by this loop.
for i in range(5, splits):
    print('Split {}/{}'.format(i, splits))
    """ Create readers """
    # A single reader, registered under the key 'CNN', configured for
    # '.jpeg' files in the 'img_patches' folder.
    dataReaders = {}
    dataReaders['CNN'] = ImageReader(folder_name='img_patches',
                                     np_shape=(897, 897, 3),
                                     formats=['.jpeg'],
                                     patch_size=512)
    """ Get paths """
    # Dataset paths
    datasets = ['train', 'val', 'test']

    paths = get_paths(splits_folder, db_path, i, cnn=True, multitest=False)
    """ Read data """
    # Every reader loads each of the three subsets from the paths of split i.
    for key in dataReaders:
        print('Read data ({})'.format(key))
        for dataset in datasets:
            dataReaders[key].read_data(paths=paths[dataset],
                                       ohe=ohe,
                                       dataset=dataset)

    # Shuffle train set
    # NOTE(review): np.random.randint draws indices independently, i.e. with
    # replacement, so this is a resampling of the train set rather than a
    # permutation (some samples may repeat, others may be dropped) -- confirm
    # whether np.random.permutation was intended.
    index_train = np.random.randint(0,
                                    len(dataReaders['CNN'].data['train']['x']),
                                    len(dataReaders['CNN'].data['train']['x']))
    dataReaders['CNN'].data['train']['x'] = dataReaders['CNN'].data['train'][
        'x'][index_train]
    dataReaders['CNN'].data['train']['y'] = dataReaders['CNN'].data['train'][
Example #42
0
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os

import utils


# Value of the 'repo_root' entry returned by utils.get_paths(); used below as
# the base for the sync destinations.
repo_root = utils.get_paths()['repo_root']

# Default config options
default = dict(
  storage_url='https://example.com',
  auth_url='https://example.com/auth/',
  object_store_container='blah',
  # Glob patterns, under the 'ignore_patterns' key.
  ignore_patterns=[
    '._*',
    '.__*',
    '.TemporaryItems*',
    '._.TemporaryItems',
    '.DS_Store',
    '*.pyc',
  ],
  dest_sync=os.path.join(repo_root, 'dest_sync'),
  checksum=False,
  binary_overrides=[],
)

# Example shelf config that overrides only the sync destination.
example_shelf = dict(dest_sync=repo_root)

example_shelf_2 = {
  'binary_overrides': ['*.inf', '*.din']