Exemple #1
0
def read_partitioned_json(file_path: str,
                          filter_function=lambda _: True) -> pd.DataFrame:
    """Load the JSON data found under ``file_path`` into a DataFrame.

    Args:
        file_path: Base path handed to ``Dataset`` as ``base_path``.
        filter_function: Callable forwarded to ``Dataset`` unchanged. The
            default returns ``True`` for any argument.

    Returns:
        The result of ``Dataset.to_pandas()`` for the configured source.
    """
    # Partitions are explicitly not ignored; files are read with the
    # module's JSON loader.
    dataset_options = dict(
        base_path=file_path,
        extension="json",
        filter_function=filter_function,
        loader_function=_json_loader_function,
        ignore_partitions=False,
    )
    return Dataset(**dataset_options).to_pandas()
Exemple #2
0
def dl_maker(x, y, conf):
    """Build the training and validation data loaders for ``x`` and ``y``.

    The inputs are divided by ``split``, converted to tensors and wrapped in
    ``Dataset`` objects. Features receive an extra axis at position 1, and
    both features and targets are cast to float.

    Args:
        x: Features, passed to ``split``.
        y: Targets, passed to ``split``.
        conf: Configuration object providing ``batch_size`` and ``one_batch``.

    Returns:
        Tuple ``(train_dl, valid_dl)``.
    """
    batch_size = conf.batch_size
    x_train, x_valid, y_train, y_valid = split(x, y)
    x_train, x_valid, y_train, y_valid = (
        torch.tensor(part) for part in (x_train, x_valid, y_train, y_valid))

    train_ds = Dataset(x_train.unsqueeze(1).float(), y_train.float())
    valid_ds = Dataset(x_valid.unsqueeze(1).float(), y_valid.float())

    use_full_batch = conf.one_batch

    def _make_loader(ds):
        # With ``one_batch`` set, each loader yields its whole dataset as a
        # single batch; otherwise the configured batch size is used.
        return DataLoader(ds, len(ds) if use_full_batch else batch_size)

    return _make_loader(train_ds), _make_loader(valid_ds)
Exemple #3
0
    def __init__(self, args):
        """Prepare output folders, datasets, samplers, model and optimizer.

        Args:
            args: Run configuration object. Its ``save_path`` attribute is
                set here (side effect) to the folder derived from the run
                hyper-parameters, and the object is kept as ``self.args``.
        """
        # Set the folder to save the records and checkpoints.
        # makedirs(exist_ok=True) creates './logs/' and './logs/meta' in one
        # call and does not fail when another process created them first
        # (the previous exists()/mkdir() pairs were open to that race).
        log_base_dir = './logs/'
        meta_base_dir = osp.join(log_base_dir, 'meta')
        os.makedirs(meta_base_dir, exist_ok=True)

        # Folder name: <dataset>_<model_type>_maml_<hyper-parameter suffix>.
        save_path1 = '_'.join([args.dataset, args.model_type, 'maml'])
        save_path2 = '_'.join([
            'shot' + str(args.shot),
            'way' + str(args.way),
            'query' + str(args.train_query),
            'lr' + str(args.meta_lr),
            'batch' + str(args.num_batch),
            'maxepoch' + str(args.max_epoch),
            'baselr' + str(args.base_lr),
            'updatestep' + str(args.update_step),
            args.meta_label,
        ])
        args.save_path = meta_base_dir + '/' + save_path1 + '_' + save_path2
        ensure_path(args.save_path)

        self.args = args

        # Training split and its sampler (shot + train_query samples).
        self.trainset = Dataset('train', self.args, train_aug=True)
        self.train_sampler = CategoriesSampler(
            self.trainset.label, self.args.num_batch, self.args.way,
            self.args.shot + self.args.train_query)
        # The loader is not built here; it was previously created as:
        #self.train_loader = DataLoader(dataset=self.trainset, batch_sampler=self.train_sampler, num_workers=8, pin_memory=True)
        self.train_loader = None

        # Validation split and its sampler (shot + val_query samples).
        self.valset = Dataset('val', self.args)
        self.val_sampler = CategoriesSampler(
            self.valset.label, self.args.val_batch, self.args.way,
            self.args.shot + self.args.val_query)
        # The loader is not built here; it was previously created as:
        #self.val_loader = DataLoader(dataset=self.valset, batch_sampler=self.val_sampler, num_workers=8, pin_memory=True)
        self.val_loader = None

        # Build the meta-learner on the configured device and wrap it in
        # DataParallel.
        self.model = MetaLearner(self.args).to(self.args.device)
        ##self.model.encoder.load_state_dict(torch.load(self.args.pre_load_path))
        self.model = torch.nn.DataParallel(self.model)
        print(self.model)

        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.args.meta_lr,
                                   momentum=0.9,
                                   weight_decay=self.args.weight_decay)  # or adam
'''
Load the training dataset: 
- Reads annotations from csv file 
- Sends each training video to the Visual Attention Module
- Appends each stshi sequence outputted to training inputs.
- Appends class and context labels as HOT encoded vectors to training outputs
'''
# Define the phase - training/testing. It is passed to Dataset below.
phase = "training"
# Define the size of the temporal window (second Dataset argument).
duration = 40
# Define the length of a stshi sequence (third Dataset argument).
maxlen = 35

print("SETTING PHASE TO : ", phase, "\n\n")
# Build the dataset for the selected phase with the settings above.
ds = Dataset(phase, duration, maxlen)
print("\nDataset loaded.")

'''
Inputs to Memory Module:
X - Array of stshi sequences 
Y - Array of (1-HOT context vector, HOT-encoded class vector)
'''

# Training the memory module
hidden_dim = 64

# Pull the inputs (X) and targets (Y) out of the loaded dataset.
X_all_stshiseq = ds.X_all_stshiseq
Y_all = ds.Y_all

# Shape of the full input array, as reported by NumPy.
input_dimensions = np.shape(X_all_stshiseq)
Exemple #5
0
# Default used when the RPN/RTN stage is disabled: FLN then receives an
# empty list as its last argument.
output_rtn = []
if not no_rpn_flag:
    # Build the RPN graph, resample its output by the RPN/FLN resolution
    # ratio along each axis, and feed the result into the RTN graph.
    rpn_network = RPN(x_static_segmentation_rpn, x_objects[2])
    output_rpn = rpn_network.make_graph()
    resampled_output_rpn = tf_resample_hyps(output_rpn, float(RPN_RESOLUTION[0]/FLN_RESOLUTION[0]), float(RPN_RESOLUTION[1]/FLN_RESOLUTION[1]))
    rtn_network = RTN(x_static_segmentation, x_objects[2], x_imgs[2], x_egos, resampled_output_rpn)
    output_rtn = rtn_network.make_graph()

# Build the FLN graph on top of the (possibly empty) RTN output.
fln_network = FLN(x_imgs, x_semantics, x_egos, x_objects, output_rtn)
output_fln = fln_network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path, dataset_name)

# Running totals; they are only initialised in this part of the script.
nll_sum = 0
fde_sum = 0
iou_sum = 0
counter = 0
# Run the test for each sequence for each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    # The scene name is the last '/'-separated component of its path.
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        # Results go to <output_folder>/<dataset_name>/<scene_name>.
        result_scene_path = os.path.join(output_folder, dataset_name, scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):
        testing_sequence = scene.sequences[i]
Exemple #6
0
    def eval(self):
        """The function for the meta-eval phase.

        Loads the training log and the selected weights, runs the model over
        the meta-test episodes in groups of ``args.meta_batch``, and writes
        the log extended with a ``test_acc`` entry (mean and confidence
        interval) to ``trlog_test_<index>.json`` inside ``args.save_path``.
        """
        # Load the logs. The file is JSON, so it is parsed with the json
        # module; yaml.load() without an explicit Loader is rejected by
        # current PyYAML and is unsafe on untrusted files.
        with open(osp.join(self.args.save_path, 'trlog.json'), 'r') as f:
            trlog = json.load(f)

        # Load meta-test set
        test_set = Dataset('test', self.args, train_aug=False)
        sampler = CategoriesSampler(test_set.label, self.args.test_batch,
                                    self.args.way,
                                    self.args.shot + self.args.val_query)
        loader = DataLoader(test_set,
                            batch_sampler=sampler,
                            num_workers=self.args.num_work,
                            pin_memory=True)
        test_data = self.inf_get(loader)

        # Load model for meta-test phase: explicit weights when given,
        # otherwise the 'max_acc' checkpoint from the save folder.
        if self.args.eval_weights is not None:
            weights_path = self.args.eval_weights
        else:
            weights_path = osp.join(self.args.save_path, 'max_acc' + '.pth')
        self.model.load_state_dict(torch.load(weights_path)['params'])
        # NOTE(review): the model is deliberately NOT switched to eval mode
        # here (the call was commented out with a question mark in the
        # original) - confirm whether that is intended.
        #self.model.eval()

        # Set accuracy averager
        ave_acc = Averager()
        acc_log = []

        # Generate labels
        label_shot = torch.arange(self.args.way).repeat(self.args.shot).to(
            self.args.device).type(torch.long)
        # NOTE(review): the sampler above is configured with
        # shot + val_query, while the query labels repeat train_query times;
        # these only line up when val_query == train_query - confirm.
        label_query = torch.arange(self.args.way).repeat(
            self.args.train_query).to(self.args.device).type(torch.long)

        for _ in tqdm.tqdm(range(self.args.test_batch //
                                 self.args.meta_batch)):
            # Gather meta_batch episodes and stack them along a new
            # leading axis.
            data_list = []
            label_shot_list = []
            for _ in range(self.args.meta_batch):
                data_list.append(next(test_data).to(self.args.device))
                label_shot_list.append(label_shot)
            data_list = torch.stack(data_list, dim=0)
            label_shot_list = torch.stack(label_shot_list, dim=0)
            out = self.model(data_list, label_shot_list).detach()
            # Score every episode of the group separately.
            for inner_id in range(self.args.meta_batch):
                cur_acc = count_acc(out[inner_id], label_query)
                acc_log.append(cur_acc)
                ave_acc.add(cur_acc)

        # np.float was removed from NumPy; the builtin float is the dtype it
        # used to alias.
        acc_np = np.array(acc_log, dtype=float)
        m, pm = compute_confidence_interval(acc_np)

        trlog['test_acc'] = [m, pm]
        cur_test_save_name = 'trlog_test_' + str(self.args.index) + '.json'
        with open(osp.join(self.args.save_path, cur_test_save_name), 'w') as f:
            json.dump(trlog, f)
        print('Val Best Epoch {}, Acc {:.4f}, Test Acc {:.4f}'.format(
            trlog['max_acc_epoch'], trlog['max_acc'], ave_acc.item()))
        print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
# Resolve the dataset location from its name.
data_path = DATASET_PATH[dataset_name]

session = create_session()

# Graph inputs; `height` and `width` are defined outside this part of the
# script.
x_objects = tf.placeholder(tf.float32, shape=(3, 1, 1, 5, 1))
x_imgs = tf.placeholder(tf.float32, shape=(3, 1, 3, height, width))

# Build the network graph
network = EWTA_MDF(x_imgs, x_objects)
output = network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path)

# Running totals; they are only initialised in this part of the script.
nll_sum = 0
semd_sum = 0
counter = 0
# Run the test for each sequence for each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    # The scene name is the last '/'-separated component of its path.
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        # Results go to <output_folder>/<dataset_name>/<scene_name>.
        result_scene_path = os.path.join(output_folder, dataset_name,
                                         scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):
        testing_sequence = scene.sequences[i]
Exemple #8
0
from dataset_loader import Dataset
from document_preprocessor import DocumentPreprocessor
from document_vectors import StatsKeeper

'''
Little simple showcase app showing the differences in searches
using "matching score" and "cosine similarity"
'''

# Load the document collection.
dataset = Dataset()
print("DATASET LOADED !")

# Preprocessor with every normalisation option switched on.
documentPreprocessor = DocumentPreprocessor(remove_apostrophes=True,
                                            remove_punctuation=True,
                                            remove_single_characters=True,
                                            remove_stop_words=True,
                                            stemming=True,
                                            number_converting=True,
                                            lower_case=True)
statsKeeper = StatsKeeper()

# dataset.texts maps a path to a (title, text) pair; each document is
# preprocessed and then registered with the stats keeper under its
# original title.
for path, (title, text) in dataset.texts.items():
    preprocessed = documentPreprocessor.preprocess_document(path=path, text=text, title=title)
    statsKeeper.load_document(title, preprocessed.title, preprocessed.text)
print("DATASET PREPARED FOR COMPILATION !")
statsKeeper.compile()
print("DATASET COMPILED !")

# Interactive query loop. NOTE(review): the handling of the query
# (including the "__exit__" case announced below) is not visible in this
# part of the script.
while 1 :
    print("\nType \"__exit__\" if you want to leave.")
    query = input("What are you searching for ? : ")
Exemple #9
0
    rpn_network = RPN(x_static_segmentation_rpn, x_object)
    output_rpn = rpn_network.make_graph()
    resampled_output_rpn = tf_resample_hyps(
        output_rpn, float(RPN_RESOLUTION[0] / EPN_RESOLUTION[0]),
        float(RPN_RESOLUTION[1] / EPN_RESOLUTION[1]))
    rtn_network = RTN(x_static_segmentation, x_object, x_img, x_egos,
                      resampled_output_rpn)
    output_rtn = rtn_network.make_graph()
epn_network = EPN(x_img, x_semantic, x_egos, x_object, output_rtn)
output_epn = epn_network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path, dataset_name, type='EPN')

nll_sum = 0
fde_sum = 0
iou_sum = 0
counter = 0
# Run the test for each sequence for each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        result_scene_path = os.path.join(output_folder, dataset_name,
                                         scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):