def test(self, model_path=''):
    print('Test model')
    try:
        print('*** Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path)
    except ValueError as e:
        print('Error while loading the model.', e)

    print('\nTest all')
    # acc = np.mean(self.accuracies)
    # acc = self.accuracies
    graphs = self.data[GRAPH]
    labels = self.labels
    self.run_test(graphs, labels)

    print('\nTest on train graphs')
    graphs = load_pickle(os.path.join(self.odir, 'train'))
    labels = load_pickle(os.path.join(self.odir, 'train_labels'))
    self.run_test(graphs, labels)

    print('\nTest on test graphs')
    graphs = load_pickle(os.path.join(self.odir, 'test'))
    labels = load_pickle(os.path.join(self.odir, 'test_labels'))
    self.run_test(graphs, labels)
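# All of the snippets in this file lean on a project-local `load_pickle`
# helper (sometimes imported as `utils.load_pickle`). A minimal sketch of what
# such a helper pair typically looks like -- exact signatures vary by repo:
import pickle


def load_pickle(path):
    """Deserialize and return the object stored at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_pickle(obj, path):
    """Serialize `obj` to `path` with pickle."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)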
def test(self, model_path=''):
    print('[app][test] Test model')
    try:
        print('*** [app][test] Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path, self.is_cuda)
    except ValueError as e:
        print('[app][test] Error while loading the model.', e)

    self.save_traintest()

    # print('\n[app][test] Test all')
    # # acc = np.mean(self.accuracies)
    # # acc = self.accuracies
    # graphs = self.data[GRAPH]
    # labels = self.labels
    # self.run_test(graphs, labels)

    graphs = load_pickle(os.path.join(self.odir, 'train'))
    labels = load_pickle(os.path.join(self.odir, 'train_labels'))
    print('\n[app][test] Test on train graphs ({})'.format(len(labels)),
          os.path.join(self.odir, 'train'))
    self.run_test_fold(graphs, labels, fold=300)

    graphs = load_pickle(os.path.join(self.odir, 'test'))
    labels = load_pickle(os.path.join(self.odir, 'test_labels'))
    print('\n[app][test] Test on test graphs ({})'.format(len(labels)),
          os.path.join(self.odir, 'test'))
    self.run_test_fold(graphs, labels, fold=150)
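# `load_checkpoint` is not shown in either variant above; a minimal PyTorch
# sketch of what it is assumed to do (the real helper may also restore
# optimizer state, or unpack a checkpoint dict rather than a bare state_dict):
import torch


def load_checkpoint(model, model_path, is_cuda=False):
    """Load saved weights into `model`, mapping tensors to CPU unless CUDA is requested."""
    map_location = 'cuda' if is_cuda else 'cpu'
    state_dict = torch.load(model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    return model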
def load_and_merge(self, train_df, test_df, start):
    train_feat = utils.load_pickle(self.train_path)
    test_feat = utils.load_pickle(self.test_path)
    # Switch concat strategy based on the column count (for speed).
    if train_df.shape[1] > 100:
        train_df = utils.fast_concat(train_df, train_feat)
        test_df = utils.fast_concat(test_df, test_feat)
    else:
        train_df = pd.concat([train_df, train_feat], axis=1)
        test_df = pd.concat([test_df, test_feat], axis=1)
    logging.info("load complete ... {:.1f}s".format(time.time() - start))
    return train_df, test_df
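# `utils.fast_concat` is not shown; it is assumed to skip pandas index
# alignment for wide frames. A minimal sketch under that assumption (it
# requires the two frames to be row-aligned, and a mixed-dtype hstack may
# coerce everything to object):
import numpy as np
import pandas as pd


def fast_concat(df1, df2):
    """Column-wise concat of two row-aligned DataFrames via a single numpy hstack."""
    values = np.hstack([df1.to_numpy(), df2.to_numpy()])
    return pd.DataFrame(values, columns=list(df1.columns) + list(df2.columns))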
def extract_feature_groups(corpus, narration_feature_dirs=None):
    # Column ranges of each modality inside the packed feature matrix.
    group_indices = {
        'i3d': (0, 1024),
        'resnet': (1024, 3072),
        'audio': (3072, 3200),
    }
    n_instances = len(corpus)
    grouped = defaultdict(dict)
    last_task = None
    task_feats = None
    for idx in range(n_instances):
        instance = corpus._get_by_index(idx)
        video_name = instance['video_name']
        features = instance['features']
        for group, (start, end) in group_indices.items():
            grouped[group][video_name] = features[:, start:end]
        if narration_feature_dirs is not None:
            task = instance['task_name']
            # Narration features are stored per task; reload only on task change.
            if last_task != task:
                task_data = [
                    load_pickle(os.path.join(feat_dir,
                                             'crosstask_narr_{}.pkl'.format(task)))
                    for feat_dir in narration_feature_dirs
                ]
                task_feats = {
                    datum['video']: datum['narration']
                    for data in task_data for datum in data
                }
            grouped['narration'][video_name] = task_feats[video_name]
            last_task = task
    return grouped
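# Hypothetical usage, to show the layout of the returned mapping: each
# modality is a dict keyed by video name, holding its feature slice.
#
#   grouped = extract_feature_groups(corpus)
#   grouped['i3d'][video_name]     # features[:, 0:1024]
#   grouped['resnet'][video_name]  # features[:, 1024:3072]
#   grouped['audio'][video_name]   # features[:, 3072:3200]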
def extract_pairs(input_mask, input_mynorm, output_path, threshold,
                  std_threshold, background) -> None:
    mask = load_pickle(input_mask)
    print_("Loaded mask.")

    data = DataProcessor(mask=mask)
    data.load_mynorm(mynorm_path=input_mynorm)
    print_("Loaded myNorm.")

    data.check_pairs()
    print_("Pairs checked.")

    data.set_statistics()
    print_("Stats calculated.")

    distance_frame = data.create_record()
    print_("Distance calculated.")

    print_(f"Background: {background}")
    if not background:
        distance_frame = DataProcessor.select(distance_frame=distance_frame,
                                              threshold=threshold)
        distance_frame = DataProcessor.var_filtering(distance_frame=distance_frame)

    export_to_csv(distance_frame, path=output_path)
    print_("Done")
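# `print_` is not shown; it is assumed to be a thin timestamped logging
# wrapper along these lines (a sketch, not the project's actual helper):
from datetime import datetime


def print_(msg):
    """Print `msg` prefixed with the current wall-clock time."""
    print(f"[{datetime.now():%H:%M:%S}] {msg}")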
def __init__(self, args):
    self.path = args.path + args.dataset
    self.method = args.method
    self.max_item_seq_length = args.max_item_seq_length
    self.load_embedding_flag = args.load_embedding_flag == 1

    # Load user_lists_dct and the train/valid/test list-item splits.
    self.user_lists_dct = utils.load_pickle(self.path + '.user_lists.ided.pkl')
    # (self.list_items_dct_no_valid, self.list_items_dct, self.validNegativesDict,
    #  self.testNegativesDict) = self.get_pickle_train_valid_test(
    #      self.path + '.list_items.train_valid_test.pkl')
    (self.list_items_dct, self.list_items_wv_dct, self.validNegativesDict,
     self.testNegativesDict) = self.get_pickle_train_valid_test(
         self.path + '.list_items.train_valid_test.pkl')

    # self.validNegativesDict = self.testNegativesDict  # to be commented out
    self.list_items_dct_train = self.list_items_dct
    self.list_items_dct = self.list_items_wv_dct

    # User embeddings can be obtained via this many-to-one mapping.
    self.list_user_dct = self.get_list_user_dct(self.user_lists_dct)
    self.num_user, self.num_list, self.num_item = self.get_user_list_item_count(
        self.user_lists_dct, self.list_items_dct)
    self.list_user_vec = self.get_list_user_vec(self.list_user_dct, self.num_list)

    # Train/valid/test array-lists and sequence matrices.
    self.trainArrTriplets = self.get_arraylist_from_train_dict(self.list_items_dct)
    self.validArrDubles = self.get_arraylist_from_valid_dict(self.validNegativesDict)
    self.testArrDubles = self.get_arraylist_from_valid_dict(self.testNegativesDict)

    self.train_matrix = self.get_train_matrix_sp(self.list_items_dct,
                                                 self.num_list, self.num_item)
    # last_index: [:] keeps all items, [:-1] drops the test item,
    # [:-2] drops both the test and valid items.
    self.train_matrix_item_seq = self.get_dct_mat_seq_remove_test(
        dct=self.list_items_dct,
        num_row=self.num_list,
        num_col=self.max_item_seq_length + 1,
        padding_value=0)
    self.train_matrix_item_seq_for_test = self.get_dct_mat_seq_for_test(
        dct=self.list_items_dct,
        num_row=self.num_list,
        num_col=self.max_item_seq_length,
        padding_value=0)
def try_to_recover(self):
    try:
        data = load_pickle(self.tmp_file_name)
        if data:
            print('Recovered')
            self.processed_sites = data
            for row in self.processed_sites:
                print('Checked {}: {}'.format(
                    row['donor'], 'success' if row['success'] else 'fail'))
                # Advance the acceptor iterator past already-processed entries.
                next(self.acceptors)
                self.processed_donors.append(row['donor'])
            return True
    except Exception:
        pass
    return False
    # Presumably the tail of make_per_molecule(molecule): persist this
    # molecule's edge/angle features.
    edge_df.to_pickle(f"../data/edge_angle/{molecule}.pkl")


if __name__ == "__main__":
    with utils.timer("make_feature_per_molecule"):
        for mode in ["train", "test"]:
            meta_df = pd.read_pickle(f"../pickle/{mode}.pkl").set_index("id")
            molecules = meta_df["molecule_name"].unique().tolist()

            st_df = pd.read_pickle("../pickle/structures.pkl")
            # Narrow the structures to the molecules of this split.
            st_df = st_df[st_df.molecule_name.isin(molecules)]\
                [["molecule_name", "atom_index", "atom", "x", "y", "z"]]

            # Process per molecule (workers presumably read the module-level st_dict).
            st_gr = st_df.groupby("molecule_name")
            st_dict = {}
            for molecule in tqdm(molecules):
                st_dict[molecule] = st_gr.get_group(molecule)

            all_file_num = len(molecules)
            with Pool(4) as p:
                res = p.map(make_per_molecule, molecules)

    with utils.timer("concatenate_molecules_feature"):
        for mode in ["train", "test"]:
            meta_df = pd.read_pickle(f"../pickle/{mode}.pkl").set_index("id")
            molecules = meta_df["molecule_name"].unique().tolist()
            df_list = []
            for molecule in tqdm(molecules):
                df_list.append(utils.load_pickle(f"../data/edge_angle/{molecule}.pkl"))
            all_df = pd.concat(df_list).reset_index(drop=True)
            utils.save_pickle(all_df, f"../pickle/{mode}_edge_angle.pkl")
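# `utils.timer` is not shown; a minimal sketch of such a timing context
# manager (assumed behavior: report the block's name and elapsed seconds):
import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    start = time.time()
    yield
    print(f"[{name}] done in {time.time() - start:.1f} s")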
    # Tail of a preceding utils.save_pickle_safe call.
    force_save=force_save)
utils.save_pickle_safe(path_optimization + 'cv_results_time_series.pkl',
                       cv_results_time_series,
                       force_save=force_save)
utils.save_pickle_safe(path_optimization + 'model_infos.pkl',
                       model_infos,
                       force_save=force_save)

# Create the best_params manually: dict[int]['best_params_']
# best_params = {'alpha': 0.2, 'copy_X_train': True, 'kernel': None,
#                'n_restarts_optimizer': 0, 'normalize_y': True,
#                'optimizer': 'fmin_l_bfgs_b', 'random_state': None}
# best_params_time_series = {i: {'best_params': best_params}
#                            for i in np.arange(len(time_series))}
best_params_time_series = utils.load_pickle(path_optimization +
                                            'best_params_time_series.pkl')

# Loop fit and predict
preds = []
for idx, i in enumerate(
        tqdm(time_series,
             desc='Loop fit and predict over time series (using low memory)')):
    # Fit
    estimator = utils_regressor.fit_multioutput_regressor_uniseries_model(
        X_train, y_list_train[idx], estimator_choice,
        best_params_time_series[idx]['best_params_'],
        n_jobs=max(n_jobs, n_jobs_cv))
    # Predict
    X = utils_regressor.scaler_transform(X, scaler_X)
def create_dataset(name='market1501', part='trainval', **kwargs):
    # Only market1501 is wired up in this variant, and only the parts that
    # are actually handled below; the broader asserts in the original would
    # let unhandled names/parts fall through to a NameError.
    assert name in ['market1501'], \
        "Unsupported Dataset {}".format(name)
    assert part in ['trainval', 'val'], \
        "Unsupported Dataset Part {}".format(part)

    ########################################
    # Specify Directory and Partition File #
    ########################################

    if name == 'market1501':
        im_dir = ospeu('/scratch/group/atlas_prid/market1501/images')
        partition_file = ospeu('/scratch/group/atlas_prid/market1501/partitions.pkl')

    ##################
    # Create Dataset #
    ##################

    # Use standard Market1501 CMC settings for all datasets here.
    cmc_kwargs = dict(separate_camera_set=False,
                      single_gallery_shot=False,
                      first_match_break=True)

    partitions = load_pickle(partition_file)
    im_names = partitions['{}_im_names'.format(part)]

    if part == 'trainval':
        ids2labels = partitions['trainval_ids2labels']
        ret_set = TrainSet(im_dir=im_dir,
                           im_names=im_names,
                           ids2labels=ids2labels,
                           **kwargs)
    elif part == 'val':
        marks = partitions['val_marks']
        kwargs.update(cmc_kwargs)
        ret_set = TestSet(im_dir=im_dir,
                          im_names=im_names,
                          marks=marks,
                          **kwargs)

    if part in ['trainval', 'train']:
        num_ids = len(ids2labels)
    elif part in ['val', 'test']:
        ids = [parse_im_name(n, 'id') for n in im_names]
        num_ids = len(list(set(ids)))
        num_query = np.sum(np.array(marks) == 0)
        num_gallery = np.sum(np.array(marks) == 1)
        num_multi_query = np.sum(np.array(marks) == 2)

    # Print dataset information
    print('-' * 40)
    print('{} {} set'.format(name, part))
    print('-' * 40)
    print('NO. Images: {}'.format(len(im_names)))
    print('NO. IDs: {}'.format(num_ids))
    try:
        print('NO. Query Images: {}'.format(num_query))
        print('NO. Gallery Images: {}'.format(num_gallery))
        print('NO. Multi-query Images: {}'.format(num_multi_query))
    except NameError:
        # Query/gallery counts are only defined for the val part.
        pass
    print('-' * 40)

    return ret_set
@classmethod  # assumed from the `cls` parameter; the decorator was not shown
def read_data(cls, input_file):
    """Deserialize a dataset from `input_file` (assumed to be a pickle)."""
    return load_pickle(input_file)
from utils.utils import load_pickle

if __name__ == '__main__':
    data = load_pickle('find_comments.pkl')
    processed_links = data['processed_links']
    links_to_process = data['links_to_process']
    links_with_comments_form = data['links_with_comments_form']
    processed_domains = data['processed_domains']

    with open('processed_links.txt', 'w') as file:
        for link in data['processed_links']:
            file.write("{}\n".format(link))

    with open('links_to_process.txt', 'w') as file:
        for link in data['links_to_process']:
            file.write("{}\n".format(link))

    with open('links_with_comments_form.txt', 'w') as file:
        for link in data['links_with_comments_form']:
            file.write("{}\n".format(link))

    with open('processed_domains.txt', 'w') as file:
        for link in data['processed_domains']:
            file.write("{}\n".format(link))
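# The four dump blocks above are identical except for the key; an equivalent
# loop-based sketch:
#
#   for key in ('processed_links', 'links_to_process',
#               'links_with_comments_form', 'processed_domains'):
#       with open('{}.txt'.format(key), 'w') as file:
#           for link in data[key]:
#               file.write("{}\n".format(link))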
def get_tokenizer():
    # Build and cache the tokenizer on first use, then load it from disk.
    if not os.path.isfile(TOKENIZER_PATH):
        gen_tokenizer(TOKENIZER_PATH)
    tokenizer = load_pickle(TOKENIZER_PATH)
    return tokenizer
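# Usage: because the pickle is generated once and cached at TOKENIZER_PATH,
# repeated calls only pay the unpickling cost.
#
#   tokenizer = get_tokenizer()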
def get_pickle_train_valid_test(self, fname):
    return utils.load_pickle(fname)
def create_dataset(name='market1501', part='trainval', **kwargs):
    assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
        "Unsupported Dataset {}".format(name)
    assert part in ['trainval', 'train', 'val', 'test'], \
        "Unsupported Dataset Part {}".format(part)

    ########################################
    # Specify Directory and Partition File #
    ########################################

    if name == 'market1501':
        im_dir = ospeu('/home/mbaharan/shfs/TeCSAR/Datasets/market1501/images')
        partition_file = ospeu(
            '/home/mbaharan/shfs/TeCSAR/Datasets/market1501/partitions.pkl')

    elif name == 'cuhk03':
        im_type = ['detected', 'labeled'][0]
        im_dir = ospeu(
            ospj('/home/mbaharan/shfs/TeCSAR/Datasets/CUHK03', im_type, 'images'))
        partition_file = ospeu(
            ospj('/home/mbaharan/shfs/TeCSAR/Datasets/CUHK03', im_type,
                 'partitions.pkl'))

    elif name == 'duke':
        im_dir = ospeu('/home/mbaharan/shfs/TeCSAR/Datasets/DukeMTMC-reID/images')
        partition_file = ospeu(
            '/home/mbaharan/shfs/TeCSAR/Datasets/DukeMTMC-reID/partitions.pkl')

    elif name == 'combined':
        assert part in ['trainval'], \
            "Only trainval part of the combined dataset is available now."
        im_dir = ospeu(
            '/home/mbaharan/shfs/TeCSAR/Datasets/MixDataset_Duke_Market_CUHK03/trainval_images')
        partition_file = ospeu(
            '/home/mbaharan/shfs/TeCSAR/Datasets/MixDataset_Duke_Market_CUHK03/partitions.pkl')

    ##################
    # Create Dataset #
    ##################

    # Use standard Market1501 CMC settings for all datasets here.
    cmc_kwargs = dict(separate_camera_set=False,
                      single_gallery_shot=False,
                      first_match_break=True)

    partitions = load_pickle(partition_file)
    im_names = partitions['{}_im_names'.format(part)]

    if part == 'trainval':
        ids2labels = partitions['trainval_ids2labels']
        ret_set = TrainSet(im_dir=im_dir,
                           im_names=im_names,
                           ids2labels=ids2labels,
                           **kwargs)
    elif part == 'train':
        ids2labels = partitions['train_ids2labels']
        ret_set = TrainSet(im_dir=im_dir,
                           im_names=im_names,
                           ids2labels=ids2labels,
                           **kwargs)
    elif part == 'val':
        marks = partitions['val_marks']
        kwargs.update(cmc_kwargs)
        ret_set = TestSet(im_dir=im_dir,
                          im_names=im_names,
                          marks=marks,
                          **kwargs)
    elif part == 'test':
        marks = partitions['test_marks']
        kwargs.update(cmc_kwargs)
        ret_set = TestSet(im_dir=im_dir,
                          im_names=im_names,
                          marks=marks,
                          **kwargs)

    if part in ['trainval', 'train']:
        num_ids = len(ids2labels)
    elif part in ['val', 'test']:
        ids = [parse_im_name(n, 'id') for n in im_names]
        num_ids = len(list(set(ids)))
        num_query = np.sum(np.array(marks) == 0)
        num_gallery = np.sum(np.array(marks) == 1)
        num_multi_query = np.sum(np.array(marks) == 2)

    # Print dataset information
    print('-' * 40)
    print('{} {} set'.format(name, part))
    print('-' * 40)
    print('NO. Images: {}'.format(len(im_names)))
    print('NO. IDs: {}'.format(num_ids))
    try:
        print('NO. Query Images: {}'.format(num_query))
        print('NO. Gallery Images: {}'.format(num_gallery))
        print('NO. Multi-query Images: {}'.format(num_multi_query))
    except NameError:
        # Query/gallery counts are only defined for the val/test parts.
        pass
    print('-' * 40)

    return ret_set
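# Hypothetical usage (the paths above are machine-specific; any extra keyword
# arguments are forwarded to TrainSet / TestSet, whose constructors are not
# shown here):
#
#   trainval_set = create_dataset(name='market1501', part='trainval')
#   test_set = create_dataset(name='duke', part='test')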
"Saved Data, links_with_comments_form: {}, processed_links: {}, links_to_process: {}, processed_domains: {}" .format(len(links_with_comments_form), len(processed_links), len(links_to_process), len(processed_domains))) save_pickle(tmp_file, stage) with open('domains.txt', 'w') as file: for link in processed_domains: file.write("{}\n".format(link)) if __name__ == '__main__': links_to_process += start_pages grequest_stack = [] try: data = load_pickle(tmp_file) processed_links = data['processed_links'] links_to_process = data['links_to_process'] links_with_comments_form = data['links_with_comments_form'] processed_domains = data['processed_domains'] except Exception: pass while len(links_to_process): grequests_links() results = grequests.map(grequest_stack, exception_handler=exception_handler, size=20) save_data()
from utils.utils import load_pickle
import torch

labels = torch.load('/media/tunguyen/Devs/Security/HAN_sec/data/adnew_iapi/pickle/labels')
labels_txt = load_pickle('/media/tunguyen/Devs/Security/HAN_sec/data/adnew_iapi/pickle/labels_txt')

# Swap the label encoding: malware 0 -> 1, benign 1 -> 0.
labels_new = labels.clone()
for i, label in enumerate(labels):
    print(i, label)
    if label == 0:  # currently malware; change malware to 1
        labels_new[i] = 1
    if label == 1:  # currently benign; change benign to 0
        labels_new[i] = 0

torch.save(labels_new, '/media/tunguyen/Devs/Security/HAN_sec/data/adnew_iapi/pickle/labels_')
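# For strictly binary 0/1 labels, the loop above is equivalent to a single
# vectorized flip (a sketch; it drops the per-label printout):
#
#   labels_new = 1 - labels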