def setUp(self):
    # Set initial configs.
    db = {
        'dialect': 'postgresql',
        'driver': 'psycopg2',
        'username': '******',
        'password': '******',
        'host': 'localhost',
        'port': '25432',
    }
    self._schema_output = 'tst_oracle_output'

    # Recreate schema.
    str_con = DataAccessLayer.str_connection(db)
    self.eng = create_engine(str_con)
    self.recreate_schema()

    dal.db_init(str_con, schema_output=self._schema_output)

    ops_path = "test/ops_y1a1.json"
    self.ops_desc = util.load_json(ops_path)
    self.workflow = nx.read_adjlist('test/operations_sequence.al',
                                    create_using=nx.DiGraph())
    self.operations = None
def hist(): """ Fazer um histograma para cada fator que estamos avaliando qualidade: 2000 kbps, 24000 kbps fmt: 1x1, 3x2, 6x4 video: om_nom e rollercoaster Total: 2 x 3 x 2 = 12 histogramas :return: """ config = util.Config('Config.json') dectime = util.load_json('times.json') factors = (['om_nom', 'rollercoaster'], ['1x1', '3x2', '6x4'], [2000000, 24000000]) # for name, fmt, quality in product(*factors): # m, n = list(map(int, fmt.split('x'))) # # factors = (list(range(1, m * n + 1)), # list(range(1, config.duration + 1))) # times = [] # for tile, chunk in product(*factors): # times.append(dectime['ffmpeg'][name][fmt]['rate'][str(quality)][str(tile)][str(chunk)]['single']['times']['ut']) times = [[dectime['ffmpeg'][name][fmt]['rate'][str(quality)][str(tile)][str(chunk)]['single']['times']['ut'] for (tile, chunk) in product(list(range(1, list(map(int, fmt.split('x')))[0] * list(map(int, fmt.split('x')))[1] + 1)), list(range(1, config.duration + 1)))] for (name, fmt, quality) in product(*factors)]
def hist(): """ Fazer um histograma para cada fator que estamos avaliando qualidade: 2000 kbps, 24000 kbps fmt: 1x1, 3x2, 6x4 video: om_nom e rollercoaster Total: 2 x 3 x 2 = 12 histogramas :return: """ config = util.Config('Config.json') dectime = util.load_json('times.json') # for name, fmt, quality in product(*factors): # m, n = list(map(int, fmt.split('x'))) # # factors = (list(range(1, m * n + 1)), # list(range(1, config.duration + 1))) # times = [] # for tile, chunk in product(*factors): # times.append(dectime['ffmpeg'][name][fmt]['rate'][str(quality)][str(tile)][str(chunk)]['single']['times']['ut']) # for (tile, chunk) in # product(list(range(1, list(map(int, fmt.split('x')))[0] * list(map(int, fmt.split('x')))[1] + 1)), # list(range(1, config.duration + 1)))] # for (name, fmt, quality) in # product(*factors)] for name in config.videos_list: for fmt in config.tile_list: for quality in config.rate_list: times = [] sizes = [] for tile in range( 1, list(map(int, fmt.split('x')))[0] * list(map(int, fmt.split('x')))[1] + 1): for chunk in range(1, config.duration + 1): times.append( dectime['ffmpeg'][name][fmt]['rate'][str(quality)] [str(tile)][str(chunk)]['single']['times']['ut']) sizes.append(dectime['ffmpeg'][name][fmt]['rate'][str( quality)][str(tile)][str(chunk)]['single']['size']) plt.close() # plt.hist(times, bins=20) plt.plot(times) plt.show() # os.makedirs('hist', exist_ok=True) # plt.savefig(f'hist{sl}{name}_{fmt}_rate{quality}') print('ok')
def __init__(self, json_info_file, phase, transform=None):
    """
    Args:
        json_info_file: the JSON file with information about the
            object-detection dataset.
        phase: "train", "val" or "test".
        transform: optional transform to be applied on a sample.
    """
    dataset = load_json(json_info_file)
    self.dataset = dataset[phase]
    self.transform = transform
    classes, class2idx = find_classes_detect(self.dataset)
    self.classes = classes
    self.class2index = class2idx
def __init__(self, json_info_file, phase, transform=None):
    """Initialize an instance of the class.

    Args:
        json_info_file: the JSON file with information about the dataset.
        phase: "train", "val" or "test".
        transform: optional transform to be applied on a sample.
    """
    dataset = load_json(json_info_file)
    self.dataset = dataset[phase]
    self.transform = transform
    classes, class2idx = find_classes_classifier(self.dataset)
    self.classes = classes
    self.class2idx = class2idx
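# A minimal usage sketch for the two dataset initializers above, assuming
# the enclosing classes subclass torch.utils.data.Dataset and define
# __len__/__getitem__. `ClassifierDataset` and 'dataset_info.json' are
# hypothetical placeholders, not names from the original code.
# from torch.utils.data import DataLoader
# from torchvision import transforms
#
# train_set = ClassifierDataset('dataset_info.json', phase='train',
#                               transform=transforms.ToTensor())
# train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
# print(len(train_set.classes), train_set.class2idx)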
def setUp(self):
    ops_path = "test/ops_y1a1_oracle.json"
    self.ops_desc = util.load_json(ops_path)
    self.tree_desc = {
        "galaxy_properties": ["photoz"],
        "photoz": ["sg_separation"],
        "sg_separation": ["object_selection"],
        "object_selection": ["bitmask"],
        "bitmask": ["cuts"],
        "cuts": ["reduction", "zero_point"],
        "reduction": ["footprint"],
        "footprint": ["exposure_time", "mangle_map", "bad_regions"],
        "exposure_time": ["exposure_time_i", "exposure_time_r",
                          "exposure_time_z"],
        "mangle_map": ["mangle_maps_i", "mangle_maps_r"]
    }
    self.operations = None
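# `tree_desc` above is an adjacency mapping (node -> list of successors),
# the same shape of workflow that other snippets load with nx.read_adjlist.
# A sketch of turning it into a DiGraph; the helper name
# `graph_from_tree_desc` is an assumption, not part of the original tests.
import networkx as nx

def graph_from_tree_desc(tree_desc):
    g = nx.DiGraph()
    for node, successors in tree_desc.items():
        for succ in successors:
            g.add_edge(node, succ)
    return g

# A topological sort then yields a valid execution order for the operations:
#   order = list(nx.topological_sort(graph_from_tree_desc(tree_desc)))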
def eval_retrieval_json_result(self, pred_file):
    json_file = pred_file.replace("-.re", "-.json")
    result_dict = load_json(json_file)
    rank_all = []
    for k, v in result_dict.items():
        rank_all.append(v["target_rank"] + 1)
    if self.opt.train_mode == 'test':
        print("rank_all: \n", rank_all)
        print("\nrank_all printed \n")
        print("len(rank_all): ", len(rank_all))
    mrr, R1, R5, R10, ave = self.eval_retrieval4validation_with_metric(rank_all)
    print("json_file: \n", json_file)
    print("R1: R5: R10: MRR: Mean: \n{} {} {} {} {}".format(R1, R5, R10, mrr, ave))
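# eval_retrieval4validation_with_metric is not shown in this snippet. Given
# that it consumes a list of 1-based ranks and returns (mrr, R1, R5, R10,
# ave), a plausible sketch is the standard definitions below; the actual
# implementation may differ.
import numpy as np

def retrieval_metrics(rank_all):
    ranks = np.asarray(rank_all, dtype=float)
    mrr = float(np.mean(1.0 / ranks))  # mean reciprocal rank
    r1 = float(np.mean(ranks <= 1))    # recall@1
    r5 = float(np.mean(ranks <= 5))    # recall@5
    r10 = float(np.mean(ranks <= 10))  # recall@10
    ave = float(np.mean(ranks))        # mean rank
    return mrr, r1, r5, r10, ave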
def run():
    queries_folder = '/media/alberto/DATA/ExperimentalCollections/Robust04/processed/topics'
    documents_folder = '/media/alberto/DATA/ExperimentalCollections/Robust04/processed/corpus'
    stop_word_path = '/home/alberto/PycharmProjects/probabilisticir/indri_stoplist_eng.txt'
    gt_file = '/media/alberto/DATA/ExperimentalCollections/Robust04/processed/qrels.robust2004.txt'
    run_to_rerank = '/home/alberto/PycharmProjects/probabilisticir/robust.terrier.krovetz.qld.2k.run'

    queries, query_names = input_output.load_texts(queries_folder, stop_word_path)
    # documents, doc_names = input_output.load_texts(documents_folder, stop_word_path)
    # idf_scores, word_index = input_output.compute_idf(documents, 10, 0.5)
    # util.save_json(word_index, 'word_index_json')

    idf_scores = util.load_model('idf_scores')
    word_index = util.load_json('word_index_json')
    encoded_idf_scores = {word_index[k]: v
                          for k, v in idf_scores.items()
                          if k in word_index.keys()}

    # encoded_docs = [[word_index[w] for w in d if w in word_index.keys()] for d in documents]
    # encoded_queries = [[word_index[w] for w in q if w in word_index.keys()] for q in queries]
    # util.save_model(encoded_docs, 'encoded_docs')
    # util.save_model(encoded_queries, 'encoded_queries')
    # util.save_model(query_names, 'q_names')
    # util.save_model(doc_names, 'd_names')

    query_names = util.load_model('q_names')
    doc_names = util.load_model('d_names')
    query_names = [n.split(r'.txt')[0] for n in query_names][:50]
    doc_names = [n.split(r'.txt')[0] for n in doc_names]
    encoded_queries = util.load_model('encoded_queries')[:50]
    encoded_docs = util.load_model('encoded_docs')

    evaluate_ranking(encoded_idf_scores, encoded_queries, encoded_docs,
                     query_names, doc_names, gt_file, run_to_rerank)
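# The commented-out encoding lines in run() map each token to its integer id
# via `word_index`, dropping out-of-vocabulary terms. The same step as a
# small helper (a sketch; `encode` is a hypothetical name):
def encode(tokens, word_index):
    return [word_index[w] for w in tokens if w in word_index]

# e.g. encoded_queries = [encode(q, word_index) for q in queries]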
def graph4():
    """Compare, tile by tile, the bitrate and the decoding time across
    different qualities.

    :return:
    """
    config = util.Config('Config.json')
    dectime = util.load_json('times.json')
    dirname = 'graph4'
    os.makedirs(f'{dirname}', exist_ok=True)

    for fmt in config.tile_list:
        m, n = list(map(int, fmt.split('x')))
        for tile in range(1, m * n + 1):
            times_a_ld, times_a_hd = [], []
            sizes_a_ld, sizes_a_hd = [], []
            times_b_ld, times_b_hd = [], []
            sizes_b_ld, sizes_b_hd = [], []

            def entry(name, quality, chunk):
                # Per-chunk record for the current fmt/tile.
                return (dectime['ffmpeg'][name][fmt]['rate'][str(quality)]
                        [str(tile)][str(chunk)]['single'])

            # a = om_nom, b = rollercoaster; ld = 2 Mbps, hd = 24 Mbps.
            for chunk in range(1, config.duration + 1):
                a_ld = entry('om_nom', 2000000, chunk)
                a_hd = entry('om_nom', 24000000, chunk)
                b_ld = entry('rollercoaster', 2000000, chunk)
                b_hd = entry('rollercoaster', 24000000, chunk)
                times_a_ld.append(a_ld['times']['ut'])
                sizes_a_ld.append(a_ld['size'])
                times_a_hd.append(a_hd['times']['ut'])
                sizes_a_hd.append(a_hd['size'])
                times_b_ld.append(b_ld['times']['ut'])
                sizes_b_ld.append(b_ld['size'])
                times_b_hd.append(b_hd['times']['ut'])
                sizes_b_hd.append(b_hd['size'])

            # Histogram and CDF of the decoding times.
            plt.close()
            fig, ax = plt.subplots(2, 1, figsize=(10, 6), dpi=100)
            ax[0].hist(times_a_ld, bins=10, histtype='step',
                       label=f'om_nom_{fmt}_rate2000000')
            ax[0].hist(times_a_hd, bins=10, histtype='step',
                       label=f'om_nom_{fmt}_rate24000000')
            ax[0].hist(times_b_ld, bins=10, histtype='step',
                       label=f'rollercoaster_{fmt}_rate2000000')
            ax[0].hist(times_b_hd, bins=10, histtype='step',
                       label=f'rollercoaster_{fmt}_rate24000000')
            ax[0].legend(loc='upper left', ncol=1, bbox_to_anchor=(1.01, 1.0))
            ax[0].set_title(f'Tile {tile}')
            ax[0].set_xlabel('Times')
            ax[0].set_ylabel('Occurrence')
            ax[1].hist(times_a_ld, bins=10, density=True, cumulative=True,
                       histtype='step', label=f'om_nom_{fmt}_rate2000000')
            ax[1].hist(times_a_hd, bins=10, density=True, cumulative=True,
                       histtype='step', label=f'om_nom_{fmt}_rate24000000')
            ax[1].hist(times_b_ld, bins=10, density=True, cumulative=True,
                       histtype='step', label=f'rollercoaster_{fmt}_rate2000000')
            ax[1].hist(times_b_hd, bins=10, density=True, cumulative=True,
                       histtype='step', label=f'rollercoaster_{fmt}_rate24000000')
            ax[1].legend(loc='upper left', ncol=1, bbox_to_anchor=(1.01, 1.0))
            ax[1].set_xlabel('Times')
            ax[1].set_ylabel('CDF')
            plt.tight_layout()
            plt.savefig(f'{dirname}{sl}hist_{fmt}_tile{tile}')
            # plt.show()
            print(f'hist_{fmt}_tile{tile}')

            # Per-chunk decoding times (bars) and chunk sizes (lines).
            plt.close()
            fig, ax = plt.subplots(2, 1, figsize=(8, 6), dpi=100)
            ax[0].bar(np.array(range(len(times_a_ld))) - 0.3, times_a_ld,
                      width=0.2, label=f'om_nom-{fmt}-rate{2000000}')
            ax[0].bar(np.array(range(len(times_a_hd))) - 0.1, times_a_hd,
                      width=0.2, label=f'om_nom-{fmt}-rate{24000000}')
            ax[0].bar(np.array(range(len(times_b_ld))) + 0.1, times_b_ld,
                      width=0.2, label=f'rollercoaster-{fmt}-rate{2000000}')
            ax[0].bar(np.array(range(len(times_b_hd))) + 0.3, times_b_hd,
                      width=0.2, label=f'rollercoaster-{fmt}-rate{24000000}')
            ax[0].set_title(f'Tile {tile} - Delays')
            ax[0].set_ylabel('Time')
            ax[1].plot(sizes_a_ld, label=f'om_nom-{fmt}-rate{2000000}')
            ax[1].plot(sizes_a_hd, label=f'om_nom-{fmt}-rate{24000000}')
            ax[1].plot(sizes_b_ld, label=f'rollercoaster-{fmt}-rate{2000000}')
            ax[1].plot(sizes_b_hd, label=f'rollercoaster-{fmt}-rate{24000000}')
            ax[1].set_title(f'Tile {tile} - Rates')
            ax[1].set_xlabel('Chunk')
            ax[1].set_ylabel('Rate')
            ax[0].legend(loc='upper left', ncol=1, bbox_to_anchor=(1.01, 1.0))
            ax[1].legend(loc='upper left', ncol=1, bbox_to_anchor=(1.01, 1.0))
            plt.tight_layout()
            plt.savefig(f'{dirname}{sl}graph_{fmt}_tile{tile}')
            # plt.show()
            print(f'graph_{fmt}_tile{tile}')
def graph3() -> None:
    """Bar chart: fmt X average_dec_time (seconds) and fmt X average_rate
    (Bytes).

    :return: None
    """
    dirname = 'graph3'
    config = util.Config('config.json')
    dectime = util.load_json('times.json')
    # decoders = ['ffmpeg', 'mp4client']
    factors = ['rate']
    threads = ['single']

    # for decoder in decoders:
    for name in config.videos_list:
        for factor in factors:
            for thread in threads:
                df = pd.DataFrame()  # collected for inspection; not saved
                plt.close()
                fig, ax = plt.subplots(2, 1, figsize=(8, 5))
                quality_list = getattr(config, f'{factor}_list')
                offset = 0
                for quality in quality_list:
                    average_size, std_size = [], []
                    average_time, std_time = [], []
                    width = 0.8 / len(quality_list)
                    start_position = (0.8 - width) / 2
                    for fmt in config.tile_list:
                        m, n = list(map(int, fmt.split('x')))
                        size, time = [], []
                        for tile in range(1, m * n + 1):
                            for chunk in range(1, config.duration + 1):
                                chunk_data = (
                                    dectime['ffmpeg'][name][fmt][factor]
                                    [str(quality)][str(tile)][str(chunk)]
                                    [thread])
                                size.append(chunk_data['size'])
                                time.append(chunk_data['times']['ut'])
                        average_size.append(np.average(size))
                        std_size.append(np.std(size))
                        average_time.append(np.average(time))
                        std_time.append(np.std(time))

                    x = (np.array(range(1, len(average_time) + 1))
                         - start_position + offset)
                    offset += width
                    ax[0].bar(x, average_time, width=width, yerr=std_time,
                              label=f'rate_total={quality}')
                    ax[1].bar(x, average_size, width=width, yerr=std_size,
                              label=f'rate_total={quality}')
                    df[f'times_{name}_{quality}'] = average_time

                # Set the tick positions before the labels so they line up.
                ax[0].set_xticks(np.array(range(1, len(config.tile_list) + 1)))
                ax[0].set_xticklabels(config.tile_list)
                ax[1].set_xticks(np.array(range(1, len(config.tile_list) + 1)))
                ax[1].set_xticklabels(config.tile_list)
                ax[0].set_xlabel('Tile')
                ax[1].set_xlabel('Tile')
                ax[0].set_ylabel('Average Time')
                ax[1].set_ylabel('Average Rate')
                ax[0].set_title(f'{name} - Times by tiles, {factor}')
                ax[1].set_title(f'{name} - Rates by tiles, {factor}')
                ax[0].set_ylim(bottom=0)
                ax[1].set_ylim(bottom=0)
                ax[0].legend(loc='upper left', ncol=1,
                             bbox_to_anchor=(1.01, 1.0))
                ax[1].legend(loc='upper left', ncol=1,
                             bbox_to_anchor=(1.01, 1.0))
                plt.tight_layout()
                os.makedirs(dirname, exist_ok=True)
                print(f'Saving {dirname}{sl}{name}_{factor}.')
                fig.savefig(f'{dirname}{sl}{name}_{factor}')
                # plt.show()
from utils.db import dal, DataAccessLayer
from utils import util
import networkx as nx
from model.query_builder import QueryBuilder
import settings

if __name__ == "__main__":
    db = settings.DATABASES[settings.DATABASE]
    dal.db_init(DataAccessLayer.str_connection(db),
                schema_output=settings.SCHEMA_OUTPUT)

    op_description = util.load_json(settings.OPS_DESCRIPTION_FILE)
    G = nx.read_adjlist(settings.OPS_SEQUENCE_FILE, create_using=nx.DiGraph())

    builder = QueryBuilder(op_description, G)
    operations = builder.get_operations()

    for k, v in operations.items():
        print(k)
        print(str(v))
        # print(v.access_data_table())
        print(v.columns_name())
        print(v.number_of_rows())
def retrieval(self, pred_file):
    self.model.eval()
    with torch.no_grad():
        if self.opt.train_mode == 'test':
            if self.codebase_vec is None:
                print("os.path.exists(self.opt.codebase_vec_path): ",
                      os.path.exists(self.opt.codebase_vec_path))
                print("self.opt.get_codebase_vec_from_scratch: ",
                      self.opt.get_codebase_vec_from_scratch)
                if (not os.path.exists(self.opt.codebase_vec_path)
                        or self.opt.get_codebase_vec_from_scratch
                        or self.opt.use_val_as_codebase):
                    print("run self.codebase2vec()")
                    self.codebase2vec()
                else:
                    print("loading:\n", self.opt.codebase_vec_path)
                    self.codebase_vec = np.load(self.opt.codebase_vec_path)
                    codebase_index_json = load_json(
                        self.opt.dataset_index_and_codebase_vec_index_path)
                    self.dataset_index2codebase_vec_index_new = \
                        codebase_index_json["dataset_index2codebase_vec_index_new"]
                    self.codebase_vec_index2dataset_index_new = \
                        codebase_index_json["codebase_vec_index2dataset_index_new"]
        else:
            self.codebase2vec()

        comment_vec_list = []
        data_iter = iter(self.query_dataloader)
        iteration = 0
        cnt_display = 0
        print("len(self.query_dataset): ", len(self.query_dataset))
        print("self.opt.batch_size: ", self.opt.batch_size)
        print("len(self.query_dataloader): ", len(self.query_dataloader))
        comment_vec_index2dataset_index_list = []
        while iteration < len(self.query_dataloader):
            batch = next(data_iter)  # `data_iter.next()` is Python 2 style.
            comment_batch, comment_target_batch, comment_length, \
                comment_index_batch = batch
            comment_vec_index2dataset_index_list.extend(comment_index_batch)
            if self.opt.gpus:
                comment_batch, comment_target_batch, comment_length = map(
                    lambda x: x.cuda(),
                    [comment_batch, comment_target_batch, comment_length])
            cnt_display += comment_batch.size()[0]
            comment_feat = self.model.comment_encoder(
                comment_batch, comment_target_batch, comment_length)
            comment_vec_list.append(comment_feat.detach().cpu().numpy())
            iteration += 1

        comment_vec = np.concatenate(comment_vec_list)
        comment_vec = self.normalize(comment_vec)

        evaluation_result = {}
        for _idx in range(comment_vec.shape[0]):
            dataset_index = comment_vec_index2dataset_index_list[_idx]
            print("_idx:{} dataset_index:{} ".format(_idx, dataset_index))
            one_query_vec = comment_vec[_idx].reshape(1, comment_vec.shape[1])
            one_query2codebase_sims = self.dot_np(one_query_vec,
                                                  self.codebase_vec)[0]
            negsims = np.negative(one_query2codebase_sims)
            if self.opt.use_val_as_codebase:
                index_sort_negsims = np.argsort(negsims)
                target_rank = np.where(
                    index_sort_negsims ==
                    self.dataset_index2codebase_vec_index_new[dataset_index]
                )[0][0]
                evaluation_result[dataset_index] = {"target_rank": target_rank}

        if self.opt.train_mode == 'test':
            print("self.opt.batch_size: ", self.opt.batch_size)
            print("len(evaluation_result): ", len(evaluation_result))
            print("comment_vec.shape: ", comment_vec.shape)
            print("len(dataset_index2codebase_vec_index_new): ",
                  len(self.dataset_index2codebase_vec_index_new))
        self.dump_preds_json(evaluation_result, pred_file)
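# self.normalize and self.dot_np are not defined in this snippet. A plausible
# sketch, assuming cosine similarity computed as a dot product between
# L2-normalized row vectors; the real implementations may differ.
import numpy as np

def normalize(vecs, eps=1e-10):
    # L2-normalize each row vector.
    norms = np.linalg.norm(vecs, axis=1, keepdims=True)
    return vecs / np.maximum(norms, eps)

def dot_np(a, b):
    # Similarity matrix between the rows of a and the rows of b.
    return np.matmul(a, b.T)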
        'database': 'query_builder',
    },
    'y1a1': {
        'dialect': 'postgresql',
        'driver': 'psycopg2',
        'username': '******',
        'password': '******',
        'host': 'localhost',
        'port': '25432',
    },
    'oracle': {
        'dialect': 'oracle',
        'username': '******',
        'password': '******',
        'host': 'localhost',
        'port': '49161',
        'database': 'xe',
    },
    'dessci': {
        'dialect': 'oracle',
        'username': '******',
        'password': '******',
        'host': 'leovip148.ncsa.uiuc.edu',
        'port': '1521',
        'database': 'dessci',
    }
}

# global_variables
G_PARAMS = util.load_json(GLOBAL_PARAMETERS)
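# These connection dicts feed DataAccessLayer.str_connection (used by the
# setUp and __main__ snippets). A sketch of what that helper plausibly
# builds, assuming the standard SQLAlchemy URL format
# dialect[+driver]://username:password@host:port[/database]; the actual
# method may differ.
def str_connection(db):
    dialect = db['dialect']
    if 'driver' in db:
        dialect = f"{dialect}+{db['driver']}"
    url = f"{dialect}://{db['username']}:{db['password']}" \
          f"@{db['host']}:{db['port']}"
    if 'database' in db:
        url += f"/{db['database']}"
    return url

# Example with the masked credentials above:
#   str_connection(DATABASES['oracle'])
#   -> 'oracle://******:******@localhost:49161/xe'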
def graph1() -> None:
    """Plot chunks X dec_time (seconds) and chunks X file_size (Bytes).

    :return:
    """
    dirname = 'graph1'
    config = util.Config('config.json')
    dectime = util.load_json('times.json')
    # decoders = ['ffmpeg', 'mp4client']
    factors = ['rate']
    threads = ['single']

    # for decoder in decoders:
    for name in config.videos_list:
        for factor in factors:
            for quality in getattr(config, f'{factor}_list'):
                quality = np.array(quality)
                for thread in threads:
                    for fmt in config.tile_list:
                        m, n = list(map(int, fmt.split('x')))
                        plt.close()
                        fig, ax = plt.subplots(1, 2, figsize=(18, 6))
                        for tile in range(1, m * n + 1):
                            size = []
                            time_ffmpeg = []
                            # time_mp4client = []
                            for chunk in range(1, config.duration + 1):
                                chunk_data = (
                                    dectime['ffmpeg'][name][fmt][factor]
                                    [str(quality)][str(tile)][str(chunk)]
                                    [thread])
                                size.append(chunk_data['size'])
                                time_ffmpeg.append(chunk_data['times']['ut'])
                                # time_mp4client.append(dectime['mp4client'][name][fmt][factor][str(quality)][str(tile)][str(chunk)][thread]['times'])
                            ax[0].plot(time_ffmpeg,
                                       label=f'ffmpeg_tile={tile}_ffmpeg')
                            # ax[0][1].plot(time_mp4client, label=f'tile={tile}')
                            ax[1].plot(size, label=f'tile={tile}')

                        # For the 'rate' factor, quality is the total rate;
                        # report the per-tile rate instead.
                        quality_ind = quality
                        if factor == 'rate':
                            quality_ind = int(quality / (m * n))

                        ax[0].set_xlabel('Chunks')
                        ax[1].set_xlabel('Chunks')
                        ax[0].set_ylabel('Time')
                        ax[1].set_ylabel('Rate')
                        ax[0].set_title(f'ffmpeg - {name} - Times by chunks, '
                                        f'tile={fmt}, {factor}={quality_ind}')
                        ax[1].set_title(f'{name} - Rates by chunks, '
                                        f'tile={fmt}, {factor}={quality_ind}')
                        ax[1].set_ylim(bottom=0)
                        ax[1].legend(loc='upper left', ncol=2,
                                     bbox_to_anchor=(1.01, 1.0))
                        plt.tight_layout()
                        os.makedirs(dirname, exist_ok=True)
                        print(f'Saving {dirname}{sl}{name}_{fmt}_'
                              f'{factor}={quality_ind}.')
                        fig.savefig(f'{dirname}{sl}{name}_{fmt}_'
                                    f'{factor}={quality_ind}')
                        # fig.show()