def configure_logging():
    """
        Configure the loggers for Talos.

        Sets up the Talos loggers and discord.py loggers separately, so they can be easily
        configured independently.
    """
    fh = logging.FileHandler(utils.log_folder / "dtalos.log")
    dfh = logging.FileHandler(utils.log_folder / "dpy.log")
    sh = logging.StreamHandler(sys.stderr)

    # Optional Google Cloud Logging handler, used only if the library is installed
    # and default credentials are available.
    gh = None
    try:
        import google.cloud.logging as glog
        client = glog.Client()
        gh = client.get_default_handler()
        gh.name = "dtalos"
        gh.setLevel(logging.WARNING)
    except (ImportError, OSError):
        pass

    ff = logging.Formatter("%(levelname)s:%(name)s:%(message)s")

    dlog = logging.getLogger("discord")

    # Only include the cloud handler when it was actually created.
    handlers = [fh, sh]
    if gh is not None:
        handlers.append(gh)
    utils.configure_logger(log, handlers=handlers, formatter=ff, level=logging.INFO, propagate=False)
    utils.configure_logger(dlog, handlers=[dfh, sh], formatter=ff, level=logging.INFO, propagate=False)
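# A minimal sketch of what a configure_logger helper along these lines could look like.
# The handlers/formatter/level/propagate keyword interface is inferred from the call sites
# above; this is an assumption for illustration, not the actual Talos utils implementation.
import logging


def configure_logger(logger, handlers=(), formatter=None, level=logging.INFO, propagate=False):
    """Attach the given handlers to the logger and apply a shared formatter and level."""
    logger.setLevel(level)
    logger.propagate = propagate
    for handler in handlers:
        if formatter is not None:
            handler.setFormatter(formatter)
        logger.addHandler(handler)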
def run():
    utils.configure_logger('debug')
    logger = utils.get_logger()

    input_directory = sys.argv[1]  # e.g. '../datasets/wiki-filtered'
    out_directory = sys.argv[2]  # e.g. '../datasets/wiki-senses'

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_sense_dataset(files, out_directory)
    logger.info('done')
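# Hedged usage sketch: how this run() would typically be invoked and guarded. The script
# name create_sense_dataset.py is an assumption; the directory paths come from the inline
# comments above.
#
#   python create_sense_dataset.py ../datasets/wiki-filtered ../datasets/wiki-senses
#
if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: python create_sense_dataset.py <input_directory> <out_directory>')
    run()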
def run():
    utils.configure_logger('debug')
    logger = utils.get_logger()

    input_directory = sys.argv[1]
    out_directory = sys.argv[2]
    num_of_fold = int(sys.argv[3])

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_IMS_formatted_dataset(files, out_directory, k=num_of_fold, num_of_process=30)
    logger.info('done')
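# A minimal sketch of the k-fold split that create_IMS_formatted_dataset presumably performs.
# The helper name split_into_folds and the round-robin assignment are assumptions for
# illustration, not the actual implementation.
def split_into_folds(files, k):
    """Assign files to k folds round-robin and return a list of k file lists."""
    folds = [[] for _ in range(k)]
    for i, f in enumerate(files):
        folds[i % k].append(f)
    return folds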
def run():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', required=False, default='wikipages.txt')
    parser.add_argument('--num-process', help="Number of processes for parallel processing",
                        required=False, default=1, type=int)
    parser.add_argument('--log-level', required=False, default="info")
    args = parser.parse_args()

    configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.info("Arguments: {}".format(args))

    directory = '../datasets/wiki/'
    try:
        os.mkdir(directory)
    except OSError:
        logger.debug("{} already exists".format(directory))

    extract_from_file(args.filename, args.num_process)
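# Example invocation, based on the argparse flags defined above (the script name
# extract_wiki.py is an assumption):
#
#   python extract_wiki.py --filename wikipages.txt --num-process 4 --log-level debug
#
# A hedged sketch of turning the --log-level string into a numeric logging level, in case
# configure_logger expects one; the helper name is illustrative.
import logging


def _level_from_name(name):
    return getattr(logging, name.upper(), logging.INFO)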
def process(statsfile, k, optfile=None):
    stats = utils.load_pickle(statsfile)
    track_ar = average_rank_per_track(stats)
    clique_ar = average_rank_per_clique(stats)
    ma_p = mean_average_precision(stats)
    #k_p = average_precision(stats, k, ver=True)
    k_p = average_precision_at_k(stats, k)

    # Set up logger
    logger = utils.configure_logger()

    # Print results
    logger.info("Number of queries: %d" % len(stats))
    logger.info("Average Rank per Track: %.3f" % track_ar)
    logger.info("Average Rank per Clique: %.3f" % clique_ar)
    logger.info("Mean Average Precision: %.2f %%" % (ma_p * 100))
    logger.info("Precision at %d: %.2f %%" % (k, k_p * 100))

    if optfile is not None:
        stats2 = utils.load_pickle(optfile)
        #plot_rank_histograms(stats, stats2, test=False)
        plot_precision_at_k_histograms(stats, stats2, K=[1, 3, 5, 10], test=False)
    else:
        plot_rank_histogram(stats)
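# A minimal sketch of an average_precision_at_k along the lines used above, assuming each
# entry of stats holds the 1-based ranks at which the relevant tracks for that query were
# retrieved. This is an illustrative reimplementation under that assumption, not the
# module's actual code.
import numpy as np


def average_precision_at_k(stats, k):
    """Mean over queries of (number of relevant items ranked within the top k) / k."""
    precisions = []
    for ranks in stats:
        ranks = np.asarray(ranks)
        precisions.append(np.sum(ranks <= k) / float(k))
    return np.mean(precisions)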
# local stuff
import hdf5_getters as GETTERS
import dan_tools
import time
import utils
import scipy.cluster.vq as vq
import pylab as plt
from transforms import load_transform
import analyze_stats as anst

# Params, for ICMR paper: 75 and 1.96
WIN = 75
PATCH_LEN = WIN * 12

# Set up logger
logger = utils.configure_logger()

# Global models
lda = None
pca = None


def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components."""
    res = []
    trainedpca = utils.load_pickle("models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50, 100, 200]

    # Init codes
    codes = []
    for n_comp in pca_components: