Example 1
def configure_logging():
    """
        Configure the loggers for Talos. Sets up the Talos loggers
        and discord.py loggers separately, so they can be easily configured
        independently.
    """
    fh = logging.FileHandler(utils.log_folder / "dtalos.log")
    dfh = logging.FileHandler(utils.log_folder / "dpy.log")
    sh = logging.StreamHandler(sys.stderr)
    gh = None
    try:
        import google.cloud.logging as glog
        client = glog.Client()
        gh = client.get_default_handler()
        gh.name = "dtalos"
        gh.setLevel(logging.WARNING)
    except (ImportError, OSError):
        pass

    ff = logging.Formatter("%(levelname)s:%(name)s:%(message)s")

    dlog = logging.getLogger("discord")

    # "log" is the module-level Talos logger; drop the Google Cloud handler if it was not created.
    talos_handlers = [h for h in (fh, sh, gh) if h is not None]
    utils.configure_logger(log, handlers=talos_handlers, formatter=ff, level=logging.INFO, propagate=False)
    utils.configure_logger(dlog, handlers=[dfh, sh], formatter=ff, level=logging.INFO, propagate=False)
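
Example 1 relies on a project-specific utils.configure_logger(logger, handlers=..., formatter=..., level=..., propagate=...) whose body is not shown. Below is a minimal sketch of what such a helper might do, assuming it simply attaches each handler with the shared formatter and sets the level and propagation flag; the signature and behavior are inferred from the call site, not taken from the Talos source.

import logging

def configure_logger(logger, handlers=(), formatter=None, level=logging.INFO, propagate=True):
    # Hypothetical re-creation of the helper, inferred only from the calls above.
    logger.setLevel(level)
    logger.propagate = propagate
    for handler in handlers:
        if handler is None:
            continue  # e.g. the optional Google Cloud handler
        if formatter is not None:
            handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
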
Example 2
def run():
    """Build the sense dataset: read files from sys.argv[1] and write results to sys.argv[2]."""
    utils.configure_logger('debug')
    logger = utils.get_logger()
    input_directory = sys.argv[1]  # '../datasets/wiki-filtered'
    out_directory = sys.argv[2]  # '../datasets/wiki-senses'
    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))
    create_sense_dataset(files, out_directory)
    logger.info('done')
Example 3
def run():
    """Build the IMS-formatted dataset from sys.argv[1] into sys.argv[2], split into sys.argv[3] folds."""
    utils.configure_logger('debug')
    logger = utils.get_logger()
    input_directory = sys.argv[1]
    out_directory = sys.argv[2]
    num_of_fold = int(sys.argv[3])
    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))
    create_IMS_formatted_dataset(files, out_directory, k=num_of_fold, num_of_process=30)
    logger.info('done')
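
Examples 2 and 3 use a different utils module, where configure_logger takes a level name and get_logger returns the shared logger. A minimal sketch of that pair under those assumptions follows; the logger name and format string are hypothetical, not taken from the project.

import logging

_LOGGER_NAME = "wiki"  # hypothetical name; the real module's logger name is not shown

def configure_logger(level_name="info"):
    # Translate a level name such as 'debug' into the numeric logging level.
    level = getattr(logging, level_name.upper(), logging.INFO)
    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=level)
    logging.getLogger(_LOGGER_NAME).setLevel(level)

def get_logger():
    # Return the shared, already-configured logger.
    return logging.getLogger(_LOGGER_NAME)
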
Example 4
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', required=False, default='wikipages.txt')
    parser.add_argument('--num-process', help="Number of processes to use for parallel processing",
                        required=False, default=1, type=int)
    parser.add_argument('--log-level', required=False, default="info")
    args = parser.parse_args()

    configure_logger(args.log_level)
    logger = utils.get_logger()

    logger.info("Input file: {}".format(args))

    directory = '../datasets/wiki/'
    try:
        os.mkdir(directory)
    except OSError:
        logger.debug("{} is already exist".format(directory))

    extract_from_file(args.filename, args.num_process)
Example 5
def process(statsfile, k, optfile=None):
    """Load the stats pickle and report average ranks, mean average precision, and precision at k."""
    stats = utils.load_pickle(statsfile)
    track_ar = average_rank_per_track(stats)
    clique_ar = average_rank_per_clique(stats)
    ma_p = mean_average_precision(stats)
    #k_p = average_precision(stats, k, ver=True)
    k_p = average_precision_at_k(stats, k)

    # Set up logger
    logger = utils.configure_logger()

    # print results
    logger.info("Number of queries: %d" % len(stats))
    logger.info("Average Rank per Track: %.3f" % track_ar)
    logger.info("Average Rank per Clique: %.3f" % clique_ar)
    logger.info("Mean Average Precision: %.2f %%" % (ma_p * 100))
    logger.info("Precision at %d: %.2f %%" % (k, k_p * 100))
    
    if optfile is not None:
        stats2 = utils.load_pickle(optfile)
        #plot_rank_histograms(stats, stats2, test=False) 
        plot_precision_at_k_histograms(stats, stats2, K=[1,3,5,10], test=False)
    else:
        plot_rank_histogram(stats)
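
The last two snippets call utils.configure_logger() with no arguments and use its return value as the logger. A minimal sketch of that variant, assuming it configures a shared logger once (guarding against duplicate handlers) and returns it; the logger name and format here are assumptions.

import logging

def configure_logger(name="cover_id", level=logging.INFO):
    # Hypothetical no-argument variant: configure the shared logger once and return it.
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s: %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(level)
    return logger
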
Example 6
# local stuff
import hdf5_getters as GETTERS
import dan_tools
import time
import utils
import scipy.cluster.vq as vq
import pylab as plt
from transforms import load_transform
import analyze_stats as anst

# params, for ICMR paper: 75 and 1.96
WIN = 75
PATCH_LEN = WIN*12

# Set up logger
logger = utils.configure_logger()

# Global models
lda = None
pca = None

def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components."""
    res = []
    trainedpca = utils.load_pickle("models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50,100,200]

    # Init codes
    codes = []
    for n_comp in pca_components: