Example #1
def main():

    args = get_args()
    data_path = os.path.join(args.iobasedir, 'processed/downloads',
                             args.data_set)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(args.iobasedir, 'logs', 'UB.log')
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]  # strip a 5-character extension such as '.json'

        docs, refs = load_data(data_file)
        if not refs:
            continue

        if not args.summary_size:
            summary_size = len(' '.join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        algos = ['UB1', 'UB2']
        for algo in algos:
            # `language` and `rouge` are assumed to be defined at module
            # level in the original source; the later examples pass
            # args.language and a Rouge(rouge_dir) instance explicitly.
            get_summary_scores(algo, docs, refs, summary_size, language, rouge)

        logger.info('###')
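Every example on this page leans on the project's mkdirp and set_logger helpers without showing them. Below is a minimal sketch of what such helpers typically look like; the bodies are assumptions based on how the examples call them, not the project's actual code:

import logging
import os


def mkdirp(path):
    # Create the directory and any missing parents; do nothing if it exists.
    os.makedirs(path, exist_ok=True)


def set_logger(log_file):
    # Attach a file handler and a console handler to the root logger, so the
    # module-level logger.info(...) calls in these examples reach both.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(logging.Formatter('%(asctime)s: %(message)s'))
        logger.addHandler(file_handler)
        logger.addHandler(logging.StreamHandler())
    return logger

Returning the logger keeps the sketch compatible with both call styles seen on this page: Example #5 assigns the return value, while the other examples rely on the configured root logger.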
Example #2
def main():

    args = get_args()
    rouge_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'rouge/RELEASE-1.5.5/')

    data_path = os.path.join(args.iobasedir, 'processed/', args.dataset,
                             args.domain, args.split)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(
        args.iobasedir, 'logs', 'baselines_rsumm_%s_%s_%s_%s.log' %
        (args.dataset, args.domain, args.split, str(args.summary_size)))
    mkdirp(log_path)
    set_logger(log_file)

    data_file = os.path.join(data_path, 'test0.csv')
    df = pd.read_csv(data_file,
                     sep=",",
                     quotechar='"',
                     engine='python',
                     header=None,
                     skiprows=1,
                     names=[
                         "user_id", "product_id", "rating", "review", "nouns",
                         "summary", 'time'
                     ])

    for index, row in df.iterrows():
        topic = row['user_id'] + '_' + row['product_id']
        docs = [[sent] for sent in sent_tokenize(row['review'].strip())]
        refs = [sent_tokenize(row['summary'].strip())]
        if not refs:
            continue

        if not args.summary_size:
            summary_size = len(" ".join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        rouge = Rouge(rouge_dir)
        algos = [
            'Luhn', 'LexRank', 'TextRank', 'LSA', 'KL', "ICSI", 'UB1', 'UB2'
        ]
        best_summary = []
        best_score = 0.0
        for algo in algos:
            best_summary, best_score = get_summary_scores(
                algo, docs, refs, summary_size, args.language, rouge,
                best_summary, best_score)

        rouge._cleanup()
        logger.info('###')
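Note the two calling conventions for get_summary_scores: Examples #1 and #4 discard its return value, while the loop above threads a running (best_summary, best_score) pair through it. The following sketch shows the contract those call sites imply; summarize and score_with_rouge are hypothetical stand-ins, not names from the source:

def get_summary_scores(algo, docs, refs, summary_size, language, rouge,
                       best_summary=None, best_score=0.0):
    # `summarize` and `score_with_rouge` are hypothetical placeholders for
    # the project's summarizer dispatch and its ROUGE scoring call.
    summary = summarize(algo, docs, summary_size, language)
    score = score_with_rouge(rouge, summary, refs)
    logger.info('%s: %f', algo, score)
    # Thread a running best through the caller's loop (Example #2's pattern);
    # callers that ignore the return value (Examples #1 and #4) are unaffected.
    if score > best_score:
        return summary, score
    return best_summary, best_score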
Example #3
    def setup(self):
        # Set the random seed for the experiment; use 42 if none is provided.
        torch.manual_seed(self.parameters.get('random_seed', 42))
        np.random.seed(self.parameters.get('random_seed', 42))
        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.parameters.get('random_seed', 42))
            torch.cuda.manual_seed_all(self.parameters.get('random_seed', 42))
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        # Create directories to store results
        for directory in ['log', 'results', 'models', 'tensorboard']:
            directory_path = os.path.join(self.experiment_path, directory)
            if not os.path.exists(directory_path):
                os.mkdir(directory_path)

        # Set the logger
        set_logger(os.path.join(self.experiment_path, 'log', 'experiment.log'))
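The seeding block in setup() is a self-contained reproducibility recipe worth extracting. Here is a standalone version; the name seed_everything is an assumption, and it also seeds Python's built-in random module, which the original omits:

import random

import numpy as np
import torch


def seed_everything(seed=42):
    # Seed every RNG the experiment touches so repeated runs match.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning for deterministic kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True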
Example #4
def main():

    args = get_args()
    rouge_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'rouge/RELEASE-1.5.5/')

    data_path = os.path.join(args.iobasedir, args.data_setpath)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(
        args.iobasedir, 'logs',
        'baselines_%s_%s.log' % (args.data_set, args.summary_size))
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]

        try:
            docs, refs = load_data(data_file)
        except Exception:
            # Skip files that fail to load; the original `except: pass`
            # would fall through with docs/refs undefined and raise a
            # NameError on the next line.
            continue
        if not refs:
            continue

        if not args.summary_size:
            summary_size = len(" ".join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        rouge = Rouge(rouge_dir)
        algos = ['UB1', 'UB2', 'ICSI', 'Luhn', 'LexRank', 'LSA', 'KL']
        for algo in algos:
            get_summary_scores(algo, docs, refs, summary_size, args.language,
                               rouge)
        rouge._cleanup()
        logger.info('###')
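Examples #2 and #4 build a fresh Rouge(rouge_dir) per topic and call its private _cleanup() afterwards, presumably to delete the temporary files the ROUGE-1.5.5 Perl wrapper writes. Wrapping the scoring loop in try/finally makes that cleanup survive a scoring failure; this is the same sequence of calls from the example above, just reordered:

        rouge = Rouge(rouge_dir)
        try:
            for algo in algos:
                get_summary_scores(algo, docs, refs, summary_size,
                                   args.language, rouge)
        finally:
            # Always release ROUGE's temporary files, even if scoring raises.
            rouge._cleanup()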
Example #5
def main():
    args = get_args_from_command_line()

    # Set GPU to use
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # update config
    from configs.base_config import cfg, cfg_from_file, cfg_update

    if args.gan:
        cfg_from_file("configs/" + args.model + "_gan.yaml")
    else:
        cfg_from_file("configs/" + args.model + ".yaml")
    if args.test_mode is not None:
        cfg.TEST.mode = args.test_mode
    output_dir = cfg_update(args)

    # Set up folders for logs and checkpoints
    if not os.path.exists(cfg.DIR.logs):
        os.makedirs(cfg.DIR.logs)
    from utils.misc import set_logger

    logger = set_logger(os.path.join(cfg.DIR.logs, "log.txt"))
    logger.info("save into dir: %s" % cfg.DIR.logs)

    if "weights" not in cfg.CONST or not os.path.exists(cfg.CONST.weights):
        logger.error("Please specify the file path of checkpoint.")
        sys.exit(2)

    # Start inference process
    if args.gan:
        runners = __import__("runners." + args.model + "_gan_runner")
        module = getattr(runners, args.model + "_gan_runner")
        model = getattr(module, args.model + "GANRunner")(cfg, logger)

    else:
        runners = __import__("runners." + args.model + "_runner")
        module = getattr(runners, args.model + "_runner")
        model = getattr(module, args.model + "Runner")(cfg, logger)

    model.test()
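A detail worth knowing about the dynamic loading above: __import__("runners." + args.model + "_runner") returns the top-level runners package, which is why the code immediately getattr's the submodule off it. importlib.import_module returns the submodule directly, so the same logic reads more plainly; this sketch is equivalent under the same naming convention:

import importlib

suffix = '_gan_runner' if args.gan else '_runner'
module = importlib.import_module('runners.' + args.model + suffix)
class_name = args.model + ('GANRunner' if args.gan else 'Runner')
model = getattr(module, class_name)(cfg, logger)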
Example #6
def main():
    args = get_args_from_command_line()

    # Set GPU to use
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # update config
    from configs.base_config import cfg, cfg_from_file, cfg_update

    if args.gan:
        cfg_from_file("configs/" + args.model + "_gan.yaml")
    else:
        cfg_from_file("configs/" + args.model + ".yaml")
    output_dir = cfg_update(args)

    # Set up folders for logs and checkpoints
    if not os.path.exists(cfg.DIR.logs):
        os.makedirs(cfg.DIR.logs)
    from utils.misc import set_logger

    logger = set_logger(os.path.join(cfg.DIR.logs, "log.txt"))
    logger.info("save into dir: %s" % cfg.DIR.logs)

    # Start train/inference process
    if args.gan:
        runners = __import__("runners." + args.model + "_gan_runner")
        module = getattr(runners, args.model + "_gan_runner")
        model = getattr(module, args.model + "GANRunner")(cfg, logger)

    else:
        runners = __import__("runners." + args.model + "_runner")
        module = getattr(runners, args.model + "_runner")
        model = getattr(module, args.model + "Runner")(cfg, logger)

    model.runner()
Example #7
    # set run directory
    logs_dir = os.path.join(model_dir, 'runs')
    if args.run:
        log = args.run
        log_dir = os.path.join(logs_dir, args.run)
    else:
        # Fall back to the lexicographically last run directory.
        log = os.path.basename(sorted(glob.glob(os.path.join(logs_dir, '*')))[-1])
        log_dir = os.path.join(logs_dir, log)

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    misc.set_logger(os.path.join(root, args.model_dir, 'test.log'))

    # Create the input data pipeline
    logging.info("\nLoading the datasets...")

    data_dir = os.path.join(root, args.data_dir)

    # fetch dataloaders
    if args.dataloader == 'mnist':
        import data_loaders.mnist_data_loader as data_loader

        dataloaders = data_loader.fetch_dataloader(types=['test'], data_dir=data_dir, download=False, params=params)

        # If output needs to be reshaped into an image
        reshape = True
Example #8
    model_dir = os.path.join(root, args.model_dir)
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    set_logger(os.path.join(root, args.model_dir, 'train.log'))
    # print out the arguments in a nice table
    tab_printer(args, "Argument Parameters")
    tab_printer(params, "Hyperparameters")
    # Create the input data pipeline
    logging.info("\nLoading the datasets...")

    data_dir = os.path.join(root, args.data_dir)

    # fetch dataloaders
    if args.dataloader == 'mnist':
        import data_loaders.mnist_data_loader as data_loader
        dataloaders = data_loader.fetch_dataloader(types=['train'],
                                                   data_dir=data_dir,
                                                   download=False,
                                                   params=params)
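Params(json_path) above follows the common pattern of loading a JSON hyperparameter file into attribute access. A minimal sketch that supports only what the snippet uses, namely construction from a path and settable attributes like params.cuda:

import json


class Params:
    """Expose the key/value pairs of a JSON file as instance attributes."""

    def __init__(self, json_path):
        with open(json_path) as f:
            # params.learning_rate, params.cuda, ... become plain attributes.
            self.__dict__.update(json.load(f))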