Ejemplo n.º 1
0
def main():
    """Pretrain the autoencoder on the encoded text dataset.

    Reads hyper-parameters from the module-level ``parser``/``cfg``, builds
    train/test loaders, and hands everything to ``pretrain``. Results and
    TensorBoard logs go under the directory derived from ``args.db_dir``.
    """
    global args
    use_cuda = torch.cuda.is_available()
    initialize_environment(random_seed=cfg.RNG_SEED, use_cuda=use_cuda)

    args = parser.parse_args()
    datadir = args.db_dir
    outputdir = get_output_dir(args.db_dir)
    nepoch = args.nepoch
    step = args.step_epoch
    dropout = args.dropout
    n_layers = cfg.N_LAYERS
    input_dim = cfg.INPUT_DIM
    hidden_dims = cfg.HIDDEN_DIMS

    # TensorBoard logs live under <output>/runs/pretraining/<run id>.
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    logging_dir = os.path.join(outputdir, 'runs', 'pretraining')
    os.makedirs(logging_dir, exist_ok=True)
    tensorboard_logger.configure(os.path.join(logging_dir, str(args.id)))

    trainset = EncodedTextDataset(root=datadir, train=True)
    testset = EncodedTextDataset(root=datadir, train=False)
    # pin_memory only helps (and num_workers only matters) when CUDA is used.
    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batchsize,
                                              shuffle=True,
                                              **kwargs)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=100,
                                             shuffle=True,
                                             **kwargs)

    pretrain(
        outputdir, {
            'nlayers': n_layers,
            'dropout': dropout,
            'reluslope': 0.0,
            'nepoch': nepoch,
            'lrate': [args.lr],
            'wdecay': [0.0],
            'step': step,
            'input_dim': input_dim,
            'hidden_dims': hidden_dims
        }, use_cuda, trainloader, testloader)
Ejemplo n.º 2
0
def check_prelim():
    """Run initial environment setup and return the directory structure.

    Eventually this will also apply user-configurable options.
    """
    return utils.initialize_environment(config)
Ejemplo n.º 3
0
        return args

    args = get_args()
    # n_clusters = 4
    # data_dir = 'data/ag_news/'
    data_dir = args.data_dir
    n_clusters = args.n_clusters
    use_cuda = torch.cuda.is_available()
    random_seed = args.seed
    recons_lam = args.recons_lam
    cluster_lam = args.cluster_lam
    batch_size = args.batch_size
    tol = args.tol
    lr = args.lr

    initialize_environment(random_seed=random_seed, use_cuda=use_cuda)

    feat_path = os.path.join(data_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME)
    feat, labels, ids = load_feat(feat_path)
    outputdir = get_output_dir(data_dir)
    net_filename = os.path.join(outputdir, cfg.PRETRAINED_FAE_FILENAME)
    checkpoint = torch.load(net_filename)
    net = extract_sdae_model(input_dim=cfg.INPUT_DIM,
                             hidden_dims=cfg.HIDDEN_DIMS)
    net.load_state_dict(checkpoint['state_dict'])
    if use_cuda:
        net.cuda()

    dcn = DCN(n_clusters,
              net,
              cfg.HIDDEN_DIMS[-1],
Ejemplo n.º 4
0
                        default=100,
                        help='the number of seed for each class')
    parser.add_argument('--verbose',
                        help='whether to print log',
                        action='store_true')
    args = parser.parse_args()
    return args


# Seed-selection script: sample up to `seed_num` labeled documents per class.
args = get_args()
data_dir = args.data_dir
random_seed = args.seed
seed_num = args.seed_num
verbose = args.verbose

initialize_environment(random_seed=random_seed)
_, labels, ids = load_csv_corpus(
    os.path.join(data_dir, cfg.TRAIN_DATA_NAME + '.csv'))

# Bucket document ids by their class label.
dic = defaultdict(list)
for doc_id, doc_label in zip(ids, labels):
    dic[doc_label].append(doc_id)

results = []
for doc_label, class_ids in dic.items():
    # In-place shuffle, then keep a seed_num-sized prefix of each class.
    random.shuffle(class_ids)
    for doc_id in class_ids[:seed_num]:
        results.append((doc_id, doc_label))
results.sort()
Ejemplo n.º 5
0
    else:
        with bz2.open(memory_path, 'rb') as zipped_pickle_file:
            return pickle.load(zipped_pickle_file)


def save_memory(memory, memory_path, disable_bzip):
    """Pickle *memory* to *memory_path*, bzip2-compressed unless disabled."""
    # Pick the file opener once; both branches pickle identically.
    opener = open if disable_bzip else bz2.open
    with opener(memory_path, 'wb') as sink:
        pickle.dump(memory, sink)


# Environment
env, test_env = initialize_environment(args)
n_actions = env.action_space.n

# Agent
dqn = Agent(args, env)

# If a model is provided, and evaluate is false, presumably we want to resume, so try to load memory
if args.model is not None and not args.evaluate:
    # Resuming training requires an existing replay-memory snapshot; fail fast otherwise.
    if not args.memory:
        raise ValueError('Cannot resume training without memory save path. Aborting...')
    elif not os.path.exists(args.memory):
        raise ValueError(
            'Could not find memory file at {path}. Aborting...'.format(path=args.memory))

    mem = load_memory(args.memory, args.disable_bzip_memory)
Ejemplo n.º 6
0
def check_prelim():
    """Perform initial setup and hand back the resulting directory structure.

    Eventually this will also set options.
    """
    return utils.initialize_environment(config)