Example no. 1
0
def main(args):
    """Train or run inference for the REVIEWDI model on the Amazon dataset.

    When ``args.test`` is false: derive a timestamped checkpoint path, store
    it in ``args.model_file``, and train the model. When ``args.test`` is
    true: reload the model from ``args.model_file`` and run inference on the
    validation split.

    Parameters
    ----------
    args : namespace with at least ``test``, ``model_path``, ``model_name``
        plus whatever the data/model/trainer helpers read from it.
    """
    #### get data
    data_obj = _Data()
    train_data, valid_data, vocab_obj = data_obj.f_load_data_amazon(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    logger_obj = Logger()
    logger_obj.f_add_writer(args)

    if not args.test:
        # Build a (mostly) unique checkpoint name from the current wall time
        # so successive training runs do not overwrite each other.
        now_time = datetime.datetime.now()
        time_name = str(now_time.day)+"_"+str(now_time.month)+"_"+str(now_time.hour)+"_"+str(now_time.minute)
        model_file = os.path.join(args.model_path, args.model_name+"/model_best_"+time_name+".pt")
        args.model_file = model_file

    print("vocab_size", len(vocab_obj.m_w2i))

    ### get model
    network = REVIEWDI(vocab_obj, args, device=device)

    ### count trainable parameters for a quick sanity report
    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)

    print("total parameters num", total_param_num)

    if not args.test:
        optimizer = Optimizer(network.parameters(), args)

        trainer = TRAINER(vocab_obj, args, device)
        trainer.f_train(train_data, valid_data, network, optimizer, logger_obj)
    else:
        print("="*10, "eval", "="*10)
        print("="*10, "inference", "="*10)

        infer = INFER(vocab_obj, args, device)
        infer.f_init_infer(network, args.model_file, reload_model=True)
        infer.f_inference(valid_data)

    logger_obj.f_close_writer()
def main(args):
    """Train or run inference for the REVIEWDI model on the Google dataset.

    When ``args.test`` is false: derive a timestamped checkpoint path, store
    it in ``args.model_file``, and train. When ``args.test`` is true: reload
    the model from ``args.model_file`` and run inference on the validation
    split.

    Parameters
    ----------
    args : namespace with at least ``test``, ``model_path``, ``model_name``
        plus whatever the data/model/trainer helpers read from it.
    """
    #### get data
    data_obj = _Data()
    train_data, valid_data, vocab_obj = data_obj.f_load_data_google(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    logger_obj = Logger()
    logger_obj.f_add_writer(args)

    if not args.test:
        # Build a (mostly) unique checkpoint name from the current wall time
        # so successive training runs do not overwrite each other.
        now_time = datetime.datetime.now()
        time_name = str(now_time.day) + "_" + str(now_time.month) + "_" + str(
            now_time.hour) + "_" + str(now_time.minute)
        model_file = os.path.join(
            args.model_path,
            args.model_name + "/model_best_" + time_name + ".pt")
        args.model_file = model_file

    ### get model
    vocab_size = len(vocab_obj.m_w2i)
    print("vocab_size", vocab_size)

    network = REVIEWDI(vocab_obj, args, device=device)

    if not args.test:
        optimizer = Optimizer(network.parameters(), args)

        trainer = TRAINER(vocab_obj, args, device)
        trainer.f_train(train_data, valid_data, network, optimizer, logger_obj)
    else:
        print("=" * 10, "eval")
        print("=" * 10, "inference")

        infer = INFER(vocab_obj, args, device)
        infer.f_init_infer(network, args.model_file, reload_model=True)
        infer.f_inference(valid_data)

    logger_obj.f_close_writer()
def main(args):
    """Train the REVIEWDI character/word LM on data loaded from a text file.

    Reads the corpus described by the module-level ``flags`` object
    (``train_file``, ``batch_size``, ``seq_size``, ``embedding_size``,
    ``lstm_size``) and trains the network; no inference phase is run here.

    Parameters
    ----------
    args : namespace consumed by ``Optimizer``, ``TRAINER`` and ``Logger``.
    """
    #### get data
    # NOTE(review): `flags` is not a parameter — presumably a module-level
    # config object; confirm it is defined in this module's scope.
    int_to_vocab, vocab_to_int, n_vocab, in_text, out_text = get_data_from_file(
        flags.train_file, flags.batch_size, flags.seq_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    logger_obj = Logger()
    logger_obj.f_add_writer(args)

    print("vocab_size", len(int_to_vocab))

    ### get model
    network = REVIEWDI(n_vocab,
                       flags.seq_size,
                       flags.embedding_size,
                       flags.lstm_size,
                       device=device)

    optimizer = Optimizer(network.parameters(), args)

    trainer = TRAINER(args, flags.seq_size, device)
    trainer.f_train(in_text, out_text, network, optimizer, logger_obj)

    logger_obj.f_close_writer()
Example no. 4
0
def main(args):
    """Run training, inference and/or evaluation for the REVIEWDI model.

    Driven by three boolean flags on ``args``:
    ``args.train``: derive a timestamped checkpoint path into
    ``args.model_file`` and train. ``args.test`` or ``args.eval``: reload the
    model from ``args.model_file`` and run inference on the validation split.
    ``args.eval`` additionally runs the ``_EVAL`` evaluation pass.

    Parameters
    ----------
    args : namespace with at least ``train``, ``test``, ``eval``,
        ``model_path``, ``model_name``, ``data_name`` plus whatever the
        data/model/trainer helpers read from it.
    """
    # Fixed seed for reproducibility across runs.
    seed = 1111
    set_seed(seed)

    #### get data
    data_obj = _Data()
    train_data, valid_data, vocab_obj = data_obj.f_load_data(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device", device)

    if args.train:
        # Build a (mostly) unique checkpoint name from the current wall time
        # so successive training runs do not overwrite each other.
        now_time = datetime.datetime.now()
        time_name = str(now_time.month)+"_"+str(now_time.day)+"_"+str(now_time.hour)+"_"+str(now_time.minute)
        model_file = os.path.join(args.model_path, args.model_name+"/model_best_"+time_name+"_"+args.data_name+".pt")

        args.model_file = model_file

    print("vocab_size", vocab_obj.vocab_size)
    print("user num", vocab_obj.user_size)

    ### get model
    network = REVIEWDI(vocab_obj, args, device=device)

    ### count trainable parameters for a quick sanity report
    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)

    print("total parameters num", total_param_num)

    if args.train:
        logger_obj = Logger()
        logger_obj.f_add_writer(args)

        optimizer = Optimizer(network.parameters(), args)

        trainer = TRAINER(vocab_obj, args, device)
        trainer.f_train(train_data, valid_data, network, optimizer, logger_obj)

        logger_obj.f_close_writer()

    if args.test or args.eval:
        print("="*10, "test", "="*10)
        infer_obj = INFER(vocab_obj, args, device)
        infer_obj.f_init_infer(network, args.model_file, reload_model=True)
        infer_obj.f_inference(valid_data)

    if args.eval:
        print("="*10, "eval", "="*10)
        eval_obj = _EVAL(vocab_obj, args, device)
        eval_obj.f_init_eval(network, args.model_file, reload_model=True)
        eval_obj.f_eval(valid_data)
Example no. 5
0
def main(args):
    """Run training, inference and/or evaluation for REVIEWDI on Amazon data.

    Driven by three boolean flags on ``args``:
    ``args.train``: derive a timestamped checkpoint path into
    ``args.model_file`` and train. ``args.test`` or ``args.eval``: reload the
    model from ``args.model_file`` and run inference on the validation split.
    ``args.eval`` additionally runs the ``EVAL`` evaluation pass.

    Parameters
    ----------
    args : namespace with at least ``train``, ``test``, ``eval``,
        ``model_path``, ``model_name``, ``data_name`` plus whatever the
        data/model/trainer helpers read from it.
    """
    # Fixed seed for reproducibility across runs.
    set_seed(1111)

    #### get data
    data_obj = _Data()
    train_data, valid_data, vocab_obj = data_obj.f_load_data_amazon(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    logger_obj = Logger()
    logger_obj.f_add_writer(args)

    if args.train:
        # Build a (mostly) unique checkpoint name from the current wall time
        # so successive training runs do not overwrite each other.
        now_time = datetime.datetime.now()
        time_name = str(now_time.month)+"_"+str(now_time.day)+"_"+str(now_time.hour)+"_"+str(now_time.minute)
        model_file = os.path.join(args.model_path, args.model_name+"/model_best_"+time_name+"_"+args.data_name+".pt")
        args.model_file = model_file

    print("vocab_size", len(vocab_obj.m_w2i))

    ### get model
    network = REVIEWDI(vocab_obj, args, device=device)

    ### count trainable parameters for a quick sanity report
    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)

    print("total parameters num", total_param_num)

    if args.train:
        optimizer = Optimizer(network.parameters(), args)
        trainer = TRAINER(vocab_obj, args, device)
        trainer.f_train(train_data, valid_data, network, optimizer, logger_obj)

    if args.test or args.eval:
        print("="*10, "test", "="*10)  
        infer = INFER(vocab_obj, args, device)
        infer.f_init_infer(network, args.model_file, reload_model=True)
        infer.f_inference(valid_data)

    if args.eval:
        print("="*10, "eval", "="*10)
        eval_obj = EVAL(vocab_obj, args, device)
        eval_obj.f_init_eval(network, args.model_file, reload_model=True)
        eval_obj.f_eval(valid_data)

    logger_obj.f_close_writer()