def main(data_path, output_path):
    """Split the IMDB dataset and write the validation split to TFRecord.

    Loads the raw IMDB data, performs a 70/30 train/validation split,
    converts the validation portion to TFRecord files under
    ``output_path``, and reports the total wall-clock time.
    """
    started = time.time()

    database = IMDB()
    records = database.get_data()
    train_sets, val_sets = split_imdb_data(records, 0.3)

    # Train-split conversion was disabled in the original source;
    # only the validation split is written out.
    # convert_to_tfrecord(train_sets, data_path, output_path, 'train')
    convert_to_tfrecord(val_sets, data_path, output_path, 'val')

    elapsed = time.time() - started
    print("Running %.3f sec All done!" % elapsed)
def main(ARGS, device):
    """
    Prepares the datasets for training, and optional, validation and
    testing. Then, initializes the VAE model and runs the training
    (/validation) process for a given number of epochs, printing ELBO
    metrics and sampled sentences each epoch, and finally saves the
    trained model under ``trained_models/``.
    """
    data_splits = ['train', 'val']
    datasets = {
        split: IMDB(ARGS.data_dir, split, ARGS.max_sequence_length,
                    ARGS.min_word_occ, ARGS.create_data)
        for split in data_splits
    }

    pretrained_embeddings = datasets['train'].get_pretrained_embeddings(
        ARGS.embed_dim).to(device)

    model = VAE(
        datasets['train'].vocab_size,
        ARGS.batch_size,
        device,
        pretrained_embeddings=pretrained_embeddings,
        trainset=datasets['train'],
        max_sequence_length=ARGS.max_sequence_length,
        lstm_dim=ARGS.lstm_dim,
        z_dim=ARGS.z_dim,
        embed_dim=ARGS.embed_dim,
        n_lstm_layers=ARGS.n_lstm_layers,
        kl_anneal_type=ARGS.kl_anneal_type,
        kl_anneal_x0=ARGS.kl_anneal_x0,
        kl_anneal_k=ARGS.kl_anneal_k,
        kl_fbits_lambda=ARGS.kl_fbits_lambda,
        word_keep_rate=ARGS.word_keep_rate,
    )
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())

    print('Starting training process...')
    # Counted once, before training, so every epoch saves to the same
    # file and the final checkpoint overwrites intermediate ones.
    amount_of_files = len(os.listdir("trained_models"))
    for epoch in range(ARGS.epochs):
        elbos = run_epoch(model, datasets, device, optimizer)
        train_elbo, val_elbo = elbos
        print(
            f"[Epoch {epoch} train elbo: {train_elbo}, val_elbo: {val_elbo}]")

        # Perform inference on the trained model
        with torch.no_grad():
            model.eval()
            samples = model.inference()
            print(*idx2word(samples, i2w=datasets['train'].i2w,
                            pad_idx=datasets['train'].pad_idx), sep='\n')

        # Fix: restore training mode after eval-mode inference so that
        # dropout-style layers behave correctly in the next epoch.
        # (Harmless if run_epoch already sets it — TODO confirm.)
        model.train()

        model.save(f"trained_models/{amount_of_files + 1}.model")
help='type of recurrent layer to use (rnn or birnn)') parser.set_defaults() args = parser.parse_args() # these hyperparameters are from the paper args.batch_size = 128 time_steps = 128 hidden_size = 10 gradient_clip_value = 15 embed_size = 128 vocab_size = 20000 pad_idx = 0 # download IMDB imdb_dataset = IMDB(path=args.data_dir, sentence_length=time_steps, pad_idx=pad_idx) imdb_data = imdb_dataset.load_data() train_set = ArrayIterator(imdb_data['train'], batch_size=args.batch_size, total_iterations=args.num_iterations) valid_set = ArrayIterator(imdb_data['valid'], batch_size=args.batch_size) inputs = train_set.make_placeholders() ax.Y.length = imdb_dataset.nclass # weight initialization init = UniformInit(low=-0.08, high=0.08) if args.layer_type == "rnn":
# Source URLs to process; a single title for now.
blurayURL = [
    "https://www.blu-ray.com/movies/A-Star-Is-Born-Blu-ray/217109/"
]

# User settings (BDInfo path, output directory) come from conf.txt.
config = ConfigParser()
config.read("conf.txt")
bdinfoPath = config["user_settings"]["bdinfo"]
tempDir = config["user_settings"]["output_dir"]

for url in blurayURL:
    # Scrape the blu-ray.com page for title metadata.
    blurayObj = Bluray(url)
    blurayObj.build()
    blurayObj.printAttrs()

    # Create the working folder for this title.
    directory = Folder()
    directory.build(tempDir, blurayObj.title)

    # Enrich with IMDB data keyed off the scraped IMDB link.
    imdbObj = IMDB()
    imdbObj.build(blurayObj.imdbLink, blurayObj.title,
                  blurayObj.year, blurayObj.runtime)
    imdbObj.printAttrs()

    # Pull TMDB artwork/details into the screenshot directory.
    tmdbObj = MovieDB()
    tmdbObj.build(blurayObj.imdbLink, directory.screenDir)
    tmdbObj.printAttrs()

    # Optionally run BDInfo against the disc folder.
    if args.bdinfo:
        bdiObj = BDInfo()
        bdiObj.build(args.bdinfo, directory.movieDir)
        print(bdiObj.prettyBDInfo)

    # templateObj = Template(url, blurayObj.title, blurayObj.year, etc.)
""" Created on Wed Sep 29 10:06:27 2021 ContentAI @author: Herais """ #%% Import Libraries import os from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.remote.command import Command #%% instantiate imdb from imdb import IMDB imdb = IMDB() #%% df_mpm = imdb.get_most_popular_movies() #%% element_table = imdb.driver.find_element(By.XPATH, "//table") #%% elements_td = element_table.find_elements(By.CLASS_NAME, "posterColumn") #%% html = imdb.driver.page_source #%% #%% Get movie pages from table path_wip = 'wip' element_table = imdb.driver.find_element(By.XPATH, "//table")