Exemple #1
0
def main(data_path, output_path):
    """Convert the IMDB validation split to TFRecord and print the run time.

    The data returned by ``IMDB().get_data()`` is split with
    ``split_imdb_data(data, 0.3)``. Only the validation part is handed to
    ``convert_to_tfrecord``; the call for the training part is commented out.

    Args:
        data_path: Forwarded unchanged to ``convert_to_tfrecord``.
        output_path: Forwarded unchanged to ``convert_to_tfrecord``.
    """
    began = time.time()

    records = IMDB().get_data()
    _train_part, val_part = split_imdb_data(records, 0.3)

    # Conversion of the training part is currently disabled:
    # convert_to_tfrecord(_train_part, data_path, output_path, 'train')
    convert_to_tfrecord(val_part, data_path, output_path, 'val')

    elapsed = time.time() - began
    print("Running %.3f sec All done!" % elapsed)
Exemple #2
0
def main(ARGS, device):
    """Train a VAE on IMDB, print samples after every epoch, then save it.

    Builds the 'train' and 'val' IMDB datasets, constructs the VAE from the
    hyperparameters in ``ARGS`` and calls ``run_epoch`` once per epoch. After
    each epoch the output of ``model.inference()`` is converted to words with
    ``idx2word`` and printed, one item per line. Once all epochs are done the
    model is saved as ``trained_models/<k>.model``, where ``k`` is one more
    than the number of entries found in ``trained_models`` before training.

    Args:
        ARGS: Options object. Attributes read here: data_dir,
            max_sequence_length, min_word_occ, create_data, embed_dim,
            batch_size, lstm_dim, z_dim, n_lstm_layers, kl_anneal_type,
            kl_anneal_x0, kl_anneal_k, kl_fbits_lambda, word_keep_rate,
            epochs.
        device: Target passed to ``.to(...)`` for the pretrained embeddings
            and the model, and forwarded to ``VAE`` and ``run_epoch``.
    """
    data_splits = ['train', 'val']
    datasets = {
        split: IMDB(ARGS.data_dir, split, ARGS.max_sequence_length,
                    ARGS.min_word_occ, ARGS.create_data)
        for split in data_splits
    }
    train_data = datasets['train']

    pretrained_embeddings = train_data.get_pretrained_embeddings(
        ARGS.embed_dim).to(device)
    model = VAE(
        train_data.vocab_size,
        ARGS.batch_size,
        device,
        pretrained_embeddings=pretrained_embeddings,
        trainset=train_data,
        max_sequence_length=ARGS.max_sequence_length,
        lstm_dim=ARGS.lstm_dim,
        z_dim=ARGS.z_dim,
        embed_dim=ARGS.embed_dim,
        n_lstm_layers=ARGS.n_lstm_layers,
        kl_anneal_type=ARGS.kl_anneal_type,
        kl_anneal_x0=ARGS.kl_anneal_x0,
        kl_anneal_k=ARGS.kl_anneal_k,
        kl_fbits_lambda=ARGS.kl_fbits_lambda,
        word_keep_rate=ARGS.word_keep_rate,
    )
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())

    print('Starting training process...')

    # Make sure the output directory exists before it is listed; without this
    # a fresh checkout fails with FileNotFoundError on os.listdir.
    save_dir = "trained_models"
    os.makedirs(save_dir, exist_ok=True)
    amount_of_files = len(os.listdir(save_dir))

    for epoch in range(ARGS.epochs):
        elbos = run_epoch(model, datasets, device, optimizer)
        train_elbo, val_elbo = elbos
        print(
            f"[Epoch {epoch} train elbo: {train_elbo}, val_elbo: {val_elbo}]")

        # Perform inference on the trained model
        # NOTE(review): model.eval() is never undone in this function;
        # presumably run_epoch switches back to training mode - confirm.
        with torch.no_grad():
            model.eval()
            samples = model.inference()
            print(*idx2word(samples,
                            i2w=train_data.i2w,
                            pad_idx=train_data.pad_idx),
                  sep='\n')

    # Saved once, after the last epoch; the index was computed before training.
    model.save(f"{save_dir}/{amount_of_files + 1}.model")
Exemple #3
0
                    help='type of recurrent layer to use (rnn or birnn)')
parser.set_defaults()  # called with no arguments; on a standard argparse parser this changes nothing
args = parser.parse_args()

# these hyperparameters are from the paper
# Note: batch_size is assigned after parsing, so it replaces whatever value
# parse_args() produced for it.
args.batch_size = 128
time_steps = 128  # also used below as the dataset's sentence_length
hidden_size = 10
gradient_clip_value = 15
embed_size = 128
vocab_size = 20000
pad_idx = 0  # index handed to the dataset as its padding index

# download IMDB
# Build the dataset object from the data directory given in args and load it;
# the result is indexed below with the keys 'train' and 'valid'.
imdb_dataset = IMDB(path=args.data_dir,
                    sentence_length=time_steps,
                    pad_idx=pad_idx)
imdb_data = imdb_dataset.load_data()

# Batch iterators over the two splits. Only the training iterator is given
# total_iterations (taken from args.num_iterations).
train_set = ArrayIterator(imdb_data['train'],
                          batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(imdb_data['valid'], batch_size=args.batch_size)

# Placeholders come from the training iterator; the length of axis ax.Y is
# set to the dataset's number of classes (imdb_dataset.nclass).
inputs = train_set.make_placeholders()
ax.Y.length = imdb_dataset.nclass

# weight initialization
# Uniform initializer over [-0.08, 0.08].
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "rnn":
Exemple #4
0
    blurayURL = [
        "https://www.blu-ray.com/movies/A-Star-Is-Born-Blu-ray/217109/"
    ]
# Read user settings from conf.txt in the current working directory.
# NOTE(review): if ConfigParser is the standard-library class, read() silently
# skips a missing file, so a missing conf.txt surfaces as a KeyError on the
# lookups below - confirm this is acceptable.
config = ConfigParser()
config.read("conf.txt")
# NOTE(review): bdinfoPath is not used anywhere in the visible code; the
# BDInfo step below receives args.bdinfo instead - confirm which is intended.
bdinfoPath = config["user_settings"]["bdinfo"]
tempDir = config["user_settings"]["output_dir"]

# Process every blu-ray.com URL in turn: gather the disc data, prepare the
# folder for it, then collect the matching IMDB and MovieDB information.
for url in blurayURL:
    blurayObj = Bluray(url)
    blurayObj.build()
    blurayObj.printAttrs()

    # Folder built from the configured output directory and the disc title.
    directory = Folder()
    directory.build(tempDir, blurayObj.title)

    # IMDB lookup driven by the link, title, year and runtime from the
    # Bluray object.
    imdbObj = IMDB()
    imdbObj.build(blurayObj.imdbLink, blurayObj.title, blurayObj.year,
                  blurayObj.runtime)
    imdbObj.printAttrs()

    # MovieDB lookup; it is also given directory.screenDir.
    tmdbObj = MovieDB()
    tmdbObj.build(blurayObj.imdbLink, directory.screenDir)
    tmdbObj.printAttrs()

    # The BDInfo step only runs when args.bdinfo is truthy.
    if args.bdinfo:
        bdiObj = BDInfo()
        bdiObj.build(args.bdinfo, directory.movieDir)
        print(bdiObj.prettyBDInfo)

    # Template step is not implemented yet:
    # templateObj = Template(url, blurayObj.title, blurayObj.year, etc.)
Exemple #5
0
"""
Created on Wed Sep 29 10:06:27 2021
ContentAI
@author: Herais
"""
#%% Import Libraries
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.remote.command import Command
#%% instantiate imdb
from imdb import IMDB
imdb = IMDB()  # project-local class imported above; its `driver` attribute is used below
#%% Fetch the most popular movies
df_mpm = imdb.get_most_popular_movies()  # presumably a DataFrame, judging by the name - TODO confirm

#%% Locate the first <table> element on the page the driver currently shows
element_table = imdb.driver.find_element(By.XPATH, "//table")
#%% Collect the elements inside that table whose class is "posterColumn"
elements_td = element_table.find_elements(By.CLASS_NAME, "posterColumn")
#%% Keep the HTML source of the current page

html = imdb.driver.page_source
#%%

#%% Get movie pages from table
path_wip = 'wip'  # presumably a work-in-progress output directory; unused in the visible code - TODO confirm
# Look the table up again so this cell can be run on its own.
element_table = imdb.driver.find_element(By.XPATH, "//table")