Example #1
def main(rolls, write_csv, write_chart, plot_chart):
    """The main entrypoint."""
    print(f"Running dice mode with {rolls} rolls.\n")
    data = run(rolls)
    df = helpers.get_df(data)

    print(df.to_string(index=False))

    if write_csv:
        helpers.write_csv(df, 'dice', rolls)

    if write_chart:
        helpers.write_chart(df, 'dice', rolls)

    if plot_chart:
        helpers.plot_chart(df)
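
`run` and the `helpers` module live elsewhere in the project. As a rough sketch of the data this snippet expects, here is a hypothetical `run` that tallies six-sided die rolls into a dict (the die size and the mapping format `helpers.get_df` accepts are assumptions, not from the source):

import random
from collections import Counter


def run(rolls):
    """Roll a six-sided die `rolls` times and tally each face."""
    counts = Counter(random.randint(1, 6) for _ in range(rolls))
    # A plain {face: count} dict, sorted by face, for helpers.get_df to frame.
    return dict(sorted(counts.items()))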
Example #2
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, input_size))  # relies on the module-level input_size defined below
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar


batch_size = 1
df = h.get_df(fn='./data/3_years_of_data.csv')
num_cols = df.shape[1]
# scaled = h.sk_scale(df)

train_df, test_df = h.train_test(df, train_pct=0.3)  # only 30% of the rows go to the training split

train = h.DfPastGames(train_df)
test = h.DfPastGames(test_df)

input_size = train.data_shape

train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=False)

autoencoder = VAE(input_size)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
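
The excerpt stops at the optimizer (`LR` is defined elsewhere in the source file). For context, a minimal sketch of the standard VAE objective, reconstruction plus KL divergence, and one training pass; it assumes inputs are scaled to [0, 1] so binary cross-entropy applies, and that the dataset yields one feature tensor per row (`vae_loss` is a name introduced here, not from the source):

def vae_loss(recon_x, x, mu, logvar):
    # Reconstruction term: element-wise BCE, summed over the batch.
    bce = F.binary_cross_entropy(recon_x, x.view(-1, input_size), reduction='sum')
    # KL divergence between q(z|x) = N(mu, sigma^2) and the N(0, I) prior.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld


autoencoder.train()
for x in train_loader:
    x = x.float()
    optimizer.zero_grad()
    recon, mu, logvar = autoencoder(x)
    loss = vae_loss(recon, x, mu, logvar)
    loss.backward()
    optimizer.step()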
Example #3
def main(spec):
    np.random.seed(spec['seed'])
    torch.manual_seed(spec['seed'])
    
    nthreads = spec['threads']
    os.environ["OMP_NUM_THREADS"] = str(nthreads)
    os.environ["OPENBLAS_NUM_THREADS"] = str(nthreads)
    os.environ["MKL_NUM_THREADS"] = str(nthreads)
    os.environ["VECLIB_MAXIMUM_THREADS"] = str(nthreads)
    os.environ["NUMEXPR_NUM_THREADS"] = str(nthreads)

    input_file = spec['cl']['input_file']
    folds_path = spec['cl']['folds']
    mode = spec['cl']['mode']
    device = spec['device']
    models_path = spec['cl']['model_path']
    ce_dim = spec['ce']['encdim']
    senc_dim = spec['senc_dim']
    window = spec['ce']['window']
    f_dim = spec['fe']['fdim']
    fenc_dim = spec['fe']['enc_dim']
    n_classes = spec['cl']['num_classes']
    infersent_model = spec['infersent_model']
    w2v_path = spec['w2v_path']
    vocab_size = spec['vocab_size']
    half_precision = False
    if device != 'cpu':
        torch.cuda.set_device(device)

    senc = SentEnc(infersent_model, w2v_path,
                   vocab_size, device=device, hp=half_precision)
    prep = Preprocess()
    with gzip.open(input_file) as infile:
        tables = np.array([json.loads(line) for line in infile])
    for i in range(len(tables)): 
        tables[i]['table_array'] = np.array(prep.clean_table_array(tables[i]['table_array']))
    with open(folds_path) as foldfile:
        folds = json.load(foldfile)
    ## initialize the sentence encodings
    pbar = tqdm(total=len(tables))
    pbar.set_description('initialize sent encodings:')
    sentences = set()
    for t in tables:
        for row in t['table_array']:
            for c in row:
                sentences.add(c)
        pbar.update(1)
    pbar.close()
    senc.cache_sentences(list(sentences))
    reports = []
    for fi, fold in enumerate(folds):
        train_tables, dev_tables, test_tables = split_train_test(tables, fold, 1)  # evaluation only: the train/dev splits go unused

        ce_model = CEModel(senc_dim, ce_dim//2, window*4)
        ce_model = ce_model.to(device)
        fe_model = FeatEnc(f_dim, fenc_dim)
        fe_model = fe_model.to(device)
        cl_model = ClassificationModel(ce_dim+fenc_dim, n_classes).to(device)
        
        ce_model.load_state_dict(torch.load(models_path+f'/ce_fold{fi}.model', map_location=device))
        fe_model.load_state_dict(torch.load(models_path+f'/fe_fold{fi}.model', map_location=device))
        cl_model.load_state_dict(torch.load(models_path+f'/cl_fold{fi}.model', map_location=device))
        f1macro, report, _, _, _ = predict(test_tables, cl_model, ce_model, fe_model, senc, label2ind, device=device)
        reports.append(report)
        print(f'fold {fi} test f1-macro = {f1macro}')
    dfs = [get_df(r) for r in reports]
    mean_res = reduce(lambda x, y: x.add(y, fill_value=0), dfs)/len(dfs)
    sq_dev = [(x - mean_res) ** 2 for x in dfs]
    std_res = reduce(lambda x, y: x.add(y, fill_value=0), sq_dev)
    std_res = (std_res / len(dfs)).pow(1. / 2)
    print('mean:')
    print(tabulate.tabulate(mean_res, headers='keys', tablefmt='psql'))
    print('STD:')
    print(tabulate.tabulate(std_res, headers='keys', tablefmt='psql'))
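
An equivalent and arguably clearer way to get the per-class mean and standard deviation across folds is to stack the report frames and aggregate by index (this assumes every `get_df(r)` result shares the same index of class labels):

import pandas as pd

stacked = pd.concat(dfs)
mean_res = stacked.groupby(level=0).mean()
std_res = stacked.groupby(level=0).std(ddof=0)  # population std, matching the manual formula above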
Example #4
    def forward(self, tensors):
        lstm_out, _ = self.lstm(tensors.view(len(tensors), 1, -1))

        target = self.hidden2target(lstm_out.view(len(tensors), -1))

        prediction = F.log_softmax(target, dim=1)

        return prediction


if __name__ == "__main__":

    batch_size = 128

    df = h.get_df()
    train = h.Df(df)

    item = train[500]  # sample one item to infer the tensor shapes

    input_size = h.num_flat_features(item[0])
    output_size = h.num_flat_features(item[1])
    hidden_size = (input_size + output_size) // 2

    TARGET_SIZE = 99  # (1, 5, 99)
    EMBEDDING_DIM = 5 * 99  # (1, 50, 99)
    HIDDEN_DIM = 5 * 99  # (1, 50, 99)

    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, TARGET_SIZE)
    loss_function = nn.MSELoss()  # unusual pairing: the model emits log-probabilities, which log_softmax normally feeds to NLLLoss
    optimizer = optim.SGD(model.parameters(), lr=0.1)
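
The excerpt ends before the training loop. A minimal sketch of one, assuming `h.Df` yields (input, target) tensor pairs as the `train[500]` probe above suggests; the epoch count and DataLoader wiring are illustrative, not from the source:

from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=train, batch_size=1, shuffle=True)

for epoch in range(10):
    total_loss = 0.0
    for seq, target in train_loader:
        model.zero_grad()
        # Drop the DataLoader's batch dimension; forward() adds its own view.
        prediction = model(seq.squeeze(0).float())
        loss = loss_function(prediction, target.squeeze(0).float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"epoch {epoch}: total loss {total_loss:.4f}")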
Example #5
    raw = raw.dropna()
    raw = pd.get_dummies(data=raw, columns=['a_team', 'h_team', 'league', 'sport'])
    # raw = pd.get_dummies(data=raw, columns=[ 'a_team', 'h_team',])
    # raw = raw.drop(['game_id', 'lms_date', 'lms_time'], axis=1)
    raw = raw.drop(['game_id'], axis=1)
    print(raw.columns)
    # raw = raw.astype(np.float32)
    # raw = raw.sort_values('cur_time', axis=0)
    return raw.copy()


EPOCH = 1 
LR = 0.005
N_TEST_IMG = 5

tmp_df = h.get_df('./data/nba2.csv')
tmp_df = h.select_dtypes(tmp_df)
print(tmp_df.dtypes)

train_df, test_df = h.train_test(tmp_df)

scaled_train = h.sk_scale(train_df)
scaled_test = h.sk_scale(test_df)

train = h.Df(scaled_train, train_df.values)
test = h.Df(scaled_test, test_df.values)

batch_size = 1
num_cols = tmp_df.shape[1]

input_size = num_cols
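
The excerpt cuts off right after computing `input_size`. A plausible continuation under the constants above, assuming a plain `AutoEncoder` module whose forward returns (encoded, decoded) pairs and a reconstruction objective; the class name, its return signature, and the (scaled, raw) pairs from `h.Df` are all assumptions:

from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=False)

autoencoder = AutoEncoder(input_size)  # hypothetical module defined elsewhere
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

for epoch in range(EPOCH):
    for scaled, _ in train_loader:
        encoded, decoded = autoencoder(scaled.float())
        loss = loss_func(decoded, scaled.float())  # reconstruct the scaled row
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()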