Example #1
    def _create_csvs():
        print('creating CSV...')

        # create no_cluster/full
        path = 'dataset/preprocessed/no_cluster'
        full = data.full_df()
        train_len = data.read_config()[data.TRAIN_LEN_KEY]

        train = full.iloc[0:train_len]
        test = full.iloc[train_len:len(full)]
        target_indices = get_target_indices(test)

        check_folder('dataset/preprocessed/no_cluster/full')
        train.to_csv(os.path.join(path, 'full/train.csv'))
        test.to_csv(os.path.join(path, 'full/test.csv'))
        np.save(os.path.join(path, 'full/train_indices'), train.index)
        np.save(os.path.join(path, 'full/test_indices'), test.index)
        np.save(os.path.join(path, 'full/target_indices'), target_indices)

        no_of_rows_in_small = int(
            input('How many rows do you want in small.csv? '))
        train_small = get_small_dataset(train,
                                        maximum_rows=no_of_rows_in_small)
        check_folder('dataset/preprocessed/no_cluster/small')
        split(train_small, os.path.join(path, 'small'))

        check_folder('dataset/preprocessed/no_cluster/local')
        split(train, os.path.join(path, 'local'))

        # create item_metadata in preprocess folder
        original_item_metadata = data.accomodations_original_df()
        original_item_metadata.to_csv(data.ITEMS_PATH)

        # append missing accomodations to item metadata
        append_missing_accomodations('full')
Example #2
    def build(self, configuration_file):

        self.config = read_config(configuration_file)

        self.simulation_time = float(self.config['T']['TTS'][0])
        self.create_employees()
        self.initialize_event_queue()
        self.log.header(self)
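
The lookup self.config['T']['TTS'][0] above implies that read_config returns a nested, dict-like structure. Purely as an illustration (the key names come from the snippet, the value is invented), a compatible configuration could look like this:

# Hypothetical config shape; only the keys that build() reads are shown.
config = {
    'T': {
        'TTS': [100.0],  # the first entry is read as the total simulation time
    }
}
simulation_time = float(config['T']['TTS'][0])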
Example #3
                    help='load model state dict')

args = parser.parse_args()
pretrain = args.pretrain
train = args.train
restart = args.restart
config_path = args.config_path
# 10/29/2020
model_path = args.model_path
max_patience = args.max_patience
training_lr = args.training_lr
# 11/22/2020
model_state_num = args.model_state_num

# Read config file
config = read_config(config_path)
torch.manual_seed(config.seed)
np.random.seed(config.seed)

if pretrain:
    # Generate datasets
    train_dataset, valid_dataset, test_dataset = get_ASR_datasets(config)

    # Initialize base model
    pretrained_model = PretrainedModel(config=config)

    # Train the base model
    trainer = Trainer(model=pretrained_model, config=config)
    if restart: trainer.load_checkpoint()

    for epoch in range(config.pretraining_num_epochs):
Example #4
'''
Script to generate the data (train/dev/test splits)
'''

import sys
import os
import argparse

from data import read_config

parser = argparse.ArgumentParser()
parser.add_argument("--config", help="path to json config", required=True)
args = parser.parse_args()
config_file_path = args.config
config = read_config(config_file_path)

import random
random.seed(config['data']['seed'])


def check_context_size(slideshow, cur_idx, title_checker, content_checker,
                       config):
    '''
    Checks whether the current slide has the expected number of context slides.
    '''
    if not config['data']['strict_context_size']:
        return True
    for i in range(config['data']['context_size']):
        if config['data']['use_left_context']:
            left_idx = cur_idx - i - 1
            if left_idx < 0:
Example #5
    if opt.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    logging.info(opt)

    if opt.random_seed != 0:
        random.seed(opt.random_seed)
        np.random.seed(opt.random_seed)
        torch.manual_seed(opt.random_seed)
        torch.cuda.manual_seed_all(opt.random_seed)

    if opt.whattodo == 'train':

        config = data.read_config(opt.config)

        logging.info(config)

        makedir_and_clear(opt.save)

        logging.info("load data ...")
        train_data = data.loadData(opt.train_file, True, opt.types,
                                   opt.type_filter)
        dev_data = data.loadData(opt.dev_file, True, opt.types,
                                 opt.type_filter)
        if opt.test_file:
            test_data = data.loadData(opt.test_file, False, opt.types,
                                      opt.type_filter)
        else:
            test_data = None
Example #6
UNSURE = 31
def predict(wav):
    signal, _ = sf.read(wav)
    signal = torch.tensor(signal, device=device).float().unsqueeze(0)
    label = model.decode_intents(signal)
    return label

def set_label(category, intents):
    category = intents.loc[intents.intent == category]
    return UNSURE if category.empty else category.category.item()

# make output directory
if not os.path.isdir(OUTPUT): os.makedirs(OUTPUT)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
config = data.read_config('../input/myinput/no_unfreezing/no_unfreezing.cfg')
_, _, _ = data.get_SLU_datasets(config)  # returned datasets are not used here
model = models.Model(config).eval()
model.load_state_dict(torch.load('../input/myinput/no_unfreezing/model_state.pth', map_location=device)) # load trained model

# predict label of each .wav file and store it as a pickle
test = pd.read_csv(TEST)
if not os.path.isfile(PRED):
    df, paths = list(), list()
    files = set(test['file'].apply(lambda f: f.replace('.png', '.wav')))
    for i, speaker in enumerate(os.listdir(SPEAKERS)):
        speaker = os.path.join(SPEAKERS, speaker)
        for wav in os.listdir(speaker):
            if wav not in files:
                continue
            wav = os.path.join(speaker, wav)
            paths.append(wav)
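
The snippet ends while it is still collecting the matching .wav paths. A minimal sketch of how the remainder could look, assuming only the predict() helper defined above (the record layout is invented, and the original post-processing is not shown):

# Hypothetical continuation: decode every collected file and pickle the result,
# as the "store it as a pickle" comment above suggests.
records = [{'file': os.path.basename(p), 'label': predict(p)} for p in paths]
pd.DataFrame(records).to_pickle(PRED)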
Example #7
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N, path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
	print("========= Epoch %d of %d =========" % (epoch+1, num_epochs))
	train_loss = trainer.train(train_dataset)
	valid_loss = trainer.test(valid_dataset)
	trainer.save_checkpoint(epoch, checkpoint_path)

	print("========= Results: epoch %d of %d =========" % (epoch+1, num_epochs))
	print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss) )


Example #8
import time
from model import MainTextIdea
from data import read_config
from utils import get_score

if __name__ == '__main__':
    DATA_PATH = './data'
    data = read_config(f'{DATA_PATH}/test.json')
    solver = MainTextIdea()
    scores = 0
    max_scores = len(data)
    for i, task in enumerate(data):
        start = time.time()
        task_index, task_type = i + 1, 'multiple_choice'
        print("Predicting task {}...".format(task_index))
        y_true = task["solution"]
        try:
            prediction = solver.predict_from_model(task)
        except BaseException as e:
            print(e)
            print("Solver {} failed to solve task №{}".format('1', task_index))
            prediction = ""
        score = get_score(y_true, prediction)
        scores += score
        print("Score: {}\nCorrect: {}\nPrediction: {}\n".format(
            score, y_true, prediction))
    print(f'max_scores={max_scores}, scores={scores}')
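
The object returned by read_config is used differently across the examples: #2, #4 and #8 index it like a plain dict or list (config['T']['TTS'], config['data']['seed'], task['solution']), while #3 reads attributes (config.seed, config.pretraining_num_epochs). A minimal sketch of the dict-style variant, assuming the configuration is stored as plain JSON (the file name and keys below are illustrative, not taken from any of the repositories above):

import json

def read_config(path):
    # Parse a JSON file and return the raw Python object (a dict or a list).
    # The real implementations in the examples above differ from repo to repo.
    with open(path) as f:
        return json.load(f)

config = read_config('config.json')   # hypothetical file name
seed = config['data']['seed']          # dict-style access as in Example #4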