# Further command-line options, registered in their original order as
# (flag, type, default) triples.
for _flag, _type, _default in (
    ("--hidden_dim", int, 1024),
    ("--layer_num", int, 1),
    ("--weight_dropout_in", float, 0.01),
    ("--weight_dropout_hidden", float, 0.1),
    ("--char_dropout_prob", float, 0.33),
    ("--char_noise_prob", float, 0.0),
    ("--learning_rate", float, 0.1),
    # Defaults to a random integer in [0, 1000000000] when not given.
    ("--myID", int, random.randint(0, 1000000000)),
    ("--sequence_length", int, 50),
):
    parser.add_argument(_flag, type=_type, default=_default)

args = parser.parse_args()
print(args)

from acqdivReadersplit import AcqdivReader, AcqdivReaderPartition

# One corpus reader per data split, constructed in train, dev, test order.
acqdivCorpusReadertrain, acqdivCorpusReaderdev, acqdivCorpusReadertest = [
    AcqdivReader(split, args.language) for split in ("train", "dev", "test")
]


def plus(it1, it2):
    """Yield every item of ``it1``, then every item of ``it2``."""
    yield from it1
    yield from it2


itos = []
# Walk the per-language "-char.txt" file under VOCAB_HOME, stripping
# surrounding whitespace from each line.
with open(VOCAB_HOME + args.language + "-char.txt") as inFile:
    for line in inFile:
        line = line.strip()
#else:
#    assert False


def device(x):
    """Return ``x.cuda()`` if ``args.gpu`` is truthy, otherwise ``x`` itself.

    Used to place objects on the GPU only when the --gpu flag is set.
    """
    return x.cuda() if args.gpu else x


from acqdivReadersplit import AcqdivReader, AcqdivReaderPartition

#acqdivCorpusReader = AcqdivReader(args.language)
# NOTE: although the variable name says "train", this reader is built on the
# "test" split. An earlier remark here said "test" is the intended final
# choice, with traindev used in the interim to avoid overfitting the research
# to the test data; the code as written passes "test".
acqdivCorpusReadertrain = AcqdivReader("test", args.language)


def plus(it1, it2):
    """Yield every item of ``it1``, then every item of ``it2``."""
    yield from it1
    yield from it2


## read the character vocabulary
itos = []
# The vocabulary file is VOCAB_HOME + <language> + "-char.txt"; each line is
# stripped of surrounding whitespace as it is read.
with open(VOCAB_HOME + args.language + "-char.txt") as inFile:
    for line in inFile:
        line = line.strip()
import argparse
import random

from config import VOCAB_HOME
from acqdivReadersplit import AcqdivReader

parser = argparse.ArgumentParser()
parser.add_argument("--language", dest="language", type=str)
parser.add_argument("--datapath", dest="datapath", type=str)

args = parser.parse_args()
print(args)

# Both vocabularies are collected from the "train" split of the chosen language.
acqdivCorpusReader = AcqdivReader("train", args.language)

vocabularychar = set()
vocabulary = set()

# First pass: every " ; "-separated piece of an utterance goes into
# `vocabulary`.
for utterance in acqdivCorpusReader.iterator():
    vocabulary.update(utterance.split(" ; "))
#print(vocabulary)

# Second pass: treat " ; " as an ordinary space, split on single spaces and
# collect every resulting token except a bare newline into `vocabularychar`.
iterator = acqdivCorpusReader.iterator()
for utterance in iterator:
    utterancenew = utterance.replace(" ; ", " ").split(" ")
    for char in utterancenew:
        if char != "\n":
            vocabularychar.add(char)