Ejemplo n.º 1
0
# Hyper-parameter options added to the parser created earlier in the script.
# Each entry is (flag, value type, default); registration order is preserved.
_hyperparameter_options = (
    ("--hidden_dim", int, 1024),
    ("--layer_num", int, 1),
    ("--weight_dropout_in", float, 0.01),
    ("--weight_dropout_hidden", float, 0.1),
    ("--char_dropout_prob", float, 0.33),
    ("--char_noise_prob", float, 0.0),
    ("--learning_rate", float, 0.1),
    # The default run ID is drawn at random once, when the option is declared.
    ("--myID", int, random.randint(0, 1000000000)),
    ("--sequence_length", int, 50),
)
for _flag, _value_type, _default in _hyperparameter_options:
    parser.add_argument(_flag, type=_value_type, default=_default)

# Parse the command line and echo the resulting configuration.
args = parser.parse_args()
print(args)

from acqdivReadersplit import AcqdivReader, AcqdivReaderPartition

# One corpus reader per data split, built in the order train, dev, test.
acqdivCorpusReadertrain, acqdivCorpusReaderdev, acqdivCorpusReadertest = (
    AcqdivReader(split_name, args.language)
    for split_name in ("train", "dev", "test")
)


def plus(it1, it2):
    """Yield every item of ``it1`` followed by every item of ``it2``.

    The second iterable is not iterated until the first one is exhausted.
    """
    for source in (it1, it2):
        for item in source:
            yield item


# Presumably the character vocabulary (the file name ends in '-char.txt') — TODO confirm.
itos = []
with open(VOCAB_HOME + args.language + '-char.txt', "r") as inFile:
    for line in inFile:
        # Remove leading/trailing whitespace, including the line terminator.
        line = line.strip()
        # NOTE(review): the stripped line is never stored, so `itos` stays empty in
        # the code visible here — this fragment looks truncated; confirm what
        # should follow (e.g. appending the entry to `itos`).
#else:
# assert False


# For putting things on the GPU if the --gpu flag is set
def device(x):
    """Return ``x.cuda()`` when ``args.gpu`` is truthy, otherwise ``x`` unchanged.

    Reads the module-level ``args`` namespace at call time.
    """
    return x.cuda() if args.gpu else x


from acqdivReadersplit import AcqdivReader, AcqdivReaderPartition

#acqdivCorpusReader = AcqdivReader(args.language)
# NOTE(review): the variable name says "train", but the reader is built for the
# "test" split, and the remark below mentions traindev — the name, the code and
# the comment disagree; confirm which split is intended here.
acqdivCorpusReadertrain = AcqdivReader("test", args.language)
# in the end, this will be test, but for now let's do traindev to avoid overfitting our research


def plus(it1, it2):
    """Yield every item of ``it1`` followed by every item of ``it2``.

    The second iterable is not iterated until the first one is exhausted.
    """
    for source in (it1, it2):
        for item in source:
            yield item


## Read the character vocabulary from VOCAB_HOME + <language> + '-char.txt'.
itos = []
with open(VOCAB_HOME + args.language + '-char.txt', "r") as inFile:
    for line in inFile:
        # Remove leading/trailing whitespace, including the line terminator.
        line = line.strip()
        # NOTE(review): the stripped line is never stored, so `itos` stays empty in
        # the code visible here — this fragment looks truncated; confirm what
        # should follow (e.g. appending the entry to `itos`).
from config import VOCAB_HOME

from acqdivReadersplit import AcqdivReader

import argparse
import random

# Command-line interface: two string-valued options, --language and --datapath.
# argparse derives the attribute names ("language", "datapath") from the flags.
parser = argparse.ArgumentParser()
parser.add_argument("--language", type=str)
parser.add_argument("--datapath", type=str)

# Parse the command line and echo the resulting configuration.
args = parser.parse_args()
print(args)

# Reader over the "train" split of the selected language.
acqdivCorpusReader = AcqdivReader("train", args.language)

# Both collections start empty; this section fills only `vocabulary`.
vocabulary = set()
vocabularychar = set()

# Pass over the corpus: every piece of an utterance delimited by " ; "
# becomes one vocabulary entry.
iterator = acqdivCorpusReader.iterator()
for utterance in iterator:
    vocabulary.update(utterance.split(" ; "))
#print(vocabulary)

# Another pass over the corpus, this time filling `vocabularychar`.
iterator = acqdivCorpusReader.iterator()
for utterance in iterator:
    # Turn the " ; " separators into single spaces so one split on " " is enough.
    utterancenew = utterance.replace(" ; ", " ")
    utterancenew = utterancenew.split(" ")
    # NOTE(review): despite the names `char` / `vocabularychar`, each item here is a
    # whole space-delimited token, not a single character — confirm this is intended.
    for char in utterancenew:
        # Skip tokens that are exactly a newline.
        if char != "\n":
            vocabularychar.add(char)