Example #1
0
def initializeOrderTable():
    """Collect corpus statistics and initial dependent/head order logits.

    Walks every sentence of the "train" and "dev" partitions of
    ``args.language`` and gathers:

    * word frequencies,
    * the set of coarse dependency labels (via ``makeCoarse``),
    * the set of ``(head POS, dependency, dependent POS)`` triples,
    * how often each dependency occurs head-first ("HD") versus
      dependent-first ("DH").

    Side effects: every token dict gets ``line["fine_dep"]`` set to
    ``line["dep"]``, and the module-level ``posFine`` / ``posUni`` sets are
    extended with the tags seen.

    Returns:
        A tuple ``(dhLogits, vocab, keys, depsVocab)`` where

        * ``dhLogits`` maps each triple in ``keys`` to
          ``log(#DH + 1) - log(#HD + 1)`` for the triple's dependency label,
        * ``vocab`` maps word form to occurrence count,
        * ``keys`` is the set of observed triples,
        * ``depsVocab`` is the set of coarse dependency labels.
    """
    orderTable = {}
    keys = set()
    vocab = {}
    depsVocab = set()
    for partition in ["train", "dev"]:
        for sentence in CorpusIterator(args.language, partition).iterator():
            for line in sentence:
                vocab[line["word"]] = vocab.get(line["word"], 0) + 1
                line["fine_dep"] = line["dep"]
                depsVocab.add(makeCoarse(line["fine_dep"]))
                posFine.add(line["posFine"])
                posUni.add(line["posUni"])

                # The root token has no head, so it contributes no ordering
                # evidence.
                if line["fine_dep"] == "root":
                    continue
                posHere = line["posUni"]
                # NOTE(review): assumes "head" is a 1-based position into
                # the sentence list - confirm against CorpusIterator.
                posHead = sentence[line["head"] - 1]["posUni"]
                dep = line["fine_dep"]
                # "HD": the head precedes the dependent; "DH": the reverse.
                direction = "HD" if line["head"] < line["index"] else "DH"
                keyWithDir = (dep, direction)
                orderTable[keyWithDir] = orderTable.get(keyWithDir, 0) + 1
                keys.add((posHead, dep, posHere))

    dhLogits = {}
    for key in keys:
        # Direction counts are stored per dependency label, so look them up
        # by the label (middle element of the triple). Looking them up by
        # the whole triple never matches and yields a logit of 0 everywhere.
        dep = key[1]
        # Add one to each count so unseen directions do not produce log(0).
        hd = orderTable.get((dep, "HD"), 0) + 1.0
        dh = orderTable.get((dep, "DH"), 0) + 1.0
        dhLogits[key] = log(dh) - log(hd)
    return dhLogits, vocab, keys, depsVocab
Example #2
0
# Trainable per-dependency parameters: one slot for each entry of
# itos_pure_deps, all starting at zero.
dhWeights = Variable(torch.FloatTensor(len(itos_pure_deps)).zero_(),
                     requires_grad=True)
distanceWeights = Variable(torch.FloatTensor(len(itos_pure_deps)).zero_(),
                           requires_grad=True)

for i, key in enumerate(itos_pure_deps):
    # Every relation starts at 0.0 except "obj", which receives a large
    # magnitude whose sign is decided by a coin flip.
    dhLogits[key] = (10.0 if random() > 0.5 else -10.0) if key == "obj" else 0.0
    dhWeights.data[i] = dhLogits[key]

    originalDistanceWeights[key] = 0.0  # random()
    distanceWeights.data[i] = originalDistanceWeights[key]

# Materialise both partitions up front (morphology kept, short sentences
# not rejected).
data_train = list(
    CorpusIterator(args.language, "train", storeMorph=True).iterator(
        rejectShortSentences=False))
data_dev = list(
    CorpusIterator(args.language, "dev", storeMorph=True).iterator(
        rejectShortSentences=False))
# print(len(data_train), len(data_dev))
# quit()

words = []

affixFrequency = {}

print(itos_pure_deps)
# From here on the label inventory also contains the pseudo-label "HEAD",
# and the label -> index map is rebuilt from the sorted list.
itos_pure_deps = sorted([*itos_pure_deps, "HEAD"])
stoi_pure_deps = {dep: idx for idx, dep in enumerate(itos_pure_deps)}

# Independent shallow copy of the extended label list.
itos_pure_deps_ = list(itos_pure_deps)
Example #3
0
 def __init__(self,
              language,
              partition="train",
              storeMorph=False,
              splitLemmas=False,
              shuffleData=True):
    """Wrap a ``CorpusIterator`` built from the given options.

    All arguments are forwarded unchanged to ``CorpusIterator``; the
    resulting iterator is stored on ``self.basis``.
    """
    self.basis = CorpusIterator(
        language,
        partition=partition,
        storeMorph=storeMorph,
        splitLemmas=splitLemmas,
        shuffleData=shuffleData,
    )