Example #1
def Test25():
    """Smoke-test the training DataLoader: load the parallel corpora,
    batch them, and print every field of every batch.

    Side effects: reads ``src.sents``/``src.vocab`` and
    ``tgt.sents``/``tgt.vocab`` from the working directory and prints to
    stdout. Returns None.
    """
    MaxLength = 30
    BatchSize = 2
    # NOTE(review): the original also set EmbeddingSize/HeadNum here but
    # never used them — removed as dead locals.
    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    BatchDatas = DL.TrainDataLoaderCreator(TrainDataset, BatchSize)
    for Batch in BatchDatas:
        # Print the four batch fields in their original order.
        for Key in ("SrcSent", "SrcLength", "TgtSent", "TgtLength"):
            print(Batch[Key])
Example #2
def Test21():
    """Stress-test the training DataLoader: iterate it 100 times and print
    each batch's source-tensor size.

    NOTE(review): ``CollateFunction`` below is defined but never passed to
    ``DL.TrainDataLoaderCreator``, so the loader uses whatever collation it
    applies by default — confirm whether it was meant to be wired in.

    Side effects: reads corpus/vocab files from the working directory and
    prints to stdout. Returns None.
    """
    MaxLength = 30

    def CollateFunction(Batch):
        """Collate (src, tgt) sample pairs into a dict of lists, then turn
        the sentence lists into LongTensors (lengths stay plain lists)."""
        OutputBatch = {
            "SrcSent": [],
            "SrcLength": [],
            "TgtSent": [],
            "TgtLength": []
        }
        # Each element is ((src_sent, src_len), (tgt_sent, tgt_len)).
        for Elem in Batch:
            OutputBatch["SrcSent"].append(Elem[0][0])
            OutputBatch["SrcLength"].append(Elem[0][1])
            OutputBatch["TgtSent"].append(Elem[1][0])
            OutputBatch["TgtLength"].append(Elem[1][1])
        OutputBatch["SrcSent"] = t.LongTensor(OutputBatch["SrcSent"])
        OutputBatch["TgtSent"] = t.LongTensor(OutputBatch["TgtSent"])
        return OutputBatch

    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    Loader = DL.TrainDataLoaderCreator(TrainDataset, 4)
    # Idiomatic replacement for the original manual while/Count loop:
    # sweep the whole loader 100 times.
    for _ in range(100):
        for Batch in Loader:
            print("Batch")
            print(Batch["SrcSent"].size())
Example #3
def Test26():
    """Forward a batched corpus through a TransformerNMTModel and print the
    input/output tensor sizes for each step.

    Side effects: reads corpus/vocab files from the working directory,
    builds a model, and prints to stdout. Returns None.
    """
    MaxLength = 30
    BatchSize = 2
    EmbeddingSize = 4
    HeadNum = 2
    EnLayer = 2
    DeLayer = 2
    SrcIndSentences, SrcLength, SrcDict = DL.LoadData("src.sents", "src.vocab",
                                                      MaxLength)
    TgtIndSentences, TgtLength, TgtDict = DL.LoadData("tgt.sents", "tgt.vocab",
                                                      MaxLength)
    TrainDataset = DL.TrainCorpusDataset(SrcIndSentences, SrcLength,
                                         TgtIndSentences, TgtLength)
    BatchDatas = DL.TrainDataLoaderCreator(TrainDataset, BatchSize)
    SrcVocabularySize = SrcDict.VocabularySize()
    TgtVocabularySize = TgtDict.VocabularySize()
    Trans = T.TransformerNMTModel(HeadNum, EmbeddingSize, SrcVocabularySize,
                                  TgtVocabularySize, MaxLength, EnLayer,
                                  DeLayer)
    # start=1 gives 1-based step numbers directly (was BatchInd + 1).
    for Step, Batch in enumerate(BatchDatas, start=1):
        print("BeginBatch")  # fixed typo: was "BegingBatch"
        SrcSent = Batch["SrcSent"]
        print(SrcSent.size())
        SrcLength = Batch["SrcLength"]
        TgtSent = Batch["TgtSent"]
        print(TgtSent.size())
        TgtLength = Batch["TgtLength"]
        # Convert per-sentence lengths into boolean padding masks.
        SrcMask = T.BatchLengthToBoolTensorMask(SrcLength, MaxLength)
        TgtMask = T.BatchLengthToBoolTensorMask(TgtLength, MaxLength)
        Output = Trans(SrcSent, TgtSent, SrcMask, TgtMask)
        print("Step")
        print(Step)
        print(Output.size())
        print(Output[0][2])
def TestBuildTransformer():
    """Build a TransformerNMTModel from Model/Config.json plus the test data.

    Reads the hyper-parameters from ``Model/Config.json``, loads the test
    corpus and both vocabularies, and constructs the model.

    Returns:
        (model, test batch loader, source dict, target dict, max length).
    """
    # Parse the configuration first; the file handle is not needed afterwards.
    with open("Model/Config.json") as Fd:
        Cfg = json.load(Fd)
    MaxLength = Cfg["MaxLength"]

    # Test-side data: source sentences plus both vocabularies.
    SrcIndSentences, SrcLength, SrcDict = DLoad.LoadData(
        "Data/test.sents", "Data/src.vocab", MaxLength)
    TgtDict = DLoad.LoadVocabulary("Data/tgt.vocab")
    TestDataset = DLoad.TestCorpusDataset(SrcIndSentences, SrcLength)
    BatchDatas = DLoad.TestDataLoaderCreator(TestDataset, Cfg["BatchSize"])

    print("Building Model")
    Trans = TransformerNMTModel(
        Cfg["HeadNum"], Cfg["EmbeddingSize"], SrcDict.VocabularySize(),
        TgtDict.VocabularySize(), MaxLength, Cfg["EnLayer"], Cfg["DeLayer"])
    print("Model building finished")
    return Trans, BatchDatas, SrcDict, TgtDict, MaxLength
Example #5
def Test17():
    """Load the source corpus (max length 30) and dump every indexed
    sentence, then every sentence length, to stdout."""
    Sentences, Lengths, _Dict = DL.LoadData("src.sents", "src.vocab", 30)
    for Item in Sentences:
        print(Item)
    for Item in Lengths:
        print(Item)