Esempio n. 1
0
    def __init__(self,
                 datasetFile,
                 textDir,
                 checking_folder,
                 lang,
                 client_txt,
                 pre_trained_gen,
                 pre_trained_disc,
                 ID,
                 batch_size=1):
        """Restore a pre-trained GAN pair and prepare the text vectorizer.

        :param datasetFile: dataset file handed to CorpusLoader
        :param textDir: directory of text data handed to CorpusLoader
        :param checking_folder: output/working folder kept on the instance
        :param lang: language tag kept on the instance
        :param client_txt: client text kept on the instance
        :param pre_trained_gen: path to the generator checkpoint
        :param pre_trained_disc: path to the discriminator checkpoint
        :param ID: identifier stored as ``self.filename``
        :param batch_size: batch size (default 1)
        """
        # Lightweight bookkeeping first; heavy model loading below.
        self.checking_folder = checking_folder
        self.lang = lang
        self.client_txt = client_txt
        self.filename = ID
        self.batch_size = batch_size

        # Wrap both networks in DataParallel, then restore their weights
        # from the given checkpoint files.
        gen_net = gan_factory.generator_factory('gan').cuda()
        self.generator = torch.nn.DataParallel(gen_net)
        self.generator.load_state_dict(torch.load(pre_trained_gen))

        disc_net = gan_factory.discriminator_factory('gan').cuda()
        self.discriminator = torch.nn.DataParallel(disc_net)
        self.discriminator.load_state_dict(torch.load(pre_trained_disc))

        # Vocabulary/vectorizer trained from the corpus on disk.
        loader = CorpusLoader(datasetFile=datasetFile, textDir=textDir)
        self.vectorizer = loader.TrainVocab()
Esempio n. 2
0
    def load_corpus(self, name, files, min=15, max=100, merge=False):
        '''
        Load one or more text files into a CorpusLoader, tokenize, expand
        with the taxonomy, and register the result under ``name``.

        :param name: key for dictionary entry in self.corpora
        :param files: list of files
        :param min, max: min and max length of sentences
        :param merge: one or two text elements. one if true
        :return: None
        '''

        loader = CorpusLoader(files[0], min, max)

        # Any files beyond the first are appended to the same loader.
        for extra_file in files[1:]:
            loader.add_Corpus(extra_file, min, max)

        if merge:
            loader.mergeData()

        loader.containing.append(name)
        loader.tokenize()

        # Taxonomy expansion produces the corpus that is actually stored.
        self.corpora[name] = self.tax.expandTax(loader)
Esempio n. 3
0
def load_corpus(name, files, min=15, max=100, merge=True):
    """Load two corpus files into one CorpusLoader and register it globally.

    Fix: the original passed the *builtin* ``min``/``max`` functions to
    ``CorpusLoader`` — no module-level integers of those names are visible —
    which cannot be valid sentence-length bounds.  They are now keyword
    parameters defaulting to 15/100, the same bounds used by the method
    variant of ``load_corpus`` elsewhere in this project, so existing
    callers remain compatible.

    :param name: key under which the loader is stored in the module-level
        ``corpora`` dict (assumed to exist at module scope — TODO confirm)
    :param files: sequence with at least two file paths; the first seeds the
        loader, the second is added to it
    :param min, max: min and max sentence length
    :param merge: merge the loaded text elements into one when True
    :return: None
    """
    CL = CorpusLoader(files[0], min, max)
    CL.add_Corpus(files[1], min, max)

    if merge:
        CL.mergeData()

    corpora[name] = CL

    print(name + " loaded...")
Esempio n. 4
0
def loadData():
    """Load the raw IBM corpus, print its stats, and return the data."""
    file = "data/corpus/Metalogue_extractedLinks_fullCorpus.txt"
    file2 = "data/corpus/Metalogue_Corpus_NegativePhrases.txt"
    file3 = "data/corpus/IBM_extracted_raw.txt"

    loader = CorpusLoader()
    loader.load(file3)

    # Earlier experiments merged in a second corpus and collapsed labels;
    # kept here for reference:
    #loader.add_Corpus(file2)
    #loader.mergeLabel("justification", "evidence", "contingency")
    loader.stats(loader.data)
    print("DONE")

    return loader.data
Esempio n. 5
0
    def __init__(self, datasetFile, imagesDir, textDir, split, arrangement,
                 sampling):
        """Record dataset locations/config and train the text vectorizer.

        :param datasetFile: dataset file, also handed to CorpusLoader
        :param imagesDir: directory containing the images
        :param textDir: directory of text data, also handed to CorpusLoader
        :param split: dataset split selector
        :param arrangement: arrangement config dict (wrapped in EasyDict)
        :param sampling: sampling config dict (wrapped in EasyDict)
        """
        self.datasetFile, self.imagesDir = datasetFile, imagesDir
        self.textDir, self.split = textDir, split

        # EasyDict gives attribute-style access to the two config dicts.
        self.arrangement = easydict.EasyDict(arrangement)
        self.sampling = easydict.EasyDict(sampling)

        # Map of image -> class, populated by assign_classes().
        self.images_classes = {}
        self.assign_classes()

        loader = CorpusLoader(datasetFile=datasetFile, textDir=textDir)
        self.vectorizer = loader.TrainVocab()
Esempio n. 6
0
    def mergeCorpora(self, corpora):
        '''
        Merge several registered corpora into one new CorpusLoader.

        :param corpora: list of self.corpora keys
        :return: the merged CorpusLoader
        '''

        merged = CorpusLoader()

        # Gather the selected corpora; record each key on the new loader.
        selection = [self.corpora[key] for key in corpora]
        for key in corpora:
            merged.containing.append(key)

        merged.mergeWithCorpus(selection)

        return merged
Esempio n. 7
0
    def __init__(self):
        """Load the corpus and set up lemmatizer, TF-IDF model, and greetings."""
        self.corpusLoader = CorpusLoader()
        self.corpus = self.corpusLoader.load_corpus()
        # Known inputs are the corpus keys; log the full mapping for debugging.
        self.input_sentences = list(self.corpus.keys())
        logging.debug(pformat(self.corpus))

        self.lemmer = WordNetLemmatizer()
        # Use our own tokenizer so lemmatization feeds the TF-IDF model.
        self.tfIdfVec = TfidfVectorizer(tokenizer=self.tokenize)
        # Minimum cosine similarity for a corpus match.
        self.similarity_threshold = 0.30

        # Keyword matching for greetings.
        self.GREETING_INPUTS = ("hello", "hi", "greetings", "sup",
                                "what's up", "hey")
        self.GREETING_RESPONSES = ("hi", "hey", "*nods*", "hi there", "hello",
                                   "I am glad! You are talking to me")