def testVocab():
    # decode the first few vocabulary ids with a BERT and then an XLNet tokenizer
    tokenizer = get_tokenizer(name="bert", model_path=AlphaPathLookUp.BertBaseUnCased)
    ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    print(tokenizer.decode(ids))

    tokenizer = get_tokenizer(name="xlnet", model_path=AlphaPathLookUp.XLNetBaseCased)
    print(tokenizer.decode(ids))
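For orientation, here is a minimal sketch of the kind of factory these examples exercise, assuming get_tokenizer simply dispatches on the name and wraps a HuggingFace tokenizer (the examples access it as tokenizer.tokenizer). The class name, the dispatch table, and the tokenizeLine behaviour below are assumptions for illustration, not programmingalpha's actual implementation.

# Illustrative only: a guess at the factory these snippets exercise.
from transformers import (BertTokenizer, XLNetTokenizer,
                          GPT2Tokenizer, RobertaTokenizer)

_TOKENIZER_CLASSES = {  # hypothetical dispatch table
    "bert": BertTokenizer,
    "xlnet": XLNetTokenizer,
    "gpt2": GPT2Tokenizer,
    "roberta": RobertaTokenizer,
}

class _WrappedTokenizer:
    """Thin wrapper exposing the .tokenizer / tokenize / decode surface used in these examples."""
    def __init__(self, hf_tokenizer):
        self.tokenizer = hf_tokenizer

    def tokenize(self, text):
        return self.tokenizer.tokenize(text)

    def decode(self, ids):
        return self.tokenizer.decode(ids)

    def tokenizeLine(self, text):
        # assumption: ids rendered as a whitespace-joined string, as Example #3 suggests
        ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(text))
        return " ".join(str(i) for i in ids)

def get_tokenizer_sketch(name, model_path):
    return _WrappedTokenizer(_TOKENIZER_CLASSES[name].from_pretrained(model_path))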
Example #2
    def __init__(self, config_file):

        # load the service configuration and build the tokenizer it names
        self.config = AlphaConfig.loadConfig(
            os.path.join(AlphaPathLookUp.ConfigPath, config_file))

        self.tokenizer = get_tokenizer(name=self.config.tokenizer,
                                       model_path=self.config.model_path)

        # paragraph extractor clipped at 100 tokens, filtering with LexRank summarization
        self.textExtractor = InformationAbstrator(maxClip=100, tokenizer=None)
        self.textExtractor.initParagraphFilter(
            self.textExtractor.lexrankSummary)
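The listing does not show the config file that AlphaConfig.loadConfig reads; the sketch below only assumes it exposes the two attributes used above (tokenizer and model_path). The key names, paths, and the stand-in loader are hypothetical.

# Hypothetical config contents; key names mirror the attributes read in __init__.
import types

example_config = {
    "tokenizer": "bert",
    "model_path": "/path/to/bert-base-uncased",  # placeholder path
}

# stand-in for what AlphaConfig.loadConfig presumably returns: keys as attributes
config = types.SimpleNamespace(**example_config)
print(config.tokenizer, config.model_path)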
Example #3
def testBasicFunctions():
    tokenizer=get_tokenizer(name="gpt2", model_path=AlphaPathLookUp.GPT2Base)
    #tokenizer=get_tokenizer(name="roberta", model_path=programmingalpha.RoBertaBase)
    #tokenizer=get_tokenizer(name="bert", model_path=AlphaPathLookUp.BertBaseUnCased)
    
    print(tokenizer.tokenizer.additional_special_tokens)
    print(tokenizer.tokenizer.added_tokens_encoder)
    #exit(10)

    s="I am fantastic [CODE] supreme [MATH] !"
    print(tokenizer.tokenize(s))
    s_ids=tokenizer.tokenizeLine(s)
    print(s)
    print(s_ids)

    for id in s_ids.split():
        print(tokenizer.decode([id]))
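Example #3 relies on [CODE] and [MATH] being registered as additional special tokens on the wrapped HuggingFace tokenizer, which is why they survive tokenization intact; the registration itself is not shown in this listing. With the standard transformers API it can be done as in the sketch below (the checkpoint name is a placeholder).

from transformers import GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained("gpt2")  # placeholder checkpoint
tok.add_special_tokens({"additional_special_tokens": ["[CODE]", "[MATH]"]})

print(tok.additional_special_tokens)  # ['[CODE]', '[MATH]']
print(tok.added_tokens_encoder)       # ids assigned to the newly added tokens
print(tok.tokenize("I am fantastic [CODE] supreme [MATH] !"))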
Example #4
def init():
    # build a module-level tokenizer for the tokenizer name given in args
    global tokenizer
    name = args.tokenizer
    tokenizer = get_tokenizer(path_map_tokenizers[name], name)
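path_map_tokenizers is defined elsewhere in the calling script; a hypothetical definition consistent with this call would map tokenizer names to local model paths, for example:

# Hypothetical mapping; the real one is not part of this listing and the paths are placeholders.
path_map_tokenizers = {
    "bert": "/models/bert-base-uncased",
    "xlnet": "/models/xlnet-base-cased",
    "gpt2": "/models/gpt2",
    "roberta": "/models/roberta-base",
}

Note that this example passes the path positionally before the name, unlike the keyword calls in the other examples, so the order shown here depends on get_tokenizer's actual signature.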
Example #5
    def __init__(self, config_file):
        AlphaHTTPProxy.__init__(self, config_file)
        args = self.args
        # build the tokenizer named in the proxy's args, pointing at the bert-base-uncased files
        self.tokenizer = get_tokenizer(model_path=programmingalpha.BertBaseUnCased,
                                       name=args.tokenizer)
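How requests reach this proxy is part of AlphaHTTPProxy and is not shown in the listing. Purely as an illustration of how the constructed tokenizer could be reused, a handler might call the same methods demonstrated in Example #3; the function and field names below are hypothetical.

# Illustrative only: "service" stands for an instance built by the __init__ above.
def tokenize_request(service, text):
    return {
        "tokens": service.tokenizer.tokenize(text),
        "token_ids": service.tokenizer.tokenizeLine(text),
    }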