def load_config():
    """
    Load the application configuration.
    :return: ConfigParser instance with the settings read from <environment>.ini
    """
    config = ConfigParser.ConfigParser()
    here = os.path.abspath(os.path.dirname(__file__))
    config_file = os.path.join(here, '../' + environment + '.ini')
    config.read(config_file)

    # Global configuration parameters
    nltk.data.path.append(config.get('nltk', 'data_dir'))
    nlpnet.set_data_dir(config.get('nlpnet', 'data_dir'))

    # Logging
    logging.config.fileConfig(config_file)

    # Cache configuration
    cache_opts = {
        'cache.regions': config.get('lbsociam', 'cache.regions'),
        'cache.type': config.get('lbsociam', 'cache.type'),
        'cache.short_term.expire': config.get('lbsociam', 'cache.short_term.expire'),
        'cache.default_term.expire': config.get('lbsociam', 'cache.default_term.expire'),
        'cache.long_term.expire': config.get('lbsociam', 'cache.long_term.expire')
    }
    cache = CacheManager(**parse_cache_config_options(cache_opts))

    return config
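For reference, a minimal sketch of the <environment>.ini file that load_config reads. The section and option names come from the config.get calls above; the values are illustrative assumptions, and logging.config.fileConfig additionally requires the standard [loggers]/[handlers]/[formatters] sections, omitted here:

[nltk]
data_dir = /usr/share/nltk_data

[nlpnet]
data_dir = /usr/share/nlpnet-data

[lbsociam]
; illustrative values only
cache.regions = short_term, default_term, long_term
cache.type = memory
cache.short_term.expire = 3600
cache.default_term.expire = 86400
cache.long_term.expire = 604800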
def get_dependencies(self, dependency_string):
    """
    Returns dependency_string with sentence dependencies included.
    """
    nlpnet.set_data_dir(self.get_data_dir_path())
    dependency_parser = nlpnet.DependencyParser()
    return dependency_parser.parse(dependency_string)
def tokenize(self, tokenize_string):
    """
    Returns the tokenized version of tokenize_string, which is
    just a normal English sentence.
    """
    # Setting up the nlpnet parser
    nlpnet.set_data_dir(self.get_data_dir_path())
    pos_parser = nlpnet.POSTagger()
    return pos_parser.tag(tokenize_string)
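The two methods above belong to a wrapper class that exposes get_data_dir_path(); a minimal usage sketch, assuming an instance named wrapper and an illustrative sentence (neither appears in the original snippets):

# Hypothetical driver for tokenize()/get_dependencies(); the instance name
# and the example sentence are assumptions.
tags = wrapper.tokenize('The cat sat on the mat.')
# POSTagger.tag returns one list of (token, tag) pairs per sentence
for token, tag in tags[0]:
    print(token, tag)

parsed = wrapper.get_dependencies('The cat sat on the mat.')
# DependencyParser.parse returns parsed sentences exposing tokens and labels
print(parsed[0].tokens)
print(parsed[0].labels)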
def __init__(self, parent, *args, **kwargs):
    wx.Panel.__init__(self, parent=parent, *args, **kwargs)
    self._init_grid()

    self.Bind(wx.EVT_BUTTON, self.on_run, self.button_run)
    self.Bind(wx.EVT_BUTTON, self.on_load, self.button_load)
    self.Bind(wx.EVT_BUTTON, self.on_save, self.button_save)
    self.Bind(wx.EVT_SPINCTRL, self.on_change_font, self.font_spin)

    nlpnet.set_data_dir('data')
    self.pos_tagger = nlpnet.POSTagger(language='pt')
    self.srl_tagger = nlpnet.SRLTagger(language='pt')
# -*- coding: utf-8; -*-
import nlpnet
import pandas as pd

nlpnet.set_data_dir('/Users/danielfalci/Downloads/srl-pt')
tagger = nlpnet.SRLTagger()


def getByPredicate(predicate, result):
    # when nothing is found at all
    if len(result.arg_structures) == 0:
        return {}, []
    for este in result.arg_structures:
        if este[0] == predicate:
            return este[1], result.tokens
    # when the predicate is not found
    return {}, []


def handleTag(tag):
    if tag.startswith('V:') or (tag.startswith('A') and ':' in tag):
        quantidade = int(tag[tag.find(':') + 1:])
        tagFinal = tag[:tag.find(':')]
        if quantidade == 1:
            return [u'(' + tagFinal + u'*' + tagFinal + u')']
        else:
            temp = []
            for i in xrange(0, quantidade):
                if i == 0:
                    temp.append(u'(' + tagFinal + u'*')
                elif i < quantidade - 1:
                    # middle tokens of the span carry no bracket
                    temp.append(u'*')
                else:
                    # last token closes the span
                    temp.append(u'*' + tagFinal + u')')
            return temp
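The snippet was cut off mid-loop; the two trailing branches above are a reconstruction based on the CoNLL-style bracket pattern the function emits. Illustrative expected behaviour under that reconstruction:

# Illustrative only: expected outputs under the reconstructed loop above.
assert handleTag(u'A1:1') == [u'(A1*A1)']
assert handleTag(u'A0:3') == [u'(A0*', u'*', u'*A0)']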
import time
import os
import nlpnet
# import nltk, sys
# from alpes_core.textProcess import stemming
# from nltk.corpus import floresta
# from nltk.probability import FreqDist
# from nltk import word_tokenize as wt

#############################################################################
### POINTS TO THE DATA USED FOR SYNTACTIC CLASSIFICATION
### USES THE NLPNET POS TAGGER
### COPY THE NLPNET-DATA FOLDER TO THE SERVER
### SET THE CORRECT PATH INSIDE THE SERVER!!!
#nlpnet.set_data_dir('/home/panceri/nlpnet-data/pos-pt')
nlpnet.set_data_dir(
    os.path.join(os.path.dirname(__file__), '../../../nlpnet-data'))
#############################################################################

#############################################################################
## Execution logic for the Alpes processing core
## Applies text pre-processing techniques to help compare and search
## for similar texts
## Techniques implemented:
## 1 - Case folding
## 2 - Replacement of accented characters with unaccented ones
## 3 - Punctuation removal
## 4 - Stopword removal
## 5 - Stemming
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('type',
                    help='Format of the embeddings. See the description below.',
                    choices=['plain', 'senna', 'gensim', 'word2embeddings'])
parser.add_argument('embeddings', help='File containing the actual embeddings')
parser.add_argument('-v',
                    help='Vocabulary file, if applicable. '
                         'In SENNA, it is hash/words.lst',
                    dest='vocabulary')
parser.add_argument('-o', help='Directory to save the output',
                    default='.', dest='output_dir')
parser.add_argument('--task',
                    help='Task for which the embeddings will be used. '
                         'It determines the name of the embeddings file. If not given, '
                         'it will be nlpnet-embeddings.npy.',
                    dest='task', default=None,
                    choices=['pos', 'srl', 'srl_boundary',
                             'srl_classify', 'srl_predicates'])
args = parser.parse_args()

nlpnet.set_data_dir(args.output_dir)
output_vocabulary = nlpnet.config.FILES['vocabulary']
if args.task is None:
    output_embeddings = os.path.join(args.output_dir, 'nlpnet-embeddings.npy')
else:
    key = 'type_features_%s' % args.task
    output_embeddings = nlpnet.config.FILES[key]

nlpnet.utils.set_logger(logging.INFO)
logger = logging.getLogger('Logger')

logger.info('Loading data...')
if args.type == 'senna':
    words = read_senna_vocabulary(args.vocabulary)
    matrix = read_plain_embeddings(args.embeddings)
elif args.type == 'plain':
    words = read_plain_vocabulary(args.vocabulary)
        argument = ' '.join(arg_structure[label])
        line = '\t%s: %s' % (label, argument)
        print(line.encode('utf-8'))
    print()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('task', help='Task for which the network should be used.',
                        type=str, choices=['srl', 'pos', 'dependency'])
    parser.add_argument('--data',
                        help='Directory containing trained models (default: current)',
                        type=str, default='.')
    parser.add_argument('-v', help='Verbose mode',
                        action='store_true', dest='verbose')
    parser.add_argument('-t', action='store_true', dest='disable_tokenizer',
                        help='Disable built-in tokenizer. Tokens are assumed '
                             'to be separated by whitespace.')
    parser.add_argument('--lang', dest='lang', default='en',
                        help='Language (used to determine which tokenizer to '
                             'run. Ignored if -t is provided)',
                        choices=['en', 'pt'])
    parser.add_argument('--no-repeat', dest='no_repeat', action='store_true',
                        help='Forces the classification step to avoid '
                             'repeated argument labels (SRL only)')
    args = parser.parse_args()

    logging_level = logging.DEBUG if args.verbose else logging.WARNING
    utils.set_logger(logging_level)
    logger = logging.getLogger("Logger")

    nlpnet.set_data_dir(args.data)
    interactive_running(args)
import nlpnet

nlpnet.set_data_dir(
    '/media/mateus/Data/Main/Projects/ufpb/fact-check/classificator/nlpnet_data/pos-pt'
)
tagger = nlpnet.POSTagger()

while True:
    text = input()
    print(tagger.tag(text))
from numpy import array
from nltk.probability import FreqDist
#from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
from cStringIO import StringIO
import sys
import nlpnet
import codecs
import os
import re
import math
import utils

nlpnet.set_data_dir(os.path.join(os.getcwd(), "nlpnet-data"))
stop_words = stopwords.words('portuguese')
tokenizer = RegexpTokenizer(r'\w+')


class Tadano_Summarizer(object):

    def __init__(self, name, opinions_path, aspect_manager):
        self.__name = name
        self.__aspect_manager = aspect_manager
        self.__sentence_list = {}
        self.__clusters = {}
        self.__aspect_list = {
            key: 0 for key in aspect_manager.get_aspects_reviews(name)
        }
        self.__read_files(opinions_path)
import os
import re
import nlpnet
import logging

pln_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
nlpnet.set_data_dir(os.path.join(pln_dir, u'pos-pt'))
tagger = nlpnet.POSTagger()


def tag_text(text):
    """Add tags to passed text.

    Args:
        text (str): Text to be tagged.

    Return:
        str: Tagged text.

    Example:
        >>> tag_text('Bom dia')
        'Bom/ADJ dia/N'
    """
    if text.replace(' ', ''):
        try:
            tags = tagger.tag(text)[0]
            tagged_text = ' '.join(['{}/{}'.format(x, y) for (x, y) in tags])
            return tagged_text
        except Exception:
            logging.exception(u'Error tagging text: "%s"', text)
import re
import nlpnet

filenames = ['como', 'direta', 'existe', 'o_que', 'por_que', 'posso', 'qual']

nlpnet.set_data_dir('/home/jpegx100/develop/lpln/pos-pt')
tagger = nlpnet.POSTagger()

for filename in filenames:
    tagged_questions = list()
    with open('./no_stopwords/{}_no_st.txt'.format(filename), 'r') as arq:
        text = arq.read()

    questions = text.split('\n')
    for quest in questions:
        # type_id = re.search(r'_ (.*?) _', quest)
        # no_type_id = re.sub(r'_ .*? _', 'TYPE_ID', quest)
        no_type_id = quest
        tagged_question = list()
        words = no_type_id.split()
        while words:
            word = words.pop(0)
            if word == '_':
                tagged_question.append('_')
                continue
            if word.startswith('*'):
                markeds = [word]
                while words and not markeds[-1].endswith('*'):
                    markeds.append(words.pop(0))
import re
import string

LANGUAGE = 'portuguese'

# used for: tagger for Portuguese
import nlpnet
# used for: tagger for English, tokenizer, stopwords lists, stemmer
import nltk
from nltk.stem import RSLPStemmer, PorterStemmer

# NLP variables
stemmer = dict(portuguese=RSLPStemmer(), english=PorterStemmer())
nlpnet.set_data_dir("language/portuguese")
nlpnet_POSTagger = nlpnet.POSTagger()

# lists of negation words
negation_words = {
    'portuguese': [
        "sem", "jamais", "nada", "nem", "nenhum", "ninguém", "nunca", "não",
        "tampouco", "nao", "ñ", "ninguem", "longe", "evitar", "impedir",
        "perder", "tirar"
    ],
    'english': [
        "never", "neither", "nobody", "no", "none", "nor", "nothing",
        "nowhere", "not", 'n\'t'
    ]
}

LANGUAGE_DIR = 'language'
stopwords = []
def make_pos(self, path='./data/tweentsentbr/resources/pos-pt'):
    nlpnet.set_data_dir(path)
    self.tagger = nlpnet.POSTagger()
# Part-of-Speech-Tagging
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 11 13:30:28 2017

@author: d7-02
"""
import nlpnet

nlpnet.set_data_dir('dependency')
#parser = nlpnet.DependencyParser('dependency', language='en')
#tagger = nlpnet.POSTagger('/path/to/pos-model/', language='pt')
tagger = nlpnet.POSTagger()
print(tagger.tag(u"I want to book a flight from Delhi to Pune on Sunday"))
#parsed_text = parser.parse(u'I want to book a flight from Delhi to Pune on Sunday')
#sent = parsed_text[0]
#print(sent.to_conll())
# <-----> Auxiliary Modules <----->
# <------------------------------->
# imports required by this excerpt (assumed to appear earlier in the original file)
import nlpnet
from nltk.stem import RSLPStemmer, PorterStemmer
# clustering algorithm used in the implementation of RF method
from sklearn.cluster import AgglomerativeClustering
# used to obtain the best alignment in polynomial time (Hungarian method)
from scipy.optimize import linear_sum_assignment

# <----------------------->
# <-----> Variables <----->
# <----------------------->
# NLP variables
stemmer = dict(portuguese=RSLPStemmer(), english=PorterStemmer())
nlpnet.set_data_dir("nlpnet_data/")
nlpnet_POSTagger = nlpnet.POSTagger()

# lists of negation words
negation_words = {
    'portuguese': ["jamais", "nada", "nem", "nenhum", "ninguém", "nunca",
                   "não", "tampouco"],
    'english': [
        "never", "neither", "nobody", "no", "none", "nor", "nothing",
        "nowhere", "not", 'n\'t'
    ]
}

# represents the summary
contrastive_pairs = [(0, 0), (1, 1)]
def use_nlpnet(self, base_string, test_string, pattern_arg):
    """
    Main interface method from the NLPNET class to the rest of the program.
    """
    # Setting up the nlpnet parser
    nlpnet.set_data_dir(self.get_data_dir_path())
    dependency_parser = nlpnet.DependencyParser()
    pos_parser = nlpnet.POSTagger()

    # Getting the passed patterns
    patterns = pattern_arg

    # Parsing the base_string
    base_parse = dependency_parser.parse(base_string)
    base_blob = TextBlob(base_string)
    base_sentences = base_blob.sentences
    base_sentence_info = []

    for index in range(0, len(base_parse)):
        # Grabbing sentence information
        raw_data = str(base_sentences[index])
        pos_sentence = pos_parser.tag(str(base_sentences[index]))
        subject, verb, object, prepositional_phrases = \
            self.identify_sentence_parts_nlpnet(base_parse[index].tokens,
                                                base_parse[index].labels)

        """
        # Displaying information for debugging purposes
        #print "***BASE***"
        #print "Raw Sentence     : " + raw_data
        #print "POS Sentence     : " + str( pos_sentence )
        #print "[ Tokens ]       : " + str( base_parse[ index ].tokens )
        #print "[ Labels ]       : " + str( base_parse[ index ].labels )
        #print "[ Subject ]      : " + subject
        #print "[ Verb ]         : " + verb
        #print "[ Object ]       : " + object
        #print "[ Prep Phrases ] : " + str( prepositional_phrases )
        """

        # Deciding whether the sentence/pattern should be added
        add_sentence = True
        for sentence in base_sentence_info:
            if sentence != []:
                if sentence[len(sentence) - 1] == raw_data:
                    add_sentence = False
                    break

        # If the sentence should be added to the possible patterns, add it
        if add_sentence:
            base_sentence_info.append([subject, verb, object, [], raw_data])

    # Parsing the test_string
    test_parse = dependency_parser.parse(test_string)
    test_blob = TextBlob(test_string)
    test_sentences = test_blob.sentences
    test_sentence_info = []

    for index in range(0, len(test_parse)):
        # Grabbing sentence information
        raw_data = str(test_sentences[index])
        pos_sentence = pos_parser.tag(str(test_sentences[index]))
        subject, verb, object, prepositional_phrases = \
            self.identify_sentence_parts_nlpnet(test_parse[index].tokens,
                                                test_parse[index].labels)

        """
        #print "***TEST***"
        #print "Raw Sentence     : " + raw_data
        #print "POS Sentence     : " + str( pos_sentence )
        #print "[ Tokens ]       : " + str( test_parse[ index ].tokens )
        #print "[ Labels ]       : " + str( test_parse[ index ].labels )
        #print "[ Subject ]      : " + subject
        #print "[ Verb ]         : " + verb
        #print "[ Object ]       : " + object
        #print "[ Prep Phrases ] : " + str( prepositional_phrases )
        """

        # Deciding whether the sentence/pattern should be added
        add_sentence = True
        for sentence in test_sentence_info:
            if sentence != []:
                if sentence[len(sentence) - 1] == raw_data:
                    add_sentence = False
                    break

        # If the sentence should be added to the possible patterns, add it
        if add_sentence:
            test_sentence_info.append([subject, verb, object, [], raw_data])

    # Returning the patterns found in the text
    return self.identify_common_patterns(base_sentence_info,
                                         test_sentence_info, patterns)
# coding=utf-8
"""
Semantic module.

It provides:
- Synonyms
- Antonyms
- Semantic Role Labeling (SRL)
"""
import nltk
import nlpnet
import nlpnet.config

nlpnet.set_data_dir('nlpnet')  # replace by data

__all__ = ['synsets', 'antonyms', 'srl']


def synsets(token):
    """
    get a set of words which are synonyms to the token
    :param token: one token string
    :return: list
    """
    synset = []
    return synset


def antonyms(token):
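The synsets stub above could plausibly be filled in with NLTK's WordNet interface (the module already imports nltk); a minimal sketch, not part of the original module, assuming the wordnet corpus has been downloaded:

# Hypothetical WordNet-backed implementation of synsets(); assumes
# nltk.download('wordnet') has been run. Not part of the original module.
from nltk.corpus import wordnet


def synsets_wordnet(token):
    names = set()
    for syn in wordnet.synsets(token):
        for lemma in syn.lemmas():
            names.add(lemma.name())
    return sorted(names)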
'''
Created on 17/12/2014

@author: Roque Lopez
'''
from __future__ import unicode_literals
from nltk.tag import brill
import unicodedata
import itertools
import nlpnet
import utils
import codecs
import os
import re

nlpnet.set_data_dir(str("../resource//nlpnet_data/"))


class Ganesan_Summarizer(object):
    '''
    Class that implements Ganesan method
    '''

    def __init__(self, name, opinions_path, aspect_manager):
        self.__name = name
        self.__aspect_manager = aspect_manager
        self.__data = {}
        self.__aspect_frequency = {}
        self.__tagger = nlpnet.POSTagger()
        self.__read_files(opinions_path)

    def __read_files(self, opinions_path):
'''
Created on 17/12/2014

@author: Roque Lopez
'''
from __future__ import unicode_literals
from nltk.tag import brill
import unicodedata
import itertools
import nlpnet
from uteis import utils_opizer
import codecs
import os
import re

file_path = __file__[:-(len(__name__ + ".py"))]
nlpnet.set_data_dir(file_path + "resource/nlpnet_data/")


class Ganesan_Summarizer(object):
    '''
    Class that implements Ganesan method
    '''

    def __init__(self, review_data, review_key, id_key, aspect_manager):
        self.__name = 'produto'
        self.__aspect_manager = aspect_manager
        self.__data = {}
        self.__aspect_frequency = {}
        self.__tagger = nlpnet.POSTagger()
        self.__read_files(review_data, review_key, id_key)

    def __read_files(self, review_data, review_key, id_key):
# PCP,
import matplotlib.pyplot as plt
import nlpnet

nlpnet.set_data_dir('pos-pt/')
nlpnet_POSTagger = nlpnet.POSTagger()

FUND, MEDIO = 0, 1

filenames = list()
filenames.append([('ENSINO_FUNDAMENTAL_amostras_corpus/part' + str(i) +
                   '_ENSINO_FUNDAMENTAL_historia_e_geografia.txt')
                  for i in range(171)])
filenames.append(list())
for i in range(70):
    filenames[MEDIO].append('ENSINO_MEDIO_amostras_corpus/part' + str(i) +
                            '_ENSINO_MEDIO_ciencias_humanas.txt')
for i in range(127):
    filenames[MEDIO].append('ENSINO_MEDIO_amostras_corpus/part' + str(i) +
                            '_ENSINO_MEDIO_ciencias_humanas_II.txt')

disconsidered_text_tags = [
    '<title>', '</title>', '<subtitle>', '</subtitle>', '<imagem>',
    '<figura>', '<tabela>', '<gráfico>', '[Figura]'
]

tag_list = [
    'ADJ', 'ADV', 'ADV-KS', 'ADV-KS-REL', 'ART', 'CUR', 'IN', 'KC', 'KS',
    'N', 'NPROP', 'NUM', 'PCP', 'PDEN', 'PREP', 'PREP+ADV', 'PREP+ART',
    'PREP+PROADJ', 'PREP+PRO-KS', 'PREP+PRO-KS-REL', 'PREP+PROPESS',
    'PREP+PROSUB', 'PROADJ', 'PRO-KS', 'PRO-KS-REL', 'PROPESS', 'PROSUB',
    'PU', 'V', 'VAUX'
]
def load_tagger(self):
    if not self._data_dir:
        self._data_dir = config['NLPNET_DATA_DIR']
    nlpnet.set_data_dir(self._data_dir)
    self._tagger = nlpnet.POSTagger()
""" Este script eh um script chamador da biblioteca de NLP. https://github.com/erickrf/nlpnet.""" import sys import getopt try: from configparser import ConfigParser except ImportError: from ConfigParser import ConfigParser # ver. < 3.0 import nlpnet CONFIG = ConfigParser() CONFIG.read('setup.ini') nlpnet.set_data_dir(CONFIG.get('attributes', 'setdatadir')) TEXT = '' METHOD = '' try: OPTS, ARGS = getopt.getopt(sys.argv[1:], "ht:m:", ["text=", "method="]) except getopt.GetoptError: sys.exit(1) for opt, arg in OPTS: if opt == '-h': print 'nlpnet2go.py -t <"text to be analyzed"> -m <method [''pos''] OR [''srl'']>' print 'Eg.: python nlpnet2go.py -t "teste do edward" -m pos' sys.exit() elif opt in ("-t", "--text"): TEXT = arg elif opt in ("-m", "--method"): METHOD = arg
    'type',
    help='Format of the embeddings. See the description below.',
    choices=[
        'plain', 'senna', 'gensim', 'word2embeddings', 'single', 'polyglot'
    ])
parser.add_argument('embeddings', help='File containing the actual embeddings')
parser.add_argument('-v',
                    help='Vocabulary file, if applicable. '
                         'In SENNA, it is hash/words.lst',
                    dest='vocabulary')
parser.add_argument('-o', help='Directory to save the output',
                    default='.', dest='output_dir')
args = parser.parse_args()

nlpnet.set_data_dir(args.output_dir)
output_vocabulary = nlpnet.config.FILES['vocabulary']
output_embeddings = nlpnet.config.FILES['type_features']

nlpnet.utils.set_logger(logging.INFO)
logger = logging.getLogger('Logger')

logger.info('Loading data...')
if args.type == 'senna':
    words = read_senna_vocabulary(args.vocabulary)
    matrix = read_plain_embeddings(args.embeddings)
elif args.type == 'plain':
    words = read_plain_vocabulary(args.vocabulary)
    matrix = read_plain_embeddings(args.embeddings)
elif args.type == 'gensim':
    matrix, words = read_gensim_embeddings(args.embeddings)
elif args.type == 'word2embeddings':
def __init__(self, nlpnet_model_dir=''):
    if nlpnet_model_dir != '':
        nlpnet.set_data_dir(nlpnet_model_dir)
    self.tagger = nlpnet.POSTagger()
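A possible call site for the constructor above (the enclosing class name and the model path are assumptions):

# Hypothetical usage; the class name and path are assumptions.
wrapper = NlpnetWrapper(nlpnet_model_dir='/path/to/pos-pt')
print(wrapper.tagger.tag(u'Bom dia'))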
def __init__(self):
    self.cmudict = cmudict.dict()
    nlpnet.set_data_dir("dependency")
    self.tagger = nlpnet.taggers.DependencyParser(language="en")