def __init__(self, use_ssl, host, port, index, engine_name, path, model):
    """Build the engine base URI and create the query helpers.

    Args:
        use_ssl: Scheme selector. ``"True"``, ``"true"``, ``"1"``, ``1`` and
            ``True`` select ``https://``; anything else selects ``http://``.
        host: Host name, concatenated into the URI as a string.
        port: Port, converted with ``str()`` before concatenation.
        index: Stored under the ``"name"`` key of ``self.config``.
        engine_name: Stored in the config; ``"solr"`` additionally appends
            ``solr/`` to the base URI.
        path: Stored in the config unchanged.
        model: Stored in the config and passed to ``EnrichQuery``.
    """
    # Config values may arrive as strings or as real booleans/ints, hence
    # the explicit list of accepted "true" spellings.
    scheme = "https://" if use_ssl in ("True", "true", 1, "1", True) else "http://"
    root = scheme + host + ":" + str(port) + "/"
    base_uri = root + "solr/" if engine_name in ("solr",) else root

    self.config = dict(
        host=base_uri,
        name=index,
        engine_name=engine_name,
        path=path,
        model=model,
    )
    self.uri = base_uri

    # Query helpers; the index query receives the enrich query it was built with.
    self.eq = EnrichQuery(model=model)
    self.iq = IndexQuery(self.config, enrich_query=self.eq)
    self.gq = GraphQuery(self.config)

    # Both connection caches start out empty.
    self.graph_connections = {}
    self.index_connections = {}
def __init__(self, use_ssl, host, port, index, engine_name, path, model, settings):
    """Build the engine base URI, the Skipchunk instance and the query helpers.

    Args:
        use_ssl: Scheme selector. ``"True"``, ``"true"``, ``"1"``, ``1`` and
            ``True`` select ``https://``; anything else selects ``http://``.
        host: Host name, concatenated into the URI as a string.
        port: Port, converted with ``str()`` before concatenation.
        index: Stored under the ``"name"`` key of ``self.config``.
        engine_name: Stored in the config; ``"solr"`` additionally appends
            ``solr/`` to the base URI.
        path: Stored in the config unchanged.
        model: Stored in the config and used as the spaCy model for
            ``Skipchunk`` and the model for ``EnrichQuery``.
        settings: Mapping that must contain ``"fields"``. That entry is kept
            on ``self.fields``; every other entry is forwarded to
            ``Skipchunk`` as a keyword argument. The mapping itself is left
            unmodified.

    Raises:
        KeyError: If ``settings`` has no ``"fields"`` entry.
    """
    uri = "https://" if use_ssl in ["True", "true", True, 1, "1"] else "http://"
    uri += host + ":" + str(port) + "/"
    if engine_name in ["solr"]:
        uri += "solr/"

    self.config = {
        "host": uri,
        "name": index,
        "engine_name": engine_name,
        "path": path,
        "model": model,
    }
    self.uri = uri

    # Read "fields" without popping it: popping would mutate the caller's
    # dict, so building a second instance from the same settings would fail
    # with KeyError. Forward a filtered copy to Skipchunk instead.
    self.fields = settings["fields"]
    skipchunk_kwargs = {
        key: value for key, value in settings.items() if key != "fields"
    }
    self.sc = Skipchunk(self.config, spacy_model=model, **skipchunk_kwargs)
    # NOTE(review): presumably a follow-up to output emitted while Skipchunk
    # is constructed -- confirm against Skipchunk.
    print("But no need to worry, Hello-NLP is saving your stuff.")

    self.eq = EnrichQuery(model=model)
    self.iq = IndexQuery(self.config, enrich_query=self.eq)
    self.gq = GraphQuery(self.config)

    # Both connection caches start out empty.
    self.graph_connections = {}
    self.index_connections = {}
# Corpus to process. For integration testing, point this at
# "blog-posts-one.json" (a single document) instead.
source = "blog-posts.json"

print(timestamp(), " | Initializing")
s = sc.Skipchunk(
    skipchunk_config,
    spacy_model="en_core_web_lg",
    minconceptlength=1,
    maxconceptlength=3,
    minpredicatelength=1,
    maxpredicatelength=3,
    minlabels=1,
    cache_documents=True,
    cache_pickle=True,
)
gq = GraphQuery(skipchunk_config)

if not LOAD:
    # tuplize yields the list of (text, document) tuples that the
    # enrichment step consumes.
    print(timestamp(), " | Loading Content")
    tuples = s.tuplize(filename=source, fields=['title', 'content'])

    # Enrichment is slow on large amounts of text; consider feeding it in
    # batches of about 10k documents.
    print(timestamp(), " | Enriching")
    s.enrich(tuples)
else:
    # LOAD is set: call s.load() (announced as loading the pickle) and skip
    # tuplizing and enriching the source file.
    print(timestamp(), " | Loading Pickle")
    s.load()
def graph_connect(self, name):
    """Return the ``GraphQuery`` for ``name``, creating it on first use.

    Args:
        name: Key into ``self.graph_connections``; also written to the
            ``"name"`` entry of the config given to a newly created
            ``GraphQuery``.

    Returns:
        The object stored in ``self.graph_connections`` under ``name``.
    """
    connections = self.graph_connections

    # Cache hit: hand back the stored connection without touching the config.
    if name in connections:
        return connections[name]

    # Cache miss: work on a copy so self.config keeps its own "name".
    scoped_config = self.config.copy()
    scoped_config["name"] = name
    connection = GraphQuery(scoped_config)
    connections[name] = connection
    return connection