Example #1
0
    def __init__(self,use_ssl,host,port,index,engine_name,path,model):
        """Assemble the engine base URI and create the query helpers.

        Args:
            use_ssl: Selects the ``https://`` scheme when it equals one of
                "True", "true", 1, "1" or True; any other value gives
                ``http://``.
            host: Host name, concatenated into the URI as-is.
            port: Port, converted with ``str()`` before concatenation.
            index: Stored in the config under the "name" key.
            engine_name: Stored in the config; when it is "solr" the URI
                gets a trailing "solr/" segment.
            path: Stored in the config under "path".
            model: Stored in the config under "model" and passed to
                ``EnrichQuery``.
        """
        # Scheme first, then "<host>:<port>/".
        scheme = 'https://' if use_ssl in ("True", "true", 1, "1", True) else 'http://'
        authority = host + ":" + str(port)
        base = scheme + authority + "/"

        # Solr is addressed below an extra "solr/" path segment.
        if engine_name in ("solr",):
            base += "solr/"

        self.uri = base
        self.config = dict(
            host=base,
            name=index,
            engine_name=engine_name,
            path=path,
            model=model,
        )

        # Query helpers: the index query receives the enrich query it should use.
        enricher = EnrichQuery(model=model)
        self.eq = enricher
        self.iq = IndexQuery(self.config, enrich_query=enricher)
        self.gq = GraphQuery(self.config)

        # Both connection maps start empty.
        self.graph_connections = {}
        self.index_connections = {}
Example #2
0
    def __init__(self, use_ssl, host, port, index, engine_name, path, model,
                 settings):
        """Build the engine URI, the Skipchunk instance and the query helpers.

        Args:
            use_ssl: Selects the ``https://`` scheme when it equals one of
                "True", "true", True, 1 or "1"; any other value gives
                ``http://``.
            host: Host name, concatenated into the URI as-is.
            port: Port, converted with ``str()`` before concatenation.
            index: Stored in the config under the "name" key.
            engine_name: Stored in the config; when it is "solr" the URI
                gets a trailing "solr/" segment.
            path: Stored in the config under "path".
            model: Stored in the config under "model"; also passed to
                ``Skipchunk`` as ``spacy_model`` and to ``EnrichQuery``.
            settings: Mapping that must contain a "fields" entry. That entry
                is kept on ``self.fields``; every other entry is forwarded
                to ``Skipchunk`` as a keyword argument. The caller's mapping
                is left untouched.

        Raises:
            KeyError: If ``settings`` has no "fields" entry.
        """
        uri = ""

        if use_ssl in ["True", "true", True, 1, "1"]:
            uri += 'https://'
        else:
            uri += 'http://'

        uri += host + ":" + str(port) + "/"

        # Solr is addressed below an extra "solr/" path segment.
        if engine_name in ["solr"]:
            uri += "solr/"

        self.config = {
            "host": uri,
            "name": index,
            "engine_name": engine_name,
            "path": path,
            "model": model
        }

        self.uri = uri

        # Pop "fields" from a shallow copy: popping from the caller's mapping
        # would silently change it and make a second construction with the
        # same settings object fail with KeyError.
        settings = dict(settings)
        self.fields = settings.pop("fields")

        # "fields" is not forwarded to Skipchunk; the remaining settings are.
        self.sc = Skipchunk(self.config, spacy_model=model, **settings)
        print("But no need to worry, Hello-NLP is saving your stuff.")

        self.eq = EnrichQuery(model=model)
        self.iq = IndexQuery(self.config, enrich_query=self.eq)
        self.gq = GraphQuery(self.config)

        # Both connection maps start empty.
        self.graph_connections = {}
        self.index_connections = {}
Example #3
0
    # File name passed to s.tuplize() below when content is not loaded from a pickle.
    source = "blog-posts.json"
    #source = "blog-posts-one.json" #Single document for integration testing

    print(timestamp(), " | Initializing")

    # Skipchunk instance configured from skipchunk_config (defined outside this
    # fragment) plus the spaCy model name and the min/max length settings.
    # NOTE(review): the cache_* flags presumably control what s.load() reads
    # back later - confirm against the Skipchunk implementation.
    s = sc.Skipchunk(skipchunk_config,
                     spacy_model="en_core_web_lg",
                     minconceptlength=1,
                     maxconceptlength=3,
                     minpredicatelength=1,
                     maxpredicatelength=3,
                     minlabels=1,
                     cache_documents=True,
                     cache_pickle=True)

    # Graph query helper built from the same config; not used in the visible lines.
    gq = GraphQuery(skipchunk_config)

    # LOAD is defined outside this fragment. When truthy, s.load() is called
    # instead of reading and enriching the source file.
    if LOAD:
        print(timestamp(), " | Loading Pickle")
        s.load()

    else:

        # Produces a list of (text,document) tuples ready for processing by the enrichment.
        print(timestamp(), " | Loading Content")
        tuples = s.tuplize(filename=source, fields=['title', 'content'])

        # Enriching can take a long time if you provide lots of text.  Consider batching at 10k docs at a time.
        print(timestamp(), " | Enriching")
        s.enrich(tuples)
Example #4
0
 def graph_connect(self, name):
     """Return the GraphQuery registered under ``name``, creating it on first use.

     A new connection is built from a copy of ``self.config`` whose "name"
     entry is replaced by ``name``; it is stored in
     ``self.graph_connections`` and returned on later calls with the same
     name.
     """
     connections = self.graph_connections

     # Already created for this name: hand back the stored instance.
     if name in connections:
         return connections[name]

     # Copy first so the shared config keeps its own "name" value.
     per_graph_config = self.config.copy()
     per_graph_config["name"] = name

     connection = GraphQuery(per_graph_config)
     connections[name] = connection
     return connection