Ejemplo n.º 1
0
 def __init__(self, kernel: DiscoveryV1):
     """Bind a Discovery client and index the collections of its 'byod' environment.

     Args:
         kernel: Discovery client. Its ``list_environments`` and
             ``list_collections`` responses are indexed like dictionaries.

     Raises:
         IndexError: If the environment listing for 'byod' is empty.
     """
     self.kernel = kernel

     # Only the first environment reported for the name 'byod' is used.
     byod_listing = kernel.list_environments(name='byod')
     self.currentEnvironment = byod_listing['environments'][0]['environment_id']

     # Map each collection name to its id for later lookups.
     collection_listing = kernel.list_collections(
         environment_id=self.currentEnvironment)
     name_to_id = {}
     for entry in collection_listing["collections"]:
         name_to_id[entry["name"]] = entry["collection_id"]
     self.collectionIndex = name_to_id

     # No query has been run yet.
     self.query_response = None
Ejemplo n.º 2
0
def connect():
    """Connect to Watson Discovery and locate the 'crimereports' collection.

    Returns:
        A 7-tuple ``(discovery, environments, watson_environments,
        watson_environment_id, collections, watson_collections,
        watson_collection)``. ``watson_collection`` is the id of the
        collection named 'crimereports', or ``None`` when no collection
        carries that name.

    Raises:
        IndexError: If no environment is named 'my_environment'.
    """
    discovery = DiscoveryV1(username="******",
                            password="******",
                            version="2017-11-07")

    environments = discovery.get_environments()

    watson_environments = [
        env for env in environments['environments']
        if env['name'] == 'my_environment'
    ]
    watson_environment_id = watson_environments[0]['environment_id']

    collections = discovery.list_collections(watson_environment_id)
    watson_collections = list(collections['collections'])

    # Start from None so a missing 'crimereports' collection yields None in
    # the result instead of an UnboundLocalError at the return statement.
    watson_collection = None
    for entry in watson_collections:
        if entry['name'] == 'crimereports':
            # A later match overwrites an earlier one, as before.
            watson_collection = entry['collection_id']

    return (discovery, environments, watson_environments,
            watson_environment_id, collections, watson_collections,
            watson_collection)
Ejemplo n.º 3
0
def askDiscovery2():
    """Run one Discovery round trip, using getQuery2, and return the passage.

    Prints the environment, the collections, the query options and the raw
    query results along the way.

    Returns:
        Whatever ``getPassage`` produces for the query results.
    """
    divider = "--------------------***--------------------"

    print("Discovery service intiated")
    creds = getIBMCreds()
    client = DiscoveryV1(version='2017-11-07',
                         username=creds['username'],
                         password=creds['password'])

    # The second environment in the listing is the one that gets queried.
    environment = client.list_environments()['environments'][1]
    print("Environment Details:\n {}".format(environment))
    environment_id = environment['environment_id']

    available = client.list_collections(environment_id)['collections']
    print("Collection: \n{}".format(available))

    question = getQuery2()
    options = constructQuery(question)
    print("Query options are: {}".format(options))

    results = getQueryResults((environment_id, available, client), options)
    print("Query results: {}".format(results))

    passage = getPassage(results)
    print(divider)
    print("Passage after cleaning is: ")
    print(divider)
    print(passage)
    return passage
Ejemplo n.º 4
0
def main():
    """Create a Discovery client, reset the result CSV and print each answer.

    The questions come from ``readQuestion``. Every question is sent through
    ``get_natural_language_query`` and the response is printed as indented
    JSON.
    """
    print("cutlture")

    # Build the Discovery client.
    client = DiscoveryV1(
        url="https://gateway.watsonplatform.net/discovery/api",
        version='2018-03-05',
        username="******",
        password="******")
    print(client)

    # Source documents; nothing in this function reads the list.
    docs = ['sa.pdf', 'us.pdf', 'eg.pdf', 'in.pdf', 'ch.pdf']

    questions = readQuestion()

    # NOTE(review): only the header row is written; the file is closed
    # before any question is answered, so no result rows reach the CSV.
    columns = ['query', 'top1', 'document_id1']
    with open('/home/osboxes/Desktop/culture/result.csv', 'w') as handle:
        csv.DictWriter(handle, fieldnames=columns).writeheader()

    for question in questions:
        answer = get_natural_language_query(client, question)
        print(json.dumps(answer, indent=2))
Ejemplo n.º 5
0
def main(param):
    """Run a natural-language query against one of the known collections.

    Args:
        param: Mapping with a "collection" entry naming the target collection
            and a "query" entry holding the natural-language question.

    Returns:
        The Discovery query result, or a dict with a single "ERRORE" entry
        when the collection name is unknown or has no credentials configured.
    """
    credentials = {
        "Test_ricerca_e_sviluppo": {
            "APIKEY": "",
            "URL": "",
            "COLL_ID": "",
            "CONFIG_ID": "",
            "ENV_ID": ""
        }
    }  # the keys are missing

    collection_names = ["Test_ricerca_e_sviluppo", "qualiware_test", "Stefal"]
    requested = param["collection"]
    if requested not in collection_names:
        return {"ERRORE": "il collection name non è corretto!"}

    # An accepted name may still lack a credentials entry; report that
    # instead of failing with a KeyError.
    collection_params = credentials.get(requested)
    if collection_params is None:
        return {"ERRORE": "credenziali mancanti per la collection richiesta!"}

    discovery = DiscoveryV1(version="2020-05-22",
                            url=collection_params['URL'],
                            iam_apikey=collection_params["APIKEY"])

    query_result = discovery.query(environment_id=collection_params['ENV_ID'],
                                   collection_id=collection_params['COLL_ID'],
                                   natural_language_query=param["query"],
                                   count=100).get_result()

    return query_result
Ejemplo n.º 6
0
def askDiscovery():
    """Run one Discovery round trip, using getQuery, and return the passage.

    Banner lines are printed around the credential lookup, the query prompt
    and the final passage.

    Returns:
        Whatever ``getPassage`` produces for the query results.
    """
    divider = "--------------------***--------------------"

    print(divider)
    print("IBM CREDENTIALS: ")
    print(divider)
    creds = getIBMCreds()
    print(divider)
    print(divider)

    client = DiscoveryV1(version='2017-11-07',
                         username=creds['username'],
                         password=creds['password'])
    # The second environment in the listing is the one that gets queried.
    environment_id = client.list_environments()['environments'][1][
        'environment_id']
    available = client.list_collections(environment_id)['collections']

    print(divider)
    print("QUERY: ")
    print(divider)
    question = getQuery()
    print(divider)
    print(divider)

    options = constructQuery(question)
    results = getQueryResults((environment_id, available, client), options)
    passage = getPassage(results)

    print(divider)
    print("Passage after cleaning is: ")
    print(divider)
    print(passage)
    print(
        "========================================================================================"
    )
    return passage
Ejemplo n.º 7
0
def main(frase):
    """Query Discovery with a phrase and return the result texts by rank.

    Args:
        frase: Query text; it is printed and sent as the 'query' option.

    Returns:
        Dict mapping "1", "2", ... to the 'text' field of each result, in
        the order the results were returned.
    """
    client = DiscoveryV1(username="", password="", version="2017-11-07")

    print(frase)
    response = client.query('', '', {'query': frase})
    # Serialise and re-parse the response; the loop reads the parsed copy.
    hits = json.loads(json.dumps(response))['results']

    print(len(hits))

    lista = {}
    for position, hit in enumerate(hits):
        # Keys are 1-based strings; the printed index stays 0-based.
        lista[str(position + 1)] = hit['text']
        print(hit['text'])
        print(position)

    return lista
Ejemplo n.º 8
0
def InitDiscovery():
    """Create the global Discovery client and a fresh 'Collection' collection.

    Sets the module globals ``discovery``, ``environmentID`` and
    ``collectionID``. Any existing collection named 'Collection' in the
    chosen environment is deleted first, so documents from an earlier run do
    not mix with the new one.
    """
    global discovery, environmentID, collectionID

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    api_key = 'gR4dbAo_IIcdVYAcL1VAafrQonD9FRJF-Imceur5LPXW'
    service_url = "https://gateway.watsonplatform.net/discovery/api"
    discovery = DiscoveryV1(version='2017-11-07',
                            iam_apikey=api_key,
                            url=service_url)

    # The new collection is created in the second listed environment.
    environment_listing = discovery.list_environments().result
    environmentID = environment_listing['environments'][1]['environment_id']

    # Remove collections left over from a previous run.
    existing = discovery.list_collections(environmentID).result['collections']
    stale_ids = (item['collection_id'] for item in existing
                 if item['name'] == 'Collection')
    for stale_id in stale_ids:
        discovery.delete_collection(environmentID, stale_id)

    created = discovery.create_collection(
        environment_id=environmentID,
        name='Collection',
        description='{collection_desc}').get_result()
    collectionID = created['collection_id']
def handleClientActions(context, actions, watsonResponse):
    """Query the English news collection for the context topic.

    Args:
        context: Mapping providing 'topic'; it receives the query result
            under 'myNews'.
        actions: Not used by this function.
        watsonResponse: Not used by this function.

    Returns:
        The same ``context`` object, updated with 'myNews'.
    """
    print(">>> processing client actions...\n")

    # No url is passed, so the client falls back to its default endpoint.
    client = DiscoveryV1(version='2018-08-01',
                         username='******',
                         password='******')

    # Query the system environment's English news collection, sorted by
    # score and publication date, returning only selected fields.
    search_options = {
        'natural_language_query': context['topic'],
        'deduplicate': "true",
        'sort': "-score,-publication_date",
        'return_fields': 'title,url,publication_date',
    }
    news = client.query('system', 'news-en', **search_options).get_result()

    context.update(myNews=news)
    return context
Ejemplo n.º 10
0
    def __init__(self, credentials):
        """
        Load credentials and instantiate Discovery object.

        Args:
            credentials: Dictionary containing details for the WDS collection, namely API key
                ('iam_apikey'), URL ('url'), API version ('version'), environment ID
                ('environment_id') and collection ID ('collection_id').

        Raises:
            KeyError: Raised if credentials does not contain all of the correct keys.
        """
        # Only the key lookups sit inside the try, so a KeyError raised while
        # constructing the client is not misreported as a missing credential.
        try:
            iam_apikey = credentials['iam_apikey']
            url = credentials['url']
            version = credentials['version']
            environment_id = credentials['environment_id']
            collection_id = credentials['collection_id']
        except KeyError as e:
            raise KeyError(
                "Argument 'credentials' is missing the following key: " +
                str(e)) from e

        # Discovery object
        self._instance = DiscoveryV1(iam_apikey=iam_apikey,
                                     url=url,
                                     version=version)

        # Collection details
        self._environment_id = environment_id
        self.collection_id = collection_id
Ejemplo n.º 11
0
def query_collection(query):
    """Send ``query`` to a fixed Discovery collection and return the response.

    Args:
        query: Passed unchanged as the third argument of ``DiscoveryV1.query``.

    Returns:
        The value returned by ``DiscoveryV1.query``.
    """
    client = DiscoveryV1(username="******",
                         password="******",
                         version="2016-05-05")
    # Fixed target: environment first, collection second.
    target = ("b013b430-3922-41a4-8a9b-4610ac233a42",
              "3d1f4781-f502-441a-a17a-21e60c8aa2ff")
    return client.query(*target, query)
Ejemplo n.º 12
0
def getDiscovery_2():
    """Set ENVIRONMENT_2 / COLLECTION_2 and return a matching Discovery client.

    Returns:
        A ``DiscoveryV1`` client for the Washington DC gateway URL.
    """
    global ENVIRONMENT_2, COLLECTION_2
    # john's discovery
    ENVIRONMENT_2, COLLECTION_2 = (
        "a9e5ef42-6ee3-4b5b-8dbe-ea6c0fce0556",
        "fc628c92-35e7-4ca1-890c-d515f1ab7b4f",
    )
    # NOTE(review): the API key is embedded in source; consider moving it to
    # configuration.
    connection = {
        'version': '2018-12-03',
        'iam_apikey': 'Z5qjSJAEOoxr29_cq2AB2YhDasgd0zKkCQAEBvlTdkLf',
        'url': 'https://gateway-wdc.watsonplatform.net/discovery/api',
    }
    return DiscoveryV1(**connection)
Ejemplo n.º 13
0
def getDiscovery():
    """Set ENVIRONMENT / COLLECTION and return a matching Discovery client.

    Returns:
        A ``DiscoveryV1`` client for the default gateway URL.
    """
    global ENVIRONMENT, COLLECTION
    # joseph's discovery
    ENVIRONMENT, COLLECTION = (
        "45b1c136-c499-42dd-be4a-acfe78aede82",
        "e7d71852-3174-498f-be16-e72f71fb768d",
    )
    # NOTE(review): the API key is embedded in source; consider moving it to
    # configuration.
    connection = {
        'version': '2018-12-03',
        'iam_apikey': 'QOX9E0nMnTC1aQ9ZMRFGA_Nhnm7QskDQkNapK_sBD_Wj',
        'url': 'https://gateway.watsonplatform.net/discovery/api',
    }
    return DiscoveryV1(**connection)
Ejemplo n.º 14
0
    def __init__(self, config):
        """Create the Discovery client and copy the ids from the configuration.

        Args:
            config: Mapping whose 'discovery' entry provides 'username',
                'password', 'version', 'environment_id', 'collection_id_git'
                and 'collection_id_imp'.
        """
        settings = config['discovery']

        login = {
            key: settings[key]
            for key in ('username', 'password', 'version')
        }
        self.discovery = DiscoveryV1(**login)

        # The remaining settings are stored under the same attribute names.
        for attribute in ('environment_id', 'collection_id_git',
                          'collection_id_imp'):
            setattr(self, attribute, settings[attribute])
Ejemplo n.º 15
0
    def __init__(self, docs, args):
        """Store the documents and build a Discovery client from ``args``.

        Args:
            docs: Stored unchanged on ``self.docs``.
            args: Mapping providing 'hostname', 'disco_username',
                'disco_password', 'disco_env_id' and 'disco_collection_id'.
        """
        # Zero-argument super(): super(type(self), self) recurses forever as
        # soon as this class is subclassed, because type(self) is then the
        # subclass and the lookup lands on this same __init__ again.
        super().__init__()
        hostname = args["hostname"]
        disco_username = args["disco_username"]
        disco_password = args["disco_password"]

        self.docs = docs
        self.disco_env_id = args["disco_env_id"]
        self.disco_collection_id = args["disco_collection_id"]
        self.discovery = DiscoveryV1(
            url=('https://%s/discovery/api' % hostname),
            username=disco_username,
            password=disco_password,
            version="2017-11-07")
Ejemplo n.º 16
0
def mydearwatson(input_seller):
    """Summarise Watson Discovery News coverage of a seller's labour record.

    Args:
        input_seller: Seller name; converted with ``str`` before use.

    Returns:
        A tuple ``(po, output_string)``. ``po`` is a DataFrame with columns
        Sentiment, Title, URL, Source and Relevance for up to the first three
        results. ``output_string`` is a one-line summary with the match count
        and the mean document sentiment score of up to the first ten results
        (``nan`` when there are none).
    """
    input_seller = str(input_seller)

    # --- Authentication ----
    discovery = DiscoveryV1(username="******",
                            password="******",
                            version="2017-11-07")

    query_string = 'enriched_text.entities.text:' + input_seller + ', labor|labour'
    # NOTE(review): 'counts' may be meant to be 'count' — confirm against the
    # Discovery query API before changing it.
    qopts = {'query': query_string, 'counts': '10'}
    my_query = discovery.query('system', 'news-en', qopts)

    # np.int was only an alias of the builtin and was removed in NumPy 1.24.
    number = int(my_query["matching_results"])

    results = my_query["results"]

    # Slice rather than index a fixed range, so a response with fewer than
    # three (or ten) results no longer raises IndexError.
    top = results[:3]
    relevance = [hit['result_metadata']['score'] for hit in top]
    titles = [hit["title"] for hit in top]
    urls = [hit["url"] for hit in top]
    # Not every result carries a forum title; use None for those.
    orgs = [hit.get("forum_title") for hit in top]

    # Whether each article is 'positive', 'negative' or 'neutral'.
    label = [
        hit['enriched_text']['sentiment']['document']['label'] for hit in top
    ]

    po = pd.DataFrame(
        list(zip(label, titles, urls, orgs, relevance)),
        columns=['Sentiment', 'Title', 'URL', 'Source', 'Relevance'])

    # Aggregate score over at most the first ten results.
    scores = [
        hit['enriched_text']['sentiment']['document']['score']
        for hit in results[:10]
    ]
    score = np.mean(scores) if scores else float('nan')

    output_string = 'IBM Watson Discovery News API estimates {} matches for {} with a total weighted sentiment of {:.1f}'
    output_string = output_string.format(number, input_seller, score)
    return (po, output_string)
def update_discovery_environment_(Discovery,
                                  environment_id,
                                  new_name,
                                  description=None):
    """Update an environment through the given Discovery client.

    Args:
        Discovery: Client object exposing ``update_environment``.
        environment_id: Passed through as the first positional argument.
        new_name: Passed through as the second positional argument.
        description: Passed through as the third positional argument.

    Returns:
        ``get_result()`` of the update response, or ``None`` after a
        ``WatsonApiException`` has been caught and reported on stdout.
    """
    try:
        response = Discovery.update_environment(environment_id, new_name,
                                                description)
        return response.get_result()
    except WatsonApiException as exc:
        failure = ("Method Analyse fail with status code " + str(exc.code) +
                   ": " + exc.message)
        print(failure)
    return None
Ejemplo n.º 18
0
def getRecipes(ingredientsList):
    """Query a fixed Discovery collection for the given ingredients.

    Args:
        ingredientsList: Iterable of ingredient strings; they are joined
            with commas into one "Ingredients:" query.

    Returns:
        The 'results' entry of the query response.
    """
    client = DiscoveryV1(username='******',
                         password='******',
                         version='2017-09-01')
    search = {'query': "Ingredients:" + ','.join(ingredientsList)}
    response = client.query('0a15c836-8ec9-41ca-a33b-93a9d63dae8d',
                            '7844f79c-c259-4a3d-a2d8-2db7d18acd76', search)
    return response['results']
Ejemplo n.º 19
0
    def __init__(self, url, username, password, collection_id, config_id,
                 environment_id):
        """Record credentials and ids, then build the Discovery client.

        NOTE(review): ``self.creds`` and ``self.api_ids`` are not created
        here, so they must already exist (presumably as class attributes).
        If they are class-level dicts, every instance shares them — confirm.
        """
        credential_items = (('url', url), ('username', username),
                            ('password', password))
        for key, value in credential_items:
            self.creds[key] = value

        id_items = (('collection_id', collection_id),
                    ('configuration_id', config_id),
                    ('environment_id', environment_id))
        for key, value in id_items:
            self.api_ids[key] = value

        # The url is stored but not passed on to the client.
        self.discovery = DiscoveryV1(username=self.creds['username'],
                                     password=self.creds['password'],
                                     version='2017-09-01')
Ejemplo n.º 20
0
    def get_sentiment_score(self, start_date, end_date):
        """Average the document sentiment score of news results over a date range.

        Each date from ``self._daterange`` is paired with the next one, and
        the news collection is paged through 50 results at a time for each
        pair.

        Args:
            start_date: Passed to ``self._daterange`` as the range start.
            end_date: Passed to ``self._daterange`` as the range end.

        Returns:
            The sum of the document sentiment scores divided by the number
            of results that carried one.

        Raises:
            ZeroDivisionError: If no result carried a sentiment score.
        """
        dates = self._daterange(start_date, end_date)
        discovery = DiscoveryV1(
            username=config.discovery['username'],
            password=config.discovery['password'],
            version=config.discovery['version']
        )

        page_size = 50
        tot_score = 0
        tot_count = 0
        for idx, elem in enumerate(dates):
            # NOTE(review): the modulo pairs the last date with the first
            # one; confirm that wrap-around window is intended.
            nextelem = dates[(idx + 1) % len(dates)]
            date1 = elem.strftime("%Y-%m-%d")
            date2 = nextelem.strftime("%Y-%m-%d")
            date_filter = "language:(english|en),crawl_date>%sT12:00:00+0530,crawl_date<%sT12:00:00+0530" % (date1, date2)
            qopts = {
                "query": "\"term to be searched\"",
                "filter": date_filter,
                "aggregations": [
                    "term(host).term(enriched_text.sentiment.document)",
                    "term(enriched_text.sentiment.document)"
                ],
                'return': 'enriched_text.sentiment.document',
                'count': page_size,
                'offset': 0
            }

            # Placeholder until the first response reports the real total.
            matching_results = 100000

            while qopts['offset'] < matching_results:
                my_query = discovery.query('system', 'news', qopts)
                matching_results = my_query['matching_results']
                for result in my_query['results']:
                    try:
                        score = result['enriched_text']['sentiment']['document']['score']
                    except (KeyError, TypeError):
                        # The old fallback added the string "NO SCORE" to the
                        # numeric total and raised TypeError. Results without
                        # a score are left out of the average instead.
                        continue

                    tot_score += score
                    tot_count += 1
                qopts['offset'] += page_size

        return tot_score / tot_count
Ejemplo n.º 21
0
def upload_document_to_collection(file_path):
    """Upload one file to a fixed Discovery collection.

    Args:
        file_path: Path of the file to upload; it is opened in the default
            (text) mode.
    """
    client = DiscoveryV1(username="******",
                         password="******",
                         version="2016-05-05")
    # Fixed target: environment first, collection second.
    target = ("2534eff3-6e4e-4908-a6e4-dede8e0f92be",
              "0410f7f2-c65d-465f-935d-5b237a03b7dd")
    with open(file_path) as handle:
        client.add_document(*target, file_info=handle)
def update_discovery_collection_(Discovery,
                                 environment_id,
                                 collection_id,
                                 configuration_id,
                                 name,
                                 description=None):
    """Update a collection through the given Discovery client.

    Args:
        Discovery: Client object exposing ``update_collection``.
        environment_id: Passed through as the first positional argument.
        collection_id: Passed through as the second positional argument.
        configuration_id: Passed through as the third positional argument.
        name: Passed through as the fourth positional argument.
        description: Passed through as the fifth positional argument.

    Returns:
        ``get_result()`` of the update response, or ``None`` after a
        ``WatsonApiException`` has been caught and reported on stdout.
    """
    try:
        response = Discovery.update_collection(environment_id, collection_id,
                                               configuration_id, name,
                                               description)
        return response.get_result()
    except WatsonApiException as exc:
        failure = ("Method fail with status code " + str(exc.code) + ": " +
                   exc.message)
        print(failure)
    return None
Ejemplo n.º 23
0
def main(args):
    """Ingest the files under ``args.paths`` into a Discovery collection.

    Args:
        args: Namespace providing ``version``, ``url``, ``username``,
            ``password``, ``iam_api_key``, ``collection_id``, ``paths`` and
            ``dry_run``. ``environment_id`` is set here, and
            ``collection_id`` is overwritten when exactly one collection
            exists.
    """
    client = DiscoveryV1(args.version,
                         url=args.url,
                         username=args.username,
                         password=args.password,
                         iam_apikey=args.iam_api_key)
    args.environment_id = writable_environment_id(client)
    available = client.list_collections(
        args.environment_id).get_result()["collections"]

    # With exactly one collection there is no ambiguity: use it.
    if len(available) == 1:
        args.collection_id = available[0]["collection_id"]

    if not args.collection_id:
        if available:
            problem = "Error: multiple collections found. Please specify which one to use."
        else:
            problem = "Error: no target collection found. Please create a collection."
        print(problem)
        exit(1)

    work = Worker(client, args.environment_id, args.collection_id)

    already_indexed = set(
        existing_sha1s(client, args.environment_id, args.collection_id))

    def candidate_files():
        # Yield plain files as given; walk anything else as a directory tree.
        for path in args.paths:
            if os.path.isfile(path):
                yield path
            else:
                for root, _dirs, files in os.walk(path):
                    for name in files:
                        yield os.path.join(root, name)

    total_ignored = 0
    total_ingested = 0
    for file_path in candidate_files():
        ingested, ignored = do_one_file(file_path, work, already_indexed,
                                        args.dry_run)
        total_ingested += ingested
        total_ignored += ignored

    print("Ignored", total_ignored,
          "file(s), because they were found in collection.", "\nIngesting",
          total_ingested, "file(s).")

    work.finish()
Ejemplo n.º 24
0
    def __init__(self, name):
        """Create the Discovery and Assistant clients and open an Assistant session.

        Args:
            name: Stored unchanged on ``self.name``.
        """
        super().__init__()
        self.name = name

        discovery_settings = {
            'version': '2018-08-01',
            'url': 'insert-watson-url',
            'iam_apikey': 'insert-api-key',
        }
        self.discovery = DiscoveryV1(**discovery_settings)
        self.discovery_env_id = 'insert-env-id'
        self.discovery_col_id = 'insert-col-id'

        self.assistant_id = 'insert-assistant-id'
        assistant_settings = {
            'iam_apikey': 'insert-api-key',
            'version': '2018-11-08',
            'url': 'insert-url',
        }
        self.assistant = AssistantV2(**assistant_settings)

        # Open a session right away and keep its id.
        session = self.assistant.create_session(
            assistant_id=self.assistant_id).get_result()
        self.assistant_session_id = session['session_id']
Ejemplo n.º 25
0
 def __init__(self, debug_mode=False, query_word_limit=150):
     """Build two Discovery clients from key.txt and index the 'byod' collections.

     The settings are read by line position from ``key.txt`` in the current
     working directory: lines 5-8 configure ``self.discovery``, lines 20-23
     configure ``self.discovery_c``, and lines 24-26 hold the collection,
     configuration and environment ids (all 0-based).

     Args:
         debug_mode: Stored unchanged on ``self.debug_mode``.
         query_word_limit: Stored unchanged on ``self.query_word_limit``.
     """
     # Read every line up front and let the context manager close the file,
     # so the handle is released even if building a client below raises.
     with open("key.txt", "r") as key_file:
         f1 = key_file.read().splitlines()

     self.debug_mode = debug_mode
     self.discovery = DiscoveryV1(version=f1[5],
                                  username=f1[6],
                                  password=f1[7],
                                  url=f1[8])
     self.discovery_c = DiscoveryV1(version=f1[20],
                                    username=f1[21],
                                    password=f1[22],
                                    url=f1[23])
     self.collection_id = f1[24]
     self.configuration_id = f1[25]
     self.environment_id = f1[26]
     self.query_word_limit = query_word_limit

     # Only the first environment reported for the name 'byod' is used.
     self.currentEnvironment = self.discovery.list_environments(
         name='byod')['environments'][0]['environment_id']
     # Map each collection name to its id for later lookups.
     self.collectionIndex = {
         e["name"]: e["collection_id"]
         for e in self.discovery.list_collections(
             environment_id=self.currentEnvironment)["collections"]
     }
Ejemplo n.º 26
0
def get_natural_language_query(query):
    """Return the top passage Discovery finds for ``query``.

    Args:
        query: Query string; it is also echoed to stdout.

    Returns:
        A list ``[passage_text, passage_score, document_id]`` taken from the
        first passage of the response.
    """
    print("query is" + query)
    client = DiscoveryV1(version='2018-03-05',
                         username="******",
                         password="******")
    response = client.query(environment_id=EnvID,
                            collection_id=ColID,
                            query=query,
                            passages='true',
                            passages_count='1',
                            count=1,
                            highlight='true')
    # Only the first passage is used.
    best = response['passages'][0]
    return [best["passage_text"], best["passage_score"], best["document_id"]]
Ejemplo n.º 27
0
def get_news(query):
    """Query the 'news' collection of the first environment and return JSON.

    NOTE(review): ``query`` is never used; the search text is the fixed
    string 'Fruit Seed Plant Omnivore'. Confirm whether that is intended.

    Args:
        query: Not used by this function.

    Returns:
        ``jsonify`` applied to the query response.
    """
    client = DiscoveryV1(username='******',
                         password='******',
                         version='2017-08-01')
    environment_listing = client.get_environments()
    print(json.dumps(environment_listing, indent=2))

    # No name filter is applied: the first listed environment is used.
    first_environment = list(environment_listing['environments'])[0]
    news_environment_id = first_environment['environment_id']
    print(json.dumps(news_environment_id, indent=2))

    collection_listing = client.list_collections(news_environment_id)
    # Materialised as before; nothing reads it afterwards.
    news_collections = list(collection_listing['collections'])
    print(json.dumps(collection_listing, indent=2))

    search = {'query': 'Fruit Seed Plant Omnivore'}
    response = client.query(news_environment_id, 'news', search)
    print(json.dumps(response, indent=2))
    return jsonify(response)
Ejemplo n.º 28
0
def discovery(keyword):
    """Search a fixed collection and return the file names of the matches.

    The raw result list is also stored in the module global
    ``resultados_query``.

    Args:
        keyword: Passed as the ``query`` argument of the Discovery query.

    Returns:
        List with the 'extracted_metadata' -> 'filename' value of each result.
    """
    global resultados_query

    client = DiscoveryV1(
        version="2018-03-05",
        username="******",
        password="******",
        url='https://gateway.watsonplatform.net/discovery/api'
    )

    response = client.query(
        environment_id='d3755050-a1c6-4cdb-a480-3bc1df719e7d',
        collection_id='c8117070-40cb-44b1-bb6c-29fac8f620d6',
        query=keyword)
    resultados_query = response['results']

    return [
        article['extracted_metadata']['filename']
        for article in resultados_query
    ]
Ejemplo n.º 29
0
def get_news(query):
    """Return up to five 'news-en' results for ``query``.

    Args:
        query: Passed as the ``query`` argument of the Discovery query.

    Returns:
        ``get_result()`` of the query response.
    """
    client = DiscoveryV1(iam_apikey=API_KEY, version='2017-08-01')

    # No name filter is applied: the first listed environment is used.
    environment_listing = client.list_environments().get_result()
    first_environment = list(environment_listing['environments'])[0]
    news_environment_id = first_environment['environment_id']

    collection_listing = client.list_collections(
        news_environment_id).get_result()
    # Materialised as before; nothing reads it afterwards.
    news_collections = list(collection_listing['collections'])

    # NOTE(review): return_fields is a one-element list holding a single
    # comma-and-space separated string; confirm the service accepts it.
    search_options = {
        'count': 5,
        'return_fields': ['title, text, url, sentiments'],
        'environment_id': news_environment_id,
        'collection_id': 'news-en',
        'query': query,
    }
    return client.query(**search_options).get_result()
Ejemplo n.º 30
0
def main():
    """Upload not-yet-known URLs from the request to Discovery.

    Reads the JSON request body. Its 'value' entry must provide 'username',
    'password', 'db_url', 'result', 'environment_id' and 'collection_id'.

    Returns:
        A JSON ``flask.Response``: status 200 with the list of added URLs,
        or status 500 when 'value' or the username/password pair is missing.
    """
    payload = flask.request.get_json()
    if 'value' not in payload:
        payload['result'] = 'Params Malformed'
        return flask.Response(response=json.dumps(payload),
                              status=500,
                              mimetype='application/json')

    params = payload['value']

    if not ('username' in params and 'password' in params):
        params['result'] = 'No WDS Username/Password'
        return flask.Response(response=json.dumps(params),
                              status=500,
                              mimetype='application/json')

    client = DiscoveryV1('2016-12-15',
                         username=params['username'],
                         password=params['password'])

    added = []
    added_results = []
    for url in filter_known_urls(params['db_url'], params['result']):
        with download_to_temp(url) as tmpfile:
            # NOTE(review): the add_document response is not used; the URL
            # itself is what gets reported back.
            client.add_document(environment_id=params['environment_id'],
                                collection_id=params['collection_id'],
                                fileinfo=tmpfile)
            added.append(url)
            added_results.append(url)

    mark_urls_known(params['db_url'], added)

    return flask.Response(response=json.dumps({'result': added_results}),
                          status=200,
                          mimetype='application/json')