def __init__(self, kernel: DiscoveryV1):
    """Bind a Discovery client and cache the ids later queries need.

    Args:
        kernel: Discovery client. It must offer ``list_environments`` and
            ``list_collections`` whose return values can be indexed by key.

    Raises:
        IndexError: If the environment listing for the name 'byod' is empty.
    """
    self.kernel = kernel

    # Only the first environment returned for the name 'byod' is used.
    byod_listing = kernel.list_environments(name='byod')
    self.currentEnvironment = byod_listing['environments'][0]['environment_id']

    # Lookup table: collection name -> collection id in that environment.
    collection_listing = kernel.list_collections(
        environment_id=self.currentEnvironment)
    name_to_id = {}
    for entry in collection_listing["collections"]:
        name_to_id[entry["name"]] = entry["collection_id"]
    self.collectionIndex = name_to_id

    # No query has been issued yet.
    self.query_response = None
def connect():
    """Connect to Watson Discovery and locate the 'crimereports' collection.

    Returns:
        A 7-tuple ``(discovery, environments, watson_environments,
        watson_environment_id, collections, watson_collections,
        watson_collection)``. ``watson_collection`` is the id of the
        collection named 'crimereports', or ``None`` when the environment has
        no collection with that name.

    Raises:
        IndexError: If no environment is named 'my_environment'.
    """
    discovery = DiscoveryV1(username="******",
                            password="******",
                            version="2017-11-07")
    environments = discovery.get_environments()

    watson_environments = [
        env for env in environments['environments']
        if env['name'] == 'my_environment'
    ]
    watson_environment_id = watson_environments[0]['environment_id']

    collections = discovery.list_collections(watson_environment_id)
    watson_collections = list(collections['collections'])

    # Start from None so the return statement cannot hit an unbound local
    # when no collection is called 'crimereports'.
    watson_collection = None
    for entry in watson_collections:
        if entry['name'] == 'crimereports':
            # Deliberately no break: if several collections share the name,
            # the last one wins, as it did before.
            watson_collection = entry['collection_id']

    return (discovery, environments, watson_environments,
            watson_environment_id, collections, watson_collections,
            watson_collection)
def askDiscovery2():
    """Run one query against Discovery, logging every stage, and return the passage.

    The query comes from ``getQuery2()``; ``constructQuery``,
    ``getQueryResults`` and ``getPassage`` (defined elsewhere) turn it into
    the returned value.

    Returns:
        Whatever ``getPassage`` produces for the query results.
    """
    print("Discovery service intiated")
    credentials = getIBMCreds()
    discovery = DiscoveryV1(version='2017-11-07',
                            username=credentials['username'],
                            password=credentials['password'])

    # The second entry (index 1) of the environment listing is the one used.
    environment = discovery.list_environments()['environments'][1]
    print("Environment Details:\n {}".format(environment))
    env_id = environment['environment_id']

    collections = discovery.list_collections(env_id)['collections']
    print("Collection: \n{}".format(collections))

    # Bundle handed to getQueryResults: (environment id, collections, client).
    discoveryDetails = (env_id, collections, discovery)

    queryOptions = constructQuery(getQuery2())
    print("Query options are: {}".format(queryOptions))

    queryResults = getQueryResults(discoveryDetails, queryOptions)
    print("Query results: {}".format(queryResults))

    passage = getPassage(queryResults)
    separator = "--------------------***--------------------"
    print(separator)
    print("Passage after cleaning is: ")
    print(separator)
    print(passage)
    return passage
def main():
    """Create a Discovery client, write the CSV header and print each query response.

    Questions come from ``readQuestion()``. The result file receives only its
    header row; the per-question responses are printed as JSON, not written.
    """
    print("cutlture")

    # Discovery client used for every question below.
    discovery = DiscoveryV1(
        url="https://gateway.watsonplatform.net/discovery/api",
        version='2018-03-05',
        username="******",
        password="******")
    print(discovery)

    questions = readQuestion()

    with open('/home/osboxes/Desktop/culture/result.csv', 'w') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['query', 'top1', 'document_id1'])
        writer.writeheader()
        for question in questions:
            answer = get_natural_language_query(discovery, question)
            print(json.dumps(answer, indent=2))
def main(param):
    """Run a natural-language query against the collection named in *param*.

    Args:
        param: Mapping with the keys ``"collection"`` (collection name) and
            ``"query"`` (natural-language query text).

    Returns:
        The Discovery query result on success. Otherwise a dict with the
        single key ``"ERRORE"`` describing the problem: either the collection
        name is not recognised, or it is recognised but has no credentials
        configured in this function.
    """
    credentials = {
        "Test_ricerca_e_sviluppo": {
            "APIKEY": "",
            "URL": "",
            "COLL_ID": "",
            "CONFIG_ID": "",
            "ENV_ID": ""
        }
    }  # the keys are missing (original author's note)

    collection_names = ["Test_ricerca_e_sviluppo", "qualiware_test", "Stefal"]
    requested = param["collection"]
    if requested not in collection_names:
        return {"ERRORE": "il collection name non è corretto!"}

    # A name can be listed above without having an entry in `credentials`;
    # report that instead of failing with a KeyError.
    collection_params = credentials.get(requested)
    if collection_params is None:
        return {"ERRORE": "credenziali non configurate per questa collection!"}

    discovery = DiscoveryV1(version="2020-05-22",
                            url=collection_params['URL'],
                            iam_apikey=collection_params["APIKEY"])
    query_result = discovery.query(
        environment_id=collection_params['ENV_ID'],
        collection_id=collection_params['COLL_ID'],
        natural_language_query=param["query"],
        count=100).get_result()
    return query_result
def askDiscovery():
    """Ask Discovery one question and return the cleaned passage.

    Credentials come from ``getIBMCreds()`` and the question from
    ``getQuery()``; ``constructQuery``, ``getQueryResults`` and ``getPassage``
    (defined elsewhere) produce the returned value. Banner lines are printed
    around each stage.

    Returns:
        Whatever ``getPassage`` produces for the query results.
    """
    banner = "--------------------***--------------------"

    print(banner)
    print("IBM CREDENTIALS: ")
    print(banner)
    credentials = getIBMCreds()
    print(banner)
    print(banner)

    discovery = DiscoveryV1(version='2017-11-07',
                            username=credentials['username'],
                            password=credentials['password'])
    # The second entry (index 1) of the environment listing is the one used.
    env_id = discovery.list_environments()['environments'][1]['environment_id']
    collections = discovery.list_collections(env_id)['collections']
    discoveryDetails = (env_id, collections, discovery)

    print(banner)
    print("QUERY: ")
    print(banner)
    query = getQuery()
    print(banner)
    print(banner)

    queryOptions = constructQuery(query)
    queryResults = getQueryResults(discoveryDetails, queryOptions)
    passage = getPassage(queryResults)

    print(banner)
    print("Passage after cleaning is: ")
    print(banner)
    print(passage)
    print(
        "========================================================================================"
    )
    return passage
def main(frase):
    """Query Discovery with *frase* and return the result texts keyed by rank.

    Args:
        frase: Query string sent as the ``'query'`` option.

    Returns:
        dict mapping ``"1"``, ``"2"``, ... (1-based position as a string) to
        the ``'text'`` field of each result.
    """
    discovery = DiscoveryV1(username="", password="", version="2017-11-07")
    print(frase)

    response = discovery.query('', '', {'query': frase})
    # Round-trip through JSON so the response is handled as plain
    # dict/list data.
    textos = json.loads(json.dumps(response))

    hits = textos['results']
    print(len(hits))

    lista = {}
    for index, hit in enumerate(hits):
        lista[str(index + 1)] = hit['text']
        print(hit['text'])
        print(index)
    return lista
def InitDiscovery():
    """Create the global Discovery client and a fresh collection named 'Collection'.

    Sets the module globals ``discovery``, ``environmentID`` and
    ``collectionID``. Any existing collection named 'Collection' in the
    selected environment is deleted first, so documents from an earlier run
    cannot interfere with the new one.
    """
    global discovery, environmentID, collectionID

    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration or an environment variable.
    api_key = 'gR4dbAo_IIcdVYAcL1VAafrQonD9FRJF-Imceur5LPXW'
    service_url = "https://gateway.watsonplatform.net/discovery/api"

    # Active Discovery instance.
    discovery = DiscoveryV1(version='2017-11-07',
                            iam_apikey=api_key,
                            url=service_url)

    # The environment id is needed before a collection can be created; the
    # second entry (index 1) of the listing is used.
    environment_listing = discovery.list_environments().result
    environmentID = environment_listing['environments'][1]['environment_id']

    # Make room for the new collection by removing any earlier one with the
    # same name.
    existing = discovery.list_collections(environmentID).result['collections']
    for entry in existing:
        if entry['name'] == 'Collection':
            discovery.delete_collection(environmentID, entry['collection_id'])

    # Create the collection and remember its id.
    created = discovery.create_collection(
        environment_id=environmentID,
        name='Collection',
        description='{collection_desc}').get_result()
    collectionID = created['collection_id']
def handleClientActions(context, actions, watsonResponse):
    """Look up news for ``context['topic']`` and store the result in the context.

    Args:
        context: Mapping holding at least the key ``'topic'``; it is updated
            in place with the key ``'myNews'``.
        actions: Not used by this function.
        watsonResponse: Not used by this function.

    Returns:
        The same *context* object, now carrying the query result under
        ``'myNews'``.
    """
    print(">>> processing client actions...\n")

    # Discovery client; no url is passed, so the SDK default endpoint applies.
    discovery = DiscoveryV1(version='2018-08-01',
                            username='******',
                            password='******')

    # System environment with the English news collection.
    news_environment_id = 'system'
    collection_id = 'news-en'

    # Query, sort the results and return only selected fields.
    search_options = {
        'natural_language_query': context['topic'],
        'deduplicate': "true",
        'sort': "-score,-publication_date",
        'return_fields': 'title,url,publication_date',
    }
    response = discovery.query(news_environment_id, collection_id,
                               **search_options)
    query_results = response.get_result()

    context.update({'myNews': query_results})
    return context
def __init__(self, credentials):
    """Build the Discovery client and remember the target collection.

    Args:
        credentials: Dictionary describing the WDS collection. Required keys:
            'iam_apikey' (API key), 'url', 'version' (API version),
            'environment_id' and 'collection_id'.

    Raises:
        KeyError: If a required key is missing; the message names the key.
    """
    try:
        api_key = credentials['iam_apikey']
        service_url = credentials['url']
        api_version = credentials['version']

        # Discovery client.
        self._instance = DiscoveryV1(iam_apikey=api_key,
                                     url=service_url,
                                     version=api_version)

        # Where the collection lives.
        self._environment_id = credentials['environment_id']
        self.collection_id = credentials['collection_id']
    except KeyError as missing:
        raise KeyError(
            "Argument 'credentials' is missing the following key: " +
            str(missing))
def query_collection(query):
    """Send *query* to a fixed Discovery collection and return the response.

    Args:
        query: Query options, passed through to ``DiscoveryV1.query`` as its
            third positional argument.

    Returns:
        Whatever ``DiscoveryV1.query`` returns.
    """
    # Target environment and collection are fixed for this helper.
    environment_id = "b013b430-3922-41a4-8a9b-4610ac233a42"
    collection_id = "3d1f4781-f502-441a-a17a-21e60c8aa2ff"

    client = DiscoveryV1(username="******",
                         password="******",
                         version="2016-05-05")
    response = client.query(environment_id, collection_id, query)
    return response
def getDiscovery_2():
    """Set ENVIRONMENT_2 / COLLECTION_2 and return the matching client.

    Side effects:
        Assigns the module globals ``ENVIRONMENT_2`` and ``COLLECTION_2``.

    Returns:
        A ``DiscoveryV1`` client for John's Discovery instance.
    """
    global ENVIRONMENT_2, COLLECTION_2

    # John's Discovery instance.
    ENVIRONMENT_2, COLLECTION_2 = (
        "a9e5ef42-6ee3-4b5b-8dbe-ea6c0fce0556",
        "fc628c92-35e7-4ca1-890c-d515f1ab7b4f",
    )

    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    client = DiscoveryV1(
        version='2018-12-03',
        iam_apikey='Z5qjSJAEOoxr29_cq2AB2YhDasgd0zKkCQAEBvlTdkLf',
        url='https://gateway-wdc.watsonplatform.net/discovery/api')
    return client
def getDiscovery():
    """Set ENVIRONMENT / COLLECTION and return the matching client.

    Side effects:
        Assigns the module globals ``ENVIRONMENT`` and ``COLLECTION``.

    Returns:
        A ``DiscoveryV1`` client for Joseph's Discovery instance.
    """
    global ENVIRONMENT, COLLECTION

    # Joseph's Discovery instance.
    ENVIRONMENT, COLLECTION = (
        "45b1c136-c499-42dd-be4a-acfe78aede82",
        "e7d71852-3174-498f-be16-e72f71fb768d",
    )

    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    client = DiscoveryV1(
        version='2018-12-03',
        iam_apikey='QOX9E0nMnTC1aQ9ZMRFGA_Nhnm7QskDQkNapK_sBD_Wj',
        url='https://gateway.watsonplatform.net/discovery/api')
    return client
def __init__(self, config):
    """Create the Discovery client from the ``'discovery'`` section of *config*.

    Args:
        config: Mapping with a ``'discovery'`` entry that provides
            'username', 'password', 'version', 'environment_id',
            'collection_id_git' and 'collection_id_imp'.

    Raises:
        KeyError: If any of those entries is missing.
    """
    settings = config['discovery']

    username = settings['username']
    password = settings['password']
    api_version = settings['version']
    self.discovery = DiscoveryV1(username=username,
                                 password=password,
                                 version=api_version)

    # Ids copied verbatim from the configuration.
    self.environment_id = settings['environment_id']
    self.collection_id_git = settings['collection_id_git']
    self.collection_id_imp = settings['collection_id_imp']
def __init__(self, docs, args):
    """Store the documents and build a Discovery client from *args*.

    Args:
        docs: Stored unchanged as ``self.docs``.
        args: Mapping providing "hostname", "disco_username",
            "disco_password", "disco_env_id" and "disco_collection_id".

    Raises:
        KeyError: If *args* lacks one of those keys.
    """
    # Zero-argument super() resolves against the defining class.
    # super(type(self), self) resolves against the runtime class instead and
    # recurses forever as soon as this class is subclassed.
    super().__init__()

    hostname = args["hostname"]
    disco_username = args["disco_username"]
    disco_password = args["disco_password"]

    self.docs = docs
    self.disco_env_id = args["disco_env_id"]
    self.disco_collection_id = args["disco_collection_id"]

    # The service endpoint is derived from the configured host name.
    self.discovery = DiscoveryV1(url='https://%s/discovery/api' % hostname,
                                 username=disco_username,
                                 password=disco_password,
                                 version="2017-11-07")
def mydearwatson(input_seller):
    """Summarise Discovery News coverage linking a seller to labour topics.

    Args:
        input_seller: Seller name; converted with ``str()`` before use.

    Returns:
        Tuple ``(po, output_string)``:

        * ``po`` - DataFrame with the columns 'Sentiment', 'Title', 'URL',
          'Source' and 'Relevance', one row per top result (at most three;
          fewer if the query returns fewer).
        * ``output_string`` - sentence reporting the number of matches and
          the mean document sentiment score over at most the first ten
          results ('nan' when there are no results).
    """
    input_seller = str(input_seller)

    # --- Authentication ----
    discovery = DiscoveryV1(username="******",
                            password="******",
                            version="2017-11-07")

    query_string = ('enriched_text.entities.text:' + input_seller +
                    ', labor|labour')
    # NOTE(review): the key is spelled 'counts'; the Discovery query
    # parameter appears to be 'count' - confirm before changing.
    qopts = {'query': query_string, 'counts': '10'}
    my_query = discovery.query('system', 'news-en', qopts)

    # Number of matches. np.int was only an alias of the builtin int and is
    # no longer available in current NumPy, so use int directly.
    number = int(my_query["matching_results"])

    # Slices instead of fixed index ranges: a short result list yields fewer
    # rows rather than an IndexError.
    results = my_query["results"]
    top_hits = results[:3]

    # Titles, sources and URLs of the most relevant articles.
    relevance = [hit['result_metadata']['score'] for hit in top_hits]
    titles = [hit["title"] for hit in top_hits]
    urls = [hit["url"] for hit in top_hits]
    # Source is optional: None when the result has no 'forum_title'.
    orgs = [hit.get("forum_title") for hit in top_hits]
    # Whether the article is 'positive', 'negative' or 'neutral'.
    label = [
        hit['enriched_text']['sentiment']['document']['label']
        for hit in top_hits
    ]

    po = pd.DataFrame(
        list(zip(label, titles, urls, orgs, relevance)),
        columns=['Sentiment', 'Title', 'URL', 'Source', 'Relevance'])

    # Aggregate sentiment over up to ten results.
    sentiment_scores = [
        hit['enriched_text']['sentiment']['document']['score']
        for hit in results[:10]
    ]
    score = np.mean(sentiment_scores) if sentiment_scores else float('nan')

    output_string = 'IBM Watson Discovery News API estimates {} matches for {} with a total weighted sentiment of {:.1f}'
    output_string = output_string.format(number, input_seller, score)
    return (po, output_string)
def update_discovery_environment_(Discovery,
                                  environment_id,
                                  new_name,
                                  description=None):
    """Rename (and optionally re-describe) a Discovery environment.

    Args:
        Discovery: Client object exposing ``update_environment``.
        environment_id: Id of the environment to update.
        new_name: New environment name.
        description: Optional new description.

    Returns:
        The value of ``get_result()`` on the service response, or ``None``
        when the call raises ``WatsonApiException`` (the status code and
        message are printed in that case).
    """
    try:
        response = Discovery.update_environment(environment_id, new_name,
                                                description)
        return response.get_result()
    except WatsonApiException as exc:
        print("Method Analyse fail with status code " + str(exc.code) + ": " +
              exc.message)
    return None
def getRecipes(ingredientsList):
    """Query the recipe collection for the given ingredients.

    Args:
        ingredientsList: Iterable of ingredient strings; they are joined with
            commas into a single ``Ingredients:`` query.

    Returns:
        The ``'results'`` entry of the Discovery response.
    """
    client = DiscoveryV1(username='******',
                         password='******',
                         version='2017-09-01')

    query_text = "Ingredients:" + ','.join(ingredientsList)
    response = client.query('0a15c836-8ec9-41ca-a33b-93a9d63dae8d',
                            '7844f79c-c259-4a3d-a2d8-2db7d18acd76',
                            {'query': query_text})
    return response['results']
def __init__(self, url, username, password, collection_id, config_id,
             environment_id):
    """Record the service credentials and ids, then build the Discovery client.

    Args:
        url: Service URL; stored in ``self.creds`` only.
        username: Service username.
        password: Service password.
        collection_id: Stored under ``api_ids['collection_id']``.
        config_id: Stored under ``api_ids['configuration_id']``.
        environment_id: Stored under ``api_ids['environment_id']``.
    """
    # NOTE(review): `creds` and `api_ids` must already exist on the object
    # before this runs - presumably class-level dicts. If so they are shared
    # by every instance; confirm that is intended.
    for key, value in (('url', url), ('username', username),
                       ('password', password)):
        self.creds[key] = value

    for key, value in (('collection_id', collection_id),
                       ('configuration_id', config_id),
                       ('environment_id', environment_id)):
        self.api_ids[key] = value

    # NOTE(review): the stored url is not passed to the client - confirm.
    self.discovery = DiscoveryV1(username=self.creds['username'],
                                 password=self.creds['password'],
                                 version='2017-09-01')
def get_sentiment_score(self, start_date, end_date):
    """Return the mean document sentiment score of news in a date range.

    The range produced by ``self._daterange`` is walked as consecutive date
    pairs; for each pair the 'news' collection is paged through 50 results
    at a time and every numeric sentiment score is accumulated.

    Args:
        start_date: First date, forwarded to ``self._daterange``.
        end_date: Last date, forwarded to ``self._daterange``.

    Returns:
        The arithmetic mean of all collected scores, or ``float('nan')``
        when no result carried a score.
    """
    dates = list(self._daterange(start_date, end_date))
    discovery = DiscoveryV1(username=config.discovery['username'],
                            password=config.discovery['password'],
                            version=config.discovery['version'])
    page_size = 50
    tot_score = 0
    tot_count = 0

    # Consecutive pairs only. Pairing the last date with the first one
    # (modulo wrap-around) gives crawl_date > last and < first, which is an
    # empty window for an ascending range and just costs a query.
    for window_start, window_end in zip(dates, dates[1:]):
        date1 = window_start.strftime("%Y-%m-%d")
        date2 = window_end.strftime("%Y-%m-%d")
        date_filter = (
            "language:(english|en),crawl_date>%sT12:00:00+0530,"
            "crawl_date<%sT12:00:00+0530" % (date1, date2))
        qopts = {
            "query": "\"term to be searched\"",
            "filter": date_filter,
            "aggregations": [
                "term(host).term(enriched_text.sentiment.document)",
                "term(enriched_text.sentiment.document)"
            ],
            'return': 'enriched_text.sentiment.document',
            'count': page_size,
            'offset': 0
        }

        # The total is unknown until the first page arrives, so the first
        # request is always issued.
        matching_results = None
        while matching_results is None or qopts['offset'] < matching_results:
            my_query = discovery.query('system', 'news', qopts)
            matching_results = my_query['matching_results']
            page = my_query['results']
            if not page:
                # Nothing more to read; do not keep paging on the reported
                # total alone.
                break
            for result in page:
                try:
                    score = result['enriched_text']['sentiment']['document'][
                        'score']
                except (LookupError, TypeError):
                    # No sentiment on this result: skip it. Adding a
                    # placeholder string to the numeric total would raise
                    # TypeError.
                    continue
                if score is None:
                    continue
                tot_score += score
                tot_count += 1
            qopts['offset'] += page_size

    if not tot_count:
        return float("nan")
    return tot_score / tot_count
def upload_document_to_collection(file_path):
    """Upload the file at *file_path* to a fixed Discovery collection.

    Args:
        file_path: Path of the document; opened with ``open()`` defaults.
    """
    # Target environment and collection are fixed for this helper.
    environment_id = "2534eff3-6e4e-4908-a6e4-dede8e0f92be"
    collection_id = "0410f7f2-c65d-465f-935d-5b237a03b7dd"

    client = DiscoveryV1(username="******",
                         password="******",
                         version="2016-05-05")

    with open(file_path) as document:
        client.add_document(environment_id,
                            collection_id,
                            file_info=document)
def update_discovery_collection_(Discovery,
                                 environment_id,
                                 collection_id,
                                 configuration_id,
                                 name,
                                 description=None):
    """Update a Discovery collection's configuration, name and description.

    Args:
        Discovery: Client object exposing ``update_collection``.
        environment_id: Id of the environment holding the collection.
        collection_id: Id of the collection to update.
        configuration_id: Id of the configuration to apply.
        name: New collection name.
        description: Optional new description.

    Returns:
        The value of ``get_result()`` on the service response, or ``None``
        when the call raises ``WatsonApiException`` (the status code and
        message are printed in that case).
    """
    try:
        response = Discovery.update_collection(environment_id, collection_id,
                                               configuration_id, name,
                                               description)
        return response.get_result()
    except WatsonApiException as exc:
        print("Method fail with status code " + str(exc.code) + ": " +
              exc.message)
    return None
def main(args):
    """Ingest the files named in ``args.paths`` into a Discovery collection.

    The environment comes from ``writable_environment_id``. When the
    environment holds exactly one collection, that collection is used;
    otherwise ``args.collection_id`` must already be set, or the program
    prints an error and exits with status 1. Each file is passed to
    ``do_one_file`` together with the set built from ``existing_sha1s``.

    Args:
        args: Parsed command-line namespace (version, url, username,
            password, iam_api_key, collection_id, paths, dry_run).
            ``environment_id`` and possibly ``collection_id`` are written
            back onto it.
    """
    discovery = DiscoveryV1(args.version,
                            url=args.url,
                            username=args.username,
                            password=args.password,
                            iam_apikey=args.iam_api_key)
    args.environment_id = writable_environment_id(discovery)

    listing = discovery.list_collections(args.environment_id).get_result()
    collections = listing["collections"]

    # A single collection is unambiguous, so it is selected automatically.
    if len(collections) == 1:
        args.collection_id = collections[0]["collection_id"]

    if not args.collection_id:
        if collections:
            problem = "Error: multiple collections found. Please specify which one to use."
        else:
            problem = "Error: no target collection found. Please create a collection."
        print(problem)
        exit(1)

    work = Worker(discovery, args.environment_id, args.collection_id)
    indexed = set(
        existing_sha1s(discovery, args.environment_id, args.collection_id))

    def candidate_files():
        # Yield each path that is a file as-is; walk anything else as a
        # directory tree and yield every file found below it.
        for path in args.paths:
            if os.path.isfile(path):
                yield path
                continue
            for root, _dirs, files in os.walk(path):
                for name in files:
                    yield os.path.join(root, name)

    count_ignore = 0
    count_ingest = 0
    for file_path in candidate_files():
        ingested, ignored = do_one_file(file_path, work, indexed,
                                        args.dry_run)
        count_ingest += ingested
        count_ignore += ignored

    print("Ignored", count_ignore,
          "file(s), because they were found in collection.", "\nIngesting",
          count_ingest, "file(s).")
    work.finish()
def __init__(self, name):
    """Set up the Discovery and Assistant clients and open an Assistant session.

    Args:
        name: Stored as ``self.name``.

    All service settings below are 'insert-...' placeholders that have to be
    replaced with real values.
    """
    super().__init__()
    self.name = name

    # Watson Discovery client and target ids.
    discovery_settings = {
        'version': '2018-08-01',
        'url': 'insert-watson-url',
        'iam_apikey': 'insert-api-key',
    }
    self.discovery = DiscoveryV1(**discovery_settings)
    self.discovery_env_id = 'insert-env-id'
    self.discovery_col_id = 'insert-col-id'

    # Watson Assistant client; a session is created immediately.
    self.assistant_id = 'insert-assistant-id'
    assistant_settings = {
        'iam_apikey': 'insert-api-key',
        'version': '2018-11-08',
        'url': 'insert-url',
    }
    self.assistant = AssistantV2(**assistant_settings)
    session = self.assistant.create_session(
        assistant_id=self.assistant_id).get_result()
    self.assistant_session_id = session['session_id']
def __init__(self, debug_mode=False, query_word_limit=150):
    """Load service settings from ``key.txt`` and build two Discovery clients.

    ``key.txt`` is read line by line and specific (0-based) line positions
    are used: lines 5-8 supply version/username/password/url for
    ``self.discovery``; lines 20-23 supply the same four values for
    ``self.discovery_c``; lines 24-26 supply the collection, configuration
    and environment ids.

    Args:
        debug_mode: Stored as ``self.debug_mode``.
        query_word_limit: Stored as ``self.query_word_limit``.

    Raises:
        IndexError: If ``key.txt`` has fewer than 27 lines, or if no
            environment is returned for the name 'byod'.
    """
    # Read the file inside a context manager so the handle is closed even
    # if a later step raises.
    with open("key.txt", "r") as key_file:
        f1 = key_file.read().splitlines()

    self.debug_mode = debug_mode
    self.discovery = DiscoveryV1(version=f1[5],
                                 username=f1[6],
                                 password=f1[7],
                                 url=f1[8])
    self.discovery_c = DiscoveryV1(version=f1[20],
                                   username=f1[21],
                                   password=f1[22],
                                   url=f1[23])
    self.collection_id = f1[24]
    self.configuration_id = f1[25]
    self.environment_id = f1[26]
    self.query_word_limit = query_word_limit

    # Only the first environment returned for the name 'byod' is used.
    self.currentEnvironment = self.discovery.list_environments(
        name='byod')['environments'][0]['environment_id']

    # Lookup table: collection name -> collection id in that environment.
    self.collectionIndex = {
        e["name"]: e["collection_id"]
        for e in self.discovery.list_collections(
            environment_id=self.currentEnvironment)["collections"]
    }
def get_natural_language_query(query):
    """Return the top passage Discovery finds for *query*.

    Uses the module-level ``EnvID`` and ``ColID`` to select the collection.

    Args:
        query: Query string; it is also printed.

    Returns:
        ``[passage_text, passage_score, document_id]`` for the first passage.

    Raises:
        IndexError: If the response contains no passages.
    """
    print("query is" + query)

    client = DiscoveryV1(version='2018-03-05',
                         username="******",
                         password="******")
    response = client.query(environment_id=EnvID,
                            collection_id=ColID,
                            query=query,
                            passages='true',
                            passages_count='1',
                            count=1,
                            highlight='true')

    best_passage = response['passages'][0]
    return [
        best_passage["passage_text"],
        best_passage["passage_score"],
        best_passage["document_id"],
    ]
def get_news(query):
    """Query the 'news' collection of the first environment and return JSON.

    The environment listing, the chosen environment id, the collection
    listing and the query response are each printed as JSON.

    Args:
        query: Currently unused.

    Returns:
        ``jsonify(...)`` of the Discovery query response.
    """
    # NOTE(review): the `query` argument is ignored; a fixed query string is
    # sent instead - confirm whether that is intended.
    discovery = DiscoveryV1(username='******',
                            password='******',
                            version='2017-08-01')

    environments = discovery.get_environments()
    print(json.dumps(environments, indent=2))

    # The first environment in the listing is used.
    news_environments = list(environments['environments'])
    news_environment_id = news_environments[0]['environment_id']
    print(json.dumps(news_environment_id, indent=2))

    collections = discovery.list_collections(news_environment_id)
    # Not used afterwards; kept so a response without 'collections' still
    # fails at this point.
    news_collections = list(collections['collections'])
    print(json.dumps(collections, indent=2))

    response = discovery.query(news_environment_id, 'news',
                               {'query': 'Fruit Seed Plant Omnivore'})
    print(json.dumps(response, indent=2))
    return jsonify(response)
def discovery(keyword):
    """Search a fixed collection for *keyword* and return the matching file names.

    Side effects:
        Stores the raw result list in the module global ``resultados_query``.

    Args:
        keyword: Query string.

    Returns:
        List with ``extracted_metadata.filename`` of every result.
    """
    global resultados_query

    client = DiscoveryV1(version="2018-03-05",
                         username="******",
                         password="******",
                         url='https://gateway.watsonplatform.net/discovery/api')
    response = client.query(
        environment_id='d3755050-a1c6-4cdb-a480-3bc1df719e7d',
        collection_id='c8117070-40cb-44b1-bb6c-29fac8f620d6',
        query=keyword)

    resultados_query = response['results']
    return [
        article['extracted_metadata']['filename']
        for article in resultados_query
    ]
def get_news(query):
    """Return up to five 'news-en' results for *query*.

    Uses the first environment in the listing and the module-level
    ``API_KEY``.

    Args:
        query: Query string.

    Returns:
        The result of the Discovery query (``get_result()``).
    """
    client = DiscoveryV1(iam_apikey=API_KEY, version='2017-08-01')

    environments = client.list_environments().get_result()
    # The first environment in the listing is used.
    news_environment_id = list(
        environments['environments'])[0]['environment_id']

    # The collection listing is fetched but not used afterwards; the call
    # and the key access are kept unchanged.
    collections = client.list_collections(news_environment_id).get_result()
    news_collections = list(collections['collections'])

    # NOTE(review): return_fields is a one-element list holding a
    # comma-separated string - confirm this is what the SDK expects.
    response = client.query(count=5,
                            return_fields=['title, text, url, sentiments'],
                            environment_id=news_environment_id,
                            collection_id='news-en',
                            query=query)
    return response.get_result()
def main():
    """Add not-yet-known URLs from the request to a Discovery collection.

    The request JSON must contain ``'value'`` with 'username', 'password',
    'db_url', 'result', 'environment_id' and 'collection_id'. Each URL that
    ``filter_known_urls`` lets through is downloaded to a temporary file and
    added to the collection; afterwards the added URLs are recorded with
    ``mark_urls_known``.

    Returns:
        A JSON ``flask.Response``: status 200 with ``{'result': [urls]}`` on
        success, or status 500 with the echoed parameters plus a 'result'
        message when 'value' or the credentials are missing.
    """

    def json_response(payload, status):
        # Every exit path serialises a payload with the same mimetype.
        body = json.dumps(payload)
        return flask.Response(response=body,
                              status=status,
                              mimetype='application/json')

    full_params = flask.request.get_json()
    if 'value' not in full_params:
        full_params['result'] = 'Params Malformed'
        return json_response(full_params, 500)

    params = full_params['value']
    if not ('username' in params and 'password' in params):
        params['result'] = 'No WDS Username/Password'
        return json_response(params, 500)

    discovery = DiscoveryV1('2016-12-15',
                            username=params['username'],
                            password=params['password'])
    added = []
    added_results = []
    for url in filter_known_urls(params['db_url'], params['result']):
        with download_to_temp(url) as tmpfile:
            discovery.add_document(environment_id=params['environment_id'],
                                   collection_id=params['collection_id'],
                                   fileinfo=tmpfile)
            added.append(url)
            # NOTE(review): the response lists the URL, not the value
            # returned by add_document - confirm that is intended.
            added_results.append(url)

    mark_urls_known(params['db_url'], added)
    return json_response({'result': added_results}, 200)