def GenerateAnkiCardsFromWikipediaCategory(url, deckName, user_id, api_key):
    """Build Anki cards from the pages linked on a Wikipedia category page.

    Two rounds of import.io queries are issued. The first sends ``url`` to
    the category connector; the second sends every ``url`` found in the rows
    of the first round to the article connector. Each second-round row that
    has both a ``title`` and a ``first_par`` becomes an ``AnkiCard``.

    Results travel through module-level state: ``callback`` / ``callback2``
    are expected to fill ``dataRows`` / ``dataRows2`` and (presumably) to
    count down the module-level ``queryLatch`` once per finished query --
    confirm against their definitions.

    Args:
        url: Address of the category page to scrape.
        deckName: Accepted but not used by this function.
        user_id: import.io user GUID.
        api_key: import.io API key.

    Returns:
        A list of ``AnkiCard`` objects, possibly empty.
    """
    global queryLatch

    cards = []
    client = importio.importio(user_id=user_id, api_key=api_key,
                               host="https://query.import.io")
    client.connect()
    try:
        # Round 1: a single query against the category page.
        queryLatch = latch.latch(1)
        client.query({
            "connectorGuids": ["68b4b6ac-25ce-434d-923d-7cc9661216ff"],
            "input": {"webpage/url": url}
        }, callback)
        print("Queries dispatched, now waiting for results")
        # `await` is a reserved word from Python 3.7 on, so the latch method
        # is looked up by name; the call itself is unchanged.
        getattr(queryLatch, "await")()
        print(json.dumps(dataRows, indent=4))

        # Round 2: one query per row that actually carries a URL. The latch
        # must be sized by the number of queries dispatched, not by the
        # number of rows, otherwise a row without a URL makes the wait below
        # block forever.
        page_urls = [row['url'] for row in dataRows if 'url' in row]
        queryLatch = latch.latch(len(page_urls))
        for page_url in page_urls:
            client.query({
                "connectorGuids": ["7fc7daa2-25a4-4649-b48c-be1d7fd8756e"],
                "input": {"webpage/url": page_url}
            }, callback2)
        getattr(queryLatch, "await")()
        print(json.dumps(dataRows2, indent=4))

        for row in dataRows2:
            if "title" in row and "first_par" in row:
                cards.append(AnkiCard(row["title"], row["first_par"]))
    finally:
        # Release the connection and reset the shared globals even when a
        # query fails, so a later call does not start from stale state.
        client.disconnect()
        reinitGlobalVariables()
    return cards
# OLD IMPORT IO SCRAPING SCRIPT FOR BBC OLD STYLE NEWS SITE ARTICLES 01/01/2010 - 20/08/2014
import logging, json, importio, latch

# NOTE(review): the user GUID and API key are committed in plain text here;
# they should be rotated and loaded from configuration instead.
client = importio.importio(
    user_id="cf592fba-bd1f-4128-8e98-e729c2bb7dec",
    api_key="aledxqRLOCLFo9O7cYeeC58aotifmZbL2C57Mg1zicz6ZLVSY94xttvI9AjeV1Fw9DpBg2y/cbrNZXM23yiWBg==",
    host="https://query.import.io")
client.connect()

# One latch slot per query this script expects to issue.
queryLatch = latch.latch(13441)

# All result rows received so far, and the rows of the most recent message.
dataRows = []
d = ''


def callback(query, message):
    """Handle one import.io message: log it and record the result URLs.

    Successful results are appended to the module-level ``dataRows``, kept in
    ``d`` as the latest batch, and each row's ``url`` is appended to
    ``urls.txt`` as ``<url>,`` on its own line.

    NOTE(review): the visible code never counts ``queryLatch`` down -- confirm
    that this happens elsewhere.

    Args:
        query: The query object the message belongs to (not used here).
        message: Message dict with a ``type`` and a ``data`` payload.
    """
    global dataRows
    global d
    if message["type"] == "DISCONNECT":
        print("Query in progress when library disconnected")
        print(json.dumps(message["data"], indent=4))
    if message["type"] == "MESSAGE":
        if "errorType" in message["data"]:
            print("Got an error!")
            print(json.dumps(message["data"], indent=4))
        else:
            print("Got data!")
            print(json.dumps(message["data"], indent=4))
            dataRows.extend(message["data"]["results"])
            d = message["data"]["results"]
            # Open the output file once per batch instead of once per row;
            # an empty batch still leaves the file untouched.
            if d:
                with open('urls.txt', 'a') as f:
                    f.writelines(i["url"] + ',\n' for i in d)
#proxies = { "http": "127.0.0.1:3128" }
# Then you can use the "proxies" variable when instantiating a new client library object
# For more details on this see below

# You have two choices for authenticating with the Python client: you can use your API key
# or your username and password. Username and password is quicker to get started with, but
# API key authentication will be more reliable for really large query volumes.
# If you need it, you can get YOUR_USER_GUID and YOUR_API_KEY from your account page, at
# http://import.io/data/account

# To use an API key for authentication, use the following code to initialise the library
# NOTE(review): the user GUID and API key are committed in plain text here;
# they should be rotated and loaded from configuration instead.
# The GUID previously ended with a stray apostrophe inside the string literal,
# which made it an invalid identifier.
client = importio.importio(
    user_id="fdcf7c1f-ee58-4644-9b55-0b660ef6bd2e",
    api_key="YcSkwP9WeCxz1kbdy64oJcLQehYtw8RAlyy5vd+EtcfJVpe0ojkzgujNtQUPkDHlpizfGo76Zr6EOzpyjdG9DA=="
)

# If you want to use the client library with API keys and proxies, use this command:
#client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY", proxies=proxies)

# Once you have initialised the client, connect it to the server:
client.connect()

# If you wish to use username and password based authentication, first create a client:
#client = importio.importio()
# If you wish to use proxies with your username and password, then you can do so like this:
#client = importio.importio(proxies=proxies)

# Next you need to log in to import.io using your username and password, like so:
#client.login("YOUR_USERNAME", "YOUR_PASSWORD")
def clientGen():
    """Create an import.io client configured from environment variables.

    The user GUID is read from ``IIO_USER`` and the API key from ``IIO_API``;
    a variable that is not set is handed to the client as ``None``. The
    client is returned without being connected.
    """
    env = os.environ
    credentials = {
        "user_id": env.get('IIO_USER'),
        "api_key": env.get('IIO_API'),
    }
    return importio.importio(host="https://query.import.io", **credentials)
def extract(connector, urls):
    """Run one import.io connector over a list of URLs and collect the rows.

    One query is issued per URL. Result rows are appended to the module-level
    ``data[connector]`` list and progress/error text to ``data["log"]``. The
    connector GUID is looked up in the module-level ``connectors`` mapping.

    Args:
        connector: Key into ``connectors`` and ``data`` naming the extractor.
        urls: Sized collection of page URLs to query.

    Returns:
        ``data[connector]`` -- the module-level list for this connector,
        including anything already stored in it before this call.
    """
    # NOTE(review): the credentials and the proxy address are committed in
    # plain text here; they should be rotated and loaded from configuration.
    client = importio.importio(
        user_id="d133b9b6-1253-4568-b727-425c7181ed93",
        api_key="xCSj76J7NK+PaXi5foAzbIjgyo+Y+Xpu1+oS+OpngOor8gYN/johObwTLAUaQSoGTGzmSCxVMJQU3mXbICU6SQ==",
        host="https://query.import.io",
        proxies={
            "http": "http://proxy.server:3128",
            "https": "http://proxy.server:3128"
        })
    client.connect()

    # One latch slot per URL; each finished query releases one slot.
    queryLatch = latch.latch(len(urls))

    def callback(query, message):
        # `data` is only mutated, never rebound, so no `global` is needed.
        log = data["log"]
        # Disconnect messages happen if the client library is disconnected
        # while a query is still in progress.
        if message["type"] == "DISCONNECT":
            log.append("Query in progress when library disconnected")
            log.append(json.dumps(message["data"], indent=4))
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # A message arrived, but it reports an error from the
                # external service.
                log.append("Got an error!")
                log.append(json.dumps(message["data"], indent=4))
            else:
                data[connector].extend(message["data"]["results"])
        # Release one latch slot once this query has fully finished.
        if query.finished():
            queryLatch.countdown()

    try:
        for url in urls:
            client.query(
                {
                    "connectorGuids": [connectors[connector]],
                    "input": {
                        "webpage/url": url
                    }
                }, callback)
        data["log"].append("Queries dispatched, now waiting for results")
        # `await` is a reserved word from Python 3.7 on, so the latch method
        # is looked up by name; the call itself is unchanged.
        getattr(queryLatch, "await")()
        data["log"].append("Latch has completed, all results returned")
    finally:
        # Always release the connection, even if dispatching or waiting
        # raised.
        client.disconnect()

    # Summarise what was received in the log.
    data["log"].append("All data received:")
    if connector == "fixture":
        data["log"].extend(
            "%s vs %s" % (f["hometeam/_title"], f["awayteam/_title"])
            for f in data[connector])
    elif connector == "history":
        data["log"].extend(urls)
    else:
        data["log"].append(json.dumps(data[connector], indent=4))
    return data[connector]
from importio_login import GUID, API_key, Amaz_BestSeller_GUID, Amaz_BestSeller_URL import logging, json, importio, latch # You do not need to do this, but setting the logging level will reveal logs about # what the import.io client is doing and will surface more information on errors logging.basicConfig(level=logging.INFO) # If you wish, you may configure HTTP proxies that Python can use to connect # to import.io. If you need to do this, uncomment the following line and fill in the # correct details to specify an HTTP proxy: #proxies = { "http": "127.0.0.1:3128" } client = importio.importio(user_id=GUID, api_key=API_key) client.connect() queryLatch = latch.latch(1) dataRows = [] def callback(query, message): global dataRows # Disconnect messages happen if we disconnect the client library while a query is in progress if message["type"] == "DISCONNECT": print "Query in progress when library disconnected" print json.dumps(message["data"], indent = 4) # Check the message we receive actually has some data in it if message["type"] == "MESSAGE":
def importquery(conNum, ImportURL, filename):
    """Query one import.io connector for a page and save the rows as JSON.

    The rows of the last successful message are stored in the module-level
    ``dataRows`` and written, pretty-printed, to ``sites\\<filename>.dat``.
    Credentials come from the module-level ``user_id_Value`` and
    ``api_key_Value``.

    Args:
        conNum: GUID of the connector to query.
        ImportURL: Page URL passed to the connector as ``webpage/url``.
        filename: Base name (no extension) of the output file; also used in
            the progress messages.
    """
    global dataRows
    # Start every call from an empty result. Previously a query that failed
    # or returned nothing left `dataRows` holding the previous call's rows
    # (or undefined on the first call), and that was written to this file.
    dataRows = []

    client = importio.importio(user_id=user_id_Value, api_key=api_key_Value,
                               host="https://query.import.io")
    client.connect()

    # import.io queries are asynchronous; the latch blocks this function
    # until the single query issued below has finished.
    queryLatch = latch.latch(1)

    def callback(query, message):
        global dataRows
        # Disconnect messages happen if the client library is disconnected
        # while a query is still in progress.
        if message["type"] == "DISCONNECT":
            print("Query in progress when library disconnected")
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # A message arrived, but it reports an error from the
                # external service.
                print("Got an error!")
            else:
                print("Got data!")
                # Each successful message replaces the stored rows.
                dataRows = message["data"]["results"]
        # Release the latch once the query has fully finished.
        if query.finished():
            queryLatch.countdown()

    try:
        client.query(
            {
                "connectorGuids": [conNum],
                "input": {
                    "webpage/url": ImportURL
                }
            }, callback)
        print("Checking " + filename + ", waiting for results")
        # `await` is a reserved word from Python 3.7 on, so the latch method
        # is looked up by name; the call itself is unchanged.
        getattr(queryLatch, "await")()
    finally:
        # Disconnecting lets both the client and the server clean up, and
        # must happen even when the query could not be issued.
        client.disconnect()

    # Use a context manager so the file is flushed and closed promptly.
    with open("sites\\" + filename + ".dat", 'w') as out:
        out.write(json.dumps(dataRows, indent=4))
    print(filename + " data received, file saved")
# to import.io. If you need to do this, uncomment the following line and fill in the # correct details to specify an HTTP proxy: #proxies = { "http": "127.0.0.1:3128" } # Then you can use the "proxies" variable when instantiating a new client library object # For more details on this see below # You have two choices for authenticating with the Python client: you can use your API key # or your username and password. Username and password is quicker to get started with, but # API key authentication will be more reliable for really large query volumes. # If you need it, you can get YOUR_USER_GUID and YOUR_API_KEY from your account page, at # http://import.io/data/account # To use an API key for authentication, use the following code to initialise the library client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY") # If you want to use the client library with API keys and proxies, use this command: #client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY", proxies=proxies) # Once you have initialised the client, connect it to the server: client.connect() # If you wish to use username and password based authentication, first create a client: #client = importio.importio() # If you wish to use proxies with your username and password, then you can do so like this: #client = importio.importio(proxies=proxies) # Next you need to log in to import.io using your username and password, like so: #client.login("YOUR_USERNAME", "YOUR_PASSWORD") # Because import.io queries are asynchronous, for this simple script we will use a "latch"
import logging, json, importio, latch # To use an API key for authentication, use the following code: client = importio.importio(user_id="bf4b28b0-c952-47c2-a0c2-a615ae963b71", api_key="bf4b28b0c95247c2a0c2a615ae963b71d0de22db4cc190b6305176ed3aa35ef5c0765cb94923560fc48ce293ca722eee9578b2d55ac766852f751eb7e9818a1b495035b513f162f0d29ff242672209dc", host="https://query.import.io") # Once we have started the client and authenticated, we need to connect it to the server: client.connect() # Because import.io queries are asynchronous, for this simple script we will use a "latch" # to stop the script from exiting before all of our queries are returned # For more information on the latch class, see the latch.py file included in this client library queryLatch = latch.latch(2) # Define here a global variable that we can put all our results in to when they come back from # the server, so we can use the data later on in the script dataRows = [] # In order to receive the data from the queries we issue, we need to define a callback method # This method will receive each message that comes back from the queries, and we can take that # data and store it for use in our app def callback(query, message): global dataRows # Disconnect messages happen if we disconnect the client library while a query is in progress if message["type"] == "DISCONNECT": print "Query in progress when library disconnected" print json.dumps(message["data"], indent = 4) # Check the message we receive actually has some data in it if message["type"] == "MESSAGE": if "errorType" in message["data"]:
print "Got an error!" print json.dumps(message["data"], indent = 4) else: # We got a message and it was not an error, so we can process the data #print "Got data!" #print json.dumps(message["data"], indent = 4) # Save the data we got in our current_results variable for later current_results[message["data"]["pageUrl"]]=message["data"]["results"] # When the query is finished, countdown the latch so the program can continue when everything is done if query.finished(): queryLatch.countdown() # Initialise the library # To use an API key for authentication, use the following code: client = importio.importio(user_id=user_id, api_key=api_key, host="https://query.import.io") client.connect() # Now we are going to query the first extractor print "Querying the first extractor:" # If the input for the first extractor is onyl one: if isinstance(starting_query,list)==False: # Use a latch to stop the program from exiting queryLatch = latch.latch(1) current_results = {} # Querying extractor 1: client.query({ "connectorGuids": [
import importio, latch, sys, uuid, time # Retrieve the credentials from the command line host = sys.argv[1] username = sys.argv[2] password = sys.argv[3] userguid = sys.argv[4] api_key = sys.argv[5] ''' Test 1 Test that specifying incorrect username and password raises an exception ''' client = importio.importio(host="http://query." + host) try: client.login(str(uuid.uuid4()), str(uuid.uuid4()), host="https://api." + host) print("Test 1: Failed (did not throw exception)") sys.exit(1) except Exception: print("Test 1: Success") client.disconnect() ''' Test 2 Test that providing an incorrect user GUID raises an exception
import importio, latch, sys, uuid, time

# Command-line arguments 1-5, in order: service host, username, password,
# user GUID and API key.
host, username, password, userguid, api_key = (
    sys.argv[position] for position in range(1, 6))

# Test 1
#
# Logging in with a random (and therefore invalid) username and password
# must raise an exception.
client = importio.importio(host="http://query.%s" % host)
try:
    client.login(str(uuid.uuid4()), str(uuid.uuid4()),
                 host="https://api.%s" % host)
    print("Test 1: Failed (did not throw exception)")
    sys.exit(1)
except Exception:
    # sys.exit raises SystemExit, which is not an Exception subclass, so the
    # failure path above is not swallowed here.
    print("Test 1: Success")
client.disconnect()

# Test 2
#
# Test that providing an incorrect user GUID raises an exception