def main(JSONinput):
    """
    Run a Bookworm API query and wrap its result in an HTTP response object.

    JSONinput is a JSON-encoded query; it must contain a 'method' key.

    Queries with method 'data' and format 'json' are parsed and re-emitted
    through jsonify(); when the payload reports status 'error', the HTTP
    status code is taken from its 'code' field (500 if absent). Every other
    query has its raw result wrapped in a Response, with content headers
    chosen by method. CORS headers are added to every response.

    Returns the response object.
    """
    query = json.loads(JSONinput)
    result = SQLAPIcall(query).execute()

    if query['method'] == 'data' and query.get('format') == 'json':
        # New format for response
        jresp = json.loads(result)
        resp = jsonify(jresp)
        # .get() rather than indexing: a payload without a 'status' key is
        # treated as a success instead of raising KeyError.
        if jresp.get('status') == 'error':
            resp.status_code = jresp.get('code', 500)
    else:
        resp = Response(result)

    if query['method'] == "return_tsv":
        resp.headers['Content-Type'] = "text; charset=utf-8"
        resp.headers["Content-Disposition"] = "filename=Bookworm-data.txt"
        resp.headers["Pragma"] = "no-cache"
        resp.headers["Expires"] = 0
    elif query['method'] in ['return_json', 'return_pickle']:
        resp.headers['Content-Type'] = "text/html"

    # Allow cross-origin callers to reach the API.
    resp.headers['Access-Control-Allow-Origin'] = '*'
    resp.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, OPTIONS'
    resp.headers['Access-Control-Allow-Headers'] = 'Origin, Accept, '\
        'Content-Type, X-Requested-With, X-CSRF-Token'

    return resp
def main(JSONinput):
    """
    Run a Bookworm API query and print the headers and body to stdout.

    JSONinput is a JSON-encoded query; it must contain a 'method' key.

    For method 'data' with format 'json' the result is parsed as JSON. If it
    cannot be parsed, an error payload with code 500 is emitted instead. An
    error payload passes its code (500 if absent) to headers() as errorcode.
    Any other query has its result printed unchanged.

    Always returns True.
    """
    query = json.loads(JSONinput)

    # Set up the query and run it.
    resp = SQLAPIcall(query).execute()

    if query['method'] != 'data' or query.get('format') != 'json':
        headers(query['method'])
        print(resp)
        return True

    try:
        resp = json.loads(resp)
    except (ValueError, TypeError):
        # ValueError: not valid JSON; TypeError: not a string at all.
        # A bare except here would also swallow KeyboardInterrupt/SystemExit.
        resp = dict(status="error", code=500,
                    message="Internal error: server did not return json")

    # Print appropriate HTML headers
    if isinstance(resp, dict) and resp.get('status') == 'error':
        headers(query['method'], errorcode=resp.get('code', 500))
    else:
        headers(query['method'])
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(json.dumps(resp))
    return True
def main(JSONinput):
    """
    Execute the API query encoded in JSONinput and write the reply to stdout.

    JSONinput must decode to a mapping with a 'method' key. When the method
    is 'data' and the format is 'json', the query result is decoded and
    re-serialised; a result that is not valid JSON is replaced by an error
    payload (code 500). Error payloads forward their code to headers() via
    the errorcode keyword. All other methods print the raw result.

    Always returns True.
    """
    query = json.loads(JSONinput)

    # Set up the query, then run it.
    p = SQLAPIcall(query)
    resp = p.execute()

    wants_json = query['method'] == 'data' and query.get('format') == 'json'
    if wants_json:
        try:
            resp = json.loads(resp)
        except (ValueError, TypeError):
            # Narrowed from a bare except so that interpreter-exit signals
            # (KeyboardInterrupt, SystemExit) are no longer swallowed.
            resp = dict(status="error", code=500,
                        message="Internal error: server did not return json")

        # Print appropriate HTML headers
        failed = isinstance(resp, dict) and resp.get('status') == 'error'
        if failed:
            headers(query['method'], errorcode=resp.get('code', 500))
        else:
            headers(query['method'])
        # Parenthesised single-argument print works on Python 2 and 3.
        print(json.dumps(resp))
    else:
        headers(query['method'])
        print(resp)

    return True
def test_asterisks_in_search_limits(self):
    """
    The following two queries should, by definition, produce the same result.

    The first spells out compare_limits explicitly; the second marks the
    'author' limit with a leading asterisk instead.
    """
    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "word": ["on"],
            "author": ["HAMILTON"]
        },
        "compare_limits": {
            "word": ["on"]
        },
        "counttype": "WordsPerMillion",
        "groups": [],
        "method": "return_json"
    }
    val1 = json.loads(SQLAPIcall(query).execute())

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "word": ["on"],
            "*author": ["HAMILTON"]
        },
        "counttype": "WordsPerMillion",
        "groups": [],
        "method": "return_json"
    }
    val2 = json.loads(SQLAPIcall(query).execute())

    # assertEqual reports both values when the comparison fails.
    self.assertEqual(val1[0], val2[0])
def test_adding_metadata_to_bookworm(self):
    """
    Build out some dummy metadata: label the difference
    between even and odd paragraphs.

    Writes a TSV keyed on paragraphNumber, adds it to the
    federalist_bookworm database, then checks that the new 'oddness'
    field can be used as a grouping.
    """
    from bookwormDB.manager import BookwormManager
    manager = BookwormManager(database="federalist_bookworm")

    # Create a phony derived field to test metadata supplementing
    def even_even(number):
        if number % 2 == 0:
            return "even"
        return "odd"

    tmp_file = "{}/test_bookworm_metadata.tsv".format(sys.path[0])
    with open(tmp_file, "w") as newMetadata:
        newMetadata.write("paragraphNumber\toddness\n")
        for n in range(500):
            newMetadata.write("%d\t%s\n" % (n, even_even(n)))

    class Dummy(object):
        """
        Just quickly create a namespace to stand in for the command-line args.
        """
        key = "paragraphNumber"
        format = "tsv"
        file = tmp_file
        # Test the guessing at field_descriptions while we're at it
        field_descriptions = None

    manager.add_metadata(Dummy)

    # And then we test if that can be retrieved
    query = {
        "database": "federalist_bookworm",
        "search_limits": {},
        "counttype": "TextCount",
        "groups": ["oddness"],
        "method": "data",
        "format": "json"
    }
    m = json.loads(SQLAPIcall(query).execute())['data']

    # Even or odd is one of two things.
    self.assertEqual(len(m), 2)

    # Since the first paragraph is odd,
    # there should be more of those.
    self.assertGreaterEqual(m['odd'][0], m['even'][0])
def query(self, args):
    """
    Run a query against the API.

    args.APIcall holds the query as a JSON string; the result of executing
    it is printed to stdout.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    # Named api_query so the local does not shadow this method's own name.
    api_query = json.loads(args.APIcall)
    caller = SQLAPIcall(api_query)
    # Parenthesised single-argument print works on Python 2 and 3.
    print(caller.execute())
def main(JSONinput):
    """
    Print the headers for a query's method, then the query result.

    JSONinput is a JSON-encoded query; it must contain a 'method' key.
    Always returns True.
    """
    query = json.loads(JSONinput)

    # Print appropriate HTML headers
    headers(query['method'])

    # Set up the query.
    p = SQLAPIcall(query)

    # Run the query. Parenthesised single-argument print works on
    # Python 2 and 3.
    print(p.execute())

    return True
def query(self, args):
    """
    Run a query against the API.

    The JSON text in args.APIcall is decoded, executed through SQLAPIcall,
    and the outcome is printed to stdout.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    # The decoded query is passed straight through, so no local shadows
    # this method's own name.
    caller = SQLAPIcall(json.loads(args.APIcall))
    # Parenthesised single-argument print works on Python 2 and 3.
    print(caller.execute())
def test_unicode_search_term(self):
    """
    A non-ASCII search word should return a positive word count.
    """
    query = {
        "database": "unicode_test_bookworm",
        "search_limits": {"word": [u"ᎾᏍᎩ"]},
        "counttype": "WordCount",
        "groups": [],
        "words_collation": "Case_Insensitive",
        "method": "return_json"
    }
    # A single construction is enough; the previously discarded extra
    # SQLAPIcall(query) contributed nothing to the assertion.
    val1 = json.loads(SQLAPIcall(query).execute())
    # assertGreater reports the actual value when the check fails.
    self.assertGreater(val1[0], 0)
def test_case_insensitivity_works_without_search_term(self):
    """
    A mixed-case search word should still match under Case_Insensitive
    collation.
    """
    query = {
        "database": "federalist_bookworm",
        "search_limits": {"word": ["hOwEvEr"]},
        "counttype": "WordCount",
        "groups": [],
        "words_collation": "Case_Insensitive",
        "method": "return_json"
    }
    # A single construction is enough; the previously discarded extra
    # SQLAPIcall(query) contributed nothing to the assertion.
    val1 = json.loads(SQLAPIcall(query).execute())
    # assertGreater reports the actual value when the check fails.
    self.assertGreater(val1[0], 0)
def test_various_unicode_cases(self):
    """
    Every 'description_' value in the unicode test catalog should be
    usable as a search limit and return a positive word count.
    """
    # There's a 'description_' for each individual item.
    catalog_location = sys.path[0] + "/test_bookworm_files_unicode/jsoncatalog.txt"
    # Read inside a context manager so the catalog handle is closed
    # before the queries run.
    with open(catalog_location) as catalog:
        cases = [json.loads(line)["description_"] for line in catalog]

    for case in cases:
        query = {
            "database": "unicode_test_bookworm",
            "search_limits": {"description_": case},
            "counttype": "WordCount",
            "groups": [],
            "words_collation": "Case_Insensitive",
            "method": "return_json"
        }
        val1 = json.loads(SQLAPIcall(query).execute())
        # assertGreater reports the actual value when the check fails.
        self.assertGreater(val1[0], 0)
def test_case_sensitivity(self):
    """
    A case-insensitive count for a word should exceed its case-sensitive
    count.
    """
    query = {
        "database": "federalist_bookworm",
        "search_limits": {"word": ["the"]},
        "counttype": "WordCount",
        "groups": [],
        "words_collation": "Case_Sensitive",
        "method": "return_json"
    }
    val1 = json.loads(SQLAPIcall(query).execute())
    self.assertGreater(val1[0], 0)

    # Same query again, now ignoring case.
    query["words_collation"] = "Case_Insensitive"
    val2 = json.loads(SQLAPIcall(query).execute())

    # The words ('The','the') appear more often than ('the') alone.
    self.assertGreater(val2[0], val1[0])
def test_API(self):
    """
    Grouping the federalist bookworm by author should yield five groups.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {},
        "counttype": "TextPercent",
        "groups": ["author"],
        "method": "return_json"
    }

    m = json.loads(SQLAPIcall(query).execute())
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(m), 5)
def test_lte_and_gte(self):
    """
    Limiting fedNumber with $gte 5 and $lte 10 should yield six groups.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "fedNumber": {"$lte": 10, "$gte": 5}
        },
        "counttype": "TextCount",
        "groups": ["fedNumber"],
        "method": "return_json"
    }

    m = json.loads(SQLAPIcall(query).execute())
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(m), 6)
def test_ne_with_two_entries(self):
    """
    Excluding two authors with $ne should leave three author groups.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "author": {"$ne": ["HAMILTON", "DISPUTED"]}
        },
        "counttype": "TextPercent",
        "groups": ["author"],
        "method": "return_json"
    }

    m = json.loads(SQLAPIcall(query).execute())
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(m), 3)
def test_multiword_search(self):
    """
    Searching for either of two words should give a TextPercent above 33.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "word": ["on", "upon"]
        },
        "counttype": "TextPercent",
        "method": "data",
        "format": "json",
        "groups": []
    }

    m = json.loads(SQLAPIcall(query).execute())['data']
    # assertGreater reports the actual value when the check fails.
    self.assertGreater(m[0], 33)
def test_ne_with_one_entry(self):
    """
    Excluding one author with $ne should leave four author groups.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "author": {
                "$ne": ["HAMILTON"]
            }
        },
        "counttype": "TextPercent",
        "groups": ["author"],
        "method": "data",
        "format": "json"
    }

    m = json.loads(SQLAPIcall(query).execute())['data']
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(m), 4)
def test_and_with_two_entries(self):
    """
    An $and of author HAMILTON and fedNumber 40 should match nothing.
    """
    from bookwormDB.general_API import SQLAPIcall
    import json

    query = {
        "database": "federalist_bookworm",
        "search_limits": {
            "$and": [{
                "author": ["HAMILTON"]
            }, {
                "fedNumber": [40]
            }]
        },
        "counttype": "TextCount",
        "groups": ["author"],
        "method": "data",
        "format": "json"
    }

    m = json.loads(SQLAPIcall(query).execute())['data']
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(m), 0)
class Bookworm_SQL_Creation(unittest.TestCase):
    """
    Tests that talk to a live MySQL server: connect, read the config files,
    then build the federalist bookworm and query it.
    """

    def test_server_connection(self):
        """
        Connect to MySQL and run a simple query.
        """
        logging.info("\n\nTESTING SERVER CONNECTION\n\n")
        import bookwormDB.CreateDatabase
        db = bookwormDB.CreateDatabase.DB(dbname="mysql")
        sampleQuery = db.query("SELECT 1+1").fetchall()
        self.assertEqual(sampleQuery[0][0], 2)

    # To properly test things, we actually build some bookworms.
    # This assumes that the directory '/tmp' is writeable,
    # which isn't strictly necessary for a bookworm to be built.

    def test_config_files(self):
        """
        Check that the global and admin config files expose client
        credentials.
        """
        logging.info("\n\nTESTING CONFIG FILE ACCESS\n\n")

        def test_config_file(conf):
            # The values are not inspected; the lookups only need to
            # succeed without raising.
            conf.get("client", "user")
            conf.get("client", "password")

        global_configuration_file = bookwormDB.configuration.Configfile(
            "global").config
        admin_configuration_file = bookwormDB.configuration.Configfile(
            "admin").config

        test_config_file(global_configuration_file)
        test_config_file(admin_configuration_file)

    def test_bookworm_creation(self):
        """
        Creates a test bookworm. Removes any existing databases called
        "federalist_bookworm".

        Downloads the federalist-bookworm archive into /tmp, builds it with
        make, then checks the word count, an API query, and that
        supplementary metadata can be added and queried.
        """
        logging.info("\n\nTESTING BOOKWORM CREATION\n\n")

        import json
        import os
        import shutil
        import zipfile
        from subprocess import call
        from urllib2 import urlopen
        from warnings import filterwarnings

        import MySQLdb
        import bookwormDB.CreateDatabase
        import bookwormDB.configuration
        from bookwormDB.general_API import SQLAPIcall
        from bookwormDB.manager import BookwormManager

        filterwarnings('ignore', category=MySQLdb.Warning)

        db = bookwormDB.CreateDatabase.DB(dbname="mysql")

        try:
            db.query("DROP DATABASE federalist_bookworm")
        except MySQLdb.OperationalError as e:
            # Error 1008 is tolerated (presumably "database doesn't exist" -
            # confirm against the MySQL error reference); anything else is
            # re-raised.
            if e.args[0] != 1008:
                raise
        except Exception as e:
            # This is some weird MariaDB exception. It sucks that I'm
            # compensating for it here.
            known = "Cannot load from mysql.proc. The table is probably corrupted"
            if not (e.args and e.args[0] == known):
                logging.warning(
                    "Some mysterious error in attempting to drop previous iterations: just try running it again?"
                )

        url = "https://github.com/bmschmidt/federalist-bookworm/archive/master.zip"
        remote = urlopen(url)
        try:
            payload = remote.read()
        finally:
            # Close the response explicitly.
            remote.close()
        with open("/tmp/federalist.zip", "wb") as local_file:
            local_file.write(payload)

        project_dir = "/tmp/federalist/federalist-bookworm-master"
        if os.path.exists(project_dir + "/"):
            # Reuse a previous checkout, but discard its build directory.
            if os.path.exists(project_dir + "/.bookworm"):
                shutil.rmtree(project_dir + "/.bookworm")
        else:
            with zipfile.ZipFile(r'/tmp/federalist.zip') as archive:
                archive.extractall(r'/tmp/federalist')

        globalc = bookwormDB.configuration.Configfile("global").config
        password = globalc.get("client", "password")
        user = globalc.get("client", "user")
        with open(project_dir + "/bookworm.cnf", "w") as output:
            output.write(
                """[client]\ndatabase = federalist_bookworm\nuser=%s\npassword=%s\n"""
                % (user, password))
            # This doesn't worry about client-side passwords.

        call(["make"], shell=True, cwd=project_dir)

        db.query("USE federalist_bookworm")
        wordCount = db.query(
            "SELECT SUM(nwords) FROM fastcat").fetchall()[0][0]
        # This should be 212,081, but I don't want the tests to start failing when
        # we change the tokenization rules or miscellaneous things about encoding.
        self.assertGreater(wordCount, 100000)

        # Then we test whether the API can make queries on that bookworm.
        query = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextPercent",
            "groups": ["author"],
            "method": "return_json"
        }
        m = json.loads(SQLAPIcall(query).execute())
        self.assertEqual(len(m), 5)

        # And then we test if we can add metadata to the bookworm.
        manager = BookwormManager(database="federalist_bookworm")

        # Create a phony derived field to test metadata supplementing
        def even_even(number):
            if number % 2 == 0:
                return "even"
            return "odd"

        # The with-block closes (and therefore flushes) the file before
        # add_metadata reads it; with an unclosed handle the rows could still
        # be sitting in the write buffer.
        with open("/tmp/test_bookworm_metadata.tsv", "w") as newMetadata:
            newMetadata.write("paragraphNumber\toddness\n")
            for n in range(500):
                newMetadata.write("%d\t%s\n" % (n, even_even(n)))

        class Dummy:
            """
            Just quickly create a namespace to stand in for the command-line args.
            """
            key = "paragraphNumber"
            format = "tsv"
            file = "/tmp/test_bookworm_metadata.tsv"
            # Test the guessing at field_descriptions while we're at it
            field_descriptions = None

        os.chdir(project_dir)
        manager.add_metadata(Dummy)

        # And then we test if that can be retrieved
        query = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextCount",
            "groups": ["oddness"],
            "method": "return_json"
        }
        m = json.loads(SQLAPIcall(query).execute())

        # Even or odd is one of two things.
        self.assertEqual(len(m), 2)

        # Odd paragraphs are expected to be at least as numerous as even
        # ones (presumably because paragraph numbering starts at 1 - confirm).
        self.assertGreaterEqual(m['odd'][0], m['even'][0])
def application(environ, start_response, logfile="bookworm_queries.log"):
    """
    WSGI entry point: run the JSON query carried in QUERY_STRING.

    Parameters:
        environ: the WSGI environment. QUERY_STRING carries the URL-quoted
            JSON query, optionally prefixed with "query=" or "queryTerms=".
            HTTP_X_FORWARDED_FOR (falling back to REMOTE_ADDR) is recorded
            as the caller's ip.
        start_response: the WSGI start_response callable.
        logfile: path of the file that one JSON line per successful query
            is appended to.

    Returns a one-element list holding the response body as bytes. A query
    that is not a valid JSON object gets a 404 status and a JSON error
    message, and is not logged.
    """
    # Starting with code from
    # http://wsgi.tutorial.codepoint.net/parsing-the-request-post

    # A missing QUERY_STRING is treated as an empty query (rejected below as
    # invalid JSON) rather than crashing inside unquote().
    q = environ.get('QUERY_STRING') or ''

    ip = environ.get('HTTP_X_FORWARDED_FOR')
    if ip is None:
        ip = environ.get('REMOTE_ADDR')

    query = unquote(q)

    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET, POST, PUT, OPTIONS',
        'Access-Control-Allow-Headers':
            'Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token',
        'charset': 'utf-8'
    }

    logging.debug("Received query {}".format(query))
    start = datetime.now()

    # Backward-compatibility: we used to force query to be
    # a named argument.
    # Remove the literal prefix only: str.strip() takes a *set of
    # characters*, so it would also eat matching characters from either end.
    for prefix in ("query=", "queryTerms="):
        if query.startswith(prefix):
            query = query[len(prefix):]

    try:
        query = json.loads(query)
        # Raises TypeError when the JSON is not an object, which is treated
        # as invalid input as well.
        query['ip'] = ip
    except Exception:
        # Request boundary on untrusted input: report any parse failure to
        # the client, but (unlike a bare except) let KeyboardInterrupt and
        # SystemExit propagate.
        # The numeric code is kept for existing clients, although 400 would
        # describe the failure better. WSGI requires "<code> <reason>".
        status = '404 Not Found'
        start_response(status, list(headers.items()))
        return [
            b'{"status":"error", "message": "You have passed invalid JSON to the Bookworm API"}'
        ]

    process = SQLAPIcall(query)
    response_body = process.execute()

    headers['Content-type'] = content_type(query)
    # It might be binary already; only text needs encoding.
    if isinstance(response_body, str):
        response_body = response_body.encode('utf-8')

    headers['Content-Length'] = str(len(response_body))
    status = '200 OK'
    start_response(status, list(headers.items()))

    query['time'] = start.timestamp()
    query['duration'] = datetime.now().timestamp() - start.timestamp()

    # This writing isn't thread-safe; but generally we're not getting more
    # than a couple queries a second.
    with open(logfile, 'a') as fout:
        json.dump(query, fout)
        fout.write("\n")
    logging.debug("Writing to log: \n{}\n".format(json.dumps(query)))
    return [response_body]