def main(JSONinput):
    """
    Run a Bookworm API query and wrap its output in an HTTP response object.

    JSONinput is a JSON-encoded query; it must contain a 'method' key.

    For method 'data' with format 'json' the result of the query is parsed
    and re-emitted through `jsonify`, and an error payload sets the HTTP
    status code. Every other query is wrapped as-is in a `Response`.
    CORS headers are attached to every response.

    Raises ValueError if JSONinput is not valid JSON and KeyError if the
    query has no 'method' key.
    """
    query = json.loads(JSONinput)

    p = SQLAPIcall(query)
    result = p.execute()

    if query['method'] == 'data' and query.get('format') == 'json':
        # New format for response
        try:
            jresp = json.loads(result)
        except (TypeError, ValueError):
            # The backend handed back something that is not JSON; report
            # it as a server error instead of letting the parse error
            # escape.
            jresp = dict(status="error",
                         code=500,
                         message="Internal error: server did not return json")
        resp = jsonify(jresp)
        # A payload without a 'status' key is treated as a success rather
        # than raising KeyError.
        if isinstance(jresp, dict) and jresp.get('status') == 'error':
            resp.status_code = jresp.get('code', 500)
    else:
        resp = Response(result)

    if query['method'] == "return_tsv":
        resp.headers['Content-Type'] = "text; charset=utf-8"
        resp.headers["Content-Disposition"] = "filename=Bookworm-data.txt"
        resp.headers["Pragma"] = "no-cache"
        resp.headers["Expires"] = 0
    elif query['method'] in ['return_json', 'return_pickle']:
        resp.headers['Content-Type'] = "text/html"

    # Allow cross-origin callers.
    resp.headers['Access-Control-Allow-Origin'] = '*'
    resp.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, OPTIONS'
    resp.headers['Access-Control-Allow-Headers'] = 'Origin, Accept, '\
        'Content-Type, X-Requested-With, X-CSRF-Token'

    return resp
Example #2
0
def main(JSONinput):
    """
    Run a Bookworm API query and print the headers and body to stdout.

    JSONinput is a JSON-encoded query; it must contain a 'method' key.

    For method 'data' with format 'json' the result is parsed, the headers
    are emitted with the error code (if the payload reports an error), and
    the payload is printed re-serialized. Any other query prints the raw
    result after the default headers.

    Always returns True.
    """
    query = json.loads(JSONinput)
    # Set up the query.
    p = SQLAPIcall(query)

    # run the query.
    resp = p.execute()

    if query['method'] == 'data' and query.get('format') == 'json':
        try:
            resp = json.loads(resp)
        except (TypeError, ValueError):
            # Only parse failures are converted to an error payload;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            resp = dict(status="error",
                        code=500,
                        message="Internal error: server did not return json")

        # Print appropriate HTML headers
        if 'status' in resp and resp['status'] == 'error':
            code = resp['code'] if 'code' in resp else 500
            headers(query['method'], errorcode=code)
        else:
            headers(query['method'])
        # Parenthesized single-argument print behaves the same on
        # Python 2 and Python 3.
        print(json.dumps(resp))
    else:
        headers(query['method'])
        print(resp)

    return True
Example #3
0
def main(JSONinput):
    """
    Execute a Bookworm API query and return it as an HTTP response object.

    JSONinput is a JSON-encoded query that must carry a 'method' key.

    Queries with method 'data' and format 'json' are returned through
    `jsonify`, with the HTTP status taken from an error payload's 'code'
    (500 when absent). All other queries are returned through `Response`
    unchanged. CORS headers are always added.

    Raises ValueError if JSONinput is not valid JSON and KeyError if the
    query has no 'method' key.
    """
    query = json.loads(JSONinput)

    p = SQLAPIcall(query)
    result = p.execute()

    wants_json = (query['method'] == 'data'
                  and query.get('format') == 'json')

    if wants_json:
        # New format for response
        try:
            jresp = json.loads(result)
        except (TypeError, ValueError):
            # A non-JSON result is surfaced as a structured server error
            # rather than an unhandled exception.
            jresp = dict(status="error",
                         code=500,
                         message="Internal error: server did not return json")
        resp = jsonify(jresp)
        # Use .get so a payload lacking 'status' does not raise KeyError.
        if isinstance(jresp, dict) and jresp.get('status') == 'error':
            resp.status_code = jresp.get('code', 500)
    else:
        resp = Response(result)

    if query['method'] == "return_tsv":
        resp.headers['Content-Type'] = "text; charset=utf-8"
        resp.headers["Content-Disposition"] = "filename=Bookworm-data.txt"
        resp.headers["Pragma"] = "no-cache"
        resp.headers["Expires"] = 0
    elif query['method'] in ['return_json', 'return_pickle']:
        resp.headers['Content-Type'] = "text/html"

    # Allow cross-origin callers.
    resp.headers['Access-Control-Allow-Origin'] = '*'
    resp.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, OPTIONS'
    resp.headers['Access-Control-Allow-Headers'] = 'Origin, Accept, '\
        'Content-Type, X-Requested-With, X-CSRF-Token'

    return resp
def main(JSONinput):
    """
    Execute a Bookworm API query and print headers plus body to stdout.

    JSONinput is a JSON-encoded query that must carry a 'method' key.

    When the query asks for method 'data' in 'json' format, the result is
    parsed so that an error payload can select the error code passed to
    `headers`; the payload is then printed as JSON. Otherwise the default
    headers and the raw result are printed.

    Always returns True.
    """
    query = json.loads(JSONinput)
    # Set up the query.
    p = SQLAPIcall(query)

    # run the query.
    resp = p.execute()

    if query['method'] == 'data' and query.get('format') == 'json':
        try:
            resp = json.loads(resp)
        except (TypeError, ValueError):
            # Catch only parse failures; a bare except would also hide
            # KeyboardInterrupt and SystemExit.
            resp = dict(status="error", code=500,
                        message="Internal error: server did not return json")

        # Print appropriate HTML headers
        if 'status' in resp and resp['status'] == 'error':
            code = resp['code'] if 'code' in resp else 500
            headers(query['method'], errorcode=code)
        else:
            headers(query['method'])
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(json.dumps(resp))
    else:
        headers(query['method'])
        print(resp)

    return True
Example #5
0
    def test_asterisks_in_search_limits(self):
        """
        The following two queries should, by definition, produce the same result.

        The first query spells out its comparison set with an explicit
        "compare_limits" entry; the second has no "compare_limits" and
        instead prefixes the "author" key with an asterisk.
        """
        query = {
            "database": "federalist_bookworm",
            "search_limits": {
                "word": ["on"],
                "author": ["HAMILTON"]
            },
            "compare_limits": {
                "word": ["on"]
            },
            "counttype": "WordsPerMillion",
            "groups": [],
            "method": "return_json"
        }
        val1 = json.loads(SQLAPIcall(query).execute())

        query = {
            "database": "federalist_bookworm",
            "search_limits": {
                "word": ["on"],
                "*author": ["HAMILTON"]
            },
            "counttype": "WordsPerMillion",
            "groups": [],
            "method": "return_json"
        }
        val2 = json.loads(SQLAPIcall(query).execute())
        # assertEqual reports both values when the comparison fails.
        self.assertEqual(val1[0], val2[0])
Example #6
0
    def test_adding_metadata_to_bookworm(self):
        """
        Build out some dummy metadata: label the difference
        between even and odd paragraphs.

        A TSV keyed on "paragraphNumber" with an "oddness" column is
        written next to the test script, handed to
        BookwormManager.add_metadata, and then queried back through the
        API grouped by the new field.
        """

        from bookwormDB.manager import BookwormManager
        manager = BookwormManager(database="federalist_bookworm")

        # Create a phony derived field to test metadata supplementing

        def even_even(number):
            if number % 2 == 0:
                return "even"
            return "odd"

        tmp_file = "{}/test_bookworm_metadata.tsv".format(sys.path[0])

        # The with-block guarantees the file is flushed and closed before
        # the manager reads it.
        with open(tmp_file, "w") as newMetadata:
            newMetadata.write("paragraphNumber\toddness\n")
            for n in range(500):
                newMetadata.write("%d\t%s\n" % (n, even_even(n)))

        class Dummy(object):
            """
            Just quickly create a namespace to stand in for the command-line args.
            """
            key = "paragraphNumber"
            format = "tsv"
            file = tmp_file
            # Test the guessing at field_descriptions while we're at it
            field_descriptions = None

        manager.add_metadata(Dummy)

        # And then we test if that can be retrieved

        query = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextCount",
            "groups": ["oddness"],
            "method": "data",
            "format": "json"
        }

        # NOTE(review): this instance is discarded; it looks redundant with
        # the call on the next line -- confirm the constructor has no
        # required side effect before removing it.
        SQLAPIcall(query)
        m = json.loads(SQLAPIcall(query).execute())['data']
        # Even or odd is one of two things.
        self.assertEqual(len(m), 2)

        # There should be at least as many "odd" texts as "even" ones.
        # NOTE(review): presumably paragraph numbering in the corpus starts
        # at 1, so odd paragraphs cannot be outnumbered -- confirm.
        self.assertGreaterEqual(m['odd'][0], m['even'][0])
Example #7
0
    def query(self, args):
        """
        Run a query against the API.

        args.APIcall is a JSON-encoded query string; the result of
        executing it is printed to stdout.
        """

        from bookwormDB.general_API import SQLAPIcall
        import json

        query = json.loads(args.APIcall)
        caller = SQLAPIcall(query)
        # Parenthesized single-argument print works on Python 2 and 3;
        # the bare print statement is a syntax error on Python 3.
        print(caller.execute())
Example #8
0
def main(JSONinput):
    """
    Run a Bookworm API query and print headers plus result to stdout.

    JSONinput is a JSON-encoded query that must contain a 'method' key.
    Always returns True.
    """
    query = json.loads(JSONinput)
    # Print appropriate HTML headers
    headers(query['method'])
    # Set up the query.
    p = SQLAPIcall(query)
    # Run the query. Single-argument print(...) is valid on both Python 2
    # and Python 3, unlike the bare print statement.
    print(p.execute())

    return True
 def query(self, args):
     """
     Run a query against the API.

     args.APIcall is a JSON-encoded query string; the result of
     executing it is printed to stdout.
     """

     from bookwormDB.general_API import SQLAPIcall
     import json

     query = json.loads(args.APIcall)
     caller = SQLAPIcall(query)
     # Parenthesized single-argument print works on Python 2 and 3;
     # the bare print statement is a syntax error on Python 3.
     print(caller.execute())
Example #10
0
 def test_unicode_search_term(self):
     """
     A case-insensitive word count for a non-ASCII search term
     must come back greater than zero.
     """
     api_request = {
         "database": "unicode_test_bookworm",
         "search_limits": {"word": [u"ᎾᏍᎩ"]},
         "counttype": "WordCount",
         "groups": [],
         "words_collation": "Case_Insensitive",
         "method": "return_json",
     }
     # Build one call and discard it, then build the call that is executed.
     SQLAPIcall(api_request)
     raw_reply = SQLAPIcall(api_request).execute()
     counts = json.loads(raw_reply)
     self.assertTrue(counts[0] > 0)
Example #11
0
 def test_case_insensitivity_works_without_search_term(self):
     """
     A mixed-case word searched with case-insensitive collation
     must return a count greater than zero.
     """
     api_request = {
         "database": "federalist_bookworm",
         "search_limits": {"word": ["hOwEvEr"]},
         "counttype": "WordCount",
         "groups": [],
         "words_collation": "Case_Insensitive",
         "method": "return_json",
     }
     # Build one call and discard it, then build the call that is executed.
     SQLAPIcall(api_request)
     raw_reply = SQLAPIcall(api_request).execute()
     counts = json.loads(raw_reply)
     self.assertTrue(counts[0] > 0)
Example #12
0
 def test_various_unicode_cases(self):
     """
     Search on every "description_" value found in the unicode test
     catalog and check that each one yields a positive word count.
     """
     # There's a 'description_' for each individual item.
     catalog_location = sys.path[0] + "/test_bookworm_files_unicode/jsoncatalog.txt"
     # Read the catalog inside a with-block so the file handle is closed
     # instead of being left to the garbage collector.
     with open(catalog_location) as catalog:
         cases = [json.loads(line)["description_"] for line in catalog]
     for case in cases:
         query = {
             "database":"unicode_test_bookworm",
             "search_limits":{"description_":case},
             "counttype":"WordCount",
             "groups":[],
             "words_collation":"Case_Insensitive",
             "method":"return_json"
             }
         # NOTE(review): this instance is discarded; it looks redundant
         # with the call on the next line -- confirm before removing.
         SQLAPIcall(query)
         val1 = json.loads(SQLAPIcall(query).execute())
         self.assertTrue(val1[0] > 0)
Example #13
0
    def test_case_sensitivity(self):
        """
        Count the word "the" under case-sensitive and then case-insensitive
        collation; the insensitive count must be strictly larger.
        """
        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"word": ["the"]},
            "counttype": "WordCount",
            "groups": [],
            "words_collation": "Case_Sensitive",
            "method": "return_json",
        }

        # Build one call and discard it, then build the call that is executed.
        SQLAPIcall(api_request)
        sensitive_reply = SQLAPIcall(api_request).execute()
        sensitive = json.loads(sensitive_reply)
        self.assertTrue(sensitive[0] > 0)

        # Same request, switched to the other collation.
        api_request["words_collation"] = "Case_Insensitive"

        SQLAPIcall(api_request)
        insensitive_reply = SQLAPIcall(api_request).execute()
        insensitive = json.loads(insensitive_reply)
        # The words ('The','the') appear more often than ('the') alone.
        self.assertTrue(insensitive[0] > sensitive[0])
Example #14
0
    def test_API(self):
        """
        Group text percentages by author and check that five groups
        come back.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextPercent",
            "groups": ["author"],
            "method": "return_json",
        }

        raw_reply = SQLAPIcall(api_request).execute()
        by_author = json.loads(raw_reply)
        self.assertTrue(len(by_author) == 5)
Example #15
0
    def test_lte_and_gte(self):
        """
        Limit fedNumber with "$lte": 10 and "$gte": 5, group by
        fedNumber, and check that six groups come back.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        number_range = {"$lte": 10, "$gte": 5}
        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"fedNumber": number_range},
            "counttype": "TextCount",
            "groups": ["fedNumber"],
            "method": "return_json",
        }

        raw_reply = SQLAPIcall(api_request).execute()
        by_number = json.loads(raw_reply)
        self.assertTrue(len(by_number) == 6)
Example #16
0
    def test_ne_with_two_entries(self):
        """
        Apply "$ne" with two author values, group by author, and
        check that three groups come back.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        excluded_authors = {"$ne": ["HAMILTON", "DISPUTED"]}
        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"author": excluded_authors},
            "counttype": "TextPercent",
            "groups": ["author"],
            "method": "return_json",
        }

        raw_reply = SQLAPIcall(api_request).execute()
        by_author = json.loads(raw_reply)
        self.assertTrue(len(by_author) == 3)
Example #17
0
    def test_multiword_search(self):
        """
        Search for two words at once with TextPercent and check that
        the first returned value exceeds 33.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"word": ["on", "upon"]},
            "counttype": "TextPercent",
            "method": "data",
            "format": "json",
            "groups": [],
        }

        raw_reply = SQLAPIcall(api_request).execute()
        payload = json.loads(raw_reply)
        values = payload['data']
        self.assertTrue(values[0] > 33)
Example #18
0
    def test_ne_with_one_entry(self):
        """
        Apply "$ne" with a single author value, group by author, and
        check that four groups come back in the 'data' payload.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        excluded_author = {"$ne": ["HAMILTON"]}
        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"author": excluded_author},
            "counttype": "TextPercent",
            "groups": ["author"],
            "method": "data",
            "format": "json",
        }

        raw_reply = SQLAPIcall(api_request).execute()
        payload = json.loads(raw_reply)
        by_author = payload['data']
        self.assertTrue(len(by_author) == 4)
Example #19
0
    def test_and_with_two_entries(self):
        """
        Combine an author limit and a fedNumber limit under "$and",
        group by author, and check that the 'data' payload is empty.
        """
        import json
        from bookwormDB.general_API import SQLAPIcall

        author_clause = {"author": ["HAMILTON"]}
        number_clause = {"fedNumber": [40]}
        api_request = {
            "database": "federalist_bookworm",
            "search_limits": {"$and": [author_clause, number_clause]},
            "counttype": "TextCount",
            "groups": ["author"],
            "method": "data",
            "format": "json",
        }

        raw_reply = SQLAPIcall(api_request).execute()
        payload = json.loads(raw_reply)
        by_author = payload['data']
        self.assertTrue(len(by_author) == 0)
Example #20
0
class Bookworm_SQL_Creation(unittest.TestCase):
    """
    Integration tests that talk to MySQL and build a real bookworm.

    To properly test things, we actually build some bookworms.
    This assumes that the directory '/tmp' is writeable,
    which isn't strictly necessary for a bookworm to be built.
    """

    def test_server_connection(self):
        """
        Connect to MySQL and run a simple query.
        """
        logging.info("\n\nTESTING SERVER CONNECTION\n\n")
        import bookwormDB.CreateDatabase
        db = bookwormDB.CreateDatabase.DB(dbname="mysql")
        sampleQuery = db.query("SELECT 1+1").fetchall()
        self.assertEqual(sampleQuery[0][0], 2)

    def test_config_files(self):
        """
        Read the client user and password from the global and admin
        configuration files.
        """
        logging.info("\n\nTESTING CONFIG FILE ACCESS\n\n")

        def test_config_file(conf):
            # The values are not inspected; the lookups themselves are the
            # check (presumably they raise when the entry is missing).
            conf.get("client", "user")
            conf.get("client", "password")

        global_configuration_file = bookwormDB.configuration.Configfile(
            "global").config
        admin_configuration_file = bookwormDB.configuration.Configfile(
            "admin").config

        test_config_file(global_configuration_file)
        test_config_file(admin_configuration_file)

    def test_bookworm_creation(self):
        """
        Creates a test bookworm. Removes any existing databases called "federalist_bookworm"

        Steps: drop any previous database, download and unpack the
        federalist-bookworm sources into /tmp, write a bookworm.cnf, run
        `make`, then check the word count, a grouped API query, and
        supplementary metadata loading.
        """
        logging.info("\n\nTESTING BOOKWORM CREATION\n\n")
        import MySQLdb
        from warnings import filterwarnings
        filterwarnings('ignore', category=MySQLdb.Warning)

        import bookwormDB.CreateDatabase
        db = bookwormDB.CreateDatabase.DB(dbname="mysql")
        try:
            db.query("DROP DATABASE federalist_bookworm")
        except MySQLdb.OperationalError as e:
            # Error 1008 is tolerated here (the database to drop is not
            # there); anything else is a real failure.
            if e.args[0] != 1008:
                raise
        except Exception as e:
            # This is some weird MariaDB exception. It sucks that I'm
            # compensating for it here.
            if e.args[0] != "Cannot load from mysql.proc. The table is probably corrupted":
                logging.warning(
                    "Some mysterious error in attempting to drop previous iterations: just try running it again?"
                )
        from subprocess import call

        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib2 import urlopen  # Python 2

        url = "https://github.com/bmschmidt/federalist-bookworm/archive/master.zip"
        remote = urlopen(url)
        try:
            payload = remote.read()
        finally:
            # Release the connection even if the download fails midway.
            remote.close()
        with open("/tmp/federalist.zip", "wb") as local_file:
            local_file.write(payload)

        import zipfile
        import os
        import shutil

        if os.path.exists("/tmp/federalist/federalist-bookworm-master/"):
            # Sources are already unpacked: only clear the previous build
            # state so the bookworm is rebuilt.
            if os.path.exists(
                    "/tmp/federalist/federalist-bookworm-master/.bookworm"):
                shutil.rmtree(
                    "/tmp/federalist/federalist-bookworm-master/.bookworm")
        else:
            with zipfile.ZipFile(r'/tmp/federalist.zip') as archive:
                archive.extractall(r'/tmp/federalist')

        import bookwormDB.configuration

        globalc = bookwormDB.configuration.Configfile("global").config
        password = globalc.get("client", "password")
        user = globalc.get("client", "user")

        with open("/tmp/federalist/federalist-bookworm-master/bookworm.cnf",
                  "w") as output:
            output.write(
                """[client]\ndatabase = federalist_bookworm\nuser=%s\npassword=%s\n"""
                % (user, password))
            # This doesn't worry about client-side passwords.

        call(["make"],
             shell=True,
             cwd="/tmp/federalist/federalist-bookworm-master")

        db.query("USE federalist_bookworm")
        wordCount = db.query(
            "SELECT SUM(nwords) FROM fastcat").fetchall()[0][0]
        # This should be 212,081, but I don't want the tests to start failing when
        # we change the tokenization rules or miscellaneous things about encoding.
        self.assertTrue(wordCount > 100000)

        # Then we test whether the API can make queries on that bookworm.

        from bookwormDB.general_API import SQLAPIcall
        import json

        query = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextPercent",
            "groups": ["author"],
            "method": "return_json"
        }

        m = json.loads(SQLAPIcall(query).execute())
        self.assertEqual(len(m), 5)

        # And then we test if we can add metadata to the bookworm.

        from bookwormDB.manager import BookwormManager
        manager = BookwormManager(database="federalist_bookworm")

        def even_even(number):
            if number % 2 == 0:
                return "even"
            return "odd"

        # Create a phony derived field to test metadata supplementing.
        # The with-block makes sure the rows are flushed to disk and the
        # file is closed before the manager reads it.
        with open("/tmp/test_bookworm_metadata.tsv", "w") as newMetadata:
            newMetadata.write("paragraphNumber\toddness\n")
            for n in range(500):
                newMetadata.write("%d\t%s\n" % (n, even_even(n)))

        class Dummy:
            """
            Just quickly create a namespace to stand in for the command-line args.
            """
            key = "paragraphNumber"
            format = "tsv"
            file = "/tmp/test_bookworm_metadata.tsv"
            field_descriptions = None  # Test the guessing at field_descriptions while we're at it

        os.chdir("/tmp/federalist/federalist-bookworm-master")
        manager.add_metadata(Dummy)

        # And then we test if that can be retrieved

        query = {
            "database": "federalist_bookworm",
            "search_limits": {},
            "counttype": "TextCount",
            "groups": ["oddness"],
            "method": "return_json"
        }
        # NOTE(review): this instance is discarded; it looks redundant with
        # the call on the next line -- confirm before removing.
        SQLAPIcall(query)
        m = json.loads(SQLAPIcall(query).execute())
        # Even or odd is one of two things.
        self.assertEqual(len(m), 2)
        # There should be at least as many "odd" texts as "even" ones.
        # NOTE(review): the earlier comment claimed the first paragraph is
        # even, which does not match this assertion -- confirm numbering.
        self.assertGreaterEqual(m['odd'][0], m['even'][0])
Example #21
0
def application(environ, start_response, logfile="bookworm_queries.log"):
    """
    WSGI entry point: run the Bookworm query carried in the query string.

    environ: WSGI environment; QUERY_STRING holds the URL-quoted JSON
        query, optionally prefixed with "query=" or "queryTerms=".
    start_response: WSGI start_response callable.
    logfile: path of the file to which each answered query is appended
        as one JSON line (with 'ip', 'time' and 'duration' added).

    Returns a one-element list with the response body as bytes. Input
    that is not a JSON object yields a 404 with a JSON error message and
    is not logged to `logfile`.
    """
    # A request without a query string is treated as an empty query, which
    # is then rejected as invalid JSON instead of crashing in unquote().
    q = environ.get('QUERY_STRING') or ''

    # Prefer the forwarded address and fall back to the direct peer.
    ip = environ.get('HTTP_X_FORWARDED_FOR')
    if ip is None:
        ip = environ.get('REMOTE_ADDR')
    query = unquote(q)

    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET, POST, PUT, OPTIONS',
        'Access-Control-Allow-Headers':
        'Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token',
        'charset': 'utf-8'
    }

    logging.debug("Received query {}".format(query))
    start = datetime.now()

    # Backward-compatability: we used to force query to be
    # a named argument. Remove the name as a literal prefix; str.strip()
    # would instead strip any of those *characters* from both ends.
    for prefix in ("query=", "queryTerms="):
        if query.startswith(prefix):
            query = query[len(prefix):]

    try:
        query = json.loads(query)
        # Raises TypeError when the JSON value is not an object.
        query['ip'] = ip
    except (TypeError, ValueError):
        # WSGI requires "<code> <reason phrase>", not a bare code.
        status = '404 Not Found'
        start_response(status, list(headers.items()))
        return [
            b'{"status":"error", "message": "You have passed invalid JSON to the Bookworm API"}'
        ]

    process = SQLAPIcall(query)
    response_body = process.execute()

    # It might be binary already.
    headers['Content-type'] = content_type(query)

    if headers['Content-type'] != 'application/octet-stream':
        response_body = bytes(response_body, 'utf-8')

    headers['Content-Length'] = str(len(response_body))
    status = '200 OK'
    start_response(status, list(headers.items()))

    query['time'] = start.timestamp()
    query['duration'] = datetime.now().timestamp() - start.timestamp()
    # This writing isn't thread-safe; but generally we're not getting more than a couple queries a second.
    with open(logfile, 'a') as fout:
        json.dump(query, fout)
        fout.write("\n")
    logging.debug("Writing to log: \n{}\n".format(json.dumps(query)))
    return [response_body]