Esempio n. 1
0
def run(*dataspec, **query):
    """Forward a request to the US Census data API and return the raw body.

    Args:
        *dataspec: Path components; they are joined with "/" and appended to
            ``http://api.census.gov/data/``.
        **query: Query parameters; they are URL-encoded into the query string.

    Returns:
        The object produced by ``tangelo.empty_response()`` with its
        ``result`` field set to the body read from the remote server.
    """
    url = "http://api.census.gov/data/" + "/".join(dataspec) + "?" + urllib.urlencode(query)

    f = urllib.urlopen(url)
    try:
        # Read inside try/finally so the connection is released even if the
        # read fails part-way through.
        body = f.read()
    finally:
        f.close()

    response = tangelo.empty_response()
    response['result'] = body

    return response
Esempio n. 2
0
def run(database, table, start_time, end_time, center, degree, host="mongo", port="21000", fields="true"):
    """Build an email graph around ``center`` by querying Impala.

    Starting from ``center``, the function repeatedly queries for records
    involving the current set of addresses, ``degree`` times, and records for
    each newly seen address the round in which it first appeared.

    Args:
        database, table, start_time, end_time: Passed through unchanged to
            ``build_query`` (defined outside this block).
        center: Address the search starts from (distance 0).
        degree: Number of expansion rounds; must be convertible to ``int``.
        host, port: Joined as ``host:port`` to reach the Impala service.
        fields: Accepted but not read by this function; the literal "true"
            is what gets passed to ``convert_results``.

    Returns:
        The ``tangelo.empty_response()`` object with either ``error`` set
        (non-integer ``degree``) or ``result`` set to a dict with ``nodes``
        (``email``/``distance`` records) and ``edges`` (``source``/``target``
        indices into ``nodes`` plus a running string ``id``).
    """
    response = tangelo.empty_response()

    # Reject a degree that cannot be read as an integer.
    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    client = impala.ImpalaBeeswaxClient(host + ':' + port)
    client.connect()

    talkers = set([center])
    distance = {center: 0}
    frontier = list(talkers)
    batches = []

    for hop in range(degree):
        query = build_query(database, table, start_time, end_time, frontier)
        rows = convert_results(client.execute(query), "true")

        # Everyone mentioned in this round becomes the next round's frontier.
        names = []
        for row in rows:
            names.append(row["emailto"])
            names.append(row["emailfrom"])
        frontier = list(set(names))

        talkers = talkers.union(frontier)

        # Only the first sighting of an address fixes its distance.
        for name in frontier:
            distance.setdefault(name, hop + 1)

        batches.append(rows)

    talkers = list(talkers)
    talker_index = dict((name, position) for (position, name) in enumerate(talkers))

    # Edges refer to nodes by their position in the talkers list.
    edges = []
    for ident, row in enumerate(itertools.chain(*batches), 1):
        sender = row["emailfrom"]
        recipient = row["emailto"]
        edges.append({"source": talker_index[sender],
                      "target": talker_index[recipient],
                      "id": str(ident)})

    nodes = [{"email": name, "distance": distance[name]} for name in talkers]

    response["result"] = {"nodes": nodes,
                          "edges": edges}

    return response
Esempio n. 3
0
def run(*dataspec, **query):
    """Forward a request to the US Census data API and return the raw body.

    Args:
        *dataspec: Path components; they are joined with "/" and appended to
            ``http://api.census.gov/data/``.
        **query: Query parameters; they are URL-encoded into the query string.

    Returns:
        The object produced by ``tangelo.empty_response()`` with its
        ``result`` field set to the body read from the remote server.
    """
    f = urllib.urlopen("http://api.census.gov/data/" + "/".join(dataspec) +
                       "?" + urllib.urlencode(query))
    try:
        # Read inside try/finally so the connection is released even if the
        # read fails part-way through.
        body = f.read()
    finally:
        f.close()

    response = tangelo.empty_response()
    response['result'] = body

    return response
Esempio n. 4
0
def run(servername, dbname, datatype, by=None, datemin=None, datemax=None, charity=None):
    """Serve charity-related data from MongoDB as a JSON string.

    Args:
        servername: MongoDB server passed to ``pymongo.Connection``.
        dbname: Name of the database to read.
        datatype: "transactions", "population", or "charities".
        by: For "transactions", "month" groups by year/month of the date;
            anything else groups by county.
        datemin, datemax: Optional ``YYYY-MM-DD`` bounds for "transactions".
            They are applied only when both are given; ``datemin`` is
            inclusive and ``datemax`` exclusive.
        charity: Optional charity id (converted with ``int``) restricting
            "transactions".

    Returns:
        A JSON string. For the known datatypes this is a bare list of rows
        (not wrapped in a response object); for connection failures or an
        unknown datatype it is the response object with ``error`` set.
    """
    # Construct an empty response object.
    response = tangelo.empty_response()

    # Establish a connection to the MongoDB server.
    try:
        conn = pymongo.Connection(servername)
    except pymongo.errors.AutoReconnect as e:
        response["error"] = "error: %s" % (e.message)
        return bson.json_util.dumps(response)

    # Extract the requested database.
    db = conn[dbname]

    if datatype == "transactions":
        coll = db["charitynet.normalized.transactions"]

        # Records without a date are always excluded, so the condition list
        # is never empty and the $match stage is unconditional.
        conditions = [{"date": {"$ne": None}}]
        if datemin is not None and datemax is not None:
            date_min = datetime.datetime.strptime(datemin, "%Y-%m-%d")
            date_max = datetime.datetime.strptime(datemax, "%Y-%m-%d")
            conditions.append({"date": {"$gte": date_min}})
            conditions.append({"date": {"$lt": date_max}})
        if charity is not None:
            conditions.append({"charity_id": int(charity)})

        if by == "month":
            group = {"year": {"$year": "$date"}, "month": {"$month": "$date"}}
        else:
            group = "$county"

        pipeline = [
            {"$match": {"$and": conditions}},
            {"$group": {"_id": group, "amount": {"$sum": "$amount"}}},
        ]
        result = coll.aggregate(pipeline)

        # Drop groups whose key is missing.
        rows = [d for d in result["result"] if d["_id"] is not None]
        if by == "month":
            response = [[d["_id"], float(d["amount"])] for d in rows]
        else:
            # County ids are emitted as zero-padded five-digit strings.
            response = [["%05d" % d["_id"], float(d["amount"])] for d in rows]
    elif datatype == "population":
        coll = db["census"]
        response = [[d["_id"], int(d["pop2010"])] for d in coll.find()]
    elif datatype == "charities":
        coll = db["charitynet.normalized.transactions"]
        result = coll.aggregate([{"$group": {"_id": "$charity_id", "count": {"$sum": 1}}}, {"$sort": {"count": -1}}])
        # The charity id appears twice in each output row.
        response = [[d["_id"], d["_id"], d["count"]] for d in result["result"]]
    else:
        response["error"] = "error: unknown datatype requested"

    # Convert to JSON and return the result.
    return bson.json_util.dumps(response)
Esempio n. 5
0
    def stream(self, key=None, action="next"):
        """Advance or delete a stream registered in ``self.streams``.

        Args:
            key: Identifier of the stream; required (and must exist in
                ``self.streams``) unless ``action`` is "show".
            action: "next" to fetch the next value, "delete" to drop the
                stream, or "show" (not implemented).

        Returns:
            A JSON string. For "next" it carries ``stream_finished`` and, when
            a value was produced, ``result``; for "delete" ``result`` is "OK".

        Raises:
            cherrypy.HTTPError: for a missing or unknown key, an unknown or
                unimplemented action, an exception raised by the stream, or a
                value that cannot be serialized to JSON.
        """
        if action != "show":
            # Check for key parameter.
            if key is None:
                raise cherrypy.HTTPError("400 Required Query Parameter Missing", "The streaming API requires a 'key' query parameter")

            # Check that the key actually exists.
            if key not in self.streams:
                raise cherrypy.HTTPError("404 Key Not Found", "The key '%s' does not reference any existing stream" % (key))

        # Construct a container object.
        result = tangelo.empty_response()

        # Perform the requested action.
        actions = ["next", "delete", "show"]
        if action == "next":
            # Grab the stream in preparation for running it.
            stream = self.streams[key]

            # Attempt to run the stream via its next() method - if this yields a
            # result, then continue; if the next() method raises StopIteration,
            # then there are no more results to retrieve; if any other exception
            # is raised, this is treated as an error.
            try:
                result["stream_finished"] = False
                result["result"] = stream.next()
            except StopIteration:
                result["stream_finished"] = True
                del self.streams[key]
            except Exception:
                # Catch Exception rather than everything so that
                # KeyboardInterrupt/SystemExit are not turned into an HTTP
                # error and can still stop the server.
                del self.streams[key]
                raise cherrypy.HTTPError("501 Error in Python Service", "Caught exception while executing stream service keyed by %s:<br><pre>%s</pre>" % (key, traceback.format_exc()))

        elif action == "delete":
            del self.streams[key]
            result["result"] = "OK"
        elif action == "show":
            raise cherrypy.HTTPError("501 Unimplemented", "The 'show' action in the Tangelo streaming API has not yet been implemented")
        else:
            raise cherrypy.HTTPError("400 Bad Query Parameter", "The 'action' parameter must be one of: %s" % (", ".join(actions)))

        # On failure ``result`` is still the container dict, so the offending
        # value can be quoted in the error message.
        try:
            result = json.dumps(result)
        except TypeError:
            raise cherrypy.HTTPError("501 Bad Response from Python Service", "The stream keyed by %s returned a non JSON-seriazable result: %s" % (key, result["result"]))

        return result
Esempio n. 6
0
def run(servername, dbname, collname, file_hash=None, data=None):
    """Read or store the data associated with a file hash in MongoDB.

    Args:
        servername: MongoDB server passed to ``pymongo.Connection``.
        dbname: Database name.
        collname: Collection name.
        file_hash: Hash identifying the record; required.
        data: When omitted the call is a read; otherwise a JSON string to
            store under ``file_hash``.

    Returns:
        A JSON string of the response object. ``result`` holds the matching
        documents for a read or "ok" for a write; ``error`` is set when the
        hash is missing, the server is unreachable, or ``data`` is not valid
        JSON.
    """
    # Construct an empty response object.
    response = tangelo.empty_response()

    # If no hash was passed in, give an error.
    #
    # TODO(choudhury): see comment below about error codes, etc.
    if file_hash is None:
        response['error'] = "no file hash"
        return bson.json_util.dumps(response)

    # Establish a connection to the MongoDB server.
    try:
        conn = pymongo.Connection(servername)
    except pymongo.errors.AutoReconnect as e:
        response['error'] = "error: %s" % (e.message)
        return bson.json_util.dumps(response)

    # Extract the requested database and collection.
    coll = conn[dbname][collname]

    # If no data field was specified, treat this as a read request;
    # otherwise, write the data to the database.
    if data is None:
        # Retrieve every record stored under this hash.
        response['result'] = list(coll.find({'file_hash': file_hash}))
    else:
        # Parse the payload only to validate that it is well-formed JSON.
        # NOTE(review): the raw string, not the parsed object, is what gets
        # stored below - confirm that readers expect the unparsed form.
        try:
            bson.json_util.loads(data)
        except ValueError as e:
            response['error'] = e.message
            return bson.json_util.dumps(response)

        coll.insert({'file_hash': file_hash, 'data': data})

        # Return a success code.
        response['result'] = "ok"

    # Convert to JSON and return the result.
    return bson.json_util.dumps(response)
Esempio n. 7
0
def run(servername, dbname, datatype, datemin = "2012-01-01", datemax = "2012-02-01"):
    """Return county-level charity or census figures from MongoDB as JSON.

    ``datatype`` selects the report: "full" (donor counts per county),
    "bycounty" (summed amounts per county), "bydate" (summed amounts per
    county for dates in ``[datemin, datemax)``, both ``YYYY-MM-DD``), or
    "population" (``pop2010`` per census record).

    The successful reports are emitted as a bare JSON list rather than inside
    the usual response object so that the URL can be used directly in a Vega
    specification; failures are reported through the response object's
    ``error`` field.
    """
    def county_rows(docs, field, cast):
        # County codes are zero-padded to five digits so that state codes
        # below 10 come out correctly; documents without a code are skipped.
        return [["%05d" % doc['_id'], cast(doc[field])]
                for doc in docs if doc['_id'] is not None]

    response = tangelo.empty_response()

    # Connect to the MongoDB server; report failure in the response.
    try:
        conn = pymongo.Connection(servername)
    except pymongo.errors.AutoReconnect as e:
        response['error'] = "error: %s" % (e.message)
        return bson.json_util.dumps(response)

    db = conn[dbname]
    transactions = "charitynet.normalized.transactions"
    per_county = {"_id": "$county", "amount": {"$sum": "$amount"}}

    if datatype == "full":
        donors = db["charitynet.normalized.donors.counties"].find()
        response = county_rows(donors, 'value', int)
    elif datatype == "bycounty":
        totals = db[transactions].aggregate([{"$group": per_county}])
        response = county_rows(totals["result"], 'amount', float)
    elif datatype == "bydate":
        date_min = datetime.datetime.strptime(datemin, "%Y-%m-%d")
        date_max = datetime.datetime.strptime(datemax, "%Y-%m-%d")
        window = {"$and": [{"date": {"$gte": date_min}}, {"date": {"$lt": date_max}}]}

        totals = db[transactions].aggregate([{"$match": window}, {"$group": per_county}])
        response = county_rows(totals["result"], 'amount', float)
    elif datatype == "population":
        response = [[doc["_id"], int(doc["pop2010"])] for doc in db["census"].find()]
    else:
        response['error'] = "error: unknown datatype requested"

    # Serialize whichever shape was produced.
    return bson.json_util.dumps(response)
Esempio n. 8
0
def run(text=""):
    """Extract named entities from ``text`` using NLTK.

    Returns the ``empty_response()`` object with ``result`` set to a list of
    ``(label, phrase)`` pairs, one per tagged chunk that is not a whole
    sentence; the list is empty when ``text`` is the empty string.
    """
    response = empty_response()
    entities = []
    response['result'] = entities

    # Nothing to analyze.
    if text == "":
        return response

    # Tokenize, tag, and chunk every sentence before collecting anything.
    trees = []
    for sentence in nltk.sent_tokenize(text):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
        trees.append(nltk.ne_chunk(tagged))

    # Walk every subtree; those labelled 'S' are whole sentences and are
    # skipped, the rest contribute their label and space-joined leaf words.
    for tree in trees:
        for subtree in tree.subtrees():
            if subtree.node == 'S':
                continue
            words = [leaf[0] for leaf in subtree.leaves()]
            entities.append((subtree.node, ' '.join(words)))

    return response
Esempio n. 9
0
def run(servername, dbname, collname, name=None, data=None, code=None):
    """Fetch or save a named document in a MongoDB collection.

    With neither ``data`` nor ``code`` supplied, the documents whose ``_id``
    equals ``name`` are returned in ``result``. Otherwise a document with
    that ``_id`` and the given ``data``/``code`` is saved and ``result`` is
    "ok". A missing ``name`` or an unreachable server is reported through the
    ``error`` field. The return value is always a JSON string.
    """
    response = tangelo.empty_response()

    # A name is mandatory for both reads and writes.
    #
    # TODO(choudhury): see comment below about error codes, etc.
    if name is None:
        response['error'] = "no name"
        return bson.json_util.dumps(response)

    # Connect to the MongoDB server.
    try:
        conn = pymongo.Connection(servername)
    except pymongo.errors.AutoReconnect as e:
        response['error'] = "error: %s" % (e.message)
        return bson.json_util.dumps(response)

    collection = conn[dbname][collname]

    # Any payload at all turns the request into a write.
    has_payload = data is not None or code is not None
    if has_payload:
        collection.save({'_id': name, 'data': data, 'code': code})
        response['result'] = "ok"
    else:
        matches = collection.find({'_id': name})
        response['result'] = [doc for doc in matches]

    return bson.json_util.dumps(response)
Esempio n. 10
0
def run(server, db, coll, method="find", query=None, limit=1000, fields=None, sort=None, fill=None):
    """Run a generic query against a MongoDB collection and return JSON.

    Args:
        server, db, coll: Server, database, and collection to query.
        method: "find" or "insert". "insert" passes validation but is not
            implemented and ends in a ``RuntimeError``.
        query, fields, sort, fill: Optional encoded values, each passed
            through ``decode`` (defined outside this block) before use.
            ``fill`` defaults to ``True`` when omitted; a false value returns
            only the count, with an empty data list.
        limit: Maximum number of documents; must be convertible to ``int``.

    Returns:
        A JSON string of the response object. On success ``result`` is a
        dict with ``count`` and ``data``; otherwise ``error`` describes the
        problem.

    Raises:
        RuntimeError: when ``method`` is accepted but has no implementation.
    """
    # Create an empty response object.
    response = tangelo.empty_response()

    # Check the requested method.
    if method not in ["find", "insert"]:
        response["error"] = "Unsupported MongoDB operation '%s'" % (method)
        return bson.json_util.dumps(response)

    # Decode the query strings into Python objects.
    # NOTE(review): on ValueError the response is returned untouched here, so
    # presumably decode() has already filled in its error field - confirm.
    try:
        if query is not None:
            query = decode(query, "query", response)
        if fields is not None:
            fields = decode(fields, "fields", response)
        if sort is not None:
            sort = decode(sort, "sort", response)
        if fill is not None:
            fill = decode(fill, "fill", response)
        else:
            fill = True
    except ValueError:
        return bson.json_util.dumps(response)

    # Cast the limit value to an int.
    try:
        limit = int(limit)
    except ValueError:
        response["error"] = "Argument 'limit' ('%s') could not be converted to int." % (limit)
        return bson.json_util.dumps(response)

    # Create database connection.
    try:
        c = pymongo.Connection(server)[db][coll]
    except pymongo.errors.AutoReconnect:
        response["error"] = "Could not connect to MongoDB server '%s'" % (server)
        return bson.json_util.dumps(response)

    # Perform the requested action.
    if method == "find":
        # Do a find operation with the passed arguments.
        it = c.find(spec=query, fields=fields, limit=limit, sort=sort)

        # Create a list of the results.
        if fill:
            results = [x for x in it]
        else:
            results = []

        # Create an object to structure the results.
        retobj = {}
        retobj["count"] = it.count()
        retobj["data"] = results

        # Pack the results into the response object, and return it.
        response["result"] = retobj
    else:
        # The message is now actually formatted; it previously contained a
        # literal '%s' instead of the method name.
        raise RuntimeError("illegal method '%s' in module 'mongo'" % (method))

    # Return the response object.
    return bson.json_util.dumps(response)
Esempio n. 11
0
def run(servername,
        dbname,
        data_coll,
        name=None,
        objectid=None,
        _id=None,
        accession=None,
        scientific_name=None,
        noid=False,
        noloc=False,
        maxdepth=100):
    """Return a tree of documents from MongoDB as a nested JSON structure.

    The search criteria select a single root document; its ``clades`` entries
    are then replaced, recursively, by the documents they reference.

    Args:
        servername, dbname, data_coll: Server, database, and collection.
        name, objectid, _id, accession, scientific_name: Optional search
            criteria, each added to the query through ``decodeAndAdd``
            (defined outside this block). With none given, the query is
            ``{'rooted': True}``.
        noid: When true, ``_id`` is removed from every node; otherwise it is
            converted to a string.
        noloc: When true, any ``loc`` field is removed from every node.
        maxdepth: Depth at which child references are emitted as strings
            instead of being expanded; must be convertible to ``int``.

    Returns:
        A JSON string: the expanded tree when exactly one root matches,
        otherwise the response object with ``error`` set.
    """
    def recursiveHelper(child, depth=0):
        # Load the node and expand (or stringify) each of its children in
        # place. Relies on ``c`` and ``maxdepth`` from the enclosing scope,
        # which are bound before the first call.
        it = c.find({'_id': child})
        phylo = it[0]
        if 'clades' in phylo:
            for index, clade in enumerate(phylo['clades']):
                if depth >= maxdepth:
                    phylo['clades'][index] = str(clade)
                else:
                    phylo['clades'][index] = recursiveHelper(
                        clade, depth + 1)
        if noid:
            del phylo['_id']
        else:
            phylo['_id'] = str(phylo['_id'])
        if noloc and 'loc' in phylo:
            del phylo['loc']
        return phylo

    # Construct an empty response object.
    response = tangelo.empty_response()

    query = dict()
    # Decode the query strings into Python objects.
    # NOTE(review): on ValueError the response is returned untouched here, so
    # presumably decodeAndAdd() has already recorded the error - confirm.
    try:
        if name is not None:
            decodeAndAdd(name, query, 'sequences.name', response)
        if objectid is not None:
            decodeAndAdd(ObjectId(objectid), query, 'objectid', response)
        if _id is not None:
            decodeAndAdd(_id, query, '_id', response)
        if accession is not None:
            decodeAndAdd(accession, query, 'sequences.accession.source',
                         response)
        if scientific_name is not None:
            decodeAndAdd(scientific_name, query, 'taxonomies.scientific_name',
                         response)
    except ValueError:
        return bson.json_util.dumps(response)

    # Cast the maxdepth value to an int.
    try:
        maxdepth = int(maxdepth)
    except ValueError:
        # Name the argument that actually failed; the message previously
        # referred to a non-existent 'limit' argument.
        response['error'] = (
            "Argument 'maxdepth' ('%s') could not be converted to int." % (
                maxdepth))
        return bson.json_util.dumps(response)

    # Create database connection.
    try:
        c = pymongo.Connection(servername)[dbname][data_coll]
    except pymongo.errors.AutoReconnect:
        response['error'] = "Could not connect to MongoDB server '%s'" % (
            servername)
        return bson.json_util.dumps(response)

    # if no arguments given just search from root
    if not query:
        query['rooted'] = True

    it = c.find(query)

    # create a new tree for results
    if it.count() == 1:
        phylo = it[0]
        phylotree = recursiveHelper(phylo['_id'])
        # Convert to JSON and return the result.
        return bson.json_util.dumps(phylotree, sort_keys=True)
    else:
        response['error'] = "Search returned %s object(s) to root the tree" % (
            it.count())
        response['error'] += "| %s" % (str(query))
        return bson.json_util.dumps(response)
def run(start_date=None, end_date=None, omit_countries=None, omit_diseases=None):
    """Build a node/link graph of alerts, countries, and diseases.

    Alerts dated in ``[start_date, end_date)`` (both ``YYYY-MM-DD``) are read
    from the ``canepi.alerts`` collection on the "mongo" server, skipping the
    countries and diseases listed in the optional JSON-encoded
    ``omit_countries`` / ``omit_diseases`` arguments.

    Returns a JSON string of the response object whose ``result`` holds
    ``nodes`` (alerts first, then countries, then diseases) and ``links``
    (``source``/``target`` indices into ``nodes``). A missing or malformed
    argument yields a ``tangelo.HTTPStatusCode`` instead.
    """
    # Both dates are required.
    if start_date is None:
        return tangelo.HTTPStatusCode("422 Missing Parameter", "Required parameter <i>start_date</i> missing.")
    if end_date is None:
        return tangelo.HTTPStatusCode("422 Missing Parameter", "Required parameter <i>end_date</i> missing.")

    # Parse the dates; on failure the original text is echoed back.
    try:
        start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    except ValueError:
        return tangelo.HTTPStatusCode("422 Bad Parameter", "Parameter <i>start_date</i> ('%s') was not in YYYY-MM-DD form." % (start_date))

    try:
        end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        return tangelo.HTTPStatusCode("422 Bad Parameter", "Parameter <i>end_date</i> ('%s') was not in YYYY-MM-DD form." % (end_date))

    # Decode the optional exclusion lists.
    if omit_countries is not None:
        try:
            omit_countries = bson.json_util.loads(omit_countries)
        except ValueError:
            return tangelo.HTTPStatusCode("422 Bad Parameter", "Parameter <i>omit_countries</i> ('%s') was not JSON-deserializable." % (omit_countries))
    else:
        omit_countries = []

    if omit_diseases is not None:
        try:
            omit_diseases = bson.json_util.loads(omit_diseases)
        except ValueError:
            return tangelo.HTTPStatusCode("422 Bad Parameter", "Parameter <i>omit_diseases</i> ('%s') was not JSON-deserializable." % (omit_diseases))
    else:
        omit_diseases = []

    # Fetch the matching alerts.
    alerts = pymongo.Connection("mongo").canepi.alerts
    criteria = {"$and": [{"date": {"$gte": start_date}},
                         {"date": {"$lt": end_date}},
                         {"disease": {"$not": {"$in": omit_diseases}}},
                         {"country": {"$not": {"$in": omit_countries}}}]}
    cursor = alerts.find(criteria,
                         fields=["_id", "date", "rating.rating", "disease", "country"])

    nodes = []
    links = []
    countries = {}
    diseases = {}

    for record in cursor:
        # One node per alert.
        alert = {"id": str(record["_id"]), "date": record["date"], "rating": record["rating"]["rating"], "type": "alert"}
        nodes.append(alert)

        # Country and disease nodes are created on first sight and shared
        # afterwards.
        country = countries.setdefault(record["country"],
                                       {"id": record["country"], "type": "country"})
        disease = diseases.setdefault(record["disease"],
                                      {"id": record["disease"], "type": "disease"})

        # Link the alert to its country and to its disease; the endpoints are
        # node objects for now and become indices below.
        links.append({"source": alert, "target": country})
        links.append({"source": alert, "target": disease})

    # Countries follow the alerts in the node list, then diseases.
    nodes.extend(countries.values())
    nodes.extend(diseases.values())

    # Map every node id to its position in the node list.
    nodemap = dict((node["id"], position) for (position, node) in enumerate(nodes))

    # Swap the node objects in each link for their positions.
    for link in links:
        link["source"] = nodemap[link["source"]["id"]]
        link["target"] = nodemap[link["target"]["id"]]

    # Pack the graph and serialize it with the bson-aware encoder.
    r = tangelo.empty_response()
    r["result"] = {"nodes": nodes, "links": links}

    return bson.json_util.dumps(r)
Esempio n. 13
0
def run(host, database, collection, start_time=None, end_time=None, center=None, degree=None):
    """Build an email graph around ``center`` from a tab-separated file via Spark.

    Args:
        host, database, collection: Accepted but not read by this function;
            the Spark master and the HDFS input path are hard-coded below.
        start_time, end_time: ``YYYY-MM-DD`` bounds; records dated in
            ``[start_time, end_time)`` are considered.
        center: Address the search starts from (distance 0).
        degree: Number of expansion rounds; must be convertible to ``int``.

    Returns:
        The ``tangelo.empty_response()`` object with either ``error`` set or
        ``result`` set to a dict with ``nodes`` (``email``/``distance``
        records) and ``edges`` (``source``/``target`` indices into ``nodes``
        plus a running string ``id``).
    """
    response = tangelo.empty_response()

    # Bail with error if any of the required arguments is missing.
    required = zip(["start_time", "end_time", "center", "degree"],
                   [start_time, end_time, center, degree])
    missing = [name for (name, value) in required if value is None]
    if missing:
        response["error"] = "missing required arguments: %s" % (", ".join(missing))
        return response

    # Cast the arguments to the right types.
    #
    # The degree is the degree of separation between the center element and the
    # retrieved nodes - an integer.
    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    # The start and end times are dates in YYYY-MM-DD form.
    try:
        start_time = datetime.datetime.strptime(start_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'start_time' must be in YYYY-MM-DD format"
        return response

    try:
        end_time = datetime.datetime.strptime(end_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'end_time' must be in YYYY-MM-DD format"
        return response

    # Reuse the active Spark context if there is one; otherwise create it.
    if SparkContext._active_spark_context is None:
        sc = SparkContext('spark://impaladev.darpa.mil:7077', 'Enron Emailers')
    else:
        sc = SparkContext._active_spark_context

    # Each input line is split on tabs; fields 0 and 1 are the two addresses
    # and field 2 is the date, as used by the helpers below.
    enronData = sc.textFile('hdfs://localhost:8020/user/bigdata/pgill/enron/email_graph_fixed.txt').map(lambda line: line.split('\t')).cache()

    def withinTimespan(record):
        recordDate = datetime.datetime.strptime(record[2], "%Y-%m-%d")
        return recordDate >= start_time and recordDate < end_time

    def emptyRecords(record):
        # Despite the name, this keeps records where both addresses are
        # non-empty.
        return record[0] != "" and record[1] != ""

    def orderRecord(record):
        # Put the smaller address first so that a pair is the same record
        # regardless of direction, letting distinct() collapse both.
        if record[1] < record[0]:
            record[0], record[1] = record[1], record[0]
        return record

    enronSpan = enronData.filter(withinTimespan).filter(emptyRecords).map(orderRecord).map(lambda rec: (rec[0], rec[1])).distinct().cache()

    # Start a set of all interlocutors we're interested in - that includes the
    # center emailer.
    talkers = set([center])

    # Also start a table of distances from the center.
    distance = {center: 0}

    current_talkers = list(talkers)
    all_results = []
    for i in range(degree):

        def emailsInvolved(record):
            return any(keyword in record for keyword in current_talkers)

        results = enronSpan.filter(emailsInvolved).collect()

        # Collect the names.
        current_talkers = list(itertools.chain(*[[x[1], x[0]] for x in results]))
        current_talkers = list(set(current_talkers))
        talkers = talkers.union(current_talkers)

        # Compute updates to everyone's distance from center.
        for t in current_talkers:
            if t not in distance:
                distance[t] = i+1

        # Save this round's pairs.
        all_results.append(results)

    # Construct a canonical graph structure from the set of talkers and the list
    # of emails.
    #
    # Start with an index map of the talkers.
    talkers = list(talkers)
    talker_index = {name: index for (index, name) in enumerate(talkers)}

    # Create a chained iterable from all the partial results.
    all_results = itertools.chain(*all_results)

    # Create a list of graph edges suitable for use by D3 - replace each record
    # in the data with one that carries an index into the emailers list.
    edges = []
    ident = 0
    for result in all_results:
        source = result[0]
        target = result[1]
        ident += 1

        rec = { "source": talker_index[source],
                "target": talker_index[target],
                "id": str(ident) }

        edges.append(rec)

    talkers = [{"email": n, "distance": distance[n]} for n in talkers]

    # Stuff the graph data into the response object, and return it.
    response["result"] = { "nodes": talkers,
                           "edges": edges }
    return response
Esempio n. 14
0
def run(host,
        database,
        collection,
        start_time=None,
        end_time=None,
        center=None,
        degree=None):
    """Build an email graph around ``center`` from a MongoDB collection.

    Args:
        host, database, collection: Server, database, and collection holding
            the records; each record is read for its ``source``, ``target``,
            ``date``, and ``_id`` fields.
        start_time, end_time: ``YYYY-MM-DD`` bounds; records dated in
            ``[start_time, end_time)`` are considered.
        center: Address the search starts from (distance 0).
        degree: Number of expansion rounds; must be convertible to ``int``.

    Returns:
        The ``tangelo.empty_response()`` object with either ``error`` set or
        ``result`` set to a dict with ``nodes`` (``email``/``distance``
        records) and ``edges`` (``source``/``target`` indices into ``nodes``
        plus the record's ``_id`` as a string).
    """
    response = tangelo.empty_response()

    # Bail with error if any of the required arguments is missing.
    required = zip(["start_time", "end_time", "center", "degree"],
                   [start_time, end_time, center, degree])
    missing = [name for (name, value) in required if value is None]
    if missing:
        response["error"] = "missing required arguments: %s" % (
            ", ".join(missing))
        return response

    # Cast the arguments to the right types.
    #
    # The degree is the degree of separation between the center element and the
    # retrieved nodes - an integer.
    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    # The start and end times are dates in YYYY-MM-DD form.
    try:
        start_time = datetime.datetime.strptime(start_time, "%Y-%m-%d")
    except ValueError:
        response[
            "error"] = "argument 'start_time' must be in YYYY-MM-DD format"
        return response

    try:
        end_time = datetime.datetime.strptime(end_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'end_time' must be in YYYY-MM-DD format"
        return response

    # Get a handle to the database collection.
    try:
        c = pymongo.Connection(host)[database][collection]
    except pymongo.errors.AutoReconnect as e:
        response["error"] = "database error: %s" % (e.message)
        return response

    # Start a set of all interlocutors we're interested in - that includes the
    # center emailer.
    talkers = set([center])

    # Also start a table of distances from the center.
    distance = {center: 0}

    current_talkers = list(talkers)
    all_results = []
    for i in range(degree):
        # Construct and send a query to retrieve all records involving the
        # current talkers, occurring within the time bounds specified, and
        # involving two known addresses.
        query = {
            "$and": [
                {"date": {"$gte": start_time}},
                {"date": {"$lt": end_time}},
                {"source": {"$ne": ""}},
                {"target": {"$ne": ""}},
                {"$or": [
                    {"source": {"$in": current_talkers}},
                    {"target": {"$in": current_talkers}},
                ]},
            ]
        }
        results = c.find(query, fields=["target", "source"])

        # Collect the names. De-duplicate them so that the next round's $in
        # lists contain each address only once.
        names = set()
        for record in results:
            names.add(record["target"])
            names.add(record["source"])
        current_talkers = list(names)
        talkers = talkers.union(current_talkers)

        # Compute updates to everyone's distance from center.
        for t in current_talkers:
            if t not in distance:
                distance[t] = i + 1

        # Rewind and save the cursor so it can be iterated again when the
        # edges are built.
        results.rewind()
        all_results.append(results)

    # Construct a canonical graph structure from the set of talkers and the list
    # of emails.
    #
    # Start with an index map of the talkers.
    talkers = list(talkers)
    talker_index = {name: index for (index, name) in enumerate(talkers)}

    # Create a chained iterable from all the rewound partial results.
    all_results = itertools.chain(*all_results)

    # Create a list of graph edges suitable for use by D3 - replace each record
    # in the data with one that carries an index into the emailers list.
    edges = []
    for result in all_results:
        source = result["source"]
        target = result["target"]
        ident = str(result["_id"])

        rec = {
            "source": talker_index[source],
            "target": talker_index[target],
            "id": ident
        }

        edges.append(rec)

    talkers = [{"email": n, "distance": distance[n]} for n in talkers]

    # Stuff the graph data into the response object, and return it.
    response["result"] = {"nodes": talkers, "edges": edges}
    return response
Esempio n. 15
0
def run(host, database, collection, start_time=None, end_time=None, center=None, degree=None):
    """Build the email graph reachable from `center` in a MongoDB collection.

    Starting from `center`, the collection is queried `degree` times; each
    round fetches every record dated in [start_time, end_time) whose "source"
    or "target" is one of the addresses found by the previous round.

    Arguments:
        host, database, collection -- where to find the MongoDB collection.
        start_time, end_time -- date window bounds in YYYY-MM-DD format
            (start inclusive, end exclusive).
        center -- the address the search starts from.
        degree -- number of query rounds; must be convertible to int.

    Returns the tangelo response object.  On success its "result" entry is
    {"nodes": [...], "edges": [...]} where each node is
    {"email": address, "distance": rounds from center} and each edge is
    {"source": node index, "target": node index, "id": str(record _id)}.
    On failure its "error" entry holds a message.
    """
    response = tangelo.empty_response()

    # Bail with error if any of the required arguments is missing.  A real
    # list is built (rather than map/filter objects) so that the emptiness
    # test and join() behave the same on Python 2 and Python 3.
    required = [("start_time", start_time),
                ("end_time", end_time),
                ("center", center),
                ("degree", degree)]
    missing = [name for (name, value) in required if value is None]
    if missing:
        response["error"] = "missing required arguments: %s" % (", ".join(missing))
        return response

    # Cast the arguments to the right types.
    #
    # The degree is the degree of separation between the center element and the
    # retrieved nodes - an integer.
    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    # The start time is a calendar date given as YYYY-MM-DD; it is parsed into
    # a datetime so it can be compared against the records' "date" field.
    try:
        start_time = datetime.datetime.strptime(start_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'start_time' must be in YYYY-MM-DD format"
        return response

    # The end time is another date in the same format.
    try:
        end_time = datetime.datetime.strptime(end_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'end_time' must be in YYYY-MM-DD format"
        return response

    # Get a handle to the database collection.
    try:
        c = pymongo.Connection(host)[database][collection]
    except pymongo.errors.AutoReconnect as e:
        response["error"] = "database error: %s" % (e.message)
        return response

    # Start a set of all interlocutors we're interested in - that includes the
    # center emailer.
    talkers = set([center])

    # Also start a table of distances from the center.
    distance = {center: 0}

    current_talkers = list(talkers)
    all_results = []
    for i in range(degree):
        # Construct and send a query to retrieve all records involving the
        # current talkers, occurring within the time bounds specified, and
        # involving two known addresses.
        query = {"$and": [
            {"date": {"$gte": start_time}},
            {"date": {"$lt": end_time}},
            {"source": {"$ne": ""}},
            {"target": {"$ne": ""}},
            {"$or": [
                {"source": {"$in": current_talkers}},
                {"target": {"$in": current_talkers}},
            ]},
        ]}
        results = c.find(query, fields=["target", "source"])

        # Collect the names seen in this round.  They are de-duplicated so the
        # "$in" lists of the next round carry each address only once; the list
        # conversion is needed because the query value must be a sequence.
        seen = set()
        for record in results:
            seen.add(record["target"])
            seen.add(record["source"])
        current_talkers = list(seen)
        talkers = talkers.union(current_talkers)

        # Compute updates to everyone's distance from center.
        for t in current_talkers:
            if t not in distance:
                distance[t] = i + 1

        # Rewind and save the cursor (it was consumed by the loop above).
        results.rewind()
        all_results.append(results)

    # Construct a canonical graph structure from the set of talkers and the list
    # of emails.
    #
    # Start with an index map of the talkers.
    talkers = list(talkers)
    talker_index = {name: index for (index, name) in enumerate(talkers)}

    # Create a chained iterable from all the rewound partial results.
    # NOTE(review): a record matched in more than one round is yielded once per
    # round, so `edges` can repeat the same "id" - confirm the client expects it.
    all_results = itertools.chain(*all_results)

    # Create a list of graph edges suitable for use by D3 - replace each record
    # in the data with one that carries an index into the emailers list.
    edges = []
    for result in all_results:
        edges.append({
            "source": talker_index[result["source"]],
            "target": talker_index[result["target"]],
            "id": str(result["_id"]),
        })

    talkers = [{"email": n, "distance": distance[n]} for n in talkers]

    # Stuff the graph data into the response object, and return it.
    response["result"] = {"nodes": talkers, "edges": edges}
    return response
Esempio n. 16
0
def run(servername, projectName, datasetName, name=None, objectid=None, _id=None, accession=None, scientific_name=None, noid=False, noloc=False, nobranchlength=False, maxdepth=1000):
    """Return a phylogenetic tree from MongoDB as a JSON string.

    The collection is looked up through `api` from `projectName` and
    `datasetName`.  The root is the single document matching the supplied
    search criteria (or the document with `rooted` set when no criteria are
    given); its 'clades' references are then expanded recursively.

    Arguments:
        servername -- MongoDB server to connect to.
        projectName, datasetName -- identify the 'PhyloTree' collection.
        name, objectid, _id, accession, scientific_name -- optional search
            criteria for the root document.
        noid -- when true, drop '_id' from every node (otherwise it is
            converted to a string).
        noloc -- when true, drop 'loc' from every node.
        nobranchlength -- when true, drop 'branch_length' from every node.
        maxdepth -- depth at which child references stop being expanded and
            are emitted as strings instead; must be convertible to int.

    Returns a JSON string: the tree on success, or the tangelo response
    object carrying an 'error' message on failure.
    """
    def recursiveHelper(child, depth=0):
        # Fetch the node with the given id and expand its children in place.
        it = c.find({'_id': child})
        phylo = it[0]
        if 'clades' in phylo:
            for index, clade in enumerate(phylo['clades']):
                if depth >= maxdepth:
                    # Too deep: leave the child as a stringified reference.
                    phylo['clades'][index] = str(clade)
                else:
                    phylo['clades'][index] = recursiveHelper(clade, depth + 1)
        if noid:
            del phylo['_id']
        else:
            phylo['_id'] = str(phylo['_id'])
        if nobranchlength and 'branch_length' in phylo:
            del phylo['branch_length']
        if noloc and 'loc' in phylo:
            del phylo['loc']
        return phylo

    # lookup the data collection
    data_coll = api.returnCollectionForObjectByName(projectName, 'PhyloTree', datasetName)
    dbname = api.getMongoDatabase()
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("initializing to db:  %s collection:  %s" % (dbname, data_coll))

    # Construct an empty response object.
    response = tangelo.empty_response()

    query = dict()
    # Decode the query strings into Python objects.
    # NOTE(review): decodeAndAdd is defined elsewhere; presumably it records
    # the problem in `response` before raising ValueError - confirm.
    try:
        if name is not None:
            decodeAndAdd(name, query, 'sequences.name', response)
        if objectid is not None:
            decodeAndAdd(ObjectId(objectid), query, 'objectid', response)
        if _id is not None:
            decodeAndAdd(_id, query, '_id', response)
        if accession is not None:
            decodeAndAdd(accession, query, 'sequences.accession.source', response)
        if scientific_name is not None:
            decodeAndAdd(scientific_name, query, 'taxonomies.scientific_name', response)
    except ValueError:
        return bson.json_util.dumps(response)

    # Cast the maxdepth value to an int.
    try:
        maxdepth = int(maxdepth)
    except ValueError:
        response['error'] = "Argument 'maxdepth' ('%s') could not be converted to int." % (maxdepth)
        return bson.json_util.dumps(response)

    # Create database connection.
    try:
        c = pymongo.Connection(servername)[dbname][data_coll]
    except pymongo.errors.AutoReconnect:
        response['error'] = "Could not connect to MongoDB server '%s'" % (servername)
        return bson.json_util.dumps(response)

    # if no arguments given just search from root
    if not query:
        query['rooted'] = True

    it = c.find(query)

    # create a new tree for results; exactly one document must match so the
    # root is unambiguous
    if it.count() == 1:
        phylo = it[0]
        phylotree = recursiveHelper(phylo['_id'])
        # Convert to JSON and return the result.
        return bson.json_util.dumps(phylotree, sort_keys=True)
    else:
        response['error'] = "Search returned %s object(s) to root the tree" % (it.count())
        response['error'] += "| %s" % (str(query))
        return bson.json_util.dumps(response)
Esempio n. 17
0
def run(servername,
        dbname,
        datatype,
        by=None,
        datemin=None,
        datemax=None,
        charity=None):
    """Serve charity / census summaries from MongoDB as a JSON string.

    Arguments:
        servername, dbname -- MongoDB server and database to read from.
        datatype -- one of:
            "transactions": sum of "amount" grouped by month (when
                `by == "month"`) or by county otherwise;
            "population": [_id, pop2010] pairs from the "census" collection;
            "charities": [charity_id, charity_id, count] rows, most frequent
                first.
        by -- grouping for "transactions"; "month" or anything else (county).
        datemin, datemax -- optional YYYY-MM-DD bounds for "transactions"
            (start inclusive, end exclusive); applied only when BOTH are
            given.
        charity -- optional charity id (integer) to restrict "transactions".

    Returns a JSON string: a list of rows on success, or the tangelo response
    object carrying an 'error' message on failure.
    """
    # Construct an empty response object.
    response = tangelo.empty_response()

    # Establish a connection to the MongoDB server.
    try:
        conn = pymongo.Connection(servername)
    except pymongo.errors.AutoReconnect as e:
        response['error'] = "error: %s" % (e.message)
        return bson.json_util.dumps(response)

    # Extract the requested database and collection.
    db = conn[dbname]

    if datatype == "transactions":
        coll = db["charitynet.normalized.transactions"]

        # Records without a date are always excluded.
        conditions = [{"date": {"$ne": None}}]

        if datemin is not None and datemax is not None:
            # Report malformed dates as an error response instead of letting
            # the ValueError escape the service.
            try:
                date_min = datetime.datetime.strptime(datemin, "%Y-%m-%d")
                date_max = datetime.datetime.strptime(datemax, "%Y-%m-%d")
            except ValueError:
                response['error'] = "error: arguments 'datemin' and 'datemax' must be in YYYY-MM-DD format"
                return bson.json_util.dumps(response)
            conditions.append({"date": {"$gte": date_min}})
            conditions.append({"date": {"$lt": date_max}})

        if charity is not None:
            try:
                charity_id = int(charity)
            except ValueError:
                response['error'] = "error: argument 'charity' must be an integer"
                return bson.json_util.dumps(response)
            conditions.append({"charity_id": charity_id})

        # `conditions` always holds at least the date-not-null clause, so the
        # $match stage is unconditional.
        pipeline = [{"$match": {"$and": conditions}}]

        if by == "month":
            group = {"year": {"$year": "$date"}, "month": {"$month": "$date"}}
        else:
            group = "$county"
        pipeline.append(
            {"$group": {
                "_id": group,
                "amount": {
                    "$sum": "$amount"
                }
            }})

        result = coll.aggregate(pipeline)
        if by == "month":
            response = [[d["_id"], float(d["amount"])]
                        for d in result["result"] if d["_id"] is not None]
        else:
            # The county id is rendered as a zero-padded five-digit string.
            response = [["%05d" % d["_id"], float(d["amount"])]
                        for d in result["result"] if d["_id"] is not None]
    elif datatype == "population":
        coll = db["census"]
        response = [[d["_id"], int(d["pop2010"])] for d in coll.find()]
    elif datatype == "charities":
        coll = db["charitynet.normalized.transactions"]
        result = coll.aggregate([{
            "$group": {
                "_id": "$charity_id",
                "count": {
                    "$sum": 1
                }
            }
        }, {
            "$sort": {
                "count": -1
            }
        }])
        response = [[d["_id"], d["_id"], d["count"]] for d in result["result"]]
    else:
        response['error'] = "error: unknown datatype requested"

    # Convert to JSON and return the result.
    return bson.json_util.dumps(response)
Esempio n. 18
0
def run(server,
        db,
        coll,
        method='find',
        query=None,
        limit=1000,
        fields=None,
        sort=None,
        fill=None):
    """Run a `find` against a MongoDB collection and return JSON.

    Arguments:
        server, db, coll -- MongoDB server, database and collection names.
        method -- operation to perform; only 'find' is implemented.
        query, fields, sort -- optional encoded arguments, decoded with
            `decode` and handed to the collection's find().
        limit -- maximum number of documents; must be convertible to int.
        fill -- optional encoded flag; when it decodes to a false value the
            'data' list is left empty and only the count is reported.
            Defaults to True.

    Returns a JSON string of the tangelo response object.  On success its
    'result' entry is {'count': cursor count, 'data': [documents]}; on
    failure its 'error' entry holds a message.

    Raises RuntimeError for a method that passes the initial check but has no
    implementation (currently 'insert').
    """
    # Create an empty response object.
    response = tangelo.empty_response()

    # Check the requested method.
    if method not in ['find', 'insert']:
        response['error'] = "Unsupported MongoDB operation '%s'" % (method)
        return bson.json_util.dumps(response)

    # Decode the query strings into Python objects.
    # NOTE(review): decode is defined elsewhere; presumably it records the
    # problem in `response` before raising ValueError - confirm.
    try:
        if query is not None:
            query = decode(query, 'query', response)
        if fields is not None:
            fields = decode(fields, 'fields', response)
        if sort is not None:
            sort = decode(sort, 'sort', response)
        if fill is not None:
            fill = decode(fill, 'fill', response)
        else:
            fill = True
    except ValueError:
        return bson.json_util.dumps(response)

    # Cast the limit value to an int.
    try:
        limit = int(limit)
    except ValueError:
        response['error'] = "Argument 'limit' ('%s') could not be converted to int." % (limit)
        return bson.json_util.dumps(response)

    # Create database connection.
    try:
        c = pymongo.Connection(server)[db][coll]
    except pymongo.errors.AutoReconnect:
        response['error'] = "Could not connect to MongoDB server '%s'" % (server)
        return bson.json_util.dumps(response)

    # Perform the requested action.
    if method == 'find':
        # Do a find operation with the passed arguments.
        it = c.find(spec=query, fields=fields, limit=limit, sort=sort)

        # Create a list of the results (left empty when `fill` is false).
        results = list(it) if fill else []

        # Create an object to structure the results.
        retobj = {}
        retobj['count'] = it.count()
        retobj['data'] = results

        # Pack the results into the response object, and return it.
        response['result'] = retobj
    else:
        # Reached for 'insert', which is accepted above but not implemented.
        # The method name is now interpolated into the message.
        raise RuntimeError("illegal method '%s' in module 'mongo'" % (method))

    # Return the response object.
    return bson.json_util.dumps(response)
Esempio n. 19
0
    def invoke_service(self, module, *pargs, **kwargs):
        """Load (or reload) the service module at path `module`, run it, and
        serialize what it returns.

        The module is (re)imported when it has not been seen before or when
        it, or its sibling ".json" configuration file, is newer than the
        cached copy in `self.modules`.  The service is then invoked either
        through its `run()` function or, for a REST-style module, through the
        function named after the lowercased HTTP method, provided that
        function carries a true `restful` attribute.

        Arguments:
            module -- filesystem path of the service's ".py" file.
            *pargs, **kwargs -- forwarded unchanged to the service function.

        Returns a string: the service's own string result, or the JSON
        serialization of its result, or a JSON object holding a "stream_key"
        when the result has a `next` attribute (it is then stored in
        `self.streams`).

        Raises cherrypy.HTTPError when the service returns (or the dispatch
        produces) a tangelo.HTTPStatusCode, and when the result cannot be
        serialized to JSON.
        """
        # TODO(choudhury): This method should attempt to load the named module,
        # then invoke it with the given arguments.  However, if the named module
        # is "config" or something similar, the method should instead launch a
        # special "config" app, which lists the available app modules, along
        # with docstrings or similar.  It should also allow the user to
        # add/delete search paths for other modules.
        tangelo.content_type("text/plain")

        # Save the system path (be sure to *make a copy* using the list()
        # function) - it will be modified before invoking the service, and must
        # be restored afterwards.
        origpath = list(sys.path)

        # By default, the result should be a bare response that we will place an
        # error message in if something goes wrong; if nothing goes wrong this
        # will be replaced with some other object.
        result = tangelo.empty_response()

        # Store the modpath in the thread-local storage (tangelo.paths() makes
        # use of this per-thread data, so this is the way to get the data across
        # the "module boundary" properly).
        modpath = os.path.dirname(module)
        cherrypy.thread_data.modulepath = modpath
        cherrypy.thread_data.modulename = module

        # Extend the system path with the module's home path.
        sys.path.insert(0, modpath)

        # Import the module if not already imported previously (or if the module
        # to import, or its configuration file, has been updated since the last
        # import).
        try:
            stamp = self.modules.get(module)
            mtime = os.path.getmtime(module)

            # "foo.py" -> "foo.json": drop "py", keep the dot.
            config_file = module[:-2] + "json"
            config_mtime = None
            if os.path.exists(config_file):
                config_mtime = os.path.getmtime(config_file)

            if stamp is None or mtime > stamp["mtime"] or (config_mtime is not None and config_mtime > stamp["mtime"]):
                if stamp is None:
                    tangelo.log("loading new module: " + module)
                else:
                    tangelo.log("reloading module: " + module)

                # Load any configuration the module might carry with it.
                if config_mtime is not None:
                    try:
                        with open(config_file) as f:
                            config = json.loads(json_minify(f.read()))
                            if not isinstance(config, dict):
                                msg = "Service module configuration file does not contain a key-value store (i.e., a JSON Object)"
                                tangelo.log(msg)
                                raise TypeError(msg)
                    except IOError:
                        tangelo.log("Could not open config file %s" % (config_file))
                        raise
                    except ValueError as e:
                        tangelo.log("Error reading config file %s: %s" % (config_file, e))
                        raise
                else:
                    config = {}

                cherrypy.config["module-config"][module] = config

                # Remove .py to get the module name
                name = module[:-3]

                # Load the module.
                service = imp.load_source(name, module)
                self.modules[module] = {"module": service,
                                        "mtime": max(mtime, config_mtime)}
            else:
                service = stamp["module"]
        except:
            # Any failure while importing (including the re-raised config
            # errors above) is reported to the client with the traceback.
            bt = traceback.format_exc()

            tangelo.log("Error importing module %s" % (tangelo.request_path()), "SERVICE")
            tangelo.log(bt, "SERVICE")

            result = tangelo.HTTPStatusCode("501 Error in Python Service", "There was an error while trying to import module %s:<br><pre>%s</pre>" % (tangelo.request_path(), bt))
        else:
            # Try to run the service - either it's in a function called "run()",
            # or else it's in a REST API consisting of at least one of "get()",
            # "put()", "post()", or "delete()".
            #
            # Collect the result in a variable - depending on its type, it will be
            # transformed in some way below (by default, to JSON, but may also raise
            # a cherrypy exception, log itself in a streaming table, etc.).
            #
            try:
                if 'run' in dir(service):
                    # Call the module's run() method, passing it the positional and
                    # keyword args that came into this method.
                    result = service.run(*pargs, **kwargs)
                else:
                    # Reaching here means it's a REST API.  Check for the
                    # requested method, ensure that it was marked as being part
                    # of the API, and call it; or give a 405 error.
                    #
                    # .get() is used so that a verb the module does not define
                    # yields None (and hence the 405 below) rather than a
                    # KeyError that would be reported as a 501.
                    method = cherrypy.request.method
                    restfunc = service.__dict__.get(method.lower())
                    if restfunc is not None and hasattr(restfunc, "restful") and restfunc.restful:
                        result = restfunc(*pargs, **kwargs)
                    else:
                        result = tangelo.HTTPStatusCode(405, "Method not allowed")
            except Exception:
                bt = traceback.format_exc()

                tangelo.log("Caught exception while executing service %s" % (tangelo.request_path()), "SERVICE")
                tangelo.log(bt, "SERVICE")

                result = tangelo.HTTPStatusCode("501 Error in Python Service", "There was an error executing service %s:<br><pre>%s</pre>" % (tangelo.request_path(), bt))

        # Restore the path to what it was originally.
        sys.path = origpath

        # Check the type of the result to decide what result to finally return:
        #
        # 1. If it is an HTTPStatusCode object, raise a cherrypy HTTPError
        # exception, which will cause the browser to do the right thing.
        #
        # 2. If it has a "next" attribute (e.g. a generator), log it in the
        # streaming table and return the key under which it was stored.
        #
        # 3. If it's a Python dictionary (or anything else that is not a
        # string), convert it to JSON.
        #
        # 4. If it's a string, don't do anything to it.
        #
        # This allows the services to return a Python object if they wish, or to
        # perform custom serialization (such as for MongoDB results, etc.).
        if isinstance(result, tangelo.HTTPStatusCode):
            if result.msg:
                raise cherrypy.HTTPError(result.code, result.msg)
            else:
                raise cherrypy.HTTPError(result.code)
        elif "next" in dir(result):
            # Generate a key corresponding to this object, using 100 random
            # bytes from the system - ensure the random key is not already in
            # the table (even though it would be crazy to wind up with a
            # collision).
            #
            # TODO(choudhury): replace this with a call to generate_key().
            # Move the comment above into the generate_key() function.
            key = md5.md5(os.urandom(100)).hexdigest()
            while key in self.streams:
                key = md5.md5(os.urandom(100)).hexdigest()

            # Log the object in the streaming table.
            self.streams[key] = result

            # Create an object describing the logging of the generator object.
            result = tangelo.empty_response()
            result["stream_key"] = key

            # Serialize it to JSON.
            result = json.dumps(result)
        elif not isinstance(result, types.StringTypes):
            try:
                result = json.dumps(result)
            except TypeError as e:
                # The offending type is normally embedded in the message as
                # "<service.TYPE object at ...>".  When that marker is absent
                # (e.g. the object comes from another module), fall back to
                # the result's own type plus the raw message instead of
                # failing with an IndexError while building the report.
                detail = str(e)
                pieces = detail.split("<service.")
                if len(pieces) > 1 and pieces[1].split():
                    t = pieces[1].split()[0]
                else:
                    t = "%s (%s)" % (type(result).__name__, detail)
                msg = "Service %s returned an object of type %s that could not be serialized to JSON" % (tangelo.request_path(), t)

                tangelo.log("Error: %s" % (msg), "SERVICE")

                raise cherrypy.HTTPError("501 Error in Python Service", msg)

        return result
Esempio n. 20
0
def run(database, table, start_time, end_time, center, degree, host="localhost", port=10000, fields="true"):
    """Build the email graph around `center` by querying a Shark table.

    The table is queried `degree` times; every round asks `build_query` for
    the rows involving the addresses found by the previous round.  Each
    fetched row is a tab-separated string whose first column is used as the
    edge target and whose second column is used as the edge source.

    Returns the tangelo response object.  On success its "result" entry is
    {"nodes": [{"email", "distance"}, ...], "edges": [{"source", "target",
    "id"}, ...]}, with edge ends given as indices into the node list and ids
    numbered from "1".  If `degree` is not an integer, its "error" entry is
    set instead.
    """
    response = tangelo.empty_response()

    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    client = init_shark(host, port, database)

    # Everyone found so far, and how many rounds it took to reach them.
    talkers = set([center])
    distance = {center: 0}

    frontier = list(talkers)
    batches = []

    for hop in range(degree):
        client.execute(build_query(database, table, start_time, end_time, frontier))
        rows = client.fetchAll()

        # Gather both addresses of every row, then drop the repeats.
        names = []
        for row in rows:
            names.append(row.split("\t")[0])
            names.append(row.split("\t")[1])
        frontier = list(set(names))

        talkers = talkers.union(frontier)

        # Only the first round in which an address shows up sets its distance.
        for address in frontier:
            distance.setdefault(address, hop + 1)

        batches.append(rows)

    talkers = list(talkers)
    talker_index = {}
    for position, address in enumerate(talkers):
        talker_index[address] = position

    # Turn every fetched row into an edge carrying node indices and a running
    # one-based id.
    edges = []
    for number, row in enumerate(itertools.chain.from_iterable(batches), 1):
        columns = row.split("\t")
        sender = columns[1]
        recipient = columns[0]
        edges.append({"source": talker_index[sender],
                      "target": talker_index[recipient],
                      "id": str(number)})

    nodes = [{"email": address, "distance": distance[address]} for address in talkers]

    response["result"] = {"nodes": nodes, "edges": edges}

    return response
Esempio n. 21
0
def run(host, port, graph, start_time=None, days=1, center=None, degree=None):
    """Build the email graph around `center` from a Rexster-hosted graph.

    For `degree` rounds, every address collected so far is looked up by its
    `email` index and its neighbours are fetched with `bothV()` once per day
    in the window, using the day formatted as MM/DD/YYYY as the label.

    Arguments:
        host, port, graph -- used to form "http://host:port/graphs/graph".
        start_time -- first day of the window, in YYYY-MM-DD format.
        days -- number of consecutive days in the window (integer).
        center -- the address the search starts from.
        degree -- number of expansion rounds (integer).

    Returns the tangelo response object.  On success its "result" entry is
    {"nodes": [{"email", "distance"}, ...], "edges": [{"source", "target",
    "id"}, ...]} with edge ends given as indices into the node list.  On
    failure its "error" entry holds a message.
    """
    response = tangelo.empty_response()

    # Bail with error if any of the required arguments is missing.  A real
    # list is built (rather than map/filter objects) so that the emptiness
    # test and join() behave the same on Python 2 and Python 3.
    required = [("start_time", start_time),
                ("days", days),
                ("center", center),
                ("degree", degree)]
    missing = [name for (name, value) in required if value is None]
    if missing:
        response["error"] = "missing required arguments: %s" % (", ".join(missing))
        return response

    # Cast the arguments to the right types.
    #
    # The degree is the degree of separation between the center element and the
    # retrieved nodes - an integer.
    try:
        degree = int(degree)
    except ValueError:
        response["error"] = "argument 'degree' must be an integer"
        return response

    # The start time is a calendar date given as YYYY-MM-DD.
    try:
        start_time = datetime.datetime.strptime(start_time, "%Y-%m-%d")
    except ValueError:
        response["error"] = "argument 'start_time' must be in YYYY-MM-DD format"
        return response

    try:
        days = int(days)
    except ValueError:
        response["error"] = "argument 'days' must be an integer"
        return response

    # One datetime per day of the window, starting at start_time.
    dateList = [start_time + datetime.timedelta(days=x) for x in range(days)]

    # %-formatting (rather than "+") so a numeric port works as well as a
    # string one.
    config = Config("http://%s:%s/graphs/%s" % (host, port, graph))
    config.set_logger(DEBUG)
    #client = RexsterClient(config)
    g = Graph(config)

    # Start a set of all interlocutors we're interested in - that includes the
    # center emailer.
    talkers = set([center])

    # Also start a table of distances from the center.
    distance = {center: 0}
    current_talkers = list(talkers)

    # NOTE(review): the vertex itself is never used below; the lookup is kept
    # because it still runs (and can fail) even when degree is 0 - confirm
    # whether that early failure is intended.
    center_vertex = g.vertices.index.lookup(email=center).next()

    edgeId = 0
    edges = []
    for i in range(degree):

        new_talkers = []

        for talker_email in current_talkers:

            current_vertex = g.vertices.index.lookup(email=talker_email).next()

            for day in dateList:
                dayString = day.strftime('%m/%d/%Y')

                adjacent = current_vertex.bothV(dayString)

                if adjacent is not None:
                    # Distinct neighbour addresses for this day, minus blanks.
                    adjacent_talkers = list(set(vertex.email for vertex in adjacent))

                    if '' in adjacent_talkers:
                        adjacent_talkers.remove('')

                    # Edges hold addresses for now; they are swapped for node
                    # indices once the full node list is known.
                    for this_talker in adjacent_talkers:
                        edges.append({"source": this_talker,
                                      "target": talker_email,
                                      "id": edgeId})
                        edgeId += 1

                    new_talkers.extend(adjacent_talkers)

        current_talkers.extend(new_talkers)
        current_talkers = list(set(current_talkers))

        talkers = talkers.union(current_talkers)

        # Compute updates to everyone's distance from center.
        for t in current_talkers:
            if t not in distance:
                distance[t] = i + 1

    # Construct a canonical graph structure from the set of talkers and the list
    # of emails.
    #
    # Start with an index map of the talkers.
    talkers = list(talkers)
    talker_index = {name: index for (index, name) in enumerate(talkers)}

    for edge in edges:
        edge["source"] = talker_index[edge["source"]]
        edge["target"] = talker_index[edge["target"]]

    talkers = [{"email": n, "distance": distance[n]} for n in talkers]

    # Stuff the graph data into the response object, and return it.
    response["result"] = {"nodes": talkers,
                          "edges": edges}
    return response
Esempio n. 22
0
def run_on_collection(servername, dbname, data_coll, boundary_type, _id=None, lng=-1.0, lat=-1.0, radius=0.0, swlng=-1.0, swlat=-1.0, nelng=-1.0, nelat=-1.0, limit=1000, _filter='true'):
    """Find documents whose 'loc' falls inside a boundary and return JSON.

    Arguments:
        servername, dbname, data_coll -- MongoDB server, database, collection.
        boundary_type -- 'circle' (uses lng, lat, radius), 'rect' (uses
            swlng, swlat, nelng, nelat) or 'id' (uses _id and the collection's
            materialized paths, generating them if absent).
        _id -- node whose descendants are returned for boundary_type 'id'.
        lng, lat -- circle centre in degrees; radius -- circle radius in
            meters.
        swlng, swlat, nelng, nelat -- south-west and north-east box corners.
        limit -- maximum number of documents; values outside 1..1000 are
            replaced by 1000.
        _filter -- when 'true', return one marker ({'name', 'ID', 'lng',
            'lat'}) per in-bounds location; otherwise return the matching
            documents whole.

    Returns a JSON string of the tangelo response object.  On success its
    'result' entry is {'count': ..., 'data': [...]}; on an invalid argument
    or connection failure its 'error' entry holds a message.
    """
    earthRadius = 6378137  # meters
    # Construct an empty response object.
    response = tangelo.empty_response()
    try:
        limit = int(limit)
    except ValueError:
        response['error'] = "Argument (%s), value (%s) could not be converted to int" % ('limit', limit)
        # Stop here like every other validation failure; carrying on would
        # compare the unconverted value against integers below.
        return bson.json_util.dumps(response)
    if limit > 1000 or limit < 1:
        limit = 1000

    if boundary_type == 'circle':
        # convert types to floats
        try:
            lng = float(lng)
            lat = float(lat)
            radius = float(radius)
        except ValueError as e:
            response['error'] = e.message + " Argument could not be converted to float."
            return bson.json_util.dumps(response)
        # check bounds
        if lng > 180.0 or lng < -180.0:
            response['error'] = "Longitude out of bounds: %s" % (lng)
            return bson.json_util.dumps(response)
        if lat > 90.0 or lat < -90.0:
            response['error'] = "Latitude out of bounds: %s" % (lat)
            return bson.json_util.dumps(response)
        if radius < 0.0:
            response['error'] = "Radius cannot be negative: %s" % (radius)
            return bson.json_util.dumps(response)
        # convert radius from meters to a fraction of the earth's radius
        radius = radius / earthRadius
        container = [[lng, lat], radius]
        query = {'loc': {'$within': {'$centerSphere': container}}}
    elif boundary_type == 'rect':
        try:
            swlng = float(swlng)
            swlat = float(swlat)
            nelng = float(nelng)
            nelat = float(nelat)
        except ValueError as e:
            response['error'] = e.message + " Argument could not be converted to float."
            return bson.json_util.dumps(response)
        container = [[swlng, swlat], [nelng, nelat]]
        query = {'loc': {'$within': {'$box': container}}}
    elif boundary_type == 'id':
        # Nothing to validate: no `query` or `container` is built for this
        # type, and neither is used on the 'id' path below.
        pass
    else:
        response['error'] = "Invalid geometery type: %s" % (boundary_type)
        return bson.json_util.dumps(response)

    # Create database connection.
    try:
        c = pymongo.Connection(servername)[dbname][data_coll]
    except pymongo.errors.AutoReconnect:
        response['error'] = "Could not connect to MongoDB server '%s'" % (servername)
        return bson.json_util.dumps(response)

    # Perform the query
    if boundary_type == 'id':
        mpath = materializedPaths.materializedPaths(servername, dbname, data_coll, data_coll)
        if not mpath.checkIfPresent():
            # materialized paths not present, create it!
            mpath.generateFromChildTree()
        it = mpath.getDescendantsCriteria(_id, "loc")
    else:
        it = c.find(spec=query, limit=limit)

    # Create a list of the results
    results = list()
    # if we want to filter by only those locations of items in our range
    if _filter == 'true':
        count = 0
        try:
            for item in it:
                # for each location of the item
                for location in item['loc']:
                    # if it's in bounds create new object, add to results.
                    # The 'id' test comes first so `container` (undefined for
                    # that type) is never evaluated.
                    if boundary_type == 'id' or isInBounds(float(location[0]), float(location[1]), container, boundary_type, response):
                        marker = dict()
                        marker['name'] = item['taxonomies'][0]['scientific_name']
                        marker['ID'] = item['_id']
                        marker['lng'] = location[0]
                        marker['lat'] = location[1]
                        results.append(marker)
                        count += 1
                    # else: don't add point
        except (ValueError, KeyError):
            # NOTE(review): the response is returned as it stands; unless
            # isInBounds wrote an error into it, the client receives no
            # explanation - confirm whether an error should be set here.
            return bson.json_util.dumps(response)
    # otherwise return document containing all locations of item in range
    else:
        results = list(it)

    # Create an object to structure the results
    retobj = dict()
    retobj['count'] = count if _filter == 'true' else it.count()
    retobj['data'] = results

    # Pack the results in the response object, and return it
    response['result'] = retobj
    return bson.json_util.dumps(response)