forked from mikejs/reconcile-demo
/
reconcile.py
108 lines (92 loc) · 3.75 KB
/
reconcile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
An example reconciliation service API for Google Refine 2.0.
See http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi.
"""
import re
from flask import Flask, request, jsonify, json
# The Flask application object; the /reconcile route below is registered on it.
app = Flask(__name__)
# Service metadata, sent back when a request carries neither a 'query' nor
# a 'queries' parameter. The reconciliation API documents more options,
# but a name and the default types are enough for a simple service.
metadata = {
    "name": "Presidential Reconciliation Service",
    "defaultTypes": [
        {
            "id": "/people/presidents",
            "name": "US President",
        },
    ],
}
# The data we'll match against. A name's position in this list is used as
# its reconciliation id, so entries must keep their order.
presidents = [
    "George Washington", "John Adams", "Thomas Jefferson", "James Madison",
    "James Monroe", "John Quincy Adams", "Andrew Jackson", "Martin Van Buren",
    "William Henry Harrison", "John Tyler", "James K. Polk", "Zachary Taylor",
    "Millard Fillmore", "Franklin Pierce", "James Buchanan",
    "Abraham Lincoln", "Andrew Johnson", "Ulysses S. Grant",
    "Rutherford B. Hayes", "James A. Garfield", "Chester A. Arthur",
    "Grover Cleveland", "Benjamin Harrison", "William McKinley",
    "Theodore Roosevelt", "William Howard Taft", "Woodrow Wilson",
    "Warren G. Harding", "Calvin Coolidge", "Herbert Hoover",
    "Franklin D. Roosevelt", "Harry S. Truman", "Dwight D. Eisenhower",
    "John F. Kennedy", "Lyndon B. Johnson", "Richard Nixon", "Gerald Ford",
    "Jimmy Carter", "Ronald Reagan", "George H. W. Bush", "Bill Clinton",
    "George W. Bush", "Barack Obama",
]
def search(query):
    """
    Do a simple fuzzy match of US presidents, returning results in
    Refine reconciliation API format.

    ``query`` is treated as a case-insensitive regular expression and is
    searched for anywhere inside each name. If it is not a valid regular
    expression it is matched as literal text instead of raising.

    Returns a list with one result dict per matching president, in the
    order the names appear in ``presidents``. Each result's ``id`` is the
    name's index in that list.
    """
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error:
        # The query comes straight from the client; input such as "(" or
        # "*" must not surface as an unhandled exception.
        pattern = re.compile(re.escape(query), re.IGNORECASE)

    return [
        {
            "id": index,
            "name": name,
            "score": 100,
            # If the name matches the query exactly then it's a
            # (near-)certain match, otherwise it could be ambiguous.
            "match": name == query,
            "type": [
                {"id": "/people/presidents",
                 "name": "US President"}],
        }
        for index, name in enumerate(presidents)
        if pattern.search(name)
    ]
# A dotted JavaScript identifier path such as "cb" or "jQuery123.handlers.cb".
_JSONP_CALLBACK_RE = re.compile(
    r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z")


def jsonpify(obj):
    """
    Like jsonify but wraps result in a JSONP callback if a 'callback'
    query param is supplied.

    The callback name is echoed verbatim into a JavaScript response, so it
    is only honoured when it looks like a (dotted) JavaScript identifier.
    A missing or malformed callback yields a plain JSON response.
    """
    callback = request.args.get('callback')
    if callback is None or not _JSONP_CALLBACK_RE.match(callback):
        return jsonify(obj)

    response = app.make_response("%s(%s)" % (callback, json.dumps(obj)))
    response.mimetype = "text/javascript"
    return response
@app.route("/reconcile", methods=['POST', 'GET'])
def reconcile():
    """
    Reconciliation endpoint.

    Dispatches on the request parameters:

    * ``query``   -- a single search; responds with ``{"result": [...]}``.
    * ``queries`` -- a JSON object of (key, query) pairs; responds with an
      object of (key, ``{"result": [...]}``) pairs.
    * neither     -- responds with the service metadata.

    Every response goes through ``jsonpify``.
    """
    # request.values combines the URL query string with the POSTed form,
    # so parameters are found for both HTTP methods this route accepts.
    params = request.values

    # If a single 'query' is provided do a straightforward search.
    query = params.get('query')
    if query:
        # If the 'query' param starts with a "{" then it is a JSON object
        # with the search string as the 'query' member. Otherwise,
        # the 'query' param is the search string itself.
        if query.startswith("{"):
            query = json.loads(query)['query']
        return jsonpify({"result": search(query)})

    # If a 'queries' parameter is supplied then it is a dictionary
    # of (key, query) pairs representing a batch of queries. We
    # should return a dictionary of (key, results) pairs.
    queries = params.get('queries')
    if queries:
        batch = json.loads(queries)
        results = {
            key: {"result": search(item['query'])}
            for key, item in batch.items()
        }
        return jsonpify(results)

    # If neither a 'query' nor 'queries' parameter is supplied then
    # we should return the service metadata.
    return jsonpify(metadata)
# Start the app's built-in server when this file is executed as a script.
# NOTE(review): debug=True is presumably meant for local demo use only;
# confirm before exposing this service publicly.
if __name__ == "__main__":
    app.run(debug=True)