def test_parse(self):
    # test in/out equivalence and parsing
    res = json.loads(JSON)
    out = json.dumps(res)
    self.assertEquals(res, json.loads(out))
    try:
        json.dumps(res, allow_nan=False)
    except ValueError:
        pass
    else:
        self.fail("23456789012E666 should be out of range")
def lines(self):
    line = self.ins.readline()
    while line:
        # log.write(line)
        # log.flush()
        yield json.loads(line)
        line = self.ins.readline()
def get_update_seq(self):
    url = urlparse.urlparse(self.uri)
    conn = httplib.HTTPConnection(url.netloc)
    conn.request("GET", url.path)
    resp = conn.getresponse()
    assert resp.status == 200
    return json.loads(resp.read())['update_seq']
def add_attachments(self, doc, f, name=None, content_type=None, rev=None):
    if isinstance(doc, basestring):
        id_ = doc
    else:
        id_ = doc["_id"]
    if isinstance(f, basestring):
        assert os.path.isfile(f)
        body = open(f, 'r').read()
        if content_type is None:
            content_type = content_type_table[f.split('.')[-1]]
        name = os.path.split(f)[-1]
    else:
        body = f
        if content_type is None:
            raise Exception("Cannot send a string body without a content-type.")
        if name is None:
            raise Exception("Cannot send a string body without a name.")
    if rev:
        path = id_ + '/' + name + '?rev=' + rev
    else:
        path = id_ + '/' + name
    response = self.http.put(path, body=body,
                             headers={'content-type': content_type})
    assert response.status == 201
    return json.loads(response.body)
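# Hedged usage sketch (added for illustration, not from the original source):
# add_attachments above accepts either a file path or an in-memory body; for
# an in-memory body both name and content_type must be supplied. The db object
# and doc values below are hypothetical.
#
#   db.add_attachments(doc, '/tmp/report.txt', rev=doc['_rev'])
#   db.add_attachments(doc['_id'], 'hello world', name='greeting.txt',
#                      content_type='text/plain', rev=doc['_rev'])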
def bulk(self, docs, all_or_nothing=False):
    body = {'docs': list(docs), 'all_or_nothing': all_or_nothing}
    response = self.http.post('_bulk_docs', body=json.dumps(body))
    if response.status == 201:
        return json.loads(response.body)
    else:
        raise CouchDBException("Bulk update failed " + response.body)
def loads(s, **kwargs):
    """Load object from JSON str.
    See %s.loads for details on kwargs.
    """ % jsonmod
    if str is unicode and isinstance(s, bytes):
        s = s.decode('utf8')
    return jsonmod.loads(s, **kwargs)
def parse_cluster(cluster):
    try:
        return json.loads(cluster.data)  # TODO: read from real mahout data
    except Exception, ex:
        print ex
        return {}
def term_facet(host='localhost:9200', terms=['bibleverse'], _type='habakkuk',
               date_filter=[], size=10):
    ret = []
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',
                                                        start, end,
                                                        include_upper=False)))
    q = q.search(size=0)
    for term in terms:
        q.facet.add_term_facet(term, order='count', size=size)

    print json.dumps(json.loads(q.to_search_json()), indent=2)

    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    for facet in resultset.facets:
        print "Total", facet, resultset.facets[facet]['total']
        for row in resultset.facets[facet]['terms']:
            print "\t", row['term'], row['count']
            ret.append((facet, row['term']))
    return ret
def _getDocIds(self):
    # Helper function to tersely compute a list of indices that evenly
    # distribute the items in it
    def partition(alist, indices):
        return [alist[i:j] for (i, j) in zip([0] + indices, indices + [None])][:-1]

    try:
        conn = httplib.HTTPConnection(self.host, self.port)
        conn.request('GET', '/%s/_all_docs' % (self.db, ))
        response = conn.getresponse()
        if response.status != 200:  # OK
            print 'Unable to get docs: %s %s' % (response.status, response.reason)
            sys.exit()
        ids = [i['id'] for i in json.loads(response.read())['rows']
               if not i['id'].startswith('_')]
        ids.sort()
    finally:
        conn.close()

    partition_size = int(ceil(1.0 * len(ids) / self.num_threads))
    indices = []
    _len = len(ids)
    idx = 0
    while idx < _len:
        idx += partition_size
        indices.append(idx)
    return partition(ids, indices)
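# Worked example (added for illustration, not from the original source):
# with 7 ids and num_threads=3, partition_size = ceil(7 / 3.0) = 3, the
# indices list becomes [3, 6, 9], and partition() yields
# [ids[0:3], ids[3:6], ids[6:9]], i.e. three chunks of sizes 3, 3, and 1.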
def __call__(self, keys=None, **kwargs):
    # for k, v in kwargs.items():
    #     if type(v) is bool:
    #         kwargs[k] = str(v).lower()
    #     if k in ['key', 'startkey', 'endkey']:
    #         kwargs[k] = json.dumps(v)
    qs = {}
    for k, v in kwargs.iteritems():
        if 'docid' not in k and k != 'stale':
            qs[k] = json.dumps(v)
        else:
            qs[k] = v
    query_string = urllib.urlencode(qs)
    if len(query_string) != 0:
        path = self.path + '?' + query_string
    else:
        path = self.path
    if not keys:
        response = self.db.http.get(path)
    else:
        response = self.db.http.post(path, body=json.dumps({'keys': keys}))
    result = json.loads(response.body)
    if response.status == 200:
        return RowSet(self.db, result['rows'],
                      offset=result.get('offset', None),
                      total_rows=result.get('total_rows'))
    else:
        raise ViewException(result)
def _loadjson(self, json_input):
    # TODO really this should be accomplished by hooking
    # simplejson to create attrdicts instead of dicts.
    def struct2attrdict(st):
        """
        copy a json structure, turning all dicts into attrdicts.
        copying descends instances of dict and list, including subclasses.
        """
        if isinstance(st, dict):
            return attrdict([(k, struct2attrdict(v)) for k, v in st.items()])
        if isinstance(st, list):
            return [struct2attrdict(li) for li in st]
        return st

    if json_input == '':
        self.log.error('the empty string is not valid json')
        raise MetawebError('the empty string is not valid json')

    try:
        r = json.loads(json_input)
    except ValueError, e:
        self.log.error('error parsing json string %r' % json_input)
        raise MetawebError, 'error parsing JSON string: %s' % e

    # return the parsed structure with dicts converted to attrdicts
    return struct2attrdict(r)
def temp_view(self, map_, reduce_=None, language='javascript', **kwargs):
    view = {"map": map_, "language": language}
    if isinstance(reduce_, basestring):
        view['reduce'] = reduce_
    body = json.dumps(view)
    if not kwargs:
        path = self.db.uri + '_temp_view'
    else:
        for k, v in kwargs.iteritems():
            if type(v) is bool:
                kwargs[k] = str(v).lower()
            if k in ['key', 'startkey', 'endkey']:
                kwargs[k] = json.dumps(v)
        query_string = urllib.urlencode(kwargs)
        path = self.path + '_temp_view' + '?' + query_string
    response = self.db.http.post(path, body=body)
    if response.status == 200:
        result = json.loads(response.body)
        return RowSet(self.db, result['rows'], offset=result['offset'],
                      total_rows=result['total_rows'])
    else:
        raise TempViewException('Status: ' + str(response.status) +
                                '\nBody: ' + response.body)
def loads(s, **kwargs):
    """Load object from JSON str.
    See %s.loads for details on kwargs.
    """ % jsonmod
    if str is unicode and isinstance(s, bytes):
        s = s.decode('utf8')
    return jsonmod.loads(s, **kwargs)
def temp_view(self, map_, reduce_=None, language='javascript', **kwargs):
    view = {"map": map_, "language": language}
    if isinstance(reduce_, basestring):
        view['reduce'] = reduce_
    body = json.dumps(view)
    if not kwargs:
        path = self.db.uri + '_temp_view'
    else:
        for k, v in kwargs.iteritems():
            if type(v) is bool:
                kwargs[k] = str(v).lower()
            if k in ['key', 'startkey', 'endkey']:
                kwargs[k] = json.dumps(v)
        query_string = urllib.urlencode(kwargs)
        path = self.path + '_temp_view' + '?' + query_string
    response = self.db.http.post(path, body=body)
    if response.status == 200:
        result = json.loads(response.body)
        return RowSet(self.db, result['rows'], offset=result['offset'],
                      total_rows=result['total_rows'])
    else:
        raise TempViewException('Status: ' + str(response.status) +
                                '\nBody: ' + response.body)
def default(self, *args, **kwargs):
    """
    Accepts the JSON-RPC request and hands it off to the
    corresponding JSON-RPC method.
    """
    responses = []

    # Response content type -> JSON
    set_content_type_json()

    # Get data
    if cherrypy.request.method == "GET":
        data = kwargs
        if "params" in data:
            if self.debug:
                cherrypy.log("")
                cherrypy.log(u"params (raw): " + repr(data["params"]))
                cherrypy.log("")
            try:
                data["params"] = json.loads(data["params"])
            except _ParseError, err:
                traceback_info = "".join(traceback.format_exception(*sys.exc_info()))
                cherrypy.log(traceback_info)
                return json.dumps(
                    ParseErrorResponse(data=unicode(err)).to_dict()
                )
        requests = [data]
def test_topics_api_view_no_data(self):
    # """
    # verify we get a 200 OK even if we don't send POST data
    # :return:
    # """
    mock_return = {
        'count': 5,
        'topics': [
            {
                "es_phrase": "love your enemies, do good to those who hate you",
                "bibleverse": "luke 6:27",
                "search_url": "http://localhost:8000/biblestudy/?search=enemies+good"
            },
            {
                "es_phrase": "don\u2019t worry about tomorrow",
                "bibleverse": "matthew 6:34",
                "search_url": "http://localhost:8000/biblestudy/?search=worry+tomorrow"
            },
            {
                "es_phrase": "some more text",
                "bibleverse": "matthew 8:8",
                "search_url": "http://localhost:8000/biblestudy/?search=worry+tomorrow"
            }
        ]
    }
    with patch('web.views.get_topics', return_value=mock_return) as mock_get_topics:
        client = Client()
        response = client.post('/api/topics/')
        try:
            ret = json.loads(response.content)
        except:
            self.fail("Could not parse the response from topics_api \n{}".format(response.content))
        self.assertEquals(200, response.status_code)
def createdb(arg):
    if type(arg) is Database:
        db = arg
    else:
        db = Database(arg)
    response = db.http.put('')
    assert response.status == 201
    return json.loads(response.body)
def all_ids(self):
    """List all document ids in the database."""
    response = self.http.get("_all_docs")
    obj = dict((str(k), v) for k, v in json.loads(response.body).iteritems())
    ids = []
    for row in obj["rows"]:
        ids.append(str(row["id"]))
    return tuple(ids)
def deletedb(arg):
    if type(arg) is Database:
        db = arg
    else:
        db = Database(arg)
    response = db.http.delete('')
    assert response.status == 200
    return json.loads(response.body)
def createdb(arg):
    if type(arg) is Database:
        db = arg
    else:
        db = Database(arg)
    response = db.http.put('')
    if response.status != 201:
        raise CouchDBException(response.body)
    return json.loads(response.body)
def deletedb(arg):
    if type(arg) is Database:
        db = arg
    else:
        db = Database(arg)
    response = db.http.delete('')
    if response.status != 200:
        raise CouchDBException(response.body)
    return json.loads(response.body)
def cmd_restore(fb, newlocation, graphfile):
    """restore a graph object to the graph

    %prog restore newlocation graphfile

    Restore a graph object to the newlocation
    """
    fh = open(graphfile, "r")
    graph = json.loads(fh.read())
    fh.close()
    return restore(fb.mss, graph, newlocation, ignore_types=None)
def _run(self):
    line = self.response.fp.readline()
    while line and not self.force_stop:
        try:
            obj = json.loads(line)
        except:
            obj = None
        if obj:
            self.dispatch(obj)
        line = self.response.fp.readline()
def get(self, id_, rev=None):
    """Get a single document by id and (optionally) revision from the database."""
    if rev is None:
        response = self.http.get(id_)
    else:
        response = self.http.get(id_ + "?rev=" + rev)
    if response.status == 200:
        obj = dict((str(k), v) for k, v in json.loads(response.body).iteritems())
        return Document(obj, db=self)
    else:
        raise CouchDBDocumentDoesNotExist("No document at id " + id_)
def handle_notifications(doc, req):
    db = Database(req['db'])
    doc = json.loads(req['body'])
    doc['_id'] = str(uuid.uuid1())
    doc['type'] = 'notification'
    jobs = func(doc, req, db)
    for job in jobs:
        job['parent-notification'] = doc['_id']
        info = db.create(job)
        job['_id'] = info['id']
    doc['jobs'] = jobs
    return doc, json.dumps(doc)
def fix_results(fn, outputdir="/tmp/", show_misses=True):
    """
    Assumes the caller has generated a new regex and wants to fix the results
    captured with the old regex.  Read a JSON results file containing tweets
    captured by habakkuk and show any line that does not match.
    """
    from find_all_scriptures import find_all_scriptures, filtergroupdict
    import gzip, copy, traceback

    fp = None
    found_match_cnt = 0
    miss_match_cnt = 0
    if fn.endswith("gz"):
        fp = gzip.open(fn)
        found_match_fp = gzip.open(os.path.join(outputdir, os.path.basename(fn)), "w")
    else:
        fp = open(fn)
        found_match_fp = open(os.path.join(outputdir, os.path.basename(fn)), "w")

    bv_set = set([line.strip() for line in open("./analysis/join_data/bibleverses.txt")])

    print "Reading", fn
    print "Writing fixed file to", found_match_fp.name
    print ""
    try:
        for line in fp:
            res = json.loads(line)
            txt = res["text"].lower()
            matches = [ma for ma in find_all_scriptures(txt)]
            if len(matches) == 0 or res["bibleverse"] not in bv_set:
                miss_match_cnt += 1
                if show_misses:
                    print "missed", line
            else:
                found_match_fp.write(line)
                found_match_cnt += 1
                ret = filtergroupdict(ma)
                newres = copy.deepcopy(res)
                # actual matched string
                newres["matext"] = ma.string[ma.start():ma.end()].replace("\r\n", " ")
                newres["book"] = ret["book"]
                newres["bibleverse"] = " ".join((ret["book"], ret["verse"]))
                if newres["bibleverse"] != res["bibleverse"]:
                    print "Matched verse changed from %s to %s - text '%s'\n" % (
                        res["bibleverse"],
                        newres["bibleverse"],
                        unicode(res["text"]).encode("ascii", errors="ignore"),
                    )
    except Exception, ex:
        print "Failure!!!"
        print "line", line
        print "regex returned", ret
        print "traceback", "".join(traceback.format_exception(*sys.exc_info()))
def test_topics_api_view(self):
    mock_return = {
        'count': 5,
        'topics': [
            {
                "bibleverse": "luke 6:27",
                "phrases": [
                    {
                        "es_phrase": "love your enemies, do good to those who hate you",
                        "bibleverse": "luke 6:27",
                        "search_url": "http://localhost:8000/biblestudy/?search=enemies+good"
                    }
                ]
            },
            {
                "bibleverse": "matthew 6:34",
                "phrases": [
                    {
                        "es_phrase": "don\u2019t worry about tomorrow",
                        "bibleverse": "matthew 6:34",
                        "search_url": "http://localhost:8000/biblestudy/?search=worry+tomorrow"
                    }
                ]
            },
            {
                "bibleverse": "matthew 8:8",
                "phrases": [
                    {
                        "es_phrase": "some more text",
                        "bibleverse": "matthew 8:8",
                        "search_url": "http://localhost:8000/biblestudy/?search=worry+tomorrow"
                    }
                ],
            }
        ]
    }
    with patch('web.views.get_topics', return_value=mock_return) as mock_get_topics:
        client = Client()
        response = client.post('/api/topics/', content_type="application/json",
                               data=json.dumps({'size': 10, 'offset': 99}))
        try:
            ret = json.loads(response.content)
        except:
            self.fail("Could not parse the response from topics_api \n{}".format(response.content))
        self.assertEquals(200, response.status_code)
        self.assertTrue(ret["topic_results"])
        self.assertTrue(ret['topic_results'].get('count'))
        self.assertTrue(ret['topic_results'].get('topics'))
        self.assertEquals(99, ret['offset'])
        self.assertTrue(mock_get_topics.called)
def get_revs(self, id_, fetch=False):
    """Get all revisions of a single document from the database.

    Returns a generator for the revision names or the actual documents,
    depending on whether fetch=True is given (defaults to False).
    """
    response = self.http.get(id_ + "?revs_info=true")
    for rev in json.loads(response.body)["_revs_info"]:
        if rev["status"] != "available":
            continue
        if fetch:
            yield self.get(id_, rev=rev["rev"])
        else:
            yield rev["rev"]
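# Hedged usage sketch (added for illustration, not from the original source):
# get_revs above yields revision ids lazily, or full documents when fetch=True.
# The db object and document id below are hypothetical.
#
#   for rev_id in db.get_revs('some-doc-id'):
#       print rev_id
#   for old_doc in db.get_revs('some-doc-id', fetch=True):
#       print old_doc['_rev']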
def home(request, template='clustering.html'):
    context = {}
    ret = _get_newest_or_for_date(None, None)
    if ret:
        ret = ret[0]
        cluster = json.loads(ret.d3_dendogram_json)
        context['clusters'] = cluster
        context['facets'] = cluster.get('facets', [])
    else:
        context['clusters'] = []
        context['facets'] = []
    return render(request, template, context)
def wrap(req, *args, **kwargs):
    try:
        j = json.loads(req.raw_post_data)
    except ValueError:
        # this means that the necessary data is in the request.REQUEST
        # j = None
        j = req.REQUEST or None
    resp = func(req, j, *args, **kwargs)
    if isinstance(resp, HttpResponse):
        return resp
    return HttpResponse(json.dumps(resp, ensure_ascii=False),
                        mimetype="application/json")
def load(self):
    """
    Loads the data from the file system
    """
    # Determine the newest JSON file
    filelist = glob.glob(os.path.join(self.datadir_current_path, "*.json"))
    if not filelist:
        return
    datafile_path = os.path.abspath(sorted(filelist)[-1])

    # Load the JSON file
    with io.open(datafile_path, "rb") as datafile:
        loaded_data = json.loads(datafile.read())
    if not loaded_data:
        return

    # Add the language-independent data from the JSON file to the class instance
    for data_key_item in self.all_data_keys:
        data_key_name = data_key_item["name"]
        data_key_type = data_key_item["type"]
        if data_key_type == TYPE_TIMESTAMP:
            timestamp_iso = loaded_data.get(data_key_name, None)
            if timestamp_iso:
                setattr(self, data_key_name, isodate.parse_datetime(timestamp_iso))
            else:
                setattr(self, data_key_name, getattr(self, data_key_name))
        else:
            setattr(self, data_key_name,
                    loaded_data.get(data_key_name, getattr(self, data_key_name)))

    # Add the language-dependent data from the JSON file to the
    # language-dependent class instances
    for language_id, language_data in self.items():
        assert isinstance(language_data, LangData)
        for data_key_item in language_data.all_data_keys:
            data_key_name = data_key_item["name"]
            data_key_type = data_key_item["type"]
            if data_key_type == TYPE_TIMESTAMP:
                timestamp_iso = loaded_data.get(data_key_name, {}).get(language_id, None)
                if timestamp_iso:
                    setattr(language_data, data_key_name,
                            isodate.parse_datetime(timestamp_iso))
                else:
                    setattr(language_data, data_key_name, None)
            else:
                setattr(
                    language_data,
                    data_key_name,
                    loaded_data.get(data_key_name, {}).get(
                        language_id, getattr(language_data, data_key_name)
                    ),
                )
def test_rank_results(self):
    cluster_data = open(os.path.join(settings.PROJECT_ROOT, 'topic_analysis',
                                     'data', 'test_cluster_data.json')).read()
    cluster_data = json.loads(cluster_data)
    mock_es_conn = MagicMock()
    patches = {'get_es_connection': MagicMock(return_value=mock_es_conn)}
    with patch.multiple('topic_analysis.topic_extraction', **patches) as mocks:
        topic_extraction.rank_phrases_and_store(cluster_data)
        self.assertTrue(mock_es_conn.delete_by_query.called)
        self.assertTrue(mock_es_conn.index.called)
def _parse(self, resp, content):
    """Parses a rabj response to get the envelope information
    """
    if resp['content-type'] == 'application/json':
        try:
            envelope = jsonlib2.loads(content)
            if envelope['status']['code'] == 200:
                return envelope
            else:
                error = envelope['error']
                raise RabjError(error['code'], error['class'],
                                error['detail'], envelope)
        except jsonlib2.ReadError, e:
            _log.warn("Decode error %s in content %s", e, content)
            raise RabjError(resp.status, resp.reason, {'msg': e.message}, content)
def setUp(self):
    """reads the "real" elasticsearch settings from
    SOURCE/elasticsearch/settings.json and uses it to configure an index
    for the unittests"""
    self.es_settings = {'ES_HOSTS': ['localhost:9200', ],
                        'INDEX': "unittest-binarypig",
                        'FACET_SIZE': 999999}
    query.settings.ES_SETTINGS = self.es_settings
    index_template_fn = os.path.join(settings.SOURCE_ROOT, 'elasticsearch', 'settings.json')
    self.index_settings = json.loads(file(index_template_fn).read())
    conn = ES(self.es_settings['ES_HOSTS'])
    self.createIndex(conn)
def create(self, doc, all_or_nothing=False):
    """Create a document. Accepts any object that can be converted into a
    dict. If multiple documents are passed they are handed off to the bulk
    document handler.
    """
    if type(doc) not in (dict, Document, list, tuple, types.GeneratorType, RowSet):
        doc = dict(doc)
    # Hand off to bulk handler when passing multiple documents
    if type(doc) in (list, tuple, types.GeneratorType, RowSet):
        return self.bulk(doc, all_or_nothing=all_or_nothing)
    response = self.http.post('', body=json.dumps(doc))
    if response.status == 201:
        return json.loads(response.body)
    else:
        raise CouchDBException(response.body)
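# Hedged usage sketch (added for illustration, not from the original source):
# create above posts a single dict as one document and routes any list, tuple,
# generator, or RowSet to bulk(). The Database constructor argument below is
# hypothetical.
#
#   db = Database('http://localhost:5984/example-db')
#   info = db.create({'type': 'note', 'text': 'hello'})   # -> {'id': ..., 'rev': ...}
#   results = db.create([{'n': i} for i in range(3)])     # handed off to bulk()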
def all(self, keys=None, include_docs=True, **kwargs):
    kwargs['include_docs'] = include_docs
    qs = '&'.join(k + '=' + json.dumps(v) for k, v in kwargs.iteritems())
    if keys:
        response = self.db.http.post('_all_docs?' + qs,
                                     body=json.dumps({"keys": keys}))
    else:
        response = self.db.http.get('_all_docs?' + qs)
    if response.status == 200:
        result = json.loads(response.body)
        # Normalize alldocs to a standard view result for RowSet
        for row in result['rows']:
            if 'doc' in row:
                row['rev'] = row['value']['rev']
                row['value'] = row['doc']
        return RowSet(self.db, result['rows'],
                      offset=result.get('offset', None),
                      total_rows=result.get('total_rows', None))
    else:
        raise Exception(response.body)
def delete(self, doc, all_or_nothing=False):
    """Delete a document. Accepts any object that can be converted into a
    dict. Document/s must contain _id and _rev properties. If multiple
    documents are passed they are removed using the bulk document API.
    """
    if type(doc) not in (dict, Document, list, tuple, types.GeneratorType, RowSet):
        doc = dict(doc)
    if type(doc) not in (list, tuple, types.GeneratorType, RowSet):
        response = self.http.delete(doc['_id'] + '?rev=' + str(doc['_rev']))
    else:
        # Mark each document deleted and hand the collection to the bulk API;
        # return its result directly since there is no single HTTP response
        # to inspect in this branch.
        for d in doc:
            d['_deleted'] = True
        return self.bulk(doc, all_or_nothing=all_or_nothing)
    if response.status == 200:
        return json.loads(response.body)
    else:
        raise CouchDBException("Delete failed " + response.body)
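# Hedged usage sketch (added for illustration, not from the original source):
# delete above requires _id and _rev on each document; a single dict issues a
# DELETE, while a collection is marked _deleted and sent through bulk(). The
# db object and documents below are hypothetical.
#
#   db.delete(doc)                      # doc has '_id' and '_rev'
#   db.delete(rows, all_or_nothing=True)  # rows from a previous query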
def _getDocIds(self):
    # Helper function to tersely compute a list of indices that evenly
    # distribute the items in it
    def partition(alist, indices):
        return [alist[i:j] for (i, j) in zip([0] + indices, indices + [None])][:-1]

    try:
        conn = httplib.HTTPConnection(self.host, self.port)
        conn.request('GET', '/%s/_all_docs' % (self.db, ))
        response = conn.getresponse()
        if response.status != 200:  # OK
            print 'Unable to get docs: %s %s' % (response.status, response.reason)
            sys.exit()
        ids = [i['id'] for i in json.loads(response.read())['rows']
               if not i['id'].startswith('_')]
        ids.sort()
    finally:
        conn.close()

    partition_size = int(ceil(1.0 * len(ids) / self.num_threads))
    indices = []
    _len = len(ids)
    idx = 0
    while idx < _len:
        idx += partition_size
        indices.append(idx)
    return partition(ids, indices)
def _callback(self, request, result):
    rows = json.loads(result)['rows']
    self.results.extend([row['doc'] for row in rows])
# -*- coding: utf-8 -*-
import sys
import os
import couchdb

try:
    import jsonlib2 as json
except ImportError:
    import json

JSON_MBOX = sys.argv[1]  # i.e. enron.mbox.json

server = couchdb.Server('http://localhost:5984')
db = server["couch-irc-logs"]
docs = json.loads(open(JSON_MBOX).read())
db.update(docs, all_or_nothing=True)