def read(self, request):
    """Return the ``dbStats`` command output with a ``counts`` entry added.

    ``counts`` maps each ``_id`` found in ``db.counts`` to that row's
    ``value`` dict, extended with ``legislators`` and ``documents`` totals.
    ``request`` is not used.
    """
    # db.counts contains the output of a m/r run that generates
    # per-state counts of bills and bill sub-objects
    per_state = {}
    for row in db.counts.find():
        tallies = row['value']
        key = row['_id']

        # The 'total' row is counted over whole collections; every other
        # row is restricted to the state named by its _id.
        grand_total = key == 'total'
        tallies['legislators'] = (
            db.legislators.count() if grand_total
            else db.legislators.find({'roles.state': key}).count())
        tallies['documents'] = (
            db.documents.files.count() if grand_total
            else db.documents.files.find(
                {'metadata.bill.state': key}).count())

        per_state[key] = tallies

    stats = db.command('dbStats')
    stats['counts'] = per_state
    return stats
def next_big_id(abbr, letter, collection):
    """Return the next sequential ID string for ``abbr``.

    Increments the ``seq`` field of the document whose ``_id`` is ``abbr``
    in ``collection`` and formats the result as the upper-cased ``abbr``,
    then ``letter``, then the sequence number zero-padded to 8 digits.
    """
    # 'new' asks for the post-increment document back; 'upsert' creates the
    # counter document the first time an abbreviation is seen.
    counter_cmd = SON([
        ('findandmodify', collection),
        ('query', SON([('_id', abbr)])),
        ('update', SON([('$inc', SON([('seq', 1)]))])),
        ('new', True),
        ('upsert', True),
    ])
    reply = db.command(counter_cmd)
    number = reply['value']['seq']
    return "%s%s%08d" % (abbr.upper(), letter, number)
def next_big_id(abbr, letter, collection):
    """Build a new ID of the form ``<ABBR><letter><8-digit sequence>``.

    The sequence number comes from a per-``abbr`` counter document kept in
    ``collection``; each call bumps the counter's ``seq`` field by one.
    """
    selector = SON([('_id', abbr)])
    bump = SON([('$inc', SON([('seq', 1)]))])
    # Single findandmodify round-trip: increment (creating the counter if
    # needed via 'upsert') and read back the updated document ('new').
    fields = [
        ('findandmodify', collection),
        ('query', selector),
        ('update', bump),
        ('new', True),
        ('upsert', True),
    ]
    counter_doc = db.command(SON(fields))['value']
    return "%s%s%08d" % (abbr.upper(), letter, counter_doc['seq'])
def get_next_id(self):
    """Generate a new sequential ID for the vote.

    Increments the ``seq`` counter stored under ``self.state`` in the
    ``vote_ids`` collection and returns ``<STATE>V<8-digit sequence>``.
    """
    # 'new' returns the post-increment counter; 'upsert' creates it on
    # first use for a state.
    counter_cmd = SON([
        ('findandmodify', 'vote_ids'),
        ('query', SON([('_id', self.state)])),
        ('update', SON([('$inc', SON([('seq', 1)]))])),
        ('new', True),
        ('upsert', True),
    ])
    number = db.command(counter_cmd)['value']['seq']
    return "%sV%08d" % (self.state.upper(), number)
def get_next_id(self):
    """Return the next vote ID for ``self.state``.

    The ID is the upper-cased state, the letter ``V``, and a sequence
    number zero-padded to 8 digits. The sequence is read from (and
    incremented in) the ``vote_ids`` collection.
    """
    selector = SON([('_id', self.state)])
    bump = SON([('$inc', SON([('seq', 1)]))])
    fields = [
        ('findandmodify', 'vote_ids'),
        ('query', selector),
        ('update', bump),
        ('new', True),      # hand back the document after the increment
        ('upsert', True),   # create the counter if it does not exist yet
    ]
    reply = db.command(SON(fields))
    return "%sV%08d" % (self.state.upper(), reply['value']['seq'])
def put_document(doc, content_type, metadata):
    """Store ``doc`` under a newly generated document ID and return that ID.

    The ID is ``<STATE>D<8-digit sequence>``, where the state is read from
    ``metadata['bill']['state']`` and the sequence from the ``doc_ids``
    counter collection. The document is written with ``fs.put`` together
    with ``content_type`` and ``metadata``.
    """
    state = metadata['bill']['state']

    # Generate a new sequential ID for the document
    counter_cmd = SON([
        ('findandmodify', 'doc_ids'),
        ('query', SON([('_id', state)])),
        ('update', SON([('$inc', SON([('seq', 1)]))])),
        ('new', True),
        ('upsert', True),
    ])
    number = db.command(counter_cmd)['value']['seq']
    doc_id = "%sD%08d" % (state.upper(), number)

    logging.info("Saving as %s" % doc_id)
    fs.put(doc, _id=doc_id, content_type=content_type, metadata=metadata)
    return doc_id
def _insert_with_id(event):
    """Assign a new sequential ``_id`` to ``event``, save it, return the ID.

    The ID is ``<STATE>E<8-digit sequence>``, with the state taken from
    ``event['state']`` and the sequence from the ``event_ids`` counter
    collection. ``event`` is modified in place (its ``_id`` key is set).
    """
    state = event['state']

    # 'new' returns the post-increment counter; 'upsert' creates it on
    # first use for a state.
    counter_cmd = SON([
        ('findandmodify', 'event_ids'),
        ('query', SON([('_id', state)])),
        ('update', SON([('$inc', SON([('seq', 1)]))])),
        ('new', True),
        ('upsert', True),
    ])
    number = db.command(counter_cmd)['value']['seq']
    event_id = "%sE%08d" % (state.upper(), number)

    logging.info("Saving as %s" % event_id)
    event['_id'] = event_id
    db.events.save(event, safe=True)
    return event_id
def _insert_with_id(event):
    """Save ``event`` under a freshly generated ID and return that ID.

    Bumps the ``seq`` counter for ``event['state']`` in ``event_ids``,
    formats the ID as ``<STATE>E<8-digit sequence>``, writes it to
    ``event['_id']`` and saves the event to ``db.events``.
    """
    selector = SON([('_id', event['state'])])
    bump = SON([('$inc', SON([('seq', 1)]))])
    fields = [
        ('findandmodify', 'event_ids'),
        ('query', selector),
        ('update', bump),
        ('new', True),      # hand back the document after the increment
        ('upsert', True),   # create the counter if it does not exist yet
    ]
    reply = db.command(SON(fields))

    new_id = "%sE%08d" % (event['state'].upper(), reply['value']['seq'])
    logging.info("Saving as %s" % new_id)

    event['_id'] = new_id
    db.events.save(event, safe=True)
    return new_id
def put_document(doc, content_type, metadata):
    """Write ``doc`` via ``fs.put`` with a new sequential ID; return the ID.

    The ID has the form ``<STATE>D<8-digit sequence>``. The state comes
    from ``metadata['bill']['state']``; the sequence is the incremented
    ``seq`` value of that state's counter in the ``doc_ids`` collection.
    """
    # Generate a new sequential ID for the document
    selector = SON([('_id', metadata['bill']['state'])])
    bump = SON([('$inc', SON([('seq', 1)]))])
    fields = [
        ('findandmodify', 'doc_ids'),
        ('query', selector),
        ('update', bump),
        ('new', True),      # hand back the document after the increment
        ('upsert', True),   # create the counter if it does not exist yet
    ]
    reply = db.command(SON(fields))

    new_id = "%sD%08d" % (metadata['bill']['state'].upper(),
                          reply['value']['seq'])
    logging.info("Saving as %s" % new_id)

    fs.put(doc, _id=new_id, content_type=content_type, metadata=metadata)
    return new_id
def import_docs(user='', pw='', host='localhost', db_name='capublic'):
    """Copy bill version XML from the MySQL database into GridFS.

    For every ``CABillVersion`` row with non-null ``bill_xml`` that is not
    already stored in the ``documents`` GridFS collection, this stores the
    XML under a new ``CAD<8-digit sequence>`` ID and, when a bill listing
    that version is found in ``db.bills``, sets the matching version's
    ``url`` and saves the bill.

    ``user`` and ``pw`` are only put in the connection string when both are
    non-empty.
    """
    if user and pw:
        prefix = 'mysql://%s:%s@' % (user, pw)
    else:
        prefix = 'mysql://'
    conn_str = '%s%s/%s?charset=utf8&unix_socket=/tmp/mysql.sock' % (
        prefix, host, db_name)

    session = sessionmaker(bind=create_engine(conn_str))()
    fs = gridfs.GridFS(db, collection="documents")

    # `!= None` is deliberate: it is a column comparison that builds the
    # SQL filter, not a Python identity test.
    with_xml = session.query(CABillVersion).filter(
        CABillVersion.bill_xml != None)  # noqa: E711

    for version in with_xml:
        version_id = version.bill_version_id

        # Skip versions that were imported on an earlier run.
        if fs.exists({"metadata": {"ca_version_id": version_id}}):
            continue

        counter_cmd = SON([
            ('findandmodify', 'doc_ids'),
            ('query', SON([('_id', 'ca')])),
            ('update', SON([('$inc', SON([('seq', 1)]))])),
            ('new', True),
            ('upsert', True),
        ])
        doc_id = "CAD%08d" % db.command(counter_cmd)['value']['seq']

        print("Saving: %s" % doc_id)
        fs.put(version.bill_xml, _id=doc_id, content_type='text/xml',
               metadata={"ca_version_id": version_id})

        bill = db.bills.find_one({'versions.name': version_id})
        if not bill:
            print("Couldn't find bill for %s" % version_id)
            continue

        # Only the first version entry with a matching name gets the URL.
        for entry in bill['versions']:
            if entry['name'] != version_id:
                continue
            entry['url'] = ("http://openstates.sunlightlabs.com/api/"
                            "documents/%s/" % doc_id)
            break

        db.bills.save(bill, safe=True)
def import_docs(user='', pw='', host='localhost', db_name='capublic'):
    """Import bill version XML documents from MySQL into GridFS.

    Walks every ``CABillVersion`` with non-null ``bill_xml``. Versions that
    already exist in the ``documents`` GridFS collection are skipped; the
    rest are stored under a new ``CAD<8-digit sequence>`` ID. If a bill in
    ``db.bills`` lists the version, the first matching version entry gets
    a ``url`` pointing at the stored document and the bill is saved.

    Credentials are included in the connection string only when both
    ``user`` and ``pw`` are non-empty.
    """
    auth_part = ('mysql://%s:%s@' % (user, pw)) if (user and pw) \
        else 'mysql://'
    conn_str = '%s%s/%s?charset=utf8&unix_socket=/tmp/mysql.sock' % (
        auth_part, host, db_name)

    engine = create_engine(conn_str)
    make_session = sessionmaker(bind=engine)
    session = make_session()

    fs = gridfs.GridFS(db, collection="documents")

    # Column comparison that builds the SQL filter; not an identity test.
    has_xml = CABillVersion.bill_xml != None  # noqa: E711

    for version in session.query(CABillVersion).filter(has_xml):
        marker = {"ca_version_id": version.bill_version_id}
        if fs.exists({"metadata": marker}):
            # Already stored by a previous run.
            continue

        selector = SON([('_id', 'ca')])
        bump = SON([('$inc', SON([('seq', 1)]))])
        reply = db.command(SON([
            ('findandmodify', 'doc_ids'),
            ('query', selector),
            ('update', bump),
            ('new', True),
            ('upsert', True),
        ]))
        doc_id = "CAD%08d" % reply['value']['seq']

        print("Saving: %s" % doc_id)
        fs.put(version.bill_xml, _id=doc_id, content_type='text/xml',
               metadata={"ca_version_id": version.bill_version_id})

        bill = db.bills.find_one({'versions.name': version.bill_version_id})
        if bill:
            url = ("http://openstates.sunlightlabs.com/api/"
                   "documents/%s/" % doc_id)
            for entry in bill['versions']:
                if entry['name'] == version.bill_version_id:
                    entry['url'] = url
                    break
            db.bills.save(bill, safe=True)
        else:
            print("Couldn't find bill for %s" % version.bill_version_id)
def read(self, request):
    """Return the ``dbStats`` command output with a ``counts`` entry added.

    ``counts`` maps each ``_id`` found in ``db.counts`` to that row's
    ``value`` dict, extended with a ``legislators`` total. ``request`` is
    not used.
    """
    # db.counts contains the output of a m/r run that generates counts of
    # bills and bill sub-objects
    by_abbr = {}
    for row in db.counts.find():
        tallies = row["value"]
        key = row["_id"]

        if key == "total":
            matching = db.legislators.count()
        else:
            # Legislators are matched on the level reported by metadata()
            # and on the field named after that level.
            level = metadata(key)["level"]
            spec = {"level": level}
            spec[level] = key
            matching = db.legislators.find(spec).count()

        tallies["legislators"] = matching
        by_abbr[key] = tallies

    stats = db.command("dbStats")
    stats["counts"] = by_abbr
    return stats
def read(self, request):
    """Return the ``dbStats`` command output with a ``counts`` entry added.

    ``counts`` maps each ``_id`` found in ``db.bill_stats`` to that row's
    ``value`` dict, extended with a ``legislators`` total. ``request`` is
    not used.
    """
    # db.bill_stats contains the output of a m/r run that generates counts
    # of bills and bill sub-objects
    by_abbr = {}
    for row in db.bill_stats.find():
        tallies = row['value']
        key = row['_id']

        if key == 'total':
            tallies['legislators'] = db.legislators.count()
            by_abbr[key] = tallies
            continue

        # Non-total rows: filter legislators by the level metadata() reports
        # and by the field named after that level.
        level = metadata(key)['level']
        spec = {'level': level}
        spec[level] = key
        tallies['legislators'] = db.legislators.find(spec).count()
        by_abbr[key] = tallies

    stats = db.command('dbStats')
    stats['counts'] = by_abbr
    return stats
def read(self, request):
    """Return database statistics plus per-state object counts.

    The result is the ``dbStats`` command output with a ``counts`` key
    mapping each ``_id`` in ``db.counts`` to its ``value`` dict, to which
    ``legislators`` and ``documents`` totals are added. ``request`` is not
    used.
    """
    # db.counts contains the output of a m/r run that generates
    # per-state counts of bills and bill sub-objects
    per_state = {}
    for row in db.counts.find():
        tallies = row['value']
        key = row['_id']

        if key == 'total':
            # Whole-collection totals.
            tallies['legislators'] = db.legislators.count()
            tallies['documents'] = db.documents.files.count()
            per_state[key] = tallies
            continue

        # Totals restricted to the state named by this row's _id.
        legislators = db.legislators.find({'roles.state': key})
        tallies['legislators'] = legislators.count()
        documents = db.documents.files.find({'metadata.bill.state': key})
        tallies['documents'] = documents.count()
        per_state[key] = tallies

    stats = db.command('dbStats')
    stats['counts'] = per_state
    return stats
def read(self, request):
    """Return database statistics plus per-abbreviation object counts.

    The result is the ``dbStats`` command output with a ``counts`` key
    mapping each ``_id`` in ``db.counts`` to its ``value`` dict, to which a
    ``legislators`` total is added. ``request`` is not used.
    """
    # db.counts contains the output of a m/r run that generates counts of
    # bills and bill sub-objects
    by_abbr = {}
    for row in db.counts.find():
        tallies = row['value']
        key = row['_id']

        if key != 'total':
            # Filter on the level metadata() reports and on the field
            # named after that level.
            level = metadata(key)['level']
            spec = {'level': level}
            spec[level] = key
            tallies['legislators'] = db.legislators.find(spec).count()
        else:
            tallies['legislators'] = db.legislators.count()

        by_abbr[key] = tallies

    stats = db.command('dbStats')
    stats['counts'] = by_abbr
    return stats