def _get_batches_of_transformed_samples(self, index_array):
    batch_x = np.zeros((len(index_array),) + self.image_shape, dtype=K.floatx())
    if self.with_labels:
        batch_y = np.zeros((len(batch_x), self.num_class), dtype=K.floatx())

    for i, j in enumerate(index_array):
        # Protect file and dataframe access with a lock.
        with self.lock:
            image_row = self.images_df.iloc[j]
            product_id = image_row["product_id"]
            offset_row = self.offsets_df.loc[product_id]

            # Read this product's data from the BSON file.
            self.file.seek(offset_row["offset"])
            item_data = self.file.read(offset_row["length"])

        # Grab the image from the product.
        item = bson.BSON(item_data).decode()
        img_idx = image_row["img_idx"]
        bson_img = item["imgs"][img_idx]["picture"]

        # Load the image.
        img = load_img(io.BytesIO(bson_img), target_size=self.target_size)

        # Preprocess the image.
        x = img_to_array(img)
        x = self.image_data_generator.random_transform(x)
        x = self.image_data_generator.standardize(x)

        # Add the image and the label to the batch (one-hot encoded).
        batch_x[i] = x
        if self.with_labels:
            batch_y[i, image_row["category_idx"]] = 1

    if self.with_labels:
        return batch_x, batch_y
    else:
        return batch_x
def get_user_data(username):
    log(0, 'Getting db doc for user {}'.format(username))
    req_data = bson.BSON.encode({
        "key": key,
        "database": "feedlark",
        "collection": "user",
        "query": {"username": username},
        "projection": {},
    })
    get_response = gearman_client.submit_job('db-get', str(req_data))
    result = bson.BSON(get_response.result).decode()
    if result['status'] != 'ok':
        log(2, "Error getting db entry for user {}".format(username))
        log(2, result['description'])
        return None
    if "docs" not in result or len(result['docs']) == 0:
        log(1, "No docs returned for user {}".format(username))
        return None
    return result['docs'][0]
def __getitem__(self, key: str) -> Any:
    """
    Get an item from the dict. Will hit the cache first, then disk.

    :param key: the key to look up
    :return: the value
    """
    key = str(key)
    locked = self.is_locked(key) or self.ignore_locks
    if not locked:
        self.lock(key)
    try:
        with open(self._dir_bsonname(key), "rb") as f:
            val = bson.BSON(f.read()).decode()["value"]
    except (OSError, bson.errors.BSONError) as ex:
        raise KeyError("Key {} not found - {}".format(key, ex))
    finally:
        if not locked:
            self.release(key)
    return val
def get_single_feed_db_data(url, key=None):
    # format the request
    to_get_urls_ids = str(bson.BSON.encode({
        "key": key,
        "database": "feedlark",
        "collection": "feed",
        "query": {"url": url},
        "projection": {
            "_id": 1,
            "url": 1,
            "items": [{
                "link": 1,
                "pub_date": 1,
                "article_text": 1,
            }],
        },
    }))
    url_fields_gotten = gm_client.submit_job("db-get", to_get_urls_ids)
    bson_object = bson.BSON(url_fields_gotten.result).decode()
    return bson_object["docs"]
def _unpack_response(response, cursor_id=None, codec_options=CodecOptions()):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id
        not valid at server response
      - `codec_options` (optional): an instance of
        :class:`~bson.codec_options.CodecOptions`
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None
        raise CursorNotFound("cursor id '%s' not valid at server" % cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise NotMasterError(error_object["$err"])
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object.get("$err"),
                                   error_object.get("code"),
                                   error_object)
        raise OperationFailure("database error: %s" % error_object.get("$err"),
                               error_object.get("code"),
                               error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], codec_options)
    assert len(result["data"]) == result["number_returned"]
    return result
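# A minimal sketch exercising _unpack_response above: hand-build an
# OP_REPLY-style payload (flags, cursor_id, starting_from, number_returned,
# then one BSON document) and unpack it. Assumes pymongo's bson and the
# module-level imports _unpack_response already relies on; this is an
# illustration, not part of the original module.
import struct
import bson

_doc = bson.BSON.encode({"ok": 1})
# Header: flags=0 (<i), cursor_id=0 (<q), starting_from=0 (<i), returned=1 (<i).
_payload = struct.pack("<iqii", 0, 0, 0, 1) + _doc
assert _unpack_response(_payload)["data"] == [{"ok": 1}]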
def test_details_returns_details(self):
    nodegroup = factory.make_node_group()
    node = factory.make_node(nodegroup=nodegroup)
    self.set_lshw_details(node, self.example_lshw_details)
    self.set_lldp_details(node, self.example_lldp_details)
    client = make_worker_client(nodegroup)

    response = client.post(
        reverse('nodegroup_handler', args=[nodegroup.uuid]),
        {'op': 'details', 'system_ids': [node.system_id]})

    self.assertEqual(httplib.OK, response.status_code)
    parsed_result = bson.BSON(response.content).decode()
    self.assertDictEqual(
        {
            node.system_id: {
                "lshw": self.example_lshw_details_bin,
                "lldp": self.example_lldp_details_bin,
            },
        },
        parsed_result)
def next_sample(self):
    """Helper function for reading in the next sample."""
    if self.cur[0] >= len(self.seq):
        raise StopIteration
    offset, length, idx = self.seq[self.cur[0]]
    self.cur[0] += 1
    with open(self.path_bson, 'rb') as f:
        f.seek(offset)
        content = f.read(length)
    item = bson.BSON(content).decode()
    label = item['_id']  # used in test mode
    if not self.test_mode and 'category_id' in item:
        label = item['category_id']
        label = self.labelmap[label]
    pic = item['imgs'][idx]
    img = pic['picture']
    return label, img
def _unpack_response(response, cursor_id=None, as_class=dict,
                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id
        not valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None
        raise OperationFailure("cursor id '%s' not valid at server" % cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise AutoReconnect(error_object["$err"])
        raise OperationFailure("database error: %s" % error_object["$err"],
                               error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], as_class,
                                     tz_aware, uuid_subtype)
    assert len(result["data"]) == result["number_returned"]
    return result
def read_bson(bson_path, num_records, with_categories):
    rows = {}
    with open(bson_path, "rb") as f, tqdm(total=num_records) as pbar:
        offset = 0
        while True:
            # Each BSON document starts with its total length as a
            # little-endian int32.
            item_length_bytes = f.read(4)
            if len(item_length_bytes) == 0:
                break

            length = struct.unpack("<i", item_length_bytes)[0]
            f.seek(offset)
            item_data = f.read(length)
            assert len(item_data) == length

            item = bson.BSON(item_data).decode()
            product_id = item["_id"]
            num_imgs = len(item["imgs"])

            row = [num_imgs, offset, length]
            if with_categories:
                row += [item["category_id"]]
            rows[product_id] = row

            offset += length
            f.seek(offset)
            pbar.update()

    columns = ["num_imgs", "offset", "length"]
    if with_categories:
        columns += ["category_id"]

    df = pd.DataFrame.from_dict(rows, orient="index")
    df.index.name = "product_id"
    df.columns = columns
    df.sort_index(inplace=True)
    return df
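# Usage sketch for read_bson above. The "train.bson" path and record count
# are illustrative assumptions (Cdiscount-style data), not values from this
# code; assumes the module-level imports read_bson already uses.
if __name__ == "__main__":
    offsets_df = read_bson("train.bson", num_records=100, with_categories=True)
    # The offset/length index enables random access: seek straight to one
    # product's record and decode only that document.
    row = offsets_df.iloc[0]
    with open("train.bson", "rb") as f:
        f.seek(row["offset"])
        item = bson.BSON(f.read(row["length"])).decode()
    print(item["_id"], len(item["imgs"]))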
def raw_response(self, cursor_id=None):
    """Check the response header from the database, without decoding BSON.

    Check the response for errors and unpack.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id
        not valid at server response.
    """
    if self.flags & 1:
        # Shouldn't get this response if we aren't doing a getMore
        if cursor_id is None:
            raise ProtocolError("No cursor id for getMore operation")

        # Fake a getMore command response. OP_GET_MORE provides no document.
        msg = "Cursor not found, cursor id: %d" % (cursor_id,)
        errobj = {"ok": 0, "errmsg": msg, "code": 43}
        raise CursorNotFound(msg, 43, errobj)
    elif self.flags & 2:
        error_object = bson.BSON(self.documents).decode()
        # Fake the ok field if it doesn't exist.
        error_object.setdefault("ok", 0)
        if error_object["$err"].startswith("not master"):
            raise NotMasterError(error_object["$err"], error_object)
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object.get("$err"),
                                   error_object.get("code"),
                                   error_object)
        raise OperationFailure("database error: %s" % error_object.get("$err"),
                               error_object.get("code"),
                               error_object)
    return [self.documents]
def update_user_data(username, data):
    '''
    Update the document for the given user,
    with the dict of updates provided in `data`.
    '''
    log(0, 'Updating db doc for user {}'.format(username))
    req_data = bson.BSON.encode({
        "key": key,
        "database": "feedlark",
        "collection": "user",
        "data": {
            "updates": data,
            "selector": {"username": username}
        }
    })
    update_response = gearman_client.submit_job('db-update', str(req_data))
    result = bson.BSON(update_response.result).decode()
    if result['status'] != 'ok':
        log(2, 'Error updating db entry for user {}'.format(username))
        log(2, result['description'])
    return
def test_empty_details(self):
    # Empty details are passed through.
    nodegroup = factory.make_node_group()
    node = factory.make_node(nodegroup=nodegroup)
    self.set_lshw_details(node, b'')
    self.set_lldp_details(node, b'')
    client = make_worker_client(nodegroup)

    response = client.post(
        reverse('nodegroup_handler', args=[nodegroup.uuid]),
        {'op': 'details', 'system_ids': [node.system_id]})

    self.assertEqual(httplib.OK, response.status_code)
    parsed_result = bson.BSON(response.content).decode()
    self.assertDictEqual(
        {
            node.system_id: {
                "lshw": bson.binary.Binary(b""),
                "lldp": bson.binary.Binary(b""),
            },
        },
        parsed_result)
def _extract_meta(args, train=True):
    prefix = 'train' if train else 'test'
    raw_data_filepath = os.path.join(args.raw_data_dir, '{}.bson'.format(prefix))
    meta_data_filepath = os.path.join(args.meta_data_dir, 'meta_{}.csv'.format(prefix))

    meta = []
    with open(raw_data_filepath, 'rb') as f:
        offset = 0
        while True:
            f.seek(offset)
            item_length_bytes = f.read(4)
            if len(item_length_bytes) == 0:
                break
            # Decode item length:
            length = struct.unpack("<i", item_length_bytes)[0]

            f.seek(offset)
            item_data = f.read(length)
            assert len(item_data) == length, "%i vs %i" % (len(item_data), length)

            # Check that the record decodes cleanly.
            item = bson.BSON(item_data).decode()
            if train:
                row = (item['_id'], item['category_id'], offset, length,
                       len(item['imgs']))
            else:
                row = (item['_id'], offset, length, len(item['imgs']))
            meta.append(row)
            offset += length

    # The test set has no category_id column.
    if train:
        columns = ['_id', 'category_id', 'offset', 'length', 'num_pictures']
    else:
        columns = ['_id', 'offset', 'length', 'num_pictures']
    meta_df = pd.DataFrame(data=meta, columns=columns)
    meta_df.to_csv(meta_data_filepath, index=False)
def __open(self):
    fail = False
    while True:
        try:
            r = requests.post(self.__url + '/open',
                              data=bson.BSON.encode(self.__param),
                              **self.__kwargs)
            if r.status_code == 400:
                raise requests.exceptions.RequestException(
                    "bad request: " + r.text.strip())
            elif r.status_code == 503:
                raise requests.exceptions.RequestException(
                    "service unavailable: " + r.text.strip())
            r.raise_for_status()
            ack = bson.BSON(r.content).decode()
            self.__sid = ack['sid']
            self.__oid = ''
            self.__param['cid'] = ack['cid']
            self.__log_fn("session opened, sid=%s, cid=%s"
                          % (ack['sid'], ack['cid']))
            return
        except (requests.exceptions.RequestException, OSError) as e:
            if not fail:
                self.__log_fn("error: " + str(e))
                self.__log_fn(
                    "connection to message bus failed, "
                    "retrying in %d seconds" % self.__retry_wait)
                fail = True
            time.sleep(self.__retry_wait)
def update_all_feeds(worker, job):
    log(0, "'update-all-feeds' initiated")
    if key is not None:
        log(0, "Checking secret key")
        request = bson.BSON(job.data).decode()
        if 'key' not in request or request['key'] != key:
            log(2, "Secret key mismatch")
            response = bson.BSON.encode({
                'status': 'error',
                'description': 'Secret key mismatch',
            })
            return str(response)

    log(0, "Retrieving data from feed db")
    feed_db_data = get_all_feed_docs()

    try:
        for doc in feed_db_data:
            updated_feeds = gather_updates(doc)
            update_database(doc, updated_feeds)
    except Exception as e:
        log(2, "'update-all-feeds' failed")
        return str(bson.BSON.encode({
            "status": "error",
            "error-description": str(e),
        }))

    log(0, "'update-all-feeds' finished")
    return str(bson.BSON.encode({
        "status": "ok",
        "updated_feeds": [x['_id'] for x in feed_db_data],
    }))
def get_votes_for_user(username):
    '''
    Get all the votes that this user has cast on articles
    '''
    log(0, 'Getting votes for user {}'.format(username))
    req_data = bson.BSON.encode({
        "key": key,
        "database": "feedlark",
        "collection": "vote",
        "query": {"username": username},
        "projection": {},
    })
    get_response = gearman_client.submit_job('db-get', str(req_data))
    result = bson.BSON(get_response.result).decode()
    if result['status'] != 'ok':
        log(2, "Error getting votes for user {}".format(username))
        log(2, result['description'])
        return None
    if 'docs' not in result or len(result['docs']) == 0:
        log(1, "No docs returned for user {}".format(username))
        return []
    return result['docs']
def __next_sample(self):
    """Helper function for reading in the next sample."""
    if self.cur[0] >= len(self.seq):
        raise StopIteration
    offset, length, idx = self.seq[self.cur[0]]
    self.cur[0] += 1
    f = self.inputf
    if idx == 0:
        # First image of a product: read and decode the BSON record,
        # then cache it for the product's remaining images.
        f.seek(offset)
        content = f.read(length)
        item = bson.BSON(content).decode()
        self.buffer = item
    else:
        item = self.buffer
    label = item['_id']  # used in test mode
    if 'category_id' in item:
        label = item['category_id']
    pic = item['imgs'][idx]
    img = pic['picture']
    return float(label), img
def get_details(self, node):
    url = reverse('node_handler', args=[node.system_id])
    response = self.client.get(url, {'op': 'details'})
    self.assertEqual(http.client.OK, response.status_code)
    self.assertEqual('application/bson', response['content-type'])
    return bson.BSON(response.content).decode()
def calculate():
    data = request.get_data()
    expr = bson.BSON(data).decode()
    # Note: eval() on a client-supplied expression executes arbitrary code;
    # this is only safe if every caller is fully trusted.
    return bson.BSON.encode({"ret": str(eval(str(expr['expression'])))})
def on_message(_mosq, msg):
    '''callback called when a message from the subscriptions is received'''
    data = bson.BSON(msg.payload).decode()
    sistats.pretty_print(msg.topic, data)
def decode_bson(self, data):
    return bson.BSON(base64.b64decode(data["Data"])).decode()
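# Round-trip sketch for decode_bson above (hedged): build the payload shape
# it expects, base64-encoded BSON under a "Data" key, and decode it with the
# same expression the method uses. Assumes pymongo's bson; illustrative only.
import base64
import bson

_payload = {"Data": base64.b64encode(bson.BSON.encode({"value": 42}))}
assert bson.BSON(base64.b64decode(_payload["Data"])).decode() == {"value": 42}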
from datetime import datetime

from dataclasses_serialization.serializer_base import (
    isinstance, noop_serialization, noop_deserialization, dict_serialization,
    dict_deserialization, list_deserialization, Serializer,
    DeserializationError
)

try:
    import bson

    try:
        # Assume py-bson version of bson installed
        bson_loads = bson.loads
        bson_dumps = bson.dumps
    except AttributeError:
        # Fallback to pymongo version of bson
        bson_loads = lambda bson_str: bson.BSON(bson_str).decode()
        bson_dumps = bson.BSON.encode
except ImportError:
    raise ImportError("bson module required for BSON serialization")

__all__ = [
    "BSONSerializer",
    "BSONSerializerMixin",
    "BSONStrSerializer",
    "BSONStrSerializerMixin"
]


def bson_int_deserializer(cls, obj):
    """
    Mongo implicitly converts ints to floats
    """
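# Quick sanity check for the shim above (sketch only): whichever bson
# variant is installed, bson_dumps/bson_loads should round-trip a plain
# document of simple types.
_doc = {"n": 1, "tags": ["a", "b"]}
assert bson_loads(bson_dumps(_doc)) == _doc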
#!/usr/bin/env python
import sys

import bson
from urllib import urlopen

BUS = 'http://localhost:8000/test'

level = sys.argv[1]
text = sys.argv[2]

msg = {
    'type': 'SYSTEM_ALERT',
    'queue': 'SYSTEM_ALERT',
    'data': {
        'level': level,
        'text': text
    }
}

ack = bson.BSON(urlopen(BUS + '/open', bson.BSON.encode({})).read()).decode()
urlopen(BUS + '/send/' + ack['sid'], bson.BSON.encode(msg))
def format(self, value):
    try:
        return bson.BSON(value).decode()
    except ValueError as e:
        return self.process_error('Cannot format value: {}'.format(e))
import logging
import re
import shlex
import struct

import bson
import ntpath

# from cuckoo.common.abstracts import BehaviorHandler
# from cuckoo.common.netlog import BsonParser
# from cuckoo.common.utils import guid_name, jsbeautify, htmlprettify

FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT)
log = logging.getLogger(__name__)

bson_decode = lambda d: bson.BSON(d).decode()

# 20 Mb max message length.
MAX_MESSAGE_LENGTH = 20 * 1024 * 1024


# From cuckoo/common/files.py
class Storage(object):
    @staticmethod
    def get_filename_from_path(path):
        """Cross-platform filename extraction from path.
        @param path: file path.
        @return: filename.
        """
        dirpath, filename = ntpath.split(path)
        return filename if filename else ntpath.basename(dirpath)
def get_details(self, node): url = reverse("node_handler", args=[node.system_id]) response = self.client.get(url, {"op": "details"}) self.assertEqual(http.client.OK, response.status_code) self.assertEqual("application/bson", response["content-type"]) return bson.BSON(response.content).decode()
def __init__(self, batch_size, data_shape,
             path_bson=None, path_index=None, path_labelmap=None,
             test_mode=False, shuffle=False, part_index=0, num_parts=1,
             aug_list=None, smooth_param='', rgb_mean=None,
             data_name='data', label_name='softmax_label', **kwargs):
    super(BsonImageIter, self).__init__()
    assert path_bson
    assert path_index is None or len(path_index) > 0
    num_threads = os.environ.get('MXNET_CPU_WORKER_NTHREADS', 1)
    logging.info('Using %s threads for decoding...', str(num_threads))
    class_name = self.__class__.__name__
    self.seq = []
    self.path_bson = path_bson
    self.test_mode = test_mode
    self.inputf = open(path_bson, 'rb')
    f = self.inputf
    length_size = 4
    offset = 0
    print('loading bson offset..')
    if path_index is not None and os.path.exists(path_index):
        print('loading index')
        self.seq = pickle.load(open(path_index, 'rb'))
        print('index loaded')
    else:
        # Scan the BSON file once, recording (offset, length, img_idx)
        # for every image of every product.
        while True:
            f.seek(offset)
            item_length_bytes = f.read(length_size)
            if len(item_length_bytes) == 0:
                break
            # Decode item length:
            length = struct.unpack("<i", item_length_bytes)[0]
            f.seek(offset)
            item_data = f.read(length)
            assert len(item_data) == length, "%i vs %i" % (len(item_data), length)
            item = bson.BSON(item_data).decode()
            img_size = len(item['imgs'])
            for i in xrange(img_size):
                self.seq.append((offset, length, i))
            offset += length
        self.inputf.close()
        if path_index is not None:
            pickle.dump(self.seq, open(path_index, 'wb'),
                        pickle.HIGHEST_PROTOCOL)
    print('loaded, item count', len(self.seq))

    self.labelmap = {}
    self.rlabelmap = {}
    print('loading labelmap')
    if path_labelmap is not None:
        for line in open(path_labelmap, 'r'):
            vec = line.strip().split()
            self.labelmap[int(vec[0])] = int(vec[1])
            self.rlabelmap[int(vec[1])] = int(vec[0])
    print('labelmap loaded')

    self.rgb_mean = rgb_mean
    if self.rgb_mean:
        self.rgb_mean = np.array(self.rgb_mean, dtype=np.float32).reshape(1, 1, 3)
        self.rgb_mean = nd.array(self.rgb_mean)

    if len(smooth_param) == 0:
        self.label_width = 1
        self.provide_label = [(label_name, (batch_size,))]
        self.smoothed_label = None
    else:
        # smooth_param is "<confusion_matrix.npy>,<K>,<A>,<B>":
        # smooth each one-hot label over the K most-confused classes.
        _vec = smooth_param.split(',')
        assert len(_vec) == 4
        self.confusion_matrix = np.load(_vec[0])
        print(self.confusion_matrix.shape)
        self.smoothed_label = np.zeros(self.confusion_matrix.shape, dtype=np.float32)
        LS_K = int(_vec[1])
        LS_A = float(_vec[2])
        LS_B = float(_vec[3])
        for i in xrange(self.confusion_matrix.shape[0]):
            am = np.argsort(self.confusion_matrix[i])[::-1]
            assert i == am[0]
            self.smoothed_label[i][i] = 1.0 - LS_A - LS_B
            for j in xrange(1, LS_K):
                self.smoothed_label[i][am[j]] += LS_A / (LS_K - 1)
            for j in xrange(LS_K, len(am)):
                self.smoothed_label[i][am[j]] += LS_B / (len(am) - LS_K)
        self.label_width = self.smoothed_label.shape[0]
        self.provide_label = [(label_name, (batch_size, self.label_width))]

    self.check_data_shape(data_shape)
    self.provide_data = [(data_name, (batch_size,) + data_shape)]
    self.batch_size = batch_size
    self.data_shape = data_shape
    self.shuffle = shuffle
    if aug_list is None:
        self.auglist = CreateAugmenter(data_shape, **kwargs)
    else:
        self.auglist = aug_list
    self.buffer = None
    self.cur = [0, 0]
    self.reset()
def assertInvalid(self, data):
    self.assertRaises(InvalidBSON, bson.BSON(data).decode)
def register_vote(worker, job):
    """ Gearman entry point """
    bson_input = bson.BSON(job.data)
    job_input = bson_input.decode()

    # auth check
    if key is not None:
        if 'key' not in job_input or job_input['key'] != key:
            log(2, "Secret key mismatch")
            response = bson.BSON.encode({
                'status': 'error',
                'description': 'Secret key mismatch',
            })
            return str(response)

    if vote_already_exists(job_input['username'], job_input['article_url']):
        log(1, 'User already voted on this article')
        response = bson.BSON.encode({
            'status': 'error',
            'description': 'User already voted on this article'
        })
        return str(response)

    # log the vote
    job_input['vote_datetime'] = datetime.now()
    add_update_to_db(job_input)

    required_fields = [
        'username', 'feed_url', 'article_url', 'positive_opinion'
    ]
    if not all(x in job_input for x in required_fields):
        log(1, 'Missing field in input: ' + str(job_input))
        response = {
            "status": "error",
            "description": "Missing field in input."
        }
        return str(bson.BSON.encode(response))

    log(0, "'register-vote' called for user '{}' for article {}".format(
        job_input['username'], job_input['article_url']))

    # fetch that user's info from the database
    user_data = get_user_data(job_input["username"])
    if user_data is None:
        log(1, "No user data received from db for user "
            + str(job_input['username']))
        response = {
            "status": "error",
            "description": "No user data received from db for user "
                           + str(job_input["username"])
        }
        return str(bson.BSON.encode(response))

    # fetch that feed's info from the database
    feed_data = get_feed_data(job_input["feed_url"])
    if feed_data is None:
        log(1, "No feed data received from db for feed "
            + str(job_input['feed_url']))
        response = {
            "status": "error",
            "description": "No feed data received from db for feed "
                           + str(job_input["feed_url"])
        }
        return str(bson.BSON.encode(response))

    # get the user's interest words
    if "words" in user_data:
        user_words = user_data['words']
    else:
        user_data['words'] = {}
        user_words = {}

    for item in feed_data['items']:
        if item['link'] == job_input['article_url']:
            log(0, "found feed")
            if 'topics' not in item:
                log(1, "No topics associated with given article.")
                break
            topics = item['topics']
            user_words = update_topic_counts(user_words, topics,
                                             job_input['positive_opinion'])
            break

    # put the user's new interest words back into their dict
    user_data['words'] = user_words

    # put it all back in the database
    update_user_data(job_input['username'], user_data)

    response = {"status": "ok"}
    return str(bson.BSON.encode(response))
def decodeTransaction(transaction):
    try:
        return bson.BSON(str(transaction[1])).decode(
            codec_options=bson.codec_options.CodecOptions(
                document_class=collections.OrderedDict))
    except Exception:
        raise Exception('Invalid transaction.')
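# Sketch of the codec_options behavior decodeTransaction relies on:
# document_class=OrderedDict makes pymongo's decoder preserve key order
# instead of returning a plain dict. Assumes pymongo's bson; the sample
# document is illustrative.
import collections
import bson

_raw = bson.BSON.encode(collections.OrderedDict([("b", 1), ("a", 2)]))
_doc = bson.BSON(_raw).decode(
    codec_options=bson.codec_options.CodecOptions(
        document_class=collections.OrderedDict))
assert list(_doc) == ["b", "a"]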