def train(self, model_cont):
    # get training status of model container
    train_status = model_cont['train_status']
    if train_status != "trained":
        # load the data to train
        data_loader = DataLoader()
        dataset = data_loader.load_user_data(user_data_path)
        # load model specific parameters
        # TODO
        if "train_test_split" in params.keys() and params["train_test_split"]:
            data_split = params['train_test_split']
            trainset = DataProcessor().get_trainset(features, labels, data_split)
        # train the model
        clf = svc()
        clf.fit(dataset['features'], dataset['labels'])
        pkl_file = pdumps(clf)
        # update the model object with the results of training
        model_cont['learned_model'] = Binary(pkl_file)
        model_cont['train_status'] = "trained"
        return model_cont
    else:
        print("Already trained")
        return False
def train(self, model_cont, user_data=None):
    train_status = model_cont['train_status']
    if train_status != "training":
        # load the data to train
        data_loader = DataLoader()
        dataset = data_loader.load_user_data(user_data_path)
        # load the model specific parameters
        alpha = model_cont['parameters']['alpha']
        # train the model
        clf = LinearRegression(alpha)
        clf.fit(dataset['features'], dataset['labels'])
        pkl_file = pdumps(clf)
        # update the model object with the results of training
        model_cont['learned_model'] = Binary(pkl_file)
        model_cont['train_status'] = "trained"
        return model_cont
    else:
        print("Already Trained")
        return True
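# Hedged usage sketch (not part of the original code): how a caller might get
# predictions back out of a container produced by the train() variants above.
# Assumes `ploads` is pickle.loads and that the stored Binary value is a bytes
# subclass (as bson.Binary is); the container layout mirrors the code above.
from pickle import loads as ploads

def predict_from_container(model_cont, features):
    if model_cont.get('train_status') != "trained":
        raise ValueError("model has not been trained yet")
    clf = ploads(bytes(model_cont['learned_model']))  # unpickle the learned estimator
    return clf.predict(features)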
def build_product_model(host, port, **kwargs):
    prod_model_data = 'prod_model_data.pickle'

    print("Loading products from database:")
    prod_filt = {'comodegenic': {'$type': 'int'}}  # Only return entries with comodegenic score
    prod_prjctn = {'ingredient_list': True, 'comodegenic': True}
    db_objects = PRODUCTS_DB.read(prod_filt, projection=prod_prjctn)
    products = [DB_Object.build_from_dict(p) for p in db_objects]

    # The tfidf_vect will ignore the following words
    stop_words = [
        '', 'water', 'glycerin', 'titanium dioxide', 'iron oxides', 'beeswax',
        'methylparaben', 'propylparaben', 'propylene glycol', 'panthenol', 'mica']

    # Tokenizer for product ingredient lists
    def get_prod_ings_as_list(product):
        '''
        Queries the ingredients DB for a given product's ingredient list and
        returns the ingredient list as a list of ingredient strings.
        Note: The DB query is performed once using all ingredient object IDs
        simultaneously.
        '''
        fltr = {'_id': {'$in': product.get('ingredient_list', [])}}
        ing_prjctn = {'_id': False, 'ingredient_name': True}
        db_objects = INGREDIENTS_DB.read(fltr, projection=ing_prjctn)
        return [DB_Object.build_from_dict(i).get('ingredient_name', '') for i in db_objects]

    print('Vectorizing product ingredient lists')
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_prod_ings_as_list,
        lowercase=False,
        stop_words=stop_words)
    X = tfidf_vect.fit_transform(products)
    y = [p['comodegenic'] for p in products]

    print('Storing vectorized data and training labels')
    # Bundle the vectorized features and training labels
    model = {'X': X, 'y': y}

    print("Saving model data to disk for next time")
    # Insert the model into the model database
    MODEL_DB.create_file(pdumps(model, protocol=2), filename="ml_product_data")
    # Save model data to disk
    with open(prod_model_data, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] Product model data post-processed and stored')
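# Hedged sketch (assumption, not in the original source): reloading the pickled
# product model written above and fitting a simple classifier on it. The file
# name matches the prod_model_data constant used in build_product_model(); the
# choice of MultinomialNB is illustrative only.
import pickle
from sklearn.naive_bayes import MultinomialNB

def fit_product_classifier(path='prod_model_data.pickle'):
    with open(path, 'rb') as fh:
        model = pickle.load(fh)      # {'X': tf-idf matrix, 'y': comodegenic labels}
    clf = MultinomialNB()
    clf.fit(model['X'], model['y'])  # sklearn accepts the sparse CSR input directly
    return clf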
def dump_pending_points(self):
    result = [
        (name, pdumps({'xyz': xyz, 'rgb': rgb}), len(xyz))
        for name, xyz, rgb in self._get_pending_points()
    ]

    self.pending_xyz = []
    self.pending_rgb = []

    return result
def save_to_bytes(self):
    sub_pickle = {}
    if self.children is not None:
        sub_pickle['children'] = self.children
        sub_pickle['grid'] = self.grid
    else:
        sub_pickle['points'] = self.points

    d = pdumps(sub_pickle)
    return d
def dump(self, name, max_depth):
    """Serialize the stored nodes to a bytes list"""
    node = self.nodes[name]
    if node.dirty:
        self.node_bytes[name] = node.save_to_bytes()

    if node.children is not None and max_depth > 0:
        for n in node.children:
            self.dump(n, max_depth - 1)

    return pdumps(self.node_bytes)
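# Hedged sketch (assumption): a possible inverse of save_to_bytes()/dump()
# above. The 'children'/'grid'/'points' keys mirror the ones written in
# save_to_bytes(); `ploads` is assumed to be pickle.loads.
from pickle import loads as ploads

def load_from_bytes(data):
    sub_pickle = ploads(data)
    if 'children' in sub_pickle:
        return sub_pickle['children'], sub_pickle.get('grid'), None
    return None, None, sub_pickle.get('points')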
def analyze_url(self, parsed):
    setup_task_logger(parsed)
    log_string("Start analyzing", task=parsed['task'])
    temp_container = None
    try:
        parsed["domain"] = ""
        try:
            extracted = textract(parsed['buffer'])
            parsed["domain"] = "{}.{}".format(extracted.domain, extracted.suffix)
        except:
            pass
        log_string(parsed["domain"], task=parsed['task'])
        parsed['locations'] = json_settings[environ["project_env"]]["task_logs"]
        if parsed['use_proxy']:
            log_string("Proxy detected", task=parsed['task'])
            temp_container = DOCKER_CLIENT.containers.run(
                "url-sandbox_box",
                command=[hexlify(pdumps(parsed)).decode()],
                volumes={json_settings[environ["project_env"]]["output_folder"]: {
                    'bind': json_settings[environ["project_env"]]["task_logs"]["box_output"],
                    'mode': 'rw'}},
                detach=True,
                network="url-sandbox_frontend_box")
        else:
            log_string("No proxy, running privileged for custom tor config", task=parsed['task'])
            temp_container = DOCKER_CLIENT.containers.run(
                "url-sandbox_box",
                command=[hexlify(pdumps(parsed)).decode()],
                volumes={json_settings[environ["project_env"]]["output_folder"]: {
                    'bind': json_settings[environ["project_env"]]["task_logs"]["box_output"],
                    'mode': 'rw'}},
                detach=True,
                network="url-sandbox_frontend_box",
                privileged=True)
        temp_logs = ""
        for item in range(1, parsed['analyzer_timeout']):
            temp_logs = temp_container.logs()
            if len(temp_logs) > 1:
                if temp_logs.endswith(b"Done!!\n"):
                    break
            sleep(1)
        temp_container.stop()
        if len(temp_logs) > 0:
            for item in temp_logs.split(b"\n"):
                with ignore_excpetion():
                    if len(item) > 0:
                        log_string(item.decode("utf-8"), task=parsed['task'])
        log_string("Parsing output", task=parsed['task'])
        parsed['locations']['box_output'] = json_settings[environ["project_env"]]["output_folder"]
    except Exception as e:
        log_string("Error -> {}".format(e), task=parsed['task'])

    # clean_up()
    try:
        if temp_container is not None:
            temp_container.stop()
            temp_container.remove()
    except Exception as e:
        log_string("Error -> {}".format(e), task=parsed['task'])

    make_report(parsed)
    cancel_task_logger(parsed['task'])
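# Hedged sketch (assumption): how the sandbox container's entrypoint could
# decode the task it receives as its single command argument, mirroring the
# hexlify(pdumps(parsed)).decode() call in analyze_url() above.
from binascii import unhexlify
from pickle import loads as ploads
from sys import argv

def read_task_from_argv():
    # dict with 'task', 'buffer', 'locations', ... as assembled by analyze_url()
    return ploads(unhexlify(argv[1]))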
def run(_id, filename, offset_scale, portion, queue, projection, verbose):
    """
    Reads points from a xyz file
    """
    try:
        f = open(filename, "r")

        point_count = portion[1] - portion[0]
        step = min(point_count, max((point_count) // 10, 100000))

        f.seek(portion[2])

        for i in range(0, point_count, step):
            points = np.zeros((step, 3), dtype=np.float32)

            for j in range(0, step):
                line = f.readline()
                if not line:
                    points = np.resize(points, (j, 3))
                    break
                points[j] = [float(s) for s in line.split(" ")]

            x, y, z = [points[:, c] for c in [0, 1, 2]]

            if projection:
                x, y, z = pyproj.transform(projection[0], projection[1], x, y, z)

            x = (x + offset_scale[0][0]) * offset_scale[1][0]
            y = (y + offset_scale[0][1]) * offset_scale[1][1]
            z = (z + offset_scale[0][2]) * offset_scale[1][2]

            coords = np.vstack((x, y, z)).transpose()

            if offset_scale[2] is not None:
                # Apply transformation matrix (because the tile's transform will contain
                # the inverse of this matrix)
                coords = np.dot(coords, offset_scale[2])

            coords = np.ascontiguousarray(coords.astype(np.float32))

            # Read colors
            colors = np.full((point_count, 3), 255, dtype=np.uint8)

            result = (
                "".encode("ascii"),
                pdumps({"xyz": coords, "rgb": colors}),
                len(coords),
            )

            queue.send_multipart(
                [
                    "".encode("ascii"),
                    pdumps({"xyz": coords, "rgb": colors}),
                    struct.pack(">I", len(coords)),
                ],
                copy=False,
            )

        queue.send_multipart([pdumps({"name": _id, "total": 0})])
        # notify we're idle
        queue.send_multipart([b""])

        f.close()
    except Exception as e:
        print("Exception while reading points from xyz file")
        print(e)
        traceback.print_exc()
def run(_id, filename, offset_scale, portion, queue, transformer, verbose):
    """
    Reads points from an xyz file.

    The XYZIRGB format described in the FME documentation (*) is assumed. If a
    line does not have 7 features, the following assumptions are made:
    - 3 features mean XYZ
    - 4 features mean XYZI
    - 6 features mean XYZRGB

    (*) See: https://docs.safe.com/fme/html/FME_Desktop_Documentation/FME_ReadersWriters/pointcloudxyz/pointcloudxyz.htm
    """
    try:
        f = open(filename, "r")

        point_count = portion[1] - portion[0]
        step = min(point_count, max((point_count) // 10, 100000))

        f.seek(portion[2])

        feature_nb = 7

        for i in range(0, point_count, step):
            points = np.zeros((step, feature_nb), dtype=np.float32)

            for j in range(0, step):
                line = f.readline()
                if not line:
                    points = np.resize(points, (j, feature_nb))
                    break
                line_features = [float(s) for s in line.split(" ")]
                if len(line_features) == 3:
                    line_features += [None] * 4  # Insert intensity and RGB
                elif len(line_features) == 4:
                    line_features += [None] * 3  # Insert RGB
                elif len(line_features) == 6:
                    line_features.insert(3, None)  # Insert intensity
                points[j] = line_features

            x, y, z = [points[:, c] for c in [0, 1, 2]]

            if transformer:
                x, y, z = transformer.transform(x, y, z)

            x = (x + offset_scale[0][0]) * offset_scale[1][0]
            y = (y + offset_scale[0][1]) * offset_scale[1][1]
            z = (z + offset_scale[0][2]) * offset_scale[1][2]

            coords = np.vstack((x, y, z)).transpose()

            if offset_scale[2] is not None:
                # Apply transformation matrix (because the tile's transform will contain
                # the inverse of this matrix)
                coords = np.dot(coords, offset_scale[2])

            coords = np.ascontiguousarray(coords.astype(np.float32))

            # Read colors: 3 last columns of the point cloud
            colors = points[:, -3:].astype(np.uint8)

            queue.send_multipart(
                [
                    "".encode("ascii"),
                    pdumps({"xyz": coords, "rgb": colors}),
                    struct.pack(">I", len(coords)),
                ],
                copy=False,
            )

        queue.send_multipart([pdumps({"name": _id, "total": 0})])
        # notify we're idle
        queue.send_multipart([b""])

        f.close()
    except Exception as e:
        print("Exception while reading points from xyz file")
        print(e)
        traceback.print_exc()
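# Hedged sketch (assumption): the consuming side of the queue used by the two
# xyz readers above, assuming a pyzmq-style socket. The frame layout mirrors
# what run() sends: an ascii name frame, a pickled {'xyz', 'rgb'} dict and a
# big-endian uint32 point count.
import struct
from pickle import loads as ploads

def receive_points(socket):
    frames = socket.recv_multipart()
    if len(frames) == 3:
        _, payload, packed_count = frames
        data = ploads(payload)
        count = struct.unpack(">I", packed_count)[0]
        return data["xyz"], data["rgb"], count
    return None  # idle / bookkeeping message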
def test_py_pickle():
    """ Tests: test_py_pickle frompickle
    """
    print('::: TEST: test_py_pickle()')

    edict_with_all = _get_orig__edict_with_all()
    new_reobj_all__pdumps = pdumps(edict_with_all)
    new_reobj_all = ploads(new_reobj_all__pdumps)

    ok_(edict_with_all == new_reobj_all, msg=None)
    ok_(isinstance(new_reobj_all, Edict), msg=None)
    ok_(edict_with_all.extra_data == new_reobj_all.extra_data, msg=None)
    ok_(new_reobj_all.extra_data['edict extra2'] == 'edict extra_value2', msg=None)

    ok_(edict_with_all['edict1'] == new_reobj_all['edict1'], msg=None)
    ok_(isinstance(new_reobj_all['edict1'], Edict), msg=None)
    ok_(edict_with_all['edict1'].extra_data == new_reobj_all['edict1'].extra_data, msg=None)
    ok_(new_reobj_all['edict1'].extra_data['edict_obj.edict1 extra2'] == 'edict_obj.edict1 extra_value2', msg=None)

    ok_(edict_with_all['rdict1'] == new_reobj_all['rdict1'], msg=None)
    ok_(isinstance(new_reobj_all['rdict1'], Rdict), msg=None)
    ok_(edict_with_all['rdict1'].extra_data == new_reobj_all['rdict1'].extra_data, msg=None)
    ok_(new_reobj_all['rdict1'].extra_data['edict_obj.rdict1 extra2'] == 'edict_obj.rdict1 extra_value2', msg=None)

    ok_(edict_with_all['edictf1'] == new_reobj_all['edictf1'], msg=None)
    ok_(isinstance(new_reobj_all['edictf1'], RdictF), msg=None)
    ok_(edict_with_all['edictf1'].extra_data == new_reobj_all['edictf1'].extra_data, msg=None)
    ok_(new_reobj_all['edictf1'].extra_data['edict_obj.edictf1 extra2'] == 'edict_obj.edictf1 extra_value2', msg=None)

    ok_(edict_with_all['edictio1'] == new_reobj_all['edictio1'], msg=None)
    ok_(isinstance(new_reobj_all['edictio1'], RdictIO), msg=None)
    ok_(edict_with_all['edictio1'].extra_data == new_reobj_all['edictio1'].extra_data, msg=None)
    ok_(new_reobj_all['edictio1'].extra_data['edict_obj.edictio1 extra2'] == 'edict_obj.edictio1 extra_value2', msg=None)
    ok_(edict_with_all['edictio1'].key_order == new_reobj_all['edictio1'].key_order, msg=None)
    ok_(new_reobj_all['edictio1'].key_order == ['edictio_inner1', 'edictio_inner2', 'edictio_inner3'], msg=None)
    ok_(edict_with_all['edictio1'].extra_key_order == new_reobj_all['edictio1'].extra_key_order, msg=None)
    ok_(new_reobj_all['edictio1'].extra_key_order == ['edictio_inner2', 'edictio_inner3', 'edictio_inner1'], msg=None)

    ok_(edict_with_all['edictfo1'] == new_reobj_all['edictfo1'], msg=None)
    ok_(isinstance(new_reobj_all['edictfo1'], RdictFO), msg=None)
    ok_(edict_with_all['edictfo1'].extra_data == new_reobj_all['edictfo1'].extra_data, msg=None)
    ok_(new_reobj_all['edictfo1'].extra_data['edict_obj.edictfo1 extra2'] == 'edict_obj.edictfo1 extra_value2', msg=None)
    ok_(edict_with_all['edictfo1'].key_order == new_reobj_all['edictfo1'].key_order, msg=None)
    ok_(new_reobj_all['edictfo1'].key_order == ['edictfo_inner1', 'edictfo_inner2', 'edictfo_inner3'], msg=None)
    ok_(edict_with_all['edictfo1'].extra_key_order == new_reobj_all['edictfo1'].extra_key_order, msg=None)
    ok_(new_reobj_all['edictfo1'].extra_key_order == ['edictfo_inner2', 'edictfo_inner3', 'edictfo_inner1'], msg=None)

    ok_(edict_with_all['edictfo2_1'] == new_reobj_all['edictfo2_1'], msg=None)
    ok_(isinstance(new_reobj_all['edictfo2_1'], RdictFO2), msg=None)
    ok_(edict_with_all['edictfo2_1'].extra_data == new_reobj_all['edictfo2_1'].extra_data, msg=None)
    ok_(new_reobj_all['edictfo2_1'].extra_data['edict_obj.edictfo2_1 extra2'] == 'edict_obj.edictfo2_1 extra_value2', msg=None)
    ok_(edict_with_all['edictfo2_1'].key_order == new_reobj_all['edictfo2_1'].key_order, msg=None)
    ok_(new_reobj_all['edictfo2_1'].key_order == ['edictfo2_inner1', 'edictfo2_inner2', 'edictfo2_inner3'], msg=None)
    ok_(edict_with_all['edictfo2_1'].extra_key_order == new_reobj_all['edictfo2_1'].extra_key_order, msg=None)
    ok_(new_reobj_all['edictfo2_1'].extra_key_order == ['edictfo2_inner2', 'edictfo2_inner3', 'edictfo2_inner1'], msg=None)

    ok_(edict_with_all['elist1'] == new_reobj_all['elist1'], msg=None)
    ok_(isinstance(new_reobj_all['elist1'], Elist), msg=None)
    ok_(edict_with_all['elist1'].extra_data == new_reobj_all['elist1'].extra_data, msg=None)
    ok_(new_reobj_all['elist1'].extra_data['edict_obj.elist1 extra2'] == 'edict_obj.elist1 extra_value2', msg=None)

    ok_(edict_with_all['rlist1'] == new_reobj_all['rlist1'], msg=None)
    ok_(isinstance(new_reobj_all['rlist1'], Rlist), msg=None)
    ok_(edict_with_all['rlist1'].extra_data == new_reobj_all['rlist1'].extra_data, msg=None)
    ok_(new_reobj_all['rlist1'].extra_data['edict_obj.rlist1 extra2'] == 'edict_obj.rlist1 extra_value2', msg=None)

    ok_(edict_with_all['rlistf1'] == new_reobj_all['rlistf1'], msg=None)
    ok_(isinstance(new_reobj_all['rlistf1'], RlistF), msg=None)
    ok_(edict_with_all['rlistf1'].extra_data == new_reobj_all['rlistf1'].extra_data, msg=None)
    ok_(new_reobj_all['rlistf1'].extra_data['edict_obj.rlistf1 extra2'] == 'edict_obj.rlistf1 extra_value2', msg=None)

    ok_(edict_with_all['etuple1'] == new_reobj_all['etuple1'], msg=None)
    ok_(isinstance(new_reobj_all['etuple1'], Etuple), msg=None)
    ok_(edict_with_all['etuple1'].extra_data == new_reobj_all['etuple1'].extra_data, msg=None)
    ok_(new_reobj_all['etuple1'].extra_data['edict_obj.etuple1 extra2'] == 'edict_obj.etuple1 extra_value2', msg=None)

    ok_(edict_with_all['lmatrix1'] == new_reobj_all['lmatrix1'], msg=None)
    ok_(isinstance(new_reobj_all['lmatrix1'], Lmatrix), msg=None)
    ok_(edict_with_all['lmatrix1'].extra_data == new_reobj_all['lmatrix1'].extra_data, msg=None)
    ok_(new_reobj_all['lmatrix1'].extra_data['edict_obj.lmatrix1 extra2'] == 'edict_obj.lmatrix1 extra_value2', msg=None)

    ok_(edict_with_all['lmatrixf1'] == new_reobj_all['lmatrixf1'], msg=None)
    ok_(isinstance(new_reobj_all['lmatrixf1'], LmatrixF), msg=None)
    ok_(edict_with_all['lmatrixf1'].extra_data == new_reobj_all['lmatrixf1'].extra_data, msg=None)
    ok_(new_reobj_all['lmatrixf1'].extra_data['edict_obj.lmatrixf1 extra2'] == 'edict_obj.lmatrixf1 extra_value2', msg=None)

    # some data checks
    ok_(edict_with_all['edictfo1']['edictfo_inner2'] == new_reobj_all['edictfo1']['edictfo_inner2'] and
        new_reobj_all['edictfo1']['edictfo_inner2'] == 'edictfo_inner2 value', msg=None)
    ok_(edict_with_all['rlist1'][1] == new_reobj_all['rlist1'][1] and
        new_reobj_all['rlist1'][1] == 'rlist_inner value2', msg=None)
    ok_(edict_with_all['lmatrixf1'].this_column_values('name') == new_reobj_all['lmatrixf1'].this_column_values('name') and
        new_reobj_all['lmatrixf1'].this_column_values('name') == ['darkorange', 'flesh', 'firebrick 3'], msg=None)
    ok_(edict_with_all['lmatrixf1'][1][2] == new_reobj_all['lmatrixf1'][1][new_reobj_all['lmatrixf1'].column_names_idx_lookup['green']] and
        new_reobj_all['lmatrixf1'][1][2] == 125, msg=None)

    # Change original
    edict_with_all['etuple1'].replace_extra_data({'edict_obj.etuple1 UPDATED': 'UPDATED'})
    ok_(edict_with_all['etuple1'] == new_reobj_all['etuple1'], msg=None)
    ok_(isinstance(new_reobj_all['etuple1'], Etuple), msg=None)
    ok_(edict_with_all['etuple1'].extra_data != new_reobj_all['etuple1'].extra_data, msg=None)
    ok_(new_reobj_all['etuple1'].extra_data['INFO'] == 'edict_obj.etuple1 inner', msg=None)

    edict_with_all = {}
    ok_(isinstance(new_reobj_all['etuple1'], Etuple), msg=None)
    ok_(new_reobj_all['etuple1'].extra_data['INFO'] == 'edict_obj.etuple1 inner', msg=None)
    ok_(isinstance(new_reobj_all['lmatrix1'], Lmatrix), msg=None)
    ok_(new_reobj_all['lmatrix1'].extra_data['INFO'] == 'edict_obj.lmatrix1 inner', msg=None)
    ok_(new_reobj_all['lmatrix1'].column_names == ('name', 'red', 'green', 'blue'), msg=None)
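# Hedged illustration (assumption, not part of the test suite above): the core
# property test_py_pickle() exercises is that a dumps/loads round trip keeps
# both the subclass type and its instance attributes. TaggedDict is a
# hypothetical stand-in for Edict.
import pickle

class TaggedDict(dict):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.extra_data = {}

original = TaggedDict(a=1)
original.extra_data['note'] = 'kept'
restored = pickle.loads(pickle.dumps(original))
assert isinstance(restored, TaggedDict) and restored.extra_data['note'] == 'kept'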
def run(_id, filename, offset_scale, portion, queue, projection, verbose):
    '''
    Reads points from a las file
    '''
    try:
        f = laspy.file.File(filename, mode='r')

        point_count = portion[1] - portion[0]
        step = min(point_count, max((point_count) // 10, 100000))
        indices = [i for i in range(math.ceil((point_count) / step))]

        color_scale = offset_scale[3]

        file_points = f.get_points()['point']
        X = file_points['X']
        Y = file_points['Y']
        Z = file_points['Z']
        # todo: attributes
        if 'red' in f.point_format.lookup:
            RED = file_points['red']
            GREEN = file_points['green']
            BLUE = file_points['blue']
        else:
            RED = file_points['intensity']
            GREEN = file_points['intensity']
            BLUE = file_points['intensity']

        for index in indices:
            start_offset = portion[0] + index * step
            num = min(step, portion[1] - start_offset)

            # read scaled values and apply offset
            x = X[start_offset:start_offset + num] * f.header.scale[0] + f.header.offset[0]
            y = Y[start_offset:start_offset + num] * f.header.scale[1] + f.header.offset[1]
            z = Z[start_offset:start_offset + num] * f.header.scale[2] + f.header.offset[2]

            if projection:
                x, y, z = pyproj.transform(projection[0], projection[1], x, y, z)

            x = (x + offset_scale[0][0]) * offset_scale[1][0]
            y = (y + offset_scale[0][1]) * offset_scale[1][1]
            z = (z + offset_scale[0][2]) * offset_scale[1][2]

            coords = np.vstack((x, y, z)).transpose()

            if offset_scale[2] is not None:
                # Apply transformation matrix (because the tile's transform will contain
                # the inverse of this matrix)
                coords = np.dot(coords, offset_scale[2])

            coords = np.ascontiguousarray(coords.astype(np.float32))

            # Read colors
            red = RED[start_offset:start_offset + num]
            green = GREEN[start_offset:start_offset + num]
            blue = BLUE[start_offset:start_offset + num]
            if color_scale is None:
                red = red.astype(np.uint8)
                green = green.astype(np.uint8)
                blue = blue.astype(np.uint8)
            else:
                red = (red * color_scale).astype(np.uint8)
                green = (green * color_scale).astype(np.uint8)
                blue = (blue * color_scale).astype(np.uint8)

            colors = np.vstack((red, green, blue)).transpose()

            result = (
                ''.encode('ascii'),
                pdumps({'xyz': coords, 'rgb': colors}),
                len(coords))

            queue.send_multipart([
                ''.encode('ascii'),
                pdumps({'xyz': coords, 'rgb': colors}),
                struct.pack('>I', len(coords))
            ], copy=False)

        queue.send_multipart([pdumps({'name': _id, 'total': 0})])
        # notify we're idle
        queue.send_multipart([b''])

        f.close()
    except Exception as e:
        print('Exception while reading points from las file')
        print(e)
        traceback.print_exc()
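# Hedged note (assumption): LAS colour channels are often stored as 16-bit
# values, so the color_scale passed through offset_scale[3] above is typically
# something like 1/256 to fold them into uint8; color_scale=None assumes the
# values already fit in 0..255. Illustrative only:
import numpy as np

red16 = np.array([0, 32768, 65535], dtype=np.uint16)
red8 = (red16 * (1 / 256)).astype(np.uint8)  # -> array([  0, 128, 255], dtype=uint8)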
def build_people_model(host, port, **kwargs):
    global PROD_COMO
    ppl_model_data = 'ppl_model_data.pickle'
    batch_size = kwargs.get('batch_size', 10000)
    vocabulary = get_ingredient_vocabulary(host, port)

    # The tfidf_vect will ignore the following words
    stop_words = [
        '', 'water', 'glycerin', 'titanium dioxide', 'iron oxides', 'beeswax',
        'methylparaben', 'propylparaben', 'propylene glycol', 'panthenol', 'mica']

    # Create vectorizers
    d_vect = DictVectorizer(sparse=False)
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_ingredients_as_list,
        lowercase=False,
        stop_words=stop_words,
        vocabulary=vocabulary)

    print("Loading people from database, batch_size:", str(batch_size))
    ppl_filt = {}
    ppl_prjctn = {
        '_id': False,
        'race': True,
        'birth_sex': True,
        'age': True,
        'acne': True,
        'skin': True,
        'acne_products': True}  # Don't include any PII
    db_objects = PEOPLE_DB.read(ppl_filt, projection=ppl_prjctn)

    y, demo_mult = [], []
    batch_num, pulled = 0, 0
    X = None

    # Work in batches to build dataset
    while pulled <= db_objects.count(with_limit_and_skip=True):
        # Initialize
        X_demo_lst, X_prod_lst = [], []
        people = []
        print('Parsing batch:', batch_num)
        try:
            # Build a batch
            for i in range(batch_size):
                people.append(DB_Object.build_from_dict(db_objects.next()))
                pulled += 1
        except StopIteration:
            # End of available data
            break

        # Extract features
        for person in people:
            # Create new entry for each product
            # Note: Model is only applicable to entries with products
            for product_id in person.pop('acne_products'):
                # Pull product ingredients info
                X_prod_lst.append([product_id])
                # Pull demographic info
                X_demo_lst.append(person)
                # Generate demographic multiplier
                mult = get_multiplier(person)
                demo_mult.append(mult)

        # Vectorize data
        X_demo = d_vect.fit_transform(X_demo_lst)  # X_demo is now a numpy array
        X_prod = tfidf_vect.fit_transform(X_prod_lst)  # X_prod is now a CSR sparse matrix

        # Add batch result to output matrix
        if X is not None:
            X_t = hstack([csr_matrix(X_demo), X_prod], format="csr")
            try:
                X = vstack([X, X_t], format="csr")
            except ValueError:
                break
        else:
            # Initialize X
            X = hstack([csr_matrix(X_demo), X_prod], format="csr")
        batch_num += 1

    for como, mult in zip(PROD_COMO, demo_mult):
        val = como * mult
        if val < 6:
            y.append(0)
        elif val < 12:
            y.append(1)
        else:
            y.append(2)

    print('Storing vectorized data and training labels')
    # Bundle features, labels, and the fitted vectorizers
    model = {
        'X': X,
        'y': y,
        'd_vect': d_vect,
        'tfidf_vect': tfidf_vect,
        'vocabulary': vocabulary
    }

    print("Saving model data to disk for next time")
    # Insert the model into the model database
    MODEL_DB.create_file(pdumps(model, protocol=2), filename="ml_people_data")
    # Save model data to disk
    with open(ppl_model_data, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] People model data post-processed and stored')
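# Hedged mini-example (assumption): the feature-combination pattern used in
# build_people_model() above, i.e. converting the dense DictVectorizer output
# to CSR and horizontally stacking it with the ingredient TF-IDF matrix.
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

demo = DictVectorizer(sparse=False).fit_transform([{'age': 30, 'skin': 'dry'}])
ings = TfidfVectorizer().fit_transform(['water glycerin mica'])
combined = hstack([csr_matrix(demo), ings], format='csr')  # one row: demographics + ingredients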
        if day.attrs['class'] == ['ProductionCalendar_holiday']:
            self.days[date_day] = 'holiday'
        elif day.attrs['class'] == ['ProductionCalendar_preholiday']:
            self.days[date_day] = 'short'
        else:
            self.days[date_day] = 'work'

    def serialize(self):
        return dumps(dict([(x[0].isoformat(), x[1]) for x in self.days.iteritems()]))


# example:
if __name__ == '__main__':
    s = SuperjobCalendarParser('http://www.superjob.ru/proizvodstvennyj_kalendar/', debug=True)
    all_days = s.days
    print all_days.get(date(2012, 2, 2))
    print all_days.get(date(2008, 1, 23))
    print all_days.get(date(2014, 1, 7))
    print all_days.get(date(2014, 2, 2))

    # save as json
    json = s.serialize()
    with open('days.json', 'w') as f:
        f.write(json)

    # save as pickle
    with open('days.pickle', 'w') as f:
        f.write(pdumps(all_days))
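# Hedged sketch (assumption): reading the two files written above back in.
# days.json maps ISO date strings to 'holiday'/'short'/'work'; days.pickle
# holds the date-keyed dict directly (assuming it was written by a compatible
# Python interpreter).
import json
import pickle
from datetime import date

with open('days.json') as fh:
    days_from_json = {date.fromisoformat(k): v for k, v in json.load(fh).items()}

with open('days.pickle', 'rb') as fh:
    days_from_pickle = pickle.load(fh)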
def remote_action():
    vbox, x, y, s, key = request.json
    t = (int(x), int(y), 0, 0, s, key)
    vm_name_action = "{}_action".format(all_boxes[vbox]["vm"])
    r.set(vm_name_action, pdumps(t, protocol=2))
    return jsonify(status="sent")
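# Hedged sketch (assumption): how a VM-side worker might read back the action
# that remote_action() stores in Redis. `r` is assumed to be the same redis
# client; protocol=2 above keeps the payload loadable from Python 2 as well.
from pickle import loads as ploads

def read_action(vm_name):
    raw = r.get("{}_action".format(vm_name))
    if raw is None:
        return None
    x, y, _, _, s, key = ploads(raw)  # same 6-tuple layout written by remote_action()
    return x, y, s, key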
def do_task_and_print(rdd, partition_number):
    '''Do a task, serialize the result'''
    result = do_task(rdd, partition_number)
    result = {'payload': pdumps(list(result)),
              'partition_number': partition_number}
    printer(jdumps(result))
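# Hedged note (assumption): on Python 3, pdumps() returns bytes, which jdumps()
# cannot serialize directly, so the function above presumably targets Python 2.
# A Python 3 friendly variant would encode the pickled payload first, e.g.:
from base64 import b64encode

def do_task_and_print_py3(rdd, partition_number):
    result = do_task(rdd, partition_number)
    result = {'payload': b64encode(pdumps(list(result))).decode('ascii'),
              'partition_number': partition_number}
    printer(jdumps(result))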
def hash_obj(*args):
    return '_{:x}'.format(abs(hash(pdumps(args))))
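# Hedged note (assumption): hash() of bytes is randomised per interpreter run
# on Python 3 (PYTHONHASHSEED), so hash_obj() above is only stable within a
# single process. If a run-to-run stable key is needed, hashing the pickled
# args with hashlib is one option:
from hashlib import sha1
from pickle import dumps as pdumps

def stable_hash_obj(*args):
    return '_' + sha1(pdumps(args)).hexdigest()[:16]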