def _build_page_nodes(node_id, traversing_nodes, update):
    session = Session()
    # Ask for fewer and fewer levels until the tree page comes back non-empty
    for levels in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10]:
        try:
            page = SessionManager.get_instance().get_document(
                TREE_URL, raw_data=DATA_TEMPLATE.format(levels, node_id))
        except Exception as ex:
            logger.exception("Couldn't get tree page")
            raise ex
        members = sel_members(page)
        if len(members) > 0:
            logger.debug("Got %s levels. Processing", levels)
            # Group member elements by their numeric 'top' offset; the largest
            # key is the vertical position of the deepest level on the page
            last_level_top = max(
                map_reduce(
                    members,
                    keyfunc=lambda m: int(
                        _get_element_style_items(m)['top'][:-2])).keys())
            if update:
                # logger.info("Processing nodes %s-%s", nodes, nodes + len(members))
                _update_nodes(traversing_nodes, members, last_level_top, session)
                # nodes += len(members)
            else:
                _get_children(node_id, traversing_nodes, members[0], members[1:],
                              level_distance=_get_levels_distance(members),
                              last_level_top=last_level_top,
                              session=session)
            logger.debug('Committing transaction')
            session.commit()
            Session.remove()
            logger.debug('Done')
            # sleep(60)
            break
        logger.debug("Couldn't get %s levels. Decreasing", levels)
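# A minimal sketch (not from the original module) of the grouping idiom used above
# for last_level_top; the style dicts here are made-up stand-ins for what
# _get_element_style_items() is assumed to return.
from more_itertools import map_reduce

styles = [{'top': '40px'}, {'top': '120px'}, {'top': '120px'}, {'top': '80px'}]

# Group by the numeric 'top' offset ('120px'[:-2] -> '120'); the largest key is
# the offset of the deepest level on the page.
groups = map_reduce(styles, keyfunc=lambda s: int(s['top'][:-2]))
last_level_top = max(groups.keys())
print(last_level_top)    # 120
print(len(groups[120]))  # 2 elements sit on that level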
def _segmented_by_study(validated_groups):
    # For each user, regroup (child, study) pairs into a study -> [children] dict
    for user, child_study_pairs in validated_groups:
        yield user, dict(
            map_reduce(child_study_pairs, itemgetter(1), itemgetter(0)))
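# A small usage sketch with made-up users and (child, study) pairs, showing how the
# generator above regroups each user's pairs into a study -> children mapping.
from operator import itemgetter
from more_itertools import map_reduce

validated_groups = [
    ('alice', [('child-1', 'study-A'), ('child-2', 'study-A'), ('child-3', 'study-B')]),
    ('bob', [('child-4', 'study-B')]),
]

for user, by_study in _segmented_by_study(validated_groups):
    print(user, by_study)
# alice {'study-A': ['child-1', 'child-2'], 'study-B': ['child-3']}
# bob {'study-B': ['child-4']}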
def prepare_data(out_path, animal, batch_size, seed, val_seed, fold_num, params):
    # Parse image number from data in labels.txt
    def parse_image_num(image_name):
        return int(re.match('([0-9]+)_[0-9]+.jpg', image_name).groups()[0])

    # Create output directory for processed data
    #shutil.rmtree(out_path)
    #os.makedirs(out_path)

    # Determine features and labels paths based on specified animal
    features_path, labels_path = params['tmp_dir'] + r'training_data/%s' % animal, \
        params['tmp_dir'] + r'%s_labels.txt' % animal

    # Read labels from the file
    with open(labels_path, 'r') as file:
        labels = file.read().splitlines()
    # Trim the path, leaving only image names
    labels = [x[x.rfind('/') + 1:] for x in labels]
    # Parse labels and store them along with image names as tuples
    labels = [(lambda t: (t[0], float(t[1])))(x.split(':')) for x in labels]
    # Parse the original image number, since it will be needed for creating data splits
    labels = [(parse_image_num(x[0]), x) for x in labels]
    # Group labels based on the original image numbers
    labels = mit.map_reduce(labels, keyfunc=lambda x: x[0], valuefunc=lambda x: x[1])
    # Remove the original image numbers, since they are no longer needed after grouping
    labels = [(k, v) for k, v in labels.items()]
    labels.sort(key=lambda x: x[0])
    labels = [x[1] for x in labels]

    # Perform test/train split over the grouped labels
    labels_train, labels_test = sks.train_test_split(labels, test_size=0.5,
                                                     random_state=seed)

    # Assign train data to test data and vice versa for the second fold
    if fold_num == 1:
        labels_train, labels_test = labels_test, labels_train

    # Split training data into training and validation sets
    labels_train, labels_valid = \
        sks.train_test_split(labels_train, test_size=0.2, random_state=val_seed)

    # Flatten arrays after the splits are created
    labels_train, labels_test, labels_valid = \
        [list(mit.flatten(x)) for x in [labels_train, labels_test, labels_valid]]

    nums_batches = []
    # Process train, validation and test data
    for name, labels in zip(['train', 'valid', 'test'],
                            [labels_train, labels_valid, labels_test]):
        # Calculate number of batches and append it to the list to be returned
        num_batches = int(np.ceil(len(labels) / batch_size))
        nums_batches.append(num_batches)

        # Create batches of features and labels, and store them as .npy files
        for batch_num in range(num_batches):
            batch_labels = labels[batch_num * batch_size:(batch_num + 1) * batch_size]
            batch_features = np.array([
                cv.imread('%s/%s' % (features_path, x[0])) for x in batch_labels
            ]).astype(np.float16) / 255
            # batch_labels = ku.to_categorical(np.array(batch_labels)[:, 1].astype(np.float), 2)
            batch_labels = np.array(batch_labels)[:, 1].astype(np.float)
            for category, data in zip(['features', 'labels'],
                                      [batch_features, batch_labels]):
                np.save('%s/%s_%s_%s' % (out_path, name, category, batch_num), data)

    return nums_batches
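# A toy illustration (made-up file names) of the grouping step above: crops that
# come from the same original image share a key, so splitting the grouped lists
# keeps them on the same side of the train/test split.
import more_itertools as mit

toy_labels = [('17_0.jpg', 1.0), ('17_1.jpg', 0.0), ('23_0.jpg', 1.0)]
toy_labels = [(int(name.split('_')[0]), (name, value)) for name, value in toy_labels]
grouped = mit.map_reduce(toy_labels, keyfunc=lambda x: x[0], valuefunc=lambda x: x[1])
print(dict(grouped))
# {17: [('17_0.jpg', 1.0), ('17_1.jpg', 0.0)], 23: [('23_0.jpg', 1.0)]}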
from more_itertools import map_reduce, windowed, run_length
from functools import lru_cache, reduce
from operator import mul
import networkx as nx
import numpy as np

adapters = list(map(int, open('day10.txt').readlines()))
#adapters=[16,10,15,5,1,11,7,19,6,12,4]
#adapters=[28,33,18,42,31,14,46,20,48,47,24,23,49,45,19,38,39,11,1,32,25,35,8,17,7,9,4,2,34,10,3]
adapters += [0, max(adapters) + 3]
adapted = [b - a for a, b in windowed(sorted(adapters), 2)]
diff_dict = map_reduce(adapted,
                       keyfunc=lambda x: x,
                       valuefunc=lambda x: 1,
                       reducefunc=sum)
print(diff_dict[1] * diff_dict[3])

ones_differences = map(
    lambda x: x[1],
    filter(lambda x: x[0] != 3, list(run_length.encode(adapted))))


@lru_cache
def tribonacci(n):
    if 0 <= n <= 1:
        return 0
    elif n == 2:
        return 1
    else:
        return tribonacci(n - 1) + tribonacci(n - 2) + tribonacci(n - 3)
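# The diff_dict above is just a frequency count: each gap maps to 1 and the 1s are
# summed per key, which matches collections.Counter on a toy gap list.
from collections import Counter
from more_itertools import map_reduce

gaps = [1, 3, 1, 1, 3, 1, 3]
diffs = map_reduce(gaps, keyfunc=lambda x: x, valuefunc=lambda x: 1, reducefunc=sum)
print(dict(diffs))          # {1: 4, 3: 3}
print(dict(Counter(gaps)))  # {1: 4, 3: 3} -- same counts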
from more_itertools import map_reduce

data = 'This sentence has words of various lengths in it, both short ones and long ones'.split()


def keyfunc(x):
    return len(x)


result = map_reduce(data, keyfunc)
# defaultdict(None, {
#     4: ['This', 'both', 'ones', 'long', 'ones'],
#     8: ['sentence'],
#     3: ['has', 'it,', 'and'],
#     5: ['words', 'short'],
#     2: ['of', 'in'],
#     7: ['various', 'lengths']})


def valuefunc(x):
    return 1


result = map_reduce(data, keyfunc, valuefunc)
# defaultdict(None, {
#     4: [1, 1, 1, 1, 1],
#     8: [1],
#     3: [1, 1, 1],
#     5: [1, 1],
#     2: [1, 1],
#     7: [1, 1]})
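# Added illustration: a reducefunc passed as the fourth argument collapses each value
# list; summing the 1s from valuefunc gives a per-length word count for the same data.
def reducefunc(values):
    return sum(values)


result = map_reduce(data, keyfunc, valuefunc, reducefunc)
# defaultdict(None, {4: 5, 8: 1, 3: 3, 5: 2, 2: 2, 7: 2})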
def post_purchase_orders(po_id=None):
    logger = get_task_logger(__name__)
    logger.setLevel(current_app.config['LOG_LEVEL'])
    pending_purchase_orders = PurchaseOrder.query
    if po_id:
        pending_purchase_orders = pending_purchase_orders.filter_by(id=po_id)
    pending_purchase_orders = pending_purchase_orders.filter_by(
        status=PurchaseOrderStatus.pending)
    try:
        # Wrap whole operation in order to
        # mark all pending POs as failed in case of any failure
        logger.info("There are %s purchase orders to post",
                    pending_purchase_orders.count())
        tz = timezone('Asia/Seoul')
        today = datetime.now().astimezone(tz).date()
        grouped_vendors = map_reduce(pending_purchase_orders, lambda po: po.vendor)
        for vendor_id, pos in grouped_vendors.items():
            vendor = PurchaseOrderVendorManager.get_vendor(
                vendor_id, logger=logger, config=current_app.config)
            for po in pos:
                if po.purchase_date and po.purchase_date > today + timedelta(days=1):
                    logger.info("Skip <%s>: purchase date is %s",
                                po.id, po.purchase_date)
                    continue
                logger.info("Posting a purchase order %s", po.id)
                try:
                    vendor.post_purchase_order(po)
                    posted_ops_count = len([
                        op for op in po.order_products
                        if op.status == OrderProductStatus.purchased
                    ])
                    if posted_ops_count == len(po.order_products):
                        po.status = PurchaseOrderStatus.posted
                        po.when_changed = po.when_posted = datetime.now()
                    elif posted_ops_count > 0:
                        po.status = PurchaseOrderStatus.partially_posted
                        po.when_changed = po.when_posted = datetime.now()
                        failed_order_products = [
                            op for op in po.order_products
                            if op.status != OrderProductStatus.purchased
                        ]
                        po.status_details = "Not posted products:\n" + \
                            '\n'.join(map(
                                lambda fop: f"{fop.product_id}: {fop.product.name}",
                                failed_order_products))
                    else:
                        po.status = PurchaseOrderStatus.failed
                        po.when_changed = datetime.now()
                        logger.warning(
                            "Purchase order %s posting went successfully but no products were ordered",
                            po.id)
                    logger.info("Posted a purchase order %s", po.id)
                except (PurchaseOrderError, AtomyLoginError) as ex:
                    logger.warning("Failed to post the purchase order %s.", po.id)
                    logger.warning(ex)
                    po.status = PurchaseOrderStatus.failed
                    po.status_details = str(ex)
                    po.when_changed = datetime.now()
                db.session.commit()
        logger.info('Done posting purchase orders')
    except Exception as ex:
        for po in pending_purchase_orders:
            po.status = PurchaseOrderStatus.failed
        db.session.commit()
        raise ex
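# A minimal sketch of the vendor grouping used above, with a made-up namedtuple
# standing in for PurchaseOrder (the real objects come from the SQLAlchemy query).
from collections import namedtuple
from more_itertools import map_reduce

PO = namedtuple('PO', ['id', 'vendor'])
pending = [PO(1, 'vendor-1'), PO(2, 'vendor-2'), PO(3, 'vendor-1')]

grouped = map_reduce(pending, lambda po: po.vendor)
for vendor_id, pos in grouped.items():
    print(vendor_id, [po.id for po in pos])
# vendor-1 [1, 3]
# vendor-2 [2]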