Example 1
def _build_page_nodes(node_id, traversing_nodes, update):
    session = Session()
    for levels in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10]:
        try:
            page = SessionManager.get_instance().get_document(
                TREE_URL, raw_data=DATA_TEMPLATE.format(levels, node_id))
        except Exception:
            logger.exception("Couldn't get tree page")
            raise
        members = sel_members(page)
        if len(members) > 0:
            logger.debug("Got %s levels. Processing", levels)
            # Group members by their numeric CSS 'top' offset (stripping the
            # trailing "px"); the largest key is the offset of the deepest level.
            last_level_top = max(
                map_reduce(
                    members,
                    keyfunc=lambda m: int(
                        _get_element_style_items(m)['top'][:-2])).keys())
            if update:
                # logger.info("Processing nodes %s-%s", nodes, nodes + len(members))
                _update_nodes(traversing_nodes, members, last_level_top,
                              session)
                # nodes += len(members)
            else:
                _get_children(node_id,
                              traversing_nodes,
                              members[0],
                              members[1:],
                              level_distance=_get_levels_distance(members),
                              last_level_top=last_level_top,
                              session=session)
            logger.debug('Committing transaction')
            session.commit()
            Session.remove()
            logger.debug('Done')
            # sleep(60)
            break
        logger.debug("Couldn't get %s levels. Decreasing", levels)
Example 2
def _segmented_by_study(validated_groups):
    # For each user, group their (child, study) pairs by study,
    # yielding (user, {study: [children]}).
    for user, child_study_pairs in validated_groups:
        yield user, dict(
            map_reduce(child_study_pairs, itemgetter(1), itemgetter(0)))
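A minimal usage sketch of the generator above; the input data is invented purely for illustration, and only the function itself comes from the source.

from operator import itemgetter
from more_itertools import map_reduce

# Hypothetical input: each group is (user, [(child, study), ...])
validated_groups = [
    ('alice', [('child1', 'studyA'), ('child2', 'studyA'), ('child3', 'studyB')]),
]

for user, by_study in _segmented_by_study(validated_groups):
    print(user, by_study)
# alice {'studyA': ['child1', 'child2'], 'studyB': ['child3']}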
Example 3
def prepare_data(out_path, animal, batch_size, seed, val_seed, fold_num, params):

    # Parse image number from data in labels.txt
    def parse_image_num(image_name):
        return int(re.match(r'([0-9]+)_[0-9]+\.jpg', image_name).groups()[0])

    # Create output directory for processed data
    #shutil.rmtree(out_path)
    #os.makedirs(out_path)

    # Determine features and labels paths based on specified animal
    features_path = params['tmp_dir'] + r'training_data/%s' % animal
    labels_path = params['tmp_dir'] + r'%s_labels.txt' % animal

    # Read labels from the file
    # Trim the path, leaving only images names
    # Parse labels and store them along with image names as tuples
    # Parse the original image number, since it will be needed for creating data splits
    # Group labels based on the original image numbers
    # Remove original image names, since they are no longer needed after grouping
    with open(labels_path, 'r') as file:
        labels = file.read().splitlines()
        labels = [x[x.rfind('/') + 1:] for x in labels]
        labels = [(lambda t: (t[0], float(t[1])))(x.split(':')) for x in labels]
        labels = [(parse_image_num(x[0]), x) for x in labels]
        labels = mit.map_reduce(labels, keyfunc=lambda x: x[0], valuefunc=lambda x: x[1])
        labels = [(k, v) for k, v in labels.items()]
        labels.sort(key=lambda x: x[0])
        labels = [x[1] for x in labels]

    # Perform test/train split and flatten the labels
    labels_train, labels_test = sks.train_test_split(labels, test_size=0.5, random_state=seed)

    # Assign train data to test data and vice versa for the second fold
    if fold_num == 1:
        labels_train, labels_test = labels_test, labels_train

    # Split training data into training and validation sets
    labels_train, labels_valid = \
        sks.train_test_split(labels_train, test_size=0.2, random_state=val_seed)

    # Flatten arrays after the splits are created
    labels_train, labels_test, labels_valid = \
        [list(mit.flatten(x)) for x in [labels_train, labels_test, labels_valid]]

    nums_batches = []

    # Process train and validation data
    for name, labels in zip(['train', 'valid', 'test'], [labels_train, labels_valid, labels_test]):

        # Calculate number of batches and append it to the list to be returned
        num_batches = int(np.ceil(len(labels) / batch_size))
        nums_batches.append(num_batches)

        # Create batches of features and labels, and store them as .npy files
        for batch_num in range(num_batches):
            batch_labels = labels[batch_num * batch_size:(batch_num + 1) * batch_size]
            batch_features = np.array([
                cv.imread('%s/%s' % (features_path, x[0])) for x in batch_labels
            ]).astype(np.float16) / 255
            #  batch_labels = ku.to_categorical(np.array(batch_labels)[:, 1].astype(np.float), 2)
            batch_labels = np.array(batch_labels)[:, 1].astype(float)
            for category, data in zip(['features', 'labels'], [batch_features, batch_labels]):
                np.save('%s/%s_%s_%s' % (out_path, name, category, batch_num), data)

    return nums_batches
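To make the grouping step concrete, here is a small sketch of what the mit.map_reduce call produces before the splits are made. The file names and label values below are made up for illustration.

import more_itertools as mit

# Hypothetical entries after trimming paths and parsing, e.g. "12_0.jpg:0.0"
labels = [('12_0.jpg', 0.0), ('12_1.jpg', 1.0), ('7_0.jpg', 0.0)]
labels = [(int(name.split('_')[0]), (name, value)) for name, value in labels]
grouped = mit.map_reduce(labels, keyfunc=lambda x: x[0], valuefunc=lambda x: x[1])
# grouped -> {12: [('12_0.jpg', 0.0), ('12_1.jpg', 1.0)], 7: [('7_0.jpg', 0.0)]}
# Splitting on these groups keeps all crops of one original image
# in the same train/validation/test partition.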
Example 4
from more_itertools import map_reduce, windowed, run_length
from functools import lru_cache, reduce
from operator import mul
import networkx as nx
import numpy as np

adapters = list(map(int, open('day10.txt').readlines()))
#adapters=[16,10,15,5,1,11,7,19,6,12,4]
#adapters=[28,33,18,42,31,14,46,20,48,47,24,23,49,45,19,38,39,11,1,32,25,35,8,17,7,9,4,2,34,10,3]
adapters += [0, max(adapters) + 3]
adapted = [b - a for a, b in windowed(sorted(adapters), 2)]
diff_dict = map_reduce(adapted,
                       keyfunc=lambda x: x,
                       valuefunc=lambda x: 1,
                       reducefunc=sum)
print(diff_dict[1] * diff_dict[3])

ones_differences = map(
    lambda x: x[1],
    filter(lambda x: x[0] != 3, list(run_length.encode(adapted))))


@lru_cache
def tribonacci(n):
    if 0 <= n <= 1:
        return 0
    elif n == 2:
        return 1
    else:
        return tribonacci(n - 1) + tribonacci(n - 2) + tribonacci(n - 3)
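The snippet stops before combining ones_differences with tribonacci, even though reduce and mul are imported. A plausible final step, assuming this is Advent of Code 2020 day 10 part 2 (an assumption, not part of the source), multiplies the arrangement counts contributed by each run of one-jolt differences:

# Each run of n consecutive 1-differences contributes tribonacci(n + 2) arrangements
print(reduce(mul, (tribonacci(n + 2) for n in ones_differences), 1))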
Example 5
from more_itertools import map_reduce
data = 'This sentence has words of various lengths in it, both short ones and long ones'.split()


def keyfunc(x):
    return len(x)


result = map_reduce(data, keyfunc)
# defaultdict(None, {
#   4: ['This', 'both', 'ones', 'long', 'ones'],
#   8: ['sentence'],
#   3: ['has', 'it,', 'and'],
#   5: ['words', 'short'],
#   2: ['of', 'in'],
#   7: ['various', 'lengths']})


def valuefunc(x):
    return 1


result = map_reduce(data, keyfunc, valuefunc)
# defaultdict(None, {
#   4: [1, 1, 1, 1, 1],
#   8: [1],
#   3: [1, 1, 1],
#   5: [1, 1],
#   2: [1, 1],
#   7: [1, 1]})
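map_reduce also accepts a reducefunc that collapses each group's value list; summing the 1s produced by valuefunc turns the result into a word-length histogram:

result = map_reduce(data, keyfunc, valuefunc, reducefunc=sum)
# defaultdict(None, {4: 5, 8: 1, 3: 3, 5: 2, 2: 2, 7: 2})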
Example 6
def post_purchase_orders(po_id=None):
    logger = get_task_logger(__name__)
    logger.setLevel(current_app.config['LOG_LEVEL'])
    pending_purchase_orders = PurchaseOrder.query
    if po_id:
        pending_purchase_orders = pending_purchase_orders.filter_by(id=po_id)
    pending_purchase_orders = pending_purchase_orders.filter_by(
        status=PurchaseOrderStatus.pending)
    try:
        # Wrap whole operation in order to
        # mark all pending POs as failed in case of any failure
        logger.info("There are %s purchase orders to post",
                    pending_purchase_orders.count())
        tz = timezone('Asia/Seoul')
        today = datetime.now().astimezone(tz).date()
        # Group pending purchase orders by vendor: {vendor: [purchase orders]}
        grouped_vendors = map_reduce(pending_purchase_orders,
                                     lambda po: po.vendor)
        for vendor_id, pos in grouped_vendors.items():
            vendor = PurchaseOrderVendorManager.get_vendor(
                vendor_id, logger=logger, config=current_app.config)
            for po in pos:
                if po.purchase_date and po.purchase_date > today + timedelta(
                        days=1):
                    logger.info("Skip <%s>: purchase date is %s", po.id,
                                po.purchase_date)
                    continue
                logger.info("Posting a purchase order %s", po.id)
                try:
                    vendor.post_purchase_order(po)
                    posted_ops_count = len([
                        op for op in po.order_products
                        if op.status == OrderProductStatus.purchased
                    ])
                    if posted_ops_count == len(po.order_products):
                        po.status = PurchaseOrderStatus.posted
                        po.when_changed = po.when_posted = datetime.now()
                    elif posted_ops_count > 0:
                        po.status = PurchaseOrderStatus.partially_posted
                        po.when_changed = po.when_posted = datetime.now()
                        failed_order_products = [
                            op for op in po.order_products
                            if op.status != OrderProductStatus.purchased
                        ]
                        po.status_details = "Not posted products:\n" + \
                            '\n'.join(map(
                                lambda fop: f"{fop.product_id}: {fop.product.name}",
                                failed_order_products))
                    else:
                        po.status = PurchaseOrderStatus.failed
                        po.when_changed = datetime.now()
                        logger.warning(
                            "Purchase order %s was posted without errors but none of its products were purchased",
                            po.id)
                    logger.info("Posted a purchase order %s", po.id)
                except (PurchaseOrderError, AtomyLoginError) as ex:
                    logger.warning("Failed to post the purchase order %s.",
                                   po.id)
                    logger.warning(ex)
                    po.status = PurchaseOrderStatus.failed
                    po.status_details = str(ex)
                    po.when_changed = datetime.now()
                db.session.commit()
        logger.info('Done posting purchase orders')
    except Exception:
        for po in pending_purchase_orders:
            po.status = PurchaseOrderStatus.failed
        db.session.commit()
        raise