Example #1
def request_status(scan_id):
    request = scan_db.get(scan_id)

    def _dirty(system_name):
        return market_db.get(("dirty", system_name), default=None)

    dirty = groupby(_dirty, request["system_names"])
    system_completion = {
        ("Partial" if k is True else "Complete" if k is False else "Pending"):
        len(v)
        for (k, v) in dirty.items()
    }

    num_systems = len(request["system_names"])
    partial_or_pending = (system_completion.get("Partial", 0) +
                          system_completion.get("Pending", 0))

    system_completion_percent = 100 * (
        (num_systems - partial_or_pending) / num_systems)

    def _shell_status(item):
        return AsyncResult(item["task_id"]).state

    shell_completion = groupby(_shell_status, request["tasks"].values())

    return {
        "scan_id": request["scan_id"],
        "location": request["location"],
        "radius": request["radius"],
        "system_completion_percent": system_completion_percent,
        "system_completion": system_completion,
        "system_names": request["system_names"],
        "tasks": shell_completion,
        "unfinished_shells": dissoc(shell_completion, "SUCCESS"),
    }
Example #2
    def _fuzzy_filter(self, text, candidates, metric=fuzz.ratio):
        """
        :param text: str
        :param candidates: List[TrieEntry]
        :param metric: (str, str) -> Numeric
        :return: List[TrieEntry]
        """
        # similar = groupby(lambda entry: metric(entry.sf, text), candidates)  # group by val of metric
        # Calculate a metric
        measured = [(metric(entry.sf, text), entry) for entry in candidates]
        # Group by the same uri
        similar = groupby(lambda entry: entry[1].uri,
                          measured)  # uri: (m, entry)
        # In each group of matches for the same uri, keep only the one with the highest metric
        similar = [max(sames, key=first) for sames in similar.values()]
        # Sort by the metric
        best_matches = sorted(similar, key=first, reverse=True)
        # Filter bad matches
        best_matches = [
            entry for m, entry in best_matches
            if m >= self._metric_threshold * 100
        ]

        # Run some more checks on the best matches if there are several of them
        if len(best_matches) > 1:
            # best_matches = [max(best_matches, key=lambda entry: metric(raw_d(raw(entry.uri)), text))]
            best_matches = groupby(
                lambda entry: metric(raw_d(raw(entry.uri)), text),
                best_matches)
            best_matches = best_matches[max(best_matches)]
        return best_matches
Example #3
def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(
            cytoolz.merge,
            cytoolz.join('id', content['images'], 'image_id',
                         content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        multiple = groups[0]['url'].split('/')[-4]
        if multiple != 'multiple':
            continue
        subfolder = groups[0]['url'].split('/')[-2]
        folder = groups[0]['url'].split('/')[-3]
        if not os.path.exists(os.path.join(outdir, folder)):
            os.mkdir(os.path.join(outdir, folder))
        if not os.path.exists(os.path.join(outdir, folder, subfolder)):
            os.mkdir(os.path.join(outdir, folder, subfolder))

        anno_tree = instance2xml_base(groups[0])
        # if one file has multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            filenames.append(
                os.path.join(outdir, folder, subfolder,
                             os.path.splitext(name)[0] + ".xml"))
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
Example #4
def compute(t, lhs, rhs):
    """ Join Operation for Python Streaming Backend

    Note that a pure streaming Join is challenging/impossible because any row
    in one seq might connect to any row in the other, requiring simultaneous
    complete access.

    As a result this approach compromises and fully realizes the LEFT sequence
    while allowing the RIGHT sequence to stream.  As a result

    Always put your bigger table on the RIGHT side of the Join.
    """
    lhs = compute(t.lhs, lhs)
    rhs = compute(t.rhs, rhs)

    on_left = rowfunc(t.lhs[t.on_left])
    on_right = rowfunc(t.rhs[t.on_right])

    right_columns = list(range(len(t.rhs.columns)))
    for col in listpack(t.on_right):
        right_columns.remove(t.rhs.columns.index(col))

    get_right = lambda x: type(x)(get(right_columns, x))

    lhs_dict = groupby(on_left, lhs)

    for row in rhs:
        try:
            key = on_right(row)
            matches = lhs_dict[key]
            for match in matches:
                yield match + get_right(row)
        except KeyError:
            pass
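
A rough sketch of the same realize-left, stream-right join idiom on toy tuples (the row layout and key position here are assumptions for illustration, not the streaming backend's API): the left sequence is turned into a groupby dict keyed by the join column, and each right row is matched by a dict lookup.

from cytoolz import groupby

lhs = [('alice', 1), ('bob', 2), ('alice', 3)]
rhs = [('alice', 'x'), ('carol', 'y')]

lhs_dict = groupby(lambda row: row[0], lhs)   # fully realize the LEFT side
for row in rhs:                               # stream over the RIGHT side
    for match in lhs_dict.get(row[0], []):
        print(match + row[1:])
# ('alice', 1, 'x')
# ('alice', 3, 'x')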
Example #5
def prepare_audio_grouped(audio_paths: List[Pathlike], ) -> RecordingSet:
    import soundfile as sf

    # Group together multiple channels from the same session.
    # We will use that to create a Recording with multiple sources (channels).
    from cytoolz import groupby

    channel_wavs = groupby(lambda p: p.parts[-3], audio_paths)

    recordings = []
    for session_name, channel_paths in channel_wavs.items():
        audio_sf = sf.SoundFile(str(channel_paths[0]))

        recordings.append(
            Recording(
                id=session_name,
                sources=[
                    AudioSource(type="file",
                                channels=[idx],
                                source=str(audio_path))
                    for idx, audio_path in enumerate(sorted(channel_paths))
                ],
                sampling_rate=audio_sf.samplerate,
                num_samples=audio_sf.frames,
                duration=audio_sf.frames / audio_sf.samplerate,
            ))
    return RecordingSet.from_recordings(recordings)
Example #6
    def _index_by_recording_id_and_cache(self):
        if self._features_by_recording_id is None:
            from cytoolz import groupby

            self._features_by_recording_id = groupby(
                lambda feat: feat.recording_id, self)
        return self._features_by_recording_id
Example #7
def create_annotations(dbpath, subset, dst):
    '''

    :param dbpath:   root path of coco dataset
    :param subset:  'train' or  'val'
    :param dst:     where to save the transferred result
    :return:
    '''
    annotations_path = dbpath + '/annotations_trainval2014/annotations/instances_{}2014.json'.format(
        subset)
    images_path = dbpath + '/images/{}2014'.format(subset)
    categories, instances = get_instances(annotations_path)

    if not os.path.exists(dst):
        os.makedirs(dst)

    for i, instance in enumerate(instances):
        instances[i]['category_id'] = categories[instance['category_id']]

    for name, group in iteritems(groupby('file_name', instances)):
        print("image_path is %s , name is %s " % (images_path, name))
        img = imread(images_path + "/" + name)
        if img.ndim == 3:
            annotation = root(images_path, name, group[0]['height'],
                              group[0]['width'])
            for instance in group:
                annotation.append(instance_to_xml(instance))
            etree.ElementTree(annotation).write(
                dst + '/{}.xml'.format(name.split(".")[0]))
            print(name)
        else:
            print(instance['file_name'])
Example #8
def create_annotations(dbpath, subset, dst):
    annotations_path = path(
        dbpath).expand() / 'annotations/instances_{}2014.json'.format(subset)
    #images_path = path(dbpath).expand() / 'images/{}2014'.format(subset)
    images_path = path(dbpath).expand() / '{}2014'.format(
        subset)  # clw note: images are placed directly under val2014/train2014 here, with no 'images' directory in front
    #           also, subset is usually 'val' or 'train' here; see the file names
    categories, instances = get_instances(annotations_path)
    dst = path(dst).expand()

    for i, instance in enumerate(instances):
        instances[i]['category_id'] = categories[instance['category_id']]

    for name, group in iteritems(groupby('file_name', instances)):
        img = imread(images_path / name)
        if img.ndim == 3:
            out_name = rename(name)
            annotation = root('VOC2014', '{}.jpg'.format(out_name),
                              group[0]['height'], group[0]['width'])
            for instance in group:
                annotation.append(instance_to_xml(instance))
            etree.ElementTree(annotation).write(dst /
                                                '{}.xml'.format(out_name))
            #print out_name
            print(out_name)  # clw modify: apparently this was Python 2.7 before
        else:
            #print instance['file_name']
            print(instance['file_name'])  # clw modify: apparently this was Python 2.7 before
Example #9
 def highest_td_peer(self) -> BasePeer:
     peers = tuple(self.connected_nodes.values())
     if not peers:
         raise NoConnectedPeers()
     peers_by_td = groupby(operator.attrgetter('head_td'), peers)
     max_td = max(peers_by_td.keys())
     return random.choice(peers_by_td[max_td])
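
The pattern above (group by a score, index with the maximum key, pick randomly within that group) is a compact way to break ties among equally good candidates. A toy sketch with plain tuples standing in for peers:

import random
from cytoolz import groupby

peers = [('a', 10), ('b', 30), ('c', 30)]
peers_by_td = groupby(lambda p: p[1], peers)
print(random.choice(peers_by_td[max(peers_by_td)]))  # ('b', 30) or ('c', 30)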
Example #10
def change_annotations(data_path, subset, destination_path):
    if not os.path.exists(data_path):
        raise FileNotFoundError("{} path is not exist".format(data_path))

    os.makedirs(destination_path, exist_ok=True)

    annotation_path = os.path.join(
        data_path, "annotations/instances_{}2014.json".format(subset))
    image_path = os.path.join(data_path, "images/{}2014".format(subset))

    if not os.path.exists(annotation_path):
        raise FileNotFoundError(
            "{} annotation is not exist".format(annotation_path))

    if not os.path.exists(image_path):
        raise FileNotFoundError("{} image is not exist".format(image_path))

    categories, instances = create_annotation_instance(annotation_path)

    for i, instance in enumerate(instances):
        instances[i]["category_id"] = categories[instance["category_id"]]

    for name, group in tqdm(iteritems(groupby("file_name", instances)),
                            desc="Create annotation xml files"):
        out_name = name.split(".")[-2]
        img = imread(os.path.join(image_path, name))

        if img.ndim == 3:
            annotation = xml_root("{}.jpg".format(out_name),
                                  group[0]["height"], group[0]["width"])

            for instance in group:
                annotation.append(instance_to_xml(instance))
            etree.ElementTree(annotation).write(
                os.path.join(destination_path, "{}.xml".format(out_name)))
Example #11
def _get_sub_overrides_by_prop(
        overrides: Dict[str, Any]) -> Iterator[Tuple[str, Dict[str, List[str]]]]:
    # we only want the overrides that are not top level.
    sub_overrides = _get_sub_overrides(overrides)
    key_groups = groupby(_extract_top_level_key, sub_overrides.keys())
    for top_level_key, props in key_groups.items():
        yield top_level_key, {_extract_tail_key(prop): overrides[prop] for prop in props}
Example #12
    def X_feature_onehot(self, dataframe: DataFrame) -> DataFrame:
        # fieldgroups[basename] = [ fieldname ]
        # noinspection PyArgumentList
        fieldgroups = groupby(
            curry(re.sub)(r'\d+(st|nd|rd)?$')(''),  # basename
            self.params['X_feature_onehot']  # fieldnames
        )
        encodings = {}
        for basename, fieldnames in fieldgroups.items():
            # NOTE: in theory, unique_values should be hardcoded based on data_description.txt
            #       for Kaggle, we can cheat and just take unique_values from self.data['combined']
            # BUGFIX: running to_X() separately on test/train/validate datasets results in column name mismatches
            unique_values = np.unique(
                self.data['combined'][fieldnames].dropna().values)
            category_dtype = CategoricalDtype(categories=unique_values)

            for fieldname in fieldnames:
                dataframe[fieldname] = dataframe[fieldname].astype(
                    category_dtype)
                onehot = pd.get_dummies(dataframe[fieldname],
                                        prefix=basename,
                                        prefix_sep='_')
                if basename not in encodings:
                    encodings[basename] = onehot
                else:
                    encodings[basename] = onehot & encodings[basename]  # bitwise AND

        # Add additional onehot columns to dataframe
        for basename, onehot in encodings.items():
            dataframe = dataframe.join(onehot)

        # Mark original categorical columns for exclusion
        self.params['X_feature_exclude'] += self.params['X_feature_onehot']
        return dataframe
Example #13
def parse_keypoints(content, outdir):
    keypoints = dict(
        zip(range(1,
                  len(content['categories'][0]['keypoints']) + 1),
            content['categories'][0]['keypoints']))
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id',
                     content['annotations']))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group,
                                             anno_tree,
                                             keypoints,
                                             bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        doc.write(open(filename, "w"), pretty_print=True)
        print("Formating keypoints xml file {} done!".format(name))
Example #14
def batch_stitch_stack(file_dict, output, stitch_order=None,
                       channel_order=[0, 1, 2], target_bit_depth=8,
                       compress=1, **kwargs):
    """Run snail stitch and concatenate the channels across a set of images.

    This function takes the (plate, well) dictionary built using the
    ``make_key2file`` function. Images are grouped according to their channel,
    stitched together and stacked into a single 3-channel image. Images
    are re-scaled and saved to a user specified output directory. Images
    are saved to directories according to their plate number.

    Parameters
    ----------
    file_dict : dict { tuple (plate, well) : list of strings }
        The dictionary mapping the (plate, well) tuple to a list of image
        files. This dictionary is built using the ``make_key2file`` function.
    output : string
        The directory to output the stitched and concatenated images to.
    stitch_order : array of int, shape (M, N)
        The order of the stitching.
        Passed to "stitch_order" argument of `snail_stitch`.
    channel_order : list of int
        The order the channels should be in in the final image.
        Passed to "channel_order" argument of `stack_channels`.
    target_bit_depth : int in {8, 16}, optional
        If None, perform no rescaling. Otherwise, rescale to occupy
        the dynamic range of the target bit depth.
    compress : int in [0, 9], optional
        Compression level for saved images. 0 = no compression,
        1 = fast compression, 9 = maximum compression, slowest.
    **kwargs : dict
        Keyword arguments to be passed to
        `microscopium.preprocess.stretchlim`
    """
    for fns in list(file_dict.values()):
        sem = cellomics_semantic_filename(fns[0])
        plate = str(sem['plate'])
        new_fn = '-'.join([sem['prefix'], plate, sem['well']])
        new_fn = '.'.join([new_fn, sem['suffix']])

        channels = groupby(get_channel, fns)
        while len(channels) < 3:
            channels[np.max(list(channels.keys())) + 1] = None

        images = []
        for channel, fns in sorted(channels.items()):
            if fns is None:
                images.append(None)
            else:
                image = snail_stitch(fns, stitch_order)
                image = rescale_from_12bit(image, target_bit_depth, **kwargs)
                images.append(image)

        stack_image = stack_channels(images, channel_order)

        out_dir = os.path.join(output, plate)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        mio.imsave(os.path.join(out_dir, new_fn), stack_image,
                   compress=compress)
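
The grouping step relies on get_channel pulling a channel index out of each filename. A rough sketch of that idiom with a made-up filename pattern (the real Cellomics naming is handled by cellomics_semantic_filename and get_channel):

import re
from cytoolz import groupby

def get_channel_demo(fn):
    # hypothetical pattern: ...d0.tif / d1.tif / d2.tif encodes the channel
    return int(re.search(r'd(\d)\.tif$', fn).group(1))

fns = ['plate1_A01_f00d0.tif', 'plate1_A01_f00d1.tif', 'plate1_A01_f01d0.tif']
print(groupby(get_channel_demo, fns))
# {0: ['plate1_A01_f00d0.tif', 'plate1_A01_f01d0.tif'], 1: ['plate1_A01_f00d1.tif']}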
Example #16
def create_annotations(dbpath, subset, dst):
    first_part = path(dbpath).expand()
    last_part = 'annotations/instances_{}2014.json'.format(subset)
    annotations_path = first_part / last_part
    images_path = first_part / 'images/{}2014'.format(subset)
    categories, instances = get_instances(annotations_path)
    dst = path(dst).expand()

    for i, instance in enumerate(instances):
        instances[i]['category_id'] = categories[instance['category_id']]

    for name, group in iteritems(groupby('file_name', instances)):
        img = imread(images_path / name)
        if img.ndim == 3:
            out_name = rename(name)
            annotation = root('VOC2014', '{}.jpg'.format(out_name),
                              group[0]['height'], group[0]['width'])
            for instance in group:
                annotation.append(instance_to_xml(instance))
            etree.ElementTree(annotation).write(
                dst / '{}.xml'.format(out_name)
            )
            print(out_name)
        else:
            print(instance['file_name'])
Example #17
def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id',
                     content['annotations']))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        # if one file has multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            if group[u'iscrowd'] == 0:
                filenames.append(
                    os.path.join(outdir, re.sub(" ", "_",
                                                group['category_id']),
                                 os.path.splitext(name)[0] + ".xml"))
                anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print "Formating instance xml file {} done!".format(name)
Example #18
    def validate_uncles(self, block: BaseBlock) -> None:
        """
        Validate the uncles for the given block.
        """
        # Check for duplicates
        uncle_groups = groupby(operator.attrgetter('hash'), block.uncles)
        duplicate_uncles = tuple(sorted(
            hash for hash, twins in uncle_groups.items() if len(twins) > 1
        ))
        if duplicate_uncles:
            raise ValidationError(
                "Block contains duplicate uncles:\n"
                " - {0}".format(' - '.join(duplicate_uncles))
            )

        recent_ancestors = tuple(
            ancestor
            for ancestor
            in self.get_ancestors(MAX_UNCLE_DEPTH + 1, header=block.header)
        )
        recent_ancestor_hashes = {ancestor.hash for ancestor in recent_ancestors}
        recent_uncle_hashes = _extract_uncle_hashes(recent_ancestors)

        for uncle in block.uncles:
            if uncle.hash == block.hash:
                raise ValidationError("Uncle has same hash as block")

            # ensure the uncle has not already been included.
            if uncle.hash in recent_uncle_hashes:
                raise ValidationError(
                    "Duplicate uncle: {0}".format(encode_hex(uncle.hash))
                )

            # ensure that the uncle is not one of the canonical chain blocks.
            if uncle.hash in recent_ancestor_hashes:
                raise ValidationError(
                    "Uncle {0} cannot be an ancestor of {1}".format(
                        encode_hex(uncle.hash), encode_hex(block.hash)))

            # ensure that the uncle was built off of one of the canonical chain
            # blocks.
            if uncle.parent_hash not in recent_ancestor_hashes or (
               uncle.parent_hash == block.header.parent_hash):
                raise ValidationError(
                    "Uncle's parent {0} is not an ancestor of {1}".format(
                        encode_hex(uncle.parent_hash), encode_hex(block.hash)))

            # Now perform VM level validation of the uncle
            self.validate_seal(uncle)

            try:
                uncle_parent = self.get_block_header_by_hash(uncle.parent_hash)
            except HeaderNotFound:
                raise ValidationError(
                    "Uncle ancestor not found: {0}".format(uncle.parent_hash)
                )

            uncle_vm_class = self.get_vm_class_for_block_number(uncle.block_number)
            uncle_vm_class.validate_uncle(block, uncle, uncle_parent)
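
The duplicate-uncle check at the top is a general idiom: group by the candidate key and keep the keys whose group contains more than one member. A minimal sketch on plain strings:

from cytoolz import groupby

hashes = ['0xaa', '0xbb', '0xaa']
groups = groupby(lambda h: h, hashes)
duplicates = tuple(sorted(h for h, twins in groups.items() if len(twins) > 1))
print(duplicates)  # ('0xaa',)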
Example #19
def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))

    filtered_info_list = []

    # convert category id to name && get target object info
    for instance in merged_info_list:
        cat_name = categories[instance['category_id']]
        filepath = os.path.join(dataDir, instance['file_name'])
        if cat_name in target_classes:

            # filter out images that don't meet VOC requirements
            origimg = Image.open(filepath)
            if len(np.asarray(origimg).shape) != 3:
                continue
            instance['category_id'] = cat_name
            filtered_info_list.append(instance)

    # ## cap the number of images per category
    # target_image_list = []
    # for img_info in filtered_info_list:
    #     if img_info['category_id'] == 'bicycle':
    #         target_image_list.append(img_info)
    #     elif len(target_image_list) < total_num:
    #         target_image_list.append(img_info)


    # group by filename to pool all bbox in same file
    target_images = []
    for name, groups in cytoolz.groupby('file_name', filtered_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        # if one file has multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            # filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
            #                               'annotations', os.path.splitext(name)[0] + ".xml"))

            filenames.append(os.path.join(outdir, 'annotations', os.path.splitext(name)[0] + ".xml"))
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)

        print("Formating instance xml file {} done!".format(name))

        # copy target image file to outdir
        if name not in target_images:
            img_path = os.path.join(dataDir, name)
            # target_dir = os.path.join(output_dir, re.sub(" ", "_", group['category_id']), 'images', name)
            target_dir = os.path.join(output_dir, 'images', name)

            shutil.copyfile(img_path, target_dir)
            target_images.append(name)

        # if len(target_images) > total_num:
        #     break

    print(len(target_images))
Example #20
 def replace_key(self, key: KeyId, replace_with: Signature) -> 'Signatures':
     """Return a new object with the matching keys replaced."""
     matches: Dict[bool,
                   List[Signature]] = groupby(lambda sig: sig.keyid == key,
                                              self.sigs)
     return Signatures(
         list(concat([[replace_with],
                      matches.get(False, [])])))
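
Grouping on a boolean predicate, as above, is effectively a two-way partition: matches[True] holds the signatures whose keyid equals key, matches[False] everything else (either group may be absent, hence matches.get). A small sketch with tuples standing in for Signature objects:

from cytoolz import groupby

sigs = [('key1', 'sigA'), ('key2', 'sigB'), ('key1', 'sigC')]
matches = groupby(lambda sig: sig[0] == 'key1', sigs)
print(matches.get(True, []))   # [('key1', 'sigA'), ('key1', 'sigC')]
print(matches.get(False, []))  # [('key2', 'sigB')]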
Example #21
def process(workbook: Any, content: str) -> None:
    """Process Hosts (3Par) worksheet

    :param workbook:
    :param content:
    """
    worksheet = workbook.get_sheet_by_name('Hosts')

    headers = list(concat([
        get_parser_header(SHOWHOST_TMPL),
        get_parser_header(SHOWHOST_LINES_TMPL)[4:],
    ]))

    RowTuple = namedtuple('RowTuple', headers)  # pylint: disable=invalid-name

    build_header(worksheet, headers)

    show_hosts_out = groupby(
        itemgetter(0, 1, 2, 3, 4), run_parser_over(content, SHOWHOST_TMPL))
    show_hosts_lines_out = groupby(
        itemgetter(0, 1, 2, 3), run_parser_over(content, SHOWHOST_LINES_TMPL))

    rows = []
    for idfier in show_hosts_out:
        with suppress(KeyError):
            for host_line, details_line in \
                    zip(show_hosts_out[idfier], show_hosts_lines_out[idfier[:-1]]):
                rows.append(host_line + details_line[4:])

    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, rows), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(column_format(col_n), row_n)]
            cell.value = str.strip(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(
        worksheet,
        'HostsTable',
        'Hosts',
        final_col,
        final_row)
Example #22
def group_data(ctx):
    grouper = lambda x: ('defaults'
                         if x['sday'] == '0001-01-01' and x['eday'] == '0001-01-01'
                         else 'data')
    groups = groupby(grouper, ctx['data'])
    return merge(ctx, {
        'data': get('data', groups, []),
        'defaults': get('defaults', groups, [])
    })
Example #23
 def is_valid_connection_candidate(self, candidate: Node) -> bool:
     # connect to no more then 2 nodes with the same IP
     nodes_by_ip = groupby(
         operator.attrgetter('remote.address.ip'),
         self.connected_nodes.values(),
     )
     matching_ip_nodes = nodes_by_ip.get(candidate.address.ip, [])
     return len(matching_ip_nodes) <= 2
Example #24
def broadcast_dimensions(argpairs,
                         numblocks,
                         sentinels=(1, (1, )),
                         consolidate=None):
    """ Find block dimensions from arguments

    Parameters
    ----------
    argpairs: iterable
        name, ijk index pairs
    numblocks: dict
        maps {name: number of blocks}
    sentinels: iterable (optional)
        values for singleton dimensions
    consolidate: func (optional)
        use this to reduce each set of common blocks into a smaller set

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    # List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
    argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None]
    L = toolz.concat([
        zip(inds, dims) for (x, inds), (x, dims) in toolz.join(
            toolz.first, argpairs2, toolz.first, numblocks.items())
    ])

    g = toolz.groupby(0, L)
    g = dict((k, set([d for i, d in v])) for k, v in g.items())

    g2 = dict(
        (k, v - set(sentinels) if len(v) > 1 else v) for k, v in g.items())

    if consolidate:
        return toolz.valmap(consolidate, g2)

    if g2 and not set(map(len, g2.values())) == set([1]):
        raise ValueError("Shapes do not align %s" % g)

    return toolz.valmap(toolz.first, g2)
Example #25
def partition(grouper, sequence, npartitions, p, nelements=2**20):
    """ Partition a bag along a grouper, store partitions on disk """
    for block in partition_all(nelements, sequence):
        d = groupby(grouper, block)
        d2 = defaultdict(list)
        for k, v in d.items():
            d2[abs(hash(k)) % npartitions].extend(v)
        p.append(d2)
    return p
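
A rough usage sketch of the function above, using a plain list as the partition store (the real caller passes a partd-style on-disk container; the list here is only an assumption for illustration, and partition_all, groupby and defaultdict must already be imported as in the snippet):

store = partition(lambda x: x % 3, range(10), npartitions=2, p=[])
print(store)
# one dict per block, e.g. [{0: [0, 3, 6, 9, 2, 5, 8], 1: [1, 4, 7]}]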
Example #27
 def find(self, key: KeyId) -> Optional[Signature]:
     """Return the first matching key if available."""
     matches: Dict[bool,
                   List[Signature]] = groupby(lambda sig: sig.keyid == key,
                                              self.sigs)
     try:
         return matches[True][0]
     except KeyError:
         return None
Example #28
def getrecursive(dict_, keys):
    if not any(keys):
        return dict_
    head_to_tails = valmap(
        lambda l: [t[1:] for t in l], groupby(itemgetter(0), filter(len, keys))
    )
    return {
        head: getrecursive(dict_[head], tails) for head, tails in head_to_tails.items()
    }
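
A worked example of the recursion above: each key is a path of nested dictionary keys; the paths are grouped level by level so only the requested branches survive (reusing the getrecursive defined above and its imports).

data = {'a': {'b': 1, 'c': 2}, 'd': 3}
print(getrecursive(data, [('a', 'b'), ('d',)]))
# {'a': {'b': 1}, 'd': 3}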
Example #29
def _get_sub_overrides_by_prop(overrides):
    # we only want the overrides that are not top level.
    sub_overrides = _get_sub_overrides(overrides)
    key_groups = groupby(_extract_top_level_key, sub_overrides.keys())
    for top_level_key, props in key_groups.items():
        yield top_level_key, {
            _extract_tail_key(prop): overrides[prop]
            for prop in props
        }
Example #30
def left_join1(lseq, rseq, key):
    key_fn = operator.itemgetter(*key)
    lr = [
        cytoolz.merge(_)
        for _ in cytoolz.groupby(key_fn, lseq + rseq).values()
    ]
    return (sorted(list(
        filter(lambda d: key_fn(d) in [key_fn(l) for l in lseq], lr)),
                   key=key_fn))
Example #31
 async def multi_set(self, triplets):
     # pylint: disable=no-member
     grouped_by_ttl = cytoolz.groupby(lambda t: t[2], triplets)
     futures = []
     for cache in self._caches:
         for ttl, ttl_group in grouped_by_ttl.items():
             pairs = [t[:2] for t in ttl_group]
             futures.append(
                 asyncio.ensure_future(cache.multi_set(pairs, ttl=ttl)))
     return await asyncio.gather(*futures)
Example #32
def create_annotations(coco_annotation, dst='annotations_voc'):

    os.makedirs(dst, exist_ok=True)

    categories, instances = get_instances(coco_annotation)
    '''
        About categories: Dictionary where the keys are the category IDs and the values are the category names.
        About instances: Tuple of dictionaries containing information about the annotations and their respective images.
        NOTE: There is one instance for every annotation, not image.
    '''

    dst = os.path.abspath(dst)
    '''
        Modifying the category ID to show a string instead of a number.
        The string corresponds to the name of the category.
    '''
    for i, instance in tqdm(enumerate(instances), desc="rewriting categories"):
        instances[i]['category_id'] = categories[instance['category_id']]

    for name, group in tqdm(iteritems(groupby('file_name', instances)),
                            total=len(groupby('file_name', instances)),
                            desc="processing annotations"):
        '''
            About name: the image path
            About group: the image information
        '''

        img = imread(os.path.abspath(name))
        if img.ndim == 3:
            out_name = rename(name)
            image_folder, image_name = os.path.split(out_name)
            annotation = root(image_folder, '{}.jpg'.format(image_name),
                              group[0]['height'], group[0]['width'])
            for instance in group:
                annotation.append(instance_to_xml(instance))

            # Exporting XML to destination folder
            destination_file = "{}.xml".format(out_name)
            _, destination_file = os.path.split(destination_file)
            xml_file = etree.ElementTree(annotation)
            xml_file.write(os.path.join(dst, destination_file))
Example #33
def increment_rt_counts(tweet_pks):
    """
    :param tweet_pks: dictionary {tweet_pk: rt_count}
    :return:
    """
    items = sorted(tweet_pks.items(), key=lambda x: x[1], reverse=True)
    grouped = groupby(lambda x: x[1], items)

    for incr, pairs in grouped.items():
        if incr > 0:
            pks = pluck(0, pairs)
            TweetFeatures.objects.filter(tweet_id__in=pks).update(count_rts=F('count_rts') + incr)
Example #34
def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
                         consolidate=None):
    """ Find block dimensions from arguments

    Parameters
    ----------
    argpairs: iterable
        name, ijk index pairs
    numblocks: dict
        maps {name: number of blocks}
    sentinels: iterable (optional)
        values for singleton dimensions
    consolidate: func (optional)
        use this to reduce each set of common blocks into a smaller set

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    # List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
    argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None]
    L = toolz.concat([zip(inds, dims) for (x, inds), (x, dims)
                     in toolz.join(toolz.first, argpairs2, toolz.first, numblocks.items())])

    g = toolz.groupby(0, L)
    g = dict((k, set([d for i, d in v])) for k, v in g.items())

    g2 = dict((k, v - set(sentinels) if len(v) > 1 else v) for k, v in g.items())

    if consolidate:
        return toolz.valmap(consolidate, g2)

    if g2 and not set(map(len, g2.values())) == set([1]):
        raise ValueError("Shapes do not align %s" % g)

    return toolz.valmap(toolz.first, g2)
Example #35
def collate_discovery_messages(encoded_blobs):
    all_messages = tuple(map(decode_discovery_message, encoded_blobs))
    messages_by_type = groupby(type, all_messages)

    ping_blobs = tuple(rlp.encode(msg) for msg in messages_by_type[RLPPing])
    pong_blobs = tuple(rlp.encode(msg) for msg in messages_by_type[RLPPong])
    find_node_blobs = tuple(
        rlp.encode(msg) for msg in messages_by_type[RLPFindNode])
    neighbours_blobs = tuple(
        rlp.encode(msg) for msg in messages_by_type[RLPNeighbours])

    return ping_blobs, pong_blobs, find_node_blobs, neighbours_blobs
Example #36
def unique_mentions_per_word(mentions, field):
    """Count of unique mentions per previous/next-word
    Parameters:
        mentions, list: a list of Mention objects
        field, string : can be one of `('previous_word', 'next_word')`
    Returns:
        a dictionary with words as keys and counts as values
    """
    d = defaultdict(int)
    groups = cytoolz.groupby(lambda x: x[field], mentions)
    for k, g in groups.items():
        d[k] = count(unique(g, lambda x: x.text))

    return d
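
A condensed sketch of the same count with plain dicts standing in for Mention objects (field access and the unique/count helpers replaced by a set comprehension):

import cytoolz

mentions = [
    {'previous_word': 'the', 'text': 'cat'},
    {'previous_word': 'the', 'text': 'cat'},
    {'previous_word': 'the', 'text': 'dog'},
]
groups = cytoolz.groupby(lambda x: x['previous_word'], mentions)
print({k: len({m['text'] for m in g}) for k, g in groups.items()})  # {'the': 2}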
Example #37
def parse_keypoints(content, outdir):
    keypoints = dict(zip(range(1, len(content['categories'][0]['keypoints'])+1), content['categories'][0]['keypoints']))
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0]+".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        doc.write(open(filename, "w"), pretty_print=True)
        print "Formating keypoints xml file {} done!".format(name)
Example #38
File: core.py Project: OspreyX/dask
    def extend_chunk(self, seq):
        self._open_files()
        grouper = self.grouper
        npart = self.npartitions
        groups = groupby(grouper, seq)

        # Unify groups that hash the same
        groups2 = dict()
        for k, v in groups.items():
            key = hash(k) % self.npartitions
            if key not in groups2:
                groups2[key] = []
            groups2[key].extend(v)

        # Store to disk
        for k, group in groups2.items():
            if group:
                self.dump(group, self.files[k])
Example #39
def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        # if one file has multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
                                    os.path.splitext(name)[0] + ".xml"))
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print "Formating instance xml file {} done!".format(name)
Example #40
def make_key2file(fns):
    """Return a dictionary mapping well co-ordinates to filenames.

    Returns a dictionary where key are (plate, well) co-ordinates and
    values are lists of images corresponding to that plate and well.

    Parameters
    ----------
    fns : list of string
        A list of Cellomics TIF files.

    Returns
    -------
    wellchannel2file : dict {tuple : list of string}
        The dictionary mapping the (plate, well) co-ordinate to
        a list of files corresponding to that well.
    """
    wellchannel2file = groupby(filename2coord, fns)
    return wellchannel2file
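
A rough sketch of the grouping with a hypothetical filename2coord that reads '<plate>_<well>_...' names (the real Cellomics parser extracts these fields from the vendor naming scheme):

from cytoolz import groupby

def filename2coord_demo(fn):
    plate, well = fn.split('_')[:2]
    return (plate, well)

fns = ['P1_A01_ch0.tif', 'P1_A01_ch1.tif', 'P1_B02_ch0.tif']
print(groupby(filename2coord_demo, fns))
# {('P1', 'A01'): ['P1_A01_ch0.tif', 'P1_A01_ch1.tif'], ('P1', 'B02'): ['P1_B02_ch0.tif']}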
Example #41
def get_interactions():
    dates = sorted(set(map(_g('date'), data['interactions'])))
    d = t.pipe(data['interactions'],
               tc.groupby(lambda i: i.student),
               tc.valmap(lambda x: t.pipe(t.groupby(lambda i: i.date,x),
                                          tc.valmap(lambda v: [v[0].time_in, v[0].time_out]))))

    mat = [['student'] + dates]
    for student, attendance in d.items():
        record = [student]
        for dt in dates:
            if dt in attendance:
                record.append(attendance[dt])
            elif dt in data['students'][student].absences:
                record.append(('',''))
            else:
                record.append((None,None))
        mat.append(record)

    return {'interactions': mat}
Example #42
def compute_up(t, seq, **kwargs):
    if ((isinstance(t.apply, Reduction) and type(t.apply) in binops) or
        (isinstance(t.apply, Summary) and builtins.all(type(val) in binops
                                                for val in t.apply.values))):
        grouper, binop, combiner, initial = reduce_by_funcs(t)
        d = reduceby(grouper, binop, seq, initial)
    else:
        grouper = rrowfunc(t.grouper, t._child)
        groups = groupby(grouper, seq)
        d = dict((k, compute(t.apply, {t._child: v})) for k, v in groups.items())

    if isscalar(t.grouper.dshape.measure):
        keyfunc = lambda x: (x,)
    else:
        keyfunc = identity
    if isscalar(t.apply.dshape.measure):
        valfunc = lambda x: (x,)
    else:
        valfunc = identity
    return tuple(keyfunc(k) + valfunc(v) for k, v in d.items())
Example #43
def compute_one(t, seq, **kwargs):
    grouper = rrowfunc(t.grouper, t.child)
    if (isinstance(t.apply, Reduction) and
        type(t.apply) in binops):

        binop, initial = binops[type(t.apply)]
        applier = rrowfunc(t.apply.child, t.child)

        def binop2(acc, x):
            return binop(acc, applier(x))

        d = reduceby(grouper, binop2, seq, initial)
    else:
        groups = groupby(grouper, seq)
        d = dict((k, compute(t.apply, {t.child: v})) for k, v in groups.items())

    if t.grouper.iscolumn:
        return d.items()
    else:
        return tuple(k + (v,) for k, v in d.items())
Example #44
def compute(t, seq):
    parent = compute(t.parent, seq)

    if (isinstance(t.apply, Reduction) and
        type(t.apply) in binops):

        binop, initial = binops[type(t.apply)]
        applier = rowfunc(t.apply.parent)
        grouper = rowfunc(t.grouper)

        def binop2(acc, x):
            return binop(acc, applier(x))

        d = reduceby(grouper, binop2, parent, initial)
    else:
        grouper = rowfunc(t.grouper)
        groups = groupby(grouper, parent)
        d = dict((k, compute(t.apply, v)) for k, v in groups.items())

    if t.grouper.iscolumn:
        return d.items()
    else:
        return tuple(k + (v,) for k, v in d.items())
Example #45
def collect(grouper, group, p, barrier_token):
    """ Collect partitions from disk and yield k,v group pairs """
    d = groupby(grouper, p.get(group, lock=False))
    return list(d.items())
Example #46
def _group_clusters(docs, labels):
    """Group docs by their cluster labels."""
    return [list(zip(*cluster))[1]
            for cluster in itervalues(toolz.groupby(operator.itemgetter(0),
                                                    zip(labels, docs)))]
Example #47
ct = bquery.ctable(z, rootdir=rootdir, )
print(ct)

# -- pandas --
df = pd.DataFrame(z)
with ctime(message='pandas'):
    result = df.groupby(['f0'])['f2'].sum()
print(result)
t_pandas = t_elapsed

# -- cytoolz --
with ctime(message='cytoolz over bcolz'):
    # In Memory Split-Apply-Combine
    # http://toolz.readthedocs.org/en/latest/streaming-analytics.html?highlight=reduce#split-apply-combine-with-groupby-and-reduceby
    r = cytoolz.groupby(lambda row: row.f0, ct)
    result = valmap(compose(sum, pluck(2)), r)
print('x{0} slower than pandas'.format(round(t_elapsed / t_pandas, 2)))
print(result)

# -- blaze + bcolz --
blaze_data = blz.Data(ct.rootdir)
expr = blz.by(blaze_data.f0, sum_f2=blaze_data.f2.sum())
with ctime(message='blaze over bcolz'):
    result = blz.compute(expr)
print('x{0} slower than pandas'.format(round(t_elapsed / t_pandas, 2)))
print(result)

# -- bquery --
with ctime(message='bquery over bcolz'):
    result = ct.groupby(['f0'], ['f2'])
Example #48
def _aggregate_miner_data(raw_data):
    data_by_miner = groupby(0, raw_data)

    for miner, miner_data in data_by_miner.items():
        _, block_hashes, gas_prices = map(set, zip(*miner_data))
        yield MinerData(miner, len(set(block_hashes)), min(gas_prices))
Example #49
 def groupby(self, key):
     return fdict(cytoolz.groupby(key, self)).valmap(flist)