Example #1
def parse_ag_opinion(html: Response) -> OpinionParseResult:
    summary = first(html,
                    css=".page-top__subtitle--re p::text",
                    expected="summary")
    title = first(html,
                  css="h1.page-top__title--opinion::text",
                  expected="title")
    date = first(html, css="time::text", expected="date")
    full_text = pipe(
        all(html, ".body-content p::text"),
        map(normalize_whitespace),
        join("\n"),
    )
    citation_set = pipe(
        re.findall(r"\d+-\d+-\d+(?:\([-().A-Za-z0-9]*[-A-Za-z0-9]\))?",
                   full_text),
        set,
        sorted,
        CitationSet,
    )

    return OpinionParseResult(
        summary=summary,
        title=title,
        is_official=title.startswith("Official"),
        date=opinion_date_to_iso8601(date),
        full_text=full_text,
        source_url=html.url,
        citations=citation_set,
    )
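This example leans on a few scraping helpers that are not shown. A minimal sketch of what first and all might look like, assuming a Scrapy-style Response whose .css() returns a selector list (names and behavior are inferred from usage; the real project may differ):

def all(html, css):
    # Return every text fragment matched by the CSS selector.
    # (Note: this shadows the built-in all, as the example's usage implies.)
    return html.css(css).getall()

def first(html, css, expected):
    # Return the first match, failing loudly when the page layout changes.
    matches = html.css(css).getall()
    if not matches:
        raise ValueError(f"Could not find {expected} at {html.url}")
    return matches[0]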
Example #2
def test_preprocess():
    test_im_fn = '1.jpg'
    test_im = PIL.Image.open(test_im_fn)
    pipeline = (
        preproc_resize,    # 2
        preproc_grayscale, # 3
        preproc_rescale,   # 4
        #preproc_normalise, # 5
        preproc_smooth,    # 6
        preproc_thresh,    # 7
    )
    this_pipeline = []
    for i, p in enumerate(pipeline):
        this_pipeline.append(p)
        pipe(test_im, *this_pipeline).save('{}.jpg'.format(i+2))
Example #3
def top_model_crps_id(model):
    """
    Gets the ID of the corpus created by the top version of the model
    type supplied.
    """
    return pipe(model, find_best_fit_model_corpus_id,
                tlz.curried.get_in(['id']))
Example #4
 async def delete(self, id_):
     self.items, has_changed = pipe(
         id_,
         lambda key: dissoc(self.items, key),
         lambda new: (new, len(self.items) != len(new)),
     )
     return has_changed
Example #5
def add_to_data_frame(df):
  df['lat'], df['long'] = zip(
      *df['Locations'].map(lambda x: f.pipe(
        x
        , query_gmap_geocode
        , gmap_query_result_to_latlng)))
  return df
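The zip(*...) trick above transposes a sequence of (lat, lng) pairs into two parallel columns. A tiny self-contained demonstration, with a hypothetical fake_geocode standing in for the query_gmap_geocode / gmap_query_result_to_latlng pipe:

import pandas as pd

def fake_geocode(location):
    # Stand-in for the real geocoding pipe; always returns one (lat, lng) pair.
    return (48.8566, 2.3522)

df = pd.DataFrame({"Locations": ["Paris", "Paris"]})
df["lat"], df["long"] = zip(*df["Locations"].map(fake_geocode))
print(df)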
Example #6
def top_model_kmeans_clstrs(model, n_clusters=8, **kwargs):
    """
    Fits a kmeans model on the requested corpus (uses mini batch kmeans).
    Returns the cluster labels.
    """
    return pipe(model, top_model_corpus_df,
                kmeans_clusters(n_clusters=n_clusters, **kwargs))
Example #7
def test_reform_combination():
    assert pipe(
        france_data_tax_benefit_system,
        plfr2014,
        plf2015,
        plf2016,
        ayrault_muet,
        )
Example #8
def preproc_thresh(image):
    """Threshold image."""
    pipeline = (
        pil_2_numpy,
        lambda i: i > skimage.filters.threshold_otsu(i),
        #lambda i: i > THRESH * 255,
        numpy_2_pil,
    )
    return pipe(image, *pipeline)
Example #9
def main():
    img = cv2.imread('images/crane1.png')
    merged, lines = pipe(img, binarize, get_lines, cluster_lines,
                         cluster_coordinates)
    #cluster y axis
    cluster_axis(merged, 1)
    #cluster x axis
    cluster_axis(merged, 0)
    save_img(merged, lines, "final.png", img)
Example #10
def get_reviews(x):
    return pipe(x, 
        lambda x: or_pipe(x, 
            _.find_elements_by_class_name("reviewText"),
            _.find_elements_by_class_name("review-text"),
            default=[],
        ),
        map(lambda x: x.text)
    )
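or_pipe is not a toolz function; a hypothetical definition inferred from its usage here (try each lookup in turn and return the first non-empty result, else the default):

def or_pipe(x, *funcs, default=None):
    # Apply each candidate function to x; the first truthy result wins.
    for func in funcs:
        result = func(x)
        if result:
            return result
    return default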
Example #11
def preproc_thresh(image):
    """Threshhold image."""
    pipeline = (
        pil_2_numpy,
        lambda i: i > skimage.filters.threshold_otsu(i),
        #lambda i: i > THRESH * 255,
        numpy_2_pil,
    )
    return pipe(image, *pipeline)
Example #12
 def _clean_text(self, aspirate_result_text: str) -> str:
     return pipe(
         aspirate_result_text,
         _remove_signature,
         _remove_inline_space,
         _remove_end_space,
         _remove_reporting_system,
         _replace_add,
     )
Example #13
def task_scheduler2() -> None:
    s = sched.scheduler(time.time, time.sleep)
    balancesheet = rim_db.get_ts_statement('balancesheet')
    code_year = balancesheet.index
    completion_of_code_year = set(
        zip(code_year.get_level_values(0), code_year.get_level_values(1)))

    code_year_set = pipe(
        ts.pro_api().stock_basic(exchange='',
                                 list_status='L',
                                 fields='ts_code'),
        lambda x: [t.ts_code for t in x.itertuples()],  # enumerate the currently available stock codes
        lambda x: set(product(x, [f"{y}1231" for y in range(2017, 2020)]))
    )  # build (code, year) tuples

    undo_code_year_set = code_year_set - completion_of_code_year

    tasks = pipe(
        undo_code_year_set,
        lambda x: zip(count(), x),
        lambda x: groupby(x, key=lambda y: y[0] // 36),  # group into batches of 36 for rate limiting
        lambda x: [
            s.enter(
                i * 30,
                1,
                download_and_save_statement,  # schedule 36 download tasks per minute
                kwargs={'code_year_lst': [j for j in jobs]}) for i, jobs in x
        ])

    # c = list(code_year_set)
    #
    # today = datetime.datetime.now()
    # today = today.strftime("%Y-%m-%d")
    # financial_indicators = rim_db.get_financial_indicator(today)
    # code_set = financial_indicators.index
    # done_code_year_set = set(zip(code_set.get_level_values(0), code_set.get_level_values(1)))
    #
    # to_do_set = code_year_set - done_code_year_set
    # to_do_with_index = zip(to_do_set, count())
    #
    # job_groups = groupby(to_do_with_index, key=lambda x: x[1]//36)      # query and save 36 records every 30 seconds
    # for i, jobs in job_groups:
    #     s.enter(i * 30, 1, save_ts_indicator_to_db, kwargs={'code_year_lst': [j for j in jobs]})
    s.run()
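The count()/groupby() combination above is a batching idiom: pair each item with a running index, then group by index // batch_size to get fixed-size chunks. A small sketch of just that idiom:

from itertools import count, groupby

items = list("abcdefgh")
batches = [
    [item for _, item in group]
    for _, group in groupby(zip(count(), items), key=lambda t: t[0] // 3)
]
assert batches == [["a", "b", "c"], ["d", "e", "f"], ["g", "h"]]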
Example #14
def preprocess(image):
    pipeline = (
        preproc_resize,
        preproc_grayscale,
        preproc_rescale,
        #preproc_normalise,
        preproc_smooth,
        preproc_thresh,
    )
    return pipe(image, *pipeline)
Example #15
def predictions_most_frequent(label, entry):
    data_keys = label['data_keys']
    data_fn = juxt(map(op.itemgetter, data_keys))

    histogram = pipe(entry['predictions'], partial(groupby, data_fn),
                     partial(valmap, len), _ordered_dict_sorted_by_value)

    return map(lambda values: {k: v
                               for k, v in zip(data_keys, values)},
               histogram.keys())
Example #16
 async def create(self, dto: CreateTodoItemDto):
     self.items, new_item = pipe(
         self.items.keys(),
         last,
         lambda key: key + 1,
         lambda new_key: TodoItem(
             id=new_key, msg=dto.msg, is_done=dto.is_done),
         lambda item: (assoc(self.items, item.id, item), item),
     )
     return new_item
Example #17
def preprocess(image):
    pipeline = (
        preproc_resize,
        preproc_grayscale,
        preproc_rescale,
        #preproc_normalise,
        preproc_smooth,
        preproc_thresh,
    )
    return pipe(image, *pipeline)
Example #18
def kmeans_clstrs_with_corpus(corpusdf, **kwargs):
    """
    Not needed.

    Returns corpus dataframe with kmeans clusters added as a column.
    """
    return pipe(corpusdf,
                copy.deepcopy,
                lambda crps: (crps, kmeans_clusters(crps, **kwargs)),
                lambda args: args[0].assign(clusters=args[1])
                )
Example #19
def go_review_page(rank):
    def test(x):
        print(x.tag_name)
        return x

    return lambda x: pipe(x,
        find_review_anchers,
        _[rank],
        test,
        _.click(),
    )
Example #20
    def find_block_devs(self, folder):
        # Map of major_minor to path
        # Should be able to look at the paths prop for all devs, and put
        # matching MM to path back in a list.

        def build_paths(x):
            return [(x['major_minor'], path) for path in x['paths']
                    if path.startswith(folder)]

        return pipe(self.block_device_nodes.values(), cmapcat(build_paths),
                    dict)
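cmapcat here, and cmap/cfilter in the later examples, are presumably the curried toolz variants, e.g.:

from toolz.curried import map as cmap, filter as cfilter, mapcat as cmapcat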
Example #21
 def _validate_predictor_params(self, params):
     errors = None
     schema = self._get_predictor_schema()
     try:
         schema(params)
     except (Invalid, MultipleInvalid) as exc:
         errors = exc
         print("Error validing predictor params: %s" % exc)
     # Filter all hyper params that don't start with '_'
     hypers = pipe(params, self._keep_hyper_params, self._coerce_hypers)
     return hypers, errors
Example #22
def kmeans_clusters(corpusdf, n_clusters=8, random_state=1, n_init=100, **kwargs):
    """
    Fits a kmeans model on the supplied corpus (uses mini batch kmeans).
    Returns the cluster labels.
    """
    mkmeans_m = MiniBatchKMeans(n_clusters=n_clusters,
                                random_state=random_state,
                                n_init=n_init)
    return pipe(mkmeans_m,
                lambda mdl: mdl.fit(corpusdf),
                lambda mdl: mdl.labels_)
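A quick usage sketch on toy data (assumes scikit-learn and NumPy are available; two well-separated blobs should yield two clean clusters):

import numpy as np

data = np.vstack([np.zeros((10, 2)), np.full((10, 2), 5.0)])
labels = kmeans_clusters(data, n_clusters=2, n_init=10)
print(labels)  # ten of one label followed by ten of the other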
Example #23
    def __compileRegex(self):
        key_pattern = dict()
        for key, value in self.revars.items():
            r = self.__preparevalue(key[1:], value)
            key_pattern.update(r)

        piped = pipe(
            self.regex, *[
                callOnObject('replace', f'${key}', rf'(?P<{key}>{pattern})', 1)
                for key, pattern in key_pattern.items()
            ])
        self.compiled = re.compile(piped)
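callOnObject is not shown; a hypothetical definition inferred from usage (it builds a function that invokes the named method on its argument, so a method call can sit inside a pipe):

def callOnObject(method, *args, **kwargs):
    def caller(obj):
        # Look up the method by name and call it with the captured arguments.
        return getattr(obj, method)(*args, **kwargs)
    return caller

# callOnObject('replace', '$key', pattern, 1) then acts like
# lambda s: s.replace('$key', pattern, 1) inside the pipe.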
Example #24
def preprocess_skimage(image):
    return pipe(image,
                color.rgb2gray,
                #partial(restoration.denoise_bilateral, multichannel=False),
                #exposure.equalize_hist,
                partial(exposure.adjust_gamma, gamma=0.75),
                #lambda img: img > filters.threshold_local(img, block_size=11, method='mean'),
                #lambda img: img > filters.threshold_otsu(img),
                #lambda img: img > filters.threshold_local(img, block_size=7),
                #skutil.img_as_int,
                partial(transform.pyramid_expand, upscale=2)
                )
Example #25
 def id_check(author):
     """
     checks ID type and calls appropriate function
     """
     id_type = author['authorID_Type']
     id_value = author['authorID']
     funcs = {'ORCID': _orcid, 'GND': _ytc, 'Scopus': _ytc, 'WoS': _ytc,
             'Repec': _ytc}
     if id_type and id_value:
         return pipe(author, funcs[id_type], url_check, error_check)
     if id_value and not id_type:
         author.update({error_key: u'Please provide type of author ID'})
     return error_check(author)
Example #26
    def paths_to_major_minors(self, device_paths):
        """
        Create a list of device major minors for a list of
        device paths from _path_to_major_minor dict.
        If any of the paths come back as None, continue to the next.

        :param device_paths: The list of paths to get
            the list of major minors for.
        :return: list of dev_major_minors, or an empty
            list if any device_path is not found.
        """

        return pipe(device_paths, cmap(self.path_to_major_minor),
                    cfilter(None), list)
Example #27
def main():
    syntaxes_dir_path = project_dir_path / "syntaxes"
    src_json = syntaxes_dir_path / "markdown.tmLanguage.json"
    dst_json = syntaxes_dir_path / "rmarkdown.tmLanguage.json"
    addition_json = syntaxes_dir_path / "addition.json"

    content = json.loads(src_json.read_text())
    additions = json.loads(addition_json.read_text())

    # content = conv_chunkparser(content)
    # add_new_with_context = lambda content: add_new(content, additions)
    content = pipe(content, conv_chunkparser)  # , add_new_with_context)

    dst_json.write_text(json.dumps(content))
Example #28
 async def update(self, dto: UpdateTodoItemDto, id_: int):
     item = get_in([id_], self.items)
     if not item:
         return None
     self.items, new_item = pipe(
         (item, dto),
         lambda items: {
             **items[0].dict(),
             **items[1].dict(exclude_defaults=True)
         },
         lambda data: TodoItem(**data),
         lambda todo: (assoc(self.items, id_, todo), todo),
     )
     return new_item
Example #29
def process_game_data(df: pd.DataFrame) -> pd.DataFrame:

    result = pipe(
        df,
        rename_features,
        remove_forfeits,
        add_features,
        drop_features,
        clean_team_names,
        create_team_indices,
        remove_duplicate_games,
    )

    return result
Example #30
def save_reviews(product_id, tag, reviews):
    f = open("data/{}.{}.csv".format(product_id, tag), "w")
    for review in reviews:
        print(review)
        f.write("{},{}\n".format(
            tag, 
            pipe(
                review, 
                _.split("\n"),
                map(_.strip()),
                map(_.encode("utf-8")),
                SF(" ".join)(_)
            )
        ))
    f.close()
Example #31
def paths_to_major_minors(node_block_devices, ndt, device_paths):
    """
    Create a list of device major minors for a list of
    device paths from _path_to_major_minor dict.
    If any of the paths come back as None, continue to the next.

    :param node_block_devices: dict of major-minor ids keyed on path
    :param ndt: normalised device table
    :param device_paths: The list of paths to get
        the list of major minors for.
    :return: list of dev_major_minors, or an empty
        list if any device_path is not found.
    """
    c_path_to_major_minor = path_to_major_minor(node_block_devices, ndt)

    return pipe(device_paths, cmap(c_path_to_major_minor), cfilter(None), list)
Example #32
    def _init_pipeline(self):
        text_operations = [
            # Get rid of URLs
            remove_pattern(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'),
            # Take out retweet header, there is only one
            remove_pattern('RT @[a-z,A-Z]*: '),
            # Get rid of hashtags
            remove_pattern('#'),
            # Get rid of references to other screennames
            remove_pattern('@[a-z,A-Z]*'),
            # everything that's not a word character or whitespace
            remove_pattern(r'[^\w\s]'),
            # collapse runs of whitespace into single spaces
            lambda series: series.str.replace(r'\s\s+', ' ', regex=True),
            # strip series entries
            lambda series: series.str.strip()
        ]

        self.text_pipeline = lambda data: pipe(data, *text_operations)
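remove_pattern is not shown; a hypothetical definition inferred from usage, assuming each operation maps a pandas Series to a Series:

def remove_pattern(pattern):
    # Strip every match of the regex from each entry of the Series.
    return lambda series: series.str.replace(pattern, '', regex=True)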
Example #33
def parse_stratagem_results_to_influx(measurement, fs_name,
                                      stratagem_results_json):
    parse_fns = {
        "size_distribution":
        partial(parse_size_distribution, measurement, fs_name, labels),
        "user_distribution":
        partial(parse_user_distribution, measurement, fs_name),
    }

    group_counters = stratagem_results_json.get("group_counters")

    return pipe(
        [],
        partial(
            reduce,
            lambda out, cur: out + [(cur.get("name"), cur.get("counters"))],
            group_counters),
        partial(filter, lambda xs: xs[0] not in ["warn_fids", "purge_fids"]),
        partial(map, lambda xs, parse_fns=parse_fns: parse_fns[xs[0]](xs[1])),
        partial(flatten),
    )
Example #34
    def run(self, args):
        host, mount_point, uuid, report_duration, purge_duration = args[
            "client_args"]

        if report_duration is None and purge_duration is None:
            return

        action_list = [(label, args) for (duration, label, args) in [
            (
                purge_duration,
                "action_purge_stratagem",
                (mount_point, "{}-{}".format(uuid, "purge_fids-fids_expired")),
            ),
            (
                report_duration,
                "action_warning_stratagem",
                (mount_point,
                 "{}-{}".format(uuid, "warn_fids-fids_expiring_soon")),
            ),
        ] if duration is not None]

        action_list = filter(
            lambda xs: path.exists("{}/{}".format(MAILBOX_PATH, xs[1][1])),
            action_list)

        file_location = pipe(
            action_list,
            partial(map,
                    lambda xs, host=host: self.invoke_rust_agent_expect_result(
                        host, xs[0], xs[1])),
            partial(filter, bool),
            iter,
            partial(flip, next, None),
        )

        if file_location:
            self.log(u"\u2713 Scan results sent to client under:\n{}".format(
                file_location))

        return file_location
Example #35
def parse_size_distribution(measurement, fs_name, labels, counters):
    return pipe(
        counters,
        filter_out_other_counter,
        partial(
            map, lambda x: x.update(
                {"name": size_distribution_name_table[x.get("name").lower()]})
            or x),
        partial(
            map,
            lambda x: create_stratagem_influx_point(
                measurement,
                [
                    ("group_name", "size_distribution"),
                    ("counter_name", x.get("name")),
                    ("label", labels.get(x.get("name"))),
                    ("fs_name", fs_name),
                ],
                [("count", x.get("count")), ("size", x.get("size"))],
            ),
        ),
    )
Example #36
def test_pipe():
    assert pipe(1, inc) == 2
    assert pipe(1, inc, inc) == 3
    assert pipe(1, double, inc, iseven) is False
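The test reads as a compact specification of pipe: pipe(x, f, g) is g(f(x)). It assumes helpers along these lines:

def inc(x):
    return x + 1

def double(x):
    return 2 * x

def iseven(x):
    return x % 2 == 0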
Example #37
 def __call__(self, d):
     return functoolz.pipe(d, *self.filters)