def get_8yrs_mg_and_ms(
        code: str,
        getter: Callable[[str], pd.DataFrame] = rdb.get_financial_indicator_by_code
) -> Optional[dict]:
    """Get the geometric mean of a listed company's gross-margin growth rate
    over 8 (or more) years and return it as a dict.

    Input assumptions:
        code: stock code. The intermediate result may be None; otherwise it
            looks like (-0.02, [('2011', -0.1523), ('2012', 0.2447), ..., ('2018', 0.1009)]),
            where -0.02 is the geometric mean of the growth rates.

    Output contract:
        {'mg': -0.02,
         'years_mgr': [('2011', -0.1523), ('2012', 0.2447), ..., ('2018', 0.1009)],
         'ms': 2.87,
         'years_mg': [('2011', 0.421819), ('2012', 0.466875), ..., ('2018', 0.206218)]}
    """
    print(code)
    result = pipe(
        getter(_convert_to_ts_code(code)),
        _get_yrs_gm,
        _get_last_9_years_fi,
        _sort_years_fi,
        juxt(compose(_calc_gmean_8yrs_mg, _calc_8yrs_mg),
             compose(_calc_years_ms, _get_8_years_gm)))
    if result[0] is not None:
        return {
            'mg': result[0][0],
            'years_mgr': result[0][1],
            'ms': result[1][0],
            'years_mg': result[1][1]
        }
    else:
        return None
def main():
    # URLs to rss.xml feeds
    URLS = []
    title_hashes = load_title_hashes()
    pipe(
        URLS,
        (
            say("Fetching {} feeds...", len),
            fetch_feeds(feed_fetcher),
            parse_feeds,
            say("Found {} articles", len),
            remove_duplicates,
            say("Of those, {} are unique", len),
            hash_titles,
            remove_seen(have_seen(title_hashes)),
            say("and {} are new", len),
            say("Fetching article bodies...", id),
            fetch_bodies(body_fetcher),
            classify(NB_classifier()),
            sort,
            mark_as_seen(title_hashes),
            format_as_text,
            toolz.juxt(print, sendmail([])),
        ),
    )
def run(self, *queries: Neo4jAbstractQuery):
    results = juxt(*queries)(self.neo4j_client)
    if len(results) > 1:
        return results
    elif len(results) == 1:
        return results[0]
    else:
        return None
def main(command_fn):
    with CD("acg"):
        from main import AnkiCardGenApp

        app = AnkiCardGenApp()
        app.on_start = toolz.juxt(app.on_start, command_fn)
        app.run()
class Params():
    coordinate = lens['coordinate']
    coordinates = juxt([
        coordinate['latitude'].get(),
        coordinate['longitude'].get(),
        compose(constrain(0, 1), float, lens.Get('tolerance', '0.5').get()),
        compose(constrain(10, 19), int, lens.Get('zoom', 16).get())
    ])
    image_url = lens['image_url'].get()
def check_lines(lines):
    errors = [
        msg for boolean, msg in juxt(
            not_correct_order,
            not_enough_tags,
        )(lines) if boolean
    ]
    if errors:
        return False, '\n'.join(errors)
    return True, ''
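# Note: each validator above is expected to return a (bool, message) pair, since the
# comprehension unpacks every element of the tuple that juxt produces. A minimal,
# hypothetical sketch of such validators (not the project's real implementations):
from toolz import juxt

def not_correct_order(lines):
    # hypothetical check: flag lines that are not already sorted
    return lines != sorted(lines), "lines are out of order"

def not_enough_tags(lines):
    # hypothetical check: flag inputs with fewer than two lines
    return len(lines) < 2, "not enough tags"

ok, report = check_lines(["b", "a"])  # ok is False; report lists the failing checks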
def uni_and_bigram_tuples(string, minlen=3, maxlen=25):
    return tlz.pipe(string,
                    lower,
                    simple_split,
                    filter_longer_than(maxlen),
                    tlz.compose(tlz.concat, map_c(splitter_of_words)),
                    filter_shorter_than(minlen),
                    filter_stopwords,
                    tuple,
                    tlz.juxt(sliding_window_c(1), sliding_window_c(2)),
                    tlz.interleave,
                    map_c(join_strings("_")))
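# The juxt/interleave step at the end pairs each unigram with the bigram that starts
# at the same position. Roughly, using plain toolz (the project's curried helpers such
# as sliding_window_c are assumed to wrap these):
from toolz import interleave, juxt, sliding_window

tokens = ("quick", "brown", "fox")
mixed = list(interleave(juxt(
    lambda t: sliding_window(1, t),
    lambda t: sliding_window(2, t),
)(tokens)))
# -> [('quick',), ('quick', 'brown'), ('brown',), ('brown', 'fox'), ('fox',)]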
def export_intervals(chanjo_db, include_header=True, bed_score=0):
    r"""Return BED-formatted interval lines from existing ``chanjo_db``.

    BED lines are ready to be printed or written to a file.

    Args:
        chanjo_db (session): ``sqlalchemy.orm.session`` object with a
            ``.query``-method
        include_header (bool, optional): whether to include BED header
        bed_score (int, optional): dummy score (0-1000) to insert at field 5
            to complete the BED format

    Yields:
        str: stringified and tab-delimited interval

    Examples:
        >>> from chanjo import export_intervals, Store
        ... # instantiate a new connection to a Chanjo database
        >>> db = Store('./coverage.sqlite3')
        >>> with open('intervals.sorted.bed', 'w') as stream:
        ...     # write intervals in BED-format with appropriate headers
        ...     for bed_line in export_intervals(db):
        ...         stream.write(bed_line + '\n')
    """
    if include_header:
        yield '#chrom\tchromStart\tchromEnd\tname\tscore\tstrand'

    # set up which columns to fetch to make the BED file
    # column 5 is just a silly default for the "score" field in BED
    i = Interval  # alias
    columns = (i.contig, i.start - 1, i.end, i.id, i.strand)

    # BED files are tab-delimited
    delimiter = '\t'

    # 1. fetch interval tuples from the database (producer)
    # 2. stringify each item in each subsequence (interval tuple)
    # 3. join lines on tab-character
    # 4. prepend the header
    bed_lines = pipe(
        fetch_records(chanjo_db, columns),
        map(map(str)),                        # convert fields to strings
        map(juxt(compose(list, take(4)),      # keep first 4 fields
                 lambda _: [str(bed_score)],  # insert BED score
                 compose(list, last))),       # keep last field
        map(concat),                          # flatten each item
        map(delimiter.join)                   # join on \t
    )

    for bed_line in bed_lines:
        yield bed_line
def find_ranges(seq: Sequence[TS]) -> Sequence[Tuple[TS, TS]]:
    # consecutive dates have the same offset from their index
    ranges = pipe(
        seq,
        enumerate,                             # -> Iterator[Tuple[int, TS]]
        # groupby will include the last consecutive ts
        groupby(offset_between_index_and_ts),  # -> Dict[int, List[Tuple[int, TS]]]
        dict.values,                           # -> Iterator[List[Tuple[int, TS]]]
        map(map(last)),                        # -> Iterator[Iterator[TS]]
        map(list),                             # -> Iterator[List[TS]]
        # the range stop is the day after the last date in the group
        map(juxt(first, compose(add(pd.Timedelta(days=1)), last))),  # -> Iterator[Tuple[TS, TS]]
        list,                                  # -> List[Tuple[TS, TS]]
    )
    return ranges
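# The idea above: consecutive daily timestamps keep a constant difference from their
# position in the sequence, so grouping by that offset splits the input into maximal
# consecutive runs. A stripped-down sketch of the same idea with plain toolz/pandas
# (helper names here are illustrative, not the originals):
import pandas as pd
from toolz import groupby

def find_ranges_sketch(seq):
    # runs of consecutive days share one value of ts - i days
    groups = groupby(lambda pair: pair[1] - pd.Timedelta(days=pair[0]), enumerate(seq))
    # each range is (first day, day after the last day)
    return [(g[0][1], g[-1][1] + pd.Timedelta(days=1)) for g in groups.values()]

dates = [pd.Timestamp("2021-01-01"), pd.Timestamp("2021-01-02"), pd.Timestamp("2021-01-05")]
# find_ranges_sketch(dates) -> [(2021-01-01, 2021-01-03), (2021-01-05, 2021-01-06)]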
def run_filters(filters, inpath, outpath, file):
    logging.debug(f"Processing file {file}")
    os.makedirs(os.path.join(outpath, os.path.dirname(file)), exist_ok=True)
    with open(os.path.join(inpath, file), 'r') as fin:
        with open(os.path.join(outpath, file), 'w') as fout:
            for article_json in fin:
                article = json.loads(article_json)
                filter_results = juxt([f.filter for f in filters])(article)
                if all(filter_results):
                    logging.info(f'Including article "{article["title"]}"')
                    fout.writelines([article_json])
                else:
                    failed_filters = [
                        filters[i].name
                        for i, filter_ok in enumerate(filter_results)
                        if not filter_ok
                    ]
                    logging.info(
                        f'Excluding article "{article["title"]}". Filters '
                        f'[{", ".join(failed_filters)}] failed')
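# juxt([f.filter for f in filters])(article) evaluates every filter against the same
# article and returns a tuple of booleans, which is what the all(...) check and the
# failed-filter report rely on. A minimal sketch of the filter objects this assumes
# (the names and predicates are illustrative):
from dataclasses import dataclass
from typing import Callable

@dataclass
class ArticleFilter:
    name: str
    filter: Callable[[dict], bool]  # True means "keep the article"

filters = [
    ArticleFilter("has_title", lambda a: bool(a.get("title"))),
    ArticleFilter("long_enough", lambda a: len(a.get("text", "")) > 200),
]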
def rowfunc(t):
    children = [optimize(child, []) for child in t.children]
    funcs = [rrowfunc(_child, t._child) for _child in children]
    return compose(concat_maybe_tuples, juxt(*funcs))
def linear_program(graph: nx.DiGraph, paths: Paths) -> Result:
    cost = edge_cost(graph.number_of_edges())
    trips = list(set(map(trip, paths)))
    least_path_cost = min_path_cost(len(trips))
    gamma = cp.Parameter(nonneg=True)
    gamma.value = 0.0

    edge_index = {e: i for i, e in enumerate(graph.edges)}
    trip_index = {t: i for i, t in enumerate(trips)}

    edge_cost_of = get_by_node_pair(edge_index, cost)
    trip_cost_of = get_by_node_pair(trip_index, least_path_cost)
    out_edges_of_node = out_edges(successors(graph))  # Node -> Iterable[NodePair]

    min_trip_cost_from_path = compose(
        trip_cost_of,  # NodePair -> cp.Variable
        trip           # Path -> NodePair
    )  # Path -> cp.Variable

    compute_gaps = compose(
        starmap(path_cost_gap),
        map(juxt(min_trip_cost_from_path, path_cost(edge_cost_of)))
    )  # Iterable[Path] -> Iterable[float]

    out_edge_cost_is_normalized = compose(
        edge_cost_is_normalized(edge_cost_of),
        out_edges_of_node,
    )  # Node -> Constraint

    constraints = []
    constraints.extend(map(out_edge_cost_is_normalized, graph.nodes))
    constraints.extend(map(gap_is_optimal, compute_gaps(paths)))

    other_paths = concat(map(suboptimal_paths(graph.nodes, trip_indexed_paths(paths)), trips))
    other_paths = list(other_paths)
    suboptimal_gaps = compute_gaps(other_paths)
    constraints.extend(map(gap_is_suboptimal, suboptimal_gaps))

    out_edge_entropy = compose(
        edges_entropy(edge_cost_of),  # Iterable[NodePair] -> cp.Expression
        out_edges_of_node,            # Node -> Iterable[NodePair]
    )  # Node -> cp.Expression

    total_out_edge_entropy = compose(
        sum,                    # Iterable[cp.Expression] -> cp.Expression
        map(out_edge_entropy)   # Iterable[Node] -> Iterable[cp.Expression]
    )  # Iterable[Node] -> cp.Expression

    objective = total_out_edge_entropy(graph.nodes) - cp.sum(least_path_cost)

    problem = cp.Problem(
        cp.Maximize(objective),
        constraints
    )
    return Result(
        problem=problem,
        penalty=gamma,  # TODO: remove unused
        edge_cost=cost,
        min_trip_cost=least_path_cost,
        edge_index=edge_index,
        trip_index=trip_index,
        discovered_paths=other_paths,
    )
def compute_chunk(self, graph, dates, assets, initial_workspace):
    """
    Compute the Pipeline terms in the graph for the requested start and end
    dates.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
    dates : pd.DatetimeIndex
        Row labels for our root mask.
    assets : pd.Int64Index
        Column labels for our root mask.
    initial_workspace : dict
        Map from term -> output.
        Must contain at least an entry for `self._root_mask_term` whose shape
        is `(len(dates), len(assets))`, but may contain additional
        pre-computed terms for testing or optimization purposes.

    Returns
    -------
    results : dict
        Dictionary mapping requested results to outputs.
    """
    self._validate_compute_chunk_params(dates, assets, initial_workspace)
    get_loader = self.get_loader

    # Copy the supplied initial workspace so we don't mutate it in place.
    workspace = initial_workspace.copy()

    # If loadable terms share the same loader and extra_rows, load them all
    # together.
    loader_group_key = juxt(get_loader, getitem(graph.extra_rows))
    loader_groups = groupby(loader_group_key, graph.loadable_terms)

    for term in graph.ordered():
        # `term` may have been supplied in `initial_workspace`, and in the
        # future we may pre-compute loadable terms coming from the same
        # dataset.  In either case, we will already have an entry for this
        # term, which we shouldn't re-compute.
        if term in workspace:
            continue

        # Asset labels are always the same, but date labels vary by how
        # many extra rows are needed.
        mask, mask_dates = self._mask_and_dates_for_term(
            term, workspace, graph, dates
        )

        if isinstance(term, LoadableTerm):
            to_load = sorted(
                loader_groups[loader_group_key(term)],
                key=lambda t: t.dataset
            )
            loader = get_loader(term)
            loaded = loader.load_adjusted_array(
                to_load, mask_dates, assets, mask,
            )
            workspace.update(loaded)
        else:
            workspace[term] = term._compute(
                self._inputs_for_term(term, workspace, graph),
                mask_dates,
                assets,
                mask,
            )
            assert workspace[term].shape == mask.shape

    out = {}
    graph_extra_rows = graph.extra_rows
    for name, term in iteritems(graph.outputs):
        # Truncate off extra rows from outputs.
        out[name] = workspace[term][graph_extra_rows[term]:]
    return out
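# In compute_chunk above, juxt(get_loader, getitem(graph.extra_rows)) builds a grouping
# key of the form (loader, extra_rows), so terms that can be loaded together land in the
# same bucket. The same grouping pattern in isolation, with toy data instead of zipline
# terms:
from toolz import groupby, juxt

extra_rows = {"close": 5, "open": 5, "volume": 0}
loaders = {"close": "pricing_loader", "open": "pricing_loader", "volume": "pricing_loader"}

group_key = juxt(loaders.__getitem__, extra_rows.__getitem__)
groups = groupby(group_key, ["close", "open", "volume"])
# -> {('pricing_loader', 5): ['close', 'open'], ('pricing_loader', 0): ['volume']}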
)

xpro_courses_etl = compose(
    loaders.load_courses, xpro.transform_courses, xpro.extract_courses
)

mitx_etl = compose(
    loaders.load_courses,
    # take the first argument (the output of mitx.transform)
    first,
    # duplicate the raw responses into two streams between our transformation
    # code and the ocw/mitx manifest upload
    juxt(
        log_exceptions("Error transforming MITx response", exc_return_value=[])(
            mitx.transform
        ),
        # for the sake of not touching OCW code, we've implemented this function
        # here in discussions; it takes the concatenated raw results from MITx
        # and uploads them as a json file to the OCW bucket. We'll probably do
        # away with this at a later date when we can easily move it into OCW.
        log_exceptions("Error uploading MITx manifest to OCW")(
            ocw.upload_mitx_course_manifest
        ),
    ),
    log_exceptions("Error extracting MITx catalog", exc_return_value=[])(mitx.extract),
)

oll_etl = compose(loaders.load_courses, oll.transform, oll.extract)

see_etl = compose(loaders.load_courses, see.transform, see.extract)

mitpe_etl = compose(loaders.load_courses, mitpe.transform, mitpe.extract)

youtube_etl = compose(loaders.load_video_channels, youtube.transform, youtube.extract)
def closure(*args, **kwargs) -> tuple:
    m = toolz.compose(post, _tt_flatten, toolz.juxt([tuple_wrap(f) for f in funcs]), pre)
    return m(*args, **kwargs)
from toolz import groupby, juxt

inc = lambda x: x + 1
double = lambda x: x * 2
exp = lambda x: x * x

a = juxt(inc, double, exp)(10)
b = juxt([inc, double, exp])(10)
c = 0
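# For reference: toolz.juxt accepts either separate callables or a single iterable of
# callables, and returns a tuple of each function applied to the same arguments, so
# both forms above give the same result.
assert a == (11, 20, 100)
assert b == (11, 20, 100)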
from .models import WeatherUndergroundObservation, WeatherUndergroundObservationSchema  # NOQA

log = logging.getLogger(__name__)

get_observations = compose(                # extract observations from api
    extract_observations,                  # get observations from payload
    get(0),                                # drop the deserialization errors
    WeatherUndergroundAPIResponse().load,  # deserialize api response
    query_api                              # query the api
)

collect_data = compose(                    # create observation models from api response
    do(compose(log.info, "Created {} observations".format, len)),
    process_response,                      # create observations models
    fapply(map),                           # merge metadata into each observation
    juxt(process_metadata, get_observations)  # query params as metadata
)


def collect_many(api_key, on_dates, zipcodes, t):
    """Collect data over many dates and zipcodes

    :param api_key: str weather underground api key
    :param on_dates: list of dates
    :param zipcodes: list of zipcodes
    :param t: float delay between api calls
    :return: list of observations
    """
    collect_one = curry(collect_data)
    process = compose(flatten, map_sleep(t, fapply(collect_one(api_key))), zip)
    return process(on_dates, zipcodes)
def filter_titles_regex_whitelist(whitelist_regexes, article):
    return any(juxt([x.match for x in whitelist_regexes])(article['title']))
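# juxt([x.match for x in whitelist_regexes]) applies every compiled pattern's .match to
# the same title, returning a tuple of match objects or Nones; any(...) is then true as
# soon as one whitelist pattern matches the start of the title. A small usage sketch
# (the patterns and article are made up for illustration):
import re

whitelist = [re.compile(r"python", re.I), re.compile(r"toolz", re.I)]
article = {"title": "Toolz 1.0 released"}

assert filter_titles_regex_whitelist(whitelist, article) is True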
    ts = start_ts_parsed_from_raw_data(raw_data)
    # the date might need to be corrected
    return (ts - pd.Timedelta(days=(0 if ts.hour > 18 else 1))).round("D")


ROOT_FIELDS = ["timeInBed", "minutesAsleep", "efficiency"]
LEVELS_FIELDS = ["wake", "light", "deep", "rem"]
HEADER = ["startTime"] + ROOT_FIELDS + LEVELS_FIELDS

parsed_levels = [
    compose(get("minutes"), level_getter, get("summary"), get("levels"))
    for level_getter in map(get(default={"minutes": 0}), LEVELS_FIELDS)
]

parsed_row = juxt(
    start_date_parsed_from_raw_data,
    start_ts_parsed_from_raw_data,
    *map(get, ROOT_FIELDS),
    *parsed_levels,
)

longest_sleep = curry(max)(key=get(2))


def parsed_data(raw_data: RawData) -> pd.DataFrame:
    parsed_rows: Iterator[Tuple] = map(parsed_row, raw_data)
    dates_sleeps: List[List[Tuple]] = groupby(first, parsed_rows).values()
    dates_longest_sleep: Iterator[Tuple] = map(longest_sleep, dates_sleeps)
    parsed_columns: List[Tuple] = list(zip(*dates_longest_sleep))
    dates = pd.DatetimeIndex(parsed_columns[0], name="dates")
    body: List[Tuple] = list(zip(*parsed_columns[1:]))
    if len(body[0]) != len(HEADER):
        breakpoint()
    data = pd.DataFrame(body, columns=HEADER, index=dates)
def rowfunc(t):
    funcs = [rrowfunc(_child, t._child) for _child in t.children]
    return compose(concat_maybe_tuples, juxt(*funcs))
def compute_chunk(self, graph, dates, assets, initial_workspace):
    """
    Compute the Pipeline terms in the graph for the requested start and end
    dates.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
    dates : pd.DatetimeIndex
        Row labels for our root mask.
    assets : pd.Int64Index
        Column labels for our root mask.
    initial_workspace : dict
        Map from term -> output.
        Must contain at least an entry for `self._root_mask_term` whose shape
        is `(len(dates), len(assets))`, but may contain additional
        pre-computed terms for testing or optimization purposes.

    Returns
    -------
    results : dict
        Dictionary mapping requested results to outputs.
    """
    self._validate_compute_chunk_params(dates, assets, initial_workspace)
    get_loader = self.get_loader

    # Copy the supplied initial workspace so we don't mutate it in place.
    workspace = initial_workspace.copy()
    refcounts = graph.initial_refcounts(workspace)
    execution_order = graph.execution_order(refcounts)

    # If loadable terms share the same loader and extra_rows, load them all
    # together.
    loadable_terms = graph.loadable_terms
    loader_group_key = juxt(get_loader, getitem(graph.extra_rows))
    loader_groups = groupby(
        loader_group_key,
        # Only produce loader groups for the terms we expect to load.  This
        # ensures that we can run pipelines for graphs where we don't have a
        # loader registered for an atomic term if all the dependencies of
        # that term were supplied in the initial workspace.
        (t for t in execution_order if t in loadable_terms),
    )

    for term in graph.execution_order(refcounts):
        # `term` may have been supplied in `initial_workspace`, and in the
        # future we may pre-compute loadable terms coming from the same
        # dataset.  In either case, we will already have an entry for this
        # term, which we shouldn't re-compute.
        if term in workspace:
            continue

        # Asset labels are always the same, but date labels vary by how
        # many extra rows are needed.
        mask, mask_dates = graph.mask_and_dates_for_term(
            term,
            self._root_mask_term,
            workspace,
            dates,
        )

        if isinstance(term, LoadableTerm):
            to_load = sorted(
                loader_groups[loader_group_key(term)],
                key=lambda t: t.dataset
            )
            loader = get_loader(term)
            loaded = loader.load_adjusted_array(
                to_load, mask_dates, assets, mask,
            )
            assert set(loaded) == set(to_load), (
                'loader did not return an AdjustedArray for each column\n'
                'expected: %r\n'
                'got: %r' % (sorted(to_load), sorted(loaded))
            )
            workspace.update(loaded)
        else:
            workspace[term] = term._compute(
                self._inputs_for_term(term, workspace, graph),
                mask_dates,
                assets,
                mask,
            )
            if term.ndim == 2:
                assert workspace[term].shape == mask.shape
            else:
                assert workspace[term].shape == (mask.shape[0], 1)

        # Decref dependencies of ``term``, and clear any terms whose
        # refcounts hit 0.
        for garbage_term in graph.decref_dependencies(term, refcounts):
            del workspace[garbage_term]

    out = {}
    graph_extra_rows = graph.extra_rows
    for name, term in iteritems(graph.outputs):
        # Truncate off extra rows from outputs.
        out[name] = workspace[term][graph_extra_rows[term]:]
    return out
def rowfunc(t):
    funcs = list(map(recursive_rowfunc, t.children))
    return compose(concat_maybe_tuples, juxt(*funcs))