Example #1
def not_correct_order(lines):
    # complement(monotonic) is True exactly when the tag indices are out of order
    valid = pipe(
        all_indices(lines),
        complement(monotonic),
    )
    return (
        valid, 'Start and end tags are not in the correct order.'
    )
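
Here complement(monotonic) builds a predicate equal to "not monotonic(...)", and pipe threads the index list through it. A minimal sketch of the same idiom with a stand-in monotonic check (all_indices and monotonic are helpers from the surrounding project, not toolz functions):

from toolz import pipe, complement

def monotonic(seq):
    """Return True if the sequence is non-decreasing."""
    return all(a <= b for a, b in zip(seq, seq[1:]))

# complement(monotonic) returns True exactly when the indices are out of order
out_of_order = pipe([3, 1, 2], complement(monotonic))
print(out_of_order)  # True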
Example #2
def __repr__(self):
    # Drop tokens whose name starts with '_ipython' or '_repr' before
    # delegating to the parent __repr__.
    self._tokens = pipe(
        self._tokens,
        filter(compose(
            complement(
                lambda s: (s.startswith('_ipython') or s.startswith('_repr'))
                if isinstance(s, str) else s
            ),
            first,
            second,
        )),
        list,
    )
    return super().__repr__()
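
compose applies its functions right to left, so each token is passed through second, then first, and finally the negated name check; the curried filter then drops tokens whose name looks IPython-internal. A rough sketch of that call order, assuming a hypothetical (position, (name, value)) token layout that the snippet does not show:

from toolz import compose, complement, first, second

def looks_internal(s):
    # stand-in for the name check in the snippet above
    return (s.startswith('_ipython') or s.startswith('_repr')) if isinstance(s, str) else s

keep = compose(complement(looks_internal), first, second)

token = (0, ('_ipython_canary_method', object()))  # hypothetical token layout
print(keep(token))  # False: second -> (name, value), first -> name, negated check -> False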
Example #3
def annotate_bed_stream(bed_stream,
                        bam_path,
                        cutoff=10,
                        extension=0,
                        contig_prefix='',
                        bp_threshold=17000):
    """Annotate all intervals from a BED-file stream.

  Yields tuple data for each interval with calculated coverage and
  completeness.

  Args:
    bed_stream (sequence): usually a BED-file handle to read from
    bam_path (str): path to BAM-file
    cutoff (int, optional): threshold for completeness calculation,
      defaults to 10
    extension (int, optional): number of bases to extend each interval
      with (+/-), defaults to 0
    contig_prefix (str, optional): rename contigs by prefixing,
      defaults to empty string
    bp_threshold (int, optional): optimization threshold for reading
      BAM-file in chunks, default to 17000

  Yields:
    tuple: :class:`chanjo.BaseInterval`, coverage (float), and
      completeness (float)
  """
    # setup: connect to BAM-file
    bam = BamFile(bam_path)

    # the pipeline
    return pipe(
        bed_stream,
        filter(complement(comment_sniffer)),  # filter out comments
        map(text_type.rstrip),  # strip invisible chars.
        map(prefix(contig_prefix)),  # prefix to contig
        map(split(sep='\t')),  # split lines
        map(do(validate_bed_format)),  # check correct format
        map(lambda row: bed_to_interval(*row)),  # convert to objects
        map(extend_interval(extension=extension)),  # extend intervals
        group_intervals(bp_threshold=bp_threshold),  # group by threshold
        map(process_interval_group(bam)),  # read coverage
        concat,  # flatten list of lists
        map(calculate_metrics(threshold=cutoff))  # calculate cov./compl.
    )
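
In this pipeline, map, filter, do and concat are presumably the curried toolz variants (each stage is built by partial application before the stream flows through it), while comment_sniffer, prefix, split and the interval helpers come from the chanjo codebase. A minimal sketch of the same stream-processing pattern using only toolz and a stand-in comment check:

from toolz import pipe, complement
from toolz.curried import filter, map

def is_comment(line):
    # stand-in for comment_sniffer
    return line.startswith('#')

lines = ['# browser position chr1', 'chr1\t10\t20\n', 'chr1\t30\t40\n']
rows = pipe(
    lines,
    filter(complement(is_comment)),       # drop comment lines
    map(str.rstrip),                      # strip trailing whitespace
    map(lambda line: line.split('\t')),   # split each line into columns
    list,
)
print(rows)  # [['chr1', '10', '20'], ['chr1', '30', '40']]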
Example #4
def add_weight(answer: dict):
    def is_a_matching_question(answer):
        return pipe(
            [answer_keys.match_left, answer_keys.incorrect],
            map(lambda k: k in answer),
            any,
        )

    # True when the answer is not a matching-type question (juxt returns a
    # tuple of results; with a single predicate, any() just unwraps it).
    needs_weight = compose(
        any,
        juxt(complement(is_a_matching_question)),
    )

    if needs_weight(answer):
        return assoc(answer, answer_keys.weight,
                     int(answer.get(answer_keys.weight, 0) and 100))

    return answer
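
juxt applies every function it is given to the same argument and returns a tuple of results; with a single predicate, any() simply unwraps that tuple, and assoc returns a new dict with one key replaced. A small sketch of the same flow with a hypothetical answer dict (the real answer_keys constants are not shown in the snippet):

from toolz import compose, juxt, complement, assoc

def is_matching(answer):
    # stand-in for is_a_matching_question
    return 'match_left' in answer or 'incorrect' in answer

needs_weight = compose(any, juxt(complement(is_matching)))

answer = {'text': '42', 'weight': 1}  # hypothetical answer payload
if needs_weight(answer):
    answer = assoc(answer, 'weight', int(answer.get('weight', 0) and 100))
print(answer)  # weight coerced to 100 because a non-zero weight was present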
Example #5
File: core.py Project: dnil/chanjo
def annotate_bed_stream(bed_stream, bam_path, cutoff=10, extension=0,
                        contig_prefix='', bp_threshold=17000):
  """Annotate all intervals from a BED-file stream.

  Yields tuple data for each interval with calculated coverage and
  completeness.

  Args:
    bed_stream (sequence): usually a BED-file handle to read from
    bam_path (str): path to BAM-file
    cutoff (int, optional): threshold for completeness calculation,
      defaults to 10
    extension (int, optional): number of bases to extend each interval
      with (+/-), defaults to 0
    contig_prefix (str, optional): rename contigs by prefixing,
      defaults to empty string
    bp_threshold (int, optional): optimization threshold for reading
      BAM-file in chunks, defaults to 17000

  Yields:
    tuple: :class:`chanjo.BaseInterval`, coverage (float), and
      completeness (float)
  """
  # setup: connect to BAM-file
  bam = BamFile(bam_path)

  # the pipeline
  return pipe(
    bed_stream,
    filter(complement(comment_sniffer)),         # filter out comments
    map(text_type.rstrip),                       # strip invisible chars.
    map(prefix(contig_prefix)),                  # prefix to contig
    map(split(sep='\t')),                        # split lines
    map(do(validate_bed_format)),                # check correct format
    map(lambda row: bed_to_interval(*row)),      # convert to objects
    map(extend_interval(extension=extension)),   # extend intervals
    group_intervals(bp_threshold=bp_threshold),  # group by threshold
    map(process_interval_group(bam)),            # read coverage
    concat,                                      # flatten list of lists
    map(calculate_metrics(threshold=cutoff))     # calculate cov./compl.
  )
Example #6
    def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        estimator=None,
        parent=None,
        feature_level=None,
        copy=False,
        extensions=[
            'harness.python.ext.base.JinjaExtension',
            'harness.python.ext.SciKit.SciKitExtension',
            'harness.python.ext.Bokeh.BokehModelsExtension',
            'harness.python.ext.Bokeh.BokehPlottingExtension',
            'harness.python.ext.Bokeh.BokehChartsExtension'
        ],
    ):
        kwargs = dict(
            estimator=estimator,
            parent=parent,
            feature_level=feature_level,
            extensions=extensions,
        )

        self.set_params(**kwargs)

        for ext in self.extensions:
            if ext not in self.env.extensions:
                self.env.add_extension(ext)
            ext = self.env.extensions[ext]
            if (ext.mixin is not None
                    and ext.mixin not in self.__class__.__bases__):
                self.__class__.__bases__ += (ext.mixin, )

        # Keep only the locals whose names appear in self._blacklist and whose
        # values are not None, then forward them to the parent constructor.
        kwargs = pipe(locals(),
                      keyfilter(partial(operator.contains, self._blacklist)),
                      valfilter(complement(lambda x: x is None)))

        super().__init__(**kwargs)
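
keyfilter and valfilter (the curried forms here) prune a dict by key and by value respectively: partial(operator.contains, ...) builds a membership test on the keys, and the complement of an is-None check keeps only entries that carry a value. A minimal sketch of that pruning step with made-up names (allowed stands in for self._blacklist):

import operator
from functools import partial
from toolz import pipe, complement
from toolz.curried import keyfilter, valfilter

allowed = {'estimator', 'parent'}  # stand-in for self._blacklist
params = {'estimator': 'linear', 'parent': None, 'irrelevant': 1}

kwargs = pipe(
    params,
    keyfilter(partial(operator.contains, allowed)),  # keep keys listed in allowed
    valfilter(complement(lambda x: x is None)),      # drop entries whose value is None
)
print(kwargs)  # {'estimator': 'linear'}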
Example #7
File: core.py Project: roryk/chanjo
def apply_bed_stream(bed_stream, bam_path, fn, extension=0,
                     contig_prefix='', bp_threshold=17000):
  """Maps a function to all intervals of a BED stream
  Args:
    bed_stream (sequence): usually a BED-file handle to read from
    bam_path (str): path to BAM-file
    fn: function that takes a list of intervals and read depths
      and computes a summary statistic over them. See
      annotator.stages.calculate_metrics for an example.
    cutoff (int, optional): threshold for completeness calculation,
      defaults to 10
    extension (int, optional): number of bases to extend each interval
      with (+/-), defaults to 0
    contig_prefix (str, optional): rename contigs by prefixing,
      defaults to empty string
    bp_threshold (int, optional): optimization threshold for reading
      BAM-file in chunks, default to 17000
  """
  # setup: connect to BAM-file
  bam = BamFile(bam_path)

  # the pipeline
  return pipe(
    bed_stream,
    filter(complement(comment_sniffer)),         # filter out comments
    map(text_type.rstrip),                       # strip invisible chars.
    map(prefix(contig_prefix)),                  # prefix to contig
    map(split(sep='\t')),                        # split lines
    map(do(validate_bed_format)),                # check correct format
    map(lambda row: bed_to_interval(*row)),      # convert to objects
    map(extend_interval(extension=extension)),   # extend intervals
    group_intervals(bp_threshold=bp_threshold),  # group by threshold
    map(process_interval_group(bam)),            # read coverage
    concat,                                      # flatten list of lists
    map(fn)                                      # map provided function
  )
Example #8
def _get_param_names(cls):
    """Ignore the parameters that are specific to the dataframe."""
    return pipe(
        super()._get_param_names(),
        filter(complement(partial(operator.contains, cls._blacklist))),
        list)
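
The same membership trick works on a plain sequence of names: partial(operator.contains, blacklist) tests membership, complement inverts it, and the curried filter keeps only the non-blacklisted parameter names. A short sketch with an assumed blacklist:

import operator
from functools import partial
from toolz import pipe, complement
from toolz.curried import filter

blacklist = frozenset({'data', 'index', 'columns'})  # assumed blacklist contents
names = ['data', 'estimator', 'index', 'parent']

kept = pipe(
    names,
    filter(complement(partial(operator.contains, blacklist))),  # drop blacklisted names
    list,
)
print(kept)  # ['estimator', 'parent']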
Example #9
def split_paths(split_paths, graph_in):
    debug("____")
    debug("split_paths:", split_paths)
    debug("graph_in:", graph_in)

    # Convert the list of split_paths into a list of vertex indices. Ignores
    # split_paths which don't match any vertices in the graph.
    # All edges pointing at the indices will be deleted from the graph.
    split_path_indices = list(unnest_iterable(map(
        split_path_spec_to_indices(graph_in),
        split_paths
    )))

    debug("split_path_indices:", split_path_indices)

    # Short circuit if there is nothing to do (split_paths didn't match any
    # vertices in the graph).
    if len(split_path_indices) == 0:
        return {"rest": graph_in}

    # If graph has multiple roots, add a single one connecting all existing
    # roots to make it easy to split the graph into 2 sets of vertices after
    # deleting edges pointing at split_path_indices.
    fake_root_name = "__root__"
    graph, root_name = add_root(fake_root_name, graph_in)

    debug("root_name", root_name)

    if (
        find_vertex_by_name_or_none(graph)(root_name).index
        in split_path_indices
    ):
        return {"main": graph_in}

    # Copy graph if add_root has not already created a copy, since we are
    # going to mutate the graph and don't want to mutate a function argument.
    graph = graph if graph is not graph_in else graph.copy()

    if DEBUG_PLOT:
        layout = graph.layout('tree')
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from split_path_indices
    # (including split_path_indices themselves). This is the set of all
    # split_paths and their dependencies.
    split_off_vertex_indices = frozenset(
        subcomponent_multi(graph, split_path_indices))
    debug("split_off_vertex_indices", split_off_vertex_indices)

    # Delete edges which point at any of the vertices in split_path_indices.
    graph.delete_edges(_target_in=split_path_indices)

    if DEBUG_PLOT:
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from the root. Since
    # edges pointing at split_path_indices have been deleted, none of the
    # split_path_indices will be included. Dependencies of rest_with_common will
    # only be included if they can be reached from any vertex which is itself
    # not in split_off_vertex_indices.
    rest_with_common = frozenset(graph.subcomponent(root_name, mode="out"))
    debug("rest_with_common", rest_with_common)

    # Get a set of all dependencies common to split_path_indices and the rest
    # of the graph.
    common = split_off_vertex_indices.intersection(rest_with_common)
    debug("common", common)

    # Get a set of vertices which cannot be reached from split_path_indices.
    rest_without_common = rest_with_common.difference(common)
    debug("rest_without_common", rest_without_common)

    # Get a set of split_path_indices and their dependencies which cannot be
    # reached from the rest of the graph.
    split_off_without_common = split_off_vertex_indices.difference(common)
    debug("split_off_without_common", split_off_without_common)

    if DEBUG_PLOT:
        def choose_color(index):
            if index in split_off_without_common:
                return "green"
            elif index in rest_without_common:
                return "red"
            else:
                return "purple"

        vertex_color = [choose_color(v.index) for v in graph.vs]

        debug_plot(
            graph,
            layout=layout,
            vertex_color=vertex_color
        )

    # Return subgraphs based on calculated sets of vertices.

    result_keys = ["main", "common", "rest"]
    result_values = [
        # Split paths and their deps (unreachable from rest of the graph).
        graph.induced_subgraph(split_off_without_common),
        # Dependencies of split paths which can be reached from the rest of the
        # graph.
        graph.induced_subgraph(common),
        # Rest of the graph (without dependencies common with split paths).
        graph.induced_subgraph(rest_without_common),
    ]

    debug('result_values', result_values[0].vs["name"])

    return tlz.valfilter(
        tlz.complement(graph_is_empty),
        dict(zip(
            result_keys,
            (
                result_values if root_name != fake_root_name
                # If root was added, remove it
                else tlz.map(remove_added_root(fake_root_name), result_values)
            )
        ))
    )
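
graph_is_empty is not defined in this snippet; presumably it reports whether a subgraph has no vertices, so the final tlz.valfilter(tlz.complement(graph_is_empty), ...) drops empty subgraphs from the returned dict. A small sketch of that last step with python-igraph, under that assumption:

import igraph
import toolz as tlz

def graph_is_empty(graph):
    # assumed behaviour: a subgraph without vertices carries no information
    return graph.vcount() == 0

subgraphs = {
    "main": igraph.Graph(n=2),
    "common": igraph.Graph(n=0),  # empty, gets dropped
    "rest": igraph.Graph(n=1),
}
print(list(tlz.valfilter(tlz.complement(graph_is_empty), subgraphs)))  # ['main', 'rest']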