def not_correct_order(lines):
    # True when the collected tag indices are *not* monotonically increasing,
    # i.e. when start/end tags appear out of order.
    valid = pipe(
        all_indices(lines),
        complement(monotonic),
    )
    return (
        valid,
        'Start and end tags are not in the correct order.'
    )
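# A minimal runnable sketch of the same pattern. `all_indices` and
# `monotonic` are project helpers not shown here; the `monotonic` stand-in
# below is a hypothetical implementation assuming "non-decreasing" semantics.
from toolz import pipe, complement

def monotonic(seq):
    seq = list(seq)
    return all(a <= b for a, b in zip(seq, seq[1:]))

assert pipe([1, 2, 3], complement(monotonic)) is False  # tags in order
assert pipe([3, 1, 2], complement(monotonic)) is True   # tags out of order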
def __repr__(self):
    self._tokens = pipe(
        self._tokens,
        filter(compose(
            complement(
                lambda s: (s.startswith('_ipython') or s.startswith('_repr'))
                if isinstance(s, str) else s,
            ),
            first,
            second,
        )),
        list,
    )
    return super().__repr__()
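# How the composed predicate evaluates. The token shape is an assumption,
# not from the source: each token is taken to nest like (index, (name, value))
# so that first(second(token)) yields the attribute name.
from toolz import compose, complement, first, second

is_private = lambda s: s.startswith('_ipython') or s.startswith('_repr')
keep = compose(complement(is_private), first, second)

assert keep((0, ('_repr_html_', None))) is False  # filtered out
assert keep((1, ('tokens', None))) is True        # kept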
def annotate_bed_stream(bed_stream, bam_path, cutoff=10, extension=0,
                        contig_prefix='', bp_threshold=17000):
    """Annotate all intervals from a BED-file stream.

    Yields tuple data for each interval with calculated coverage and
    completeness.

    Args:
        bed_stream (sequence): usually a BED-file handle to read from
        bam_path (str): path to BAM-file
        cutoff (int, optional): threshold for completeness calculation,
            defaults to 10
        extension (int, optional): number of bases to extend each interval
            with (+/-), defaults to 0
        contig_prefix (str, optional): rename contigs by prefixing,
            defaults to empty string
        bp_threshold (int, optional): optimization threshold for reading
            BAM-file in chunks, defaults to 17000

    Yields:
        tuple: :class:`chanjo.BaseInterval`, coverage (float), and
            completeness (float)
    """
    # setup: connect to BAM-file
    bam = BamFile(bam_path)

    # the pipeline
    return pipe(
        bed_stream,
        filter(complement(comment_sniffer)),         # filter out comments
        map(text_type.rstrip),                       # strip invisible chars.
        map(prefix(contig_prefix)),                  # prefix to contig
        map(split(sep='\t')),                        # split lines
        map(do(validate_bed_format)),                # check correct format
        map(lambda row: bed_to_interval(*row)),      # convert to objects
        map(extend_interval(extension=extension)),   # extend intervals
        group_intervals(bp_threshold=bp_threshold),  # group by threshold
        map(process_interval_group(bam)),            # read coverage
        concat,                                      # flatten list of lists
        map(calculate_metrics(threshold=cutoff))     # calculate cov./compl.
    )
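# The stages above work because toolz.curried provides curried map/filter:
# partially applying them yields one-argument functions that pipe can thread
# the stream through. A toy pipeline in the same style:
from toolz import pipe
from toolz.curried import filter, map

rows = pipe(
    ['# comment', 'chr1\t10\t20', 'chr2\t5\t9'],
    filter(lambda line: not line.startswith('#')),  # drop comments
    map(lambda line: line.split('\t')),             # split columns
    list,
)
assert rows == [['chr1', '10', '20'], ['chr2', '5', '9']]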
def add_weight(answer: dict):
    def is_a_matching_question(answer):
        return pipe(
            [answer_keys.match_left, answer_keys.incorrect],
            map(lambda k: k in answer),
            any,
        )

    needs_weight = compose(
        any,
        juxt(complement(is_a_matching_question)),
    )

    if needs_weight(answer):
        return assoc(
            answer,
            answer_keys.weight,
            int(answer.get(answer_keys.weight, 0) and 100),
        )
    return answer
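# juxt bundles predicates into a single function that returns a tuple of
# their results, and any() collapses that tuple; with one predicate this
# reduces to "not a matching question". (The weight itself normalises to 100
# for any truthy existing value and to 0 otherwise.) A toy illustration:
from toolz import compose, complement, juxt

is_even = lambda n: n % 2 == 0
check = compose(any, juxt(complement(is_even)))
assert check(3) is True   # (True,)  -> any -> True
assert check(4) is False  # (False,) -> any -> False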
def __init__(
    self,
    data=None,
    index=None,
    columns=None,
    estimator=None,
    parent=None,
    feature_level=None,
    copy=False,
    extensions=[
        'harness.python.ext.base.JinjaExtension',
        'harness.python.ext.SciKit.SciKitExtension',
        'harness.python.ext.Bokeh.BokehModelsExtension',
        'harness.python.ext.Bokeh.BokehPlottingExtension',
        'harness.python.ext.Bokeh.BokehChartsExtension',
    ],
):
    kwargs = dict(
        estimator=estimator,
        parent=parent,
        feature_level=feature_level,
        extensions=extensions,
    )
    self.set_params(**kwargs)

    for ext in self.extensions:
        if ext not in self.env.extensions:
            self.env.add_extension(ext)
        ext = self.env.extensions[ext]
        if (ext.mixin is not None
                and ext.mixin not in self.__class__.__bases__):
            self.__class__.__bases__ += (ext.mixin,)

    # Forward only the DataFrame-specific parameters that were actually set.
    kwargs = pipe(
        locals(),
        keyfilter(partial(operator.contains, self._blacklist)),
        valfilter(complement(lambda x: x is None)),
    )
    super().__init__(**kwargs)
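# The final keyword filtering in isolation. The _blacklist contents below are
# hypothetical stand-ins: the pattern keeps only the blacklisted (DataFrame-
# specific) keys, then drops any that were left unset.
import operator
from functools import partial
from toolz import pipe, complement
from toolz.curried import keyfilter, valfilter

_blacklist = ('data', 'index', 'columns', 'copy')
params = {'data': [1, 2], 'index': None, 'estimator': 'clf', 'copy': False}
kwargs = pipe(
    params,
    keyfilter(partial(operator.contains, _blacklist)),  # keep blacklisted keys
    valfilter(complement(lambda x: x is None)),         # drop None values
)
assert kwargs == {'data': [1, 2], 'copy': False}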
def apply_bed_stream(bed_stream, bam_path, fn, extension=0,
                     contig_prefix='', bp_threshold=17000):
    """Map a function across all intervals of a BED stream.

    Args:
        bed_stream (sequence): usually a BED-file handle to read from
        bam_path (str): path to BAM-file
        fn: function that takes a list of intervals and read depths and
            computes a summary statistic over them. See
            annotator.stages.calculate_metrics for an example.
        extension (int, optional): number of bases to extend each interval
            with (+/-), defaults to 0
        contig_prefix (str, optional): rename contigs by prefixing,
            defaults to empty string
        bp_threshold (int, optional): optimization threshold for reading
            BAM-file in chunks, defaults to 17000
    """
    # setup: connect to BAM-file
    bam = BamFile(bam_path)

    # the pipeline
    return pipe(
        bed_stream,
        filter(complement(comment_sniffer)),         # filter out comments
        map(text_type.rstrip),                       # strip invisible chars.
        map(prefix(contig_prefix)),                  # prefix to contig
        map(split(sep='\t')),                        # split lines
        map(do(validate_bed_format)),                # check correct format
        map(lambda row: bed_to_interval(*row)),      # convert to objects
        map(extend_interval(extension=extension)),   # extend intervals
        group_intervals(bp_threshold=bp_threshold),  # group by threshold
        map(process_interval_group(bam)),            # read coverage
        concat,                                      # flatten list of lists
        map(fn)                                      # map provided function
    )
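# toolz's do() runs a side-effecting function (here the BED format validator)
# and passes the value through unchanged, which is how a validation step can
# sit in the middle of a pipeline without altering the stream:
from toolz.curried import do

seen = []
assert do(seen.append)(['chr1', '10', '20']) == ['chr1', '10', '20']
assert seen == [['chr1', '10', '20']]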
@classmethod
def _get_param_names(cls):
    """Ignore the parameters that are specific to the dataframe."""
    return pipe(
        super()._get_param_names(),
        filter(complement(partial(operator.contains, cls._blacklist))),
        list,
    )
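# The same blacklist filter as a standalone toy, with hypothetical names:
# complement(contains) keeps exactly the names *not* in the blacklist.
import operator
from functools import partial
from toolz import pipe, complement
from toolz.curried import filter

_blacklist = ('data', 'index', 'columns')
assert pipe(
    ['data', 'estimator', 'columns', 'parent'],
    filter(complement(partial(operator.contains, _blacklist))),
    list,
) == ['estimator', 'parent']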
def split_paths(split_paths, graph_in):
    debug("____")
    debug("split_paths:", split_paths)
    debug("graph_in:", graph_in)

    # Convert list of split_paths into list of vertex indices. Ignores
    # split_paths which don't match any vertices in the graph.
    # All edges pointing at the indices will be deleted from the graph.
    split_path_indices = list(unnest_iterable(map(
        split_path_spec_to_indices(graph_in),
        split_paths
    )))

    debug("split_path_indices:", split_path_indices)

    # Short circuit if there is nothing to do (split_paths didn't match any
    # vertices in the graph).
    if len(split_path_indices) == 0:
        return {"rest": graph_in}

    # If graph has multiple roots, add a single one connecting all existing
    # roots to make it easy to split the graph into 2 sets of vertices after
    # deleting edges pointing at split_path_indices.
    fake_root_name = "__root__"
    graph, root_name = add_root(fake_root_name, graph_in)

    debug("root_name", root_name)

    if (
        find_vertex_by_name_or_none(graph)(root_name).index
        in split_path_indices
    ):
        return {"main": graph_in}

    # Copy graph if add_root has not already created a copy, since we are
    # going to mutate the graph and don't want to mutate a function argument.
    graph = graph if graph is not graph_in else graph.copy()

    if DEBUG_PLOT:
        layout = graph.layout('tree')
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from
    # split_path_indices (including split_path_indices themselves). This is
    # a set of all split_paths and their dependencies.
    split_off_vertex_indices = frozenset(
        subcomponent_multi(graph, split_path_indices))
    debug("split_off_vertex_indices", split_off_vertex_indices)

    # Delete edges which point at any of the vertices in split_path_indices.
    graph.delete_edges(_target_in=split_path_indices)

    if DEBUG_PLOT:
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from the root. Since
    # edges pointing at split_path_indices have been deleted, none of the
    # split_path_indices will be included. Dependencies shared with the split
    # paths will only be included if they can be reached from some vertex
    # which is itself not in split_off_vertex_indices.
    rest_with_common = frozenset(graph.subcomponent(root_name, mode="out"))
    debug("rest_with_common", rest_with_common)

    # Get a set of all dependencies common to split_path_indices and the
    # rest of the graph.
    common = split_off_vertex_indices.intersection(rest_with_common)
    debug("common", common)

    # Get a set of vertices which cannot be reached from split_path_indices.
    rest_without_common = rest_with_common.difference(common)
    debug("rest_without_common", rest_without_common)

    # Get a set of split_path_indices and their dependencies which cannot be
    # reached from the rest of the graph.
    split_off_without_common = split_off_vertex_indices.difference(common)
    debug("split_off_without_common", split_off_without_common)

    if DEBUG_PLOT:
        def choose_color(index):
            if index in split_off_without_common:
                return "green"
            elif index in rest_without_common:
                return "red"
            else:
                return "purple"

        vertex_color = [choose_color(v.index) for v in graph.vs]
        debug_plot(graph, layout=layout, vertex_color=vertex_color)

    # Return subgraphs based on calculated sets of vertices.
    result_keys = ["main", "common", "rest"]
    result_values = [
        # Split paths and their deps (unreachable from rest of the graph).
        graph.induced_subgraph(split_off_without_common),
        # Dependencies of split paths which can be reached from the rest of
        # the graph.
        graph.induced_subgraph(common),
        # Rest of the graph (without dependencies common with split paths).
        graph.induced_subgraph(rest_without_common),
    ]

    debug('result_values', result_values[0].vs["name"])

    return tlz.valfilter(
        tlz.complement(graph_is_empty),
        dict(zip(
            result_keys,
            (
                result_values
                if root_name != fake_root_name
                # If root was added, remove it
                else tlz.map(remove_added_root(fake_root_name), result_values)
            )
        ))
    )
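# The closing valfilter in isolation: keep only the non-empty parts. A toy
# stand-in for graph_is_empty over plain lists instead of igraph graphs:
import toolz as tlz

graph_is_empty = lambda g: len(g) == 0
parts = {'main': [1, 2], 'common': [], 'rest': [3]}
assert tlz.valfilter(tlz.complement(graph_is_empty), parts) == {
    'main': [1, 2],
    'rest': [3],
}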