def apropos(keyword="", notes=False):
    """
    Search all command descriptions for those related to a user-defined keyword.
    """
    out_list = set()

    for i in CmdManager.cmd_obj_list:
        if keyword in CmdManager.cmd_obj_list[i].desc:
            out_list.add(i)
        if notes:
            try:
                if keyword in CmdManager.cmd_obj_list[i].notes:
                    out_list.add(i)
            except (TypeError, AttributeError):
                # Some commands have no notes.
                pass

    if out_list:
        message(">> Keyword '" + keyword + "' was found in the following command(s):",
                force=True)
        for i in out_list:
            print("\t- " + i + ".")
    else:
        message(">> Keyword '" + keyword + "' was not found.",
                force=True)

def intronic(inputfile=None,
             outputfile=None,
             names='transcript_id',
             separator="_",
             intron_nb_in_name=False,
             no_feature_name=False,
             by_transcript=False):
    """
    Extract intronic regions.
    """

    message("Searching for intronic regions.")

    # The GTF needs to be loaded even if it comes from <stdin>.
    gtf = GTF(inputfile, check_ensembl_format=False)

    if not by_transcript:
        introns_bo = gtf.get_introns()

        for i in introns_bo:
            write_properly(chomp(str(i)), outputfile)
    else:
        introns_bo = gtf.get_introns(by_transcript=True,
                                     name=names.split(","),
                                     sep=separator,
                                     intron_nb_in_name=intron_nb_in_name,
                                     feat_name=not no_feature_name)
        for i in introns_bo:
            write_properly(chomp(str(i)), outputfile)

    gc.disable()
    close_properly(outputfile, inputfile)

def bed_to_gtf(inputfile=None,
               outputfile=None,
               ft_type="transcript",
               source="Unknown"):
    """
    Convert a bed file to a gtf. This will make the poor bed feel as if it was
    a nice gtf (but with lots of empty fields...). May be helpful sometimes...
    """

    message("Converting the BED file into a GTF file.")

    # If the BED comes from <stdin>, dump it to a temporary file first
    # so that it can be handed to BedTool.
    if inputfile.name == '<stdin>':
        tmp_file = make_tmp_file(prefix="input_bed", suffix=".bed")
        for i in inputfile:
            write_properly(chomp(str(i)), tmp_file)

        tmp_file.close()
        inputfile.close()

        bed_obj = BedTool(tmp_file.name)
    else:
        bed_obj = BedTool(inputfile.name)

    n = 1
    for i in bed_obj:

        if i.strand == "":
            i.strand = "."
        if i.name == "":
            i.name = str("feature_" + str(n))
        if i.score == "":
            i.score = "0"

        if ft_type == "exon":
            key_value = "gene_id \"" + i.name + "\"; " + \
                        "transcript_id \"" + i.name + "\"; " + \
                        "exon_id \"" + i.name + "\";"
        elif ft_type == "gene":
            key_value = "gene_id \"" + i.name + "\";"
        else:
            key_value = "gene_id \"" + i.name + "\"; " + \
                        "transcript_id \"" + i.name + "\";"

        if pygtftk.utils.ADD_CHR == 1:
            chrom_out = "chr" + i.chrom
        else:
            chrom_out = i.chrom

        list_out = [chrom_out,
                    source,
                    ft_type,
                    str(i.start + 1),
                    str(i.end),
                    str(i.score),
                    i.strand,
                    ".",
                    key_value]

        write_properly("\t".join(list_out), outputfile)

        n += 1

    gc.disable()
    close_properly(outputfile)

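# A minimal, self-contained sketch (independent from pygtftk, toy values) of the
# coordinate convention handled by bed_to_gtf above: BED is 0-based and half-open,
# GTF is 1-based and inclusive, so only the start coordinate is shifted by +1.
bed_record = ("chr1", 100, 200, "featA", 0, "+")  # hypothetical BED line
chrom, start, end, name, score, strand = bed_record
gtf_fields = [chrom, "Unknown", "transcript", str(start + 1), str(end),
              str(score), strand, ".",
              'gene_id "{n}"; transcript_id "{n}";'.format(n=name)]
print("\t".join(gtf_fields))  # -> chr1  Unknown  transcript  101  200  0  +  . ...
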
def find_interesting_combinations(self):
    """
    Main entry point: calls the other steps in order.
    """

    ## Hardcode ignoring of Python (and by extension sklearn) warnings
    try:
        previous_warning_level = os.environ["PYTHONWARNINGS"]
    except KeyError:
        previous_warning_level = 'default'

    if utils.VERBOSITY < 2:  # Only if not debugging
        os.environ["PYTHONWARNINGS"] = "ignore"
        # warnings.filterwarnings('ignore', module='^{}\.'.format(re.escape("sklearn")))
        message("Filtering out sklearn warnings.")

    ## Now call the functions
    self.generate_candidate_words()
    self.filter_library()
    self.select_best_words_from_library()

    # Re-enable warnings
    os.environ["PYTHONWARNINGS"] = previous_warning_level

    return self.best_words

def select_best_words_from_library(self):
    """
    This is step 2: take the library of candidates produced at step 1 and
    select the N words that best rebuild the original matrix.
    """

    # You can't request more words than are actually present in the library,
    # nor than there are unique elements (rows) in the data.
    upper_floor_words = min(self.number_of_words_in_library,
                            len(np.unique(self.data, axis=0)))

    if self.queried_words_nb > upper_floor_words:
        self.queried_words_nb = upper_floor_words
        message("Requesting too many words, reducing to " + str(self.queried_words_nb))
        # NOTE It will actually be +1 to make room for the root (0,0,0,...) word,
        # but this is added later.

    # Read the parameters that were supplied when creating the Modl object
    best_dict = modl_subroutines.build_best_dict_from_library(
        self.data, self.library,    # Data and library of candidates
        self.queried_words_nb,      # N best words
        self.error_function,        # Potential custom error function
        self.nb_threads,
        self.normalize_words,       # Normalize words by sum of squares
        self.step_2_alpha)          # Sparsity control

    # Final step: register the best dictionary
    self.best_words = best_dict

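# Tiny standalone illustration (hypothetical data, independent from pygtftk) of the
# cap applied above: you can never query more words than there are unique rows in
# the input binary matrix.
import numpy as np

data = np.array([[1, 0, 1],
                 [1, 0, 1],
                 [0, 1, 1]])
print(len(np.unique(data, axis=0)))  # 2 unique rows -> at most 2 words can be queried
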
def midpoints(inputfile=None,
              outputfile=None,
              ft_type="transcript",
              names="transcript_id",
              separator="|"):
    """
    Get the midpoint coordinates for the requested feature.
    """

    message("Loading input file...")
    if inputfile.name == '<stdin>':
        is_gtf = True
    else:
        region_bo = BedTool(inputfile.name)
        if len(region_bo) == 0:
            message("Unable to find requested regions",
                    type="ERROR")

        if region_bo.file_type == 'gff':
            is_gtf = True
        else:
            is_gtf = False

    if is_gtf:
        gtf = GTF(inputfile.name, check_ensembl_format=False)

        bed_obj = gtf.select_by_key("feature",
                                    ft_type).get_midpoints(name=names.split(","),
                                                           sep=separator)
        for line in bed_obj:
            write_properly(chomp(str(line)), outputfile)

    else:
        for line in region_bo:

            diff = line.end - line.start

            if diff % 2 != 0:
                # Odd length: a single central base.
                # e.g. 10-13 (zero-based) -> 11-13 one-based;
                # midpoint is 12 (one-based) -> 11-12 (zero-based)
                # e.g. 949-1100 (zero-based) -> 950-1100 one-based;
                # midpoint is 1025 (one-based) -> 1024-1025 (zero-based)
                # floored division (python 2)...
                line.end = line.start + int(diff // 2) + 1
                line.start = line.end - 1
            else:
                # Even length: no single center, take the two central bases.
                # e.g. 10-14 (zero-based) -> 11-14 one-based;
                # midpoint is 12-13 (one-based) -> 11-13 (zero-based)
                # e.g. 9-5101 (zero-based) -> 10-5101 one-based;
                # midpoint is 2555-2556 (one-based) -> 2554-2556 (zero-based)
                # floored division (python 2)...
                line.start = line.start + int(diff // 2) - 1
                line.end = line.start + 2

            outputfile.write(str(line))

    gc.disable()
    close_properly(outputfile, inputfile)

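# A standalone sketch (not part of pygtftk) of the midpoint arithmetic used above,
# applied to plain integers instead of BedTool intervals (zero-based, half-open).
def midpoint_interval(start, end):
    """Return the zero-based (start, end) of the 1-2 bp midpoint of [start, end)."""
    diff = end - start
    if diff % 2 != 0:
        new_end = start + diff // 2 + 1
        return new_end - 1, new_end      # odd length: single central base
    new_start = start + diff // 2 - 1
    return new_start, new_start + 2      # even length: the two central bases

assert midpoint_interval(10, 13) == (11, 12)
assert midpoint_interval(10, 14) == (11, 13)
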
def negbin_pval(k, mean, var, precision=320, ft_type="Unknown"):
    r"""
    P-value for a negative binomial distribution of the given moments (mean, var).

    This is the two-sided p-value: it will return the minimum of the left-sided
    and right-sided p-value.

    NOTE : To prevent division by zero or a negative r, if the mean is higher
    than or equal to the variance, the variance is set to mean + 1 and a
    warning is sent.

    :param k: the critical value for which the pvalue is computed.
    :param mean: The mean for the negative binomial model.
    :param var: The variance for the negative binomial model.
    :param precision: Floating point precision of mpmath. Should be at least 320.
    :param ft_type: The name of the feature to be tested (just for meaningful messages).

    >>> from pygtftk.stats.negbin_fit import negbin_pval
    >>> mean = 18400
    >>> var = 630200
    >>> k = 65630
    >>> pval = negbin_pval(k, mean, var)
    >>> import math
    >>> assert(math.isclose(pval,1.1999432787236828e-307))

    """

    if mean < 1:
        mean = 1
        msg = "Computing log(p-val) for a Neg Binom with mean < 1 ; mean was set to 1 (" + ft_type + ")"
        message(msg, type='WARNING')

    # This is necessary, since r must be above 0.
    if mean >= var:
        var = mean + 1
        msg = "Computing log(p-val) for a Neg Binom with mean >= var ; var was set to mean+1 (" + ft_type + ")"
        message(msg, type='WARNING')

    # Floating point precision of mpmath. Should be at least 320.
    mpmath.mp.dps = precision

    # Calculate r and p based on mean and var
    r = mpmath.mpf(mean ** 2 / (var - mean))
    p = mpmath.mpf(1 / (mean / r + 1))

    # To circumvent scipy floating point precision issues, we implement a
    # custom p-value calculation (see 'beta.py' for details)
    mybetacalc = BetaCalculator(use_log=True, precision=precision, ft_type=ft_type)
    incomplete_beta = mybetacalc.betainc(a=r, b=k + 1, x=p)
    complete_beta = mybetacalc.beta(a=r, b=k + 1)

    # Take the minimum of CDF and SF
    pval = 1 - (incomplete_beta / complete_beta)
    twosided_pval = min(pval, 1 - pval)

    # Convert back to Python float and return
    return float(twosided_pval)

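# A small sanity check (not part of pygtftk; assumes scipy is installed) that the
# (r, p) parameterisation used above reproduces the requested moments: with
# r = mean**2 / (var - mean) and p = r / (r + mean), a negative binomial in
# scipy's (n, p) convention has exactly this mean and variance. This is only a
# moment check, independent of pygtftk's own mpmath-based p-value computation.
from scipy.stats import nbinom

mean, var = 18400, 630200
r = mean ** 2 / (var - mean)
p = r / (r + mean)  # equivalent to 1 / (mean / r + 1)

m_check, v_check = nbinom.stats(r, p, moments='mv')
print(float(m_check), float(v_check))  # -> 18400.0 630200.0 (up to floating point error)
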
def del_attr(inputfile=None,
             outputfile=None,
             key="transcript_id",
             reg_exp=False,
             invert_match=False):
    """
    Delete extended attributes in the target gtf file. 'key' can be a
    comma-separated list of attributes.
    """

    # ----------------------------------------------------------------------
    # Read the GTF and get the list of attributes
    # ----------------------------------------------------------------------

    gtf = GTF(inputfile, check_ensembl_format=False)
    attr_list = gtf.attr_extended

    # ----------------------------------------------------------------------
    # If regExp, select the corresponding keys
    # ----------------------------------------------------------------------

    if reg_exp:
        key_list = []
        try:
            rgxp = re.compile(key)
        except re.error:
            message("Check the regular expression please.", type="ERROR")
        for attr in attr_list:
            if rgxp.search(attr):
                key_list += [attr]
    else:
        key_list = key.split(",")

    # ----------------------------------------------------------------------
    # If invert-match, select all attributes but the requested ones
    # ----------------------------------------------------------------------

    key_to_del = []
    if invert_match:
        for attr in attr_list:
            if attr not in key_list:
                key_to_del += [attr]
    else:
        key_to_del = key_list

    # ----------------------------------------------------------------------
    # Delete the keys
    # ----------------------------------------------------------------------

    gtf = gtf.del_attr(feat="*",
                       keys=",".join(key_to_del),
                       force=True).write(outputfile, gc_off=True)

    close_properly(outputfile, inputfile)

def __call__(self, parser, namespace, values, option_string=None):

    if not os.path.exists(CmdManager.config_dir):
        message("Please run gtftk -h before adding additional plugins",
                force=True)
        sys.exit(0)

    # Create an empty 'reload' flag file; its presence triggers a plugin
    # reload at the next startup.
    open(os.path.join(CmdManager.config_dir, "reload"), "w")

    message("Plugins will be updated at next startup.", force=True)

    sys.exit()

def dump_plugins(self):
    """Save the plugins into a pickle object."""

    message("Dumping plugins", force=True)

    f_handler = open(CmdManager.dumped_plugin_path, "wb")
    pick = cloudpickle.CloudPickler(f_handler)
    pick.dump((self.cmd_obj_list, self.parser))
    f_handler.close()

def add_exon_nb(inputfile=None, outputfile=None, exon_numbering_key=None):
    """Add the exon number to each exon (based on 5' to 3' orientation)."""

    message("Calling add_exon_number.", type="DEBUG")

    GTF(inputfile.name,
        check_ensembl_format=False
        ).add_exon_number(exon_numbering_key
                          ).write(outputfile, gc_off=True)

    close_properly(inputfile, outputfile)

def contfractbeta(self, a, b, x):
    r"""
    Evaluates the continued fraction form of the incomplete Beta function.

    Code translated from: GNU Scientific Library.

    Uses the modified Lentz's method. You can see a representation of this
    form in the Digital Library of Mathematical Functions
    <https://dlmf.nist.gov/8.17#SS5.p1>.

    The goal of the method is to calculate the successive 'd' terms,
    separately for odd and even ones.
    """

    a, b, x = mpmath.mpf(a), mpmath.mpf(b), mpmath.mpf(x)

    num_term = 1.0
    den_term = 1.0 - (a + b) * x / (a + 1.0)
    den_term = 1.0 / den_term
    cf = den_term

    for i in range(self.itermax + 1):

        k = i + 1
        coeff = k * (b - k) * x / (((a - 1.0) + 2 * k) * (a + 2 * k))

        # First step of the recurrence
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term = 1.0 / den_term

        delta_frac = den_term * num_term
        cf *= delta_frac

        coeff = -(a + k) * (a + b + k) * x / ((a + 2 * k) * (a + 2 * k + 1.0))

        # Second step
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term = 1.0 / den_term

        delta_frac = den_term * num_term
        cf *= delta_frac

        # Are we done ?
        if abs(delta_frac - 1.0) < 2.0 * self.epsilon:
            return cf

    # If we failed to converge, return our best guess but send a warning
    msg = 'a or b too large or given itermax too small for computing incomplete'
    msg += ' beta function ; pval may be slightly erroneous for feature (' + self.ft_type + ').'
    message(msg, type='WARNING')
    return cf

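# A sanity-check sketch (not part of pygtftk; assumes scipy is installed) relating the
# continued fraction above to the regularized incomplete beta function. Assuming the
# fraction follows the GSL / Numerical Recipes 'betacf' form, for 0 < x < 1:
#   I_x(a, b) = x**a * (1 - x)**b / (a * B(a, b)) * contfractbeta(a, b, x)
# The BetaCalculator is constructed with the same keywords as in negbin_pval above;
# the chosen (a, b, x) values are arbitrary toy numbers.
import math
from scipy.special import betainc, betaln

calc = BetaCalculator(use_log=True, precision=320, ft_type="example")
a, b, x = 2.5, 7.0, 0.2
cf = float(calc.contfractbeta(a, b, x))
i_x = math.exp(a * math.log(x) + b * math.log(1 - x) - betaln(a, b)) * cf / a
assert math.isclose(i_x, betainc(a, b, x), rel_tol=1e-6)
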
def get_datasets(self, database=None):

    message("Listing available datasets", type="DEBUG")

    if database in self.databases:
        self.query(query={'type': 'datasets', 'mart': database})

        for i in self.response.text.split("\n"):
            fields = i.split("\t")
            if len(fields) > 1:
                self.datasets[fields[1]] = fields[2:]
    else:
        message("Database not found.")

def select_by_max_exon_nb(inputfile=None, outputfile=None):
    """
    For each gene, select the transcript with the highest number of exons.
    """

    msg = "Selecting the transcript with the highest number of exons for each gene."
    message(msg)

    gtf = GTF(inputfile,
              check_ensembl_format=False
              ).select_by_max_exon_nb()

    gtf.write(outputfile, gc_off=True)

def _get_databases(self):

    message("Listing available databases", type="DEBUG")

    try:
        self.query(query={'type': 'registry'})
    except ConErr:
        message("A connection error was raised.", type="ERROR")

    tree = ElementTree.fromstring(self.response.content)

    for child in tree:
        if child.tag == 'MartURLLocation':
            self.databases += [child.attrib['name']]

def select_most_5p_tx(inputfile=None, outputfile=None, keep_gene_lines=False):
    """
    Select the most 5' transcript of each gene.
    """

    message("Selecting the most 5' transcript of each gene.")

    gtf = GTF(inputfile)

    if keep_gene_lines:
        gtf = gtf.select_5p_transcript()
    else:
        # Discard gene lines (third positional argument is invert_match).
        gtf = gtf.select_5p_transcript().select_by_key("feature", "gene", 1)

    gtf.write(outputfile, gc_off=True)

def _find_plugins():

    message("Searching plugins", force=True)

    config_file = CmdManager.config_file

    # User plugins
    plugin_dir_user = yaml.load(open(config_file, "r"),
                                Loader=yaml.FullLoader)["plugin_path"]
    sys.path.append(plugin_dir_user)

    plugins = sorted(os.listdir(plugin_dir_user))
    plugins_user = [os.path.join(plugin_dir_user, x) for x in plugins]

    # System-wide plugins (those declared in the plugins directory of the
    # pygtftk source)
    plugin_dir_base = os.path.join(pygtftk.__path__[0], "plugins")
    sys.path.append(plugin_dir_base)

    plugins = sorted(os.listdir(plugin_dir_base))
    plugins_system = [os.path.join(plugin_dir_base, x) for x in plugins]

    plugins = plugins_user + plugins_system

    for plug in plugins:

        if plug.endswith(".py") and plug != "__init__.py":

            # Loading the plugin should force the code to create
            # a cmdObject that will be added to the CmdManager
            # (e.g. gtftk.plugins.tss_dist).
            module_name = re.sub(r"\.py$", "", plug)
            module_name = re.sub("/", ".", module_name)
            module_name = re.sub(".*pygtftk", "pygtftk", module_name)

            try:
                SourceFileLoader(module_name, plug).load_module()
            except Exception as e:
                message("Failed to load plugin: " + plug, type="WARNING")
                print(e)

        elif plug.endswith(".R"):
            pass
            # declare_r_cmd(plugin_path, plug)

    CmdManager.reload = False

    if os.path.exists(os.path.join(CmdManager.config_dir, "reload")):
        os.remove(os.path.join(CmdManager.config_dir, "reload"))

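# Standalone illustration (hypothetical path, not part of pygtftk) of the
# module-name rewriting performed above when loading a plugin file.
import re

plug = "/home/user/lib/pygtftk/plugins/tss_dist.py"
module_name = re.sub(r"\.py$", "", plug)
module_name = re.sub("/", ".", module_name)
module_name = re.sub(".*pygtftk", "pygtftk", module_name)
print(module_name)  # -> pygtftk.plugins.tss_dist
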
def make_tmp_file_pool(prefix='tmp',
                       suffix='',
                       store=True,
                       dir=None):
    """
    Create a temporary file. If *store* is True, the file name is registered
    in TMP_FILE_POOL_MANAGER so that it can be cleaned up later.

    :Example:

    >>> from pygtftk.utils import make_tmp_file_pool
    >>> tmp_file = make_tmp_file_pool()
    >>> assert os.path.exists(tmp_file.name)
    >>> tmp_file = make_tmp_file_pool(prefix="pref")
    >>> assert os.path.exists(tmp_file.name)
    >>> tmp_file = make_tmp_file_pool(suffix="suf")
    >>> assert os.path.exists(tmp_file.name)

    """

    dir_target = None

    if dir is None:
        if pygtftk.utils.TMP_DIR is not None:
            if not os.path.exists(pygtftk.utils.TMP_DIR):
                msg = "Creating directory {d}."
                message(msg.format(d=pygtftk.utils.TMP_DIR), type="INFO")
                os.mkdir(pygtftk.utils.TMP_DIR)
            dir_target = pygtftk.utils.TMP_DIR
    else:
        if not os.path.exists(dir):
            msg = "Creating directory {d}."
            message(msg.format(d=dir), type="INFO")
            os.mkdir(dir)
        dir_target = dir

    tmp_file = NamedTemporaryFile(delete=False,
                                  mode='w',
                                  prefix=prefix + "_pygtftk_",
                                  suffix=suffix,
                                  dir=dir_target)

    if store:
        TMP_FILE_POOL_MANAGER.append(tmp_file.name)

    return tmp_file

def __call__(self, minibatch_len, seed, id):
    my_result = compute_all_intersections_minibatch(self.Lr1, self.Li1,
                                                    self.Lrs, self.Lis,
                                                    self.all_chrom1, self.all_chrom2,
                                                    minibatch_len,
                                                    self.use_markov_shuffling,
                                                    self.keep_intact_in_shuffling,
                                                    self.nb_threads,
                                                    seed=seed)

    message("--- Minibatch nb. : " + str(id) + " is complete.")

    return my_result

def select_by_tx_size(inputfile=None, outputfile=None, min_size=None, max_size=None):
    """
    Select transcripts based on their mature (spliced) size.
    """

    msg = "Selecting 'mature/spliced' transcripts by size (range: [{m},{M}])."
    msg = msg.format(m=str(min_size), M=str(max_size))
    message(msg)

    GTF(inputfile
        ).select_by_transcript_size(min_size, max_size
                                    ).write(outputfile, gc_off=True)

def select_by_nb_exon(inputfile=None,
                      outputfile=None,
                      min_exon_number=None,
                      max_exon_number=None):
    """
    Select transcripts based on the number of exons.
    """

    msg = "Selecting transcripts by exon number (range: [{m},{M}])."
    msg = msg.format(m=str(min_exon_number), M=str(max_exon_number))
    message(msg)

    gtf = GTF(inputfile,
              check_ensembl_format=False
              ).select_by_number_of_exons(min_exon_number,
                                          max_exon_number)

    gtf.write(outputfile, gc_off=True)
    close_properly(outputfile, inputfile)

def exon_sizes(inputfile=None,
               outputfile=None,
               key_name=None):
    """
    Add a new key to transcript features containing a comma-separated list of
    exon sizes.
    """

    gtf = GTF(inputfile)

    all_tx_ids = gtf.get_tx_ids(nr=True)
    tx_to_size_list = dict()

    exons_starts = gtf.select_by_key("feature",
                                     "exon").extract_data("transcript_id,start",
                                                          as_dict_of_merged_list=True,
                                                          no_na=True,
                                                          nr=False)
    if not len(exons_starts):
        message("No exon found.", type="ERROR")

    exons_ends = gtf.select_by_key("feature",
                                   "exon").extract_data("transcript_id,end",
                                                        as_dict_of_merged_list=True,
                                                        no_na=True,
                                                        nr=False)

    strands = gtf.select_by_key("feature",
                                "transcript").extract_data("transcript_id,strand",
                                                           as_dict_of_values=True,
                                                           no_na=True,
                                                           nr=True,
                                                           hide_undef=True)

    for tx_id in all_tx_ids:
        size_list = []
        for s, e in zip(exons_starts[tx_id], exons_ends[tx_id]):
            size = str(int(e) - int(s) + 1)
            size_list += [size]
        if strands[tx_id] == "-":
            size_list = reversed(size_list)
        tx_to_size_list[tx_id] = ",".join(size_list)

    if len(tx_to_size_list):
        gtf = gtf.add_attr_from_dict(feat="transcript",
                                     key="transcript_id",
                                     a_dict=tx_to_size_list,
                                     new_key=key_name)

    gtf.write(outputfile, gc_off=True)
    close_properly(outputfile, inputfile)

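# Toy standalone example (hypothetical coordinates, not part of pygtftk) of the size
# computation above: GTF coordinates are 1-based and inclusive, so an exon spanning
# 1000..1199 has 200 bases, and the size list is reported 5' to 3' (hence reversed
# on the minus strand).
exon_starts, exon_ends, strand = [1000, 2000], [1199, 2099], "-"
sizes = [str(e - s + 1) for s, e in zip(exon_starts, exon_ends)]
if strand == "-":
    sizes = reversed(sizes)
print(",".join(sizes))  # -> 100,200
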
def select_by_go(inputfile=None,
                 outputfile=None,
                 go_id=None,
                 https_proxy=None,
                 http_proxy=None,
                 list_datasets=None,
                 species=None,
                 invert_match=False):
    """
    Select lines from a GTF file using a Gene Ontology ID (e.g. GO:0050789).
    """

    if not go_id.startswith("GO:"):
        go_id = "GO:" + go_id

    is_associated = OrderedDict()

    bm = Biomart(http_proxy=http_proxy,
                 https_proxy=https_proxy)

    bm.get_datasets('ENSEMBL_MART_ENSEMBL')

    if list_datasets:
        for i in sorted(bm.datasets):
            write_properly(i.replace("_gene_ensembl", ""), outputfile)
        sys.exit()
    else:
        if species + "_gene_ensembl" not in bm.datasets:
            message("Unknown dataset/species.", type="ERROR")

        bm.query({'query': XML.format(species=species, go=go_id)})

        for i in bm.response.content.decode().split("\n"):
            i = i.rstrip("\n")
            if i != '':
                is_associated[i] = 1

        gtf = GTF(inputfile)

        gtf_associated = gtf.select_by_key("gene_id",
                                           ",".join(list(is_associated.keys())),
                                           invert_match)

        gtf_associated.write(outputfile, gc_off=True)

def random_tx(inputfile=None,
              outputfile=None,
              max_transcript=None,
              seed_value=None):
    """
    Select randomly up to m transcripts for each gene.
    """

    message("Loading the GTF.")

    gtf = GTF(inputfile).select_by_key("feature", "gene", invert_match=True)

    message("Getting gene_id and transcript_id.")

    gene2tx = gtf.extract_data("gene_id,transcript_id",
                               as_dict_of_merged_list=True,
                               no_na=True,
                               nr=True)

    message("Selecting random transcripts.")

    if seed_value is not None:
        random.seed(seed_value, version=1)

    tx_to_delete = []

    for gn_id in gene2tx:
        tx_list = gene2tx[gn_id]
        nb_tx = len(tx_list)
        max_cur = min(max_transcript, nb_tx)
        # The sampled positions are the transcripts to keep; all others are deleted.
        pos_to_keep = random.sample(list(range(len(tx_list))), max_cur)
        tx_list = [j for i, j in enumerate(tx_list) if i not in pos_to_keep]
        tx_to_delete += tx_list

    message("Printing results.")

    message("Selecting transcripts.")

    gtf.select_by_key("transcript_id",
                      ",".join(tx_to_delete),
                      invert_match=True).write(outputfile, gc_off=True)

    close_properly(outputfile, inputfile)

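# Standalone illustration (hypothetical IDs, not from pygtftk) of the sampling logic
# above: the indices returned by random.sample are the transcripts to KEEP, and the
# remaining ones are collected for deletion.
import random

random.seed(123, version=1)
tx_list = ["tx1", "tx2", "tx3", "tx4"]
max_transcript = 2
pos_to_keep = random.sample(list(range(len(tx_list))), min(max_transcript, len(tx_list)))
to_delete = [t for i, t in enumerate(tx_list) if i not in pos_to_keep]
print(pos_to_keep, to_delete)  # two kept positions, the other two transcripts deleted
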
def join_multi_file(inputfile=None,
                    outputfile=None,
                    target_feature=None,
                    key_to_join=None,
                    matrix_files=()):
    """
    Join attributes from a set of tabulated files.
    """

    # -----------------------------------------------------------
    #  Load the GTF
    # -----------------------------------------------------------

    gtf = GTF(inputfile, check_ensembl_format=False)

    # -----------------------------------------------------------
    #  Check the target feature
    # -----------------------------------------------------------

    feat_list = gtf.get_feature_list(nr=True)

    if target_feature is not None:
        target_feature_list = target_feature.split(",")

        for i in target_feature_list:
            if i not in feat_list + ["*"]:
                message("Feature " + i + " not found.",
                        type="ERROR")
    else:
        target_feature = ",".join(feat_list)

    # -----------------------------------------------------------
    #  Do it
    # -----------------------------------------------------------

    for join_file in matrix_files:
        gtf = gtf.add_attr_from_matrix_file(feat=target_feature,
                                            key=key_to_join,
                                            inputfile=join_file.name)
    gtf.write(outputfile, gc_off=True)

    gc.disable()
    close_properly(outputfile, inputfile)

def intergenic(inputfile=None,
               outputfile=None,
               chrom_info=None):
    """
    Extract intergenic regions.
    """

    message("Searching for intergenic regions.")

    gtf = GTF(inputfile)

    intergenic_regions = gtf.get_intergenic(chrom_info)

    nb_intergenic_region = 1

    for i in intergenic_regions:
        i.name = "region_" + str(nb_intergenic_region)
        write_properly(chomp(str(i)), outputfile)
        nb_intergenic_region += 1

    gc.disable()
    close_properly(outputfile, inputfile)

def parse_cmd_args(cls):
    """Parse arguments of all declared commands."""

    CmdManager.args = cls.parser.parse_args(None)
    args = CmdManager.args

    cmd_name = args.command

    if cmd_name is None:
        message("Please provide a subcommand or argument (e.g. -h)",
                type="WARNING",
                force=True)
        CmdManager.parser.print_help()
        exit(0)

    lang = cls.cmd_obj_list[cmd_name].lang

    if lang == 'Python':

        if args.tmp_dir is not None:
            if not os.path.exists(args.tmp_dir):
                msg = "Creating directory {d}."
                message(msg.format(d=args.tmp_dir), type="INFO")
                mkdir_p(args.tmp_dir)
            if not os.path.isdir(args.tmp_dir):
                msg = "{d} is not a directory."
                message(msg.format(d=args.tmp_dir), type="ERROR")

            pygtftk.utils.TMP_DIR = args.tmp_dir

    return args

def query(self, query):

    message("Sending query", type="DEBUG")
    self.response = requests.get(self.url,
                                 query,
                                 proxies=self.proxies)

    message("Checking http response", type="DEBUG")
    if self.response.status_code != requests.codes.ok:
        msg = "HTTP response status code: {c}. {m}"
        msg = msg.format(c=str(self.response.status_code),
                         m=self.response.reason)
        message(msg, type="ERROR")

    msg = r"([ \.\w]+ service you requested is currently unavailable[ \.\w]+)"
    hit = re.search(msg, self.response.text)

    if hit:
        msg = hit.group(1)
        message(msg.lstrip().rstrip(), type="WARNING")
        message("More information about this downtime "
                "may be available on http://www.ensembl.info/",
                type="ERROR")

def nb_exons(inputfile=None,
             outputfile=None,
             key_name=None,
             text_format=False):
    """
    Count the number of exons of each transcript in the GTF file.
    """

    gtf = GTF(inputfile)
    n_exons = defaultdict(int)

    # -------------------------------------------------------------------------
    # Compute the number of exons for each transcript in the input GTF file
    # -------------------------------------------------------------------------

    message("Computing number of exons for each transcript in input GTF file.")

    exon = gtf.select_by_key("feature", "exon")
    fields = exon.extract_data("transcript_id")

    for i in fields:
        tx_id = i[0]
        n_exons[tx_id] += 1

    if text_format:
        for tx_id in n_exons:
            outputfile.write(tx_id + "\t" + str(n_exons[tx_id]) + "\ttranscript\n")
    else:
        if len(n_exons):
            gtf = gtf.add_attr_from_dict(feat="transcript",
                                         key="transcript_id",
                                         a_dict=n_exons,
                                         new_key=key_name)
        gtf.write(outputfile, gc_off=True)

    close_properly(outputfile, inputfile)

def produce_dot_for_node(node, graph):
    for c in node.children:
        node_name = node_to_combi_string(node, features_names)
        child_name = node_to_combi_string(c, features_names)

        message("Drawing " + node_name + ' --> ' + child_name, type='DEBUG')

        ## Add nodes
        # Only add a node if it is not already present, of course. If present,
        # the graph's 'body' contains the combi string prefixed with a tab
        # character.

        ## Parent
        if not ('\t' + node_name in s.body):
            # print(combi_string, node.s, node.pval, node.fc)
            graph.node(node_name,
                       format_node_string(node_name, node.s, node.pval, node.fc))

        # Child
        if not ('\t' + child_name in s.body):
            graph.node(child_name,
                       format_node_string(child_name, c.s, c.pval, c.fc))

        graph.edge(node_name + ':s', child_name + ':n')

    return 1