def get_feature_list(
        inputfile=None,
        outputfile=None,
        separator=""):
    """
    Write the feature types found in a GTF to ``outputfile``.

    :param inputfile: The GTF input handed to the ``GTF`` constructor.
    :param outputfile: A writable file-like object receiving the result.
    :param separator: String appended after every feature written
        (including the last one).
    """
    # The Ensembl format check is explicitly skipped when loading.
    annotation = GTF(inputfile, check_ensembl_format=False)

    # nr=True presumably requests a non-redundant list -- confirm
    # against GTF.get_feature_list.
    feature_types = annotation.get_feature_list(nr=True)

    for feature_type in feature_types:
        outputfile.write(str(feature_type) + separator)

    # NOTE(review): garbage collection is switched off right before the
    # handles are closed; presumably to speed up teardown -- confirm.
    gc.disable()
    close_properly(outputfile, inputfile)
def join_multi_file(
        inputfile=None,
        outputfile=None,
        target_feature=None,
        key_to_join=None,
        matrix_files=()):
    """
    Join attributes coming from several tabulated (matrix) files.

    Every file of ``matrix_files`` is joined in turn to the GTF through
    ``add_attr_from_matrix_file`` and the resulting GTF is written once
    to ``outputfile``.

    :param inputfile: The GTF input handed to the ``GTF`` constructor.
    :param outputfile: The output handle the joined GTF is written to.
    :param target_feature: Comma-separated feature types to update.
        When None, all feature types found in the GTF are targeted.
    :param key_to_join: The key used to match GTF lines and matrix rows.
    :param matrix_files: Iterable of objects exposing a ``name``
        attribute (presumably open file handles -- confirm with caller).
    """
    # -----------------------------------------------------------
    #  Load the annotation (Ensembl format check is skipped)
    # -----------------------------------------------------------

    annotation = GTF(inputfile, check_ensembl_format=False)

    # -----------------------------------------------------------
    #  Validate the requested feature types
    # -----------------------------------------------------------

    known_features = annotation.get_feature_list(nr=True)

    if target_feature is None:
        # No restriction requested: target every feature of the GTF.
        target_feature = ",".join(known_features)
    else:
        # "*" is accepted in addition to the features found in the GTF.
        accepted = known_features + ["*"]
        for requested in target_feature.split(","):
            if requested not in accepted:
                message("Feature " + requested + " not found.",
                        type="ERROR")

    # -----------------------------------------------------------
    #  Join the files one after the other, then write once
    # -----------------------------------------------------------

    for matrix_file in matrix_files:
        annotation = annotation.add_attr_from_matrix_file(
            feat=target_feature,
            key=key_to_join,
            inputfile=matrix_file.name)

    annotation.write(outputfile, gc_off=True)

    # NOTE(review): garbage collection is switched off right before the
    # handles are closed; presumably to speed up teardown -- confirm.
    gc.disable()
    close_properly(outputfile, inputfile)
def feature_size(
        inputfile=None,
        outputfile=None,
        ft_type="transcript",
        names="transcript_id",
        key_name='feature_size',
        separator="|",
        bed=False):
    """
    Get the size and limits (start/end) of features enclosed in the GTF.

    If BED output is requested, the limits are reported zero-based half
    open and the size is stored as the score. Otherwise a GTF is written
    in which the size is stored under the key ``key_name``.

    :param inputfile: The GTF input handed to the ``GTF`` constructor.
    :param outputfile: The output handle.
    :param ft_type: The feature type to measure. ``"*"`` and
        ``"mature_rna"`` are accepted in addition to the feature types
        found in the GTF.
    :param names: Comma-separated keys used to build the BED name.
    :param key_name: Name of the attribute holding the size (GTF output).
    :param separator: Separator used between the parts of the BED name.
    :param bed: Write BED lines instead of a GTF.
    """
    message("Computing feature sizes.")

    annotation = GTF(inputfile)

    # 'mature_rna' is not a feature of the file: it is a pseudo-feature
    # whose size is obtained from get_transcript_size().
    known_features = annotation.get_feature_list(nr=True) + ['mature_rna']

    if ft_type not in known_features + ["*"]:
        message("Unable to find requested feature.", type="ERROR")

    names = names.split(",")

    if ft_type == 'mature_rna':

        tx_size = annotation.get_transcript_size()

        if bed:
            # 'transcript_id' is placed first in the name so that it can
            # be popped below to look the transcript up in tx_size.
            transcripts = annotation.select_by_key("feature", 'transcript')
            bed_obj = transcripts.to_bed(['transcript_id'] + names,
                                         add_feature_type=False,
                                         sep=separator,
                                         more_name=['mature_rna'])

            for interval in bed_obj:
                name_fields = interval.name.split(separator)
                tx_id = name_fields.pop(0)
                interval.score = tx_size[tx_id]
                interval.name = separator.join(name_fields)
                write_properly(chomp(str(interval)), outputfile)
        else:
            # The GTF is written even when no transcript size is
            # available; it is then left without the new key.
            if len(tx_size):
                annotation = annotation.add_attr_from_dict(
                    feat="transcript",
                    key="transcript_id",
                    a_dict=tx_size,
                    new_key=key_name)

            annotation.write(outputfile, gc_off=True)

    elif bed:

        selected = annotation.select_by_key("feature", ft_type)
        bed_obj = selected.to_bed(name=names,
                                  sep=separator,
                                  add_feature_type=True)

        for interval in bed_obj:
            # Size computed from the BED limits: end - start.
            interval.score = str(interval.end - interval.start)
            write_properly(chomp(str(interval)), outputfile)

    else:

        # One value per extracted row is written to a temporary file
        # which is then attached to the GTF as a new attribute column.
        size_column = make_tmp_file(prefix="feature_size", suffix=".txt")

        rows = annotation.extract_data("feature,start,end",
                                       as_list_of_list=True,
                                       no_na=False,
                                       hide_undef=False)

        for row in rows:
            is_requested = row[0] == ft_type or ft_type == "*"
            if is_requested:
                # Size computed from the GTF limits: end - start + 1.
                size = int(row[2]) - int(row[1]) + 1
                size_column.write(str(size) + "\n")
            else:
                # Rows of other feature types get the "?" placeholder.
                size_column.write("?\n")

        size_column.close()

        annotation.add_attr_column(size_column, key_name).write(outputfile,
                                                                gc_off=True)

    close_properly(outputfile, inputfile)
def join_attr(
        inputfile=None,
        outputfile=None,
        join_file=None,
        has_header=False,
        new_key=None,
        target_feature=None,
        key_to_join=None,
        matrix=None):
    """
    Join attributes coming from a tabulated file.

    :param inputfile: The GTF input handed to the ``GTF`` constructor.
    :param outputfile: The output handle the joined GTF is written to.
    :param join_file: Object exposing a ``name`` attribute (presumably
        an open file handle -- confirm with caller) pointing to the
        tabulated file.
    :param has_header: Forwarded to ``add_attr_from_file`` (non-matrix
        mode only).
    :param new_key: Name of the new key. Required in non-matrix mode,
        rejected when ``matrix`` is True.
    :param target_feature: Comma-separated feature types to update.
        When None, all feature types found in the GTF are targeted.
    :param key_to_join: The key used to match GTF lines and file rows.
    :param matrix: When true, the file is joined through
        ``add_attr_from_matrix_file`` instead of ``add_attr_from_file``.
    """
    # -----------------------------------------------------------
    #  Argument consistency: --new-key xor --matrix
    # -----------------------------------------------------------

    if matrix is True:
        if new_key is not None:
            message("--new-key and --matrix are mutually exclusive.",
                    type="ERROR")
    elif new_key is None:
        message("--new-key is required when --matrix is False.",
                type="ERROR")

    # -----------------------------------------------------------
    #  Load the annotation (Ensembl format check is skipped)
    # -----------------------------------------------------------

    annotation = GTF(inputfile, check_ensembl_format=False)

    # -----------------------------------------------------------
    #  Validate the requested feature types
    # -----------------------------------------------------------

    known_features = annotation.get_feature_list(nr=True)

    if target_feature is None:
        # No restriction requested: target every feature of the GTF.
        target_feature = ",".join(known_features)
    else:
        # "*" is accepted in addition to the features found in the GTF.
        accepted = known_features + ["*"]
        for requested in target_feature.split(","):
            if requested not in accepted:
                message("Feature " + requested + " not found.",
                        type="ERROR")

    # -----------------------------------------------------------
    #  Join, then write the result
    # -----------------------------------------------------------

    if matrix:
        annotation = annotation.add_attr_from_matrix_file(
            feat=target_feature,
            key=key_to_join,
            inputfile=join_file.name)
    else:
        annotation = annotation.add_attr_from_file(
            feat=target_feature,
            key=key_to_join,
            new_key=new_key,
            inputfile=join_file.name,
            has_header=has_header)

    annotation.write(outputfile, gc_off=True)

    # NOTE(review): garbage collection is switched off right before the
    # handles are closed; presumably to speed up teardown -- confirm.
    gc.disable()
    close_properly(outputfile, inputfile)