def get_feature_list(inputfile=None, outputfile=None, separator=""):
    """
    Get the list of features enclosed in the GTF.
    """

    gtf = GTF(inputfile, check_ensembl_format=False)
    for i in gtf.get_feature_list(nr=True):
        outputfile.write(str(i) + separator)

    gc.disable()
    close_properly(outputfile, inputfile)
def join_multi_file(inputfile=None,
                    outputfile=None,
                    target_feature=None,
                    key_to_join=None,
                    matrix_files=()):
    """
    Join attributes from a set of tabulated files.
    """

    # -----------------------------------------------------------
    #  load the GTF
    # -----------------------------------------------------------

    gtf = GTF(inputfile, check_ensembl_format=False)

    # -----------------------------------------------------------
    #  Check target feature
    # -----------------------------------------------------------

    feat_list = gtf.get_feature_list(nr=True)

    if target_feature is not None:
        target_feature_list = target_feature.split(",")

        for i in target_feature_list:
            if i not in feat_list + ["*"]:
                message("Feature " + i + " not found.", type="ERROR")
    else:
        target_feature = ",".join(feat_list)

    # -----------------------------------------------------------
    #  Do it
    # -----------------------------------------------------------

    for join_file in matrix_files:
        gtf = gtf.add_attr_from_matrix_file(feat=target_feature,
                                            key=key_to_join,
                                            inputfile=join_file.name)
    gtf.write(outputfile, gc_off=True)

    gc.disable()
    close_properly(outputfile, inputfile)
Beispiel #3
0
def feature_size(inputfile=None,
                 outputfile=None,
                 ft_type="transcript",
                 names="transcript_id",
                 key_name='feature_size',
                 separator="|",
                 bed=False):
    """
 Get the size and limits (start/end) of features enclosed in the GTF. If bed
 format is requested returns the limits zero-based half open and the size as a score.
 Otherwise output GTF file with 'feat_size' as a new key and size as value.
    """

    message("Computing feature sizes.")

    gtf = GTF(inputfile)

    feat_list = gtf.get_feature_list(nr=True) + ['mature_rna']

    if ft_type not in feat_list + ["*"]:
        message("Unable to find requested feature.", type="ERROR")

    names = names.split(",")

    if ft_type != 'mature_rna':

        if bed:
            bed_obj = gtf.select_by_key("feature",
                                        ft_type).to_bed(name=names,
                                                        sep=separator,
                                                        add_feature_type=True)

            for i in bed_obj:
                i.score = str(i.end - i.start)
                write_properly(chomp(str(i)), outputfile)
        else:

            tmp_file = make_tmp_file(prefix="feature_size", suffix=".txt")

            elmt = gtf.extract_data("feature,start,end",
                                    as_list_of_list=True,
                                    no_na=False,
                                    hide_undef=False)

            for i in elmt:
                if i[0] != ft_type and ft_type != "*":
                    tmp_file.write("?\n")
                else:
                    tmp_file.write(str(int(i[2]) - int(i[1]) + 1) + "\n")

            tmp_file.close()

            gtf.add_attr_column(tmp_file, key_name).write(outputfile,
                                                          gc_off=True)

    else:

        tx_size = gtf.get_transcript_size()

        if bed:
            bed_obj = gtf.select_by_key("feature", 'transcript').to_bed(
                ['transcript_id'] + names,
                add_feature_type=False,
                sep=separator,
                more_name=['mature_rna'])

            for i in bed_obj:
                names = i.name.split(separator)
                tx_id = names.pop(0)
                i.score = tx_size[tx_id]
                i.name = separator.join(names)
                write_properly(chomp(str(i)), outputfile)
        else:

            if len(tx_size):
                gtf = gtf.add_attr_from_dict(feat="transcript",
                                             key="transcript_id",
                                             a_dict=tx_size,
                                             new_key=key_name)

            gtf.write(outputfile, gc_off=True)

    close_properly(outputfile, inputfile)
Beispiel #4
0
def join_attr(inputfile=None,
              outputfile=None,
              join_file=None,
              has_header=False,
              new_key=None,
              target_feature=None,
              key_to_join=None,
              matrix=None):
    """
    Join attributes from a tabulated file.
    """

    # -----------------------------------------------------------
    #  Check argument consistency
    # -----------------------------------------------------------

    if matrix is True:
        if new_key is not None:
            message("--new-key and --matrix are mutually exclusive.",
                    type="ERROR")
    else:
        if new_key is None:
            message("--new-key is required when --matrix is False.",
                    type="ERROR")

    # -----------------------------------------------------------
    #  load the GTF
    # -----------------------------------------------------------

    gtf = GTF(inputfile, check_ensembl_format=False)

    # -----------------------------------------------------------
    #  Check target feature
    # -----------------------------------------------------------

    feat_list = gtf.get_feature_list(nr=True)

    if target_feature is not None:
        target_feature_list = target_feature.split(",")

        for i in target_feature_list:
            if i not in feat_list + ["*"]:
                message("Feature " + i + " not found.", type="ERROR")
    else:
        target_feature = ",".join(feat_list)

    # -----------------------------------------------------------
    #  Do it
    # -----------------------------------------------------------

    if not matrix:

        gtf = gtf.add_attr_from_file(feat=target_feature,
                                     key=key_to_join,
                                     new_key=new_key,
                                     inputfile=join_file.name,
                                     has_header=has_header)
        gtf.write(outputfile, gc_off=True)

    else:

        gtf = gtf.add_attr_from_matrix_file(feat=target_feature,
                                            key=key_to_join,
                                            inputfile=join_file.name)
        gtf.write(outputfile, gc_off=True)

    gc.disable()
    close_properly(outputfile, inputfile)