예제 #1
0
def generate_rna_lib_profiling_results(configs_json: str, rna_items_json: str, output_file: str):
    r"""
    Build the RNA Lib "profiling" results and save them to a file.

    It should be called after "ShapeMapper 2.x" finishes the run.

    Parameters
    ----------
    configs_json: str
        The "RNA Lib" configs, serialized as a `string`.
    rna_items_json: str
        The RNA Lib items, serialized as a `string`. The decoded value is iterated,
        one RNA Lib Item per element.
    output_file: str
        The "file path" the serialized library is saved to.

    Returns
    -------
    None
        The result is only written to `output_file`.
    """

    # Turn both "string" arguments back into "Python Objects"
    configs: RnaLibConfig = json.loads(configs_json, object_hook=as_python_object)
    rna_items: List[LibraryItem] = json.loads(rna_items_json, object_hook=as_python_object)

    processed_items = []
    for item in rna_items:
        item_id = item.rna_id
        item_barcode = item.barcode

        # "Profile" data: keep each record in parse order and index it by position
        profile_path = get_shape2_path('profile', configs.working_folder, item_id, item_barcode.barcode)
        for profile_record in io.parse(profile_path, "shape-profile"):
            position = profile_record.nt_position
            item.profile_list.append(profile_record)
            item.profile_dict[position] = profile_record

        # "Shape Reactivity" data: same treatment as the "Profile" data
        reactivity_path = get_shape2_path('shape', configs.working_folder, item_id, item_barcode.barcode)
        for reactivity_record in io.parse(reactivity_path, "shape-reactivity"):
            position = reactivity_record.nt_position
            item.shape_reactivity_list.append(reactivity_record)
            item.shape_reactivity_dict[position] = reactivity_record

        processed_items.append(item)

    # Assemble the RNA Library object: meta data from the configs, plus the filled items
    library = RnaLibrary()
    library.load_meta(configs)
    library.rna_items = processed_items

    # Serialize the library and write it out
    FileUtils.save_file(output_file, json.dumps(library, cls=PythonObjectEncoder))
예제 #2
0
def parse_bprna_annotation(structure_content: str) -> Tuple[str, SecondaryStructure]:
    """
    Annotate a structure with "bpRNA" and parse the result.

    The content is written to an intermediate "tmp.dbn" file, "bpRNA.pl" is run on
    it, and the "tmp.st" file expected from that run is read back.

    Parameters
    ----------
    structure_content: str
        The content of the structure file handed to "bpRNA.pl". A trailing newline
        is appended before it is written.

    Returns
    -------
    Tuple[str, SecondaryStructure]
        The raw text of the "tmp.st" file and the first structure parsed from it.
        `None` is returned when the file yields no structure at all.

    Raises
    ------
    OSError
        If "bpRNA.pl" cannot be started, or (as `FileNotFoundError`) if the run
        did not leave a "tmp.st" file behind.
    """
    import os
    import tempfile

    # Every call works in its own scratch folder: parallel calls cannot overwrite
    # each other's files, a stale "tmp.st" from an earlier run can never be picked
    # up by mistake, and the intermediate files are removed on exit.
    with tempfile.TemporaryDirectory() as scratch_folder:
        # use an intermediate tmp file because bpRNA needs it
        with open(os.path.join(scratch_folder, 'tmp.dbn'), 'w') as out_tmp:
            out_tmp.write(structure_content + '\n')

        # Run inside the scratch folder with the same relative file name as before,
        # so "tmp.st" is expected beside "tmp.dbn" exactly like in the old flow.
        # The output is discarded via DEVNULL: an unread PIPE can fill up and block
        # the child process forever.
        subprocess.call(["bpRNA.pl", "tmp.dbn"], stdout=subprocess.DEVNULL, cwd=scratch_folder)

        with open(os.path.join(scratch_folder, 'tmp.st'), 'r') as in_tmp:
            annotation = in_tmp.read()

            # Also parse the bpRNA contents (rewind, the handle was just read to the end)
            in_tmp.seek(0)
            for secondary_structure in io.parse(in_tmp, "bp-rna"):
                # Only the first structure is of interest
                return annotation, secondary_structure

    # No structure in the bpRNA output
    return None
예제 #3
0
def load(logger, config, bprna_folder_path, editing_level_file_path):
    """
    Load every "bpRNA" structure file found directly inside a folder.

    Parameters
    ----------
    logger
        Not used by this function.
    config
        Not used by this function.
    bprna_folder_path
        The folder that is scanned for regular files with the '.st' extension.
    editing_level_file_path
        Not used by this function.

    Returns
    -------
    tuple
        The list of "bpRNA" file names (in `os.listdir` order) and a dict that maps
        the `comment` of each parsed structure to the structure itself. When two
        structures share a `comment`, the one parsed last is kept.
    """
    # Only keep "bpRNA" files: regular files whose extension is exactly '.st'
    bprna_files = []
    for entry in os.listdir(bprna_folder_path):
        if not os.path.isfile(os.path.join(bprna_folder_path, entry)):
            continue
        _, extension = os.path.splitext(entry)
        if extension == '.st':
            bprna_files.append(entry)

    # bpRNA structures, keyed by the `comment` of each structure
    bprna_structure_dict: Dict[str, SecondaryStructure] = {}
    for file_name in bprna_files:
        with open(os.path.join(bprna_folder_path, file_name)) as handle:
            bprna_structure_dict.update(
                (structure.comment, structure)
                for structure in io.parse(handle, "bp-rna")
            )

    return bprna_files, bprna_structure_dict
예제 #4
0
# ----------------------------------
# region Pipeline - Prep
#
# "PyPPL" cannot take a "python object" as input, so every object is converted to a
#  "string" here and is converted back at the place where it is used.

# The "RNA Lib" file is mandatory
if not configs['rna_lib_file']:
    raise ValueError('"RNA Lib" file load error. Please double check. ')

# `rna_lib_items` holds the parsed "RNA Lib items".
# `rna_lib_item_object_json_list` holds the same items, each one in
# "python object string" format, so they can be passed via "Channel".
rna_lib_items, rna_lib_item_object_json_list = [], []
for rna_item in io.parse(configs['rna_lib_file'], "rna-lib-def"):
    rna_lib_items.append(rna_item)
    # Keep the "json string" form of the item next to the item itself
    rna_lib_item_object_json_list.append(json.dumps(rna_item, cls=PythonObjectEncoder))

# The complete list of "RNA Lib items" as one "python object string".
# It is used to be passed as a "whole".
rna_lib_items_list_json = json.dumps(rna_lib_items, cls=PythonObjectEncoder)
# The configs as a "string", to be passed as "argument"
configs_object_json = json.dumps(configs, cls=PythonObjectEncoder)

# endregion

# ----------------------------------
# region Pipeline - AfterQC
예제 #5
0
# ----------------------------------
# region Prep

# Anchor a relative output path to the current working directory.
# This runs before the `os.chdir` call in the "QC" region of this script, so the
# output location does not move when the working directory changes.
if not os.path.isabs(output_file_path):
    cwd = os.getcwd()
    output_file_path = os.path.join(cwd, output_file_path)

# endregion

# ----------------------------------------------------------------
# region RNA Lib Items
#

# Collect every item that `io.parse` yields for the "rna-lib-def" format
rna_lib_items: List[LibraryItem] = []
for rna_item in io.parse(rna_lib_def_file_path, "rna-lib-def"):
    rna_lib_items.append(rna_item)

# endregion

# ----------------------------------
# region QC

# Starts empty; nothing in the visible part of this region fills it
qc_results = []

# From here on the working directory is the dataset folder
os.chdir(dataset_folder_path)

# barcode = 'CGCGGTTGT'
# reverse = 'ACAACCGCG'
# cmd = 'cat {} | grep -E -c "^[ATCG]{{{}}}{}[ATCG]{{4}}{}"'.format(read_file_name, prefix, barcode, reverse)