def generate_rna_lib_profiling_results(configs_json: str, rna_items_json: str, output_file: str):
    r"""
    Build the RNA Lib "profiling" results file.

    Intended to run once "ShapeMapper 2.x" has finished, because it reads
    the per-item "profile" and "shape" output files.

    Parameters
    ----------
    configs_json: str
        The "RNA Lib" configs, serialized as a JSON string.
    rna_items_json: str
        The RNA Lib items, serialized as a JSON string.
    output_file: str
        The "file path" the serialized library is written to.

    Returns
    -------
    None
        The result is only written to `output_file`.
    """

    # Turn both JSON strings back into their "Python Object" form.
    configs: RnaLibConfig = json.loads(configs_json, object_hook=as_python_object)
    rna_items: List[LibraryItem] = json.loads(rna_items_json, object_hook=as_python_object)

    updated_rna_items = []
    for item in rna_items:
        item_id = item.rna_id
        item_barcode = item.barcode

        # "Profile" data: keep every entry both in order and by nt position.
        profile_path = get_shape2_path(
            'profile', configs.working_folder, item_id, item_barcode.barcode)
        for profile_entry in io.parse(profile_path, "shape-profile"):
            item.profile_list.append(profile_entry)
            item.profile_dict[profile_entry.nt_position] = profile_entry

        # "Shape Reactivity" data: same layout as the profile data.
        reactivity_path = get_shape2_path(
            'shape', configs.working_folder, item_id, item_barcode.barcode)
        for reactivity_entry in io.parse(reactivity_path, "shape-reactivity"):
            item.shape_reactivity_list.append(reactivity_entry)
            item.shape_reactivity_dict[reactivity_entry.nt_position] = reactivity_entry

        updated_rna_items.append(item)

    # Assemble the RNA Library object from the configs and the filled items.
    rna_library = RnaLibrary()
    rna_library.load_meta(configs)
    rna_library.rna_items = updated_rna_items

    # Serialize and write the output.
    FileUtils.save_file(output_file, json.dumps(rna_library, cls=PythonObjectEncoder))
def parse_bprna_annotation(structure_content: str) -> Tuple[str, SecondaryStructure]:
    """
    Annotate a structure with bpRNA and parse the resulting annotation.

    The content is written to ``tmp.dbn`` in the current working directory,
    ``bpRNA.pl`` is run on it, and the ``tmp.st`` file read afterwards is
    returned both as raw text and as the first parsed structure.

    Parameters
    ----------
    structure_content: str
        The structure text handed to bpRNA; a trailing newline is appended.

    Returns
    -------
    Tuple[str, SecondaryStructure]
        The raw content of ``tmp.st`` and the first structure yielded by the
        "bp-rna" parser. ``None`` is returned when the parser yields nothing.

    Notes
    -----
    The fixed file names ``tmp.dbn`` / ``tmp.st`` make concurrent calls from
    the same working directory overwrite each other's files.
    """
    # Use an intermediate tmp file because bpRNA needs it.
    with open('tmp.dbn', 'w') as out_tmp:
        out_tmp.write(structure_content + '\n')

    # Discard stdout instead of piping it: nothing ever reads the pipe, so a
    # chatty child process could block once the OS pipe buffer is full.
    # NOTE(review): the exit status is still ignored, as before; a failed run
    # may leave a stale "tmp.st" from an earlier call - confirm if acceptable.
    subprocess.call(["bpRNA.pl", "tmp.dbn"], stdout=subprocess.DEVNULL)

    # Raw annotation text.
    with open("tmp.st", 'r') as in_tmp:
        annotation = in_tmp.read()

    # Also parse the bpRNA contents; only the first structure is used.
    with open("tmp.st") as handle:
        for secondary_structure in io.parse(handle, "bp-rna"):
            return annotation, secondary_structure

    # No structure could be parsed (same result as the former fall-through).
    return None
def load(logger, config, bprna_folder_path, editing_level_file_path):
    """
    Load every bpRNA ``.st`` file found directly inside a folder.

    Parameters
    ----------
    logger
        Not used by this function.
    config
        Not used by this function.
    bprna_folder_path
        Folder that is scanned (non-recursively) for ``.st`` files.
    editing_level_file_path
        Not used by this function.

    Returns
    -------
    tuple
        ``(bprna_files, bprna_structure_dict)``: the matching file names in
        directory-listing order, and the parsed structures keyed by their
        ``comment`` attribute (a later duplicate key replaces an earlier one).
    """
    # Only regular files carrying the bpRNA ".st" extension are of interest.
    bprna_files = []
    for entry in os.listdir(bprna_folder_path):
        if os.path.splitext(entry)[1] != '.st':
            continue
        if not os.path.isfile(os.path.join(bprna_folder_path, entry)):
            continue
        bprna_files.append(entry)

    # Parse each file and index its structures by the "comment" value.
    bprna_structure_dict: Dict[str, SecondaryStructure] = {}
    for file_name in bprna_files:
        with open(os.path.join(bprna_folder_path, file_name)) as handle:
            bprna_structure_dict.update(
                (structure.comment, structure)
                for structure in io.parse(handle, "bp-rna")
            )

    return bprna_files, bprna_structure_dict
# ----------------------------------
# region Pipeline - Prep

# "PyPPL" cannot take a "python object" as input, so every object is turned
# into a "string" here and converted back at the place where it is used.

# Load RNA Items
rna_lib_file = configs['rna_lib_file']
if not rna_lib_file:
    raise ValueError('"RNA Lib" file load error. Please double check. ')

# The parsed items themselves ...
rna_lib_items = []
# ... and the same items one by one in "python object string" format, which
# is the form passed via "Channel".
rna_lib_item_object_json_list = []
for rna_item in io.parse(rna_lib_file, "rna-lib-def"):
    rna_item_json = json.dumps(rna_item, cls=PythonObjectEncoder)
    rna_lib_items.append(rna_item)
    rna_lib_item_object_json_list.append(rna_item_json)

# The configs as a "string", to be passed as an "argument".
configs_object_json = json.dumps(configs, cls=PythonObjectEncoder)
# The complete item list as one "python object string", passed as a "whole".
rna_lib_items_list_json = json.dumps(rna_lib_items, cls=PythonObjectEncoder)

# endregion

# ----------------------------------
# region Pipeline - AfterQC
# ---------------------------------- # region Prep if not os.path.isabs(output_file_path): cwd = os.getcwd() output_file_path = os.path.join(cwd, output_file_path) # endregion # ---------------------------------------------------------------- # region RNA Lib Items # rna_lib_items: List[LibraryItem] = [] for rna_item in io.parse(rna_lib_def_file_path, "rna-lib-def"): rna_lib_items.append(rna_item) # endregion # ---------------------------------- # region QC qc_results = [] # os.chdir(dataset_folder_path) # barcode = 'CGCGGTTGT' # reverse = 'ACAACCGCG' # cmd = 'cat {} | grep -E -c "^[ATCG]{{{}}}{}[ATCG]{{4}}{}"'.format(read_file_name, prefix, barcode, reverse)