def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/1-s2.0-S009286741300439X-mmc1.txt"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    organisms = ["Human"]

    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) / Path("Mapping_the_Human_miRNA_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json('paper',
                            "Mapping the Human miRNA Interactome by CLASH Reveals Frequent Noncanonical Binding")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url',
                            "https://www.sciencedirect.com/science/article/pii/S009286741300439X")

        p = Pipeline(paper_name="Mapping_the_Human_miRNA",
                     organism=organism,
                     in_df=df_prepare(read_paper_data(interaction_file, debug)),
                     tmp_dir=tmp_dir)
        p.run()
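# Usage sketch (assumption: these prepare scripts are launched directly from the
# command line; the script name below is a placeholder, not a file from the repo).
# The optional first argument is parsed with ast.literal_eval, so it must be a
# Python literal; omitting it raises IndexError and debug falls back to True:
#
#   python <prepare_script>.py False   # debug mode off
#   python <prepare_script>.py         # no argument -> debug defaults to True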
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    mouse_config = {
        "organism": "Mouse",
        "interaction_file": "Papers/ncomms9864-s2.xlsx"
    }
    human_config = {
        "organism": "Human",
        "interaction_file": "Papers/ncomms9864-s4.xlsx"
    }

    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    log_dir = "Datafiles_Prepare/Logs/"

    for cnfg in [mouse_config, human_config]:
        organism = cnfg["organism"]
        interaction_file = cnfg["interaction_file"]

        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) / Path("Darnell_miRNA_target_chimeras_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json(
            'paper',
            "miRNA–target chimeras reveal miRNA 3′-end pairing as a major determinant of Argonaute target specificity")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url', "https://www.nature.com/articles/ncomms9864")

        org = Darnell_miRNA_target_chimeras(interaction_file, tmp_dir, organism, debug=debug)
        org.run()

        print("Pipeline start")
        p = Pipeline(paper_name="Darnell_miRNA_target_chimeras",
                     organism=organism,
                     in_df=org.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)
        p.run()
def main():
    p = Path("Datafiles_Prepare/CSV")
    log_dir = "Datafiles_Prepare/Logs/"

    files = list(p.glob('**/*.csv'))
    run_list = [f for f in files if not f.match("*duplex*")]

    process_list = []
    for fin in run_list:
        fout = utils.filename_suffix_append(fin, "_duplex")
        tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
        proc = Process(target=seed_interaction_add, args=(fin, fout, tmp_dir))
        proc.start()
        process_list.append(proc)
        print(f"start process {proc.name} {fin}")

    for proc in process_list:
        proc.join()
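# Minimal sketch, assuming each of these scripts ends with the standard entry-point
# guard (not shown in the excerpts above). Because main() starts
# multiprocessing.Process workers, the guard matters on platforms that use the
# "spawn" start method (Windows, recent macOS): without it, each child re-imports
# the module and would re-execute main().
if __name__ == "__main__":
    main()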
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/1-s2.0-S1097276516305214-mmc3.xlsx"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    organisms = ["Celegans"]

    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) / Path("Pairing_Beyond_Seed_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json('paper', "Pairing beyond the Seed Supports MicroRNA Targeting Specificity")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url',
                            "https://www.sciencedirect.com/science/article/pii/S1097276516305214#mmc3")

        ce = Pairing_Beyond_Seed(input_file=interaction_file,
                                 organism=organism,
                                 tmp_dir=tmp_dir,
                                 debug=debug)
        ce.run()

        p = Pipeline(paper_name="Pairing_Beyond_Seed",
                     organism=organism,
                     in_df=ce.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)
        p.run()
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/41598_2017_7880_MOESM4_ESM.csv"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    organisms = ["Cow"]

    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) / Path("Global_Mapping_Cattle_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json('paper', "Global mapping of miRNA-target interactions in cattle (Bos taurus)")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url', "https://www.nature.com/articles/s41598-017-07880-8#MOESM1")

        cow = Global_Mapping_Cattle(input_file=interaction_file,
                                    tmp_dir=tmp_dir,
                                    debug=debug)
        cow.run()

        p = Pipeline(paper_name="Global_Mapping_Cattle",
                     organism=organism,
                     in_df=cow.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)
        p.run()
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/1-s2.0-S1097276514003566-mmc3.xls"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    organisms = ["Celegans", "Human", "Mouse"]

    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) / Path("Unambiguous_Identification_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json(
            'paper',
            "Unambiguous Identification of miRNA:Target Site Interactions by Different Types of Ligation Reactions")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url',
                            "https://www.sciencedirect.com/science/article/pii/S1097276514003566#app3")

        p = Pipeline(paper_name="Unambiguous_Identification",
                     organism=organism,
                     in_df=df_prepare(read_paper_data(interaction_file, organism, debug)),
                     tmp_dir=tmp_dir)
        p.run()
def main():
    input_dir = Path("Datafiles_Prepare/CSV")
    output_dir = Path("Features/CSV")
    log_dir = Path("Features/Logs")
    tmp_base = "Features/tmp_dir"

    files = list(input_dir.glob("*_duplex_*.csv"))

    process_list = []
    for fin in files:
        tmp_dir = utils.make_tmp_dir(tmp_base, parents=True)
        fout = output_dir / f"{fin.stem}_feature.csv"
        if fout.exists():
            continue
        flog = log_dir / f"{fin.stem}_feature.json"

        p = Process(target=worker, args=(fin, fout, tmp_dir))
        p.start()
        process_list.append(p)
        print(f"start process {p.name} {fin}")

    for p in process_list:
        p.join()
def main():
    input_dir = Path("Datafiles_Prepare/CSV")
    tmp_base = "Features/tmp_dir"

    files = list(input_dir.glob("*duplex*.csv"))
    files = [f for f in files if not f.match("*negative*")]
    print(files)

    process_list = []
    for fin in files:
        tmp_dir = utils.make_tmp_dir(tmp_base, parents=True)
        organism = fin.stem.split("_")[0]
        print(organism)

        p = Process(target=worker, args=(organism, fin, tmp_dir))
        p.start()
        process_list.append(p)
        print(f"start process {p.name} {fin}")

    for p in process_list:
        p.join()
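# Hypothetical stubs, inferred only from the Process(...) call sites in the two
# feature scripts above; each script defines its own worker target with a
# different signature, and the real implementations live elsewhere in the repo.
#
#   def worker(fin, fout, tmp_dir): ...        # per-file: duplex CSV in, feature CSV out
#   def worker(organism, fin, tmp_dir): ...    # per-file, keyed by the organism prefix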