Exemple #1
0
    def Pipeline_Files(self):
        """Populate the file-path attributes used by the pipeline.

        Every attribute is stored as a plain string. Intermediate files are
        placed under ``self.tmp_dir``; the final CSV and the non-unique BLAST
        log are placed under ``Datafiles_Prepare``. Three of the paths
        (``blast_with_mRNA``, ``final_output``, ``blast_no_unique``) are
        additionally passed through ``utils.filename_date_append``
        (presumably adds a date stamp to the file name -- confirm in utils).
        """
        organism = self.organism
        work_dir = self.tmp_dir
        blast_dir_name = "blast_files"

        # Biomart-based BLAST database inputs/outputs.
        self.blast_db_fasta_biomart = str(
            Path("Biomart") / f"{organism}_biomart_3utr.fasta")
        self.blast_db_fasta_biomart_valid = str(
            work_dir / (organism + "_biomart_valid.fasta"))
        self.blast_db_biomart = str(
            work_dir / blast_dir_name / "blastdb_biomart")

        # UCSC-based BLAST database inputs/outputs.
        self.blast_db_csv_ucsc = str(work_dir / (organism + "_ucsc.csv"))
        self.blast_db_fasta_ucsc_valid = str(
            work_dir / (organism + "_ucsc_valid.fasta"))
        self.blast_db_ucsc = str(work_dir / blast_dir_name / "blastdb_ucsc")

        # Scratch file for raw BLAST results.
        self.blast_tmp_result = str(work_dir / "tmp_blast_result.csv")

        # Interactions joined with their mRNA.
        mrna_csv = str(work_dir / (organism + "_mRNA.csv"))
        self.blast_with_mRNA = utils.filename_date_append(mrna_csv)

        # Final dataset CSV.
        data_csv = str(
            Path("Datafiles_Prepare/CSV") /
            (organism + "_" + self.paper_name + "_Data.csv"))
        self.final_output = utils.filename_date_append(data_csv)

        # FASTA log of non-unique BLAST hits.
        nonunique_fasta = str(
            Path("Datafiles_Prepare/Logs") /
            (organism + "_" + self.paper_name + "_Blast_Nonunique.fasta"))
        self.blast_no_unique = utils.filename_date_append(nonunique_fasta)
Exemple #2
0
def main():
    """Run the preparation pipeline for the "Mapping the Human miRNA" paper.

    The optional first command-line argument is parsed with
    ``ast.literal_eval`` and used as the debug flag; when it is absent the
    flag defaults to ``True``. For each organism a JSON log file is
    configured, the paper metadata is logged, and a ``Pipeline`` is built
    from the prepared paper data and run.
    """
    # No CLI argument -> debug mode.
    debug = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S009286741300439X-mmc1.txt"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Human"]:
        log_path = Path(log_dir) / ("Mapping_the_Human_miRNA_" + organism + ".json")
        JsonLog.set_filename(utils.filename_date_append(log_path))

        # Metadata entries, written to the JSON log in this order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Mapping the Human miRNA Interactome by CLASH Reveals Frequent Noncanonical Binding"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S009286741300439X"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        prepared_df = df_prepare(read_paper_data(interaction_file, debug))
        pipeline = Pipeline(paper_name="Mapping_the_Human_miRNA",
                            organism=organism,
                            in_df=prepared_df,
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemple #3
0
    def file_formatting(self):
        """Filter the BLAST-with-mRNA table and write the final CSV.

        Reads ``self.blast_with_mRNA``, tags every row with the paper name,
        organism and the JSON log's creation time, then applies three
        successive row filters (each one logging its valid/invalid counts
        to ``JsonLog``):

        1. ``biomart_blast_status`` equals ``"OK"``;
        2. ``miRNA sequence`` contains no ``'X'``;
        3. ``microRNA_name`` does not contain ``'star'``.

        Finally a fixed subset of columns is selected, the biomart columns
        are renamed, the index is reset and the table is saved with
        ``to_csv``.
        """

        def keep_rows(frame, mask, valid_key, invalid_key):
            # Log how many rows pass / fail ``mask``, then keep the passing ones.
            JsonLog.add_to_json(valid_key, sum(mask))
            JsonLog.add_to_json(invalid_key, (frame.shape[0] - sum(mask)))
            return frame[mask]

        table = pd.read_csv(self.blast_with_mRNA)
        table['Source'] = self.paper_name
        table['Organism'] = self.organism
        table['Creation_time'] = JsonLog.get_creation_time()

        # Step 1: only rows whose BLAST status is OK.
        blast_ok = table['biomart_blast_status'] == "OK"
        table = keep_rows(table, blast_ok,
                          "Pipeline valid blast results",
                          "Pipeline invalid blast results")

        # Step 2: drop miRNA sequences containing 'X'.
        no_x = table['miRNA sequence'].apply(lambda seq: seq.find('X') == -1)
        table = keep_rows(table, no_x,
                          "Pipeline valid miRNA_no_xxx",
                          "Pipeline invalid miRNA (xxx)")

        # Step 3: drop miRNA whose name contains 'star'.
        no_star = table['microRNA_name'].apply(
            lambda name: name.find('star') == -1)
        table = keep_rows(table, no_star,
                          "Pipeline valid miRNA_no_***",
                          "Pipeline invalid miRNA (star)")

        # Columns kept in the output, in this order.
        wanted_columns = [
            'Source', 'Organism', 'GI_ID', 'microRNA_name', 'miRNA sequence',
            'target sequence', 'number of reads', 'biomart_title',
            'biomart_sbjct_start', 'biomart_sbjct_end', 'biomart_full_mrna'
        ]
        # biomart_* column names -> output column names.
        new_names = {
            'biomart_title': 'mRNA_name',
            'biomart_sbjct_start': 'mRNA_start',
            'biomart_sbjct_end': 'mRNA_end',
            'biomart_full_mrna': 'full_mrna'
        }
        result = (table.filter(wanted_columns, axis=1)
                  .rename(columns=new_names)
                  .reset_index(drop=True))

        # NOTE(review): if self.final_output was already passed through
        # utils.filename_date_append when it was set, it is processed a
        # second time here -- confirm that is intended.
        result.to_csv(utils.filename_date_append(self.final_output))
def main():
    """Run the preparation pipeline for the Darnell miRNA-target chimeras paper.

    The optional first command-line argument is parsed with
    ``ast.literal_eval`` and used as the debug flag; when it is absent the
    flag defaults to ``True``. The Mouse and Human supplementary files are
    processed in that order: for each one a JSON log file is configured,
    the paper metadata is logged, the paper-specific reader is run, and a
    ``Pipeline`` is built from its output and run.
    """
    # No CLI argument -> debug mode.
    debug = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    # One entry per organism, processed in list order.
    run_configs = [
        {
            "organism": "Mouse",
            "interaction_file": "Papers/ncomms9864-s2.xlsx"
        },
        {
            "organism": "Human",
            "interaction_file": "Papers/ncomms9864-s4.xlsx"
        },
    ]

    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    log_dir = "Datafiles_Prepare/Logs/"

    for config in run_configs:
        organism = config["organism"]
        interaction_file = config["interaction_file"]

        log_path = Path(log_dir) / ("Darnell_miRNA_target_chimeras_" + organism + ".json")
        JsonLog.set_filename(utils.filename_date_append(log_path))

        # Metadata entries, written to the JSON log in this order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "miRNA–target chimeras reveal miRNA 3-end pairing as a major determinant of Argonaute target specificity"),
            ('Organism', organism),
            ('paper_url', "https://www.nature.com/articles/ncomms9864"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        reader = Darnell_miRNA_target_chimeras(interaction_file,
                                               tmp_dir,
                                               organism,
                                               debug=debug)
        reader.run()

        print("Pipeline start")
        pipeline_input = reader.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Darnell_miRNA_target_chimeras",
                            organism=organism,
                            in_df=pipeline_input,
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemple #5
0
def main():
    """Run the preparation pipeline for the "Pairing beyond the Seed" paper.

    The optional first command-line argument is parsed with
    ``ast.literal_eval`` and used as the debug flag; when it is absent the
    flag defaults to ``True``. For each organism a JSON log file is
    configured, the paper metadata is logged, the paper-specific reader is
    run, and a ``Pipeline`` is built from its output and run.
    """
    # No CLI argument -> debug mode.
    debug = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S1097276516305214-mmc3.xlsx"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Celegans"]:
        log_path = Path(log_dir) / ("Pairing_Beyond_Seed_" + organism + ".json")
        JsonLog.set_filename(utils.filename_date_append(log_path))

        # Metadata entries, written to the JSON log in this order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Pairing beyond the Seed Supports MicroRNA Targeting Specificity"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S1097276516305214#mmc3"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        reader = Pairing_Beyond_Seed(input_file=interaction_file,
                                     organism=organism,
                                     tmp_dir=tmp_dir,
                                     debug=debug)
        reader.run()

        pipeline_input = reader.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Pairing_Beyond_Seed",
                            organism=organism,
                            in_df=pipeline_input,
                            tmp_dir=tmp_dir)
        pipeline.run()
def main():
    """Run the preparation pipeline for the cattle miRNA-target mapping paper.

    The optional first command-line argument is parsed with
    ``ast.literal_eval`` and used as the debug flag; when it is absent the
    flag defaults to ``True``. For each organism a JSON log file is
    configured, the paper metadata is logged, the paper-specific reader is
    run, and a ``Pipeline`` is built from its output and run.
    """
    # No CLI argument -> debug mode.
    debug = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/41598_2017_7880_MOESM4_ESM.csv"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Cow"]:
        log_path = Path(log_dir) / ("Global_Mapping_Cattle_" + organism + ".json")
        JsonLog.set_filename(utils.filename_date_append(log_path))

        # Metadata entries, written to the JSON log in this order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Global mapping of miRNA-target interactions in cattle (Bos taurus)"),
            ('Organism', organism),
            ('paper_url',
             "https://www.nature.com/articles/s41598-017-07880-8#MOESM1"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        # The cattle reader takes no organism argument.
        reader = Global_Mapping_Cattle(input_file=interaction_file,
                                       tmp_dir=tmp_dir,
                                       debug=debug)
        reader.run()

        pipeline_input = reader.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Global_Mapping_Cattle",
                            organism=organism,
                            in_df=pipeline_input,
                            tmp_dir=tmp_dir)
        pipeline.run()
def main():
    """Run the preparation pipeline for the "Unambiguous Identification" paper.

    The optional first command-line argument is parsed with
    ``ast.literal_eval`` and used as the debug flag; when it is absent the
    flag defaults to ``True``. The same supplementary file is processed once
    per organism (Celegans, Human, Mouse): a JSON log file is configured,
    the paper metadata is logged, and a ``Pipeline`` is built from the
    prepared paper data for that organism and run.
    """
    # No CLI argument -> debug mode.
    debug = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S1097276514003566-mmc3.xls"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Celegans", "Human", "Mouse"]:
        log_path = Path(log_dir) / ("Unambiguous_Identification_" + organism + ".json")
        JsonLog.set_filename(utils.filename_date_append(log_path))

        # Metadata entries, written to the JSON log in this order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Unambiguous Identification of miRNA:Target Site Interactions by Different Types of Ligation Reactions"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S1097276514003566#app3"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        prepared_df = df_prepare(
            read_paper_data(interaction_file, organism, debug))
        pipeline = Pipeline(paper_name="Unambiguous_Identification",
                            organism=organism,
                            in_df=prepared_df,
                            tmp_dir=tmp_dir)
        pipeline.run()