Ejemplo n.º 1
0
 def iupred_analysis(self):
     """Run the global IUPred analysis.

     The tool input path and the IUPred output path are read from the
     property manager, prefixed with ``Constants.PATH_HOME``, stored on the
     instance and handed to ``Iupred.global_iupred_analysis``. A timer step
     is recorded before and after the run.
     """
     clock = Timer.get_instance()
     clock.step(" Start of Iupred analysis...")

     settings = PropertyManager.get_instance()
     home = Constants.PATH_HOME

     # Both properties are fetched with the flag set to True, like every
     # other lookup in this class.
     self.tool_path_input = home + settings.get_property(
         DataConstants.TOOL_PATH_INPUT_PROPERTY, True)
     self.iupred_path_output = home + settings.get_property(
         DataConstants.IUPRED_PATH_OUTPUT_PROPERTY, True)

     Iupred.global_iupred_analysis(self.tool_path_input, self.iupred_path_output)

     clock.step(" End of Iupred analysis")
Ejemplo n.º 2
0
 def iupred_motifs(self):
     """Run the IUPred/motif overlap analysis at two thresholds.

     Reads ``self.path_home``, ``self.protein_list``, ``self.motif_folder``
     and ``self.domain_region_file``, which must already be set on the
     instance. Stores the IUPred folder, both thresholds and both output
     folders as attributes, and calls
     ``GlobalOverlapRegionAnalysis.iupred_overlap_analysis`` once per
     threshold. The timer messages label the two runs 0.4 and 0.5.
     """
     log = Logger.get_instance()
     log.info("        .....Start of IUPred motifs analysis.....\n")

     props = PropertyManager.get_instance()
     self.iupred_folder = self.path_home + props.get_property(
         DataConstants.MOTIFS_IUPRED_FOLDER_PROPERTY, True)

     def run_overlap(output_folder, threshold):
         # Only the output folder and the threshold differ between the runs.
         GlobalOverlapRegionAnalysis.iupred_overlap_analysis(
             self.protein_list, self.iupred_folder, output_folder, threshold,
             self.motif_folder, self.domain_region_file)

     timer = Timer.get_instance()

     # First run: threshold labelled 0.4
     timer.step(" Start of IUPred motifs analysis - threshold = 0.4 \n")
     self.threshold_1 = Constants.MOTIFS_THRESHOLD_1
     self.output_folder_1 = self.path_home + props.get_property(
         DataConstants.MOTIFS_IUP_OUTPUT_FOLDER_1_PROPERTY, True)
     run_overlap(self.output_folder_1, self.threshold_1)
     timer.step(" End of IUPred motifs analysis - threshold = 0.4 \n")

     # Second run: threshold labelled 0.5
     timer.step(" Start of IUPred motifs analysis - threshold = 0.5 \n")
     self.threshold_2 = Constants.MOTIFS_THRESHOLD_2
     self.output_folder_2 = self.path_home + props.get_property(
         DataConstants.MOTIFS_IUP_OUTPUT_FOLDER_2_PROPERTY, True)
     run_overlap(self.output_folder_2, self.threshold_2)
     timer.step(" End of IUPred motifs analysis - threshold = 0.5 \n")

     log.info("        .....End of IUPred motifs analysis.....\n")
Ejemplo n.º 3
0
 def whole_procedure():
     """Run the IUPred, ANCHOR and DisoRDPbind motif analyses in order.

     The dataset type is read from the property manager and used only in
     the log and chrono messages. The chrono is started before the analyses
     and stopped after the last one.
     """
     dataset_type = PropertyManager.get_instance().get_property(
         DataConstants.MOTIFS_DATASET_TYPE_PROPERTY, True)

     chrono = Timer.get_instance()
     chrono.start_chrono()

     start_message = "        ........Start of " + dataset_type + " Motifs Analysis.....\n "
     Logger.get_instance().info(start_message)

     analysis = MotifsAnalysis()
     analysis.iupred_motifs()
     analysis.anchor_motifs()
     analysis.disordp_motifs()

     end_message = " End of " + dataset_type + " Motifs Analysis"
     Timer.get_instance().stop_chrono(end_message)
Ejemplo n.º 4
0
 def anchor_analysis(self):
     """Run the global ANCHOR analysis.

     Three paths are read from the property manager and prefixed with
     ``Constants.PATH_HOME``: the tool input path, the ANCHOR output path
     and the motif list path. They are stored on the instance and passed to
     ``Anchor.global_anchor_analysis``. A timer step is recorded before and
     after the run.
     """
     clock = Timer.get_instance()
     clock.step(" Start of Anchor analysis...")

     settings = PropertyManager.get_instance()
     home = Constants.PATH_HOME

     self.tool_path_input = home + settings.get_property(
         DataConstants.TOOL_PATH_INPUT_PROPERTY, True)
     self.anchor_path_output = home + settings.get_property(
         DataConstants.ANCHOR_PATH_OUTPUT_PROPERTY, True)
     self.motif_list_path = home + settings.get_property(
         DataConstants.ANCHOR_MOTIF_PATH_PROPERTY, True)

     # The motif list comes first in the callee's argument order.
     Anchor.global_anchor_analysis(
         self.motif_list_path, self.tool_path_input, self.anchor_path_output)

     clock.step(" End of Anchor analysis")
Ejemplo n.º 5
0
    def whole_procedure():
        """Entry point of the sequence-extraction pipeline.

        Starts the chrono, logs the start message, instantiates
        ``DownloadEnsemblSeq`` and stops the chrono. Every pipeline step is
        currently disabled, so the instance is created but not used.
        """
        chrono = Timer.get_instance()
        chrono.start_chrono()
        Logger.get_instance().info("Start of the sequences extraction.....\n ")

        downloader = DownloadEnsemblSeq()

        # Disabled steps, kept for reference in their intended order:
        #   downloader.download_product_gene_seq()
        #   downloader.make_dictionary()
        #   downloader.get_longest_seq()
        #   downloader.isoform_sequences()
        #   downloader.merger_sequences()

        Timer.get_instance().stop_chrono(" End of the sequences extraction")
Ejemplo n.º 6
0
 def disordpbind_analysis(self):
     """Post-process the DisoRDPbind output.

     Reads the input file, output folder, binding partner, amino-acid
     number and dataset type from the property manager, stores them on the
     instance and calls ``DisoRDPbind.make_disordp_file``. The binding
     partner and the amino-acid number are converted to ``int`` for the
     call. A timer step is recorded before and after.
     """
     clock = Timer.get_instance()
     clock.step(" Start of DisoRDPbind output analysis.. ")

     settings = PropertyManager.get_instance()
     self.path_home = Constants.PATH_HOME

     self.input_file = self.path_home + settings.get_property(
         DataConstants.DISO_INPUT_FILE_PROPERTY, True)
     # NOTE: the attribute name "ouput_path" (sic) is kept unchanged because
     # it is visible outside this method.
     self.ouput_path = self.path_home + settings.get_property(
         DataConstants.DISO_OUTPUT_FOLDER_PROPERTY, True)
     self.binding_partner = settings.get_property(
         DataConstants.DISO_BINDING_PARTNER_PROPERTY, True)
     self.num_aa_diso = settings.get_property(
         DataConstants.DISO_NUM_AA_PROPERTY, True)
     self.dataset_type = settings.get_property(
         DataConstants.DISO_DATASET_TYPE_PROPERTY, True)

     partner = int(self.binding_partner)
     min_residues = int(self.num_aa_diso)
     DisoRDPbind.make_disordp_file(
         self.input_file, self.ouput_path, partner, min_residues, self.dataset_type)

     clock.step(" End of DisoRDPbind output analysis")
Ejemplo n.º 7
0
 def anchor_motifs(self):
     """Run the ANCHOR/motif overlap analysis.

     Reads ``self.path_home``, ``self.protein_list``, ``self.motif_folder``
     and ``self.domain_region_file``, which must already be set on the
     instance. The ANCHOR input and output folders are read from the
     property manager, stored as attributes and passed to
     ``GlobalOverlapRegionAnalysis.anchor_overlap_analysis``.

     The opening log line and the closing timer step previously said
     "IUPred" (copied from ``iupred_motifs``); they now name ANCHOR so the
     log and timing report match the analysis that actually runs.
     """
     log = Logger.get_instance()
     log.info("        .....Start of ANCHOR motifs analysis.....\n")

     props = PropertyManager.get_instance()
     self.anchor_folder = self.path_home + props.get_property(
         DataConstants.MOTIFS_ANCHOR_FOLDER_PROPERTY, True)
     self.anchor_output_folder = self.path_home + props.get_property(
         DataConstants.MOTIFS_ANCHOR_OUTPUT_FOLDER_PROPERTY, True)

     timer = Timer.get_instance()
     timer.step(" Start of ANCHOR motifs analysis \n")

     GlobalOverlapRegionAnalysis.anchor_overlap_analysis(
         self.protein_list, self.anchor_folder, self.anchor_output_folder,
         self.motif_folder, self.domain_region_file)

     timer.step(" End of ANCHOR motifs analysis \n")

     log.info("        .....End of ANCHOR motifs analysis.....\n")
Ejemplo n.º 8
0
 def whole_procedure():
     """Entry point of the disorder-analysis pipeline.

     Starts the chrono, logs the start message, instantiates
     ``DisorderAnalysis`` and stops the chrono. Every pipeline step is
     currently disabled, so the instance is created but not used.
     """
     chrono = Timer.get_instance()
     chrono.start_chrono()
     Logger.get_instance().info("Start of Disorder Analysis.....\n ")

     disorder = DisorderAnalysis()

     # Disabled steps, kept for reference in their intended order:
     #   disorder.change_header()
     #   disorder.split_dataset()
     #   disorder.anchor_analysis()
     #   disorder.iupred_analysis()
     #   disorder.analysis_tools_output()
     #   disorder.disordpbind_analysis()
     #   disorder.particular_analysis()

     Timer.get_instance().stop_chrono(" End of Disorder Analysis")
Ejemplo n.º 9
0
    def disordp_motifs(self):
        """Run the DisoRDPbind/motif overlap analysis.

        Reads ``self.path_home``, ``self.protein_list``, ``self.motif_folder``
        and ``self.domain_region_file``, which must already be set on the
        instance. The DisoRDPbind folder, output folder and file name are
        read from the property manager, stored as attributes and passed to
        ``GlobalOverlapRegionAnalysis.disordp_overlap_analysis``.
        """
        log = Logger.get_instance()
        log.info("        .....Start of DisoRDPbind motifs analysis.....\n")

        props = PropertyManager.get_instance()
        self.disordp_folder = self.path_home + props.get_property(
            DataConstants.MOTIFS_DISORDP_FOLDER_PROPERTY, True)
        self.disordp_output_folder = self.path_home + props.get_property(
            DataConstants.MOTIFS_DISORDP_OUTPUT_FOLDER_PROPERTY, True)
        # The file name is used as given, without the home prefix.
        self.filename = props.get_property(
            DataConstants.MOTIFS_DISORDP_FILE_PROPERTY, True)

        timer = Timer.get_instance()
        timer.step(" Start of DisoRDPbind motifs analysis \n")

        # Unlike the IUPred/ANCHOR variants, the output folder is the last
        # argument here.
        GlobalOverlapRegionAnalysis.disordp_overlap_analysis(
            self.protein_list, self.disordp_folder, self.filename,
            self.motif_folder, self.domain_region_file,
            self.disordp_output_folder)

        timer.step(" End of DisoRDPbind motifs analysis \n")

        log.info("        .....End of DisoRDPbind motifs analysis.....\n")
Ejemplo n.º 10
0
    def whole_procedure():
        """Entry point of the RBP dataset creation pipeline.

        Starts the chrono, logs the start message, instantiates
        ``MakeDatasetRbp`` and runs the only step that is currently enabled,
        ``split_dataset``, before stopping the chrono.
        """
        chrono = Timer.get_instance()
        chrono.start_chrono()
        Logger.get_instance().info("Start of the creation of RBP dataset.....\n ")

        maker = MakeDatasetRbp()

        # Disabled steps, kept for reference in their intended order:
        #   maker.delet_append_file()
        #   maker.comparison_dataset()
        #   maker.creation_list()
        #   maker.connection_to_ensembl()
        #   maker.dictionary_identifier()
        #   maker.longest_sequence()
        #   maker.isoform_sequences()
        #   maker.merger_sequences()
        maker.split_dataset()

        Timer.get_instance().stop_chrono(" End of the creation of RBP dataset")
Ejemplo n.º 11
0
 def analysis_tools_output(self):
     """Post-process the IUPred and ANCHOR outputs.

     Reads input/output paths, two thresholds, the amino-acid numbers and
     the dataset type from the property manager and stores them on the
     instance. ``Iupred.make_iupred_file`` is called first, then
     ``Anchor.make_anchor_file``; both write to the same analysis output
     path. Thresholds are converted to ``float`` and amino-acid numbers to
     ``int`` for the calls.
     """
     clock = Timer.get_instance()
     clock.step(" Start of tools analysis.. ")

     settings = PropertyManager.get_instance()
     self.path_home = Constants.PATH_HOME

     # --- IUPred ---
     self.input_path_iupred = self.path_home + settings.get_property(
         DataConstants.ANALYSIS_INPUT_PATH_IUPRED_PROPERTY, True)
     self.output_path_analysis = self.path_home + settings.get_property(
         DataConstants.ANALYSIS_OUTPUT_PATH_TOOLS_PROPERTY, True)

     self.threshold_1 = settings.get_property(
         DataConstants.ANALYSIS_THRESHOLD_1_PROPERTY, True)
     self.threshold_2 = settings.get_property(
         DataConstants.ANALYSIS_THRESHOLD_2_PROPERTY, True)
     self.number_aa_iupred = settings.get_property(
         DataConstants.ANALYSIS_AMINOACID_NUMBER_IUPRED_PROPERTY, True)
     self.dataset_type = settings.get_property(
         DataConstants.ANALYSIS_DATASET_TYPE_PROPERTY, True)

     Iupred.make_iupred_file(
         self.input_path_iupred,
         self.output_path_analysis,
         float(self.threshold_1),
         float(self.threshold_2),
         int(self.number_aa_iupred),
         self.dataset_type)

     # --- ANCHOR ---
     self.input_path_anchor = Constants.PATH_HOME + settings.get_property(
         DataConstants.ANALYSIS_INPUT_PATH_ANCHOR_PROPERTY, True)
     self.num_aa_anchor = settings.get_property(
         DataConstants.ANALYSIS_AMINOACID_NUMBER_ANCHOR_PROPERTY, True)

     Anchor.make_anchor_file(
         self.input_path_anchor,
         self.output_path_analysis,
         int(self.num_aa_anchor),
         self.dataset_type)

     clock.step(" End of tools analysis")
Ejemplo n.º 12
0
    def longest_sequence(self):
        """Select the longest sequences of the dataset 2 (isoform) file.

        Builds the sequence file, dictionary file and the two output file
        paths from properties (all rooted at ``Constants.PATH_HOME``), stores
        them on the instance and calls ``LengthSeq.longest_seq``. The final
        log message states that two files are produced: one with the longest
        sequences and one with the isoforms of the same length.
        """
        log = Logger.get_instance()
        log.info(" Start of the selection of longest sequences of novel dataset \n")

        props = PropertyManager.get_instance()
        home = Constants.PATH_HOME

        # Input: sequence file and identifier dictionary
        self.path_sequences = home + props.get_property(
            DataConstants.LONGEST_PATH_SEQUENCE_PROPERTY, True)
        self.file_sequences = self.path_sequences + props.get_property(
            DataConstants.LONGEST_PROT_FILE_SEQUENCES_2_PROPERTY, True)
        self.path_dictionary_identifier = home + props.get_property(
            DataConstants.LONGEST_PATH_DICTIONARY_PROPERTY, True)
        self.file_dictionary = self.path_dictionary_identifier + props.get_property(
            DataConstants.LONGEST_DICTIONARY_NAME_FILE_PROPERTY, True)

        # Output: both files are placed in the same output directory
        self.path_output_longest = home + props.get_property(
            DataConstants.LONGEST_PATH_OUTPUT_PROPERTY, True)
        self.path_file_longest = self.path_output_longest + props.get_property(
            DataConstants.LONGEST_FILE_PROPERTY, True)
        self.path_file_isoform = self.path_output_longest + props.get_property(
            DataConstants.ISOFORM_FILE_PROPERTY, True)

        LengthSeq.longest_seq(
            self.file_sequences,
            self.file_dictionary,
            self.path_file_longest,
            self.path_file_isoform)

        Timer.get_instance().step(
            " End of selection of the longest sequences in dataset 2  \n")

        # Adjacent literals reproduce the original single string exactly.
        log.info(
            " End of selection of the longest sequences: \n "
            "two file have been generated one with longest sequences and the other one containing the isoform with same length  ")
Ejemplo n.º 13
0
 def particular_analysis(self):
     """Split the disorder-tool outputs by protein classification file.

     For every file name in the comma-separated
     ``SPECIFIC_LIST_NAMEFILE_PROPERTY`` property, the four region tables
     (ANCHOR, IUPred x2, DisoRDPbind) and the three fraction tables (ANCHOR,
     IUPred, DisoRDPbind) are reduced to the rows whose first column appears
     in the protein-id column of that classification file. Each reduced
     table is written to ``<output dir><feature><suffix>``, where
     ``feature`` is the classification file name up to its first dot.
     DisoRDPbind results go to the dedicated DisoRDPbind output directory.

     All paths and settings are read from the property manager and stored
     on the instance. A timer step is recorded before and after.
     """
     Timer.get_instance().step(" Start of tools analysis for specific protein ")

     props = PropertyManager.get_instance()
     self.path_home = Constants.PATH_HOME
     home = self.path_home

     self.path_input_anchor_file = home + props.get_property(DataConstants.SPECIFIC_INPUT_ANCHOR_FILE_PROPERTY, True)
     self.path_input_iupred_file = home + props.get_property(DataConstants.SPECIFIC_INPUT_IUPRED_FILE_PROPERTY, True)
     self.path_input_disordp_file = home + props.get_property(DataConstants.SPECIFIC_INPUT_DISORDP_FILE_PROPERTY, True)
     self.path_input_reg_anchor = home + props.get_property(DataConstants.SPECIFIC_INPUT_REG_ANCHOR_FILE_PROPERTY, True)
     self.path_input_reg_iupred_1 = home + props.get_property(DataConstants.SPECIFIC_INPUT_REG_IUPRED_1_FILE_PROPERTY, True)
     self.path_input_reg_iupred_2 = home + props.get_property(DataConstants.SPECIFIC_INPUT_REG_IUPRED_2_FILE_PROPERTY, True)
     self.path_input_reg_diso = home + props.get_property(DataConstants.SPECIFIC_INPUT_REG_DISO_FILE_PROPERTY, True)
     self.input_files = home + props.get_property(DataConstants.SPECIFIC_INPUT_DIR_FILE_PROPERTY, True)
     self.list_namefiles = props.get_property(DataConstants.SPECIFIC_LIST_NAMEFILE_PROPERTY, True)
     self.path_output_dir = home + props.get_property(DataConstants.SPECIFIC_OUTPUT_DIR_PROPERTY, True)
     self.path_output_dir_diso = home + props.get_property(DataConstants.SPECIFIC_OUTPUT_DIR_DISO_PROPERTY, True)

     # Column holding the protein id in the classification files.
     # In Domain Class files it is column 2 (index 1 in Python);
     # in RNA target files it is column 1 (index 0 in Python).
     # Only the Domain Class column is read here; the RNA target column
     # property is not used by this method.
     self.protein_column_class = props.get_property(DataConstants.SPECIFIC_PROTEIN_LIST_COLUMN_CLASS_PROPERTY, True)
     protein_column = int(self.protein_column_class)

     # Region tables (first line of each file skipped)
     anchor_table = FileParser.make_table(self.path_input_reg_anchor, skip=1)
     iupred_table_1 = FileParser.make_table(self.path_input_reg_iupred_1, skip=1)
     iupred_table_2 = FileParser.make_table(self.path_input_reg_iupred_2, skip=1)
     disordp_table = FileParser.make_table(self.path_input_reg_diso, skip=1)

     # Fraction tables (first line of each file skipped)
     anchor_t = FileParser.make_table(self.path_input_anchor_file, skip=1)
     iupred_t = FileParser.make_table(self.path_input_iupred_file, skip=1)
     disordp_t = FileParser.make_table(self.path_input_disordp_file, skip=1)

     # One entry per output file, in the order the files are written:
     # (table, output directory, file-name suffix). The protein-id column
     # of each table does not depend on the classification file, so it is
     # extracted once here instead of once per loop iteration.
     sources = [
         (anchor_table, self.path_output_dir, '_AnchorRegion.txt'),
         (iupred_table_1, self.path_output_dir, '_IUPredRegion_0.4.txt'),
         (iupred_table_2, self.path_output_dir, '_IUPredRegion_0.5.txt'),
         (disordp_table, self.path_output_dir_diso, '_DisoRDPRegion.txt'),
         (anchor_t, self.path_output_dir, '_AnchorTable.txt'),
         (iupred_t, self.path_output_dir, '_IUPredTable_0.4_0.5.txt'),
         (disordp_t, self.path_output_dir_diso, '_DisoRDPTable.txt'),
     ]
     sources = [
         (table, TableWrapper.get_column(table, 0), out_dir, suffix)
         for table, out_dir, suffix in sources
     ]

     def rows_for(table, row_ids, wanted):
         # Keep the rows whose protein id (same index in row_ids) is wanted.
         return [row for n, row in enumerate(table) if row_ids[n] in wanted]

     for filename in self.list_namefiles.split(','):
         feature = filename.split('.')[0]
         table_domain = FileParser.make_table(self.input_files + filename)

         # A set gives constant-time membership tests; assumes the ids are
         # hashable values such as the strings parsed from the input files.
         wanted = set(TableWrapper.get_column(table_domain, protein_column))

         # Filter everything first, then write, so no file is written if
         # any of the filters fails.
         outputs = [
             (out_dir + feature + suffix, rows_for(table, row_ids, wanted))
             for table, row_ids, out_dir, suffix in sources
         ]
         for output_path, rows in outputs:
             FileWriter.write_table(output_path, rows)

     Timer.get_instance().step(" End of tools analysis for specific protein ")
Ejemplo n.º 14
0
    def connection_to_ensembl(self):
        """Download the sequences of dataset 1 and dataset 2 from Ensembl.

        Dataset 1 is queried with a gene list and a protein list whose file
        names come from the property manager. Dataset 2 is queried with the
        gene list ``Constants.FILE_DIFF`` and no protein list. Both calls go
        through ``Ensembl.list_get_seq`` with the query type converted to
        ``int``, and both results are written under the same output
        directory. All paths and settings are stored on the instance.

        Changes from the previous version: the dataset 1 output file
        property is now fetched with the same ``True`` flag as every other
        property here, and the shared output directory is resolved once
        instead of twice.
        """
        log = Logger.get_instance()
        timer = Timer.get_instance()
        props = PropertyManager.get_instance()

        log.info(" Connection to Ensembl: Starting...\n")

        # DATASET 1
        # =============================================
        log.info(" Dataset 1 : Extraction of sequences...\n")
        timer.step(" Start of sequences extraction \n")

        self.path_home = Constants.PATH_HOME
        self.list_path = Constants.PATH_HOME + props.get_property(
            DataConstants.DATASET_INPUT_PATH_PROPERTY, True)

        self.gene_list_1 = props.get_property(
            DataConstants.LIST_FILE_GENE_DATASET_1_PROPERTY, True)
        self.protein_list = props.get_property(
            DataConstants.LIST_FILE_PROTEIN_DATASET_1_PROPERTY, True)

        self.ensembl_gene_list_1_path = self.list_path + self.gene_list_1
        self.ensembl_protein_list_1_path = self.list_path + self.protein_list
        self.type_query1_ensembl = props.get_property(
            DataConstants.ENSEMBL_TYPE_QUERY_DATASET_1_PROPERTY, True)

        # Output directory shared by both datasets.
        self.ensembl_path_output = Constants.PATH_HOME + props.get_property(
            DataConstants.ENSEMBL_OUTPUT_PATH_SEQUENCE_PROPERTY, True)
        # NOTE(review): the flag was missing on this one call. It is
        # presumably the "property must be defined" switch, so without it a
        # missing property would fail in the concatenation instead; confirm
        # against PropertyManager.get_property.
        self.ensembl_output_dataset1 = self.ensembl_path_output + props.get_property(
            DataConstants.ENSEMBL_FILE_SEQUENCES_1_PROPERTY, True)

        Ensembl.list_get_seq(
            self.ensembl_gene_list_1_path,
            int(self.type_query1_ensembl),
            path_protein_list=self.ensembl_protein_list_1_path,
            path_output=self.ensembl_output_dataset1)

        timer.step(" End of Dataset 1 Sequences Extraction\n")
        log.info(
            " Extraction of sequences for the dataset 1 has been completed \n\n")

        # DATASET 2
        # =====================================================
        log.info(" Dataset 2 : Extraction of sequences ....\n")

        self.ensembl_input_list_2 = Constants.PATH_HOME + props.get_property(
            DataConstants.DATASET_OUTPUT_PROPERTY, True)
        self.gene_list_2 = Constants.FILE_DIFF
        self.ensembl_gene_list_2_path = self.ensembl_input_list_2 + self.gene_list_2
        self.type_query2_ensembl = props.get_property(
            DataConstants.ENSEMBL_TYPE_QUERY_DATASET_2_PROPERTY, True)

        # self.ensembl_path_output is reused from the dataset 1 section.
        self.ensembl_output_dataset2 = self.ensembl_path_output + props.get_property(
            DataConstants.ENSEMBL_FILE_SEQUENCES_2_PROPERTY, True)

        # Dataset 2 has no protein list.
        Ensembl.list_get_seq(
            self.ensembl_gene_list_2_path,
            int(self.type_query2_ensembl),
            path_protein_list=None,
            path_output=self.ensembl_output_dataset2)

        timer.step(" End of Dataset 2 Sequences Extraction\n")
        log.info(
            " Extraction of sequences for the dataset 2 has been completed \n\n")

        log.info(
            " The sequences file of dataset 1 and the novel gene in dataset 2 "
            "have been created in the following path  \n" + self.ensembl_path_output)