def get_duplicates_sequences_ids_by_organism_id(id_organism):
    """
    Pair up the ids of proteins that are duplicates of each other in an
    organism

    For every (sequence, start_point, end_point) entry returned by
    ``Protein.get_duplicates_sequence_by_organism_id``, the matching rows are
    fetched and the id of the first row is paired with the id of every
    following row.

    Entries whose start or end point is ``None`` are not looked up; the
    organism id and the sequence are appended to ``proteins_none.txt``
    instead.

    :param id_organism: id of the organism

    :type id_organism: int - required

    :return: list of (first_id, duplicate_id) pairs

    :rtype: List((int, int))

    :raises AssertionError: if a lookup returns fewer than two rows
    """
    list_duplicate_prots_ids = []
    list_duplicate_sequence = Protein.get_duplicates_sequence_by_organism_id(
        id_organism)

    # Iterating an empty list is a no-op, so no explicit length guard is needed.
    for sequence, start_point, end_point in list_duplicate_sequence:
        # Identity comparison is the correct way to test against None.
        if start_point is None or end_point is None:
            # NOTE(review): records are appended with no newline or separator
            # between them -- confirm whether that is intended.
            with open("proteins_none.txt", "a") as myfile:
                myfile.write((str(id_organism) + "," + sequence))
            continue

        sqlObj = _Protein_sql_new()
        # Single quotes are removed before the value reaches the SQL layer.
        sequence = sequence.replace("'", "")
        results = sqlObj.get_protein_id_by_sequence_location(
            sequence, start_point, end_point, id_organism)

        # A duplicated entry is expected to match at least two rows.
        assert len(results) > 1

        default_val = results[0][0]
        list_duplicate_prots_ids.extend(
            (default_val, element[0]) for element in results[1:])

    return list_duplicate_prots_ids
def get_protein_id_by_sequence_location(sequence, location_start,
                                        location_end, fk_id_organism):
    """
    Look up proteins by sequence, start location, end location and organism

    The four arguments are forwarded unchanged to the SQL layer and the first
    column of every returned row is collected.

    :param sequence: sequence to search for
    :param location_start: start location to search for
    :param location_end: end location to search for
    :param fk_id_organism: id of the organism

    :type fk_id_organism: int - required

    :return: first column of each matching row (presumably the protein id)

    :rtype: List(int)
    """
    rows = _Protein_sql_new().get_protein_id_by_sequence_location(
        sequence, location_start, location_end, fk_id_organism)
    return [row[0] for row in rows]
def create_protein_all_details_procedure(self, id_organism):
    """
    Insert this protein with all of its details and store the returned id

    No verification is performed here; the values are handed straight to
    ``insert_protein_all_info_return_id_procedure`` in this order:
        - accession number (``id_accession``)
        - designation
        - proteic sequence (``sequence_prot``)
        - nucleic sequence (``sequence_dna``)
        - start point
        - end point
        - start point in the contig (``start_point_cnt``)
        - end point in the contig (``end_point_cnt``)
        - contig foreign key (``fk_id_contig``)
        - the organism id given as argument

    The value returned by the SQL layer is written to ``self.id_protein``.

    :param id_organism: id of the organism, forwarded to the SQL layer

    :return: id of the protein

    :rtype int
    """
    new_id = _Protein_sql_new().insert_protein_all_info_return_id_procedure(
        self.id_accession,
        self.designation,
        self.sequence_prot,
        self.sequence_dna,
        self.start_point,
        self.end_point,
        self.start_point_cnt,
        self.end_point_cnt,
        self.fk_id_contig,
        id_organism)
    self.id_protein = new_id
    return new_id
def get_id_prot_by_designation(self):
    """
    Fetch the protein id that matches this object's designation

    ``self.designation`` is passed to ``get_protein_id_by_designation`` and
    its result is returned unchanged.

    :return: id of the protein

    :rtype int
    """
    return _Protein_sql_new().get_protein_id_by_designation(self.designation)
def remove_protein_by_its_id(id_protein):
    """
    Delete a protein identified by its id

    The call is delegated to the SQL layer and whatever it returns is passed
    back to the caller.

    :param id_protein: id of the protein

    :type id_protein: int - required

    :return: value returned by the SQL layer for the removal

    :rtype: int
    """
    return _Protein_sql_new().remove_protein_by_its_id(id_protein)
def get_qty_proteins_by_organism_id(id_organism):
    """
    Count the proteins that belong to an organism

    The result of the SQL layer is returned unchanged.

    :param id_organism: id of the organism

    :type id_organism: int - required

    :return: quantity of proteins (presumably -1 when the organism is
        unknown -- TODO confirm against the SQL layer)

    :rtype: int
    """
    # NOTE(review): the SQL-layer method name ends in "_is", not "_id".
    return _Protein_sql_new().get_qty_proteins_by_organism_is(id_organism)
def get_all_Proteins_not_in_temp_prot(self):
    """
    Build a Protein for every row of ``select_all_proteins_not_in_temp_prot``

    Per the query name these are the proteins not present in temp_prots
    (not treated yet). The first ten columns of each row are passed
    positionally to the ``Protein`` constructor.

    :return: array of Protein

    :rtype: array(Protein)
    """
    rows = _Protein_sql_new().select_all_proteins_not_in_temp_prot()
    # Columns 0..9 are read one by one, in order, for each row.
    return [Protein(*[row[col] for col in range(10)]) for row in rows]
def get_all_Proteins(self):
    """
    Build a Protein for every row of ``select_all_proteins_all_attributes``

    The first ten columns of each row are passed positionally to the
    ``Protein`` constructor.

    :return: array of Protein

    :rtype: array(Protein)
    """
    rows = _Protein_sql_new().select_all_proteins_all_attributes()
    # Columns 0..9 are read one by one, in order, for each row.
    return [Protein(*[row[col] for col in range(10)]) for row in rows]
def update_protein_contig(self):
    """
    Push this protein's contig data to the database

    The following values are sent to ``set_contig_protein_by_id`` together
    with ``self.id_protein``:
        - start point in the contig (``start_point_cnt``)
        - end point in the contig (``end_point_cnt``)
        - contig foreign key (``fk_id_contig``)

    ``self.id_protein`` is overwritten with the value the SQL layer returns.

    :return: id of the updated protein

    :rtype int
    """
    updated_id = _Protein_sql_new().set_contig_protein_by_id(
        self.id_protein,
        self.start_point_cnt,
        self.end_point_cnt,
        self.fk_id_contig)
    self.id_protein = updated_id
    return updated_id
def get_protein_by_id(id_protein):
    """
    Load a single protein given its id

    Only the first row returned by ``select_protein_by_id`` is used; its
    first ten columns are passed positionally to the ``Protein`` constructor.

    :param id_protein: id of the protein

    :type id_protein: int - required

    :return: Protein

    :rtype: Protein
    """
    rows = _Protein_sql_new().select_protein_by_id(id_protein)
    # NOTE(review): indexing the first row fails when the query returns
    # nothing -- callers presumably pass an id known to exist.
    first_row = rows[0]
    return Protein(*[first_row[col] for col in range(10)])
def get_duplicates_sequence_by_organism_id(id_organism):
    """
    List the duplicated sequences of an organism

    Each row returned by ``get_duplicates_sequences_orga_id`` is reduced to
    a tuple of its first three columns. An empty list is returned when the
    query yields no rows.

    :param id_organism: id of the organism

    :type id_organism: int - required

    :return: list with one 3-tuple per duplicated entry

    :rtype: List((sequence, start_point, end_point))
    """
    rows = _Protein_sql_new().get_duplicates_sequences_orga_id(id_organism)
    if len(rows) == 0:
        return []
    return [(row[0], row[1], row[2]) for row in rows]
def create_protein(self):
    """
    Insert this protein and store the returned id

    No verification is performed here; the values are handed straight to
    ``insert_protein_return_id`` in this order:
        - accession number (``id_accession``)
        - designation
        - proteic sequence (``sequence_prot``)
        - nucleic sequence (``sequence_dna``)
        - start point
        - end point

    The value returned by the SQL layer is written to ``self.id_protein``.

    :return: id of the protein

    :rtype int
    """
    new_id = _Protein_sql_new().insert_protein_return_id(
        self.id_accession,
        self.designation,
        self.sequence_prot,
        self.sequence_dna,
        self.start_point,
        self.end_point)
    self.id_protein = new_id
    return new_id
def get_all_Proteins_limited(quantity_prots, offset_position):
    """
    Build a Protein for every row of a limited, offset query

    Both arguments are forwarded to
    ``select_all_proteins_all_attributes_limited``; the first ten columns of
    each returned row are passed positionally to the ``Protein`` constructor.

    :param quantity_prots: number of proteins
    :param offset_position: where to start the collect

    :return: array of Protein

    :rtype: array(Protein)
    """
    rows = _Protein_sql_new().select_all_proteins_all_attributes_limited(
        quantity_prots, offset_position)
    # Columns 0..9 are read one by one, in order, for each row.
    return [Protein(*[row[col] for col in range(10)]) for row in rows]
def get_all_Proteins_by_organism_id(organism_id):
    """
    Build a Protein for every row found for an organism id

    The first ten columns of each row returned by
    ``select_all_proteins_all_attributes_by_organism_id`` are passed
    positionally to the ``Protein`` constructor.

    :param organism_id: id of the organism

    :type organism_id: int - required

    :return: array of Protein

    :rtype: array(Protein)
    """
    rows = _Protein_sql_new().select_all_proteins_all_attributes_by_organism_id(
        organism_id)
    # Columns 0..9 are read one by one, in order, for each row.
    return [Protein(*[row[col] for col in range(10)]) for row in rows]
def create_protein_secure(self):
    """
    Insert this protein only when its accession number is not yet known

    The accession number is looked up first. A lookup result of ``-1`` is
    treated as "not found" and the protein is created through
    ``create_protein``; any other result is reported on stdout and returned
    as is.

    The protein contains
        - accession number
        - designation (header)
        - proteic sequence
        - nucleic sequence
        - position start in the gene
        - position end in the gene

    :return: id of the protein

    :rtype int
    """
    existing_id = _Protein_sql_new().get_protein_id_by_accession(
        self.id_accession)

    if existing_id != -1:
        print("It already exists a protein with the accession number n: " +
              str(self.id_accession))
        return existing_id

    return self.create_protein()