Beispiel #1
0
def create_accession_sets(genome_dict):
    """Split genome accessions into unique and duplicated groups.

    Args:
        genome_dict: Dictionary whose values are pdm_utils genome objects;
            each one is read through its ``accession`` attribute.

    Returns:
        A ``(unique, duplicated)`` tuple holding the two results of
        ``basic.identify_unique_items`` for the non-empty accessions.
    """
    # The MySQL database stores missing accession data as '' (there are no
    # NULL accessions), so empty strings are excluded before comparison.
    non_empty_accessions = [
        genome.accession
        for genome in genome_dict.values()
        if genome.accession != ""
    ]
    unique_items, repeated_items = basic.identify_unique_items(
        non_empty_accessions)
    return unique_items, repeated_items
Beispiel #2
0
def create_accession_sets(genome_dict):
    """Split genome accessions into unique and duplicated groups.

    When duplicated accessions are found, they are printed one per line
    and the function waits for the user to press ENTER before returning.

    Args:
        genome_dict: Dictionary whose values are pdm_utils genome objects;
            each one is read through its ``accession`` attribute.

    Returns:
        A ``(unique, duplicated)`` tuple holding the two results of
        ``basic.identify_unique_items`` for the non-empty accessions.
    """
    # The MySQL database stores missing accession data as '' (there are no
    # NULL accessions), so empty strings are excluded before comparison.
    non_empty_accessions = [
        genome.accession
        for genome in genome_dict.values()
        if genome.accession != ""
    ]
    unique, duplicated = basic.identify_unique_items(non_empty_accessions)

    has_duplicates = len(duplicated) > 0
    if has_duplicates:
        # Warn the user and pause so the message is not scrolled away.
        print("There are duplicated accessions. Some data will not be "
              "retrieved from GenBank:")
        for repeated_accession in duplicated:
            print(repeated_accession)
        input("\n\nPress ENTER to continue.")

    return unique, duplicated
Beispiel #3
0
 def set_unique_cds_end_orient_ids(self):
     """Record which CDS end-orientation ids are unique and which repeat.

     Passes ``self._cds_end_orient_ids`` to ``basic.identify_unique_items``
     and stores the two results, converted to sets, in
     ``self._cds_unique_end_orient_ids`` and
     ``self._cds_duplicate_end_orient_ids``.
     """
     partitioned = basic.identify_unique_items(self._cds_end_orient_ids)
     singles, repeats = partitioned
     self._cds_unique_end_orient_ids = set(singles)
     self._cds_duplicate_end_orient_ids = set(repeats)
Beispiel #4
0
 def set_unique_cds_start_end_ids(self):
     """Record which CDS start-end ids are unique and which repeat.

     Passes ``self._cds_start_end_ids`` to ``basic.identify_unique_items``
     and stores the two results, converted to sets, in
     ``self._cds_unique_start_end_ids`` and
     ``self._cds_duplicate_start_end_ids``.
     """
     partitioned = basic.identify_unique_items(self._cds_start_end_ids)
     singles, repeats = partitioned
     self._cds_unique_start_end_ids = set(singles)
     self._cds_duplicate_start_end_ids = set(repeats)