コード例 #1
0
    def get_available_organisms(self):
        """Fill the organism combo box with the common organisms, sorted by name.

        ``self.organisms`` receives the taxonomy ids and the combo box receives
        the matching organism names; both are in the same (name-sorted) order,
        so a combo box index can be used to look up the taxonomy id.
        """
        named_ids = [(tax_id, taxonomy.name(tax_id)) for tax_id in taxonomy.common_taxids()]
        named_ids.sort(key=lambda pair: pair[1])

        self.organisms = [tax_id for tax_id, _ in named_ids]
        self.organism_select_combobox.addItems([name for _, name in named_ids])
コード例 #2
0
def upload_genesets():
    """Build the default gene sets and register them as server files.

    Every gene set builder is run for every common taxonomy id. The result is
    split by hierarchy and each group is passed to ``register_serverfiles``.
    Builder/organism pairs that produce nothing usable are skipped.
    """
    # omim_gene_sets is intentionally absent: OMIM support was dropped
    # because its files have not been updated since 2011.
    builders = (
        go_gene_sets,
        kegg_gene_sets,
        cytoband_gene_sets,
        reactome_gene_sets,
        dicty_mutant_gene_sets,
    )

    tax_ids = taxonomy.common_taxids()
    for build in builders:
        for tax_id in tax_ids:
            try:
                try:
                    groups = build(tax_id).split_by_hierarchy()
                except AttributeError:
                    # nothing to split for this builder/organism pair
                    continue

                for group in groups:
                    register_serverfiles(group)
            except taxonomy.UnknownSpeciesIdentifier:
                print("Organism ontology not available %s" % tax_id)
            except GeneSetException:
                print("Empty gene sets. %s" % tax_id)
コード例 #3
0
def listAvailable():
    """Return the gene annotation files that are available for common organisms.

    Returns:
        dict: organism name -> ``'gene_association.<taxid>'`` for every common
        taxonomy id whose annotation file appears in the listing returned by
        ``serverfiles.ServerFiles().listfiles(DOMAIN)``.
    """
    taxids = taxonomy.common_taxids()

    # The listing does not depend on the organism, so fetch it once instead
    # of once per taxonomy id.
    listed_files = serverfiles.ServerFiles().listfiles(DOMAIN)

    available = {}
    for taxid in taxids:
        file_name = 'gene_association.{}'.format(taxid)
        if (DOMAIN, file_name) in listed_files:
            available[taxonomy.name(taxid)] = file_name
    return available
コード例 #4
0
    def test_uncommon_taxonomy(self):
        """An organism outside the common list still resolves to its name."""
        self.assertNotIn(self.dog, taxonomy.common_taxids())
        self.assertEqual(taxonomy.name(self.dog), 'Canis lupus familiaris')

        # Reverse lookup and short names are not supported yet for
        # organisms outside the common list.
        dog_tax_id = taxonomy.species_name_to_taxid('Canis lupus familiaris')
        self.assertIsNone(dog_tax_id)
        self.assertEqual(len(taxonomy.shortname(self.dog)), 0)
コード例 #5
0
    def _get_available_organisms(self):
        """Load the common organisms into ``self.organisms`` and the combo box.

        Organisms are ordered by name. ``self.organisms`` holds the taxonomy
        ids and ``self.organismComboBox`` the names, index-aligned.
        """
        organisms_by_name = sorted(
            ((tax_id, taxonomy.name(tax_id)) for tax_id in taxonomy.common_taxids()),
            key=lambda entry: entry[1],
        )

        self.organisms = [tax_id for tax_id, _name in organisms_by_name]

        organism_names = [name for _tax_id, name in organisms_by_name]
        self.organismComboBox.addItems(organism_names)
コード例 #6
0
    def create_model(self):
        """Build the item model listing every locally or remotely known file.

        One row is created per file key (the sorted union of
        ``self.allinfo_local`` and, when present, ``self.allinfo_remote``) and
        one column per entry in ``self.HEADER_SCHEMA``.

        Returns:
            tuple: ``(model, current_index)`` where ``current_index`` is the
            row whose joined file path equals ``self.selected_id``, or ``-1``
            when no row matches.
        """
        allkeys = set(self.allinfo_local)

        if self.allinfo_remote is not None:
            allkeys = allkeys | set(self.allinfo_remote)

        allkeys = sorted(allkeys)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(self._header_labels)

        # Loop-invariant: look the common taxonomy ids up once instead of
        # once per taxid cell.
        known_taxids = common_taxids()

        current_index = -1
        for i, file_path in enumerate(allkeys):
            data_info = self._parse_info(file_path)
            row = []

            for info_tag, _header_setting in self.HEADER_SCHEMA:
                item = QStandardItem()

                # a tag missing from the parsed info (unknown tag in JSON)
                # is shown as an empty cell
                data = getattr(data_info, info_tag, '')

                # first column indicating cached data sets
                if info_tag == 'islocal':
                    item.setData(' ' if data else '', Qt.DisplayRole)
                    item.setData(data_info, Qt.UserRole)

                else:
                    # parse taxid to common name
                    if info_tag == 'taxid' and data in known_taxids:
                        data = shortname(data)[0].title()

                    if info_tag == 'tags':
                        data = ', '.join(data) if data else ''

                    item.setData(data, Qt.DisplayRole)

                # set icon to Target column
                if info_tag == 'target' and data:
                    item.setIcon(
                        Orange.widgets.data.owdatasets.variable_icon(data))

                row.append(item)
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        return model, current_index
コード例 #7
0
    def test_common_taxonomy(self):
        """Common organisms resolve in both directions and have short names."""
        self.assertGreater(len(taxonomy.common_taxids()), 0)

        # The expected human name was mangled to 'H**o sapiens'; the
        # scientific name is 'Homo sapiens'.
        self.assertEqual(taxonomy.name(self.human), 'Homo sapiens')
        self.assertEqual(taxonomy.name(self.dicty), 'Dictyostelium discoideum')

        self.assertEqual(taxonomy.species_name_to_taxid('Homo sapiens'),
                         self.human)
        self.assertEqual(
            taxonomy.species_name_to_taxid('Dictyostelium discoideum'),
            self.dicty)

        self.assertGreater(len(taxonomy.shortname(self.human)), 0)
        self.assertGreater(len(taxonomy.shortname(self.dicty)), 0)
コード例 #8
0
    mapper[MAP_LOCUS][gene.locus_tag].append(gene)
    mapper[MAP_GENE_IDS][gene.gene_id].append(gene)

    for gene_synonym in gene.synonyms:
        mapper[MAP_SYNONYMS][gene_synonym].append(gene)

    for source_id in gene.sources.values():
        mapper[MAP_SOURCES][source_id].append(gene)


print("Creating gene name mapper ...")

con = sqlite3.connect(db_path, timeout=15)
cursor = con.cursor()

# Build and pickle one gene name mapper per common organism.
for taxonomy_id in common_taxids():
    g_db = GeneInfoDB()

    # one empty bucket per lookup kind, filled in by parse_gene_record
    gene_mapper = {
        map_key: defaultdict(list)
        for map_key in (MAP_GENE_IDS, MAP_SOURCES, MAP_SYMBOLS, MAP_SYNONYMS, MAP_LOCUS)
    }

    for record in g_db.select_gene_matcher_data(taxonomy_id):
        parse_gene_record(taxonomy_id, gene_mapper, record)

    matcher_path = os.path.join(domain_path, MATCHER_FILENAME.format(taxonomy_id))
    with open(matcher_path, 'wb') as file:
        pickle.dump(gene_mapper, file, protocol=pickle.HIGHEST_PROTOCOL)
コード例 #9
0
class FileUploadHelper(QDialog):
    """Dialog for adding a user-selected file to the local serverfiles folder.

    The user picks a domain, an organism, a title, tags and a file. On
    accept the file is copied into the local folder of the chosen domain
    under a name derived from the selection, and an info file is created
    next to it.

    The parent widget is expected to provide ``initialize_files_view()`` and
    a ``_dialog`` attribute; both are used by this dialog.
    """

    # settings
    kegg_domain = 'KEGG'

    # combo box label -> serverfiles domain
    supported_domains = OrderedDict({
        'Gene Ontology': gene_ontology_domain,
        'Gene Sets': gene_sets_domain
    })

    # organism names shown in the organism combo box
    supported_organisms = [
        common_taxid_to_name(tax_id) for tax_id in common_taxids()
    ]

    # combo box label -> gene set hierarchy
    hierarchies = {
        'GO - Biological Process': ('GO', 'biological_process'),
        'GO - Molecular Function': ('GO', 'molecular_function'),
        'GO - Cellular Component': ('GO', 'cellular_component'),
        'KEGG - Pathways': ('KEGG', 'pathways'),
        'KEGG - Orthologs': ('KEGG', 'orthologs')
    }

    def __init__(self, parent=None):
        super(FileUploadHelper, self).__init__(
            parent, Qt.Window | Qt.WindowTitleHint | Qt.CustomizeWindowHint
            | Qt.WindowCloseButtonHint | Qt.WindowMaximizeButtonHint)
        self.setAttribute(Qt.WA_DeleteOnClose)
        self.setWindowTitle('Add new file')

        self.info_state = INFO_FILE_SCHEMA
        self.layout = QVBoxLayout(self)

        # domain selection combobox
        self.domain_selection = QComboBox()
        self.domain_selection.addItems(self.supported_domains.keys())
        self.domain_selection.currentIndexChanged.connect(
            self.__on_domain_selection)
        self.__create_selection_row('Domain: ', self.domain_selection)

        # hierarchy selection combobox (only visible for the gene sets domain)
        self.hierarchy_selection = QComboBox()
        self.hierarchy_selection.addItems(self.hierarchies.keys())
        self.layout.addWidget(self.hierarchy_selection,
                              alignment=Qt.AlignVCenter)
        self.__on_domain_selection()

        # select organism
        self.organism_selection = QComboBox()
        self.organism_selection.addItems(self.supported_organisms)
        self.__create_selection_row('Organism: ', self.organism_selection)

        # title
        self.line_edit_title = QLineEdit()
        self.__create_selection_row('Title: ', self.line_edit_title)

        # tags
        self.line_edit_tags = QLineEdit()
        self.__create_selection_row('Tags (comma-separated): ',
                                    self.line_edit_tags)

        # file selector
        self.file_info = QLabel()
        self.file_select_btn = QPushButton('Select File', self)
        self.file_select_btn.clicked.connect(self.__handle_file_selector)
        self.__create_selection_row(' ', self.file_select_btn)

        # add file info section
        self.layout.addWidget(self.file_info, alignment=Qt.AlignCenter)

        self.layout.addStretch(1)

        # Ok and Cancel buttons
        self.buttons = QDialogButtonBox(
            QDialogButtonBox.Ok | QDialogButtonBox.Cancel, Qt.Horizontal, self)
        self.layout.addWidget(self.buttons, alignment=Qt.AlignJustify)

        self.buttons.accepted.connect(self.__accept)
        self.buttons.rejected.connect(self.__close)

        # path to a selected file
        self.file_path = None

    def __on_domain_selection(self):
        """Show the hierarchy combo box only for the gene sets domain."""
        selected = self.__get_selected_domain() == gene_sets_domain
        self.hierarchy_selection.setVisible(selected)

    def __get_selected_domain(self):
        """Return the domain that belongs to the selected domain label."""
        domain_label = list(self.supported_domains.keys())[
            self.domain_selection.currentIndex()]
        return self.supported_domains[domain_label]

    def __get_selected_hier(self):
        """Return the hierarchy tuple that belongs to the selected label."""
        hier_label = list(
            self.hierarchies.keys())[self.hierarchy_selection.currentIndex()]
        return self.hierarchies[hier_label]

    def __create_selection_row(self, label, widget):
        """Add a left-aligned label followed by ``widget`` to the layout."""
        self.layout.addWidget(QLabel(label), alignment=Qt.AlignLeft)
        self.layout.addWidget(widget, alignment=Qt.AlignVCenter)

    def __accept(self):
        """Store the selected file, refresh the parent's view and close.

        Does nothing while no file has been selected.
        """
        if self.file_path:
            self.info_state = self.__parse_selection()
            self.__move_to_serverfiles_folder(self.file_path)

            self.parent().initialize_files_view()
            self.close()

    def __close(self):
        self.close()

    def closeEvent(self, event):
        # clean-up
        self.parent()._dialog = None

    def __filename(self, domain, organism):
        """ Create filename based on domain name and organism.

        Returns None when the domain is not supported or no organism is
        given.
        """
        if domain not in self.supported_domains.values() or not organism:
            return None

        if domain == gene_ontology_domain:
            return FILENAME_ANNOTATION.format(organism)

        if domain == gene_sets_domain:
            return filename((self.__get_selected_hier()), organism)

        return None

    def __parse_selection(self):
        """Collect the current dialog selection into an info dictionary.

        Tags are split on commas; surrounding whitespace is removed and
        empty entries are dropped, so an empty tags field gives ``[]``.
        Errors raised while resolving the organism propagate to the caller.
        """
        domain = self.__get_selected_domain()
        organism = taxname_to_taxid(self.supported_organisms[
            self.organism_selection.currentIndex()])

        stripped_tags = (
            tag.strip() for tag in self.line_edit_tags.text().split(','))

        return {
            'domain': domain,
            'organism': organism,
            'filename': self.__filename(domain, organism),
            'title': self.line_edit_title.text(),
            'tags': [tag for tag in stripped_tags if tag],
            'source': SOURCE_USER
        }

    def __move_to_serverfiles_folder(self, selected_file_path):
        """Copy the selected file into the domain folder and create its info file.

        An error raised by the copy propagates to the caller.
        """
        domain_path = serverfiles.localpath(self.info_state['domain'])
        file_path = os.path.join(domain_path, self.info_state['filename'])
        create_folder(domain_path)

        # TODO: handle copy errors properly instead of letting them propagate
        copyfile(selected_file_path, file_path)

        # if copy successful create .info file
        create_info_file(file_path, **self.info_state)

    def __handle_file_selector(self):
        """Ask for a file and remember it; a cancelled dialog changes nothing."""
        selected_path = QFileDialog.getOpenFileName(self, 'Open File')[0]
        if not selected_path:
            # the dialog was cancelled: keep any previously selected file
            return

        self.file_path = selected_path
        self.file_info.setText('Selected File: {}'.format(
            os.path.basename(self.file_path)))
コード例 #10
0
from orangecontrib.bioinformatics.ncbi.taxonomy.utils import Taxonomy

# column indexes of the fields read from each row
# ftp://ftp.ncbi.nlm.nih.gov/gene/README under "gene_info" section
tax_id, gene_id, symbol, synonyms, db_refs, description = 0, 1, 2, 4, 5, 8
locus_tag, chromosome, map_location, type_of_gene, modification_date = 3, 6, 7, 9, 14
symbol_from_nomenclature_authority, full_name_from_nomenclature_authority = 10, 11
nomenclature_status, other_designations = 12, 13

domain_path = sf_local.localpath(DOMAIN)
temp_path = os.path.join(domain_path, sf_temp)
file_path = os.path.join(domain_path, FILENAME)

create_folder(domain_path)
create_folder(temp_path)
parent_tax_ids = common_taxids()

# we must include all strains of organism. Genes refer to specific strain and not to parent organism
tax_ids = []
tax_obj = Taxonomy()
for parent_id in parent_tax_ids:
    strains = tax_obj.get_all_strains(parent_id)
    tax_ids.append(parent_id)
    if strains:
        # each parent id is followed by its strain ids
        tax_ids.extend(strains)

init_table = """ 
 CREATE TABLE "gene_info" ( 
    tax_id INTEGER NOT NULL, 
    gene_id INTEGER NOT NULL UNIQUE, 
コード例 #11
0
    # read the information from the local file
    with open(localfile, 'rb') as f:
        gds_info, excluded = pickle.load(f, encoding='latin1')
        f.close()

except FileNotFoundError as e:
    print('{} file on the server not found!'.format(GDS_INFO))
    force_update = True

# if needed to refresh the data base
if force_update:
    gds_info, excluded = ({}, {})

# list of common organisms may have changed, rescan excluded list
# (the list is looked up once and reused by all three filters below)
common_ids = taxonomy.common_taxids()

excluded = {gds_id: taxid for gds_id, taxid in excluded.items()
            if taxid not in common_ids}
excluded.update({gds_id: info["taxid"] for gds_id, info in gds_info.items()
                 if info["taxid"] not in common_ids})
gds_info = {gds_id: info for gds_id, info in gds_info.items()
            if info["taxid"] in common_ids}

# get the list of GDS files from NCBI directory
print("Retrieving ftp directory ...")
ftp = ftplib.FTP(FTP_NCBI)
ftp.login()
ftp.cwd(NCBI_DIR)
dirlist = []
ftp.dir(dirlist.append)

# keep the first "GDS<digits>" occurrence of every directory line that has one
m = re.compile("GDS[0-9]*")
gds_names = [found.group(0) for found in map(m.search, dirlist) if found]
コード例 #12
0
        'tax_id', 'GeneID', 'GO_ID', 'Evidence', 'Qualifier', 'GO_term'
        'PubMed', 'Category'
    ]
    data = gene2go.get(species, None)

    if data is not None:
        with open(f'data/go/{species}.tab', 'w') as fp:
            csv_writer = csv.writer(fp, delimiter='\t')
            csv_writer.writerow(header)
            csv_writer.writerows(data)


def gene_ontology(file_path: str) -> None:
    """Copy the ontology file at *file_path* to ``data/go/gene_ontology.obo``.

    The ``data/go`` directory must already exist.
    """
    copy2(file_path, 'data/go/gene_ontology.obo')


if __name__ == "__main__":
    # Script entry point. Reads two command line arguments:
    #   sys.argv[1] is passed to load_gene2go,
    #   sys.argv[2] is passed to gene_ontology.
    taxonomy_db = Taxonomy()

    # one list per common organism: the organism itself followed by its strains
    supported_taxonomies = [[tax] + taxonomy_db.get_all_strains(tax)
                            for tax in common_taxids()]

    # every supported taxonomy id (organism or strain) -> result of get_species
    to_species = {
        tax: taxonomy_db.get_species(tax)
        for strains in supported_taxonomies for tax in strains
    }

    # NOTE(review): gene2go and to_species are module globals here and are
    # presumably read by gene_annotations - confirm before renaming them.
    gene2go = load_gene2go(sys.argv[1])
    for tax in common_taxids():
        gene_annotations(tax)

    gene_ontology(sys.argv[2])
コード例 #13
0
 def setUp(self):
     """Prepare the common taxonomy ids, their (name, id) pairs and a Taxonomy."""
     self.common_ids = taxonomy.common_taxids()

     organism_names = map(taxonomy.name, self.common_ids)
     self.organisms = list(zip(organism_names, self.common_ids))

     self.taxon = taxonomy.Taxonomy()
コード例 #14
0
# Compress the ontology file into the temp folder. The source file is opened
# in its own ``with`` so that its handle is closed as well.
with bz2.BZ2File(os.path.join(temp_path, FILENAME_ONTOLOGY), mode='w', compresslevel=9) as f_compressed:
    with open(os.path.join(domain_path, FILENAME_ONTOLOGY), 'rb') as f_ontology:
        shutil.copyfileobj(f_ontology, f_compressed)

create_info_file(os.path.join(temp_path, FILENAME_ONTOLOGY),
                 domain=DOMAIN,
                 filename=FILENAME_ONTOLOGY,
                 source=SOURCE_SERVER,
                 title=ONTOLOGY_TITLE,
                 tags=ONTOLOGY_TAGS,
                 uncompressed=db_size,
                 compression='bz2')

# GENE ANNOTATIONS

tax_ids = common_taxids()
taxonomy = Taxonomy()
store_lines_by_taxid = defaultdict(list)


# Download the annotations file; the response is closed once it is copied.
with urlopen(FTP_URL_ANNOTATIONS, timeout=30) as stream:
    with open(os.path.join(domain_path, FTP_FILE_ANNOTATIONS), 'wb') as f:
        shutil.copyfileobj(stream, f)


with gzip.open(os.path.join(domain_path, FTP_FILE_ANNOTATIONS), 'rb') as gene2go:
    header = gene2go.readline()

    for line in gene2go:
        split_line = line.decode().split('\t')
コード例 #15
0
                    name=str(cell_type),
                    genes=set([str(gene) for gene in genes if gene != '?']),
                    hierarchy=('Marker Genes', file_name_to_hier[file_name]),
                    organism=tax_id,
                    link='')

                gene_sets.append(gs)

            for gs_group in GeneSets(gene_sets).split_by_hierarchy():
                hierarchy = gs_group.common_hierarchy()
                gs_group.to_gmt_file_format(
                    f'{data_path}/gene_sets/{filename(hierarchy, tax_id)}')


if __name__ == "__main__":
    # Build every kind of gene set for each common organism.
    for common_tax_id in taxonomy.common_taxids():
        reactome_gene_sets(common_tax_id)
        cytoband_gene_sets(common_tax_id)
        dicty_mutant_gene_sets(common_tax_id)

        # KEGG and Gene Ontology do not cover every organism; a missing one
        # is skipped on purpose.
        try:
            kegg_gene_sets(common_tax_id)
        except taxonomy.utils.UnknownSpeciesIdentifier:
            # KEGG organism code not found
            pass

        try:
            go_gene_sets(common_tax_id)
        except FileNotFoundError:
            # Organism is not supported in Gene Ontology module
            pass