def list_all(**kwargs):
    """
    List the gene sets known to the server files repository.

    File names reported for ``DOMAIN`` by both
    ``serverfiles.ServerFiles().listfiles`` and ``serverfiles.listfiles``
    are run through ``filename_parse``; the parsed results are collected in
    a set, so an entry reported by both listings appears only once.

    :param kwargs:
        * *organism* (``str``) -- Taxonomy id (NCBI taxonomy database)

    :return: when ``organism`` is given (and truthy), a :obj:`list` with the
        hierarchy of every entry whose organism equals it; otherwise the
        :obj:`set` of all parsed (hierarchy, organism) entries.

    Example
    --------
    The available gene set collection can be listed with::

        list_all(organism='10090')
    """
    wanted = kwargs.get("organism", None)

    server_listing = serverfiles.ServerFiles().listfiles(DOMAIN)
    module_listing = serverfiles.listfiles(DOMAIN)

    parsed = set()
    for _domain, file_name in server_listing + module_listing:
        parsed.add(filename_parse(file_name))

    if not wanted:
        return parsed
    return [hierarchy for hierarchy, tax_id in parsed if tax_id == wanted]
def listAvailable():
    """
    Map organism names to their annotation file names available on the server.

    Only taxonomy ids returned by ``taxonomy.common_taxids()`` are
    considered, and only those whose ``gene_association.<taxid>`` file is
    present in ``serverfiles.ServerFiles().listfiles(DOMAIN)``.

    :return: ``{taxonomy.name(taxid): 'gene_association.<taxid>'}``
    :rtype: dict
    """
    taxids = taxonomy.common_taxids()
    if not taxids:
        # Nothing to check, so do not contact the server at all.
        return {}

    # Ask the server for its listing once; the listing does not depend on
    # the taxonomy id, so there is no reason to request it per id.
    listed = serverfiles.ServerFiles().listfiles(DOMAIN)

    available = {}
    for taxid in taxids:
        file_name = 'gene_association.{}'.format(taxid)
        if (DOMAIN, file_name) in listed:
            available[taxonomy.name(taxid)] = file_name
    return available
def list_all(**kwargs):
    """
    List the gene sets reported by the server files repository.

    Every file name listed for ``DOMAIN`` by
    ``serverfiles.ServerFiles().listfiles`` is run through
    ``filename_parse``.

    :param kwargs:
        * *organism* -- when given (and truthy), keep only entries whose
          organism equals this value.

    :return: a list of (hierarchy, organism) entries, in listing order.
    """
    wanted = kwargs.get("organism", None)

    parsed = []
    for _domain, file_name in serverfiles.ServerFiles().listfiles(DOMAIN):
        parsed.append(filename_parse(file_name))

    if not wanted:
        return parsed
    return [(hierarchy, tax_id)
            for hierarchy, tax_id in parsed
            if tax_id == wanted]
def _load_data(self):
    """
    Refresh the available database sources and load the selected one.

    Source descriptions are requested from the server for
    ``serverfiles_domain``; on a connection error the result of
    ``serverfiles.allinfo`` is used instead and the ``using_local_files``
    warning is shown. Sources are stored in ``self.available_db_sources``,
    keyed by the part of their title after the last ``': '``.

    When at least one source exists, the file of
    ``self.selected_db_source`` is obtained, read as a ``Table``, reduced to
    a fixed order of meta columns, and stored in ``self.data``.
    """
    self.Warning.using_local_files.clear()

    sources = {}
    try:
        sources.update(serverfiles.ServerFiles().allinfo(serverfiles_domain))
    except requests.exceptions.ConnectionError:
        sources.update(serverfiles.allinfo(serverfiles_domain))
        self.Warning.using_local_files()

    by_title = {}
    for info in sources.values():
        by_title[info.get('title').split(': ')[-1]] = info
    self.available_db_sources = by_title

    if not self.available_db_sources:
        return

    selected = self.available_db_sources[self.selected_db_source]
    file_name = selected['filename']

    try:
        serverfiles.update(serverfiles_domain, file_name)
    except requests.exceptions.ConnectionError:
        # try to update file. Ignore network errors.
        pass

    try:
        file_path = serverfiles.localpath_download(
            serverfiles_domain, file_name)
    except requests.exceptions.ConnectionError:
        # Unexpected error.
        raise

    table = Table(file_path)

    # enforce order
    source_domain = table.domain
    column_order = (
        'Organism',
        'Name',
        'Entrez ID',
        'Cell Type',
        'Function',
        'Reference',
        'URL',
    )
    ordered_domain = Domain(
        [], metas=[source_domain[column] for column in column_order])
    self.data = table.transform(ordered_domain)
def retrieveFilesList(advance=lambda: None):
    """
    Retrieve and return serverfiles.allinfo for all domains.

    :param advance: zero-argument callable invoked once before the request
        and once after it succeeds (e.g. to step a progress indicator).
    :return: the value of ``serverfiles.ServerFiles().allinfo()``.
    :raises ConnectionError: when the request times out or the connection
        fails; the underlying ``requests`` exception is attached as the
        cause and its text is used as the message.
    """
    import requests.exceptions

    advance()
    try:
        server_info = serverfiles.ServerFiles().allinfo()
    except (requests.exceptions.Timeout,
            requests.exceptions.ConnectionError) as err:
        # Keep the exception type callers already catch, but do not lose
        # the reason for the failure.
        raise ConnectionError(str(err)) from err
    advance()
    return server_info
def _load_data(self) -> None:
    """
    Collect available data sources (marker genes data sets).

    Source descriptions are requested from the server for
    ``SERVER_FILES_DOMAIN``. On a connection error the result of
    ``serverfiles.allinfo`` is used instead and the ``using_local_files``
    warning is shown. The result is stored in ``self.available_sources``,
    keyed by the part of each source's title after the last ``': '``.
    """
    self.Warning.using_local_files.clear()

    sources = {}
    try:
        sources.update(serverfiles.ServerFiles().allinfo(SERVER_FILES_DOMAIN))
    except requests.exceptions.ConnectionError:
        sources.update(serverfiles.allinfo(SERVER_FILES_DOMAIN))
        self.Warning.using_local_files()

    by_title = {}
    for info in sources.values():
        by_title[info.get('title').split(': ')[-1]] = info
    self.available_sources = by_title
def get_available_db_sources():
    """
    Return the known database sources, keyed by their short title.

    Source descriptions from ``serverfiles.ServerFiles().allinfo`` and from
    ``serverfiles.allinfo`` (both for ``serverfiles_domain``) are merged;
    the latter is applied last, so it wins for keys present in both.

    :return: dict mapping the part of each source's title after the last
        ``': '`` to the source description.
    :raises ConnectionError: when the server cannot be reached. The
        original exception is attached as the cause.
    """
    found_sources = {}
    try:
        found_sources.update(
            serverfiles.ServerFiles().allinfo(serverfiles_domain))
    except ConnectionError as err:
        raise ConnectionError(
            'Can not connect to {}. Using only local files.'.format(
                serverfiles.server_url)) from err

    # Only reached when the server answered. Doing this in a ``finally``
    # would run it on the error path too, where the result is thrown away
    # and a failure here would replace the ConnectionError.
    found_sources.update(serverfiles.allinfo(serverfiles_domain))

    return {
        item.get('title').split(': ')[-1]: item
        for item in found_sources.values()
    }
def evaluate_files_state(progress_callback):
    """
    Build a ``FileState`` for every file known to the server or locally.

    ``progress_callback.emit()`` is called three times: at the start, after
    the server information has been fetched, and after the list is built.

    :param progress_callback: object with a zero-argument ``emit`` method.
    :return: list of ``FileState`` objects sorted by (domain, file name);
        the server or local info of an entry is ``None`` when that side
        does not know the file.
    :raises Timeout, ConnectionError: propagated unchanged from the server
        request.
    """
    progress_callback.emit()

    # fetch remote info
    try:
        remote = serverfiles.ServerFiles().allinfo()
    except (Timeout, ConnectionError):
        raise
    progress_callback.emit()

    # fetch local info
    local = serverfiles.allinfo()

    known = set(local.keys()) | set(remote.keys())
    states = [
        FileState(domain, file_name,
                  remote.get((domain, file_name), None),
                  local.get((domain, file_name), None))
        for domain, file_name in sorted(known)
    ]
    progress_callback.emit()

    return states
def __init__(self, parent=None):
    """
    Set up widget state and build the user interface.

    The control area gets three tabs ("Input", "Filter", "Select"); the
    main area gets a vertical splitter holding the GO term tree and the
    table of significant terms. Finally the annotation files listed for
    ``DOMAIN`` (on the server and by ``serverfiles.listfiles``) are
    collected into ``self.available_annotations`` and a ``ThreadExecutor``
    is created.

    :param parent: forwarded to the base class initializer.
    """
    # ``super()`` already binds ``self``; passing it explicitly as well
    # would shift every positional argument of the base initializer by one.
    super().__init__(parent)

    self.input_data = None
    self.ref_data = None
    self.ontology = None
    self.annotations = None
    self.loaded_annotation_code = None
    self.treeStructRootKey = None
    self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
    self.selectedTerms = []
    self.selectionChanging = 0
    self.__state = State.Ready
    # Single-shot timer whose timeout triggers ``self.__update``.
    self.__scheduletimer = QTimer(self, singleShot=True)
    self.__scheduletimer.timeout.connect(self.__update)

    #############
    # GUI
    #############
    self.tabs = gui.tabWidget(self.controlArea)

    # Input tab
    self.inputTab = gui.createTabPage(self.tabs, "Input")
    box = gui.widgetBox(self.inputTab, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    gui.button(box, self, "Ontology/Annotation Info",
               callback=self.ShowInfo,
               tooltip="Show information on loaded ontology and annotations")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.inputTab, self, "useReferenceDataset",
        ["Entire genome", "Reference set (input)"],
        tooltips=["Use entire genome for reference",
                  "Use genes from Referece Examples input signal as reference"],
        box="Reference",
        callback=self.__invalidate)

    # The "Reference set (input)" option starts out disabled.
    self.referenceRadioBox.buttons[1].setDisabled(True)
    gui.radioButtonsInBox(
        self.inputTab, self, "aspectIndex",
        ["Biological process", "Cellular component", "Molecular function"],
        box="Aspect",
        callback=self.__invalidate)

    # Filter tab
    self.filterTab = gui.createTabPage(self.tabs, "Filter")
    box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")

    gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                 callback=self.FilterAndDisplayGraph,
                 tooltip="Filter by number of input genes mapped to a term")
    ibox = gui.indentedBox(box)
    gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
             step=1, label='#:', labelWidth=15,
             callback=self.FilterAndDisplayGraph,
             callbackOnReturn=True,
             tooltip="Min. number of input genes mapped to a term")

    gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                 callback=self.FilterAndDisplayGraph,
                 tooltip="Filter by term p-value")

    gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                   step=1e-8, label='p:', labelWidth=15,
                   callback=self.FilterAndDisplayGraph,
                   callbackOnReturn=True,
                   tooltip="Max term p-value")

    # use filterByPValue for FDR, as it was the default in prior versions
    gui.checkBox(box, self, "filterByPValue", "FDR",
                 callback=self.FilterAndDisplayGraph,
                 tooltip="Filter by term FDR")
    gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                   step=1e-8, label='p:', labelWidth=15,
                   callback=self.FilterAndDisplayGraph,
                   callbackOnReturn=True,
                   tooltip="Max term p-value")

    box = gui.widgetBox(box, "Significance test")

    gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                          tooltips=["Use binomial distribution test",
                                    "Use hypergeometric distribution test"],
                          callback=self.__invalidate)  # TODO: only update the p values
    box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                        addSpace=True)
    # One check box per evidence type, kept in a dict keyed by the type.
    self.evidenceCheckBoxDict = {}
    for etype in go.evidenceTypesOrdered:
        ecb = QCheckBox(
            etype, toolTip=go.evidenceTypes[etype],
            checked=self.useEvidenceType[etype])
        ecb.toggled.connect(self.__on_evidenceChanged)
        box.layout().addWidget(ecb)
        self.evidenceCheckBoxDict[etype] = ecb

    # Select tab
    self.selectTab = gui.createTabPage(self.tabs, "Select")
    box = gui.radioButtonsInBox(
        self.selectTab, self, "selectionDirectAnnotation",
        ["Directly or Indirectly", "Directly"],
        box="Annotated genes",
        callback=self.ExampleSelection)

    box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
    gui.radioButtonsInBox(
        box, self, "selectionDisjoint",
        btnLabels=["All selected genes",
                   "Term-specific genes",
                   "Common term genes"],
        tooltips=["Outputs genes annotated to all selected GO terms",
                  "Outputs genes that appear in only one of selected GO terms",
                  "Outputs genes common to all selected GO terms"],
        callback=self.ExampleSelection)

    # ListView for DAG, and table for significant GOIDs
    self.DAGcolumns = ['GO term', 'Cluster', 'Reference',
                       'p-value', 'FDR', 'Genes', 'Enrichment']

    self.splitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(self.splitter)

    # list view
    self.listView = GOTreeWidget(self.splitter)
    self.listView.setSelectionMode(QTreeView.ExtendedSelection)
    self.listView.setAllColumnsShowFocus(1)
    self.listView.setColumnCount(len(self.DAGcolumns))
    self.listView.setHeaderLabels(self.DAGcolumns)

    self.listView.header().setSectionsClickable(True)
    self.listView.header().setSortIndicatorShown(True)
    self.listView.header().setSortIndicator(self.DAGcolumns.index('p-value'),
                                            Qt.AscendingOrder)
    self.listView.setSortingEnabled(True)
    # Column 6 is 'Enrichment' in ``self.DAGcolumns``.
    self.listView.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))
    self.listView.setRootIsDecorated(True)

    self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

    # table of significant GO terms
    self.sigTerms = QTreeWidget(self.splitter)
    self.sigTerms.setColumnCount(len(self.DAGcolumns))
    self.sigTerms.setHeaderLabels(self.DAGcolumns)
    self.sigTerms.setSortingEnabled(True)
    self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
    self.sigTerms.header().setSortIndicator(self.DAGcolumns.index('p-value'),
                                            Qt.AscendingOrder)
    self.sigTerms.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))

    self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

    self.sigTableTermsSorted = []
    self.graph = {}
    self.originalGraph = None

    self.inputTab.layout().addStretch(1)
    self.filterTab.layout().addStretch(1)
    self.selectTab.layout().addStretch(1)

    class AnnotationSlot(SimpleNamespace):
        taxid = ...  # type: str
        name = ...  # type: str
        filename = ...  # type:str

        @staticmethod
        def parse_tax_id(f_name):
            # The taxonomy id is the second dot-separated field of the
            # file name.
            return f_name.split('.')[1]

    try:
        remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
    except (ConnectTimeout, RequestException, ConnectionError):
        # TODO: Warn user about failed connection to the remote server
        remote_files = []

    # Merge both listings (the set removes duplicates) and skip the
    # ontology file, which is not an annotation.
    self.available_annotations = [
        AnnotationSlot(
            taxid=AnnotationSlot.parse_tax_id(annotation_file),
            name=taxonomy.common_taxid_to_name(
                AnnotationSlot.parse_tax_id(annotation_file)),
            filename=FILENAME_ANNOTATION.format(
                AnnotationSlot.parse_tax_id(annotation_file))
        )
        for _, annotation_file in set(remote_files + serverfiles.listfiles(DOMAIN))
        if annotation_file != FILENAME_ONTOLOGY
    ]
    self._executor = ThreadExecutor()