def _get_reader(self):
    """
    Build the reader for the currently selected source.

    Returns
    -------
    FileFormat
        A reader for the path returned by ``self.last_path()`` or for the
        text entered in the URL combo; ``self.NoFileSelected`` when that
        path is None or the stripped URL is empty.
    """
    if self.source != self.LOCAL_FILE:
        # Every non-local source is read from the URL combo box.
        address = self.url_combo.currentText().strip()
        return UrlReader(address) if address else self.NoFileSelected

    path = self.last_path()
    if path is None:
        return self.NoFileSelected

    recent = self.recent_paths
    if recent and recent[0].file_format:
        # A reader class was stored with the recent path: use exactly that.
        make_reader = class_from_qualified_name(recent[0].file_format)
        reader = make_reader(path)
    else:
        reader = FileFormat.get_reader(path)

    sheet = self.recent_paths[0].sheet if self.recent_paths else None
    if sheet:
        reader.select_sheet(sheet)
    return reader
def _get_reader(self) -> FileFormat:
    """
    Create the reader for the selected source and update the reader combo.

    For a local file the reader combo is enabled and its current index is
    set to match the reader stored with the first recent path (or to index
    0 when none is stored). For any other source a ``UrlReader`` for the
    stripped URL combo text is returned.

    Raises
    ------
    MissingReaderException
        If the stored qualified name cannot be resolved to a reader class.
    """
    if self.source != self.LOCAL_FILE:
        return UrlReader(self.url_combo.currentText().strip())

    path = self.last_path()
    self.reader_combo.setEnabled(True)

    recent = self.recent_paths
    stored_qname = recent[0].file_format if recent else None
    if stored_qname:
        positions = {
            candidate.qualified_name(): pos
            for pos, candidate in enumerate(self.available_readers)
        }
        position = positions.get(stored_qname)
        if position is not None:
            # Index 0 is what the no-stored-format branch selects, hence
            # the shift by one for entries of available_readers.
            self.reader_combo.setCurrentIndex(position + 1)
        else:
            # reader may be accessible, but not in self.available_readers
            # (perhaps its code was moved)
            self.reader_combo.addItem(stored_qname)
            last_index = len(self.reader_combo) - 1
            self.reader_combo.setCurrentIndex(last_index)
        try:
            reader_class = class_from_qualified_name(stored_qname)
        except Exception as ex:
            raise MissingReaderException(
                f'Can not find reader "{stored_qname}"') from ex
        reader = reader_class(path)
    else:
        self.reader_combo.setCurrentIndex(0)
        reader = FileFormat.get_reader(path)

    sheet = self.recent_paths[0].sheet if self.recent_paths else None
    if sheet:
        reader.select_sheet(sheet)
    return reader
def _get_reader(self):
    """
    Build the reader for the currently selected source.

    Returns
    -------
    FileFormat or None
        For a local file, a reader for the path returned by
        ``self.last_path()`` (using the reader class stored with the first
        recent path when there is one, with the stored sheet selected).
        For a URL source, a ``UrlReader`` for the stripped combo text.
        None when the URL is empty or the source is neither of the two.
    """
    if self.source == self.LOCAL_FILE:
        path = self.last_path()
        if self.recent_paths and self.recent_paths[0].file_format:
            qname = self.recent_paths[0].file_format
            reader_class = class_from_qualified_name(qname)
            # The leftover debugging print of reader_class was removed so
            # that calling this method no longer writes to stdout.
            reader = reader_class(path)
        else:
            reader = FileFormat.get_reader(path)
        # Return reader instance that can be used to read the file
        if self.recent_paths and self.recent_paths[0].sheet:
            reader.select_sheet(self.recent_paths[0].sheet)
        return reader
    elif self.source == self.URL:
        url = self.url_combo.currentText().strip()
        if url:
            return UrlReader(url)
    # No usable source: make the previously implicit result explicit.
    return None
def _get_reader(self):
    """
    Build the reader for the selected source and attach the preprocessor.

    Returns
    -------
    FileFormat
        For a local file, a reader that has a ``read_tile`` attribute,
        with ``self.preprocessor`` set on it; None when the reader lacks
        ``read_tile``. For a URL source, a ``UrlReader`` for the stripped
        combo text; None when that text is empty or the source is neither.
    """
    if self.source == self.LOCAL_FILE:
        path = self.last_path()
        recent = self.recent_paths
        if recent and recent[0].file_format:
            make_reader = class_from_qualified_name(recent[0].file_format)
            reader = make_reader(path)
        else:
            reader = FileFormat.get_reader(path)

        sheet = self.recent_paths[0].sheet if self.recent_paths else None
        if sheet:
            reader.select_sheet(sheet)

        if not hasattr(reader, "read_tile"):
            # only allow readers with tile-by-tile support to run.
            return None

        # set preprocessor here
        reader.set_preprocessor(self.preprocessor)
        if self.preprocessor is not None:
            description = self._format_preproc_str(self.preprocessor)
            self.info_preproc.setText(description.lstrip("\n"))
        return reader

    if self.source == self.URL:
        address = self.url_combo.currentText().strip()
        if address:
            return UrlReader(address)
    return None
def read_data(self):
    """
    Load data from ``self.URL``, falling back to the cached copy.

    On a successful download the data is saved to
    ``local_cache_path(self.FILE_NAME)`` after making sure the directory
    ``local_cache_path(self.DIR_NAME)`` exists. If the download raises,
    the result of ``self._read_cached_data()`` is used instead; when that
    is None the method returns without setting a source. Otherwise the
    data is passed to ``self.set_source`` and ``self.commit`` is called.
    """
    try:
        data = UrlReader(self.URL).read()
    except Exception:
        # Deliberate best effort: any download failure falls back to the
        # cache rather than propagating.
        data = self._read_cached_data()
        if data is None:
            return
    else:
        # exist_ok=True replaces the separate os.path.exists() check, so a
        # directory created between check and creation no longer raises.
        os.makedirs(local_cache_path(self.DIR_NAME), exist_ok=True)
        data.save(local_cache_path(self.FILE_NAME))
    self.set_source(data)
    self.commit()
def _get_reader(self):
    """
    Build the reader for the currently selected source.

    Returns
    -------
    FileFormat
        For a local file, the reader chosen by ``FileFormat.get_reader``
        for ``self.last_path()``, with the sheet stored on the first
        recent path selected when there is one. For a URL source, a
        ``UrlReader`` for the combo text as entered. None for any other
        source.
    """
    if self.source == self.LOCAL_FILE:
        reader = FileFormat.get_reader(self.last_path())
        recent = self.recent_paths
        sheet = recent[0].sheet if recent else None
        if sheet:
            reader.select_sheet(sheet)
        return reader
    if self.source == self.URL:
        return UrlReader(self.url_combo.currentText())
    return None
def test_basic_file(self):
    # Each remote file must load and have the expected length.
    expected_lengths = (
        ("https://datasets.biolab.si/core/titanic.tab", 2201),
        ("https://datasets.biolab.si/core/grades.xlsx", 16),
    )
    for url, expected in expected_lengths:
        table = UrlReader(url).read()
        self.assertEqual(expected, len(table))
def test_special_characters_with_query_and_fragment(self):
    # Non-ASCII path followed by a query string and a fragment: reading is
    # expected to fail with OSError.
    path = ("http://file.biolab.si/text-semantics/data/elektrotehniski-"
            "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt?a=1&b=2#c=3")
    # Build the reader outside the context so only read() is checked.
    reader = UrlReader(path)
    with self.assertRaises(OSError):
        reader.read()
def test_url_with_fragment(self):
    # A "#tab=1" fragment appended to the URL must not prevent reading.
    url = "https://datasets.biolab.si/core/grades.xlsx#tab=1"
    table = UrlReader(url).read()
    self.assertEqual(16, len(table))
def test_base_url_with_query(self):
    # A "?a=1&b=2" query string appended to the URL must not prevent
    # reading.
    url = "https://datasets.biolab.si/core/grades.xlsx?a=1&b=2"
    table = UrlReader(url).read()
    self.assertEqual(16, len(table))
def test_special_characters(self):
    # TO-DO - replace this file with a more appropriate one (e.g. .csv)
    # and change the assertion accordingly
    path = ("http://file.biolab.si/text-semantics/data/elektrotehniski-"
            "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt")
    # Build the reader outside the context so only read() is checked.
    reader = UrlReader(path)
    with self.assertRaises(OSError):
        reader.read()
def test_zipped(self):
    """Test zipped files with two extensions."""
    url = "http://datasets.biolab.si/core/philadelphia-crime.csv.xz"
    table = UrlReader(url).read()
    self.assertEqual(9666, len(table))