예제 #1
0
 def read_data(self):
     """Load the data from ``self.URL``, falling back to the cached copy.

     On a successful read the data is saved to
     ``local_cache_path(self.FILE_NAME)`` before being used.  If reading
     fails for any reason, the result of ``self._read_cached_data()`` is
     used instead; when that is ``None`` the method returns without
     setting a source or committing.
     """
     try:
         data = UrlReader(self.URL).read()
     except Exception:
         # Deliberately broad: any failure to obtain the remote data falls
         # back to the cached copy instead of propagating.
         data = self._read_cached_data()
         if data is None:
             return
     else:
         # exist_ok=True avoids the check-then-create race of testing
         # os.path.exists() before calling os.makedirs().
         os.makedirs(local_cache_path(self.DIR_NAME), exist_ok=True)
         data.save(local_cache_path(self.FILE_NAME))
     self.set_source(data)
     self.commit()
예제 #2
0
    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if path is None:
                return self.NoFileSelected
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        else:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
            else:
                return self.NoFileSelected
예제 #3
0
    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
                print('reader_class', reader_class)
            else:
                reader = FileFormat.get_reader(path)
                # Return reader instance that can be used to read the file
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)

            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
 def _get_reader(self):
     """
     Returns
     -------
     FileFormat
     """
     if self.source == self.LOCAL_FILE:
         path = self.last_path()
         if self.recent_paths and self.recent_paths[0].file_format:
             qname = self.recent_paths[0].file_format
             reader_class = class_from_qualified_name(qname)
             reader = reader_class(path)
         else:
             reader = FileFormat.get_reader(path)
         if self.recent_paths and self.recent_paths[0].sheet:
             reader.select_sheet(self.recent_paths[0].sheet)
         # set preprocessor here
         if hasattr(reader, "read_tile"):
             reader.set_preprocessor(self.preprocessor)
             if self.preprocessor is not None:
                 self.info_preproc.setText(
                     self._format_preproc_str(
                         self.preprocessor).lstrip("\n"))
         else:
             # only allow readers with tile-by-tile support to run.
             reader = None
         return reader
     elif self.source == self.URL:
         url = self.url_combo.currentText().strip()
         if url:
             return UrlReader(url)
예제 #5
0
 def _get_reader(self) -> FileFormat:
     """Create the reader for the current source and sync the reader combo.

     For a local file the reader combo box is enabled and pointed at the
     reader recorded for the most recent path (index 0 when none is
     recorded).  For any other source a ``UrlReader`` is built from the
     stripped text of the URL combo box.

     Raises
     ------
     MissingReaderException
         If the reader class recorded for the most recent path cannot be
         resolved from its qualified name.
     """
     if self.source != self.LOCAL_FILE:
         return UrlReader(self.url_combo.currentText().strip())

     file_path = self.last_path()
     self.reader_combo.setEnabled(True)

     if not (self.recent_paths and self.recent_paths[0].file_format):
         self.reader_combo.setCurrentIndex(0)
         loader = FileFormat.get_reader(file_path)
     else:
         qname = self.recent_paths[0].file_format
         positions = {
             candidate.qualified_name(): pos
             for pos, candidate in enumerate(self.available_readers)
         }
         if qname in positions:
             # Combo items are offset by one from available_readers;
             # presumably item 0 is the automatic choice -- TODO confirm.
             self.reader_combo.setCurrentIndex(positions[qname] + 1)
         else:
             # The reader may still be importable even though it is not in
             # self.available_readers (perhaps its code was moved), so add
             # it to the combo and select the newly added last item.
             self.reader_combo.addItem(qname)
             last_item = len(self.reader_combo) - 1
             self.reader_combo.setCurrentIndex(last_item)
         try:
             loader_type = class_from_qualified_name(qname)
         except Exception as ex:
             raise MissingReaderException(
                 f'Can not find reader "{qname}"') from ex
         loader = loader_type(file_path)

     if self.recent_paths and self.recent_paths[0].sheet:
         loader.select_sheet(self.recent_paths[0].sheet)
     return loader
예제 #6
0
    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            reader = FileFormat.get_reader(self.last_path())
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        elif self.source == self.URL:
            return UrlReader(self.url_combo.currentText())
예제 #7
0
    def test_basic_file(self):
        """Read a .tab and an .xlsx file by URL and check their lengths."""
        cases = (
            ("https://datasets.biolab.si/core/titanic.tab", 2201),
            ("https://datasets.biolab.si/core/grades.xlsx", 16),
        )
        for url, expected_len in cases:
            table = UrlReader(url).read()
            self.assertEqual(expected_len, len(table))
예제 #8
0
 def test_special_characters_with_query_and_fragment(self):
     """Expect OSError for a non-ASCII URL with a query and a fragment."""
     url = ("http://file.biolab.si/text-semantics/data/elektrotehniski-"
            "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt?a=1&b=2#c=3")
     # Build the reader outside the assertion so that only read() is
     # expected to raise.
     reader = UrlReader(url)
     with self.assertRaises(OSError):
         reader.read()
예제 #9
0
 def test_url_with_fragment(self):
     """Read an .xlsx URL that carries a fragment and check its length."""
     url = "https://datasets.biolab.si/core/grades.xlsx#tab=1"
     table = UrlReader(url).read()
     self.assertEqual(16, len(table))
예제 #10
0
 def test_base_url_with_query(self):
     """Read an .xlsx URL that carries a query string and check its length."""
     url = "https://datasets.biolab.si/core/grades.xlsx?a=1&b=2"
     table = UrlReader(url).read()
     self.assertEqual(16, len(table))
예제 #11
0
 def test_special_characters(self):
     """Expect OSError when reading a URL with a non-ASCII character."""
     # TODO: replace this file with a more appropriate one (e.g. .csv)
     #  and change the assertion accordingly
     url = ("http://file.biolab.si/text-semantics/data/elektrotehniski-"
            "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt")
     # Build the reader outside the assertion so that only read() is
     # expected to raise.
     reader = UrlReader(url)
     with self.assertRaises(OSError):
         reader.read()
예제 #12
0
 def test_zipped(self):
     """Test zipped files with two extensions (here ``.csv.xz``)."""
     url = "http://datasets.biolab.si/core/philadelphia-crime.csv.xz"
     table = UrlReader(url).read()
     self.assertEqual(9666, len(table))
예제 #13
0
 def __init__(self, path, *args):
     """Run the ``CoreUrlReader`` and ``Reader`` initialisers on this object.

     ``CoreUrlReader.__init__`` receives ``path``; ``Reader.__init__`` then
     receives ``self.filename`` followed by any extra positional ``args``.
     """
     # Must run first: presumably this is what sets self.filename, which
     # the Reader initialiser below consumes -- TODO confirm.
     CoreUrlReader.__init__(self, path)
     Reader.__init__(self, self.filename, *args)