def test_read_1file_oneline(self):
    """Read a one-column file; output must be the (3, 1) column vector."""
    tiny = np.array([1, 2, 3])
    with tempfile.NamedTemporaryFile(mode='wb', suffix='.dat', delete=False) as f:
        np.savetxt(f, tiny)
    try:
        reader = CSVReader(f.name, delimiters=" ")
        # one value per row -> the reader should produce a 2D column vector
        np.testing.assert_equal(reader.get_output()[0], np.atleast_2d(tiny).T)
    finally:
        # delete=False, so the file must be removed explicitly (was leaked before)
        os.unlink(f.name)
def test_with_kwargs(self):
    """Extra keyword arguments (here: header=27) must be forwarded to the reader."""
    kwargs = {'header': 27}
    output = CSVReader(self.filename1, **kwargs).get_output()
    np.testing.assert_almost_equal(output[0], self.data)
def test_newline_at_eof_with_header(self):
    """A trailing blank line after the data must be ignored (file has a header)."""
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write("#x y z\n1 2 3\n4 5 6\n\n")
    try:
        # genfromtxt serves as the reference parser for the same file
        desired = np.genfromtxt(f.name, dtype=np.float32).reshape(-1, 3)
        result = CSVReader(f.name).get_output()[0]
        np.testing.assert_allclose(result, desired)
    finally:
        # delete=False, so the file must be removed explicitly (was leaked before)
        os.unlink(f.name)
def test_reset(self):
    """After reset() an iterator must reproduce exactly the same chunks."""
    reader = CSVReader((self.filename1, self.filename2))
    it = reader.iterator()
    first_pass = list(it)
    it.reset()
    second_pass = list(it)
    np.testing.assert_equal(first_pass, second_pass)
def test_holes_in_file(self):
    """A file whose last line lacks a trailing newline must still be read fully."""
    x = "1 2 3\n4 5 6\n7 8 9"
    # np.fromstring(text, sep=...) is deprecated; parse via split() instead,
    # which yields the same whitespace-separated values.
    desired = np.array(x.split(), dtype=np.float32).reshape(-1, 3)
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(x)
    try:
        result = CSVReader(f.name).get_output()[0]
        np.testing.assert_allclose(result, desired)
    finally:
        # delete=False, so the file must be removed explicitly (was leaked before)
        os.unlink(f.name)
def test_read_1file_with_header(self):
    """Header lines must be skipped; counts and data must match the fixture."""
    reader = CSVReader(self.file_with_header)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.dimension(), self.nd)
    self.assertEqual(reader.n_frames_total(), self.nt)
    np.testing.assert_almost_equal(reader.get_output()[0], self.data)
def test_newline_at_eof_carriage_return(self):
    """Windows-style \\r\\n line endings plus a trailing newline must parse cleanly."""
    x = "1 2 3\r\n4 5 6\r\n"
    # np.fromstring(text, sep=...) is deprecated; split() on any whitespace
    # (including \r\n) yields the same values.
    desired = np.array(x.split(), dtype=np.float32).reshape(-1, 3)
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(x)
    try:
        result = CSVReader(f.name).get_output()[0]
        np.testing.assert_allclose(result, desired)
    finally:
        # delete=False, so the file must be removed explicitly (was leaked before)
        os.unlink(f.name)
def test_with_lag(self):
    """Lagged iteration must yield the data shifted by the lag time."""
    reader = CSVReader(self.filename1)
    for lag in [23, 7, 59]:
        with reader.iterator(stride=1, lag=lag) as it:
            pieces = [Y for _, _, Y in it]
        np.testing.assert_almost_equal(np.vstack(pieces), self.data[lag:])
def test_read_with_skipping_first_few_couple_lines_multiple_trajectoryfiles(self):
    """skip=n applied to a multi-file reader must drop n rows from every file."""
    template = ("The first %s rows of the %s file were skipped, but that did not"
                " match the rows with skip=0 and sliced by [%s::]")
    for skip in [0, 3, 13]:
        skipped = CSVReader([self.filename1, self.filename2]).get_output(skip=skip)
        full = CSVReader([self.filename1, self.filename2]).get_output()
        for idx, which in enumerate(("first", "second")):
            np.testing.assert_almost_equal(skipped[idx], full[idx][skip::],
                                           err_msg=template % (skip, which, skip))
def test_read_with_skipping_first_few_couple_lines(self):
    """get_output(skip=n) must match the full output sliced with [n::]."""
    for n_skip in [0, 3, 13]:
        # FIXME: opening the same file twice is not being liked by py27
        skipped = CSVReader(self.filename1, chunksize=30).get_output(skip=n_skip)[0]
        assert len(skipped) == len(self.data[n_skip:])
        self.maxDiff = None
        full = CSVReader(self.filename1, chunksize=30).get_output()[0]
        err = ("The first %s rows were skipped, but that did not "
               "match the rows with skip=0 and sliced by [%s::]" % (n_skip, n_skip))
        np.testing.assert_almost_equal(skipped, full[n_skip::], err_msg=err)
def test_read_lagged_small_chunks(self):
    """Lagged iteration in small chunks must concatenate to data[lag:]."""
    lag = 200
    reader = CSVReader(self.filename1, chunksize=30)
    collected = []
    with reader.iterator(lag=lag) as it:
        for _, _, lagged in it:
            assert len(lagged) > 0
            collected.append(lagged)
    np.testing.assert_almost_equal(np.vstack(collected), self.data[lag:])
def test_csvreader(self):
    """Offsets stored in the trajectory info must match per-line readline()/tell() offsets."""
    data = np.random.random((101, 3))
    # tempfile.mktemp() is deprecated and race-prone; create the file atomically
    # and keep it around (delete=False) until the finally-clause removes it.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        fn = tmp.name
    try:
        np.savetxt(fn, data)
        # collect the byte offset of every line start
        offsets = [0]
        with open(fn, PyCSVReader.DEFAULT_OPEN_MODE) as new_fh:
            while new_fh.readline():
                offsets.append(new_fh.tell())
        reader = PyCSVReader(fn)
        assert reader.dimension() == 3
        trajinfo = reader._get_traj_info(fn)
        np.testing.assert_equal(offsets, trajinfo.offsets)
    finally:
        os.unlink(fn)
def test_with_stride_and_lag_with_header(self):
    """Strided + lagged iteration over a headered file must match plain slicing."""
    reader = CSVReader(self.file_with_header)
    for stride in [2, 3, 7, 10]:
        for lag in [1, 23, 7, 59]:
            xs, ys = [], []
            with reader.iterator(stride=stride, lag=lag) as it:
                for _, X, Y in it:
                    xs.append(X)
                    ys.append(Y)
            expected_lagged = self.data[lag::stride]
            # the un-lagged stream is truncated to the length of the lagged one
            np.testing.assert_almost_equal(np.vstack(xs),
                                           self.data[::stride][0:len(expected_lagged)])
            np.testing.assert_almost_equal(np.vstack(ys), expected_lagged,
                                           err_msg="output is not equal for"
                                                   " lag %i and stride %i" % (lag, stride))
def test_compare_readline(self):
    """Reader offsets must agree with offsets obtained via readline()/tell(),
    and seeking to either offset must yield the same line."""
    data = np.arange(99*3).reshape(-1, 3)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        fn = f.name
    try:
        np.savetxt(fn, data)
        reader = CSVReader(fn)
        assert reader.dimension() == 3
        trajinfo = reader._get_traj_info(fn)
        # calc offsets independently with readline/tell
        offset = [0]
        with open(fn, CSVReader.DEFAULT_OPEN_MODE) as fh2:
            while fh2.readline():
                offset.append(fh2.tell())
            fh2.seek(0)
            np.testing.assert_equal(trajinfo.offsets, offset)
            for ii, off in enumerate(trajinfo.offsets):
                fh2.seek(off)
                line = fh2.readline()
                fh2.seek(offset[ii])
                line2 = fh2.readline()
                self.assertEqual(line, line2,
                                 "differs at offset %i (%s != %s)" % (ii, off, offset[ii]))
    finally:
        # delete=False, so the file must be removed explicitly (was leaked before)
        os.unlink(fn)
def test_use_cols(self):
    """Column selection via cols= must restrict the output to those columns."""
    reader = CSVReader(self.filename1)
    cols = (0, 2)
    with reader.iterator(chunk=0, cols=cols, return_trajindex=False) as it:
        for chunk in it:
            np.testing.assert_equal(chunk, self.data[:, cols])
def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw):
    r""" Creates a (possibly featured) file reader by a number of input files and either a topology file or a featurizer.

    Parameters
    ----------
    :param input_files:
        A single input file or a list of input files.
    :param topology:
        A topology file. If given, the featurizer argument can be None.
    :param featurizer:
        A featurizer. If given, the topology file can be None.
    :param chunksize:
        The chunk size with which the corresponding reader gets initialized.
    :return: Returns the reader.
    """
    from pyerna.coordinates.data.numpy_filereader import NumPyFileReader
    from pyerna.coordinates.data.py_csv_reader import PyCSVReader
    from pyerna.coordinates.data import FeatureReader
    from pyerna.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader

    # fragmented trajectories: a non-empty list that itself contains lists/tuples
    if (isinstance(input_files, (list, tuple)) and len(input_files) > 0
            and any(isinstance(item, (list, tuple)) for item in input_files)):
        return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer)

    # normal trajectories
    if (isinstance(input_files, str)
            or (isinstance(input_files, (list, tuple))
                and (any(isinstance(item, str) for item in input_files)
                     or len(input_files) == 0))):
        reader = None
        # check: if single string create a one-element list
        if isinstance(input_files, str):
            input_list = [input_files]
        elif len(input_files) > 0 and all(isinstance(item, str) for item in input_files):
            input_list = input_files
        else:
            # NOTE: `is 0` replaced with `== 0` — identity comparison with an int
            # literal is implementation-dependent and a SyntaxWarning in py3.8+.
            if len(input_files) == 0:
                raise ValueError("The passed input list should not be empty.")
            else:
                raise ValueError(
                    "The passed list did not exclusively contain strings or was a list of lists "
                    "(fragmented trajectory).")

        # TODO: this does not handle suffixes like .xyz.gz (rare)
        _, suffix = os.path.splitext(input_list[0])
        suffix = str(suffix)

        # check: do all files have the same file type? If not: raise ValueError.
        if all(item.endswith(suffix) for item in input_list):
            # do all the files exist? If not: raise ValueError
            all_exist = True
            from six import StringIO
            err_msg = StringIO()
            for item in input_list:
                if not os.path.isfile(item):
                    err_msg.write('\n' if err_msg.tell() > 0 else "")
                    err_msg.write('File %s did not exist or was no file' % item)
                    all_exist = False
            if not all_exist:
                raise ValueError('Some of the given input files were directories'
                                 ' or did not exist:\n%s' % err_msg.getvalue())

            featurizer_or_top_provided = featurizer is not None or topology is not None
            # we need to check for h5 first, because of mdtraj custom HDF5 traj format (which is deprecated).
            if suffix in ('.h5', '.hdf5') and not featurizer_or_top_provided:
                # This check is potentially expensive for lots of files, we also re-open the file twice
                # (causing atime updates etc.). So we simply require that no featurizer option is given.
                # and not all((_is_mdtraj_hdf5_file(f) for f in input_files)):
                from pyerna.coordinates.data.h5_reader import H5Reader
                reader = H5Reader(filenames=input_files, chunk_size=chunksize, **kw)
            # CASE 1.1: file types are MD files
            elif FeatureReader.supports_format(suffix):
                # check: do we either have a featurizer or a topology file name? If not: raise ValueError.
                # create a MD reader with file names and topology
                if not featurizer_or_top_provided:
                    raise ValueError(
                        'The input files were MD files which makes it mandatory to have either a '
                        'Featurizer or a topology file.')
                # NOTE(review): splitext never yields '.pdb.gz', so that branch looks unreachable — confirm
                if suffix in ('.pdb', '.pdb.gz'):
                    raise ValueError(
                        'PyEMMA can not read PDB-fake-trajectories. '
                        'Please consider using a sane trajectory format (e.g. xtc, dcd).'
                    )
                reader = FeatureReader(input_list, featurizer=featurizer,
                                       topologyfile=topology, chunksize=chunksize)
            elif suffix in ('.npy', '.npz'):
                reader = NumPyFileReader(input_list, chunksize=chunksize)
            # otherwise we assume that given files are ascii tabulated data
            else:
                reader = PyCSVReader(input_list, chunksize=chunksize, **kw)
        else:
            raise ValueError('Not all elements in the input list were of the type %s!'
                             % suffix)
    else:
        raise ValueError(
            'Input "{}" was no string or list of strings.'.format(input_files))
    return reader
def test_with_multiple_files(self):
    """A reader built from several files reports one trajectory per file."""
    files = [self.filename1, self.file_with_header]
    self.assertEqual(CSVReader(files).number_of_trajectories(), len(files))
def test_with_stride(self):
    """get_output(stride=s) must equal the data sliced by [::s]."""
    reader = CSVReader(self.filename1)
    for stride in [2, 3, 7, 10]:
        strided = reader.get_output(stride=stride)[0]
        np.testing.assert_almost_equal(strided, self.data[::stride],
                                       err_msg="stride=%s" % stride)