def ReadFilesToDF(files, delim=None):
    """Read a list of delimited files into one concatenated DataFrame.

    Each file is parsed with ``_read_csv``; a file that raises
    ``_EmptyDataError`` is replaced by a one-row placeholder frame so it
    still contributes a row. A ``'date'`` column is attached per file:
    first by fuzzy-parsing the file name, then via ``FindDate``, and
    finally falling back to the file's position in ``files`` (a
    monotonically increasing sequence).

    Parameters
    ----------
    files : iterable of str
        Paths/names of the files to read.
    delim : str, optional
        Field delimiter passed through to ``_read_csv``, by default None.

    Returns
    -------
    tuple
        ``(allR, merged)`` — the list of per-file DataFrames and their
        concatenation, indexed by the ``'date'`` column.
    """
    allR = []
    for indexa, elem in enumerate(files):
        try:
            # NOTE(review): `error_bad_lines` is deprecated/removed in
            # modern pandas (use `on_bad_lines='skip'`) — confirm the
            # pandas version pinned by this project before changing.
            c0 = _read_csv(elem,
                           error_bad_lines=False,
                           header=None,
                           delimiter=delim,
                           engine='python')
        except _EmptyDataError:
            # Empty file: keep a single-row placeholder so a date row
            # is still emitted for it below.
            c0 = _DataFrame()
            c0[0] = [0]
        try:
            # Try to pull a date directly out of the file name/path.
            c0['date'] = _parse(elem, fuzzy=True)
        except ValueError:
            dateFound = FindDate(elem)
            if not dateFound:
                # No date anywhere: use the position in `files`.
                c0['date'] = indexa
            else:
                c0['date'] = dateFound[0]
        allR.append(c0)
    merged = _concat(allR, ignore_index=True)
    merged.index = merged['date']
    return allR, merged
# Example #2 (score: 0)
def read_csv(filepath_or_buffer: FilePathOrBuffer,
             latitude: str = LATITUDE,
             longitude: str = LONGITUDE,
             datetime: str = DATETIME,
             traj_id: str = TRAJ_ID,
             type_: str = TYPE_PANDAS,
             n_partitions: int = 1,
             **kwargs):
    """
    Load trajectory data from a `csv` source into a MoveDataFrame.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Any valid string path, URL (http, ftp, s3, gs, file), os.PathLike
        object, or file-like object with a read() method (e.g. an open
        file handle or StringIO) accepted by pandas.
    latitude : str, optional
        Column name holding the latitude feature, by default 'lat'
    longitude : str, optional
        Column name holding the longitude feature, by default 'lon'
    datetime : str, optional
        Column name holding the datetime feature, by default 'datetime'
    traj_id : str, optional
        Column name holding the trajectory id feature, by default 'id'
    type_ : str, optional
        Which MoveDataFrame backend to build, by default 'pandas'
    n_partitions : int, optional
        Partition count used by DaskMoveDataFrame, by default 1
    **kwargs : Pandas read_csv arguments
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data

    Examples
    --------
    >>> from pymove.utils.trajectories import read_csv
    >>> move_df = read_csv('geolife_sample.csv')
    >>> type(move_df)
    <class 'pymove.core.pandas.PandasMoveDataFrame'>
    """
    # Delegate raw parsing to pandas, then wrap in the requested structure.
    frame = _read_csv(filepath_or_buffer, **kwargs)
    return MoveDataFrame(
        frame, latitude, longitude, datetime, traj_id, type_, n_partitions
    )
# Example #3 (score: 0)
def read_csv(file_pth):
    """Read a CSV file into a table whose index is named IDX_KEY.

    If a column named IDX_KEY exists, it becomes the index; otherwise
    the current index is just renamed to IDX_KEY.
    """
    frame = _read_csv(file_pth)
    if IDX_KEY not in keys_(frame):
        frame.index.rename(IDX_KEY, inplace=True)
    else:
        frame.set_index(IDX_KEY, inplace=True)
    return frame
# Example #4 (score: 0)
def read_csv(filepath_or_buffer: FilePathOrBuffer,
             latitude: Optional[Text] = LATITUDE,
             longitude: Optional[Text] = LONGITUDE,
             datetime: Optional[Text] = DATETIME,
             traj_id: Optional[Text] = TRAJ_ID,
             type_: Optional[Text] = TYPE_PANDAS,
             n_partitions: Optional[int] = 1,
             **kwargs):
    """
    Reads a .csv file and structures the data into the desired structure
    supported by PyMove.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Any valid string path is acceptable. The string could be a URL.
        Valid URL schemes include http, ftp, s3, gs, and file.
        For file URLs, a host is expected.
        A local file could be: file://localhost/path/to/table.csv.
        If you want to pass in a path object, pandas accepts any os.PathLike.
        By file-like object, we refer to objects with a read() method,
        such as a file handle (e.g. via builtin open function) or StringIO.
    latitude : str, optional
        Represents the column name of feature latitude, by default 'lat'
    longitude : str, optional
        Represents the column name of feature longitude, by default 'lon'
    datetime : str, optional
        Represents the column name of feature datetime, by default 'datetime'
    traj_id : str, optional
        Represents the column name of feature id trajectory, by default 'id'
    type_ : str, optional
        Represents the type of the MoveDataFrame, by default 'pandas'
    n_partitions : int, optional
        Represents number of partitions for DaskMoveDataFrame, by default 1
    **kwargs : Pandas read_csv arguments
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data

    """
    # Parse with pandas, then wrap the raw frame in the project structure.
    data = _read_csv(filepath_or_buffer, **kwargs)

    return MoveDataFrame(data, latitude, longitude, datetime, traj_id, type_,
                         n_partitions)
# Example #5 (score: 0)
# File: parser.py, Project: ddkn/xps
    def __init__(self,
                 filepath_or_buffer,
                 delimiter=',',
                 data_start=7,
                 header_start=2,
                 header_len=4,
                 *args,
                 **kws):
        # Parse instrument parameters. `open()` raises TypeError when
        # given an already-open file-like object, in which case we parse
        # the buffer directly and rewind it for the reads below.
        try:
            with open(filepath_or_buffer, 'r') as f:
                self.__parse_xps_param(f, delimiter)
        except TypeError:
            self.__parse_xps_param(filepath_or_buffer, delimiter)
            filepath_or_buffer.seek(0)

        # Peak data table starts after `data_start` rows.
        self.peak_data = _read_csv(filepath_or_buffer,
                                   skiprows=data_start,
                                   delimiter=delimiter)

        # A StringIO was consumed by the read above; rewind before the
        # second pass over the header region.
        if type(filepath_or_buffer) is _StringIO:
            filepath_or_buffer.seek(0)
        # Header block: `header_len` rows starting at `header_start`,
        # first column used as the index.
        hdr = _read_csv(filepath_or_buffer,
                        skiprows=header_start,
                        index_col=0,
                        nrows=header_len,
                        delimiter=delimiter)

        # Drop pandas' auto-generated "Unnamed: N" filler columns.
        col_names = [col for col in hdr if 'unnamed' not in col.lower()]
        self.peak_param = hdr[col_names]
        self.peak_param = self.peak_param.transpose()
        # Coerce dtypes: textual fields stay str, everything else float64.
        for k in self.peak_param.keys():
            if k in ['Lineshape', 'Name']:
                self.peak_param[k] = self.peak_param[k].astype(str)
                continue
            self.peak_param[k] = self.peak_param[k].astype('float64')
        self.peak_ids = col_names
def input_csv_read(fileName):
    """Read the first data row of a generator-input CSV file.

    Parameters
    ----------
    fileName : str or file-like
        Path or buffer of a CSV containing at least the columns
        'input_state' (string) and 'input_number_to_generate' (int).

    Returns
    -------
    numpy.ndarray
        Values of the first data row, e.g. ``['CA', 5]``.
    """
    dtype = {'input_state': "string", 'input_number_to_generate': int}
    # BUG FIX: `pd._read_csv` is not a pandas attribute; the public API
    # is `pd.read_csv`, which accepts the same arguments.
    data = pd.read_csv(fileName, dtype=dtype)
    parsedInput = data.values[0]
    return parsedInput