def ReadFilesToDF(files, delim=None):
    """Read a collection of CSV files and concatenate them into one DataFrame.

    Each file is loaded individually (malformed rows are skipped) and tagged
    with a ``'date'`` column derived, in order of preference, from:

    1. a fuzzy date parse of the file name/path,
    2. the first hit returned by the project helper ``FindDate``,
    3. the file's position in ``files`` (monotonically increasing fallback).

    Parameters
    ----------
    files : iterable of str
        Paths of the CSV files to read.
    delim : str, optional
        Field delimiter passed through to the CSV reader, by default None
        (let the python engine sniff it).

    Returns
    -------
    tuple (list of DataFrame, DataFrame)
        The list of per-file frames and the concatenation of all of them,
        indexed by the ``'date'`` column.
    """
    frames = []
    for position, path in enumerate(files):
        try:
            # NOTE: the original used error_bad_lines=False, which was
            # removed in pandas 2.0; on_bad_lines='skip' is the equivalent.
            frame = _read_csv(path, on_bad_lines='skip', header=None,
                              delimiter=delim, engine='python')
        except _EmptyDataError:
            # Empty file: stand in a one-row placeholder frame so the file
            # still contributes a (dated) row to the merged result.
            frame = _DataFrame()
            frame[0] = [0]
        try:
            frame['date'] = _parse(path, fuzzy=True)
        except (ValueError, OverflowError):
            # dateutil can raise OverflowError as well as ValueError on
            # pathological inputs; fall back to the project date finder.
            found = FindDate(path)
            frame['date'] = found[0] if found else position
        frames.append(frame)
    merged = _concat(frames, ignore_index=True)
    merged.index = merged['date']
    return frames, merged
def read_csv(filepath_or_buffer: FilePathOrBuffer, latitude: str = LATITUDE, longitude: str = LONGITUDE, datetime: str = DATETIME, traj_id: str = TRAJ_ID, type_: str = TYPE_PANDAS, n_partitions: int = 1, **kwargs):
    """
    Load trajectory data from a csv source into a MoveDataFrame.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Any valid string path, URL (http, ftp, s3, gs, file schemes),
        os.PathLike object, or object exposing a read() method
        (e.g. an open file handle or StringIO).
    latitude : str, optional
        Column name holding the latitude feature, by default 'lat'
    longitude : str, optional
        Column name holding the longitude feature, by default 'lon'
    datetime : str, optional
        Column name holding the datetime feature, by default 'datetime'
    traj_id : str, optional
        Column name holding the trajectory id feature, by default 'id'
    type_ : str, optional
        Which MoveDataFrame backend to build, by default 'pandas'
    n_partitions : int, optional
        Partition count when building a DaskMoveDataFrame, by default 1
    **kwargs :
        Forwarded verbatim to pandas.read_csv, see
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data
    """
    # Parse the raw table first, then hand it to the MoveDataFrame factory
    # along with the column-name mapping and backend selection.
    raw_frame = _read_csv(filepath_or_buffer, **kwargs)
    move_frame = MoveDataFrame(raw_frame, latitude, longitude, datetime, traj_id, type_, n_partitions)
    return move_frame
def read_csv(file_pth):
    """Load a CSV file into a table whose index is named IDX_KEY.

    If the file already contains an IDX_KEY column it becomes the index;
    otherwise the default integer index is simply renamed to IDX_KEY.
    """
    table = _read_csv(file_pth)
    index_column_present = IDX_KEY in keys_(table)
    if index_column_present:
        # Promote the existing column to be the index.
        table.set_index(IDX_KEY, inplace=True)
    else:
        # No such column: keep the default index but give it the canonical name.
        table.index.rename(IDX_KEY, inplace=True)
    return table
def read_csv(filepath_or_buffer: FilePathOrBuffer, latitude: Optional[Text] = LATITUDE, longitude: Optional[Text] = LONGITUDE, datetime: Optional[Text] = DATETIME, traj_id: Optional[Text] = TRAJ_ID, type_: Optional[Text] = TYPE_PANDAS, n_partitions: Optional[int] = 1, **kwargs):
    """
    Read a .csv file and arrange its contents into the trajectory
    structure PyMove works with.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Any valid string path, URL (http, ftp, s3, gs, file schemes),
        os.PathLike object, or object exposing a read() method
        (e.g. an open file handle or StringIO).
    latitude : str, optional
        Column name of the latitude feature, by default 'lat'
    longitude : str, optional
        Column name of the longitude feature, by default 'lon'
    datetime : str, optional
        Column name of the datetime feature, by default 'datetime'
    traj_id : str, optional
        Column name of the trajectory id feature, by default 'id'
    type_ : str, optional
        Type of MoveDataFrame to construct, by default 'pandas'
    n_partitions : int, optional
        Partition count for a DaskMoveDataFrame, by default 1
    **kwargs :
        Forwarded verbatim to pandas.read_csv, see
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data
    """
    # Delegate parsing to pandas, then wrap the result in the requested
    # MoveDataFrame backend with the supplied column mapping.
    parsed = _read_csv(filepath_or_buffer, **kwargs)
    return MoveDataFrame(parsed, latitude, longitude, datetime, traj_id, type_, n_partitions)
def __init__(self, filepath_or_buffer, delimiter=',', data_start=7, header_start=2, header_len=4, *args, **kws):
    """Parse an XPS export that interleaves a parameter header with peak data.

    Parameters
    ----------
    filepath_or_buffer : str or file-like object
        Path to the file, or an already-open (seekable) buffer such as
        StringIO. Buffers are rewound with seek(0) between the parsing passes.
    delimiter : str, optional
        Field delimiter for every pass, by default ','.
    data_start : int, optional
        Row at which the peak data table begins, by default 7.
    header_start : int, optional
        Row at which the peak-parameter header begins, by default 2.
    header_len : int, optional
        Number of rows in the peak-parameter header, by default 4.
    *args, **kws :
        Accepted but unused here — presumably for subclass/cooperative
        signatures; TODO confirm against the enclosing class.

    Populates ``self.peak_data`` (the data table), ``self.peak_param``
    (per-peak parameters, one row per peak) and ``self.peak_ids``
    (the named header columns).
    """
    # EAFP: treat the argument as a path first; open() raises TypeError for
    # file-like objects, in which case the buffer is parsed directly and
    # rewound so the subsequent reads start from the top.
    try:
        with open(filepath_or_buffer, 'r') as f:
            self.__parse_xps_param(f, delimiter)
    except TypeError:
        self.__parse_xps_param(filepath_or_buffer, delimiter)
        filepath_or_buffer.seek(0)
    # First pass over the file proper: the peak data table.
    self.peak_data = _read_csv(filepath_or_buffer, skiprows=data_start, delimiter=delimiter)
    # A StringIO buffer was consumed by the read above; rewind before the
    # second pass (a real path is simply reopened by read_csv).
    if type(filepath_or_buffer) is _StringIO:
        filepath_or_buffer.seek(0)
    # Second pass: the peak-parameter header block.
    hdr = _read_csv(filepath_or_buffer, skiprows=header_start, index_col=0, nrows=header_len, delimiter=delimiter)
    # Keep only genuinely-named columns; pandas labels blanks 'Unnamed: N'.
    col_names = [col for col in hdr if 'unnamed' not in col.lower()]
    self.peak_param = hdr[col_names]
    # Transpose so each peak becomes a row of parameters.
    self.peak_param = self.peak_param.transpose()
    # Coerce dtypes: the two label-like parameters stay strings, the rest
    # are numeric.
    for k in self.peak_param.keys():
        if k in ['Lineshape', 'Name']:
            self.peak_param[k] = self.peak_param[k].astype(str)
            continue
        self.peak_param[k] = self.peak_param[k].astype('float64')
    self.peak_ids = col_names
def input_csv_read(fileName):
    """Read the first data row of an input CSV file.

    Parameters
    ----------
    fileName : str or path-like
        Path to a CSV file with at least the columns 'input_state'
        (string) and 'input_number_to_generate' (integer).

    Returns
    -------
    numpy.ndarray
        The first row of the parsed table, e.g. ``[state, count]``.
    """
    dtype = {'input_state': "string", 'input_number_to_generate': int}
    # BUG FIX: the original called pd._read_csv, which does not exist in the
    # pandas public API and raised AttributeError; pd.read_csv is correct.
    data = pd.read_csv(fileName, dtype=dtype)
    parsedInput = data.values[0]
    return parsedInput