Beispiel #1
0
Datei: csv.py Projekt: phue/limix
def read(filename, sep=None, header=True, verbose=True):
    """
    Load a CSV file into a data frame.

    Parameters
    ----------
    filename : str
        Path to a CSV file.
    sep : str
        Column separator. When ``None`` (the default) the separator is
        obtained from ``_infer_separator(filename)``.
    header : bool
        ``True`` (default) if the first row of the file holds the column
        names; ``False`` if the file has no header row.
    verbose : bool
        ``True`` (default) to show a progress line while reading;
        ``False`` to stay silent.

    Returns
    -------
    data : dask or pandas data frame
        A dask data frame when ``_is_large_file(filename)`` is true, a
        pandas data frame otherwise. If the first column is named
        ``"Unnamed: 0"`` it becomes the (unnamed) index of the result.

    Examples
    --------
    .. doctest::

        >>> from limix.io.csv import read
        >>> from limix import file_example
        >>>
        >>> with file_example("data.csv") as filepath:
        ...     df = read(filepath, verbose=False)
        ...     print(df)  # doctest: +FLOAT_CMP
           pheno   attr1 attr2 attr3
        0    sex  string    10     a
        1   size   float    -3     b
        2  force     int     f     c
    """
    from dask.dataframe import read_csv as dask_read_csv
    from pandas import read_csv as pandas_read_csv
    from .._display import session_line

    separator = _infer_separator(filename) if sep is None else sep

    # Both readers take the header as a row number, or None for "no header".
    header_row = 0 if header else None

    with session_line("Reading {}... ".format(filename), disable=not verbose):
        reader = dask_read_csv if _is_large_file(filename) else pandas_read_csv
        frame = reader(filename, sep=separator, header=header_row)

    # A leading "Unnamed: 0" column is treated as a stored index: promote it
    # back to the index and clear the placeholder name.
    columns = frame.columns
    if len(columns) > 0 and columns[0] == "Unnamed: 0":
        frame = frame.set_index("Unnamed: 0")
        frame.index.name = None

    return frame
Beispiel #2
0
    def read_csv(cls, filename, sampling_rate=1.0, *args, **kwargs):
        """Build an instance of ``cls`` from a comma-separated file.

        Parameters
        ----------
        filename : str
            Filename for the csv file
        sampling_rate : float, optional
            Passed to ``cls`` as the ``sampling_rate`` keyword. Defaults
            to 1.0.
        *args, **kwargs
            Forwarded unchanged to ``pandas_read_csv``.

        Returns
        -------
        eeg_run : EEGRun
            EEGRun dataframe with read data.

        """
        # Parse the file first, then wrap the parsed table in ``cls``.
        frame = pandas_read_csv(filename, *args, **kwargs)
        return cls(frame, sampling_rate=sampling_rate)
Beispiel #3
0
    def read_csv(cls, filename, sampling_rate=None, *args, **kwargs):
        """Read comma-separated file.

        The result is constructed through ``cls`` rather than a hard-coded
        class name, so calling this on a subclass yields an instance of
        that subclass (assumes the method is bound as a classmethod; the
        decorator is outside the visible code).

        Parameters
        ----------
        filename : str
            Filename for the csv file
        sampling_rate : float, optional
            Passed to the constructor as the ``sampling_rate`` keyword.
            Defaults to ``None``.
        *args, **kwargs
            Forwarded unchanged to ``pandas_read_csv``.

        Returns
        -------
        eeg_run : EEGAuxRun
            EEGAuxRun (or subclass) dataframe with read data.

        """
        return cls(pandas_read_csv(filename, *args, **kwargs),
                   sampling_rate=sampling_rate)
Beispiel #4
0
    def read_csv(cls, filename, sampling_rate=None, *args, **kwargs):
        """Read comma-separated file.

        The result is constructed through ``cls`` rather than a hard-coded
        class name, so calling this on a derived class yields an instance
        of that derived class (assumes the method is bound as a
        classmethod; the decorator is outside the visible code).

        Parameters
        ----------
        filename : str
            Filename for the csv file
        sampling_rate : float, optional
            Passed to the constructor as the ``sampling_rate`` keyword.
            Defaults to ``None``.
        *args, **kwargs
            Forwarded unchanged to ``pandas_read_csv``.

        Returns
        -------
        eeg_run : EEGElectrodeRun
            EEGElectrodeRun (or subclass) dataframe with read data.

        """
        return cls(pandas_read_csv(filename, *args, **kwargs),
                   sampling_rate=sampling_rate)
Beispiel #5
0
    def read_csv(cls, filename, sampling_rate=None, *args, **kwargs):
        """Read comma-separated file.

        The result is constructed through ``cls`` rather than a hard-coded
        class name, so calling this on a derived class yields an instance
        of that derived class (assumes the method is bound as a
        classmethod; the decorator is outside the visible code).

        Parameters
        ----------
        filename : str
            Filename for the csv file
        sampling_rate : float, optional
            Passed to the constructor as the ``sampling_rate`` keyword.
            Defaults to ``None``.
        *args, **kwargs
            Forwarded unchanged to ``pandas_read_csv``.

        Returns
        -------
        eeg_run : EEGRun
            EEGRun (or subclass) dataframe with read data.

        """
        return cls(pandas_read_csv(filename, *args, **kwargs),
                   sampling_rate=sampling_rate)
Beispiel #6
0
def gameInfoDownloadForAllPlayers(restoreMainPath):
    """
    Download per-season game data for every player into ``restoreMainPath``.

    A progress table (``infoOfAllPlayers.csv`` inside ``restoreMainPath``)
    records, per player, whether the download finished (column
    ``downloaded`` holding ``"Yes"``/``"No"``). The table is read if it
    exists, otherwise it is created from ``filterDataForAllPlayers()`` with
    every player marked ``"No"``. Players already marked as downloaded are
    skipped; for the others the player folder is recreated from scratch and
    one regular-season and one play-offs CSV is written per season link.

    Any exception raised while downloading is caught and printed, and the
    progress table is written back to disk so the next run can resume. The
    table is also written after a run that completes without errors.

    Parameters
    ----------
    restoreMainPath : str
        Main directory under which all files are saved. Paths are built
        with a literal backslash separator.

    Notes
    -----
    The table is addressed by the integer labels ``0 .. len - 1``, i.e. it
    is assumed to carry a default integer index.
    NOTE(review): the table is saved with ``to_csv`` defaults and read back
    with ``pandas_read_csv`` defaults; confirm whether the saved index
    column is meant to reappear as a data column on the next load.
    """
    infoFilePath = restoreMainPath + "\\infoOfAllPlayers.csv"

    # Read the progress table; if it does not exist locally, create and
    # initialise it. This happens before the ``try`` so that a failure here
    # surfaces as-is instead of hitting the handler with no table to save.
    if os_path_exists(infoFilePath):
        infoOfAllPlayers = pandas_read_csv(infoFilePath)
        print("已读取本地球员基本信息")
    else:
        infoOfAllPlayers = filterDataForAllPlayers()
        infoOfAllPlayers["downloaded"] = "No"
        print("创建本地球员基本信息")

    try:
        for ii in range(len(infoOfAllPlayers)):
            playerName = infoOfAllPlayers["playerName"][ii]
            restoreFilePathOfPlayer = restoreMainPath + "\\" + playerName

            if infoOfAllPlayers["downloaded"][ii] != "No":
                print("{0}信息已下载".format(playerName))
                continue

            # A leftover folder means an earlier download was interrupted:
            # delete it and start this player over.
            if os_path_exists(restoreFilePathOfPlayer):
                shutil_rmtree(restoreFilePathOfPlayer)
                print("{0}信息不完整,删除重建".format(playerName))

            # mkdir signals failure by raising (it never returns False), so
            # report the folder and let the outer handler save the progress.
            try:
                os_mkdir(restoreFilePathOfPlayer)
            except OSError:
                print("文件夹{0}创建不成功".format(restoreFilePathOfPlayer))
                raise

            # Get the links to the statistics pages of all seasons of the
            # player.
            print("开始获取{0}信息".format(playerName))
            allSeasonWebLog = filterDataForallSeasonWebLog(
                infoOfAllPlayers["mainPageUrl"][ii])

            for singleSeasonWebLog in allSeasonWebLog:
                # The link may be invalid, in which case the four characters
                # before the last one are not digits; skip it before
                # spending a request on it.
                yearStr = singleSeasonWebLog[-5:-1]
                if not yearStr.isdigit():
                    continue

                strOfSeasonPage = requests_get(singleSeasonWebLog).text

                # Save the per-game data of the regular season and of the
                # play-offs locally.
                filterDataForRegularSeason(strOfSeasonPage).to_csv(
                    restoreFilePathOfPlayer + "\\" + "regularSeason"
                    + yearStr + ".csv")
                filterDataForPlayOffs(strOfSeasonPage).to_csv(
                    restoreFilePathOfPlayer + "\\" + "playOffs"
                    + yearStr + ".csv")

            # This player's data was saved successfully. A single ``.loc``
            # assignment writes into the table itself; the chained form
            # ``df[col][ii] = ...`` may only modify a temporary copy.
            infoOfAllPlayers.loc[ii, "downloaded"] = "Yes"

    except Exception as e:
        # Persist whatever progress was made so the next run can resume.
        infoOfAllPlayers.to_csv(infoFilePath)
        print(e)
        print("本地球员基本信息已更新")
    else:
        infoOfAllPlayers.to_csv(infoFilePath)
        print("下载完成")