Python read_data 예제들, filter.read_data Python 예제들

예제 #1

0

파일 보기

파일: Feature_6_magnitude_deficit.py 프로젝트: sajeed786/Earthquake-Prediction

def read_feature_6(path):
    """Return, per month group, the observed maximum magnitude minus an expected one.

    The catalogue loaded by ``read_data(path)`` must provide ``Magnitude``,
    ``Date`` (``%Y/%m/%d``) and ``Time`` columns. Only rows whose magnitude is
    strictly greater than the module-level ``threshold`` are used.

    Rows are grouped by calendar month number only (``Date.dt.month``), so the
    same month of different years falls into one group.

    The expected magnitude for each entry is ``a / b`` for the ``(a, b)`` pairs
    yielded by ``read_feature_4(path)``, or ``0.0`` when either value is zero.
    NOTE(review): presumably one pair per month group, in the same order as
    the groups here -- confirm against ``read_feature_4``.

    Args:
        path: Passed through unchanged to ``read_data`` and ``read_feature_4``.

    Returns:
        numpy.ndarray: ``observed_max - expected`` for each month group.
    """
    df = read_data(path)

    # Vectorised conversion instead of calling pd.to_numeric once per element.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # Take an explicit copy: assigning columns on a bare .loc slice triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    # Grouping approach: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
    groups = new_df.groupby(new_df.Date.dt.month)
    feat = read_feature_4(path)

    # Listing group keys: https://stackoverflow.com/questions/28844535/python-pandas-groupby-get-list-of-groups
    M_observed = [
        groups.get_group(month)["Magnitude"].max() for month in groups.groups
    ]
    # Guard against division by zero (and a zero numerator) with a 0.0 fallback.
    M_expected = [
        0.0 if (b == 0.0 or a == 0.0) else a / b for a, b in feat
    ]
    return np.subtract(np.array(M_observed), np.array(M_expected))

예제 #2

0

파일 보기

def read_feature_2(path):
    """Return the mean magnitude of above-threshold events for each month group.

    The catalogue loaded by ``read_data(path)`` must provide ``Magnitude``,
    ``Date`` (``%Y/%m/%d``) and ``Time`` columns. Only rows whose magnitude is
    strictly greater than the module-level ``threshold`` are used.

    Rows are grouped by calendar month number only (``Date.dt.month``), so the
    same month of different years falls into one group.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        pandas.Series: Mean ``Magnitude`` indexed by month number.
    """
    df = read_data(path)

    # Vectorised conversion instead of calling pd.to_numeric once per element.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # Take an explicit copy: assigning columns on a bare .loc slice triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]

    return new_df.groupby(new_df.Date.dt.month)['Magnitude'].mean()

예제 #3

0

파일 보기

def read_feature_2(path):
    """Compute the average magnitude per month group for events above ``threshold``.

    ``read_data(path)`` must return a frame with ``Magnitude``, ``Date``
    (formatted ``%Y/%m/%d``) and ``Time`` columns. Rows are kept only when
    their magnitude is strictly greater than the module-level ``threshold``.

    The grouping key is the month number alone (``Date.dt.month``); events
    from the same month in different years share a group.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        pandas.Series: Mean ``Magnitude`` indexed by month number.
    """
    df = read_data(path)

    # pd.to_numeric works on the whole column; no per-element apply needed.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # .copy() detaches the filtered rows from ``df`` so the column writes below
    # do not raise SettingWithCopyWarning.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    # Grouping approach: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
    groups = new_df.groupby(new_df.Date.dt.month)

    return groups['Magnitude'].mean()

예제 #4

0

파일 보기

def read_feature_5(path):
    """Return, per month group, the mean squared deviation from a fitted log-count line.

    For each month group of above-threshold events, the distinct magnitudes
    ``M`` are paired with ``log(N)``, where ``N`` is the number of events in
    the group with magnitude >= ``M``. A line ``log(N) = a - b * M`` is fitted
    with closed-form sums and the squared residuals are summed and divided by
    ``n - 1`` (``n`` = number of events in the group). Groups with a single
    event yield ``0.0``.

    The catalogue loaded by ``read_data(path)`` must provide ``Magnitude``,
    ``Date`` (``%Y/%m/%d``) and ``Time`` columns. Rows are grouped by calendar
    month number only (``Date.dt.month``), so the same month of different
    years falls into one group.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        numpy.ndarray: One deviation value per month group.
    """
    df = read_data(path)

    # Vectorised conversion instead of calling pd.to_numeric once per element.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # Take an explicit copy: assigning columns on a bare .loc slice triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    # Grouping approach: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
    groups = new_df.groupby(new_df.Date.dt.month)

    lst = []
    # Listing group keys: https://stackoverflow.com/questions/28844535/python-pandas-groupby-get-list-of-groups
    for month in groups.groups:
        sorted_mag = np.sort(groups.get_group(month)["Magnitude"])
        n = len(sorted_mag)
        if n == 1:
            # A single event cannot define a line; the deviation is zero.
            lst.append(0.0)
            continue

        # Counter preserves insertion order, so keys come out ascending.
        occurrences = Counter(sorted_mag)
        mags = np.array(list(occurrences))
        # For each distinct magnitude, log of the count of events at or above it.
        remaining = n
        log_counts = []
        for mag in occurrences:
            log_counts.append(math.log(remaining))
            remaining -= occurrences[mag]
        log_counts = np.array(log_counts)

        sum_m = np.sum(mags)
        sum_mn = np.sum(mags * log_counts)
        sum_n = np.sum(log_counts)
        sum_m_sq = np.sum(np.square(mags))

        # NOTE(review): the sums run over distinct magnitudes but ``n`` is the
        # total event count; the two differ when magnitudes repeat. Kept as in
        # the original -- confirm whether len(mags) was intended.
        b = (n * sum_mn - sum_m * sum_n) / (np.square(sum_m) - n * sum_m_sq)
        scaled = b * mags
        a = np.sum(log_counts + scaled) / n
        eta = np.sum(np.square(log_counts - (a - scaled))) / (n - 1)
        lst.append(eta)
    return np.array(lst)

예제 #5

0

파일 보기

파일: Feature_1_time.py 프로젝트: sajeed786/Earthquake-Prediction

def read_feature_1(path):
    """Return, per month group, the whole days between its first and last event.

    The catalogue loaded by ``read_data(path)`` must provide ``Magnitude``,
    ``Date`` and ``Time`` columns; ``Date + ' ' + Time`` is parsed with the
    format ``%Y/%m/%d %H:%M:%S.%f``. Only rows whose magnitude is strictly
    greater than the module-level ``threshold`` are used.

    Rows are grouped by calendar month number only (``Datetime.dt.month``), so
    the same month of different years falls into one group.

    NOTE(review): "first" and "last" follow row order, not chronological
    order, so a catalogue sorted newest-first would produce negative spans --
    confirm the ordering guaranteed by ``read_data``.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        The ``.dt.days`` component of ``last - first`` for each month group,
        converted with ``np.float64``.
    """
    df = read_data(path)

    # Vectorised conversion instead of calling pd.to_numeric once per element.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # Take an explicit copy: assigning columns on a bare .loc slice triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    stamps = new_df['Date'] + ' ' + new_df['Time']
    new_df["Datetime"] = pd.to_datetime(stamps, format='%Y/%m/%d %H:%M:%S.%f')
    del new_df["Time"]
    # Grouping approach: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
    groups = new_df.groupby(new_df.Datetime.dt.month).agg(
        {'Datetime': ['first', 'last']})

    days = groups.Datetime['last'] - groups.Datetime['first']
    return np.float64(days.dt.days)

예제 #6

0

파일 보기

def read_feature_1(path):
    """Compute the elapsed whole days between first and last event of each month group.

    ``read_data(path)`` must return a frame with ``Magnitude``, ``Date`` and
    ``Time`` columns. ``Date`` and ``Time`` are joined with a space and parsed
    using ``%Y/%m/%d %H:%M:%S.%f``. Rows are kept only when their magnitude is
    strictly greater than the module-level ``threshold``.

    The grouping key is the month number alone (``Datetime.dt.month``); events
    from the same month in different years share a group.

    NOTE(review): the span is ``last - first`` in row order, so its sign
    depends on how ``read_data`` orders rows -- confirm the input is sorted
    oldest-first.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        The ``.dt.days`` component of each group's span, converted with
        ``np.float64``.
    """
    df = read_data(path)

    # pd.to_numeric works on the whole column; no per-element apply needed.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # .copy() detaches the filtered rows from ``df`` so the column writes below
    # do not raise SettingWithCopyWarning.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Datetime"] = pd.to_datetime(
        new_df['Date'] + ' ' + new_df['Time'],
        format='%Y/%m/%d %H:%M:%S.%f')
    del new_df["Time"]

    bounds = new_df.groupby(new_df.Datetime.dt.month).agg(
        {'Datetime': ['first', 'last']})
    span = bounds.Datetime['last'] - bounds.Datetime['first']
    return np.float64(span.dt.days)

예제 #7

0

파일 보기

파일: Feature_5_deviation.py 프로젝트: kavya76/Microsoft-Codefundoo-2018

def read_feature_5(path):
    """Compute, per month group, the residual variance of a log-count vs magnitude fit.

    Within each month group of above-threshold events, every distinct
    magnitude ``M`` is paired with ``log(N)``, ``N`` being the number of
    events in the group whose magnitude is >= ``M``. The slope ``b`` and
    intercept ``a`` of ``log(N) = a - b * M`` come from closed-form sums, and
    the result is the sum of squared residuals divided by ``n - 1``, where
    ``n`` is the group's event count. A group with one event contributes
    ``0.0``.

    ``read_data(path)`` must return a frame with ``Magnitude``, ``Date``
    (formatted ``%Y/%m/%d``) and ``Time`` columns. The grouping key is the
    month number alone (``Date.dt.month``); events from the same month in
    different years share a group.

    Args:
        path: Passed through unchanged to ``read_data``.

    Returns:
        numpy.ndarray: One deviation value per month group.
    """
    df = read_data(path)

    # pd.to_numeric works on the whole column; no per-element apply needed.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # .copy() detaches the filtered rows from ``df`` so the column writes below
    # do not raise SettingWithCopyWarning.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    groups = new_df.groupby(new_df.Date.dt.month)

    lst = []
    for month in groups.groups:
        sorted_mag = np.sort(groups.get_group(month)["Magnitude"])
        n = len(sorted_mag)
        if n == 1:
            # One event gives no spread to measure.
            lst.append(0.0)
            continue

        # Keys of the Counter follow first-seen order, i.e. ascending magnitude.
        tally = Counter(sorted_mag)
        distinct = np.array(list(tally))
        # Walk upward through the magnitudes, recording log(events still >= mag).
        at_or_above = n
        logs = []
        for mag in tally:
            logs.append(math.log(at_or_above))
            at_or_above -= tally[mag]
        logs = np.array(logs)

        part1 = np.sum(distinct * logs)
        part2 = np.sum(distinct) * np.sum(logs)
        part3 = np.square(np.sum(distinct))
        part4 = np.sum(np.square(distinct))

        # NOTE(review): ``n`` counts events, while the sums above run over
        # distinct magnitudes only; these differ when magnitudes repeat.
        # Behaviour is kept as in the original -- confirm the intended size.
        b = (n * part1 - part2) / (part3 - n * part4)
        b_times_m = b * distinct
        a = np.sum(logs + b_times_m) / n
        eta = np.sum(np.square(logs - (a - b_times_m))) / (n - 1)
        lst.append(eta)
    return np.array(lst)

예제 #8

0

파일 보기

파일: Feature_3_Energy.py 프로젝트: kavya76/Microsoft-Codefundoo-2018

def read_feature_3(path):
    """Return, per month group, summed ``sqrt(10**(11.8 + 1.5*M))`` divided by elapsed days.

    For every above-threshold event the quantity ``sqrt(10 ** (11.8 + 1.5 * M))``
    is computed from its magnitude ``M`` (this looks like the square root of a
    Gutenberg-Richter style energy estimate -- confirm units with the source
    paper). The values are summed per month group and divided element-wise by
    the result of ``read_feature_1(path)``.

    ``read_data(path)`` must return a frame with ``Magnitude``, ``Date``
    (formatted ``%Y/%m/%d``) and ``Time`` columns. The grouping key is the
    month number alone (``Date.dt.month``); events from the same month in
    different years share a group.

    Floating-point warnings (e.g. division by a zero-day span) are silenced
    only for the duration of the division; the process-wide NumPy error
    settings are left as the caller had them.

    Args:
        path: Passed through unchanged to ``read_data`` and ``read_feature_1``.

    Returns:
        numpy.ndarray: One rate value per month group; ``inf``/``nan`` where
        the divisor is zero.
    """
    df = read_data(path)

    # pd.to_numeric works on the whole column; no per-element apply needed.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # .copy() detaches the filtered rows from ``df`` so the column writes below
    # do not raise SettingWithCopyWarning.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    groups = new_df.groupby(new_df.Date.dt.month)
    T = read_feature_1(path)

    # Vectorised over each group's magnitudes instead of a per-element lambda.
    dE = np.array([
        np.sum(np.sqrt(10 ** (11.8 + 1.5 * groups.get_group(month)["Magnitude"])))
        for month in groups.groups
    ])
    # Scoped suppression: np.seterr('ignore') would change global state for
    # every later NumPy call in the process.
    with np.errstate(all='ignore'):
        return np.array(dE / T)

예제 #9

0

파일 보기

def read_feature_6(path):
    """Compute the gap between observed and expected maximum magnitude per month group.

    ``read_data(path)`` must return a frame with ``Magnitude``, ``Date``
    (formatted ``%Y/%m/%d``) and ``Time`` columns. Rows are kept only when
    their magnitude is strictly greater than the module-level ``threshold``.

    The grouping key is the month number alone (``Date.dt.month``); events
    from the same month in different years share a group.

    Each ``(a, b)`` pair yielded by ``read_feature_4(path)`` gives an expected
    value ``a / b``; a zero ``a`` or ``b`` gives ``0.0`` instead.
    NOTE(review): this assumes ``read_feature_4`` yields exactly one pair per
    month group in matching order -- verify against its implementation.

    Args:
        path: Passed through unchanged to ``read_data`` and ``read_feature_4``.

    Returns:
        numpy.ndarray: ``observed_max - expected`` for each month group.
    """
    df = read_data(path)

    # pd.to_numeric works on the whole column; no per-element apply needed.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # .copy() detaches the filtered rows from ``df`` so the column writes below
    # do not raise SettingWithCopyWarning.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    groups = new_df.groupby(new_df.Date.dt.month)
    feat = read_feature_4(path)

    observed = np.array([
        groups.get_group(month)["Magnitude"].max() for month in groups.groups
    ])
    # A zero in either slot would make a / b meaningless, so fall back to 0.0.
    expected = np.array([
        0.0 if (b == 0.0 or a == 0.0) else a / b for a, b in feat
    ])
    return np.subtract(observed, expected)

예제 #10

0

파일 보기

파일: Feature_3_Energy.py 프로젝트: sajeed786/Earthquake-Prediction

def read_feature_3(path):
    """Return the per-month-group sum of ``sqrt(10**(11.8 + 1.5*M))`` over elapsed days.

    Each above-threshold event contributes ``sqrt(10 ** (11.8 + 1.5 * M))``
    for its magnitude ``M`` (presumably the square root of a seismic energy
    estimate -- confirm the formula's source and units). Contributions are
    summed per month group and divided element-wise by the array returned
    from ``read_feature_1(path)``.

    The catalogue loaded by ``read_data(path)`` must provide ``Magnitude``,
    ``Date`` (``%Y/%m/%d``) and ``Time`` columns. Rows are grouped by calendar
    month number only (``Date.dt.month``), so the same month of different
    years falls into one group.

    NumPy floating-point warnings are suppressed only around the division, so
    the caller's global error settings are not modified.

    Args:
        path: Passed through unchanged to ``read_data`` and ``read_feature_1``.

    Returns:
        numpy.ndarray: One rate value per month group; ``inf``/``nan`` where
        the divisor is zero.
    """
    df = read_data(path)

    # Vectorised conversion instead of calling pd.to_numeric once per element.
    df['Magnitude'] = pd.to_numeric(df['Magnitude'])
    # Take an explicit copy: assigning columns on a bare .loc slice triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    new_df = df.loc[df["Magnitude"] > threshold].copy()
    new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y/%m/%d')
    del new_df["Time"]
    # Grouping approach: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
    groups = new_df.groupby(new_df.Date.dt.month)
    T = read_feature_1(path)

    # get_group usage: https://pandas.pydata.org/pandas-docs/stable/groupby.html
    # Group keys: https://stackoverflow.com/questions/28844535/python-pandas-groupby-get-list-of-groups
    totals = []
    for month in groups.groups:
        magnitudes = groups.get_group(month)["Magnitude"]
        # Whole-column arithmetic replaces the per-element lambda.
        totals.append(np.sum(np.sqrt(10 ** (11.8 + 1.5 * magnitudes))))
    dE = np.array(totals)

    # Ignore divide-by-zero only here; np.seterr('ignore') would leave the
    # whole process with warnings disabled.
    with np.errstate(all='ignore'):
        return np.array(dE / T)