Ejemplo n.º 1
0
def load_data3(stations=None, lon_range=None, lat_range=None, starttime='20150916', endtime='20160116', latest=False,
               train_start=0, train_stop=None, valid_start=None, valid_stop=None,
               segment=True, filter=False, normalize_target=False):
    """Load data for a time range and split it into training/validation parts.

    The array comes from ``generate_data(...).result`` and is indexed with
    three axes; every split below slices along axis 1.

    Args:
        stations: Forwarded to ``generate_data`` as ``pm_stations``.
        lon_range: Forwarded to ``generate_data``.
        lat_range: Forwarded to ``generate_data``.
        starttime: Start of the range as a ``'%Y%m%d'`` string.
        endtime: End of the range as a ``'%Y%m%d'`` string.
        latest: When true, ``starttime``/``endtime`` are overridden with
            121 days ago and yesterday respectively; the flag is also
            forwarded to ``generate_data``.
        train_start: First axis-1 index of the training slice.
        train_stop: End index of the training slice; defaults to three
            quarters of the axis-1 length (rounded).
        valid_start: First index of the validation slice; defaults to
            ``train_stop``.
        valid_stop: End index of the validation slice; defaults to ``-1``,
            which leaves the final step out of the validation slice.
        segment: When true, each split is passed through ``segment_data``.
        filter: When true, each split is passed through ``filter_data``.
        normalize_target: When true, the last channel is passed through
            ``normalize_pm25`` before the mean is subtracted.

    Returns:
        ``(train_data, valid_data, train_data2)`` where ``train_data2`` is
        the slice ``data[:, -(train_stop - train_start):, :]``, i.e. a
        window of the training length taken from the end of the data.
    """
    if latest:
        # Read the clock once so both bounds derive from the same instant
        # (two separate reads could straddle midnight).
        now = datetime.datetime.today()
        endtime = (now - datetime.timedelta(days=1)).strftime('%Y%m%d')
        starttime = (now - datetime.timedelta(days=121)).strftime('%Y%m%d')
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print('time range: %s - %s' % (starttime, endtime))
    data = generate_data(pm_stations=stations, lon_range=lon_range, lat_range=lat_range,
                         starttime=starttime, endtime=endtime, latest=latest).result
    if normalize_target:
        data[:, :, -1] = normalize_pm25(data[:, :, -1])
    data[:, :, -1] -= data[:, :, -2]  # subtract pm25 mean from pm25 target

    if train_stop is None:
        train_stop = int(round(data.shape[1] * 3. / 4))
    if valid_start is None:
        valid_start = train_stop
    if valid_stop is None:
        valid_stop = -1
    train_data = data[:, train_start:train_stop, :]
    valid_data = data[:, valid_start:valid_stop, :]
    train_data2 = data[:, -(train_stop - train_start):, :]

    if segment:
        train_data = segment_data(train_data)
        valid_data = segment_data(valid_data)
        train_data2 = segment_data(train_data2)
    if filter:
        train_data = filter_data(train_data)
        valid_data = filter_data(valid_data)
        train_data2 = filter_data(train_data2)
    return train_data, valid_data, train_data2
Ejemplo n.º 2
0
def load_data4(
        stations=None,
        lon_range=None,
        lat_range=None,
        heating=False,
        segment=True,
        filter=False,
        normalize_target=False):
    """Load data and cut training/validation windows relative to ``today``.

    Data is fetched with ``generate_data`` between the module-level
    ``startday_string`` and ``yesterday_string``. The last channel is
    optionally passed through ``normalize_pm25`` and then has the
    second-to-last channel subtracted from it.

    Window selection:

    * Default: training covers 90 days starting 406 days before ``today``;
      validation covers 30 days starting 31 days before ``today``.
    * ``heating`` set and ``today`` less than 46 days before
      ``heating_start`` or ``heating_end``: training is the 90 days ending
      the day before that date one year earlier.
    * ``heating`` set and ``today`` less than 46 days after one of those
      dates: training is the 90 days starting the day after that date one
      year earlier. Validation uses the recent window only once more than
      31 days have passed since the date; otherwise it starts where
      training ends.
    * If training would start before ``startdaytime`` it is moved to start
      there, and validation starts right at the training end.

    Args:
        stations: Forwarded to ``generate_data`` as ``pm_stations``.
        lon_range: Forwarded to ``generate_data``.
        lat_range: Forwarded to ``generate_data``.
        heating: Enable the heating-date-aware window selection above.
        segment: When true, both splits are passed through ``segment_data``.
        filter: When true, both splits are passed through ``filter_data``.
        normalize_target: When true, apply ``normalize_pm25`` to the last
            channel before the subtraction.

    Returns:
        ``(train_data, valid_data)``.
    """
    data = generate_data(
        pm_stations=stations,
        lon_range=lon_range,
        lat_range=lat_range,
        starttime=startday_string,
        endtime=yesterday_string,
        )
    data_starttime = data.starttime
    data = data.result
    if normalize_target:
        data[:, :, -1] = normalize_pm25(data[:, :, -1])
    # subtract pm25 mean from pm25 target
    data[:, :, -1] -= data[:, :, -2]

    one_day = datetime.timedelta(days=1)
    train_span = datetime.timedelta(days=90)
    valid_span = datetime.timedelta(days=30)
    switch_margin = datetime.timedelta(days=46)
    recent_lag = datetime.timedelta(days=31)

    def year_before(moment):
        # Same calendar date, previous year.
        return moment.replace(year=moment.year - 1)

    # Default windows; the heating branches below override them as needed.
    train_starttime = today - datetime.timedelta(days=406)
    train_endtime = train_starttime + train_span
    valid_starttime = today - recent_lag

    if heating:
        # Branch order is significant if the margins around the two dates
        # overlap: "shortly before" is checked for both dates first.
        if heating_start - switch_margin < today < heating_start:
            train_endtime = year_before(heating_start) - one_day
            train_starttime = train_endtime - train_span
        elif heating_end - switch_margin < today < heating_end:
            train_endtime = year_before(heating_end) - one_day
            train_starttime = train_endtime - train_span
        elif heating_start < today < heating_start + switch_margin:
            train_starttime = year_before(heating_start) + one_day
            train_endtime = train_starttime + train_span
            if today <= heating_start + recent_lag:
                valid_starttime = train_endtime
        elif heating_end < today < heating_end + switch_margin:
            train_starttime = year_before(heating_end) + one_day
            train_endtime = train_starttime + train_span
            if today <= heating_end + recent_lag:
                valid_starttime = train_endtime

    if train_starttime < startdaytime:
        # Not enough history: train from the first available day and
        # validate on the period immediately after.
        train_starttime = startdaytime
        train_endtime = train_starttime + train_span
        valid_starttime = train_endtime
    valid_endtime = valid_starttime + valid_span

    def to_index(moment):
        # 8 steps per day, one per 7200 s. Floor division keeps the result
        # an integer even where "/" means true division.
        offset = moment - data_starttime
        return offset.days * 8 + offset.seconds // 7200

    train_data = data[:, to_index(train_starttime):to_index(train_endtime), :]
    valid_data = data[:, to_index(valid_starttime):to_index(valid_endtime), :]

    if segment:
        train_data = segment_data(train_data)
        valid_data = segment_data(valid_data)
    if filter:
        train_data = filter_data(train_data)
        valid_data = filter_data(valid_data)
    return train_data, valid_data
Ejemplo n.º 3
0
def load_data4(stations=None,
               lon_range=None,
               lat_range=None,
               heating=False,
               segment=True,
               filter=False,
               normalize_target=False):
    """Load data and cut training/validation windows relative to ``today``.

    Data is fetched with ``generate_data`` between the module-level
    ``startday_string`` and ``yesterday_string``. The last channel is
    optionally passed through ``normalize_pm25`` and then has the
    second-to-last channel subtracted from it.

    Window selection:

    * Default: training covers 90 days starting 406 days before ``today``;
      validation covers 30 days starting 31 days before ``today``.
    * ``heating`` set and ``today`` less than 46 days before
      ``heating_start`` or ``heating_end``: training is the 90 days ending
      the day before that date one year earlier.
    * ``heating`` set and ``today`` less than 46 days after one of those
      dates: training is the 90 days starting the day after that date one
      year earlier. Validation uses the recent window only once more than
      31 days have passed since the date; otherwise it starts where
      training ends.
    * If training would start before ``startdaytime`` it is moved to start
      there, and validation starts right at the training end.

    Args:
        stations: Forwarded to ``generate_data`` as ``pm_stations``.
        lon_range: Forwarded to ``generate_data``.
        lat_range: Forwarded to ``generate_data``.
        heating: Enable the heating-date-aware window selection above.
        segment: When true, both splits are passed through ``segment_data``.
        filter: When true, both splits are passed through ``filter_data``.
        normalize_target: When true, apply ``normalize_pm25`` to the last
            channel before the subtraction.

    Returns:
        ``(train_data, valid_data)``.
    """
    data = generate_data(
        pm_stations=stations,
        lon_range=lon_range,
        lat_range=lat_range,
        starttime=startday_string,
        endtime=yesterday_string,
    )
    data_starttime = data.starttime
    data = data.result
    if normalize_target:
        data[:, :, -1] = normalize_pm25(data[:, :, -1])
    # subtract pm25 mean from pm25 target
    data[:, :, -1] -= data[:, :, -2]

    day = datetime.timedelta(days=1)
    train_length = datetime.timedelta(days=90)
    valid_length = datetime.timedelta(days=30)
    margin = datetime.timedelta(days=46)
    valid_lag = datetime.timedelta(days=31)

    def previous_year(moment):
        # Same calendar date, one year earlier.
        return moment.replace(year=moment.year - 1)

    # Start from the default windows and let the heating rules override.
    train_starttime = today - datetime.timedelta(days=406)
    train_endtime = train_starttime + train_length
    valid_starttime = today - valid_lag

    if heating:
        # Keep this order: both "shortly before" cases are tested ahead of
        # the "shortly after" cases, which matters if the margins overlap.
        if heating_start - margin < today < heating_start:
            train_endtime = previous_year(heating_start) - day
            train_starttime = train_endtime - train_length
        elif heating_end - margin < today < heating_end:
            train_endtime = previous_year(heating_end) - day
            train_starttime = train_endtime - train_length
        elif heating_start < today < heating_start + margin:
            train_starttime = previous_year(heating_start) + day
            train_endtime = train_starttime + train_length
            if today <= heating_start + valid_lag:
                valid_starttime = train_endtime
        elif heating_end < today < heating_end + margin:
            train_starttime = previous_year(heating_end) + day
            train_endtime = train_starttime + train_length
            if today <= heating_end + valid_lag:
                valid_starttime = train_endtime

    if train_starttime < startdaytime:
        # Too little history: train from the earliest day and validate on
        # the period right after training.
        train_starttime = startdaytime
        train_endtime = train_starttime + train_length
        valid_starttime = train_endtime
    valid_endtime = valid_starttime + valid_length

    def step_index(moment):
        # 8 steps per day, one per 7200 s. "//" guarantees an integer
        # slice index regardless of how "/" is defined for ints.
        delta = moment - data_starttime
        return delta.days * 8 + delta.seconds // 7200

    train_data = data[:, step_index(train_starttime):step_index(train_endtime), :]
    valid_data = data[:, step_index(valid_starttime):step_index(valid_endtime), :]

    if segment:
        train_data = segment_data(train_data)
        valid_data = segment_data(valid_data)
    if filter:
        train_data = filter_data(train_data)
        valid_data = filter_data(valid_data)
    return train_data, valid_data