예제 #1
0
 def __init__(self):
     """Fit a linear model on the training file and write test predictions.

     Loads ``train_pred.csv.gz`` and ``test_pred.csv.gz`` through
     ``self.read_file``, builds the feature matrix with ``self.day_time`` and
     the targets with ``self.duration``, fits ``linear.LinearLearner`` with
     ``lambda_=1.`` and writes one line per test row to
     ``predtekmovanje_results.txt``.
     """
     self.data = self.read_file("train_pred.csv.gz")
     self.test_data = self.read_file("test_pred.csv.gz")

     X = np.vstack(self.day_time(self.data))
     y = np.array(self.duration(self.data))
     test_matrix = np.vstack(self.day_time(self.test_data))

     lr = linear.LinearLearner(lambda_=1.)
     napovednik = lr(X, y)
     result = [napovednik(line) for line in test_matrix]

     # The context manager guarantees the output is flushed and closed; the
     # handle used to be left open for the lifetime of the process.
     with open("predtekmovanje_results.txt", "wt") as fo:
         for prediction, row in zip(result, self.test_data):
             # presumably column 6 is the departure timestamp that the
             # prediction is added to -- confirm against lpputil.tsadd
             fo.write(lpputil.tsadd(row[6], prediction) + "\n")
예제 #2
0
파일: main.py 프로젝트: kg7155/DataMining
def train_routes(routes_to_train, train):
    """Fit one linear model per route.

    Args:
        routes_to_train: Iterable of route identifiers, compared against the
            ``"Route"`` column of ``train``.
        train: DataFrame containing at least the columns ``"Route"``,
            ``"TravelTime"`` (accessed through ``.dt.total_seconds()``),
            ``"DepartureTime"``, ``"ArrivalTime"`` and ``"RouteDirection"``.
            Every other column is used as a feature.

    Returns:
        Dict mapping each route to the predictor returned by
        ``linear.LinearLearner(lambda_=1.)`` for that route's rows.
    """
    non_feature_columns = [
        "TravelTime", "DepartureTime", "ArrivalTime", "Route",
        "RouteDirection"
    ]
    models_routes = {}
    for route in routes_to_train:
        # Read from the ``train`` argument. The previous code ignored it and
        # used a module-level ``my_train`` instead, which made the parameter
        # dead and disagreed with ``train_route_dirs``.
        route_rows = train[train["Route"] == route]
        targets = route_rows["TravelTime"].dt.total_seconds()
        features = route_rows.drop(non_feature_columns, axis=1)

        lr = linear.LinearLearner(lambda_=1.)
        models_routes[route] = lr(np.array(features), np.array(targets))
    return models_routes
예제 #3
0
파일: main.py 프로젝트: kg7155/DataMining
def train_route_dirs(route_dirs_to_train, train):
    """Fit one linear model per route direction.

    Args:
        route_dirs_to_train: Iterable of values compared against the
            ``"RouteDirection"`` column of ``train``.
        train: DataFrame with the columns ``"RouteDirection"``, ``"Route"``,
            ``"TravelTime"``, ``"DepartureTime"`` and ``"ArrivalTime"``;
            the remaining columns are used as features.

    Returns:
        A two-element list ``[routes_to_train, models_dirs]``: the routes
        collected for directions without training rows, and a dict mapping
        each trained direction to its fitted predictor.
    """
    non_feature_columns = [
        "TravelTime", "DepartureTime", "ArrivalTime", "Route",
        "RouteDirection"
    ]
    fallback_routes = []
    dir_models = {}

    for direction in route_dirs_to_train:
        selector = train["RouteDirection"] == direction
        subset = train[selector]

        if subset.shape[0] != 0:
            targets = subset["TravelTime"].dt.total_seconds()
            features = subset.drop(non_feature_columns, axis=1)
            learner = linear.LinearLearner(lambda_=1.)
            dir_models[direction] = learner(np.array(features),
                                            np.array(targets))
            continue

        # No rows for this direction: it is meant to be trained later by
        # route number.
        # NOTE(review): this lookup applies the same filter that just came
        # back empty, so it looks like it can never find a route -- confirm
        # which table the route was supposed to be taken from.
        route_ids = train.loc[selector, "Route"]
        if len(route_ids) > 0:
            fallback_routes.append(route_ids.iloc[0])

    return [fallback_routes, dir_models]
예제 #4
0
import linear
import numpy

if __name__ == "__main__":
    # Minimal demo: fit the learner on three 2-feature examples, then show
    # the fitted coefficients and the prediction for one new example.
    X = numpy.array([[1, 3], [2, 2], [3, 3]])
    y = numpy.array([10, 11, 12])

    lr = linear.LinearLearner(lambda_=1.)
    napovednik = lr(X, y)

    # print() with a pre-formatted string emits the same text on Python 2 and
    # Python 3; the former print statement is a SyntaxError on Python 3.
    # The value is wrapped in a 1-tuple so an array is never unpacked as
    # several format arguments.
    print("Koeficienti %s" % (napovednik.th,))  # the first one is the constant term

    nov_primer = numpy.array([2, 11])
    print("Napoved %s" % (napovednik(nov_primer),))
예제 #5
0
        X = linear.append_ones(np.array(x))
        return route, dejanski_cas, originalen_datum, X


# Group the training rows by the value in column 3
# (presumably the line identifier -- confirm against the data file).
linije = {}
# Read the training archive inside a context manager so it is closed as soon
# as the rows are loaded; previously the handle leaked when ``f`` was rebound
# to the test archive below.
with gzip.open("train.csv.gz", "rt", encoding="latin1") as f:
    reader = csv.reader(f, delimiter="\t")
    next(reader)  # skip the first (header) row
    for primer in reader:
        linije.setdefault(primer[3], []).append(primer)

# Replace each group's raw rows with the model fitted on them. Only existing
# keys are reassigned, so iterating the dict while updating it is safe.
linearna_regresija = linear.LinearLearner()
for linija in linije:
    x, y = zgradi_matrike(linije[linija], True)
    linije[linija] = linearna_regresija(x, y)

# NOTE(review): ``f`` and ``datoteka`` are left open here because the code
# after this section presumably still uses them -- make sure they get closed.
f = gzip.open("test.csv.gz", "rt",
              encoding="latin1")  # to print the MAE, change this to "train.csv.gz"
vrstica = csv.reader(f, delimiter="\t")
next(vrstica)  # skip the first (header) row
ime, dejanski_cas, primeri, testni_X = zgradi_matrike(vrstica, False)

datoteka = open("napovedi_tekmovanje.txt", "wt", encoding="latin1")

mae_mesec = 11
mae = 0
stevilo_primerov = 0
예제 #6
0
def read_file(file_path):
    """Read a gzip-compressed, tab-separated file and return its data rows.

    Args:
        file_path: Path to the gzip file; its content is decoded as UTF-8.

    Returns:
        A list of rows, each a list of strings, with the first (header) row
        dropped. An empty file yields an empty list.
    """
    # The context manager closes the archive even if parsing fails; the
    # handle used to be left open.
    with gzip.open(file_path, "rt", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t")
        # Skip the table header; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        return list(reader)

if __name__ == "__main__":

    # read the files we learn from and the ones we test on
    data = read_file("train.csv.gz")
    test_data = read_file("test.csv.gz")

    # build the model
    learner = SeparateBySetLearner(linear.LinearLearner(lambda_=1.))
    c = learner(data)

    # ``with`` closes the results file even when a prediction raises; the
    # explicit close() was skipped in that case.
    with open("results.txt", "wt") as fo:
        for row in test_data:
            fo.write(lpputil.tsadd(row[-3], c(row)) + "\n")

    # validate on internal data
    data, test_data, real = loci_po_mesecu(data)

    # A fresh learner is fitted on the internal training split. The learner
    # and the loop row used to share the single name ``l``.
    learner = SeparateBySetLearner(linear.LinearLearner(lambda_=1.))
    c = learner(data)
    results = [lpputil.tsadd(row[-3], c(row)) for row in test_data]
예제 #7
0
def learn(data, DEC):
    """Fit one regularised linear model per route from the rows in ``data``.

    Args:
        data: Rows understood by the accessor helpers ``getReg``, ``getDri``,
            ``getRoute``, ``getArr`` and ``getDep``. It is iterated several
            times, so it must be re-iterable; an empty ``data`` makes the
            ``avg_line`` division fail.
        DEC: Passed unchanged to ``mapTime`` together with the module-level
            ``FMT``.

    Returns:
        The tuple ``(models, avg_line, map_route, map_reg, map_dri, mpx_reg,
        mpx_dri, mpx_tim, sr, sd, st)`` where

        * ``models`` holds one fitted predictor per mapped route index,
        * ``avg_line`` is the mean of ``dtx.seconds`` over all rows,
        * ``map_route`` / ``map_reg`` / ``map_dri`` are the first values
          returned by ``mapData``,
        * ``mpx_reg`` / ``mpx_dri`` / ``mpx_tim`` are, per route, the second
          value returned by ``rankData``,
        * ``sr`` / ``sd`` / ``st`` are, per route, the number of distinct
          mapped reg / dri / time values.
    """
    # presumably each map translates a raw value to a dense integer index
    # (map_route values are used as list indices below) -- confirm in mapData.
    # The inverse maps are not used in this function.
    map_reg, inv_map_reg = mapData([getReg(l) for l in data])
    map_dri, inv_map_dri = mapData([getDri(l) for l in data])
    map_route, inv_map_route = mapData([getRoute(l) for l in data])

    N = len(map_route)
    # Travel durations: over all rows, and split by mapped route index.
    line_tim = []
    line_route_tim = [[] for _ in range(N)]
    # Per-route running (sum of seconds, number of rows).
    lm_data = [(0, 0) for _ in range(N)]
    for l in data:
        mp_route = map_route[getRoute(l)]
        dtx = timeDifference(getArr(l), getDep(l), FMT)

        # NOTE(review): if dtx is a datetime.timedelta, ``.seconds`` drops
        # the days component -- confirm that trips never span a day boundary.
        line_tim.append(dtx.seconds)
        line_route_tim[mp_route].append(dtx.seconds)

        x, y = lm_data[mp_route]
        lm_data[mp_route] = x + dtx.seconds, y + 1

    avg_line = sum(line_tim) / len(line_tim)
    # avg_route and avg_data are computed here but not used again in this
    # function and not returned.
    avg_route = [
        sum(line_route_tim[i]) / len(line_route_tim[i]) for i in range(N)
    ]
    avg_data = [x / max(1, y) for x, y in lm_data]

    # Per-route columns: mapped reg, mapped dri, mapped departure time and
    # the target duration in seconds.
    dr = [[] for _ in range(N)]
    dd = [[] for _ in range(N)]
    dt = [[] for _ in range(N)]
    dy = [[] for _ in range(N)]
    for l in data:
        mp_route = map_route[getRoute(l)]
        dr[mp_route].append(map_reg[getReg(l)])
        dd[mp_route].append(map_dri[getDri(l)])
        dt[mp_route].append(mapTime(getDep(l), FMT, DEC))
        dy[mp_route].append(timeDifference(getArr(l), getDep(l), FMT).seconds)

    # [MAPPED_ROUTE][X] ... X == 0 ? AVG : X == 1 ? RANK FOR THE I-TH EXAMPLE
    lm_reg = []
    lm_dri = []
    lm_tim = []

    # Second return value of rankData per route; handed back to the caller
    # unchanged.
    mpx_reg = []
    mpx_dri = []
    mpx_tim = []

    # Rank each feature column against the targets, route by route.
    for i in range(N):
        x, y = rankData(dr[i], dy[i])
        lm_reg.append(x)
        mpx_reg.append(y)

        x, y = rankData(dd[i], dy[i])
        lm_dri.append(x)
        mpx_dri.append(y)

        x, y = rankData(dt[i], dy[i])
        lm_tim.append(x)
        mpx_tim.append(y)

    # Number of distinct mapped values per route, used as the rank divisor.
    sr = [len(set(dr[i])) for i in range(N)]
    sd = [len(set(dd[i])) for i in range(N)]
    st = [len(set(dt[i])) for i in range(N)]

    # Divide every rank by the distinct-value count, in place. The loop bound
    # comes from the reg ranks and is applied to all three rank lists.
    for i in range(N):
        for j in range(len(lm_reg[i][1])):
            lm_reg[i][1][j] /= sr[i]
            lm_dri[i][1][j] /= sd[i]
            lm_tim[i][1][j] /= st[i]

    models = []
    for i in range(N):
        #print("i = %2d %s" % (i, inv_map_route[i]))

        Y = numpy.array(dy[i])
        # Five features per example: the three scaled ranks, the squared
        # time rank, and the sum of the three ranks.
        X = numpy.array([
            [
                #lm_reg[i][0][j],
                lm_reg[i][1][j],
                #lm_dri[i][0][j],
                lm_dri[i][1][j],
                #lm_tim[i][0][j],
                lm_tim[i][1][j],
                lm_tim[i][1][j]**2,
                lm_reg[i][1][j] + lm_dri[i][1][j] + lm_tim[i][1][j]
            ] for j in range(len(dy[i]))
        ])

        lr = linear.LinearLearner(lambda_=17)
        models.append(lr(X, Y))

    return models, avg_line, map_route, map_reg, map_dri, mpx_reg, mpx_dri, mpx_tim, sr, sd, st