Exemple #1
0
def get_files(directory):
    """Yield (name, parsed_data) pairs for the known LMS files in *directory*.

    For STREETS a settlement -> street-record mapping is yielded; for
    NON_URBAN_INTERSECTION a (road1, road2) -> junction-name mapping is
    yielded under the ROADS key; for ACCIDENTS the open CsvReader itself
    is yielded (the caller is responsible for closing it).

    Raises:
        ValueError: if a filename matches zero or more than one file
            in *directory*.
    """
    # dict.iteritems() is Python-2-only; items() works on 2 and 3 alike.
    for name, filename in lms_files.items():

        if name not in [STREETS, NON_URBAN_INTERSECTION, ACCIDENTS]:
            continue

        # Materialize the matches: on Python 3 filter() returns a lazy
        # iterator, which would break len(files) and files[0] below.
        files = [path for path in os.listdir(directory)
                 if filename.lower() in path.lower()]
        amount = len(files)
        if amount == 0:
            raise ValueError("file not found in directory: " + filename)
        if amount > 1:
            raise ValueError("there are too many matches: " + filename)

        csv = CsvReader(os.path.join(directory, files[0]))

        if name == STREETS:
            streets_map = {}
            # NOTE(review): itertools.groupby only groups *consecutive*
            # rows sharing a key; this assumes the CSV is sorted by
            # settlement -- confirm upstream.
            for key, rows in itertools.groupby(
                    csv,
                    lambda street: street.get(field_names.settlement, "OTHER")):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in rows
                    if field_names.street_name in x
                    and field_names.street_sign in x]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            roads = {(x[field_names.road1], x[field_names.road2]):
                     x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x}
            csv.close()
            yield ROADS, roads
        elif name == ACCIDENTS:
            yield name, csv
Exemple #2
0
def estimate(problem, path_train, path_test, target, cuda, config):
    """Train a linear/logistic torch model and print train/test metrics.

    Args:
        problem: "classification" or "regression"; anything else raises
            ValueError.
        path_train: path forwarded to CsvReader for the train split.
        path_test: path forwarded to CsvReader for the test split.
        target: target column identifier, forwarded to CsvReader.
        cuda: use CUDA when requested *and* available, else CPU.
        config: dict with "batch_size", "optimizer" ("adam" or SGD),
            "learning_rate" (SGD only) and optional "epochs".

    Raises:
        ValueError: if *problem* is not a recognized problem type.
    """
    csvreader_train = CsvReader(target, path_train, 5)
    csvreader_test = CsvReader(target, path_test, 5)
    loader_train = LoadDataset()
    loader_train.fit(csvreader_train)
    # The test loader must reuse the transformation fitted on the train
    # split, hence the deep copy before transform() is applied.
    loader_test = copy.deepcopy(loader_train)

    loader_train.transform(csvreader_train)
    train_data = DataLoader(dataset=loader_train,
                            batch_size=config["batch_size"])
    loader_test.transform(csvreader_test)
    test_data = DataLoader(dataset=loader_test,
                           batch_size=config["batch_size"])

    device = torch.device("cuda" if cuda and torch.cuda.is_available()
                          else "cpu")

    if problem == "classification":
        estimator = LogisticRegressionTorch(input_dim=loader_train.dim,
                                            device=device)
        report = classification_report
        criterion = torch.nn.BCELoss()
    elif problem == "regression":
        estimator = LinearRegressionTorch(input_dim=loader_train.dim,
                                          device=device)
        report = regression_report
        criterion = torch.nn.MSELoss()
    else:
        # Fail fast: without this the code below hits a NameError on
        # `estimator` for any unrecognized problem type.
        raise ValueError("unknown problem type: %r" % (problem,))

    if config["optimizer"] == "adam":
        optimizer = torch.optim.Adam(estimator.parameters())
    else:
        optimizer = torch.optim.SGD(estimator.parameters(),
                                    lr=config["learning_rate"])

    # Coerce to int: the original default 1e4 was a float, which breaks
    # integer uses such as range(epochs) inside the trainer.
    epochs = int(config.get("epochs") or 10000)

    trainer = BaseLinear(estimator, device, criterion, optimizer, epochs)
    trainer.fit(train_data)

    X_test, y_test = next(iter(test_data))
    X_test, y_test = X_test.to(device), y_test.to(device)

    X_train, y_train = next(iter(train_data))
    X_train, y_train = X_train.to(device), y_train.to(device)

    print("Train metrics\n-------")
    report(y_train[:, None], estimator(X_train))
    print("Test metrics\n-------")
    report(y_test[:, None], estimator(X_test))
Exemple #3
0
def get_dict_file(directory):
    """Yield (name, CsvReader) for every entry in lms_dict_files.

    The caller owns each yielded CsvReader and must close it.

    Raises:
        ValueError: if a filename matches zero or more than one file
            in *directory*.
    """
    # dict.iteritems() is Python-2-only; items() works on 2 and 3 alike.
    for name, filename in lms_dict_files.items():
        # Materialize the matches: on Python 3 filter() returns a lazy
        # iterator, which would break len(files) and files[0] below.
        files = [path for path in os.listdir(directory)
                 if filename.lower() in path.lower()]
        amount = len(files)
        if amount == 0:
            raise ValueError("file not found in directory: " + filename)
        if amount > 1:
            raise ValueError("there are too many matches: " + filename)
        csv = CsvReader(os.path.join(directory, files[0]))
        yield name, csv
Exemple #4
0
def get_files(directory):
    """Yield (name, parsed_data) pairs for the known LMS files in *directory*.

    For STREETS a settlement -> street-record mapping is yielded; for
    NON_URBAN_INTERSECTION a (road1, road2) -> junction-name mapping is
    yielded under the ROADS key; for ACCIDENTS the open CsvReader itself
    is yielded (the caller is responsible for closing it).

    Raises:
        ValueError: if a filename matches zero or more than one file
            in *directory*.
    """
    # dict.iteritems() is Python-2-only; items() works on 2 and 3 alike.
    for name, filename in lms_files.items():

        if name not in [STREETS, NON_URBAN_INTERSECTION, ACCIDENTS]:
            continue

        # Materialize the matches: on Python 3 filter() returns a lazy
        # iterator, which would break len(files) and files[0] below.
        files = [path for path in os.listdir(directory)
                 if filename.lower() in path.lower()]
        amount = len(files)
        if amount == 0:
            raise ValueError(
                "file doesn't exist directory, cannot parse it; directory: {0};filename: {1}"
                .format(directory, filename))
        if amount > 1:
            raise ValueError(
                "there are too many files in the directory, cannot parse!;directory: {0};filename: {1}"
                .format(directory, filename))

        csv = CsvReader(os.path.join(directory, files[0]))

        if name == STREETS:
            streets_map = {}
            # NOTE(review): itertools.groupby only groups *consecutive*
            # rows sharing a key; this assumes the CSV is sorted by
            # settlement -- confirm upstream.
            for key, rows in itertools.groupby(
                    csv,
                    lambda street: street.get(field_names.settlement, "OTHER")):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in rows
                    if field_names.street_name in x
                    and field_names.street_sign in x]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            roads = {(x[field_names.road1], x[field_names.road2]):
                     x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x}
            csv.close()
            yield ROADS, roads
        elif name == ACCIDENTS:
            yield name, csv
Exemple #5
0
def estimate(problem, path_train, path_test, target, cuda, config):
    """Fit a linear/logistic model on scaled one-hot features; print metrics.

    Args:
        problem: "classification" or "regression"; anything else raises
            ValueError.
        path_train: path forwarded to CsvReader for the train split.
        path_test: path forwarded to CsvReader for the test split.
        target: target column identifier, forwarded to CsvReader.
        cuda: use CUDA when requested *and* available, else CPU.
        config: keyword configuration forwarded to the estimator.

    Raises:
        ValueError: if *problem* is not a recognized problem type.
    """
    csvreader_train = CsvReader(target, path_train, 5)
    csvreader_test = CsvReader(target, path_test, 5)
    X_train, y_train = csvreader_train.get_X_y()
    X_test, y_test = csvreader_test.get_X_y()

    one_hot = OneHotEncoder()
    scaler = StandardScaler()

    # Fit the encoder on train only, then transform both splits with it.
    # NOTE(review): np.hstack below assumes transform() yields a dense
    # array; sklearn's OneHotEncoder is sparse by default -- confirm
    # the encoder used here returns dense output.
    one_hot.fit(X_train[:, csvreader_train.categorical_idx])
    X_train_one_hot = one_hot.transform(
        X_train[:, csvreader_train.categorical_idx])
    X_test_one_hot = one_hot.transform(X_test[:,
                                              csvreader_test.categorical_idx])

    X_train_new = np.hstack(
        (X_train_one_hot, X_train[:, csvreader_train.continuous_idx]))
    X_test_new = np.hstack(
        (X_test_one_hot, X_test[:, csvreader_test.continuous_idx]))

    # Scaler is likewise fitted on train only to avoid test leakage.
    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)

    device = torch.device("cuda" if cuda and torch.cuda.is_available()
                          else "cpu")
    X_train_scaled = torch.from_numpy(X_train_scaled).to(device)
    X_test_scaled = torch.from_numpy(X_test_scaled).to(device)
    y_train = torch.from_numpy(y_train).to(device)
    y_test = torch.from_numpy(y_test).to(device)

    if problem == "classification":
        estimator = LogisticRegression(**config)
        report = classification_report
    elif problem == "regression":
        estimator = LinearRegression(**config)
        report = regression_report
    else:
        # Fail fast: without this the code below hits a NameError on
        # `estimator` for any unrecognized problem type.
        raise ValueError("unknown problem type: %r" % (problem,))

    estimator.fit(X_train_scaled, y_train)
    print("Train metrics\n-------")
    report(y_train, estimator.predict(X_train_scaled))
    print("Test metrics\n-------")
    report(y_test, estimator.predict(X_test_scaled))
Exemple #6
0
def get_files(directory):
    """Yield (name, parsed_data) pairs for the known LMS files in *directory*.

    For STREETS a settlement -> street-record mapping is yielded; for
    NON_URBAN_INTERSECTION a (road1, road2, KM) -> junction-name mapping
    is yielded under the ROADS key; for ACCIDENTS, INVOLVED and VEHICLES
    the open CsvReader itself is yielded (the caller must close it).

    Raises:
        ValueError: if a filename matches zero or more than one file
            in *directory*.
    """
    # dict.iteritems() is Python-2-only; items() works on 2 and 3 alike.
    for name, filename in lms_files.items():

        if name not in (STREETS, NON_URBAN_INTERSECTION, ACCIDENTS, INVOLVED,
                        VEHICLES):
            continue

        # Materialize the matches: on Python 3 filter() returns a lazy
        # iterator, which would break len(files) and files[0] below.
        files = [path for path in os.listdir(directory)
                 if filename.lower() in path.lower()]
        amount = len(files)
        if amount == 0:
            raise ValueError("Not found: '%s'" % filename)
        if amount > 1:
            raise ValueError("Ambiguous: '%s'" % filename)

        csv = CsvReader(os.path.join(directory, files[0]))

        if name == STREETS:
            streets_map = {}
            # NOTE(review): itertools.groupby only groups *consecutive*
            # rows sharing a key; this assumes the CSV is sorted by
            # settlement -- confirm upstream.
            for key, rows in itertools.groupby(
                    csv,
                    lambda street: street.get(field_names.settlement, "OTHER")):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in rows
                    if field_names.street_name in x
                    and field_names.street_sign in x]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            # Guard "KM" too: the key tuple reads x["KM"] directly, so a
            # row missing it would otherwise raise KeyError (the original
            # only checked road1/road2).
            roads = {(x[field_names.road1], x[field_names.road2], x["KM"]):
                     x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x
                     and "KM" in x}
            csv.close()
            yield ROADS, roads
        elif name in (ACCIDENTS, INVOLVED, VEHICLES):
            yield name, csv