def get_files(directory):
    """Locate the known LMS data files in *directory* and yield parsed results.

    Yields:
        (STREETS, dict): settlement name -> list of {street_sign, street_name} dicts.
        (ROADS, dict): (road1, road2) -> junction name.
        (ACCIDENTS, CsvReader): the still-open reader (caller must close it).

    Raises:
        ValueError: if an expected file is missing or matched more than once.
    """
    for name, filename in lms_files.items():  # .items(): works on both Py2 and Py3
        if name not in (STREETS, NON_URBAN_INTERSECTION, ACCIDENTS):
            continue
        # List comprehension instead of filter(): on Python 3 filter() returns
        # a lazy iterator, which has no len() and cannot be indexed.
        matches = [path for path in os.listdir(directory)
                   if filename.lower() in path.lower()]
        if not matches:
            raise ValueError("file not found in directory: " + filename)
        if len(matches) > 1:
            raise ValueError("there are too many matches: " + filename)
        csv = CsvReader(os.path.join(directory, matches[0]))
        if name == STREETS:
            def settlement_of(street):
                return street.get(field_names.settlement, "OTHER")

            # groupby() only merges *consecutive* rows with equal keys; sort by
            # settlement first, otherwise a later group with a repeated key
            # silently overwrites the earlier entry in streets_map.
            streets_map = {}
            for key, group in itertools.groupby(sorted(csv, key=settlement_of),
                                                key=settlement_of):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in group
                    if field_names.street_name in x and field_names.street_sign in x
                ]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            roads = {(x[field_names.road1], x[field_names.road2]): x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x}
            csv.close()
            yield ROADS, roads
        elif name == ACCIDENTS:
            # Reader is handed to the caller unparsed; caller owns closing it.
            yield name, csv
def estimate(problem, path_train, path_test, target, cuda, config):
    """Train a linear torch model on CSV data and print train/test reports.

    Args:
        problem: "classification" (logistic + BCE) or "regression" (linear + MSE).
        path_train, path_test: paths handed to CsvReader.
        target: target column identifier passed to CsvReader.
        cuda: request GPU; falls back to CPU when unavailable.
        config: dict with "batch_size", "optimizer", "learning_rate",
            optional "epochs" (defaults to 10000).

    Raises:
        ValueError: if *problem* is not a supported problem type.
    """
    csvreader_train = CsvReader(target, path_train, 5)
    csvreader_test = CsvReader(target, path_test, 5)
    loader_train = LoadDataset()
    loader_train.fit(csvreader_train)
    # Copy the fitted loader BEFORE transforming so the test set reuses the
    # exact train-time preprocessing state.
    loader_test = copy.deepcopy(loader_train)
    loader_train.transform(csvreader_train)
    train_data = DataLoader(dataset=loader_train, batch_size=config["batch_size"])
    loader_test.transform(csvreader_test)
    test_data = DataLoader(dataset=loader_test, batch_size=config["batch_size"])

    device = torch.device("cuda" if cuda and torch.cuda.is_available() else "cpu")

    if problem == "classification":
        estimator = LogisticRegressionTorch(input_dim=loader_train.dim, device=device)
        report = classification_report
        criterion = torch.nn.BCELoss()
    elif problem == "regression":
        estimator = LinearRegressionTorch(input_dim=loader_train.dim, device=device)
        report = regression_report
        criterion = torch.nn.MSELoss()
    else:
        # Fail fast: otherwise estimator/report/criterion are unbound and the
        # code below dies with a confusing NameError.
        raise ValueError("unknown problem type: {0}".format(problem))

    if config["optimizer"] == "adam":
        optimizer = torch.optim.Adam(estimator.parameters())
    else:
        optimizer = torch.optim.SGD(estimator.parameters(), lr=config["learning_rate"])

    # int instead of 1e4 (float): epoch counts are loop bounds.
    # `or` keeps the original falsy-default semantics (epochs=0 -> 10000).
    epochs = config.get("epochs") or 10000

    trainer = BaseLinear(estimator, device, criterion, optimizer, epochs)
    trainer.fit(train_data)

    # NOTE(review): only the first batch of each loader is evaluated here —
    # presumably batch_size covers the whole split; confirm against callers.
    X_test, y_test = next(iter(test_data))
    X_test, y_test = X_test.to(device), y_test.to(device)
    X_train, y_train = next(iter(train_data))
    X_train, y_train = X_train.to(device), y_train.to(device)
    print("Train metrics\n-------")
    report(y_train[:, None], estimator(X_train))
    print("Test metrics\n-------")
    report(y_test[:, None], estimator(X_test))
def get_dict_file(directory):
    """Yield (name, CsvReader) for every LMS dictionary file found in *directory*.

    Raises:
        ValueError: if an expected file is missing or matched more than once.
    """
    for name, filename in lms_dict_files.items():  # .items(): Py2/Py3 compatible
        # List comprehension instead of filter(): on Python 3 filter() returns
        # a lazy iterator, which has no len() and cannot be indexed.
        matches = [path for path in os.listdir(directory)
                   if filename.lower() in path.lower()]
        if not matches:
            raise ValueError("file not found in directory: " + filename)
        if len(matches) > 1:
            raise ValueError("there are too many matches: " + filename)
        # Reader is handed to the caller open; caller owns closing it.
        yield name, CsvReader(os.path.join(directory, matches[0]))
def get_files(directory):
    """Locate the known LMS data files in *directory* and yield parsed results.

    Yields:
        (STREETS, dict): settlement name -> list of {street_sign, street_name} dicts.
        (ROADS, dict): (road1, road2) -> junction name.
        (ACCIDENTS, CsvReader): the still-open reader (caller must close it).

    Raises:
        ValueError: if an expected file is missing or matched more than once.
    """
    for name, filename in lms_files.items():  # .items(): works on both Py2 and Py3
        if name not in (STREETS, NON_URBAN_INTERSECTION, ACCIDENTS):
            continue
        # List comprehension instead of filter(): on Python 3 filter() returns
        # a lazy iterator, which has no len() and cannot be indexed.
        matches = [path for path in os.listdir(directory)
                   if filename.lower() in path.lower()]
        if not matches:
            raise ValueError(
                "file doesn't exist directory, cannot parse it; directory: {0};filename: {1}"
                .format(directory, filename))
        if len(matches) > 1:
            raise ValueError(
                "there are too many files in the directory, cannot parse!;directory: {0};filename: {1}"
                .format(directory, filename))
        csv = CsvReader(os.path.join(directory, matches[0]))
        if name == STREETS:
            def settlement_of(street):
                return street.get(field_names.settlement, "OTHER")

            # groupby() only merges *consecutive* rows with equal keys; sort by
            # settlement first, otherwise a later group with a repeated key
            # silently overwrites the earlier entry in streets_map.
            streets_map = {}
            for key, group in itertools.groupby(sorted(csv, key=settlement_of),
                                                key=settlement_of):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in group
                    if field_names.street_name in x and field_names.street_sign in x
                ]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            roads = {(x[field_names.road1], x[field_names.road2]): x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x}
            csv.close()
            yield ROADS, roads
        elif name == ACCIDENTS:
            # Reader is handed to the caller unparsed; caller owns closing it.
            yield name, csv
def estimate(problem, path_train, path_test, target, cuda, config):
    """Fit a linear estimator on one-hot-encoded, scaled CSV data; print reports.

    Args:
        problem: "classification" (LogisticRegression) or "regression"
            (LinearRegression).
        path_train, path_test: paths handed to CsvReader.
        target: target column identifier passed to CsvReader.
        cuda: request GPU tensors; falls back to CPU when unavailable.
        config: keyword arguments forwarded to the estimator constructor.

    Raises:
        ValueError: if *problem* is not a supported problem type.
    """
    csvreader_train = CsvReader(target, path_train, 5)
    csvreader_test = CsvReader(target, path_test, 5)
    X_train, y_train = csvreader_train.get_X_y()
    X_test, y_test = csvreader_test.get_X_y()

    # Encoder and scaler are fit on the training split only (no test leakage).
    one_hot = OneHotEncoder()
    scaler = StandardScaler()
    one_hot.fit(X_train[:, csvreader_train.categorical_idx])
    # NOTE(review): np.hstack assumes the transform returns a dense array;
    # sklearn's OneHotEncoder is sparse by default — confirm which encoder
    # this is, or hstack will fail on sparse output.
    X_train_one_hot = one_hot.transform(X_train[:, csvreader_train.categorical_idx])
    X_test_one_hot = one_hot.transform(X_test[:, csvreader_test.categorical_idx])
    X_train_new = np.hstack((X_train_one_hot, X_train[:, csvreader_train.continuous_idx]))
    X_test_new = np.hstack((X_test_one_hot, X_test[:, csvreader_test.continuous_idx]))
    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)

    device = torch.device("cuda" if cuda and torch.cuda.is_available() else "cpu")
    X_train_scaled = torch.from_numpy(X_train_scaled).to(device)
    X_test_scaled = torch.from_numpy(X_test_scaled).to(device)
    y_train = torch.from_numpy(y_train).to(device)
    y_test = torch.from_numpy(y_test).to(device)

    if problem == "classification":
        estimator = LogisticRegression(**config)
        report = classification_report
    elif problem == "regression":
        estimator = LinearRegression(**config)
        report = regression_report
    else:
        # Fail fast: otherwise estimator/report are unbound and estimator.fit
        # below dies with a confusing NameError.
        raise ValueError("unknown problem type: {0}".format(problem))

    estimator.fit(X_train_scaled, y_train)
    print("Train metrics\n-------")
    report(y_train, estimator.predict(X_train_scaled))
    print("Test metrics\n-------")
    report(y_test, estimator.predict(X_test_scaled))
def get_files(directory):
    """Locate the known LMS data files in *directory* and yield parsed results.

    Yields:
        (STREETS, dict): settlement name -> list of {street_sign, street_name} dicts.
        (ROADS, dict): (road1, road2, KM) -> junction name.
        (ACCIDENTS | INVOLVED | VEHICLES, CsvReader): the still-open reader
            (caller must close it).

    Raises:
        ValueError: if an expected file is missing or matched more than once.
    """
    for name, filename in lms_files.items():  # .items(): works on both Py2 and Py3
        if name not in (STREETS, NON_URBAN_INTERSECTION, ACCIDENTS, INVOLVED,
                        VEHICLES):
            continue
        # List comprehension instead of filter(): on Python 3 filter() returns
        # a lazy iterator, which has no len() and cannot be indexed.
        matches = [path for path in os.listdir(directory)
                   if filename.lower() in path.lower()]
        if not matches:
            raise ValueError("Not found: '%s'" % filename)
        if len(matches) > 1:
            raise ValueError("Ambiguous: '%s'" % filename)
        csv = CsvReader(os.path.join(directory, matches[0]))
        if name == STREETS:
            def settlement_of(street):
                return street.get(field_names.settlement, "OTHER")

            # groupby() only merges *consecutive* rows with equal keys; sort by
            # settlement first, otherwise a later group with a repeated key
            # silently overwrites the earlier entry in streets_map.
            streets_map = {}
            for key, group in itertools.groupby(sorted(csv, key=settlement_of),
                                                key=settlement_of):
                streets_map[key] = [
                    {field_names.street_sign: x[field_names.street_sign],
                     field_names.street_name: x[field_names.street_name]}
                    for x in group
                    if field_names.street_name in x and field_names.street_sign in x
                ]
            csv.close()
            yield name, streets_map
        elif name == NON_URBAN_INTERSECTION:
            # NOTE(review): x["KM"] is read without a guard while road1/road2
            # are checked — confirm every row carries a KM column.
            roads = {(x[field_names.road1], x[field_names.road2], x["KM"]):
                         x[field_names.junction_name]
                     for x in csv
                     if field_names.road1 in x and field_names.road2 in x}
            csv.close()
            yield ROADS, roads
        elif name in (ACCIDENTS, INVOLVED, VEHICLES):
            # Reader is handed to the caller unparsed; caller owns closing it.
            yield name, csv