Example #1
def main(argc, argv):
    """ entry point to the program """

    if argc < 3 or argc > 4:
        sys.exit(
            f"Usage python3 {argv[0]} <training_file> <output_dir> <random_features?>"
        )

    _, training_y, training_x = parse_data.read_data(argv[1],
                                                     skip_header=False,
                                                     delimiter=",")

    random_features = None
    if argc >= 4:
        random_features = int(argv[3])

    num_rows = len(training_y)
    while True:
        tree = DecisionTree()

        rows_to_evaluate = random.choices(range(num_rows), k=num_rows)
        tree.train(rows_to_evaluate,
                   training_x,
                   training_y,
                   random_features=random_features)

        filename = f"{argv[2]}/{uuid.uuid4()}.json"
        with open(filename, "w") as out_file:
            out_file.write(tree.to_json())
        print(filename)
Example #2
def main(argv):
    if len(argv) != 5:
        sys.exit(
            f"Usage python3 {argv[0]} <file> <percent_validation> <training_filename> <validation_filename>"
        )

    rna_ids, y_data, x_data = parse_data.read_data(argv[1])
    pct_validation = float(argv[2])

    y_counts = {}
    for index, y in enumerate(y_data):
        key = repr(y)
        if key not in y_counts:
            y_counts[key] = []
        y_counts[key].append(index)

    validation_indexes = []
    training_indexes = []
    for key in y_counts:
        random.shuffle(y_counts[key])

        split_point = int(len(y_counts[key]) * pct_validation)
        for index in y_counts[key][:split_point]:
            validation_indexes.append(index)
        for index in y_counts[key][split_point:]:
            training_indexes.append(index)

    create_csv(argv[3], training_indexes, rna_ids, y_data, x_data)
    create_csv(argv[4], validation_indexes, rna_ids, y_data, x_data)
Example #3
def main():
    commands = []
    data = pd.read_data()
    products = data["products_weights"]
    warehouses = data["warehouses"]
    max_dron_size = data["max_payload"]
    num_of_orders = len(data["orders"])
    drons = []
    for i in xrange(data["drones_num"]):
        drons.append({"id": i, "position": warehouses[warehouse_id]["position"], "cooldown": 0})
        drons[-1]["warehouse"] = get_dron_start_warehouse(drons[-1], warehouses)

    turn = 0
    while turn < data["turns"]:
        while has_free_drons(drons):
            dron = get_free_dron(drons)
            order = get_nearest_order(data["orders"], dron["warehouse"])
            while order == None:
                dron["warehouse"]["disabled"] = True
                if get_not_disabled_warehouse(dron, warehouses) == None:
                    break
                dron["warehouse"] = get_not_disabled_warehouse(dron, warehouses)
                order = get_nearest_order(data["orders"], dron["warehouse"])

            loads, delivers = pack_dron(dron, max_dron_size, order, products, dron["warehouse"])
            output_commands(loads)
            output_commands(delivers)

        commands = []
        for dron in drons:
            if dron["cooldown"] > 0:
                dron["cooldown"] -= 1
        turn += 1
Example #4
def main(argv):
    if len(argv) != 3:
        sys.exit(f"Usage python3 {argv[0]} <testing_file> <tree_json_dir>")

    rna_ids, _, test_x = parse_data.read_data(argv[1],
                                              skip_header=False,
                                              delimiter=",")

    json_files = glob.glob(f"{argv[2]}/*.json")
    forest = RandomForest()
    weights_filename = f"{argv[2]}/tree_weights.json"
    json_files.remove(weights_filename)
    weights = {}
    with open(weights_filename, "r") as weights_file:
        weights = json.loads(weights_file.read())
    for filename in json_files:
        with open(filename, "r") as tree_file:
            tree = DecisionTree.from_json(tree_file.read())
            forest.add_tree(tree)
        forest.weights.append(weights[filename])

    for i, x in enumerate(test_x):
        prediction, confidence = forest.predict_with_confidence(x)

        if prediction == 0.0:
            confidence = 1 - confidence

        print(f"{rna_ids[i]},{confidence}")
Example #5
def main(argv):
    """ entry point to the program """

    if len(argv) != 3:
        sys.exit(f"Usage python3 {argv[0]} <testing_file> <tree_json_dir>")

    _, test_y, test_x = parse_data.read_data(argv[1],
                                             skip_header=False,
                                             delimiter=",")

    json_files = glob.glob(f"{argv[2]}/*.json")
    forest = RandomForest()
    for filename in json_files:
        with open(filename, "r") as tree_file:
            tree = DecisionTree.from_json(tree_file.read())
            forest.add_tree(tree)

    total_right = 0
    for i, point in enumerate(test_x):
        expected = forest.predict(point)
        if test_y[i] == expected:
            total_right += 1

    accuracy = total_right / len(test_y)
    print(f"Accuracy: {accuracy}")
Example #6
def import_data():
    clear_db()
    with MongoClient() as client:
        db = client[DB_NAME]
        training_collection = db['training_data']
        test_collection = db['test_data']
        for data_file, labels_file in TRAINING_DATA:
            read_data(
                data_file_path=path.join(DATA_DIR, data_file),
                db_collection=training_collection,
                labels_file_path=path.join(DATA_DIR, labels_file),
            )
        for data_file in TEST_DATA:
            read_data(
                data_file_path=path.join(DATA_DIR, data_file),
                db_collection=test_collection
            )
Example #7
def main(argv):
	if len(argv) != 3:
		sys.exit(f"Usage python3 {argv[0]} <testing_file> <tree_json_dir>")

	_, test_y, test_x = parse_data.read_data(argv[1], skip_header=False, delimiter=",")

	tree_files = glob.glob(f"{argv[2]}/*.json")
	forest = []
	for filename in tree_files:
		with open(filename, "r") as tree_file:
			tree = DecisionTree.from_json(tree_file.read())
			forest.append(tree)

	diffs = []
	forest_predictions = []
	for tree in forest:
		tree_predictions = [tree.predict(x) for x in test_x]
		forest_predictions.append(tree_predictions)

		diff = dist(test_y, tree_predictions)
		diffs.append(diff)

	sorted_refs = list(range(len(forest)))
	sorted_refs.sort(key=lambda ref: diffs[ref])
	#do not need diffs anymore
	del diffs

	prediction_sum = numpy.array(forest_predictions[sorted_refs[0]])
	smallest_dist = dist(test_y, prediction_sum)
	print(smallest_dist)
	best_trees = [tree_files[sorted_refs[0]]]
	for ref in sorted_refs[1:]:
		#correctness = numpy.subtract(forest_predictions[ref], test_y)
		#total_wrong = numpy.count_nonzero(correctness)
		#accuracy = 1 - (total_wrong / len(test_y))
		#print(f"Accuracy: {accuracy}")

		new_combination = numpy.add(prediction_sum, forest_predictions[ref])
		normalized_combination = new_combination / (len(best_trees) + 1)

		new_dist = dist(test_y, normalized_combination)
		#might need to make this an <= due to math
		if new_dist < smallest_dist:
			prediction_sum = new_combination
			smallest_dist = new_dist
			print(smallest_dist)
			best_trees.append(tree_files[ref])

	#should have the combination of trees that give us the closest value to the ground truth (i.e. the test data)
	#choose a set of trees such that the dist(ground, predict_prob) is minimized to 1
	#only problem is with multiple labels, this could be biased against labels that have larger distances (i.e. 2 and 0 vs 1 and 0)
	#print(prediction_sum / len(best_trees))
	for tree_file in tree_files:
		if tree_file not in best_trees:
			print(f"removing {tree_file}")
			os.remove(tree_file)
Example #8
    def test_read_data(self):
        (h, d) = pd.read_data(test_file)

        assert h is not None
        logger.debug("h:  {}".format(h))
        assert "pool_id" in h

        assert d is not None
        logger.debug("d:  {}".format(d))
        assert len(d) > 0
Example #9
def interpret_and_run(args):
    if args.subcommand == 'data' or args.subcommand == 'npz':
        if args.amplitude_colour_limit is None:
            amplitude_colour_limit = None
        else:
            amplitude_colour_limit = eval(args.amplitude_colour_limit)
        if args.norm_squared_colour_limit is None:
            norm_squared_colour_limit = None
        else:
            norm_squared_colour_limit = eval(args.norm_squared_colour_limit)
    if args.subcommand == 'data':
        if args.data_file is None:
            raise exceptions.StupidityError('No data_file entered.')
        unoptimized_size = eval(args.size)
        center = eval(args.center)
        data = parse_data.read_data(args.data_file)
        args_dict = {
            'frequency': args.frequency,
            'distance': args.distance,
            'resolution': args.resolution,
            'size': unoptimized_size,
            'center': center,
            'source_locations': data[0],
            'source_amplitudes': data[1]
        }
        main_directory_path = initialize_main_directory(
            args.data_file, args.directory_save_location,
            args.save_directory_name, args.new_save_directory)
        spherical_interpreter = SphericalInterpreter("raw_data", args_dict)
        spherical_interpreter.to_analysis_file(
            os.path.join(main_directory_path, 'analysis_file.txt'))
        spherical_interpreter.to_npz_file(
            os.path.join(main_directory_path, 'npz_file.npz'))
        colour_plot_directory_path = initialize_colour_plot_directory(
            args.colour_plot_directory_name, main_directory_path,
            args.new_colour_plot_directory)
        to_colour_plot(spherical_interpreter, colour_plot_directory_path,
                       amplitude_colour_limit, norm_squared_colour_limit)
    elif args.subcommand == 'npz':
        if args.npz_file is None:
            raise exceptions.StupidityError('No npz_file entered')
        if os.path.exists(args.npz_file):
            main_directory_path = os.path.dirname(args.npz_file)
            args_dict = {'npz_dict': np.load(args.npz_file)}
        else:
            raise ValueError('The specified npz_file does not exist')
        spherical_interpreter = SphericalInterpreter('npz_file', args_dict)
        spherical_interpreter.to_analysis_file(
            os.path.join(main_directory_path, 'analysis_file.txt'))
        colour_plot_directory_path = initialize_colour_plot_directory(
            args.colour_plot_directory_name, main_directory_path,
            args.new_colour_plot_directory)
        to_colour_plot(spherical_interpreter, colour_plot_directory_path,
                       amplitude_colour_limit, norm_squared_colour_limit)
Example #10
def read_prism_cell_from_file(row_metadata_file, items):

    filepath = row_metadata_file

    (headers, data) = parse_data.read_data(filepath)

    data = [x for x in data if x[0][0] != "#"]

    header_map = parse_data.generate_header_map(headers, items, False)

    logger.debug("header_map:  {}".format(header_map))
    return parse_data.parse_data(header_map, data, PrismCell)
Example #11
def main():
    args = parse_args()
    if args.runOnScenario != 'all':  # scenario names are case-insensitive, so normalize them to upper case
        args.runOnScenario = list(set(args.runOnScenario))
        args.runOnScenario = [i.upper() for i in args.runOnScenario]

    if args.checkStatistic != -1:
        args.checkStatistic = list(set(args.checkStatistic))

    nodes = read_data(args.fileName, args.ignore)
    check_options(args.checkStatistic, nodes, args.runOnScenario,
                  args.reversePrecondPostcond, args.reverseTrigDesc, args.top,
                  args.noCPU, args.healScenarios)
Example #12
def _read_perturbagen_from_file(filepath, do_keep_all):

    (headers, data) = parse_data.read_data(filepath)

    #todo: think about other checks / better notification of wrong map type
    if "well_position" in headers:
        raise Exception(
            "Merino no longer supports CM map type, please convert map to CMap map type"
        )

    header_map = parse_data.generate_header_map(headers, None, do_keep_all)
    logger.debug("header_map:  {}".format(header_map))

    return parse_data.parse_data(header_map, data, Perturbagen)
Example #13
def main():
    data = pd.read_data()

    warehouses_pos = split_position(data['warehouses'])
    orders_pos = split_position(data['orders'])
    orders_sizes = []
    for order in data['orders']:
        items_total_weight = 0
        # total weight of the order, used below to scale the marker size
        for i in xrange(len(order['items'])):
            items_total_weight += data['products_weights'][i] * order['items'][i]
        orders_sizes.append(items_total_weight)

    max_orders_size = max(orders_sizes)
    orders_sizes = map(lambda x: int(x/float(max_orders_size) * 100), orders_sizes)
    plt.scatter(orders_pos[0], orders_pos[1], s=orders_sizes, c='b')
    plt.scatter(warehouses_pos[0], warehouses_pos[1], c='r')
    plt.axis([0, data['field_size'][0], 0, data['field_size'][1]])
    plt.show()
Example #14
def train_NBC(filepath):
	new_df = read_data(filepath)
	new_train_test = new_df.values.tolist()
	x_train, x_test = train_test_split(new_train_test, test_size=0.1)
	
	cl = NaiveBayesClassifier(x_train)
	# print(cl.classify("Please create an assignment and forward it by EOD"))
	# print(cl.classify("Im not a dessert person but the warm butter cake should be illegal its so good."))
	
	print("Acheived a test accuracy of : %s " % cl.accuracy(x_test))
	
	# details of classifier train
	cl.show_informative_features()
	
	if not os.path.isdir("./models"):
		os.mkdir("./models")
	# saving the trained model
	file = open("./models/cl_NBC.obj", "wb")
	pickle.dump(cl, file)
	file.close()