import matplotlib.pyplot as plt
import numpy as np

# DataCreator is this project's image-drawing helper, assumed importable.


def create_square(n_forlabel, save_filename, noise_scale=30,
                  labelcolour=(230, 30, 30), labelsize=(3, 3),
                  bkgdsize=(90, 90, 3)):
    '''Draw n_forlabel coloured squares onto a blank background, add uniform
    noise, save the image as a PNG, and return the number of squares that
    were actually drawn (the requested count minus drawing errors).'''
    dd = DataCreator(dtype=np.uint8)
    dd.zeros_bkgd(bkgdsize)
    # dd.add_noise(scale=10.0, abs_noise=True)
    errors = 0
    for ii in range(n_forlabel):
        # errors += dd.add_shape(shape='square', colour=(255, 255, 255), size=(3, 3))
        errors += dd.add_shape(shape='square', colour=labelcolour, size=labelsize)
        # error = dd.add_shape(shape='square', colour=(10, 0, 255), size=(4, 1))
    # Add uniform noise over the finished image.
    dd.add_noise(scale=noise_scale, noise='uniform', abs_noise=True)
    # plt.imshow(dd.img)
    # plt.show()
    n_forlabel -= errors
    plt.imsave(arr=dd.img, fname=save_filename, format='png')
    return n_forlabel
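# A minimal usage sketch for create_square above; the filename and counts are
# illustrative only (an assumption, not from the original).
drawn = create_square(n_forlabel=5, save_filename='square_sample.png')
print('Squares successfully drawn: {}'.format(drawn))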
def update_tickers(to_fetch, create_4hour, current_time, expected_length):
    # Relies on the module-level `args`, `util`, and `create_4hr_settings`;
    # `expected_length` is unused in this excerpt.
    if args.update_tickers:
        tickers, tickers_id = util.get_tickers()
        util.print_msg("Fetching intervals: ", to_fetch, " 4 hour:", create_4hour)
        fetch_data(to_fetch, current_time, tickers, tickers_id)
        if create_4hour:
            # create_4hr_settings['start_time'] = str(create_4hr_settings['start_time'].replace(tzinfo=None))
            DataCreator([create_4hr_settings], util, tickers, tickers_id).start()
        util.update_next_earning_date()
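# `update_tickers` reads a module-level `args`; one plausible way it could be
# built (an assumption -- the real argument parser is not in this excerpt):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--update-tickers', dest='update_tickers',
                    action='store_true',
                    help='Fetch fresh ticker data before processing.')
args = parser.parse_args()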
def main(args):
    mode = args.mode
    overwrite_flag = args.overwrite
    model_name = 'trajgru'
    data_folder = 'data'
    hurricane_path = os.path.join(data_folder, 'ibtracs.NA.list.v04r00.csv')
    results_folder = 'results'

    config_obj = Config(model_name)
    data = DataCreator(hurricane_path, **config_obj.data_params)
    hurricane_list, weather_list = data.hurricane_list, data.weather_list

    if mode == 'train':
        print("Starting experiments")
        for exp_count, conf in enumerate(config_obj.conf_list):
            print('\nExperiment {}'.format(exp_count))
            print('-*-' * 10)
            batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                             weather_list=weather_list,
                                             batch_size=conf["batch_size"],
                                             window_len=conf["window_len"],
                                             phase_shift=conf["phase_shift"],
                                             return_mode=conf['return_mode'],
                                             cut_start=conf['cut_start'],
                                             vector_mode=conf['vector_mode'],
                                             vector_freq=conf['vector_freq'],
                                             **config_obj.experiment_params)
            train(model_name, batch_generator, exp_count, overwrite_flag, **conf)
    elif mode == 'test':
        best_model, best_conf, trainer = select_best_model(results_folder)
        batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                         weather_list=weather_list,
                                         batch_size=best_conf["batch_size"],
                                         window_len=best_conf["window_len"],
                                         phase_shift=best_conf["phase_shift"],
                                         return_mode=best_conf['return_mode'],
                                         cut_start=best_conf['cut_start'],
                                         vector_mode=best_conf['vector_mode'],
                                         vector_freq=best_conf['vector_freq'],
                                         **config_obj.experiment_params)
        print("Testing with best model...")
        predict(best_model, batch_generator, trainer)
    else:
        raise ValueError('unknown mode: {}'.format(mode))
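# A sketch of the command-line entry point implied by `args.mode` and
# `args.overwrite` above (the flag names are assumptions):
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    parser.add_argument('--overwrite', action='store_true')
    main(parser.parse_args())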
from instance import Instance
from data_parser import DataParser
from data_creator import DataCreator
import time
import json

data_creator = DataCreator('compare_ex2/compare0.txt', 2, 2)
data_parser = DataParser('compare_ex2/compare0.txt')


def main():
    for i in range(3, 11):
        result = 'compare_ex2/results/result_2ma_' + str(i)
        data_creator.filename = 'compare_ex2/data/compare_2ma_' + str(i) + '.txt'
        data_creator.jobs = i
        data_creator.run()

        data_parser.filename = data_creator.filename
        jobs, machines, tasks, neh_prio = data_parser.get_instance_parameters()
        instance = Instance(str(i), machines, jobs, tasks, neh_prio)
        instance.print_info()

        start = time.time()
        instance.generate_best_cmax()
        end = time.time()
        bruteforce_time = end - start
        instance.save_results(data_parser.filename, 'bruteforce',
                              result + '_bruteforce.json')

        start = time.time()
        instance.johnsons_algorithm()
        end = time.time()
        johnson_time = end - start
        instance.save_results(data_parser.filename, 'johnson',
                              result + '_johnson.json')

        start = time.time()
        instance.neh()
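        # The excerpt stops mid-benchmark; by analogy with the bruteforce and
        # Johnson blocks above, the NEH timing presumably finishes like this
        # (an assumption, not part of the original excerpt):
        end = time.time()
        neh_time = end - start
        instance.save_results(data_parser.filename, 'neh', result + '_neh.json')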
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# DataCreator, VagueNet, and VagueLoss are project-local classes assumed to be
# importable alongside this file.


class Facade:
    K_NUM_DATA = 3000

    def __init__(self):
        self.creator = DataCreator()
        self.dataset_a = self.creator.create_data_set(
            self.creator.data_function_a, 0, self.K_NUM_DATA)
        self.dataset_b = self.creator.create_data_set(
            self.creator.data_function_b, 1, self.K_NUM_DATA)
        self.plot_original_sets()
        self.net = VagueNet()

    def run(self):
        # Loss functions. Both nets are trained with cross-entropy below;
        # p_criterion and c_criterion are declared but unused here.
        p_criterion = nn.MSELoss()
        c_criterion = VagueLoss()
        e_loss = nn.CrossEntropyLoss()

        # One optimizer per parameter group of the network.
        p_optimizer = optim.SGD(self.net.params[0], lr=0.002, momentum=0.9)
        c_optimizer = optim.SGD(self.net.params[1], lr=0.002, momentum=0.9)

        # Declare data-sets.
        train_set, test_set = self.creator.get_train_test(
            self.dataset_a, self.dataset_b)

        # Train the raw network to recognize the classes.
        self.train_first_net(epochs=100, train_set=train_set,
                             optimizer=p_optimizer, criterion=e_loss)
        self.evaluate(test_set, test_confidence=False, mask=False,
                      title="1st Net Evaluation")

        # Create a sub-sample for secondary training.
        sub_samples = self.create_sub_sample(train_set)

        # Train the second net on the correct/incorrect sub-sample.
        self.train_second_net(epochs=100, train_set=sub_samples,
                              optimizer=c_optimizer, criterion=e_loss)
        self.evaluate(sub_samples, test_confidence=True, mask=False,
                      title="2nd Net Evaluation")

        # Evaluate results with low-confidence points masked out.
        self.evaluate(test_set, test_confidence=False, mask=True,
                      title="Final Evaluation")

    def train_first_net(self, epochs, train_set, optimizer, criterion):
        batch_size = 50
        batch_steps = len(train_set) // batch_size
        for epoch in range(epochs):
            for i in range(batch_steps):
                train_batch = train_set[i * batch_size:(i + 1) * batch_size]
                features = train_batch[:, :2]
                labels = train_batch[:, 2]
                one_hot_labels = self.create_one_hot(labels, 2)
                features = Variable(torch.from_numpy(features))
                labels = Variable(torch.from_numpy(one_hot_labels))
                optimizer.zero_grad()
                p, c = self.net(features)
                loss = criterion(p, torch.max(labels, 1)[1])
                loss.backward()
                optimizer.step()
            print("Loss: {}".format(loss.item()))

    def create_one_hot(self, labels, num_classes):
        num_samples = len(labels)
        labels_array = np.zeros((num_samples, num_classes), np.int64)
        for i in range(num_samples):
            label = int(labels[i])
            labels_array[i, label] = 1
        return labels_array

    def train_second_net(self, epochs, train_set, optimizer, criterion):
        batch_size = 10
        batch_steps = len(train_set) // batch_size
        for epoch in range(epochs):
            for i in range(batch_steps):
                train_batch = train_set[i * batch_size:(i + 1) * batch_size]
                features = train_batch[:, :2]
                labels = train_batch[:, 2]
                one_hot_labels = self.create_one_hot(labels, 2)
                label_tensor = torch.from_numpy(one_hot_labels)
                features = Variable(torch.from_numpy(features))
                labels = Variable(label_tensor)
                optimizer.zero_grad()
                p, c = self.net(features)
                # Train the confidence head against the class labels.
                loss = criterion(c, torch.max(labels, 1)[1])
                loss.backward()
                optimizer.step()
            print("Loss 2: {}".format(loss.item()))
        print("Net 2 Training Complete")

    def create_sub_sample(self, train_set):
        positive_samples = []
        negative_samples = []
        # Use the net to predict the training data.
        for i in range(len(train_set)):
            train_data = train_set[i]
            features = train_data[:2]
            label = train_data[2]
            x = Variable(torch.from_numpy(features))
            p, c = self.net(x)
            result = torch.max(p.data, 0)[1].item()
            if result == int(label):
                positive_samples.append(features)
            else:
                negative_samples.append(features)

        # Balance the two sets to the size of the smaller one.
        size = min(len(positive_samples), len(negative_samples))
        print("Pos: {} | Neg: {} | Resample: {}".format(
            len(positive_samples), len(negative_samples), size))
        final_samples = np.zeros((size * 2, 3), np.float32)
        for i in range(size):
            j = i * 2
            final_samples[j][0] = positive_samples[i][0]
            final_samples[j][1] = positive_samples[i][1]
            final_samples[j][2] = 1
            final_samples[j + 1][0] = negative_samples[i][0]
            final_samples[j + 1][1] = negative_samples[i][1]
            final_samples[j + 1][2] = 0

        # Plot the output samples.
        pos_x = [sample[0] for sample in positive_samples[:size]]
        pos_y = [sample[1] for sample in positive_samples[:size]]
        neg_x = [sample[0] for sample in negative_samples[:size]]
        neg_y = [sample[1] for sample in negative_samples[:size]]
        plt.plot(pos_x, pos_y, "g.")
        plt.plot(neg_x, neg_y, "rx")
        plt.title("Sub Sample")
        plt.show()
        return final_samples

    def evaluate(self, test_set, test_confidence=False, mask=True,
                 title="Evaluation"):
        correct_x = []
        correct_y = []
        wrong_x = []
        wrong_y = []
        skipped_x = []
        skipped_y = []
        # Counters start at 1, which avoids division by zero when every
        # sample is masked out.
        correct_count = 1
        running_count = 1
        for i in range(len(test_set)):
            test_data = test_set[i]
            test_x = test_data[:2]
            test_label = test_data[2]
            x = Variable(torch.from_numpy(test_x))
            p_out, c_out = self.net(x)
            p = torch.max(p_out.data, 0)[1].item()
            c = torch.max(c_out.data, 0)[1].item()
            # Skip low-confidence predictions when masking is enabled.
            if mask and c < 0.5:
                skipped_x.append(test_x[0])
                skipped_y.append(test_x[1])
                continue
            f = c if test_confidence else p
            running_count += 1
            if test_label == int(f):
                correct_count += 1
                correct_x.append(test_x[0])
                correct_y.append(test_x[1])
            else:
                wrong_x.append(test_x[0])
                wrong_y.append(test_x[1])

        full_count = len(test_set)
        correct_final = "{:.2f}%".format(100 * correct_count / running_count)
        discard_final = "{:.2f}%".format(
            100 * (full_count - running_count) / full_count)
        raw_correct_final = "{:.2f}%".format(100 * correct_count / full_count)
        print("Correct (Filtered): {}".format(correct_final))
        print("Correct (Total): {}".format(raw_correct_final))
        print("Discarded: {}".format(discard_final))
        plt.plot(correct_x, correct_y, "gx")
        plt.plot(wrong_x, wrong_y, "rx")
        plt.plot(skipped_x, skipped_y, "x", color=(0.8, 0.8, 0.8))
        plt.title(title)
        plt.show()

    def plot_original_sets(self):
        self.plot_dataset(self.dataset_a, "bx")
        self.plot_dataset(self.dataset_b, "rx")
        plt.show()

    def plot_dataset(self, dataset, style="bx"):
        px, py = self.creator.get_dataset_x_y(dataset)
        plt.plot(px, py, style)
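# A minimal driver for the Facade pipeline above; a sketch, since the
# original module's entry point is not shown in this excerpt.
if __name__ == "__main__":
    facade = Facade()
    facade.run()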
import matplotlib.pyplot as plt
import numpy as np

from data_creator import DataCreator

data_creator = DataCreator(hurricane_path='../data/ibtracs.NA.list.v04r00.csv',
                           season_range=(1994, 2020),
                           weather_spatial_range=[[0, 65], [-110, 10]],
                           weather_im_size=(25, 25),
                           weather_freq=3,
                           weather_raw_dir='data/weather_raw',
                           rebuild=False)

hurricanes = [np.load(path) for path in data_creator.hurricane_list]
hurricane_coordinates = [hurr[:, :2] for hurr in hurricanes]
hurricane_directions = [hurr[:, -1] for hurr in hurricanes]

# Plot the tracks of hurricanes 20-39 only.
for idx, hurricane in enumerate(hurricanes):
    if not 20 <= idx < 40:
        continue
    coordinates = hurricane[:, :2]
    storm_speeds = hurricane[:, -2] / np.max(hurricane[:, -2])
    storm_dirs = hurricane[:, -1]
    plt.figure(figsize=(7, 7))
    plt.scatter(coordinates[:, 1], coordinates[:, 0])
    for i in range(len(coordinates)):
        pass  # The per-point annotation body is truncated in this excerpt.
params = {
    'val_ratio': 0.1,
    'window_len': 10,
    'hur_input_dim': list(range(7)),
    'side_info_dim': list(range(2, 7)),
    'hur_output_dim': [0, 1],
    'weather_input_dim': list(range(5)),
    'return_mode': 'weather',
    'phase_shift': 10,
    'cut_start': False
}

data_params = {
    'season_range': (2015, 2020),
    'weather_im_size': (25, 25),
    'weather_freq': 3,
    'weather_spatial_range': [[0, 65], [-110, 10]],
    'weather_raw_dir': 'data/weather_raw',
    'rebuild': False
}

data_creator = DataCreator(
    hurricane_path='data/ibtracs.NA.list.v04r00.csv',
    **data_params)

batch_generator = BatchGenerator(
    hurricane_list=data_creator.hurricane_list,
    weather_list=data_creator.weather_list,
    **params)

print(len(batch_generator.dataset_dict['train']))
for x, y in batch_generator.generate('train'):
    print(x.shape, y.shape)
def main():
    dc = DataCreator(verbose=True, db_filename="/tmp/faker.db")
    print("First name: {}".format(dc.firstname(boy=True)))
    print("Last name: {}".format(dc.lastname()))
    print("Age: {}".format(dc.age()))
    print("Teenage: {}".format(dc.age(category="teen")))
    print("State: {}".format(dc.state()))
    print("City: {}".format(dc.city()))
    print("Money: {}".format(dc.money(100)))
    print()
    print("Company: {}".format(dc.company_name()))
    print("get_line:")
    print(dc.get_line(
        pattern='"%firstname%","%lastname%","%occupation%","%age,category=teen%"'))
    print(dc.get_line(pattern="foo bar baz"))
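# A sketch of a driver for the demo above; assumes the snippet is run as a
# standalone script.
if __name__ == "__main__":
    main()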
def run():
    global_start_date = experiment_params['global_start_date']
    global_end_date = experiment_params['global_end_date']
    stride = experiment_params['data_step']
    data_length = experiment_params['data_length']
    val_ratio = experiment_params['val_ratio']
    test_ratio = experiment_params['test_ratio']
    normalize_flag = experiment_params['normalize_flag']
    model_name = experiment_params['model']
    device = experiment_params['device']

    model_dispatcher = {
        'moving_avg': MovingAvg,
        'convlstm': ConvLSTM,
        'u_net': UNet,
        'weather_model': WeatherModel,
    }

    dump_file_dir = os.path.join('data', 'data_dump')
    months = pd.date_range(start=global_start_date, end=global_end_date, freq='1M')
    # Slide a data_length-month window over the date range in steps of `stride`.
    for i in range(0, len(months) - (data_length - stride), stride):
        start_date_str = '-'.join([str(months[i].year), str(months[i].month), '01'])
        start_date = pd.to_datetime(start_date_str)
        end_date = start_date + pd.DateOffset(months=data_length) - pd.DateOffset(hours=1)
        date_range_str = start_date_str + "_" + end_date.strftime("%Y-%m-%d")

        data_creator = DataCreator(start_date=start_date, end_date=end_date, **data_params)
        weather_data = data_creator.create_data()

        selected_model_params = model_params[model_name]["core"]
        batch_gen_params = model_params[model_name]["batch_gen"]
        trainer_params = model_params[model_name]["trainer"]

        config = {
            "data_params": data_params,
            "experiment_params": experiment_params,
            f"{model_name}_params": model_params[model_name]
        }

        batch_generator = BatchGenerator(weather_data=weather_data,
                                         val_ratio=val_ratio,
                                         test_ratio=test_ratio,
                                         params=batch_gen_params,
                                         normalize_flag=normalize_flag)

        model = model_dispatcher[model_name](device=device, **selected_model_params)

        print(f"Training {model_name} for the {date_range_str}")
        train(model_name=model_name, model=model, batch_generator=batch_generator,
              trainer_params=trainer_params, date_r=date_range_str, config=config,
              device=device)

        print(f"Predicting {model_name} for the {date_range_str}")
        try:
            predict(model_name=model_name, batch_generator=batch_generator, device=device)
        except Exception as e:
            print(f"Couldn't perform prediction; the exception was: {e}")

        # Remove the dump directory.
        shutil.rmtree(dump_file_dir)
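# `run()` reads exactly these keys from the module-level config dicts; a
# minimal illustrative shape (the values are assumptions, not the project's
# real settings):
experiment_params = {
    'global_start_date': '2019-01-01',
    'global_end_date': '2020-01-01',
    'data_step': 1,       # stride, in months
    'data_length': 3,     # window length, in months
    'val_ratio': 0.1,
    'test_ratio': 0.1,
    'normalize_flag': True,
    'model': 'convlstm',  # one of the model_dispatcher keys
    'device': 'cuda',
}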