from CF import *
from DataLoader import *
import numpy as np
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    filePath = "./data/ml-20m/ratings.csv"
    print("Loading data:")
    dataLoader = DataLoader(filePath)
    Y_data = dataLoader.readFile()
    print("Done!\n--------------------")
    y = np.zeros(Y_data.shape[0])
    rate_train, rate_test, y_train, y_test = train_test_split(Y_data, y, test_size=0.2)
    print("User-user Collaborative Filtering: ")
    cf = CF(rate_train, 5)
    cf.fit()
    # cf.print_recommendation()
    print("-------------------------")
    print("\nTESTING: \nComputing RMSE: ")
    cf.RMSE(rate_test)
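# A minimal sketch of the RMSE that CF.RMSE above presumably computes over
# (user, item, rating) test triplets; `predict` is a hypothetical stand-in
# for the model's per-pair rating prediction.
import numpy as np

def rmse(predict, rate_test):
    n_tests = rate_test.shape[0]
    squared_error = 0.0
    for u, i, r in rate_test[:, :3]:
        squared_error += (predict(u, i) - r) ** 2
    return np.sqrt(squared_error / n_tests)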
@author: ivis
"""
from ELM import *
from DataLoader import *
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torchvision import transforms
import time
from Time import *

## data loader
transformations = transforms.Compose([transforms.ToTensor()])
train_np = DataLoader('data/', 'train', transformations)
test_np = DataLoader('data/', 'test', transformations)

## ELM parameters
num_images_train = 28099
num_images_test = 7025
input_size = train_np[0][0].shape[0] * train_np[0][0].shape[1] * train_np[0][0].shape[2]
output_size = 5
hidden_size = 1000

## process data
"""
train_data = np.zeros((num_images_train, input_size))
train_label = np.zeros(num_images_train)
for i in range(num_images_train):
import heapq
import time
# project modules; the module names below are assumed from the kn./dl. usage
import KinoNetwork as kn
import DataLoader as dl

start_time = time.time()

def list_gen(inp_data):
    # one-hot encode the drawn numbers into an 80-slot input vector
    input_data = []
    for i in range(0, 80):
        input_data.append([0])
    for key in inp_data:
        input_data[key] = [1]
    return input_data

n = kn.KinoNetwork([80, 100, 200, 400, 200, 100, 80])
dataloader = dl.DataLoader()
data = dataloader.returnData('2015-01-02 10:00', '2015-01-05 21:30')
counter = [0, 0, 0, 0, 0, 0]
for i in range(0, len(data) - 1):
    input_data = list_gen(data[i])
    output = n.feedforward(input_data).tolist()
    # take the 20 highest-scoring outputs and map them back to 1-based numbers
    output2 = heapq.nlargest(20, output)
    output3 = []
    for key in output2:
        output3.append(output.index(key) + 1)
    chance = 0
def run():
    df = pd.read_csv(CONFIG.input_path).sample(frac=1).reset_index(drop=True).fillna("")
    print('------- [INFO] TOKENIZING -------\n')
    if not os.path.exists('input/word_to_idx.pickle') or not os.path.exists('input/idx_to_word.pickle'):
        pickle.dump(loader.vocab.word_to_idx, open('input/word_to_idx.pickle', 'wb'))
        pickle.dump(loader.vocab.idx_to_word, open('input/idx_to_word.pickle', 'wb'))
    train_data = df[df['is_duplicate'] == 1]
    val_data = df[:10000]
    train_data = DataLoader.DataLoader(train_data)
    val_data = DataLoader.DataLoader(val_data)
    pad_idx = train_data.vocab.word_to_idx['<PAD>']
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=CONFIG.Batch_Size,
                                               num_workers=2,
                                               pin_memory=True,
                                               collate_fn=DataLoader.MyCollate(pad_idx))
    val_loader = torch.utils.data.DataLoader(val_data,
                                             num_workers=2,
                                             batch_size=CONFIG.Batch_Size,
                                             pin_memory=True,
                                             collate_fn=DataLoader.MyCollate(pad_idx))
    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'
    device = torch.device(accelerator)
    model = DocSimModel.DocSimModel(voacb_size=len(train_data.vocab.word_to_idx),  # (sic) keyword spelling follows the model's signature
                                    embed_dims=CONFIG.embed_dims,
                                    hidden_dims=CONFIG.hidden_dims,
                                    num_layers=CONFIG.num_layers,
                                    bidirectional=CONFIG.bidirectional,
                                    dropout=CONFIG.dropout,
                                    out_dims=CONFIG.out_dims)
    model = model.to(device)
    optimizer = transformers.AdamW(model.parameters(), lr=CONFIG.LR, weight_decay=1e-2)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           threshold=CONFIG.scheduler_threshold,
                                                           mode='min',
                                                           patience=CONFIG.scheduler_patience,
                                                           factor=CONFIG.scheduler_decay_factor)
    if os.path.exists(CONFIG.CHECKPOINT):
        checkpoint = torch.load(CONFIG.CHECKPOINT)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        checkpointed_epoch = checkpoint['epoch']
        print(f'\n-------------- [INFO] LOADING CHECKPOINT | EPOCH -> {checkpoint["epoch"]} | LOSS = {checkpoint["loss"]} --------')
    else:
        checkpointed_epoch = 0
    best_auc_roc = -1e4
    print('\n------------------------------ [INFO] STARTING TRAINING --------------------------------\n')
    for epoch in range(checkpointed_epoch, CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, scheduler, device)
        val_auc_roc, val_loss = engine.eval_fn(model, val_loader, device)
        print(f'EPOCH -> {epoch+1}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL AUC SCORE = {val_auc_roc} | VAL LOSS = {val_loss} | LR = {optimizer.param_groups[0]["lr"]}\n')
        # note: the scheduler is mode='min' but is stepped on AUC, where higher
        # is better; stepping on val_loss (or using mode='max') may be intended
        scheduler.step(val_auc_roc)
        if best_auc_roc < val_auc_roc:
            best_auc_roc = val_auc_roc
            best_model = model.state_dict()
            torch.save(best_model, CONFIG.MODEL_PATH)
        torch.save({'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': val_loss,
                    'auc_roc': val_auc_roc}, CONFIG.CHECKPOINT)
import os
import tensorflow as tf
from DataLoader import *

# hyper parameters
T = 5           # must be the same as during training
outclass = 21   # must be the same as during training
epoch = 0       # the epoch number of the model to load
save_dir = './predictions/'
data_dir = './test_data/'
model_dir = './ckpt/'
model_dir = os.path.join(model_dir, 'lstm_pm_epoch{}.ckpt'.format(epoch))
batch_size = 1

# load data
dataset = Feeder(data_dir=data_dir, train=False, temporal=T, joints=outclass)
dl = DataLoader(dataset, batch_size, shuffle=False)
print('Dataset Loaded')

if not os.path.exists(save_dir):
    os.mkdir(save_dir)

# **************************************** test all images **************************************** #
print('********* test data *********')
# placeholder for the image (the shape argument is truncated in the source)
image = tf.placeholder(tf.float32,
from Classifier import WebClassifier
import WebScrapper
import DataLoader

if __name__ == "__main__":
    # init data loader
    loader = DataLoader.DataLoader(verbose=True)
    # try to load previous repository
    loader.loadRepoFromJSON('repo.json')
    # load links and categories from Excel
    loader.loadClassesAndCategoriesFromExcel(r'Categories.xlsx')
    # if a site was not present in the loader's WordRepository object, pull it here
    loader.scrapMissingSites()
    # get data for classifier
    pages, classes, images = loader.getPagesClassesAndImagesCount()
    # save repo for the next time
    loader.saveToJSON('repo.json')

    clf = WebClassifier()
    clf.loadData(pages, classes, list(images.values()))
    clf.saveToDataToFile('wyniki.txt')

    site = 'https://likegeeks.com/python-gui-examples-tkinter-tutorial/'
    print('predicting category for ', site, '...')
    data = WebScrapper.Scrapper().scrapPage(site)
    clf.predict(data[0], data[1])
import time

start = time.time()

#####################
# SETUP
#####################
# data = DataPreprocess('./toy_preprocessed')
# data.process_train_val_data('./toy_data', 2, 2)
# data.process_challenge_data('./challenge_data')
# Run script 'trainingValidationSplit.py'

BATCH_SIZE = 50
EPOCH = 1
dataset = DataLoader('./toy_preprocessed/id_dicts')
training_set = dataset.get_traing_set('./toy_train', BATCH_SIZE, 123)
validation_sets = dataset.get_validation_sets('./toy_val')
challenge_sets = dataset.get_challenge_sets('./toy_preprocessed/challenge_data')

model = DAE(BATCH_SIZE)
opt = keras.optimizers.Adam()

#####################
# TRAIN MODEL
#####################
# print("Initial Training")
# count = 0
def run():
    mean = (0.5, 0.5, 0.5)
    std = (0.5, 0.5, 0.5)
    transforms = alb.Compose([
        alb.Normalize(mean, std, always_apply=True),
        alb.Resize(50, 200, always_apply=True)
    ])
    dataset = DataLoader.DataLoader(transforms)
    pickle.dump(dataset.vocab.word_to_idx, open('input/word_to_idx.pickle', 'wb'))
    pickle.dump(dataset.vocab.idx_to_word, open('input/idx_to_word.pickle', 'wb'))
    dataset_size = int(len(dataset))
    indexes = list(range(dataset_size))
    train_index = indexes[int(CONFIG.val_size * dataset_size):]
    val_index = indexes[:int(CONFIG.val_size * dataset_size)]
    # SubsetRandomSampler samples from the given index lists; a plain
    # RandomSampler over the lists would re-sample positions 0..len-1 and make
    # the two splits overlap
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_index)
    val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_index)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=CONFIG.Batch_Size,
                                               num_workers=4,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=CONFIG.Batch_Size,
                                             num_workers=4,
                                             pin_memory=True,
                                             sampler=val_sampler)
    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'
    device = torch.device(accelerator)
    num_classes = len(dataset.vocab.word_to_idx) + 1  # +1 for the CTC blank token
    model = CaptchaModel.CaptchaModel(input_channels=CONFIG.input_channels,
                                      out_channels=CONFIG.out_channels,
                                      kernel_size=CONFIG.kernel_size,
                                      conv_dropout=CONFIG.conv_dropout,
                                      max_pool_size=CONFIG.max_pool_size,
                                      num_conv_layers=CONFIG.num_conv_layers,
                                      input_dims=CONFIG.input_dims,
                                      hidden_dims=CONFIG.hidden_dims,
                                      num_layers=CONFIG.num_layers,
                                      rnn_dropout=CONFIG.rnn_dropout,
                                      num_classes=num_classes)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, threshold=0.01, mode='min')
    blank = num_classes - 1
    best_loss = 1e4
    print('------ [INFO] STARTING TRAINING ------')
    for epoch in range(CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, blank, device)
        val_loss = engine.eval_fn(model, val_loader, blank, device)
        scheduler.step(val_loss)
        print(f'EPOCH -> {epoch}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss}')
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            predict.predict('input/captcha_images_v2/8y6b3.png')
            torch.save(best_model, CONFIG.MODEL_PATH)
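# Hedged sketch of the CTC loss wiring that engine.train_fn above presumably
# performs; the blank index num_classes - 1 must match the `blank` passed in.
# T, N, C and the target length are assumed sizes for illustration.
import torch
import torch.nn as nn

T, N, C = 50, 4, 20  # timesteps, batch size, number of classes
log_probs = torch.randn(T, N, C).log_softmax(2)
targets = torch.randint(0, C - 1, (N, 10), dtype=torch.long)  # labels exclude the blank
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), 10, dtype=torch.long)
ctc = nn.CTCLoss(blank=C - 1, zero_infinity=True)
loss = ctc(log_probs, targets, input_lengths, target_lengths)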
def __init__(self):
    # Input shape
    self.img_rows = 128
    self.img_cols = 128
    self.channels = 3
    self.img_shape = (self.img_rows, self.img_cols, self.channels)

    # Configure data loader
    self.dataset_name = 'FaceToSticker'
    # Use the DataLoader object to import a preprocessed dataset
    self.data_loader = DataLoader.DataLoader(dataset_name=self.dataset_name,
                                             img_res=(self.img_rows, self.img_cols))

    # Calculate output shape of D (PatchGAN)
    patch = int(self.img_rows / 2**4)
    self.disc_patch = (patch, patch, 1)

    # Number of filters in the first layer of G and D
    self.gf = 32
    self.df = 64

    # Loss weights
    self.lambda_cycle = 10.0                  # Cycle-consistency loss
    self.lambda_id = 0.9 * self.lambda_cycle  # Identity loss

    optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminators
    self.d_A = self.build_discriminator()
    self.d_B = self.build_discriminator()
    self.d_A.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    self.d_B.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

    # -------------------------
    # Construct Computational
    # Graph of Generators
    # -------------------------

    # Build the generators
    self.g_AB = self.build_generator()
    self.g_BA = self.build_generator()

    # Input images from both domains
    img_A = Input(shape=self.img_shape)
    img_B = Input(shape=self.img_shape)

    # Translate images to the other domain
    fake_B = self.g_AB(img_A)
    fake_A = self.g_BA(img_B)
    # Translate images back to original domain
    reconstr_A = self.g_BA(fake_B)
    reconstr_B = self.g_AB(fake_A)
    # Identity mapping of images
    img_A_id = self.g_BA(img_A)
    img_B_id = self.g_AB(img_B)

    # For the combined model we will only train the generators. The
    # discriminators must not be recompiled after this point, otherwise they
    # would also be frozen for their own training step.
    self.d_A.trainable = False
    self.d_B.trainable = False

    # Discriminators determine validity of translated images
    valid_A = self.d_A(fake_A)
    valid_B = self.d_B(fake_B)

    # Combined model trains generators to fool discriminators
    self.combined = Model(inputs=[img_A, img_B],
                          outputs=[valid_A, valid_B,
                                   reconstr_A, reconstr_B,
                                   img_A_id, img_B_id])
    self.combined.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
                          loss_weights=[1, 1,
                                        self.lambda_cycle, self.lambda_cycle,
                                        self.lambda_id, self.lambda_id],
                          optimizer=optimizer)
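# A minimal sketch (not taken from the author's train()) of one CycleGAN
# training step using the combined model built in __init__ above; assume it
# runs inside a hypothetical train() method of the same class, with
# batch_size, imgs_A and imgs_B coming from self.data_loader.
import numpy as np

valid = np.ones((batch_size,) + self.disc_patch)
fake = np.zeros((batch_size,) + self.disc_patch)

# discriminators: real images vs. translated ones
fake_B = self.g_AB.predict(imgs_A)
fake_A = self.g_BA.predict(imgs_B)
dA_loss = 0.5 * np.add(self.d_A.train_on_batch(imgs_A, valid),
                       self.d_A.train_on_batch(fake_A, fake))
dB_loss = 0.5 * np.add(self.d_B.train_on_batch(imgs_B, valid),
                       self.d_B.train_on_batch(fake_B, fake))

# generators: adversarial + cycle + identity targets, matching the six
# combined outputs [valid_A, valid_B, reconstr_A, reconstr_B, img_A_id, img_B_id]
g_loss = self.combined.train_on_batch([imgs_A, imgs_B],
                                      [valid, valid, imgs_A, imgs_B, imgs_A, imgs_B])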
def __init__(self):
    # Input shape
    self.channels = 3
    self.lr_height = 128  # Low resolution height
    self.lr_width = 128   # Low resolution width
    self.lr_shape = (self.lr_height, self.lr_width, self.channels)
    self.hr_height = 128  # High resolution height
    self.hr_width = 128   # High resolution width
    self.hr_shape = (self.hr_height, self.hr_width, self.channels)

    # Number of residual blocks in the generator
    self.n_residual_blocks = 8

    # Following parameter and optimizer set as recommended in paper
    self.n_critic = 5
    self.clip_value = 0.01
    optimizer = RMSprop(lr=0.00005)
    # optimizer = Adam(0.0002, 0.5)
    # optimizer1 = RMSprop(lr=0.0001)

    # We use a pre-trained VGG19 model to extract image features from the
    # high resolution and the generated high resolution images and minimize
    # the mse between them
    self.vgg = self.build_vgg()
    self.vgg.trainable = False
    self.vgg.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

    # Configure data loader
    self.dataset_name = 'random_dataset'
    self.predict_dir = 'predict'
    self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                  img_res=(self.hr_width, self.hr_height))

    # Calculate output shape of D (PatchGAN)
    patch = int(self.hr_height / 2**4)
    self.disc_patch = (2, 1)

    # Number of filters in the first layer of G and D
    self.gf = 64
    self.df = 64

    # Build and compile the critic (the plain discriminator is kept for reference)
    # self.discriminator = self.build_discriminator()
    # self.discriminator.summary()
    # self.discriminator.compile(loss='mse',
    #                            optimizer=optimizer,
    #                            metrics=['accuracy'])
    self.discriminator = self.build_critic()
    self.discriminator.compile(loss=self.wasserstein_loss,
                               optimizer=optimizer,
                               metrics=['accuracy'])

    # Build the generator
    self.generator = self.dense_gener()
    self.generator.summary()

    # High res. and low res. images
    img_hr = Input(shape=self.hr_shape)
    img_lr = Input(shape=self.lr_shape)

    # Generate high res. version from low res.
    fake_hr = self.generator(img_lr)

    # Extract image features of the generated img
    fake_features = self.vgg(fake_hr)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # Discriminator determines validity of generated high res. images
    validity = self.discriminator(fake_hr)

    self.combined = Model([img_lr, img_hr], [validity, fake_features])
    self.combined.compile(loss=[self.wasserstein_loss, 'mse'],
                          loss_weights=[1e-3, 1],
                          optimizer=optimizer)
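# Hedged sketch of how the n_critic / clip_value hyperparameters defined above
# are typically used in a WGAN training loop (the loop itself is not in this
# excerpt): after every critic update, clip all critic weights to enforce the
# Lipschitz constraint.
import numpy as np

def clip_critic_weights(critic, clip_value=0.01):
    for layer in critic.layers:
        clipped = [np.clip(w, -clip_value, clip_value) for w in layer.get_weights()]
        layer.set_weights(clipped)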
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import DataLoader as D
import Utils as U
from VRAE_tf import VRAE_tf

data_loader = D.DataLoader(D.Vocab('europarl_tvae_tf', D.Level.WORD))

flags = tf.app.flags
flags.DEFINE_string('model_name', 'VRAE_tf', '')
flags.DEFINE_string('ckpt_path', './results/VRAE_tf/ckpt/', '')
flags.DEFINE_string('logs_path', './results/VRAE_tf/logs/', '')
flags.DEFINE_integer('batch_size', 32, '')
flags.DEFINE_integer('steps', U.epoch_to_step(10, data_loader.train_size, batch_size=32), '')
flags.DEFINE_float('lr', 0.001, 'learning rate')  # a float flag; DEFINE_integer would reject 0.001
flags.DEFINE_integer('z_size', 32, '')
flags.DEFINE_integer('max_seq_len', 15, '')
flags.DEFINE_integer('n_layers', 1, '')
flags.DEFINE_integer('embed_size', 512, '')
flags.DEFINE_integer('vocab_size', data_loader.vocab_size, '')
flags.DEFINE_integer('hidden_size', 512, '')
flags.DEFINE_bool('kl_anealing', True, 'whether to use the KL annealing trick')
flags.DEFINE_float('beta', 1.0, 'kl_loss coef')
flags.DEFINE_float('gamma', 5, '')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import DataLoader as dl
from sklearn.linear_model import LinearRegression
from datetime import datetime

dLoad = dl.DataLoader()
df = dLoad.PrepareDataSet()
print(df)

data = df.sort_index(ascending=True, axis=0)
new_data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Close'])
for i in range(0, len(data)):
    new_data['Date'][i] = data['Date'][i].strftime('%Y%m%d')
    new_data['Close'][i] = data['Close'][i]

train = new_data[:770]
valid = new_data[770:]

x_train = train.drop('Close', axis=1)
y_train = train['Close']
x_valid = valid.drop('Close', axis=1)
y_valid = valid['Close']

from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
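# A minimal sketch of fitting the imported LinearRegression on the split
# prepared above and scoring it with RMSE. This continues the excerpt under
# the assumption that the '%Y%m%d' date strings are first cast to integers so
# the features are numeric.
x_train_num = x_train.astype(int)
x_valid_num = x_valid.astype(int)

lin_model = LinearRegression()
lin_model.fit(x_train_num, y_train)
preds = lin_model.predict(x_valid_num)
rmse = np.sqrt(np.mean((np.asarray(y_valid, dtype=float) - preds) ** 2))
print('validation RMSE:', rmse)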
def main(cfg: DictConfig) -> None:
    # set up mlflow experiment id
    mlflow.set_tracking_uri(f"file://{to_absolute_path(cfg.path_to_mlflow)}")
    experiment = mlflow.get_experiment_by_name(cfg.experiment_name)
    if experiment is not None:
        run_kwargs = {'experiment_id': experiment.experiment_id}
        if cfg["pretrained"] is not None:
            # initialise with pretrained run, otherwise create a new run
            run_kwargs['run_id'] = cfg["pretrained"]["run_id"]
    else:
        # create new experiment
        experiment_id = mlflow.create_experiment(cfg.experiment_name)
        run_kwargs = {'experiment_id': experiment_id}

    # run the training with mlflow tracking
    with mlflow.start_run(**run_kwargs) as main_run:
        if cfg["pretrained"] is not None:
            mlflow.start_run(experiment_id=run_kwargs['experiment_id'], nested=True)
        active_run = mlflow.active_run()
        run_id = active_run.info.run_id

        setup_gpu(cfg.gpu_cfg)
        training_cfg = OmegaConf.to_object(cfg.training_cfg)  # convert to python dictionary
        scaling_cfg = to_absolute_path(cfg.scaling_cfg)
        dataloader = DataLoader.DataLoader(training_cfg, scaling_cfg)
        setup = dataloader.config["SetupNN"]
        TauLosses.SetSFs(*setup["TauLossesSFs"])
        print("loss consts:", TauLosses.Le_sf, TauLosses.Lmu_sf,
              TauLosses.Ltau_sf, TauLosses.Ljet_sf)

        if setup["using_new_loss"]:
            tf.config.run_functions_eagerly(True)
        netConf_full = dataloader.get_net_config()

        if dataloader.input_type == "Adversarial":
            model = create_model(netConf_full,
                                 dataloader.model_name,
                                 loss=setup["loss"],
                                 use_newloss=setup["using_new_loss"],
                                 use_AdvDataset=True,
                                 adv_param=dataloader.adversarial_parameter,
                                 n_adv_tau=dataloader.adv_batch_size,
                                 adv_learning_rate=dataloader.adv_learning_rate)
        else:
            model = create_model(netConf_full,
                                 dataloader.model_name,
                                 loss=setup["loss"],
                                 use_newloss=setup["using_new_loss"])

        if cfg.pretrained is None:
            print("Warning: no pretrained NN -> training will be started from scratch")
            old_opt = None
        else:
            print("Warning: training will be started from pretrained model.")
            print(f"Model: run_id={cfg.pretrained.run_id}, experiment_id={cfg.pretrained.experiment_id}, model={cfg.pretrained.starting_model}")
            path_to_pretrain = to_absolute_path(
                f'{cfg.path_to_mlflow}/{cfg.pretrained.experiment_id}/{cfg.pretrained.run_id}/artifacts/')
            old_model = load_model(path_to_pretrain + f"/model_checkpoints/{cfg.pretrained.starting_model}",
                                   compile=False, custom_objects=None)
            # copy weights layer by layer, matching layers by name
            for layer in model.layers:
                weights_found = False
                for old_layer in old_model.layers:
                    if layer.name == old_layer.name:
                        layer.set_weights(old_layer.get_weights())
                        weights_found = True
                        break
                if not weights_found:
                    print(f"Weights for layer '{layer.name}' not found.")
            old_opt = old_model.optimizer
            old_vars = [var.name for var in old_model.trainable_variables]

        compile_model(model, setup["optimizer_name"], setup["learning_rate"],
                      setup["metrics"], setup["schedule_decay"])
        fit_hist = run_training(model, dataloader, False, cfg.log_suffix,
                                setup["using_new_loss"], old_opt=old_opt)

        # log NN params
        for net_type in ['tau_net', 'comp_net', 'comp_merge_net', 'conv_2d_net', 'dense_net']:
            mlflow.log_params({f'{net_type}_{k}': v
                               for k, v in cfg.training_cfg.SetupNN[net_type].items()})
        mlflow.log_params({f'TauLossesSFs_{i}': v
                           for i, v in enumerate(cfg.training_cfg.SetupNN.TauLossesSFs)})
        with open(to_absolute_path(
                f'{cfg.path_to_mlflow}/{run_kwargs["experiment_id"]}/{run_id}/artifacts/model_summary.txt')) as f:
            for l in f:
                if (s := 'Trainable params: ') in l:
                    mlflow.log_param('n_train_params', int(l.split(s)[-1].replace(',', '')))

        # log training related files
        mlflow.log_dict(training_cfg, 'input_cfg/training_cfg.yaml')
        mlflow.log_artifact(scaling_cfg, 'input_cfg')
        mlflow.log_artifact(to_absolute_path("Training_CNN.py"), 'input_cfg')
        mlflow.log_artifact(to_absolute_path("common.py"), 'input_cfg')

        # log hydra files
        mlflow.log_artifacts('.hydra', 'input_cfg/hydra')
        mlflow.log_artifact('Training_CNN.log', 'input_cfg/hydra')

        # log misc. info
        mlflow.log_param('run_id', run_id)
        mlflow.log_param('git_commit', _get_git_commit(to_absolute_path('.')))
        print(f'\nTraining has finished! Corresponding MLflow experiment name (ID): {cfg.experiment_name} ({run_kwargs["experiment_id"]}), and run ID: {run_id}\n')
        mlflow.end_run()

    # Temporary workaround to kill additional subprocesses that have not exited correctly
    try:
        current_process = psutil.Process()
        children = current_process.children(recursive=True)
        for child in children:
            child.kill()
    except Exception:
        pass
help="Training config") args = parser.parse_args() save_path = args.save_path # "/home/russell/tfdata/testing" scaling_cfg = args.scaling_cfg #"../../configs/ShuffleMergeSpectral_trainingSamples-2_files_0_50.json" training_cfg_path = args.training_cfg #../../configs/training_v1.yaml with open(training_cfg_path) as file: training_cfg = yaml.full_load(file) print("Training Config Loaded") training_cfg["SetupNN"]["n_batches"] = args.n_batches training_cfg["SetupNN"][ "n_batches_val"] = 0 # only generate training data as train/val split done later in training training_cfg["SetupNN"]["validation_split"] = 0 training_cfg["Setup"]["input_type"] = "ROOT" # make ROOT so generator loads dataloader = DataLoader.DataLoader(training_cfg, scaling_cfg) print("DataLoader Created") gen_train = dataloader.get_generator(primary_set=True, return_weights=dataloader.use_weights, show_progress=True) print("Generator Loaded") input_shape, input_types = dataloader.get_input_config() print("Input shapes and Types acquired") data_train = tf.data.Dataset.from_generator( gen_train, output_types=input_types, output_shapes=input_shape).prefetch(tf.data.AUTOTUNE) print("Dataset extracted from DataLoader") tf.data.experimental.save(data_train, save_path, compression="GZIP") print("Conversion Complete")
    self.model.fit(X_train, y_train)
    # save model
    self.save_model()
    val_predictions = self.model.predict(X_val)
    # save training result
    accuracy = accuracy_score(y_val, val_predictions) * 100
    self.save_result(accuracy)
    print("Accuracy valuation - {}: {} %".format(self.model_name, accuracy))

if __name__ == '__main__':
    # get training data
    data_loader = DataLoader('./data/train/train.txt', 'latin-1')
    file_content = data_loader.read_file()
    X_train_raw, y_train_raw = data_loader.get_data(file_content)

    # preprocess training data
    nlp = NLP()
    X_train_preprocessed = nlp.preprocessing(X_train_raw)

    # transform text data to vectors
    transform = FeatureExtraction(X_train_preprocessed)
    print("1. Using count vectorizer: ")
    X_train_vector = transform.count_vect()
    X_train, y_train = X_train_vector, y_train_raw
    # train model
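# A hedged sketch of what FeatureExtraction.count_vect() above presumably
# wraps: sklearn's CountVectorizer turning the preprocessed texts into a
# sparse token-count matrix.
from sklearn.feature_extraction.text import CountVectorizer

count_vectorizer = CountVectorizer()
X_counts = count_vectorizer.fit_transform(X_train_preprocessed)
print(X_counts.shape)  # (n_documents, n_vocabulary_terms)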
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
from keras import utils
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import DataLoader
import os
import json

train_dl = DataLoader.DataLoader(DataLoader.TRAINING_DATA_DIR)
dev_dl = DataLoader.DataLoader(DataLoader.DEV_DATA_DIR)
test_dl = DataLoader.DataLoader(DataLoader.TEST_DATA_DIR)
print("Train samples: ", train_dl.samples)
print("Dev samples: ", dev_dl.samples)
print("Test samples: ", test_dl.samples)

input = Input(shape=(DataLoader.SENT_FEATURES, ))
dense0 = Dense(512, activation="relu")(input)
dropout = Dropout(0.5)(dense0)
dense1 = Dense(256, activation="relu")(dropout)
dropout = Dropout(0.5)(dense1)
dense2 = Dense(3, activation="softmax")(dropout)
model = Model(inputs=input, outputs=dense2)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["categorical_accuracy"])
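# A hedged sketch of training the model compiled above with the imported
# EarlyStopping/ModelCheckpoint callbacks; train_dl.X / train_dl.y (and the
# dev equivalents) are hypothetical attribute names for the loaded feature
# matrices and one-hot labels.
early_stop = EarlyStopping(monitor="val_loss", patience=5)
checkpoint = ModelCheckpoint("best_model.h5", monitor="val_loss", save_best_only=True)
history = model.fit(train_dl.X, train_dl.y,
                    validation_data=(dev_dl.X, dev_dl.y),
                    epochs=50, batch_size=32,
                    callbacks=[early_stop, checkpoint])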
so it is like resnet18 + fc(customized)
"""

if __name__ == '__main__':
    data_dir = r'F:\img_training\data'
    data_transform = {
        'train': transforms.Compose([
            DataLoader.Rescale(256),
            DataLoader.RandomCrop(224),
            # transforms.Resize(224, 224),
            DataLoader.ToTensor(),
            # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            # transforms.ToPILImage
        ])
    }
    image_dataset = DataLoader.img_dataset(root_dir=data_dir,
                                           transform=data_transform['train'])
    dataloader = DataLoader.DataLoader(image_dataset, batch_size=4,
                                       shuffle=True, num_workers=4)
    dataset_sizes = len(image_dataset)

    model_conv = models.resnet18(pretrained=True)
    for param in model_conv.parameters():
        param.requires_grad = False
    # Parameters of newly constructed modules have requires_grad=True by default
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 12)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_conv = model_conv.to(device)

    # Observe that only parameters of the final layer are being optimized, as
    # opposed to before.
    optimizer_conv = torch.optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
from datetime import datetime as dt

import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
import plotly.graph_objects as go
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import MinMaxScaler

import DataLoader

dataLoader = DataLoader.DataLoader("./Sensor_Weather_Data_Challenge.csv")
df = dataLoader.getDf()

scaler = MinMaxScaler()
clusterDf = df.iloc[:, 0:14].copy()
clusterDf["maxValue"] = clusterDf.iloc[:, 0:13].max(axis=1)
clusterDf.drop(columns=["d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
                        "d10", "d11", "d12", "d13"], inplace=True)
x_scaled = scaler.fit_transform(clusterDf)
clusterDf = pd.DataFrame(data=x_scaled, index=clusterDf.index)

# shuffle and take a 70% split
cDf = clusterDf.sample(frac=1)
nObs = len(cDf)
splitNo = round(0.7 * nObs)
cDf = cDf.head(splitNo)
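# A minimal sketch of clustering the scaled frame prepared above with the
# imported KMeans and GaussianMixture; n_clusters=3 is an assumed choice, not
# taken from the source.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(cDf)
gmm = GaussianMixture(n_components=3, random_state=0).fit(cDf)
print("KMeans labels:", kmeans.labels_[:10])
print("GMM labels:   ", gmm.predict(cDf)[:10])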
import DataLoader

data_loader = DataLoader.DataLoader()
# data_loader.load_places_for_city(city_id=777934, use_last_cursor=True)
DataLoader.DataLoader.search_places_for_city('5b01ac9aff93a20480b397a9',
                                             use_last_cursor=True)
def main():
    args = parse_arguments()
    lr_disc = args.lr_disc
    lr_gen = args.lr_gen
    num_epochs = args.num_epochs
    data_dir = args.data_dir
    save_interval = args.save_interval
    wt_recon = args.wt_recon
    wt_KL = args.wt_KL

    dataset = KITTIDataset(folder_name=data_dir,
                           transform=transforms.Compose([
                               RandomVerticalFlip(),
                               RandomHorizontalFlip(),
                               RandomCrop([320, 896]),
                               Normalize(),
                               ToTensor()
                           ]))
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4)

    # create required directories
    results_dir = os.path.join(os.getcwd(), "results")
    # models_dir = os.path.join(os.getcwd(), "saved_models")
    timestamp = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    curr_dir = os.path.join(results_dir, timestamp)
    disc_save_path = os.path.join(curr_dir, "disc_lrd_{}".format(lr_disc))
    gen_save_path = os.path.join(curr_dir, "gen_lrg_{}".format(lr_gen))
    make_dirs([results_dir, curr_dir])

    ## create generator and discriminator instances
    model_gen = gen().to(DEVICE)
    model_disc = disc().to(DEVICE)
    RCLoss = nn.L1Loss()
    # criterion = nn.BCELoss()

    losses_GG = []
    losses_DD = []
    losses_RR = []
    mean_fake_probs_arr = []
    std_fake_probs_arr = []

    # train the GAN model
    for epoch in range(num_epochs):
        losses_D = []
        losses_G = []
        losses_Rec = []
        fake_probs = []
        for batch_ndx, frames in enumerate(dataloader):
            # my data
            # frames = np.random.randint(0, high=1, size=(4, 2, 320, 896))
            # frames = torch.tensor(frames).to(DEVICE, dtype=torch.float)
            frames = frames.to(DEVICE).float()
            frames1 = frames[:, 0:1, :, :]
            frames2 = frames[:, 1:2, :, :]

            # train discriminator; the generator forward pass is detached
            with torch.no_grad():
                optical_flow, mean, logvar = model_gen(frames)
                frame2_fake = warp(frames1, optical_flow)
            outDis_real = model_disc(frames1)
            lossD_real = torch.log(outDis_real)
            outDis_fake = model_disc(frame2_fake)
            lossD_fake = torch.log(1.0 - outDis_fake)
            # customized GAN loss for the discriminator:
            # -0.5 * E[log D(real) + log(1 - D(fake))]
            loss_dis = lossD_real + lossD_fake
            loss_dis = -0.5 * loss_dis.mean()
            model_disc.optimizer.zero_grad()
            loss_dis.backward()
            model_disc.optimizer.step()
            losses_D.append(loss_dis.item())

            # train generator
            optical_flow, mean, logvar = model_gen(frames)
            frame2_fake = warp(frames1, optical_flow)
            model_disc.optimizer.zero_grad()
            outDis_fake = model_disc(frame2_fake)
            loss_KLD = -0.5 * torch.sum(1 + logvar - mean * mean - torch.exp(logvar))
            loss_gen = -torch.log(outDis_fake)
            loss_gen = loss_gen.mean()
            loss_recons = RCLoss(frame2_fake, frames2)
            total_gen_loss = loss_gen + wt_recon * loss_recons + wt_KL * loss_KLD
            model_gen.optimizer.zero_grad()
            total_gen_loss.backward()
            model_gen.optimizer.step()
            losses_G.append(loss_gen.item())
            losses_Rec.append(loss_recons.item())
            fake_probs.extend(outDis_fake.clone().detach().cpu().numpy())

            print("Epoch: [{}/{}], Batch_num: {}, Discriminator loss: {:.4f}, "
                  "Generator loss: {:.4f}, Recons_Loss: {:.4f}, fake_prob: {:.4f}"
                  .format(epoch, num_epochs, batch_ndx, losses_D[-1],
                          losses_G[-1], loss_recons, np.mean(fake_probs)))

        losses_GG.append(np.mean(losses_G))
        losses_DD.append(np.mean(losses_D))
        losses_RR.append(np.mean(losses_Rec))
        mean_fake_probs_arr.append(np.mean(fake_probs))
        std_fake_probs_arr.append(np.std(fake_probs))
        print("Epoch: [{}/{}], Discriminator loss: {:.4f}, Generator loss: {:.4f}, "
              "recons_loss: {:.4f} fake_prob: {:.4f}"
              .format(epoch + 1, num_epochs, losses_DD[-1], losses_GG[-1],
                      losses_RR[-1], mean_fake_probs_arr[-1]))

        if (epoch + 1) % save_interval == 0:
            save_model(model_disc, epoch, model_disc.optimizer,
                       disc_save_path + "epoch_{}.pth".format(epoch))
            save_model(model_gen, epoch, model_gen.optimizer,
                       gen_save_path + "epoch_{}.pth".format(epoch))

    plot_props([losses_GG, losses_DD, losses_RR, mean_fake_probs_arr],
               ["Generator_loss", "Discriminator_loss", "Reconstruction_loss",
                "disc_fake_prob"], curr_dir)
    return answers

def get_the_answer_unclassified(print_answers, best_sentence, best_score, question):
    import csv
    answers = ""
    with open('resources/Single_FaQ.csv') as csvfile:
        csv_content = csv.reader(csvfile, delimiter='\t')
        for row in csv_content:
            if row[1] == best_sentence:
                answers = row[2]
    if print_answers:
        print_question_answer(question, answers, best_score)
    return answers

if __name__ == '__main__':
    print(dl)
    loader = dl.DataLoader('resources/Single_FaQ.csv')
    feature_set = loader.get_feature_set()
    # 50/50 train/test split of the feature set
    train_set_length = round(len(feature_set) / 2)
    train_set = feature_set[:train_set_length]
    test_set = feature_set[train_set_length:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    print('Actual Value: ' + test_set[0][1])
    print('Predicted Value: ' + classifier.classify(test_set[0][0]))
    # print("Classifier accuracy percent:", (nltk.classify.accuracy(classifier, test_set)) * 100)
    # print(classifier.show_most_informative_features(15))
    get_questions_from_user(True)
def main(args=None):
    log = open('/home/binhnguyen/PycharmProjects/Sentence_Compression/ver_1.0/log.txt', 'w')
    BATCH = 64
    SENTENCE_LEN = 50
    glove_model = load_glove()
    train = SC("train", SENTENCE_LEN, glove_model)
    test = SC("test", SENTENCE_LEN, glove_model)
    valid = SC("valid", SENTENCE_LEN, glove_model)
    train_data = DataLoader(train, batch_size=BATCH, shuffle=True)
    test_data = DataLoader(test, batch_size=BATCH, shuffle=False)
    valid_data = DataLoader(valid, batch_size=BATCH, shuffle=False)
    model = LSTMnet(input_dim=100, hidden_dim=128)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    print("Load model done!!")

    for epoch in range(20):
        train_loss = 0.0
        for data in train_data:
            feature, label = data
            label = label.view(-1)
            feature, label = Variable(feature.float()), Variable(label.squeeze())
            y_hat = model(feature)
            loss = criterion(y_hat, label)
            train_loss += loss.data.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        correct = 0
        total = 0
        test_loss = 0
        with torch.no_grad():
            for data in test_data:
                feature, label = data
                label = label.view(-1)
                feature, label = Variable(feature.float()), Variable(label.squeeze())
                outputs = model(feature)
                _, predicted = torch.max(outputs.data, 1)
                total += label.size(0)
                correct += (predicted == label).sum().item()
                test_loss += criterion(outputs, label).data.item()
        test_accuracy = correct * 100.0 / total

        correct = 0
        total = 0
        valid_loss = 0
        with torch.no_grad():
            for data in valid_data:
                feature, label = data
                label = label.view(-1)
                feature, label = Variable(feature.float()), Variable(label.squeeze())
                outputs = model(feature)
                _, predicted = torch.max(outputs.data, 1)
                total += label.size(0)
                correct += (predicted == label).sum().item()
                valid_loss += criterion(outputs, label).data.item()
        valid_accuracy = correct * 100.0 / total

        print('Epoch {}'.format(epoch), file=log)
        print('Train Loss = %0.2f' % (train_loss), file=log)
        print("Test Loss = %0.2f .. Accuracy Test = %0.2f" % (test_loss, test_accuracy), file=log)
        print('Valid Loss = %0.2f .. Accuracy Valid = %0.2f' % (valid_loss, valid_accuracy), file=log)
        print("--------------------", file=log)
    log.close()
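# Hedged note on the padded sequences above: with sentences padded to
# SENTENCE_LEN, the flattened labels include padding positions. A common
# refinement (an assumption about this dataset, not shown in the excerpt) is
# to give padding a dedicated label id and exclude it from the loss:
import torch.nn as nn

PAD_LABEL = -100  # CrossEntropyLoss's default ignore_index
criterion = nn.CrossEntropyLoss(ignore_index=PAD_LABEL)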
data_list = [
    # First data set (Positional Gestures)
    image_val_data_location_1_1,
    image_val_data_location_2_1,
    image_val_data_location_3_1,
    image_val_data_location_4_1,
    image_val_data_location_5_1,
    image_val_data_location_6_1,
    image_val_data_location_7_1,
    image_val_data_location_8_1,
    image_val_data_location_9_1,
    image_val_data_location_10_1
]

# if training mode is false, then the data list can just be an empty array
if not train_mode:
    data_list = []

# create data loader
# if data_list is an empty array then calling set_elements_to_train will be an error
data = DataLoader.DataLoader(data_paths=data_list,
                             size_x=res_x,
                             size_y=res_y,
                             num_inputs=raw_input_size,
                             num_outputs=raw_output_size,
                             black_white=True)

# # just for testing purposes
# raw_RGB = data.load_image(real_time_path)  # load raw image
# raw_RGB = np.array(raw_RGB, dtype=np.float32)
#
# pre = net.predict(np.array([raw_RGB]))
# for p in pre:
#     for i in p:
#         print(i * 100, end=" ")
# exit(0)
# # testing ends here

# if in training mode
def main():
    args = parse_arguments()
    lr_disc = args.lr_disc
    lr_gen = args.lr_gen
    num_epochs = args.num_epochs
    data_dir = args.data_dir
    save_interval = args.save_interval
    wt_recon = args.wt_recon
    wt_KL = args.wt_KL

    dataset = KITTIDataset(folder_name=data_dir,
                           transform=transforms.Compose([
                               RandomVerticalFlip(),
                               RandomHorizontalFlip(),
                               RandomCrop([320, 896]),
                               Normalize(),
                               ToTensor()
                           ]))
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4)

    # create required directories
    results_dir = os.path.join(os.getcwd(), "results")
    # models_dir = os.path.join(os.getcwd(), "saved_models")
    timestamp = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    curr_dir = os.path.join(results_dir, timestamp)
    # disc_save_path = os.path.join(curr_dir, "disc_lrd_{}".format(lr_disc))
    gen_save_path = os.path.join(curr_dir, "gen_lrg_{}".format(lr_gen))
    make_dirs([results_dir, curr_dir])

    ## create generator and discriminator instances
    model_gen = gen().to(DEVICE)
    # model_disc = disc().to(DEVICE)
    RCLoss = nn.L1Loss()
    # criterion = nn.BCELoss()

    losses_GG = []
    losses_DD = []
    losses_RR = []
    mean_fake_probs_arr = []
    std_fake_probs_arr = []

    # train the GAN model
    save_sample_flag = False
    for epoch in range(num_epochs):
        losses_D = []
        losses_G = []
        losses_Rec = []
        fake_probs = []
        if epoch % 2 == 0:
            save_sample_flag = True
        for batch_ndx, frames in enumerate(dataloader):
            # process data ##########################################
            frames = frames.to(DEVICE).float()
            frames1 = frames[:, 0:1, :, :]
            frames2 = frames[:, 1:2, :, :]

            # train generator #########################################
            optical_flow, frame2_fake, total_gen_loss, loss_recons = train_vae(
                frames, frames1, frames2, RCLoss, wt_recon, wt_KL, model_gen)
            losses_G.append(total_gen_loss * 1.0)
            losses_Rec.append(loss_recons * 1.0)

            # save images and flow ##########################################
            if save_sample_flag:
                save_samples(frame2_fake.clone().detach().cpu().numpy(),
                             curr_dir, epoch, "predicted")
                save_samples(frames1.cpu().numpy(), curr_dir, epoch, "actual_frame1")
                save_samples(frames2.cpu().numpy(), curr_dir, epoch, "actual_frame2")
                save_flow(optical_flow.clone().detach().cpu().numpy(),
                          curr_dir, epoch, "flow")
                save_sample_flag = False

            print("Epoch: [{}/{}], Batch_num: {}, Generator loss: {:.4f}, Recons_Loss: {:.4f}"
                  .format(epoch, num_epochs, batch_ndx, losses_G[-1], loss_recons))

        losses_GG.append(np.mean(losses_G))
        losses_RR.append(np.mean(losses_Rec))
        print("Epoch: [{}/{}], Generator loss: {:.4f}, recons_loss: {:.4f}"
              .format(epoch + 1, num_epochs, losses_GG[-1], losses_RR[-1]))

        # save model ##################################################
        if (epoch + 1) % save_interval == 0:
            save_model(model_gen, epoch, model_gen.optimizer,
                       gen_save_path + "epoch_{}.pth".format(epoch))

    plot_props([losses_GG, losses_RR],
               ["Generator_loss", "Reconstruction_loss"], curr_dir)
from todloop.routines import DataLoader
from todloop.tod import TODLoader
from todloop.base import TODLoop
from reduction_routines import (TimeSeries, PlotGlitches, Energy, SaveEvents,
                                NPixelStudy, EnergyStudy, CRCorrelationFilter)
from calibration.routines import FixOpticalSign, CalibrateTOD

"""
INITIALIZE TODLoop
"""
loop = TODLoop()
todid = raw_input("Enter TOD id:")
tod_id = int(todid)
loop.add_tod_list("../data/covered_tods.txt")
loop.add_routine(DataLoader(input_dir="../outputs/covered_tods_cosig/", output_key="cuts"))

"""
LOAD TOD DATA
"""
loop.add_routine(TODLoader(output_key="tod_data"))
loop.add_routine(FixOpticalSign(input_key="tod_data", output_key="tod_data"))
loop.add_routine(CalibrateTOD(input_key="tod_data", output_key="tod_data"))

"""
ROUTINES
"""
loop.add_routine(TimeSeries(tod_key="tod_data", output_key="timeseries"))
loop.add_routine(Energy(timeseries_key="timeseries", output_key="energy_calculator"))
loop.add_routine(CRCorrelationFilter(timeseries_key="timeseries", cosig_key="cuts",
                                     tod_key="tod_data", output_key="cr_cuts"))
loop.add_routine(PlotGlitches(tag=tod_id, cosig_key="cr_cuts", tod_key="tod_data",
                              timeseries_key="timeseries"))
# loop.add_routine(SaveEvents(tag=tod_id, cosig_key="cr_cuts", tod_key="tod_data",
#                             energy_key="energy_calculator", output_key="events"))
def main(argv):
    # load configuration
    parameters = load_configuration()

    # load parameters
    # dataset
    path_to_dataset = parameters['path_to_dataset']
    load_size = parameters['load_size']
    # SAX
    alphabet_size = parameters['alphabet_size']
    paa_size = parameters['paa_size']
    window_size = parameters['window_size']
    step = parameters['step']
    substring_size = parameters['substring_size']
    # smoothing
    threshold_freq = parameters['threshold_freq']
    # projections
    prj_size = parameters['prj_size']
    prj_iterations = parameters['prj_iterations']
    anomaly_threshold = parameters['anomaly_threshold']

    # loading data
    loader = DataLoader.DataLoader(path_to_dataset)
    data = DataTypes.Data()
    # loader.load_all(data, 200)
    loader.load_subset(data, load_size, 100)

    # period from which to extract anomalies
    begin_date = datetime.datetime.fromtimestamp(data.index_to_time[0])
    end_date = datetime.datetime.fromtimestamp(data.index_to_time[load_size - 1])

    if parameters['power_type'] == -1:
        tank = parameters['tank']
        sensor_type = parameters['sensor_type']
        # print(data.measures[0])
        print("Loading of %i tank %i data from %s to %s" % (sensor_type, tank, begin_date, end_date))
        s_values = [data.measures[i][0][tank][sensor_type] for i in range(0, len(data.measures))]
    else:
        power_type = parameters['power_type']
        print("Loading measures of power %i from %s to %s" % (power_type, begin_date, end_date))
        s_values = [data.measures[i][1][power_type] for i in range(0, len(data.measures))]

    len_serie = len(s_values)
    hash_table_substrings = {}
    # get the first n alphabet letters
    alphabet = get_alphabet_letters(alphabet_size)
    # create a hash table indexed by all substrings of length k
    hash_table_substrings = get_hash_table(alphabet, prj_size)
    # list containing the score for each window
    anomalies_score = []

    for index in range(0, len_serie, step):
        begin = index
        end = begin + window_size
        if end < len_serie:
            window_values = s_values[begin:end]
            # z-normalize the current window (not the whole series, which the
            # original code did while leaving window_values unused)
            window_znorm = znorm(window_values)
            window_paa = paa(window_znorm, paa_size)
            window_string = ts_to_string(window_paa, cuts_for_asize(alphabet_size))
            # each character of the string corresponds to k values of the series
            k = window_size // paa_size
            # get smoothed string
            window_smoothed = smoothing(window_string, threshold_freq)
            # fill the hash table by applying random projection
            hash_table_substrings = put_in_bucket(hash_table_substrings, window_smoothed,
                                                  begin, prj_iterations, prj_size,
                                                  substring_size, k)
            total = 0
            for key, values in hash_table_substrings.items():
                total = total + len(values)
            buckets_with_anomalies, bucket_freq = analyzed_bucket(hash_table_substrings,
                                                                  total, anomaly_threshold)
            # number of buckets with anomalies
            n_buckets_anomalies = len(buckets_with_anomalies.keys())
            # get the score for the current window
            avg_window_score = getting_score(hash_table_substrings,
                                             buckets_with_anomalies,
                                             n_buckets_anomalies)
            anomalies_score.append(avg_window_score)
            # reset table
            hash_table_substrings = get_hash_table(alphabet, prj_size)
        else:
            break
    print(anomalies_score)
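# Hedged sketch of the random-projection idea that put_in_bucket above
# presumably applies (the helper itself is not in this excerpt): choose
# prj_size random character positions of each SAX substring and use the
# selected characters as the bucket key, so that similar substrings collide.
import random

def project(substring, prj_size):
    positions = sorted(random.sample(range(len(substring)), prj_size))
    return ''.join(substring[p] for p in positions)

# e.g. project('abcade', 3) could return 'bad' for sampled positions [1, 3, 4]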
k = 5
SOURCE_L = "/home/pf/pfstaff/projects/ruzicka/TiledDataset_256x256_32ov/2012_strip" + str(k) + "_256x256_over32_png/"
SOURCE_R = "/home/pf/pfstaff/projects/ruzicka/TiledDataset_256x256_32ov/2015_strip" + str(k) + "_256x256_over32_png/"
SOURCE_Y = "/home/pf/pfstaff/projects/ruzicka/CleanedVectors_manually_256x256_32over/vector_strip" + str(k) + "_256x256_over32/"

import numpy as np
import mock

import Settings

args = mock.Mock()
args.name = "test"
settings = Settings.Settings(args)

import DataLoader, DataPreprocesser, Debugger
import DatasetInstance_OurAerial

dataLoader = DataLoader.DataLoader(settings)
datasetInstance = DatasetInstance_OurAerial.DatasetInstance_OurAerial(settings, dataLoader, "256_cleanManual")

#"""
paths_2012 = [SOURCE_L]
paths_2015 = [SOURCE_R]
paths_vectors = [SOURCE_Y]

files_paths_2012 = datasetInstance.load_path_lists(paths_2012)
all_2012_png_paths, edge_tile_2012, total_tiles_2012 = datasetInstance.process_path_lists(files_paths_2012, paths_2012)

files_paths_2015 = datasetInstance.load_path_lists(paths_2015)
all_2015_png_paths, _, _ = datasetInstance.process_path_lists(files_paths_2015, paths_2015)

files_vectors = datasetInstance.load_path_lists(paths_vectors)
all_vector_paths = datasetInstance.process_path_lists_for_vectors(files_vectors, paths_vectors, edge_tile_2012, total_tiles_2012)
def load_data():
    retval = DataTypes.Data()
    loader = DataLoader.DataLoader("../dataset/")
    # loader.load_subset(retval, 5000)
    loader.load_all(retval)
    return retval