def baseline():
    # Run the univariate baseline model and tag the result with the company name.
    try:
        obj = models.Models()
        x = obj.uni_baseline(data, steps)
        x["company"] = dname
        print(x)
        return x
    except Exception:
        return json.dumps({})
def sarima():
    # Run the univariate SARIMA model and tag the result with the company name.
    try:
        obj = models.Models()
        x = obj.uni_sarima(data, steps)
        # Attach the company name to the result dict before serializing it.
        x["company"] = dname
        print(x)
        return json.dumps(x)
    except Exception:
        return json.dumps({})
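# baseline() and sarima() above read the module-level globals `data`, `steps`,
# and `dname`, which are not defined in this excerpt. A hedged sketch of how they
# could be populated; the CSV file name and company label are hypothetical.
import pandas as pd

data = pd.read_csv("company_timeseries.csv")  # hypothetical input time series
steps = 10                                    # forecast horizon
dname = "ExampleCo"                           # hypothetical company label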
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", help="verbose computation", action="store_true")
    parser.add_argument("-i", "--input", help="add the input csv file")
    parser.add_argument("-o", "--output",
                        help="the csv file you want to output (default result.csv)",
                        default="result.csv")
    parser.add_argument("-m", "--model",
                        help="specify the model, currently supports 'linear' or 'log-linear' (default: linear)",
                        default="linear")
    parser.add_argument("-s", "--step",
                        help="steps for computing each Shapley value (default 10,000)",
                        default=10000, type=int)
    args = parser.parse_args()

    assert args.input, "You didn't provide the input file!"
    assert os.path.exists(args.input), "The input file does not exist!"
    assert args.model in ('linear', 'log-linear'), "The model must be either linear or log-linear"
    assert isinstance(args.step, int), "Steps must be an integer!"

    df = pd.read_csv(args.input, engine='c')
    df = df.dropna()
    print("Dataset Loaded!")

    model_selected = models.Models(df, model=args.model)
    coefs = model_selected.coefs
    varnames = model_selected.varnames
    shap = shapley_value_for_all(model_selected, args.verbose, args.step)

    with open(args.output, 'w') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(['Variable Name'] + varnames[1:] + ['Residuals'])
        wr.writerow(['Coefficients'] + list(coefs))
        wr.writerow(['Shapley Value'] + shap)
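# A hypothetical invocation of the CLI defined above (the script name and CSV
# file names are placeholders, not taken from the source):
#
#   python shapley_cli.py -i sales.csv -o shapley_result.csv -m log-linear -s 5000 -v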
args = parser.parse_args()
device = "cuda" if torch.cuda.is_available() else "cpu"
args.output_path = util.get_output_folder(args.output_path, args.dataset)

# Loss
generation_loss = nn.BCELoss()
if args.target_class is None:
    classification_loss = nn.CrossEntropyLoss()
else:
    classification_loss = nn.CrossEntropyLoss(reduction='none')

# Models
model_pack = models.Models(args.dataset, "dcgan_teeyo", args.latent_dim, args.resume)
model_pack.choose_device(device)
generator = model_pack.model_list["generator"]
discriminator = model_pack.model_list["discriminator"]
classifier = model_pack.model_list["classifier"]

# Data
dataclass = (data.MNISTDataLoader()
             if args.dataset == "mnist"
             else data.CIFAR10DataLoader())
dataloader = torch.utils.data.DataLoader(
    dataclass.get_dataset(
        args.data_path,
        normalize=args.normalize_data,
        resize=args.image_size,
    ),
    batch_size=args.batch_size,
steps = 10


class Database(object):
    def __init__(self):
        with open("./global.config") as f:
            self.config = json.load(f)
        self.client = pymongo.MongoClient(self.config["db_url"])
        self.db = self.client.model_db
        self.models = self.db.models

    def add_to_db(self, obj):
        self.models.insert_one(obj)


if __name__ == "__main__":
    # Initialize the database
    db = Database()

    # Grab models (named so it does not shadow the imported `models` module;
    # the path is a raw string so the backslashes are not treated as escapes)
    model_obj = models.Models(r"\data\data_ub.csv")

    # Run the models
    uni_baseline = model_obj.uni_baseline(steps)
    #uni_sarima = model_obj.uni_sarima(steps)
    print(uni_baseline)

    # Add results to database
    db.add_to_db(uni_baseline)
    #db.add_to_db(uni_sarima)
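# A hedged usage sketch (not part of the original snippet): reading the most
# recently stored result back out of MongoDB with plain pymongo calls. The
# shape of the stored document depends on what uni_baseline() returns.
db = Database()
latest = db.models.find_one(sort=[("_id", pymongo.DESCENDING)])
print(latest)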
    'nine', 'six', 'zero', 'five'
]

train_data_dir = '../../data/preprocessed/train'
validation_data_dir = '../../data/preprocessed/validation'
nb_train_samples = 49700
nb_validation_samples = 2000
epochs = 10
batch_size = 32  # Note: must be less than or equal to nb_validation_samples.
img_width, img_height = 26, 99

if K.image_data_format() == 'channels_first':
    input_shape = (1, img_width, img_height)
else:
    input_shape = (img_width, img_height, 1)

m = models.Models()
#model = m.get_cifar_model(input_shape, 10)
#model = m.get_cifar_model_2(input_shape, 10)
model = m.get_covn2d_six_layer_model(input_shape, len(training_categories) + 1)

du = DataUtility(bucket_id='kaggle_voice_data', root_folder='/')
X, Y = du.load_data_local('../../data/npz', training_categories, other_categories)
#X, Y = du.du.load_local_binary_data('../../data/npz', target)

x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
# x_train -> Training data to feed the net
def baseline_train():
    #Basic parameters
    gpus = FLAG.gpus
    batch_size = FLAG.batch_size
    epoches = FLAG.epoch
    LOG_INTERVAL = 10
    TEST_INTERVAL = 2
    data_name = FLAG.source
    target_name = FLAG.target
    model_name = FLAG.arch
    l2_decay = 5e-4
    lr = FLAG.lr

    #Loading dataset (both branches produce the target test set used below)
    if FLAG.isLT:
        train_dataset, test_dataset_tgt, classes = baseline_LT_dataset(FLAG)
    else:
        train_dataset, test_dataset_tgt, classes = my_baseline_dataset(FLAG)
    #print(train_dataset)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                               shuffle=True, num_workers=8, drop_last=True)
    #test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size,
    #                                          shuffle=False, num_workers=8)
    test_loader_tgt = torch.utils.data.DataLoader(dataset=test_dataset_tgt, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)

    #Define model
    Nets = models.Models(FLAG)
    base_model = Nets.model()
    if len(gpus) > 1:
        gpus = gpus.split(',')
        gpus = [int(v) for v in gpus]
        base_model = nn.DataParallel(base_model, device_ids=gpus)
    base_model.to(DEVICE)
    #print(base_model)

    #Define optimizer: a smaller learning rate for backbone layers, a larger one
    #for the final classification layer
    paras = dict(base_model.named_parameters())
    paras_new = []
    if 'resnet' in model_name or 'resnest' in model_name:
        for k, v in paras.items():
            if 'fc' not in k:
                paras_new.append({'params': [v], 'lr': 1e-3})
            else:
                paras_new.append({'params': [v], 'lr': 1e-2})
    elif model_name == 'vgg' or model_name == 'alexnet':
        for k, v in paras.items():
            if 'classifier.6' not in k:
                paras_new.append({'params': [v], 'lr': 1e-3})
            else:
                paras_new.append({'params': [v], 'lr': 1e-2})
    #print(paras_new)
    optimizer = optim.SGD(paras_new, lr=lr, momentum=0.9, weight_decay=l2_decay)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [50, 80], gamma=0.1)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.94)
    #print(optimizer.param_groups[-1]['lr'])

    #Define loss criterion
    criterion = torch.nn.CrossEntropyLoss()

    #Training
    best_result = 0.0
    best_result1 = 0.0

    #Model store
    if FLAG.isLT:
        model_dir = os.path.join('./models/', 'baseline-' + data_name + '-' + target_name + '-' + model_name)
    else:
        model_dir = os.path.join('./models/', 'baseline-' + data_name + '-' + model_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    #Tensorboard configuration
    if FLAG.isLT:
        log_dir = os.path.join('./logs/', 'baseline-' + data_name + '-' + target_name + '-' + model_name)
    else:
        log_dir = os.path.join('./logs/', 'baseline-' + data_name + '-' + model_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(logdir=log_dir)

    for epoch in range(epoches):
        base_model.train()
        #scheduler.step(epoch)
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            label = torch.squeeze(data[1].to(DEVICE))
            inputs, labels = data[0].to(DEVICE), label
            optimizer.zero_grad()
            outputs = base_model(inputs)
            loss = criterion(outputs, labels)
            #Log training loss
            if i % 5 == 0:
                n_iter = epoch * len(train_loader) + i
                writer.add_scalar('data/training loss', loss, n_iter)
            #print(optimizer.param_groups[0]['lr'])
            loss.backward()
            optimizer.step()

            #Print statistics every LOG_INTERVAL mini-batches
            running_loss += loss.item()
            if i % LOG_INTERVAL == 0:
                print('Epoch:[{}/{}], Batch:[{}/{}] loss: {:.4f}'.format(
                    epoch + 1, epoches, i + 1, len(train_loader), running_loss / LOG_INTERVAL))
                running_loss = 0

        scheduler.step(epoch)

        if epoch % TEST_INTERVAL == 0:  #Evaluate every TEST_INTERVAL epochs
            #acc_test, class_corr, class_total = baseline_test(base_model, test_loader, epoch)
            acc_test1, class_correct1, class_total1 = baseline_test(base_model, test_loader_tgt, epoch)
            #Log test accuracy
            writer.add_scalar('data/test accuracy', acc_test1, epoch)

            #Store the best model
            #if acc_test > best_result:
            #    #log results for classes
            #    log_path = model_path = os.path.join(model_dir,
            #        '{}-{}-epoch_{}-accval_{}.csv'.format(data_name, model_name, epoch, round(acc_test, 3)))
            #    log_to_csv(log_path, classes, class_corr, class_total)
            #    best_result = acc_test
            #else:
            #    print('The results in this epoch cannot exceed the best results !')

            if acc_test1 > best_result1:
                #Log per-class results
                log_path1 = model_path = os.path.join(model_dir,
                    '{}-{}-epoch_{}-accvaltgt_{}.csv'.format(data_name, model_name, epoch, round(acc_test1, 3)))
                log_to_csv(log_path1, classes, class_correct1, class_total1)
                best_result1 = acc_test1
            else:
                print('The results in this epoch cannot exceed the best results !')

    writer.close()
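#baseline_test() is called in the training loop above but is not included in this
#snippet. The following is a minimal sketch of such an evaluation helper, matching
#the (model, loader, epoch) call and the (accuracy, per-class correct, per-class
#total) return used above; the number of classes is a placeholder assumption and
#DEVICE is assumed to be the same global used during training.
def baseline_test(model, loader, epoch, num_classes=10):
    model.eval()
    class_correct = [0] * num_classes
    class_total = [0] * num_classes
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(DEVICE)
            labels = torch.squeeze(labels.to(DEVICE))
            preds = model(inputs).argmax(dim=1)
            for p, t in zip(preds, labels):
                class_correct[t.item()] += int(p.item() == t.item())
                class_total[t.item()] += 1
    acc = sum(class_correct) / max(sum(class_total), 1)
    print('Epoch {}: target test accuracy {:.4f}'.format(epoch + 1, acc))
    return acc, class_correct, class_total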
def train(self, target):
    start_time = time()
    img_width, img_height = 26, 99
    epochs = 20
    batch_size = 32

    tb_callback = CB.TensorBoard(log_dir='./logs',
                                 histogram_freq=0,
                                 batch_size=1,
                                 write_graph=True,
                                 write_grads=True,
                                 write_images=True,
                                 embeddings_freq=0,
                                 embeddings_layer_names=None,
                                 embeddings_metadata=None)

    m = models.Models()
    print('Training with target "{0}".'.format(target))

    du = DataUtility(bucket_id='kaggle_voice_data', root_folder='/')

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 1)

    model = m.get_covn2d_six_layer_model(input_shape, 1)

    X, Y = du.load_local_binary_data('../../data/npz', target)
    # X, Y = du.load_cloud_binary_data(target)

    x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
    # x_train -> Training data to feed the net
    # x_test  -> Training data for evaluation
    # y_train -> VALIDATION data for net input
    # y_test  -> Expected Validation output
    #
    # Train the network with x_train and x_test
    # Evaluate the network with y_train and y_test

    # x_test = np_utils.to_categorical(x_test, 2)
    # y_test = np_utils.to_categorical(y_test, 2)

    new_x_train = np.expand_dims(x_train, axis=3)
    new_y_train = np.expand_dims(y_train, axis=3)

    # datagen = ImageDataGenerator(
    #     featurewise_std_normalization=True,
    #     rotation_range=0,
    #     height_shift_range=0.2,
    #     horizontal_flip=False
    # )
    # Fit the data generator to the test data for featurewise_std.
    # datagen.fit(new_x_train)

    # x_train = x_train[0:nb_train_samples]
    # x_test = x_test[0:nb_train_samples]
    # y_train = y_train[0:nb_validation_samples]
    # y_test = y_test[0:nb_validation_samples]

    # model.fit_generator(datagen.flow(new_x_train, x_test, batch_size=batch_size),
    #                     steps_per_epoch=len(x_train) / batch_size, epochs=epochs,
    #                     validation_data=(new_y_train, y_test))
    history = model.fit(x=new_x_train,
                        y=x_test,
                        validation_data=(new_y_train, y_test),
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=0,
                        callbacks=[tb_callback])

    stop_time = time()
    print("Total training time: {0} seconds.".format(int(stop_time - start_time)))

    # model.save("./local_big_training")
    du.save_multi_model(self.save_dir, '{0}'.format(target), model)
    print("Model saved as {0}.h5".format(target))

    return {"name": target, "accuracy": history.history['acc']}
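# A hypothetical driver (not in the original source) showing how train() might be
# invoked for several binary targets and how the returned accuracy history could
# be summarized. The `trainer` instance and the target words are assumptions.
def train_all(trainer, targets=('yes', 'no', 'stop', 'go')):
    results = [trainer.train(word) for word in targets]
    for r in results:
        # 'accuracy' holds the per-epoch training accuracies recorded by Keras
        print('{0}: final accuracy {1:.3f}'.format(r['name'], r['accuracy'][-1]))
    return results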
def home():
    obj = models.Models()