Example #1
def baseline():
    try:
        obj = models.Models()
        x = obj.uni_baseline(data, steps)
        x["company"] = dname
        print(x)
        return x
    except Exception:
        return json.dumps({})
Example #2
def sarima():
    try:
        obj = models.Models()
        x = obj.uni_sarima(data, steps)
        x["company"] = dname
        print(x)
        return json.dumps(x)
    except Exception:
        return json.dumps({})
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose computation",
                        action="store_true")
    parser.add_argument("-i", "--input", help="add the input csv file")
    parser.add_argument(
        "-o",
        "--output",
        help="the csv file you want to output (default result.csv)",
        default="result.csv")
    parser.add_argument(
        "-m",
        "--model",
        help="specify the model; currently supports 'linear' or 'log-linear' (default: linear)",
        default="linear")
    parser.add_argument(
        "-s",
        "--step",
        help="steps for computing each Shapley value (default 10,000)",
        default=10000,
        type=int)
    args = parser.parse_args()
    assert args.input, "You didn't provide the input file!"
    assert os.path.exists(args.input), "The input file does not exist!"
    assert args.model in ('linear', 'log-linear'), "The model must be either 'linear' or 'log-linear'"
    assert isinstance(args.step, int), "Steps must be an integer!"
    df = pd.read_csv(args.input, engine='c')
    df = df.dropna()
    print("Dataset Loaded!")
    model_selected = models.Models(df, model=args.model)
    coefs = model_selected.coefs
    varnames = model_selected.varnames
    shap = shapley_value_for_all(model_selected, args.verbose, args.step)
    with open(args.output, 'w') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(['Variable Name'] + varnames[1:] + ['Residuals'])
        wr.writerow(['Coefficients'] + list(coefs))
        wr.writerow(['Shapley Value'] + shap)
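The result file written by main() contains exactly three rows: the variable names, the fitted coefficients, and the Shapley values. For reference, a minimal sketch of reading it back, assuming the default output name result.csv:

import csv

# Read the three rows that main() writes with csv.QUOTE_ALL quoting.
with open("result.csv") as f:
    names_row, coefs_row, shapley_row = list(csv.reader(f))

print(names_row[0], names_row[1:])      # 'Variable Name', variable names + 'Residuals'
print(coefs_row[0], coefs_row[1:])      # 'Coefficients', one value per variable
print(shapley_row[0], shapley_row[1:])  # 'Shapley Value', one value per variable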
Example #4
args = parser.parse_args()

device = "cuda" if torch.cuda.is_available() else "cpu"

args.output_path = util.get_output_folder(args.output_path, args.dataset)

# Loss
generation_loss = nn.BCELoss()

if args.target_class is None:
    classification_loss = nn.CrossEntropyLoss()
else:
    classification_loss = nn.CrossEntropyLoss(reduction='none')

# Models
model_pack = models.Models(args.dataset, "dcgan_teeyo", args.latent_dim,
                           args.resume)
model_pack.choose_device(device)
generator = model_pack.model_list["generator"]
discriminator = model_pack.model_list["discriminator"]
classifier = model_pack.model_list["classifier"]

# Data
dataclass = (data.MNISTDataLoader()
             if args.dataset == "mnist" else data.CIFAR10DataLoader())
dataloader = torch.utils.data.DataLoader(
    dataclass.get_dataset(
        args.data_path,
        normalize=args.normalize_data,
        resize=args.image_size,
    ),
    batch_size=args.batch_size,
Example #5
steps = 10


class Database(object):
    def __init__(self):
        with open("./global.config") as f:
            self.config = json.load(f)
        self.client = pymongo.MongoClient(self.config["db_url"])
        self.db = self.client.model_db
        self.models = self.db.models

    def add_to_db(self, obj):
        self.models.insert_one(obj)


if __name__ == "__main__":
    # Initialize the database
    db = Database()

    # Grab models
    model_runner = models.Models(r"\data\data_ub.csv")

    # Run the models
    uni_baseline = model_runner.uni_baseline(steps)
    #uni_sarima = model_runner.uni_sarima(steps)
    print(uni_baseline)
    # Add results to database
    db.add_to_db(uni_baseline)
    #db.add_to_db(uni_sarima)
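The Database class above expects ./global.config to be a JSON file containing at least a db_url entry. A minimal sketch of creating such a file, assuming a local MongoDB instance:

import json

# Hypothetical config: Database() only reads the "db_url" key.
with open("./global.config", "w") as f:
    json.dump({"db_url": "mongodb://localhost:27017"}, f)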
Example #6
training_categories = [
    # (earlier entries omitted in this excerpt)
    'nine', 'six', 'zero', 'five'
]
train_data_dir = '../../data/preprocessed/train'
validation_data_dir = '../../data/preprocessed/validation'
nb_train_samples = 49700
nb_validation_samples = 2000
epochs = 10
batch_size = 32  # Note:  Must be less than or equal to the nb_validation_samples size.
img_width, img_height = 26, 99

if K.image_data_format() == 'channels_first':
    input_shape = (1, img_width, img_height)
else:
    input_shape = (img_width, img_height, 1)

m = models.Models()
#model = m.get_cifar_model(input_shape, 10)
#model = m.get_cifar_model_2(input_shape, 10)
model = m.get_covn2d_six_layer_model(input_shape, len(training_categories) + 1)
du = DataUtility(bucket_id='kaggle_voice_data', root_folder='/')

X, Y = du.load_data_local('../../data/npz', training_categories,
                          other_categories)
#X, Y = du.du.load_local_binary_data('../../data/npz', target)

x_train, y_train, x_test, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.33,
                                                    random_state=42)

# x_train -> Training data to feed the net
Example #7
def baseline_train():
    #Basic parameters
    gpus = FLAG.gpus
    batch_size = FLAG.batch_size
    epoches = FLAG.epoch
    LOG_INTERVAL = 10
    TEST_INTERVAL = 2
    data_name = FLAG.source
    target_name = FLAG.target
    model_name = FLAG.arch
    l2_decay = 5e-4
    lr = FLAG.lr
    #Loading dataset
    if FLAG.isLT:
        train_dataset,test_dataset,classes = baseline_LT_dataset(FLAG)
    else:    
        train_dataset,test_dataset_tgt,classes = my_baseline_dataset(FLAG)
        #print(train_dataset)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,
                    shuffle=True,num_workers=8,drop_last=True)
    #test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,
     #               shuffle=False,num_workers=8)

    test_loader_tgt = torch.utils.data.DataLoader(dataset=test_dataset_tgt,batch_size=batch_size,
                    shuffle=False,num_workers=8)
    #Define model
    Nets = models.Models(FLAG)
    base_model = Nets.model()
    if len(gpus)>1:
        gpus = gpus.split(',')
        gpus = [int(v) for v in gpus]
        base_model = nn.DataParallel(base_model,device_ids=gpus)

    base_model.to(DEVICE)
    #print(base_model)
    #Define Optimizer
    paras = dict(base_model.named_parameters())
    paras_new = []
    if 'resnet' in model_name or 'resnest' in model_name:
        for k,v in paras.items():
            if 'fc' not in k:
                paras_new.append({'params':[v],'lr':1e-3})
            else:
                paras_new.append({'params':[v],'lr':1e-2})

    elif model_name == 'vgg' or model_name == 'alexnet':
        for k,v in paras.items():
            if 'classifier.6' not in k:
                paras_new.append({'params':[v],'lr':1e-3})
            else:
                paras_new.append({'params':[v],'lr':1e-2})

    #print(paras_new)
    optimizer = optim.SGD(paras_new,lr=lr,momentum=0.9,weight_decay=l2_decay)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer,[50,80],gamma=0.1)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.94)
    #print(optimizer.param_groups[-1]['lr'])
    #Define loss criterion
    criterion = torch.nn.CrossEntropyLoss()

    #Training
    
    best_result = 0.0
    best_result1 = 0.0
    #Model store
    if FLAG.isLT:
        model_dir = os.path.join('./models/','baseline-'+data_name+'-'+target_name+'-'+model_name)
    else:
        model_dir = os.path.join('./models/','baseline-'+data_name+'-'+model_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    #Tensorboard configuration
    if FLAG.isLT:
        log_dir = os.path.join('./logs/','baseline-'+data_name+'-'+target_name+'-'+model_name)
    else:
        log_dir = os.path.join('./logs/','baseline-'+data_name+'-'+model_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(logdir=log_dir)
    for epoch in range(epoches):
        base_model.train()
        #scheduler.step(epoch)
        running_loss = 0.0
        
        for i,data in enumerate(train_loader,0):
            label = torch.squeeze(data[1].to(DEVICE))
            inputs,labels = data[0].to(DEVICE),label
            optimizer.zero_grad()
            outputs = base_model(inputs)
            loss = criterion(outputs,labels)
            #log training loss
            if i%5 ==0:
                n_iter = epoch*len(train_loader)+i
                writer.add_scalar('data/training loss',loss,n_iter)
                #print(optimizer.param_groups[0]['lr'])
            loss.backward()
            optimizer.step()

            #Print statistics
            running_loss += loss.item()
            if i%LOG_INTERVAL == 0: #Print every LOG_INTERVAL mini-batches
                print('Epoch:[{}/{}],Batch:[{}/{}] loss: {:.4f}'.format(epoch+1,epoches,i+1,len(train_loader),running_loss/LOG_INTERVAL))
                running_loss = 0

        scheduler.step(epoch)

        if epoch%TEST_INTERVAL == 0:   #Evaluate every TEST_INTERVAL epochs

            #acc_test,class_corr,class_total=baseline_test(base_model,test_loader,epoch)
            acc_test1,class_correct1,class_total1=baseline_test(base_model,test_loader_tgt,epoch)
            #log test acc
            writer.add_scalar('data/test accuracy',acc_test1,epoch)
            #Store the best model
            #if acc_test>best_result:
                #log results for classes
                #log_path = model_path = os.path.join(model_dir,
                            #'{}-{}-epoch_{}-accval_{}.csv'.format(data_name,model_name,epoch,round(acc_test,3)))
                #log_to_csv(log_path,classes,class_corr,class_total)
                #best_result = acc_test
           # else:
              #  print('The results in this epoch cannot exceed the best results !')
            if acc_test1>best_result1:
                #log results for classes
                log_path1 = model_path = os.path.join(model_dir,
                            '{}-{}-epoch_{}-accvaltgt_{}.csv'.format(data_name,model_name,epoch,round(acc_test1,3)))
                log_to_csv(log_path1,classes,class_correct1,class_total1)
                best_result1 = acc_test1
            else:
                print('The results in this epoch did not exceed the best result.')
    
    writer.close()
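The log_to_csv helper called above is not shown in this example. A minimal sketch of what such a per-class accuracy logger might look like, assuming classes, class_correct1, and class_total1 are index-aligned:

import csv

def log_to_csv(log_path, classes, class_correct, class_total):
    # Hypothetical sketch: one row per class with its correct/total counts and accuracy.
    with open(log_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['class', 'correct', 'total', 'accuracy'])
        for i, name in enumerate(classes):
            total = class_total[i]
            accuracy = class_correct[i] / total if total else 0.0
            writer.writerow([name, class_correct[i], total, round(accuracy, 4)])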
Example #8
    def train(self, target):
        start_time = time()
        img_width, img_height = 26, 99
        epochs = 20
        batch_size = 32
        tb_callback = CB.TensorBoard(log_dir='./logs',
                                     histogram_freq=0,
                                     batch_size=1,
                                     write_graph=True,
                                     write_grads=True,
                                     write_images=True,
                                     embeddings_freq=0,
                                     embeddings_layer_names=None,
                                     embeddings_metadata=None)

        m = models.Models()
        print('Training with target "{0}".'.format(target))
        du = DataUtility(bucket_id='kaggle_voice_data', root_folder='/')
        if K.image_data_format() == 'channels_first':
            input_shape = (1, img_width, img_height)
        else:
            input_shape = (img_width, img_height, 1)

        model = m.get_covn2d_six_layer_model(input_shape, 1)

        X, Y = du.load_local_binary_data('../../data/npz', target)
        # X, Y = du.load_cloud_binary_data(target)
        x_train, y_train, x_test, y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.1,
                                                            random_state=42)

        # Note: train_test_split returns (X_train, X_test, y_train, y_test),
        # so with this unpacking:
        #   x_train -> training inputs fed to the net
        #   x_test  -> training labels
        #   y_train -> validation inputs
        #   y_test  -> validation labels
        # The net is trained on (x_train, x_test) and validated on (y_train, y_test).
        # x_test = np_utils.to_categorical(x_test, 2)
        # y_test = np_utils.to_categorical(y_test, 2)

        new_x_train = np.expand_dims(x_train, axis=3)
        new_y_train = np.expand_dims(y_train, axis=3)

        # datagen = ImageDataGenerator(
        #     featurewise_std_normalization=True,
        #     rotation_range=0,
        #     height_shift_range=0.2,
        #     horizontal_flip=False
        # )

        #  Fit the data generator to the test data for featurewise_std.
        #datagen.fit(new_x_train)

        # x_train = x_train[0:nb_train_samples]
        # x_test = x_test[0:nb_train_samples]
        # y_train = y_train[0:nb_validation_samples]
        # y_test = y_test[0:nb_validation_samples]

        #model.fit_generator(datagen.flow(new_x_train, x_test, batch_size=batch_size),
        #                   steps_per_epoch=len(x_train) / batch_size, epochs=epochs, validation_data=(new_y_train, y_test))

        history = model.fit(x=new_x_train,
                            y=x_test,
                            validation_data=(new_y_train, y_test),
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=0,
                            callbacks=[tb_callback])

        stop_time = time()
        print("Total training time:  {0} seconds.".format(
            int(stop_time - start_time)))
        # model.save("./local_big_training")
        du.save_multi_model(self.save_dir, '{0}'.format(target), model)
        print("Model saved as {0}.h5".format(target))
        return {"name": target, "accuracy": history.history['acc']}
Example #9
def home():
    obj = models.Models()