Example #1
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# BADataset, BADataloader, FNN, EarlyStopping, train, valid, test, inference
# and OutputDir are assumed to come from the surrounding project.

def main(opt):
    # The three boolean flags to BADataset apparently select the
    # (train, valid, test) split.
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    # all_node_num is not defined in this snippet; it is presumably a
    # module-level constant in the original script.
    opt.n_existing_node = all_node_num

    net = FNN(opt)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)
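    # CosineSimilarity yields a similarity score rather than a loss; the
    # train/valid/test helpers presumably convert it into a quantity to
    # minimize (e.g. 1 - similarity).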

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': list(range(1, len(train_loss_ls) + 1)),
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
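
# Example #1 relies on an EarlyStopping helper that is not shown. Below is a
# minimal sketch consistent with how it is used above -- the call
# early_stopping(valid_loss, net, OutputDir), the early_stop flag, and the
# checkpoint.pt file -- though the original class may differ in its details.
class EarlyStopping:
    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss, model, output_dir):
        if self.best_loss is None or val_loss < self.best_loss:
            # Validation loss improved: checkpoint the model, reset the counter.
            if self.verbose:
                print('Validation loss improved to {:.6f}; saving model.'.format(val_loss))
            self.best_loss = val_loss
            torch.save(model.state_dict(), output_dir + '/checkpoint.pt')
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True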
Example #2
    # numpy (np) and the project helpers standard_scale, SVM, RF and FNN are
    # assumed to be imported in the enclosing module.
    def MFE(self, X_split, y_split, model):
        """Fit the requested model and evaluate it on the held-out split.

        X_split and y_split are apparently (train, valid, test) triples:
        index 0 is used for fitting, index 1 for validation and index 2
        for the final evaluation.
        """
        if model == 'SVM':
            X_split_scaled = standard_scale(X_split)
            Model = SVM()
            Model.fit(X_split_scaled[0], y_split[0])
            y_hat = Model.predict(X_split_scaled[2])

        elif model == 'RF':
            Model = RF()
            Model.fit(np.concatenate([X_split[0], X_split[1]]),
                      np.concatenate([y_split[0], y_split[1]]))
            y_hat = Model.predict(X_split[2])

        elif model == 'FNN':
            X_split_scaled = standard_scale(X_split)
            Model = FNN(model)
            Model.fit(X_split_scaled[0],
                      y_split[0],
                      validation_data=[X_split_scaled[1], y_split[1]],
                      epochs=self.MAX_EPOCH,
                      batch_size=self.BATCH_SIZE,
                      callbacks=[self.es])
            y_hat = Model.predict_classes(X_split_scaled[2])

        else:
            raise ValueError('model undefined: {}'.format(model))

        return self.evaluate(y_split[2], y_hat)
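
# The standard_scale helper used in Example #2 is not shown. A minimal sketch,
# assuming it fits the scaler on the training split only and then transforms
# every split (the original helper may behave differently):
from sklearn.preprocessing import StandardScaler

def standard_scale(X_split):
    scaler = StandardScaler().fit(X_split[0])      # fit on the train split only
    return [scaler.transform(X) for X in X_split]  # scale train/valid/test alike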
Example #3
import csv
import time

import torch.nn as nn
import torch.optim as optim

# Dataset, Dataloader, FNN, train and test are assumed to come from the
# surrounding project.

def main(opt):
    train_dataset = Dataset(opt.dataroot, True)
    train_dataloader = Dataloader(train_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=2)

    test_dataset = Dataset(opt.dataroot, False)
    test_dataloader = Dataloader(test_dataset, batch_size=opt.batchSize,
                                 shuffle=False, num_workers=2)

    net = FNN(d=opt.d, n=opt.n)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=2)
    optimizer = optim.Adam(net.parameters(), lr=opt.lr)

    # Mode 'a' appends, so re-running the script adds a second header row;
    # mode 'w' may be what was intended.
    with open('train.csv', 'a') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(
            ["train_loss", "train_gain", "baseline_loss", "baseline_gain"])

    with open('test.csv', 'a') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(
            ["test_loss", "test_gain", "baseline_loss", "baseline_gain"])

    start = time.time()

    for epoch in range(0, opt.niter):
        train(epoch, train_dataloader, net, criterion, optimizer, opt)
        test(test_dataloader, net, criterion, optimizer, opt)

    elapsed_time = time.time() - start

    with open('time.csv', 'a') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["学習時間", elapsed_time])
Example #4
def run_FNN():
    # field_size, feature_sizes, num_epoch, online and the Xi/Xv/y arrays are
    # assumed to be defined earlier in the original script.
    fnn = FNN.FNN(field_size, feature_sizes, batch_size=32 * 16, verbose=True,
                  use_cuda=True, h_depth=3, pre_weight_decay=0.0001,
                  weight_decay=0.00001, learning_rate=0.0001, use_fm=True,
                  use_ffm=False, n_epochs=num_epoch)
    if not online:
        # 'ealry_stopping' is left as spelled; it presumably matches the
        # underlying fit() signature.
        fnn.fit(Xi_train, Xv_train, y_train, Xi_test, Xv_test, y_test,
                ealry_stopping=True, refit=True)
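# In CTR-style models such as this FNN, Xi_* presumably holds the feature
# indices and Xv_* the corresponding feature values (the common sparse
# (index, value) encoding); the y_* arrays are the click labels.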
Example #5
                    help="the value of diminishing momentum")
args = parser.parse_args()
dd = args.data_dir
ds = args.dataset  #dataset name

###################################### build model
if ds == "mnist":
    D_in, H, D_out = 784, 300, 10
    num_epochs = 200
    batch_size = 100
else:
    D_in, H, D_out = 41, 40, 24  # tentative
    num_epochs = 100
    batch_size = 1000

model = FNN(D_in, H, D_out, momentum=float(args.momentum), gpu=True)

#################################### load data
train_set = Dataset(dd + "/" + ds + ".train.x.pt",
                    dd + "/" + ds + ".train.y.pt")
train_size = len(train_set)
train_loader = train_set.getLoader(batch_size, shuffle=True)
test_set = Dataset(dd + "/" + ds + ".test.x.pt", dd + "/" + ds + ".test.y.pt")
test_size = len(test_set)
test_loader = test_set.getLoader(test_size, shuffle=True)
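# Note: the test loader uses batch_size=test_size, so evaluation runs as a
# single full-dataset batch.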

print("Finishing loading data")

################################### train model
test_err = []
test_acc = []
def train(
        configName,
        model_parameters,
        modelType,
        rateSize,
        optim,
        lr,
        momentum,
        n_class,
        epochSize,
        n_train_sample,
        n_test_sample,
        device,
        Rdevice,
        dataset,
        dataPATH,
        logPATH,
        top_n_acc,
):
    '''
    modelType: one of the architecture keys dispatched below (a mix of
    torchvision models and project-local networks)
    '''

    loss_train = []
    acc_train = []
    loss_test = []
    acc_test = []
    rate_train = []
    rate_test = []
    channel_train = []
    channel_test = []

    if modelType == 'res18':
        model = resnet18(act=model_parameters['act'])
    elif modelType == 'fnn':
        model = FNN(act=model_parameters['act'])
    elif modelType == 'autoencoder':
        model = NNencoder(act=model_parameters['act'])
    elif modelType == 'vgg':
        model = vgg13(num_classes=n_class)
    elif modelType == 'densenet':
        model = models.densenet121()
    elif modelType == 'alexnet':
        model = AlexNet(num_classes=n_class, act=model_parameters['act'])
    elif modelType == 'lenet':
        model = LeNet(num_classes=n_class, in_channels=int(model_parameters['n_channel']))
    elif modelType == 'CaL':
        model = CaLnet(
            in_channel=int(model_parameters['in_channel']),
            num_classes=n_class,
            n_Layer=int(model_parameters['n_layer']),
            n_Channel=model_parameters['n_channel'],
            act=model_parameters['act']
        )
    else:
        raise ValueError('unknown modelType: {}'.format(modelType))

    loss_fn = F.cross_entropy

    model = model.train().to(device)

    if optim == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    elif optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        raise ValueError('unsupported optimizer: {}'.format(optim))

    transform = transforms.Compose(
        [transforms.ToTensor()]
    )
    trainset, testset = useData(mode=dataset, PATH=dataPATH, transform=transform)

    # Fixed probe batches (the first rateSize samples) used by getRate below.
    train_X = torch.stack([trainset[i][0] for i in range(rateSize)]).to(device)
    train_Y = torch.tensor([trainset[i][1] for i in range(rateSize)])
    test_X = torch.stack([testset[i][0] for i in range(rateSize)]).to(device)
    test_Y = torch.tensor([testset[i][1] for i in range(rateSize)])

    trainloader = DataLoader(
        trainset,
        batch_size=n_train_sample,
        shuffle=True
    )
    testloader = DataLoader(
        testset,
        batch_size=n_test_sample,
        shuffle=True
    )

    # N, C, H, W = train_data.size()
    # n, _, _, _ = test_data.size()
    # if C == 1:
    #     train_data = train_data.expand(N, 3, H, W)
    #     test_data = test_data.expand(n, 3, H, W)

    def cross_validation_epoch(
            model,
            loader,
            optimizer,
            top_n_acc
    ):
        loss = 0
        acc = 0

        n_iter = len(loader)

        for data, label in tqdm(loader):
            # Forward pass; the model apparently takes the labels as an input too.
            pre = model(data.to(device), label, device=device)
            optimizer.zero_grad()
            l = loss_fn(target=label.to(device), input=pre)
            l.backward()
            optimizer.step()

            pre = pre.detach().cpu().numpy()
            ac = top_n(pre=pre, label=label.cpu().numpy(), n=top_n_acc)

            torch.cuda.empty_cache()

            loss += l.item()
            acc += ac

        return loss / n_iter, acc / n_iter

    def estimate(
            model,
            loader,
            top_n_acc
    ):
        loss = 0
        acc = 0

        n_iter = len(loader)

        # Note: the original evaluates with the model left in train mode and
        # without torch.no_grad(); wrapping this loop in torch.no_grad() would
        # avoid building the autograd graph during evaluation.
        for data, label in tqdm(loader):
            pre = model(data.to(device), label, device=device)

            l = loss_fn(target=label.to(device), input=pre)

            pre = pre.detach().cpu().numpy()
            ac = top_n(pre=pre, label=label.cpu().numpy(), n=top_n_acc)

            loss += l.item()
            acc += ac


        return loss / n_iter, acc / n_iter

    # def getRate(rateSample):
    #     np.cuda.Device(5).use()
    #     batchRate = []
    #     for i in range(len(rateSample)):
    #         _, rate = model(rateSample[i].unsqueeze(dim=0).to(device), return_rate=True)
    #         batchRate.append(rate)
    #
    #     return batchRate

    def getRate(rate_X, rate_Y):
        # Query the model for its per-layer rate and channel statistics on the
        # fixed probe batch.
        _, rate, Channel = model(x=rate_X, sample=rate_X, label=rate_Y, device=device, return_rate=True)
        return rate, Channel  # rate: (n_layer, 2)

    for i in range(epochSize):
        rate, C = getRate(train_X, train_Y)
        rate_train.append(rate)
        channel_train.append(C)

        # print(rate_train[-1],rate_train[-1])
        rate, C = getRate(test_X, test_Y)
        rate_test.append(rate)
        channel_test.append(C)

        loss, acc = cross_validation_epoch(
            model=model,
            loader=trainloader,
            optimizer=optimizer,
            top_n_acc=top_n_acc
        )

        loss_train.append(loss)
        acc_train.append(acc)

        loss, acc = estimate(
            model=model,
            loader=testloader,
            top_n_acc=top_n_acc
        )
        loss_test.append(loss)
        acc_test.append(acc)

        print("---Epoch {0}---\nLoss: --train{1} --test{2}\nAcc: --train{3} --test{4}".format(
            i+1, loss_train[-1], loss_test[-1], acc_train[-1], acc_test[-1]
        ))
        torch.cuda.empty_cache()
    # 'rate_train': torch.stack(rate_train), # (epoch, n_layer, k)
    torch.save(
        obj={
            'model_state_dict':model.state_dict(),
            'acc_train': acc_train,
            'acc_test': acc_test,
            'loss_train': loss_train,
            'loss_test': loss_test,
            'rate_test': torch.stack(rate_test),
            'rate_train': torch.stack(rate_train),
            'channel_train': torch.stack(channel_train),
            'channel_test': torch.stack(channel_test)
        },
        f=logPATH + '/network/'+ modelType + '/' + dataset + '/' + configName[:configName.find('.')] + '.pth'
    )
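
# A hypothetical invocation of the train() function above; every value below
# is a placeholder chosen to satisfy the signature, not taken from the
# original script.
if __name__ == '__main__':
    train(
        configName='fnn_mnist.json',
        model_parameters={'act': 'relu', 'n_channel': 1,
                          'in_channel': 1, 'n_layer': 3},
        modelType='fnn',
        rateSize=128,
        optim='SGD',
        lr=0.01,
        momentum=0.9,
        n_class=10,
        epochSize=100,
        n_train_sample=100,
        n_test_sample=1000,
        device='cuda:0',
        Rdevice='cuda:0',
        dataset='mnist',
        dataPATH='./data',
        logPATH='./log',
        top_n_acc=1,
    )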
Example #7
from flask import Flask, jsonify, request
from flask_cors import CORS
import json
from model import FakeNewsNetwork as FNN
from featureextract import UrlNGram, UrlAnalyzer, get_site_info

app = Flask(__name__)
CORS(app)

with open('../data/urls.json') as url_file:
    urls = json.load(url_file)

model = FNN(7, 2)
model.load_weights('../data/fake-news-model.pt')
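# FNN(7, 2): presumably 7 input features and 2 output classes (fake vs. real).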

raw_bigram = UrlNGram(urls['train']['fake']['raw'] +
                      urls['train']['real']['raw'])
clean_bigram = UrlNGram(urls['train']['fake']['clean'] +
                        urls['train']['real']['clean'])

analyzer = UrlAnalyzer(urls['popular'])


@app.route('/predict', methods=['POST'])
def predict():
    url = request.json['url']
    clean_url = analyzer.clean_url(url)
    try:
        entropy = raw_bigram.get_entropy(url)
        perplexity = raw_bigram.get_perplexity(url)