print("Data set loaded...")
        # Excerpt: prepares a 10-fold split of data_set['words'] and builds the
        # two-class model. `data_set` and `args` are defined outside this excerpt.

        # Sample size
        # One sample per entry of data_set['words']; the fold size is the sample
        # count divided by 10, rounded up.
        n_samples = len(data_set['words'])
        fold_size = int(math.ceil(n_samples / 10.0))

        # Get truths
        # Keep only the first component of each label entry.
        # NOTE(review): presumably index 0 is the gender label, given the
        # ("male", "female") classes passed below -- confirm against the code
        # that builds the data set.
        truths = []
        for truth in data_set['labels']:
            truths += [truth[0]]
        # end for

        # Deep-Learning model
        # PAN17ConvNet created with n_classes=2, wrapped in PAN17DeepNNModel; the
        # cuda / lr / momentum / log_interval / seed settings are read from `args`.
        deep_learning_model = PAN17DeepNNModel(PAN17ConvNet(n_classes=2),
                                               classes=("male", "female"),
                                               cuda=args.cuda,
                                               lr=args.lr,
                                               momentum=args.momentum,
                                               log_interval=args.log_interval,
                                               seed=args.seed)

        # K-10 fold
        # Reshape samples and truths in place into 10 rows of fold_size entries.
        # NOTE(review): assigning .shape fails when the element count differs
        # from 10 * fold_size; since fold_size is rounded up, this appears to
        # require n_samples to be a multiple of 10 -- confirm.
        grams_set = np.array(data_set['words'])
        truths_set = np.array(truths)
        grams_set.shape = (10, fold_size)
        truths_set.shape = (10, fold_size)

        # Select training and test sets
        # The last row (fold) is held out for testing; np.delete returns copies
        # of the arrays without that row, which become the training data.
        test = grams_set[-1]
        test_truths = truths_set[-1]
        training = np.delete(grams_set, -1, axis=0)
        training_truths = np.delete(truths_set, -1, axis=0)
# Example no. 2
0
        # Excerpt: loads the pickled data set, prepares a 10-fold split of
        # data_set['2grams'] and builds the five-class model. `f` and `args` are
        # defined outside this excerpt.
        # NOTE(review): pickle.load can execute arbitrary code while unpickling;
        # make sure `f` only ever refers to a trusted file.
        data_set = pickle.load(f)

        # Sample size
        # One sample per entry of data_set['2grams']; the fold size is the sample
        # count divided by 10, rounded up.
        n_samples = len(data_set['2grams'])
        fold_size = int(math.ceil(n_samples / 10.0))

        # Get truths
        # Keep only the second component of each label entry.
        # NOTE(review): presumably index 1 is the language-variety label, given
        # the country names passed as classes below -- confirm.
        truths = []
        for truth in data_set['labels']:
            truths += [truth[1]]
        # end for

        # Deep-Learning model
        # PAN17ConvNet created with n_classes=5, wrapped in PAN17DeepNNModel with
        # five class names; the remaining settings are read from `args`.
        deep_learning_model = PAN17DeepNNModel(PAN17ConvNet(n_classes=5), classes=('great britain', 'canada',
                                                                                   'ireland', 'new zealand',
                                                                                   'australia'), cuda=args.cuda,
                                               lr=args.lr, momentum=args.momentum, log_interval=args.log_interval,
                                               seed=args.seed)

        # K-10 fold
        # The sample array is indexed up to shape[2], so it has at least three
        # axes; axes 1 and 2 (per-sample matrix size) are kept while the first
        # axis is split in place into 10 folds of fold_size samples.
        # NOTE(review): assigning .shape fails when the element count does not
        # match; since fold_size is rounded up, this appears to require
        # n_samples to be a multiple of 10 -- confirm.
        grams_set = np.array(data_set['2grams'])
        m_height = grams_set.shape[1]
        m_width = grams_set.shape[2]
        truths_set = np.array(truths)
        grams_set.shape = (10, fold_size, m_height, m_width)
        truths_set.shape = (10, fold_size)

        # Select training and test sets
        # The last fold is held out for testing; np.delete returns a copy of the
        # sample array without that fold, used as the training data.
        test = grams_set[-1]
        test_truths = truths_set[-1]
        training = np.delete(grams_set, -1, axis=0)
        # Excerpt: start of a cross-validation loop. `data_set`, `fold_size`,
        # `params` and `args` are defined outside this excerpt, and the loop body
        # continues past its last visible line.

        # Get truths
        # Keep only the second component of each label entry.
        truths = []
        for truth in data_set['labels']:
            truths += [truth[1]]
        # end for

        # Per-fold accumulators, zero-initialised: one slot per fold in `minis`
        # and one (epoch, fold) cell in `success_rates`.
        # NOTE(review): how these are filled is not visible in this excerpt.
        minis = np.zeros(args.k)
        success_rates = np.zeros((args.epoch, args.k))

        for k in range(args.k):
            print("%d th fold" % k)

            # Deep-Learning model
            # A fresh model is created for every fold. For the selected language,
            # params[args.lang][2] supplies the class names (its length gives
            # n_classes) and entries [0] and [1] are passed on as the network's
            # `params` tuple.
            deep_learning_model = PAN17DeepNNModel(PAN17ConvNet(n_classes=len(params[args.lang][2]), params=(params[args.lang][0],
                                                params[args.lang][1])), classes=params[args.lang][2], cuda=args.cuda, lr=args.lr,
                                                momentum=args.momentum, log_interval=args.log_interval,
                                                seed=args.seed)

            # K-10 fold
            # The arrays are rebuilt from data_set on every iteration and split
            # in place into 10 folds of fold_size samples each.
            # NOTE(review): the fold count is hard-coded to 10 while the loop
            # runs args.k times; indexing fold k fails for k >= 10 and folds are
            # left unused when args.k < 10 -- confirm args.k is always 10.
            # NOTE(review): these arrays do not depend on k, so they look like
            # candidates for hoisting out of the loop -- confirm nothing later
            # in the loop body mutates them in place first.
            grams_set = np.array(data_set['2grams'])
            m_height = grams_set.shape[1]
            m_width = grams_set.shape[2]
            truths_set = np.array(truths)
            grams_set.shape = (10, fold_size, m_height, m_width)
            truths_set.shape = (10, fold_size)

            # Select training and test sets
            # Fold k is held out for testing; np.delete returns copies of the
            # arrays without fold k, which become the training data.
            test = grams_set[k]
            test_truths = truths_set[k]
            training = np.delete(grams_set, k, axis=0)
            training_truths = np.delete(truths_set, k, axis=0)