Example #1
0
    def test_get_path(self):
        """Check the pair returned by ``get_path`` for three fixture states."""
        # Fixture attribute name -> tuple that get_path must return for it.
        expectations = (
            ("blank_state", (False, False)),
            ("bfs_state_1", (False, True)),
            ("get_path_1", (True, True)),
        )
        for fixture_name, expected in expectations:
            state = getattr(self, fixture_name)
            self.assertEqual(expected, get_path(state))
def model_creation_pipeline(config_path: str):
    """Read the training config, prepare the model folder and build the model.

    Args:
        config_path: Passed to ``get_path`` together with ``CONFIG_DIR`` to
            locate the training configuration.

    Returns:
        A ``(params, model)`` tuple: the parameters read from the config and
        the result of ``model_pipeline(params)``.
    """
    params = read_training_config_params(get_path(CONFIG_DIR, config_path))

    model_folder = os.path.join(os.getcwd(), MODELS_DIR, params.model_folder)
    # makedirs(exist_ok=True) also creates a missing MODELS_DIR parent and
    # cannot fail because the folder appeared between a check and the creation.
    os.makedirs(model_folder, exist_ok=True)

    # The log file lives inside the model folder created above.
    setup_logging(
        params.logging_config,
        os.path.join(model_folder, "train.log"),
    )
    model = model_pipeline(params)
    return params, model
Example #3
0
import os

import numpy as np
from matplotlib import pyplot as plt

from src.data import L
from src.utils import get_trial, get_path


# Location one level above whatever get_path reports for this file.
# NOTE(review): get_path comes from src.utils; presumably it returns the
# directory containing the given file — confirm.
sess_root = get_path(__file__) + "/.."
# Directory whose file names are indexed further down in this script.
plot_dir = sess_root + "/plots"

# Two indexes over the plot files: trial string -> list of feature strings,
# and feature string -> list of trial strings.
by_trial = {}
by_feature = {}


def get_plotname(trial, feature):
    """Return the plot file name ``<trial>-<feature>.png``."""
    stem = "%s-%s" % (trial, feature)
    return stem + ".png"


# Index every file in plot_dir; names are expected to look like
# "<trial>-<feature>.<ext>".
for f in os.listdir(plot_dir):
    # The tuple unpacking raises ValueError for names that do not contain
    # exactly one "-" followed by exactly one ".".
    (t_str, rest) = f.split("-")
    (f_str, ext) = rest.split(".")
    # dict.has_key() no longer exists on Python 3; setdefault() creates the
    # list on first use and works on both Python 2 and 3.
    by_trial.setdefault(t_str, []).append(f_str)
    by_feature.setdefault(f_str, []).append(t_str)

for k in by_feature:
import matplotlib.pyplot as plt
import numpy as np

from src.data_interface import d, L_clean
from src.utils import get_path


# Location one level above whatever get_path reports for this file.
path = get_path(__file__) + '/..'

# Trial ids 0 through 50 inclusive.
trials = range(0,51)

# Only the first trial and every tenth trial get a visible tick label;
# all other positions get an empty string.
ticklabels = []
for i in trials:
    ticklabels.append(i if (i == trials[0] or i % 10 == 0) else '')

# Font dictionary handed to plt.title below.
font = dict(weight='normal', size=16)

for label in L_clean:
    # One entry per trial: that trial's view of the current feature.
    data = [d.get_trial(i).get_feature(label).view() for i in trials]
    plt.title(
        'Boxplot of feature {0} in the trials {1}-{2}'.format(
            label, trials[0], trials[-1]),
        font)
    plt.boxplot(data)
    ax = plt.gca()
    ax.set_xticklabels(ticklabels)
    # Same tick label size on the x axis first, then the y axis.
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(10)
Example #5
0
import os

from src.utils import get_path


# Location one level above whatever get_path reports for this file.
sess_root = get_path(__file__) + '/..'

# Collect the names (without their last extension) of the entries in
# sess_root that end in 'rst', do not start with 't' and are not the index.
# rsplit('.', 1) keeps working for names containing more than one dot, where
# unpacking the result of split('.') would raise ValueError.
lines = sorted(
    f.rsplit('.', 1)[0]
    for f in os.listdir(sess_root)
    if f[0] != 't' and f.endswith('rst') and not f.startswith('index')
)

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the print statement form is a syntax error on Python 3.
print("\n".join(lines))
Example #6
0
import json

from src.utils import get_path


# Base location of the JSON files this script reads and writes.
# NOTE(review): presumably get_path returns the directory of the given file — confirm.
path = get_path(__file__)

def get_dict(file_name):
    """Load and return the JSON content of ``file_name`` found under ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    ``json.load`` raises on malformed content.
    """
    with open(path + '/' + file_name, 'r') as f:
        return json.load(f)

unique_pr_trial = get_dict('unique_values_pr_trial.json')
unique_all_data = get_dict('unique_values.json')

# For every key of the all-data file, pair its value with the "min" and "max"
# entries stored under the same key in the per-trial file.
values_combined = {
    k: {
        "all_data": unique_all_data[k],
        "min_pr_trial": unique_pr_trial[k]["min"],
        "max_pr_trial": unique_pr_trial[k]["max"],
    }
    for k in unique_all_data
}

# Write the merged mapping next to the input files.
with open(path + '/unique_values_combined.json', 'w') as f:
    json.dump(values_combined, f, indent=4)
Example #7
0
def main():
    """Parse the command line and run the requested modes of the classifier.

    The mode flags (``--init``, ``--train``, ``--test``, ``--demo``,
    ``--plot``) can be combined; train, test, demo and plot are executed in
    that order. Invalid combinations are reported through ``logging.error``
    and terminate the process with exit status 1. Running without any
    argument prints the help text and exits.
    """
    # Argparse custom actions
    class SetModes(argparse.Action):
        """Set the modes of operations."""
        def __call__(self, parser, args, values, option_string=None):
            # Every selected mode name becomes a True attribute on the
            # namespace, i.e. the same attribute the --train/--test/--demo/
            # --plot flags write to.
            for value in values:
                setattr(args, value, True)

    # yapf: disable
    parser = argparse.ArgumentParser(description='Fake News Classifier')
    # Initialization
    parser.add_argument('--init', action='store_true', default=False,
                        help='perform initialization')
    # Modes
    parser.add_argument('-m', '--mode', action=SetModes, nargs='+', choices=['train', 'test', 'demo', 'plot'],
                        help='specify the mode of operation: train, test, demo, plot')
    parser.add_argument('--train', action='store_true', default=False,
                        help='train the model')
    parser.add_argument('--test', action='store_true', default=False,
                        help='test the model (must either train or load a model)')
    parser.add_argument('--demo', action='store_true', default=False,
                        help='demo the model on linewise samples from a file (must either train or load a model)')
    parser.add_argument('--plot', action='store_true', default=False,
                        help='plot training data (must either train or have existing training data)')
    # Options
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-c', '--config', type=str,
                        help='path to configuration json file (overrides args)')
    parser.add_argument('--data-loader', type=str, default='BatchLoader',
                        help='data loader to use (default: "BatchLoader")')
    parser.add_argument('--dataset', type=str, default='FakeRealNews',
                        help='dataset to use (default: "FakeRealNews")')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('-f', '--file', type=str,
                        help='specify a file for another argument')
    parser.add_argument('--lr', '--learning-rate', dest='learning_rate', type=float, default=1e-4,
                        help='learning rate (default: 1e-4)')
    parser.add_argument('-l', '--load', type=int, metavar='EPOCH',
                        help='load a model and its training data')
    parser.add_argument('--loss', type=str, default='BCEWithLogitsLoss',
                        help='loss function (default: "BCEWithLogitsLoss")')
    parser.add_argument('--model', type=str, default='FakeNewsNet',
                        help='model architecture to use (default: "FakeNewsNet")')
    parser.add_argument('-s', '--sample-size', type=int, metavar='N',
                        help='limit sample size for training')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--save', action='store_true', default=True,
                        help='save model checkpoints and training data (default: True)')
    parser.add_argument('--no-save', dest='save', action='store_false')
    args = parser.parse_args()
    # yapf: enable

    # Print help if no args
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    # Configure logger
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('matplotlib').setLevel(logging.WARNING)

    # Load configuration file if specified
    if args.config is not None:
        utils.load_config(args)

    # Exit if no mode is specified. `init` has its own flag and is not a
    # valid `--mode` choice, so the message names both ways to pick a mode.
    if not args.init and not args.train and not args.test and not args.demo and not args.plot:
        logging.error(
            'No mode specified. Please specify with: --init or --mode {train,test,demo,plot}'
        )
        sys.exit(1)
    # Exit on `--load` if run directory not found
    if (args.load is not None or
        (args.plot
         and not args.train)) and not os.path.isdir(utils.get_path(args)):
        logging.error(
            'Could not find directory for current configuration {}'.format(
                utils.get_path(args)))
        sys.exit(1)
    # Exit on `test` or `demo` without `train` or `--load EPOCH`
    if (args.test or args.demo) and not (args.train or args.load is not None):
        logging.error(
            'Cannot run `test` or `demo` without a model. Try again with either `train` or `--load EPOCH`.'
        )
        sys.exit(1)
    # Exit on `demo` without a string file
    if args.demo and not args.file:
        logging.error(
            'Cannot run `demo` without a file. Try again with `--file FILE`.')
        sys.exit(1)

    # Setup run directory. At least one mode flag is set at this point (see
    # the mode check above), so `not args.init` is the only mode test needed:
    # additionally requiring that train/test/demo/plot are all unset could
    # never be satisfied here and would leave this block unreachable.
    if args.save and not args.init:
        utils.save_config(args)
        path = utils.get_path(args) + '/output.log'
        os.makedirs(os.path.dirname(path), exist_ok=True)
        logging.getLogger().addHandler(logging.FileHandler(path))

    # Set random seeds
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Variable declarations
    training_data = None

    # Load GloVe vocabulary
    if args.init or args.train or args.test or args.demo:
        glove = torchtext.vocab.GloVe(name='6B', dim=50)

    # Perform initialization
    if args.init or args.train or args.test:
        # Determine which dataset to use
        dataset = utils.get_dataset(args)
        # Preload the dataset
        dataset.load()
        # Get preprocessed samples
        samples = preprocessing.get_samples(dataset, glove, args.init)
        random.shuffle(samples)

    # DataLoader setup for `train`, `test`
    if args.train or args.test:
        # Select data loader to use
        DataLoader = utils.get_data_loader(args)

        # Split samples
        split_ratio = [.6, .2, .2]
        trainset, validset, testset = list(
            DataLoader.splits(samples, split_ratio))
        if args.sample_size is not None:  # limit samples used in training
            trainset = trainset[:args.sample_size]
            # Shrink the validation set by the same valid/train ratio.
            validset = validset[:int(args.sample_size * split_ratio[1] /
                                     split_ratio[0])]

        # Get data loaders
        train_loader, valid_loader, test_loader = [
            DataLoader(split, batch_size=args.batch_size)
            for split in [trainset, validset, testset]
        ]

    # Load samples for demo
    if args.demo:
        if not os.path.isfile(args.file):
            logging.error('Could not find file for demo at {}'.format(
                args.file))
            sys.exit(1)

        # Read samples from the input file, one per non-blank line
        with open(args.file, 'r') as f:
            samples = [line for line in f if line.strip()]
        # Demo lines carry no ground truth; 0.5 is used as a placeholder label.
        data = pd.DataFrame({
            'text': samples,
            'label': [0.5] * len(samples)
        })
        # Preprocess samples
        preprocessing.clean(data)
        samples = preprocessing.encode(data, glove)
        samples = [(torch.tensor(text).long(), label)
                   for text, label in samples]

        # Select data loader to use
        DataLoader = utils.get_data_loader(args)

        # Get data loader
        data_loader = DataLoader(samples, batch_size=1, shuffle=False)

    # Model setup for `train`, `test`, `demo`
    if args.train or args.test or args.demo:
        # Create the model
        model = utils.get_model(glove, args)

        # Load a model
        if args.load is not None:
            utils.load_model(args.load, model, args)

    # Run `train`
    if args.train:
        training_data = training.train(model, train_loader, valid_loader, args)

    # Run `test`
    if args.test:
        if args.train or args.load is not None:
            criterion = utils.get_criterion(args.loss)
            acc, loss = training.evaluate(model, test_loader, criterion)
            logging.info('Testing accuracy: {:.4%}, loss: {:.6f}'.format(
                acc, loss))
        else:
            logging.error('No model loaded for testing')
            sys.exit(1)

    # Run `demo`
    if args.demo:
        if args.train or args.load is not None:
            model.eval()  # set model to evaluate mode
            logging.info('-- Results --')
            for i, (text, _) in enumerate(data_loader):
                preview = data['text'][i][:32] + '...'
                out = model(text).flatten()
                prob = torch.sigmoid(out)  # apply sigmoid to get probability
                pred = (prob >
                        0.5).long()  # predict `true` if greater than 0.5
                label = ['fake', 'true'][pred.item()]
                # Wrap the label in ANSI colour codes (92 for true, 93 for fake).
                label = '{}{}{}'.format(
                    '\033[92m' if pred.item() else '\033[93m', label,
                    '\033[0m')
                # Confidence is the probability of the predicted class.
                confidence = (prob if pred.item() else 1 - prob).item()
                logging.info(
                    'Report {}: {} with {:.2%} confidence - "{}"'.format(
                        i, label, confidence, preview))
        else:
            logging.error('No model loaded for demo')
            sys.exit(1)

    # Run `plot`
    if args.plot:
        if training_data is None:
            training_data = utils.load_training_data(args, allow_missing=False)
        if args.load is not None and not args.train:
            # Only keep the entries up to and including the loaded epoch.
            for k, v in training_data.items():
                training_data[k] = v[:args.load + 1]

        logging.info('Plotting training data')
        training.plot(training_data)
Example #8
0
import numpy as np
from matplotlib import pyplot as plt

from src.data_interface import d
from src.utils import get_path


# Output directory for the per-trial figures.
path = get_path(__file__) + '/../plots/gear_idea'
# Column positions used to index the array returned by a trial's view().
V10_idx = 31
IsAlert_idx = 2

for trial_id in d.trial_id_list:
    t = d.get_trial(trial_id)
    v = t.view()
    unique_values = t.V10.unique_values()
    # NOTE(review): this membership test only makes sense if unique_values()
    # returns a scalar (e.g. a count) — confirm against src.data_interface.
    if unique_values in [1,4,5]:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_ylim(-1, 2)
        ax.plot(range(len(v)), v[:, V10_idx])
        ax.plot(range(len(v)), v[:, IsAlert_idx])
        ax.set_title('V10 vs IsAlert - Trial: %s' % (trial_id,))
        file_name = '/t%s.png' % (trial_id,)
        fig.savefig(path + '/' + file_name)
        # pyplot keeps every figure it created alive until it is closed, so
        # release each one once it has been written to disk.
        plt.close(fig)
Example #9
0
        self.register()

        img = pil_to_tensor(Image.open(self.image_paths[self.r]))
        print(self.image_paths[self.r])
        # forward
        output = self.model(img)
        # acc
        h_x = F.softmax(output, dim=1).data.squeeze()
        pred = h_x.argmax(0).item()

        print(pred)
        return self.feature


# Model file handed to Fusion_Search.
model_path = './models/model3.pth'
# Whatever get_path returns for the mixed dataset directory.
# NOTE(review): the plural name suggests a list of image paths — confirm.
mix_paths = get_path('./datasets/mix/')

search = Fusion_Search(model_path, mix_paths)

# The value returned by predict() is iterated further down in this script.
feature = search.predict()

for f in feature:
    dog_cnt = 0
    cat_cnt = 0

    show_feature(f)

    for i in f:
        #i = Image.fromarray(i)
        #i = i.resize((128, 128), Image.BILINEAR)
    def warmup_model(self):
        """Load the pickled model into ``self.model``.

        Initially a model is loaded by path, but this depends on the model
        storage convention used by the team.
        """
        # Raw string: the same characters as '\\bin\model.pkl', written
        # without the invalid "\m" escape sequence.
        # SECURITY: pickle.load can execute arbitrary code while loading;
        # only point this at a trusted model file.
        with open(get_path(r'\bin\model.pkl'), 'rb') as model_file:
            self.model = pickle.load(model_file)
        print("[Progress]: Warm Up.")
def receive_model_input():
    """Read the dataset CSV located via ``get_path`` and return the result of ``pd.read_csv``."""
    # Raw string: the same characters as '\\bin\dataset.csv', written without
    # the invalid "\d" escape sequence.
    return pd.read_csv(get_path(r'\bin\dataset.csv'))