Example #1
0
    # Plot learning curves for one task of a profiling dataset, using either
    # the recipe given on the command line or the one configured for the task.
    args = parser.parse_args()
    infolder = args.infolder
    task = args.feature
    recipe = args.recipe

    # print() with one parenthesised argument behaves identically on
    # Python 2 and Python 3, unlike the bare print statement.
    print('Loading dataset...')
    data = ProfilingDataset(infolder)
    print('Loaded {} users...\n'.format(len(data.entries)))
    config = data.config
    tasks = config.tasks
    if task in tasks:
        print('Creating learning curves for %s task..' % task)
        # Fall back to the recipe configured for this task when none was
        # supplied on the command line.
        if not recipe:
            recipe = config.recipes[task]
        # Announce the recipe before loading it so a failure in from_recipe
        # can be traced back to the offending recipe.
        print('Loading recipe from file %s..' % recipe)
        clf = from_recipe(recipe)
        X, y = data.get_data(task)
        # 5-fold cross validation: each fold holds out one fifth of the
        # samples as the validation set. No shuffle is requested, so
        # random_state is expected to have no effect on the folds.
        cv = cross_validation.KFold(len(X), n_folds=5, random_state=0)
        # Alternative: repeated random 80/20 splits for smoother curves.
        # cv = cross_validation.ShuffleSplit(len(X), n_iter=20, test_size=0.2,
        #                                    random_state=0)
        title = 'Learning Curves from recipe %s' % recipe
        plot_learning_curve(clf, title, X, y, ylim=(0.3, 1.01), cv=cv, n_jobs=-1)
        plt.show()
    else:
        print('task "%s" does not exist - try one of the'
              ' following: %s' % (task, tasks))
Example #2
0
    # Command-line options: dataset folder and number of CV folds.
    parser.add_argument('-i', '--input', type=str, required=True,
                        dest='infolder',
                        help='path to folder with pan dataset for a language')
    parser.add_argument('-n', '--numfolds', type=int, default=4,
                        dest='num_folds',
                        help='Number of folds to use in cross validation')

    args = parser.parse_args()
    infolder, num_folds = args.infolder, args.num_folds

    print('Loading dataset...')
    dataset = ProfilingDataset(infolder)
    print('Loaded %s users...\n' % len(dataset.entries))
    config = dataset.config
    tasks = config.tasks

    # Cross-validate the model built from each task's configured recipe.
    print('\n--------------- Thy time of Running ---------------')
    for task in tasks:
        cross_val(dataset, task, from_recipe(config.recipes[task]), num_folds)

    # Replay the messages collected in `log` once all tasks have run.
    print('\n--------------- Thy time of Judgement ---------------')
    for message in log:
        print(message)
Example #3
0
if __name__ == '__main__':
    # Train one model per task on a PAN dataset and dump all of them into a
    # single compressed joblib file named after the dataset language.
    parser = ArgumentParser(description='Train pan model on pan dataset')
    parser.add_argument('-i', '--input', type=str,
                        required=True, dest='infolder',
                        help='path to folder with pan dataset for a language')
    parser.add_argument('-o', '--output', type=str,
                        required=True, dest='outfolder',
                        help='path to folder where model should be written')

    args = parser.parse_args()
    infolder = args.infolder
    outfolder = args.outfolder

    # Create the output folder up front: otherwise a missing folder would
    # only surface in joblib.dump, after every model has been trained.
    if not os.path.isdir(outfolder):
        os.makedirs(outfolder)

    dataset = ProfilingDataset(infolder)
    print('Loaded {} users...\n'.format(len(dataset.entries)))
    # get config
    config = dataset.config
    tasks = config.tasks
    print('\n--------------- Thy time of Running ---------------')
    # Maps task name -> whatever fit() returns for that task's model.
    all_models = {}
    for task in tasks:
        print('Learning to judge %s..' % task)
        # load data
        X, y = dataset.get_data(task)
        tictac = from_recipe(config.recipes[task])
        all_models[task] = tictac.fit(X, y)
    modelfile = os.path.join(outfolder, '%s.bin' % dataset.lang)
    print('Writing model to {}'.format(modelfile))
    joblib.dump(all_models, modelfile, compress=3)
Example #4
0
    # Prepare learning-curve inputs for one task of a profiling dataset,
    # using the command-line recipe or the one configured for the task.
    args = parser.parse_args()
    infolder = args.infolder
    task = args.feature
    recipe = args.recipe

    # print() with one parenthesised argument behaves identically on
    # Python 2 and Python 3, unlike the bare print statement.
    print('Loading dataset...')
    data = ProfilingDataset(infolder)
    print('Loaded {} users...\n'.format(len(data.entries)))
    config = data.config
    tasks = config.tasks
    if task in tasks:
        print('Creating learning curves for %s task..' % task)
        # Fall back to the recipe configured for this task when none was
        # supplied on the command line.
        if not recipe:
            recipe = config.recipes[task]
        # Announce the recipe before loading it so a failure in from_recipe
        # can be traced back to the offending recipe.
        print('Loading recipe from file %s..' % recipe)
        clf = from_recipe(recipe)
        X, y = data.get_data(task)
        # 5-fold cross validation: each fold holds out one fifth of the
        # samples as the validation set. No shuffle is requested, so
        # random_state is expected to have no effect on the folds.
        cv = cross_validation.KFold(len(X), n_folds=5, random_state=0)
        # Alternative: repeated random 80/20 splits for smoother curves.
        # cv = cross_validation.ShuffleSplit(len(X), n_iter=20, test_size=0.2,
        #                                    random_state=0)
        title = 'Learning Curves from recipe %s' % recipe
        plot_learning_curve(clf,
                            title,
                            X,
                            y,
                            ylim=(0.3, 1.01),
Example #5
0
 # Pull the task list and the per-task recipes from the dataset's config.
 config = dataset.config
 tasks = config.tasks
 print('\n--------------- Thy time of Running ---------------')
 # Model families to iterate over; 'voting' is excluded from the per-task
 # loop below by the `model_name != 'voting'` check.
 list_model_names = ['tictac', 'lda', 'voting']
 total_model = {}
 for model_name in list_model_names:
     all_models = {}
     if model_name != 'voting':
         for task in tasks:
             print('Learning to judge %s with %s' % (task, model_name))
             # load data
             X, y = dataset.get_data(task)
             # NOTE(review): 'meta' is not in the list literal assigned to
             # list_model_names above, so this hold-out split never runs as
             # written. `split` is not defined in the visible code - confirm
             # where it comes from before enabling this branch.
             if 'meta' in list_model_names:
                 X, X_cv, y, y_cv = train_test_split(X, y, test_size=split, random_state=42, stratify=y)
             # Recipes are looked up under the key '<task>-<model_name>'.
             tictac = from_recipe(config.recipes[task + '-' + model_name])
             # Build a one-line, '+'-separated description of the steps.
             outline = ""
             for step in tictac.steps:
                 # Each step is indexed as (name, object); the "features"
                 # step is expanded into its individual transformers.
                 if step[0] == "features":
                     # print type(step[1])
                         for tf in step[1].transformer_list:
                             # print type(tf[1])
                             # print type(tf[1].get_params())
                             # Transformer name followed by its parameters.
                             outline += tf[0] + " with Params:[" + str(tf[1].get_params()) + "]+"
                 else:
                     # Every other step is described by its name only; the
                     # commented-out variant below also printed its params.
                     # if hasattr(step[1], 'get_params'):
                         # outline += step[0] + " with Params:[" + str(step[1].get_params()) + "]+"
                     # else:
                         # outline += step[0]+ "+"
                     outline += step[0] + "+"
             # Drop the trailing '+' separator and terminate the line.
             outline = outline[:-1] + "\n"
Example #6
0
from argparse import ArgumentParser
from tictacs import from_recipe

if __name__ == '__main__':
    # Smoke test for a recipe: build a tictac from the recipe file, fit it on
    # four toy tweets and print its predictions for two unseen texts.
    arg_parser = ArgumentParser(description='A tester of recipes for tic tacs')
    # NOTE(review): with required=True argparse never falls back to
    # `default`, so the default path below is effectively unused.
    arg_parser.add_argument(
        '--recipe', '-r',
        dest='recipe',
        required=True,
        default='recipes/example.py',
        help='Path to the file where the recipe to create '
             'delicious tictacs resides. All recipes must be '
             'written in yaml format.')
    recipe = arg_parser.parse_args().recipe
    print('Using recipe from file: %s' % recipe)

    # Toy training set: four tweets with their labels.
    train_texts = [
        '@sly_pedantic_octopus @glorified_ml I walked on ice yesterday,'
        ' but no one laughed when it broke #lol',
        '@blue_world I hate java #java #programming #hell',
        '@BarnieTheDinosaur raaawwwwr.',
        'omg that just happened. #omg #rofl #yolo',
    ]
    train_labels = [0, 1, 1, 1]
    unseen_texts = ['#dog wtf omg java', '@blue yes broke #lol']

    print('Creating model...')
    model = from_recipe(recipe)
    print('Fitting model...')
    model.fit(train_texts, train_labels)
    print('Predicting with model...')
    predicted = model.predict(unseen_texts)
    print('Predicted %s' % predicted)
Example #7
0
                        dest='num_folds', default=4,
                        help='Number of folds to use in cross validation')

    # Load pickled documents and labels, then describe the gender pipeline.
    args = parser.parse_args()
    X_path = args.x_path
    y_path = args.y_path
    num_folds = args.num_folds
    # This part for tira-io
    # Pickle data is binary: open the files in 'rb' so loading works on
    # Python 3 and is not affected by newline translation on Windows.
    # NOTE: pickle.load can execute arbitrary code from the file; only use
    # it on X/y files that come from a trusted source.
    with open(X_path, 'rb') as xin:
        X = pickle.load(xin)
    with open(y_path, 'rb') as yin:
        y = pickle.load(yin)
    ######
    print('Number of docs: %d,%d' % (len(X), len(y)))
    for task in ['gender']:
        tictac = from_recipe("./config/recipes/gender.yml")
        # Build a one-line, '+'-separated description of the pipeline steps.
        outline = ""
        for step in tictac.steps:
            if step[0] == "features":
                # The "features" step is expanded: every entry of its
                # transformer_list is listed with its parameters.
                for tf in step[1].transformer_list:
                    outline += tf[0] + " with Params:[" + str(tf[1].get_params()) + "]+"
            else:
                # Every other step is described by its name only.
                outline += step[0] + "+"
        # Drop the trailing '+' separator and terminate the line.
        outline = outline[:-1] + "\n"