emotion_name = filename.emotions['LJ40K'][emotion_id]
        logger.info('training model for emotion "%s"' % emotion_name)
        
        X_train, y_train = fused_dataset.get_dataset(emotion_name, 'train')
        X_dev, y_dev = fused_dataset.get_dataset(emotion_name, 'dev')
        if not args.no_scaling:
            scaler = StandardScaler(with_mean=True, with_std=True)
            logger.debug("applying standard scaling")
            X_train = scaler.fit_transform(X_train)
            X_dev = scaler.transform(X_dev)
            
            fpath = os.path.join(args.output_folder, filename.get_scaler_filename(emotion_name, 'pkl'))
            logger.info('dumping scaler to %s' % (fpath))
            utils.save_pkl_file(scaler, fpath)

        best_res[emotion_name] = {}
        best_score = 0

        if args.parameter_file is not None:
            Cs = [param_dict[emotion_name][0]]
            gammas = [param_dict[emotion_name][1]]
        else:
            Cs = args.c
            gammas = args.gamma

        for c in Cs:
            for g in gammas:

                trainer.set(X=X_train, y=y_train, feature_name=fused_dataset.get_feature_name())
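The snippet is truncated inside the grid search over `Cs` and `gammas`. As a rough sketch of what the loop body presumably goes on to do, here is a version using scikit-learn's `SVC` directly; the project's `trainer` object, its scoring method, and the exact layout of `best_res` are assumptions, not shown in the source:

# Sketch only: stands in for the truncated trainer-based loop body above.
from sklearn.svm import SVC

for c in Cs:
    for g in gammas:
        clf = SVC(C=c, gamma=g, kernel='rbf')
        clf.fit(X_train, y_train)
        score = clf.score(X_dev, y_dev)  # mean accuracy on the dev split
        if score > best_score:           # track the best dev-set parameters
            best_score = score
            best_res[emotion_name] = {'C': c, 'gamma': g, 'score': score}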
Example #2
    if args.debug:
        loglevel = logging.DEBUG
    elif args.verbose:
        loglevel = logging.INFO
    else:
        loglevel = logging.ERROR
    logging.basicConfig(format='[%(levelname)s][%(name)s] %(message)s', level=loglevel) 
    logger = logging.getLogger(__name__)


    # get number of files in each emotion folder
    n_doc = get_and_check_files(args.corpus_folder)

    n_train = n_doc * args.percent_train // 100
    n_dev = n_doc * args.percent_dev // 100
    n_test = n_doc * args.percent_test // 100

    # assign any rounding remainder to the training split
    n_train += (n_doc - n_train - n_dev - n_test)

    random_list = list(range(n_doc))
    random.shuffle(random_list)

    idx_dict = {}
    idx_dict['train'] = random_list[0:n_train]
    idx_dict['dev'] = random_list[n_train:n_train+n_dev]
    idx_dict['test'] = random_list[n_train+n_dev:n_train+n_dev+n_test]


    logger.info("dumping file to %s" % (args.output_filename))
    utils.save_pkl_file(idx_dict, args.output_filename)
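For reference, the dumped index file can be read back with the matching loader; a minimal sketch, assuming `utils.load_pkl_file` is the pickle-reading counterpart seen in the later examples, and `docs` is a hypothetical list of corpus documents:

idx_dict = utils.load_pkl_file(args.output_filename)
train_docs = [docs[i] for i in idx_dict['train']]  # `docs` is hypothetical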
Example #3
def dump_local(self, e_ID, save_path):
    utils.save_pkl_file(self.LocalInfo[e_ID], save_path)
Example #4
def dump_global(self):
    utils.save_pkl_file(self.GlobalInfo,
                        self.global_dumppath + '/GlobalInfo.pkl')
    utils.save_pkl_file(self.DatasetInfo,
                        self.global_dumppath + '/DatasetInfo.pkl')
Example #5
        logger.info("load features from %s", fpath)
        Xy = utils.load_pkl_file(fpath)

        idxs = all_idxs['train'][emotion_name] if all_idxs is not None else range(len(Xy))

        X = np.zeros((len(idxs), Xy[0]['X'].shape[1]), dtype="float32")
        logger.info('X.shape = (%u, %u)' % (X.shape[0], X.shape[1]))

        for row, i in enumerate(idxs):
            # make sure each doc contributes exactly one feature vector
            assert Xy[i]['X'].shape[0] == 1
            X[row] = Xy[i]['X']

        tsvd = TruncatedSVD(n_components=args.target_n_component)
        logger.info('start fitting for "%s"' % (emotion_name))

        start_time = time.time()
        X_new = tsvd.fit_transform(X)
        end_time = time.time()
        logger.info('fit time = %f' % (end_time-start_time))

        fname = os.path.basename(fpath)
        fpath = os.path.join(args.output_folder, fname)
        logger.info('outputting %s' % (fpath))
        utils.save_pkl_file(X_new, fpath)

        fname = '%s_tsvd.pkl' % (emotion_name)
        fpath = os.path.join(args.output_folder, fname)
        logger.info('outputting %s' % (fpath))
        utils.save_pkl_file(tsvd, fpath)
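The pickled TruncatedSVD model can later project new feature matrices into the same reduced space; a minimal sketch, assuming `utils.load_pkl_file` mirrors `save_pkl_file` and `X_test` is a hypothetical new feature matrix:

tsvd = utils.load_pkl_file(os.path.join(args.output_folder, '%s_tsvd.pkl' % emotion_name))
X_test_reduced = tsvd.transform(X_test)  # X_test is hypothetical new data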
Example #6
def dump_results(self, filename):
    utils.save_pkl_file(self.results, filename)
Example #7
def dump(self, filename):
    utils.save_pkl_file(self, filename)
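All of the examples above funnel through the same pickle helpers. A minimal sketch of what `utils.save_pkl_file` and `utils.load_pkl_file` plausibly look like; the actual `utils` module is not shown on this page, so this is an assumption based on the call sites:

import pickle

def save_pkl_file(obj, fpath):
    # Sketch (assumed implementation): serialize any Python object to a pickle file.
    with open(fpath, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_pkl_file(fpath):
    # Sketch (assumed implementation): counterpart loader for reading the dumps back.
    with open(fpath, 'rb') as f:
        return pickle.load(f)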