emotion_name = filename.emotions['LJ40K'][emotion_id]
logger.info('training model for emotion "%s"' % emotion_name)

X_train, y_train = fused_dataset.get_dataset(emotion_name, 'train')
X_dev, y_dev = fused_dataset.get_dataset(emotion_name, 'dev')

if not args.no_scaling:
    logger.debug("applying standard scaling")
    scaler = StandardScaler(with_mean=True, with_std=True)
    X_train = scaler.fit_transform(X_train)
    X_dev = scaler.transform(X_dev)

    fpath = os.path.join(args.output_folder, filename.get_scaler_filename(emotion_name, 'pkl'))
    logger.info('dumping scaler to %s' % fpath)
    utils.save_pkl_file(scaler, fpath)

best_res[emotion_name] = {}
best_score = 0

# use per-emotion parameters from the parameter file if one was given,
# otherwise sweep the grids passed on the command line
if args.parameter_file is not None:
    Cs = [param_dict[emotion_name][0]]
    gammas = [param_dict[emotion_name][1]]
else:
    Cs = args.c
    gammas = args.gamma

for c in Cs:
    for g in gammas:
        trainer.set(X=X_train, y=y_train, feature_name=fused_dataset.get_feature_name())
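# A minimal sketch of how the grid search above might continue inside the
# inner loop; train() and score() are hypothetical names, since the trainer
# API beyond set() is not shown in this section.
#
#         trainer.train(C=c, gamma=g)           # fit one SVM per (C, gamma) pair
#         score = trainer.score(X_dev, y_dev)   # evaluate on the dev split
#         if score > best_score:
#             best_score = score
#             best_res[emotion_name] = {'C': c, 'gamma': g, 'score': score}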
    loglevel = logging.DEBUG
elif args.verbose:
    loglevel = logging.INFO
else:
    loglevel = logging.ERROR

logging.basicConfig(format='[%(levelname)s][%(name)s] %(message)s', level=loglevel)
logger = logging.getLogger(__name__)

# get number of files in each emotion folder
n_doc = get_and_check_files(args.corpus_folder)

# integer split sizes; any remainder goes to the training split
n_train = n_doc * args.percent_train // 100
n_dev = n_doc * args.percent_dev // 100
n_test = n_doc * args.percent_test // 100
n_train += (n_doc - n_train - n_dev - n_test)

random_list = list(range(n_doc))
random.shuffle(random_list)

idx_dict = {}
idx_dict['train'] = random_list[0:n_train]
idx_dict['dev'] = random_list[n_train:n_train + n_dev]
idx_dict['test'] = random_list[n_train + n_dev:n_train + n_dev + n_test]

logger.info("dumping file to %s" % args.output_filename)
utils.save_pkl_file(idx_dict, args.output_filename)
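# Usage sketch: the dumped split indices are read back elsewhere with the
# pickle-loading counterpart; `docs` is a hypothetical list of documents
# ordered the same way as the corpus folder.
#
#     idx_dict = utils.load_pkl_file(args.output_filename)
#     train_docs = [docs[i] for i in idx_dict['train']]
#     dev_docs = [docs[i] for i in idx_dict['dev']]
#     test_docs = [docs[i] for i in idx_dict['test']]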
def dump_local(self, e_ID, save_path):
    utils.save_pkl_file(self.LocalInfo[e_ID], save_path)
def dump_global(self):
    utils.save_pkl_file(self.GlobalInfo, self.global_dumppath + '/GlobalInfo.pkl')
    utils.save_pkl_file(self.DatasetInfo, self.global_dumppath + '/DatasetInfo.pkl')
logger.info("load features from %s", fpath) Xy = utils.load_pkl_file(fpath) idxs = all_idxs['train'][emotion_name][emotion_name] if all_idxs is not None else range(len(Xy)) X = np.zeros((len(idxs), Xy[0]['X'].shape[1]), dtype="float32") logger.info('X.shape = (%u, %u)' % (X.shape[0], X.shape[1])) for i in idxs: # make sure only one feature vector in each doc assert Xy[i]['X'].shape[0] == 1 X[i] = Xy[i]['X'] tsvd = TruncatedSVD(n_components=args.target_n_component) logger.info('start fitting for "%s"' % (emotion_name)) start_time = time.time() X_new = tsvd.fit_transform(X) end_time = time.time() logger.info('fit time = %f' % (end_time-start_time)) fname = os.path.basename(fpath) fpath = os.path.join(args.output_folder, fname) logger.info('ouputing %s' % (fpath)) utils.save_pkl_file(X_new, fpath) fname = '%s_tsvd.pkl' % (emotion_name) fpath = os.path.join(args.output_folder, fname) logger.info('ouputing %s' % (fpath)) utils.save_pkl_file(tsvd, fpath)
def dump_results(self, filename):
    utils.save_pkl_file(self.results, filename)
def dump(self, filename):
    utils.save_pkl_file(self, filename)
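# For reference, every dump above goes through utils.save_pkl_file; a minimal
# sketch assuming the utils helpers are thin pickle wrappers (the real module
# may differ):

import pickle

def save_pkl_file(obj, fpath):
    # serialize obj to fpath using the highest pickle protocol available
    with open(fpath, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_pkl_file(fpath):
    # inverse of save_pkl_file: deserialize and return the stored object
    with open(fpath, 'rb') as f:
        return pickle.load(f)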