def get_writing_features(image_path):
    """Extract writing features from the image stored at *image_path*.

    The image is read in grayscale, run through ``PreProcessor.process``,
    split into lines by ``LineSegmentor``, and all resulting lines are handed
    to a single ``FeatureExtractor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``FeatureExtractor.extract()`` returns for the whole image.
    """
    # Pre-processing: load, clean up, then segment into lines.
    raw = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    gray, binary = PreProcessor.process(raw)
    segmentor = LineSegmentor(gray, binary)
    gray_lines, bin_lines = segmentor.segment()

    # Feature extraction over all lines at once.
    extractor = FeatureExtractor(gray_lines, bin_lines)
    return extractor.extract()
def get_features(path):
    """Extract one feature result per segmented line of the image at *path*.

    Args:
        path: Path of the image file to read.

    Returns:
        A list with one ``FeatureExtractor.extract()`` result per line, in the
        order the lines are produced by ``LineSegmentor.segment()``.
    """
    # Read and pre-process the image, then split it into lines.
    source = cv.imread(path, cv.IMREAD_GRAYSCALE)
    gray, binary = PreProcessor.process(source)
    gray_lines, bin_lines = LineSegmentor(gray, binary).segment()

    # Each line is wrapped in a singleton list so it is extracted on its own.
    return [
        FeatureExtractor([gray_line], [bin_line]).extract()
        for gray_line, bin_line in zip(gray_lines, bin_lines)
    ]
def main(args):
    """Train a ranker, rerank the test set's top-k segmentations, and report metrics.

    Args:
        args: Parsed options. This function reads ``model``, ``train``,
            ``train_topk``, ``test``, ``test_topk`` and ``output``.

    Raises:
        ValueError: If ``args.model`` is not one of ``"mse"``, ``"mr"``,
            ``"mse_multi"`` or ``"mr_multi"``.
    """
    feature_extractor = FeatureExtractor(get_resources(), args.model)
    train_feats, train_labels, _, _ = feature_extractor.get_features(
        args.train, args.train_topk)
    test_feats, _, test_segs, test_gold_truths = feature_extractor.get_features(
        args.test, args.test_topk)

    epochs, lr1, lr2 = 100, 0.01, 0.05

    # Initialize model
    if args.model == "mse":
        model = mse_ranker.MSERanker(epochs, lr1)
    elif args.model == "mr":
        model = mr_ranker.MRRanker(epochs, lr1)
    elif args.model == "mse_multi":
        model = mse_multi_ranker.MSEMultiRanker(epochs, lr1, lr2)
    elif args.model == "mr_multi":
        model = mr_multi_ranker.MRMultiRanker(epochs, lr1, lr2)
    else:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError("Unsupported model: {!r}".format(args.model))

    # Train model
    model.train(train_feats, train_labels)

    # Rerank top-k segmentations
    top_segmentations = []
    for segs_feats, segs, _gold in zip(test_feats, test_segs, test_gold_truths):
        if len(segs) == 1:
            # NOTE(review): a single candidate is added with extend(), i.e.
            # flattened into the result, while reranked candidates are appended
            # as a whole list - confirm this asymmetry is intended.
            top_segmentations.extend(segs)
        else:
            reranked_segs = rerank(segs, segs_feats, model, args.model)
            top_segmentations.append(reranked_segs)

    if args.output is not None:
        # The context manager closes the file even if a write fails midway.
        with open(args.output, 'w') as fp:
            for segs in top_segmentations:
                # First column: the top entry with all whitespace removed.
                target = "".join(segs[0].split())
                fp.write(target + "\t" +
                         "\t".join(seg.strip() for seg in segs) + "\n")

    # Evaluate metrics
    print("MRR:", mean_reciprocal_rank(test_gold_truths, top_segmentations))
    print("Accuracy@1:", accuracy(1, test_gold_truths, top_segmentations))
    print("Accuracy@2:", accuracy(2, test_gold_truths, top_segmentations))
    print("Fscore@1:", fscore(1, test_gold_truths, top_segmentations))
    print("Fscore@2:", fscore(2, test_gold_truths, top_segmentations))
def get_writer_features(path, writer_id):
    """Extract per-line features for every image directly inside *path*.

    Args:
        path: Directory holding one writer's images. Only its top level is
            scanned; sub-directories are not visited.
        writer_id: Label attached to every extracted feature result.

    Returns:
        A pair ``(x, y)``: ``x`` holds one ``FeatureExtractor.extract()``
        result per segmented line and ``y`` holds ``writer_id`` repeated once
        per entry of ``x``. Both are empty when no files are found.
    """
    # All lines of the writer
    total_gray_lines, total_bin_lines = [], []

    # Read and append all lines of the writer
    for root, _dirs, files in os.walk(path):
        for filename in files:
            # Join with the walked directory so `path` works with or without
            # a trailing separator (plain `path + filename` required one).
            image_file = os.path.join(root, filename)
            gray_img = cv.imread(image_file, cv.IMREAD_GRAYSCALE)
            gray_img, bin_img = PreProcessor.process(gray_img)
            gray_lines, bin_lines = LineSegmentor(gray_img, bin_img).segment()
            total_gray_lines.extend(gray_lines)
            total_bin_lines.extend(bin_lines)
        # Stop after the first yield: only the top-level directory is wanted.
        break

    # Extract features of every line separately
    x, y = [], []
    for g, b in zip(total_gray_lines, total_bin_lines):
        x.append(FeatureExtractor([g], [b]).extract())
        y.append(writer_id)

    return x, y
def extract_features(txt_io, feat_io, cword_io, train=False, factor_files=None):
    """Find confusion words in *txt_io* and write their features to *feat_io*.

    Args:
        txt_io: Input text stream; its ``name`` attribute is logged.
        feat_io: Output stream receiving one feature string per confusion word.
        cword_io: Stream handed to ``CWordReader``.
        train: Flag forwarded to ``CWordFinder``.
        factor_files: Optional mapping of factor files, forwarded to
            ``check_factor_requirements`` and ``each_factorized_input``.
            ``None`` (the default) is treated as an empty mapping.
    """
    # Use a fresh dict per call instead of a shared mutable default argument.
    if factor_files is None:
        factor_files = {}

    csets = CSetPair(config['source-cset'], config['target-cset'])
    extractor = FeatureExtractor(csets, config['features'], config['costs'])
    check_factor_requirements(extractor.required_factors(), factor_files)

    finder = CWordFinder(csets, train)
    if config['nulls-ngrams']:
        null_finder = NullFinder(csets.src, config['nulls-ngrams'])
        finder.add_extra_finder(null_finder)

    reader = CWordReader(cword_io)

    log.info("Extract features from {}".format(txt_io.name))

    count = 0
    for sid, line, fact_sent in each_factorized_input(txt_io, factor_files):
        for cword in finder.find_confusion_words(line, fact_sent):
            feat_str = extractor.extract_features(cword, fact_sent)
            feat_io.write(feat_str)
            # Presumably records the confusion word via cword_io - verify
            # against CWordReader.format.
            reader.format(sid, cword)
            count += 1

    log.info("Found {} confusion words".format(count))
# Choose statistical features and physical parameters features_list = [fc.get_min, fc.get_max, fc.get_median] params_name_list = ['TOTUSJH', 'TOTBSQ', 'TOTPOT', 'TOTUSJZ'] # Input and output path #path_to_root = os.path.join('..', CONST.IN_PATH_TO_MVTS_FL) #path_to_dest = os.path.join('..', CONST.OUT_PATH_TO_RAW_FEATURES) #path_to_root = CONST.IN_PATH_TO_MVTS_FL path_to_root = CONST.IN_PATH_TO_MVTS_FL2 path_to_dest = CONST.OUT_PATH_TO_RAW_FEATURES output_filename = 'raw_features_p2_FL.csv' # Extract features pc = FeatureExtractor(path_to_root, path_to_dest, output_filename) pc.calculate_all(features_list, params_name_list=params_name_list) path_to_root = CONST.IN_PATH_TO_MVTS_NF2 path_to_dest = CONST.OUT_PATH_TO_RAW_FEATURES output_filename = 'raw_features_p2_NF.csv' # Extract features pc = FeatureExtractor(path_to_root, path_to_dest, output_filename) pc.calculate_all(features_list, params_name_list=params_name_list) path_to_root = CONST.IN_PATH_TO_MVTS_FL3 path_to_dest = CONST.OUT_PATH_TO_RAW_FEATURES output_filename = 'raw_features_p3_FL.csv' # Extract features