Exemple #1
0
def get_writing_features(image_path):
    """Extract writing features from the image stored at ``image_path``.

    The image is read in grayscale, passed through ``PreProcessor.process``
    and split into lines by ``LineSegmentor``. All gray/binary lines are then
    handed together to a single ``FeatureExtractor``.

    :param image_path: path of the image file to read.
    :return: whatever ``FeatureExtractor.extract()`` yields for all lines.
    """
    # Load and pre-process the image, then split it into lines
    raw_img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    gray_img, bin_img = PreProcessor.process(raw_img)
    segmentor = LineSegmentor(gray_img, bin_img)
    gray_lines, bin_lines = segmentor.segment()

    # One extractor over the whole set of lines
    extractor = FeatureExtractor(gray_lines, bin_lines)
    return extractor.extract()
Exemple #2
0
def get_features(path):
    """Return one feature result per segmented line of the image at ``path``.

    The image is read in grayscale, pre-processed and segmented into lines.
    Each (gray line, binary line) pair is then fed on its own to a fresh
    ``FeatureExtractor``.

    :param path: path of the image file to read.
    :return: list with the ``extract()`` result of every line, in line order.
    """
    # Read, pre-process and segment the image
    image = cv.imread(path, cv.IMREAD_GRAYSCALE)
    gray_img, bin_img = PreProcessor.process(image)
    gray_lines, bin_lines = LineSegmentor(gray_img, bin_img).segment()

    # Every line is wrapped in a single-element list and extracted separately
    return [
        FeatureExtractor([gray_line], [bin_line]).extract()
        for gray_line, bin_line in zip(gray_lines, bin_lines)
    ]
Exemple #3
0
def main(args):
    """Train a ranker, rerank the test segmentations and print the metrics.

    Reads from ``args``: ``model`` (one of ``"mse"``, ``"mr"``,
    ``"mse_multi"``, ``"mr_multi"``), ``train``/``train_topk``,
    ``test``/``test_topk`` and ``output`` (optional path; when not ``None``
    the reranked segmentations are written there, one tab-separated line per
    test item).

    :raises ValueError: if ``args.model`` is not one of the supported names.
    """
    feature_extractor = FeatureExtractor(get_resources(), args.model)
    train_feats, train_labels, _, _ = feature_extractor.get_features(
        args.train, args.train_topk)
    test_feats, _, test_segs, test_gold_truths = feature_extractor.get_features(
        args.test, args.test_topk)

    epochs, lr1, lr2 = 100, 0.01, 0.05

    # Initialize model
    if args.model == "mse":
        model = mse_ranker.MSERanker(epochs, lr1)
    elif args.model == "mr":
        model = mr_ranker.MRRanker(epochs, lr1)
    elif args.model == "mse_multi":
        model = mse_multi_ranker.MSEMultiRanker(epochs, lr1, lr2)
    elif args.model == "mr_multi":
        model = mr_multi_ranker.MRMultiRanker(epochs, lr1, lr2)
    else:
        # Fail with a clear message instead of calling .train() on None
        raise ValueError("Unknown model type: {!r}".format(args.model))

    # Train model
    model.train(train_feats, train_labels)

    # Rerank top-k segmentations
    top_segmentations = []
    for segs_feats, segs, _ in zip(test_feats, test_segs, test_gold_truths):
        if len(segs) == 1:
            # Nothing to rerank. Append the one-element list itself (not its
            # contents) so every entry of top_segmentations is a list of
            # segmentations, exactly like the reranked entries below.
            top_segmentations.append(segs)
        else:
            reranked_segs = rerank(segs, segs_feats, model, args.model)
            top_segmentations.append(reranked_segs)

    if args.output is not None:
        # The context manager closes the file even if a write fails
        with open(args.output, 'w') as fp:
            for segs in top_segmentations:
                # First column: best segmentation with all whitespace removed
                target = "".join(segs[0].split())
                fp.write(target + "\t" + "\t".join([seg.strip()
                                                    for seg in segs]) + "\n")

    # Evaluate metrics
    print("MRR:", mean_reciprocal_rank(test_gold_truths, top_segmentations))
    print("Accuracy@1:", accuracy(1, test_gold_truths, top_segmentations))
    print("Accuracy@2:", accuracy(2, test_gold_truths, top_segmentations))
    print("Fscore@1:", fscore(1, test_gold_truths, top_segmentations))
    print("Fscore@2:", fscore(2, test_gold_truths, top_segmentations))
Exemple #4
0
def get_writer_features(path, writer_id):
    """Collect per-line features and labels for every image of one writer.

    Only the files directly inside ``path`` are read; sub-directories are not
    descended into. Every file is read in grayscale, pre-processed and
    segmented into lines, and each line is then fed on its own to a
    ``FeatureExtractor``.

    :param path: directory holding the writer's images (with or without a
        trailing path separator).
    :param writer_id: label attached to every extracted line.
    :return: tuple ``(x, y)`` where ``x`` is the list of per-line ``extract()``
        results and ``y`` is a list of ``writer_id`` of the same length.
    """
    # All lines of the writer
    total_gray_lines, total_bin_lines = [], []

    # Take only the first os.walk() entry, i.e. the top-level directory.
    # The default keeps the "no such directory -> no files" behavior.
    root, _, files = next(os.walk(path), (path, [], []))

    # Read and append all lines of the writer
    for filename in files:
        # os.path.join works whether or not `path` ends with a separator
        image_path = os.path.join(root, filename)
        gray_img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
        gray_img, bin_img = PreProcessor.process(gray_img)
        gray_lines, bin_lines = LineSegmentor(gray_img, bin_img).segment()
        total_gray_lines.extend(gray_lines)
        total_bin_lines.extend(bin_lines)

    # Extract features of every line separately
    x = [
        FeatureExtractor([g], [b]).extract()
        for g, b in zip(total_gray_lines, total_bin_lines)
    ]
    y = [writer_id] * len(x)

    return x, y
Exemple #5
0
def extract_features(txt_io, feat_io, cword_io, train=False, factor_files=None):
    """Write features for every confusion word found in ``txt_io``.

    For each confusion word located by ``CWordFinder`` the feature string
    produced by ``FeatureExtractor.extract_features`` is written to
    ``feat_io`` and the word is passed to ``CWordReader.format`` (built on
    ``cword_io``).

    :param txt_io: input text stream; its ``name`` is used in the log message.
    :param feat_io: stream receiving the feature strings.
    :param cword_io: stream handed to ``CWordReader``.
    :param train: forwarded to ``CWordFinder``.
    :param factor_files: factor files forwarded to
        ``check_factor_requirements`` and ``each_factorized_input``;
        ``None`` (the default) is treated as an empty dict.
    """
    # A fresh dict per call avoids sharing one mutable default between calls
    if factor_files is None:
        factor_files = {}

    csets = CSetPair(config['source-cset'], config['target-cset'])
    extractor = FeatureExtractor(csets, config['features'], config['costs'])

    check_factor_requirements(extractor.required_factors(), factor_files)

    finder = CWordFinder(csets, train)
    if config['nulls-ngrams']:
        null_finder = NullFinder(csets.src, config['nulls-ngrams'])
        finder.add_extra_finder(null_finder)
    reader = CWordReader(cword_io)

    log.info("Extract features from {}".format(txt_io.name))

    count = 0
    for sid, line, fact_sent in each_factorized_input(txt_io, factor_files):
        for cword in finder.find_confusion_words(line, fact_sent):
            feat_str = extractor.extract_features(cword, fact_sent)
            feat_io.write(feat_str)

            reader.format(sid, cword)
            count += 1

    log.info("Found {} confusion words".format(count))
Exemple #6
0
# Statistical features to compute and the physical parameters to compute
# them on
features_list = [fc.get_min, fc.get_max, fc.get_median]
params_name_list = ['TOTUSJH', 'TOTBSQ', 'TOTPOT', 'TOTUSJZ']

# Every run writes into the same destination directory.
# (An earlier variant read its input from CONST.IN_PATH_TO_MVTS_FL.)
path_to_dest = CONST.OUT_PATH_TO_RAW_FEATURES

# One extraction run per (input root, output file name) pair. The loop
# variables deliberately reuse the module-level names so that, once the loop
# ends, they hold the values of the last run.
for path_to_root, output_filename in (
        (CONST.IN_PATH_TO_MVTS_FL2, 'raw_features_p2_FL.csv'),
        (CONST.IN_PATH_TO_MVTS_NF2, 'raw_features_p2_NF.csv'),
):
    # Extract features
    pc = FeatureExtractor(path_to_root, path_to_dest, output_filename)
    pc.calculate_all(features_list, params_name_list=params_name_list)

path_to_root = CONST.IN_PATH_TO_MVTS_FL3
path_to_dest = CONST.OUT_PATH_TO_RAW_FEATURES
output_filename = 'raw_features_p3_FL.csv'

# Extract features