import logging
import os
import random
import shutil

logger = logging.getLogger(__name__)


def main(argv):
    args = parse_arguments(argv)

    # Take the last path component of input_dir as the dataset name.
    basename = os.path.basename(os.path.normpath(args.input_dir))
    train_dir = os.path.join(args.output_dir, 'train', basename)
    test_dir = os.path.join(args.output_dir, 'test', basename)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    names = set(get_filenames(args.input_dir))
    test_names = set(get_filenames(test_dir))
    train_names = set(get_filenames(train_dir))
    invalid_names = test_names & train_names  # files present in both splits
    names -= test_names
    names -= train_names

    for name in invalid_names:
        os.remove(os.path.join(test_dir, name))
        logger.info('"%s" exists in both train and test; removing the test copy', name)

    # Sample only the test images still needed; random.sample() requires a
    # sequence, so sort the remaining names first.
    n_test = max(args.n_test - len(test_names), 0)
    test_samples = set(random.sample(sorted(names), n_test))
    for name in names:
        src_path = os.path.join(args.input_dir, name)
        if name in test_samples:
            dst_path = os.path.join(test_dir, name)
        else:
            dst_path = os.path.join(train_dir, name)

        shutil.copyfile(src_path, dst_path)
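# The function above relies on a parse_arguments() helper that is not shown.
# A minimal sketch of what it might look like, assuming only the three
# attributes main() actually reads (input_dir, output_dir, n_test):
import argparse

def parse_arguments(argv):
    parser = argparse.ArgumentParser(
        description='Split a directory of files into train/test subsets.')
    parser.add_argument('input_dir', help='directory containing the source files')
    parser.add_argument('output_dir', help='root directory for the train/ and test/ outputs')
    parser.add_argument('--n_test', type=int, default=100,
                        help='target number of files in the test split')
    return parser.parse_args(argv)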
Example #2
import os
import shutil
import sys

def main(argv):
    args = parse_arguments(argv)

    dirname = os.path.basename(args.input_dir)
    valid_dir = os.path.join(args.output_dir, dirname, 'valid')
    invalid_dir = os.path.join(args.output_dir, dirname, 'invalid')
    os.makedirs(valid_dir, exist_ok=True)
    os.makedirs(invalid_dir, exist_ok=True)
    removed_json = os.path.join(args.output_dir, dirname, '.cache.json')

    names = get_filenames(args.input_dir)
    valid_names = get_filenames(valid_dir)
    invalid_names = get_filenames(invalid_dir)
    removed_names = read_json(removed_json)
    removed_names = [] if removed_names is None else removed_names
    names = sorted(
        set(names) - set(valid_names) - set(invalid_names) -
        set(removed_names))

    # Instruction
    sys.stdout.write('Key input instructions:\n'
                     'j: Accept current image\n'
                     'k: Reject current image\n'
                     'u: Undo recent validation\n'
                     'd: Exclude image\n'
                     'q: Quit validation\n')

    i = 0
    while i < len(names):
        path = os.path.join(args.input_dir, names[i])
        key = show_image(path, args.size)

        # i > 0: the very first image has no previous decision to undo.
        if key == KeyStatus.UNDO and i > 0:
            i -= 1
            if os.path.exists(os.path.join(valid_dir, names[i])):
                os.remove(os.path.join(valid_dir, names[i]))
            elif os.path.exists(os.path.join(invalid_dir, names[i])):
                os.remove(os.path.join(invalid_dir, names[i]))
            else:
                removed_names.pop()
                write_json(removed_json, removed_names)  # persist the undo
        elif key == KeyStatus.OK:
            shutil.copyfile(path, os.path.join(valid_dir, names[i]))
            i += 1
        elif key == KeyStatus.FAIL:
            shutil.copyfile(path, os.path.join(invalid_dir, names[i]))
            i += 1
        elif key == KeyStatus.REMOVE:
            removed_names.append(names[i])
            write_json(removed_json, removed_names)
            i += 1
        else:
            sys.exit()
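# The loop above dispatches on a KeyStatus enum that is not shown. A
# hypothetical sketch consistent with the j/k/u/d/q keys listed in the
# instructions (the real key-to-status mapping lives in show_image()):
from enum import Enum, auto

class KeyStatus(Enum):
    OK = auto()      # 'j': accept the current image
    FAIL = auto()    # 'k': reject the current image
    UNDO = auto()    # 'u': revert the previous decision
    REMOVE = auto()  # 'd': exclude the image from validation
    QUIT = auto()    # 'q': stop validating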
Example #3
def import_csv(data_folder):
    csv, spec, cfg = get_filenames(data_folder)
    wipe_db_tables()
    to_database(csv, spec, cfg)
    db_dump()
    dump_var_list_explained()
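# get_filenames() is not shown; import_csv() unpacks a (csv, spec, cfg)
# triple from it, so a sketch might look like the following. The concrete
# filenames below are assumptions, not taken from the original project.
import os

def get_filenames(data_folder):
    csv_path = os.path.join(data_folder, 'data.csv')
    spec_path = os.path.join(data_folder, 'spec.txt')
    cfg_path = os.path.join(data_folder, 'cfg.txt')
    return csv_path, spec_path, cfg_path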
Example #4
import os

def main(argv):
    args = parse_arguments(argv)

    dirname = os.path.basename(args.input_dir)
    output_dir = os.path.join(args.output_dir, dirname)

    extractor = AnimeFaceExtractor(args.detector_path,
                                   output_dir,
                                   image_size=args.img_size)

    names = get_filenames(args.input_dir)
    for name in names:
        extractor(os.path.join(args.input_dir, name))
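# AnimeFaceExtractor is invoked as a callable, so the class presumably
# implements __call__. A hypothetical interface sketch (bodies elided):
class AnimeFaceExtractor:
    def __init__(self, detector_path, output_dir, image_size):
        ...

    def __call__(self, image_path):
        # Detect faces in image_path and write image_size crops
        # into output_dir.
        ...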
Example #5
def batch_match_to_pub(filename, result_filename):
    """

    :param filename:
    :param result_filename:
    :return:
    """
    filenames = get_filenames(filename)
    if len(filenames) > 0:
        if result_filename:
            # output file
            with open(result_filename, 'w') as fp:
                # one file at a time, parse and score, and then write the result to the file
                for arXiv_filename in filenames:
                    a_match, for_inspection = single_match_to_pub(arXiv_filename)
                    fp.write('%s\n' % a_match)
                    if for_inspection:
                        write_for_inspection_hits(result_filename, for_inspection)
        else:
            for arXiv_filename in filenames:
                single_match_output(arXiv_filename)
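# Hypothetical invocation; both paths below are placeholders, not taken
# from the original source:
#
#     batch_match_to_pub('input/arxiv_metadata/', 'output/matches.txt')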
Example #6
import pickle

from tensorflow.keras.backend import clear_session

def get_results(labels_filename,
                xd_preconv_filename,
                target_variables,
                set_type="Test"):
    """
    Gather actual and predicted values for a list of target_variables.
    This function includes all the models which are capable of predicting on those target_variables.
    """

    ACTUAL_COL = (255, 255, 255)  # white (BGR)
    MODEL1_COL = (255, 0, 0)  # blue (BGR)
    MODEL2_COL = (0, 0, 255)  # red (BGR)

    with open(xd_preconv_filename, "rb") as f:
        xd_preconv = pickle.load(f)

    results = []

    for target_variable in target_variables:

        filenames = get_filenames(labels_filename,
                                  target_variable,
                                  set_type=set_type)

        print(f"len filenames: {len(filenames)}")

        if target_variable == "queue_end_pos":

            print(f"len filenames: {len(filenames)}")

            # Actual
            r = Series_y("actual.queue_end_pos", True, ACTUAL_COL,
                         target_variable)
            r.set_results(
                get_y_data(labels_filename, filenames, target_variable))
            results.append(r)

            TO_BE_EVALUATED = "CNN_EoQ_VGG16_noaug_weighted_20210514.h5"
            r = Series_y(
                TO_BE_EVALUATED,
                False,
                MODEL1_COL,
                target_variable,
            )
            r.set_results(
                VGG16_predict(
                    TO_BE_EVALUATED,
                    xd_preconv,
                    filenames,
                    target_variable,
                ))
            results.append(r)

        elif target_variable == "queue_full":

            r = Series_y("actual.queue_full", True, ACTUAL_COL,
                         target_variable)
            r.set_results(
                get_y_data(labels_filename, filenames, target_variable))
            results.append(r)

            r = Series_y("CNN_Queue_full_20210408.h5", False, MODEL1_COL,
                         target_variable)
            h = VGG16_predict(r"CNN_Queue_full_20210408.h5", xd_preconv,
                              filenames, target_variable)
            r.set_results(h)
            results.append(r)

        elif target_variable == "lanes":

            r = Series_y("actual.lanes", True, ACTUAL_COL, target_variable)
            y = get_y_data(labels_filename, filenames, target_variable)
            y = convert_float_lanes_to_boolean(y, input_is_12=True)
            r.set_results(y)
            results.append(r)

            r = Series_y(
                "CNN_Lanes_VGG16_weighted_20210408.h5",
                False,
                MODEL1_COL,
                target_variable,
            )
            h = VGG16_predict(
                r"CNN_Lanes_VGG16_weighted_20210408.h5",
                xd_preconv,
                filenames,
                target_variable,
            )
            r.set_results(h)
            results.append(r)

    # Release TF/Keras graph memory between evaluations:
    # https://www.tensorflow.org/api_docs/python/tf/keras/backend/clear_session
    clear_session()

    return results
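# A hypothetical call collecting results for all three supported targets
# (both filenames are placeholders):
#
#     results = get_results('labels.csv', 'xd_preconv.pkl',
#                           ['queue_end_pos', 'queue_full', 'lanes'])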
Example #7
import itertools

import pandas as pd

from common import docstring_to_file

# NOTE: this function is a direct query to all unique labels
from database import get_unique_labels

FILLER = "<...>"


# ----------------------------------------------------------------------------

from common import get_filenames
data_folder = "../data/ind09/"
csv, spec, cfg = get_filenames(data_folder)

from load_spec import load_spec
default_dicts = load_spec(spec)

# ----------------------------------------------------------------------------

def get_var_abbr(name):
    # Keep the leading all-uppercase words: the variable abbreviation.
    words = name.split('_')
    return '_'.join(itertools.takewhile(lambda word: word.isupper(), words))
assert get_var_abbr('PROD_E_TWh') == 'PROD_E'

def get_unit_abbr(name):
    # Drop the leading all-uppercase words: the remainder is the unit.
    words = name.split('_')
    return '_'.join(itertools.dropwhile(lambda word: word.isupper(), words))
assert get_unit_abbr('PROD_E_TWh') == 'TWh'
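# The two helpers split a label at the first word containing lowercase
# letters. One more (hypothetical) label for illustration:
assert get_var_abbr('CPI_rog') == 'CPI'
assert get_unit_abbr('CPI_rog') == 'rog'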