Example 1
def autotune(input_train, input_validation, output_model, output_parameters,
             metric, k, duration, model_size):
    input_train_path = get_input_path(input_train)
    input_validation_path = get_input_path(input_validation)
    output_model_path = get_output_path(output_model)
    output_parameters_path = get_output_path(output_parameters)

    # Autotune model
    model = fasttext.train_supervised(
        input=input_train_path,
        autotuneValidationFile=input_validation_path,
        autotuneMetric=metric,
        autotuneDuration=duration,
        autotuneModelSize=model_size,
        verbose=VERBOSE)

    # Log best model metrics
    n, p, r = model.test(input_validation_path, k=k)
    print(json.dumps({'n': n, 'precision': p, 'recall': r, 'k': k}))

    # Save best parameters
    with open(output_parameters_path, 'w') as f:
        json.dump(get_model_parameters(model), f)

    # Save best model
    model.save_model(output_model_path)
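Most of the snippets on this page rely on get_input_path and get_output_path helpers that are not shown. A minimal sketch of what they might look like, assuming inputs and outputs live under fixed directories (INPUT_DIR and OUTPUT_DIR are placeholder names, not part of the original code):

import os

INPUT_DIR = 'input'    # hypothetical base directories
OUTPUT_DIR = 'output'

def get_input_path(name):
    # Resolve a named input to a path under the input directory
    return os.path.join(INPUT_DIR, name)

def get_output_path(name):
    # Resolve a named output, creating the output directory if needed
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    return os.path.join(OUTPUT_DIR, name)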
Example 2
def split(input_data, output_train, output_validation, output_test,
          train_ratio, validation_ratio, test_ratio, shuffle):
    input_data_path = get_input_path(input_data)
    output_train_path = get_output_path(output_train)
    output_validation_path = get_output_path(output_validation)
    output_test_path = get_output_path(output_test)

    with open(input_data_path, 'r') as f:
        data = f.read().strip().split('\n')

    # Shuffle data
    if shuffle:
        print('Shuffling data')
        random.seed(RANDOM_SEED)
        random.shuffle(data)

    # Split train, validation and test data
    validation_index = round(len(data) * train_ratio)
    test_index = round(len(data) * (train_ratio + validation_ratio))
    end_index = round(
        len(data) * (train_ratio + validation_ratio + test_ratio))

    with open(output_train_path, 'w') as f:
        f.write('\n'.join(data[:validation_index]))

    with open(output_validation_path, 'w') as f:
        f.write('\n'.join(data[validation_index:test_index]))

    with open(output_test_path, 'w') as f:
        f.write('\n'.join(data[test_index:end_index]))
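Because end_index is derived from the sum of all three ratios, ratios that sum to less than 1 silently drop the tail of the data. A typical call might look like this (file names are illustrative):

split('data.txt', 'train.txt', 'valid.txt', 'test.txt',
      train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1,
      shuffle=True)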
Example 3
def test(input_test, input_model, output_predictions, k):
    input_test_path = get_input_path(input_test)
    input_model_path = get_input_path(input_model)
    output_predictions_path = get_output_path(output_predictions)

    model = fasttext.load_model(input_model_path)

    # Log model metrics
    n, p, r = model.test(input_test_path, k=k)
    print(json.dumps({'n': n, 'precision': p, 'recall': r, 'k': k}))

    # Split feature and category columns into a DataFrame
    with open(input_test_path) as f:
        df = pd.DataFrame((split_text(line) for line in f),
                          columns=[TEXT_COLUMN, LABEL_COLUMN])

    # Get predictions
    all_labels, all_probs = model.predict(list(df[TEXT_COLUMN]), k=k)

    # Add formatted predictions
    predictions_df = get_predictions_df(all_labels, all_probs, k)
    df = df.join(predictions_df)

    # Add error column (true label vs. top-1 prediction)
    df['error'] = (df[LABEL_COLUMN] != df[f'{LABEL_COLUMN}@1'])

    # Save predictions
    df.to_csv(output_predictions_path, index=False)
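get_predictions_df is not shown. Given the '{LABEL_COLUMN}@1' lookup above, a plausible sketch builds one label/probability column pair per prediction rank; the 'prob@i' column names are an assumption:

import pandas as pd

def get_predictions_df(all_labels, all_probs, k):
    # One column pair per rank: label@1/prob@1, label@2/prob@2, ...
    columns = {}
    for i in range(k):
        columns[f'{LABEL_COLUMN}@{i + 1}'] = [labels[i] for labels in all_labels]
        columns[f'prob@{i + 1}'] = [probs[i] for probs in all_probs]
    return pd.DataFrame(columns)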
Example 4
def bag_to_json(options):
    stream_array = StreamArray(json_generator(options))
    path = utils.get_output_path(options)
    path += ".json"
    with open(path, 'w') as outfile:
        for chunk in json.JSONEncoder().iterencode(stream_array):
            outfile.write(chunk)
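StreamArray is defined elsewhere; the usual recipe for streaming a generator through json.JSONEncoder().iterencode() is a list subclass that fakes its length, roughly:

class StreamArray(list):
    # Subclassing list makes the encoder serialise this as a JSON array,
    # while the overridden __iter__ streams items from the generator
    # instead of materialising them in memory.
    def __init__(self, generator):
        self.generator = generator

    def __iter__(self):
        return self.generator

    def __len__(self):
        return 1  # any non-zero value, so the empty-list check doesn't emit '[]'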
Example 5
def save_gallery(title, data):
    log("Saving gallery {0}...".format(title))

    output_path = get_output_path(title)

    with open(output_path, "w") as jin:
        jin.write(data)
        log_replace_line("Saving gallery {0}...Done".format(title))
Example 6
def preprocess(input_data, output_data):
    # TODO: make this work for prediction data without labels
    input_data_path = get_input_path(input_data)
    output_data_path = get_output_path(output_data)

    df = pd.read_csv(
        input_data_path,
        engine='python')

    with open(output_data_path, 'w') as output:
        for text, label in zip(df[TEXT_COLUMN], df[LABEL_COLUMN]):
            output.write(f'{process_text(text)} {LABEL_SEPARATOR}{label}\n')
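The output follows fastText's supervised-training format: one example per line, with the label marked by LABEL_SEPARATOR (conventionally '__label__'). For a row with text 'Great phone' and label 'positive', the written line would look roughly like:

great phone __label__positive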
Example 7
def main():
    url = utils.get_url()
    output_path = utils.get_output_path()
    audio_only = utils.get_audio_only()

    if not audio_only:
        video_quality = utils.get_stream_quality()

    if audio_only:
        stream_list = utils.list_streams(url, audio_only)
        if not stream_list:
            print(
                'There are no streams available for the options that you specified.'
            )
            return
        itag = utils.get_itag()
        utils.download_stream(url, itag, audio_only, output_path)
        utils.print_video_statistics(url)

    elif video_quality == 1:
        progressive = True
        stream_list = utils.list_streams(url, audio_only, progressive)
        if not stream_list:
            print(
                'There are no streams available for the options that you specified.'
            )
            return
        itag = utils.get_itag()
        utils.download_stream(url, itag, audio_only, output_path)
        utils.print_video_statistics(url)

    else:
        progressive = False
        stream_lists = utils.list_streams(url, audio_only, progressive)
        # Check the combined result first; indexing an empty list would raise
        if not stream_lists:
            print(
                'There are no video or audio streams available for the options that you specified.'
            )
            return
        elif not stream_lists[0]:
            print(
                'There are no video streams available for the options that you specified.'
            )
            return
        elif not stream_lists[1]:
            print(
                'There are no audio streams available for the options that you specified.'
            )
            return
        itags = utils.get_adaptive_itags()
        utils.download_and_mux(url, itags[0], itags[1], output_path)
Example 8
def train(input_data, input_parameters, output_model):
    input_data_path = get_input_path(input_data)
    input_parameters_path = get_input_path(input_parameters)
    output_model_path = get_output_path(output_model)

    # Parse parameters
    with open(input_parameters_path) as f:
        parameters = json.load(f)

    # Train model
    model = fasttext.train_supervised(input=input_data_path, **parameters)

    # Save model
    model.save_model(output_model_path)
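The parameters file holds keyword arguments for fasttext.train_supervised, such as those saved by the autotune step in Example 1. A hand-written file could look like this (values are illustrative):

{"lr": 0.05, "epoch": 25, "wordNgrams": 2, "dim": 100, "loss": "softmax"}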
Example 9
def preprocess(input_data, output_data, text_column, label_column, engine):
    # TODO: make this work for prediction data without labels
    input_data_path = get_input_path(input_data)
    output_data_path = get_output_path(output_data)

    df = pd.read_csv(input_data_path, engine=engine).fillna('')

    # Concatenate strings if multiple text columns
    if ',' in text_column:
        df[text_column] = df[text_column.split(',')].agg(' '.join, axis=1)

    with open(output_data_path, 'w') as output:
        for text, label in zip(df[text_column], df[label_column]):
            if not_empty_str(text) and not_empty_str(label):
                output.write(
                    f'{process_text(text)} {LABEL_SEPARATOR}{label}\n')
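Passing a comma-separated text_column concatenates those columns into a single text field before preprocessing. A typical call (file and column names are illustrative):

preprocess('products.csv', 'train.txt',
           text_column='title,description',
           label_column='category',
           engine='python')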
Example 10
def predict(input_data, input_model, output_predictions, k):
    input_data_path = get_input_path(input_data)
    input_model_path = get_input_path(input_model)
    output_predictions_path = get_output_path(output_predictions)

    model = fasttext.load_model(input_model_path)

    # Create text DataFrame; strip trailing newlines, since fastText's
    # predict() rejects strings that contain '\n'
    with open(input_data_path) as f:
        df = pd.DataFrame((line.rstrip('\n') for line in f),
                          columns=[TEXT_COLUMN])

    # Get predictions
    all_labels, all_probs = model.predict(list(df[TEXT_COLUMN]), k=k)

    # Add formatted predictions
    predictions_df = get_predictions_df(all_labels, all_probs, k)
    df = df.join(predictions_df)

    # Save predictions
    df.to_csv(output_predictions_path, index=False)
Example 11
def open_csv(options, topic_name):
    path = utils.get_output_path(options, topic_name)
    path += ".csv"
    return open(path, 'w')
Example 12
RHO = 0.2

# mode parameters
ACO_MODE = 'elite_system'
RANK_FRAC = 0.1

# plotting parameters
SAVE_OUTPUT = True
N_PLOTS = 10

##### -------------------------------------------- #####

indx_list = []
dist_list = []
plot_steps = np.linspace(1, N_ITERATIONS - 1, N_PLOTS, dtype=int)
plot_path = get_output_path() if SAVE_OUTPUT else None

# initializing world
world = World(N_NODES,
              N_ANTS,
              20,
              alpha=ALPHA,
              beta=BETA,
              gamma=GAMMA,
              rho=RHO,
              aco_mode=ACO_MODE,
              rank_frac=RANK_FRAC)

# Step 0: Populating world with nodes and ants
world.populate_world()
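The snippet stops before the optimisation loop. A hypothetical continuation, in which World's stepping and plotting methods are assumptions and only the names defined above come from the snippet:

for step in range(N_ITERATIONS):
    best_index, best_distance = world.step()   # assumed World API
    indx_list.append(best_index)
    dist_list.append(best_distance)
    if SAVE_OUTPUT and step in plot_steps:
        world.plot(step=step, path=plot_path)  # assumed World API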
Example 13
if __name__ == "__main__":
    start_time = timeit.default_timer()

    cfg = get_gallery_config()
    gallery_path = cfg.get("setup", "location")
    title = cfg.get("setup", "title")
    row_height = cfg.get("setup", "row_height", fallback=150)
    recursive = cfg.getboolean("setup", "recursive", fallback=False)

    colours = get_section(cfg, 'colours', DEFAULT_COLOURS)

    output_path = get_output_path(title)

    if os.path.exists(output_path):
        has_confirmed = ask_user_to_confirm(title)

        if not has_confirmed:
            log("Exiting...")
            os._exit(0)

    log_dict("Generating gallery with config:", cfg._sections)

    groups = read_files(gallery_path, recursive)

    template = get_gallery_template()
    output = template.render(title=title,
                             groups=groups,
                             row_height=row_height,
                             colours=colours)

    save_gallery(title, output)
Example 14
# Choose the backbone: plain ResNet vs. (possibly hybrid) ViT.
# The snippet is truncated just before this branch, so the condition
# below is an assumption.
if "resnet" in opt.model_type:
    model_name = opt.model_type
    model = get_resnet_model(resnet_type=opt.model_type,
                             n_classes=opt.n_classes)
else:
    model_name = get_ViT_name(model_type=opt.model_type,
                              patch_size=opt.patch_size,
                              hybrid=opt.hybrid)
    model = get_ViT_model(type=opt.model_type,
                          image_size=opt.image_size,
                          patch_size=opt.patch_size,
                          n_classes=opt.n_classes,
                          n_channels=opt.n_channels,
                          dropout=opt.dropout,
                          hybrid=opt.hybrid)
output_graph_path, dump_file = get_output_path(
    model_name=model_name,
    root_path=opt.output_root_path,
    dataset_name=opt.dataset_name)
csv_result_path = os.path.join(opt.output_root_path, "models_results.csv")
val_ratio = opt.val_ratio
if opt.eval_type == "test":
    val_ratio = 0
train_loader, validation_loader, test_loader = get_loader_from_dataset(
    dataset_name=opt.dataset_name,
    root_path=opt.dataset_path,
    batch_size_train=opt.batch_size_train,
    batch_size_test=opt.batch_size_test,
    image_size=opt.image_size,
    augmentation=opt.data_augmentation,
    val_ratio=val_ratio,
    n_cpu=opt.n_cpu)
device = torch.device("cuda:0" if opt.cuda else "cpu")