Example #1
0
def prepare_multi_modal_data(files_path, task: Task, images_size=(128, 128), with_split=True):
    """Load numeric, image and text views of one dataset with a shared binary target.

    ``files_path`` is joined onto the path returned by ``fedot_project_root()``
    and handed to ``unpack_archived_data`` before anything is read. The
    ``'rating'`` label of the numeric data is binarised (``0`` when the value
    is ``<= 7``, otherwise ``1``) and the resulting array is used as the
    target/labels of all three datasets.

    :param files_path: dataset location, joined onto the project root
    :param task: task object forwarded to every ``InputData`` constructor
    :param images_size: forwarded to ``InputData.from_image`` as ``target_size``
    :param with_split: if true, each dataset goes through
        ``train_test_data_setup`` (no shuffling, ratio 0.5); otherwise the
        same full dataset is returned as both its train and test part
    :return: ``(train_num, test_num, train_img, test_img, train_text, test_text)``
    """
    split_ratio = 0.5

    project_root = str(fedot_project_root())
    dataset_dir = os.path.join(project_root, files_path)
    unpack_archived_data(dataset_dir)

    # Numeric modality: two scalar JSON fields.
    numeric = InputData.from_json_files(
        dataset_dir,
        fields_to_use=['votes', 'year'],
        label='rating',
        task=task,
    )

    # Turn the rating into a two-class label shared by every modality.
    labels = np.asarray([0 if rating <= 7 else 1 for rating in numeric.target])
    numeric.target = labels

    # Image modality: glob pattern for the jpeg files, built from files_path.
    jpeg_pattern = os.path.join(project_root, f'{files_path}/*.jpeg')
    images = InputData.from_image(
        images=jpeg_pattern,
        labels=labels,
        task=task,
        target_size=images_size,
    )

    # Text modality: the 'plot' field of the same JSON files.
    texts = InputData.from_json_files(
        dataset_dir,
        fields_to_use=['plot'],
        label='rating',
        task=task,
        data_type=DataTypesEnum.text,
    )
    texts.target = labels

    result = []
    for dataset in (numeric, images, texts):
        if with_split:
            train_part, test_part = train_test_data_setup(
                dataset, shuffle_flag=False, split_ratio=split_ratio)
        else:
            train_part, test_part = dataset, dataset
        result.extend((train_part, test_part))

    return tuple(result)
Example #2
0
def test_data_from_json():
    """Check ``InputData.from_json_files`` with several fields and with one field.

    Data is read from ``test/data/multi_modal`` under the path returned by
    ``fedot_project_root()``, using ``'rating'`` as the label and a
    regression task.
    """
    data_dir = os.path.join(
        str(fedot_project_root()),
        os.path.join('test', 'data', 'multi_modal'),
    )

    def load(fields):
        # A fresh Task is created for every load.
        return InputData.from_json_files(data_dir,
                                         fields_to_use=fields,
                                         label='rating',
                                         task=Task(TaskTypesEnum.regression))

    # Several features: expect one column per requested field.
    multi = load(['votes', 'year'])
    assert multi.features.shape[1] == 2
    rows = multi.features.shape[0]
    assert len(multi.target) == rows
    assert rows == len(multi.idx)

    # Single feature: expect a one-dimensional features array.
    single = load(['votes'])
    assert len(single.features.shape) == 1
    n_features = len(single.features)
    assert len(single.target) == n_features
    assert n_features == len(single.idx)