Ejemplo n.º 1
0
def move_label_from_labeled_to_pool(project_id, paper_i):

    # load the papers from the pool
    pool_list = read_pool(project_id)

    # Add the paper to the reviewed papers.
    labeled_list = read_label_history(project_id)

    labeled_list_new = []

    for item_id, item_label in labeled_list:

        item_id = int(item_id)
        item_label = int(item_label)
        paper_i = int(paper_i)

        if paper_i == item_id:
            pool_list.append(item_id)
        else:
            labeled_list_new.append([item_id, item_label])

    # write the papers to the label dataset
    write_pool(project_id, pool_list)

    # load the papers from the pool
    write_label_history(project_id, labeled_list_new)
Ejemplo n.º 2
0
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    """

    project_file_path = get_project_file_path(project_id)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # open the projects file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # add path to dict (overwrite if already exists)
        project_dict["dataset_path"] = file_name

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # fill the pool of the first iteration
        pool_indices = read_data(project_id).record_ids
        np.random.shuffle(pool_indices)

        write_pool(project_id, pool_indices.tolist())

        # make a empty qeue for the items to label
        write_label_history(project_id, [])
Ejemplo n.º 3
0
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    """

    project_file_path = get_project_file_path(project_id)

    # clean temp project files
    clean_project_tmp_files(project_id)

    with SQLiteLock(
            get_lock_path(project_id),
            blocking=True,
            lock_name="active",
            project_id=project_id
    ):
        # open the projects file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # add path to dict (overwrite if already exists)
        project_dict["dataset_path"] = file_name

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # fill the pool of the first iteration
        as_data = read_data(project_id)

        if as_data.labels is not None:
            unlabeled = np.where(as_data.labels == LABEL_NA)[0]
            pool_indices = as_data.record_ids[unlabeled]

            labeled_indices = np.where(as_data.labels != LABEL_NA)[0]
            label_indices = list(zip(
                as_data.record_ids[labeled_indices].tolist(),
                as_data.labels[labeled_indices].tolist()
            ))
        else:
            pool_indices = as_data.record_ids
            label_indices = []

        np.random.shuffle(pool_indices)
        write_pool(project_id, pool_indices.tolist())

        # make a empty qeue for the items to label
        write_label_history(project_id, label_indices)
Ejemplo n.º 4
0
def move_label_from_pool_to_labeled(project_id, paper_i, label):

    # load the papers from the pool
    pool_idx = read_pool(project_id)

    # Remove the paper from the pool.
    try:
        pool_idx.remove(int(paper_i))
    except (IndexError, ValueError):
        return

    write_pool(project_id, pool_idx)

    # Add the paper to the reviewed papers.
    labeled = read_label_history(project_id)
    labeled.append([int(paper_i), int(label)])
    write_label_history(project_id, labeled)
Ejemplo n.º 5
0
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    """

    project_file_path = get_project_file_path(project_id)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # open the projects file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # add path to dict (overwrite if already exists)
        project_dict["dataset_path"] = file_name

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # fill the pool of the first iteration
        as_data = read_data(project_id)

        if as_data.labels is not None:
            unlabeled = np.where(as_data.labels == LABEL_NA)[0]
            pool_indices = as_data.record_ids[unlabeled]

            label_indices_included = \
                [[int(x), 1] for x in np.where(as_data.labels == 1)[0]]
            label_indices_excluded = \
                [[int(x), 0] for x in np.where(as_data.labels == 0)[0]]
            label_indices = label_indices_included + label_indices_excluded
        else:
            pool_indices = as_data.record_ids
            label_indices = []

        np.random.shuffle(pool_indices)
        write_pool(project_id, pool_indices.tolist())

        # make a empty qeue for the items to label
        write_label_history(project_id, label_indices)