def move_label_from_labeled_to_pool(project_id, paper_i):
    """Move a paper from the labeled history back into the pool.

    Parameters
    ----------
    project_id:
        Identifier of the project whose pool/history files are updated.
    paper_i:
        Identifier of the paper to un-label; coerced to ``int``.

    Side effects: rewrites both the pool file and the label-history file,
    even when ``paper_i`` is not found in the history (original behavior).
    """
    # Load the current pool and the labeled history.
    pool_list = read_pool(project_id)
    labeled_list = read_label_history(project_id)

    # Hoisted out of the loop: this conversion is loop-invariant
    # (the original re-ran int(paper_i) on every iteration).
    paper_i = int(paper_i)

    labeled_list_new = []
    for item_id, item_label in labeled_list:
        item_id = int(item_id)
        if item_id == paper_i:
            # Matched: return this paper to the pool.
            pool_list.append(item_id)
        else:
            # Keep every other labeled record, with ids/labels normalized.
            labeled_list_new.append([item_id, int(item_label)])

    # Persist both stores.
    write_pool(project_id, pool_list)
    write_label_history(project_id, labeled_list_new)
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    """
    project_file_path = get_project_file_path(project_id)
    lock_path = get_lock_path(project_id)

    with SQLiteLock(lock_path, blocking=True, lock_name="active"):
        # Record the dataset path in the project JSON (overwrite if present).
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)
        project_dict["dataset_path"] = file_name
        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # Seed the iteration-0 pool with a shuffled copy of all record ids.
        record_ids = read_data(project_id).record_ids
        np.random.shuffle(record_ids)
        write_pool(project_id, record_ids.tolist())

        # Start with an empty labeling history.
        write_label_history(project_id, [])
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    """
    project_file_path = get_project_file_path(project_id)

    # Remove stale temporary project files before (re)initializing.
    clean_project_tmp_files(project_id)

    active_lock = SQLiteLock(
        get_lock_path(project_id),
        blocking=True,
        lock_name="active",
        project_id=project_id,
    )
    with active_lock:
        # Record the dataset path in the project JSON (overwrite if present).
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)
        project_dict["dataset_path"] = file_name
        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        as_data = read_data(project_id)

        if as_data.labels is None:
            # No prior labels: every record starts in the pool.
            pool_indices = as_data.record_ids
            label_indices = []
        else:
            # Split on LABEL_NA: unlabeled rows feed the pool, labeled rows
            # seed the label history as (record_id, label) pairs.
            unlabeled_rows = np.where(as_data.labels == LABEL_NA)[0]
            labeled_rows = np.where(as_data.labels != LABEL_NA)[0]
            pool_indices = as_data.record_ids[unlabeled_rows]
            label_indices = list(zip(
                as_data.record_ids[labeled_rows].tolist(),
                as_data.labels[labeled_rows].tolist(),
            ))

        np.random.shuffle(pool_indices)
        write_pool(project_id, pool_indices.tolist())

        # Initialize the label history (pre-labeled records, possibly empty).
        write_label_history(project_id, label_indices)
def move_label_from_pool_to_labeled(project_id, paper_i, label):
    """Remove a paper from the pool and append it to the label history."""
    paper_id = int(paper_i)

    # Take the paper out of the pool; if it is absent, do nothing
    # (best-effort semantics preserved from the original).
    pool_idx = read_pool(project_id)
    try:
        pool_idx.remove(paper_id)
    except (IndexError, ValueError):
        return
    write_pool(project_id, pool_idx)

    # Record the (paper, label) pair in the reviewed history.
    labeled = read_label_history(project_id)
    labeled.append([paper_id, int(label)])
    write_label_history(project_id, labeled)
def add_dataset_to_project(project_id, file_name):
    """Add file path to the project file.

    Add file to data subfolder and fill the pool of iteration 0.
    Pre-labeled records (label 0 or 1) are written to the label history;
    records with label LABEL_NA go into the pool.
    """
    project_file_path = get_project_file_path(project_id)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # Record the dataset path in the project JSON (overwrite if present).
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)
        project_dict["dataset_path"] = file_name
        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # Fill the pool of the first iteration.
        as_data = read_data(project_id)
        if as_data.labels is not None:
            unlabeled = np.where(as_data.labels == LABEL_NA)[0]
            pool_indices = as_data.record_ids[unlabeled]
            # FIX: the label history must be keyed by record id, consistent
            # with the pool above — the original stored the positional row
            # indices returned by np.where, which disagree with record_ids
            # whenever ids are not 0..n-1. Included-then-excluded ordering
            # is preserved.
            label_indices_included = \
                [[int(as_data.record_ids[i]), 1]
                 for i in np.where(as_data.labels == 1)[0]]
            label_indices_excluded = \
                [[int(as_data.record_ids[i]), 0]
                 for i in np.where(as_data.labels == 0)[0]]
            label_indices = label_indices_included + label_indices_excluded
        else:
            # No prior labels: every record starts in the pool.
            pool_indices = as_data.record_ids
            label_indices = []

        np.random.shuffle(pool_indices)
        write_pool(project_id, pool_indices.tolist())

        # Initialize the label history with any pre-labeled records.
        write_label_history(project_id, label_indices)