def get_statistics(project_id):
    """Summarise the labelling progress of a project.

    The label history and the current labels are read while holding the
    project's "active" lock; the counts are then derived from them.

    Parameters
    ----------
    project_id:
        Identifier of the project; forwarded to the lock-path and read
        helpers.

    Returns
    -------
    dict
        ``n_included``: number of current labels equal to 1.
        ``n_excluded``: number of current labels equal to 0.
        ``n_since_last_inclusion``: number of history entries after the
        most recent entry whose inclusion value is 1 (the whole history
        length when there is no such entry).
        ``n_papers``: total number of current labels.
        ``n_pool``: ``n_papers - n_included - n_excluded``.
    """
    lock_path = get_lock_path(project_id)

    # Read both label sources under the project's "active" lock.
    with SQLiteLock(lock_path, blocking=True, lock_name="active"):
        history = read_label_history(project_id)
        labels = read_current_labels(project_id, label_history=history)

    # Walking the history backwards, the offset of the first inclusion is
    # exactly the number of entries recorded since it. Without any
    # inclusion, every entry counts.
    since_last_inclusion = next(
        (
            steps_back
            for steps_back, (_, inclusion) in enumerate(reversed(history))
            if inclusion == 1
        ),
        len(history),
    )

    included = int(np.count_nonzero(labels == 1))
    excluded = int(np.count_nonzero(labels == 0))
    total = len(labels)

    return {
        "n_included": included,
        "n_excluded": excluded,
        "n_since_last_inclusion": since_last_inclusion,
        "n_papers": total,
        "n_pool": total - included - excluded,
    }
def export_to_string(project_id, export_type="csv"):
    """Export the project's records, ranked, as CSV or Excel.

    Records are ordered as: records labelled 1, then the still unlabelled
    records (label ``LABEL_NA``) by descending probability, then records
    labelled 0.

    Parameters
    ----------
    project_id:
        Identifier of the project; forwarded to the path and read helpers.
    export_type: str
        ``"csv"`` (default) or ``"excel"``.

    Returns
    -------
    The result of ``as_data.to_csv(fp=None, ...)`` for ``"csv"``, or of
    ``as_data.to_excel(fp=<tmp dir>/export_result.xlsx, ...)`` for
    ``"excel"``.

    Raises
    ------
    ValueError
        If ``export_type`` is neither ``"csv"`` nor ``"excel"``.
    """
    lock_path = get_lock_path(project_id)
    as_data = read_data(project_id)

    # Probabilities and labels are read under the project's "active" lock.
    with SQLiteLock(lock_path, blocking=True, lock_name="active"):
        stored_proba = read_proba(project_id)
        # Without stored probabilities, fall back to a descending sequence
        # so the unlabelled records keep their original relative order.
        proba = (
            np.arange(len(as_data))[::-1]
            if stored_proba is None
            else np.array(stored_proba)
        )
        labels = read_current_labels(project_id, as_data=as_data)

    unlabelled = np.nonzero(labels == LABEL_NA)[0]
    relevant = np.nonzero(labels == 1)[0]
    irrelevant = np.nonzero(labels == 0)[0]

    # Negating the scores turns argsort's ascending order into descending.
    by_descending_proba = np.argsort(-proba[unlabelled])
    ranking = np.concatenate(
        (relevant, unlabelled[by_descending_proba], irrelevant),
        axis=None,
    )

    if export_type == "csv":
        return as_data.to_csv(fp=None, labels=labels, ranking=ranking)

    if export_type != "excel":
        raise ValueError("This export type isn't implemented.")

    # Excel output is written to a file in the project's temporary folder.
    tmp_dir = get_tmp_path(project_id)
    tmp_dir.mkdir(exist_ok=True)
    excel_path = Path(tmp_dir, "export_result.xlsx")
    return as_data.to_excel(fp=excel_path, labels=labels, ranking=ranking)
def export_to_string(project_id, export_type="csv"):
    """Export the project's records, ranked, as CSV or Excel.

    This definition is the one bound to ``export_to_string`` after the
    module is loaded, so it accepts ``export_type`` as well: callers that
    pass only ``project_id`` keep getting CSV output, and callers that
    request ``"excel"`` no longer fail with a ``TypeError``.

    Records are ordered as: records labelled 1, then the still unlabelled
    records (label ``LABEL_NA``) by descending probability, then records
    labelled 0.

    Parameters
    ----------
    project_id:
        Identifier of the project; forwarded to the path and read helpers.
    export_type: str
        ``"csv"`` (default) or ``"excel"``.

    Returns
    -------
    The result of ``as_data.to_csv(fp=None, ...)`` for ``"csv"``, or of
    ``as_data.to_excel(fp=<tmp dir>/export_result.xlsx, ...)`` for
    ``"excel"``.

    Raises
    ------
    ValueError
        If ``export_type`` is neither ``"csv"`` nor ``"excel"``.
    """
    fp_lock = get_lock_path(project_id)
    as_data = read_data(project_id)

    # Probabilities and labels are read under the project's "active" lock.
    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        proba = read_proba(project_id)
        if proba is None:
            # No stored probabilities: use a descending sequence so the
            # unlabelled records keep their original relative order.
            proba = np.flip(np.arange(len(as_data)))
        else:
            proba = np.array(proba)

        labels = read_current_labels(project_id, as_data=as_data)

    pool_idx = np.where(labels == LABEL_NA)[0]
    one_idx = np.where(labels == 1)[0]
    zero_idx = np.where(labels == 0)[0]

    # Negating the scores turns argsort's ascending order into descending.
    proba_order = np.argsort(-proba[pool_idx])
    ranking = np.concatenate(
        (one_idx, pool_idx[proba_order], zero_idx), axis=None)

    if export_type == "csv":
        return as_data.to_csv(fp=None, labels=labels, ranking=ranking)

    if export_type == "excel":
        # Excel output is written to a file in the project's temp folder.
        tmp_path = get_tmp_path(project_id)
        tmp_path.mkdir(exist_ok=True)
        fp_tmp_export = Path(tmp_path, "export_result.xlsx")
        return as_data.to_excel(
            fp=fp_tmp_export, labels=labels, ranking=ranking)

    raise ValueError("This export type isn't implemented.")