예제 #1
0
def get_all_documents(doccano_client: DoccanoClient, project_id):
    """Fetch every document of a project in a single list request.

    The request limit is set to the project's ``total`` statistic and the
    offset to 0, so one call asks for all documents at once.

    Args:
        doccano_client: Client used to query the Doccano API.
        project_id: Identifier of the project whose documents are fetched.

    Returns:
        The ``results`` entry of the document-list response. Each
        document's ``meta`` value is replaced in place by the result of
        ``json.loads`` on its original value.
    """
    statistics = doccano_client.get_project_statistics(project_id)
    query = {
        'limit': [statistics['total']],
        'offset': [0],
    }
    response = doccano_client.get_document_list(project_id, query)
    fetched = response['results']
    # Decode the JSON-encoded metadata on the same dict objects we return.
    for entry in fetched:
        entry['meta'] = json.loads(entry['meta'])
    return fetched
예제 #2
0
# imports
import json
import os
import psycopg2
import pandas as pd
from doccano_api_client import DoccanoClient

# Client used to pull project data from the Doccano API.
# NOTE(review): the server address and credentials are hard-coded below;
# consider loading them from configuration or the environment instead.
doccano_client = DoccanoClient('http://15.207.89.34', 'admin', 'spinnaker')

# Fetch the statistics for project 1 once and read both counters from the
# same response: this avoids a second request and keeps 'total' and
# 'remaining' consistent with each other.
project_stats = doccano_client.get_project_statistics(1)  # 1 is the project_id
total_doc = project_stats['total']
remaining_doc = project_stats['remaining']

# ID of the first entry in the 'results' of the document list for project 1.
# NOTE(review): presumably 'results' is a list, in which case [0] raises
# IndexError when the project has no documents - confirm and guard if needed.
documnet_iter_start = doccano_client.get_document_list(1)['results'][0]['id']
try:
    connection = psycopg2.connect(user="******",
                                  password="******",
                                  host="127.0.0.1",
                                  port="5432",
                                  database="sahib")
    cursor = connection.cursor()
    postgres_insert_query = """ INSERT INTO output_data (ID, info) VALUES (%s,%s)"""
    for i in range(documnet_iter_start, documnet_iter_start + total_doc):
        s = doccano_client.get_document_detail(1, i)
        doc_id = s['id']
        doc_text = s['text'].replace("'", "")
        doc_ann = s['annotations']
        temp = {"text": doc_text, "annotations": doc_ann}
        cursor.execute(