Exemple #1
0
 def __init__(self, path="", rootDir=configuration.get_path_to_reports()):
     """Store the report path and split it into components below the root.

     path    -- path to a report. The leading len(rootDir) characters are
                sliced off without checking that they actually match the
                root, so the path is presumably expected to start with it.
     rootDir -- root directory of the reports. The default is whatever
                configuration.get_path_to_reports() returned when this
                method was defined, not when it is called.
     """
     # The root is always stored with a trailing slash.
     normalised_root = rootDir if rootDir.endswith('/') else rootDir + '/'

     self.path = path
     self.rootDir = normalised_root
     self.CONN_STRING = configuration.get_connection_string()

     # Drop the root prefix by length, then break the remainder on '/'.
     below_root = path[len(normalised_root):]
     self.pathParts = below_root.split('/')
 def __init__(self, path="", rootDir=configuration.get_path_to_reports()):
     """Store the report path and split it into components below the root.

     path    -- path to a report. The leading len(rootDir) characters are
                sliced off without checking that they actually match the
                root, so the path is presumably expected to start with it.
     rootDir -- root directory of the reports. The default is whatever
                configuration.get_path_to_reports() returned when this
                method was defined, not when it is called.
     """
     # The root is always stored with a trailing slash.
     normalised_root = rootDir if rootDir.endswith('/') else rootDir + '/'

     self.path = path
     self.rootDir = normalised_root
     self.CONN_STRING = configuration.get_connection_string()

     # Drop the root prefix by length, then break the remainder on '/'.
     below_root = path[len(normalised_root):]
     self.pathParts = below_root.split('/')
import os, sys
import psycopg2
import argparse

import configuration

# Database connection string, fetched from the project configuration once at
# import time; get_entities() passes it to psycopg2.connect().
CONN_STRING = configuration.get_connection_string()


def get_entities(max_occurence):
    """Return the names of frequently occurring non-currency entities.

    Groups the rows of the ``entities`` table by
    (entity_text, entity_inferred_name), ignoring rows whose entity_type is
    'Currency', and keeps the groups that occur more than ``max_occurence``
    times.

    max_occurence -- occurrence threshold; despite the name it acts as a
                     lower bound (the query uses ``count(*) > %s``).

    Returns a set holding both the entity_text and the entity_inferred_name
    of every qualifying group.

    The connection is always closed before returning, including when the
    query raises; database errors propagate to the caller.
    """
    cmd = "select entity_text, entity_inferred_name, count(*) c from entities \
        where entity_type != 'Currency' group by entity_text, \
        entity_inferred_name having count(*) > %s  order by c desc"

    conn = psycopg2.connect(CONN_STRING)
    try:
        cur = conn.cursor()
        # The threshold is passed as a bound parameter, never interpolated.
        cur.execute(cmd, (max_occurence, ))

        entities = set()
        for row in cur.fetchall():
            # Columns: 0 = entity_text, 1 = entity_inferred_name, 2 = count.
            entities.add(row[0])
            entities.add(row[1])
        return entities
    finally:
        # Previously this close sat after the return and never ran.
        conn.close()

Exemple #4
0
 def __init__(self, path="", rootDir=configuration.get_path_to_bills()):
     """Store the bill path, the slash-terminated root and the DB settings.

     path    -- path to a bill; stored unchanged.
     rootDir -- root directory of the bills. The default is whatever
                configuration.get_path_to_bills() returned when this
                method was defined, not when it is called.
     """
     # The root is always stored with a trailing slash.
     normalised_root = rootDir if rootDir.endswith('/') else rootDir + '/'

     self.path = path
     self.rootDir = normalised_root
     self.CONN_STRING = configuration.get_connection_string()
 def __init__(self, path="", rootDir=configuration.get_path_to_bills()):
     """Store the bill path, the slash-terminated root and the DB settings.

     path    -- path to a bill; stored unchanged.
     rootDir -- root directory of the bills. The default is whatever
                configuration.get_path_to_bills() returned when this
                method was defined, not when it is called.
     """
     # The root is always stored with a trailing slash.
     normalised_root = rootDir if rootDir.endswith('/') else rootDir + '/'

     self.path = path
     self.rootDir = normalised_root
     self.CONN_STRING = configuration.get_connection_string()

import os, sys
import codecs
import psycopg2
import csv

from path_tools import BillPathUtils
from sunlight_id_to_path import sunlightid_to_path

import configuration
# Connection string comes from the project configuration module.
CONN_STRING =  configuration.get_connection_string()

# Everything below runs at import time: open a connection and fetch every
# distinct bill_id that has at least one 'Currency' entity in old_billentities.
# NOTE(review): conn is not closed anywhere in the visible part of the script;
# confirm it is closed further down.
conn = psycopg2.connect(CONN_STRING)
cmd = "select distinct bill_id from  old_billentities where entity_type = 'Currency'"
cur = conn.cursor()
cur.execute(cmd)
# Each fetched row carries the bill_id in position 0.
ids = cur.fetchall()
# bill_id split on '-'. In the visible code only the commented-out path
# construction below reads these parts (and bpu).
split_ids = [ i[0].split("-") for i in ids]
bpu = BillPathUtils();
#paths = [bpu.get_bill_path( int(split_id[1]), split_id[0], split_id[2])+'document.txt' for split_id in split_ids]
# Resolve every bill_id to a path with sunlightid_to_path.
paths = [sunlightid_to_path(i[0]) for i in ids]
# Python 2 print statements: a three-item sample, then the number of bills found.
print paths[0:3]
print "bills with Currency",  len(ids)


#get bill from file

# schema: id, entity_text, entity_type, entity_offset, entity_length, entity_name, bill_id
# index:  0   1            2            3              4              5            6
pre_window = 700