Python get_paragraphs Examples

Programming Language: Python

Namespace/Package Name: util.text_table_tools

Method/Function: get_paragraphs

Examples at hotexamples.com: 4

Python get_paragraphs - 4 examples found. These are the top-rated real-world Python examples of util.text_table_tools.get_paragraphs, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: label_new_documents.py Project: dssg/machine_learning_legislation

 def label_doc(self, doc_path, congress, chamber, document_type, number):
     """Label every row of every table found in a single document.

     Prints ``doc_path``, opens it for reading, splits it into paragraphs
     with ``text_table_tools.get_paragraphs``, detects tables with
     ``text_table_tools.identify_tables`` and calls ``self.label_row`` once
     per table row.

     Args:
         doc_path: Path of the document to open and label.
         congress: Passed to ``self.sponsor_coder.find_sponsor_index`` and
             forwarded to ``self.label_row``.
         chamber: Forwarded unchanged to ``self.label_row``.
         document_type: Forwarded unchanged to ``self.label_row``.
         number: Forwarded unchanged to ``self.label_row``.
     """
     # Parenthesised single-argument print behaves the same on Python 2 and 3.
     print(doc_path)
     # The file is closed deterministically when the block exits.  All of the
     # processing stays inside the ``with`` because the text_table_tools
     # helpers may consume the file lazily -- TODO confirm; if they read
     # eagerly the block could be narrowed to the get_paragraphs call.
     with open(doc_path, 'r') as doc_file:
         paragraphs_list = text_table_tools.get_paragraphs(doc_file)
         tables = text_table_tools.identify_tables(paragraphs_list)
         for table in tables:
             table_offset = table.offset
             column_indices = sorted(text_table_tools.get_candidate_columns(table))
             sponsor_indices = self.sponsor_coder.find_sponsor_index(table, congress)
             for row in table.rows:
                 self.label_row(row, column_indices, table_offset, congress, chamber, document_type, number, sponsor_indices)

Example #2

Show file

File: label_new_documents.py Project: dssg/machine_learning_legislation

 def label_doc(self, doc_path, congress, chamber, document_type, number):
     """Run row labelling over all tables detected in one document.

     The document path is printed, the file is read through
     ``text_table_tools.get_paragraphs``, tables are located with
     ``text_table_tools.identify_tables``, and each row of each table is
     handed to ``self.label_row``.

     Args:
         doc_path: Path of the document file to read.
         congress: Given to ``self.sponsor_coder.find_sponsor_index`` and
             passed on to ``self.label_row``.
         chamber: Passed on to ``self.label_row`` as-is.
         document_type: Passed on to ``self.label_row`` as-is.
         number: Passed on to ``self.label_row`` as-is.
     """
     # Single-argument print in call form is valid on both Python 2 and 3.
     print(doc_path)
     # Use a context manager so the handle is always released, including when
     # labelling raises.  The loop is kept inside the block in case the
     # paragraph/table helpers read from the file lazily -- TODO confirm.
     with open(doc_path, 'r') as doc_file:
         paragraphs_list = text_table_tools.get_paragraphs(doc_file)
         tables = text_table_tools.identify_tables(paragraphs_list)
         for table in tables:
             table_offset = table.offset
             column_indices = sorted(
                 text_table_tools.get_candidate_columns(table))
             sponsor_indices = self.sponsor_coder.find_sponsor_index(
                 table, congress)
             for row in table.rows:
                 self.label_row(row, column_indices, table_offset, congress,
                                chamber, document_type, number,
                                sponsor_indices)

Example #3

Show file

File: populate_tables_table.py Project: dssg/machine_learning_legislation

def extract_tables(document_paths):
    """Extract tables from each document and insert them into ``tables``.

    For every entry, the file at ``path[0]`` is opened as UTF-8, split into
    paragraphs with ``ttt.get_paragraphs`` and scanned with
    ``ttt.identify_tables``.  One row per detected table is then inserted
    through the module-level ``conn`` and committed.  A failure while
    building or inserting the rows of a document is printed and the loop
    moves on to the next document.

    Args:
        document_paths: Iterable of indexable entries where ``path[0]`` is
            the file path to read and ``path[1]`` is the value stored in
            the ``document_id`` column.
    """
    print("begin table extraction")
    # The statement is identical for every document, so build it once.
    cmd = 'INSERT INTO tables ("offset", "length", headers, title, body, content, document_id) VALUES (%s,%s,%s,%s,%s,%s,%s)'
    for path in document_paths:
        # Close the file deterministically after each document.  The table
        # handling stays inside the block in case the ttt helpers read the
        # file lazily -- TODO confirm.
        with codecs.open(path[0], 'r', 'utf8') as doc_file:
            paragraphs_list = ttt.get_paragraphs(doc_file)
            tables = ttt.identify_tables(paragraphs_list)
            try:
                params = [(t.offset, t.length, ','.join(t.header), ' '.join(t.title), ' '.join(t.body), ' '.join(t.content), path[1]) for t in tables]
                cur = conn.cursor()
                cur.executemany(cmd, params)
                conn.commit()
            except Exception as ex:
                # Deliberate best effort: report the document and continue.
                # NOTE(review): no conn.rollback() is issued here; on drivers
                # that abort the transaction after an error, later inserts
                # may keep failing -- confirm against the driver behind conn.
                print("Failed to import doc %s: %s" % (path[0], ex))

Example #4

Show file

def extract_tables(document_paths):
    """Detect tables in each document and store them in the ``tables`` table.

    Each entry supplies a file path (``path[0]``) that is opened as UTF-8
    and passed through ``ttt.get_paragraphs`` and ``ttt.identify_tables``.
    Every detected table becomes one inserted row, written via the
    module-level ``conn`` and committed per document.  If building or
    inserting the rows raises, the error is printed and processing
    continues with the next document.

    Args:
        document_paths: Iterable of indexable entries; ``path[0]`` is the
            file to read and ``path[1]`` is written to the ``document_id``
            column.
    """
    print("begin table extraction")
    # Loop-invariant SQL, hoisted out of the per-document loop.
    cmd = 'INSERT INTO tables ("offset", "length", headers, title, body, content, document_id) VALUES (%s,%s,%s,%s,%s,%s,%s)'
    for path in document_paths:
        # The context manager guarantees the handle is released per document.
        # Processing stays inside it because the ttt helpers may consume the
        # file lazily -- TODO confirm.
        with codecs.open(path[0], 'r', 'utf8') as doc_file:
            paragraphs_list = ttt.get_paragraphs(doc_file)
            tables = ttt.identify_tables(paragraphs_list)
            try:
                params = [
                    (t.offset, t.length, ','.join(t.header), ' '.join(t.title),
                     ' '.join(t.body), ' '.join(t.content), path[1])
                    for t in tables
                ]
                cur = conn.cursor()
                cur.executemany(cmd, params)
                conn.commit()
            except Exception as ex:
                # Best effort by design: log the failing document, keep going.
                # NOTE(review): the failed transaction is never rolled back;
                # verify whether the driver behind conn requires
                # conn.rollback() before the next insert can succeed.
                print("Failed to import doc %s: %s" % (path[0], ex))