Esempio n. 1
0
def clean(value):
    """
    Remove characters from *value* that some operating systems do not
    allow in file names.

    The value is encoded as ISO-8859-1 and passed through asciiDammit,
    then each of the characters * | / : " < > ? is deleted.
    """
    latin1_value = value.encode('iso-8859-1')
    ascii_value = asciiDammit(latin1_value)

    # NOTE(review): inside the character class `\/` matches only a forward
    # slash, so a backslash in the input is left untouched.
    forbidden_chars = re.compile(r'[*|\/:"<>?]')
    return forbidden_chars.sub('', ascii_value)
Esempio n. 2
0
def reportHelper(localAppDatadir, run_id, app_name):
    """
    Render ``report.html`` for one run of an application.

    The top level of ``localAppDatadir/run_id`` is scanned: every ``.png``
    file is inlined as a base64 ``<img>`` tag, and every file whose name ends
    in ``waypoint.sqlite`` is opened so that its tables (those with fewer
    than 500 rows) can be passed to the template.

    localAppDatadir -- directory containing one sub-folder per run
    run_id          -- name of the run's sub-folder
    app_name        -- compared with ``appName`` of the entries in
                       ``configDict['applications']`` to look up an optional
                       ``tableOrder`` list

    Returns the result of ``render_template('report.html', ...)``.
    """
    import base64

    # list all files in the report folder (first os.walk entry only)
    dirname, dirnames, filenames = next(os.walk(localAppDatadir + '/' + run_id))
    filepaths = ["file://localhost/" + dirname + "/" + z for z in filenames]

    # identify all png files in the directory and inline them as data URIs
    imagetags = []
    for image in filenames:
        if not str(image).endswith('.png'):
            continue
        # `with` closes the handle instead of leaving it to the garbage collector
        with open(dirname + "/" + image, 'rb') as image_file:
            data_uri = base64.b64encode(image_file.read()).decode('ascii')
        imagetags.append('<img src="data:image/png;base64,{0}">'.format(data_uri))

    # identify waypoint databases in the folder
    databases = [dirname + '/' + x for x in filenames if str(x).endswith('waypoint.sqlite')]

    dbTables = collections.OrderedDict()
    colnames = {}
    # Bound up front so the template call below works when no database exists.
    # NOTE(review): with several databases only the last one's tags are kept,
    # while dbTables/colnames accumulate across all of them.
    tblTags = []

    # Preferred table order for this application; the lookup does not depend
    # on the database, so it is done once. A missing application entry simply
    # means "no preferred order".
    tableOrder = []
    if databases:
        app_config = next(
            (d for d in configDict['applications'] if d['appName'] == app_name),
            None)
        if app_config and 'tableOrder' in app_config:
            tableOrder = app_config['tableOrder']

    for db in databases:
        conn = sqlite3.connect(db)
        try:
            c = conn.cursor()
            c.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tblNms = sorted(tblNm[0] for tblNm in c.fetchall())

            # reorder tblNms according to tableOrder: listed tables first (in
            # the configured order), the remaining ones alphabetically after
            tn_in_db = []
            for tn in tableOrder:
                if tn in tblNms:
                    tn_in_db.append(tn)
                    tblNms.remove(tn)
            tblNms = tn_in_db + tblNms

            tblTags = ["#%s" % tblNm for tblNm in tblNms]

            # Iterate over individual tables and retrieve the row data for display
            # NOTE(review): table names are interpolated unquoted into the SQL.
            for tblNm in tblNms:
                rowcount = c.execute("SELECT count(*) row_count FROM %s" % tblNm).fetchone()[0]
                if rowcount < 500:
                    rows = c.execute('select * from %s' % tblNm)
                    # force ascii conversion for display
                    colnames[tblNm] = [asciiDammit(description[0]) for description in c.description]
                    dbTables[tblNm] = [[wpu.renderHtmlTableCell(cell) for cell in row] for row in rows]
        finally:
            # release the connection even if a query above fails
            conn.close()

    return render_template('report.html', dbpaths=databases, run_id=run_id, tableNames=tblTags, filenames=filenames, filepaths=filepaths, imagetags=imagetags, dbTables=dbTables, colnames=colnames, app_name=app_name)
Esempio n. 3
0
  for row in contribution_reader :
    try:
      c.execute('INSERT INTO raw_table VALUES '
                '(?, ?, ?, ?, ?, ?, ?, ?, '
                ' ?, ?, ?, ?, ?, ?, ?, '
                ' ?, ?, ?, ?, ?, ?, ?, '
                ' ?, ?, ?, ?, ?, ?, ?)',
                row[0:29])
    except sqlite3.ProgrammingError: 
      try:
        c.execute('INSERT INTO raw_table VALUES '
                  '(?, ?, ?, ?, ?, ?, ?, ?, '
                  ' ?, ?, ?, ?, ?, ?, ?, '
                  ' ?, ?, ?, ?, ?, ?, ?, '
                  ' ?, ?, ?, ?, ?, ?, ?)',
                  [asciiDammit(field) for field in row[0:29]])

      except:
        print "failed to import row"
        print row
        raise

conn.commit()

print 'creating donors table...'
c.execute("CREATE TABLE donors "
          "(donor_id INTEGER PRIMARY KEY, first_name TEXT, "
          " last_name TEXT, address_1 TEXT, address_2 TEXT, "
          " city TEXT, state TEXT, zip TEXT)")
c.execute("INSERT INTO donors "
          "(first_name, last_name, address_1,"
def get_text(jd):
    """
    Return the 'text' entry of *jd* passed through asciiDammit.

    An empty string is returned when the key is absent or its value is
    falsy (for example "" or None).
    """
    raw_text = jd.get('text', "")
    if not raw_text:
        return ""
    return asciiDammit(raw_text)
Esempio n. 5
0
def preProcess(column):
  """
  Normalise a raw column value.

  The value is passed through asciiDammit, runs of two or more spaces and
  every newline are each replaced by a single space, and the result is
  stripped of surrounding whitespace, then double quotes, then single
  quotes, and finally lower-cased.
  """
  cleaned = asciiDammit(column)

  # NOTE(review): newlines are replaced after space runs are collapsed, so
  # input such as "a \n b" still ends up with several consecutive spaces.
  for pattern in ('  +', '\n'):
    cleaned = re.sub(pattern, ' ', cleaned)

  cleaned = cleaned.strip()
  for quote_char in ('"', "'"):
    cleaned = cleaned.strip(quote_char)
  return cleaned.lower()
              violations text, x_coord real, y_coord real, lat real,
              long real, location text)''')
# Table that will hold the cleaned inspection records
clean_table_ddl = '''CREATE TABLE inspections_clean
             (inspection_id int, dba text, aka text, license_no int,
              facility_type text, risk text, address text, 
              zip text, inspection_date date, inspection_type text,
              results int, violations text, lat real, long real)'''
c.execute(clean_table_ddl)

conn.commit()

# Insert data into raw table (19 placeholders, one per inserted value)
raw_insert_sql = '''INSERT INTO inspections_raw VALUES
    (?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?)'''

with open('../data/food_inspections.csv', 'rb') as f:
  reader = csv.reader(f)
  # Discard the first row (presumably the CSV header - confirm)
  next(reader)
  for row in reader:
    # Every field goes through asciiDammit before it is stored
    values = [asciiDammit(field) for field in row]
    c.execute(raw_insert_sql, values)

conn.commit()


# Column names of the inspections_clean table, in declaration order
fields = (
  'inspection_id', 'dba', 'aka', 'license_no',
  'facility_type', 'risk', 'address', 'zip',
  'inspection_date', 'inspection_type', 'results',
  'violations', 'lat', 'long',
)
def get_text(jd):
    """
    Fetch the 'text' value from *jd* and convert it with asciiDammit.

    Returns "" when the key is missing or the stored value is falsy.
    """
    raw_text = jd.get('text', "")
    return asciiDammit(raw_text) if raw_text else ""
Esempio n. 8
0
from Corpus import Corpus
from Document import Document
import io, os
from AsciiDammit import asciiDammit
import numpy as np
from scipy.sparse import csc_matrix, lil_matrix
from scipy.spatial import distance
from sklearn.preprocessing import normalize

# 1-grams through n-grams will be compiled
n_gram_length = 4
corpus_title = "inaugural/all.txt"
# The context manager closes the corpus file as soon as it has been read
with open(corpus_title) as corpus_file:
    corp_text = asciiDammit(corpus_file.read())
corp = Corpus(corp_text, corpus_title, n_gram_length)

# Just a mapping of the vocabulary to the natural numbers
vocab_map = {}

for index, key in enumerate(corp.n_gram_count):
    vocab_map[key] = index

# List of inaugural speech document objects
speeches = []

# Hidden entries such as .DS_Store are skipped by name: os.listdir() returns
# entries in arbitrary order, so dropping index 0 could discard a real speech
# and keep the hidden file. Sorting makes the order of `speeches` repeatable.
# NOTE(review): corpus_title ("inaugural/all.txt") is inside this folder too,
# so it is also loaded as a Document - confirm that this is intended.
folder = "inaugural"
for file in sorted(os.listdir(folder)):
    if file.startswith("."):
        continue
    doc_name = folder + "/" + file
    with open(doc_name, "r") as doc_file:
        doc_text = doc_file.read()
    doc_text = asciiDammit(doc_text)
    speeches.append(Document(doc_text, file, corp, n_gram_length))
Esempio n. 9
0
              violations text, x_coord real, y_coord real, lat real,
              long real, location text)''')
# Table that will hold the cleaned inspection records
clean_table_ddl = '''CREATE TABLE inspections_clean
             (inspection_id int, dba text, aka text, license_no int,
              facility_type text, risk text, address text, 
              zip text, inspection_date date, inspection_type text,
              results int, violations text, lat real, long real)'''
c.execute(clean_table_ddl)

conn.commit()

# Insert data into raw table (19 placeholders, one per inserted value)
raw_insert_sql = '''INSERT INTO inspections_raw VALUES
    (?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?, ?,
     ?, ?, ?)'''

with open('../data/food_inspections.csv', 'rb') as f:
    reader = csv.reader(f)
    # Discard the first row (presumably the CSV header - confirm)
    next(reader)
    for row in reader:
        # Every field goes through asciiDammit before it is stored
        values = [asciiDammit(field) for field in row]
        c.execute(raw_insert_sql, values)

conn.commit()

# Column names of the inspections_clean table, in declaration order
fields = (
    'inspection_id', 'dba', 'aka', 'license_no',
    'facility_type', 'risk', 'address', 'zip',
    'inspection_date', 'inspection_type', 'results',
    'violations', 'lat', 'long',
)

# Load data to clean it