def test_no_header(self):
    """Render a headerless CSV with ``--no-header-row`` and check every line.

    The expected table shows a ``column1``..``column3`` header row above the
    two data rows of ``examples/no_header_row3.csv``.
    """
    args = ['--no-header-row', 'examples/no_header_row3.csv']
    output_file = StringIO.StringIO()
    utility = CSVLook(args, output_file)
    utility.main()

    input_file = StringIO.StringIO(output_file.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----------+---------+----------|\n'
    expected_lines = [
        rule,
        '| column1 | column2 | column3 |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        '| 4 | 5 | 6 |\n',
        rule,
    ]
    for expected in expected_lines:
        # Builtin next() rather than the Python 2-only ``.next()`` method, so
        # the test keeps working with any iterator-protocol file object.
        self.assertEqual(next(input_file), expected)
def test_no_header(self):
    """Render a headerless CSV with ``--no-header-row`` and check every line.

    The expected table here has no header row: a rule, the two data rows of
    ``examples/no_header_row3.csv``, and a closing rule.
    """
    args = ['--no-header-row', 'examples/no_header_row3.csv']
    output_file = StringIO.StringIO()
    utility = CSVLook(args, output_file)
    utility.main()

    input_file = StringIO.StringIO(output_file.getvalue())

    # NOTE(review): the data rows are narrower than the rule lines; column
    # padding may have been lost -- confirm against real csvlook output.
    rule = '|----+---+----|\n'
    expected_lines = [
        rule,
        '| 1 | 2 | 3 |\n',
        '| 4 | 5 | 6 |\n',
        rule,
    ]
    for expected in expected_lines:
        # Builtin next() rather than the Python 2-only ``.next()`` method, so
        # the test keeps working with any iterator-protocol file object.
        self.assertEqual(next(input_file), expected)
def test_simple(self):
    """Run csvlook on ``examples/dummy3.csv`` and compare the table line by line."""
    rendered = six.StringIO()
    CSVLook(['examples/dummy3.csv'], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----+---+----|\n'
    expected_lines = (
        rule,
        '| a | b | c |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        '| 1 | 4 | 5 |\n',
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)
def test_phabricator(self):
    """Check the ``--phabricator`` rendering of ``examples/dummy3.csv``.

    The expected rule lines use ``|`` as the column joint, and the output must
    end right after the last data row (no closing rule).
    """
    rendered = six.StringIO()
    CSVLook(['examples/dummy3.csv', '--phabricator'], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----|---|----|\n'
    expected_lines = (
        rule,
        '| a | b | c |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        '| 1 | 4 | 5 |\n',
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)

    # Nothing may follow the final data row.
    self.assertRaises(StopIteration, next, lines)
def test_no_header(self):
    """Check the byte output of csvlook for a headerless CSV (``--no-header-row``).

    The expected table shows a ``column1``..``column3`` header row above the
    two data rows.
    """
    rendered = six.BytesIO()
    CSVLook(["--no-header-row", "examples/no_header_row3.csv"], rendered).main()
    lines = six.BytesIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = b"|----------+---------+----------|\n"
    expected_lines = (
        rule,
        b"| column1 | column2 | column3 |\n",
        rule,
        b"| 1 | 2 | 3 |\n",
        b"| 4 | 5 | 6 |\n",
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)
def test_simple(self):
    """Check the byte output of csvlook for ``examples/dummy3.csv`` line by line."""
    rendered = six.BytesIO()
    CSVLook(["examples/dummy3.csv"], rendered).main()
    lines = six.BytesIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = b"|----+---+----|\n"
    expected_lines = (
        rule,
        b"| a | b | c |\n",
        rule,
        b"| 1 | 2 | 3 |\n",
        b"| 1 | 4 | 5 |\n",
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)
def test_my_unicode(self):
    """Check csvlook output for ``examples/customer_sample.csv``.

    The single expected data row contains a non-ASCII surname and an empty
    last column.
    """
    rendered = six.StringIO()
    CSVLook(["examples/customer_sample.csv"], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = "|-----+------------+-----------+----------|\n"
    expected_lines = (
        rule,
        "| id | first_name | last_name | company |\n",
        rule,
        "| 1 | Ramon | Jašek | |\n",
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)
def test_no_header(self):
    """Check csvlook output for a headerless CSV (``--no-header-row``).

    The expected table shows a ``column1``..``column3`` header row above the
    two data rows, and nothing may follow the closing rule.
    """
    rendered = six.StringIO()
    CSVLook(['--no-header-row', 'examples/no_header_row3.csv'], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----------+---------+----------|\n'
    expected_lines = (
        rule,
        '| column1 | column2 | column3 |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        '| 4 | 5 | 6 |\n',
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)

    # The table must be the whole output.
    self.assertRaises(StopIteration, next, lines)
def test_unicode(self):
    """Check csvlook output for ``examples/test_utf8.csv``, including a non-ASCII cell."""
    rendered = six.StringIO()
    CSVLook(["examples/test_utf8.csv"], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = "|----+---+----|\n"
    expected_lines = (
        rule,
        "| a | b | c |\n",
        rule,
        "| 1 | 2 | 3 |\n",
        # Only this row is written as an explicit unicode literal.
        u"| 4 | 5 | ʤ |\n",
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)
def test_unicode(self):
    """Check csvlook output for ``examples/test_utf8.csv``, including a non-ASCII cell.

    Nothing may follow the closing rule.
    """
    rendered = six.StringIO()
    CSVLook(['examples/test_utf8.csv'], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----+---+----|\n'
    expected_lines = (
        rule,
        '| a | b | c |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        # Only this row is written as an explicit unicode literal.
        u'| 4 | 5 | ʤ |\n',
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)

    # The table must be the whole output.
    self.assertRaises(StopIteration, next, lines)
def test_truncate(self):
    """Check csvlook output for ``examples/dummy4.csv`` run with ``--truncate 3``.

    The expected table has a header, three data rows, and a closing rule with
    nothing after it.
    """
    rendered = six.StringIO()
    CSVLook(['examples/dummy4.csv', '--truncate', '3'], rendered).main()
    lines = six.StringIO(rendered.getvalue())

    # NOTE(review): the header/data rows are narrower than the rule lines;
    # column padding may have been lost -- confirm against real csvlook output.
    rule = '|----+---+----|\n'
    expected_lines = (
        rule,
        '| a | b | c |\n',
        rule,
        '| 1 | 2 | 3 |\n',
        '| 1 | 4 | 5 |\n',
        '| 6 | 7 | 8 |\n',
        rule,
    )
    for expected in expected_lines:
        self.assertEqual(next(lines), expected)

    # The table must be the whole output.
    self.assertRaises(StopIteration, next, lines)
def convert_attachment(attachment): outfile_path = '/tmp/' + attachment.file.name.split('/')[-1] outfile = open(outfile_path, 'w') ext = outfile_path.split('.')[-1].lower() if ext not in ('xls', 'xlsx', 'csv', 'pdf'): return infile_path = attachment.file.path if ext == 'pdf': infile_path = tabula_csv(attachment, listify=False) # in2csv ... can't say the python module utlities are working 100% here #convert = In2CSV(args=[attachment.file.path], output_file=outfile) #convert.main() import subprocess print 'attachment_id = ', attachment.id print 'agency = ', get_attachment_agency(attachment) print 'infile_path = ', infile_path if ext == 'csv': # already csv outfile_path = infile_path else: subprocess.call( [ 'in2csv', #'--format',outfile_path.split('.')[-1].lower(), infile_path ], #attachment.file.path], stdout=outfile) # label column headers by zero-index headers = get_headers(outfile_path) if headers: write_header_indices(outfile_path, headers) # csvlook file | head output = StringIO.StringIO() look = CSVLook(args=[outfile_path, '-l'], output_file=output) look.main() input = StringIO.StringIO(output.getvalue()) return input else: print 'something wrong with headers. attachment id', attachment.id
def run_words(self, words):
    """Send a question to the configured model service and display the answer.

    The cached database file is uploaded to ``service['url']`` together with
    the question (``words`` joined by spaces). If the reply contains SQL, the
    SQL is formatted, written to a temporary ``.sql`` file and handed to
    ``_main`` (presumably to display it -- confirm), then executed against the
    cached SQLite database; the rows are written to a temporary ``.csv`` file
    and rendered with ``CSVLook``. A reply without SQL is printed as JSON.

    Args:
        words: iterable of strings making up the question.

    Raises:
        SystemExit: with code 1 when ``db``, ``api`` or ``cached`` is missing
            from ``self.config``.
    """
    config = self.config
    required = (
        ('db',
         'Please specify database to analyze with --db foo.csv, or --db foo.sqlite, or --db postgres://...'),
        ('api',
         'Please specify model api with --docker sqlova|valuenet|irnet, or --api tag http://localhost:5050'),
        ('cached', 'Database not cached, please retry'),
    )
    for key, complaint in required:
        if key not in config:
            print(complaint)
            # Same effect as exit(1), without relying on the site-provided helper.
            raise SystemExit(1)

    service = self.get_service()
    mode = service.get('mode') or 'sqlite'
    fname = config['cached' if mode == 'sqlite' else 'cached_csv']
    with open(fname, 'rb') as fin:
        if mode == 'csv':
            files = {mode: ('{}.csv'.format(config['table_name']), fin)}
        else:
            files = {mode: fin}
        data = {'q': ' '.join(words)}
        response = requests.post(service['url'], files=files, data=data)

    j = response.json()
    self.log('response is {}'.format(json.dumps(j)))
    if 'result' in j:
        j = j['result']

    # A sqlite upload may be rejected by services that want CSV; retry that way.
    if ('error' in j and j['error'] == 'please include a csv file'
            and mode == 'sqlite'):
        self.rerun_with_csv(words)
        return

    if 'sql' not in j:
        print(json.dumps(j, indent=2))
        return

    sql = sqlparse.format(j['sql'], reindent=True, keyword_case='upper')
    # delete=False: the file must outlive the handle so _main can read it by name.
    with tempfile.NamedTemporaryFile('wb', delete=False, suffix=".sql") as fout:
        fout.write(sql.encode('utf-8'))
    _main(["_", fout.name])

    # Treat a missing or null 'params' entry as "no parameters".
    params = j.get('params') or []
    if params:
        code = " {}".format(json.dumps(params))
        print(highlight(code, PythonLexer(), TerminalFormatter()), end='')

    conn = sqlite3.connect(config['cached'])
    try:
        try:
            # SQLova setup assumes case insensitivity.
            # Should really mark all the columns, or update the SQL carefully,
            # but instead I just stick COLLATE NOCASE at the end and hope for
            # the best.
            cursor = conn.execute(sql + ' COLLATE NOCASE', params)
        except Exception:
            # Best-effort fallback: run the statement as given. Unlike a bare
            # except, this lets KeyboardInterrupt/SystemExit through.
            cursor = conn.execute(sql, params)
        # newline='' is what the csv module requires of its file object;
        # without it Windows output gets an extra blank line after every row.
        with tempfile.NamedTemporaryFile('w', delete=False, suffix=".csv",
                                         newline='') as fout2:
            writer = csv.writer(fout2)
            writer.writerow([d[0] for d in cursor.description])
            writer.writerows(cursor)
    finally:
        # Always release the database handle, even if the query fails.
        conn.close()

    try:
        CSVLook(['--no-inference', fout2.name]).run()
    except Exception:
        # CSVLook can fail if a row is blank :(
        with open(fout2.name, 'r') as fin:
            print(fin.read())