def url_request(url):
    """Fetch *url* as a petl text table.

    The single-column header name is derived from the URL path segment that
    follows ``v1/`` and precedes the query string.  On a connection failure
    (or an invalid token, which also surfaces as a URLError) a message is
    printed and the empty string is returned instead of a table.
    """
    # Slice out the path portion between 'v1/' and '?' to use as the header.
    start = url.find('v1') + 3
    end = url.find('?')
    table_header = url[start:end]
    try:
        return etl.fromtext(url, header=[table_header])
    except urllib.error.URLError:
        print('Connection error or Wrong token')
        return ''
def main(
    plates_file,
    ticket_numbers_file,
    centroid_file,
    latlon_input,
):
    """Run the ticket anonymize/geocode ETL pipeline.

    Reads input rows from stdin via ``petl.fromtext()``, transforms,
    anonymizes and geocodes them, and writes CSV to stdout via ``tocsv()``.
    Persistent lookup state (plates, ticket numbers, centroids) is loaded
    from the given index files at start and the mutated plate/ticket indexes
    are saved back at the end.

    NOTE(review): relies on module-level names defined elsewhere in the file
    (``get_logging``, ``load_index_file``, ``save_index_file``, ``headers``,
    ``get_transform_row``, ``anonymize``, ``geocode``, ``geocode_stats``,
    ``petl``) — confirm against the full module.
    """
    global logger, plates, plates_counter, ticket_numbers, ticket_numbers_counter, centroids
    logger = get_logging()
    # Load plate index; the autoincrement counter resumes from the largest
    # previously-assigned id (or 0 for an empty index).
    plates = load_index_file(plates_file, 'plates_file')
    plate_numbers_values = plates.values()
    if len(plate_numbers_values) > 0:
        plates_counter = max(map(lambda x: int(x['id']), plate_numbers_values))
    else:
        plates_counter = 0
    logger.info('Plates autoincrement starting at: {}'.format(plates_counter))
    # Same resume-from-max scheme for anonymized ticket numbers; here the
    # stored values are the ids themselves, not dicts.
    ticket_numbers = load_index_file(ticket_numbers_file, 'license_file')
    ticket_numbers_values = ticket_numbers.values()
    if len(ticket_numbers_values) > 0:
        ticket_numbers_counter = max(
            map(lambda x: int(x), ticket_numbers_values))
    else:
        ticket_numbers_counter = 0
    logger.info('Ticket number autoincrement starting at: {}'.format(
        ticket_numbers_counter))
    centroids = load_index_file(centroid_file, 'centroid_file')
    # Pipeline: stdin text -> transform -> filter (positive fine, issued on
    # or after 2012-01-01) -> anonymize -> geocode -> CSV on stdout.
    # fromtext() with no source reads from stdin; strip=False keeps line
    # whitespace intact for the row transform.
    (petl.fromtext(strip=False).rowmap(
        get_transform_row(latlon_input), header=headers, failonerror=True
    ).select(
        '{fine} > 0.0 and ({issue_datetime}).isoformat() >= "2012-01-01T00:00:00Z"'
    ).rowmap(anonymize, header=headers, failonerror=True).rowmap(
        geocode, header=headers, failonerror=True).tocsv())
    # Success rate excludes rows that already carried GPS coordinates.
    logger.info(
        'Geocode stats - success rate: {:.2%}, successes: {}, gps: {}, zip: {}, failed_segment: {}, failed_address: {}'
        .format(
            geocode_stats['success'] /
            (geocode_stats['total'] - geocode_stats['gps']),
            geocode_stats['success'], geocode_stats['gps'],
            geocode_stats['zip'], geocode_stats['failed_segment'],
            geocode_stats['failed_address']))
    if len(geocode_stats['failed_segments']) > 0:
        logger.info('Failed Segments - {}'.format(','.join(
            geocode_stats['failed_segments'])))
    # Persist the (possibly grown) plate and ticket-number indexes so the
    # next run resumes its counters from where this one stopped.
    save_index_file(plates_file, plates,
                    ['id', 'plate', 'state', 'date_added'], 'plates_file')
    save_index_file(ticket_numbers_file, ticket_numbers,
                    ['ticket_number', 'anon_ticket_number'], 'license_file')
def test_fromtext():
    """fromtext() yields each raw line under a single 'lines' header."""
    # initial data
    # FIX: NamedTemporaryFile defaults to binary mode ('w+b'); writing str
    # raises TypeError on Python 3, so open the file in text mode.
    f = NamedTemporaryFile(mode='w', delete=False)
    f.write('foo\tbar\n')
    f.write('a\t1\n')
    f.write('b\t2\n')
    f.write('c\t3\n')
    f.close()
    actual = fromtext(f.name)
    expect = (('lines', ), ('foo\tbar', ), ('a\t1', ), ('b\t2', ),
              ('c\t3', ))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromtext():
    """fromtext() yields each raw line under a single 'lines' header."""
    # initial data
    # FIX: NamedTemporaryFile defaults to binary mode ("w+b"); writing str
    # raises TypeError on Python 3, so open the file in text mode.
    f = NamedTemporaryFile(mode="w", delete=False)
    f.write("foo\tbar\n")
    f.write("a\t1\n")
    f.write("b\t2\n")
    f.write("c\t3\n")
    f.close()
    actual = fromtext(f.name)
    expect = (("lines",), ("foo\tbar",), ("a\t1",), ("b\t2",), ("c\t3",))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed sources (.gz)."""
    # initial data: reserve a temp name, then rename it to carry the .gz
    # suffix that fromtext() uses to detect compression.
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + ".gz"
    os.rename(f.name, fn)
    # FIX: gzip.open in "wb" mode rejects str on Python 3; use text mode
    # ("wt") so the str writes below work.
    with gzip.open(fn, "wt") as f:
        f.write("foo\tbar\n")
        f.write("a\t1\n")
        f.write("b\t2\n")
        f.write("c\t3\n")
    actual = fromtext(fn)
    expect = (("lines",), ("foo\tbar",), ("a\t1",), ("b\t2",), ("c\t3",))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed sources (.gz)."""
    # initial data: reserve a temp name, then rename it to carry the .gz
    # suffix that fromtext() uses to detect compression.
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + '.gz'
    os.rename(f.name, fn)
    # FIX: gzip.open in 'wb' mode rejects str on Python 3; use text mode
    # ('wt') so the str writes below work.
    with gzip.open(fn, 'wt') as f:
        f.write('foo\tbar\n')
        f.write('a\t1\n')
        f.write('b\t2\n')
        f.write('c\t3\n')
    actual = fromtext(fn)
    expect = (('lines', ), ('foo\tbar', ), ('a\t1', ), ('b\t2', ),
              ('c\t3', ))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromtext():
    """fromtext() yields each raw line under a single 'lines' header."""
    # initial data
    # FIX: NamedTemporaryFile defaults to binary mode ('w+b'); writing str
    # raises TypeError on Python 3, so open the file in text mode.
    f = NamedTemporaryFile(mode='w', delete=False)
    f.write('foo\tbar\n')
    f.write('a\t1\n')
    f.write('b\t2\n')
    f.write('c\t3\n')
    f.close()
    actual = fromtext(f.name)
    expect = (('lines',), ('foo\tbar',), ('a\t1',), ('b\t2',), ('c\t3',))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def extract(file): tb = list() if file == "sql": host = raw_input("Enter Host:") user = raw_input("Enter Username:"******"Enter pwd:") dtb = raw_input("Enter Database Name:") table = raw_input("Enter Table Name:") conn = pymysql.connect(host=host, user=user, password=pwd, db=dtb, charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor) temp = etl.fromdb(conn, "SELECT * FROM " + table) tb = d2l(temp) elif ".csv" in file: tb = etl.fromcsv(file) elif ".xlsx" in file: tb = etl.fromxls(file) elif ".json" in file: tb = etl.fromjson(file) print tb elif ".xml" in file: f = open(file, 'r').read() options = optparse.Values({"pretty": True}) jsn = json.dumps(xml2json.xml2json(f, options)) ob = json.loads(jsn.decode('string-escape').strip('"')) temp = dict() for key in ob.keys(): for skey in ob[key].keys(): temp = json.dumps(ob[key][skey]) with open("temp.json", "w") as tmp: tmp.write(temp) tb = etl.fromjson("temp.json") print tb[0] #tb = etl.fromxml(file,'.//ROW',{'Service_Name':'Service_Name','Status':'Status','Service_Type':'Service_Type','Time':'Time'}) elif ".txt" in file: tb = etl.fromtext(file) print tb return tb
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed sources (.gz)."""
    # initial data: reserve a temp name, then rename it to carry the .gz
    # suffix that fromtext() uses to detect compression.
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + '.gz'
    os.rename(f.name, fn)
    # FIX: gzip.open in 'wb' mode rejects str on Python 3; use text mode
    # ('wt') so the str writes below work.
    with gzip.open(fn, 'wt') as f:
        f.write('foo\tbar\n')
        f.write('a\t1\n')
        f.write('b\t2\n')
        f.write('c\t3\n')
    actual = fromtext(fn)
    expect = (('lines',), ('foo\tbar',), ('a\t1',), ('b\t2',), ('c\t3',))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
pickle.dump(a, fileObject) # here we close the fileObject fileObject.close() table3 = etl.frompickle('pickel_file.p') print('Pick') print(table3) ###################Reading Text Files################################# text = 'a,1\nb,2\nc,2\n' with open('example.txt', 'w') as f: f.write(text) table4 = etl.fromtext('example.txt') print(table4) ################Reading XML files################################## table5 = etl.fromxml('data.xml', 'tr', 'td') print(table5) ################Reading JASON files############################### data = ''' [{"foo": "a", "bar": 1}, {"foo": "b", "bar": 2}, {"foo": "c", "bar": 2}] ''' with open('example.json', 'w') as f:
h = '#%02x%02x%02x' % tuple(int(255 * c) for c in pop_colors[p]) # chromatin _data_chromatin = b"""CHX chro X 20009764 24393108 CH2R chro 2R 58984778 61545105 CH2L chro 2L 1 2431617 PEU2L chro 2L 2487770 5042389 IH2L chro 2L 5078962 5788875 IH3R chro 3R 38988757 41860198 CH3R chro 3R 52161877 53200684 CH3L chro 3L 1 1815119 PEU3L chro 3L 1896830 4235209 IH3L chro 3L 4264713 5031692 """ tbl_chromatin = (etl.fromtext(etl.MemorySource(_data_chromatin)).split( 'lines', '\s+', ['name', 'type', 'chrom', 'start', 'stop']).convert( ('start', 'stop'), int).cutout('type')) # genome regions region_X_speciation = 'X-speciation', 'X', 15000000, 24000000 region_X_free = 'X-free', 'X', 1, 14000000 region_3L_free = '3L-free', '3L', 15000000, 41000000 region_3R_free = '3R-free', '3R', 1, 37000000 # noinspection PyGlobalUndefined def init(release_dir, load_geneset=False): """Initialise data resources. Parameters ----------
from __future__ import division, print_function, absolute_import # fromtext() ############ import petl as etl # setup example file text = 'a,1\nb,2\nc,2\n' with open('example.txt', 'w') as f: f.write(text) table1 = etl.fromtext('example.txt') table1 # post-process, e.g., with capture() table2 = table1.capture('lines', '(.*),(.*)$', ['foo', 'bar']) table2 # totext() ########## import petl as etl table1 = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]] prologue = '''{| class="wikitable" |- ! foo ! bar
def load(filename):
    """Load a whitespace-delimited lon/lat/value grid file as row tuples.

    Skips the file's first line, parses each remaining line into three
    floats, and keeps only points inside the window 115 < lon < 125 and
    20 < lat < 28.  Returns a tuple of (lon, lat, value) rows with the
    petl header row removed.
    """
    # FIX: use a raw string for the regex — '\s' is an invalid escape
    # sequence in an ordinary string (DeprecationWarning since Python 3.6).
    sequence = petl.fromtext(filename).skip(1).setheader(['lines']).split(
        'lines', r'\s+').setheader(['lon', 'lat', 'value']).convertall(
            float).select("115 < {lon} < 125 and 20 < {lat} < 28").tuple()
    return sequence[1:]  # skip header