Beispiel #1
0
def url_request(url):
    """Fetch a petl text table from *url*.

    The single-column header name is taken from the URL path segment
    between the 'v1/' prefix and the query string.  On a connection /
    authorization failure an empty string is returned instead of a table.
    """
    # Slice out the path piece after 'v1/' and before '?' to use as header.
    header_name = url[url.find('v1') + 3:url.find('?')]
    try:
        result = etl.fromtext(url, header=[header_name])
    except urllib.error.URLError:
        # Best-effort: report the failure and hand back an empty result.
        print('Connection error or Wrong token')
        result = ''
    return result
def main(
    plates_file,
    ticket_numbers_file,
    centroid_file,
    latlon_input,
):
    """ETL entry point: transform, filter, anonymize and geocode ticket rows.

    Reads rows as raw text (petl.fromtext with no source argument) and
    writes CSV (tocsv with no target argument).  Index files for plates
    and ticket numbers are loaded up front and saved back at the end so
    anonymized ids stay stable across runs.

    Parameters:
        plates_file: path of the plate -> anonymous-id index file.
        ticket_numbers_file: path of the ticket-number index file.
        centroid_file: path of the centroid lookup index file.
        latlon_input: passed through to get_transform_row (lat/lon config).
    """
    # Module-level state shared with the row callbacks (get_transform_row,
    # anonymize, geocode) that petl invokes during the pipeline below.
    global logger, plates, plates_counter, ticket_numbers, ticket_numbers_counter, centroids

    logger = get_logging()

    plates = load_index_file(plates_file, 'plates_file')
    plate_numbers_values = plates.values()
    # Resume the plate-id autoincrement from the largest id already assigned.
    if len(plate_numbers_values) > 0:
        plates_counter = max(map(lambda x: int(x['id']), plate_numbers_values))
    else:
        plates_counter = 0

    logger.info('Plates autoincrement starting at: {}'.format(plates_counter))

    ticket_numbers = load_index_file(ticket_numbers_file, 'license_file')
    ticket_numbers_values = ticket_numbers.values()
    # Same resume logic for anonymized ticket numbers (values are plain ints).
    if len(ticket_numbers_values) > 0:
        ticket_numbers_counter = max(
            map(lambda x: int(x), ticket_numbers_values))
    else:
        ticket_numbers_counter = 0

    logger.info('Ticket number autoincrement starting at: {}'.format(
        ticket_numbers_counter))

    centroids = load_index_file(centroid_file, 'centroid_file')

    # Pipeline: parse -> per-row transform -> keep rows with a positive fine
    # issued on/after 2012-01-01 -> anonymize -> geocode -> CSV out.
    # NOTE(review): `headers` is not defined in this view — presumably a
    # module-level constant; confirm.
    (petl.fromtext(strip=False).rowmap(
        get_transform_row(latlon_input), header=headers, failonerror=True
    ).select(
        '{fine} > 0.0 and ({issue_datetime}).isoformat() >= "2012-01-01T00:00:00Z"'
    ).rowmap(anonymize, header=headers,
             failonerror=True).rowmap(geocode,
                                      header=headers,
                                      failonerror=True).tocsv())

    # Success rate excludes rows that already carried GPS coordinates.
    logger.info(
        'Geocode stats - success rate: {:.2%}, successes: {}, gps: {}, zip: {}, failed_segment: {}, failed_address: {}'
        .format(
            geocode_stats['success'] /
            (geocode_stats['total'] - geocode_stats['gps']),
            geocode_stats['success'], geocode_stats['gps'],
            geocode_stats['zip'], geocode_stats['failed_segment'],
            geocode_stats['failed_address']))

    # NOTE(review): key is 'failed_segments' (plural) here but
    # 'failed_segment' in the stats line above — verify both exist.
    if len(geocode_stats['failed_segments']) > 0:
        logger.info('Failed Segments - {}'.format(','.join(
            geocode_stats['failed_segments'])))

    # Persist the updated indexes so the next run reuses the same anon ids.
    save_index_file(plates_file, plates,
                    ['id', 'plate', 'state', 'date_added'], 'plates_file')
    save_index_file(ticket_numbers_file, ticket_numbers,
                    ['ticket_number', 'anon_ticket_number'], 'license_file')
Beispiel #3
0
def test_fromtext():
    """fromtext() exposes each raw input line under a single 'lines' field."""

    # initial data — NamedTemporaryFile opens in binary mode by default, so
    # the writes must be bytes (the original str writes fail on Python 3)
    f = NamedTemporaryFile(delete=False)
    f.write(b'foo\tbar\n')
    f.write(b'a\t1\n')
    f.write(b'b\t2\n')
    f.write(b'c\t3\n')
    f.close()

    actual = fromtext(f.name)
    expect = (('lines', ), ('foo\tbar', ), ('a\t1', ), ('b\t2', ), ('c\t3', ))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #4
0
def test_fromtext():
    """fromtext() exposes each raw input line under a single 'lines' field."""

    # initial data — NamedTemporaryFile opens in binary mode by default, so
    # the writes must be bytes (the original str writes fail on Python 3)
    f = NamedTemporaryFile(delete=False)
    f.write(b"foo\tbar\n")
    f.write(b"a\t1\n")
    f.write(b"b\t2\n")
    f.write(b"c\t3\n")
    f.close()

    actual = fromtext(f.name)
    expect = (("lines",), ("foo\tbar",), ("a\t1",), ("b\t2",), ("c\t3",))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #5
0
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed input (by .gz suffix)."""

    # initial data — create a temp path, then rewrite it as a .gz file
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + ".gz"
    os.rename(f.name, fn)
    # gzip.open in "wb" mode requires bytes (the original str writes fail
    # on Python 3)
    with gzip.open(fn, "wb") as f:
        f.write(b"foo\tbar\n")
        f.write(b"a\t1\n")
        f.write(b"b\t2\n")
        f.write(b"c\t3\n")

    actual = fromtext(fn)
    expect = (("lines",), ("foo\tbar",), ("a\t1",), ("b\t2",), ("c\t3",))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #6
0
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed input (by .gz suffix)."""

    # initial data — create a temp path, then rewrite it as a .gz file
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + '.gz'
    os.rename(f.name, fn)
    # gzip.open in 'wb' mode requires bytes (the original str writes fail
    # on Python 3)
    with gzip.open(fn, 'wb') as f:
        f.write(b'foo\tbar\n')
        f.write(b'a\t1\n')
        f.write(b'b\t2\n')
        f.write(b'c\t3\n')

    actual = fromtext(fn)
    expect = (('lines', ), ('foo\tbar', ), ('a\t1', ), ('b\t2', ), ('c\t3', ))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #7
0
def test_fromtext():
    """fromtext() exposes each raw input line under a single 'lines' field."""

    # initial data — NamedTemporaryFile opens in binary mode by default, so
    # the writes must be bytes (the original str writes fail on Python 3)
    f = NamedTemporaryFile(delete=False)
    f.write(b'foo\tbar\n')
    f.write(b'a\t1\n')
    f.write(b'b\t2\n')
    f.write(b'c\t3\n')
    f.close()

    actual = fromtext(f.name)
    expect = (('lines',),
              ('foo\tbar',),
              ('a\t1',),
              ('b\t2',),
              ('c\t3',))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #8
0
def extract(file):
    """Load a table from *file* and return it as a petl table.

    *file* is either the literal string "sql" (interactive MySQL extraction)
    or a path whose extension selects the parser (.csv / .xlsx / .json /
    .xml / .txt).  Returns an empty list when no branch matches.
    """
    import getpass  # local import: only the interactive sql branch needs it

    tb = list()
    if file == "sql":
        # Collect connection details interactively (Python 3: input(), not
        # raw_input(); the original password line was corrupted/scrubbed —
        # restored here with getpass so the password is not echoed).
        host = input("Enter Host:")
        user = input("Enter Username:")
        pwd = getpass.getpass("Enter pwd:")
        dtb = input("Enter Database Name:")
        table = input("Enter Table Name:")
        conn = pymysql.connect(host=host,
                               user=user,
                               password=pwd,
                               db=dtb,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)
        # SECURITY: identifiers cannot be parameterized and `table` comes
        # from user input — validate/whitelist it before production use.
        temp = etl.fromdb(conn, "SELECT * FROM " + table)
        tb = d2l(temp)
    elif ".csv" in file:
        tb = etl.fromcsv(file)
    elif ".xlsx" in file:
        # fromxlsx reads .xlsx; fromxls (xlrd) only handles legacy .xls files.
        tb = etl.fromxlsx(file)
    elif ".json" in file:
        tb = etl.fromjson(file)
        print(tb)
    elif ".xml" in file:
        # Convert XML to JSON, keep the innermost subtree, and load that.
        with open(file, 'r') as fh:
            raw = fh.read()
        options = optparse.Values({"pretty": True})
        # xml2json already returns a JSON string, so parse it directly
        # (the original dumps + py2-only .decode('string-escape') round-trip
        # does not work on Python 3).
        ob = json.loads(xml2json.xml2json(raw, options))
        temp = dict()
        for key in ob.keys():
            for skey in ob[key].keys():
                # Overwrites each iteration: only the last subtree survives
                # (mirrors the original behavior).
                temp = json.dumps(ob[key][skey])
        with open("temp.json", "w") as tmp:
            tmp.write(temp)
        tb = etl.fromjson("temp.json")
        print(tb[0])
    elif ".txt" in file:
        tb = etl.fromtext(file)
        print(tb)
    return tb
Beispiel #9
0
def test_fromtext_gz():
    """fromtext() transparently reads gzip-compressed input (by .gz suffix)."""

    # initial data — create a temp path, then rewrite it as a .gz file
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + '.gz'
    os.rename(f.name, fn)
    # gzip.open in 'wb' mode requires bytes (the original str writes fail
    # on Python 3)
    with gzip.open(fn, 'wb') as f:
        f.write(b'foo\tbar\n')
        f.write(b'a\t1\n')
        f.write(b'b\t2\n')
        f.write(b'c\t3\n')

    actual = fromtext(fn)
    expect = (('lines',),
              ('foo\tbar',),
              ('a\t1',),
              ('b\t2',),
              ('c\t3',))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Beispiel #10
0
# Serialize `a` into the already-open pickle file.
pickle.dump(a, fileObject)

# here we close the fileObject
fileObject.close()

# Read the pickled table back and display it.
table3 = etl.frompickle('pickel_file.p')
print('Pick')
print(table3)

###################Reading Text Files#################################

# Write a small comma-separated sample file, then load it line by line.
text = 'a,1\nb,2\nc,2\n'
with open('example.txt', 'w') as f:
    f.write(text)

# fromtext exposes each raw line under a single 'lines' field.
table4 = etl.fromtext('example.txt')
print(table4)

################Reading XML files##################################

# Parse rows from the <tr>/<td> elements of data.xml.
table5 = etl.fromxml('data.xml', 'tr', 'td')
print(table5)

################Reading JSON files###############################

# Inline JSON sample: a list of objects, one row per object.
data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
Beispiel #11
0
    h = '#%02x%02x%02x' % tuple(int(255 * c) for c in pop_colors[p])

# chromatin
# Whitespace-delimited regions table: name, type, chromosome, start, stop.
_data_chromatin = b"""CHX     chro    X       20009764        24393108
CH2R    chro    2R      58984778        61545105
CH2L    chro    2L      1       2431617
PEU2L   chro    2L      2487770 5042389
IH2L    chro    2L      5078962 5788875
IH3R    chro    3R      38988757        41860198
CH3R    chro    3R      52161877        53200684
CH3L    chro    3L      1       1815119
PEU3L   chro    3L      1896830 4235209
IH3L    chro    3L      4264713 5031692
"""
# Parse into a petl table; use a raw string for the regex — the plain
# '\s+' literal is an invalid escape sequence (warning on modern Python).
tbl_chromatin = (etl.fromtext(etl.MemorySource(_data_chromatin)).split(
    'lines', r'\s+', ['name', 'type', 'chrom', 'start', 'stop']).convert(
        ('start', 'stop'), int).cutout('type'))

# genome regions
# (name, chromosome, start, stop) tuples used for downstream selection.
region_X_speciation = 'X-speciation', 'X', 15000000, 24000000
region_X_free = 'X-free', 'X', 1, 14000000
region_3L_free = '3L-free', '3L', 15000000, 41000000
region_3R_free = '3R-free', '3R', 1, 37000000


# noinspection PyGlobalUndefined
def init(release_dir, load_geneset=False):
    """Initialise data resources.

    Parameters
    ----------
Beispiel #12
0
from __future__ import division, print_function, absolute_import


# fromtext()
############

import petl as etl
# setup example file
text = 'a,1\nb,2\nc,2\n'
with open('example.txt', 'w') as f:
    f.write(text)

# Each input line becomes one row under a single 'lines' field.
table1 = etl.fromtext('example.txt')
table1
# post-process, e.g., with capture()
# capture() splits the raw line into 'foo' and 'bar' via regex groups.
table2 = table1.capture('lines', '(.*),(.*)$', ['foo', 'bar'])
table2


# totext()
##########

import petl as etl
# In-memory table: header row followed by data rows.
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
prologue = '''{| class="wikitable"
|-
! foo
! bar
Beispiel #13
0
def load(filename):
    """Read a whitespace-delimited lon/lat/value grid file.

    The first line of *filename* is skipped (assumed preamble), each
    remaining line is split into float (lon, lat, value) fields, and only
    rows with 115 < lon < 125 and 20 < lat < 28 are kept.  Returns the
    data rows as tuples (the header row is dropped).
    """
    # r'\s+' (raw string) — the plain '\s+' literal is an invalid escape
    # sequence and warns on modern Python.
    sequence = petl.fromtext(filename).skip(1).setheader(['lines']).split(
        'lines', r'\s+').setheader(['lon', 'lat', 'value']).convertall(
            float).select("115 < {lon} < 125 and 20 < {lat} < 28").tuple()
    return sequence[1:]  # skip header