def test_teecsv_unicode():
    t1 = ((u"name", u"id"),
          (u"Արամ Խաչատրյան", 1),
          (u"Johann Strauß", 2),
          (u"Вагиф Сәмәдоғлу", 3),
          (u"章子怡", 4))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teecsv(f1.name, encoding="utf-8")
        .selectgt("id", 1)
        .tocsv(f2.name, encoding="utf-8"))
    ieq(t1,
        etl.fromcsv(f1.name, encoding="utf-8").convertnumbers())
    ieq(etl.wrap(t1).selectgt("id", 1),
        etl.fromcsv(f2.name, encoding="utf-8").convertnumbers())
def test_teecsv():
    t1 = (("foo", "bar"),
          ("a", 2),
          ("b", 1),
          ("c", 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teecsv(f1.name, encoding="ascii")
        .selectgt("bar", 1)
        .tocsv(f2.name, encoding="ascii"))
    ieq(t1,
        etl.fromcsv(f1.name, encoding="ascii").convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1),
        etl.fromcsv(f2.name, encoding="ascii").convertnumbers())
def test_fromcsv_cachetag():
    """Test the cachetag method on tables returned by fromcsv."""

    # initial data
    f = NamedTemporaryFile(delete=False)
    writer = csv.writer(f)
    table = (("foo", "bar"),
             ("a", 1),
             ("b", 2),
             ("c", 2))
    for row in table:
        writer.writerow(row)
    f.close()

    # cachetag with initial data
    tbl = fromcsv(f.name)
    tag1 = tbl.cachetag()

    # make a change
    with open(f.name, "wb") as o:
        writer = csv.writer(o)
        rows = (("foo", "bar"),
                ("d", 3),
                # ("e", 5),
                ("f", 4))
        for row in rows:
            writer.writerow(row)

    # check cachetag has changed
    tag2 = tbl.cachetag()
    assert tag2 != tag1, (tag2, tag1)
def test_stringsource():
    tbl1 = (('foo', 'bar'),
            ('a', '1'),
            ('b', '2'),
            ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    etl.tocsv(tbl1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    tbl2 = etl.fromcsv(StringSource(actual))
    ieq(tbl1, tbl2)
    ieq(tbl1, tbl2)  # verify can iterate twice

    # test appending
    etl.appendcsv(tbl1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    eq_(expect, actual)
def check_bx_book_ratings(fn):
    """Check the consistency of the BX-Book-Ratings.csv file."""
    tab = etl.fromcsv(fn, delimiter=DELIMITER, quoting=QUOTE_ALL,
                      encoding=ENCODING)
    v = Validator({
        "User-ID": {"type": "string", "required": True},
        "ISBN": {"type": "string", "required": True},
        "Book-Rating": {"type": "string", "validator": validate_rating,
                        "required": True},
    })
    for row_num, r in enumerate(tab.dicts(), 1):
        is_valid = v.validate(r)
        if not is_valid:
            print "row %d -> %r, %r" % (row_num, v.errors, r)
    return
def test_fromcsv_gz():
    """Test the fromcsv function on a gzipped file."""
    f = NamedTemporaryFile(delete=False)
    f.close()
    fn = f.name + '.gz'
    os.rename(f.name, fn)
    fz = gzip.open(fn, 'wb')
    writer = csv.writer(fz, delimiter='\t')
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    for row in table:
        writer.writerow(row)
    fz.close()
    actual = fromcsv(fn, delimiter='\t')
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromcsv_cachetag_strict():
    """Test the cachetag method on tables returned by fromcsv."""

    # initial data
    f = NamedTemporaryFile(delete=False)
    writer = csv.writer(f)
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    for row in table:
        writer.writerow(row)
    f.close()

    # cachetag with initial data
    tbl = fromcsv(FileSource(f.name, checksumfun=adler32sum))
    tag1 = tbl.cachetag()

    # make a change, preserving file size
    with open(f.name, 'wb') as o:
        writer = csv.writer(o)
        rows = (('foo', 'bar'),
                ('d', 3),
                ('e', 5),
                ('f', 4))
        for row in rows:
            writer.writerow(row)

    # check cachetag has changed
    tag2 = tbl.cachetag()
    assert tag2 != tag1, (tag2, tag1)
def check_bx_users(fn):
    """Check the consistency of the BX-Users.csv file."""
    tab = etl.fromcsv(fn, delimiter=DELIMITER, quoting=QUOTE_ALL,
                      encoding=ENCODING)
    v = Validator({
        "User-ID": {"type": "string", "regex": USER_ID_PAT, "required": True},
        "Location": {"type": "string", "required": True},
        "Age": {"type": "string", "validator": validate_age, "required": True},
    })
    for row_num, r in enumerate(tab.dicts(), 1):
        is_valid = v.validate(r)
        if not is_valid:
            print "row %d -> %r, %r" % (row_num, v.errors, r)
    return
def test_popensource():
    expect = (('foo', 'bar'),)
    delimiter = ' '
    actual = etl.fromcsv(PopenSource(r'echo foo bar', shell=True),
                         delimiter=delimiter)
    ieq(expect, actual)
def test_teecsv_write_header():
    t1 = (("foo", "bar"),
          ("a", "2"),
          ("b", "1"),
          ("c", "3"))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .convertnumbers()
        .teecsv(f1.name, write_header=False, encoding="ascii")
        .selectgt("bar", 1)
        .tocsv(f2.name, encoding="ascii"))
    ieq(t1[1:], etl.fromcsv(f1.name, encoding="ascii"))
    ieq(etl.wrap(t1).convertnumbers().selectgt("bar", 1),
        etl.fromcsv(f2.name, encoding="ascii").convertnumbers())
def really_read_filelines(filename, p_train_data, split_mode):
    # Load the table
    csvfile = fromcsv(filename)
    train_data, test_data = split_dataset(csvfile, p_train_data, split_mode)
    return train_data, test_data
def from_csv(source=None, encoding=None, errors='strict', description=None,
             **csvargs):
    table = etl.fromcsv(source, encoding, errors, **csvargs)
    dataset = Dataset()
    dataset.header = table.header
    dataset.rows = table.records
    # FIXME: configure name, etc.
    return dataset
def check_bx_books(fn):
    """Check the consistency of the BX-Books.csv file."""
    tab = etl.fromcsv(fn, delimiter=DELIMITER, quoting=QUOTE_ALL,
                      encoding=ENCODING)
    v = Validator({
        "ISBN": {"type": "string", "validator": validate_not_null_str,
                 "required": True},
        "Book-Title": {"type": "string", "validator": validate_not_null_str,
                       "required": True},
        "Book-Author": {"type": "string", "validator": validate_not_null_str,
                        "required": True},
        "Year-Of-Publication": {"type": "string", "regex": YEAR_PAT,
                                "required": True},
        "Publisher": {"type": "string", "validator": validate_not_null_str,
                      "required": True},
        "Image-URL-S": {"type": "string", "regex": URL_PAT, "required": True},
        "Image-URL-M": {"type": "string", "regex": URL_PAT, "required": True},
        "Image-URL-L": {"type": "string", "regex": URL_PAT, "required": True},
    })
    for row_num, r in enumerate(tab.dicts(), 1):
        is_valid = v.validate(r)
        if not is_valid:
            print "row %d -> %r, %r" % (row_num, v.errors, r)
    return
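# The custom callables referenced above (validate_rating, validate_age,
# validate_not_null_str) are defined elsewhere in the source. Hypothetical
# sketches of their likely shape, using the legacy Cerberus validator
# signature f(field, value, error); the value ranges are assumptions based on
# the Book-Crossing dataset, not taken from the source:
def validate_not_null_str(field, value, error):
    if not value or not value.strip():
        error(field, "must be a non-empty string")

def validate_rating(field, value, error):
    if not (value.isdigit() and 0 <= int(value) <= 10):
        error(field, "must be an integer between 0 and 10")

def validate_age(field, value, error):
    if value != "NULL" and not value.isdigit():
        error(field, "must be an integer or the literal string 'NULL'")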
def create_config(csvfile, config_name):
    '''Creates a configuration file from a CSV file.'''
    print csvfile
    # only prompt for confirmation if the .ini file already exists;
    # otherwise proceed and create it
    var = 'Yes'
    try:
        open(config_name + ".ini")
        var = raw_input("This file already exists. Do you wish to continue? (Yes/No) ")
    except IOError:
        pass
    if var == 'Yes':
        cfgfile = open(config_name + ".ini", "w")
        examplefile = open(config_name + ".example", "w")
    else:
        print "goodbye"
        sys.exit()

    c = fromcsv(csvfile)
    columns = header(c)
    it = iterdata(c)
    print it.next()
    examplefile.write(str(see(rowslice(c, 2, 3))))
    examplefile.close()

    # add the settings to the structure of the file, and let's write it out...
    Config = ConfigParser.ConfigParser()
    # don't change names to lower case
    Config.optionxform = str
    Config.add_section('FieldTypes')
    Config.add_section('FieldMap')
    for name in columns:
        #Config.set('FieldTypes', c)
        #print name
        new = name
        new = new.split("(", 1)[0].strip()
        # connect words with underscores
        new = new.replace("/", "_")
        new = new.replace(" ", "_")
        new = new.replace("-", "_")
        new = new.lower()
        # try to guess some of the column types from the names
        if "amount" in name:
            print name
            Config.set('FieldMap', name, new + " FLOAT")
        else:
            print name
            Config.set('FieldMap', name, new + " VARCHAR(10)")
    Config.write(cfgfile)
    cfgfile.close()
def test_memorysource_2():
    data = 'foo,bar\r\na,1\r\nb,2\r\nc,2\r\n'
    if not PY2:
        data = data.encode('ascii')
    actual = etl.fromcsv(MemorySource(data))
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def main():
    hours = load_pickles('disneyland-hours.pickle')
    events = load_pickles('disneyland-events.pickle')
    passes = load_pickles('disneyland-passes.pickle')
    forecast = get_forecast()

    spreadsheet = petl.fromcsv(
        'https://spreadsheets.google.com/tq?key=%s&gid=0&tqx=out:csv'
        % DISNEY_SPREADSHEET_KEY)

    events_lookup = {}
    for item in events:
        buff = events_lookup.get(item['date'].date(), {})
        buff[item['park']] = item
        buff['date'] = item['date'].date()
        events_lookup[item['date'].date()] = buff

    for item in spreadsheet.dicts():
        # print item
        sheet_date = du_parse(item['date']).date()
        if events_lookup.has_key(sheet_date):
            e = events_lookup[sheet_date]
            e['disneyland']['crowd_level'] = item['disneyland_crowd_level']
            e['disneyland']['closures'] = [
                x for x in map(unicode.strip,
                               item['disneyland_closures'].split(',')) if x]
            e['disney-california-adventure']['crowd_level'] = \
                item['california_adventure_crowd_level']
            e['disney-california-adventure']['closures'] = [
                x for x in map(unicode.strip,
                               item['california_adventure_closures'].split(',')) if x]

    for item in hours:
        if events_lookup.has_key(item['date'].date()):
            events_lookup[item['date'].date()][item['park']]['hours'] = item

    for item in passes:
        # print item
        if events_lookup.has_key(item['date']):
            events_lookup[item['date']]['passes'] = item

    for date, item in forecast.items():
        if events_lookup.has_key(date.date()):
            events_lookup[date.date()]['forecast'] = item

    f = StringIO()
    from pprint import pprint
    pprint(events_lookup)
    pickle.dump(sorted(events_lookup.values(), key=lambda x: x['date']), f)
    s3_save(f, 'disneyland-merged_data.pickle')
    f.seek(0)
    s3_save(f, 'disney-california-adventure-merged_data.pickle')
def _load_db(self, taiwan_date_str=None, is_delete=False):
    if not (taiwan_date_str is None):
        self.set_trade_date(taiwan_date_str)
    raw_file = self.source_file
    tse = etl.fromcsv(raw_file)
    connection = self._get_connection()  # psycopg2.connect(_CONNECTION)
    if is_delete:
        self.clean_db(self._trade_date)
    # assuming table "quotes" already exists in the database; tse needs to
    # include the header row. Signature for reference:
    # petl.io.db.todb(table, dbo, tablename, schema=None, commit=True,
    #                 create=False, drop=False, constraints=True,
    #                 metadata=None, dialect=None, sample=1000)
    etl.todb(tse, connection, 'quotes', drop=False)  # , truncate=False
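# For context, a minimal self-contained sketch of the same etl.todb pattern
# against SQLite (the table name and columns here are hypothetical); todb
# appends into an existing table unless asked to create or drop it:
import sqlite3
import petl as etl

conn = sqlite3.connect(':memory:')
# todb appends into an existing table, so create it first
conn.execute('CREATE TABLE quotes (symbol TEXT, price REAL)')
tbl = [('symbol', 'price'), ('2330', 598.0), ('2317', 104.5)]
etl.todb(tbl, conn, 'quotes')  # the header row is required, as noted above
print(conn.execute('SELECT * FROM quotes').fetchall())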
def main(argv):
    parser = argparse.ArgumentParser(description='Dumps CSV-formatted ID-related info for every individual profile '
                                                 'in CCB. Pipe into a file to create CSV file.')
    parser.add_argument("--individuals-filename", required=True,
                        help="Input UTF8 CSV with individuals data dumped from Servant Keeper")
    parser.add_argument('--trace', action='store_true',
                        help="If specified, prints tracing/progress messages to stdout")
    args = parser.parse_args()
    assert os.path.isfile(args.individuals_filename), \
        "Error: cannot open file '" + args.individuals_filename + "'"

    trace('RETRIEVING INDIVIDUALS XML FROM CCB...', args.trace, banner=True)
    tmp_filename = http_get2tmp_file('https://ingomar.ccbchurch.com/api.php?srv=individual_profiles',
                                     settings.ccbapi.username, settings.ccbapi.password)
    xml_tree = ET.parse(tmp_filename)
    xml_root = xml_tree.getroot()

    trace('WALKING XML TO CREATE SK2CCB ID MAP DICTIONARY...', args.trace, banner=True)
    sk2ccb_id_map_dict = xml2id_dict(xml_root)
    os.remove(tmp_filename)

    trace('WALKING SK DATA TO IDENTIFY GROUPS TO SET ON PER-INDIVIDUAL BASIS...', args.trace, banner=True)
    table = petl.fromcsv(args.individuals_filename)
    sk_indiv_id2groups = gather_semi_sep_by_indiv_id(table, {
        'Mailing Lists': ['Home Touch', 'Rummage Sale'],
        'Activities': ['Veteran', 'Celebration Singers', 'Wesleyan Choir', 'Praise Team']})
    sk_indiv_id2name = gather_name_by_indiv_id(table)

    trace('RETRIEVING GROUPS XML FROM CCB...', args.trace, banner=True)
    tmp_filename = http_get2tmp_file('https://ingomar.ccbchurch.com/api.php?srv=group_profiles',
                                     settings.ccbapi.username, settings.ccbapi.password)
    xml_tree = ET.parse(tmp_filename)
    xml_root = xml_tree.getroot()
    group_id_map_dict = xml2group_id_dict(xml_root)
    os.remove(tmp_filename)

    for sk_indiv_id in sk_indiv_id2groups:
        if sk_indiv_id in sk2ccb_id_map_dict:
            for group_name in sk_indiv_id2groups[sk_indiv_id]:
                if not group_name in group_id_map_dict:
                    print "*** Cannot find CCB group name '" + group_name + "' in CCB account."
                print "Adding " + sk_indiv_id2name[sk_indiv_id] + " (Individual ID = " + \
                    sk2ccb_id_map_dict[sk_indiv_id] + ") to group '" + group_name + "' (Group ID = " + \
                    group_id_map_dict[group_name] + ")."
                add_person_to_group(sk2ccb_id_map_dict[sk_indiv_id], group_id_map_dict[group_name])
        else:
            groups_trying_to_add = ', '.join(sk_indiv_id2groups[sk_indiv_id])
            print "*** No SK->CCB ID map for '" + str(sk_indiv_id) + "' (" + sk_indiv_id2name[sk_indiv_id] + "), " + \
                "so person not added to " + groups_trying_to_add + "."

    sys.stdout.flush()
    sys.stderr.flush()
def test_gzipsource():
    # setup
    tbl = [('foo', 'bar'), ('a', '1'), ('b', '2')]
    fn = NamedTemporaryFile().name + '.gz'
    expect = b"foo,bar\na,1\nb,2\n"

    # write explicit
    etl.tocsv(tbl, GzipSource(fn), lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)

    # write implicit
    etl.tocsv(tbl, fn, lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)

    # read explicit
    tbl2 = etl.fromcsv(GzipSource(fn))
    ieq(tbl, tbl2)

    # read implicit
    tbl2 = etl.fromcsv(fn)
    ieq(tbl, tbl2)
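# For reference, petl picks the source from the filename extension, which is
# what the "implicit" cases above exercise; a short sketch (the file name is
# hypothetical):
import petl as etl

tbl = [('foo', 'bar'), ('a', '1')]
etl.tocsv(tbl, 'data.csv.gz', lineterminator='\n')  # gzip inferred from '.gz'
print(etl.fromcsv('data.csv.gz').nrows())           # reads back transparently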
def xmltodict(xmlFile):
    parser = etree.XMLParser(remove_blank_text=True)
    with open(xmlFile) as f:
        xml = f.read()
    root = objectify.fromstring(xml)
    print type(root)
    d = dict((e.tag, e.text) for e in root['iati-activity'].iterchildren())
    pprint(d)
    # have a look at the format generated with the CSV tool; note that fromcsv
    # expects a file name (or source), not the file contents
    k = fromcsv(datadir + "iati_transformations/iati_download_20131107_simple_aa-activity.csv")
    sys.exit()
def test_issue_231():
    table = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    t = cut(table, 'foo')

    totsv(t, 'tmp/issue_231.tsv')
    u = fromtsv('tmp/issue_231.tsv')
    ieq(t, u)

    tocsv(t, 'tmp/issue_231.csv')
    u = fromcsv('tmp/issue_231.csv')
    ieq(t, u)

    topickle(t, 'tmp/issue_231.pickle')
    u = frompickle('tmp/issue_231.pickle')
    ieq(t, u)
def test_fromcsv():
    """Test the fromcsv function."""
    f = NamedTemporaryFile(delete=False)
    writer = csv.writer(f, delimiter="\t")
    table = (("foo", "bar"),
             ("a", 1),
             ("b", 2),
             ("c", 2))
    for row in table:
        writer.writerow(row)
    f.close()
    actual = fromcsv(f.name, delimiter="\t")
    expect = (("foo", "bar"),
              ("a", "1"),
              ("b", "2"),
              ("c", "2"))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_staticmethods():
    data = [b'foo,bar', b'a,1', b'b,2', b'c,2']
    f = NamedTemporaryFile(mode='wb', delete=False)
    f.write(b'\n'.join(data))
    f.close()
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    actual = etl.fromcsv(f.name, encoding='ascii')
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def main(argv, version, org_data_path='data'):
    args = parse_args(argv, version)
    input_fields = InputFields.from_args(args)
    output_fields = OutputFields.from_args(args)
    input = petl.fromcsv(args.input_csv, encoding='utf-8', errors='strict')
    parser = Parser()
    parser.read_csv('data/settlements.csv', report_conflicts=False)
    matches = find_matches(
        input, input_fields, output_fields,
        index_data=org_data_path,
        parse=parser.parse,
        extramatches=args.extramatches,
        differentiating_ambiguity=args.differentiating_ambiguity)
    if args.progress:
        matches = matches.progress()
    matches.tocsv(args.output_csv, encoding='utf-8')
def main(argv): parser = argparse.ArgumentParser() parser.add_argument("--pledges-filename", required=True, help="Input UTF8 CSV with pledges data " "dumped from Servant Keeper") parser.add_argument("--output-filename", required=True, help="Output CSV filename which will be loaded with " "pledges data in CCB import format ") parser.add_argument('--trace', action='store_true', help="If specified, prints tracing/progress messages to " "stdout") args = parser.parse_args() assert os.path.isfile(args.pledges_filename), "Error: cannot open file '" + args.pledges_filename + "'" table = petl.fromcsv(args.pledges_filename) table = petl.rename(table, { 'Frequency': 'SK Frequency', 'Individual ID': 'SK Individual ID' }) table = petl.addfield(table, 'Individual ID', lambda rec: rec['SK Individual ID']) table = petl.addfield(table, 'Campus', '') table = petl.addfield(table, 'Category Pledged To', lambda rec: rec['Account']) table = petl.addfield(table, 'Amount Pledged', convert_amount) table = petl.addfield(table, 'Total Amount Pledged', lambda rec: rec['Pledged']) table = petl.addfield(table, 'Frequency', lambda rec: {1:'Yearly', 4:'Yearly', 12:'Monthly', 52:'Weekly', 24:'Monthly', 2:'Yearly'}[int(rec['Payments'])]) table = petl.addfield(table, 'Number of Gifts', lambda rec: {'Yearly':1, 'Monthly':12, 'Weekly':52}[rec['Frequency']]) table = petl.addfield(table, 'Length Multiplier', lambda rec: {'Yearly':'Years', 'Monthly':'Months', 'Weekly':'Weeks'}[rec['Frequency']]) table = petl.addfield(table, 'Start Date', lambda rec: {'Operating Income':'2013-01-01', 'Mortgage Principal':'2013-01-01', 'Operating Income 2015':'2015-01-01'}[rec['Account']]) table = petl.addfield(table, 'End Date', lambda rec: {'Operating Income':'2013-12-31', 'Mortgage Principal':'2013-12-31', 'Operating Income 2015':'2015-12-31'}[rec['Account']]) trace('CONVERTING AND THEN EMITTING TO CSV FILE...', args.trace, banner=True) table.progress(200).tocsv(args.output_filename) trace('OUTPUT TO CSV COMPLETE.', args.trace, banner=True) trace('DONE!', args.trace, banner=True)
def test_teetext():
    t1 = (("foo", "bar"),
          ("a", 2),
          ("b", 1),
          ("c", 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    prologue = "foo,bar\n"
    template = "{foo},{bar}\n"
    epilogue = "d,4"
    (etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue,
                 epilogue=epilogue)
        .selectgt("bar", 1)
        .topickle(f2.name))
    ieq(t1 + (("d", 4),), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
def test_teetext_unicode():
    t1 = ((u"foo", u"bar"),
          (u"Արամ Խաչատրյան", 2),
          (u"Johann Strauß", 1),
          (u"Вагиф Сәмәдоғлу", 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    prologue = u"foo,bar\n"
    template = u"{foo},{bar}\n"
    epilogue = u"章子怡,4"
    (etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue,
                 epilogue=epilogue, encoding="utf-8")
        .selectgt("bar", 1)
        .topickle(f2.name))
    ieq(t1 + ((u"章子怡", 4),),
        etl.fromcsv(f1.name, encoding="utf-8").convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-ccb-csv-filename", required=True,
                        help="Input CCB CSV loading file to validate")
    parser.add_argument("--output-validation-csv-filename", required=True,
                        help="Output CSV file that'll be created with validation results")
    args = parser.parse_args()

    table = petl.fromcsv(args.input_ccb_csv_filename)
    constraints = [
        {'name': 'max_len_20', 'field': 'Legal Name', 'assertion': max_len(20)},
        {'name': 'max_len_30', 'field': 'How They Heard', 'assertion': max_len(30)},
        {'name': 'max_len_20', 'field': 'Last Name', 'assertion': max_len(20)},
        {'name': 'max_len_100', 'field': 'Mailbox Number', 'assertion': max_len(100)},
        {'name': 'max_len_20', 'field': 'Middle Name', 'assertion': max_len(20)},
        {'name': 'max_len_30', 'field': 'Job Title', 'assertion': max_len(30)},
        {'name': 'max_len_20', 'field': 'First Name', 'assertion': max_len(20)},
        {'name': 'max_len_30', 'field': 'School', 'assertion': max_len(30)},
        {'name': 'max_len_semisep_30', 'field': 'Abilities/Skills', 'assertion': max_len_semisep(30)},
        {'name': 'max_len_semisep_30', 'field': 'Spiritual Gifts', 'assertion': max_len_semisep(30)},
        {'name': 'max_len_semisep_30', 'field': 'Passions', 'assertion': max_len_semisep(30)},
        {'name': 'max_len_100', 'field': 'Transferred Frm', 'assertion': max_len(100)},
        {'name': 'max_len_100', 'field': 'Transferred To', 'assertion': max_len(100)},
        {'name': 'max_len_30', 'field': 'How They Joined', 'assertion': max_len(30)},
        {'name': 'max_len_30', 'field': 'Membership Type', 'assertion': max_len(30)},
        {'name': 'max_len_30', 'field': 'Reason Left Church', 'assertion': max_len(30)},
        {'name': 'max_len_100', 'field': 'Pastr When Join', 'assertion': max_len(100)},
        {'name': 'max_len_100', 'field': 'Pastr When Leav', 'assertion': max_len(100)},
    ]
    validation_table = petl.validate(table, constraints=constraints)
    validation_table.progress(200).tocsv(args.output_validation_csv_filename)
    print "See output file '" + args.output_validation_csv_filename + "' for results"

    # Flush to ensure all output is written
    sys.stdout.flush()
    sys.stderr.flush()
import petl as etl
import csv

# import the data from the CSV file and convert the column types
table1 = (etl.fromcsv('owid-covid-data.csv')
          .convert('iso_code', 'upper')
          .convert('location', str)
          .convert('date', str)
          .convert('total_cases', int)
          .convert('new_cases', int)
          .convert('total_deaths', int)
          .convert('new_deaths', int)
          .convert('total_cases_per_million', float)
          .convert('new_cases_per_million', float)
          .convert('total_deaths_per_million', float)
          .convert('new_deaths_per_million', float)
          .convert('total_tests', int)
          .convert('new_tests', int)
          .convert('total_tests_per_thousand', float)
          .convert('new_tests_per_thousand', float)
          .convert('tests_units', str))

# declare a list whose first element is the header row;
# the remaining rows will be appended by the for loop below
data1 = [[
    'iso_code', 'location', 'date', 'total_cases', 'new_cases',
    'total_deaths', 'new_deaths', 'total_cases_per_million',
    'new_cases_per_million', 'total_deaths_per_million',
    'new_deaths_per_million', 'total_tests', 'new_tests',
    'total_tests_per_thousand', 'new_tests_per_thousand', 'tests_units'
]]

# cut selects the named columns from the table; it is not strictly necessary
# here, because the fields listed below are all of table1's fields
data = etl.cut(table1, 'iso_code', 'location', 'date', 'total_cases',
               'new_cases', 'total_deaths', 'new_deaths',
               'total_cases_per_million', 'new_cases_per_million',
               'total_deaths_per_million', 'new_deaths_per_million',
               'total_tests', 'new_tests', 'total_tests_per_thousand',
               'new_tests_per_thousand', 'tests_units')
def home():
    data_table = petl.fromcsv(SERVICES_FILE)
    data = [("Business & Trade", 1),
            ("Life Events", 2),
            ("Legal Information & Services", 3),
            ("Employment & Jobs", 4),
            ("Health & Wellbeing", 5)]
    return bottle.template('office_map', data=data)
def Average(lst):
    return sum(lst) / len(lst)


if __name__ == "__main__":
    try:
        DATE = sys.argv[1]
    except:
        print("ERROR! Use con s2_aggregate DATE")
        sys.exit(1)

    # print("Algunos agregados de los resultados, para fecha: ", DATE)
    t1 = (etl.fromcsv(f"var/s1_invisible_prefixes-{DATE}.csv", delimiter="|")
          .convert('visible', int)
          .convert('dark', int)
          .convert('total', int))
    # print(t1.look())

    print("Algunos agregados de los resultados, para fecha: ", DATE)
    print("Total de espacio de todo el pool",
          sum([x[4] for x in list(t1)][1:]))
    print(" - ")

    # number of completely invisible allocations
    n1 = etl.select(t1, lambda r: r['dark'] == r['total'])
    print("numero de asignaciones completamente invisibles", n1.nrows())
    print("total de ips en asignaciones completamente invisibles",
          sum([x[4] for x in list(n1)][1:]))
    print("tamano promedio de asignaciones completamente invisibles",
          Average([x[4] for x in list(n1)][1:]))
"""Return a list of valid NUTS codes.""" with open(GEOCODES_FILE) as stream: lines = csv.DictReader(stream) geocodes = [] for i, line in enumerate(lines): # The first line has an empty NUTS-code if i > 0: geocode = line['NUTS-Code'] geocodes.append(geocode) logging.debug('Loaded %d NUTS geocodes', len(geocodes)) return tuple(geocodes) GEOCODES = list(dicts(fromcsv(GEOCODES_FILE))) def get_all_codelists(): """Return all codelists as a dictionary of dictionaries.""" codelists = {} for codelist_file in os.listdir(CODELISTS_DIR): codelist_name, _ = os.path.splitext(codelist_file) codelist = get_codelist(codelist_name) codelists.update({codelist_name: codelist}) return codelists
def build_cn_raster(landcover_raster, lookup_csv, soils_polygon,
                    soils_hydrogroup_field="SOIL_HYDRO",
                    reference_raster=None, out_cn_raster=None):
    """Build a curve number raster from a landcover raster, a soils polygon,
    and a crosswalk between landcover classes, soil hydro groups, and curve
    numbers.

    :param lookup_csv: crosswalk CSV between landcover class, soil hydro
        group, and curve number
    :param landcover_raster: landcover raster (path or Raster object)
    :param soils_polygon: polygon containing soils with a hydro classification
    :param soils_hydrogroup_field: field holding the hydro group, defaults to
        "SOIL_HYDRO" (from the NRCS soils dataset)
    :param out_cn_raster: path for the output curve number raster
    """
    # GP Environment ----------------------------
    msg("Setting up GP Environment...")
    # if reference_raster is provided, we use it to set the GP environment for
    # subsequent raster operations
    if reference_raster:
        if not isinstance(reference_raster, Raster):
            # read in the reference raster as a Raster object
            reference_raster = Raster(reference_raster)
    else:
        reference_raster = Raster(landcover_raster)

    # set the snap raster, cell size, extent, and coordinate system for
    # subsequent operations
    env.snapRaster = reference_raster
    env.cellSize = reference_raster.meanCellWidth
    env.extent = reference_raster
    env.outputCoordinateSystem = reference_raster
    cs = env.outputCoordinateSystem.exportToString()

    # SOILS -------------------------------------
    msg("Processing Soils...")
    # rasterize the soils polygon
    soils_raster_path = so("soils_raster")
    PolygonToRaster_conversion(soils_polygon, soils_hydrogroup_field,
                               soils_raster_path, "CELL_CENTER")
    soils_raster = Raster(soils_raster_path)

    # use the raster attribute table to build a lookup of raster values to
    # soil hydro codes from the polygon (stored in the RAT after conversion)
    if not soils_raster.hasRAT:
        msg("Soils raster does not have an attribute table. Building...",
            "warning")
        BuildRasterAttributeTable_management(soils_raster, "Overwrite")

    # build a 2D array from the RAT
    fields = ["Value", soils_hydrogroup_field]
    rows = [fields]
    # soils_raster_table = MakeTableView_management(soils_raster_path)
    with SearchCursor(soils_raster_path, fields) as sc:
        for row in sc:
            rows.append([row[0], row[1]])

    # turn that into a dictionary, where key == the soil hydro text and
    # value == the raster cell value
    lookup_from_soils = {v: k for k, v in etl.records(rows)}
    # also capture a list of just the values, used to iterate conditionals later
    soil_values = [v['Value'] for v in etl.records(rows)]

    # LANDCOVER ---------------------------------
    msg("Processing Landcover...")
    if not isinstance(landcover_raster, Raster):
        # read in the landcover raster as a Raster object
        landcover_raster_obj = Raster(landcover_raster)
    landcover_values = []
    with SearchCursor(landcover_raster, ["Value"]) as sc:
        for row in sc:
            landcover_values.append(row[0])

    # LOOKUP TABLE ------------------------------
    msg("Processing Lookup Table...")
    # read the lookup csv, clean it up, and use the lookups from above to
    # limit it to just those values in the rasters
    t = (etl.fromcsv(lookup_csv)
         .convert('utc', int)
         .convert('cn', int)
         .select('soil', lambda v: v in lookup_from_soils)
         .convert('soil', lookup_from_soils)
         .select('utc', lambda v: v in landcover_values))
    # this gets us a table where the landcover class (as a number) corresponds
    # to the correct value in the converted soil raster, with the matching
    # curve number

    # DETERMINE CURVE NUMBERS -------------------
    msg("Assigning Curve Numbers...")
    # use that to reassign cell values via conditional map algebra operations
    cn_rasters = []
    for rec in etl.records(t):
        cn_raster_component = Con(
            (landcover_raster_obj == rec.utc) & (soils_raster == rec.soil),
            rec.cn, 0)
        cn_rasters.append(cn_raster_component)
    cn_raster = CellStatistics(cn_rasters, "MAXIMUM")

    # REPROJECT THE RESULTS ---------------------
    msg("Reprojecting and saving the results....")
    if not out_cn_raster:
        out_cn_raster = so("cn_raster", "random", "in_memory")
    ProjectRaster_management(in_raster=cn_raster, out_raster=out_cn_raster,
                             out_coor_system=cs, resampling_type="NEAREST",
                             cell_size=env.cellSize)
    # cn_raster.save(out_cn_raster)
    return out_cn_raster
look(table)
# a table and field name can also be provided as arguments
look(table1)
table2 = unflatten(table1, 'lines', 3)
look(table2)

# tocsv
table = [['foo', 'bar'],
         ['a', 1],
         ['b', 2],
         ['c', 2]]
from petl import tocsv, look
look(table)
tocsv(table, 'test.csv', delimiter='\t')
# look what it did
from petl import fromcsv
look(fromcsv('test.csv', delimiter='\t'))

# appendcsv
table = [['foo', 'bar'],
         ['d', 7],
         ['e', 42],
         ['f', 12]]
# look at an existing CSV file
from petl import look, fromcsv
testcsv = fromcsv('test.csv', delimiter='\t')
look(testcsv)
# append some data
look(table)
from petl import appendcsv
appendcsv(table, 'test.csv', delimiter='\t')
# look what it did
look(testcsv)
def extract_staging_csv(dataframe):
    """Load processed staging CSV data into a petl table."""
    table = etl.fromcsv(dataframe, encoding='utf8')
    return table
def main(argv): parser = argparse.ArgumentParser() parser.add_argument("--input-csv-filename", required=True, help="Input UTF8 CSV to summarize") parser.add_argument( "--sep-columns", required=False, nargs='*', default=argparse.SUPPRESS, help= "Column names of columns containing comma- or semi-colon-separated values" ) parser.add_argument("--sep-character", required=False, help="Character used to separate values in multi-value " \ "fields. Defaults to ';' if not specified.") parser.add_argument("--skip-columns", required=False, nargs='*', default=argparse.SUPPRESS, help="Column names to NOT generate stats for") parser.add_argument("--skip-num-rows", required=False, type=int, help="Skip specified number " "of header rows") parser.add_argument( "--first-ccb-column", required=False, help="String name of first CCB column. If " "specified, all preceeding columns will be labeled 'Servant Keeper' and this column " "and all subsequent will be labeled 'CCB'") args = parser.parse_args() if args.first_ccb_column is not None: column_prefix = 'Servant Keeper ' else: column_prefix = '' assert os.path.isfile( args.input_csv_filename ), "Error: cannot open file '" + args.input_csv_filename + "'" table = petl.fromcsv(args.input_csv_filename) # Skip header rows if args.skip_num_rows: skip_num = args.skip_num_rows assert skip_num > 0, "--skip-num-rows value '" + str( skip_num) + "' is invalid. Must be positive." it = iter(table) while skip_num >= 0: row = next(it) skip_num -= 1 table = petl.setheader(table, row) table = petl.tail(table, petl.nrows(table) - args.skip_num_rows) # Print nicely formatted stats for each column sep = '' args_dict = vars(args) skip_columns_specified = 'skip_columns' in args_dict sep_char_specified = 'sep_character' in args_dict for column in petl.header(table): if args.first_ccb_column is not None and column == args.first_ccb_column: column_prefix = 'CCB ' if not skip_columns_specified or column not in args.skip_columns: output_str = column_prefix + "Column '" + column + "'" print sep + output_str print >> sys.stderr, output_str if args.sep_columns is not None and column in args.sep_columns: if sep_char_specified: sep_character = args.sep_character else: sep_character = ';' output_str = num_dict2str( dict_dump(sep_valuecounter(table, column, sep_character))) print output_str else: output_str = num_dict2str(dict_dump(valuecounts(table, column))) print output_str sep = '\n' # Flush to ensure all output is written sys.stdout.flush() sys.stderr.flush()
def createTableFromCSV(csvFile):
    return etl.fromcsv(csvFile)
# import libraries
import folium
import pandas as pd
from bottle import route, run
from bottle import static_file
import petl as etl

table2 = etl.fromcsv('new.csv')

# Make a data frame with dots to show on the map
data = pd.DataFrame({
    'lat': [-25.274398, 20.593684, 30.375321, 35.86166, 32.427908,
            50.503887, -14.235004, 56.130366, 6.428055, 19.2823,
            37.09024, 33.886917, 47.516231, 33.93911, 38.963745],
    'lon': [133.775136, 78.96288, 69.345116, 104.195397, 53.688046,
            4.469936, -51.92528, -106.346771, -9.429499, 166.6470,
            -95.712891, 9.537499, 14.550072, 67.709953, 35.243322],
    'name': ["Australia", "India", "Pakistan", "China", "Iran", "Belgium",
             "Brazil", "Canada", "Liberia", "United States Virgin Islands",
             "United States", "Tanzania", "Austria", "Afghanistan", "Turkey"],
})
data

# Make an empty map
m = folium.Map(location=[20, 0], zoom_start=3.5)
m = folium.Map(location=[48.85, 2.35], tiles="OpenStreetMap", zoom_start=2)

s = 'Name: ' + data.iloc[0]['name']
# coding:utf8

# introduction
example_data = """foo,bar,baz
a,1,2.3
b,4,5.6
c,7,8.9
"""
with open('example.csv', 'w') as f:
    f.write(example_data)

import petl as etl

table1 = etl.fromcsv('example.csv')
print(table1)
table2 = etl.convert(table1, 'foo', 'upper')
print(table2)
table3 = etl.convert(table2, {'bar': int, 'baz': float})
print(table3)
table4 = etl.addfield(table3, 'bar*baz', lambda row: row.baz * row.bar)
print(table4.look())

# pipeline style (method chaining)
table = (
    etl
    .fromcsv('example.csv')
    .convert({'foo': 'upper', 'bar': int, 'baz': float})
    .addfield('bar*bar_new', lambda r: r.baz * r.bar)
    .convert('baz', lambda v, row: v * float(row.bar), pass_row=True)
)
print(table.look())
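# Continuing in the same chained style, a sketch (the output file name is
# hypothetical) showing a pipeline ending in a sink; petl pipelines are lazy,
# and the tocsv() call is what actually pulls rows through the chain:
(etl
    .fromcsv('example.csv')
    .convert({'bar': int, 'baz': float})
    .select(lambda r: r.bar > 1)    # keep rows with bar > 1
    .sort('baz', reverse=True)      # largest baz first
    .tocsv('filtered.csv'))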
def paypal_to_quickbooks(paypal_path, iif_path=None, unprocessed_path=None,
                         start_date=None, end_date=None):
    """Process the PayPal CSV into a QuickBooks IIF file.

    INPUT: paypal.csv
    OUTPUT: output.iif and unprocessed.csv
    """
    etl.config.look_style = 'minimal'
    if iif_path is None:
        # If no iif path was specified, default to the same folder
        # as the input, and filename = 'output.iif'
        iif_path = os.path.join(os.path.dirname(paypal_path), 'output.iif')
    if unprocessed_path is None:
        # If no path for unprocessed trades was specified, default to
        # the same folder as the input, and filename = 'unprocessed.csv'
        unprocessed_path = os.path.join(os.path.dirname(paypal_path), 'unprocessed.csv')

    # TODO: Validate that the cart_payment is associated with exactly
    # cart_payment.Quantity cart_items.

    # --------------------
    # 1. LOAD PAYPAL CSV FILE
    paypal = etl.fromcsv(paypal_path)
    print("Loaded PayPal input file (" + str(paypal.nrows()) + " rows)")
    paypal = cleanup_paypal(paypal)
    if start_date is not None:
        # Eliminate dates prior to start_date
        paypal = paypal.selectge('Date', start_date)
    if end_date is not None:
        # Eliminate dates after end_date
        paypal = paypal.selectle('Date', end_date)
    # Any cancelled trades basically cancel out, so we can eliminate most of
    # them right off the bat.
    paypal = eliminate_cancellations(paypal)

    # --------------------
    # 2. CREATE QUICKBOOKS IIF FILE
    print("Creating output IIF file")
    # We always delete the file and start fresh
    try:
        os.remove(iif_path)
    except FileNotFoundError:
        # If the file wasn't there in the first place, no problem.
        pass
    # Start with the names data, add that to the .IIF file.
    get_customer_names(paypal).appendtsv(iif_path, write_header=True)
    # TicketLeap fees have a header for both the transaction and the split,
    # so I have to write to the IIF file within the function
    paypal = append_TicketLeap_fees(paypal, iif_path)
    # TicketLeap sales receipts make up the bulk of the transactions
    paypal = append_sales_as_deposits(paypal, iif_path)
    # Invoices are for tickets or for membership sales
    paypal = append_invoices(paypal, iif_path)

    # --------------------
    # 3. CREATE UNPROCESSED ROWS FILE
    print("Creating output unprocessed rows CSV file (" + str(paypal.nrows()) + " rows)")
    unprocessed_file = open(unprocessed_path, 'w')
    writer = csv.writer(unprocessed_file, lineterminator='\n')
    writer.writerows(paypal)
    unprocessed_file.close()
def get_table(self) -> etl.Table:
    return etl.fromcsv(self.file.path)
import petl
import os

# Use this file to prepare CSV data from
# webrobot.io's kickstarter data

# change this listdir input to whatever
# your path to your data is
files = os.listdir(".")[0:-1]
print(files)

first = True
for i in files:
    data = petl.fromcsv(i)
    blurbs = petl.cut(data, 'blurb')
    print(petl.look(blurbs))
    if first:
        petl.tocsv(blurbs, 'blurbs.csv')
        first = False
    else:
        petl.appendcsv(blurbs, 'blurbs.csv')
def execute(self, context):
    try:
        nm_arq = 'COSTOS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}
            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                strnull[row['nr_guia_principal'].strip()] if row['nr_guia_principal'].strip() in strnull else row['nr_guia_principal'].strip(),
                row[5].strip(),
                strnull[row['fk_cid'].strip()] if row['fk_cid'].strip() in strnull else row['fk_cid'].strip(),
                strnull[row['dt_ini_internacao'].strip()] if row['dt_ini_internacao'].strip() in strnull else row['dt_ini_internacao'].strip(),
                strnull[row['dt_fim_internacao'].strip()] if row['dt_fim_internacao'].strip() in strnull else row['dt_fim_internacao'].strip(),
                strnull[row['fk_medicosolicitante'].strip()] if row['fk_medicosolicitante'].strip() in strnull else row['fk_medicosolicitante'].strip(),
                strnull[row['cod_executante'].strip()] if row['cod_executante'].strip() in strnull else row['cod_executante'].strip(),
                row[11].strip(),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                row[15].strip(),
                row[16].strip(),
                strnull[row['qtd_unica'].strip()] if row['qtd_unica'].strip() in strnull else row['qtd_unica'].strip(),
                row[18].strip(),
                row[19].strip(),
                row[20].strip(),
                row[21].strip(),
                row[22].strip(),
                row[23].strip(),
                row[24].strip(),
                strnull[row['tipo_internacao'].strip()] if row['tipo_internacao'].strip() in strnull else row['tipo_internacao'].strip(),
                strnull[row['tipo_acomodacao'].strip()] if row['tipo_acomodacao'].strip() in strnull else row['tipo_acomodacao'].strip(),
                strnull[row['ds_indicacao_clinica'].strip()] if row['ds_indicacao_clinica'].strip() in strnull else row['ds_indicacao_clinica'].strip(),
                row[28].strip(),
                row[29].strip(),
                row[30].strip(),
                strnull[row['partic_medico'].strip()] if row['partic_medico'].strip() in strnull else row['partic_medico'].strip(),
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'cod_custo', 'fk_beneficiario', 'mes_competencia', 'nr_guia',
            'nr_guia_principal', 'dt_realizacao', 'fk_cid',
            'dt_ini_internacao', 'dt_fim_internacao', 'fk_medicosolicitante',
            'cod_executante', 'fk_local_atend', 'fk_capa_unico',
            'fk_destino_pgto', 'fk_servico', 'cod_servico_tipo', 'qtd',
            'qtd_unica', 'vl_unitario', 'vl_total', 'vl_coparticipacao',
            'ind_internacao', 'tipo_registro', 'tipo_atendimento',
            'tipo_guia', 'tipo_internacao', 'tipo_acomodacao',
            'ds_indicacao_clinica', 'vl_hm', 'vl_co', 'vl_filme',
            'partic_medico'])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')
def read(csv_file):
    return petl.fromcsv(data.csv_open(path + '/' + csv_file),
                        encoding='utf-8', errors='strict')
def execute(self, context):
    try:
        nm_arq = 'PRESTADORES.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}
            return [
                row[0].strip(),
                row[1].strip(),
                strnull[row['nome_completo'].strip()] if row['nome_completo'].strip() in strnull else row['nome_completo'].strip(),
                row[3].strip(),
                row[4].strip(),
                strnull[row['dat_nascimento'].strip()] if row['dat_nascimento'].strip() in strnull else row['dat_nascimento'].strip(),
                row[6].strip(),
                row[7].strip(),
                row[8].strip(),
                strnull[row['cnae'].strip()] if row['cnae'].strip() in strnull else row['cnae'].strip(),
                row[10].strip(),
                strnull[row['grau'].strip()] if row['grau'].strip() in strnull else row['grau'].strip(),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                row[15].strip(),
                row[16].strip(),
                row[17].strip(),
                row[18].strip(),
                strnull[row['latitude'].strip()] if row['latitude'].strip() in strnull else row['latitude'].strip(),
                strnull[row['longitude'].strip()] if row['longitude'].strip() in strnull else row['longitude'].strip(),
                strnull[row['fone1'].strip()] if row['fone1'].strip() in strnull else row['fone1'].strip(),
                strnull[row['fone2'].strip()] if row['fone2'].strip() in strnull else row['fone2'].strip(),
                strnull[row['fone3'].strip()] if row['fone3'].strip() in strnull else row['fone3'].strip(),
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_medico_prestador', 'nome_fantasia', 'nome_completo',
            'cod_crm', 'f_crm', 'dat_nascimento', 'num_cpf_cnpj',
            'tipo_prestador', 'grupo_prestador', 'cnae', 'especialidade',
            'grau', 'endereço', 'numero', 'Complemento', 'bairro',
            'cidade_prestador', 'uf_prestador', 'cep', 'latitude',
            'longitude', 'fone1', 'fone2', 'fone3'])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')
                  standardize_nulls(a['std_high_num'])) else None) \
    .addfield('change_stdir', lambda a: 1 if a['no_address'] != 1 and str(standardize_nulls(a['stdir'])) != str(
        standardize_nulls(a['std_street_predir'])) else None) \
    .addfield('change_stnam', lambda a: 1 if a['no_address'] != 1 and str(standardize_nulls(a['stnam'])) != str(
        standardize_nulls(a['std_street_name'])) else None) \
    .addfield('change_stdes', lambda a: 1 if a['no_address'] != 1 and str(standardize_nulls(a['stdes'])) != str(
        standardize_nulls(a['std_street_suffix'])) else None) \
    .addfield('change_stdessuf', lambda a: 1 if a['no_address'] != 1 and str(standardize_nulls(a['stdessuf'])) != str(
        standardize_nulls(a['std_address_postdir'])) else 0) \
    .tocsv('dor_parcel_address_analysis.csv', write_header=True)

# get address_summary rows with dor_parcel_id as array:
address_summary_rows = address_summary_out_table \
    .addfield('dor_parcel_id_array', lambda d: d.dor_parcel_id.split('|') if d.dor_parcel_id else []) \
    .addfield('opa_account_num_array', lambda d: d.opa_account_num.split('|') if d.opa_account_num else [])

dor_parcel_address_analysis = etl.fromcsv('dor_parcel_address_analysis.csv')
mapreg_count_map = {}
address_count_map = {}
dor_parcel_header = dor_parcel_address_analysis[0]

# count_maps
for row in dor_parcel_address_analysis[1:]:
    dict_row = dict(zip(dor_parcel_header, row))
    mapreg = dict_row['mapreg']
    std_street_address = dict_row['std_street_address']
    if mapreg not in mapreg_count_map:
        mapreg_count_map[mapreg] = 1
    else:
        mapreg_count_map[mapreg] += 1
    if std_street_address not in address_count_map:
        address_count_map[std_street_address] = 1
today = parser.parse(inputstr + '00+0000').astimezone(tz.tzlocal())

dataset = {}
for i in ('temp', 'hvel', 'zcor'):
    dataset[i] = netCDF4.MFDataset([
        os.path.join(head, 'ocm3_{0}_{1}_H{2}.nc'.format(i, inputstr, j))
        for j in ('-23_00', '01_24', '25_48', '49_72')])

find = [-1, -5, -10]
depth = petl.fromcolumns([find], ['depth'])
points = petl.fromcsv(os.path.join(home, 'var', 'Points')).convert({
    'lat': float,
    'lon': float,
    'ncpos': int})
pPoints = [int(x) for x in open(os.path.join(home, 'var', 'PengHu'))]

nodelist = points.values('ncpos')
zcorlist = dataset['zcor']['zcor'][:].take(nodelist, 2)
templist = dataset['temp']['temp'][:].take(nodelist, 2)
ulist = dataset['hvel']['u'][:].take(nodelist, 2)
vlist = dataset['hvel']['v'][:].take(nodelist, 2)

tempout = numpy.ndarray((timelen, 3, 189), numpy.float32)
speedout = numpy.ndarray((timelen, 3, 189), numpy.float32)
dirout = numpy.ndarray((timelen, 3, 189), numpy.float32)
import petl as etl
import sqlite3
from textblob import TextBlob
import plotly.plotly as py
import plotly
from plotly.graph_objs import *

conn = sqlite3.connect('billboard_top100.db')
cur = conn.cursor()

billboard_top100 = etl.fromcsv("billboard_lyrics_1964-2015-2.csv",
                               encoding="ISO-8859-1")

create_songs = ("CREATE TABLE IF NOT EXISTS Songs "
                "(id INTEGER PRIMARY KEY AUTOINCREMENT, Rank TEXT, Song TEXT, "
                "Artist TEXT, Year TEXT, Lyrics TEXT)")
cur.execute(create_songs)

cut_data = etl.cut(billboard_top100, "Rank", "Song", "Artist", "Year", "Lyrics")
etl.todb(cut_data, conn, 'Songs')

select_lyrics = "SELECT Year, Lyrics from Songs"
cur.execute(select_lyrics)
lyrics_list = cur.fetchall()

lyrics_dict = {}
for (x, y) in lyrics_list:
    if x not in lyrics_dict.keys():
        lyrics_dict[x] = [y]
    else:
        lyrics_dict[x].append(y)
def retrieve_rna_data(self):
    """Parse the 'rna_tissue' csv file: RNA levels in 56 cell lines and
    37 tissues based on RNA-seq from HPA.

    :return: dict
    """
    self.logger.info('get rna tissue rows into dicts')
    self.logger.debug('melting rna level table into geneid tissue level')

    t_level = (petl.fromcsv(URLZSource(self.rna_level_url), delimiter='\t')
               .melt(key='ID', variablefield='tissue', valuefield='rna_level')
               .rename({'ID': 'gene'})
               .addfield('tissue_label',
                         lambda rec: name_from_tissue(rec['tissue'].strip(), self.t2m))
               .addfield('tissue_code',
                         lambda rec: code_from_tissue(rec['tissue_label'], self.t2m))
               .addfield('anatomical_systems',
                         lambda rec: asys_from_tissue(rec['tissue_label'], self.t2m))
               .addfield('organs',
                         lambda rec: organs_from_tissue(rec['tissue_label'], self.t2m))
               .cutout('tissue'))

    t_value = (petl.fromcsv(URLZSource(self.rna_value_url), delimiter='\t')
               .melt(key='ID', variablefield='tissue', valuefield='rna_value')
               .rename({'ID': 'gene'})
               .addfield('tissue_label',
                         lambda rec: name_from_tissue(rec['tissue'].strip(), self.t2m))
               .addfield('tissue_code',
                         lambda rec: code_from_tissue(rec['tissue_label'], self.t2m))
               .addfield('rna_unit', 'TPM')
               .cutout('tissue'))

    t_zscore = (petl.fromcsv(URLZSource(self.rna_zscore_url), delimiter='\t')
                .melt(key='ID', variablefield='tissue', valuefield='zscore_level')
                .rename({'ID': 'gene'})
                .addfield('tissue_label',
                          lambda rec: name_from_tissue(rec['tissue'].strip(), self.t2m))
                .addfield('tissue_code',
                          lambda rec: code_from_tissue(rec['tissue_label'], self.t2m))
                .cutout('tissue'))

    t_vl = petl.join(t_level, t_value,
                     key=('gene', 'tissue_code', 'tissue_label'),
                     presorted=True)
    t_join = (petl.join(t_vl, t_zscore,
                        key=('gene', 'tissue_code', 'tissue_label'),
                        presorted=True)
              .aggregate('gene',
                         aggregation={
                             'data': (('tissue_code', 'tissue_label',
                                       'rna_level', 'rna_value', 'rna_unit',
                                       'anatomical_systems', 'organs',
                                       'zscore_level'), list)},
                         presorted=True))
    return t_join
## Purpose: Combine the newly created datasets from tureen_txt_manip.py and tureen_csv_manip.py into a single .db (database) file.
## The resulting database can then be used to visualize the trend of wins for all the English Premier League teams for each season,
## from season 2000-2001 to season 2016-2017.

# Import modules
import csv
import petl as etl
import sqlite3

# Needed for Windows encoding, otherwise will not print appropriate symbols
import codecs
import sys
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

# Convert CSV files to petl tables for further data manipulation
chelsea_data = etl.fromcsv('chelseaResults.csv', encoding='utf-8')
manu_data = etl.fromcsv('manunitedResults.csv', encoding='utf-8')
mancity_data = etl.fromcsv('mancityResults.csv', encoding='utf-8')
arsenal_data = etl.fromcsv('arsenalResults.csv', encoding='utf-8')
tottenham_data = etl.fromcsv('tottenhamResults.csv', encoding='utf-8')
liverpool_data = etl.fromcsv('liverpoolResults.csv', encoding='utf-8')
everton_data = etl.fromcsv('evertonResults.csv', encoding='utf-8')

print(chelsea_data)

## Connect to SQLite and initialize a database
conn = sqlite3.connect('epl_teamWins.db')
cur = conn.cursor()

## Create table for Chelsea F.C.
createCFC = """CREATE TABLE IF NOT EXISTS Chelsea
               (Year int primary key not null, ChelseaWins int)"""
# List of source and target files for merge
healthcsvfile = './datafiles/Vic_Health_Care.csv'
locationxmlfile = './datafiles/Vic_Locations.xml'
mergedcsvfile = './datafiles/practice_locations.csv'

# xmlfields is a dictionary mapping XML element names to output field names
xmlfields = {'Town_name': 'Town', 'Latitude': 'Lat', 'Longitude': 'Lon'}  # type: Dict[str, str]
xmlparent = 'Town_location'
initialrow = ['Practice_Name', 'Latitude', 'Longitude', 'Town', 'State', 'Post_Code']

# tables in memory created from xml and csv files
csvtable = petl.fromcsv(healthcsvfile)
xmltable = petl.fromxml(locationxmlfile, xmlparent, xmlfields)

# Find the row in xmltable matching town from csv
lktbl = petl.lookupone(xmltable, 'Town_name')  # type: Union[Dict[Any, Union[tuple[Any], Tuple[Any]]], Any]
nmdtbl = petl.namedtuples(csvtable)

finaltabl = [initialrow]
for lin in nmdtbl:
    tabl = lktbl[lin.Town]
    latitude = tabl[0]
    longitude = tabl[1]
    insertline = (str(lin.Practice_Name) + ',' + latitude + ',' + longitude + ',' +
                  str(lin.Town) + ',' + str(lin.State) + ',' + str(lin.Postcode)).split(',')
    print insertline
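# As an aside, the manual lookup-and-concatenate merge above can also be
# expressed with petl's own join; a sketch under the assumption that the
# 'Town' and 'Town_name' fields line up as in the code above:
merged = petl.join(csvtable,
                   petl.rename(xmltable, 'Town_name', 'Town'),
                   key='Town')
petl.tocsv(merged, mergedcsvfile)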
def read_csv(file):
    return etl.fromcsv(file)
        return 'AS'
    elif (country == 'MS Zaandam'):
        return 'NA'
    elif (country == 'Kosovo') or (country == 'Holy See'):
        return 'EU'
    else:
        return 'N/A'


# Sources for the data we are going to read
uri_confirmed = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
uri_death = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
uri_recovered = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# Process confirmed cases first
t_confirmed = etl.fromcsv(uri_confirmed)

# Rename the headers
t_confirmed = etl.rename(t_confirmed, {'Country/Region': 'Country'})

# Adjust the data types
# From column 5 onwards the data type is integer (the number of people/cases).
# We also take the opportunity to change the header date format from 1/23/20
# to 2020-01-23.
headers = etl.fieldnames(t_confirmed)
i = 0
for header in headers:
    if i >= 4:
        t_confirmed = etl.convert(t_confirmed, header, int)  # fix the data type
        fecha = datetime.datetime.strptime(
            header, '%m/%d/%y')  # compute the date in the correct format
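# The fragment above cuts off inside the loop. A self-contained sketch of the
# same header-rewrite idea (the tiny table here is made up), showing how a
# m/d/yy column header can be renamed to ISO format with etl.rename:
import datetime
import petl as etl

t = [('Country', '1/23/20', '1/24/20'), ('Peru', 0, 1)]
for i, header in enumerate(etl.fieldnames(t)):
    if i >= 1:  # date columns start after the key columns
        fecha = datetime.datetime.strptime(header, '%m/%d/%y')
        t = etl.rename(t, header, fecha.strftime('%Y-%m-%d'))
print(etl.header(t))  # ('Country', '2020-01-23', '2020-01-24')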
import pickle
import petl as etl
import csv

cols = [[0, 1, 2], ['a', 'b', 'c']]
table1 = etl.fromcolumns(cols)
print(table1)

########################### CSV Reading ###############################
table2 = etl.fromcsv('AAPL.csv')
print(table2['Date'])
print(table2)
etl.tocsv(table1, 'example.csv')  # writing to a CSV file

########################## Reading from Pickle files ####################
"""
What is pickle?
Pickling is a way to convert a python object (list, dict, etc.) into a
character stream. The idea is that this character stream contains all the
information necessary to reconstruct the object in another python script.
"""

# Creating a pickle file
a = ['test value', 'test value 2', 'test value 3']
file_Name = "testfile"
# open the file for writing
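# A minimal sketch of how the truncated pickle example presumably continues
# (this dump/load round trip is an assumption based on the comments above,
# not part of the source):
fileObject = open(file_Name, 'wb')  # open the file for writing
pickle.dump(a, fileObject)          # serialize the list to the file
fileObject.close()

with open(file_Name, 'rb') as fileObject:
    b = pickle.load(fileObject)     # reconstruct the object
print(b == a)                       # True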
def petl_view(self) -> petl.Table:
    """``petl.Table`` built from stored CSV ``file``."""
    return petl.fromcsv(source=self.file)
def do_etl(bx_users_fn, bx_books_fn, bx_book_ratings_fn, db_fn):
    """Run the ETL procedures over all the BX csv files, populating a single
    sqlite .db file.

    Arguments:
    bx_users_fn -- path to the BX-Users.csv file.
    bx_books_fn -- path to the BX-Books.csv file.
    bx_book_ratings_fn -- path to the BX-Book-Ratings.csv file.
    db_fn -- path to the output sqlite .db file.

    Returns: None
    """
    errargs = {"errorvalue": ERROR_VALUE}

    # Transform BX-Users.csv
    tab = etl.fromcsv(bx_users_fn, delimiter=DELIMITER, escapechar=ESCAPECHAR,
                      quoting=QUOTE_ALL, encoding=ENCODING)
    tab = tab.rename({"User-ID": "user_id",
                      "Location": "location",
                      "Age": "age"})
    users_tab = (tab
                 .convert("user_id", (lambda s: s.strip()), **errargs)
                 .convert("location", clean_text, **errargs)
                 .convert("age", (lambda s: int(s)), **errargs))

    # Transform BX-Books.csv
    tab = etl.fromcsv(bx_books_fn, delimiter=DELIMITER, escapechar=ESCAPECHAR,
                      quoting=QUOTE_ALL, encoding=ENCODING)
    tab = tab.rename({"ISBN": "isbn",
                      "Book-Title": "title",
                      "Book-Author": "author",
                      "Year-Of-Publication": "year",
                      "Publisher": "publisher",
                      "Image-URL-S": "img_url_s",
                      "Image-URL-M": "img_url_m",
                      "Image-URL-L": "img_url_l"})
    books_tab = (tab
                 .convert("isbn", clean_isbn, **errargs)
                 .convert("title", clean_text, **errargs)
                 .convert("author", clean_text, **errargs)
                 .convert("year", clean_year, **errargs)
                 .convert("publisher", clean_text, **errargs)
                 .convert("img_url_s", (lambda s: s.strip()), **errargs)
                 .convert("img_url_m", (lambda s: s.strip()), **errargs)
                 .convert("img_url_l", (lambda s: s.strip()), **errargs))

    # Transform BX-Book-Ratings.csv
    tab = etl.fromcsv(bx_book_ratings_fn, delimiter=DELIMITER,
                      escapechar=ESCAPECHAR, quoting=QUOTE_ALL,
                      encoding=ENCODING)
    tab = tab.rename({"User-ID": "user_id",
                      "ISBN": "isbn",
                      "Book-Rating": "rating"})
    ratings_tab = (tab
                   .convert("user_id", (lambda s: s.strip()), **errargs)
                   .convert("isbn", clean_isbn, **errargs)
                   .convert("rating", clean_rating, **errargs))

    with sqlite3.connect(db_fn) as conn:
        # Load each table into the sqlite db.
        users_tab.todb(conn, "users", create=True)
        books_tab.todb(conn, "books", create=True, drop=True)
        ratings_tab.todb(conn, "ratings", create=True)
        conn.commit()
    return
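# The cleaning helpers above (clean_text, clean_isbn, clean_year,
# clean_rating) are defined elsewhere in the source. Purely hypothetical
# sketches of their likely shape, for orientation only; note that raising an
# exception causes petl's convert(..., errorvalue=ERROR_VALUE) to substitute
# the error value:
def clean_text(s):
    return ' '.join(s.strip().split())  # collapse runs of whitespace

def clean_isbn(s):
    return s.strip().upper().replace('-', '')

def clean_year(s):
    year = int(s)
    if year <= 0:  # the bound is a guess, not from the source
        raise ValueError(s)
    return year

def clean_rating(s):
    rating = int(s)
    if not 0 <= rating <= 10:  # BX ratings range over 0-10
        raise ValueError(s)
    return rating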
def execute(self, context):
    try:
        nm_arq = 'BENEFICIARIO.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            transmf = {'MASCULINO': 'M', 'FEMININO': 'F'}
            strnull = {'NULL': ''}
            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row['nome'].strip(),
                row[5].strip(),
                row[6].strip(),
                row[7].strip(),
                row[8].strip(),
                strnull[row['copart_percentual'].strip()] if row['copart_percentual'].strip() in strnull else row['copart_percentual'].strip(),
                strnull[row['limite_copart'].strip()] if row['limite_copart'].strip() in strnull else row['limite_copart'].strip(),
                row[11].strip(),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                strnull[row['dt_exclusao'].strip()] if row['dt_exclusao'].strip() in strnull else row['dt_exclusao'].strip(),
                row[16],
                transmf[row['sexo'].strip()] if row['sexo'].strip() in transmf else None,
                row[18].strip(),
                row[19].strip(),
                row[20].strip(),
                row[21].strip(),
                row[22].strip(),
                row[23].strip(),
                row[24].strip(),
                row[25].strip(),
                row[26].strip(),
                row[27].strip(),
                row[28].strip(),
                row[29].strip(),
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_beneficiario', 'nr_beneficiario', 'nr_beneficiario_tit',
            'fk_empresa', 'nome', 'cpf', 'dt_nascimento', 'cod_plano',
            'descricao_plano', 'copart_percentual', 'limite_copart',
            'tipo_acomodacao', 'abrangencia_plano', 'grau_dependencia',
            'dt_inclusao', 'dt_exclusao', 'nome_mae', 'sexo',
            'tipo_contrato', 'endereco', 'bairro', 'cidade', 'uf', 'cep',
            'tipo_cliente', 'nr_cartaonacionalsaude', 'plano_regulamentado',
            'descricao_entidade', 'tipo_pessoa', 'acao_judicial'])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')