Example #1
def test_records():
    """Test the records function."""

    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = records(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2})
    ieq(expect, actual)
Example #2
def test_records_shortrows():
    """Test the records function on a table with short rows."""

    table = (('foo', 'bar'), ('a', 1), ('b', ))
    actual = records(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
Example #3
def test_records_shortrows():
    """Test the records function on a table with short rows."""

    table = (('foo', 'bar'), ('a', 1), ('b',))
    actual = records(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
Example #4
def test_records():
    """Test the records function."""

    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = records(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2})
    ieq(expect, actual)
Example #5
    def generate_data_types(self, table):
        # Generate column data types

        longest, type_list = [], []

        cont = petl.records(table.table)

        # Populate empty values for the columns
        for col in table.columns:
            longest.append(0)
            type_list.append('')

        for row in cont:
            for i in range(len(row)):
                # NA is the csv null value
                if type_list[i] == 'varchar' or row[i] == 'NA':
                    pass
                else:
                    var_type = self.data_type(row[i], type_list[i])
                    type_list[i] = var_type
                # Calculate width
                width = len(str(row[i]).encode('utf-8'))
                if width > longest[i]:
                    longest[i] = width

        return {
            'longest': longest,
            'headers': table.columns,
            'type_list': type_list
        }
Example #6
    def generate_data_types(self, table):
        # Generate column data types

        longest, type_list = [], []

        cont = petl.records(table.table)

        # Populate empty values for the columns
        for col in table.columns:
            longest.append(0)
            type_list.append('')

        for row in cont:
            for i in range(len(row)):
                # NA is the csv null value
                if type_list[i] == 'varchar' or row[i] in ['NA', '']:
                    pass
                else:
                    var_type = self.data_type(row[i], type_list[i])
                    type_list[i] = var_type

                # Calculate width
                width = len(str(row[i]).encode('utf-8'))
                if width > longest[i]:
                    longest[i] = width

        # 'NA' and '' values are skipped in the loop above.
        # If an entire column contains only those values (or a mix of the two),
        # its type will still be empty.
        # Fill with a default varchar
        type_list = [typ or 'varchar' for typ in type_list]

        return {'longest': longest,
                'headers': table.columns,
                'type_list': type_list}
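The varchar fallback described in the comments above can be seen in isolation; a minimal sketch with made-up sample values:

# Columns that only ever held 'NA' or '' never get a type assigned in the loop,
# so any remaining empty entries are filled with a default of 'varchar'.
type_list = ['integer', '', 'varchar', '']
type_list = [typ or 'varchar' for typ in type_list]
print(type_list)  # ['integer', 'varchar', 'varchar', 'varchar']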
Example #7
def replace_split_transactions(table, dict_split_transaction_details):
    global g

    list_rows_to_add = []
    list_rows_to_remove = []
    for row in petl.records(table):
        if row['Account'] == 'Split Transaction':
            sk_indiv_id = row['SK Individual ID']
            batch_date = row['Batch Date']
            amount_str = row['SK Amount']
            string_key = sk_indiv_id + ',' + batch_date
            contrib_amount = Decimal(re.sub(r'[^\d.]', '', amount_str))
            if string_key in dict_split_transaction_details:
                splits_total = Decimal(0)
                for split_entry in dict_split_transaction_details[string_key]:
                    splits_total += Decimal(re.sub(r'[^\d.]', '', split_entry['Amount']))
                if contrib_amount != splits_total:
                    print "*** ERROR!  For Individual ID, Batch Date " + string_key + ", the main 'Split " \
                        "Transaction' entry amount was " + str(contrib_amount) + "but sum of split detail " \
                        "transactions was " + str(splits_total)
                else:
                    list_rows_to_remove.append({
                        'SK Individual ID': sk_indiv_id,
                        'Batch Date': batch_date,
                        'SK Amount': amount_str,
                        'Account': 'Split Transaction'
                        })
                    for split_entry in dict_split_transaction_details[string_key]:
                        list_rows_to_add.append({
                            'Env #': row['Env #'],
                            'Batch Date': batch_date,
                            'SK Amount': split_entry['Amount'],
                            'Type': row['Type'],
                            'Account': split_entry['Account'],
                            'Tax': split_entry['Tax'],
                            'Check #': row['Check #'],
                            'Notes': "Inserted from 'Split Transaction'. " + row['Notes'],
                            # 'Family ID': row['Family ID'],
                            'SK Individual ID': row['SK Individual ID']
                            # 'To Date': row['To Date'],
                            # 'Contribution Link': row['Contribution Link']
                            })
            else:
                print "*** ERROR!  Cannot find any 'Split Transaction' details for record with 'Batch Date' " + \
                    row['Batch Date'] + ", contributed by 'Individual ID' " + row['SK Individual ID'] + " for the " \
                    "amount " + row['SK Amount']

    print '*** Count before remove_rows(): ' + str(petl.nrows(table))
    table = remove_rows(table, list_rows_to_remove)
    print '*** Count after remove_rows(): ' + str(petl.nrows(table))

    print '*** Count before add_rows(): ' + str(petl.nrows(table))
    table = add_rows(table, list_rows_to_add)
    print '*** Count after add_rows(): ' + str(petl.nrows(table))

    return table
Example #8
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--attendance-filename",
        required=True,
        nargs="+",
        action="append",
        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)",
    )
    parser.add_argument(
        "--mapping-filename",
        required=True,
        help="'Mapping' filename (CSV mapping file with "
        "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)",
    )
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    dup_names = {}
    ids_in_dups = {}
    with open(args.mapping_filename, "rb") as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            key = row[0] + ", " + row[1]
            if key in full_name2sk_indiv_id:
                if not key in dup_names:
                    dup_names[key] = [full_name2sk_indiv_id[key]]
                    ids_in_dups[full_name2sk_indiv_id[key]] = key
                dup_names[key].append(row[2])
                ids_in_dups[row[2]] = key
            else:
                full_name2sk_indiv_id[key] = row[2]

    attendance_table = join_tables(args.attendance_filename[0], None, False)

    # print dup_names
    # print attendance_table

    reported_dups = set()
    for row in petl.records(attendance_table):
        if row["Individual ID"] in ids_in_dups and ids_in_dups[row["Individual ID"]] not in reported_dups:
            print "Duplicate name: " + ids_in_dups[row["Individual ID"]]
            reported_dups.add(ids_in_dups[row["Individual ID"]])

    sys.stdout.flush()
    sys.stderr.flush()
Example #9
 def import_file(self, filename, sheet, columns):
     """
     used to import file and return dictionnary of data
     :param filename:
     :param sheet:
     :param columns:
     :return data: dictionnary
     """
     table = xlsx.fromxlsx(filename, sheet)
     data = []
     for el in petl.records(table):
         data_child = {}
         for col in columns:
             data_child[col] = el[col]
         data.append(data_child)
     return data
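The dictionary-building idiom in import_file() can be exercised without an .xlsx file by feeding petl.records() an in-memory table; a minimal sketch with made-up column names:

import petl

table = [['Name', 'Age'], ['a', 1], ['b', 2]]
columns = ['Name']
data = [{col: rec[col] for col in columns} for rec in petl.records(table)]
print(data)  # [{'Name': 'a'}, {'Name': 'b'}]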
Example #10
def gather_semi_sep_by_indiv_id(table, dict_semi_column_fields):
    dict_id2groups = {}
    for row in petl.records(table):
        for col in dict_semi_column_fields:
            field_list = dict_semi_column_fields[col]
            value = row[col].strip()
            if value == '':
                continue
            else:
                sk_indiv_id = row['Individual ID']
                for group_name in value.split(';'):
                    group_name_str = group_name.strip()
                    if group_name_str in field_list:
                        if not sk_indiv_id in dict_id2groups:
                            dict_id2groups[sk_indiv_id] = []
                        dict_id2groups[sk_indiv_id].append(group_name_str)
    return dict_id2groups
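A hedged usage sketch for gather_semi_sep_by_indiv_id() above; the column contents and group names are made up for illustration:

import petl  # the function defined above is assumed to be in scope

table = [
    ['Individual ID', 'Groups'],
    ['101', 'Choir; Ushers'],
    ['102', ''],
]
dict_semi_column_fields = {'Groups': ['Choir', 'Ushers', 'Greeters']}
print(gather_semi_sep_by_indiv_id(table, dict_semi_column_fields))
# Expected: {'101': ['Choir', 'Ushers']}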
Example #11
def gather_semi_sep_by_indiv_id(table, dict_semi_column_fields):
    dict_id2groups = {}
    for row in petl.records(table):
        for col in dict_semi_column_fields:
            field_list = dict_semi_column_fields[col]
            value = row[col].strip()
            if value == '':
                continue
            else:
                sk_indiv_id = row['Individual ID']
                for group_name in value.split(';'):
                    group_name_str = group_name.strip()
                    if group_name_str in field_list:
                        if not sk_indiv_id in dict_id2groups:
                            dict_id2groups[sk_indiv_id] = []
                        dict_id2groups[sk_indiv_id].append(group_name_str)
    return dict_id2groups
Example #12
def load_split_transaction_details(list_split_details_files):
    dict_split_transaction_details = {}
    if list_split_details_files is not None:
        for split_details_file in list_split_details_files:
            assert os.path.isfile(split_details_file), "Error: cannot open file '" + split_details_file + "'"
            table = petl.fromcsv(split_details_file)
            account_name = get_account_name(petl.values(table, 'Account'))
            for row in petl.records(table):
                if row['Account'] == 'Split Transaction':
                    string_key = row['Individual ID'] + ',' + row['Batch Date']
                    if string_key not in dict_split_transaction_details:
                        dict_split_transaction_details[string_key] = []
                    dict_split_transaction_details[string_key].append({
                        'Account': account_name,
                        'Amount': row['Amount'],
                        'Tax': row['Tax']
                        })
    return dict_split_transaction_details
Example #13
def valuecounts(table, col_name):
    return_dict = {}
    reported_count = 0
    unreported_count = 0
    column = petl.values(table, col_name)
    nrows = petl.nrows(table)
    non_blanks = petl.select(table, '{' + quote_single_quote(col_name) + "} != ''")
    num_blanks = nrows - petl.nrows(non_blanks)
    counts_table = petl.valuecounts(non_blanks, col_name)
    for row in petl.records(counts_table):
        if row['frequency'] > 0.01:
            return_dict[row[col_name]] = row['count']
            reported_count += row['count']
        else:
            unreported_count += row['count']
    return_dict['<other>'] = unreported_count
    return_dict['<blank>'] = num_blanks
    return return_dict
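A hedged usage sketch for valuecounts() above; quote_single_quote() is assumed to exist in the original module, so a minimal stand-in is supplied here, and the sample data is made up:

import petl

def quote_single_quote(s):
    # minimal stand-in for the helper assumed by valuecounts()
    return s.replace("'", "''")

table = [['Status'], ['Member'], ['Member'], ['Visitor'], ['']]
print(valuecounts(table, 'Status'))
# Expected roughly: {'Member': 2, 'Visitor': 1, '<other>': 0, '<blank>': 1}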
Example #14
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        context["filename"] = self.object.downloaded_file.name.split(os.path.sep)[-1]
        context["columns_query_kwarg"] = self.columns_query_kwarg
        table = petl.fromcsv(self.object.downloaded_file)

        full_table_header = list(petl.header(table))
        context["column_options"] = full_table_header

        selected_columns = [c for c in self.request.GET.getlist(self.columns_query_kwarg) if c in full_table_header]
        context["selected_columns"] = selected_columns

        if selected_columns:
            context["header"] = selected_columns + ["Count"]
            context["rows"] = petl.records(
                petl.aggregate(table, selected_columns[0] if len(selected_columns) == 1 else selected_columns, len)
            )

        return context
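The aggregation that feeds context["rows"] above can be reproduced with a small in-memory table; a minimal sketch with made-up column names (petl.aggregate with len counts rows per key and names the result column 'value'):

import petl

table = [['city', 'name'], ['NYC', 'a'], ['NYC', 'b'], ['LA', 'c']]
counts = petl.aggregate(table, 'city', len)
for rec in petl.records(counts):
    print(rec['city'], rec['value'])  # LA 1, then NYC 2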
Example #15
def valuecounts(table, col_name):
    return_dict = {}
    reported_count = 0
    unreported_count = 0
    column = petl.values(table, col_name)
    nrows = petl.nrows(table)
    non_blanks = petl.select(table,
                             '{' + quote_single_quote(col_name) + "} != ''")
    num_blanks = nrows - petl.nrows(non_blanks)
    counts_table = petl.valuecounts(non_blanks, col_name)
    for row in petl.records(counts_table):
        if row['frequency'] > 0.01:
            return_dict[row[col_name]] = row['count']
            reported_count += row['count']
        else:
            unreported_count += row['count']
    return_dict['<other>'] = unreported_count
    return_dict['<blank>'] = num_blanks
    return return_dict
Example #16
def test_records():
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_("a", o["foo"])
    eq_(1, o["bar"])
    o = next(it)
    eq_("b", o["foo"])
    eq_(2, o["bar"])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_("a", o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_("b", o.foo)
    eq_(2, o.bar)
Example #17
def test_records_unevenrows():
    table = (("foo", "bar"), ("a", 1, True), ("b",))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_("a", o["foo"])
    eq_(1, o["bar"])
    o = next(it)
    eq_("b", o["foo"])
    eq_(None, o["bar"])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_("a", o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_("b", o.foo)
    eq_(None, o.bar)
Example #18
def test_records():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_('a', o['foo'])
    eq_(1, o['bar'])
    o = next(it)
    eq_('b', o['foo'])
    eq_(2, o['bar'])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_('a', o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_('b', o.foo)
    eq_(2, o.bar)
Example #19
def test_records():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_('a', o['foo'])
    eq_(1, o['bar'])
    o = next(it)
    eq_('b', o['foo'])
    eq_(2, o['bar'])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_('a', o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_('b', o.foo)
    eq_(2, o.bar)
Example #20
def test_records_unevenrows():
    table = (('foo', 'bar'), ('a', 1, True), ('b', ))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_('a', o['foo'])
    eq_(1, o['bar'])
    o = next(it)
    eq_('b', o['foo'])
    eq_(None, o['bar'])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_('a', o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_('b', o.foo)
    eq_(None, o.bar)
Example #21
def test_records_unevenrows():
    table = (('foo', 'bar'), ('a', 1, True), ('b',))
    actual = records(table)
    # access items
    it = iter(actual)
    o = next(it)
    eq_('a', o['foo'])
    eq_(1, o['bar'])
    o = next(it)
    eq_('b', o['foo'])
    eq_(None, o['bar'])
    # access attributes
    it = iter(actual)
    o = next(it)
    eq_('a', o.foo)
    eq_(1, o.bar)
    o = next(it)
    eq_('b', o.foo)
    eq_(None, o.bar)
Example #22
 def load_to_warehouse(self, db_info):
     connection = pymysql.connect(
         host=db_info['host'],
         user=db_info['user'],
         password=db_info['passwd'],
         db=db_info['db'],
     )
     connection.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
     for table in self.UPDATE_ORDER:
         data = self.etl_table.TABLES[table]()
         print(f'Loading {table}...\n{data}')
         columns = ','.join(etl.header(data))
         values = ','.join(['%s'] * len(etl.header(data)))
         duplicate_updates = ','.join([
             f'{column} = VALUES({column})' for column in etl.header(data)
         ])
         query = f"INSERT {table} ({columns}) VALUES ({values}) ON DUPLICATE KEY UPDATE {duplicate_updates};"
         print(query)
         connection.cursor().executemany(query, etl.records(data))
     connection.close()
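Since etl.records() yields tuple-like Record objects, they can be passed straight to a DB-API executemany() as positional parameters, which is what load_to_warehouse() relies on; a minimal sketch with made-up data (the executemany line is illustrative only):

import petl as etl

data = [['id', 'name'], [1, 'a'], [2, 'b']]
params = list(etl.records(data))  # each Record behaves like the tuple (1, 'a'), (2, 'b')
# cursor.executemany('INSERT INTO t (id, name) VALUES (%s, %s)', params)
print(params)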
Example #23
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument("--attendance-filename", required=True, nargs='+', action='append', \
        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)")
    parser.add_argument("--mapping-filename", required=True, help="'Mapping' filename (CSV mapping file with " \
        "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)")
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    dup_names = {}
    ids_in_dups = {}
    with open(args.mapping_filename, 'rb') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            key = row[0] + ', ' + row[1]
            if key in full_name2sk_indiv_id:
                if not key in dup_names:
                    dup_names[key] = [full_name2sk_indiv_id[key]]
                    ids_in_dups[full_name2sk_indiv_id[key]] = key
                dup_names[key].append(row[2])
                ids_in_dups[row[2]] = key
            else:
                full_name2sk_indiv_id[key] = row[2]

    attendance_table = join_tables(args.attendance_filename[0], None, False)

    #print dup_names
    #print attendance_table

    reported_dups = set()
    for row in petl.records(attendance_table):
        if row['Individual ID'] in ids_in_dups and ids_in_dups[
                row['Individual ID']] not in reported_dups:
            print 'Duplicate name: ' + ids_in_dups[row['Individual ID']]
            reported_dups.add(ids_in_dups[row['Individual ID']])

    sys.stdout.flush()
    sys.stderr.flush()
Example #24
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        context["filename"] = self.object.downloaded_file.name.split(os.path.sep)[-1]
        context["count_query_kwarg"] = self.count_query_kwarg

        table = petl.fromcsv(self.object.downloaded_file)
        context["header"] = petl.header(table)

        try:
            record_count_to_show = int(self.request.GET.get(self.count_query_kwarg))
        except (TypeError, ValueError):
            record_count_to_show = self.count_increment

        # Potentially expensive, cache / save in database for dataset
        if petl.nrows(table) > record_count_to_show:
            context[
                "load_more_url"
            ] = f"{self.request.path}?{self.count_query_kwarg}={record_count_to_show+self.count_increment}"

        context["rows"] = petl.records(petl.head(table, record_count_to_show))

        return context
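The petl.head()/petl.records() pairing used above for paging can be tried on a small in-memory table; a minimal sketch:

import petl

table = [['n'], [1], [2], [3], [4]]
for rec in petl.records(petl.head(table, 2)):  # only the first two records
    print(rec['n'])  # 1, then 2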
Example #25
def build_cn_raster(landcover_raster,
                    lookup_csv,
                    soils_polygon,
                    soils_hydrogroup_field="SOIL_HYDRO",
                    reference_raster=None,
                    out_cn_raster=None):
    """Build a curve number raster from landcover raster, soils polygon, and a crosswalk between 
    landcover classes, soil hydro groups, and curve numbers.

    :param lookup_csv: [description]
    :type lookup_csv: [type]
    :param landcover_raster: [description]
    :type landcover_raster: [type]
    :param soils_polygon: polygon containing soils with a hydro classification. 
    :type soils_polygon: [type]
    :param soils_hydrogroup_field: [description], defaults to "SOIL_HYDRO" (from the NCRS soils dataset)
    :type soils_hydrogroup_field: str, optional
    :param out_cn_raster: [description]
    :type out_cn_raster: [type]    
    """

    # GP Environment ----------------------------
    msg("Setting up GP Environment...")
    # if reference_raster is provided, we use it to set the GP environment for
    # subsequent raster operations
    if reference_raster:
        if not isinstance(reference_raster, Raster):
            # read in the reference raster as a Raster object.
            reference_raster = Raster(reference_raster)
    else:
        reference_raster = Raster(landcover_raster)

    # set the snap raster, cell size, and extent, and coordinate system for subsequent operations
    env.snapRaster = reference_raster
    env.cellSize = reference_raster.meanCellWidth
    env.extent = reference_raster
    env.outputCoordinateSystem = reference_raster

    cs = env.outputCoordinateSystem.exportToString()

    # SOILS -------------------------------------

    msg("Processing Soils...")
    # read the soils polygon into a raster, get list(set()) of all cell values from the landcover raster
    soils_raster_path = so("soils_raster")
    PolygonToRaster_conversion(soils_polygon, soils_hydrogroup_field,
                               soils_raster_path, "CELL_CENTER")
    soils_raster = Raster(soils_raster_path)

    # use the raster attribute table to build a lookup of raster values to soil hydro codes
    # from the polygon (that were stored in the raster attribute table after conversion)
    if not soils_raster.hasRAT:
        msg("Soils raster does not have an attribute table. Building...",
            "warning")
        BuildRasterAttributeTable_management(soils_raster, "Overwrite")
    # build a 2D array from the RAT
    fields = ["Value", soils_hydrogroup_field]
    rows = [fields]
    # soils_raster_table = MakeTableView_management(soils_raster_path)
    with SearchCursor(soils_raster_path, fields) as sc:
        for row in sc:
            rows.append([row[0], row[1]])
    # turn that into a dictionary, where the key==soil hydro text and value==the raster cell value
    lookup_from_soils = {v: k for k, v in etl.records(rows)}
    # also capture a list of just the values, used to iterate conditionals later
    soil_values = [v['Value'] for v in etl.records(rows)]

    # LANDCOVER ---------------------------------
    msg("Processing Landcover...")
    if not isinstance(landcover_raster, Raster):
        # read in the landcover raster as a Raster object.
        landcover_raster_obj = Raster(landcover_raster)
    else:
        landcover_raster_obj = landcover_raster
    landcover_values = []
    with SearchCursor(landcover_raster, ["Value"]) as sc:
        for row in sc:
            landcover_values.append(row[0])

    # LOOKUP TABLE ------------------------------
    msg("Processing Lookup Table...")
    # read the lookup csv, clean it up, and use the lookups from above to limit it to just
    # those values in the rasters
    t = etl\
        .fromcsv(lookup_csv)\
        .convert('utc', int)\
        .convert('cn', int)\
        .select('soil', lambda v: v in lookup_from_soils.keys())\
        .convert('soil', lookup_from_soils)\
        .select('utc', lambda v: v in landcover_values)

    # This gets us a table where the landcover class (as a number) corresponds to the
    # correct value in the converted soil raster, with the corresponding curve number.

    # DETERMINE CURVE NUMBERS -------------------
    msg("Assigning Curve Numbers...")
    # Use that to reassign cell values using conditional map algebra operations
    cn_rasters = []
    for rec in etl.records(t):
        cn_raster_component = Con(
            (landcover_raster_obj == rec.utc) & (soils_raster == rec.soil),
            rec.cn, 0)
        cn_rasters.append(cn_raster_component)

    cn_raster = CellStatistics(cn_rasters, "MAXIMUM")

    # REPROJECT THE RESULTS -------------------
    msg("Reprojecting and saving the results....")
    if not out_cn_raster:
        out_cn_raster = so("cn_raster", "random", "in_memory")

    ProjectRaster_management(in_raster=cn_raster,
                             out_raster=out_cn_raster,
                             out_coor_system=cs,
                             resampling_type="NEAREST",
                             cell_size=env.cellSize)

    # cn_raster.save(out_cn_raster)
    return out_cn_raster
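The soils lookup above works because etl.records() accepts any row container, including the plain list of lists assembled from the raster attribute table; a minimal sketch with made-up RAT values:

import petl as etl

rows = [['Value', 'SOIL_HYDRO'], [1, 'A'], [2, 'B']]
lookup_from_soils = {v: k for k, v in etl.records(rows)}  # {'A': 1, 'B': 2}
soil_values = [r['Value'] for r in etl.records(rows)]     # [1, 2]
print(lookup_from_soils, soil_values)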
Example #26
start_time = time.time()

# connect to mysql
mysql_engine = create_engine('mysql+mysqldb://' + mysql_user + ':' + mysql_pw +
                             '@' + mysql_host + '/' + mysql_db)
mysql_engine.connect().execute('SET SQL_MODE=ANSI_QUOTES')

# connect to sqlite
sqlite_engine = create_engine('sqlite:///' + sqlite_path)

# migrate tables
for table in tables:
    print(table)
    data = etl.fromdb(sqlite_engine, 'select * from ' + table)
    if table == 'employees':
        recs = etl.records(data)
        emails = []
        for rec in recs:
            emails.append(rec['first_name'] + '.' + rec['last_name'] +
                          '@mycompany.com')
        data2 = etl.addcolumn(data, 'email', emails)
    else:
        data2 = data
    etl.todb(data2, mysql_engine, table, create=True)

# load CSV file
data = etl.fromcsv(source=socialmedia_csv)
recs = etl.records(data)
# determine employee numbers
empnos = []
for rec in recs:
Example #27
    def write(self,
              rows,
              from_srid=None,
              buffer_size=DEFAULT_WRITE_BUFFER_SIZE):
        """
        Inserts dictionary row objects into the database
        Args: list of row dicts, table name, ordered field names

        This doesn't currently use petl.todb for a few reasons:
            - petl uses executemany which isn't intended for speed (basically
              the equivalent of running many insert statements)
            - calls to DB functions like ST_GeomFromText end up getting quoted;
              not sure how to disable this.
        """

        # Get fields from the row because some fields from self.fields may be
        # optional, such as autoincrementing integers.
        # raise
        fields = rows.header()
        geom_field = self.geom_field

        # convert rows to records (hybrid objects that can behave like dicts)
        rows = etl.records(rows)

        # Get geom metadata
        if geom_field:
            srid = from_srid or self.get_srid()
            row_geom_type = re.match('[A-Z]+', rows[0][geom_field]).group() \
                if geom_field else None
            table_geom_type = self.geom_type if geom_field else None

        # Do we need to cast the geometry to a MULTI type? (Assuming all rows
        # have the same geom type.)
        if geom_field:
            if self.geom_type.startswith('MULTI') and \
                not row_geom_type.startswith('MULTI'):
                multi_geom = True
            else:
                multi_geom = False

        # Make a map of non geom field name => type
        type_map = OrderedDict()
        for field in fields:
            try:
                type_map[field] = [
                    x['type'] for x in self.metadata if x['name'] == field
                ][0]
            except IndexError:
                raise ValueError('Field `{}` does not exist'.format(field))
        type_map_items = type_map.items()

        fields_joined = ', '.join(fields)
        stmt = "INSERT INTO {} ({}) VALUES ".format(self.name, fields_joined)

        len_rows = len(rows)
        if buffer_size is None or len_rows < buffer_size:
            iterations = 1
        else:
            iterations = int(len_rows / buffer_size)
            iterations += (len_rows % buffer_size > 0)  # round up

        execute = self.db.cursor.execute
        commit = self.db.dbo.commit

        # Make list of value lists
        val_rows = []
        cur_stmt = stmt

        # DEBUG
        import psycopg2

        for i, row in enumerate(rows):
            val_row = []
            for field, type_ in type_map_items:
                if type_ == 'geometry':
                    geom = row[geom_field]
                    val = self._prepare_geom(geom, srid, multi_geom=multi_geom)
                    val_row.append(val)

                else:
                    val = self.prepare_val(row[field], type_)
                    val_row.append(val)
            val_rows.append(val_row)

            # check if it's time to ship a chunk
            if i % buffer_size == 0:
                # Execute
                vals_joined = [
                    '({})'.format(', '.join(vals)) for vals in val_rows
                ]
                rows_joined = ', '.join(vals_joined)
                cur_stmt += rows_joined
                try:
                    execute(cur_stmt)
                except psycopg2.ProgrammingError:
                    print(self.db.cursor.query)
                    raise
                commit()

                val_rows = []
                cur_stmt = stmt

        # Execute remaining rows (TODO clean this up)
        vals_joined = ['({})'.format(', '.join(vals)) for vals in val_rows]
        rows_joined = ', '.join(vals_joined)
        cur_stmt += rows_joined
        execute(cur_stmt)
        commit()
Example #28
# namedtuples()
###############

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.namedtuples(table)
d
list(d)

# records()
###############

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.records(table)
d
list(d)

# rowgroupby()
##############

import petl as etl
table1 = [['foo', 'bar', 'baz'], ['a', 1, True], ['b', 3, True], ['b', 2]]
# group entire rows
for key, group in etl.rowgroupby(table1, 'foo'):
    print(key, list(group))

# group specific values
for key, group in etl.rowgroupby(table1, 'foo', 'bar'):
    print(key, list(group))
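As a brief illustration of what the records() call above returns, each item supports both dict-style and attribute-style access (the same behaviour the test examples earlier in this section assert):

import petl as etl

table = [['foo', 'bar'], ['a', 1], ['b', 2]]
for rec in etl.records(table):
    print(rec['foo'], rec.bar)  # a 1, then b 2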
Example #29
def gather_name_by_indiv_id(table):
    sk_indiv_id2name = {}
    for row in petl.records(table):
        sk_indiv_id2name[row['Individual ID']] = row['Preferred Name'] + ' ' + row['Last Name']
    return sk_indiv_id2name
Example #30
def gather_name_by_indiv_id(table):
    sk_indiv_id2name = {}
    for row in petl.records(table):
        sk_indiv_id2name[row['Individual ID']] = row['Preferred Name'] + ' ' + row['Last Name']
    return sk_indiv_id2name
Example #31
import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.namedtuples(table)
d
list(d)


# records()
###############

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.records(table)
d
list(d)


# rowgroupby()
##############

import petl as etl

table1 = [["foo", "bar", "baz"], ["a", 1, True], ["b", 3, True], ["b", 2]]
# group entire rows
for key, group in etl.rowgroupby(table1, "foo"):
    print(key, list(group))

# group specific values