def split_table_by_number(xlsTable, row_number, output,
                          sheetName=None, sheetIndex=None):
    """
    Split a table by row number

    Given a number of rows, this method will split an input table in
    several tables (sheets of the workbook saved in output) with a
    number of rows equal to row_number. The last sheet holds the
    remaining rows.

    Every output sheet has the column names in its first row and is
    named '<base>_<n>', where <base> is sheetName ('data' when
    sheetName is not given) and <n> is a counter starting at 1.

    Parameters:
    * xlsTable - input table, forwarded to columns_by_order and
    tbl_to_obj;
    * row_number - maximum number of data rows per output sheet, must
    be greater than or equal to 1;
    * output - destination given to xlwt.Workbook.save;
    * sheetName / sheetIndex - sheet to be read. When sheetIndex is
    given (0 included) it is the one used to load the data.

    Raises ValueError if row_number is lower than 1.

    TODO: Do it with Pandas
    """

    # Fail early: with row_number < 1 the row counter would never reach
    # the reset condition and nothing would be split.
    if row_number < 1:
        raise ValueError('row_number must be greater than or equal to 1')

    import xlwt
    from gasp.mng.fld.xls import columns_by_order
    from gasp.fm import tbl_to_obj

    columns = columns_by_order(
        xlsTable, sheet_name=sheetName, sheet_index=sheetIndex
    )

    # Compare with None: a plain truthiness test discards sheetIndex=0
    rows = tbl_to_obj(
        xlsTable,
        sheet=sheetIndex if sheetIndex is not None else sheetName,
        output='array'
    )

    # Create output
    out_xls = xlwt.Workbook()
    base = sheetName if sheetName else 'data'

    def _new_sheet(number):
        """Add sheet '<base>_<number>' and write the header row."""
        new_sheet = out_xls.add_sheet('{}_{}'.format(base, number))
        for col, name in enumerate(columns):
            new_sheet.write(0, col, name)
        return new_sheet

    sheet = None
    sheet_nr = 0
    line = 1  # next row to write in the current sheet (row 0 = header)

    for row in rows:
        if line == 1:
            sheet_nr += 1
            sheet = _new_sheet(sheet_nr)

        for col, name in enumerate(columns):
            sheet.write(line, col, row[name])

        line += 1

        # Current sheet is full: the next row opens a new sheet
        if line > row_number:
            line = 1

    # No input rows: still produce a header-only sheet so the workbook
    # always holds at least one sheet when it is saved.
    if sheet is None:
        _new_sheet(1)

    # Save result
    out_xls.save(output)
def merge_feat(shps, outShp, api="ogr2ogr"):
    """
    Get all features in several Shapefiles and save them in one file

    Parameters:
    * shps - sequence with the inputs to merge (must be a list when
    api is 'pandas');
    * outShp - output file;
    * api - 'ogr2ogr' (default) or 'pandas'.

    api options:
    * ogr2ogr - the first input is converted to outShp and every other
    input is appended to it with 'ogr2ogr -update -append';
    * pandas - every input is loaded with tbl_to_obj, the resulting
    objects are concatenated and written with df_to_shp.

    Returns outShp.

    Raises ValueError if api is not one of the available options or,
    for the pandas api, if shps is not a non-empty list.
    """

    if api == "ogr2ogr":
        from gasp import exec_cmd
        from gasp.prop.ff import drv_name

        out_drv = drv_name(outShp)

        # NOTE(review): paths are placed unquoted in the command string;
        # paths with spaces may not work - confirm how exec_cmd parses
        # the command.

        # Create output and copy some features of one layer (first in shps)
        exec_cmd('ogr2ogr -f "{}" {} {}'.format(
            out_drv, outShp, shps[0]
        ))

        # Append remaining layers
        for shp in shps[1:]:
            exec_cmd('ogr2ogr -f "{}" -update -append {} {}'.format(
                out_drv, outShp, shp
            ))

    elif api == 'pandas':
        # Merge SHP using pandas

        # Validate before importing/reading anything
        if not isinstance(shps, list):
            raise ValueError(
                'shps should be a list with paths for Feature Classes')

        if not shps:
            raise ValueError('shps is empty, there is nothing to merge')

        import pandas as pd
        from gasp.fm import tbl_to_obj
        from gasp.to.shp import df_to_shp

        dfs = [tbl_to_obj(shp) for shp in shps]

        # DataFrame.append does not exist in pandas >= 2.0; concat
        # accepts the same ignore_index/sort options. A single input is
        # written as it is, like before.
        if len(dfs) == 1:
            result = dfs[0]
        else:
            result = pd.concat(dfs, ignore_index=True, sort=True)

        df_to_shp(result, outShp)

    else:
        raise ValueError(
            "{} API is not available".format(api)
        )

    return outShp
def tbl_to_tbl(inTbl, outTbl, inSheet=None, txtDelimiter=None,
               inTxtDelimiter=None, inEncoding='utf-8'):
    """
    Convert data format

    inTbl is loaded with tbl_to_obj (inSheet, inEncoding and
    inTxtDelimiter are forwarded as sheet, encoding_ and _delimiter);
    the loaded object is then given to obj_to_tbl together with outTbl
    and txtDelimiter.

    Returns the value returned by obj_to_tbl.
    """

    from gasp.fm import tbl_to_obj

    # Options used to read the input table
    read_options = {
        'sheet': inSheet,
        'encoding_': inEncoding,
        '_delimiter': inTxtDelimiter
    }

    # NOTE(review): obj_to_tbl is not imported in this function; it is
    # expected to be available at module level.
    return obj_to_tbl(
        tbl_to_obj(inTbl, **read_options),
        outTbl, delimiter=txtDelimiter
    )
def predict_fm_mdl(mdlFile, vFile, data, txtCol, method='NaiveBayes'):
    """
    Text classification using file with fit data

    Parameters:
    * mdlFile - file with the fitted model, read with joblib.load;
    * vFile - file with the fitted vectorizer, read with joblib.load.
    Mandatory for every method except 'LogisticRegression';
    * data - pandas DataFrame or a table to be loaded with tbl_to_obj;
    * txtCol - name of the column with the text to classify;
    * method - 'NaiveBayes', 'LinearSupportVectorMachine',
    'RandomForest' or 'LogisticRegression'.

    For 'LogisticRegression' the raw text is given to the model; for
    the other methods the text goes through the vectorizer first.

    Returns the table with a new column named 'classification'. Rows
    with null txtCol are not classified. When data is a DataFrame, the
    column is added to that same object.

    Raises ValueError if method is unknown or if vFile is missing for
    a method that needs the vectorizer.
    """

    # Methods in which the model receives vectorized text
    vectorized = ('NaiveBayes', 'LinearSupportVectorMachine', 'RandomForest')

    # Validate the request before reading any file
    if method not in vectorized and method != 'LogisticRegression':
        raise ValueError('{} method is not available'.format(method))

    if method in vectorized and not vFile:
        raise ValueError(
            'vFile is mandatory when method is {}'.format(method))

    from joblib import load
    import pandas as pd

    if isinstance(data, pd.DataFrame):
        classDf = data
    else:
        from gasp.fm import tbl_to_obj
        classDf = tbl_to_obj(data)

    # Only rows with text can be classified
    has_text = pd.notnull(classDf[txtCol])
    texts = classDf.loc[has_text, txtCol]

    clf = load(mdlFile)
    tvect = load(vFile) if vFile else None

    features = tvect.transform(texts) if method in vectorized else texts
    y_pred = clf.predict(features)

    # Write the predictions in the rows they were computed from, so the
    # number of values always matches the number of target rows
    if has_text.all():
        classDf.loc[:, 'classification'] = y_pred
    else:
        classDf.loc[has_text, 'classification'] = y_pred

    return classDf
def same_attr_to_shp(inShps, interestCol, outFolder, basename="data_",
                     resultDict=None):
    """
    Export one file for each value of a field shared by several SHPs

    All inputs are loaded and merged in a single table; then, for each
    unique value found in interestCol, the rows with that value are
    written to a new file in outFolder, regardless of the input they
    came from. Rows are not filtered before the merge (no dropna is
    applied).

    Output files are named '<basename><key><ext>', where <key> is the
    text of the value up to its first '.' and <ext> is the extension of
    the first file in inShps.

    Returns a list with the values returned by df_to_shp or, if
    resultDict is true, a dict with those values indexed by <key>.
    """

    import os
    from gasp import goToList
    from gasp.fm import tbl_to_obj
    from gasp.mng.gen import merge_df
    from gasp.to.shp import df_to_shp

    # Outputs use the same extension as the first input
    extension = os.path.splitext(inShps[0])[1]

    merged = merge_df(
        [tbl_to_obj(path) for path in inShps], ignIndex=True
    )

    as_dict = bool(resultDict)
    written = {} if as_dict else []

    for value in merged[interestCol].unique():
        # Text before the first '.', e.g. '12.0' becomes '12'
        key = str(value).partition('.')[0]

        subset = merged[merged[interestCol] == value]
        target = os.path.join(
            outFolder, '{}{}{}'.format(basename, key, extension)
        )
        shp = df_to_shp(subset, target)

        if as_dict:
            written[key] = shp
        else:
            written.append(shp)

    return written