Example no. 1
0
def split_table_by_number(xlsTable,
                          row_number,
                          output,
                          sheetName=None,
                          sheetIndex=None):
    """
    Split a table into several sheets by row count.

    Reads the rows of xlsTable and writes them to a new XLS workbook
    saved at output, starting a new sheet every row_number data rows.
    Each sheet repeats the column-header row and is named
    '<sheetName or "data">_<n>'.

    xlsTable   : path of the input spreadsheet.
    row_number : maximum number of data rows per output sheet.
    output     : path for the resulting XLS file.
    sheetName / sheetIndex : identify the input sheet to read.

    Returns None (the result is written to output).

    TODO: Do it with Pandas
    """

    import xlwt
    from gasp.mng.fld.xls import columns_by_order
    from gasp.fm import tbl_to_obj

    COLUMNS_ORDER = columns_by_order(xlsTable,
                                     sheet_name=sheetName,
                                     sheet_index=sheetIndex)

    # BUG FIX: compare against None so a sheet index of 0 (falsy) is
    # not silently replaced by sheetName.
    DATA = tbl_to_obj(xlsTable,
                      sheet=sheetIndex if sheetIndex is not None else sheetName,
                      output='array')

    # Create output workbook
    out_xls = xlwt.Workbook()

    row_idx   = 1  # next row to write in the current sheet (row 0 = header)
    sheet_num = 1  # numeric suffix for the sheet name
    base  = sheetName if sheetName else 'data'
    sheet = None
    for row in DATA:
        if row_idx == 1:
            # Start a new sheet and write the header row
            sheet = out_xls.add_sheet('{}_{}'.format(base, sheet_num))

            for col in range(len(COLUMNS_ORDER)):
                sheet.write(0, col, COLUMNS_ORDER[col])

        for col in range(len(COLUMNS_ORDER)):
            sheet.write(row_idx, col, row[COLUMNS_ORDER[col]])

        row_idx += 1

        # Sheet is full: restart row numbering so the next data row
        # opens a fresh sheet.
        if row_idx == row_number + 1:
            row_idx = 1
            sheet_num += 1

    # Save result
    out_xls.save(output)
Example no. 2
0
def merge_feat(shps, outShp, api="ogr2ogr"):
    """
    Get all features in several Shapefiles and save them in one file.

    shps   : list of paths to the input Feature Classes.
    outShp : path of the output file.
    api    : "ogr2ogr" (GDAL command line) or "pandas".

    Returns outShp.

    Raises ValueError for an unknown api or, with api="pandas", when
    shps is not a list.
    """
    
    if api == "ogr2ogr":
        from gasp         import exec_cmd
        from gasp.prop.ff import drv_name
        
        out_drv = drv_name(outShp)
        
        # Create output and copy the features of the first layer
        exec_cmd('ogr2ogr -f "{}" {} {}'.format(
            out_drv, outShp, shps[0]
        ))
        
        # Append remaining layers to the output
        # (plain loop: the original list comprehension was used only
        # for its side effects)
        for shp in shps[1:]:
            exec_cmd(
                'ogr2ogr -f "{}" -update -append {} {}'.format(
                    out_drv, outShp, shp
                )
            )
    
    elif api == 'pandas':
        # Merge SHP using pandas
        from gasp.fm     import tbl_to_obj
        from gasp.to.shp import df_to_shp
        
        if type(shps) != list:
            raise ValueError('shps should be a list with paths for Feature Classes')
        
        dfs = [tbl_to_obj(shp) for shp in shps]
        
        result = dfs[0]
        
        for df in dfs[1:]:
            result = result.append(df, ignore_index=True, sort=True)
        
        df_to_shp(result, outShp)
    
    else:
        # BUG FIX: the "{}" placeholder was never filled in the
        # original, so the error message was useless.
        raise ValueError(
            "{} API is not available".format(api)
        )
    
    return outShp
Example no. 3
0
def tbl_to_tbl(inTbl,
               outTbl,
               inSheet=None,
               txtDelimiter=None,
               inTxtDelimiter=None,
               inEncoding='utf-8'):
    """
    Convert tabular data from one file format to another.

    inTbl          : path of the input table.
    outTbl         : path of the output table.
    inSheet        : sheet of the input table to read (spreadsheets).
    txtDelimiter   : field delimiter for the output text table.
    inTxtDelimiter : field delimiter of the input text table.
    inEncoding     : encoding of the input table.

    Returns outTbl.
    """

    from gasp.fm import tbl_to_obj
    # BUG FIX: obj_to_tbl was used below without ever being imported,
    # which raised NameError on every call.
    # NOTE(review): module path assumed from the gasp package layout
    # (gasp.to) -- confirm against the project.
    from gasp.to import obj_to_tbl

    data = tbl_to_obj(inTbl,
                      sheet=inSheet,
                      encoding_=inEncoding,
                      _delimiter=inTxtDelimiter)

    outTbl = obj_to_tbl(data, outTbl, delimiter=txtDelimiter)

    return outTbl
Example no. 4
0
File: cls.py Project: jasp382/gasp
def predict_fm_mdl(mdlFile, vFile, data, txtCol, method='NaiveBayes'):
    """
    Text classification using a previously fitted model stored in a file.

    mdlFile : path to the serialized (joblib) classifier.
    vFile   : path to the serialized vectorizer, or None/falsy when the
              classifier takes raw text (e.g. a pipeline).
    data    : pandas DataFrame, or anything tbl_to_obj can read.
    txtCol  : name of the column holding the text to classify.
    method  : 'NaiveBayes', 'LinearSupportVectorMachine',
              'RandomForest' or 'LogisticRegression'.

    Returns the DataFrame (rows with a null txtCol removed) with a new
    'classification' column.  Raises ValueError for an unknown method.
    """
    
    from joblib import load
    import pandas as pd
    from gasp.fm import tbl_to_obj
    
    classDf = tbl_to_obj(data) if type(data) != pd.DataFrame else data
    # Drop rows with no text; predictions below are aligned with classDf
    classDf = classDf[pd.notnull(classDf[txtCol])]
    
    clf   = load(mdlFile)
    tvect = None if not vFile else load(vFile)
    
    # BUG FIX: the original mixed `data` and `classDf`.  After the null
    # filter the two frames can differ in length/index, so predictions
    # computed on classDf were written into `data` misaligned (and the
    # NaiveBayes branch even predicted on the unfiltered text).  Use
    # classDf consistently for both prediction and assignment.
    if method in ('NaiveBayes', 'LinearSupportVectorMachine', 'RandomForest'):
        # These models expect vectorized text
        y_pred = clf.predict(tvect.transform(classDf[txtCol]))
    
    elif method == 'LogisticRegression':
        # This classifier is fed the raw text column directly
        y_pred = clf.predict(classDf[txtCol])
    
    else:
        # Previously an unknown method silently returned the data
        # without any classification; fail loudly instead.
        raise ValueError('{} is not a valid method'.format(method))
    
    classDf.loc[:, 'classification'] = y_pred
    
    return classDf
Example no. 5
0
def same_attr_to_shp(inShps, interestCol, outFolder, basename="data_",
                     resultDict=None):
    """
    List every distinct value of interestCol across several Shapefiles
    and write one new shp per value, holding all features with that
    value regardless of which input shp they came from.

    Returns a list of the created paths, or, when resultDict is truthy,
    a dict mapping each value (as string, decimals truncated) to its path.
    """
    
    import os
    from gasp         import goToList
    from gasp.fm      import tbl_to_obj
    from gasp.mng.gen import merge_df
    from gasp.to.shp  import df_to_shp
    
    # Keep the extension of the first input for the outputs
    fileExt = os.path.splitext(inShps[0])[1]
    
    # Stack every input shp into a single DataFrame
    mergedDf = merge_df([tbl_to_obj(s) for s in inShps], ignIndex=True)
    
    outputs = {} if resultDict else []
    for value in mergedDf[interestCol].unique():
        subset = mergedDf[mergedDf[interestCol] == value]
        
        # Use the value as filename key, truncating any decimal part
        valStr = str(value)
        key = valStr.split('.')[0] if '.' in valStr else valStr
        
        newShp = df_to_shp(subset, os.path.join(
            outFolder, '{}{}{}'.format(basename, key, fileExt)
        ))
        
        if resultDict:
            outputs[key] = newShp
        else:
            outputs.append(newShp)
    
    return outputs