Example #1
0
def split_table_by_number(xlsTable, row_number, output,
                          sheetName=None, sheetIndex=None):
    """
    Split a table by row number

    Given a number of rows, this method will split an input table
    in several sheets of a new workbook; each sheet gets a header row
    and at most row_number data rows.

    Parameters:
    - xlsTable: path to the input Excel table.
    - row_number: maximum number of data rows per output sheet (>= 1).
    - output: path of the workbook to be written.
    - sheetName / sheetIndex: sheet to be read. When reading the rows,
      sheetIndex is used whenever it is given (index 0 included),
      otherwise sheetName is used.

    Output sheets are named '<base>_<n>', where base is sheetName (or
    'data' when no name is given) and n starts at 1.

    Raises ValueError if row_number is lower than 1.

    TODO: Do it with Pandas
    """

    # With a value lower than 1 the row counter would never reach the
    # split point and everything would silently end up in one sheet
    if row_number < 1:
        raise ValueError('row_number must be equal or greater than 1')

    import xlwt
    from gasp.fm          import tbl_to_obj
    from gasp.pyt.xls.fld import col_name

    columns = col_name(
        xlsTable, sheet_name=sheetName, sheet_index=sheetIndex
    )

    # Compare with None: 0 is a valid sheet index
    rows = tbl_to_obj(
        xlsTable,
        sheet=sheetIndex if sheetIndex is not None else sheetName,
        output='array'
    )

    # Create output
    out_xls = xlwt.Workbook()
    base = sheetName if sheetName else 'data'

    def _new_sheet(number):
        # Add a sheet to the output and write the columns titles in it
        new_sheet = out_xls.add_sheet('{}_{}'.format(base, number))

        for col_idx, title in enumerate(columns):
            new_sheet.write(0, col_idx, title)

        return new_sheet

    sheet = None
    sheet_number = 0
    line = 1
    for row in rows:
        if line == 1:
            sheet_number += 1
            sheet = _new_sheet(sheet_number)

        for col_idx, title in enumerate(columns):
            sheet.write(line, col_idx, row[title])

        # Start a new sheet once the current one has row_number rows
        line = 1 if line == row_number else line + 1

    if sheet is None:
        # No data rows: write a header-only sheet, since a workbook
        # needs at least one sheet to be saved
        _new_sheet(1)

    # Save result
    out_xls.save(output)
Example #2
0
def export_cells_not_in(inTable, noTable, outTable, inSheet, noSheet, inFID,
                        noFID):
    """
    Export to a new file the rows of inTable not in noTable

    A row of inTable (sheet inSheet) is exported when its value in
    column inFID does not occur in column noFID of noTable (sheet
    noSheet). Exported rows are written, under the column titles of
    inTable, to a sheet named inSheet in a new workbook saved at
    outTable.

    Returns outTable.
    """

    import xlrd
    import xlwt
    from gasp.fm import tbl_to_obj
    from gasp.pyt.xls.fld import col_name, get_columns_position
    from gasp.pyt.xls.summ import list_unique_values_column

    # TODO: check if tables are xls

    # Get Data
    inData = tbl_to_obj(inTable, sheet=inSheet, output='array')
    COLUMNS = col_name(inTable, sheet_name=inSheet)

    # From noDATA, get IDS that will not be in the outTable
    noXls = xlrd.open_workbook(noTable)
    _noSheet = noXls.sheet_by_name(noSheet)
    colsPosition = get_columns_position(_noSheet, noFID)

    # Keep the IDS in a set: it is queried once per input row and a set
    # answers each membership test without scanning all the values
    excluded_ids = set(
        list_unique_values_column(_noSheet, colsPosition[noFID])
    )

    # Create Output
    out_xls = xlwt.Workbook()
    new_sheet = out_xls.add_sheet(inSheet)

    # Write columns titles
    for c, title in enumerate(COLUMNS):
        new_sheet.write(0, c, title)

    # Write data not in noData
    out_line = 1
    for row in inData:
        if row[inFID] in excluded_ids:
            continue

        for c, col in enumerate(COLUMNS):
            new_sheet.write(out_line, c, row[col])

        out_line += 1

    out_xls.save(outTable)

    return outTable
Example #3
0
def cols_name(ff, sheetName=None, sheetIdx=None):
    """
    Return the columns name of a table file

    Only Excel files ('.xlsx' and '.xls') are handled; any other file
    format raises ValueError.
    """

    from gasp.pyt.oss import fprop

    file_format = fprop(ff, 'ff')

    # Reject unsupported formats before touching the Excel helpers
    if file_format not in ('.xlsx', '.xls'):
        raise ValueError(
            'File format is not valid!'
        )

    from gasp.pyt.xls.fld import col_name

    return col_name(ff, sheet_name=sheetName, sheet_index=sheetIdx)
Example #4
0
def join_xls_table(main_table,
                   fid_main,
                   join_table,
                   fid_join,
                   copy_fields,
                   out_table,
                   main_sheet=None,
                   join_sheet=None):
    """
    Join two Excel tables using a common attribute

    Both tables are loaded with their first column as index. For each
    row of main_table, the values of copy_fields taken from the
    join_table row with the same index are written after the main
    table columns; rows without a match get nothing written in those
    cells.

    Relations:
    - 1 to 1
    - N to 1

    The result is saved to out_table, in a sheet named after
    main_sheet, else join_sheet, else 'data'. fid_main and fid_join
    are accepted but not used by this implementation.

    Raises ValueError if copy_fields is not a non-empty list or string.

    TODO: Use Pandas Instead
    """

    import xlwt
    from gasp.fm import tbl_to_obj
    from gasp.pyt.xls.fld import col_name

    # Normalise copy_fields to a list; anything else is rejected below
    if type(copy_fields) is str:
        copy_fields = [copy_fields]
    elif type(copy_fields) is not list:
        copy_fields = None

    if not copy_fields:
        raise ValueError('copy_fields should be a list or a string')

    # Both tables as dicts indexed by their first column
    main_rows = tbl_to_obj(main_table,
                           sheet=main_sheet,
                           useFirstColAsIndex=True,
                           output='dict')

    join_rows = tbl_to_obj(join_table,
                           sheet=join_sheet,
                           useFirstColAsIndex=True,
                           output='dict')

    # Output sheet name: main sheet first, then join sheet, then 'data'
    sheet_title = main_sheet or join_sheet or 'data'

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(sheet_title)

    # Write titles: main table columns followed by the copied fields
    main_columns = col_name(main_table, sheet_name=main_sheet)

    for position, title in enumerate(main_columns + copy_fields):
        sheet.write(0, position, title)

    # The first column is the index, written from the dict key
    own_columns = main_columns[1:]
    first_join_cell = 1 + len(own_columns)

    for line, fid in enumerate(main_rows, start=1):
        sheet.write(line, 0, fid)

        for position, col in enumerate(own_columns, start=1):
            sheet.write(line, position, main_rows[fid][col])

        if fid in join_rows:
            for position, col in enumerate(copy_fields,
                                           start=first_join_cell):
                sheet.write(line, position, join_rows[fid][col])

    workbook.save(out_table)
Example #5
0
	
	interest_columns = [interest_columns] if type(interest_columns) == str else \
		interest_columns if type(interest_columns) == list else None
	
	if not interest_columns:
		raise ValueError(
			'interest_columns should be a list or a string'
		)
	
	# XLS data to dict
	data = tbl_to_obj(
        xls_path, sheet_name=sheet, useFirstColAsIndex=True, output='dict'
    )
	
	# Get Order Values
	COLUMNS_BY_ORDER = col_name(xls_path, sheet_name=sheet)
	
	# Store and map changes
	changes = {}
	
	# Replace values
	for fid in data:
		for col in interest_columns:
			if charToReplace in data[fid][col]:
				repObj = data[fid][col].replace(charToReplace, _replacement)
				data[fid][col] = repObj
								
				if fid not in changes:
					changes[fid] = {col : data[fid][col]}
				else:
					changes[fid][col].update({col: data[fid][col]})
Example #6
0
File: fm.py Project: jasp382/gasp
def xlstimedelta_to_pddf(inXls,
                         timecol,
                         sheet_name=None,
                         sheet_index=None,
                         columnsToMantain=None):
    """
    Convert a table with a column with timedelta values to a valid 
    Pandas DataFrame

    Parameters:
    - inXls: path to the Excel file.
    - timecol: name or position (int) of the column with the time
      values.
    - sheet_name / sheet_index: sheet to be read.
    - columnsToMantain: column name(s) to keep; all the columns are
      kept when it is not given. When timecol is a name, it is always
      kept. The given object is not modified.

    Returns a DataFrame with one row per sheet row after the first one
    (taken as header). Columns follow their order in the sheet, and
    the values of the time column are datetime.timedelta objects.
    """

    import datetime
    import pandas
    import xlrd
    from xlrd import xldate_as_tuple
    from gasp.pyt import obj_to_lst
    from gasp.pyt.xls.sheet import get_sheet_obj
    from gasp.pyt.xls.fld import col_name, get_columns_position

    workbook = xlrd.open_workbook(inXls)
    sheet = get_sheet_obj(workbook, name=sheet_name, index=sheet_index)

    # Get Cols name
    # Copy the list so the append below never changes the caller's object
    COLS_NAME = list(
        col_name(sheet) if not columnsToMantain
        else obj_to_lst(columnsToMantain)
    )

    time_by_position = isinstance(timecol, int)

    if not time_by_position and timecol not in COLS_NAME:
        COLS_NAME.append(timecol)

    # Get Cols position (column name -> position in the sheet)
    COLS_POS = get_columns_position(sheet, COLS_NAME)

    COL_TIME_POSITION = timecol if time_by_position else COLS_POS[timecol]

    # Row values are collected in sheet order, so the labels given to
    # the DataFrame must follow the same order and not the order in
    # which the names were requested
    ordered = sorted(COLS_POS.items(), key=lambda item: item[1])
    labels = [name for name, _ in ordered]
    positions = [position for _, position in ordered]

    data = []
    for row in range(1, sheet.nrows):
        l_col = []
        for col in positions:
            value = sheet.cell(row, col).value

            if col == COL_TIME_POSITION:
                tt = xldate_as_tuple(value, workbook.datemode)

                # NOTE(review): tt is (year, month, day, hour, minute,
                # second); the month is used as weeks here, as in the
                # original code - confirm this is the intended conversion
                value = datetime.timedelta(weeks=tt[1],
                                           days=tt[2],
                                           hours=tt[3],
                                           minutes=tt[4],
                                           seconds=tt[5])

            l_col.append(value)

        data.append(l_col)

    df = pandas.DataFrame(data, columns=labels)

    return df