Beispiel #1
0
def _read_placement(excel_data, sheet):
    """
    Read one placement per sheet column and store them on ``excel_data``.

    Row 0 of every column holds the placement name.  The remaining cells of
    the column hold persons; a run of consecutive non-empty cells forms one
    position, and empty cells separate positions.  A leading ``*`` marks the
    person as fixed and is stripped from the stored name.

    The result is assigned to ``excel_data.placements`` as a list of
    ``(placement_name, positions)`` tuples, where every position is a list
    of ``[person, is_fixed]`` pairs.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    def _split_into_positions(cells):
        # Group consecutive non-empty cells; empty cells act as separators.
        groups = []
        current = []
        for cell in cells:
            raw = cell.value
            if not raw:
                if current:
                    groups.append(current)
                current = []
                continue
            fixed = raw.startswith('*')
            current.append([raw[1:] if fixed else raw, fixed])
        # The last run is not followed by a separator, so flush it here.
        if current:
            groups.append(current)
        return groups

    excel_data.placements = [
        (sheet.cell(0, col_index).value,
         _split_into_positions(sheet.col(col_index)[1:]))
        for col_index in range(number_of_good_cols(sheet))
    ]
Beispiel #2
0
def _read_placement(excel_data, sheet):
    """
    Collect the placements described by ``sheet`` into ``excel_data``.

    Each column is one placement: its first cell is the placement name and
    the cells below list persons.  Blank cells split the column into
    positions; a name starting with ``*`` is recorded as fixed, without the
    asterisk.

    ``excel_data.placements`` receives a list of
    ``(placement_name, positions)`` tuples; each position is a list of
    ``[person, is_fixed]`` entries.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    collected = []
    for col in range(number_of_good_cols(sheet)):
        title = sheet.cell(0, col).value

        # The block currently being filled is always the last list element.
        blocks = [[]]
        for cell in sheet.col(col)[1:]:
            entry = cell.value
            if entry:
                pinned = entry.startswith('*')
                blocks[-1].append([entry[1:] if pinned else entry, pinned])
            elif blocks[-1]:
                # A blank cell closes a non-empty block; open a new one.
                blocks.append([])
        # Drop the trailing block if nothing was added to it.
        if not blocks[-1]:
            blocks.pop()

        collected.append((title, blocks))

    excel_data.placements = collected
Beispiel #3
0
def _read_groups(excel_data, sheet):
    """
    Read person names and group memberships from ``sheet``.

    Column 0 (below the header row) lists the person names; the header row
    (from column 1 on) lists the group names.  A truthy cell at the
    intersection of a person row and a group column adds that person to the
    group.

    Sets ``excel_data.names`` to the list of names and
    ``excel_data.groups`` to a list of ``(group_name, members)`` tuples in
    header order.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    person_names = [cell.value for cell in sheet.col(0)[1:]]

    header_end = number_of_good_cols(sheet, nrows=1)
    headers = [sheet.cell(0, c).value for c in range(1, header_end)]

    members = dict((header, []) for header in headers)

    row_end = number_of_good_rows(sheet, ncols=1)
    for r in range(1, row_end):
        who = sheet.cell(r, 0).value
        # Group columns start at sheet column 1, hence the offset.
        for c, header in enumerate(headers, 1):
            if sheet.cell(r, c).value:
                members[header].append(who)

    excel_data.names = person_names
    excel_data.groups = [(header, members[header]) for header in headers]
Beispiel #4
0
 def extractMarkingFromXLS(self, mrkFileName):
     '''
     Process the current Excel file to extract the marking annotation
     and write the output to mrkFileName.

     One line of the form "<sheet index>;<cell name>;<style name>" is
     written for every cell whose style name starts with 'TL '.  When
     config.isCompress() is true the output goes to a bz2-compressed file
     named mrkFileName + ".bz2" instead of mrkFileName.

     The output handle is closed even if processing a sheet fails.
     '''
     if config.isCompress():
         outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb',
                                    compresslevel=9)
     else:
         outputHandle = open(mrkFileName, "w")
     try:
         for n in range(self.workbook.nsheets):
             sheet = self.workbook.sheet_by_index(n)
             colns = number_of_good_cols(sheet)
             rowns = number_of_good_rows(sheet)
             self.log.debug(
                 "Process %d columns and %d rows" % (colns, rowns))
             for i in range(0, rowns):
                 for j in range(0, colns):
                     cell = sheet.cell(i, j)
                     style_name = self.styles[cell].name
                     # Only styles carrying the 'TL ' prefix are markings
                     if style_name.startswith('TL '):
                         outputHandle.write(
                             "%d;%s;%s\n" % (n, cellname(i, j), style_name))
     finally:
         # Do not leak the file handle when a sheet cannot be processed
         outputHandle.close()
Beispiel #5
0
    def __init__(self, data):
        """
        Load the mappings from the Excel file described by ``data``.

        ``data`` is read for the keys 'file', 'path', 'predicate' and
        'mapping_type', plus 'prefix' when the mapping type is 'uri'.

        Rows of the first sheet are read starting at row 1: column 0 is
        the context, column 1 the literal (passed through clean_string)
        and columns 2 onwards the mapped values.  The result is stored in
        ``self._mappings`` as::

            {literal: {'default': values,
                       'context': {context: values}}}

        where ``values`` is a list of ``(predicate, encoded_value)`` pairs,
        or None when the row has no non-empty value cell.
        """
        self._mappings = {}

        self.excelFileName = data['file']
        predicate = URIRef(data['predicate'])
        mapping_type = data['mapping_type']

        # Load the mappings
        wb = open_workbook(data['path'] + "/" + self.excelFileName,
                           formatting_info=False,
                           on_demand=True)
        sheet = wb.sheet_by_index(0)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        for i in range(1, rowns):
            # Get the context (first column)
            context = sheet.cell(i, 0).value

            # Get the string (force reading the cell as a string)
            literal = sheet.cell(i, 1).value
            if isinstance(literal, float):
                literal = str(int(literal))
            literal = clean_string(literal)

            # Get the values
            values = []
            for j in range(2, colns):
                value = sheet.cell(i, j).value
                if value == '':
                    continue

                # Codes using numbers need to be seen as string
                if isinstance(value, float):
                    value = str(int(value))

                # Encode the value
                if mapping_type == 'uri':
                    encoded_value = URIRef(data['prefix'] + value)
                elif mapping_type == 'boolean':
                    encoded_value = Literal(value == '1' or value == 'true')
                else:
                    encoded_value = Literal(value)

                # Pair the encoded value with the predicate
                values.append((predicate, encoded_value))

            # Rows without any value are recorded explicitly as None
            if not values:
                values = None

            # Save the mapping
            entry = self._mappings.setdefault(literal, {})
            if context != '':
                # Store the specific context
                entry.setdefault('context', {})[context] = values
            else:
                # Store the default mappings
                entry['default'] = values
Beispiel #6
0
 def __init__(self, data):
     """
     Build the literal-to-values mappings from an Excel file.

     ``data`` supplies 'file' and 'path' (joined with "/" to locate the
     workbook), 'predicate', 'mapping_type' and, for the 'uri' mapping
     type, 'prefix'.

     Only the first sheet is read, starting at row 1.  Column 0 holds the
     context, column 1 the literal (normalised with clean_string) and the
     remaining columns the values.  Every non-empty value becomes a
     ``(predicate, encoded_value)`` pair; rows without values map to None.
     Pairs are stored under ``self._mappings[literal]['default']`` when
     the context cell is empty and under
     ``self._mappings[literal]['context'][context]`` otherwise.
     """
     self._mappings = {}

     self.excelFileName = data['file']
     predicate = URIRef(data['predicate'])
     mapping_type = data['mapping_type']

     # Load the mappings
     wb = open_workbook(data['path'] + "/" + self.excelFileName,
                        formatting_info=False, on_demand=True)
     sheet = wb.sheet_by_index(0)
     colns = number_of_good_cols(sheet)
     rowns = number_of_good_rows(sheet)
     for i in range(1, rowns):
         # Get the context (first column)
         context = sheet.cell(i, 0).value

         # Get the string (force reading the cell as a string)
         literal = sheet.cell(i, 1).value
         if isinstance(literal, float):
             literal = str(int(literal))
         literal = clean_string(literal)

         # Get the values
         values = []
         for j in range(2, colns):
             value = sheet.cell(i, j).value
             if value == '':
                 continue

             # Codes using numbers need to be seen as string
             if isinstance(value, float):
                 value = str(int(value))

             # Encode the value
             if mapping_type == 'uri':
                 encoded_value = URIRef(data['prefix'] + value)
             elif mapping_type == 'boolean':
                 encoded_value = Literal(value == '1' or value == 'true')
             else:
                 encoded_value = Literal(value)

             # Pair the encoded value with the predicate
             values.append((predicate, encoded_value))

         # Rows without any value are recorded explicitly as None
         if not values:
             values = None

         # Save the mapping
         entry = self._mappings.setdefault(literal, {})
         if context != '':
             # Store the specific context
             entry.setdefault('context', {})[context] = values
         else:
             # Store the default mappings
             entry['default'] = values
Beispiel #7
0
    def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
        '''
        Load marking instructions from mrkFileName and process the current
        Excel file to generate the annotated targetFileName.

        Every non-blank line of mrkFileName must have the form
        "<sheet index>;<cell name>;<style name>".  Sheets that have no
        marking lines are left unchanged.  A style name that is not one of
        the 'TL ...' styles created below raises KeyError.
        '''
        # Prepare the output
        self.log.debug("Copy book")
        target_workbook = copy(self.workbook)

        self.log.debug("Load marking from %s" % mrkFileName)

        # Load marking information: {sheet index: {cell name: style name}}
        marking = {}
        with open(mrkFileName) as mrk_file:
            for mrk in mrk_file:
                mrk = mrk.strip()
                if not mrk:
                    # Tolerate blank lines (e.g. a trailing newline)
                    continue
                (index_str, cell, style) = mrk.split(';')
                index = int(index_str)
                marking.setdefault(index, {})[cell] = style

        # Create the styles in the target
        styles = {}
        for name, colour in [('TL Metadata', 'yellow'),
                             ('TL ColHeader', 'blue'),
                             ('TL Data', 'white'),
                             ('TL RowHeader', 'yellow'),
                             ('TL RowProperty', 'green'),
                             ('TL Title', 'pink'),
                             ('TL RowLabel', 'blue'),
                             ('TL HRowHeader', 'blue')]:
            xf = xlwt.easyxf(
                'name: test; pattern: pattern solid, fore_colour %s;' % colour)
            styles[name] = xf
            # Parenthesised form prints the same value on Python 2 and 3
            print(target_workbook.add_style(xf))

        # Process the source workbook
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            target_sheet = target_workbook.get_sheet(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            # A sheet without marking lines has no entry; treat it as empty
            # instead of failing with KeyError
            sheet_marking = marking.get(n, {})
            for i in range(0, rowns):
                for j in range(0, colns):
                    cell_name = cellname(i, j)
                    if cell_name in sheet_marking:
                        style = styles[sheet_marking[cell_name]]
                        # TODO Use matching style defined earlier
                        target_sheet.write(i, j,
                                           label=sheet.cell(i, j).value,
                                           style=style)

        target_workbook.save(targetFileName)
Beispiel #8
0
    def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
        '''
        Load marking instructions from mrkFileName and process the current
        Excel file to generate the annotated targetFileName.

        The marking file holds one "<sheet index>;<cell name>;<style name>"
        entry per line; blank lines are ignored.  Cells listed for a sheet
        are rewritten in the copy with the corresponding 'TL ...' style,
        and sheets without entries are copied as they are.  An unknown
        style name raises KeyError.
        '''
        # Prepare the output
        self.log.debug("Copy book")
        target_workbook = copy(self.workbook)

        self.log.debug("Load marking from %s" % mrkFileName)

        # Load marking information: {sheet index: {cell name: style name}}
        marking = {}
        with open(mrkFileName) as mrk_file:
            for mrk in mrk_file:
                mrk = mrk.strip()
                if not mrk:
                    # Skip blank lines rather than failing on the unpacking
                    continue
                (index_str, cell, style) = mrk.split(';')
                index = int(index_str)
                marking.setdefault(index, {})[cell] = style

        # Create the styles in the target
        style_colours = [('TL Metadata', 'yellow'), ('TL ColHeader', 'blue'),
                         ('TL Data', 'white'), ('TL RowHeader', 'yellow'),
                         ('TL RowProperty', 'green'), ('TL Title', 'pink'),
                         ('TL RowLabel', 'blue'), ('TL HRowHeader', 'blue')]
        styles = {}
        for name, colour in style_colours:
            xf = xlwt.easyxf('name: test; pattern: pattern solid, fore_colour %s;' % colour)
            styles[name] = xf
            # Parenthesised form prints the same value on Python 2 and 3
            print(target_workbook.add_style(xf))

        # Process the source workbook
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            target_sheet = target_workbook.get_sheet(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            # Sheets absent from the marking file simply have nothing to mark
            sheet_marking = marking.get(n, {})
            for i in range(0, rowns):
                for j in range(0, colns):
                    cell_name = cellname(i, j)
                    if cell_name not in sheet_marking:
                        continue
                    style = styles[sheet_marking[cell_name]]
                    # TODO Use matching style defined earlier
                    target_sheet.write(i, j, label=sheet.cell(i, j).value,
                                       style=style)

        target_workbook.save(targetFileName)
Beispiel #9
0
def _read_tables(excel_data, sheet):
    """
    Read table dimensions from ``sheet`` into ``excel_data.dimensions``.

    Every row describes one entry: column 0 holds its name and the
    following columns hold sizes, converted with ``int``.  Reading of a row
    stops at the first empty (falsy) size cell.  The result is a list of
    ``[name, sizes]`` pairs.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    row_count = number_of_good_rows(sheet, ncols=1)
    # The column count does not depend on the row, so compute it once
    # instead of rescanning the sheet for every row.
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(row_count):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                # First empty cell ends the list of sizes for this row.
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
Beispiel #10
0
def _read_tables(excel_data, sheet):
    """
    Fill ``excel_data.dimensions`` from the rows of ``sheet``.

    Column 0 of each row is the name; the cells to its right are sizes,
    read left to right and converted with ``int`` until the first empty
    (falsy) cell.  ``excel_data.dimensions`` becomes a list of
    ``[name, sizes]`` pairs, one per row.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    row_count = number_of_good_rows(sheet, ncols=1)
    # Loop-invariant: determine the usable column range a single time
    # rather than once per row.
    size_columns = range(1, number_of_good_cols(sheet))

    dimensions = []
    for row in range(row_count):
        sizes = []
        name = sheet.cell(row, 0).value
        for col in size_columns:
            value = sheet.cell(row, col).value
            if not value:
                # An empty cell terminates this row's sizes.
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
Beispiel #11
0
 def getValidRowsCols(self):
     """
     Determine the number of non-empty rows and columns in the Excel sheet

     Starts from the counts reported by number_of_good_rows and
     number_of_good_cols for self.r_sheet and then drops trailing rows and
     columns that self.isEmptyRow / self.isEmptyColumn report as empty.
     The counts never drop below zero, so a sheet without any content
     yields 0 instead of probing negative indices.

     Returns:
     rowns -- number of rows
     colns -- number of columns
     """
     colns = number_of_good_cols(self.r_sheet)
     rowns = number_of_good_rows(self.r_sheet)

     # Check whether the number of good columns and rows are correct;
     # the "> 0" guards stop the trimming once nothing is left to check
     while rowns > 0 and self.isEmptyRow(rowns - 1, colns):
         rowns -= 1
     while colns > 0 and self.isEmptyColumn(colns - 1, rowns):
         colns -= 1

     self.log.debug('Number of rows with content:    {0}'.format(rowns))
     self.log.debug('Number of columns with content: {0}'.format(colns))
     return rowns, colns
Beispiel #12
0
 def extractMarkingFromXLS(self, mrkFileName):
     '''
     Process the current Excel file to extract the marking annotation
     and write the output to mrkFileName.

     For every cell whose style name starts with 'TL ' a line
     "<sheet index>;<cell name>;<style name>" is written.  If
     config.isCompress() is true the lines are written bz2-compressed to
     mrkFileName + ".bz2"; otherwise they go to mrkFileName as plain text.

     The output handle is always closed, including when an error occurs
     while the sheets are being read.
     '''
     if config.isCompress():
         outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb', compresslevel=9)
     else:
         outputHandle = open(mrkFileName, "w")
     try:
         for n in range(self.workbook.nsheets):
             sheet = self.workbook.sheet_by_index(n)
             colns = number_of_good_cols(sheet)
             rowns = number_of_good_rows(sheet)
             self.log.debug("Process %d columns and %d rows" % (colns, rowns))
             for i in range(0, rowns):
                 for j in range(0, colns):
                     style_name = self.styles[sheet.cell(i, j)].name
                     # Cells without a 'TL ' style carry no marking
                     if not style_name.startswith('TL '):
                         continue
                     mrk_line = "%d;%s;%s\n" % (n, cellname(i, j), style_name)
                     outputHandle.write(mrk_line)
     finally:
         # Release the handle even when reading a sheet raised
         outputHandle.close()
Beispiel #13
0
def _read_groups(excel_data, sheet):
    """
    Extract the names and the group assignments stored in ``sheet``.

    The first column (without its header cell) provides the names and the
    first row (without its first cell) provides the group names.  Whenever
    the cell for a given name row and group column is truthy, the name is
    added to that group.

    ``excel_data.names`` is set to the list of names and
    ``excel_data.groups`` to ``(group_name, members)`` tuples, ordered as
    the group columns appear in the sheet.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    all_names = [cell.value for cell in sheet.col(0)[1:]]

    titles = [sheet.cell(0, column).value
              for column in range(1, number_of_good_cols(sheet, nrows=1))]

    roster = {}
    for title in titles:
        roster[title] = []

    for line in range(1, number_of_good_rows(sheet, ncols=1)):
        member = sheet.cell(line, 0).value
        for position, title in enumerate(titles):
            # Group columns are shifted by one because column 0 holds names.
            if not sheet.cell(line, position + 1).value:
                continue
            roster[title].append(member)

    excel_data.names = all_names
    excel_data.groups = [(title, roster[title]) for title in titles]
    Utility function to clean a string
    TODO speed this up
    """
    # Lower and remove new lines
    text_clean = text.lower().replace("\n", " ").replace("\r", " ")
    # Shrink spaces
    text_clean = re.sub(r"\s+", " ", text_clean)
    # Remove lead and trailing whitespace
    text_clean = text_clean.strip()
    return text_clean


# Build `mappings` from the first sheet of the CURRENT_TYPES workbook:
# each cleaned literal (column 1) maps to the set of non-empty values found
# in columns 2 onwards of its rows.  Row 0 is skipped.
wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True)
sheet = wb.sheet_by_index(0)
colns = number_of_good_cols(sheet)
rowns = number_of_good_rows(sheet)
mappings = {}
for i in range(1, rowns):
    # Normalise the literal and make sure it has a value set
    literal = clean_string(sheet.cell(i, 1).value)
    known_values = mappings.setdefault(literal, set())
    print(">" + literal)

    # Collect every non-empty value of the row
    for j in range(2, colns):
        value = sheet.cell(i, j).value
        if value == "":
            continue
        known_values.add(value)

Beispiel #15
0
    Utility function to clean a string
    TODO speed this up
    """
    # Lower and remove new lines
    text_clean = text.lower().replace('\n', ' ').replace('\r', ' ')
    # Shrink spaces
    text_clean = re.sub(r'\s+', ' ', text_clean)
    # Remove lead and trailing whitespace
    text_clean = text_clean.strip()
    return text_clean


# Read the current file mappings: the first sheet of the CURRENT_TYPES
# workbook is scanned from row 1 on, and every cleaned literal of column 1
# is associated with the set of non-empty values in columns 2 and beyond.
wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True)
sheet = wb.sheet_by_index(0)
colns = number_of_good_cols(sheet)
rowns = number_of_good_rows(sheet)
mappings = {}
for i in range(1, rowns):
    # Cleaned literal; create its (initially empty) value set if needed
    literal = clean_string(sheet.cell(i, 1).value)
    if literal not in mappings:
        mappings[literal] = set()
    print('>' + literal)

    # Add each non-empty value cell of this row
    for j in range(2, colns):
        value = sheet.cell(i, j).value
        if not (value != ''):
            continue
        mappings[literal].add(value)

# Load data from Ashkan