Ejemplo n.º 1
0
 def extractMarkingFromXLS(self, mrkFileName):
     '''
     Process the current Excel file to extract the marking annotation
     and write the output to mrkFileName.

     One line "<sheet index>;<cell name>;<style name>" is written for every
     cell whose style name starts with 'TL '. When config.isCompress() is
     true the output goes to mrkFileName + ".bz2" (bz2, compression level
     9) instead of mrkFileName.
     '''
     if config.isCompress():
         outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb',
                                    compresslevel=9)
     else:
         outputHandle = open(mrkFileName, "w")

     # Close the handle even when a sheet or style lookup raises, so the
     # file descriptor is not leaked and buffered output is flushed.
     try:
         for n in range(self.workbook.nsheets):
             sheet = self.workbook.sheet_by_index(n)
             colns = number_of_good_cols(sheet)
             rowns = number_of_good_rows(sheet)
             self.log.debug("Process %d columns and %d rows" % (colns, rowns))
             for i in range(rowns):
                 for j in range(colns):
                     style_name = self.styles[sheet.cell(i, j)].name
                     if style_name.startswith('TL '):
                         outputHandle.write(
                             "%d;%s;%s\n" % (n, cellname(i, j), style_name))
     finally:
         outputHandle.close()
Ejemplo n.º 2
0
def _read_groups(excel_data, sheet):
    """
    Fill excel_data.names and excel_data.groups from the given sheet.

    names  -- values of column 0 below the first row.
    groups -- list of (group name, member names) tuples; group names come
              from row 0 (columns 1 and up) and a row's name is a member of
              a group when the cell in that group's column is truthy.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    member_names = [entry.value for entry in sheet.col(0)[1:]]

    header_width = number_of_good_cols(sheet, nrows=1)
    group_names = [sheet.cell(0, col).value
                   for col in range(1, header_width)]

    members_of = dict((group_name, []) for group_name in group_names)

    row_count = number_of_good_rows(sheet, ncols=1)
    for row in range(1, row_count):
        row_name = sheet.cell(row, 0).value
        # Group columns start at 1, right after the name column
        for col, group_name in enumerate(group_names, 1):
            if not sheet.cell(row, col).value:
                continue
            members_of[group_name].append(row_name)

    excel_data.names = member_names
    excel_data.groups = [(group_name, members_of[group_name])
                         for group_name in group_names]
Ejemplo n.º 3
0
    def __init__(self, data):
        """
        Load the mappings stored in the first sheet of an Excel file.

        data is indexed with the keys 'file', 'path', 'predicate' and
        'mapping_type'; 'prefix' is read as well whenever a value is encoded
        with mapping_type 'uri'.

        Row 0 of the sheet is skipped. For every other row, column 0 is an
        optional context, column 1 the literal (passed through clean_string)
        and columns 2 and up the values. The result is kept in
        self._mappings[literal], under 'default' when the context is empty
        and under ['context'][context] otherwise. The stored object is a
        list of (predicate, encoded value) pairs, or None when the row has
        no non-empty value.
        """
        self._mappings = {}

        self.excelFileName = data['file']
        predicate = URIRef(data['predicate'])
        mapping_type = data['mapping_type']

        # Load the mappings
        wb = open_workbook(data['path'] + "/" + self.excelFileName,
                           formatting_info=False,
                           on_demand=True)
        # A workbook opened with on_demand=True keeps its underlying
        # resources until release_resources() is called, so release them
        # once every needed cell has been copied into plain Python values.
        try:
            sheet = wb.sheet_by_index(0)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            for i in range(1, rowns):
                # Get the context (first column)
                context = sheet.cell(i, 0).value

                # Get the string (force reading the cell as a string)
                literal = sheet.cell(i, 1).value
                if isinstance(literal, float):
                    literal = str(int(literal))
                literal = clean_string(literal)

                # Get the values
                values = []
                for j in range(2, colns):
                    value = sheet.cell(i, j).value
                    if value == '':
                        continue

                    # Codes using numbers need to be seen as string
                    if isinstance(value, float):
                        value = str(int(value))

                    # Encode the value
                    if mapping_type == 'uri':
                        encoded_value = URIRef(data['prefix'] + value)
                    elif mapping_type == 'boolean':
                        encoded_value = Literal(value in ('1', 'true'))
                    else:
                        encoded_value = Literal(value)

                    # Pair the encoded value with the predicate
                    values.append((predicate, encoded_value))

                # A row without values is recorded as None, not as []
                if not values:
                    values = None

                # Save the mapping
                entry = self._mappings.setdefault(literal, {})
                if context != '':
                    # Store the specific context
                    entry.setdefault('context', {})[context] = values
                else:
                    # Store the default mappings
                    entry['default'] = values
        finally:
            wb.release_resources()
Ejemplo n.º 4
0
 def __init__(self, data):
     """
     Load the mappings stored in the first sheet of an Excel file.

     data is indexed with the keys 'file', 'path', 'predicate' and
     'mapping_type'; 'prefix' is read as well whenever a value is encoded
     with mapping_type 'uri'.

     Row 0 of the sheet is skipped. For every other row, column 0 is an
     optional context, column 1 the literal (passed through clean_string)
     and columns 2 and up the values. The result is kept in
     self._mappings[literal], under 'default' when the context is empty
     and under ['context'][context] otherwise. The stored object is a
     list of (predicate, encoded value) pairs, or None when the row has
     no non-empty value.
     """
     self._mappings = {}

     self.excelFileName = data['file']
     predicate = URIRef(data['predicate'])
     mapping_type = data['mapping_type']

     # Load the mappings
     wb = open_workbook(data['path'] + "/" + self.excelFileName,
                        formatting_info=False, on_demand=True)
     # A workbook opened with on_demand=True keeps its underlying
     # resources until release_resources() is called, so release them
     # once every needed cell has been copied into plain Python values.
     try:
         sheet = wb.sheet_by_index(0)
         colns = number_of_good_cols(sheet)
         rowns = number_of_good_rows(sheet)
         for i in range(1, rowns):
             # Get the context (first column)
             context = sheet.cell(i, 0).value

             # Get the string (force reading the cell as a string)
             literal = sheet.cell(i, 1).value
             if isinstance(literal, float):
                 literal = str(int(literal))
             literal = clean_string(literal)

             # Get the values
             values = []
             for j in range(2, colns):
                 value = sheet.cell(i, j).value
                 if value == '':
                     continue

                 # Codes using numbers need to be seen as string
                 if isinstance(value, float):
                     value = str(int(value))

                 # Encode the value
                 if mapping_type == 'uri':
                     encoded_value = URIRef(data['prefix'] + value)
                 elif mapping_type == 'boolean':
                     encoded_value = Literal(value in ('1', 'true'))
                 else:
                     encoded_value = Literal(value)

                 # Pair the encoded value with the predicate
                 values.append((predicate, encoded_value))

             # A row without values is recorded as None, not as []
             if not values:
                 values = None

             # Save the mapping
             entry = self._mappings.setdefault(literal, {})
             if context != '':
                 # Store the specific context
                 entry.setdefault('context', {})[context] = values
             else:
                 # Store the default mappings
                 entry['default'] = values
     finally:
         wb.release_resources()
Ejemplo n.º 5
0
    def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
        '''
        Load marking instructions from mrkFileName and process the current
        Excel file to generate the annotated targetFileName.

        Each non-blank line of mrkFileName has the form
        "<sheet index>;<cell name>;<style name>". Every cell named in the
        file is rewritten in a copy of the workbook with the style created
        for that style name, and the copy is saved as targetFileName.
        '''
        # Prepare the output
        self.log.debug("Copy book")
        target_workbook = copy(self.workbook)

        self.log.debug("Load marking from %s" % mrkFileName)

        # Load marking information as {sheet index: {cell name: style name}}.
        # The with-block guarantees the marking file is closed again.
        marking = {}
        with open(mrkFileName) as mrk_file:
            for mrk in mrk_file:
                mrk = mrk.strip()
                if not mrk:
                    # Tolerate blank lines (e.g. an empty last line)
                    continue
                (index_str, cell_ref, style_name) = mrk.split(';')
                marking.setdefault(int(index_str), {})[cell_ref] = style_name

        # Create the styles in the target
        styles = {}
        for name, colour in [('TL Metadata', 'yellow'), ('TL ColHeader', 'blue'),
                             ('TL Data', 'white'), ('TL RowHeader', 'yellow'),
                             ('TL RowProperty', 'green'), ('TL Title', 'pink'),
                             ('TL RowLabel', 'blue'), ('TL HRowHeader', 'blue')]:
            xf = xlwt.easyxf(
                'name: test; pattern: pattern solid, fore_colour %s;' % colour)
            styles[name] = xf
            # The value returned by add_style is still echoed to stdout
            print(target_workbook.add_style(xf))

        # Process the source workbook
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            target_sheet = target_workbook.get_sheet(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            # A sheet without any marking line has no entry in `marking`;
            # use an empty dict instead of failing with KeyError.
            sheet_marking = marking.get(n, {})
            for i in range(rowns):
                for j in range(colns):
                    cell_name = cellname(i, j)
                    if cell_name in sheet_marking:
                        # TODO Use matching style defined earlier
                        style = styles[sheet_marking[cell_name]]
                        target_sheet.write(i, j, label=sheet.cell(i, j).value,
                                           style=style)

        target_workbook.save(targetFileName)
Ejemplo n.º 6
0
    def get_sheet_data(self, index):
        """
        Store the cell values of sheet `index` in self.data[index].

        The result is a list of rows, one per good row reported by
        number_of_good_rows, each row being the list of values of all
        `ncols` columns of the sheet.
        """
        sheet = self.sheets_read[index]
        row_count = number_of_good_rows(sheet)
        col_indices = range(sheet.ncols)

        # Bind the fresh list first so it is visible even while filling it
        collected = self.data[index] = []
        for row_idx in range(row_count):
            collected.append(
                [sheet.cell_value(row_idx, col_idx) for col_idx in col_indices])
Ejemplo n.º 7
0
    def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
        '''
        Load marking instructions from mrkFileName and process the current
        Excel file to generate the annotated targetFileName.

        Each non-blank line of mrkFileName has the form
        "<sheet index>;<cell name>;<style name>". Every cell named in the
        file is rewritten in a copy of the workbook with the style created
        for that style name, and the copy is saved as targetFileName.
        '''
        # Prepare the output
        self.log.debug("Copy book")
        target_workbook = copy(self.workbook)

        self.log.debug("Load marking from %s" % mrkFileName)

        # Load marking information as {sheet index: {cell name: style name}}.
        # The with-block guarantees the marking file is closed again.
        marking = {}
        with open(mrkFileName) as mrk_file:
            for mrk in mrk_file:
                mrk = mrk.strip()
                if not mrk:
                    # Tolerate blank lines (e.g. an empty last line)
                    continue
                (index_str, cell_ref, style_name) = mrk.split(';')
                marking.setdefault(int(index_str), {})[cell_ref] = style_name

        # Create the styles in the target
        styles = {}
        for name, colour in [('TL Metadata', 'yellow'), ('TL ColHeader', 'blue'),
                             ('TL Data', 'white'), ('TL RowHeader', 'yellow'),
                             ('TL RowProperty', 'green'), ('TL Title', 'pink'),
                             ('TL RowLabel', 'blue'), ('TL HRowHeader', 'blue')]:
            xf = xlwt.easyxf('name: test; pattern: pattern solid, fore_colour %s;' % colour)
            styles[name] = xf
            # The value returned by add_style is still echoed to stdout
            print(target_workbook.add_style(xf))

        # Process the source workbook
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            target_sheet = target_workbook.get_sheet(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            # A sheet without any marking line has no entry in `marking`;
            # use an empty dict instead of failing with KeyError.
            sheet_marking = marking.get(n, {})
            for i in range(rowns):
                for j in range(colns):
                    cell_name = cellname(i, j)
                    if cell_name in sheet_marking:
                        # TODO Use matching style defined earlier
                        style = styles[sheet_marking[cell_name]]
                        target_sheet.write(i, j, label=sheet.cell(i, j).value,
                                           style=style)

        target_workbook.save(targetFileName)
Ejemplo n.º 8
0
def _read_tables(excel_data, sheet):
    """
    Read the table dimensions from sheet into excel_data.dimensions.

    Every good row yields one [name, sizes] entry: name is the value of
    column 0 and sizes the int-converted values of the following columns,
    stopping at the first falsy cell (empty or zero).

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    row_count = number_of_good_rows(sheet, ncols=1)
    # Loop-invariant: computed once here instead of once per row
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(row_count):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
Ejemplo n.º 9
0
def _read_tables(excel_data, sheet):
    """
    Read the table dimensions from sheet into excel_data.dimensions.

    Every good row yields one [name, sizes] entry: name is the value of
    column 0 and sizes the int-converted values of the following columns,
    stopping at the first falsy cell (empty or zero).

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    row_count = number_of_good_rows(sheet, ncols=1)
    # Loop-invariant: computed once here instead of once per row
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(row_count):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
Ejemplo n.º 10
0
 def getValidRowsCols(self):
     """
     Determine the number of non-empty rows and columns in the Excel sheet

     Starts from number_of_good_rows / number_of_good_cols of self.r_sheet
     and then drops trailing rows and columns that isEmptyRow /
     isEmptyColumn report as empty.

     Returns:
     rowns -- number of rows
     colns -- number of columns
     """
     colns = number_of_good_cols(self.r_sheet)
     rowns = number_of_good_rows(self.r_sheet)

     # Check whether the number of good columns and rows are correct.
     # The "> 0" guards stop the trimming at zero, so a sheet without any
     # content no longer probes row/column index -1 and below.
     while rowns > 0 and self.isEmptyRow(rowns - 1, colns):
         rowns -= 1
     while colns > 0 and self.isEmptyColumn(colns - 1, rowns):
         colns -= 1

     self.log.debug('Number of rows with content:    {0}'.format(rowns))
     self.log.debug('Number of columns with content: {0}'.format(colns))
     return rowns, colns
Ejemplo n.º 11
0
 def extractMarkingFromXLS(self, mrkFileName):
     '''
     Process the current Excel file to extract the marking annotation
     and write the output to mrkFileName.

     One line "<sheet index>;<cell name>;<style name>" is written for every
     cell whose style name starts with 'TL '. When config.isCompress() is
     true the output goes to mrkFileName + ".bz2" (bz2, compression level
     9) instead of mrkFileName.
     '''
     if config.isCompress():
         outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb',
                                    compresslevel=9)
     else:
         outputHandle = open(mrkFileName, "w")

     # Close the handle even when a sheet or style lookup raises, so the
     # file descriptor is not leaked and buffered output is flushed.
     try:
         for n in range(self.workbook.nsheets):
             sheet = self.workbook.sheet_by_index(n)
             colns = number_of_good_cols(sheet)
             rowns = number_of_good_rows(sheet)
             self.log.debug("Process %d columns and %d rows" % (colns, rowns))
             for i in range(rowns):
                 for j in range(colns):
                     style_name = self.styles[sheet.cell(i, j)].name
                     if style_name.startswith('TL '):
                         outputHandle.write(
                             "%d;%s;%s\n" % (n, cellname(i, j), style_name))
     finally:
         outputHandle.close()
Ejemplo n.º 12
0
def _read_groups(excel_data, sheet):
    """
    Fill excel_data.names and excel_data.groups from the given sheet.

    names  -- values of column 0 below the first row.
    groups -- list of (group name, member names) tuples; group names come
              from row 0 (columns 1 and up) and a row's name is a member of
              a group when the cell in that group's column is truthy.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    member_names = [entry.value for entry in sheet.col(0)[1:]]

    header_width = number_of_good_cols(sheet, nrows=1)
    group_names = [sheet.cell(0, col).value
                   for col in range(1, header_width)]

    members_of = dict((group_name, []) for group_name in group_names)

    row_count = number_of_good_rows(sheet, ncols=1)
    for row in range(1, row_count):
        row_name = sheet.cell(row, 0).value
        # Group columns start at 1, right after the name column
        for col, group_name in enumerate(group_names, 1):
            if not sheet.cell(row, col).value:
                continue
            members_of[group_name].append(row_name)

    excel_data.names = member_names
    excel_data.groups = [(group_name, members_of[group_name])
                         for group_name in group_names]
Ejemplo n.º 13
0
def exel(url):
    """
    Append the values read for `url` to the spreadsheet and rewrite the
    formula columns of its first ten sheets.

    url -- indexable object; url[0] is handed to url_read() and the whole
           object to file_selector(), which yields the .xls path.

    Step 1: merged_list is consumed in strides of five (i = 3, 8, ...;
    start = i - 3). For each stride the workbook is re-opened and copied,
    the sheet is resolved through method_name() / get_sheet_index(), and
    the row returned by row_index() receives merged_list[start] in column
    0, [start + 2] in column 1, [start + 1] in column 2, [start + 4] in
    column 20 and the current datetime in column 28. The file is then
    saved.

    Step 2: for sheets 0..9, rows number_of_good_rows(sheet) down to 1 get
    formula cells in columns 10-19 and 21-23, each formula referring to the
    1-based Excel row `row + 1`. The file is saved once at the end.
    """
    font0 = xlwt.Font()
    font0.name = 'Arial'
    font0.colour_index = 0
    font0.height = 160

    style0 = xlwt.XFStyle()
    style0.font = font0

    style1 = xlwt.XFStyle()
    style1.font = font0
    style1.num_format_str = 'D-MMM-YY'

    # (column index, formula template); {r} is the 1-based Excel row number.
    # The expanded text is identical to the former hand-concatenated strings.
    formula_columns = (
        (10, 'if(and(E{r}+F{r}+G{r}+H{r}+I{r}+J{r}<B{r};F{r}+E{r}>5);"TM";"")'),
        (11, 'if(E{r}+F{r}+G{r}+H{r}+I{r}+J{r}>B{r};"TB";"")'),
        (12, 'if(E{r}+G{r}+I{r}-C{r}>F{r}+H{r}+J{r};"F-";"")'),
        (13, 'if(and(E{r}+G{r}+I{r}-C{r}<F{r}+H{r}+J{r};E{r}+F{r}>5);"F+";"")'),
        (14, 'if(V{r}+W{r}+X{r}>1;"P1";"")'),
        (15, 'if(and(V{r}+W{r}+X{r}<2;E{r}+F{r}>5);"P2";"")'),
        (16, 'if(V{r}+W{r}=2;"2:0";"")'),
        (17, 'if(and(I{r}+J{r}>5;V{r}+W{r}+X{r}=2);"2:1";"")'),
        (18, 'if(and(I{r}+J{r}>5;V{r}+W{r}+X{r}=1);"1:2";"")'),
        (19, 'if(and(E{r}>0;V{r}+W{r}=0);"0:2";"")'),
        (21, 'if(E{r}>F{r};"1";"0")'),
        (22, 'if(G{r}>H{r};"1";"0")'),
        (23, 'if(I{r}>J{r};"1";"0")'),
    )

    merged_list = url_read(url[0])
    xls_file = file_selector(url)

    for i in range(3, len(merged_list), 5):
        # The workbook is re-read on every pass, presumably so row_index()
        # sees the rows saved by earlier passes -- TODO confirm.
        inputBook = xlrd.open_workbook(xls_file, formatting_info=True)
        outputBook = copy(inputBook)
        sheet_name = method_name(i, merged_list)
        inputBookSheet = inputBook.sheet_by_name(sheet_name)
        outputBookSheet = outputBook.get_sheet(get_sheet_index(sheet_name))
        list_index = row_index(inputBookSheet)
        start = i - 3
        outputBookSheet.write(list_index, 0, merged_list[start], style0)
        outputBookSheet.write(list_index, 1, merged_list[start + 2], style0)
        outputBookSheet.write(list_index, 2, merged_list[start + 1], style0)
        outputBookSheet.write(list_index, 28, datetime.datetime.now(), style1)
        outputBookSheet.write(list_index, 20, merged_list[start + 4], style0)
        outputBook.save(xls_file)

    input_Book = xlrd.open_workbook(xls_file, formatting_info=True)
    output_Book = copy(input_Book)
    # NOTE(review): the sheet count is hard-coded to ten; a workbook with
    # fewer sheets would fail on the sheet lookup -- confirm this is intended.
    for sheet_index in range(0, 10):
        outputBookSheet = output_Book.get_sheet(sheet_index)
        inputBookSheet = input_Book.sheet_by_index(sheet_index)
        for row in range(number_of_good_rows(inputBookSheet), 0, -1):
            excel_row = row + 1
            for column, template in formula_columns:
                outputBookSheet.write(
                    row, column,
                    xlwt.Formula(template.format(r=excel_row)), style0)
    output_Book.save(xls_file)
    TODO speed this up
    """
    # Lower and remove new lines
    text_clean = text.lower().replace("\n", " ").replace("\r", " ")
    # Shrink spaces
    text_clean = re.sub(r"\s+", " ", text_clean)
    # Remove lead and trailing whitespace
    text_clean = text_clean.strip()
    return text_clean


# Build `mappings` from the first sheet of CURRENT_TYPES: each cleaned
# literal (column 1) maps to the set of non-empty values found in columns 2
# and up of its row. Row 0 is skipped.
wb = open_workbook(CURRENT_TYPES, on_demand=True, formatting_info=False)
sheet = wb.sheet_by_index(0)
colns, rowns = number_of_good_cols(sheet), number_of_good_rows(sheet)
mappings = {}
for i in range(1, rowns):
    # Normalised literal of this row; register it even if it has no values
    literal = clean_string(sheet.cell(i, 1).value)
    if literal not in mappings:
        mappings[literal] = set()
    print(">" + literal)

    # Collect every non-empty value of the row
    for j in range(2, colns):
        value = sheet.cell(i, j).value
        if value == "":
            continue
        mappings[literal].add(value)


# Load data from Ashkan
Ejemplo n.º 15
0
    TODO speed this up
    """
    # Lower and remove new lines
    text_clean = text.lower().replace('\n', ' ').replace('\r', ' ')
    # Shrink spaces
    text_clean = re.sub(r'\s+', ' ', text_clean)
    # Remove lead and trailing whitespace
    text_clean = text_clean.strip()
    return text_clean


# Build `mappings` from the first sheet of CURRENT_TYPES: each cleaned
# literal (column 1) maps to the set of non-empty values found in columns 2
# and up of its row. Row 0 is skipped.
wb = open_workbook(CURRENT_TYPES, on_demand=True, formatting_info=False)
sheet = wb.sheet_by_index(0)
colns, rowns = number_of_good_cols(sheet), number_of_good_rows(sheet)
mappings = {}
for i in range(1, rowns):
    # Normalised literal of this row; register it even if it has no values
    literal = clean_string(sheet.cell(i, 1).value)
    if literal not in mappings:
        mappings[literal] = set()
    print('>' + literal)

    # Collect every non-empty value of the row
    for j in range(2, colns):
        value = sheet.cell(i, j).value
        if value == '':
            continue
        mappings[literal].add(value)

# Load data from Ashkan
add = 0