예제 #1
0
 def test_split_heading(self):
     """Split sheet number 6 on its own (empty matches) and expect 2 blocks."""
     heading_sheet = sheet_from_file('data/schools.xlsx', 6, 6)
     locations = get_index_locations([heading_sheet])
     # No cross-sheet matches are supplied for this single-sheet case.
     sheet_blocks = split_blocks(heading_sheet, {}, locations)
     print(sheet_blocks)
     block_count = len(sheet_blocks)
     self.assertEqual(block_count, 2)
예제 #2
0
 def test_create_blocks(self):
     """Split, generalise and build output blocks for sheet 0; expect 9."""
     # The same (empty) matches object is handed to both pipeline stages.
     no_matches = {}
     first_sheet = sheet_from_file('data/schools.xlsx', 0, 0)
     locations = get_index_locations([first_sheet])
     generalised = generalise(
         split_blocks(first_sheet, no_matches, locations))
     output_blocks = create_blocks(generalised, no_matches)
     self.assertEqual(len(output_blocks), 9)
예제 #3
0
 def test_split_heading_match(self):
     """Split sheet 6 using matches computed against sheet 7; expect 3 blocks."""
     workbook = 'data/schools.xlsx'
     first = sheet_from_file(workbook, 6, 6)
     second = sheet_from_file(workbook, 7, 7)
     both = [first, second]
     # Index locations are computed before the matches, over both sheets.
     locations = get_index_locations(both)
     found_matches = match(both)
     sheet_blocks = split_blocks(first, found_matches, locations)
     print(sheet_blocks)
     self.assertEqual(len(sheet_blocks), 3)
예제 #4
0
 def test_split_empty(self):
     """Split sheet number 5 with inverted index map; expect one 12-cell block."""
     loaded = sheet_from_file('data/schools.xlsx', 5, 0)
     # Invert sheet.index_map so that its values become the lookup keys.
     index_locations = {}
     for key, value in loaded.index_map.items():
         index_locations[value] = key
     print(index_locations)
     blocks = split_blocks(loaded, {}, index_locations)
     print(blocks)
     self.assertEqual(len(blocks), 1)
     only_block = blocks[0]
     self.assertEqual(len(only_block.cells), 12)
예제 #5
0
 def test_generalise(self):
     """Generalise the blocks of sheet 0 and check count and trailing types."""
     first_sheet = sheet_from_file('data/schools.xlsx', 0, 0)
     locations = get_index_locations([first_sheet])
     raw_blocks = split_blocks(first_sheet, {}, locations)
     gblocks = generalise(raw_blocks)
     print(gblocks)
     self.assertEqual(len(gblocks), 8)
     # Only index into the result once its length has been verified.
     last_block = gblocks[-1]
     self.assertIsInstance(last_block, FormulaBlockHorizontal)
     expected_dependants = {
         'color_3_0_3999755851924192',
         'color_3_0_5999938962981048',
     }
     self.assertEqual(last_block.dependant_types, expected_dependants)
     self.assertIsInstance(gblocks[-2], FormulaBlockVertical)
예제 #6
0
 def test_split_actual_match(self):
     """Split sheet 0 using matches against sheet 1 and check leading blocks."""
     workbook = 'data/schools.xlsx'
     first = sheet_from_file(workbook, 0, 0)
     second = sheet_from_file(workbook, 1, 1)
     # Matches are computed before the index locations in this test.
     found_matches = match([first, second])
     locations = get_index_locations([first, second])
     blocks = split_blocks(first, found_matches, locations)
     print(found_matches)
     print(blocks)
     self.assertEqual(len(blocks), 11)
     self.assertEqual(len(blocks[0].cells), 2)
     second_block = blocks[1]
     expected_types = {
         'italics',
         'color_3_0_7999816888943144',
         'theme_3',
         'bold',
     }
     self.assertEqual(second_block.types, expected_types)
     self.assertEqual(len(second_block.cells), 1)
예제 #7
0
def extract(filesin, fileout=None):
    """Run the full pipeline over the given sheets and return the result frame.

    Args:
        filesin: Iterable of ``(filein, sheetnr)`` pairs. Each pair is loaded
            with ``sheet_from_file``, with its position in the iterable passed
            as the third argument.
        fileout: Optional output location. When truthy, one ``<i>.svg`` per
            sheet is drawn under it (even if the pipeline fails), and on
            success ``output.xlsx`` is saved there and ``copy_prolog_file`` is
            called with it. Presumably a directory path given as ``str``
            (it is concatenated with ``'/...'``) -- confirm with callers.

    Returns:
        The second element (``df``) of the pair returned by ``fill_blocks``.

    Raises:
        Exception: Any error raised while generalising, creating, solving or
            filling blocks is printed and then re-raised.
    """
    sheets = []
    for sheet_counter, (filein, sheetnr) in enumerate(filesin):
        sheets.append(sheet_from_file(filein, sheetnr, sheet_counter))

    index_locations = get_index_locations(sheets)
    match_tuples = match(sheets)
    match_sets = _match_sets(match_tuples)

    # Collect the lines of every sheet, then append the lines derived from
    # the matches between them.
    lines = []
    for sheet in sheets:
        lines.extend(split_lines(sheet, match_sets, index_locations))
    lines.extend(_match_lines(lines, match_sets, index_locations))

    sheet_blocks = []
    for sheet in sheets:
        sheet_blocks.append(split_blocks(sheet, lines))

    try:
        generalised_per_sheet = []
        for per_sheet in sheet_blocks:
            generalised_per_sheet.append(generalise(per_sheet))

        # Flatten the per-sheet lists into one list of blocks.
        flat_blocks = []
        for group in generalised_per_sheet:
            flat_blocks.extend(group)

        output_blocks = create_blocks(flat_blocks, match_tuples)
        assignment = csp(output_blocks, sheets, match_tuples)
        wb, df = fill_blocks(flat_blocks, output_blocks, assignment)
        print('done')
        # Note: this changes a process-wide pandas display option.
        pandas.options.display.width = 0
        print(df)
    except Exception as error:
        print('Error')
        print(error)
        raise error
    finally:
        # The (non-generalised) sheet blocks are drawn whether or not the
        # steps above succeeded.
        if fileout:
            for position, drawn in enumerate(sheet_blocks):
                draw_blocks(drawn, fileout + f'/{position}.svg')

    if not fileout:
        return df

    filen = fileout + '/output.xlsx'
    wb.save(filen)
    copy_prolog_file(fileout)
    return df
예제 #8
0
 def test_split_blocks(self):
     """Split a hand-built 2x2 sheet and expect a single block of 4 cells."""
     im = {
         (1, 1): ('c1', 'r1'),
         (2, 1): ('c2', 'r1'),
         (1, 2): ('c1', 'r2'),
         (2, 2): ('c2', 'r2'),
     }
     # Each spec is the positional argument tuple handed to Cell.
     cell_specs = (
         ('a', 'c1', 'r1', 1, 1),
         ('b', 'c2', 'r1', 2, 1),
         ('c', 'c1', 'r2', 1, 2),
         ('d', 'c2', 'r2', 2, 2),
     )
     cells = [Cell(*spec) for spec in cell_specs]
     # Invert the index map so that its values become the lookup keys.
     index_locations = {}
     for position, labels in im.items():
         index_locations[labels] = position
     grid = Sheet(im, cells)
     blocks = split_blocks(grid, [], index_locations)
     self.assertEqual(len(blocks), 1)
     self.assertEqual(len(blocks[0].cells), 4)
예제 #9
0
 def test_csp(self):
     """Solve the CSP for sheet 0 and compare against a fixed assignment.

     The output blocks are created directly from the split blocks; no
     generalise step is applied in this test.
     """
     expected = {
         'i_0_00_001': 8,
         'i_0_00_003': 10,
         'i_0_00_004': 11,
         'i_0_00_006': 13,
         'i_0_01_007': 14,
         'j_0_00_001': 12,
         'j_0_00_002': 13,
         'j_0_00_003': 14,
         'j_0_00_005': 16,
         'j_0_00_006': 17,
         'j_0_01_001': 12,
     }
     # One shared (empty) matches object is used by every pipeline stage.
     no_matches = {}
     first_sheet = sheet_from_file('data/schools.xlsx', 0, 0)
     locations = get_index_locations([first_sheet])
     raw_blocks = split_blocks(first_sheet, no_matches, locations)
     output_blocks = create_blocks(raw_blocks, no_matches)
     assignment = csp(output_blocks, [first_sheet], no_matches)
     self.assertEqual(assignment, expected)