Example #1
0
    def _load_xlsx(self, file_path: str):
        """
        Data loader for XLSX, this needs to be converted back to YML structure format

        The workbook returned by ``utils.excel_load`` is consumed as a mapping of
        sheet name -> iterable of row dicts. The ``phase``, ``source``, ``prepare``,
        ``train`` and ``config`` sheets are rebuilt into one nested ordered dict,
        which is then handed to ``self.parse``.

        A value of ``None`` is treated the same as a missing column (presumably
        that is how an empty cell is loaded -- confirm against ``utils.excel_load``).

        :param file_path: XLSX file path
        :raises KeyError: if a train row without ``text`` points at a parent row
            that has not been seen before it
        """
        wb = utils.excel_load(file_path=file_path)
        data = odict()

        # TODO: add validator, if wrong data input
        # TODO: refactor to less hardcoded?

        def rows_of(sheet):
            # an absent sheet, or one stored as None, simply has no rows
            return wb.get(sheet) or []

        def item_key(row, items):
            # dict.get() only applies its default when the key is absent, so a
            # row carrying idx=None must be checked explicitly; otherwise every
            # such row would be stored under the literal key 'None'
            idx = row.get('idx')
            if idx is None:
                idx = len(items)
            return str(idx)

        # parse phase
        phase_items = odict()
        for phase in rows_of('phase'):
            args = odict()
            # `or ''` guards against args=None, which would crash on .split()
            for raw in (phase.get('args') or '').split(','):
                kv = raw.split('=')
                # entries that are not exactly `key=value` are skipped
                if len(kv) == 2:
                    key, value = kv
                    args[key.strip()] = value.strip()
            phase['args'] = args
            phase_items[item_key(phase, phase_items)] = phase
        data['phase'] = odict()
        data['phase']['items'] = phase_items

        # parse source
        source_items = odict()
        for source in rows_of('source'):
            source_items[item_key(source, source_items)] = source
        data['source'] = odict()
        data['source']['items'] = source_items

        # parse prepare
        prepare_items = odict()
        for prepare in rows_of('prepare'):
            prepare_items[item_key(prepare, prepare_items)] = prepare
        data['prepare'] = odict()
        data['prepare']['items'] = prepare_items

        # parse train
        train_rows = rows_of('train')

        # first pass: make sure every row has an idx.
        # Rows with `text` are parents ("<n>"), rows without are their
        # children ("<n>.<m>"). The parent counter advances on every parent
        # after the first one, so two consecutive parents without children
        # no longer share (and overwrite) the same idx.
        parent_no, child_no = 0, 0
        seen_parent = False
        for row in train_rows:
            if row.get('text') is not None:
                if seen_parent:
                    parent_no += 1
                seen_parent = True
                child_no = 0
                if row.get('idx') is None:
                    row['idx'] = str(parent_no)
            else:
                if row.get('idx') is None:
                    row['idx'] = '%s.%s' % (parent_no, child_no)
                child_no += 1

        # second pass: nest children under their parent
        train_items = odict()
        for row in train_rows:
            idx = str(row.get('idx'))
            parent_idx = idx
            if '.' in idx:
                # exactly one dot is expected: "<parent>.<child>"
                parent_idx, _ = idx.split('.')
            if row.get('text') is not None:
                # add train list
                entry = odict()
                entry['items'] = odict()
                for k in ('idx', 'text'):
                    entry[k] = row.get(k)
                train_items[parent_idx] = entry
            else:
                parent = train_items.get(parent_idx)
                if parent is None:
                    raise KeyError(
                        'train row %r has no preceding text row with idx %r'
                        % (idx, parent_idx)
                    )
                gold = odict()
                for k in ('idx', 'subtext', 'offset', 'entity'):
                    gold[k] = row.get(k)
                parent['items'][idx] = gold
        data['train'] = odict()
        data['train']['items'] = train_items

        # parse config
        data['config'] = odict()
        for config in rows_of('config'):
            data['config'][config.get('name')] = config.get('value')

        self.parse(data=data)
Example #2
0
 def _prepare_init_file(self, prepare: Prepare):
     """
     Initialise every prepare entry listed in an external workbook.

     ``prepare.value`` is resolved through ``self.resolve_path`` and loaded with
     ``utils.excel_load``; each row of its ``prepare`` sheet is turned into a
     ``Prepare`` via ``Prepare.make`` and passed to ``self._prepare_init_base``.

     :param prepare: entry whose ``value`` is the workbook path
     """
     workbook_path = self.resolve_path(prepare.value)
     workbook = utils.excel_load(file_path=workbook_path)
     for row in workbook.get('prepare', []):
         self._prepare_init_base(prepare=Prepare.make(items=row))