def compute_accurate_group_status(cls):
    from Dashboard.models import LANGUAGE_CODES_AND_NAMES

    # Collect completed 'TGT' annotations, keyed by annotator.
    user_status = defaultdict(list)
    qs = cls.objects.filter(completed=True)

    value_names = ('createdBy', 'item__itemType', 'task__id')
    for result in qs.values_list(*value_names):
        if result[1].lower() != 'tgt':
            continue
        annotatorID = result[0]
        taskID = result[2]
        user_status[annotatorID].append(taskID)

    # Group annotators by their non-language group memberships.
    group_status = defaultdict(list)
    for annotatorID in user_status:
        user = User.objects.get(pk=annotatorID)
        usergroups = ';'.join([
            x.name for x in user.groups.all()
            if x.name not in LANGUAGE_CODES_AND_NAMES.keys()
        ])
        if not usergroups:
            usergroups = 'NoGroupInfo'
        group_status[usergroups].extend(user_status[annotatorID])

    # A task counts as completed for a group once it has collected at
    # least 70 'TGT' annotations.
    group_hits = {}
    for group_name in group_status:
        task_ids = set(group_status[group_name])
        completed_tasks = 0
        for task_id in task_ids:
            if group_status[group_name].count(task_id) >= 70:
                completed_tasks += 1
        group_hits[group_name] = (completed_tasks, len(task_ids))

    return group_hits
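# A minimal reporting sketch for the group status returned above: the dict
# maps a group name to (tasks_with_at_least_70_annotations, total_tasks).
# The helper name and output format are illustrative only, not part of the
# existing Appraise API.
def _print_group_status_sketch(group_hits):
    for group_name, (completed, total) in sorted(group_hits.items()):
        print('{0}: {1}/{2} tasks fully annotated'.format(
            group_name, completed, total))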
def dump_all_results_to_csv_file(cls, csv_file):
    from Dashboard.models import LANGUAGE_CODES_AND_NAMES

    system_scores = defaultdict(list)
    user_data = {}
    qs = cls.objects.filter(completed=True)

    value_names = (
        'item__target1ID', 'score1', 'item__target2ID', 'score2',
        'start_time', 'end_time', 'createdBy', 'item__itemID',
        'item__metadata__market__sourceLanguageCode',
        'item__metadata__market__targetLanguageCode',
        'item__metadata__market__domainName', 'item__itemType',
        'task__id', 'task__campaign__campaignName'
    )
    for result in qs.values_list(*value_names):
        system1ID = result[0]
        score1 = result[1]
        system2ID = result[2]
        score2 = result[3]
        start_time = result[4]
        end_time = result[5]
        duration = round(float(end_time) - float(start_time), 1)
        annotatorID = result[6]
        segmentID = result[7]
        marketID = '{0}-{1}'.format(result[8], result[9])
        domainName = result[10]
        itemType = result[11]
        taskID = result[12]
        campaignName = result[13]

        # Cache user lookups, as each annotator appears in many results.
        if annotatorID in user_data:
            username = user_data[annotatorID][0]
            useremail = user_data[annotatorID][1]
            usergroups = user_data[annotatorID][2]
        else:
            user = User.objects.get(pk=annotatorID)
            username = user.username
            useremail = user.email
            usergroups = ';'.join([
                x.name for x in user.groups.all()
                if x.name not in LANGUAGE_CODES_AND_NAMES.keys()
            ])
            if not usergroups:
                usergroups = 'NoGroupInfo'
            user_data[annotatorID] = (username, useremail, usergroups)

        system_scores[marketID + '-' + domainName].append(
            (taskID, segmentID, username, useremail, usergroups, system1ID,
             score1, system2ID, score2, start_time, end_time, duration,
             itemType, campaignName))

    # TODO: this is very opaque... and needs to be fixed!
    csv_rows = [
        'taskID,segmentID,username,email,groups,system1ID,score1,'
        'system2ID,score2,startTime,endTime,durationInSeconds,'
        'itemType,campaignName'
    ]
    for market_domain in system_scores:
        for row in system_scores[market_domain]:
            csv_rows.append(','.join([str(value) for value in row]))

    from os.path import join
    from Appraise.settings import BASE_DIR
    media_file_path = join(BASE_DIR, 'media', csv_file)
    with open(media_file_path, 'w') as outfile:
        for row in csv_rows:
            outfile.write(row)
            outfile.write('\n')
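# A minimal sketch of a safer way to write the rows collected above,
# assuming `header` is the list of column names and `rows` the tuples
# stored in system_scores. The hand-rolled ','.join() breaks as soon as a
# username, group name or campaign name contains a comma; Python's csv
# module handles the quoting. The helper name and signature are
# illustrative only, not part of the existing Appraise API.
def _write_rows_as_csv_sketch(path, header, rows):
    import csv
    with open(path, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(header)
        for row in rows:
            writer.writerow([str(value) for value in row])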
def marketTargetLanguageCode(self):
    tokens = str(self.items.first().metadata.market).split('_')
    if len(tokens) == 3 and tokens[1] in LANGUAGE_CODES_AND_NAMES.keys():
        return tokens[1]
    return None
def marketSourceLanguage(self):
    tokens = str(self.items.first().metadata.market).split('_')
    if len(tokens) == 3 and tokens[0] in LANGUAGE_CODES_AND_NAMES.keys():
        return LANGUAGE_CODES_AND_NAMES[tokens[0]]
    return None
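# A minimal sketch of a shared helper for the two accessors above, assuming
# that market identifiers follow the three-token 'source_target_domain'
# pattern implied by the len(tokens) == 3 checks. The helper name is
# illustrative only, not part of the existing model API.
def _parse_market_codes_sketch(market):
    """Return (source_code, target_code), or (None, None) if the market
    identifier does not parse as 'source_target_domain'."""
    tokens = str(market).split('_')
    if len(tokens) == 3 \
            and tokens[0] in LANGUAGE_CODES_AND_NAMES \
            and tokens[1] in LANGUAGE_CODES_AND_NAMES:
        return tokens[0], tokens[1]
    return None, None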
def handle(self, *args, **options):
    # Validate source and target language codes
    _all = list(set([x.lower() for x in LANGUAGE_CODES_AND_NAMES.keys()]))
    _all.sort()

    _src = options['source_language'].lower()
    if _src not in _all:
        self.stdout.write('Unknown source language: {0}!'.format(_src))
        self.stdout.write('Known languages: {0}'.format(', '.join(_all)))
        return

    _tgt = options['target_language'].lower()
    if _tgt not in _all:
        self.stdout.write('Unknown target language: {0}!'.format(_tgt))
        self.stdout.write('Known languages: {0}'.format(', '.join(_all)))
        return

    # Initialize random number generator
    # Extract batch size number of pairs, randomizing order if requested
    # Serialize pairs into JSON format
    # Write out JSON output file
    batch_size = options['batch_size']
    block_size = 10
    block_annotations = 7
    block_redundants = 1
    block_references = 1
    block_badrefs = 1

    # IF BLOCK DEF IS GIVEN, DO SOMETHING WITH IT
    if options['block_definition'] is not None:
        print("WOOHOO")

    if (batch_size % block_size) > 0:
        self.stdout.write('Batch size needs to be divisible by block size!')
        return

    # CHECK THAT WE END UP WITH EVEN NUMBER OF BLOCKS
    print('We will create {0} blocks'.format(int(batch_size / block_size)))

    # TODO: add parameter to set encoding
    # TODO: need to use OrderedDict to preserve segment IDs' order!
    source_file = Command._load_text_from_file(
        options['source_file'], 'utf8')
    print('Loaded {0} source segments'.format(len(source_file.keys())))

    reference_file = Command._load_text_from_file(
        options['reference_file'], 'utf8')
    print('Loaded {0} reference segments'.format(
        len(reference_file.keys())))

    systems_files = []
    systems_path = options['systems_path']
    from glob import iglob
    import os.path
    for system_file in iglob('{0}{1}{2}'.format(
            systems_path, os.path.sep, "*.txt")):
        systems_files.append(system_file)

    random_seed_value = 123456

    systems_files.sort()
    seed(random_seed_value)
    shuffle(systems_files)  # ADD RANDOMIZED SHUFFLING HERE?
    import hashlib
    hashed_text = {}

    for system_path in systems_files:
        system_txt = Command._load_text_from_file(system_path, 'utf8')
        system_bad = Command._load_text_from_file(
            system_path.replace('.txt', '.bad'), 'utf8')
        system_ids = Command._load_text_from_file(
            system_path.replace('.txt', '.ids'), 'utf8')
        system_url = Command._load_text_from_file(
            system_path.replace('.txt', '.url'), 'utf8')

        for segment_id, segment_text in system_txt.items():
            md5hash = hashlib.new(
                'md5', segment_text.encode('utf8')).hexdigest()

            if md5hash not in hashed_text.keys():
                hashed_text[md5hash] = {
                    'segment_id': segment_id,
                    'segment_text': segment_text,
                    'segment_bad': system_bad[segment_id],
                    'segment_ref': reference_file[segment_id],
                    'segment_src': source_file[segment_id],
                    'segment_url': system_url[segment_id],
                    'systems': [os.path.basename(system_path)]
                }
            else:
                hashed_text[md5hash]['systems'].append(
                    os.path.basename(system_path))

        print('Loaded {0} system {1} segments'.format(
            len(system_txt.keys()), os.path.basename(system_path)))

    all_keys = list(hashed_text.keys())
    all_keys.sort()
    shuffle(all_keys)

    items_per_batch = 10 * 7

    missing_items = items_per_batch - len(all_keys) % items_per_batch
    print('Missing items is {0}/{1}'.format(missing_items, items_per_batch))

    all_keys.extend(all_keys[0:missing_items])
    print('Added {0} missing items rotating keys'.format(missing_items))

    total_batches = int(floor(len(all_keys) / items_per_batch))
    print('Total number of batches is {0}'.format(total_batches))

    batch_no = options['batch_no']
    all_batches = options['all_batches']
    source_based = options['source_based']

    # If we don't produce all batches, our batch_id will be batch_no-1.
    # This is because batch numbers are one-based, ids zero-indexed.
    #
    # If we produce all batches, we just use range(total_batches).
    # This implicitly gives us zero-indexed ids already.
    batch_nos = [batch_no - 1] if not all_batches \
        else list(range(total_batches))

    json_data = []
    for batch_id in batch_nos:  # range(batch_no):
        block_data = []
        block_offset = batch_id * 10 * 7

        num_blocks = int(batch_size / block_size)
        for block_id in range(num_blocks):
            # Human readable ids are one-based, hence +1
            print('Creating batch {0:05}/{1:05}, block {2:02}'.format(
                batch_id + 1, total_batches, block_id + 1))

            # Get 7 random system outputs
            block_start = block_offset + 7 * (block_id)
            block_end = block_start + 7
            block_hashes = all_keys[block_start:block_end]

            current_block = {'systems': block_hashes}
            block_data.append(current_block)

        # Compute redundant, reference, bad reference bits
        for block_id in range(num_blocks):
            check_id = int((block_id + (num_blocks / 2)) % num_blocks)

            # Human readable ids are one-based, hence +1
            print('Add checks for batch {0:05}/{1:05}, '
                  'block {2:02} to block {3:02}'.format(
                      batch_id + 1, total_batches, check_id + 1,
                      block_id + 1))

            check_systems = block_data[check_id]['systems']
            check_systems.sort()
            shuffle(check_systems)

            block_data[block_id]['redundant'] = check_systems[0]
            block_data[block_id]['reference'] = check_systems[1]
            block_data[block_id]['badref'] = check_systems[2]

        # Direct assessment is reference-based for WMT17
        sourceID = basename(options['reference_file'])

        # Remember, batch numbers are one-based
        taskData = OrderedDict({
            'batchNo': batch_id + 1,
            'batchSize': options['batch_size'],
            'sourceLanguage': options['source_language'],
            'targetLanguage': options['target_language'],
            'requiredAnnotations': 1,
            'randomSeed': random_seed_value
        })

        itemsData = []
        _item = 0

        for block_id in range(num_blocks):
            all_items = [(x, 'TGT')
                         for x in block_data[block_id]['systems']]
            all_items.append((block_data[block_id]['redundant'], 'CHK'))
            all_items.append((block_data[block_id]['reference'], 'REF'))
            all_items.append((block_data[block_id]['badref'], 'BAD'))
            shuffle(all_items)

            for current_item, current_type in all_items:
                item_data = hashed_text[current_item]

                item_id = item_data['segment_id']
                item_text = item_data['segment_text']
                item_bad = item_data['segment_bad']
                item_ref = item_data['segment_ref']
                item_src = item_data['segment_src']
                item_url = item_data['segment_url']
                item_systems = item_data['systems']

                targetID = '+'.join(set(item_systems))
                targetText = item_text
                if current_type == 'REF':
                    targetID = basename(options['reference_file'])
                    targetText = item_ref
                elif current_type == 'BAD':
                    targetText = item_bad

                obj = OrderedDict()
                obj['_item'] = _item
                obj['_block'] = block_id + (10 * batch_id)
                obj['sourceID'] = sourceID
                obj['sourceText'] = item_ref if not source_based \
                    else item_src
                obj['targetID'] = targetID
                obj['targetText'] = targetText
                obj['itemID'] = item_id
                obj['itemType'] = current_type
                obj['imageURL'] = item_url

                itemsData.append(obj)
                _item += 1

        outputData = OrderedDict({'task': taskData, 'items': itemsData})
        json_data.append(outputData)

    print(json.dumps(json_data, indent=2))
    json_data = json.dumps(json_data, indent=2)

    with open(options['output_json_file'], mode='w',
              encoding='utf8') as output_file:
        self.stdout.write('Creating {0} ... '.format(
            options['output_json_file']), ending='')
        output_file.write(str(json_data))
        self.stdout.write('OK')
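# A quick sanity-check sketch of the batch layout produced by handle()
# above, assuming the default batch_size of 100: each batch holds 10
# blocks, and every block shows 7 genuine system outputs ('TGT') plus one
# repeated item ('CHK'), one reference ('REF') and one bad reference
# ('BAD') drawn from the block opposite it, so 70 unique hashes feed 100
# annotation items per batch. The function name is illustrative only.
def _check_batch_layout_sketch(batch_size=100, block_size=10,
                               block_annotations=7):
    num_blocks = batch_size // block_size
    unique_hashes = num_blocks * block_annotations       # 70 with the defaults
    items_shown = num_blocks * (block_annotations + 3)   # + CHK, REF, BAD per block
    assert items_shown == batch_size
    return unique_hashes, items_shown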
def handle(self, *args, **options):
    # Validate source and target language codes
    _all = list(set([x.lower() for x in LANGUAGE_CODES_AND_NAMES.keys()]))
    _all.sort()

    _src = options['source_language'].lower()
    if _src not in _all:
        self.stdout.write('Unknown source language: {0}!'.format(_src))
        self.stdout.write('Known languages: {0}'.format(', '.join(_all)))
        return

    _tgt = options['target_language'].lower()
    if _tgt not in _all:
        self.stdout.write('Unknown target language: {0}!'.format(_tgt))
        self.stdout.write('Known languages: {0}'.format(', '.join(_all)))
        return

    # Initialize random number generator
    # Extract batch size number of pairs, randomizing order if requested
    # Serialize pairs into JSON format
    # Write out JSON output file
    batch_size = options['batch_size']
    unicode_enc = options['unicode']
    use_local_src = options['local_src']
    use_local_ref = options['local_ref']
    create_ids = options['create_ids']
    source_based = options['source_based']

    block_size = 10
    block_annotations = 7
    block_redundants = 1
    block_references = 1
    block_badrefs = 1

    # IF BLOCK DEF IS GIVEN, DO SOMETHING WITH IT
    if options['block_definition'] is not None:
        print("WOOHOO")

    if (batch_size % block_size) > 0:
        self.stdout.write('Batch size needs to be divisible by block size!')
        return

    # CHECK THAT WE END UP WITH EVEN NUMBER OF BLOCKS
    print('We will create {0} blocks'.format(int(batch_size / block_size)))

    # TODO: add parameter to set encoding
    # TODO: need to use OrderedDict to preserve segment IDs' order!
    encoding = 'utf16' if unicode_enc else 'utf8'

    source_file = []
    if not use_local_src:
        source_file = Command._load_text_from_file(
            options['source_file'], encoding)
        print('Loaded {0} source segments'.format(len(source_file.keys())))

    reference_file = []
    if not use_local_ref:
        reference_file = Command._load_text_from_file(
            options['reference_file'], encoding)
        print('Loaded {0} reference segments'.format(
            len(reference_file.keys())))

    systems_files = []
    systems_path = options['systems_path']
    from glob import iglob
    import os.path
    for system_file in iglob('{0}{1}{2}'.format(
            systems_path, os.path.sep, "*.txt")):
        if '+' in basename(system_file):
            print('Cannot use system files with + in names '
                  'as this breaks multi-system meta systems:\n'
                  '{0}'.format(system_file))
            sys_exit(-1)
        systems_files.append(system_file)

    random_seed_value = 123456

    systems_files.sort()
    seed(random_seed_value)
    shuffle(systems_files)  # ADD RANDOMIZED SHUFFLING HERE?

    import hashlib
    hashed_text = {}
    hashes_by_ids = defaultdict(list)

    character_based = _tgt == 'zho' or _tgt == 'jpn' \
        or options['character_based']

    for system_path in systems_files:
        system_txt = Command._load_text_from_file(system_path, encoding)

        # Generate bad references on the fly
        #
        # To do so, we will load a random source segment to fill in a
        # randomly positioned phrase in the given candidate translation.
        #
        # system_bad = Command._load_text_from_file(
        #     system_path.replace('.txt', '.bad'), encoding)

        if not create_ids:
            system_ids = Command._load_text_from_file(
                system_path.replace('.txt', '.ids'), encoding)
        else:
            system_ids = [x + 1 for x in range(len(system_txt))]

        # BASICALLY: add support for local system_src and system_ref files
        # here. If such files are present, this will overwrite the global
        # src/ref values. However, this does not fully resolve the issue as
        # we still have to give a source text file, which is assumed to be
        # shared...
        #
        # IN A SENSE, using these local files makes better sense. It is
        # wasteful, though. MAYBE, it is better to simply generate a simple
        # JSON config file?!
        local_src = []
        local_ref = []

        if use_local_src:
            local_src_path = system_path.replace('.txt', '.src')
            if os.path.exists(local_src_path):
                local_src = Command._load_text_from_file(
                    local_src_path, encoding)

        if use_local_ref:
            local_ref_path = system_path.replace('.txt', '.ref')
            if os.path.exists(local_ref_path):
                local_ref = Command._load_text_from_file(
                    local_ref_path, encoding)

        for segment_id, segment_text in system_txt.items():
            _src = local_src[segment_id] if use_local_src \
                else source_file[segment_id]
            _ref = local_ref[segment_id] if use_local_ref \
                else reference_file[segment_id]

            md5hash = hashlib.new(
                'md5',
                segment_text.encode(encoding) + _src.encode(encoding)
                + _ref.encode(encoding)).hexdigest()

            # Determine length of bad phrase, relative to segment length
            #
            # This follows WMT17:
            # - http://statmt.org/wmt17/pdf/WMT17.pdf
            #
            # (A standalone sketch of this length rule follows after
            # handle() below.)
            _bad_len = 1
            _tokens = segment_text \
                if character_based \
                else segment_text.split(' ')
            if len(_tokens) == 1:
                _bad_len = 1
            elif len(_tokens) > 1 and len(_tokens) <= 5:
                _bad_len = 2
            elif len(_tokens) > 5 and len(_tokens) <= 8:
                _bad_len = 3
            elif len(_tokens) > 8 and len(_tokens) <= 15:
                _bad_len = 4
            elif len(_tokens) > 15 and len(_tokens) <= 20:
                _bad_len = 5
            else:
                _bad_len = len(_tokens) // 4

            # If dealing with Chinese or Japanese, use double the amount
            # of characters for the bad replacement phrase.
            if character_based:
                _bad_len = 2 * _bad_len

            # Choose random src/ref segment
            _bad_tokens = []
            while len(_bad_tokens) <= _bad_len:
                _bad_id = randrange(0, len(local_ref)) + 1 \
                    if use_local_ref \
                    else randrange(0, len(reference_file)) + 1
                if source_based:
                    _bad_id = randrange(0, len(local_src)) + 1 \
                        if use_local_src \
                        else randrange(0, len(source_file)) + 1

                _bad_text = None
                # if source_based:
                #     _bad_text = local_src[_bad_id] if use_local_src else source_file[_bad_id]
                # else:
                #
                # We are currently forcing reference-based bad reference
                # generation. If no reference is available, then a copy
                # of the source file will work just fine.
                #
                if True:
                    _bad_text = local_ref[_bad_id] \
                        if use_local_ref else reference_file[_bad_id]

                _bad_tokens = _bad_text \
                    if character_based \
                    else _bad_text.split(' ')

            _bad_phrase = None
            _index = randrange(0, len(_bad_tokens) - _bad_len) \
                if len(_bad_tokens) - _bad_len > 0 else 0
            _bad_phrase = _bad_tokens[_index:_index + _bad_len]

            _index = randrange(0, len(_tokens) - _bad_len) \
                if len(_tokens) - _bad_len > 0 else 0
            _bad = _tokens[:_index] + _bad_phrase \
                + _tokens[_index + _bad_len:]

            segment_bad = ''.join(_bad) \
                if character_based \
                else ' '.join(_bad)

            if md5hash not in hashed_text.keys():
                hashed_text[md5hash] = {
                    'segment_id': segment_id,
                    'segment_text': segment_text,
                    'segment_bad': segment_bad,
                    'segment_ref': _ref,
                    'segment_src': _src,
                    'systems': [os.path.basename(system_path)]
                }
                hashes_by_ids[segment_id].append(md5hash)
            else:
                hashed_text[md5hash]['systems'].append(
                    os.path.basename(system_path))

        print('Loaded {0} system {1} segments'.format(
            len(system_txt.keys()), os.path.basename(system_path)))

    # Dump deduplicated segment data to JSON file.
    json_data = json.dumps(hashed_text, indent=2, sort_keys=True)
    with open(options['output_json_file'] + '.segments', mode='w',
              encoding='utf8') as output_file:
        self.stdout.write('Creating {0} ... '.format(
            options['output_json_file'] + '.segments'), ending='')
        output_file.write(str(json_data))
        self.stdout.write('OK')

    all_keys = list(hashed_text.keys())
    all_keys.sort()
    shuffle(all_keys)

    # If --full-coverage is specified, we want to collect annotations for
    # all unique translations for any given segment ID.
    # To do so, we loop over the all_keys list and for each MD5 hash we
    # have not consumed, we add not only the MD5 hash itself but also all
    # other MD5 hashes matching the respective segment ID.
    full_coverage = options['full_coverage']
    if full_coverage:
        _sorted_keys = []
        for key in all_keys:
            if key not in _sorted_keys:
                segment_id = hashed_text[key]['segment_id']
                matching_keys = hashes_by_ids[segment_id]
                matching_keys.sort()
                _sorted_keys.extend(matching_keys)
        all_keys = _sorted_keys

    items_per_batch = 10 * 7

    missing_items = items_per_batch - len(all_keys) % items_per_batch
    print('Missing items is {0}/{1}'.format(missing_items, items_per_batch))

    all_keys.extend(all_keys[0:missing_items])
    print('Added {0} missing items rotating keys'.format(missing_items))

    total_batches = int(floor(len(all_keys) / items_per_batch))
    print('Total number of batches is {0}'.format(total_batches))

    batch_no = options['batch_no']
    max_batches = options['max_batches']
    all_batches = options['all_batches']

    # If we don't produce all batches, our batch_id will be batch_no-1.
    # This is because batch numbers are one-based, ids zero-indexed.
    #
    # If we produce all batches, we just use range(total_batches).
    # This implicitly gives us zero-indexed ids already.
    batch_nos = [batch_no - 1] if not all_batches \
        else list(range(total_batches))

    if max_batches:
        batch_nos = batch_nos[:max_batches]

    json_data = []
    for batch_id in batch_nos:  # range(batch_no):
        block_data = []
        block_offset = batch_id * 10 * 7

        num_blocks = int(batch_size / block_size)
        for block_id in range(num_blocks):
            # Human readable ids are one-based, hence +1
            print('Creating batch {0:05}/{1:05}, block {2:02}'.format(
                batch_id + 1, total_batches, block_id + 1))

            # Get 7 random system outputs
            block_start = block_offset + 7 * (block_id)
            block_end = block_start + 7
            block_hashes = all_keys[block_start:block_end]

            current_block = {'systems': block_hashes}
            block_data.append(current_block)

        # Compute redundant, reference, bad reference bits
        for block_id in range(num_blocks):
            check_id = int((block_id + (num_blocks / 2)) % num_blocks)

            # Human readable ids are one-based, hence +1
            print('Add checks for batch {0:05}/{1:05}, '
                  'block {2:02} to block {3:02}'.format(
                      batch_id + 1, total_batches, check_id + 1,
                      block_id + 1))

            check_systems = block_data[check_id]['systems']
            check_systems.sort()
            shuffle(check_systems)

            block_data[block_id]['redundant'] = check_systems[0]
            block_data[block_id]['reference'] = check_systems[1]
            block_data[block_id]['badref'] = check_systems[2]

        # Direct assessment is reference-based for WMT17
        if source_based:
            sourceID = 'LOCAL_SRC' if use_local_src \
                else basename(options['source_file'])
        else:
            sourceID = 'LOCAL_REF' if use_local_ref \
                else basename(options['reference_file'])

        # Remember, batch numbers are one-based
        taskData = OrderedDict({
            'batchNo': batch_id + 1,
            'batchSize': options['batch_size'],
            'sourceLanguage': options['source_language'],
            'targetLanguage': options['target_language'],
            'requiredAnnotations': options['required_annotations'],
            'randomSeed': random_seed_value
        })

        itemsData = []
        _item = 0

        for block_id in range(num_blocks):
            all_items = [(x, 'TGT')
                         for x in block_data[block_id]['systems']]
            all_items.append((block_data[block_id]['redundant'], 'CHK'))
            all_items.append((block_data[block_id]['reference'], 'REF'))
            all_items.append((block_data[block_id]['badref'], 'BAD'))
            shuffle(all_items)

            for current_item, current_type in all_items:
                item_data = hashed_text[current_item]

                item_id = item_data['segment_id']
                item_text = item_data['segment_text']
                item_bad = item_data['segment_bad']
                item_ref = item_data['segment_ref']
                item_src = item_data['segment_src']
                item_systems = item_data['systems']

                targetID = '+'.join(sorted(set(item_systems)))
                targetText = item_text
                if current_type == 'REF':
                    targetID = basename(options['reference_file'])
                    targetText = item_ref
                elif current_type == 'BAD':
                    targetText = item_bad

                obj = OrderedDict()
                obj['_item'] = _item
                obj['_block'] = block_id + (10 * batch_id)
                obj['sourceID'] = sourceID
                obj['sourceText'] = item_ref if not source_based \
                    else item_src
                obj['targetID'] = targetID
                obj['targetText'] = targetText
                obj['itemID'] = item_id
                obj['itemType'] = current_type

                itemsData.append(obj)
                _item += 1

        outputData = OrderedDict({'task': taskData, 'items': itemsData})
        json_data.append(outputData)

    json_data = json.dumps(json_data, indent=2, sort_keys=True)
    print(json_data)

    with open(options['output_json_file'], mode='w',
              encoding='utf8') as output_file:
        self.stdout.write('Creating {0} ... '.format(
            options['output_json_file']), ending='')
        output_file.write(str(json_data))
        self.stdout.write('OK')
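# A standalone sketch of the bad-reference length rule used inside handle()
# above (the WMT17-style schedule referenced there): the number of replaced
# tokens grows with segment length and is doubled for character-based
# target languages. The helper name is illustrative only, not part of the
# existing command.
def _bad_phrase_length_sketch(num_tokens, character_based=False):
    if num_tokens <= 1:
        length = 1
    elif num_tokens <= 5:
        length = 2
    elif num_tokens <= 8:
        length = 3
    elif num_tokens <= 15:
        length = 4
    elif num_tokens <= 20:
        length = 5
    else:
        length = num_tokens // 4
    return 2 * length if character_based else length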