def altconcat(*args):
    """
    Interleave the given iterables round-robin, in argument order.

    One element is taken from each iterable in turn; once an iterable is
    exhausted it is simply skipped while the others continue.  The result
    is returned as a list.

    >>> ''.join(altconcat('abc', 'ABC'))
    'aAbBcC'
    >>> ''.join(altconcat('abcd', 'ABC'))
    'aAbBcCd'
    >>> ''.join(altconcat('abc', 'ABCD'))
    'aAbBcCD'
    """
    # A fresh object() can never be equal-by-identity to a real element,
    # so it safely marks the padding added for the shorter iterables.
    missing = object()
    interleaved = []
    for column in izip_longest(*args, fillvalue=missing):
        for item in column:
            if item is not missing:
                interleaved.append(item)
    return interleaved
def altconcat(*args):
    """
    Return a list that alternates between the elements of ``args``.

    Elements are drawn position by position: first the 0th element of
    every iterable, then the 1st of every iterable, and so on.  Iterables
    that have run out contribute nothing for the remaining positions.

    >>> ''.join(altconcat('abc', 'ABC'))
    'aAbBcC'
    >>> ''.join(altconcat('abcd', 'ABC'))
    'aAbBcCd'
    >>> ''.join(altconcat('abc', 'ABCD'))
    'aAbBcCD'
    """
    # Unique padding marker; compared by identity so that genuine values
    # such as None or '' in the inputs are never dropped.
    sentinel = object()
    result = []
    for group in izip_longest(*args, fillvalue=sentinel):
        result.extend(x for x in group if x is not sentinel)
    return result
def as_dict(self):
    """
    Turn a MARC record into a dictionary, which is used for ``as_json``.

    The result has two keys: ``'leader'`` (``self.leader``) and
    ``'fields'``, a list with one single-key dict per field yielded by
    iterating ``self``:

    * control fields become ``{tag: data}``;
    * all other fields become
      ``{tag: {'subfields': [...], 'ind1': ..., 'ind2': ...}}`` where each
      entry of ``'subfields'`` is a ``{code: value}`` dict.
    """
    record = {'leader': self.leader, 'fields': []}
    entries = record['fields']
    for field in self:
        if field.is_control_field():
            entries.append({field.tag: field.data})
            continue

        content = {
            'subfields': [],
            'ind1': field.indicator1,
            'ind2': field.indicator2,
        }
        # ``field.subfields`` is a flat sequence that is consumed two items
        # at a time; sharing one iterator between both zip slots yields
        # consecutive pairs, with None filling in if the length is odd.
        pairs = izip_longest(*[iter(field.subfields)] * 2)
        content['subfields'].extend({code: value} for code, value in pairs)
        entries.append({field.tag: content})
    return record
def transpose_to_ldj(record):
    """
    Convert a MARC record into a flat, JSON-serialisable dictionary.

    The result always carries ``'_LEADER'`` (``record.leader``),
    ``'_FORMAT'`` and ``'_TYPE'``.  Every field whose tag passes
    ``isint`` is added under its tag:

    * control fields are stored as ``[field.data]``;
    * other fields are appended to a list of ``{indicators: subfields}``
      dicts, where ``indicators`` is the joined indicator string with
      spaces and dots replaced by ``"_"`` and ``subfields`` is a list of
      ``{code: value}`` dicts (a code containing ``"."`` or consisting
      only of whitespace is replaced by ``"_"``).

    Fields whose tag does not pass ``isint`` are skipped.
    """
    json_record = {
        '_LEADER': record.leader,
        '_FORMAT': "MarcXchange",
        '_TYPE': "Bibliographic",
    }
    for field in record:
        if not isint(field.tag):
            continue
        if field.is_control_field():
            json_record[field.tag] = [field.data]
            continue

        # Sanitise the indicator key once per field.  Doing the "." -> "_"
        # replacement inside the subfield loop (as before) repeated the work
        # for every pair and skipped it entirely for fields without
        # subfields, leaving dots in the resulting key.
        ind = "".join(field.indicators).replace(" ", "_").replace(".", "_")

        ind_obj = []
        # ``field.subfields`` is a flat [code, value, code, value, ...]
        # sequence; one shared iterator in both zip slots walks it pairwise.
        for k, v in izip_longest(*[iter(field.subfields)] * 2):
            if "." in k or k.isspace():
                k = "_"
            ind_obj.append({k: v})

        json_record.setdefault(field.tag, []).append({ind: ind_obj})
    return json_record
def xor(self, other):
    """
    Combine this object with ``other`` using element-wise XOR.

    The elements of ``self.array`` and ``other.array`` are paired up
    positionally (the shorter one is padded with ``0``) and handed to
    ``self._bitwise`` together with the XOR operation.  Whatever
    ``self._bitwise`` returns is returned unchanged.
    """
    apply_op = self._bitwise
    word_pairs = izip_longest(self.array, other.array, fillvalue=0)

    def _xor_words(x, y):
        return x ^ y

    return apply_op(word_pairs, _xor_words)
def grouped(items):
    """
    Return an iterator over consecutive 3-tuples taken from ``items``.

    If the number of items is not a multiple of three, the final tuple is
    padded with empty strings.
    """
    # The *same* iterator is passed three times, so every tuple that the
    # zip builds pulls three successive elements from ``items``.
    stream = iter(items)
    return izip_longest(stream, stream, stream, fillvalue='')
def grouper(iterable, n, fillvalue=None):
    """
    Split ``iterable`` into consecutive tuples of length ``n``.

    The last tuple is padded with ``fillvalue`` when the input does not
    divide evenly, e.g. ``grouper('ABCDEFG', 3, 'x')`` yields
    ``ABC DEF Gxx``.  An iterator is returned; nothing is consumed until
    it is iterated.
    """
    source = iter(iterable)
    # n references to one shared iterator: each output tuple therefore
    # draws n successive items from the input.
    slots = (source,) * n
    return izip_longest(*slots, fillvalue=fillvalue)
def cldf(dataset, concepticon, **kw):
    """
    Write the word lists of ``dataset`` into a ``CldfDataset`` and collect
    cognate judgements.

    For every wordlist returned by ``read_csv(dataset)`` and every concept
    in it, one row per word is added to the CLDF dataset.  Words that have
    a truthy cognate id in ``word_to_cogid`` additionally produce a row in
    ``cognates``; the collected rows are passed through
    ``iter_alignments`` and appended to ``dataset.cognates``.

    ``concepticon`` and ``kw`` are accepted but not used in this body.

    NOTE(review): this block was recovered from a source whose line breaks
    and indentation were lost.  The nesting below is a reconstruction --
    in particular the placement of the ``assert`` statements and of the
    final ``dataset.cognates.extend(...)`` call should be confirmed
    against the upstream file.
    """
    # NOTE: ``language_map`` is built here but never read again in this
    # function; the 'Language_ID' column below is written as ''.
    language_map = {
        l['NAME']: l['GLOTTOCODE'] or None for l in dataset.languages
    }
    # English gloss -> Concepticon id, used for the 'Parameter_ID' column.
    concept_map = {
        c.english: c.concepticon_id
        for c in dataset.conceptlist.concepts.values()
    }
    concept_map[
        'year'] = '1226'  # dunno why this is missing, it's 200 words...
    # Materialised because the wordlists are iterated twice below.
    wordlists = list(read_csv(dataset))
    # concept -> cognate id -> first word of every entry that has exactly
    # one cognate id.  NOTE: not read again within this function.
    cogsets = defaultdict(lambda: defaultdict(list))
    for wl in wordlists:
        for concept, (words, cogids) in wl.words.items():
            if len(cogids) == 1:
                cogsets[concept][cogids[0]].append(words[0])
    with CldfDataset((
            'ID',
            'Language_ID',
            'Language_name',
            'Parameter_ID',
            'Parameter_name',
            'Value',
            'Segments',
            'Source',
            'Comment',
    ), dataset) as ds:
        ds.sources.add(getEvoBibAsSource(SOURCE))
        cognates = []
        for wl in wordlists:
            #print(wl.language)
            for concept, (words, cogids) in wl.words.items():
                if len(cogids) > 1:
                    if len(words) < len(cogids):
                        if len(words) == 1:
                            # A single entry may actually hold several
                            # words separated by ':' or ','.  The second
                            # test looks at the first element *after* a
                            # possible ':' split.
                            if ':' in words[0]:
                                words = words[0].split(':')
                            if ',' in words[0]:
                                words = words[0].split(',')
                            assert len(words) >= len(cogids)
                        # Fewer words than cognate ids: a hand-made
                        # mapping is required.
                        assert (wl.language, concept) in COGSET_MAP
                    if len(words) > len(cogids):
                        assert (wl.language, concept) in COGSET_MAP
                if (wl.language, concept) in COGSET_MAP:
                    word_to_cogid = COGSET_MAP[(wl.language, concept)]
                else:
                    # Pair words and cognate ids positionally; the shorter
                    # side is padded with None.
                    word_to_cogid = dict(izip_longest(words, cogids))
                for i, word in enumerate(words):
                    # Drop a pair of enclosing parentheses.
                    # NOTE(review): the cognate lookup below uses the
                    # stripped form, while the izip_longest mapping above
                    # is keyed by the unstripped words -- confirm this is
                    # intended.
                    if word.startswith('(') and word.endswith(')'):
                        word = word[1:-1].strip()
                    # Row id: language slug, concept slug, 1-based index
                    # of the word within the entry.
                    wid = '%s-%s-%s' % (slug(
                        wl.language), slug(concept), i + 1)
                    # Values follow the column order given to CldfDataset.
                    ds.add_row([
                        wid,
                        '',
                        wl.language,
                        concept_map.get(concept, ''),
                        concept,
                        word,
                        clean_string(word, splitters='?')[0],
                        SOURCE,
                        '',
                    ])
                    # Only words with a truthy cognate id get a cognate
                    # row (missing keys, None and '' are skipped).
                    if word_to_cogid.get(word):
                        cognates.append([
                            wid,
                            ds.name,
                            word,
                            '%s-%s' % (slug(concept), word_to_cogid[word]),
                            False,
                            'expert',
                            SOURCE,
                            '',
                            '',
                            '',
                        ])
        dataset.cognates.extend(
            iter_alignments(ds, cognates, column='Segments'))
def combine(self, pattern, variable):
    """
    Combine a pattern and variable parts to be a line string again.

    The parts are interleaved starting with the first element of
    ``variable``, followed by the first element of ``pattern``, and so on.
    Whichever sequence is shorter is padded with empty strings, so no part
    of the longer one is lost.
    """
    pieces = []
    for pair in izip_longest(variable, pattern, fillvalue=''):
        pieces.extend(pair)
    return ''.join(pieces)
def excepts(self, *other):
    """
    Combine this object with ``other`` using ``x & ~y`` (bits of the
    first operand that are not set in the second).

    ``self.array`` and the ``array`` of every object in ``other`` are
    aligned positionally, with shorter arrays padded by ``0``, and the
    aligned rows are handed to ``self._bitwise`` together with the
    operation.  The return value of ``self._bitwise`` is passed through.
    """
    apply_op = self._bitwise
    columns = [self.array]
    columns.extend(operand.array for operand in other)
    aligned = izip_longest(*columns, fillvalue=0)
    return apply_op(aligned, lambda x, y: x & ~y)
def union(self, *other):
    """
    Combine this object with ``other`` using bitwise OR.

    ``self.array`` and the ``array`` of every object in ``other`` are
    aligned positionally, with shorter arrays padded by ``0``, and the
    aligned rows are handed to ``self._bitwise`` together with the OR
    operation.  The return value of ``self._bitwise`` is passed through.
    """
    apply_op = self._bitwise
    columns = [self.array]
    columns.extend(operand.array for operand in other)
    aligned = izip_longest(*columns, fillvalue=0)
    return apply_op(aligned, lambda x, y: x | y)
def xor(self, other):
    """
    Return the element-wise XOR of this object and ``other``.

    ``self.array`` and ``other.array`` are zipped together, padding the
    shorter one with ``0``, and the pairs are passed to ``self._bitwise``
    along with the XOR operation; its result is returned as-is.
    """
    apply_op = self._bitwise
    aligned = izip_longest(self.array, other.array, fillvalue=0)
    return apply_op(aligned, lambda x, y: x ^ y)