예제 #1
0
파일: cmemsubsets.py 프로젝트: tkf/railgun
def altconcat(*args):
    """
    Interleave the given iterables element by element into a single list.

    The result starts with the first item of every argument, followed by
    the second item of every argument, and so on.  Arguments that run out
    early are simply left out of the remaining rounds.

    >>> ''.join(altconcat('abc', 'ABC'))
    'aAbBcC'
    >>> ''.join(altconcat('abcd', 'ABC'))
    'aAbBcCd'
    >>> ''.join(altconcat('abc', 'ABCD'))
    'aAbBcCD'

    """
    # A fresh object is used as padding so that legitimate elements such as
    # ``None`` are never mistaken for "argument exhausted".
    missing = object()
    merged = []
    for column in izip_longest(*args, fillvalue=missing):
        for item in column:
            if item is not missing:
                merged.append(item)
    return merged
예제 #2
0
def altconcat(*args):
    """
    Merge several iterables by alternating between them.

    Items are taken round-robin: one from each argument in turn.  When an
    argument is exhausted, the remaining arguments keep contributing.

    >>> ''.join(altconcat('abc', 'ABC'))
    'aAbBcC'
    >>> ''.join(altconcat('abcd', 'ABC'))
    'aAbBcCd'
    >>> ''.join(altconcat('abc', 'ABCD'))
    'aAbBcCD'

    """
    # Private padding marker; compared by identity so no real element can
    # ever be confused with it.
    sentinel = object()
    result = []
    for group in izip_longest(*args, fillvalue=sentinel):
        result.extend(item for item in group if item is not sentinel)
    return result
예제 #3
0
파일: record.py 프로젝트: acdha/pymarc
 def as_dict(self):
     """
     Build the plain-dictionary form of this record used by ``as_json``.

     The result has a ``'leader'`` entry and a ``'fields'`` list.  Each
     list item is a one-key dict keyed by the field tag: control fields
     map to their data, other fields map to a dict holding ``'subfields'``,
     ``'ind1'`` and ``'ind2'``.
     """
     fields = []
     record = {'leader': self.leader, 'fields': fields}
     for field in self:
         if field.is_control_field():
             fields.append({field.tag: field.data})
             continue
         ind1 = field.indicator1
         ind2 = field.indicator2
         # Zipping one iterator with itself walks ``field.subfields`` two
         # items at a time; a trailing unpaired item is paired with None.
         pairs = izip_longest(*[iter(field.subfields)] * 2)
         subfields = [{code: value} for code, value in pairs]
         fields.append({
             field.tag: {'subfields': subfields, 'ind1': ind1, 'ind2': ind2},
         })
     return record  # as dict
예제 #4
0
파일: record.py 프로젝트: cordmata/pymarc
 def as_dict(self):
     """
     Return this record as nested dicts and lists (the input of ``as_json``).

     ``'leader'`` holds the record leader and ``'fields'`` holds one
     ``{tag: content}`` dict per field, in iteration order.  For control
     fields the content is the field data; otherwise it is a dict with the
     keys ``'subfields'``, ``'ind1'`` and ``'ind2'``.
     """
     record = {'leader': self.leader, 'fields': []}
     for field in self:
         if not field.is_control_field():
             content = {
                 'subfields': [],
                 'ind1': field.indicator1,
                 'ind2': field.indicator2,
             }
             # The same iterator is passed twice, so every step consumes
             # two consecutive entries of ``field.subfields``; an odd
             # leftover entry gets None as its partner.
             walker = iter(field.subfields)
             for code, value in izip_longest(walker, walker):
                 content['subfields'].append({code: value})
         else:
             content = field.data
         record['fields'].append({field.tag: content})
     return record  # as dict
def transpose_to_ldj(record):
    """
    Flatten a MARC record into a dict suitable for line-delimited JSON.

    The result always carries ``'_LEADER'`` (the record leader),
    ``'_FORMAT'`` and ``'_TYPE'``.  Every field whose tag passes ``isint``
    is then added under its tag:

    * control fields become ``[field.data]``;
    * other fields append ``{indicators: [{code: value}, ...]}`` to the
      list stored under the tag, so repeated tags accumulate.

    Spaces and dots in the indicator key, and subfield codes that contain
    a dot or consist only of whitespace, are replaced by ``"_"``.

    :param record: iterable of fields exposing ``leader``; each field
        provides ``tag``, ``is_control_field()``, ``data``, ``indicators``
        and ``subfields``.
    :returns: the assembled dictionary.
    """
    json_record = {
        '_LEADER': record.leader,
        '_FORMAT': "MarcXchange",
        '_TYPE': "Bibliographic",
    }
    for field in record:
        # Fields rejected by ``isint`` (defined elsewhere) are skipped.
        if not isint(field.tag):
            continue
        if field.is_control_field():
            json_record[field.tag] = [field.data]
            continue
        # Sanitise the indicator key once, up front.  Doing the "." -> "_"
        # replacement inside the subfield loop (as before) silently skipped
        # it for fields that have no subfields.
        ind = "".join(field.indicators).replace(" ", "_").replace(".", "_")
        ind_obj = []
        # Walk the subfields two items at a time as (code, value) pairs.
        for k, v in izip_longest(*[iter(field.subfields)] * 2):
            if "." in k or k.isspace():
                k = "_"
            ind_obj.append({k: v})
        json_record.setdefault(field.tag, []).append({ind: ind_obj})
    return json_record
예제 #6
0
 def xor(self, other):
     """
     Combine ``self.array`` with ``other.array`` element-wise using ``^``.

     The two arrays are paired position by position; the shorter one is
     padded with 0.  The pairs and the operator are handed to
     ``self._bitwise``, whose result is returned unchanged.
     """
     paired = izip_longest(self.array, other.array, fillvalue=0)
     return self._bitwise(paired, lambda x, y: x ^ y)
예제 #7
0
def grouped(items, size=3, fillvalue=''):
    """
    Lazily split ``items`` into consecutive tuples of ``size`` elements.

    With the defaults this behaves exactly as before: chunks of three,
    the last one padded with empty strings.

    :param items: any iterable.
    :param size: number of elements per chunk (default 3).
    :param fillvalue: padding used to complete the final chunk
        (default ``''``).
    :returns: an iterator of ``size``-tuples.
    :raises ValueError: if ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    # ``size`` references to ONE iterator: each output tuple therefore
    # pulls ``size`` consecutive elements from ``items``.
    args = [iter(items)] * size
    return izip_longest(*args, fillvalue=fillvalue)
예제 #8
0
def grouper(iterable, n, fillvalue=None):
    """ Chop ``iterable`` into consecutive tuples of length ``n``.

    The last tuple is completed with ``fillvalue`` when the input length
    is not a multiple of ``n``, e.g.
    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    shared = iter(iterable)
    # Every slot of an output tuple reads from the same iterator, so one
    # tuple consumes n consecutive items of the input.
    slots = (shared,) * n
    return izip_longest(*slots, fillvalue=fillvalue)
예제 #9
0
 def grouped(items, size=3, fillvalue=''):
     """
     Lazily split ``items`` into consecutive tuples of ``size`` elements.

     Called with ``items`` only, it yields chunks of three and pads the
     final chunk with empty strings, exactly as the previous version did.

     :param items: any iterable.
     :param size: number of elements per chunk (default 3).
     :param fillvalue: padding used to complete the final chunk
         (default ``''``).
     :returns: an iterator of ``size``-tuples.
     :raises ValueError: if ``size`` is smaller than 1.
     """
     if size < 1:
         raise ValueError("size must be a positive integer")
     # The list holds the SAME iterator ``size`` times, so each output
     # tuple consumes ``size`` consecutive elements of ``items``.
     args = [iter(items)] * size
     return izip_longest(*args, fillvalue=fillvalue)
예제 #10
0
def cldf(dataset, concepticon, **kw):
    """
    Write the word lists of ``dataset`` as CLDF rows and register cognates.

    Every word of every word list returned by ``read_csv(dataset)`` is
    added as one row to a ``CldfDataset``.  Words that can be linked to a
    cognate set id (through ``COGSET_MAP`` or by position) additionally
    yield a cognate entry; all entries are passed through
    ``iter_alignments`` and appended to ``dataset.cognates``.

    ``concepticon`` and ``**kw`` are accepted but not used in this body.
    """
    # NOTE(review): ``language_map`` is built here but never read below;
    # the 'Language_ID' column is always written as ''.
    language_map = {
        l['NAME']: l['GLOTTOCODE'] or None
        for l in dataset.languages
    }
    # English concept gloss -> Concepticon id, used for 'Parameter_ID'.
    concept_map = {
        c.english: c.concepticon_id
        for c in dataset.conceptlist.concepts.values()
    }
    concept_map[
        'year'] = '1226'  # dunno why this is missing, it's 200 words...
    # Materialised because the word lists are iterated twice below.
    wordlists = list(read_csv(dataset))
    # NOTE(review): ``cogsets`` (concept -> cogid -> first words) is filled
    # here but never read afterwards in this function.
    cogsets = defaultdict(lambda: defaultdict(list))
    for wl in wordlists:
        for concept, (words, cogids) in wl.words.items():
            if len(cogids) == 1:
                cogsets[concept][cogids[0]].append(words[0])

    with CldfDataset((
            'ID',
            'Language_ID',
            'Language_name',
            'Parameter_ID',
            'Parameter_name',
            'Value',
            'Segments',
            'Source',
            'Comment',
    ), dataset) as ds:
        ds.sources.add(getEvoBibAsSource(SOURCE))
        cognates = []
        for wl in wordlists:
            #print(wl.language)
            for concept, (words, cogids) in wl.words.items():
                if len(cogids) > 1:
                    if len(words) < len(cogids):
                        # A single entry may bundle several words separated
                        # by ':' or ','; split it so there is at least one
                        # word per cognate id.
                        if len(words) == 1:
                            if ':' in words[0]:
                                words = words[0].split(':')
                            if ',' in words[0]:
                                words = words[0].split(',')
                        assert len(words) >= len(cogids)
                    # Entries with several cognate ids must have an explicit
                    # word -> cogid mapping in COGSET_MAP.
                    assert (wl.language, concept) in COGSET_MAP
                    # NOTE(review): this repeats the assertion just above.
                    if len(words) > len(cogids):
                        assert (wl.language, concept) in COGSET_MAP
                if (wl.language, concept) in COGSET_MAP:
                    word_to_cogid = COGSET_MAP[(wl.language, concept)]
                else:
                    # Pair words and cognate ids by position; the shorter
                    # sequence is padded with None.
                    word_to_cogid = dict(izip_longest(words, cogids))
                for i, word in enumerate(words):
                    # Drop one pair of enclosing parentheses.
                    if word.startswith('(') and word.endswith(')'):
                        word = word[1:-1].strip()
                    # Row id: <language slug>-<concept slug>-<1-based index>.
                    wid = '%s-%s-%s' % (slug(
                        wl.language), slug(concept), i + 1)
                    # Values follow the column order given to CldfDataset.
                    ds.add_row([
                        wid,
                        '',
                        wl.language,
                        concept_map.get(concept, ''),
                        concept,
                        word,
                        clean_string(word, splitters='?')[0],
                        SOURCE,
                        '',
                    ])
                    # Only words with a truthy cognate id get a cognate entry.
                    if word_to_cogid.get(word):
                        cognates.append([
                            wid,
                            ds.name,
                            word,
                            '%s-%s' % (slug(concept), word_to_cogid[word]),
                            False,
                            'expert',
                            SOURCE,
                            '',
                            '',
                            '',
                        ])
        dataset.cognates.extend(
            iter_alignments(ds, cognates, column='Segments'))
예제 #11
0
 def combine(self, pattern, variable):
     """Rebuild a line string by interleaving variable and pattern parts.

     Parts alternate starting with ``variable``; when one sequence is
     shorter, the missing parts count as empty strings.
     """
     pieces = []
     for var_part, pat_part in izip_longest(variable, pattern, fillvalue=''):
         pieces.append(var_part)
         pieces.append(pat_part)
     return ''.join(pieces)
예제 #12
0
 def excepts(self, *other):
     """
     Apply ``x & ~y`` across ``self.array`` and the arrays of ``other``.

     ``self.array`` is zipped position by position with the ``array`` of
     every object in ``other``; shorter arrays are padded with 0.  The
     zipped stream and the operator go to ``self._bitwise``, whose result
     is returned as is.
     """
     other_arrays = [o.array for o in other]
     columns = izip_longest(self.array, *other_arrays, fillvalue=0)
     return self._bitwise(columns, lambda x, y: x & ~y)
예제 #13
0
파일: bitindex.py 프로젝트: douban/dpark
 def union(self, *other):
     """
     Apply ``x | y`` across ``self.array`` and the arrays of ``other``.

     ``self.array`` is zipped position by position with the ``array`` of
     every object in ``other``; shorter arrays are padded with 0.  The
     zipped stream and the operator go to ``self._bitwise``, whose result
     is returned as is.
     """
     other_arrays = [o.array for o in other]
     columns = izip_longest(self.array, *other_arrays, fillvalue=0)
     return self._bitwise(columns, lambda x, y: x | y)
예제 #14
0
 def union(self, *other):
     """
     OR this object's ``array`` together with the arrays of ``other``.

     All arrays are aligned by position, with 0 filling in for arrays
     that end early, and the aligned stream is passed to
     ``self._bitwise`` along with the ``|`` operator.  Returns whatever
     ``self._bitwise`` returns.
     """
     arrays = [self.array]
     arrays.extend(o.array for o in other)
     aligned = izip_longest(*arrays, fillvalue=0)
     return self._bitwise(aligned, lambda x, y: x | y)
예제 #15
0
 def combine(self, pattern, variable):
     """Join variable and pattern parts back into a single line string.

     The output alternates ``variable[0]``, ``pattern[0]``,
     ``variable[1]``, ``pattern[1]``, ...; a sequence that runs out first
     contributes empty strings for the remaining positions.
     """
     parts = []
     for pair in izip_longest(variable, pattern, fillvalue=''):
         parts.extend(pair)
     return ''.join(parts)
예제 #16
0
파일: bitindex.py 프로젝트: douban/dpark
 def excepts(self, *other):
     """
     AND-NOT this object's ``array`` with the arrays of ``other``.

     All arrays are aligned by position, with 0 filling in for arrays
     that end early, and the aligned stream is passed to
     ``self._bitwise`` along with the ``x & ~y`` operator.  Returns
     whatever ``self._bitwise`` returns.
     """
     arrays = [self.array]
     arrays.extend(o.array for o in other)
     aligned = izip_longest(*arrays, fillvalue=0)
     return self._bitwise(aligned, lambda x, y: x & ~y)
예제 #17
0
파일: bitindex.py 프로젝트: douban/dpark
 def xor(self, other):
     """
     XOR this object's ``array`` with ``other.array`` position by position.

     Whichever array is shorter is extended with 0 so that both have the
     same length.  The aligned pairs and the ``^`` operator are given to
     ``self._bitwise``; its return value is passed straight back.
     """
     mine, theirs = self.array, other.array
     aligned = izip_longest(mine, theirs, fillvalue=0)
     return self._bitwise(aligned, lambda x, y: x ^ y)