Exemple #1
0
    def test_bibtex2source(self):
        """Smoke-test ``bibtex2source`` with author, editor and malformed-year records.

        No result is inspected; the test passes when none of the calls raises.
        """
        from clld.scripts.util import bibtex2source

        field_sets = (
            dict(author='M, R and G, H and Z, U'),
            dict(editor='M, R and G, H'),
            dict(title='tb', customfield='cf', year="1920}"),
        )
        for fields in field_sets:
            bibtex2source(Record('book', 'id', **fields))
Exemple #2
0
def test_ContextObject():
    """Exercise ``ContextObject`` construction, ``from_bibtex`` and ``span_attrs``."""
    from clld.lib.coins import ContextObject

    # Non-ASCII journal titles must show up percent-encoded in the title
    # attribute, both for text input and for UTF-8 encoded bytes.
    title_attr = ContextObject(
        'sid', 'journal', ('jtitle', '\xe2')).span_attrs()['title']
    assert '%C3%A2' in title_attr
    encoded_title = binary_type('ä'.encode('utf8'))
    title_attr = ContextObject(
        'sid', 'journal', ('jtitle', encoded_title)).span_attrs()['title']
    assert '%C3%A4' in title_attr

    # Conversion from BibTeX records of several genres must not raise.
    book = Record('book', '1', title='The Title', author='L, F')
    ContextObject('sid', 'book', ('btitle', 'the title'))
    ContextObject.from_bibtex('sid', book)

    article = Record(
        'article', '1',
        title='The Title', author='The One and The Other', journal='J')
    ContextObject.from_bibtex('sid', article)

    thesis = Record('phdthesis', '1', title='The Title')
    ContextObject.from_bibtex('sid', thesis)

    conference = Record(
        'conference', '1', title='The Title', booktitle='something')
    attrs = ContextObject.from_bibtex('sid', conference).span_attrs()
    assert isinstance(attrs, dict)

    # Non-ASCII and byte-string identifiers still produce non-empty attributes.
    assert ContextObject('äöü', 'äöü').span_attrs()
    assert ContextObject('äöü'.encode('latin1'), None).span_attrs()
Exemple #3
0
def add_sources(args, data):
    """Register bibliography records as ``Source`` objects in *data*.

    Records come from the ``@``-separated entries in ``BIB`` and from the
    file ``phoible-references.bib`` (resolved via ``args.data_file``); the
    ``BIB`` entries are processed first and an id that is already present is
    never added again.

    Afterwards every key of the form ``<integer>_<rest>`` gets an alias
    ``<rest>`` pointing to the same source, unless ``<rest>`` already exists.
    """
    from_file = Database.from_file(
        args.data_file('phoible-references.bib'), lowercase=True)
    extra = []
    for chunk in nfilter(BIB.split('@')):
        extra.append(Record.from_string('@' + chunk, lowercase=True))

    for record in chain(extra, from_file):
        if record.id in data['Source']:
            continue
        data.add(Source, record.id, _obj=bibtex2source(record))

    # Allow lookup of records whose BibTeX keys carry a numeric prefix
    # without having to specify that prefix.
    for key in list(data['Source'].keys()):
        if '_' not in key:
            continue
        prefix, rest = key.split('_', 1)
        try:
            int(prefix)  # only numeric prefixes qualify for an alias
            if rest not in data['Source']:
                data['Source'][rest] = data['Source'][key]
        except (ValueError, TypeError):
            pass
Exemple #4
0
    def test_linearization(self):
        from clld.lib.bibtex import Record

        for bib, txt in [
            (
                """@book{Dayley-1985,
  address    = {Berkeley},
  author     = {Dayley, Jon P.},
  iso_code   = {tzt; tzj},
  olac_field = {general_linguistics; semantics; morphology; typology; syntax},
  publisher  = {University of California Press},
  series     = {University of California Publications in Linguistics},
  title      = {Tzutujil Grammar},
  volume     = {107},
  wals_code  = {tzu},
  year       = {1985}
}
                """,
                "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
                "Publications in Linguistics, 107.) Berkeley: University of California "
                "Press."),
            (
                """@book{318762,
  address    = {Vancouver},
  author     = {Cook, Eung-Do},
  pages      = {670},
  publisher  = {UBC Press},
  series     = {First Nations Languages Series},
  title      = {A Tsilhqút'ín Grammar},
  year       = {2013}
}
                """,
                "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
                "Series.) Vancouver: UBC Press. 670pp."),
            (
                """@inbook{316361,
  author     = {Healey, Alan},
  booktitle  = {New Guinea area languages and language study},
  pages      = {223-232},
  title      = {History of research in Austronesian languages: Admiralty Islands area},
  volume     = {2}
}
                """,
                "Healey, Alan. n.d. History of research in Austronesian languages: "
                "Admiralty Islands area. 2. 223-232."),
            (
                """@inproceedings{moisikesling2011,
  author    = {Moisik, Scott R. and Esling, John H.},
  booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)},
  pages     = {1406-1409},
  title     = {The 'whole larynx' approach to laryngeal features},
  year      = {2011}
}""",
                "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
                "to laryngeal features. In Proceedings of the Congress of "
                "Phonetic Sciences (ICPhS XVII), 1406-1409.")
        ]:
            rec = Record.from_string(bib)
            self.assertEqual(rec.text(), txt)
Exemple #5
0
    def test_Record(self):
        """Check serialization, text output, formatting and parsing of a ``Record``."""
        from clld.lib.bibtex import Record

        fields = dict(
            title='The Title', editor='ed', booktitle='bt', school='s', issue='i',
            pages='1-4', publisher='M')
        book = Record('book', '1', **fields)
        self.assertIn('@book', book.__unicode__())
        self.assertIn('@book', book.__str__())
        self.assertIn('The Title', book.text())

        # Every supported output format must render without raising.
        for fmt in ('txt', 'en', 'ris', 'mods'):
            book.format(fmt)

        # Round-trip through the BibTeX serialization and build from an object.
        Record.from_string(book.__unicode__())
        Record.from_object(Mock())
Exemple #6
0
    def test_linearization(self):
        from clld.lib.bibtex import Record

        for bib, txt in [
            ("""@book{Dayley-1985,
  address    = {Berkeley},
  author     = {Dayley, Jon P.},
  iso_code   = {tzt; tzj},
  olac_field = {general_linguistics; semantics; morphology; typology; syntax},
  publisher  = {University of California Press},
  series     = {University of California Publications in Linguistics},
  title      = {Tzutujil Grammar},
  volume     = {107},
  wals_code  = {tzu},
  year       = {1985}
}
                """,
             "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
             "Publications in Linguistics, 107.) Berkeley: University of California "
             "Press."),
            ("""@book{318762,
  address    = {Vancouver},
  author     = {Cook, Eung-Do},
  pages      = {670},
  publisher  = {UBC Press},
  series     = {First Nations Languages Series},
  title      = {A Tsilhqút'ín Grammar},
  year       = {2013}
}
                """,
             "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
             "Series.) Vancouver: UBC Press. 670pp."),
            ("""@inbook{316361,
  author     = {Healey, Alan},
  booktitle  = {New Guinea area languages and language study},
  pages      = {223-232},
  title      = {History of research in Austronesian languages: Admiralty Islands area},
  volume     = {2}
}
                """,
             "Healey, Alan. n.d. History of research in Austronesian languages: "
             "Admiralty Islands area. 2. 223-232."),
            ("""@inproceedings{moisikesling2011,
  author    = {Moisik, Scott R. and Esling, John H.},
  booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)},
  pages     = {1406-1409},
  title     = {The 'whole larynx' approach to laryngeal features},
  year      = {2011}
}""", "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
             "to laryngeal features. In Proceedings of the Congress of "
             "Phonetic Sciences (ICPhS XVII), 1406-1409.")
        ]:
            rec = Record.from_string(bib)
            self.assertEqual(rec.text(), txt)
Exemple #7
0
    def test_ContextObject(self):
        """Build ``ContextObject`` instances directly and from records of several genres."""
        from clld.lib.coins import ContextObject

        book = Record('book', '1', title='The Title', author='L, F')
        ContextObject('sid', 'book', ('btitle', 'the title'))
        ContextObject.from_bibtex('sid', book)

        article = Record(
            'article', '1',
            title='The Title', author='The One and The Other', journal='J')
        ContextObject.from_bibtex('sid', article)

        thesis = Record('phdthesis', '1', title='The Title')
        ContextObject.from_bibtex('sid', thesis)

        conference = Record(
            'conference', '1', title='The Title', booktitle='something')
        context = ContextObject.from_bibtex('sid', conference)
        self.assertIsInstance(context.span_attrs(), dict)
Exemple #8
0
    def test_Record(self):
        """Exercise BibTeX serialization, text rendering, formats and parsing of ``Record``."""
        from clld.lib.bibtex import Record

        book = Record(
            'book', '1',
            title='The Title', editor='ed', booktitle='bt', school='s',
            issue='i', pages='1-4', publisher='M')

        for serialized in (book.__unicode__(), book.__str__()):
            self.assertIn('@book', serialized)
        self.assertIn('The Title', book.text())

        # None of the supported formats may raise.
        for name in ('txt', 'en', 'ris', 'mods'):
            book.format(name)

        Record.from_string(book.__unicode__())
        Record.from_object(Mock())
Exemple #9
0
    def test_Database(self):
        """Check length, lookup and file loading of ``Database``."""
        from clld.lib.bibtex import Record, Database

        empty = Database([])
        self.assertEqual(len(empty), 0)

        # A record is reachable both by position and by its id.
        single = Database([Record('book', 'id')])
        self.assertEqual(single[0], single['id'])
        assert unicode(single)

        # A missing file yields an empty database.
        missing = Database.from_file('notexisting.bib')
        self.assertEqual(len(missing), 0)

        loaded = Database.from_file(TESTS_DIR.joinpath('test.bib'))
        self.assertEqual(len(loaded), 1)
Exemple #10
0
def _get_bibtex(refs):
    for ref in refs:
        genre = 'misc'
        id = ref['id']
        attrs = dict(all=ref['text'])
        t = ref['text']
        match = YEAR.search(t)
        if match:
            authors = 'editor' if match.group('ed') else 'author'
            attrs['key'], attrs[authors] = normalized_author(t[:match.start()].strip())
            attrs['title'], rem = [s.strip() for s in re.split('\.|\?', t[match.end():], 1)]
            attrs['year'] = match.group('year')
            attrs['key'] = '%(key)s %(year)s' % attrs
            m = EDS.match(rem)
            if m:
                assert 'editor' not in attrs
                attrs['editor'] = normalized_author(m.group('eds').strip())[1]
                genre = 'incollection'
                rem = rem[m.end():].strip()
                mm = BTITLE_PAGES.match(rem)
                if mm:
                    attrs['booktitle'] = mm.group('btitle').strip()
                    attrs['pages'] = mm.group('pages').strip()
                    rem = rem[mm.end():].strip()
            else:
                mm = JOURNAL.match(rem)
                if mm:
                    genre = 'article'
                    attrs['journal'] = mm.group('journal').strip()
                    attrs['volume'] = mm.group('volume').strip()
                    if mm.group('number'):
                        attrs['number'] = mm.group('number').strip()
                    attrs['pages'] = mm.group('pages').strip()
                    rem = rem[mm.end():].strip()
            m = PUBLISHER.match(rem)
            if m:
                if genre == 'misc':
                    genre = 'book'
                attrs['place'] = m.group('place').strip()
                attrs['publisher'] = m.group('publisher').strip()
                rem = rem[m.end():].strip()
            _rem = []
            for piece in [p.strip() for p in re.split('\.(?:\s+|$)', rem) if p.strip()]:
                if piece.startswith('http') and not re.search('\s+', piece):
                    attrs['url'] = piece
                elif piece.startswith('(') and piece.endswith(')'):
                    attrs['note'] = piece[1:-1].strip()
                else:
                    _rem.append(piece)
            rem = '. '.join(_rem)
            if not slug(unicode(rem)):
                del attrs['all']
        yield Record(genre, id, **attrs)
Exemple #11
0
    def test_Database(self):
        """Check length, lookup, file loading, iteration and formatting of ``Database``."""
        from clld.lib.bibtex import Record, Database

        empty = Database([])
        self.assertEqual(len(empty), 0)

        # A record is reachable both by position and by its id.
        single = Database([Record('book', 'id')])
        self.assertEqual(single[0], single['id'])
        assert text_type(single)

        # A missing file yields an empty database.
        missing = Database.from_file('notexisting.bib')
        self.assertEqual(len(missing), 0)

        loaded = Database.from_file(TESTS_DIR.joinpath('test.bib'))
        self.assertEqual(len(loaded), 1)
        assert '@' in loaded[0]['title']
        records = [record for record in loaded]
        assert records
        with self.assertRaises(NotImplementedError):
            loaded.format('txt')
Exemple #12
0
def add_sources(args, data):
    """Add bibliography records to *data* as ``Source`` objects.

    The ``@``-separated entries of ``BIB`` are handled before the records of
    ``phoible-references.bib`` (located through ``args.data_file``); ids that
    already exist in ``data['Source']`` are skipped.

    Finally, each key shaped like ``<integer>_<rest>`` is made available
    under ``<rest>`` as well, provided ``<rest>`` is not taken yet.
    """
    file_records = Database.from_file(
        args.data_file('phoible-references.bib'),
        lowercase=True,
    )
    extra_records = [
        Record.from_string('@' + entry, lowercase=True)
        for entry in nfilter(BIB.split('@'))
    ]

    for item in chain(extra_records, file_records):
        if item.id in data['Source']:
            continue
        data.add(Source, item.id, _obj=bibtex2source(item))

    # Alias records whose BibTeX keys have a numeric prefix, so they can be
    # looked up without specifying the prefix.
    for full_key in list(data['Source'].keys()):
        if '_' not in full_key:
            continue
        number, bare_key = full_key.split('_', 1)
        try:
            int(number)  # raises for non-numeric prefixes
            if bare_key not in data['Source']:
                data['Source'][bare_key] = data['Source'][full_key]
        except (ValueError, TypeError):
            pass
def main(args):  # pragma: no cover
    """Load the PHOIBLE dataset described by ``DS`` into the database.

    Objects are created in this order: sources, contributors (with their
    references), the dataset record and its editors, varieties, segments,
    replacement redirects, per-segment feature data, inventories and finally
    phonemes with their value sets.

    :param args: not used by this function.
    """
    ds = StructureDataset.from_metadata(DS)
    data = Data()

    # Bibliography: sources shipped with the dataset first, then the extra
    # records from BIB, which never override an already known id.
    for source in ds.sources:
        data.add(common.Source, source.id, _obj=bibtex2source(source))

    ext = [
        Record.from_string('@' + s, lowercase=True)
        for s in nfilter(BIB.split('@'))
    ]
    for rec in ext:
        if rec.id not in data['Source']:
            data.add(common.Source, rec.id, _obj=bibtex2source(rec))

    for contrib in ds['contributors.csv']:
        contributor = data.add(
            common.Contributor,
            contrib['ID'],
            id=contrib['ID'].upper(),
            name=contrib['Name'],
            description=contrib['Description'],
            url=contrib['URL'],
            jsondata={
                'readme': contrib['Readme'],
                'contents': contrib['Contents'],
            },
        )
        for src in contrib['Source']:
            DBSession.add(
                models.ContributorReference(
                    source=data['Source'][src], contributor=contributor))

    dataset = data.add(
        common.Dataset,
        'phoible',
        id='phoible',
        name='PHOIBLE 2.0',
        description='PHOIBLE 2.0',
        publisher_name="Max Planck Institute for the Science of Human History",
        publisher_place="Jena",
        publisher_url="https://www.shh.mpg.de",
        domain='phoible.org',
        license='https://creativecommons.org/licenses/by-sa/3.0/',
        contact='*****@*****.**',
        jsondata={
            'license_icon':
            'https://i.creativecommons.org/l/by-sa/3.0/88x31.png',
            'license_name':
            'Creative Commons Attribution-ShareAlike 3.0 Unported License',
        })

    # Editors: reuse a contributor loaded above when one is registered under
    # the given key, otherwise create a new one.
    editors = [
        ('UZ', "Steven Moran"),
        ('mccloy', "Daniel McCloy"),
    ]
    for i, (cid, name) in enumerate(editors, start=1):
        contrib = data['Contributor'].get(cid)
        if not contrib:
            contrib = common.Contributor(id=cid, name=name)
        DBSession.add(
            common.Editor(dataset=dataset, ord=i, contributor=contrib))

    glottolog = Glottolog(
        Path(phoible.__file__).parent.parent.parent.parent.joinpath(
            'glottolog', 'glottolog'))

    for lang in ds['LanguageTable']:
        data.add(
            models.Variety,
            lang['ID'],
            id=lang['ID'],
            name=lang['Name'],
        )

    # Only varieties with an 8-character id are passed on for family lookup
    # (presumably those ids are Glottocodes; confirm against the dataset).
    load_families(
        data,
        [(variety.id, variety)
         for variety in data['Variety'].values() if len(variety.id) == 8],
        glottolog.repos)
    DBSession.flush()

    # assign color codes: one color per family, families ordered by size
    families = defaultdict(list)
    for variety in data['Variety'].values():
        families[variety.family_pk].append(variety)

    colors = color.qualitative_colors(len(families))
    for i, langs in enumerate(sorted(families.values(),
                                     key=lambda v: -len(v))):
        for variety in langs:
            variety.jsondata = {'color': colors[i]}

    for segment in ds['ParameterTable']:
        # The equivalence class is the segment name without characters whose
        # Unicode name starts with COMBINING or MODIFIER. It must be a plain
        # string: a trailing comma after the join used to turn it into a
        # 1-tuple.
        equivalence_class = ''.join(
            char for char in segment['Name']
            if unicodedata.name(char).split()[0] not in ['COMBINING', 'MODIFIER'])
        data.add(models.Segment,
                 segment['ID'],
                 id=segment['ID'],
                 name=segment['Name'],
                 description=segment['Description'],
                 segment_class=segment['SegmentClass'],
                 equivalence_class=equivalence_class)
    DBSession.flush()

    # Add redirects for old language pages! get relevant ISO codes and map to Glottocode!
    for model, repls in load(
            Path(phoible.__file__).parent.parent /
            'replacements.json').items():
        if model == 'Language':
            languoids = {lg.id: lg for lg in glottolog.languoids()}
            iso_languoids = {lg.iso: lg for lg in languoids.values() if lg.iso}
            gl_in_phoible = set(data['Variety'].keys())
            for oid, nid in repls.items():
                gls = descendants_from_nodemap(
                    iso_languoids.get(oid),
                    languoids).intersection(gl_in_phoible)
                if gls:
                    # Any matching variety serves as redirect target.
                    nid = gls.pop()
                    # NOTE(review): the size check runs after pop(), so the
                    # '+++' line only appears when three or more candidates
                    # matched -- confirm this is intended.
                    if len(gls) > 1:
                        print('+++', oid, gls)
                else:
                    # No variety found; keep the replacement id from the file.
                    print('---', oid)
                common.Config.add_replacement(oid, nid, common.Language)
        elif model == 'Parameter':
            segments_in_phoible = set(data['Segment'].keys())
            for oid, nid in repls.items():
                id_ = nid if nid in segments_in_phoible else None
                common.Config.add_replacement(oid, id_, common.Parameter)

    # Every remaining column of the parameter table becomes a key/value pair
    # attached to the segment.
    for segment in ds['ParameterTable']:
        for i, (k, v) in enumerate(sorted(segment.items())):
            if k not in ['ID', 'Name', 'Description', 'SegmentClass']:
                DBSession.add(
                    common.Parameter_data(
                        key=feature_name(k),
                        value=v,
                        ord=i,
                        object_pk=data['Segment'][segment['ID']].pk))

    for inventory in ds['contributions.csv']:
        inv = data.add(
            models.Inventory,
            inventory['ID'],
            id=inventory['ID'],
            name='{0} ({1} {2})'.format(
                inventory['Name'],
                inventory['Contributor_ID'].upper(),
                inventory['ID'],
            ),
            source_url=inventory['URL'],
            count_tone=inventory['count_tones'],
            count_vowel=inventory['count_vowels'],
            count_consonant=inventory['count_consonants'],
        )
        DBSession.add(
            common.ContributionContributor(
                contribution=inv,
                contributor=data['Contributor'][
                    inventory['Contributor_ID'].upper()]))
        for src in inventory['Source']:
            DBSession.add(
                common.ContributionReference(contribution=inv,
                                             source=data['Source'][src]))

    for phoneme in ds['ValueTable']:
        lang = data['Variety'][phoneme['Language_ID']]
        inv = data['Inventory'][phoneme['Contribution_ID']]
        if not inv.language:
            # The first phoneme seen for an inventory determines its language.
            inv.language = lang
        vs = common.ValueSet(
            id=phoneme['ID'],
            contribution=inv,
            language=lang,
            parameter=data['Segment'][phoneme['Parameter_ID']])

        for ref in phoneme['Source']:
            DBSession.add(
                common.ValueSetReference(source=data['Source'][ref],
                                         valueset=vs))

        DBSession.add(
            models.Phoneme(
                id=phoneme['ID'],
                name='%s %s' % (phoneme['Value'], inv.name),
                allophones=' '.join(phoneme['Allophones']),
                marginal=phoneme['Marginal'],
                valueset=vs))
Exemple #14
0
    def test_Record(self):
        """Cover name fields, rendering, parsing and genre fallback of ``Record``."""
        from clld.lib.bibtex import Record, EntryType

        # A list passed for a name field reads back joined by ' and '.
        people = Record('article', '1', author=['a', 'b'], editor='a and b')
        self.assertEqual(people['author'], 'a and b')
        self.assertEqual(people.get('author'), people.getall('author'))
        self.assertEqual(people['editor'], people.get('editor'))
        self.assertEqual(people.getall('editor'), ['a', 'b'])

        book_fields = dict(
            title='The Title',
            author='author',
            editor='ed',
            booktitle='bt',
            school='s',
            issue='i',
            pages='1-4',
            publisher='M',
            note="Revised edition",
        )
        book = Record('book', '1', **book_fields)
        self.assertIn('@book', book.__unicode__())
        self.assertIn('@book', book.__str__())
        self.assertIn('bt', book.text())

        # Every supported output format must render without raising.
        for fmt in ('txt', 'en', 'ris', 'mods'):
            book.format(fmt)

        Record.from_string(book.__unicode__(), lowercase=True)
        Record.from_object(Mock())

        chapter_fields = dict(
            title='The Title', editor='ed', booktitle='bt', school='s', issue='i',
            pages='1-4', publisher='M', note="Revised edition")
        chapter = Record('incollection', '1', **chapter_fields)
        self.assertIn('In ', chapter.text())

        article_fields = dict(
            title='The Title', journal='The Journal', volume="The volume", issue='issue')
        article = Record('article', '1', **article_fields)
        self.assertIn('The Journal', article.text())

        # An unknown genre name falls back to ``misc``.
        unknown = Record('xmisc', '1', note='Something')
        self.assertEqual(unknown.genre, EntryType.misc)
        self.assertIn('Something', unknown.text())
Exemple #15
0
    def test_bibtex2source(self):
        """Smoke-test ``bibtex2source`` with a custom field and a malformed year."""
        from clld.scripts.util import bibtex2source

        record = Record(
            'book', 'id', title='tb', customfield='cf', year="1920}")
        bibtex2source(record)
Exemple #16
0
# -*- coding: utf-8 -*-
from clld.lib.bibtex import Record
from clldutils.misc import slug

CFG = {
    'EXPORTS': [],
    'PUBLICATIONS': [
        Record(
            'ARTICLE',
            'HammarstroemEtAl2011Oslo',
            ('author', u'Harald Hammarström and Sebastian Nordhoff'),
            ('year', '2011'),
            ('title',
             'LangDoc: Bibliographic Infrastructure for Linguistic Typology'),
            ('journal', 'Oslo Studies in Language'),
            ('volume', '3'),
            ('number', '2'),
            ('pages', '31-43'),
            ('url',
             'https://www.journals.uio.no/index.php/osla/article/view/75/199'),
        ),
        Record(
            'UNPUBLISHED',
            'HammarstroemEtAl2011Howmany',
            ('author', u"Hammarström, Harald and Nordhoff, Sebastian"),
            ('year', u"2011"),
            ('title', u"How many languages have so far been described?"),
            ('howpublished',
             u"Paper presented at NWO Endangered Languages Programme Conference, Leiden, April 2011"
             ),
        ),
Exemple #17
0
    def test_Record(self):
        """Check field access, rendering, parsing and genre fallback of ``Record``."""
        from clld.lib.bibtex import Record, EntryType

        # Name fields given as a list or as an ' and '-joined string behave alike.
        named = Record('article', '1', author=['a', 'b'], editor='a and b')
        self.assertEqual(named['author'], 'a and b')
        self.assertEqual(named.get('author'), named.getall('author'))
        self.assertEqual(named['editor'], named.get('editor'))
        self.assertEqual(named.getall('editor'), ['a', 'b'])

        book = Record(
            'book', '1',
            title='The Title', author='author', editor='ed', booktitle='bt',
            school='s', issue='i', pages='1-4', publisher='M',
            note="Revised edition")
        for serialized in (book.__unicode__(), book.__str__()):
            self.assertIn('@book', serialized)
        self.assertIn('bt', book.text())

        # None of the supported formats may raise.
        for name in ('txt', 'en', 'ris', 'mods'):
            book.format(name)

        Record.from_string(book.__unicode__(), lowercase=True)
        Record.from_object(Mock())

        collection_item = Record(
            'incollection', '1',
            title='The Title', editor='ed', booktitle='bt', school='s',
            issue='i', pages='1-4', publisher='M', note="Revised edition")
        self.assertIn('In ', collection_item.text())

        journal_item = Record(
            'article', '1',
            title='The Title', journal='The Journal', volume="The volume",
            issue='issue')
        self.assertIn('The Journal', journal_item.text())

        # A genre that is not a known entry type is mapped to ``misc``.
        fallback = Record('xmisc', '1', note='Something')
        self.assertEqual(fallback.genre, EntryType.misc)
        self.assertIn('Something', fallback.text())