Ejemplo n.º 1
0
    def test03(self):
        """A lone author written as "Last, First" is rendered "Last, F.: title"."""
        authors = Fields.AuthorGroup()
        authors.append(Fields.Author("Jäger, Herbert"))

        record = {'author': authors, 'title': 'Bonjour tristesse'}

        formatted = userformat.author_title_format(record)
        self.assertEqual(formatted, 'Jäger, H.: Bonjour tristesse')
Ejemplo n.º 2
0
    def test02(self):
        """A lone author written as "First Last" is rendered "Last, F.: title"."""
        authors = Fields.AuthorGroup()
        authors.append(Fields.Author("Françoise Sagan"))

        record = {'author': authors, 'title': 'Bonjour tristesse'}

        formatted = userformat.author_title_format(record)
        self.assertEqual(formatted, 'Sagan, F.: Bonjour tristesse')
Ejemplo n.º 3
0
    def update_content(self, entry, text):
        """Parse the edited text into an author group and store it in *entry*.

        *text* holds one author per line; blank lines are skipped.  Each line
        is split on commas and read as one of:

          - ``Last``
          - ``Last, First``
          - ``Last, Lineage, First`` (any further component is ignored)

        Whitespace around every component is removed, so ``Doe, John`` yields
        the first name ``John`` and not `` John``.  The resulting
        Fields.AuthorGroup is stored under ``entry[self.field]``.
        """
        group = Fields.AuthorGroup()

        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue

            # Strip each component: the text after a comma normally starts
            # with a space that is not part of the name.
            parts = [part.strip() for part in line.split(',')]

            if len(parts) == 1:
                first, last, lineage = None, parts[0], None
            elif len(parts) == 2:
                first, last, lineage = parts[1], parts[0], None
            else:
                first, last, lineage = parts[2], parts[0], parts[1]

            # Author tuple layout: (honorific, first, last, lineage).
            # NOTE(review): the slot meaning is inferred from the variable
            # names used here -- confirm against Fields.Author.
            group.append(Fields.Author((None, first, last, lineage)))

        entry[self.field] = group
Ejemplo n.º 4
0
    def test01(self):
        """Check author_editor_format as the author group grows to four names."""
        authors = Fields.AuthorGroup()
        record = {'author': authors}

        # (author to add, expected rendering once that author is in the group)
        steps = (
            ("Bruce Rind", 'Rind, B.'),
            ("Otto Forster", 'Rind/Forster'),
            ("Nicolas Bourbaki", 'Rind/Forster/Bourbaki'),
            ("Léonid Kameneff", 'Rind, B. et al.'),
        )

        for name, expected in steps:
            authors.append(Fields.Author(name))
            rendered = userformat.author_editor_format(record)
            self.assertEqual(rendered, expected)
Ejemplo n.º 5
0
    def parse_author(self, text):
        """Build a Fields.AuthorGroup from a run of "Last Initials." names.

        *text* is cut at every full stop that is followed by whitespace or
        that ends the string.  Within each piece the last whitespace-separated
        word is taken as the initials (``JK`` becomes ``J. K.``) and all the
        words before it form the last name; a single-word piece is a last
        name on its own.  Empty or whitespace-only pieces are ignored.

        Returns the Fields.AuthorGroup holding one Fields.Author per piece.
        """
        # Raw string: "\." and "\s" are regex escapes, not string escapes.
        separator = re.compile(r'\.(?:$|\s+)')

        group = Fields.AuthorGroup()

        for name in separator.split(text):
            words = name.split()
            if not words:
                # Nothing but whitespace between two separators.
                continue

            if len(words) == 1:
                last, first = words[0], None
            else:
                last = ' '.join(words[:-1])
                # One initial per character, without a trailing blank.
                first = '. '.join(words[-1]) + '.'

            group.append(Fields.Author(copy=(None, first, last, None)))

        return group
Ejemplo n.º 6
0
    def next(self):
        """Read the next MEDLINE record from self.file and return it as an entry.

        Blank lines in front of the record are skipped.  The record consists
        of tagged lines (matched by ``header``) and continuation lines
        (matched by ``contin``); it ends at the first blank line or at the
        end of the file.  Lines matching neither pattern are ignored.

        Returns a Base.Entry of type 'article', or an empty dictionary when
        the end of the file is reached before a record starts.
        """
        table = {}

        # Skip the blank lines in front of the record.
        while 1:
            line = self.file.readline()
            if line == '':
                # End of file: there is no further record.
                return table

            line = line.rstrip()
            if line != '':
                break

        # Collect the raw values of every tag; a tag may occur several times.
        current = None
        data = ''

        while 1:
            head = header.match(line)
            if head:
                if current:
                    table.setdefault(current, []).append(data)

                current = head.group(1).strip()
                data = head.group(2)
            else:
                cont = contin.match(line)
                if cont:
                    data = data + ' ' + cont.group(1)

            line = self.file.readline()
            if line == '':
                break

            line = line.rstrip()
            if line == '':
                break

        # Don't forget the last item.
        if current:
            table.setdefault(current, []).append(data)

        # Create the entry with the actual fields.
        norm = {}
        entry_type = Types.get_entry('article')

        if 'PMID' in table:
            pmid = table['PMID'][0]
            norm['url'] = Fields.URL(medurl + pmid)
            norm['medline-pmid'] = Fields.Text(pmid)
            del table['PMID']

        if 'UI' in table:
            norm[one_to_one['UI']] = Fields.Text(table['UI'][0])
            del table['UI']

        if 'AU' in table:
            group = Fields.AuthorGroup()

            for au in table['AU']:
                # Analyze the author ourselves.
                first, last, lineage = [], [], []

                for part in au.split(' '):
                    if part.isupper():
                        # An upper-case word is a block of initials, unless
                        # no last name has been seen yet: there can't be a
                        # first name without a last name.
                        if last:
                            first.append(part)
                        else:
                            last.append(part)
                    elif first:
                        # Anything after the initials must be a lineage.
                        lineage.append(part)
                    else:
                        last.append(part)

                if len(first) > 1:
                    print("medline: long first name found. skipping.")
                    first = first[0:1]

                if first:
                    # "JK" -> "J. K."
                    first = '. '.join(first[0]) + '.'
                else:
                    first = None

                if last:
                    last = ' '.join(last)
                else:
                    last = None

                if lineage:
                    lineage = ' '.join(lineage)
                else:
                    lineage = None

                group.append(Fields.Author((None, first, last, lineage)))

            norm[one_to_one['AU']] = group
            del table['AU']

        if 'DP' in table:
            # Only the first space-separated word of the date is used.
            date_words = table['DP'][0].split(' ')
            norm[one_to_one['DP']] = Fields.Date(date_words[0])
            del table['DP']

        # The simple fields: unknown tags are stored as 'medline-<tag>'.
        for f in table.keys():
            f_mapped = one_to_one.get(f, 'medline-%s' % (f.lower()))
            text_type = Types.get_field(f_mapped).type
            norm[f_mapped] = text_type(" ; ".join(table[f]))

        return Base.Entry(None, entry_type, norm)
Ejemplo n.º 7
0
    def next (self):
        """Read the next record of an ISI export and return it as an entry.

        Lines in front of the record are scanned for file level information
        (``Date:``, ``Notes:``, ``FN``, ``VR``) and appended to
        self.extraneous.  The record starts at the first other two-letter tag
        and is read up to its ``ER`` tag, the end of the file, or a line that
        is neither a tagged line nor a continuation line.

        self.isifileformat and self.isifileinfo are filled in from the file
        level information unless they already hold a value.

        Returns a Base.Entry of type 'article', or an empty dictionary when
        the end of the file is reached before a record starts.
        """
        lines = {}
        in_table = {}
        file_notes, file_time, file_version, file_format = ('', '', '', '')

        # Preamble: everything up to the first two-letter record tag.
        while 1:
            line = self.file.readline()
            if line == '':
                return lines # what does that mean ??

            head = xheader.match(line)
            if head:
                tag = head.group(1)
                if tag == 'Date:':
                    # parsedate() returns None for a date it cannot parse;
                    # keep the empty default in that case.
                    parsed = rfc822.parsedate(head.group(2))
                    if parsed:
                        file_time = time.strftime("%Y-%m-%d %H:%M", parsed)
                elif tag == 'Notes:':
                    file_notes = head.group(2).strip()
                elif tag == 'FN':
                    file_format = head.group(2)
                elif tag == 'VR':
                    file_version = head.group(2)
                elif len(tag) == 2:
                    # First line of the record: not an extraneous line.
                    break
            self.extraneous.append(line)

        self.isifileformat = self.isifileformat or "Isifile format %s(%s)" % (
            file_format, file_version)
        self.isifileinfo = self.isifileinfo or "ISI %s (%s) %s" %(
            file_time, file_notes, login_name)

        # Body of the record: gather the values of every tag.
        while 1:
            if line == 'ER':
                break
            if head:
                tag = head.group(1)
                if tag == 'ER':
                    break
                lines.setdefault(tag, []).append(head.group(3))
            else:
                cont = contin.match(line)
                if not cont:
                    break
                # A continuation line belongs to the most recent tag.
                lines[tag].append(cont.group(1))
            line = self.file.readline()
            if line == '':
                break # error situation
            head = header.match(line)

        if 'PT' in lines:
            # startswith() also copes with an empty PT value.
            if lines['PT'][0].strip().startswith('J'):
                del lines['PT']
            else:
                print('Warning: Unknown type of entry (%s) -- may need editing.' % (
                    lines['PT'],))

        entry_type = Types.get_entry('article')

        for key in ('AU', 'ED'):
            if key not in lines:
                continue

            group = Fields.AuthorGroup()
            for item in lines[key]:
                parts = item.split(',')
                if item.strip() == '[Anon]' or len(parts) != 2:
                    # Not of the form "Last, Initials": hand the text to
                    # Fields.Author unchanged instead of failing on it.
                    auth = [item]
                else:
                    name, firstn = parts
                    auth = ["%s, " % name]
                    # Every character of the initials gets its own full stop.
                    for initial in firstn.strip():
                        auth.append("%s. " % initial)
                group.append(Fields.Author("".join(auth)))

            if key == 'AU':
                in_table['author'] = group
            elif key == 'ED':
                in_table['editor'] = group
            del lines[key]

        # BP / EP are joined pairwise into the 'pages' field.
        if 'BP' in lines and 'EP' in lines:
            if len(lines['BP']) == len(lines['EP']):
                pages = []
                for firstpg, lastpg in zip(lines['BP'], lines['EP']):
                    pages.append('%s -- %s' % (firstpg, lastpg))
                in_table['pages'] = Fields.Text('; '.join(pages))
                del lines['BP']
                del lines['EP']
            else:
                print('inconsistent BP, EP fields found')

        if 'PG' in lines:
            in_table['size'] = Fields.Text('%s p.' % (lines['PG'][0]))
            del lines['PG']

        if 'PY' in lines:
            in_table['date'] = Fields.Date(lines['PY'][0])
            del lines['PY']

        if 'ID' in lines:
            # The ID values are folded into the DE values.
            val = "[ISI:] %s ;;" % (' '.join(lines['ID']).lower())
            if 'DE' in lines:
                lines['DE'].append('; ' + val)
            else:
                lines['DE'] = [val]
            del lines['ID']

        # journal titles come in various forms

        if 'SO' in lines:
            uc_title = ' '.join(lines['SO'])
            in_table['journal'] = Fields.Text(uc_title)
            if 'JI' in lines:
                # Use the JI value to decide which words of the SO title are
                # capitalized; the separators are kept by the capturing group.
                uc_words = re.split(r"([- .,/]+)", uc_title)
                ca_title = re.split(r"[- .,/]+", ' '.join(lines['JI']))
                i, Title = 0, []
                for word in uc_words:
                    Word = word.capitalize()
                    # Once every JI word has been used up, the remaining
                    # words are simply lower-cased.
                    pending = i < len(ca_title)
                    if pending and word == ca_title[i]:
                        Title.append(word)
                        i += 1
                    elif pending and Word.startswith(ca_title[i]):
                        Title.append(Word)
                        i += 1
                    else:
                        Title.append(word.lower())
                del lines['JI']
                in_table['journal'] = Fields.Text("".join(Title))
            del lines['SO']

        # Remaining tags: mapped through key_map, else stored as 'isifile-<tag>'.
        for key in lines.keys():
            mapped_key, joiner = key_map.get(
                key, ('isifile-%s' %(key.lower()), ' ; '))
            text_type = Types.get_field(mapped_key).type
            in_table[mapped_key] = text_type(joiner.join(lines[key]))

        return Base.Entry(None, entry_type, in_table)
Ejemplo n.º 8
0
    def next(self):
        """Read the next Refer record from self.file and return it as an entry.

        A record is a run of tagged lines (matched by ``tag_re``); any other
        non-blank line continues the value of the current tag.  The record
        ends at a blank line or at the end of the file.  Tags missing from
        self.mapping are reported and skipped.

        The entry type is chosen from the fields that are present, and a
        'label' field becomes the key of the entry.

        Returns a Base.Entry, or None when the end of the file is reached and
        nothing has been read.
        """
        data = ''
        fields = {}
        field = None      # name of the field whose text is gathered in `data`
        label = None

        def store(name, raw, current_label):
            """Record a finished field; return the (possibly new) label."""
            if not name:
                return current_label

            # Collapse every run of whitespace into one blank.
            value = ' '.join(raw.split())
            if name == "label":
                return value

            fields.setdefault(name, []).append(value)
            return current_label

        while 1:
            line = self.file.readline()

            # A field still being gathered counts as content too, otherwise
            # a record made of a single field would be lost.
            has_content = fields or field

            if line == '' and not has_content:
                return None

            line = line.strip()

            if line == '' and has_content:
                # store the current field
                label = store(field, data, label)

                # determine the real type
                if 'journal' in fields:
                    kind = 'article'
                elif 'booktitle' in fields:
                    kind = 'inbook'
                elif 'volume' in fields or 'number' in fields:
                    kind = 'inproceedings'
                elif 'publisher' in fields:
                    kind = 'book'
                elif 'author' in fields and 'title' in fields:
                    kind = 'unpublished'
                else:
                    kind = 'misc'

                entry = Types.get_entry(kind)

                # Turn the lists of raw strings into field objects.
                for f in list(fields):
                    field_type = Types.get_field(f).type

                    if field_type == Fields.AuthorGroup:
                        group = Fields.AuthorGroup()

                        for auth in fields[f]:
                            group.append(Fields.Author(auth))

                        fields[f] = group

                    else:
                        if len(fields[f]) > 1:
                            # Warn, then keep the first value, so that the
                            # entry never holds an unconverted list.
                            sys.stderr.write(
                                "warning: field `%s' is defined more than once\n"
                                % f)

                        fields[f] = field_type(fields[f][0])

                if label:
                    key = Key.Key(None, label)
                else:
                    key = None

                return Base.Entry(key, entry, fields)

            t = tag_re.match(line)
            # we matched a new field start
            if t:
                # store the current field
                label = store(field, data, label)

                tag = t.group(1)
                if tag not in self.mapping:
                    print("warning: key `%s' has been skipped" % (tag))
                    field = None
                    data = ''
                else:
                    field = self.mapping[tag][0]
                    data = t.group(2)

                continue

            # in the general case, append the new text
            data = data + ' ' + line