def test03(self):
    """Non-ASCII characters in the last name must survive formatting."""
    authors = Fields.AuthorGroup()
    authors.append(Fields.Author("Jäger, Herbert"))
    record = {'author': authors, 'title': 'Bonjour tristesse'}
    formatted = userformat.author_title_format(record)
    self.assertEqual(formatted, 'Jäger, H.: Bonjour tristesse')
def test02(self):
    """A "First Last" name is rendered as "Last, F." before the title."""
    authors = Fields.AuthorGroup()
    authors.append(Fields.Author("Françoise Sagan"))
    record = {'author': authors, 'title': 'Bonjour tristesse'}
    formatted = userformat.author_title_format(record)
    self.assertEqual(formatted, 'Sagan, F.: Bonjour tristesse')
def update_content(self, entry, text):
    """Rebuild the author group of ``entry[self.field]`` from editor text.

    ``text`` holds one author per line; each line is either
    "Last", "Last, First" or "Last, Lineage, First".  Blank lines
    are skipped.  Any components beyond the third are ignored.
    """
    group = Fields.AuthorGroup()
    for raw_line in string.split(text, '\n'):
        name = string.strip(raw_line)
        if name == '':
            continue
        parts = string.split(name, ',')
        count = len(parts)
        if count >= 3:
            first, last, lineage = parts[2], parts[0], parts[1]
        elif count == 2:
            first, last, lineage = parts[1], parts[0], None
        else:
            first, last, lineage = None, parts[0], None
        group.append(Fields.Author((None, first, last, lineage)))
    entry[self.field] = group
    return
def test01(self):
    """author_editor_format: one name abbreviated, 2-3 names joined
    with '/', four or more collapsed to "First-author et al."."""
    authors = Fields.AuthorGroup()
    record = {'author': authors}
    # (author to add, expected formatting after adding it)
    steps = [
        ("Bruce Rind", 'Rind, B.'),
        ("Otto Forster", 'Rind/Forster'),
        ("Nicolas Bourbaki", 'Rind/Forster/Bourbaki'),
        ("Léonid Kameneff", 'Rind, B. et al.'),
    ]
    for name, expected in steps:
        authors.append(Fields.Author(name))
        self.assertEqual(userformat.author_editor_format(record), expected)
def parse_author(self, text):
    """Parse a period-separated author list into an AuthorGroup.

    ``text`` contains names terminated by '.' (followed by whitespace
    or end of string).  Within one name, the final word is taken as
    the run of first-name initials and every character in it gets a
    trailing '. '; the preceding words form the last name.
    """
    group = Fields.AuthorGroup()
    name_end = re.compile(r'\.(?:$|\s+)')
    each_char = re.compile(r'(.)')
    for chunk in name_end.split(text):
        if not chunk:
            continue
        words = chunk.split()
        if len(words) == 1:
            last, first = words[0], None
        else:
            last = ' '.join(words[:-1])
            # "AB" -> "A. B. " : dot every initial character
            first = each_char.sub(r'\1. ', words[-1])
        group.append(Fields.Author(copy=(None, first, last, None)))
    return group
def next(self):
    """Read one Medline record from self.file and return a Base.Entry.

    First gathers the raw "TAG- value" lines into ``table`` (tag ->
    list of values), then maps known tags through ``one_to_one`` into
    typed fields.  Returns the (empty) ``table`` dict on EOF before any
    record data is found.
    """
    current = None   # tag of the field being accumulated
    data = ''        # text accumulated for `current`
    table = {}       # raw record: tag -> [value, ...]
    # Skip whitespace
    while 1:
        line = self.file.readline()
        # EOF before a record: the raw dict (possibly empty) is returned,
        # not a Base.Entry -- callers apparently rely on this sentinel.
        if line == '': return table
        line = string.rstrip(line)
        if line != '': break
    while 1:
        head = header.match(line)
        if head:
            # A new "TAG- value" line: flush the previous field first.
            if current:
                if table.has_key(current):
                    table[current].append(data)
                else:
                    table[current] = [data]
            current = string.strip(head.group(1))
            data = head.group(2)
        else:
            # Continuation line: append its text to the running value.
            cont = contin.match(line)
            if cont:
                data = data + ' ' + cont.group(1)
        line = self.file.readline()
        if line == '': break
        line = string.rstrip(line)
        # A blank line terminates the record.
        if line == '': break
    # don't forget the last item
    if current:
        if table.has_key(current):
            table[current].append(data)
        else:
            table[current] = [data]
    # create the entry with the actual fields
    norm = {}
    type = Types.get_entry('article')
    if table.has_key('PMID'):
        # Keep both a clickable URL and the raw PMID.
        norm['url'] = Fields.URL(medurl + table['PMID'][0])
        norm['medline-pmid'] = Fields.Text(table['PMID'][0])
        del table['PMID']
    if table.has_key('UI'):
        norm[one_to_one['UI']] = Fields.Text(table['UI'][0])
        del table['UI']
    if table.has_key('AU'):
        group = Fields.AuthorGroup()
        for au in table['AU']:
            # analyze the author by ourself.
            # Medline gives e.g. "Smith AB Jr": last name in mixed case,
            # initials in upper case, lineage after the initials.
            first, last, lineage = [], [], []
            for part in string.split(au, ' '):
                if part.isupper():
                    # in upper-case, this is a first name
                    if len(last) > 0:
                        first.append(part)
                    else:
                        # if there is no last name, there can't be a first name
                        last.append(part)
                else:
                    if len(first) > 0:
                        # there was a first name, this must be a lineage
                        lineage.append(part)
                    else:
                        last.append(part)
            if len(first) > 1:
                print "medline: long first name found. skipping."
                first = first[0:1]
            if len(first) > 0:
                # string.join over a string iterates its characters:
                # "AB" -> "A. B" -> "A. B."
                first = string.join(first[0], '. ') + '.'
            else:
                first = None
            if len(last) > 0:
                last = string.join(last, ' ')
            else:
                last = None
            if len(lineage) > 0:
                lineage = string.join(lineage, ' ')
            else:
                lineage = None
            group.append(Fields.Author((None, first, last, lineage)))
        norm[one_to_one['AU']] = group
        del table['AU']
    if table.has_key('DP'):
        # Only the first whitespace-separated token (the year) is used.
        fields = string.split(table['DP'][0], ' ')
        norm[one_to_one['DP']] = Fields.Date(fields[0])
        del table['DP']
    # The simple fields...
    # Unknown tags are preserved under a "medline-<tag>" field name.
    for f in table.keys():
        f_mapped = one_to_one.get(f, 'medline-%s' % (f.lower()))
        text_type = Types.get_field(f_mapped).type
        norm[f_mapped] = text_type(string.join(table[f], " ; "))
    return Base.Entry(None, type, norm)
def next (self):
    """Read one ISI export-file record from self.file and return a
    Base.Entry.

    First consumes the per-file header (FN/VR/Date:/Notes: lines) the
    first time through, recording it in self.isifileformat /
    self.isifileinfo, then collects the record's two-letter-tag fields
    into ``lines`` (tag -> list of values) and maps them to entry
    fields.  On EOF before a record starts, the raw ``lines`` dict is
    returned instead of an Entry.
    """
    lines = {}       # raw record: two-letter tag -> [value, ...]
    in_table = {}    # normalized fields for the resulting Entry
    file_notes, file_time, file_version, file_format = ('','','','')
    # --- file header / skip until the first two-letter record tag ---
    while 1:
        line = self.file.readline()
        if line == '': return lines # what does that mean ??
        head = xheader.match(line)
        if not head :
            pass
        elif head.group(1) == 'Date:':
            file_time = time.strftime(
                "%Y-%m-%d %H:%M",
                rfc822.parsedate(head.group(2)))
        elif head.group(1) == 'Notes:':
            file_notes = string.strip(head.group(2))
        elif head.group(1) == 'FN':
            file_format = head.group(2)
        elif head.group(1) == 'VR':
            file_version = head.group(2)
        elif len(head.group(1)) == 2 :
            # First real record tag: leave `line`/`head` for the loop below.
            break
        else :
            pass
        self.extraneous.append(line)
    # Remember file-level metadata only once (kept from a previous record
    # if already set).
    self.isifileformat = self.isifileformat or "Isifile format %s(%s)" % (
        file_format, file_version)
    self.isifileinfo = self.isifileinfo or "ISI %s (%s) %s" %(
        file_time, file_notes, login_name)
    # --- collect the record's fields; `head` carries over from above ---
    while 1:
        # NOTE(review): `line` still has its newline at this point, so
        # this comparison looks unreachable; 'ER' is caught via `key` below.
        if line == 'ER':break
        if head :
            key = head.group(1)
            if key == 'ER': break   # End of Record
            val = head.group(3)
            if lines.has_key(key):
                lines[key].append(val)
            else:
                lines[key] = [val]
        else:
            # Continuation line: extend the most recent field.
            cont = contin.match(line)
            if cont :
                val = cont.group(1)
                lines[key].append(val)
            else:
                break
        line = self.file.readline()
        if line == '': break # error situation
        head = header.match (line)
    # PT (publication type): only journal ('J') entries are expected.
    key = 'PT'
    if lines.has_key(key):
        if string.strip(lines[key][0])[0] == 'J':
            del lines [key]
        else:
            print 'Warning: Unknown type of entry (%s) -- may need editing.' %( lines[key])
    type = Types.get_entry ('article')
    # Authors / editors: "Last, FI" with initials run together.
    for key in ( 'AU', 'ED'):
        if lines.has_key(key):
            group = Fields.AuthorGroup()
            for item in lines[key]:
                if string.strip(item) =='[Anon]' :
                    auth = [item]
                else:
                    name, firstn = string.split (item, ',')
                    auth = ["%s, " % name]
                    # Each character of the first-name part becomes a
                    # dotted initial.
                    for i in string.strip (firstn):
                        auth.append ("%s. " % i)
                group.append (Fields.Author("".join(auth)))
            if key == 'AU':
                in_table['author'] = group
            elif key == 'ED':
                in_table['editor'] = group
            del lines[key]
    # Pages: pair up begin (BP) / end (EP) page lists; PG is a page count.
    key, key1, key2 = 'PG', 'BP', 'EP'
    if lines.has_key(key1) and lines.has_key(key2):
        if len(lines[key1]) == len(lines[key2]):
            pages = []
            for i in range(len(lines[key1])):
                firstpg = lines[key1] [i]
                lastpg = lines[key2] [i]
                pages.append(('%s -- %s' % (firstpg, lastpg)))
            in_table['pages'] = Fields.Text (string.join(pages, '; '))
            del lines[key1]; del lines[key2]
        else:
            print 'inconsistent BP, EP fields found'
    if lines.has_key(key):
        in_table['size'] = Fields.Text ('%s p.' %(lines[key][0]))
        del lines[key]
    key = 'PY'
    if lines.has_key(key):
        val = lines[key][0]
        in_table['date'] = Fields.Date(val)
        del lines[key]
    # ID (ISI keywords): folded into the DE keyword field.
    key = 'ID'
    if lines.has_key(key):
        val = "[ISI:] %s ;;" %(string.lower(string.join(lines[key], ' ')))
        if lines.has_key('DE'):
            lines['DE'].append ('; ' + val)
        else :
            lines['DE'] = [val]
        del lines[key]
    # journal titles come in various forms
    if lines.has_key ('SO'):
        uc_title = ' '.join(lines['SO'])
        in_table ['journal'] = Fields.Text (uc_title)
        if lines.has_key('JI'):
            # Use the abbreviated title (JI) to decide which words of the
            # all-caps SO title should keep a capital letter.
            uc_title = re.split(r"([- .,/]+)", uc_title)
            ca_title = re.split(r"[- .,/]+", ' '.join(lines['JI']))
            i , Title = 0, []
            for word in uc_title:
                Word = string.capitalize(word)
                if word == ca_title [i]:
                    # word kept verbatim in the abbreviation
                    Title.append (word)
                    i += 1
                elif Word.startswith(ca_title[i]):
                    # abbreviation is a prefix: keep word capitalized
                    Title.append(Word)
                    i += 1
                else:
                    # word absent from the abbreviation: lower-case it
                    Title.append(string.lower(word))
            del lines['JI']
            in_table['journal'] = Fields.Text ("".join(Title))
        del lines['SO']
    # Remaining tags: map via key_map, or keep as "isifile-<tag>".
    for key in lines.keys():
        mapped_key, joiner = key_map.get(
            key, ('isifile-%s' %(key.lower()), ' ; '))
        text_type = Types.get_field (mapped_key).type
        in_table [mapped_key] = text_type (joiner.join(lines[key]))
    return Base.Entry ( None, type, in_table)
def next(self):
    """Read one tagged record from self.file and return a Base.Entry,
    or None at end of file.

    Fields are introduced by a tag line (matched by ``tag_re``) and may
    span several lines; a blank line terminates the record.  Tags are
    translated through ``self.mapping``; unknown tags are skipped with
    a warning.  The entry type is guessed from which fields are present.
    """
    data = ''      # text accumulated for the current field
    fields = {}    # mapped field name -> [value, ...]
    # NOTE(review): `type` is reused: first as the current field's mapped
    # name, then as the guessed entry type, then as a Fields type class.
    type = None
    label = None   # citation key, taken from the pseudo-field "label"
    while 1:
        line = self.file.readline()
        # EOF with nothing accumulated: no more entries.
        if line == '' and not fields:
            return None
        line = string.strip(line)
        if line == '' and fields:
            # Blank line (or EOF) ends the record.
            # store the current field
            if type:
                if type == "label":
                    label = string.join(string.split(data), ' ')
                elif fields.has_key(type):
                    fields[type].append(
                        string.join(string.split(data), ' '))
                else:
                    fields[type] = [string.join(string.split(data), ' ')]
            # determine the real type
            # (while/break used as a poor man's switch: first match wins)
            while 1:
                if fields.has_key('journal'):
                    type = 'article'
                    break
                if fields.has_key('booktitle'):
                    type = 'inbook'
                    break
                if fields.has_key('volume') or fields.has_key('number'):
                    type = 'inproceedings'
                    break
                if fields.has_key('publisher'):
                    type = 'book'
                    break
                if fields.has_key('author') and fields.has_key('title'):
                    type = 'unpublished'
                    break
                type = 'misc'
                break
            entry = Types.get_entry(type)
            # Convert raw value lists into typed field objects.
            for f in fields.keys():
                type = Types.get_field(f).type
                if type == Fields.AuthorGroup:
                    group = Fields.AuthorGroup()
                    for auth in fields[f]:
                        group.append(Fields.Author(auth))
                    fields[f] = group
                else:
                    # Non-author fields must be unique; keep the first.
                    if len(fields[f]) > 1:
                        sys.stderr.write(
                            "warning: field `%s' is defined more than once" % f)
                        continue
                    fields[f] = type(fields[f][0])
            if label:
                key = Key.Key(None, label)
                return Base.Entry(key, entry, fields)
            else:
                return Base.Entry(None, entry, fields)
        t = tag_re.match(line)
        # we matched a new field start
        if t:
            # Flush the field accumulated so far.
            if type:
                if type == "label":
                    label = string.join(string.split(data), ' ')
                elif fields.has_key(type):
                    fields[type].append(
                        string.join(string.split(data), ' '))
                else:
                    fields[type] = [string.join(string.split(data), ' ')]
            type = t.group(1)
            if not self.mapping.has_key(type):
                print "warning: key `%s' has been skipped" % (type)
                type = None
                data = ''
            else:
                # store the current field
                type = self.mapping[type][0]
                data = t.group(2)
            continue
        # in the general case, append the new text
        data = data + ' ' + line